コード例 #1
0
def is_drnj_time(field_name, value):
    """Coerce ``value`` to a drnj_time value, or reject it.

    Despite the ``is_`` prefix this does not return a boolean; it returns the
    (possibly converted) value.

    Args:
        field_name: Name of the field being checked; used only in the error
            message.
        value: A float, which is returned unchanged, or a text string, which
            is handed to ``py_utc_time2drnj_time`` for conversion.

    Returns:
        ``value`` itself when it is a float, otherwise the result of
        ``py_utc_time2drnj_time(value)``.

    Raises:
        TypeError: If ``value`` is neither a float nor a text string.
    """
    # ``unicode`` only exists on Python 2. Looking it up unguarded would turn
    # the intended TypeError below into a NameError on Python 3.
    try:
        text_types = (str, unicode)
    except NameError:
        text_types = (str,)

    if isinstance(value, float):
        return value
    if isinstance(value, text_types):
        return py_utc_time2drnj_time(value)
    raise TypeError('Field: %s is not of type drnj_time' % field_name)
コード例 #2
0
    def prepare_hist_and_plot(self, n_tweets, users, n_bins, campaign_id):
        """Summarise a campaign's users as histograms and simple counts.

        Args:
            n_tweets: Number of tweets in the campaign. When it exceeds
                200000 nothing is computed and ``None`` is returned.
            users: Sized, re-iterable collection of per-user records. Each
                record is read as ``rec['user']['created_at']``,
                ``rec['user']['default_profile_image']``,
                ``rec['user']['statuses_count']`` and ``rec['n_user_tweets']``.
            n_bins: Passed as ``bins`` to every ``numpy.histogram`` call.
            campaign_id: Stored unchanged in the returned document.

        Returns:
            ``None`` for oversized campaigns. Otherwise a dict with the keys
            ``'campaign_id'``, ``'histogram'`` (the collected counts and
            histograms, with numpy arrays converted to plain lists) and
            ``'created_at'`` (the value of ``now_in_drnj_time()``).
        """
        import numpy

        # Chart output is switched off here; when enabled the three
        # histograms are saved as 1.pdf, 2.pdf and 3.pdf.
        plot_graphs = False

        def save_bar_chart(edges, counts, title, filename, label_for=None):
            """Save one histogram as a bar chart with a tick per bin."""
            # Imported lazily so the non-plotting path does not need
            # matplotlib to be importable.
            import matplotlib.pyplot as plot

            # numpy.histogram returns one more edge than there are counts.
            left_edges = edges[:-1]
            bar_width = (edges[1] - edges[0]) / 2
            if label_for is None:
                tick_labels = left_edges
            else:
                tick_labels = [label_for(edge) for edge in left_edges]

            # A fresh figure per chart keeps earlier bars out of later files.
            plot.figure()
            plot.bar(left_edges, counts, width=bar_width, align='center')
            plot.xticks(left_edges, tick_labels)
            plot.title(title)
            plot.savefig(filename, dpi=600)
            plot.close()

        def percentage(count, total):
            """Return ``count`` as a percentage of ``total`` (0.0 if empty)."""
            # Without this guard an empty user list raises ZeroDivisionError.
            if not total:
                return 0.0
            return 100 * (float(count) / total)

        def creation_label(drnj_time):
            """Format a bin edge as a ``day month year`` string."""
            return time.strftime('%d %b %Y',
                                 time.gmtime(drnj_time2py_time(drnj_time)))

        hist = {
            'user_creation': {
                'data': None,
                'bins': None,
            },
            'user_n_tweets': {
                'data': None,
                'bins': None,
            },
            'user_n_tweets_overall': {
                'data': None,
                'bins': None,
            },
            'n_tweets': None,
            'n_unique_users': None,
            'n_default_profile_image': None,
            'n_lower_than_threshold': None,
        }

        self.logger.debug("How many tweets? %d" % n_tweets)
        hist['n_tweets'] = n_tweets

        # Campaigns above this size are not analysed at all.
        if n_tweets > 200000:
            return None

        n_unique_users = len(users)
        self.logger.debug("How many unique users? %d" % n_unique_users)
        hist['n_unique_users'] = n_unique_users

        # --- Histogram of user creation dates ---
        sec_title = "Histogram of user creation dates?"
        creation_times = []
        for record in users:
            created_at = record['user']['created_at']
            # Only values that are not already floats are converted.
            if not isinstance(created_at, float):
                created_at = py_utc_time2drnj_time(created_at)
            creation_times.append(created_at)

        counts, edges = numpy.histogram(creation_times, bins=n_bins)
        hist['user_creation']['data'] = counts
        hist['user_creation']['bins'] = edges
        if plot_graphs:
            save_bar_chart(edges, counts, sec_title, '1.pdf',
                           label_for=creation_label)

        # --- Histogram of tweets per user within this campaign ---
        sec_title = "Histogram of number of tweets of each user in this campaign"
        campaign_counts = [int(record['n_user_tweets']) for record in users]
        counts, edges = numpy.histogram(campaign_counts, bins=n_bins)
        hist['user_n_tweets']['data'] = counts
        hist['user_n_tweets']['bins'] = edges
        if plot_graphs:
            save_bar_chart(edges, counts, sec_title, '2.pdf')

        # --- Share of users with the default profile image ---
        sec_title = "What percentage of them used the default profile image?"
        n_default_profile_image = sum(
            1 for record in users if record['user']['default_profile_image'])
        hist['n_default_profile_image'] = n_default_profile_image
        self.logger.debug("%s: %0.2f%%" % (
            sec_title, percentage(n_default_profile_image, n_unique_users)))

        # --- Histogram of overall tweet counts of the users ---
        sec_title = "Histogram of tweet counts of unique users"
        overall_counts = [int(record['user']['statuses_count'])
                          for record in users]
        counts, edges = numpy.histogram(overall_counts, bins=n_bins)
        hist['user_n_tweets_overall']['data'] = counts
        hist['user_n_tweets_overall']['bins'] = edges
        if plot_graphs:
            save_bar_chart(edges, counts, sec_title, '3.pdf')

        # --- Share of users with fewer than 5 tweets overall ---
        sec_title = "What percentage of them have lower than 5 tweets?"
        n_lower_than_threshold = sum(
            1 for record in users if record['user']['statuses_count'] < 5)
        hist['n_lower_than_threshold'] = n_lower_than_threshold
        self.logger.debug("%s: %0.2f%%" % (
            sec_title, percentage(n_lower_than_threshold, n_unique_users)))

        self.logger.debug(hist)

        # ndarray.tolist() yields plain Python lists holding plain Python
        # numbers, whereas list(ndarray) would keep numpy scalar elements.
        for section in hist.values():
            if isinstance(section, dict):
                for key, value in section.items():
                    if isinstance(value, numpy.ndarray):
                        section[key] = value.tolist()

        return {
            'campaign_id': campaign_id,
            'histogram': hist,
            'created_at': now_in_drnj_time(),
        }
コード例 #3
0
ファイル: reporting-schema.py プロジェクト: kuzeygh/drenaj
# Campaign summary statistics. This fragment relies on names set up earlier
# in the script: tweets_coll, campaign_query, users, hist, numpy, plot, time,
# plotGraphs, py_utc_time2drnj_time and drnj_time2py_time.

# How many tweets?
n_tweets = tweets_coll.find(campaign_query).count()
# Parenthesised print is valid on both Python 2 and Python 3 and emits the
# same text on either.
print("How many tweets? %d" % n_tweets)
hist['n_tweets'] = n_tweets

# How many unique users?
n_unique_users = len(users)
print("How many unique users? %d" % n_unique_users)
hist['n_unique_users'] = n_unique_users

# Histogram of user creation dates.
sec_title = "Histogram of user creation dates?"

tmp_dates = [py_utc_time2drnj_time(x['user']['created_at']) for x in users]

(hist['user_creation']['data'],
 hist['user_creation']['bins']) = numpy.histogram(tmp_dates, bins=100)

if plotGraphs:
    # numpy.histogram returns one more edge than there are counts, so the
    # last edge is dropped to get one x position per bar.
    bins = hist['user_creation']['bins'][:-1]
    width = (hist['user_creation']['bins'][1] -
             hist['user_creation']['bins'][0]) / 2
    plot.bar(bins, hist['user_creation']['data'], width=width, align='center')

    # One "day month year" label per bar.
    xticklabels = [
        time.strftime('%d %b %Y', time.gmtime(drnj_time2py_time(x)))
        for x in bins
    ]
コード例 #4
0
ファイル: drenajmongomanager.py プロジェクト: kuzeygh/drenaj
    def prepare_hist_and_plot(self, n_tweets, users, n_bins, campaign_id):
        """Summarise a campaign's users as histograms and simple counts.

        Args:
            n_tweets: Number of tweets in the campaign. When it exceeds
                200000 nothing is computed and ``None`` is returned.
            users: Sized, re-iterable collection of per-user records. Each
                record is read as ``rec['user']['created_at']``,
                ``rec['user']['default_profile_image']``,
                ``rec['user']['statuses_count']`` and ``rec['n_user_tweets']``.
            n_bins: Passed as ``bins`` to every ``numpy.histogram`` call.
            campaign_id: Stored unchanged in the returned document.

        Returns:
            ``None`` for oversized campaigns. Otherwise a dict with the keys
            ``'campaign_id'``, ``'histogram'`` (the collected counts and
            histograms, with numpy arrays converted to plain lists) and
            ``'created_at'`` (the value of ``now_in_drnj_time()``).
        """
        import numpy

        # Chart output is switched off here; when enabled the three
        # histograms are saved as 1.pdf, 2.pdf and 3.pdf.
        plot_graphs = False

        def save_bar_chart(edges, counts, title, filename, label_for=None):
            """Save one histogram as a bar chart with a tick per bin."""
            # Imported lazily so the non-plotting path does not need
            # matplotlib to be importable.
            import matplotlib.pyplot as plot

            # numpy.histogram returns one more edge than there are counts.
            left_edges = edges[:-1]
            bar_width = (edges[1] - edges[0]) / 2
            if label_for is None:
                tick_labels = left_edges
            else:
                tick_labels = [label_for(edge) for edge in left_edges]

            # A fresh figure per chart keeps earlier bars out of later files.
            plot.figure()
            plot.bar(left_edges, counts, width=bar_width, align='center')
            plot.xticks(left_edges, tick_labels)
            plot.title(title)
            plot.savefig(filename, dpi=600)
            plot.close()

        def percentage(count, total):
            """Return ``count`` as a percentage of ``total`` (0.0 if empty)."""
            # Without this guard an empty user list raises ZeroDivisionError.
            if not total:
                return 0.0
            return 100 * (float(count) / total)

        def creation_label(drnj_time):
            """Format a bin edge as a ``day month year`` string."""
            return time.strftime('%d %b %Y',
                                 time.gmtime(drnj_time2py_time(drnj_time)))

        hist = {
            'user_creation': {
                'data': None,
                'bins': None,
            },
            'user_n_tweets': {
                'data': None,
                'bins': None,
            },
            'user_n_tweets_overall': {
                'data': None,
                'bins': None,
            },
            'n_tweets': None,
            'n_unique_users': None,
            'n_default_profile_image': None,
            'n_lower_than_threshold': None,
        }

        self.logger.debug("How many tweets? %d" % n_tweets)
        hist['n_tweets'] = n_tweets

        # Campaigns above this size are not analysed at all.
        if n_tweets > 200000:
            return None

        n_unique_users = len(users)
        self.logger.debug("How many unique users? %d" % n_unique_users)
        hist['n_unique_users'] = n_unique_users

        # --- Histogram of user creation dates ---
        sec_title = "Histogram of user creation dates?"
        creation_times = []
        for record in users:
            created_at = record['user']['created_at']
            # Only values that are not already floats are converted.
            if not isinstance(created_at, float):
                created_at = py_utc_time2drnj_time(created_at)
            creation_times.append(created_at)

        counts, edges = numpy.histogram(creation_times, bins=n_bins)
        hist['user_creation']['data'] = counts
        hist['user_creation']['bins'] = edges
        if plot_graphs:
            save_bar_chart(edges, counts, sec_title, '1.pdf',
                           label_for=creation_label)

        # --- Histogram of tweets per user within this campaign ---
        sec_title = "Histogram of number of tweets of each user in this campaign"
        campaign_counts = [int(record['n_user_tweets']) for record in users]
        counts, edges = numpy.histogram(campaign_counts, bins=n_bins)
        hist['user_n_tweets']['data'] = counts
        hist['user_n_tweets']['bins'] = edges
        if plot_graphs:
            save_bar_chart(edges, counts, sec_title, '2.pdf')

        # --- Share of users with the default profile image ---
        sec_title = "What percentage of them used the default profile image?"
        n_default_profile_image = sum(
            1 for record in users if record['user']['default_profile_image'])
        hist['n_default_profile_image'] = n_default_profile_image
        self.logger.debug("%s: %0.2f%%" % (
            sec_title, percentage(n_default_profile_image, n_unique_users)))

        # --- Histogram of overall tweet counts of the users ---
        sec_title = "Histogram of tweet counts of unique users"
        overall_counts = [int(record['user']['statuses_count'])
                          for record in users]
        counts, edges = numpy.histogram(overall_counts, bins=n_bins)
        hist['user_n_tweets_overall']['data'] = counts
        hist['user_n_tweets_overall']['bins'] = edges
        if plot_graphs:
            save_bar_chart(edges, counts, sec_title, '3.pdf')

        # --- Share of users with fewer than 5 tweets overall ---
        sec_title = "What percentage of them have lower than 5 tweets?"
        n_lower_than_threshold = sum(
            1 for record in users if record['user']['statuses_count'] < 5)
        hist['n_lower_than_threshold'] = n_lower_than_threshold
        self.logger.debug("%s: %0.2f%%" % (
            sec_title, percentage(n_lower_than_threshold, n_unique_users)))

        self.logger.debug(hist)

        # ndarray.tolist() yields plain Python lists holding plain Python
        # numbers, whereas list(ndarray) would keep numpy scalar elements.
        for section in hist.values():
            if isinstance(section, dict):
                for key, value in section.items():
                    if isinstance(value, numpy.ndarray):
                        section[key] = value.tolist()

        return {
            'campaign_id': campaign_id,
            'histogram': hist,
            'created_at': now_in_drnj_time(),
        }
コード例 #5
0
# Campaign summary statistics. This fragment relies on names set up earlier
# in the script: tweets_coll, campaign_query, users, hist, numpy, plot, time,
# plotGraphs, py_utc_time2drnj_time and drnj_time2py_time.

# How many tweets?
n_tweets = tweets_coll.find(campaign_query).count()
# Parenthesised print is valid on both Python 2 and Python 3 and emits the
# same text on either.
print("How many tweets? %d" % n_tweets)
hist['n_tweets'] = n_tweets

# How many unique users?
n_unique_users = len(users)
print("How many unique users? %d" % n_unique_users)
hist['n_unique_users'] = n_unique_users

# Histogram of user creation dates.
sec_title = "Histogram of user creation dates?"

tmp_dates = [py_utc_time2drnj_time(x['user']['created_at']) for x in users]

(hist['user_creation']['data'],
 hist['user_creation']['bins']) = numpy.histogram(tmp_dates, bins=100)

if plotGraphs:
    # numpy.histogram returns one more edge than there are counts, so the
    # last edge is dropped to get one x position per bar.
    bins = hist['user_creation']['bins'][:-1]
    width = (hist['user_creation']['bins'][1] -
             hist['user_creation']['bins'][0]) / 2
    plot.bar(bins, hist['user_creation']['data'], width=width, align='center')

    # One "day month year" label per bar.
    xticklabels = [
        time.strftime('%d %b %Y', time.gmtime(drnj_time2py_time(x)))
        for x in bins
    ]

    plot.xticks(bins, xticklabels)
    plot.title(sec_title)
    plot.savefig('1.pdf', dpi=600)