def is_drnj_time(field_name, value):
    """Return ``value`` as a drnj time, converting it when it is a string.

    Args:
        field_name: Name of the field being validated; only used in the
            error message.
        value: A float, which is returned unchanged, or a string, which is
            passed to ``py_utc_time2drnj_time``.

    Returns:
        ``value`` itself for floats, otherwise the result of
        ``py_utc_time2drnj_time(value)``.

    Raises:
        TypeError: If ``value`` is neither a float nor a string.
    """
    # isinstance (rather than an exact type comparison) also accepts
    # subclasses of float and str.
    if isinstance(value, float):
        return value

    try:
        string_types = (str, unicode)
    except NameError:
        # Interpreters without a separate ``unicode`` builtin: ``str`` is
        # the only text type, and the lookup above must not mask the
        # TypeError raised below.
        string_types = (str,)

    if isinstance(value, string_types):
        return py_utc_time2drnj_time(value)

    raise TypeError('Field: %s is not of type drnj_time' % field_name)
def prepare_hist_and_plot(self, n_tweets, users, n_bins, campaign_id):
    """Build histogram statistics about the users of a campaign.

    Args:
        n_tweets: Number of tweets in the campaign. Campaigns with more
            than 200000 tweets are not processed.
        users: Sequence of dicts. Each entry is read through
            ``entry['n_user_tweets']`` and through ``entry['user']`` with
            the keys ``created_at``, ``default_profile_image`` and
            ``statuses_count``. May be empty.
        n_bins: Passed as ``bins`` to every ``numpy.histogram`` call.
        campaign_id: Stored unchanged in the returned document.

    Returns:
        ``None`` when ``n_tweets`` is greater than 200000. Otherwise a dict
        with the keys ``campaign_id``, ``histogram`` (the collected
        statistics, with histogram arrays converted to plain lists) and
        ``created_at`` (the value of ``now_in_drnj_time()``).
    """
    self.logger.debug("How many tweets? %d" % n_tweets)

    # TODO: abort if there are more than 200000 tweets.
    if n_tweets > 200000:
        return None

    import numpy

    # Chart rendering is switched off; the statistics are always computed.
    plot_graphs = False

    def save_bar_chart(title, counts, edges, filename, tick_labels=None):
        # matplotlib is imported only when a chart is actually drawn, so
        # the statistics do not depend on it being importable.
        import matplotlib.pyplot as plot

        positions = edges[:-1]
        # Bars are half as wide as the first bin.
        bar_width = (edges[1] - edges[0]) / 2
        plot.bar(positions, counts, width=bar_width, align='center')
        if tick_labels is None:
            tick_labels = positions
        plot.xticks(positions, tick_labels)
        plot.title(title)
        # NOTE(review): no new figure is opened between charts, so all
        # charts are drawn through the same pyplot state -- confirm that
        # this is intended before enabling plot_graphs.
        plot.savefig(filename, dpi=600)

    hist = {
        'user_creation': {'data': None, 'bins': None},
        'user_n_tweets': {'data': None, 'bins': None},
        'user_n_tweets_overall': {'data': None, 'bins': None},
        'n_tweets': n_tweets,
        'n_unique_users': None,
        'n_default_profile_image': None,
        'n_lower_than_threshold': None,
    }

    #
    # How many unique users?
    #
    n_unique_users = len(users)
    self.logger.debug("How many unique users? %d" % n_unique_users)
    hist['n_unique_users'] = n_unique_users

    def percent_of_users(count):
        # With no users there is nothing to divide by; report 0%.
        if not n_unique_users:
            return 0.0
        return 100 * (float(count) / n_unique_users)

    ######
    sec_title = "Histogram of user creation dates?"
    creation_times = []
    for entry in users:
        created_at = entry['user']['created_at']
        # Floats are used as they are; anything else is converted.
        if not isinstance(created_at, float):
            created_at = py_utc_time2drnj_time(created_at)
        creation_times.append(created_at)

    counts, edges = numpy.histogram(creation_times, bins=n_bins)
    hist['user_creation']['data'] = counts
    hist['user_creation']['bins'] = edges

    if plot_graphs:
        import time

        date_labels = [
            time.strftime('%d %b %Y', time.gmtime(drnj_time2py_time(x)))
            for x in edges[:-1]
        ]
        save_bar_chart(sec_title, counts, edges, '1.pdf',
                       tick_labels=date_labels)

    #####
    sec_title = "Histogram of number of tweets of each user in this campaign"
    campaign_counts = [int(entry['n_user_tweets']) for entry in users]
    counts, edges = numpy.histogram(campaign_counts, bins=n_bins)
    hist['user_n_tweets']['data'] = counts
    hist['user_n_tweets']['bins'] = edges

    if plot_graphs:
        save_bar_chart(sec_title, counts, edges, '2.pdf')

    #####
    sec_title = "What percentage of them used the default profile image?"
    n_default_profile_image = sum(
        1 for entry in users if entry['user']['default_profile_image'])
    hist['n_default_profile_image'] = n_default_profile_image
    self.logger.debug("%s: %0.2f%%" % (
        sec_title, percent_of_users(n_default_profile_image)))

    #####
    sec_title = "Histogram of tweet counts of unique users"
    overall_counts = [int(entry['user']['statuses_count']) for entry in users]
    counts, edges = numpy.histogram(overall_counts, bins=n_bins)
    hist['user_n_tweets_overall']['data'] = counts
    hist['user_n_tweets_overall']['bins'] = edges

    if plot_graphs:
        save_bar_chart(sec_title, counts, edges, '3.pdf')

    #
    sec_title = "What percentage of them have lower than 5 tweets?"
    n_lower_than_threshold = sum(
        1 for entry in users if entry['user']['statuses_count'] < 5)
    hist['n_lower_than_threshold'] = n_lower_than_threshold
    self.logger.debug("%s: %0.2f%%" % (
        sec_title, percent_of_users(n_lower_than_threshold)))

    self.logger.debug(hist)

    # Convert numpy arrays to normal python lists. ``tolist()`` also turns
    # the elements into plain Python numbers, which ``list(array)`` does not.
    for section in hist.values():
        if isinstance(section, dict):
            for key, value in list(section.items()):
                if isinstance(value, numpy.ndarray):
                    section[key] = value.tolist()

    return {
        'campaign_id': campaign_id,
        'histogram': hist,
        'created_at': now_in_drnj_time(),
    }
# How many tweets? n_tweets = tweets_coll.find(campaign_query).count() print "How many tweets? %d" % n_tweets hist['n_tweets'] = n_tweets # # How many unique users? # n_unique_users = len(users) print "How many unique users? %d" % n_unique_users hist['n_unique_users'] = n_unique_users ###### sec_title = "Histogram of user creation dates?" # tmp_dates = [py_utc_time2drnj_time(x['user']['created_at']) for x in users] (hist['user_creation']['data'], hist['user_creation']['bins']) = numpy.histogram(tmp_dates, bins=100) if plotGraphs: bins = hist['user_creation']['bins'][:-1] width = (hist['user_creation']['bins'][1] - hist['user_creation']['bins'][0]) / 2 plot.bar(bins, hist['user_creation']['data'], width=width, align='center') xticklabels = [ time.strftime('%d %b %Y', time.gmtime(drnj_time2py_time(x))) for x in bins ]
def prepare_hist_and_plot(self, n_tweets, users, n_bins, campaign_id):
    """Build histogram statistics about the users of a campaign.

    Args:
        n_tweets: Number of tweets in the campaign. Campaigns with more
            than 200000 tweets are not processed.
        users: Sequence of dicts. Each entry is read through
            ``entry['n_user_tweets']`` and through ``entry['user']`` with
            the keys ``created_at``, ``default_profile_image`` and
            ``statuses_count``. May be empty.
        n_bins: Passed as ``bins`` to every ``numpy.histogram`` call.
        campaign_id: Stored unchanged in the returned document.

    Returns:
        ``None`` when ``n_tweets`` is greater than 200000. Otherwise a dict
        with the keys ``campaign_id``, ``histogram`` (the collected
        statistics, with histogram arrays converted to plain lists) and
        ``created_at`` (the value of ``now_in_drnj_time()``).
    """
    self.logger.debug("How many tweets? %d" % n_tweets)

    # TODO: abort if there are more than 200000 tweets.
    if n_tweets > 200000:
        return None

    import numpy

    # Chart rendering is switched off; the statistics are always computed.
    plot_graphs = False

    def save_bar_chart(title, counts, edges, filename, tick_labels=None):
        # matplotlib is imported only when a chart is actually drawn, so
        # the statistics do not depend on it being importable.
        import matplotlib.pyplot as plot

        positions = edges[:-1]
        # Bars are half as wide as the first bin.
        bar_width = (edges[1] - edges[0]) / 2
        plot.bar(positions, counts, width=bar_width, align='center')
        if tick_labels is None:
            tick_labels = positions
        plot.xticks(positions, tick_labels)
        plot.title(title)
        # NOTE(review): no new figure is opened between charts, so all
        # charts are drawn through the same pyplot state -- confirm that
        # this is intended before enabling plot_graphs.
        plot.savefig(filename, dpi=600)

    hist = {
        'user_creation': {'data': None, 'bins': None},
        'user_n_tweets': {'data': None, 'bins': None},
        'user_n_tweets_overall': {'data': None, 'bins': None},
        'n_tweets': n_tweets,
        'n_unique_users': None,
        'n_default_profile_image': None,
        'n_lower_than_threshold': None,
    }

    #
    # How many unique users?
    #
    n_unique_users = len(users)
    self.logger.debug("How many unique users? %d" % n_unique_users)
    hist['n_unique_users'] = n_unique_users

    def percent_of_users(count):
        # With no users there is nothing to divide by; report 0%.
        if not n_unique_users:
            return 0.0
        return 100 * (float(count) / n_unique_users)

    ######
    sec_title = "Histogram of user creation dates?"
    creation_times = []
    for entry in users:
        created_at = entry['user']['created_at']
        # Floats are used as they are; anything else is converted.
        if not isinstance(created_at, float):
            created_at = py_utc_time2drnj_time(created_at)
        creation_times.append(created_at)

    counts, edges = numpy.histogram(creation_times, bins=n_bins)
    hist['user_creation']['data'] = counts
    hist['user_creation']['bins'] = edges

    if plot_graphs:
        import time

        date_labels = [
            time.strftime('%d %b %Y', time.gmtime(drnj_time2py_time(x)))
            for x in edges[:-1]
        ]
        save_bar_chart(sec_title, counts, edges, '1.pdf',
                       tick_labels=date_labels)

    #####
    sec_title = "Histogram of number of tweets of each user in this campaign"
    campaign_counts = [int(entry['n_user_tweets']) for entry in users]
    counts, edges = numpy.histogram(campaign_counts, bins=n_bins)
    hist['user_n_tweets']['data'] = counts
    hist['user_n_tweets']['bins'] = edges

    if plot_graphs:
        save_bar_chart(sec_title, counts, edges, '2.pdf')

    #####
    sec_title = "What percentage of them used the default profile image?"
    n_default_profile_image = sum(
        1 for entry in users if entry['user']['default_profile_image'])
    hist['n_default_profile_image'] = n_default_profile_image
    self.logger.debug("%s: %0.2f%%" % (
        sec_title, percent_of_users(n_default_profile_image)))

    #####
    sec_title = "Histogram of tweet counts of unique users"
    overall_counts = [int(entry['user']['statuses_count']) for entry in users]
    counts, edges = numpy.histogram(overall_counts, bins=n_bins)
    hist['user_n_tweets_overall']['data'] = counts
    hist['user_n_tweets_overall']['bins'] = edges

    if plot_graphs:
        save_bar_chart(sec_title, counts, edges, '3.pdf')

    #
    sec_title = "What percentage of them have lower than 5 tweets?"
    n_lower_than_threshold = sum(
        1 for entry in users if entry['user']['statuses_count'] < 5)
    hist['n_lower_than_threshold'] = n_lower_than_threshold
    self.logger.debug("%s: %0.2f%%" % (
        sec_title, percent_of_users(n_lower_than_threshold)))

    self.logger.debug(hist)

    # Convert numpy arrays to normal python lists. ``tolist()`` also turns
    # the elements into plain Python numbers, which ``list(array)`` does not.
    for section in hist.values():
        if isinstance(section, dict):
            for key, value in list(section.items()):
                if isinstance(value, numpy.ndarray):
                    section[key] = value.tolist()

    return {
        'campaign_id': campaign_id,
        'histogram': hist,
        'created_at': now_in_drnj_time(),
    }
# How many tweets? n_tweets = tweets_coll.find(campaign_query).count() print "How many tweets? %d" % n_tweets hist['n_tweets'] = n_tweets # # How many unique users? # n_unique_users = len(users) print "How many unique users? %d" % n_unique_users hist['n_unique_users'] = n_unique_users ###### sec_title = "Histogram of user creation dates?" # tmp_dates = [py_utc_time2drnj_time(x['user']['created_at']) for x in users] (hist['user_creation']['data'], hist['user_creation']['bins']) = numpy.histogram(tmp_dates, bins=100) if plotGraphs: bins = hist['user_creation']['bins'][:-1] width = (hist['user_creation']['bins'][1] - hist['user_creation']['bins'][0])/2 plot.bar(bins, hist['user_creation']['data'], width=width, align='center') xticklabels = [time.strftime('%d %b %Y', time.gmtime(drnj_time2py_time(x))) for x in bins] plot.xticks(bins, xticklabels) plot.title(sec_title) #plot.show() plot.savefig('1.pdf', dpi=600)