def sen_run_over_db(cursor):
    """Compute and store a sentiment score for every recent tweet of every user.

    For each user's tweet table, ensures a "sen" column exists, then scores
    every tweet from the last two years whose sentiment is still "None" and
    writes the score back to the row. Commits once per user.
    """
    users = db_get_all_users(cursor)
    cur_time = time.time()
    for i, u in enumerate(users):
        print(u, i, len(users))
        if not db_is_col(cursor, u, "sen"):
            db_add_col(cursor, u, "sen")
            print("adding")
        tweets = db_get_cols_from_table(cursor, u, ["tweet_id", "tweet", "sen", "date"])
        for t in tweets:
            # Age of the tweet in whole days.
            delta = int((cur_time - int(t[3])) / 60 / 60 / 24)
            if delta < 365 * 2 and t[2] == "None":
                # Strip escaped backslashes before scoring.
                st = t[1].replace("\\\\", "")
                s = sentiment(st)
                print(".", end='', flush=True)
                db_update_record(cursor, u, "tweet_id", t[0], "sen", str(s))
        print("")
        db_commit()
def retweet_anal_to_flat_files(cursor):
    """Write, for every known user, the retweets of their tweets made by other MPs.

    Creates (and truncates) one flat file per user under "retweet_anal/";
    each line is "<epoch-date>:<first 100 chars of the retweet text>".
    Only retweets of accounts that are themselves in the database are recorded.
    """
    users = db_get_all_users(cursor)
    if not os.path.isdir("retweet_anal"):
        os.mkdir("retweet_anal")
    # Truncate every output file so repeated runs do not append duplicates.
    for u in users:
        with open(os.path.join("retweet_anal", u), 'w'):
            pass
    user_set = set(users)  # O(1) membership tests in the loop below
    for u in users:
        print(u)
        tweets = db_get_cols_from_table(cursor, u, ["date", "tweet"])
        for row in tweets:
            tweet = row[1]
            if tweet.startswith("RT @"):
                # "RT @someone: text" -> "someone"
                retweeted = tweet.split(":")[0][4:]
                if retweeted in user_set:
                    short = tweet[:100]
                    with open(os.path.join("retweet_anal", retweeted), 'a') as f:
                        f.write(str(int(row[0])) + ":" + short + "\n")
def short_to_long_over_db(cursor):
    """Resolve shortened http(s) links in recent tweets to their final URLs.

    Ensures a "url" column exists on each user's tweet table, scans tweets
    from the last year whose "url" field is still "None", extracts the first
    link from the tweet text, resolves links in batches via fetch_parallel()
    and stores the result (scheme stripped, truncated to the 1000-char column).
    """
    max_days = 365
    # Raw string: avoids the invalid "\s" escape in a plain string literal.
    url_re = re.compile(r"(?P<url>https?://[^\s]+)")
    users = db_get_all_users(cursor)
    for u in users:
        print(u)
        if not db_is_col(cursor, u, "url"):
            db_add_col(cursor, u, "url", length="1000")
            print("adding url")
        cur_time = time.time()
        tweets = db_get_cols_from_table(cursor, u, ["date", "tweet", "url", "tweet_id"])
        urls = []
        for ii in range(len(tweets)):
            date = int(tweets[ii][0])
            text = tweets[ii][1]
            url = tweets[ii][2]
            tweet_id = tweets[ii][3]
            if url == "None" and ((cur_time - date) / 60 / 60 / 24) < max_days:
                match = url_re.search(text)
                # Links truncated by Twitter end in an escaped ellipsis
                # ("\u2026" stored literally): skip those.
                if match is not None and match.group("url").count("\\u2026") == 0:
                    urls.append([match.group("url"), tweet_id])
            # Resolve in batches of >40, plus whatever remains on the last tweet.
            if len(urls) > 40 or (len(urls) > 0 and ii == len(tweets) - 1):
                urls_out = fetch_parallel(urls)
                for resolved, rec_id in urls_out:
                    if resolved != "error":
                        # Strip the scheme and clamp to the column length.
                        if resolved.startswith('https://'):
                            resolved = resolved[8:]
                        if resolved.startswith('http://'):
                            resolved = resolved[7:]
                        db_update_record(cursor, u, "tweet_id", rec_id,
                                         "url", resolved[:1000])
                db_commit()
                urls = []
def time_on_twitter(cursor):
    """Estimate total MP time spent on twitter over the last year.

    Counts original tweets and retweets younger than 365 days across all
    users, then prints the totals together with a rough time estimate
    (30 s per original tweet, 10 s per retweet, reported in hours).
    """
    users = db_get_all_users(cursor)
    rt = 0
    origonal = 0
    for u in users:
        print(u)
        now = time.time()
        rows = db_get_cols_from_table(cursor, u, ["date", "tweet"])
        for row in rows:
            age_days = (now - int(row[0])) / 60 / 60 / 24
            if age_days < 365:
                if row[1].startswith("RT "):
                    rt += 1
                else:
                    origonal += 1
    print(origonal, rt, len(users), (origonal * 30.0 + rt * 10.0) / 60 / 60)
def at_usage_graph(cursor):
    """Render a horizontal bar chart of the 20 most @-mentioned accounts."""
    print("making @ graph")
    words_delete_all()
    for u in db_get_all_users(cursor):
        for tweet in db_get_tweets_in_last_time(cursor, u):
            word_add_array_at(tweet)
    word_clean()
    names, values = words_ret_hist()
    # Keep the top 20, least-used first so the biggest bar ends up on top.
    names = names[:20]
    values = values[:20]
    names.reverse()
    values.reverse()
    y_pos = np.arange(len(names))
    plt.figure()
    plt.barh(y_pos, values, align='center')
    plt.yticks(y_pos, names)
    plt.xlabel('Usage')
    plt.xticks(rotation='vertical')
    plt.savefig('/var/www/html/graphs/at_usage.png', bbox_inches='tight')
def do_retweet_anal():
    """Collect how quickly MPs' tweets get retweeted by other MPs.

    Reads the per-user flat files produced by retweet_anal_to_flat_files(),
    computes for each retweet the delay in hours between the original tweet
    and the retweet, and dumps all delays to "retweets.txt".
    """
    cursor = db_get_mariadb_cursor()
    users = db_get_all_users(cursor)
    data = []
    for u in users:
        print(u)
        # `with` closes the file; the original open(...).read() leaked handles.
        with open(os.path.join("retweet_anal", u)) as f:
            lines = f.read().splitlines()
        date = []
        tweets = []
        for l in lines:
            # Line format: "<epoch>:RT @user: text...".
            # NOTE(review): split(":") also cuts the text at any later colon,
            # so s[2] is only the text up to the next ":" - confirm this
            # matches what get_tweet_time() expects.
            s = l.split(":")
            date.append(int(s[0]))
            tweets.append(s[2][1:])
        if len(tweets) > 0:
            # Sort both lists together by tweet text so duplicates are adjacent.
            tweets, date = (list(t) for t in zip(*sorted(zip(tweets, date))))
            cur_t = ""
            for i in range(len(tweets)):
                # Only look up the original tweet's time once per distinct tweet.
                if cur_t != tweets[i]:
                    cur_t = tweets[i]
                    start_time = get_tweet_time(cursor, u, tweets[i])
                delta = int((date[i] - start_time) / 60 / 60)
                data.append(delta)
    print(data)
    with open("retweets.txt", 'w') as f:
        for d in data:
            f.write(str(d) + "\n")
def __init__(self):
    # Main window for the tweet-classification GUI: a user selector, a class
    # selector, a tweet display area and Back/Next/Save controls stacked
    # vertically, with a status label and progress bar underneath.
    # NOTE(review): relies on a module-level `cursor` already being open.
    super(main, self).__init__()
    self.setMinimumSize(800, 400)
    self.show()
    self.tweet_pos = 0          # index of the tweet currently displayed
    self.all_user_tweets = []   # tweets of the currently selected user
    self.users = db_get_all_users(cursor)
    vbox = QVBoxLayout()
    self.status = QLabel("hello")
    hbox = QHBoxLayout()
    self.back = QPushButton("Back")
    self.back.clicked.connect(self.callback_back)
    self.next = QPushButton("Next")
    self.next.clicked.connect(self.callback_next)
    self.save = QPushButton("Save")
    self.save.clicked.connect(self.callback_save)
    self.prog = QProgressBar()
    hbox.addWidget(self.back)
    hbox.addWidget(self.next)
    hbox.addWidget(self.save)
    self.cb = QComboBox()        # classification classes (one per class file)
    self.cb.setStyleSheet("font: 24pt ;")
    self.cb_users = QComboBox()  # one entry per user
    for file in class_get_files():
        self.cb.addItem(file)
    for u in self.users:
        self.cb_users.addItem(u)
    self.cb_users.currentIndexChanged.connect(self.callback_user_changed)
    h_button_widget = QWidget()
    h_button_widget.setLayout(hbox)
    self.display = QTextEdit()
    self.display.setStyleSheet("font: 24pt ;")
    self.display.cursorPositionChanged.connect(self.callback_cursor_move)
    # Assemble the vertical layout: users, classes, tweet text, buttons,
    # status line, progress bar.
    vbox.addWidget(self.cb_users)
    vbox.addWidget(self.cb)
    vbox.addWidget(self.display)
    vbox.addWidget(h_button_widget)
    vbox.addWidget(self.status)
    vbox.addWidget(self.prog)
    wvbox = QWidget()
    wvbox.setLayout(vbox)
    self.setCentralWidget(wvbox)
def time_domain(cursor, words=None):
    """Plot a per-day histogram of tweets containing the given words.

    Builds a tweets-per-day histogram over the last `days` days across all
    users, saves it to the web graphs directory and tweets the image.

    words: list of words to match. Defaults to None rather than a mutable
    [] so the default object is not shared between calls.
    """
    if words is None:
        words = []
    print("Time domain", words)
    users = db_get_all_users(cursor)
    store = [0] * days
    past = list(range(len(store)))  # x axis: days in the past
    for user in users:
        user_make_hist(cursor, store, user, words=words)
    title = ",".join(words)
    plt.figure(figsize=(7.5, 4.0))
    plt.title("Tweets containing:" + title, fontsize=20)
    # Deterministic-looking per-title colour derived from the title's hash
    # (NOTE: str hashing is salted per process, so the colour varies per run).
    r = float(hash(title + "r") % 256) / 256
    g = float(hash(title + "g") % 256) / 256
    b = float(hash(title + "b") % 256) / 256
    plt.bar(past, store, color=(r, g, b), edgecolor="none")
    plt.ylabel('Tweets/day', fontsize=20)
    plt.xlabel('Time (days in past)', fontsize=25)
    save_file = "/var/www/html/graphs/time_domain.png"
    plt.savefig(save_file, bbox_inches='tight')
    tweet_image(save_file,
                title="The number of times MPs used the words '" + title
                      + "' during the last year.")
def clas_stats(cursor):
    """Aggregate per-user tweet classification counts into user_names.clas.

    For each user, builds a histogram of the "clas" column of their tweets
    and stores it as a "name=count;name=count" string on their user_names
    record. Fixes a stray bare identifier ("adas") at the end of the
    original which raised NameError after the loop completed.
    """
    users = db_get_all_users(cursor)
    for u in users:
        print(u)
        words_delete_all()
        for row in db_get_cols_from_table(cursor, u, ["clas"]):
            word_add(row[0])
        names, counts = words_ret_hist()
        # "label=count" pairs joined with ";" (no trailing separator).
        w = ";".join(names[i] + "=" + str(counts[i]) for i in range(len(names)))
        db_update_record(cursor, "user_names", "user_id", u, "clas", w)
        db_commit()
        print(w)
def hashtag_get_most_used(cursor, delta=172800 / 2):
    """Return (names, values) of hashtags used across all users recently.

    delta: look-back window in seconds (default 86400 s = 24 h).
    Side effect: writes the hashtag names, one per line, to "word_usage.txt".
    """
    words_delete_all()
    users = db_get_all_users(cursor)
    for u in users:
        tweets = db_get_tweets_in_last_time(cursor, u, delta=delta)
        for t in tweets:
            word_add_array_hashtag(t)
    word_clean()
    names, values = words_ret_hist()
    # `with` guarantees the file is closed even if a write fails.
    with open("word_usage.txt", "w") as f:
        for name in names:
            f.write(name + "\n")
    return names, values
def re_tweet(cursor, delta=172800 / 2):
    """Write a histogram of accounts retweeted by MPs to the web stats dir.

    Scans every user's tweets (effectively the full history), counts
    "RT @name" prefixes, and writes "name count is_mp party" lines to
    /var/www/html/stats/retweets.txt. Fixes a stray bare identifier
    ("aasdsad") at the end of the original which raised NameError.

    delta: kept for interface compatibility; the scan uses a fixed
    1e10-second window regardless.
    """
    words_delete_all()
    users = db_get_all_users(cursor)
    for u in users:
        tweets = db_get_tweets_in_last_time(cursor, u, delta=1e10)
        for t in tweets:
            if t.startswith("RT @"):
                # Keep the leading "@": "RT @user: ..." -> "@user"
                word_add_array_at(t.split(":")[0][3:])
        print(u)
    names, values = words_ret_hist(max_len=400)
    with open('/var/www/html/stats/retweets.txt', 'w') as f:
        print(names)
        for i in range(len(names)):
            handle = names[i][1:]  # strip the "@"
            ismp = users.count(handle)
            party = db_user_get_job1(cursor, handle)
            if party is None:
                party = "notmp"
            out = names[i] + " " + str(values[i]) + " " + str(ismp) + " " + party
            f.write(out + "\n")
def cal_retweets(cursor):
    """Plot the distribution of MPs' retweet percentages over the last 100 days.

    When `update` is enabled, recomputes each user's retweet fraction and
    stores it in user_names.retweets; always reads the stored fractions
    back, splits them by party and renders a histogram (web layout or a
    local single-colour version depending on `for_web`).
    """
    users = db_get_all_users(cursor)
    v = []
    update = False
    if update:
        for u in users:
            print(u)
            cur_time = time.time()
            tweets = db_get_cols_from_table(cursor, u, ["date", "tweet"])
            rt = 0
            origonal = 0
            for row in tweets:
                delta = (cur_time - int(row[0])) / 60 / 60 / 24
                if delta < 100.0:
                    if row[1].startswith("RT "):
                        rt += 1
                    else:
                        origonal += 1
            if rt + origonal != 0:
                frac = 100.0 * rt / (rt + origonal)
            else:
                frac = 0.0
            db_update_record(cursor, "user_names", "user_id", u, "retweets", str(frac))
            db_commit()
    rows = db_get_cols_from_table(cursor, "user_names", ["retweets", "job_type1"])
    con = []
    lab = []
    lib = []
    snp = []
    for frac_s, party in rows:
        frac = int(frac_s)
        if party.startswith("con"):
            con.append(frac)
        if party.startswith("lab"):
            lab.append(frac)
        if party.startswith("lib"):
            lib.append(frac)
        if party.startswith("snp"):
            snp.append(frac)
        if frac != 0:
            v.append(frac)
    # 1%-wide histogram bins from 0 to 100.
    xbins = [float(x) for x in range(100)]
    # Zero the first entry of each major-party list (original behaviour;
    # NOTE(review): raises IndexError if a party list is empty - assumed
    # non-empty in practice).
    con[0] = 0.0
    lab[0] = 0.0
    snp[0] = 0.0
    for_web = False
    if for_web:
        plt.figure(figsize=(25.0, 6.0), dpi=300)
        plt.title("Re-tweets from MPs as % over last 100 days", fontsize=30)
        plt.gcf().subplots_adjust(bottom=0.15)
        plt.hist(v, bins=xbins, alpha=0.5, color='green')
        plt.hist(con, bins=xbins, alpha=0.8, color='blue')
        plt.hist(lab, bins=xbins, alpha=0.8, color='red')
        plt.hist(snp, bins=xbins, alpha=0.8, color='purple')
        plt.hist(lib, bins=xbins, alpha=0.8, color='yellow')
        plt.legend(('All', 'Con', 'Lab', 'SNP', "Lib"), fontsize=25)
        plt.ylabel('Number of MPs', fontsize=25)
        plt.xlabel('Percentage of tweets that are retweets', fontsize=25)
        plt.savefig("/var/www/html/graphs/retweets.png", bbox_inches='tight')
    else:
        matplotlib.rcParams['font.family'] = 'Open Sans'
        plt.figure(figsize=(6.0, 6.0), dpi=300)
        ax = plt.subplot(111)
        plt.gcf().subplots_adjust(bottom=0.15)
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        plt.hist(v, bins=xbins, alpha=1.0, color='#36845b')
        plt.ylabel('Number of MPs', fontsize=25)
        plt.xlabel('Percentage retweets', fontsize=25)
        plt.savefig("retweet_dist.png", bbox_inches='tight')
def clas(cursor, delta=172800 / 2):
    """Classify tweets per user and write per-user interest summaries.

    NOTE(review): this function looks unfinished/buggy - see inline notes.
    `delta` is accepted but never used.
    """
    users = db_get_all_users(cursor)
    tweets = []
    #users=["JoJohnsonUK"]
    done = 0
    # NOTE(review): hard-coded slice of the user list (indices 271-561);
    # presumably a manual restart point - confirm before relying on it.
    for i in range(271, 562):
        u = users[i]
        print(u, i, len(users))
        #ada
        if db_is_col(cursor, u, "clas") == False:
            db_add_col(cursor, u, "clas")
            print("adding")
        tweets = db_get_cols_from_table(cursor, u, ["tweet_id", "tweet"])
        for t in tweets:
            # clas: class label, tot: total score, res: per-class counts.
            clas, tot, res = clas_clas_text(t[1])
            db_update_record(cursor, u, "tweet_id", t[0], "clas", clas)
            #print(clas)
            #print(t)
        db_commit()
        #adas
        types = None
        # NOTE(review): `tot`, `res` and `clas` below are leftovers from the
        # LAST tweet of the loop above; `out` is the full return tuple so the
        # `out != "unknown"` test is always true, and `clas` (a class label)
        # is then incremented as if it were a counter - almost certainly not
        # the intended logic. Confirm before reuse.
        if tot != 0:
            # Accumulate per-class counts over (at most) the first 500 tweets.
            m = len(tweets)
            if m > 500:
                m = 500
            for i in range(0, m):
                #print(tweets[i])
                out = clas_clas_text(tweets[i])
                if types == None:
                    types = res
                else:
                    #print(types)
                    for ii in range(0, len(res)):
                        types[ii][1] = types[ii][1] + res[ii][1]
                if out != "unknown":
                    clas = clas + 1
            path = "/var/www/html/interests/" + u + ".txt"
            #print(types)
            #asadasds
            if clas != 0:
                # Sort classes by accumulated count, largest first, and write
                # each class's share (percent of the total) to the web dir.
                types = sorted(types, key=itemgetter(1), reverse=True)
                sum_ids = 0
                for i in range(0, len(types)):
                    sum_ids = sum_ids + types[i][1]
                clas_perent = 100.0 * (clas / tot)
                print(u, types, clas_perent)
                f = open(path, 'w')
                for i in range(0, len(types)):
                    f.write(types[i][0] + " " + str(int((types[i][1] / sum_ids) * 100.0)) + "\n")
                #f.write("clas "+str(int(clas_perent))+"\n")
                f.close()
def cal_tweets_per_day(cursor):
    """Plot histograms of MPs' tweets-per-day over the last 100 days.

    When `update` is enabled, recounts each user's tweets from the last
    100 days and stores the daily rate in user_names.tweets_per_day;
    always reads the stored rates back, splits them by party and renders
    either one combined web histogram or a set of per-party PNG files.
    """
    users = db_get_all_users(cursor)
    v = []
    update = False
    if update:
        for u in users:
            print(u)
            cur_time = time.time()
            tweets = db_get_cols_from_table(cursor, u, ["date"])
            count = 0
            for row in tweets:
                delta = (cur_time - int(row[0])) / 60 / 60 / 24
                if delta < 100.0:
                    count += 1
            db_update_record(cursor, "user_names", "user_id", u,
                             "tweets_per_day", str(count / 100.0))
            db_commit()
    rows = db_get_cols_from_table(cursor, "user_names", ["tweets_per_day", "job_type1"])
    con = []
    lab = []
    lib = []
    snp = []
    for rate_s, party in rows:
        rate = int(rate_s)
        if party.startswith("con"):
            con.append(rate)
        if party.startswith("lab"):
            lab.append(rate)
        if party.startswith("lib"):
            lib.append(rate)
        if party.startswith("snp"):
            snp.append(rate)
        v.append(rate)
    # 1-wide bins from 0 to 60 tweets/day.
    xbins = [float(x) for x in range(60)]
    for_web = False
    if for_web:
        plt.figure(figsize=(25.0, 6.0), dpi=300)
        plt.title("Tweets from MPs per day", fontsize=30)
        plt.gcf().subplots_adjust(bottom=0.15)
        plt.hist(v, bins=xbins, alpha=0.5, color='green')
        plt.hist(con, bins=xbins, alpha=0.8, color='blue')
        plt.hist(lab, bins=xbins, alpha=0.8, color='red')
        plt.hist(snp, bins=xbins, alpha=0.8, color='purple')
        plt.hist(lib, bins=xbins, alpha=0.8, color='yellow')
        plt.legend(('All', 'Con', 'Lab', 'SNP', "Lib"), fontsize=25)
        plt.ylabel('Number of MPs', fontsize=25)
        plt.xlabel('Number of tweets/day', fontsize=25)
        plt.savefig("/var/www/html/graphs/tweets_per_day.png", bbox_inches='tight')
    else:
        matplotlib.rcParams['font.family'] = 'Open Sans'
        ###############All#############
        plt.clf()
        plt.figure(figsize=(6.0, 6.0), dpi=300)
        ax = plt.subplot(111)
        plt.gcf().subplots_adjust(bottom=0.15)
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        ax.hist(v, bins=xbins, alpha=1.0, color='#36845b')
        plt.ylabel('Number of MPs', fontsize=20)
        plt.xlabel('Number of tweets/day', fontsize=20)
        plt.savefig("./tweets_per_day/tweets_per_day_all.png", bbox_inches='tight')
        # One histogram per party; the three big parties share a y scale.
        ###############Con#############
        ax = setup_graph()
        plt.gca().set_ylim([0, 40])
        plt.hist(con, bins=xbins, alpha=0.8, color="#00539f")
        plt.savefig("./tweets_per_day/tweets_per_day_all_con.png", bbox_inches='tight')
        ###############Lab#############
        ax = setup_graph()
        plt.gca().set_ylim([0, 40])
        plt.hist(lab, bins=xbins, alpha=0.8, color="#d50000")
        plt.savefig("./tweets_per_day/tweets_per_day_all_lab.png", bbox_inches='tight')
        ###############SNP#############
        ax = setup_graph()
        plt.gca().set_ylim([0, 40])
        plt.hist(snp, bins=xbins, alpha=0.8, color="#fff685")
        plt.savefig("./tweets_per_day/tweets_per_day_all_snp.png", bbox_inches='tight')
        ###############Lib#############
        ax = setup_graph()
        plt.hist(lib, bins=xbins, alpha=0.8, color='#faa01a')
        plt.savefig("./tweets_per_day/tweets_per_day_all_lib.png", bbox_inches='tight')
def http_retweets(cursor):
    """Plot the percentage of MPs' tweets that contain web links.

    When `update` is enabled, recomputes each user's link fraction and
    stores it in user_names.http_tweets; always reads the stored fractions
    back, splits them by party and renders a histogram to the web dir.
    """
    users = db_get_all_users(cursor)
    v = []
    update = False
    if update:
        for u in users:
            print(u)
            cur_time = time.time()
            tweets = db_get_cols_from_table(cursor, u, ["date", "tweet"])
            http = 0
            for row in tweets:
                delta = (cur_time - int(row[0])) / 60 / 60 / 24
                if delta < 100.0 and "http" in row[1]:
                    http += 1
            if http != 0:
                # NOTE(review): numerator counts only the last 100 days but
                # the denominator is ALL tweets - confirm that is intended.
                frac = 100.0 * http / len(tweets)
            else:
                frac = 0.0
            db_update_record(cursor, "user_names", "user_id", u, "http_tweets", str(frac))
            db_commit()
    rows = db_get_cols_from_table(cursor, "user_names", ["http_tweets", "job_type1"])
    print(rows)
    con = []
    lab = []
    lib = []
    snp = []
    for frac_s, party in rows:
        frac = int(frac_s)
        if party.startswith("con"):
            con.append(frac)
        if party.startswith("lab"):
            lab.append(frac)
        if party.startswith("lib"):
            lib.append(frac)
        if party.startswith("snp"):
            snp.append(frac)
        v.append(frac)
    # 1%-wide histogram bins from 0 to 100.
    xbins = [float(x) for x in range(100)]
    plt.figure(figsize=(25.0, 6.0), dpi=300)
    plt.title("Percentage of tweets which contain web links", fontsize=30)
    plt.gcf().subplots_adjust(bottom=0.15)
    plt.hist(v, bins=xbins, alpha=0.5, color='green')
    plt.hist(con, bins=xbins, alpha=0.8, color='blue')
    plt.hist(lab, bins=xbins, alpha=0.8, color='red')
    plt.hist(snp, bins=xbins, alpha=0.8, color='purple')
    plt.hist(lib, bins=xbins, alpha=0.8, color='yellow')
    plt.legend(('All', 'Con', 'Lab', 'SNP', "Lib"), fontsize=25)
    plt.ylabel('Number of MPs', fontsize=25)
    plt.xlabel('Percent', fontsize=25)
    plt.savefig("/var/www/html/graphs/http_tweets.png", bbox_inches='tight')
def hashtag_flow(cursor):
    """Build an animated GIF of the top-10 MP hashtags over the last 48 h.

    Renders one bar-chart frame per 15-minute step (each frame covers a
    4-hour window), assembles the frames with ImageMagick `convert`, copies
    the GIF to a randomly named thumbnail and tweets a link to it.
    """
    print("making hashtag flow graph")
    path="/var/www/html/graphs/"
    thumbs="/var/www/html/thumbs"
    if os.path.isdir(thumbs)==False:
        os.mkdir(thumbs)
    # NOTE(review): chdir changes the process-wide CWD and is never restored.
    os.chdir("/var/www/html/graphs/")
    # Remove frames left over from the previous run.
    for f in glob.glob("hashtag_flow*.png"):
        os.remove(f)
    file_number=0
    ago=0.0            # hours in the past for the current frame
    pngs=""            # space-separated frame list passed to `convert`
    plt.figure(figsize=(8.0, 4.0))
    #color = cm.inferno_r(np.linspace(.4,.8, 20))
    loop=0
    pop_hash_tags=""   # top tags of the first frame (used in the tweet text)
    while(ago<48):
        print("ago=",ago)
        words_delete_all()
        users=db_get_all_users(cursor)
        tweets=[]
        for u in users:
            tweets=db_get_tweets_in_time_frame(cursor,u,width=4,time_ago=ago)
            for i in range(0,len(tweets)):
                word_add_array_hashtag(tweets[i])
        word_clean()
        names,values=words_ret_hist()
        # Frames with fewer than 10 distinct hashtags are skipped entirely.
        if len(names)>=10:
            names=names[:10]
            values=values[:10]
            names.reverse()
            values.reverse()
            if loop==0:
                pop_hash_tags=" ".join(names)
            # Stable per-tag colour derived from the tag's hash.
            color=[]
            #names[5]="#brexit"
            for i in range(0,len(names)):
                names[i]=names[i].strip().lower()
                r = float(hash(names[i]+"r") % 256) / 256
                g = float(hash(names[i]+"g") % 256) / 256
                b = float(hash(names[i]+"b") % 256) / 256
                color.append([r,g,b,1.0])
                #print(names[i],(r,g,b,1.0))
            #print(color)
            #word_print()
            y_pos = np.arange(len(names))
            #,dpi=300 figsize=(25.0, 16.0)
            plt.cla()
            bars=plt.barh(y_pos, values, align='center',color=color)
            plt.yticks(y_pos, names)
            t=time.time()
            t=t-ago*60*60
            ago_to_2dp="%.2f" % ago
            plt.title("Number of tweets "+str(ago_to_2dp)+" hours ago from MPs")
            plt.xlabel('Tweets')
            plt.xlim([0,40])
            plt.xticks(rotation='vertical')
            plt.subplots_adjust(left=0.35, right=0.95, top=0.9, bottom=0.2)
            plt.savefig('/var/www/html/graphs/hashtag_flow'+str(file_number)+'.png')
            pngs=pngs+" hashtag_flow"+str(file_number)+'.png'
            file_number=file_number+1
        ago=ago+0.25
        loop=loop+1
    # Assemble the frames into an animated GIF (requires ImageMagick).
    os.system("convert -delay 30 -loop 0 -quality 50% "+pngs+" hashtag_flow.gif")
    # Random file name so the web page never serves a stale cached GIF.
    m = hashlib.md5()
    m.update(str(time.time()).encode('utf-8'))
    random_file=m.hexdigest()+".gif"
    shutil.copyfile(os.path.join(path,"hashtag_flow.gif"), os.path.join(thumbs,random_file))
    my_twitter_tweet("Top hashtags used by MPs in last \
48 hours: http://mpstweets.com/flow.php?fname="+random_file+" "+pop_hash_tags)
def noun_anal(cursor):
    """Plot a histogram of the most common nouns in recent MP tweets.

    When `update` is enabled, rebuilds the word histogram from the last
    100 days of tweets and writes it to noun_hist.dat; always reads that
    file back and plots the top entries (web layout or a local single-colour
    version depending on `for_web`).
    """
    words_delete_all()
    users = db_get_all_users(cursor)
    update = False
    if update:
        print(len(users))
        for i, u in enumerate(users):
            print(i, u)
            cur_time = time.time()
            tweets = db_get_cols_from_table(cursor, u, ["tweet", "date"])
            for row in tweets:
                if ((cur_time - int(row[1])) / 60 / 60 / 24) < 100.0:
                    word_add_array(row[0])
        names, values = words_ret_hist()
        with open('noun_hist.dat', 'w') as f:
            for i in range(len(names)):
                f.write(names[i] + " " + str(values[i]) + "\n")
    # `with` closes the file; the original open(...).read() leaked the handle.
    with open('noun_hist.dat') as f:
        lines = f.read().splitlines()
    words = []
    counts = []
    for i, line in enumerate(lines):
        a = line.split()
        words.append(a[0])
        counts.append(int(a[1]))
        if i > 15:
            break  # keep only the top 17 entries
    words.reverse()
    counts.reverse()
    y_pos = np.arange(len(words))
    for_web = False
    if for_web:
        plt.figure(figsize=(25.0, 16.0), dpi=300)
        plt.bar(y_pos, counts, color="blue")
        plt.xticks(y_pos, words, fontsize=35)
        plt.legend(loc='best', fontsize=30)
        plt.ylabel('Usage (Tweets)', fontsize=30)
        plt.yticks(fontsize=30)
        plt.xticks(rotation=45, rotation_mode="anchor", ha="right")
        plt.tight_layout()
        plt.savefig('/var/www/html/graphs/nouns.png')
    else:
        plt.figure(figsize=(10.0, 10.0), dpi=300)
        ax = plt.subplot(111)
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        plt.barh(y_pos, counts, color="#36845b")
        for tick in ax.xaxis.get_major_ticks():
            tick.label.set_fontsize(25)
        plt.yticks(y_pos, words, fontsize=25)
        plt.xticks(rotation=45, rotation_mode="anchor", ha="right")
        plt.legend(loc='best', fontsize=25)
        plt.xlabel('Usage (Tweets)', fontsize=25)
        plt.tight_layout()
        plt.savefig('nouns.png')
def topics(cursor):
    """Plot tweet counts per topic per day over the last ~200 days.

    Counts classified tweets (the "clas" column) per day for a handful of
    topics and renders them on a log-y axis, saved to topics.png.

    Fixes vs the original: the counter lists are created unconditionally
    (they were defined inside `if update:` but read afterwards, a NameError
    if the flag were turned off); negative `delta` values (tweets newer
    than the shifted "now") no longer wrap around to the tail of the
    lists; counters that were never incremented or plotted are removed.
    """
    users = db_get_all_users(cursor)
    days_in_past = 200
    # Reference "now" is one week ago, so the newest partial week is excluded.
    cur_time = time.time() - 7 * 24 * 60 * 60
    update = True
    # Per-day counters indexed by whole days in the past.
    brexit = [0] * days_in_past
    education = [0] * days_in_past
    health = [0] * days_in_past
    party = [0] * days_in_past
    sci_env = [0] * days_in_past
    transport = [0] * days_in_past
    if update:
        for u in users:
            print(u)
            tweets = db_get_cols_from_table(cursor, u, ["date", "clas"])
            for row in tweets:
                c = row[1]
                delta = int((cur_time - int(row[0])) / 60 / 60 / 24)
                # Lower bound excludes tweets newer than the shifted "now",
                # which previously indexed the lists with a negative delta.
                if 0 <= delta < days_in_past:
                    if c == "health":
                        health[delta] += 1
                    elif c == "transport":
                        transport[delta] += 1
                    elif c == "education":
                        education[delta] += 1
                    elif c == "sci_env" or c == "science":
                        sci_env[delta] += 1
                    elif c == "party":
                        party[delta] += 1
                    elif c == "brexit":
                        brexit[delta] += 1
    xbins = [float(x) for x in range(days_in_past)]
    matplotlib.rcParams['font.family'] = 'Open Sans'
    plt.figure(figsize=(25.0, 6.0), dpi=300)
    ax = plt.subplot(111)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    plt.gcf().subplots_adjust(bottom=0.15)
    plt.semilogy(xbins, health, alpha=1.0, color="#36845b")
    plt.semilogy(xbins, transport, alpha=1.0, color="#a3d9bc")
    plt.semilogy(xbins, education, alpha=1.0, color="#808080")
    plt.semilogy(xbins, sci_env, alpha=1.0, color="#305496")
    plt.semilogy(xbins, party, alpha=1.0, color="#ffc000")
    plt.tick_params(axis='y', labelsize=25)
    plt.tick_params(axis='x', labelsize=25)
    plt.legend(('Health', "Transport", "Education", "Science/Env", "Political"), fontsize=25)
    plt.xlim((0, 225))
    plt.ylabel('Number of tweets', fontsize=25)
    plt.xlabel('Days in past', fontsize=25)
    plt.savefig("topics.png", bbox_inches='tight')