def getPyhmeter(tweet_text):
    """Build a ``pyhmeter.HMeter`` from the tokens of a lightly cleaned tweet.

    Before tokenizing, three things are stripped from the text:
    ``@mentions``; everything from a ``|`` to the end of that line; and each
    ``#`` sign together with any whitespace directly after it (the hashtag
    word itself is kept).

    Args:
        tweet_text: Raw tweet text.

    Returns:
        The object returned by ``pyhmeter.HMeter(tokens, file, 1)``.
    """
    # TODO: remove URL and other stuff to clean text
    # Raw string: "\w", "\|" and "\s" are regex escapes, not string escapes;
    # in a plain literal they trigger invalid-escape warnings.
    pattern = re.compile(r'(@\w*)|(\|.*)|(#\s*)')
    removedAt = pattern.sub('', tweet_text)
    tk = TweetTokenizer()
    tokens = tk.tokenize(removedAt)
    # NOTE(review): `file` is not defined inside this function; it presumably
    # refers to a module-level scores object/path -- confirm it exists, since
    # Python 3 has no builtin named `file`.
    return pyhmeter.HMeter(tokens, file, 1)
def main():
    """Plot a daily hedonometer score for "ClimateChange" tweets during 2019.

    For each of the first 28 days of every month of 2019, and for each of the
    first 100 usernames in ``followers.txt`` (first column, no header), at
    most one tweet matching "ClimateChange" is fetched and scored with
    ``pyhmeter``. A user's score is the mean of their scored tweets, or 0
    when they have none. The day's value is the mean of the user scores.
    The series is plotted against the date, saved as a PNG and shown.

    NOTE: a later ``def main()`` in this file rebinds the name, so this
    version only runs if that definition is removed or renamed.
    """
    import datetime  # local import: only needed by this function

    max_users = 100
    data = pd.read_csv(os.getcwd() + "/followers.txt", header=None)
    sample_scores = pyhmeter.load_scores("Data_Set_S1.txt")
    users = data[0][:max_users]

    scores = []
    dates = []
    cnt = 0
    one_day = datetime.timedelta(days=1)
    for month in range(1, 13):
        for day in range(1, 29):
            since = datetime.datetime(2019, month, day)
            date = since.strftime("%Y-%m-%d")
            # Real date arithmetic keeps the upper bound valid: the day after
            # 28 February is 1 March, not the non-existent "2019-02-29".
            next_date = (since + one_day).strftime("%Y-%m-%d")
            day_score = 0
            for user in users:
                print("querying...")
                cnt += 1
                print(cnt)
                tweetCriteria = (
                    got.manager.TweetCriteria()
                    .setUsername(user)
                    .setQuerySearch("ClimateChange")
                    .setSince(date)
                    .setUntil(next_date)
                    .setMaxTweets(1)
                )
                user_total = 0
                scored = 0
                for tweet in got.manager.TweetManager.getTweets(tweetCriteria):
                    h = pyhmeter.HMeter(tweet.text.split(), sample_scores)
                    score = h.happiness_score()
                    # happiness_score() can apparently return None (the other
                    # main() in this file guards against it); skip those.
                    if score is not None:
                        user_total += score
                        scored += 1
                # Average over the tweets that were actually scored; a user
                # without any scored tweet contributes 0 to the day.
                if scored:
                    day_score += user_total / scored
            # Divide by the number of users really loaded, which may be
            # fewer than max_users for a short followers file.
            if len(users):
                day_score /= len(users)
            # Store datetime objects: that is the input type date2num
            # documents, rather than ISO-formatted strings.
            dates.append(since)
            scores.append(day_score)

    datetimes = matplotlib.dates.date2num(dates)
    plt.plot_date(datetimes, scores, marker=None, linestyle="-")
    plt.xticks(rotation=70)
    plt.xlabel("time")
    plt.ylabel("hedonometer score")
    plt.savefig(
        "/home/saksham/Twitter_Project/GetOldTweets-python-master/test3.png")
    plt.show()
def main():
    """Plot the monthly mean hedonometer score of "ClimateChange" tweets in 2019.

    For every month of 2019 a random sample of up to 2000 usernames is drawn
    from ``followers.txt`` (first column, no header). Up to 10 matching
    tweets per user are fetched and scored with ``pyhmeter``. The monthly
    value is the mean over all tweets that produced a score; months without
    any scored tweet are left out of the plot. The monthly series and a
    horizontal line at the mean of the monthly values are plotted, saved as
    a PNG and shown.
    """
    data = pd.read_csv(os.getcwd() + "/followers.txt", header=None)
    sample_scores = pyhmeter.load_scores("Data_Set_S1.txt")
    scores = []
    dates = []
    average_score = 0
    # Series.sample raises ValueError when n exceeds the population, so cap
    # the sample size at the number of rows available.
    sample_size = min(2000, len(data))

    for month_number in range(1, 13):
        month = "%.2d" % month_number
        since = "2019-" + month + "-01"
        # Use the first day of the following month as the upper bound, which
        # is always a valid date (unlike "2019-02-30").
        # NOTE(review): assumes `until` is exclusive, as in Twitter search --
        # confirm against the GetOldTweets version in use.
        if month_number == 12:
            until = "2020-01-01"
        else:
            until = "2019-%.2d-01" % (month_number + 1)

        monthly_score = 0
        cnt = 0  # number of tweets in this month that produced a score
        for user in data[0].sample(n=sample_size):
            tweetCriteria = (
                got.manager.TweetCriteria()
                .setUsername(user)
                .setQuerySearch("ClimateChange")
                .setSince(since)
                .setUntil(until)
                .setMaxTweets(10)
            )
            for tweet in got.manager.TweetManager.getTweets(tweetCriteria):
                h = pyhmeter.HMeter(tweet.text.split(), sample_scores)
                # Compute the score once and reuse it below.
                score = h.happiness_score()
                if score is None:
                    continue
                monthly_score += score
                print(cnt)
                cnt += 1
                print(user)
                print(score)

        if cnt != 0:
            monthly_score /= cnt
            print(monthly_score)
            scores.append(monthly_score)
            average_score += monthly_score
            dates.append(month)

    # Guard against a run in which no month yielded a scored tweet.
    if scores:
        average_score /= len(scores)
    print(scores, dates)
    plt.plot(dates, scores, label="actual data", color="red")
    plt.axhline(y=average_score, label="average score")
    plt.xticks(rotation=70)
    plt.xlabel("time(month)")
    plt.ylabel("hedonometer score")
    plt.legend()
    plt.savefig(
        "/home/saksham/Twitter_Project/GetOldTweets-python-master/test3.png")
    plt.show()
# Score every "Climate" tweet from @KremlinRussia_E between 2009-12-30 and
# 2019-12-30, write the per-tweet results to a "|"-separated CSV and plot
# the second score over time.
# NOTE(review): `sample_scores` must already be defined in the enclosing
# scope; it is not assigned in this section.
dates = []
tweets = []
url_rem = []
neutral_rem = []
normal = []
url_score = []
neutral_score = []

tweetCriteria = (
    got.manager.TweetCriteria()
    .setUsername("KremlinRussia_E")
    .setQuerySearch("Climate")
    .setSince("2009-12-30")
    .setUntil("2019-12-30")
    .setMaxTweets(100000)
)
# Fetch once and reuse the result for both the debug print and the loop;
# presumably every getTweets call re-runs the whole (slow) query.
fetched_tweets = got.manager.TweetManager.getTweets(tweetCriteria)
print(fetched_tweets)

for i in fetched_tweets:
    # Keep an ASCII-only copy of the text for the CSV; scoring below still
    # uses the raw tweet text.
    text = re.sub(r'[^\x00-\x7F]+', ' ', i.text)
    tweets.append(text)
    h = pyhmeter.HMeter(i.text.split(), sample_scores)
    # First score with the HMeter's initial settings.
    normal.append(h.happiness_score())
    # Second score after setting deltah to 1.0 -- presumably this changes
    # which words are treated as neutral; confirm against pyhmeter.
    h.deltah = 1.0
    neutral_score.append(h.happiness_score())
    neutral_rem.append(h.matchlist)
    dates.append(i.date)

# Column mapping: 'normal' = ASCII-cleaned text, 'cleaned_tweets' = the
# HMeter matchlist, 'no_clean' = first score, 'cleaned_score' = second score.
data = pd.DataFrame(
    list(zip(tweets, neutral_rem, normal, neutral_score)),
    columns=['normal', 'cleaned_tweets', 'no_clean', 'cleaned_score'])
data.to_csv(os.getcwd() + "/putin(2009-2020)cleaned(deltah=1.0).csv", sep="|")

datetimes = matplotlib.dates.date2num(dates)
plt.plot_date(datetimes, neutral_score, marker=None, linestyle="-")
plt.xticks(rotation=70)
plt.xlabel("time")
plt.ylabel("hedonometer score")