Beispiel #1
0
def getPyhmeter(tweet_text):
    """Build a ``pyhmeter.HMeter`` from the tokens of a lightly cleaned tweet.

    Before tokenizing, the following pieces are stripped from ``tweet_text``:

    * ``@`` followed by any run of word characters (user mentions),
    * a ``|`` and everything after it up to the end of that line,
    * each ``#`` together with any whitespace directly following it (the
      hashtag word itself is kept).

    Args:
        tweet_text: Raw tweet text.

    Returns:
        The ``pyhmeter.HMeter`` constructed from the remaining tokens.
    """
    # TODO: remove URL and other stuff to clean text
    # Raw string so that \w, \| and \s reach the regex engine untouched
    # instead of being parsed as (invalid) string escape sequences.
    pattern = re.compile(r'(@\w*)|(\|.*)|(#\s*)')
    removedAt = pattern.sub('', tweet_text)
    tokens = TweetTokenizer().tokenize(removedAt)

    # NOTE(review): `file` is not defined inside this function; presumably it
    # is a module-level word-score table (on Python 2 it would otherwise
    # resolve to the builtin `file` type) -- confirm against the full module.
    return pyhmeter.HMeter(tokens, file, 1)
Beispiel #2
0
def main():
    """Plot a daily hedonometer score for "ClimateChange" tweets during 2019.

    User names are read from the first column of ``followers.txt`` in the
    current working directory and only the first 100 are used.  For days
    1-28 of every month of 2019, at most one matching tweet per user is
    fetched, the scored tweets of each user are averaged, and the per-user
    averages are averaged over all users (a user without a scored tweet
    contributes 0).  The resulting series is plotted against time, saved to
    a hard-coded PNG path and shown.
    """
    # Function-scope import keeps this block self-contained.
    import datetime as _dt

    data = pd.read_csv(os.getcwd() + "/followers.txt", header=None)
    sample_scores = pyhmeter.load_scores("Data_Set_S1.txt")
    users = data[0][:100]
    user_count = len(users)
    one_day = _dt.timedelta(days=1)

    scores = []
    dates = []
    cnt = 0  # running number of queries issued, printed as progress
    for month in range(1, 13):
        for day in range(1, 29):
            current = _dt.datetime(2019, month, day)
            date = current.strftime("%Y-%m-%d")
            # Real date arithmetic: the day after 2019-02-28 is 2019-03-01,
            # not the non-existent 2019-02-29.
            next_date = (current + one_day).strftime("%Y-%m-%d")
            day_score = 0
            for user in users:
                print("querying...")
                cnt += 1
                print(cnt)
                tweetCriteria = got.manager.TweetCriteria().setUsername(
                    user).setQuerySearch("ClimateChange").setSince(
                        date).setUntil(next_date).setMaxTweets(1)

                total = 0
                scored = 0
                for tweet in got.manager.TweetManager.getTweets(tweetCriteria):
                    h = pyhmeter.HMeter(tweet.text.split(), sample_scores)
                    score = h.happiness_score()
                    # Skip tweets for which no score is returned.
                    if score is not None:
                        total += score
                        scored += 1
                # Average over the number of scored tweets; a user with no
                # scored tweet adds nothing to the day's total.
                if scored:
                    day_score += total / scored
            if user_count:
                day_score /= user_count
            # Keep datetime objects (not strings) so date2num can convert
            # them regardless of the installed matplotlib version.
            dates.append(current)
            scores.append(day_score)

    datetimes = matplotlib.dates.date2num(dates)
    plt.plot_date(datetimes, scores, marker=None, linestyle="-")
    plt.xticks(rotation=70)
    plt.xlabel("time")
    plt.ylabel("hedonometer score")
    plt.savefig(
        "/home/saksham/Twitter_Project/GetOldTweets-python-master/test3.png")
    plt.show()
Beispiel #3
0
def main():
    """Plot a monthly hedonometer score for "ClimateChange" tweets in 2019.

    User names are read from the first column of ``followers.txt`` in the
    current working directory.  For every month of 2019 a random sample of
    up to 2000 users is drawn, up to 10 matching tweets per user are
    fetched, and the month's score is the mean over all tweets that
    received a score (0 when none did).  The twelve monthly scores are
    plotted together with a horizontal line at their mean, saved to a
    hard-coded PNG path and shown.
    """
    data = pd.read_csv(os.getcwd() + "/followers.txt", header=None)
    sample_scores = pyhmeter.load_scores("Data_Set_S1.txt")
    usernames = data[0]
    # Series.sample raises when n exceeds the population size, so cap it.
    sample_size = min(2000, len(usernames))

    scores = []
    dates = []
    average_score = 0
    for month_number in range(1, 13):
        month = "%.2d" % month_number
        since = "2019-" + month + "-01"
        # Use the first day of the following month as the upper bound so
        # every month is covered in full and no invalid date such as
        # "2019-02-30" is sent.
        # NOTE(review): assumes the search's `until` bound is exclusive --
        # confirm against the GetOldTweets version in use.
        if month_number < 12:
            until = "2019-%.2d-01" % (month_number + 1)
        else:
            until = "2020-01-01"

        monthly_score = 0
        cnt = 0  # number of scored tweets this month
        for user in usernames.sample(n=sample_size):
            tweetCriteria = got.manager.TweetCriteria().setUsername(
                user).setQuerySearch("ClimateChange").setSince(
                    since).setUntil(until).setMaxTweets(10)
            for tweet in got.manager.TweetManager.getTweets(tweetCriteria):
                h = pyhmeter.HMeter(tweet.text.split(), sample_scores)
                # Compute the score once and reuse it below.
                score = h.happiness_score()
                if score is not None:
                    monthly_score += score
                    print(cnt)
                    cnt += 1
                print(user)
                print(score)
        if cnt != 0:
            monthly_score /= cnt
        print(monthly_score)
        scores.append(monthly_score)
        average_score += monthly_score
        dates.append(month)

    average_score /= len(scores)
    print(scores, dates)
    plt.plot(dates, scores, label="actual data", color="red")
    plt.axhline(y=average_score, label="average score")
    plt.xticks(rotation=70)
    plt.xlabel("time(month)")
    plt.ylabel("hedonometer score")
    plt.legend()
    plt.savefig(
        "/home/saksham/Twitter_Project/GetOldTweets-python-master/test3.png")
    plt.show()
# Script: score every "Climate" tweet of the KremlinRussia_E account between
# 2009-12-30 and 2019-12-30, dump texts and scores to a CSV file and plot the
# deltah=1.0 score over time.
dates = []
tweets = []
url_rem = []
neutral_rem = []
normal = []
url_score = []
neutral_score = []
# tweetCriteria=got.manager.TweetCriteria().setUsername("barackobama").setQuerySearch("ClimateChange").setSince('2019-03-01').setUntil('2019-12-30').setMaxTweets(10000)
tweetCriteria = got.manager.TweetCriteria().setUsername(
    "KremlinRussia_E").setQuerySearch("Climate").setSince(
        "2009-12-30").setUntil("2019-12-30").setMaxTweets(100000)
# Fetch once and reuse the result: calling getTweets separately for the
# print and for the loop would run the whole query twice.
fetched_tweets = got.manager.TweetManager.getTweets(tweetCriteria)
print(fetched_tweets)
# NOTE(review): `sample_scores` is not assigned at module level in the code
# visible here (only inside the main() functions) -- confirm it is defined
# before this script section runs.
for i in fetched_tweets:
    # Replace each run of non-ASCII characters with a single space for the
    # stored text; scoring below still uses the unmodified tweet text.
    text = re.sub(r'[^\x00-\x7F]+', ' ', i.text)
    tweets.append(text)
    h = pyhmeter.HMeter(i.text.split(), sample_scores)
    # Score with the HMeter's initial deltah ...
    normal.append(h.happiness_score())
    # ... then again after setting deltah to 1.0.
    h.deltah = 1.0
    neutral_score.append(h.happiness_score())
    neutral_rem.append(h.matchlist)
    dates.append(i.date)

# Column contents: 'normal' = ASCII-only tweet text, 'cleaned_tweets' =
# h.matchlist after deltah=1.0, 'no_clean' = score before changing deltah,
# 'cleaned_score' = score with deltah=1.0.
data = pd.DataFrame(
    list(zip(tweets, neutral_rem, normal, neutral_score)),
    columns=['normal', 'cleaned_tweets', 'no_clean', 'cleaned_score'])
data.to_csv(os.getcwd() + "/putin(2009-2020)cleaned(deltah=1.0).csv", sep="|")
datetimes = matplotlib.dates.date2num(dates)
plt.plot_date(datetimes, neutral_score, marker=None, linestyle="-")
plt.xticks(rotation=70)
plt.xlabel("time")
plt.ylabel("hedonometer score")