# --- Example no. 1 ---
def save_tweets():
    """Fetch cleaned tweets for each emotion class and upsert them into MongoDB.

    For every tweet returned by ``get_clean_tweets`` the function either loads
    the existing ``Tweet`` document (matched by ``id_str``) or creates a new
    one, refreshes its text/label/timestamp, and saves it.  Tweets whose save
    violates the unique index are reported and discarded.
    """
    connect_to_mongo()
    tweets_by_emotion = get_clean_tweets(EMOTION_CLASS_MAP,
                                         NUM_CLEAN_TWEETS_PER_CLASS)

    for emotion, tweets in tweets_by_emotion.items():
        for tweet in tweets:
            print("Fetched Tweet ID - " + tweet.get("id_str"))
            # Upsert: reuse the stored document when the tweet was saved
            # before, otherwise start a fresh one keyed by id_str.
            try:
                tweet_model = Tweet.objects(id_str=tweet.get("id_str")).get()
            except DoesNotExist:
                tweet_model = Tweet(id_str=tweet.get("id_str"))

            tweet_model.text = tweet.get("text")
            tweet_model.emotion_label = emotion
            tweet_model.created_at = date_parse(tweet.get("created_at"))

            # A unique index guards against duplicates; on a clash the tweet
            # is simply discarded.  (Typo fix: "alredy" -> "already".)
            try:
                tweet_model.save()
            except NotUniqueError:
                print(
                    f"Tweet {tweet_model.id_str} already exists - will discard......."
                )
# --- Example no. 2 ---
import json

import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report

from source.database_classes import connect_to_mongo, Tweet, ProcessedTweet

# Build the inputs for a classification report: join the emotion labels the
# tweets were saved with (ground truth) against the labels produced by
# processing, matching rows on the tweet ID.
connect_to_mongo()

initial_df = pd.read_csv("data/tweets_saved.csv")
initial_df = initial_df[["emotion_label", "id_str"]]
# Tweet IDs overflow float precision when read as numbers, so compare them
# as strings on both sides of the join.
initial_df['id_str'] = initial_df['id_str'].astype(str)

results_df = pd.read_csv("data/tweets_processed.csv")
results_df = results_df[["emotion_label", "id_str"]]
results_df['id_str'] = results_df['id_str'].astype(str)

# Inner join on the tweet ID.  Both id_str columns are already str (cast
# above), so the former post-merge .apply(str) was redundant and is dropped.
merged = pd.merge(initial_df, results_df, on="id_str")

# Map the capitalized sentiment names used upstream to the lowercase
# emotion labels stored with the tweets.
SENTIMENT_TO_EMOTION = dict([
    ("Excitement", "excitement"),
    ("Happiness", "happy"),
    ("Fear", "fear"),
    ("Surprise", "surprise"),
    ("Pleasant", "pleasant"),
    ("Anger", "anger"),
])