예제 #1
0
 def classify_replies_tweets(self):
     """Classify all stored replies and log the parent tweets' mean retweets.

     Every reply returned by ``TweetDB().all_replies()`` has its text passed
     to ``self.classify_sentences``, which yields the selected reply indexes
     and a total count. For each selected reply the retweet count of its
     parent tweet is collected, and the number of selected replies, the
     total, ``self.emotion_label`` and the average retweet count (0 when
     nothing was selected) are written to ``self.logger`` at INFO level.
     """
     replies = list(TweetDB().all_replies())
     reply_texts = [reply.text for reply in replies]
     matched_indexes, total = self.classify_sentences(reply_texts)

     # Retweet count of the tweet each matched reply is answering.
     parent_retweets = [
         int(replies[idx].parent_tweet.retweet_count)
         for idx in matched_indexes
     ]

     # Avoid taking the mean of an empty array when nothing matched.
     if parent_retweets:
         average_retweets = np.array(parent_retweets).mean()
     else:
         average_retweets = 0

     summary = '%d of %d on %s category (retweets average: %.2f)' % (
         len(parent_retweets), total, self.emotion_label, average_retweets)
     self.logger.info(summary)
예제 #2
0
def predict_replies(filepath, **kwargs):
    """Classify all replies with every emotion model and report overlaps.

    For each name in ``emotions`` a ``SingleEmotionTECNaiveBayes`` model is
    loaded from ``output/<emotion>-tec.pickle`` and run over the text of
    every reply returned by ``TweetDB().all_replies()``. The indexes each
    model selects are collected, then for every emotion the function prints:

    * how many replies were selected for that emotion,
    * how many of those were also selected by at least one other emotion,
      with the average and median number of additional emotions
      (0.00 when there is no overlap), and
    * how many were selected by that emotion only.

    Replies selected by a single emotion are passed to
    ``get_parent_tweets_metrics`` and, at the end, all replies and all index
    lists are passed to ``count_repeated_emotions``.

    Args:
        filepath: Not used by this function; kept for interface compatibility.
        **kwargs: Accepted and ignored.
    """
    from collections import Counter

    # Load the replies once: the set of replies does not depend on the
    # emotion, every index list must refer to the same snapshot, and
    # ``tweets`` must exist even when ``emotions`` is empty.
    tweets = list(TweetDB().all_replies())

    identified_replies = []
    for emotion in emotions:
        model_file = os.path.join('output', '%s-tec.pickle' % emotion)
        nb = SingleEmotionTECNaiveBayes(emotion_name=emotion,
                                        filename=model_file)
        # A fresh list per classifier, in case classify_sentences mutates
        # its argument (not visible from here).
        tweets_with_emotion_indexes, total = nb.classify_sentences(
            [t.text for t in tweets])
        identified_replies.append(tweets_with_emotion_indexes)
        print('%d of %d on %s category' %
              (len(tweets_with_emotion_indexes), total, emotion))

    # Count how many additional emotions each tweet has, and print the
    # average and median for each emotion.
    for index, tweets_indexes in enumerate(identified_replies):
        # Tally how many OTHER emotions selected each reply index. The
        # current emotion is excluded by position, not by list equality, so
        # two emotions with identical selections still count as overlapping.
        other_emotion_counts = Counter(
            ti
            for other_index, tis in enumerate(identified_replies)
            if other_index != index
            for ti in tis
        )
        extra_emotions_per_tweet = [
            other_emotion_counts[ti] for ti in tweets_indexes
            if ti in other_emotion_counts
        ]
        one_emotion_tweets = [
            tweets[ti] for ti in tweets_indexes
            if ti not in other_emotion_counts
        ]

        # np.mean / np.median of an empty sequence warn and return nan.
        if extra_emotions_per_tweet:
            average_extra = np.mean(extra_emotions_per_tweet)
            median_extra = np.median(extra_emotions_per_tweet)
        else:
            average_extra = median_extra = 0.0

        print(
            '%s: %d out of %d have been identified with another emotion as well (avg: %.2f, mdn: %.2f)'
            % (emotions[index], len(extra_emotions_per_tweet),
               len(tweets_indexes), average_extra, median_extra))

        print('%d tweets have been identified only with %s' %
              ((len(tweets_indexes) - len(extra_emotions_per_tweet)),
               emotions[index]))

        get_parent_tweets_metrics(one_emotion_tweets, emotions[index])

    count_repeated_emotions(tweets, identified_replies)