def classify_replies_tweets(self):
    """Classify every stored reply tweet and log a summary for this emotion.

    All replies are loaded from ``TweetDB`` and their texts are handed to
    ``self.classify_sentences``, which returns a collection of indexes into
    that list plus a total. For each returned index the retweet count of the
    reply's parent tweet is collected. The number of selected replies, the
    total, ``self.emotion_label`` and the mean parent retweet count (0 when
    no reply was selected) are written to ``self.logger`` at INFO level.
    """
    replies = list(TweetDB().all_replies())
    reply_texts = [reply.text for reply in replies]
    selected_indexes, total = self.classify_sentences(reply_texts)

    # Retweet count of the parent tweet of every selected reply.
    parent_retweets = [
        int(replies[position].parent_tweet.retweet_count)
        for position in selected_indexes
    ]

    # Guard the empty case so the mean is a plain 0 instead of NaN.
    if parent_retweets:
        retweets_average = np.array(parent_retweets).mean()
    else:
        retweets_average = 0

    summary = '%d of %d on %s category (retweets average: %.2f)' % (
        len(parent_retweets), total, self.emotion_label, retweets_average)
    self.logger.info(summary)
def predict_replies(filepath, **kwargs):
    """Classify all reply tweets per emotion and report how emotions overlap.

    For every entry of the module-level ``emotions`` sequence a
    ``SingleEmotionTECNaiveBayes`` is created from
    ``output/<emotion>-tec.pickle`` and run over the texts of all replies
    stored in ``TweetDB``. The indexes returned by each classifier are then
    compared across emotions: for each emotion the function prints how many
    of its replies were also selected for at least one other emotion (with
    the average and median number of additional selections), how many were
    selected for that emotion only, and forwards the latter to
    ``get_parent_tweets_metrics``. Finally ``count_repeated_emotions`` is
    called with the replies and the per-emotion index lists.

    Args:
        filepath: Not used by this function; kept for interface
            compatibility.
        **kwargs: Not used by this function; kept for interface
            compatibility.
    """
    from collections import Counter

    # Load the replies once. Every classifier's indexes must refer to the
    # same list, and ``tweets`` must exist even when ``emotions`` is empty.
    tweets = list(TweetDB().all_replies())

    identified_replies = []
    for emotion in emotions:
        model_file = os.path.join('output', '%s-tec.pickle' % emotion)
        nb = SingleEmotionTECNaiveBayes(emotion_name=emotion,
                                        filename=model_file)
        tweets_with_emotion_indexes, total = nb.classify_sentences(
            [t.text for t in tweets])
        identified_replies.append(tweets_with_emotion_indexes)
        print('%d of %d on %s category' %
              (len(tweets_with_emotion_indexes), total, emotion))

    # Count how many additional emotions each tweet has, and print the
    # average and median for each emotion.
    for index, tweets_indexes in enumerate(identified_replies):
        # Exclude the current emotion by position, not by list equality:
        # another emotion with an identical index list is still "another
        # emotion" and must be counted.
        other_counts = Counter(
            ti
            for other_index, tis in enumerate(identified_replies)
            if other_index != index
            for ti in tis
        )
        repeated_tweets_indexes = [
            other_counts[ti] for ti in tweets_indexes if ti in other_counts
        ]
        one_emotion_tweets = [
            tweets[ti] for ti in tweets_indexes if ti not in other_counts
        ]

        # np.mean/np.median of an empty list yield NaN and a RuntimeWarning;
        # report 0 when no reply of this emotion overlaps with another one.
        if repeated_tweets_indexes:
            repeated_avg = np.mean(repeated_tweets_indexes)
            repeated_mdn = np.median(repeated_tweets_indexes)
        else:
            repeated_avg = repeated_mdn = 0.0

        print(
            '%s: %d out of %d have been identified with another emotion as well (avg: %.2f, mdn: %.2f)'
            % (emotions[index], len(repeated_tweets_indexes),
               len(tweets_indexes), repeated_avg, repeated_mdn))
        print('%d tweets have been identified only with %s' %
              ((len(tweets_indexes) - len(repeated_tweets_indexes)),
               emotions[index]))
        get_parent_tweets_metrics(one_emotion_tweets, emotions[index])

    count_repeated_emotions(tweets, identified_replies)