def similar_tweeters(user1, user2):
    """Return the cosine similarity of two users' stop-word-filtered tweet tokens."""
    tweets_a = usertweets.UserTweets(user1)
    tweets_b = usertweets.UserTweets(user2)
    # Tokenize each tweet's text, then strip stop words from both token lists.
    tokens_a = [tokenize_tweets(tweet.text) for tweet in tweets_a]
    tokens_b = [tokenize_tweets(tweet.text) for tweet in tweets_b]
    return cos_sim(remove_stop(tokens_a), remove_stop(tokens_b))
def similar_tweeters(user1, user2):
    """Return the cosine similarity of two users' stemmed, stop-word-filtered tweets."""
    stemmer = PorterStemmer()

    def _prepare(handle):
        # Fetch tweets, tokenize each tweet's text, drop stop words, then stem.
        tokens = [tweet_text.text for tweet_text in usertweets.UserTweets(handle)]
        tokens = [tokenize_tweets(text) for text in tokens]
        return stem_words(remove_stop(tokens), stemmer)

    return cos_sim(_prepare(user1), _prepare(user2))
def similar_tweeters(user1, user2):
    """Return a generator of user1's tweet words that also occur in user2's.

    Duplicates from user1's token stream are preserved, matching the
    original list-based behavior. Membership against user2's words is
    tested via a set — the original tested against a list, which made
    the overall comparison O(n*m) instead of O(n).
    """
    # Load the list of stop words once, shared by both tokenize passes.
    stop_words = _load_stop_words()
    # Fetch and tokenize each user's tweets.
    user1_words = list(
        _tokenize((t.text for t in usertweets.UserTweets(user1)), stop_words))
    user2_words = set(
        _tokenize((t.text for t in usertweets.UserTweets(user2)), stop_words))
    # Return words that are in both collections (O(1) set lookups).
    return (word for word in user1_words if word in user2_words)
def similar_tweeters(user1, user2):
    """Print and return the similarity value for two users based on their tweets.

    The original only printed the score and implicitly returned None,
    unlike the sibling implementations that return it; the print is kept
    for backward compatibility and the score is returned as well.
    """
    u1 = usertweets.UserTweets(user1)
    u2 = usertweets.UserTweets(user2)
    # Reduce each user's tweets to a space-joined string of important words.
    tt_u1 = " ".join(
        _word_set(_remove_solo_words(_get_important_tweet_words(u1))))
    tt_u2 = " ".join(
        _word_set(_remove_solo_words(_get_important_tweet_words(u2))))
    sim = nlp(tt_u1).similarity(nlp(tt_u2))
    print(sim)
    return sim
def similar_tweeters(user1, user2):
    """Return the spaCy similarity between two users' tokenized tweet text."""
    docs = [
        nlp(' '.join(tokenize_user_tweets(usertweets.UserTweets(handle))))
        for handle in (user1, user2)
    ]
    return docs[0].similarity(docs[1])