Ejemplo n.º 1
0
def similar_tweeters(user1, user2):
    """Compare the tweets of two users and return their ``cos_sim`` score.

    Both users go through the same stages, one stage at a time: fetch the
    tweets, pull out each tweet's ``text``, run ``tokenize_tweets`` on every
    text, then pass the per-user token collection through ``remove_stop``.

    Args:
        user1: Identifier handed to ``usertweets.UserTweets`` for the first user.
        user2: Identifier handed to ``usertweets.UserTweets`` for the second user.

    Returns:
        Whatever ``cos_sim`` returns for the two filtered token collections
        (presumably a cosine-similarity score -- confirm against ``cos_sim``).
    """
    # Each stage handles user1 first and user2 second.
    timelines = [usertweets.UserTweets(handle) for handle in (user1, user2)]
    texts = [[tweet.text for tweet in timeline] for timeline in timelines]
    tokenised = [[tokenize_tweets(text) for text in batch] for batch in texts]
    first_words, second_words = [remove_stop(batch) for batch in tokenised]
    return cos_sim(first_words, second_words)
Ejemplo n.º 2
0
def similar_tweeters(user1, user2):
    """Compare the stemmed tweet tokens of two users via ``cos_sim``.

    Each user's tweets are fetched, reduced to their ``text``, tokenised with
    ``tokenize_tweets``, filtered with ``remove_stop`` and finally stemmed
    with ``stem_words`` using one shared ``PorterStemmer`` instance.

    Args:
        user1: Identifier handed to ``usertweets.UserTweets`` for the first user.
        user2: Identifier handed to ``usertweets.UserTweets`` for the second user.

    Returns:
        Whatever ``cos_sim`` returns for the two stemmed collections
        (presumably a cosine-similarity score -- confirm against ``cos_sim``).
    """
    # Every stage handles user1 before user2.
    feeds = [usertweets.UserTweets(handle) for handle in (user1, user2)]
    bodies = [[status.text for status in feed] for feed in feeds]
    tokenised = [[tokenize_tweets(body) for body in batch] for batch in bodies]
    cleaned = [remove_stop(batch) for batch in tokenised]

    # A single stemmer instance is reused for both users.
    porter = PorterStemmer()
    first_stems, second_stems = [stem_words(words, porter) for words in cleaned]
    return cos_sim(first_stems, second_stems)
Ejemplo n.º 3
0
def similar_tweeters(user1, user2):
    """Return the tokens of ``user1``'s tweets that also occur for ``user2``.

    Both users' tweets are fetched through ``usertweets.UserTweets`` and their
    ``text`` attributes are run through ``_tokenize`` together with the stop
    words from ``_load_stop_words``.

    Args:
        user1: Identifier handed to ``usertweets.UserTweets`` for the first user.
        user2: Identifier handed to ``usertweets.UserTweets`` for the second user.

    Returns:
        A generator over ``user1``'s tokens, in their original order and with
        duplicates kept, restricted to tokens that also appear among
        ``user2``'s tokens.
    """
    # Load the list of stop words
    stop_words = _load_stop_words()

    # Fetch user tweets
    first_timeline = usertweets.UserTweets(user1)
    second_timeline = usertweets.UserTweets(user2)

    # Tokenize eagerly so that fetching/tokenizing happens before we return,
    # not when the caller starts consuming the generator.
    first_tokens = list(
        _tokenize((t.text for t in first_timeline), stop_words))
    second_tokens = list(
        _tokenize((t.text for t in second_timeline), stop_words))

    # Membership is tested once per token of user1; a hash-based lookup avoids
    # rescanning user2's whole token list every time. If the tokens turn out
    # to be unhashable, keep the list so the result is unchanged.
    try:
        lookup = frozenset(second_tokens)
    except TypeError:
        lookup = second_tokens

    # Return words that are in both collections
    return (token for token in first_tokens if token in lookup)
Ejemplo n.º 4
0
def similar_tweeters(user1, user2):
    """Print a similarity value for the tweets of two users.

    For each user the tweets are passed through
    ``_get_important_tweet_words``, ``_remove_solo_words`` and ``_word_set``;
    the resulting words are joined with single spaces and fed to ``nlp``. The
    similarity of the two resulting documents is printed to stdout.

    The number of tweets considered is decided by ``usertweets.UserTweets``
    (200 according to the original note -- confirm there).

    Args:
        user1: Identifier handed to ``usertweets.UserTweets`` for the first user.
        user2: Identifier handed to ``usertweets.UserTweets`` for the second user.

    Returns:
        None. The similarity value is only printed.
    """
    def _as_text(timeline):
        # Reduce one user's tweets to a single space-separated string.
        important = _get_important_tweet_words(timeline)
        repeated = _remove_solo_words(important)
        return " ".join(_word_set(repeated))

    first_timeline = usertweets.UserTweets(user1)
    second_timeline = usertweets.UserTweets(user2)

    first_text = _as_text(first_timeline)
    second_text = _as_text(second_timeline)

    first_doc = nlp(first_text)
    second_doc = nlp(second_text)

    print(first_doc.similarity(second_doc))
Ejemplo n.º 5
0
def similar_tweeters(user1, user2):
    """Return the similarity between the tweets of two users.

    For each user, the tweets from ``usertweets.UserTweets`` are tokenised
    with ``tokenize_user_tweets``, joined with single spaces and passed to
    ``nlp``; the two resulting documents are then compared.

    Args:
        user1: Identifier handed to ``usertweets.UserTweets`` for the first user.
        user2: Identifier handed to ``usertweets.UserTweets`` for the second user.

    Returns:
        The value of ``similarity`` called on the first user's document with
        the second user's document as argument.
    """
    docs = []
    # user1 is processed completely before user2.
    for handle in (user1, user2):
        timeline = usertweets.UserTweets(handle)
        words = tokenize_user_tweets(timeline)
        docs.append(nlp(' '.join(words)))

    first_doc, second_doc = docs
    return first_doc.similarity(second_doc)