def nMostCommonWords(pathToTwitterData: str, n: int) -> List[Tuple[str, int]]:
    """Return the n most common words across all tweet texts in a Twitter data file.

    Args:
        pathToTwitterData: Path to the Twitter data file.
        n: Number of top tokens to return.

    Returns:
        (token, count) pairs as produced by ``stats.nMostCommonTokens``
        (presumably ordered most-common first — TODO confirm in stats module).
    """
    # Return annotation added for consistency with nMostCommonHashtags,
    # which wraps the same stats.nMostCommonTokens call.
    tweetTexts = allTweetTexts(pathToTwitterData)
    return stats.nMostCommonTokens(tweetTexts, n)
def nMostCommonBigrams(pathToTwitterData: str, n: int) -> List[Tuple[str, int]]:
    """Return the n most common bigrams across all tweet texts in a Twitter data file.

    Args:
        pathToTwitterData: Path to the Twitter data file.
        n: Number of top tokens to return.

    Returns:
        (bigram, count) pairs as produced by ``stats.nMostCommonTokens``
        with the ``stats.bigramsInText`` tokenizer.
    """
    # Return annotation added for consistency with nMostCommonHashtags,
    # which wraps the same stats.nMostCommonTokens call.
    tweetTexts = allTweetTexts(pathToTwitterData)
    return stats.nMostCommonTokens(tweetTexts, n, stats.bigramsInText)
def nMostCommonHashtags(pathToTwitterData: str, n: int) -> List[Tuple[str, int]]:
    """Return the n most common hashtags in the tweets stored at the given path.

    Reads the tweet records with ``tweetsFromFile`` and tallies hashtags via
    ``stats.nMostCommonTokens`` using the ``hashtagsInTweet`` tokenizer.
    """
    return stats.nMostCommonTokens(tweetsFromFile(pathToTwitterData), n, hashtagsInTweet)
def nMostCommonWords(pathToRedditData: str, n: int) -> List[Tuple[str, int]]:
    """Return the n most common words across all Reddit texts in a data file.

    Args:
        pathToRedditData: Path to the Reddit data file.
        n: Number of top tokens to return.

    Returns:
        (token, count) pairs as produced by ``stats.nMostCommonTokens``.
    """
    # Return annotation added for consistency with the annotated
    # nMostCommonHashtags wrapper around the same stats helper.
    redditTexts = getRedditTexts(pathToRedditData)
    return stats.nMostCommonTokens(redditTexts, n)
def nMostCommonBigrams(pathToRedditData: str, n: int) -> List[Tuple[str, int]]:
    """Return the n most common bigrams across all Reddit texts in a data file.

    Args:
        pathToRedditData: Path to the Reddit data file.
        n: Number of top tokens to return.

    Returns:
        (bigram, count) pairs as produced by ``stats.nMostCommonTokens``
        with the ``stats.bigramsInText`` tokenizer.
    """
    # Return annotation added for consistency with the annotated
    # nMostCommonHashtags wrapper around the same stats helper.
    redditTexts = getRedditTexts(pathToRedditData)
    return stats.nMostCommonTokens(redditTexts, n, stats.bigramsInText)
posts = None if args.platform == "twitter": posts = twitter_analysis.allTweetTexts(args.dataPath) elif args.platform == "reddit": posts = reddit_analysis.getRedditTexts(args.dataPath) else: raise features = None genderer = None exclusionFilter = lambda x: True stopWordFilter = lambda x: True if args.ngram == 1: features = [ token for token, count in stats.nMostCommonTokens( posts, args.numTokens, stats.wordsInText) ] genderer = makeWordGenderer(allGenderedWords) if args.filterExcluded: exclusionFilter = makeIsNotExcludedUnigram(allGenderedWords) if args.filterStop: stopWordFilter = makeIsNotStopWordUnigram() elif args.ngram == 2: features = [ token for token, count in stats.nMostCommonTokens( posts, args.numTokens, stats.bigramsInText) ] genderer = makeBigramGenderer(allGenderedWords) if args.filterExcluded: