def nMostCommonWords(pathToTwitterData: str, n: int) -> List[Tuple[str, int]]:
    """Return the n most common words across all tweet texts in a Twitter data file.

    Args:
        pathToTwitterData: Path to the Twitter data file.
        n: Number of top tokens to return.

    Returns:
        (token, count) pairs as produced by ``stats.nMostCommonTokens``
        (presumably ordered most-common first — TODO confirm in stats module).
    """
    # Return annotation added for consistency with nMostCommonHashtags,
    # which wraps the same stats.nMostCommonTokens call.
    tweetTexts = allTweetTexts(pathToTwitterData)
    return stats.nMostCommonTokens(tweetTexts, n)
def nMostCommonBigrams(pathToTwitterData: str, n: int) -> List[Tuple[str, int]]:
    """Return the n most common bigrams across all tweet texts in a Twitter data file.

    Args:
        pathToTwitterData: Path to the Twitter data file.
        n: Number of top tokens to return.

    Returns:
        (bigram, count) pairs as produced by ``stats.nMostCommonTokens``
        with the ``stats.bigramsInText`` tokenizer.
    """
    # Return annotation added for consistency with nMostCommonHashtags,
    # which wraps the same stats.nMostCommonTokens call.
    tweetTexts = allTweetTexts(pathToTwitterData)
    return stats.nMostCommonTokens(tweetTexts, n, stats.bigramsInText)
def nMostCommonHashtags(pathToTwitterData: str, n: int) -> List[Tuple[str, int]]:
    """Return the n most common hashtags in the tweets stored at the given path.

    Reads the tweet records with ``tweetsFromFile`` and tallies hashtags via
    ``stats.nMostCommonTokens`` using the ``hashtagsInTweet`` tokenizer.
    """
    return stats.nMostCommonTokens(tweetsFromFile(pathToTwitterData), n, hashtagsInTweet)
def nMostCommonWords(pathToRedditData: str, n: int) -> List[Tuple[str, int]]:
    """Return the n most common words across all Reddit texts in a data file.

    Args:
        pathToRedditData: Path to the Reddit data file.
        n: Number of top tokens to return.

    Returns:
        (token, count) pairs as produced by ``stats.nMostCommonTokens``.
    """
    # Return annotation added for consistency with the annotated
    # nMostCommonHashtags wrapper around the same stats helper.
    redditTexts = getRedditTexts(pathToRedditData)
    return stats.nMostCommonTokens(redditTexts, n)
def nMostCommonBigrams(pathToRedditData: str, n: int) -> List[Tuple[str, int]]:
    """Return the n most common bigrams across all Reddit texts in a data file.

    Args:
        pathToRedditData: Path to the Reddit data file.
        n: Number of top tokens to return.

    Returns:
        (bigram, count) pairs as produced by ``stats.nMostCommonTokens``
        with the ``stats.bigramsInText`` tokenizer.
    """
    # Return annotation added for consistency with the annotated
    # nMostCommonHashtags wrapper around the same stats helper.
    redditTexts = getRedditTexts(pathToRedditData)
    return stats.nMostCommonTokens(redditTexts, n, stats.bigramsInText)
posts = None if args.platform == "twitter": posts = twitter_analysis.allTweetTexts(args.dataPath) elif args.platform == "reddit": posts = reddit_analysis.getRedditTexts(args.dataPath) else: raise features = None genderer = None exclusionFilter = lambda x: True stopWordFilter = lambda x: True if args.ngram == 1: features = [ token for token, count in stats.nMostCommonTokens( posts, args.numTokens, stats.wordsInText) ] genderer = makeWordGenderer(allGenderedWords) if args.filterExcluded: exclusionFilter = makeIsNotExcludedUnigram(allGenderedWords) if args.filterStop: stopWordFilter = makeIsNotStopWordUnigram() elif args.ngram == 2: features = [ token for token, count in stats.nMostCommonTokens( posts, args.numTokens, stats.bigramsInText) ] genderer = makeBigramGenderer(allGenderedWords) if args.filterExcluded: