def buildNGramDict(taggedReviews, n=1, applyFn=sentenceSumSentiment, filterFn=filtering.chainFilter(filtering.lower, filtering.removeStopwords)):
    """
    Builds a dictionary mapping each n-gram seen in the training reviews to
    the accumulated sentence sentiment of the sentences containing it,
    normalized via normalize() before being returned.

    Args:
        taggedReviews: iterable of reviews; each review is an iterable of
            tagged sentences exposing a `.sentence` attribute.
        n: n-gram order (default 1 = unigrams).
        applyFn: maps a tagged sentence to a sentiment score
            (defaults to sentenceSumSentiment).
        filterFn: token filter applied after tokenization
            (defaults to lowercasing + stopword removal).

    Returns:
        The normalized n-gram -> accumulated-sentiment dictionary.
    """
    ngramDict = defaultdict(lambda: 0)
    for taggedReview in taggedReviews:
        for taggedSentence in taggedReview:
            # Sentence-level sentiment is invariant across the n-grams of
            # this sentence, so compute it once here.
            sentenceSentiment = applyFn(taggedSentence)
            tokens = filterFn(filtering.tokenize(taggedSentence.sentence))
            for ngram in nltk.ngrams(tokens, n):
                # BUG FIX: the original did `ngramDict[ngram] += ngramDict[ngram]`,
                # which doubles the entry's own (initially zero) value, leaving
                # every entry 0 and discarding the computed sentenceSentiment.
                # Accumulate the sentence sentiment instead, mirroring
                # buildWordSentimentDict.
                ngramDict[ngram] += sentenceSentiment
    return normalize(ngramDict)
def buildWordSentimentDict(taggedReviews, applyFn=sentenceSumSentiment, filterFn=filtering.chainFilter(filtering.lower, filtering.removeStopwords)):
    """
    Builds a dictionary of word sentiments from training data by summing,
    per word, the result of applyFn (defaults to sentenceSumSentiment) over
    every sentence the word appears in, then normalizing via normalize().

    Args:
        taggedReviews: iterable of reviews; each review is an iterable of
            tagged sentences exposing a `.sentence` attribute.
        applyFn: maps a tagged sentence to a sentiment score.
        filterFn: token filter applied after tokenization
            (defaults to lowercasing + stopword removal).

    Returns:
        The normalized word -> accumulated-sentiment dictionary.
    """
    sentimentDict = defaultdict(lambda: 0)
    for taggedReview in taggedReviews:
        for taggedSentence in taggedReview:
            # Hoisted out of the per-word loop: applyFn depends only on the
            # sentence, so calling it once per sentence (not once per word)
            # avoids redundant re-evaluation without changing the result,
            # assuming applyFn is deterministic and side-effect free.
            sentenceSentiment = applyFn(taggedSentence)
            tokenizedSentence = filtering.tokenize(taggedSentence.sentence)
            filteredSentence = filterFn(tokenizedSentence)
            for word in filteredSentence:
                sentimentDict[word] += sentenceSentiment
    return normalize(sentimentDict)