def buildNGramDict(taggedReviews, n=1, applyFn=sentenceSumSentiment, filterFn=filtering.chainFilter(filtering.lower, filtering.removeStopwords)):
    """
    Builds a dictionary mapping n-grams to accumulated sentence sentiment.

    For every sentence in every review, applyFn (defaults to
    sentenceSumSentiment) produces a sentiment score for the sentence; that
    score is added to each n-gram extracted from the filtered, tokenized
    sentence. The accumulated dict is passed through normalize() before
    being returned.

    Args:
        taggedReviews: iterable of reviews; each review is an iterable of
            tagged sentences exposing a `.sentence` attribute.
        n: n-gram order (default 1 = unigrams).
        applyFn: maps a tagged sentence to a numeric sentiment score.
        filterFn: token filter applied after filtering.tokenize
            (defaults to lowercasing + stopword removal).

    Returns:
        The normalized n-gram -> sentiment dictionary.
    """
    ngramDict = defaultdict(lambda: 0)
    for taggedReview in taggedReviews:
        for taggedSentence in taggedReview:
            sentenceSentiment = applyFn(taggedSentence)
            for ngram in nltk.ngrams(filterFn(filtering.tokenize(taggedSentence.sentence)), n):
                # BUG FIX: original read `ngramDict[ngram] += ngramDict[ngram]`,
                # which doubles the always-zero default (every entry stayed 0)
                # and never used the computed sentenceSentiment. Accumulate the
                # sentence sentiment, mirroring buildWordSentimentDict.
                ngramDict[ngram] += sentenceSentiment
    return normalize(ngramDict)
def buildWordSentimentDict(taggedReviews, applyFn=sentenceSumSentiment, filterFn=filtering.chainFilter(filtering.lower, filtering.removeStopwords)):
    """
    Builds a dictionary of word sentiments from training data by taking the
    running average of applying applyFn (defaults to sentenceSumSentiment).
    Words removed by filterFn never enter the dictionary.

    Args:
        taggedReviews: iterable of reviews; each review is an iterable of
            tagged sentences exposing a `.sentence` attribute.
        applyFn: maps a tagged sentence to a numeric sentiment score.
        filterFn: token filter applied after filtering.tokenize
            (defaults to lowercasing + stopword removal).

    Returns:
        The normalized word -> sentiment dictionary.
    """
    sentimentDict = defaultdict(lambda: 0)
    for taggedReview in taggedReviews:
        for taggedSentence in taggedReview:
            tokenizedSentence = filtering.tokenize(taggedSentence.sentence)
            filteredSentence = filterFn(tokenizedSentence)
            # Hoisted out of the word loop: the sentence score does not depend
            # on the word, so compute it once per sentence instead of once per
            # token. (Assumes applyFn is deterministic and side-effect free
            # for a given sentence — true of sentenceSumSentiment.)
            sentenceSentiment = applyFn(taggedSentence)
            for word in filteredSentence:
                sentimentDict[word] += sentenceSentiment
    return normalize(sentimentDict)
# load static dicts for name, path in staticSentimentDicts: dictSym = "{}SentimentsDict".format(name) exec("{} = util.loadWordSentimentDict(os.path.join(originalDir, '{}'))".format(dictSym, path)) loadedSentimentDicts.append((dictSym, eval(dictSym))) # compile learned dicts using various apply fucntions for name, fn in taggedSentenceEvaluationFunctions: exec("learned_{}_sentiments = util.buildWordSentimentDict(taggedReviews, applyFn={})".format(name, fn)) dictName = "learned_{}_sentiments".format(name) loadedSentimentDicts.append((dictName, eval(dictName))) filterFn = filtering.chainFilter(filtering.lower, filtering.removeStopwords) for name, sentimentDict in loadedSentimentDicts: exec("""def total_sentiment_{0}(inp): total = 0 for word in filterFn(filtering.tokenize(inp)): if word in {0}: total += {0}[word] return total""".format(name)) fnName = "total_sentiment_{}".format(name) definedFns.append((fnName, eval(fnName))) exec("""def num_positive_sentiment_words_{0}(inp): totalPos = 0 for word in filterFn(filtering.tokenize(inp)): if word in {0} and {0}[word] > 0: totalPos += 1