예제 #1
0
def buildNGramDict(taggedReviews,
                   n=1,
                   applyFn=sentenceSumSentiment,
                   filterFn=filtering.chainFilter(filtering.lower,
                                                  filtering.removeStopwords)):
    """
    Builds a dictionary mapping n-grams to accumulated sentence sentiment.

    For every tagged sentence, the sentiment score produced by applyFn
    (defaults to sentenceSumSentiment) is added to each n-gram extracted
    from the filtered, tokenized sentence text. The resulting dictionary
    is normalized before being returned.
    """
    ngramDict = defaultdict(lambda: 0)
    for taggedReview in taggedReviews:
        for taggedSentence in taggedReview:
            # Score the sentence once; every n-gram in it gets credit.
            sentenceSentiment = applyFn(taggedSentence)
            tokens = filterFn(filtering.tokenize(taggedSentence.sentence))
            for ngram in nltk.ngrams(tokens, n):
                # BUG FIX: the original did `ngramDict[ngram] += ngramDict[ngram]`,
                # which doubles an initially-zero value — every entry stayed 0 and
                # sentenceSentiment was computed but never used. Accumulate the
                # sentence sentiment instead, mirroring buildWordSentimentDict.
                ngramDict[ngram] += sentenceSentiment
    return normalize(ngramDict)
예제 #2
0
def buildWordSentimentDict(taggedReviews,
                           applyFn=sentenceSumSentiment,
                           filterFn=filtering.chainFilter(filtering.lower,
                                                          filtering.removeStopwords)):
    """
    Builds a dictionary of word sentiments from training data.

    Each word that survives filterFn accumulates the sentence-level
    sentiment computed by applyFn (defaults to sentenceSumSentiment).
    The accumulated dictionary is normalized before being returned.

    (The previous docstring mentioned a `filterDict` argument that does
    not exist; filtering is performed by the `filterFn` callable.)
    """
    sentimentDict = defaultdict(lambda: 0)
    for taggedReview in taggedReviews:
        for taggedSentence in taggedReview:
            # Hoisted out of the word loop: the sentence's sentiment does
            # not depend on the word, so compute it once per sentence
            # instead of once per surviving word.
            sentenceSentiment = applyFn(taggedSentence)
            tokenizedSentence = filtering.tokenize(taggedSentence.sentence)
            for word in filterFn(tokenizedSentence):
                sentimentDict[word] += sentenceSentiment
    return normalize(sentimentDict)
예제 #3
0
# load static dicts
# For each (name, relative-path) pair, load a word-sentiment dictionary from
# disk and bind it to a module-level variable named "<name>SentimentsDict".
# NOTE(review): exec/eval on format strings is fragile; a plain dict keyed by
# name would be safer — kept as-is to preserve the dynamically created
# module-level symbols that other code may reference.
for name, path in staticSentimentDicts:
    dictSym = "{}SentimentsDict".format(name)
    exec("{} = util.loadWordSentimentDict(os.path.join(originalDir, '{}'))".format(dictSym,
                                                                                   path))
    # Record (symbol name, dict object) so later code can iterate every dict.
    loadedSentimentDicts.append((dictSym, eval(dictSym)))

# compile learned dicts using various apply functions
# Each entry builds a dictionary from the training reviews with the given
# apply function and binds it to "learned_<name>_sentiments" at module level.

for name, fn in taggedSentenceEvaluationFunctions:
    exec("learned_{}_sentiments = util.buildWordSentimentDict(taggedReviews, applyFn={})".format(name, fn))
    dictName = "learned_{}_sentiments".format(name)
    loadedSentimentDicts.append((dictName, eval(dictName)))

# Shared token filter used by the exec-generated feature functions below:
# lowercase each token, then drop stopwords.
filterFn = filtering.chainFilter(filtering.lower,
                                 filtering.removeStopwords)
for name, sentimentDict in loadedSentimentDicts:
    exec("""def total_sentiment_{0}(inp):
        total = 0
        for word in filterFn(filtering.tokenize(inp)):
            if word in {0}:
                total += {0}[word]
        return total""".format(name))
    fnName = "total_sentiment_{}".format(name)
    definedFns.append((fnName, eval(fnName)))

    exec("""def num_positive_sentiment_words_{0}(inp):
            totalPos = 0
            for word in filterFn(filtering.tokenize(inp)):
                if word in {0} and {0}[word] > 0:
                    totalPos += 1