Example 1
import nltk
from collections import defaultdict

# normalize, sentenceSumSentiment and the filtering module are project-local
# helpers; a guessed sketch of them follows below.
def buildNGramDict(taggedReviews,
                   n=1,
                   applyFn=sentenceSumSentiment,
                   filterFn=filtering.chainFilter(filtering.lower,
                                                  filtering.removeStopwords)):
    ngramDict = defaultdict(lambda: 0)
    for taggedReview in taggedReviews:
        for taggedSentence in taggedReview:
            # credit the sentence-level sentiment to every n-gram in the sentence
            sentenceSentiment = applyFn(taggedSentence)
            for ngram in nltk.ngrams(filterFn(filtering.tokenize(taggedSentence.sentence)), n):
                ngramDict[ngram] += sentenceSentiment
    return normalize(ngramDict)
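
Both examples rely on helpers whose definitions are not shown: normalize, sentenceSumSentiment, and the filtering module. The sketch below is only a guess at minimal implementations consistent with how they are called above; the project's real versions may differ (in particular, sentenceSumSentiment presumably sums token-level sentiment tags rather than reading a single label).

import types

STOPWORDS = {"the", "a", "an", "and", "or", "of", "to", "is", "was"}  # placeholder list

def tokenize(sentence):
    # stand-in whitespace tokenizer
    return sentence.split()

def lower(tokens):
    return [token.lower() for token in tokens]

def removeStopwords(tokens):
    return [token for token in tokens if token not in STOPWORDS]

def chainFilter(*filters):
    # compose token filters, applied left to right
    def chained(tokens):
        for step in filters:
            tokens = step(tokens)
        return tokens
    return chained

# bundle the filters so that `filtering.tokenize` etc. resolve as in the examples
filtering = types.SimpleNamespace(tokenize=tokenize, lower=lower,
                                  removeStopwords=removeStopwords,
                                  chainFilter=chainFilter)

def sentenceSumSentiment(taggedSentence):
    # guess: a single numeric sentiment label per sentence stands in for
    # whatever per-token sum the real helper computes
    return taggedSentence.sentiment

def normalize(counts):
    # scale values so their absolute sum is 1; empty dicts pass through unchanged
    total = sum(abs(value) for value in counts.values())
    return {key: value / total for key, value in counts.items()} if total else dict(counts)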
Example 2
from collections import defaultdict

def buildWordSentimentDict(taggedReviews,
                           applyFn=sentenceSumSentiment,
                           filterFn=filtering.chainFilter(filtering.lower,
                                                          filtering.removeStopwords)):
    """
    Builds a dictionary of word sentiments from training data by summing,
    for each word, the result of applyFn (defaults to sentenceSumSentiment)
    over the sentences that word appears in, then normalizing the totals.
    Words removed by filterFn are excluded.
    """
    sentimentDict = defaultdict(lambda: 0)
    for taggedReview in taggedReviews:
        for taggedSentence in taggedReview:
            # tokenize, then lowercase and drop stopwords (the default filterFn)
            tokenizedSentence = filtering.tokenize(taggedSentence.sentence)
            filteredSentence = filterFn(tokenizedSentence)
            for word in filteredSentence:
                # add the whole sentence's sentiment to each surviving word
                sentimentDict[word] += applyFn(taggedSentence)
    return normalize(sentimentDict)
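
A hypothetical end-to-end call, assuming the sketched helpers above are defined (or the project's real ones are importable) before the two builder functions. TaggedSentence here is a stand-in type; the examples only require that tagged sentences expose a .sentence attribute plus whatever applyFn reads (here, a numeric .sentiment).

from collections import namedtuple

# stand-in for the project's tagged-sentence type (hypothetical)
TaggedSentence = namedtuple("TaggedSentence", ["sentence", "sentiment"])

taggedReviews = [
    [TaggedSentence("The plot was gripping", 2),
     TaggedSentence("The ending felt rushed", -1)],
    [TaggedSentence("A dull and forgettable film", -2)],
]

wordSentiments = buildWordSentimentDict(taggedReviews)
bigramSentiments = buildNGramDict(taggedReviews, n=2)
print(wordSentiments)
print(bigramSentiments)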