def filter_corpus():
    """Load the review corpus words, filter them, and report both sizes.

    Reads every word from ``movie_reviews.words()`` (presumably the NLTK
    movie-reviews corpus -- confirm against the file's imports), passes the
    sequence through ``cm.filter_sw``, and prints the word count before and
    after filtering.

    Returns:
        Whatever ``cm.filter_sw`` returns for the full corpus word sequence.
    """
    review_words = movie_reviews.words()
    # Single-argument, parenthesized print with %-formatting produces the
    # same output as the old ``print "label", n`` statement on Python 2 and
    # is also valid syntax on Python 3.
    print("# Review Words %d" % len(review_words))
    res = cm.filter_sw(review_words)
    print("# After filter %d" % len(res))
    return res
def doc_features(doc):
    """Build the feature mapping for one document.

    The document is filtered with ``cm.filter_sw`` and its remaining items
    are counted with ``cytoolz.frequencies``.  Every word reported by
    ``match(counts, word_features)`` then has its count written into a copy
    of ``zero_features``.

    Args:
        doc: Iterable of words accepted by ``cm.filter_sw``.

    Returns:
        A copy of ``zero_features`` in which each matched word maps to its
        count in ``doc``; all other entries keep their initial value.
    """
    counts = cytoolz.frequencies(cm.filter_sw(doc))
    # Work on a copy so the zero_features object defined outside this
    # function is not modified by the assignments below.
    vector = zero_features.copy()
    for token in match(counts, word_features):
        vector[token] = counts[token]
    return vector
def doc_features(doc):
    """Return per-document word counts laid over the ``zero_features`` base.

    NOTE(review): a function with this same name and equivalent body is
    visible just before this one; if both live in the same module, this
    later definition is the one that takes effect -- confirm and dedupe.

    Args:
        doc: Iterable of words accepted by ``cm.filter_sw``.

    Returns:
        A copy of ``zero_features`` where each word returned by
        ``match(word_counts, word_features)`` is set to its count in the
        filtered document.
    """
    word_counts = cytoolz.frequencies(cm.filter_sw(doc))
    # Begin from a fresh copy of the baseline (the original comment describes
    # it as initialised to 0) and overwrite only the matched words.
    feature_map = zero_features.copy()
    matched = match(word_counts, word_features)
    for term in matched:
        feature_map[term] = word_counts[term]
    return feature_map
def freq_dict(file_words):
    """Count the filtered, whitespace-separated tokens of ``file_words[1]``.

    Args:
        file_words: Indexable whose element at position 1 is a string of
            text (presumably a ``(name, text)`` pair -- verify with callers).

    Returns:
        The result of ``cytoolz.frequencies`` applied to the tokens that
        remain after ``cm.filter_sw``.
    """
    tokens = file_words[1].split()
    return cytoolz.frequencies(cm.filter_sw(tokens))