def mostCommonNGramsFromPosts(posts, N=12):
    """Annotate every post with its n-grams and rank the n-grams across posts.

    Each post is modified in place: its ``"raw_message"`` is normalized with
    ``e.normalizeString`` and the post gains the keys ``"unigrams"``,
    ``"bigrams"``, ``"trigrams"`` (from ``e.nGramsFromString`` with sizes
    1, 2 and 3) and ``"normalized_string"``.

    Args:
        posts: iterable of mutable mappings, each holding a ``"raw_message"``
            entry. It is iterated here and then handed to ``countNgrams``
            once per n-gram size.
        N: passed through to ``nCommonNgrams`` as its second argument
            (presumably how many n-grams to keep -- confirm against that
            helper).

    Returns:
        A three-element list holding the result of
        ``nCommonNgrams(countNgrams(posts, key), N)`` for the unigram,
        bigram and trigram keys, in that order.
    """
    gram_keys = ("unigrams", "bigrams", "trigrams")

    # Pass 1: attach the normalized text and the n-gram lists to each post.
    for post in posts:
        normalized = e.normalizeString(post["raw_message"])
        for size, key in enumerate(gram_keys, 1):
            post[key] = e.nGramsFromString(normalized, size)
        post["normalized_string"] = normalized

    # Pass 2: count each n-gram size over all posts and rank the counts.
    ranked = []
    for key in gram_keys:
        ranked.append(nCommonNgrams(countNgrams(posts, key), N))
    return ranked
def mostCommonNGramsFromString(string, N=8):
    """Print the most common 1-, 2- and 3-grams of ``string``.

    The string is normalized with ``e.normalizeString`` and split into
    unigrams, bigrams and trigrams with ``e.nGramsFromString``. For each
    size, the occurrences of every n-gram are counted, the count table is
    passed through ``flipDictionary``, and the result of
    ``nCommonNgrams(<flipped table>, N)`` is printed.

    Args:
        string: the raw text to analyse.
        N: passed through to ``nCommonNgrams`` as its second argument
            (presumably how many n-grams to report -- confirm against that
            helper).

    Returns:
        A list of three elements: whatever ``e.nGramsFromString`` produced
        for sizes 1, 2 and 3, in that order. Note that the ranked results
        are only printed, not returned.
    """
    n = e.normalizeString(string)

    # Build a real list rather than using ``map``: on Python 3 ``map``
    # returns a lazy iterator, which cannot be indexed and would be handed
    # back to the caller as a ``map`` object instead of a list.
    nGrams = [e.nGramsFromString(n, size) for size in (1, 2, 3)]

    for grams in nGrams:
        # Occurrence count for every distinct n-gram of this size.
        count = {}
        for gram in grams:
            count[gram] = count.get(gram, 0) + 1
        # NOTE(review): flipDictionary presumably re-keys the table by
        # count so nCommonNgrams can rank it -- verify against its definition.
        print(nCommonNgrams(flipDictionary(count), N))

    return nGrams