Esempio n. 1
0
def mostCommonNGramsFromPosts(posts, N=12):
    """Annotate each post with its n-grams and return the most common ones.

    Every post in ``posts`` is modified in place: its ``"raw_message"`` is
    normalized via ``e.normalizeString`` and the result is used to fill the
    ``"unigrams"``, ``"bigrams"``, ``"trigrams"`` and ``"normalized_string"``
    keys.

    Args:
        posts: iterable of dict-like posts, each with a ``"raw_message"`` key.
        N: passed through to ``nCommonNgrams`` as the number of n-grams to keep.

    Returns:
        A three-element list ``[unigram_result, bigram_result, trigram_result]``
        where each element is whatever ``nCommonNgrams`` returns for the counts
        produced by ``countNgrams`` over the corresponding post key.
    """
    # (post key, n-gram size) pairs, in the order they are stored and reported.
    gram_fields = (("unigrams", 1), ("bigrams", 2), ("trigrams", 3))

    for post in posts:
        normalized = e.normalizeString(post["raw_message"])
        for field, size in gram_fields:
            post[field] = e.nGramsFromString(normalized, size)
        post["normalized_string"] = normalized

    # Count each n-gram size across all posts and keep the N most common.
    return [nCommonNgrams(countNgrams(posts, field), N) for field, _ in gram_fields]
Esempio n. 2
0
def mostCommonNGramsFromString(string, N=8):
    """Print the most common 1-, 2- and 3-grams of ``string``; return all n-grams.

    The string is normalized with ``e.normalizeString`` and split into
    unigrams, bigrams and trigrams with ``e.nGramsFromString``. For each
    n-gram size, occurrences are counted, the count dictionary is passed
    through ``flipDictionary`` and the result of ``nCommonNgrams(..., N)`` is
    printed.

    Args:
        string: the raw text to analyse.
        N: passed through to ``nCommonNgrams`` as the number of n-grams to keep.

    Returns:
        A list of three elements: the unigrams, bigrams and trigrams of the
        normalized string, exactly as returned by ``e.nGramsFromString``.
    """
    n = e.normalizeString(string)

    # Build a real list rather than using ``map``: on Python 3 ``map`` returns
    # a lazy iterator, which cannot be indexed and would not be a usable
    # return value for callers expecting a list.
    nGrams = [e.nGramsFromString(n, size) for size in (1, 2, 3)]

    for grams in nGrams:
        count = {}
        for gram in grams:
            count[gram] = count.get(gram, 0) + 1
        # flipDictionary is defined elsewhere; presumably it inverts the
        # gram -> count mapping into the shape nCommonNgrams expects.
        print(nCommonNgrams(flipDictionary(count), N))

    return nGrams