def canopy(n):
    """Print posting-list sizes for the shingles of one reference name.

    Builds an n-gram index over the ``'nname'`` field of the data returned
    by ``fl_data()``, then takes the entry labelled ``0`` of the ``nname``
    attribute of ``oge_data()`` and, for every shingle of that entry,
    prints how many indexed rows share the shingle.

    Args:
        n: Shingle size; forwarded to ``ngram_index`` as ``n`` and to
            ``shingle`` as ``k``.
    """
    indexed_frame = fl_data()
    query_frame = oge_data()
    postings = ngram_index(indexed_frame, 'nname', n=n)

    # NOTE(review): ``nname[0]`` is a label lookup, not positional; this
    # presumably relies on the frame having a default integer index.
    query_name = query_frame.nname[0]
    for gram in shingle(query_name, k=n):
        matching_rows = postings[gram]
        print(len(matching_rows))
def ngram_index(df, name_field, n=4):
    """Build an inverted index from shingles to the rows that contain them.

    Args:
        df: pandas DataFrame holding the values to index.
        name_field: Label of the column whose values are shingled.
        n: Shingle size, passed to ``shingle`` as ``k``.

    Returns:
        A ``defaultdict(set)`` mapping each shingle to the set of index
        labels of the rows whose ``name_field`` value produced it.
    """
    index = defaultdict(set)
    # Iterate only the column that is actually read. DataFrame.iterrows()
    # materialises a whole Series per row, which is slow and can upcast
    # values when the frame has mixed dtypes.
    for idx, name in df[name_field].items():
        for s in shingle(name, k=n):
            index[s].add(idx)
    return index