def calcNgramSim(a, b):
    """Return the n-gram similarity score of two n-gram collections.

    The number of shared n-grams is the size of ``a.intersection(b)``; the
    total is ``len(a) + len(b) - shared`` (the size of the union when both
    arguments are sets). Both counts are handed to
    ``NGram.ngram_similarity``, which computes the actual score.

    Args:
        a: Set-like collection of n-grams; must support ``intersection``
            and ``len``.
        b: Sized collection of n-grams.

    Returns:
        The value of ``NGram.ngram_similarity(shared, total)``, or ``0``
        when the total is zero (both collections empty).
    """
    num_shared_ngrams = len(a.intersection(b))
    # Inclusion-exclusion: shared n-grams are counted once, not twice.
    num_all_ngrams = len(a) + len(b) - num_shared_ngrams

    # Nothing to compare: return 0 rather than passing a zero total to the
    # scorer.
    if num_all_ngrams == 0:
        return 0

    # NOTE(review): NGram is defined outside this block — presumably the
    # `ngram` package's class; confirm against the file's imports.
    return NGram.ngram_similarity(num_shared_ngrams, num_all_ngrams)