def search(self, text):
    """Rank every indexed tweet by trigram overlap with ``text``.

    The score of a tweet is the Jaccard coefficient between the query's
    trigrams and the tweet's stored trigrams:
    ``len(query & doc) / len(query | doc)``.

    Args:
        text: The query string; it is passed to ``utils.get_trigrams``.
            (Presumably that helper returns a set -- the result must
            support ``|``, ``&`` and ``len``.)

    Returns:
        A list of ``(tweet_id, score)`` tuples, one per entry in
        ``self.tweets``, ordered from highest to lowest score. When both
        the query and a tweet have no trigrams at all, the score is
        ``0.0`` rather than raising ``ZeroDivisionError``.
    """
    query_trigrams = utils.get_trigrams(text)
    scores = []
    # .items() is available on both Python 2 and Python 3 dicts, whereas
    # .iteritems() exists only on Python 2.
    for tid, doc_trigrams in self.tweets.items():
        union = len(query_trigrams | doc_trigrams)
        if union:
            score = len(query_trigrams & doc_trigrams) / float(union)
        else:
            # Both trigram collections are empty: nothing to compare, so
            # treat the pair as non-matching instead of dividing by zero.
            score = 0.0
        scores.append((tid, score))
    # Stable ascending sort followed by a reversal, so that tweets with
    # equal scores keep the same relative order as before this change.
    scores.sort(key=lambda pair: pair[1])
    scores.reverse()
    return scores
def index_tweet(self, tweet):
    """Add one tweet to the index.

    Reads ``tweet['id']`` and ``tweet['text']``, stores the trigrams of
    the text (as produced by ``utils.get_trigrams``) in ``self.tweets``
    under the tweet's id, and increments ``self.N`` by one.

    Note: ``self.N`` is incremented on every call, including when an
    entry for the same id already exists and is overwritten.

    Args:
        tweet: A mapping providing at least the keys ``'id'`` and
            ``'text'``.
    """
    tweet_id, body = tweet['id'], tweet['text']
    self.tweets[tweet_id] = utils.get_trigrams(body)
    self.N += 1