def retrieval_cosine(self, query):
    """Rank documents against ``query`` by a length-normalised tf score.

    The query is tokenised with ``Tokens().edit_query``; each distinct
    query term is weighted with ``tf`` of its occurrence count.  For every
    posting returned by ``self.L(term)`` the product of the document-side
    and query-side weights is accumulated per document, and the total is
    divided by the document norm (``self.inverted_index.get_norms``) times
    the query norm (``self.get_query_norms``).

    Args:
        query: Raw query, passed unchanged to ``Tokens.edit_query``.

    Returns:
        A list of ``(doc_id, score)`` tuples sorted by descending score,
        with scores rounded to two decimals.  Documents with equal scores
        keep the order in which they were first encountered.  An empty
        list is returned when no query term has any posting.
    """
    query_terms = Tokens().edit_query(query)
    query_weights = {
        term: tf(count)
        for term, count in collections.Counter(query_terms).items()
    }

    # Accumulate the un-normalised dot product per document.
    # Assumes each posting is indexable with posting[0] = document id and
    # posting[1] = term frequency in that document -- TODO confirm
    # against self.L.
    scores = {}
    for term, weight in query_weights.items():
        postings = self.L(term)
        # NOTE(review): ``weight`` is already tf(count), so the query side
        # goes through ``tf`` twice here.  That looks unintended, but it is
        # kept as-is because changing it would alter existing rankings;
        # confirm against ``tf`` and ``get_query_norms`` before fixing.
        term_weight = tf(weight)
        for posting in postings:
            doc_id = posting[0]
            scores[doc_id] = scores.get(doc_id, 0) + tf(posting[1]) * term_weight

    if not scores:
        return []

    # The query norm does not depend on the document, so compute it once.
    query_norm = self.get_query_norms(query_weights)

    ranked = []
    for doc_id, raw_score in scores.items():
        # get_norms is looked up by the string form of the document id.
        doc_norm = self.inverted_index.get_norms(str(doc_id))
        ranked.append((doc_id, round(raw_score / (doc_norm * query_norm), 2)))

    # list.sort is stable, so ties keep first-encountered order.
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked