Beispiel #1
0
    def retrieval_cosine(self, query):
        """Score documents against ``query`` and return them ranked.

        The query is turned into terms with ``Tokens().edit_query``; each
        distinct term's occurrence count is weighted with ``tf``.  For every
        entry returned by ``self.L(term)`` the product of the entry's
        tf-weighted value and the query-side weight is accumulated per
        document.  Each accumulated sum is then divided by the product of the
        document norm (``self.inverted_index.get_norms``) and the query norm
        (``self.get_query_norms``) and rounded to two decimals.

        Args:
            query: The raw query, handed unchanged to
                ``Tokens().edit_query``.

        Returns:
            A list of ``(doc_id, score)`` tuples sorted by score in
            descending order; an empty list when no document was scored.
        """
        query_terms = Tokens().edit_query(query)
        # Weight every distinct query term by its tf-scaled occurrence count.
        query_weights = {
            term: tf(count)
            for term, count in collections.Counter(query_terms).items()
        }

        # Accumulate the un-normalised score per document id.
        scores = {}
        for term, query_weight in query_weights.items():
            # Entries of self.L(term) are indexed as [0] = document id and
            # [1] = the value fed to tf (presumably a term frequency).
            for posting in self.L(term):
                doc_id = posting[0]
                # NOTE(review): query_weight already holds tf(count); tf is
                # applied to it a second time here, exactly as the original
                # code did.  Confirm whether this double weighting is
                # intended before changing it.
                scores[doc_id] = (
                    scores.get(doc_id, 0) + tf(posting[1]) * tf(query_weight))

        if not scores:
            # Nothing matched: skip the norm computation entirely.
            return []

        # The query norm is the same for every document, so compute it once
        # instead of once per scored document.
        query_norm = self.get_query_norms(query_weights)

        ranked = []
        for doc_id, raw_score in scores.items():
            doc_norm = self.inverted_index.get_norms(str(doc_id))
            ranked.append(
                (doc_id, round(raw_score / (doc_norm * query_norm), 2)))

        # Stable sort: documents with equal scores keep their first-seen order.
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        return ranked