Ejemplo n.º 1
0
 def compute_scores(self, list_of_docs, query: Query) -> dict:
     """Score each candidate document against the query with tf-idf weights.

     For every word of the query vocabulary:

     * the query weight is ``query.get_term_frequency(word) * idf(word)``;
     * the document weight is
       ``self.collection.log_normalization(term=word, id_document=doc_id)
       * idf(word)``, or ``0`` when the normalized term frequency is ``0``
       (the word is not in the document).

     A document's score is the sum, over the query words, of the product of
     the two weights (an unnormalized dot product of the tf-idf vectors).

     Args:
         list_of_docs: Iterable of document ids to score.
         query: Query exposing ``get_vocabulary()`` and
             ``get_term_frequency(word)``.

     Returns:
         A dict mapping each document id of ``list_of_docs`` to its score.
         The dict is empty when ``list_of_docs`` is empty.
     """
     print("Search Engine is computing search scores ...")
     collection = self.collection
     vocab_query = query.get_vocabulary()

     # The IDF of a word does not depend on the document being scored, so it
     # is computed once per query word here and reused below instead of being
     # recomputed for every (document, word) pair.
     # NOTE(review): assumes compute_idf(word) returns the same value for the
     # whole duration of this call -- confirm against the collection class.
     idf_by_word = {}
     query_tf_idf = {}
     for word in vocab_query:
         query_tf = query.get_term_frequency(word)
         idf = collection.compute_idf(word)
         idf_by_word[word] = idf
         query_tf_idf[word] = query_tf * idf

     # Score every candidate document against the query words.
     doc_scores = {}
     for doc_id in list_of_docs:
         score = 0
         for word in vocab_query:
             normalized_tf = collection.log_normalization(
                 term=word, id_document=doc_id
             )
             if normalized_tf == 0:  # the word is not in the document
                 doc_tf_idf = 0
             else:
                 # tf-idf of the word in the document
                 doc_tf_idf = normalized_tf * idf_by_word[word]
             score += query_tf_idf[word] * doc_tf_idf
         doc_scores[doc_id] = score
     return doc_scores