def cosine_score(simple_index, query, document):
    """Compute the cosine score between a query and a document.

    The vocabulary is the union of the keys of ``simple_index.inverted_index``
    and the terms in ``query``. Each document component is
    ``calc_tf_idf(simple_index, term, document)``; each query component is
    ``calc_idf(simple_index, term)`` multiplied by the number of times the
    term occurs in ``query`` (0 for terms absent from the query).

    Args:
        simple_index: Index object exposing an ``inverted_index`` mapping
            keyed by term.
        query: Collection of query terms supporting ``in`` and ``.count()``
            (presumably a list of tokens -- confirm against callers).
        document: Document identifier/object forwarded to ``calc_tf_idf``.

    Returns:
        The dot product of the two vectors divided by the product of their
        Euclidean norms, or ``0.0`` when either vector has zero norm.
    """
    # Materialize the vocabulary once so both vectors share the same order.
    terms = list(set(simple_index.inverted_index.keys()).union(query))

    doc_vector = [calc_tf_idf(simple_index, term, document) for term in terms]
    qry_vector = [
        calc_idf(simple_index, term) * query.count(term) if term in query else 0
        for term in terms
    ]

    num = sum(a * b for a, b in zip(doc_vector, qry_vector))
    den = sqrt(sum(a * a for a in doc_vector)) * sqrt(
        sum(a * a for a in qry_vector)
    )

    # A zero-norm vector has no direction; report no similarity rather than
    # raising ZeroDivisionError.
    if den == 0:
        return 0.0
    return num / den
def calc_tf_idf(simple_index, term, document):
    """Return the TF-IDF value of ``term`` for ``document``.

    The result is the product of ``calc_tf(simple_index, term, document)``
    and ``calc_idf(simple_index, term)``.
    """
    term_frequency = calc_tf(simple_index, term, document)
    inverse_doc_frequency = calc_idf(simple_index, term)
    return term_frequency * inverse_doc_frequency