Ejemplo n.º 1
0
def cosine_score(simple_index, query, document):
    """Compute the cosine similarity between a query and a document.

    Both are represented as vectors over the union of the index
    vocabulary (the keys of ``simple_index.inverted_index``) and the
    query terms. Document components come from ``calc_tf_idf``; a query
    component is ``calc_idf`` for the term multiplied by the number of
    times the term occurs in ``query``, or 0 when the term is not in
    the query.

    Args:
        simple_index: Index object exposing an ``inverted_index`` mapping;
            it is also passed through to ``calc_tf_idf`` and ``calc_idf``.
        query: Collection of query terms; must be iterable and support
            ``in`` and ``.count()``.
        document: Document reference passed through to ``calc_tf_idf``.

    Returns:
        The dot product of the two vectors divided by the product of
        their Euclidean norms, or ``0.0`` when that product is zero
        (e.g. empty vocabulary and query, or an all-zero vector).
    """
    # Materialise the term set once so both vectors share the same order.
    terms = list(set(simple_index.inverted_index.keys()).union(query))
    doc_vector = [calc_tf_idf(simple_index, term, document) for term in terms]
    qry_vector = [
        calc_idf(simple_index, term) * query.count(term) if term in query else 0
        for term in terms
    ]
    num = sum(a * b for a, b in zip(doc_vector, qry_vector))
    den = sqrt(sum(a * a for a in doc_vector)) * sqrt(sum(a * a for a in qry_vector))
    # A zero-norm vector has no direction; report no similarity instead of
    # raising ZeroDivisionError.
    if den == 0:
        return 0.0
    return num / den
Ejemplo n.º 2
0
def overlap_score(simple_index, query, document):
    """Compute the overlap score of a document for a query.

    The score is the sum of ``calc_tf_idf(simple_index, term, document)``
    over every term yielded by iterating ``query`` (repeated terms are
    counted each time they appear).
    """
    total = 0
    for query_term in query:
        weight = calc_tf_idf(simple_index, query_term, document)
        total = total + weight
    return total