Example #1
import os


def main():
    # Build the unigram inverted index and derive TF-IDF weights from it.
    unigram = build_inverted_indexer()
    dict_idf = store_tfidfs(unigram)
    # Remove any previous results file; dest_folder and file are module-level
    # settings defined elsewhere in this project.
    if os.path.exists(dest_folder + file + ".csv"):
        os.remove(dest_folder + file + ".csv")
    # Score every parsed query (with stop words removed) against the index.
    queries = get_parsed_queries()
    for key in queries:
        query_id = key
        compute_cosine_similarity(query_id.strip(),
                                  query_stopping(queries[key]), dict_idf)
Example #2
import os


def main():
    # Output file name and run tag used when writing the results.
    file = "cosine_similiarity"
    system_name = "COSINE_SIM"
    # Build the unigram inverted index and derive TF-IDF weights from it.
    unigram = build_inverted_indexer()
    dict_idf = store_tfidfs(unigram)
    # Remove any previous results file; dest_folder is a module-level setting
    # defined elsewhere in this project.
    if os.path.exists(dest_folder + file + ".csv"):
        os.remove(dest_folder + file + ".csv")
    # Score every parsed query against the index and write the ranked results.
    queries = get_parsed_queries()
    for key in queries:
        query_id = key
        compute_cosine_similarity(query_id.strip(), queries[key], dict_idf,
                                  file, system_name)
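The compute_cosine_similarity helper called by the two drivers above is defined elsewhere in those projects and is not shown here. As a rough, self-contained illustration of the scoring step, the sketch below ranks documents by the cosine of the angle between a query vector and per-document TF-IDF vectors; the function name cosine_score and the doc_tfidf dict-of-dicts layout are assumptions made for the sketch, not taken from the source.

import math
from collections import Counter


def cosine_score(query_terms, doc_tfidf):
    """Illustrative only: rank documents by cosine similarity to a query.

    query_terms -- list of (already tokenized and stopped) query terms
    doc_tfidf   -- hypothetical dict mapping doc_id -> {term: TF-IDF weight}
    Returns (doc_id, score) pairs sorted by descending score.
    """
    # Represent the query as a raw term-frequency vector.
    query_vec = Counter(query_terms)
    query_norm = math.sqrt(sum(w * w for w in query_vec.values()))
    scores = []
    for doc_id, term_weights in doc_tfidf.items():
        dot = sum(query_vec[term] * weight
                  for term, weight in term_weights.items() if term in query_vec)
        doc_norm = math.sqrt(sum(w * w for w in term_weights.values()))
        if query_norm and doc_norm:
            scores.append((doc_id, dot / (query_norm * doc_norm)))
    return sorted(scores, key=lambda pair: pair[1], reverse=True)

With a doc_tfidf of the assumed shape, cosine_score(["information", "retrieval"], doc_tfidf) returns (doc_id, score) pairs in descending score order.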
Example #3
import csv
from collections import OrderedDict

# Number of top-scoring terms to keep.
top_kwords = 20

# Top-ranked documents kept per query, filled by read_cosine_similarity_results().
top_kreldocs_query = OrderedDict()

sorted_scores_query = OrderedDict()
vocabulary = OrderedDict()
doc_vectors = OrderedDict()

#relevant_vectors=list()
#nonrelevant_vectors=list()
query_vector = OrderedDict()

# Location of the cosine-similarity run produced by the first task.
folder = "cosine_results/"
filename = "cosine_similiarity_task1.csv"

queries = get_parsed_queries()
all_counters = counters_all_docs()
all_docs = all_counters.keys()


def read_cosine_similarity_results():
    """Load the cosine-similarity run and keep the top-ranked rows per query."""
    with open(folder + filename, "r") as fd:
        reader = csv.reader(fd)
        scores_query = OrderedDict()
        for rows in reader:
            # Group rows by query id (column 0), keeping columns 2 and 3 of each row.
            scores_query.setdefault(rows[0], []).append([rows[2], rows[3]])
        sorted_scores_query = sorted(scores_query.items())
        for key, value in sorted_scores_query:
            # kdocs (the number of top-ranked rows to keep per query) is
            # defined elsewhere in the original module.
            top_kreldocs_query.setdefault(key, []).append(value[:kdocs])
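For reference, a minimal usage sketch, assuming the run file exists at folder + filename and that kdocs is defined; because of the setdefault(...).append(...) pattern, each entry in top_kreldocs_query is a one-element list wrapping the retained rows.

# Hypothetical usage sketch for the loader above.
read_cosine_similarity_results()
for query_id, retained in top_kreldocs_query.items():
    # retained[0] is the list of up to kdocs [column-2, column-3] pairs.
    print(query_id, retained[0][:3])
    break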

Example #4
def main():
    # Parse the queries and build a term-frequency counter for every document.
    queryList = get_parsed_queries()
    docsCounters = create_counters()
    # N is the number of documents in the collection.
    N = len(docsCounters)
    compute_bm25(docsCounters, N, create_query_counters(queryList),
                 get_relevance_data())
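compute_bm25 is likewise defined elsewhere in that project. As a rough illustration of the scoring it performs, the sketch below computes the standard Okapi BM25 score of one document for one query; the function name bm25_score, its arguments, and the conventional defaults k1=1.2 and b=0.75 are assumptions of the sketch (the relevance data passed above suggests the original may use a relevance-weighted variant).

import math


def bm25_score(query_counter, doc_counter, doc_freq, N, avg_doc_len,
               k1=1.2, b=0.75):
    """Illustrative Okapi BM25 score of one document for one query.

    query_counter -- {term: frequency in the query}
    doc_counter   -- {term: frequency in the document}
    doc_freq      -- {term: number of documents containing the term}
    N             -- total number of documents in the collection
    avg_doc_len   -- average document length in tokens
    """
    doc_len = sum(doc_counter.values())
    score = 0.0
    for term in query_counter:
        tf = doc_counter.get(term, 0)
        if tf == 0:
            continue
        df = doc_freq.get(term, 0)
        # Smoothed IDF; the +1 inside the log keeps it non-negative
        # (a common variant of the classic Robertson-Sparck Jones weight).
        idf = math.log((N - df + 0.5) / (df + 0.5) + 1.0)
        # Term-frequency saturation with document-length normalization.
        tf_part = (tf * (k1 + 1)) / (tf + k1 * (1 - b + b * doc_len / avg_doc_len))
        score += idf * tf_part
    # Query-term-frequency weighting (the k3 component of full Okapi BM25)
    # is omitted for brevity.
    return score

Scoring every document for a query with bm25_score and sorting the results would produce the ranked list a driver like the main() above writes out.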