Example #1
def benchmark(model, model_name, docs, idx2key):
    qrels, queries = read_ap.read_qrels()

    overall_ser = {}

    # Adapted from the TF-IDF benchmark test
    print("Running GENSIM Benchmark")
    # collect results
    for qid in tqdm(qrels):
        query_text = queries[qid]
        results = rank(model, docs, query_text)
        overall_ser[qid] = {idx2key[idx]: score
                            for idx, score in results}

    evaluator = pytrec_eval.RelevanceEvaluator(qrels, {'map', 'ndcg'})
    metrics = evaluator.evaluate(overall_ser)

    json_filename = f"./json_files/benchmark_{model_name}.json"

    # dump to JSON
    with open(json_filename, "w") as writer:
        json.dump(metrics, writer, indent=1)

    return json_filename
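All of these examples feed nested dictionaries into pytrec_eval. A minimal, self-contained sketch of that API with toy data (the qrels and run values below are made up, not taken from the examples above):

import json
import pytrec_eval

# qrels: query id -> {doc id: integer relevance judgement}
qrels = {'q1': {'d1': 1, 'd2': 0, 'd3': 2}}
# run: query id -> {doc id: float retrieval score}, as built in overall_ser above
run = {'q1': {'d1': 1.2, 'd2': 0.4, 'd3': 0.9}}

evaluator = pytrec_eval.RelevanceEvaluator(qrels, {'map', 'ndcg'})
metrics = evaluator.evaluate(run)  # {'q1': {'map': ..., 'ndcg': ...}}
print(json.dumps(metrics, indent=1))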
Example #2
def compute_metrics(dictionary, model, index, corpus_type, num_topics,
                    doc_ids):
    """
    Compute MAP and nDCG scores and save to json file.
    """
    metric_path = ("./LSI_results/LSI_{}_and_{}_topics.json".format(
        corpus_type, num_topics))
    # Check whether metrics for this corpus type and number of topics were already generated
    if not os.path.exists(metric_path):

        # Get a ranking of documents for every query and compute MAP and nDCG.
        qrels, queries = ra.read_qrels()
        overall_ser = {}  # ranking per query
        for qid in tqdm(qrels):
            query = queries[qid]
            ranking = query_similarity(query, dictionary, model, index,
                                       doc_ids)
            overall_ser[qid] = ranking

        # Compute model evaluation scores per query
        evaluator = pytrec_eval.RelevanceEvaluator(qrels, {'map', 'ndcg'})
        metrics = evaluator.evaluate(overall_ser)

        with open(metric_path, "w") as writer:
            json.dump(metrics, writer, indent=1)
    else:
        print('Metrics for LSI_{} with {} topics were already computed.'.format(
            corpus_type, num_topics))
Example #3
def evaluate_doc2vec(doc2vec_model, description, test_subset=False):

    qrels, queries = read_ap.read_qrels()

    if test_subset:
        queries = {
            qid: q
            for qid, q in queries.items() if int(qid) < 101 and int(qid) > 75
        }

    overall_ser = {}
    # collect results
    for qid in queries:
        results = rank_query_given_document(queries[qid], doc2vec_model)
        overall_ser[qid] = dict(results)

        # Write TREC-formatted results for queries outside the held-out range 76-100
        if int(qid) not in range(76, 101):
            evaluate.write_trec_results(qid, results, "./doc2vec/results/")

    evaluator = pytrec_eval.RelevanceEvaluator(qrels, {'map', 'ndcg'})
    metrics = evaluator.evaluate(overall_ser)

    if not test_subset:
        with open(f"./doc2vec/results/doc2vec_{description}.json",
                  "w") as writer:
            json.dump(metrics, writer, indent=1)

    return metrics
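rank_query_given_document is not defined in the snippet above. A hedged sketch of how such a ranking could be produced with a trained gensim Doc2Vec model; the function body, tokenisation, and topn cutoff are assumptions, not the original implementation:

import gensim

def rank_query_given_document(query_text, doc2vec_model, topn=1000):
    # Hypothetical sketch: infer an embedding for the query and rank the
    # indexed documents by cosine similarity to it.
    query_tokens = gensim.utils.simple_preprocess(query_text)
    query_vec = doc2vec_model.infer_vector(query_tokens)
    # gensim 4.x exposes document vectors as model.dv (model.docvecs in 3.x)
    return doc2vec_model.dv.most_similar([query_vec], topn=topn)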
Example #4
def get_ranking(n_topics=num_topics):
    ''' get ranking for all queries '''

    # load queries
    qrels, queries = read_ap.read_qrels()

    # load model
    lda_bow = LdaModel.load(os.path.join(models_path, 'lda_bow_multi'))

    # load corpus of full vectors
    with open('./objects/lda_bow_full', 'rb') as f:
        corpus_full = pkl.load(f)

    # load dictionary
    with open('./objects/dictionary_lda', 'rb') as f:
        dictionary = pkl.load(f)

    # process docs
    processed_docs = read_ap.get_processed_docs()
    doc_keys = processed_docs.keys()
    idx2key = {i: key for i, key in enumerate(doc_keys)}

    overall_ser = {}

    # loop over queries
    for qid in tqdm(qrels):
        query_text = queries[qid]
        sims = get_sims(lda_bow, query_text, corpus_full, dictionary, n_topics)

        overall_ser[qid] = {idx2key[idx]: np.float64(score)
                            for idx, score in sims}

    with open('./objects/overal_ser_lda', 'wb') as f:
        pkl.dump(overall_ser, f)
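get_sims is not shown. One plausible sketch scores every document against the query in LDA topic space with gensim's MatrixSimilarity; the cosine similarity here is an assumption (the lda_bow_kl filename in Example #8 hints the original may have used a KL-divergence score), and in practice the index would be built once rather than per query:

from gensim import similarities

def get_sims(lda_model, query_text, corpus_full, dictionary, n_topics):
    # Hypothetical sketch: project the query into topic space and compare it
    # against every document's topic vector.
    query_bow = dictionary.doc2bow(query_text.lower().split())
    query_topics = lda_model[query_bow]
    index = similarities.MatrixSimilarity(corpus_full, num_features=n_topics)
    scores = index[query_topics]  # one similarity score per document
    return sorted(enumerate(scores), key=lambda x: -x[1])  # [(doc index, score), ...]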
Example #5
def compute_metrics(docs, vocab_embs, word2id, id2word):
    """
    For a trained model, compute the MAP and NDCG based on a set of queries and
    all documents in the corpus.

    Returns:
        metrics: a nested dict of queries and their MAP and NDCG scores.
    """
    # Create document embeddings
    if not os.path.exists("./pickles/word2vec_doc_embs.pkl"):
        print("constructing document embeddings")
        doc_embs = {}
        keys = list(docs.keys())
        for d in tqdm(keys):
            doc = docs[d]
            doc_emb = create_doc_emb(vocab_embs, doc, word2id, id2word)
            doc_embs[d] = doc_emb

        with open("./pickles/word2vec_doc_embs.pkl", "wb") as writer:
            pkl.dump(doc_embs, writer)
    else:
        with open("./pickles/word2vec_doc_embs.pkl", "rb") as reader:
            doc_embs = pkl.load(reader)

    # Create a query embedding and compare it to every document embedding
    qrels, queries = ra.read_qrels()
    overall_ser = {}  #ranking per query
    for qid in tqdm(qrels):
        query = queries[qid]
        query = ra.process_text(query)
        query_emb = create_doc_emb(vocab_embs, query, word2id, id2word)
        ranking, trec_results = get_ranking(qid, query_emb, doc_embs,
                                            vocab_embs)
        overall_ser[qid] = ranking

        if int(qid) not in range(76, 100):
            with open("./results/word2vec_trec.csv", "a+") as f:
                f.write("\n".join("{},{},{},{},{},{}".format(
                    x[0], x[1], x[2], x[3], x[4], x[5]) for x in trec_results))
                f.write("\n")

    # Compute the MAP and NDCG per query
    evaluator = pytrec_eval.RelevanceEvaluator(qrels, {'map', 'ndcg'})
    metrics = evaluator.evaluate(overall_ser)

    # Get the average model evaluation scores over all queries
    average = {'map': 0.0, 'ndcg': 0.0}
    for q in metrics.values():
        average['map'] += q['map']
        average['ndcg'] += q['ndcg']
    average['map'] /= len(metrics)
    average['ndcg'] /= len(metrics)
    print('Average model evaluation scores over all queries: {}'.format(average))

    return metrics
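create_doc_emb and get_ranking are also not shown. A rough sketch under the assumption that a document (or query) is represented by the mean of its word embeddings and then ranked by cosine similarity; the helper names and the vocab_embs layout (one row per word id) are assumptions:

import numpy as np

def create_doc_emb(vocab_embs, tokens, word2id, id2word):
    # Hypothetical sketch: average the embeddings of all in-vocabulary tokens.
    ids = [word2id[t] for t in tokens if t in word2id]
    if not ids:
        return np.zeros(vocab_embs.shape[1])
    return vocab_embs[ids].mean(axis=0)

def rank_by_cosine(query_emb, doc_embs):
    # Cosine similarity between the query embedding and every document embedding.
    scores = {}
    q_norm = np.linalg.norm(query_emb) + 1e-12
    for doc_id, emb in doc_embs.items():
        scores[doc_id] = float(np.dot(query_emb, emb) /
                               (q_norm * (np.linalg.norm(emb) + 1e-12)))
    return dict(sorted(scores.items(), key=lambda kv: -kv[1]))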
Example #6
def main():
    parser = argparse.ArgumentParser()
    qrels, queries = read_ap.read_qrels()
    parser.add_argument('qrel')
    parser.add_argument('run', nargs=2)

    # A bit too strict, as it does not allow for parametrized measures,
    # but sufficient for the example.
    parser.add_argument('--measure',
                        choices=pytrec_eval.supported_measures,
                        required=True)
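The snippet above ends before the parsed arguments are used. A hedged sketch of how such a command-line evaluation could continue, reading the qrel and run files in the standard whitespace-separated TREC layout; this continuation is an assumption, not the original script:

    args = parser.parse_args()

    # Qrel lines: "query_id  0  doc_id  relevance"
    qrel = {}
    with open(args.qrel) as f:
        for line in f:
            qid, _, docid, rel = line.split()
            qrel.setdefault(qid, {})[docid] = int(rel)

    evaluator = pytrec_eval.RelevanceEvaluator(qrel, {args.measure})

    # Run lines: "query_id  Q0  doc_id  rank  score  run_tag"
    for run_path in args.run:
        run = {}
        with open(run_path) as f:
            for line in f:
                qid, _, docid, _, score, _ = line.split()
                run.setdefault(qid, {})[docid] = float(score)
        results = evaluator.evaluate(run)
        scores = [q[args.measure] for q in results.values()]
        print(run_path, sum(scores) / len(scores))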
Example #7
def evaluate_queries(model, doc_ids, dictionary, corpus_modelspace, tfidf, index, save_path='LSI'):
    qrels, queries = read_ap.read_qrels()

    overall_result = {}

    for query_id, query in tqdm(queries.items()):
        results = rank_docs(query, model, doc_ids, dictionary, corpus_modelspace, tfidf_model=tfidf, index=index)
        overall_result[query_id] = dict(results)

        if int(query_id) not in np.arange(76, 101):
            evaluate.write_trec_results(query_id, results, save_path)

    evaluator = pytrec_eval.RelevanceEvaluator(qrels, {'map', 'ndcg'})
    metrics = evaluator.evaluate(overall_result)

    return metrics
Example #8
def get_json():
    '''Load the saved per-query rankings from pickle and dump evaluation metrics to JSON.'''

    with open('./objects/overal_ser_lda', 'rb') as f:
        overall_ser = pkl.load(f)

    qrels, queries = read_ap.read_qrels()

    print('pytreccing')

    evaluator = pytrec_eval.RelevanceEvaluator(qrels, {'map', 'ndcg'})
    metrics = evaluator.evaluate(overall_ser)

    print('dumping json')
    with open('./json_files/lda_bow_kl.json', 'w') as f:
        json.dump(metrics, f, indent=1)
Example #9
def main():
    docs_by_id = ra.get_processed_docs()
    path = "./doc2vec_models/{}".format(config.model_name)
    if not os.path.exists(path):
        print("Model not yet trained, starting training now.")
        train_corpus = create_corpus(docs_by_id)
        model = train_doc2vec(train_corpus)
    else:
        print("Model already trained, loading the file.")
        model = gensim.models.doc2vec.Doc2Vec.load(path)

    qrels, queries = ra.read_qrels()

    overall_ser = {}
    trec_path = "./results/trec_doc2vec.csv"

    # Write TREC results column headers to file
    with open(trec_path, "w") as f:
        f.write("query-id, Q0, document-id, rank, score, STANDARD\n")

    print("Evaluating doc2vec model:", config.model_name)

    # Loop over all queries and predict most relevant docs
    for qid in tqdm(qrels):
        query_text = queries[qid]
        results, trec_results = rank_docs(model, query_text, qid, config.model_name)
        results = dict(results)
        overall_ser[qid] = results
        # Write all test queries to the TREC-format file
        if int(qid) not in range(76, 100):
            with open(trec_path, "a+") as f:
                f.write("\n".join("{},{},{},{},{},{}".format(
                    x[0], x[1], x[2], x[3], x[4], x[5]) for x in trec_results))
                f.write("\n")

    # run evaluation with `qrels` as the ground truth relevance judgements
    # here, we are measuring MAP and NDCG
    evaluator = pytrec_eval.RelevanceEvaluator(qrels, {'map', 'ndcg'})
    metrics = evaluator.evaluate(overall_ser)

    # dump this to JSON
    # *Not* Optional - This is submitted in the assignment!
    json_path = "./results/{}.json".format(config.model_name)
    with open(json_path, "w") as writer:
        json.dump(metrics, writer, indent=1)
Example #10
        # (tail of the search method: sort hits by descending score)
        results = list(results.items())
        results.sort(key=lambda kv: -kv[1])
        return results


if __name__ == "__main__":

    # ensure dataset is downloaded
    download_ap.download_dataset()
    # pre-process the text
    docs_by_id = read_ap.get_processed_docs()

    # Create instance for retrieval
    tfidf_search = TfIdfRetrieval(docs_by_id)
    # read in the qrels
    qrels, queries = read_ap.read_qrels()

    overall_ser = {}

    print("Running TFIDF Benchmark")
    # collect results
    for qid in tqdm(qrels): 
        query_text = queries[qid]

        results = tfidf_search.search(query_text)
        overall_ser[qid] = dict(results)
    
    results_lines = []
    for qid in overall_ser:
        for doc_id in overall_ser[qid]:
            results_lines.append('{}\tQ0\t{}\t0\t{}\tSTANDARD\n'.format(
                qid, doc_id, overall_ser[qid][doc_id]))
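The final snippet is cut off before results_lines is written out. For reference, a small hedged sketch of writing a TREC run file with proper per-query ranks; the layout (query id, the literal Q0, doc id, rank, score, run tag) follows the convention used above, but the helper name and output path are hypothetical:

def write_trec_run(overall_ser, path='./results/tfidf_run.trec', run_tag='STANDARD'):
    # Hypothetical helper: one line per (query, document) pair, ranked by score.
    with open(path, 'w') as f:
        for qid, doc_scores in overall_ser.items():
            ranked = sorted(doc_scores.items(), key=lambda kv: -kv[1])
            for rank, (doc_id, score) in enumerate(ranked, start=1):
                f.write('{}\tQ0\t{}\t{}\t{}\t{}\n'.format(
                    qid, doc_id, rank, score, run_tag))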