# Shared imports assumed by the snippets in this section; project-local
# modules (read_ap / ra, download_ap, evaluate, config) and globals such as
# models_path and num_topics come from the surrounding repository.
import argparse
import json
import os
import pickle as pkl

import gensim
import numpy as np
import pytrec_eval
from gensim.models import LdaModel
from tqdm import tqdm


def benchmark(model, model_name, docs, idx2key):
    qrels, queries = read_ap.read_qrels()
    overall_ser = {}

    # Adapted from the TF-IDF benchmark test
    print("Running GENSIM Benchmark")

    # Collect a ranking for every query
    for qid in tqdm(qrels):
        query_text = queries[qid]
        results = rank(model, docs, query_text)
        overall_ser[qid] = {idx2key[idx]: score for idx, score in results}

    # Measure MAP and nDCG with the qrels as ground-truth relevance judgements
    evaluator = pytrec_eval.RelevanceEvaluator(qrels, {'map', 'ndcg'})
    metrics = evaluator.evaluate(overall_ser)

    # Dump the per-query metrics to JSON
    json_filename = f"./json_files/benchmark_{model_name}.json"
    with open(json_filename, "w") as writer:
        json.dump(metrics, writer, indent=1)

    return json_filename
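# `rank` is not defined in this snippet. A minimal sketch of what it is
# assumed to do, given a similarity index such as
# `index = similarities.MatrixSimilarity(model[corpus])` built beforehand;
# the extra dictionary/index parameters and the helper name are assumptions,
# not the repo's actual signature.
def rank_sketch(model, docs, query_text, dictionary, index):
    # Project the query into the model space and score it against all docs
    query_bow = dictionary.doc2bow(read_ap.process_text(query_text))
    sims = index[model[query_bow]]
    # (corpus index, score) pairs, best first
    return sorted(enumerate(sims), key=lambda pair: -pair[1])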
def compute_metrics(dictionary, model, index, corpus_type, num_topics, doc_ids):
    """Compute MAP and nDCG scores and save them to a JSON file."""
    metric_path = "./LSI_results/LSI_{}_and_{}_topics.json".format(
        corpus_type, num_topics)

    # Skip if metrics for this corpus type and topic count were already generated
    if not os.path.exists(metric_path):
        # Rank the documents for every query
        qrels, queries = ra.read_qrels()
        overall_ser = {}
        for qid in tqdm(qrels):
            query = queries[qid]
            overall_ser[qid] = query_similarity(query, dictionary, model,
                                                index, doc_ids)

        # Compute the model evaluation scores per query
        evaluator = pytrec_eval.RelevanceEvaluator(qrels, {'map', 'ndcg'})
        metrics = evaluator.evaluate(overall_ser)

        with open(metric_path, "w") as writer:
            json.dump(metrics, writer, indent=1)
    else:
        print('Metrics for LSI_{} with {} topics were already computed'.format(
            corpus_type, num_topics))
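# Hypothetical invocation; the variable names are placeholders for objects
# built earlier in the LSI pipeline, not the repo's actual names:
# compute_metrics(dictionary, lsi_model, index,
#                 corpus_type='tfidf', num_topics=500, doc_ids=doc_ids)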
def evaluate_doc2vec(doc2vec_model, description, test_subset=False):
    qrels, queries = read_ap.read_qrels()

    # Optionally restrict evaluation to the validation queries 76-100
    if test_subset:
        queries = {
            qid: q for qid, q in queries.items() if 75 < int(qid) < 101
        }

    overall_ser = {}
    # Collect a ranking for every query
    for qid in queries:
        results = rank_query_given_document(queries[qid], doc2vec_model)
        overall_ser[qid] = dict(results)
        # Only test queries (outside the 76-100 validation range) are
        # written in TREC format
        if int(qid) not in range(76, 101):
            evaluate.write_trec_results(qid, results, "./doc2vec/results/")

    evaluator = pytrec_eval.RelevanceEvaluator(qrels, {'map', 'ndcg'})
    metrics = evaluator.evaluate(overall_ser)

    if not test_subset:
        with open(f"./doc2vec/results/doc2vec_{description}.json", "w") as writer:
            json.dump(metrics, writer, indent=1)

    return metrics
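# `rank_query_given_document` is defined elsewhere; a sketch of the assumed
# behaviour using gensim's Doc2Vec API (`docvecs` in gensim < 4.0, `dv` in
# 4.x). The preprocessing call and `topn` value are illustrative assumptions.
def rank_query_given_document_sketch(query_text, doc2vec_model, topn=1000):
    query_vec = doc2vec_model.infer_vector(read_ap.process_text(query_text))
    # (doc_id, cosine similarity) pairs, best first
    return doc2vec_model.docvecs.most_similar([query_vec], topn=topn)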
def get_ranking(n_topics=num_topics):
    '''Get the LDA ranking for all queries and pickle it.'''
    # Load the queries
    qrels, queries = read_ap.read_qrels()

    # Load the trained model
    lda_bow = LdaModel.load(os.path.join(models_path, 'lda_bow_multi'))

    # Load the corpus of full vectors
    with open('./objects/lda_bow_full', 'rb') as f:
        corpus_full = pkl.load(f)

    # Load the dictionary
    with open('./objects/dictionary_lda', 'rb') as f:
        dictionary = pkl.load(f)

    # Map corpus indices back to document ids
    processed_docs = read_ap.get_processed_docs()
    idx2key = {i: key for i, key in enumerate(processed_docs.keys())}

    overall_ser = {}
    # Rank the documents for every query
    for qid in tqdm(qrels):
        query_text = queries[qid]
        sims = get_sims(lda_bow, query_text, corpus_full, dictionary, n_topics)
        overall_ser[qid] = {idx2key[idx]: np.float64(score)
                            for idx, score in sims}

    with open('./objects/overall_ser_lda', 'wb') as f:
        pkl.dump(overall_ser, f)
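# `get_sims` is defined elsewhere; the output file name `lda_bow_kl` below
# suggests a KL-divergence ranking, so this sketch is written under that
# assumption (dense document topic vectors and the smoothing constant are
# also assumptions).
from gensim.matutils import sparse2full

def get_sims_sketch(lda, query_text, corpus_full, dictionary, n_topics):
    query_bow = dictionary.doc2bow(read_ap.process_text(query_text))
    q = sparse2full(lda[query_bow], n_topics) + 1e-12
    sims = []
    for idx, doc_vec in enumerate(corpus_full):
        p = np.asarray(doc_vec) + 1e-12
        # Smaller KL(q || p) means more similar, so negate it for ranking
        sims.append((idx, -np.sum(q * np.log(q / p))))
    return sorted(sims, key=lambda pair: -pair[1])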
def compute_metrics(docs, vocab_embs, word2id, id2word):
    """
    For a trained model, compute the MAP and NDCG based on a set of queries
    and all documents in the corpus.

    Returns:
        metrics: a nested dict of queries and their MAP and NDCG scores.
    """
    # Create the document embeddings, or load them from the cache
    if not os.path.exists("./pickles/word2vec_doc_embs.pkl"):
        print("constructing document embeddings")
        doc_embs = {}
        for d in tqdm(list(docs.keys())):
            doc_embs[d] = create_doc_emb(vocab_embs, docs[d], word2id, id2word)
        with open("./pickles/word2vec_doc_embs.pkl", "wb") as writer:
            pkl.dump(doc_embs, writer)
    else:
        with open("./pickles/word2vec_doc_embs.pkl", "rb") as reader:
            doc_embs = pkl.load(reader)

    # Embed every query and compare it to every document embedding
    qrels, queries = ra.read_qrels()
    overall_ser = {}
    for qid in tqdm(qrels):
        query = ra.process_text(queries[qid])
        query_emb = create_doc_emb(vocab_embs, query, word2id, id2word)
        ranking, trec_results = get_ranking(qid, query_emb, doc_embs, vocab_embs)
        overall_ser[qid] = ranking

        # Only test queries (outside the 76-100 validation range) are
        # written to the TREC output
        if int(qid) not in range(76, 101):
            with open("./results/word2vec_trec.csv", "a+") as f:
                f.write("\n".join("{},{},{},{},{},{}".format(*x)
                                  for x in trec_results))
                f.write("\n")

    # Compute the MAP and NDCG per query
    evaluator = pytrec_eval.RelevanceEvaluator(qrels, {'map', 'ndcg'})
    metrics = evaluator.evaluate(overall_ser)

    # Average the model evaluation scores over all queries
    average = {'map': 0, 'ndcg': 0}
    for q in metrics.values():
        average['map'] += q['map']
        average['ndcg'] += q['ndcg']
    average['map'] /= len(queries)
    average['ndcg'] /= len(queries)
    print('average model evaluation scores over all queries {}'.format(average))

    return metrics
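# `create_doc_emb` is defined elsewhere; a common choice, and the assumption
# in this sketch, is the mean of the in-vocabulary word vectors (vocab_embs
# is taken to be a [vocab_size, emb_dim] array).
def create_doc_emb_sketch(vocab_embs, doc, word2id, id2word):
    ids = [word2id[w] for w in doc if w in word2id]
    if not ids:
        # Fall back to a zero vector for fully out-of-vocabulary documents
        return np.zeros(vocab_embs.shape[1])
    return vocab_embs[ids].mean(axis=0)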
def main():
    parser = argparse.ArgumentParser()
    qrels, queries = read_ap.read_qrels()
    parser.add_argument('qrel')
    parser.add_argument('run', nargs=2)
    # A bit too strict, as it does not allow for parametrized measures,
    # but sufficient for the example.
    parser.add_argument('--measure',
                        choices=pytrec_eval.supported_measures,
                        required=True)
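    # The rest of this example is not shown in the snippet; a sketch of the
    # assumed continuation, modelled on the example bundled with pytrec_eval
    # (parse_qrel / parse_run are that library's file parsers):
    args = parser.parse_args()

    with open(args.qrel) as f_qrel:
        qrel = pytrec_eval.parse_qrel(f_qrel)

    evaluator = pytrec_eval.RelevanceEvaluator(qrel, {args.measure})
    for run_path in args.run:
        with open(run_path) as f_run:
            run = pytrec_eval.parse_run(f_run)
        print(run_path, evaluator.evaluate(run))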
def evaluate_queries(model, doc_ids, dictionary, corpus_modelspace, tfidf,
                     index, save_path='LSI'):
    qrels, queries = read_ap.read_qrels()

    overall_result = {}
    for query_id, query in tqdm(queries.items()):
        results = rank_docs(query, model, doc_ids, dictionary,
                            corpus_modelspace, tfidf_model=tfidf, index=index)
        overall_result[query_id] = dict(results)
        # Only test queries (outside the 76-100 validation range) are
        # written in TREC format
        if int(query_id) not in range(76, 101):
            evaluate.write_trec_results(query_id, results, save_path)

    evaluator = pytrec_eval.RelevanceEvaluator(qrels, {'map', 'ndcg'})
    metrics = evaluator.evaluate(overall_result)
    return metrics
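# `evaluate.write_trec_results` lives in another module; a sketch of the
# assumed behaviour, appending one TREC-format line per ranked document
# (the output file name inside save_path is a guess):
def write_trec_results_sketch(query_id, results, save_path):
    with open(os.path.join(save_path, 'trec_results.csv'), 'a+') as f:
        for rank_pos, (doc_id, score) in enumerate(results, start=1):
            f.write('{},Q0,{},{},{},STANDARD\n'.format(
                query_id, doc_id, rank_pos, score))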
def get_json():
    '''Load overall_ser from its pickle and dump the evaluation to JSON.'''
    with open('./objects/overall_ser_lda', 'rb') as f:
        overall_ser = pkl.load(f)

    qrels, queries = read_ap.read_qrels()

    print('pytreccing')
    evaluator = pytrec_eval.RelevanceEvaluator(qrels, {'map', 'ndcg'})
    metrics = evaluator.evaluate(overall_ser)

    print('dumping json')
    with open('./json_files/lda_bow_kl.json', 'w') as f:
        json.dump(metrics, f, indent=1)
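# Example of collapsing the dumped per-query metrics into a single mean
# average precision (a hypothetical follow-up, not part of the original file):
def mean_map(json_path='./json_files/lda_bow_kl.json'):
    with open(json_path) as f:
        metrics = json.load(f)
    return sum(q['map'] for q in metrics.values()) / len(metrics)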
def main():
    docs_by_id = ra.get_processed_docs()

    # Train the model if no saved copy exists, otherwise load it
    path = "./doc2vec_models/{}".format(config.model_name)
    if not os.path.exists(path):
        print("Model not yet trained, starting training now.")
        train_corpus = create_corpus(docs_by_id)
        model = train_doc2vec(train_corpus)
    else:
        print("Model already trained, loading the file.")
        model = gensim.models.doc2vec.Doc2Vec.load(path)

    qrels, queries = ra.read_qrels()
    overall_ser = {}

    # Write the TREC results column headers to file
    trec_path = "./results/trec_doc2vec.csv"
    with open(trec_path, "w") as f:
        f.write("query-id, Q0, document-id, rank, score, STANDARD\n")

    print("Evaluating doc2vec model:", config.model_name)

    # Loop over all queries and predict the most relevant docs
    for qid in tqdm(qrels):
        query_text = queries[qid]
        results, trec_results = rank_docs(model, query_text, qid,
                                          config.model_name)
        overall_ser[qid] = dict(results)

        # Write all test queries (outside the 76-100 validation range)
        # to the TREC format file
        if int(qid) not in range(76, 101):
            with open(trec_path, "a+") as f:
                f.write("\n".join("{},{},{},{},{},{}".format(*x)
                                  for x in trec_results))
                f.write("\n")

    # Run the evaluation with `qrels` as the ground-truth relevance
    # judgements; here, we are measuring MAP and NDCG
    evaluator = pytrec_eval.RelevanceEvaluator(qrels, {'map', 'ndcg'})
    metrics = evaluator.evaluate(overall_ser)

    # Dump this to JSON
    # *Not* optional - this is submitted with the assignment!
    json_path = "./results/{}.json".format(config.model_name)
    with open(json_path, "w") as writer:
        json.dump(metrics, writer, indent=1)
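# `create_corpus` is defined elsewhere; for gensim's Doc2Vec it is assumed to
# produce one TaggedDocument per (already tokenized) document:
def create_corpus_sketch(docs_by_id):
    return [gensim.models.doc2vec.TaggedDocument(tokens, [doc_id])
            for doc_id, tokens in docs_by_id.items()]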
    # Sort the (doc_id, score) pairs by descending score
    results = list(results.items())
    results.sort(key=lambda pair: -pair[1])
    return results


if __name__ == "__main__":
    # Ensure the dataset is downloaded
    download_ap.download_dataset()

    # Pre-process the text
    docs_by_id = read_ap.get_processed_docs()

    # Create an instance for retrieval
    tfidf_search = TfIdfRetrieval(docs_by_id)

    # Read in the qrels
    qrels, queries = read_ap.read_qrels()

    overall_ser = {}
    print("Running TFIDF Benchmark")

    # Collect a ranking for every query
    for qid in tqdm(qrels):
        query_text = queries[qid]
        results = tfidf_search.search(query_text)
        overall_ser[qid] = dict(results)

    # Format the rankings as tab-separated TREC result lines
    results_lines = []
    for qid in overall_ser:
        for doc_id in overall_ser[qid]:
            results_lines.append('{}\tQ0\t{}\t0\t{}\tSTANDARD\n'.format(
                qid, doc_id, overall_ser[qid][doc_id]))
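    # The write of `results_lines` is not shown in the snippet; a sketch of
    # the assumed follow-up (file paths are placeholders), including the MAP
    # and nDCG evaluation used by the other benchmarks in this section:
    with open('./results/tfidf.run', 'w') as f:
        f.writelines(results_lines)

    evaluator = pytrec_eval.RelevanceEvaluator(qrels, {'map', 'ndcg'})
    metrics = evaluator.evaluate(overall_ser)
    with open('./results/tfidf.json', 'w') as writer:
        json.dump(metrics, writer, indent=1)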