def main(): """ Do not modify. Run and evaluate all methods. """ queries, relevances, docs = read_data() NHITS = 10 indexer = index.Index(docs) scorers = [ score.Cosine(), score.RSV(), score.BM25(k=1, b=.5), score.BM25(k=1, b=1), score.BM25(k=2, b=.5), score.BM25(k=2, b=1) ] evaluators = [ evaluate.Precision(), evaluate.Recall(), evaluate.F1(), evaluate.MAP() ] all_results = run_all(queries, relevances, docs, indexer, scorers, evaluators, NHITS) write_results(all_results, 'Results.md')
def get_scores(data):
    # Load samples, build the TF-IDF representation, rank candidates by
    # cosine similarity, and evaluate the ranking.
    MAPs, MRRs, P1s, P5s = [], [], [], []
    auc = meter.AUCMeter()
    for id, (pos, neg) in data.id2posneg.items():
        # total number of positive examples
        pn = len(pos)
        if pn == 0:
            continue
        # get the query vector
        q = data.vectorizer.transform([data.id2str(id)])[0]
        # get positive and negative candidate vectors
        pos = [data.id2str(i) for i in pos]
        neg = [data.id2str(i) for i in neg]
        posneg = data.vectorizer.transform(pos + neg)
        # compute cosine similarity between the query and each candidate
        cos_sim = cosine_similarity(q, posneg).tolist()[0]
        # pair each score with its relevance label (positives come first)
        cos_sim = zip(cos_sim,
                      [int(i < pn) for i in range(len(cos_sim))],
                      [None for i in range(len(cos_sim))])
        # rank candidates by descending similarity
        cos_sim = (sorted(cos_sim, key=lambda x: x[0], reverse=True), pn)
        MAPs.append(evaluate.MAP(cos_sim))
        MRRs.append(evaluate.MRR(cos_sim))
        P1s.append(evaluate.P(cos_sim, 1))
        P5s.append(evaluate.P(cos_sim, 5))
        auc.add(np.array([s[0] for s in cos_sim[0]]),
                np.array([s[1] for s in cos_sim[0]]))
    avg = lambda x: sum(x) / float(len(x))
    return avg(MAPs), avg(MRRs), avg(P1s), avg(P5s), auc.value(0.05)
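
# A minimal sketch, not the project's evaluate.MAP, of how average precision
# could be computed from the (ranked list, num_positives) tuple that
# get_scores builds above. The evaluate module is assumed to implement the
# standard definition; this helper is purely illustrative.
def _average_precision_sketch(result):
    ranked, pn = result
    hits, precisions = 0, []
    for rank, (_, is_pos, _) in enumerate(ranked, start=1):
        if is_pos:
            hits += 1
            precisions.append(hits / float(rank))
    return sum(precisions) / float(pn) if pn else 0.0

# Usage (illustrative): _average_precision_sketch(cos_sim) would give the
# per-query value that get_scores appends to MAPs before averaging.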