예제 #1
0
파일: main.py 프로젝트: setr/cs429
def main():
    """Run every scorer/evaluator combination and write the results.

    Reads the data via ``read_data()``, builds an ``index.Index`` over the
    documents, assembles the list of scorers and evaluators, hands them all
    to ``run_all()`` and writes what it returns to ``Results.md``.

    Note: the original scaffold marks this function as "Do not modify".
    """
    queries, relevances, docs = read_data()
    hits_per_query = 10
    indexer = index.Index(docs)

    # Cosine and RSV first, then the BM25 grid in the order
    # (k=1, b=.5), (k=1, b=1), (k=2, b=.5), (k=2, b=1).
    scorers = [score.Cosine(), score.RSV()]
    scorers.extend(score.BM25(k=k, b=b) for k in (1, 2) for b in (.5, 1))

    evaluators = [
        evaluate.Precision(),
        evaluate.Recall(),
        evaluate.F1(),
        evaluate.MAP(),
    ]

    all_results = run_all(
        queries, relevances, docs, indexer, scorers, evaluators,
        hits_per_query)
    write_results(all_results, 'Results.md')
예제 #2
0
def get_scores(data):
    """Rank each query's candidates by cosine similarity and average the metrics.

    For every ``(query_id, (positive_ids, negative_ids))`` entry in
    ``data.id2posneg`` that has at least one positive, the query text and
    all candidate texts are looked up with ``data.id2str`` and vectorized
    with ``data.vectorizer.transform`` (presumably a fitted TF-IDF
    vectorizer, per the original comments -- confirm against the caller).
    Candidates are sorted by descending cosine similarity to the query and
    passed to ``evaluate.MAP``, ``evaluate.MRR`` and ``evaluate.P`` (cut-offs
    1 and 5); the scores and labels are also fed to a ``meter.AUCMeter``.

    Args:
        data: object exposing ``id2posneg`` (mapping), ``id2str`` (callable)
            and ``vectorizer`` (object with ``transform``).

    Returns:
        A 5-tuple: the averages of the per-query ``evaluate.MAP``,
        ``evaluate.MRR``, ``evaluate.P(., 1)`` and ``evaluate.P(., 5)``
        values, followed by ``auc.value(0.05)``.

    Raises:
        ZeroDivisionError: if no entry in ``data.id2posneg`` has any
            positive example, because the averages are taken over empty
            lists.
    """
    MAPs, MRRs, P1s, P5s = [], [], [], []
    auc = meter.AUCMeter()

    # ``items()`` rather than the Python 2-only ``iteritems()`` so the loop
    # works on both Python 2 and Python 3 mappings.
    for query_id, (pos_ids, neg_ids) in data.id2posneg.items():
        # Queries without any positive example are skipped entirely.
        n_pos = len(pos_ids)
        if n_pos == 0:
            continue

        # Vectorize the query; ``[0]`` selects the single resulting row.
        query_vec = data.vectorizer.transform([data.id2str(query_id)])[0]

        # Vectorize the candidates, positives first and negatives after.
        pos_texts = [data.id2str(i) for i in pos_ids]
        neg_texts = [data.id2str(i) for i in neg_ids]
        candidates = data.vectorizer.transform(pos_texts + neg_texts)

        # One similarity per candidate, in the same positives-first order.
        sims = cosine_similarity(query_vec, candidates).tolist()[0]

        # Build (score, label, None) triples: label is 1 for the first
        # ``n_pos`` candidates (the positives) and 0 for the rest. The third
        # slot is always None, as in the original.
        triples = [(sim, int(pos < n_pos), None)
                   for pos, sim in enumerate(sims)]
        ranked = sorted(triples, key=lambda t: t[0], reverse=True)

        # The evaluate helpers receive the ranked list together with the
        # number of positives.
        ranking = (ranked, n_pos)
        MAPs.append(evaluate.MAP(ranking))
        MRRs.append(evaluate.MRR(ranking))
        P1s.append(evaluate.P(ranking, 1))
        P5s.append(evaluate.P(ranking, 5))
        auc.add(np.array([t[0] for t in ranked]),
                np.array([t[1] for t in ranked]))

    def _mean(values):
        # float() keeps true division under Python 2 as well.
        return sum(values) / float(len(values))

    return _mean(MAPs), _mean(MRRs), _mean(P1s), _mean(P5s), auc.value(0.05)