Example No. 1
def eval_diff_vect(WR):
    """
    Uses the DiffVec dataset to perform 1-NN relation classification.
    We use PairDiff to create a vector for each word pair and then measure the similarity
    between the target pair and the remaining word pairs in the dataset.
    If the 1-NN has the same relation label as the target pair, we consider it a
    correct match. We compute accuracy = correct_matches / total_instances.
    """
    analogy_file = open(os.path.join(pkg_dir, "../benchmarks/diff-vec"))
    relation = {}
    pairs = []
    label = ""
    while 1:
        line = analogy_file.readline()
        if len(line) == 0:
            break
        if line.startswith(':'):  # This is a label
            label = line.split(':')[1].strip()
        else:
            p = line.strip().split()
            (a, b) = p
            pairs.append((a, b))
            relation[(a, b)] = label
    analogy_file.close()
    n = len(pairs)
    M = numpy.zeros((n, WR.dim), dtype=numpy.float64)
    for (i, (a, b)) in enumerate(pairs):
        M[i, :] = normalize(get_embedding(a, WR) - get_embedding(b, WR))
    S = numpy.dot(M, M.T)
    # Column 0 of the sorted similarities is each pair itself, so the
    # second-nearest neighbour (column 1) gives the 1-NN prediction.
    preds = (-S).argsort()[:, 1]
    corrects = sum(
        [relation[pairs[i]] == relation[pairs[preds[i]]] for i in range(n)])
    accuracy = float(100 * corrects) / float(n)
    print "DiffVec Accuracy =", accuracy
    return accuracy
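These snippets assume module-level imports of os, numpy, and collections, plus a few helpers that are not shown on this page: normalize, get_embedding, and a WordReps container exposing vects and dim. A minimal sketch of what the two helpers are assumed to do, purely for illustration:

import numpy

def normalize(x):
    # Return an L2-normalised copy of x; leave the zero vector unchanged.
    norm = numpy.linalg.norm(x)
    return x if norm == 0 else x / norm

def get_embedding(word, WR):
    # Look up the vector for word in WR.vects; fall back to a zero vector
    # of the right dimensionality for out-of-vocabulary words.
    return WR.vects.get(word, numpy.zeros(WR.dim, dtype=numpy.float64))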
Example No. 2
def eval_SAT_Analogies(WR, method):
    """
    Solve SAT word analogy questions using the vectors. 
    """
    from sat import SAT
    S = SAT()
    questions = S.getQuestions()
    corrects = total = skipped = 0
    for Q in questions:
        total += 1
        (q_first, q_second) = Q['QUESTION']
        if q_first['word'] in WR.vects and q_second['word'] in WR.vects:
            va = get_embedding(q_first['word'], WR)
            vb = get_embedding(q_second['word'], WR)
            max_sim = -100
            max_cand = -100
            for (i, (c_first, c_second)) in enumerate(Q["CHOICES"]):
                sim = 0
                if c_first['word'] in WR.vects and c_second['word'] in WR.vects:
                    vc = get_embedding(c_first['word'], WR)
                    vd = get_embedding(c_second['word'], WR)
                    sim = scoring_formula(va, vb, vc, vd, method)
                    if max_sim < sim:
                        max_sim = sim
                        max_cand = i
            if max_cand == Q['ANS']:
                corrects += 1
        else:
            skipped += 1
    acc = float(100 * corrects) / float(total)
    coverage = 100.0 - (float(100 * skipped) / float(total))
    print "SAT Accuracy = %f (%d / %d)" % (acc, corrects, total)
    print "Qustion coverage = %f (skipped = %d)" % (coverage, skipped)
    return {"acc": acc, "coverage": coverage}
Example No. 3
def eval_SemEval(WR, method):
    """
    Answer SemEval questions. 
    """
    from semeval import SemEval
    S = SemEval(os.path.join(pkg_dir, "../benchmarks/semeval"))
    total_accuracy = 0
    print "Total no. of instances in SemEval =", len(S.data)
    for Q in S.data:
        scores = []
        for (first, second) in Q["wpairs"]:
            val = 0
            for (p_first, p_second) in Q["paradigms"]:
                va = get_embedding(first, WR)
                vb = get_embedding(second, WR)
                vc = get_embedding(p_first, WR)
                vd = get_embedding(p_second, WR)
                val += scoring_formula(va, vb, vc, vd, method)
            val /= float(len(Q["paradigms"]))
            scores.append(((first, second), val))

        # sort the scores and write to a file.
        scores.sort(key=lambda x: x[1], reverse=True)
        score_fname = os.path.join(pkg_dir,
                                   "../work/semeval/%s.txt" % Q["filename"])
        score_file = open(score_fname, 'w')
        for ((first, second), score) in scores:
            score_file.write('%f "%s:%s"\n' % (score, first, second))
        score_file.close()
        total_accuracy += S.get_accuracy(score_fname, Q["filename"])
    acc = total_accuracy / float(len(S.data))
    print "SemEval Accuracy =", acc
    return {"acc": acc}
Example No. 4
def eval_MSR_Analogies(WR, M, cands):
    """
    Evaluate the accuracy of the learnt vectors on the analogy task using MSR dataset. 
    We consider the set of fourth words in the test dataset as the
    candidate space for the correct answer.
    """
    analogy_file = open(
        os.path.join(pkg_dir, "../benchmarks/msr-analogies.txt"))
    questions = []
    total_questions = 0
    corrects = 0
    while 1:
        line = analogy_file.readline()
        if len(line) == 0:
            break
        p = line.strip().split()
        total_questions += 1
        questions.append((p[0], p[1], p[2], p[3]))
    analogy_file.close()

    print "== MSR Analogy Dataset =="
    print "Total no. of questions =", len(questions)
    print "Total no. of candidates =", len(cands)

    # predict the fourth word for each question.
    count = 1
    for (a, b, c, d) in questions:
        print "%d / %d" % (count, len(questions)), "\r",
        count += 1
        # set of candidates for the current question are the fourth
        # words in all questions, except the three words for the current question.
        scores = []
        va = get_embedding(a, WR)
        vb = get_embedding(b, WR)
        vc = get_embedding(c, WR)
        x = normalize(vb - va + vc)
        s = numpy.dot(M, x)
        nns = [cands[i] for i in (-s).argsort()[:4]]
        nns = filter(lambda y: y not in [a, b, c], nns)
        if nns[0] == d:
            corrects += 1
    accuracy = float(corrects) / float(len(questions))
    print "MSR accuracy =", accuracy
    return {"accuracy": accuracy}
Example No. 5
def evaluate_embeddings(embed_fname, dim, res_fname):
    """
    This function can be used to evaluate an embedding.
    """
    res = {}
    WR = WordReps()
    # We will load vectors only for the words in the benchmarks.
    words = set()
    with open(os.path.join(pkg_dir, "../benchmarks/all_words.txt")) as F:
        for line in F:
            words.add(line.strip())
    WR.read_model(embed_fname, dim, words)

    # semantic similarity benchmarks.
    benchmarks = ["ws", "rg", "mc", "rw", "scws", "men", "simlex"]
    for bench in benchmarks:
        (corr, sig) = get_correlation(
            os.path.join(pkg_dir, "../benchmarks/%s_pairs.txt" % bench),
            WR.vects, "spearman")
        print "%s = %f" % (bench, corr)
        res[bench] = corr

    cands = list(words)
    M = numpy.zeros((len(cands), WR.dim), dtype=numpy.float64)
    for (i, w) in enumerate(cands):
        M[i, :] = normalize(get_embedding(w, WR))

    # word analogy benchmarks.
    res["Google_res"] = eval_Google_Analogies(WR, M, cands)
    res["MSR_res"] = eval_MSR_Analogies(WR, M, cands)
    res["SemEval_res"] = eval_SemEval(WR, "CosAdd")
    res["DiffVec_acc"] = eval_diff_vect(WR)
    #res["SAT_res"] = eval_SAT_Analogies(WR, scoring_method)

    res_file = open(res_fname, 'w')
    res_file.write(
        "#RG, MC, WS, RW, SCWS, MEN, SimLex, sem, syn, total, SemEval, MSR, DiffVec\n"
    )
    res_file.write("%f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f\n" %
                   (res["rg"], res["mc"], res["ws"], res["rw"], res["scws"],
                    res["men"], res["simlex"], res["Google_res"]["semantic"],
                    res["Google_res"]["syntactic"], res["Google_res"]["total"],
                    res["SemEval_res"]["acc"], res["MSR_res"]["accuracy"],
                    res["DiffVec_acc"]))
    res_file.close()
    return res
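A minimal, hypothetical driver for this function; the script name, file paths, and dimensionality below are placeholders rather than part of the original code:

import sys

if __name__ == "__main__":
    # Usage (hypothetical): python evaluate.py embeddings.txt 300 results.csv
    embed_fname, dim, res_fname = sys.argv[1], int(sys.argv[2]), sys.argv[3]
    evaluate_embeddings(embed_fname, dim, res_fname)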
Example No. 6
def eval_Google_Analogies(WR, M, cands):
    """
    Evaluate the accuracy of the learnt vectors on the analogy task. 
    We consider the set of fourth words in the test dataset as the
    candidate space for the correct answer.
    """
    analogy_file = open(
        os.path.join(pkg_dir, "../benchmarks/google-analogies.txt"))
    questions = collections.OrderedDict()
    total_questions = {}
    corrects = {}
    while 1:
        line = analogy_file.readline()
        if len(line) == 0:
            break
        if line.startswith(':'):  # This is a label
            label = line.split(':')[1].strip()
            questions[label] = []
            total_questions[label] = 0
            corrects[label] = 0
        else:
            p = line.strip().split()
            total_questions[label] += 1
            questions[label].append((p[0], p[1], p[2], p[3]))
    analogy_file.close()

    print "== Google Analogy Dataset =="
    print "Total no. of question types =", len(questions)
    print "Total no. of candidates =", len(cands)

    # In this snippet every question is attempted, so the number of valid
    # questions equals the total number read from the benchmark file.
    valid_questions = sum(total_questions[label] for label in total_questions)

    # predict the fourth word for each question.
    count = 1
    for label in questions:
        for (a, b, c, d) in questions[label]:
            print "%d%% (%d / %d)" % ((100 * count) / float(valid_questions),
                                      count, valid_questions), "\r",
            count += 1
            va = get_embedding(a, WR)
            vb = get_embedding(b, WR)
            vc = get_embedding(c, WR)
            x = normalize(vb - va + vc)
            s = numpy.dot(M, x)
            nns = [cands[i] for i in (-s).argsort()[:4]]
            nns = filter(lambda y: y not in [a, b, c], nns)
            if nns[0] == d:
                corrects[label] += 1

    # Compute accuracy
    n = semantic_total = syntactic_total = semantic_corrects = syntactic_corrects = 0
    for label in total_questions:
        n += total_questions[label]
        if label.startswith("gram"):
            syntactic_total += total_questions[label]
            syntactic_corrects += corrects[label]
        else:
            semantic_total += total_questions[label]
            semantic_corrects += corrects[label]
    print "Percentage of questions attempted = %f (%d / %d)" % (
        (100 * valid_questions) / float(n), valid_questions, n)
    for label in questions:
        acc = float(100 * corrects[label]) / float(total_questions[label])
        print "%s = %f (correct = %d, attempted = %d, total = %d)" % (
            label, acc, corrects[label], len(
                questions[label]), total_questions[label])
    semantic_accuracy = float(100 * semantic_corrects) / float(semantic_total)
    syntactic_accuracy = float(
        100 * syntactic_corrects) / float(syntactic_total)
    total_corrects = semantic_corrects + syntactic_corrects
    accuracy = float(100 * total_corrects) / float(n)
    print "Semantic Accuracy =", semantic_accuracy
    print "Syntactic Accuracy =", syntactic_accuracy
    print "Total accuracy =", accuracy
    return {
        "semantic": semantic_accuracy,
        "syntactic": syntactic_accuracy,
        "total": accuracy
    }
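To make the prediction step concrete, here is a toy illustration (with made-up vectors) of the 3CosAdd retrieval used in Examples No. 4 and No. 6: score every candidate by its cosine similarity with normalize(vb - va + vc) and take the nearest neighbour that is not one of the three question words.

import numpy

vects = {
    "man":   numpy.array([1.0, 0.0, 0.0]),
    "woman": numpy.array([1.0, 1.0, 0.0]),
    "king":  numpy.array([0.0, 0.0, 1.0]),
    "queen": numpy.array([0.0, 1.0, 1.0]),
}
cands = list(vects)
# Row-normalised candidate matrix, as built in Example No. 5.
M = numpy.vstack([vects[w] / numpy.linalg.norm(vects[w]) for w in cands])
# Query vector for the analogy man : woman :: king : ?
x = vects["woman"] - vects["man"] + vects["king"]
x = x / numpy.linalg.norm(x)
s = numpy.dot(M, x)
nns = [cands[i] for i in (-s).argsort() if cands[i] not in ("man", "woman", "king")]
print(nns[0])  # "queen" for these made-up vectors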