Ejemplo n.º 1
0
def mult_cos(va, vb, vc, vd):
    """
    Multiplicative analogy score computed in log space.

    Every cosine c taken against vd is first mapped to (1 + c) / 2, and the
    returned score is:
    log((1 + cos(vb, vd)) / 2) + log((1 + cos(vc, vd)) / 2)
        - log((1 + cos(va, vd)) / 2)
    """
    def shifted_sim(vec):
        # Cosine of vec against the candidate vd, mapped to (1 + c) / 2.
        return (1.0 + cosine(vec, vd)) / 2.0

    sim_b = shifted_sim(vb)
    sim_c = shifted_sim(vc)
    sim_a = shifted_sim(va)
    return numpy.log(sim_b) + numpy.log(sim_c) - numpy.log(sim_a)
Ejemplo n.º 2
0
def elementwise_multiplication(va, vb, vc, vd):
    """
    Score an analogy by comparing elementwise products of the two word pairs.

    The pair (va, vb) is represented by va * vb and the pair (vc, vd) by
    vc * vd; the score is the cosine between these two representations.
    """
    first_pair = va * vb
    second_pair = vc * vd
    return cosine(first_pair, second_pair)
Ejemplo n.º 3
0
def add_cos(va, vb, vc, vd):
    """
    Additive analogy score:
    cos(normalize(vb - va + vc), vd)
    """
    offset = vb - va + vc
    return cosine(normalize(offset), vd)
Ejemplo n.º 4
0
def get_correlation(dataset_fname, vects, corr_measure, ignore_missing=False):
    """
    Correlate cosine similarities of word representations with the ratings
    given in a word-pair dataset.

    Every non-blank line of the dataset file is split on whitespace; the first
    two fields are the words of a pair and the third field is its rating
    (parsed with float). If the same pair occurs more than once, the last
    rating wins.

    Args:
        dataset_fname: path of the dataset file.
        vects: mapping from word to its vector; only membership tests and
            item lookups are performed on it.
        corr_measure: "pearson" or "spearman".
        ignore_missing: when True, pairs with a word absent from vects are
            skipped; when False (the default) they are kept and given a
            computed similarity of 0.

    Returns:
        (0, 0) if no pair had both words in vects; otherwise the result of
        scipy.stats.pearsonr or scipy.stats.spearmanr applied to the computed
        similarities and the dataset ratings.

    Raises:
        ValueError: if at least one pair was scored and corr_measure is
            neither "pearson" nor "spearman".
    """
    if VERBOSE:
        if ignore_missing:
            sys.stderr.write("Ignoring missing pairs\n")
        else:
            sys.stderr.write("Not ignoring missing pairs\n")

    # The context manager closes the file even if a line fails to parse.
    pairs = {}
    with open(dataset_fname) as dataset:
        for line in dataset:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no pair.
                continue
            pairs[(fields[0], fields[1])] = float(fields[2])

    human = []
    computed = []
    found_pairs = False
    missing_count = 0
    for (x, y), rating in pairs.items():
        # Test membership directly on vects rather than on a list of missing words.
        if x not in vects or y not in vects:
            missing_count += 1
            if ignore_missing:
                continue
            comp = 0
        else:
            found_pairs = True
            comp = cosine(vects[x], vects[y])
        human.append(rating)
        computed.append(comp)

    if VERBOSE:
        sys.stderr.write("Missing pairs = %d (out of %d)\n" %
                         (missing_count, len(pairs)))

    if not found_pairs:
        # Nothing was scored, so there is no correlation to compute.
        return (0, 0)
    if corr_measure == "pearson":
        return scipy.stats.pearsonr(computed, human)
    if corr_measure == "spearman":
        return scipy.stats.spearmanr(computed, human)
    raise ValueError(
        "corr_measure must be 'pearson' or 'spearman', got %r" % (corr_measure,))
Ejemplo n.º 5
0
def PairDiff(va, vb, vc, vd):
    """
    Score an analogy by the cosine between the two normalized pair offsets:
    cos(normalize(vd - vc), normalize(vb - va))
    """
    second_offset = normalize(vd - vc)
    first_offset = normalize(vb - va)
    return cosine(second_offset, first_offset)
Ejemplo n.º 6
0
def subt_cos(va, vb, vc, vd):
    """
    Score an analogy by the cosine between two normalized cross-pair
    differences:
    cos(normalize(va - vc), normalize(vb - vd))
    """
    source_diff = normalize(va - vc)
    target_diff = normalize(vb - vd)
    return cosine(source_diff, target_diff)
Ejemplo n.º 7
0
def domain_funct(va, vb, vc, vd):
    """
    Score an analogy with the formula proposed by Turney in the Domain and
    Function paper:
    sqrt((1 + cos(va, vc)) / 2 * (1 + cos(vb, vd)) / 2)
    """
    first_term = (1.0 + cosine(va, vc)) / 2.0
    second_term = 1.0 + cosine(vb, vd)
    # Same left-to-right evaluation as the single-expression form.
    product = first_term * second_term / 2.0
    return numpy.sqrt(product)