def mult_cos(va, vb, vc, vd):
    """
    Combine three similarities against vd in log space.

    Each cosine c is first rescaled to s = (1 + c) / 2, and the score is:
        log(s(vb, vd)) + log(s(vc, vd)) - log(s(va, vd))
    """
    def rescaled_similarity(vec):
        # Shift-and-halve the cosine of `vec` against vd before the log.
        return (1.0 + cosine(vec, vd)) / 2.0

    sim_b = rescaled_similarity(vb)
    sim_c = rescaled_similarity(vc)
    sim_a = rescaled_similarity(va)
    return numpy.log(sim_b) + numpy.log(sim_c) - numpy.log(sim_a)
def elementwise_multiplication(va, vb, vc, vd):
    """
    Compare two pairs through their elementwise products.

    The pair (va, vb) is represented by va * vb and the pair (vc, vd)
    by vc * vd; the score is the cosine between those two products.
    """
    first_pair = va * vb
    second_pair = vc * vd
    return cosine(first_pair, second_pair)
def add_cos(va, vb, vc, vd):
    """
    Additive offset score: cos(normalize(vb - va + vc), vd).
    """
    offset = vb - va + vc
    return cosine(normalize(offset), vd)
def get_correlation(dataset_fname, vects, corr_measure):
    """
    Correlate cosine similarities computed from vects with dataset ratings.

    Every non-blank line of the dataset file must contain at least three
    whitespace-separated fields: two words followed by a numeric rating.
    If the same word pair occurs more than once, the last rating wins.

    A pair with at least one word absent from vects is counted as missing
    and is scored 0 (missing pairs are not dropped, because ignore_missing
    is fixed to False below).

    Args:
        dataset_fname: path of the word-pair rating file.
        vects: mapping from word to its vector; must support `in` and
            indexing by word.
        corr_measure: either "pearson" or "spearman".

    Returns:
        (0, 0) if no pair had both of its words in vects; otherwise the
        result of scipy.stats.pearsonr or scipy.stats.spearmanr called
        with (computed similarities, dataset ratings).

    Raises:
        ValueError: if corr_measure is neither "pearson" nor "spearman"
            (only checked once at least one pair has been scored).
    """
    # Missing pairs are kept with a similarity of 0 rather than skipped.
    ignore_missing = False
    if VERBOSE:
        if ignore_missing:
            sys.stderr.write("Ignoring missing pairs\n")
        else:
            sys.stderr.write("Not ignoring missing pairs\n")

    pair_ratings = {}
    # The context manager closes the file even if a line fails to parse.
    with open(dataset_fname) as dataset_file:
        for line in dataset_file:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            pair_ratings[(fields[0], fields[1])] = float(fields[2])

    human = []
    computed = []
    found_pairs = False
    missing_count = 0
    for (x, y), rating in pair_ratings.items():
        # Test membership in vects directly instead of scanning a list of
        # missing words for every pair.
        if x not in vects or y not in vects:
            missing_count += 1
            if ignore_missing:
                continue
            comp = 0
        else:
            found_pairs = True
            comp = cosine(vects[x], vects[y])
        human.append(rating)
        computed.append(comp)

    if VERBOSE:
        sys.stderr.write("Missing pairs = %d (out of %d)\n" %
                         (missing_count, len(pair_ratings)))

    if not found_pairs:
        # Nothing was actually scored, so a correlation is meaningless.
        return (0, 0)

    if corr_measure == "pearson":
        return scipy.stats.pearsonr(computed, human)
    if corr_measure == "spearman":
        return scipy.stats.spearmanr(computed, human)
    raise ValueError(
        "Unknown correlation measure %r (expected 'pearson' or 'spearman')"
        % (corr_measure,))
def PairDiff(va, vb, vc, vd):
    """
    Pair-difference score: cos(normalize(vd - vc), normalize(vb - va)).
    """
    second_offset = normalize(vd - vc)
    first_offset = normalize(vb - va)
    return cosine(second_offset, first_offset)
def subt_cos(va, vb, vc, vd):
    """
    Subtractive score: cos(normalize(va - vc), normalize(vb - vd)).
    """
    left_offset = normalize(va - vc)
    right_offset = normalize(vb - vd)
    return cosine(left_offset, right_offset)
def domain_funct(va, vb, vc, vd):
    """
    Score following the formula attributed to Turney's Domain and
    Function paper:

        sqrt( (1 + cos(va, vc)) / 2 * (1 + cos(vb, vd)) / 2 )
    """
    sim_ac = (1.0 + cosine(va, vc)) / 2.0
    # Keep the original left-to-right evaluation: multiply, then halve.
    product = sim_ac * (1.0 + cosine(vb, vd)) / 2.0
    return numpy.sqrt(product)