import time

# One distance calculator per search engine. NGD, NMD and NYD are not
# defined in this part of the file; they must already be in scope.
google = NGD()
msn = NMD()
yahoo = NYD()


def compare(a, b, google, msn, yahoo):
    """Print the distance between terms *a* and *b* for each engine.

    Each of ``google``, ``msn`` and ``yahoo`` must provide a
    ``distance(a, b)`` method whose result can be rendered with ``%f``.
    The engines are queried in that order. Nothing is returned; the
    report goes to standard output followed by an empty line.

    Note that the parameters shadow the module-level engine instances
    of the same names.
    """
    # A single parenthesised argument prints identically under the
    # Python 2 print statement, so the output is unchanged.
    scores = tuple(engine.distance(a, b) for engine in (google, msn, yahoo))
    print('for "%s" "%s"' % (a, b))
    print('google: %f msn: %f yahoo: %f' % scores)
    print('')


pairs = [
    ('by', 'with'),
    ('quantum', 'physics'),
    ('quantum', 'football'),
]

# First timed section. The batch call is currently disabled, so this only
# measures the gap between two consecutive clock reads.
print(time.ctime())
b = time.time()
#print google.distances((pairs*30)[:21])
a = time.time()
print(time.ctime())
print('took %d seconds' % (a - b))

# Second timed section: the three pairs repeated 30 times (90 entries),
# cut down to the first 65, sent through google.distances in one call.
# NOTE(review): the meaning of the second argument (True) is not visible
# here -- confirm against the distances() implementation.
b = time.time()
print(google.distances((pairs * 30)[:65], True))
a = time.time()
print(time.ctime())
print('took %d seconds' % (a - b))
"article", "paper", ] # now we calculate the distance name_best_match = {} for n1 in mails_per_name.keys(): best_match_dist = 3.0 best_match = ("", "") for context in contexts: tuples = [] n2s = [] for n2 in mails_per_name.keys(): if n1 != n2: tuples.append((n1, n2)) n2s.append(n2) dists = ngd.distances(tuples, context) for (n1, n2), dist in dists.iteritems(): if dist < best_match_dist: best_match_dist = dist best_match = (n2, context) name_best_match[n1] = best_match print "------------------------" print "best_matches" for n, (n2, context) in name_best_match.iteritems(): if n2 != "": # use the non-domain email with the best hits best = "" best_hits = -1 for m in mails_per_name[n2]: