Exemple #1
0
import time

# One distance calculator per search engine. NGD, NMD and NYD are neither
# defined nor imported in this snippet -- presumably they come from the
# project's distance module (TODO confirm the import).
google = NGD()
msn = NMD()
yahoo = NYD()

def compare(a, b, google, msn, yahoo):
    """Print the distance between two terms as reported by each engine.

    Args:
        a: First term; passed to every engine and echoed with ``%s``.
        b: Second term; passed to every engine and echoed with ``%s``.
        google: Object exposing ``distance(a, b)``; result labelled "google".
        msn: Object exposing ``distance(a, b)``; result labelled "msn".
        yahoo: Object exposing ``distance(a, b)``; result labelled "yahoo".

    Each ``distance`` result is formatted with ``%f``, so it must be a
    number. The function returns nothing; it writes three lines to standard
    output (the pair, the three distances, and an empty separator line).
    """
    # Query the engines in a fixed order before printing anything, so a
    # failing engine does not leave a half-written report.
    g = google.distance(a, b)
    m = msn.distance(a, b)
    y = yahoo.distance(a, b)

    # A single parenthesised argument prints identically whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print('for "%s" "%s"' % (a, b))
    print('google: %f msn: %f yahoo: %f' % (g, m, y))
    print('')

# Word pairs used as the workload for the batched distance benchmark.
pairs = [('by','with'), ('quantum','physics'), ('quantum', 'football')]

# First timing section. The batched call is disabled, so this only prints
# two timestamps and the elapsed time of an empty interval.
# Single-argument print(...) is used throughout: it behaves the same as the
# Python 2 print statement and is also valid on Python 3.
print(time.ctime())
b = time.time()
#print(google.distances((pairs*30)[:21]))
a = time.time()
print(time.ctime())
print('took %d seconds' % (a-b))

# Second timing section: one batched call over the first 65 of the 90
# repeated pairs. The meaning of the second argument (True) is not visible
# in this snippet -- TODO confirm against the distances() implementation.
b = time.time()
print(google.distances((pairs*30)[:65], True))
a = time.time()
print(time.ctime())
print('took %d seconds' % (a-b))
Exemple #2
0
    "article",
    "paper",
]
# Compute, for every name, the closest other name over all contexts.
# name_best_match maps name -> (closest other name, context it was found in).
# A name for which no distance falls below the starting threshold keeps the
# ("", "") placeholder.
name_best_match = {}
for n1 in mails_per_name:
    # Only distances strictly below this starting value are accepted.
    best_match_dist = 3.0
    best_match = ("", "")
    # The candidate pairs do not depend on the context, so build them once
    # per name instead of once per (name, context).
    tuples = [(n1, n2) for n2 in mails_per_name if n1 != n2]
    for context in contexts:
        dists = ngd.distances(tuples, context)
        # Unpack only the second element of each key. Unpacking into n1
        # would rebind the outer loop variable, which is used below as the
        # key of name_best_match.
        for (_, n2), dist in dists.items():
            if dist < best_match_dist:
                best_match_dist = dist
                best_match = (n2, context)

    name_best_match[n1] = best_match

print "------------------------"
print "best_matches"
for n, (n2, context) in name_best_match.iteritems():
    if n2 != "":
        # use the non-domain email with the best hits
        best = ""
        best_hits = -1
        for m in mails_per_name[n2]: