Ejemplo n.º 1
0
def nbest(relation):
    """Score analogy lines by 1-best, 5-best and 10-best accuracy.

    Each item of ``relation`` is a line ``w1 w2 w3 w4`` separated by single
    spaces.  The vector ``w1 - w2 + w4`` is passed to
    ``distsim.show_nearest`` together with the set of the three query words,
    and ``w3`` is the expected answer.  The raw result list of every query is
    printed.

    Relies on the module-level names ``word_to_vec_dict`` and ``distsim``.

    Args:
        relation: sized iterable of analogy lines.

    Returns:
        Tuple ``(acc1, acc5, acc10)`` rounded to two decimals, or
        ``(0.0, 0.0, 0.0)`` when ``relation`` is empty.

    Raises:
        KeyError: if a query word is missing from ``word_to_vec_dict``.
    """
    best1 = best5 = best10 = 0
    for line in relation:
        wordlist = line.split(' ')

        w1 = word_to_vec_dict[wordlist[0]]
        w2 = word_to_vec_dict[wordlist[1]]
        w4 = word_to_vec_dict[wordlist[3]]
        ret = distsim.show_nearest(
            word_to_vec_dict, w1 - w2 + w4,
            set([wordlist[0], wordlist[1], wordlist[3]]), distsim.cossim_dense)
        print(ret)

        # Slicing instead of indexing ret[0..9] tolerates result lists with
        # fewer than ten entries, and a membership test counts a line at most
        # once per cutoff.
        top_words = [entry[0] for entry in ret[:10]]
        expected = wordlist[2]
        if expected in top_words[:1]:
            best1 += 1
        if expected in top_words[:5]:
            best5 += 1
        if expected in top_words:
            best10 += 1

    totalword = len(relation)
    if totalword == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0, 0.0, 0.0
    accbest1 = round(float(best1) / totalword, 2)
    accbest5 = round(float(best5) / totalword, 2)
    accbest10 = round(float(best10) / totalword, 2)
    return accbest1, accbest5, accbest10
Ejemplo n.º 2
0
def table(lines):
    """Print a PrettyTable with 1-, 5- and 10-best analogy accuracy.

    Every item of ``lines`` is split on single spaces into
    ``w1 w2 expected w4``.  ``w1 - w2 + w4`` is queried through
    ``distsim.show_nearest`` and the expected word is searched in the first
    1, 5 and 10 returned entries.  All lines form one group, reported under
    the first title ('adversarial1').

    Args:
        lines: iterable of analogy lines.

    Raises:
        KeyError: if a query word is missing from the loaded vectors.
    """
    data = [[line.split(' ') for line in lines]]
    t = PrettyTable(['Class', '1-best', '5-best', '10-best'])
    title = ['adversarial1', 'adversarial2']
    p = [1, 5, 10]
    # Loading the vectors does not depend on the row, so do it once instead
    # of once per analogy line.
    word_to_vec_dict = distsim.load_word2vec("nyt_word2vec.4k")
    for num, rows in enumerate(data):
        count = [0] * len(p)
        for row in rows:
            w1 = word_to_vec_dict[row[0]]
            w2 = word_to_vec_dict[row[1]]
            w4 = word_to_vec_dict[row[3]]
            ret = distsim.show_nearest(word_to_vec_dict, w1 - w2 + w4,
                                       set([row[0], row[1], row[3]]),
                                       distsim.cossim_dense)
            expected = row[2]
            for i, cutoff in enumerate(p):
                if expected in [hit[0] for hit in ret[:cutoff]]:
                    count[i] += 1
        total = float(len(rows))
        # An empty group reports 0.0 rather than dividing by zero.
        t.add_row([title[num]] +
                  [hits / total if total else 0.0 for hits in count])
    print(t)
def calculateAccuracies(solutions):
    """Compute per-group analogy accuracies and one sample error per group.

    Args:
        solutions: mapping of group key -> sized iterable of 4-item
            sequences ``(w1, w2, expected, w4)``.

    Returns:
        Tuple ``(accuracies, errors)``:

        * ``accuracies[k]`` is ``(a, b, c)``: the fraction of the group's
          analogies whose expected word is at rank 0, at rank < 5, and
          anywhere in the list returned by ``distsim.show_nearest``
          (presumably ten entries long -- TODO confirm).  An empty group
          yields ``(0.0, 0.0, 0.0)``.
        * ``errors[k]`` is ``''`` or, for the first analogy of the group
          whose expected word was not returned,
          ``(w1, w2, expected, w4, top_prediction)``.

    Raises:
        KeyError: if a query word is missing from the loaded vectors.
    """
    word_to_vec_dict = distsim.load_word2vec("nyt_word2vec.4k")
    accuracies = {}
    errors = {}

    # .items() works on both Python 2 and 3, unlike .iteritems().
    for k, v in solutions.items():
        hits = [0.0, 0.0, 0.0]
        errors[k] = ''
        hasShownError = False
        for i in v:
            w1 = word_to_vec_dict[i[0]]
            w2 = word_to_vec_dict[i[1]]
            w4 = word_to_vec_dict[i[3]]
            ret = distsim.show_nearest(word_to_vec_dict, w1 - w2 + w4,
                                       set([str(i[0]),
                                            str(i[1]),
                                            str(i[3])]), distsim.cossim_dense)

            # Rank of the first entry equal to the expected word, if any.
            # Stopping at the first match keeps one analogy from being
            # counted more than once.
            rank = None
            for n, (p, q) in enumerate(ret):
                if p == i[2]:
                    rank = n
                    break

            if rank is None:
                if not hasShownError:
                    errors[k] = (i[0], i[1], i[2], i[3], ret[0][0])
                    hasShownError = True
                continue

            if rank == 0:
                hits[0] += 1.0
            if rank < 5:
                hits[1] += 1.0
            hits[2] += 1.0

        size = len(v)
        if size:
            accuracies[k] = tuple(h / size for h in hits)
        else:
            # Empty group: report zeros instead of dividing by zero.
            accuracies[k] = (0.0, 0.0, 0.0)

    return (accuracies, errors)
Ejemplo n.º 4
0
def n_best_accuracy(n, strings):
    """Return the percentage of analogy lines solved within the top ``n``.

    Each line is ``w1 w2 expected w4`` separated by single spaces.  The
    vector ``w1 - w2 + w4`` is queried through ``distsim.show_nearest`` and
    the line counts as solved when ``expected`` is among the first ``n``
    returned entries.

    Relies on the module-level names ``word_to_vec_dict`` and ``distsim``.

    Args:
        n: how many leading results to inspect.
        strings: sized iterable of analogy lines.

    Returns:
        Accuracy as a float in ``[0.0, 100.0]``; ``0.0`` for empty input.

    Raises:
        KeyError: if a query word is missing from ``word_to_vec_dict``.
    """
    total = len(strings)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    count = 0
    for line in strings:
        items = [item.strip() for item in line.strip().split(" ")]
        w1 = word_to_vec_dict[items[0]]
        w2 = word_to_vec_dict[items[1]]
        w3 = items[2]
        w4 = word_to_vec_dict[items[3]]
        ret = distsim.show_nearest(
            word_to_vec_dict, w1 - w2 + w4,
            set([items[0], items[1], items[3]]),
            distsim.cossim_dense)
        # Slicing tolerates fewer than n results; max() keeps a negative n
        # meaning "inspect nothing", as range(n) did.
        if any(candidate[0] == w3 for candidate in ret[:max(n, 0)]):
            count += 1
    return count * 100.0 / total
Ejemplo n.º 5
0
def sim_compare(word_to_vec_dict,w1,w2,w3,w4):
    """Evaluate one analogy ``w1 : w2 :: w3 : w4`` and print the prediction.

    The query vector is ``vec(w1) - vec(w2) + vec(w4)``; ``w3`` is the
    expected answer.  The ranking is obtained from ``print_result``, which
    is defined outside this block and is presumably a mapping of word to
    1-based rank -- TODO confirm.

    Args:
        word_to_vec_dict: mapping of word -> vector.
        w1, w2, w4: query words (keys of ``word_to_vec_dict``).
        w3: expected answer word.

    Returns:
        List ``[hit1, hit5, hit10]`` of 0/1 flags: whether ``w3`` has rank 1,
        rank <= 5 and rank <= 10 respectively.

    Raises:
        KeyError: if a query word is missing from ``word_to_vec_dict``.
    """
    ans = [0, 0, 0]
    ww1 = word_to_vec_dict[w1]
    ww2 = word_to_vec_dict[w2]
    ww4 = word_to_vec_dict[w4]
    # The subtraction must use the vector ww2; the original subtracted the
    # word string w2 from a vector.
    ret = distsim.show_nearest(word_to_vec_dict,
                               ww1 - ww2 + ww4,
                               set([w1, w2, w4]),
                               distsim.cossim_dense)
    rank = print_result(ret, ret[0][0], {})
    if w3 in rank:
        position = rank[w3]
        if position == 1:
            ans = [1, 1, 1]
        elif 1 < position <= 5:
            ans = [0, 1, 1]
        elif 5 < position <= 10:
            ans = [0, 0, 1]
    rinfo = w1+" : "+w2+" :: {} : "+w4
    # One %-formatted string gives the same space-separated output as the
    # Python 2 print statement and also runs on Python 3.
    print('%s %s %s %s' % (rinfo.format(ret[0][0]), '--- Compare:',
                           ret[0][0], w3))

    return ans
 #print g
 #print groups
 best1 = 0
 best5 = 0
 best10 = 0
 N = len(groups[g])
 print g
 for line in groups[g]:
     words = line.split()
     #print words[0]+":"+words[1]+"::"+words[2]+":"+words[3]
     expected = words[2]
     w1 = word_to_vec_dict[words[0]]
     w2 = word_to_vec_dict[words[1]]
     w4 = word_to_vec_dict[words[3]]
     ret = distsim.show_nearest(word_to_vec_dict, w1 - w2 + w4,
                                set([words[0], words[1], words[3]]),
                                distsim.cossim_dense)
     print(" {} : {} :: {} : {}".format(words[0], words[1], ret[0][0],
                                        words[3]))
     #print "-------"
     for i in range(0, 10):
         try:
             #print ret[i][0]
             if (ret[i][0] == expected):
                 #print ret[i][0]+" @ position "+str(i)
                 if (i == 0):
                     best1 += 1
                     best5 += 1
                     best10 += 1
                 if (i > 0 and i < 5):
                     best5 += 1
Ejemplo n.º 7
0
# Walk the analogy file and record, per category, the 1-based rank at which
# the expected word (third token) was returned, or -1 when it was not.
# `file`, `word_to_vec_dict` and `distsim` come from outside this snippet.
match_position = defaultdict(list)
catorder = []

for line in file:
    line = line.strip().split()
    # A blank line splits to []; skip it instead of failing on line[0].
    if not line or line[0] == '//':
        continue
    if line[0] == ':':
        # Category header: remember it and keep the first-seen order.
        cat = line[1]
        catorder.append(cat)
        continue

    word0 = word_to_vec_dict[line[0]]
    word1 = word_to_vec_dict[line[1]]
    word3 = word_to_vec_dict[line[3]]
    ret = distsim.show_nearest(word_to_vec_dict, word0 - word1 + word3,
                               set([line[0], line[1], line[3]]),
                               distsim.cossim_dense)

    # count ends as the 0-based index of the expected word in ret, or
    # len(ret) when it is absent.
    count = 0
    for candidate in ret:
        if candidate[0] == line[2]:
            break
        count += 1

    if count != len(ret):
        match_position[cat].append([count + 1])
    else:
        match_position[cat].append([-1])
        # Report the miss together with what was returned instead.
        print(cat + " " + str(line) + "\n")
        print(str(ret))

for key in catorder:
Ejemplo n.º 8
0
#!/usr/bin/env python
import distsim

word_to_ccdict = distsim.load_contexts("nytcounts.4k")

### provide your answer below

###Answer examples; replace with your choices

print 'jack'
for i, (word, score) in enumerate(distsim.show_nearest(word_to_ccdict,
                                                       word_to_ccdict['jack'],
                                                       set(['jack']),
                                                       distsim.cossim_sparse),
                                  start=1):
    print("{}: {} ({})".format(i, word, score))

print '\n'
print 'man'
for i, (word, score) in enumerate(distsim.show_nearest(word_to_ccdict,
                                                       word_to_ccdict['man'],
                                                       set(['man']),
                                                       distsim.cossim_sparse),
                                  start=1):
    print("{}: {} ({})".format(i, word, score))

print '\n'
print 'nice'
for i, (word, score) in enumerate(distsim.show_nearest(word_to_ccdict,
                                                       word_to_ccdict['nice'],
                                                       set(['nice']),
Ejemplo n.º 9
0
#!/usr/bin/env python
import distsim
word_to_vec_dict = distsim.load_word2vec("nyt_word2vec.4k")
###Provide your answer below

###Answer examples; replace with your choices

print("Word 1 is::::america")
for i, (word, score) in enumerate(
        distsim.show_nearest(word_to_vec_dict, word_to_vec_dict['america'],
                             set(['america']), distsim.cossim_dense)):
    print("{}: {} ({})".format(i, word, score))
print("--------------------")

print("Word 2 is::::years")
for i, (word, score) in enumerate(
        distsim.show_nearest(word_to_vec_dict, word_to_vec_dict['years'],
                             set(['years']), distsim.cossim_dense)):
    print("{}: {} ({})".format(i, word, score))
print("--------------------")

print("Word 3 is::::great")
for i, (word, score) in enumerate(
        distsim.show_nearest(word_to_vec_dict, word_to_vec_dict['great'],
                             set(['great']), distsim.cossim_dense)):
    print("{}: {} ({})".format(i, word, score))
print("--------------------")

print("Word 4 is::::run")
for i, (word, score) in enumerate(
        distsim.show_nearest(word_to_vec_dict, word_to_vec_dict['run'],
Ejemplo n.º 10
0
Archivo: q5.py Proyecto: pratcooper/NLP
#!/usr/bin/env python
import distsim
word_to_vec_dict = distsim.load_word2vec("nyt_word2vec.4k")
###Provide your answer below

# Print the raw show_nearest result list for each chosen word, one list per
# line.  The last entry ('jack') is the template's answer example.
for query in ('california', 'doctors', 'small', 'draw', 'month', 'france',
              'jack'):
    neighbours = distsim.show_nearest(word_to_vec_dict,
                                      word_to_vec_dict[query],
                                      set([query]),
                                      distsim.cossim_dense)
    print(neighbours)
Ejemplo n.º 11
0
#!/usr/bin/env python
import distsim

# you may have to replace this line if it is too slow
word_to_ccdict = distsim.load_contexts("nytcounts.4k")

### provide your answer below
ans = list()
###Answer examples
ans.append(
    distsim.show_nearest(word_to_ccdict, word_to_ccdict['miami'],
                         set(['miami']), distsim.cossim_sparse))
ans.append(
    distsim.show_nearest(word_to_ccdict, word_to_ccdict['doctor'],
                         set(['doctor']), distsim.cossim_sparse))
ans.append(
    distsim.show_nearest(word_to_ccdict, word_to_ccdict['giant'],
                         set(['giant']), distsim.cossim_sparse))
ans.append(
    distsim.show_nearest(word_to_ccdict, word_to_ccdict['agree'],
                         set(['agree']), distsim.cossim_sparse))
ans.append(
    distsim.show_nearest(word_to_ccdict, word_to_ccdict['terrorist'],
                         set(['terrorist']), distsim.cossim_sparse))
ans.append(
    distsim.show_nearest(word_to_ccdict, word_to_ccdict['hotel'],
                         set(['hotel']), distsim.cossim_sparse))
ans.append(
    distsim.show_nearest(word_to_ccdict, word_to_ccdict['hospital'],
                         set(['hospital']), distsim.cossim_sparse))
ans.append(
#!/usr/bin/env python
import distsim
word_to_vec_dict = distsim.load_word2vec("nyt_word2vec.4k")
###Provide your answer below

###Answer examples
# Query show_nearest for 'jack' against the vectors loaded by load_word2vec,
# with distsim.cossim_dense as the similarity function.  The returned value
# is neither printed nor stored here.
distsim.show_nearest(word_to_vec_dict, word_to_vec_dict['jack'], set(['jack']),
                     distsim.cossim_dense)
#!/usr/bin/env python
import distsim

word_to_vec_dict = distsim.load_word2vec("../nyt_word2vec.4k")
# List the entries show_nearest returns for 'company', numbered from 1.
company_neighbours = distsim.show_nearest(word_to_vec_dict,
                                          word_to_vec_dict['company'],
                                          set(['company']),
                                          distsim.cossim_dense)
for i, (word, score) in enumerate(company_neighbours, 1):
    print("{}: {} ({})".format(i, word, score))
Ejemplo n.º 14
0
#!/usr/bin/env python
import distsim

# you may have to replace this line if it is too slow
word_to_ccdict = distsim.load_contexts("nytcounts.4k")

### provide your answer below

###Answer examples
#distsim.show_nearest(word_to_ccdict, word_to_ccdict['jack'],set(['jack']),distsim.cossim_sparse)
# people
rihanna = distsim.show_nearest(word_to_ccdict, word_to_ccdict['rihanna'],
                               set(['rihanna']), distsim.cossim_sparse)
obama = distsim.show_nearest(word_to_ccdict, word_to_ccdict['obama'],
                             set(['obama']), distsim.cossim_sparse)
# companies
nba = distsim.show_nearest(word_to_ccdict, word_to_ccdict['nba'], set(['nba']),
                           distsim.cossim_sparse)
netflix = distsim.show_nearest(word_to_ccdict, word_to_ccdict['netflix'],
                               set(['netflix']), distsim.cossim_sparse)
# country
iran = distsim.show_nearest(word_to_ccdict, word_to_ccdict['iran'],
                            set(['iran']), distsim.cossim_sparse)
# common nouns
terrorism = distsim.show_nearest(word_to_ccdict, word_to_ccdict['terrorism'],
                                 set(['terrorism']), distsim.cossim_sparse)
economy = distsim.show_nearest(word_to_ccdict, word_to_ccdict['economy'],
                               set(['economy']), distsim.cossim_sparse)
data = distsim.show_nearest(word_to_ccdict, word_to_ccdict['data'],
                            set(['data']), distsim.cossim_sparse)
sex = distsim.show_nearest(word_to_ccdict, word_to_ccdict['sex'], set(['sex']),
#!/usr/bin/env python
import distsim
"""
word_to_vec_dict = distsim.load_word2vec("nyt_word2vec.4k")
king = word_to_vec_dict['king']
man = word_to_vec_dict['man']
woman = word_to_vec_dict['woman']

ret = distsim.show_nearest(word_to_vec_dict,
                           king-man+woman,
                           set(['king','man','woman']),
                           distsim.cossim_dense)
"""

# Sparse (context-count) version of the analogy king : man :: ? : woman.
word_to_ccdict = distsim.load_contexts("nytcounts.4k")
king, man, woman = (word_to_ccdict[name]
                    for name in ('king', 'man', 'woman'))

# Build king - man + woman entry by entry.
# NOTE(review): only contexts present in all three words are kept; contexts
# missing from any one of them are dropped rather than treated as zero --
# confirm this is intended.
d = {
    context: king[context] - man[context] + woman[context]
    for context in king
    if context in man and context in woman
}

ret = distsim.show_nearest(word_to_ccdict, d, set(['king', 'man', 'woman']),
                           distsim.cossim_sparse)
best_match = ret[0][0]
print("king : man :: {} : woman".format(best_match))
Ejemplo n.º 16
0
# Scan the analogy file `f`: collect category names from ':' header lines and,
# for every 4-word line, find the 1-based rank of the expected word (third
# token) in the show_nearest result.  `f`, `word_to_vec_dict` and `distsim`
# come from outside this snippet.
category_list = []
category_num_dict = defaultdict(list)
for line in f:
    line = line.strip('\n')
    # `line` is a string, so line[0] is a single character and could never
    # equal '//'; startswith() detects comment lines and is safe on an
    # empty line.
    if line.startswith('//'):
        continue
    if line.startswith(':'):
        category = line.split(' ')[1]
        category_list.append(category)
    word = line.strip().split(' ')
    if len(word) == 4:
        word1_dict = word_to_vec_dict[word[0]]
        word2_dict = word_to_vec_dict[word[1]]
        word4_dict = word_to_vec_dict[word[3]]
        ret = distsim.show_nearest(word_to_vec_dict,
                                   word1_dict - word2_dict + word4_dict,
                                   set([word[0], word[1],
                                        word[3]]), distsim.cossim_dense)

        # count is the 1-based rank of the expected word, or None when the
        # word is not among the results.
        count = None
        for position, candidate in enumerate(ret, 1):
            if candidate[0] == word[2]:
                count = position
                break
        find = count is not None
        if not find:
            # Show the failed analogy and what was returned instead.
            print(word)
            print(ret)
Ejemplo n.º 17
0
# For each group number 1..8, gather that group's 4-word analogy lines from L
# and tally 1-best / 5-best / 10-best hits.  L, word_to_vec_dict and distsim
# are defined outside this snippet.
for g in range(1, 9):
    analogy = []
    # Each L[i] is indexed as (group, text): keep the tokens of the text when
    # the group equals g and the text has exactly four whitespace-separated
    # words.
    for i in range(0, len(L)):
        if L[i][0] == g and len(L[i][1].split()) == 4:
            analogy.append(L[i][1].split())

    # Hit counters, reset for every group.
    best1 = best5 = best10 = 0

    for a in range(0, len(analogy)):

        # Vectors of words 1, 2 and 4; word 3 is the expected answer.
        first = word_to_vec_dict[analogy[a][0]]
        second = word_to_vec_dict[analogy[a][1]]
        fourth = word_to_vec_dict[analogy[a][3]]

        # Query with first - second + fourth, passing the three query words
        # as the set argument.
        ret = distsim.show_nearest(
            word_to_vec_dict, first - second + fourth,
            set([analogy[a][0], analogy[a][1], analogy[a][3]]),
            distsim.cossim_dense)
        #ret = distsim.show_nearest(word_to_ccdict,z,set([ analogy[a][0], analogy[a][1], analogy[a][3] ]),distsim.cossim_sparse)

        # The expected word counts toward each cutoff whose leading slice of
        # ret contains it.
        if analogy[a][2] == ret[0][0]:
            best1 += 1
        if analogy[a][2] in [w[0] for w in ret[0:5]]:
            best5 += 1
        if analogy[a][2] in [w[0] for w in ret[0:10]]:
            best10 += 1
        """
		print("---------------------------------------------------------------------------------------")
		for i in range(0,len(ret)):
			print(analogy[a][0]+" : "+analogy[a][1]+" :: "+ret[i][0]+" : "+analogy[a][3])
		print("---------------------------------------------------------------------------------------")
		"""
Ejemplo n.º 18
0
     correct1 = 0
     correct5 = 0
     correct10 = 0
     total = 0
 if length - 4 == 0:
     total += 1
     flag = True
     word1 = w[0].strip('\t')
     word2 = w[1].strip('\t')
     word3 = w[2].strip('\t')
     word4 = w[3].strip('\t')
     words1 = word_to_vec_dict[word1]
     words2 = word_to_vec_dict[word2]
     words4 = word_to_vec_dict[word4]
     ret = distsim.show_nearest(word_to_vec_dict,
                                words1 - words2 + words4,
                                set([word1, word2,
                                     word4]), distsim.cossim_dense)
     sim = []
     for i in ret:
         sim.append(i[0])
     pos = -1
     if word3 in sim:
         pos = sim.index(word3)
     if pos != -1 and pos < 10:
         correct10 += 1
     if pos != -1 and pos < 5:
         correct5 += 1
     if pos != -1 and pos < 1:
         correct1 += 1
 elif line[0] == ':':
     s = line.split(' ')
Ejemplo n.º 19
0
#!/usr/bin/env python
import distsim
word_to_vec_dict = distsim.load_word2vec("nyt_word2vec.4k")
###Provide your answer below

###Answer examples; replace with your choices
print 'china'
for i, (word,
        score) in enumerate(distsim.show_nearest(word_to_vec_dict,
                                                 word_to_vec_dict['china'],
                                                 set(['china']),
                                                 distsim.cossim_dense),
                            start=1):
    print("{}: {} ({})".format(i, word, score))
print 'human'
for i, (word,
        score) in enumerate(distsim.show_nearest(word_to_vec_dict,
                                                 word_to_vec_dict['human'],
                                                 set(['human']),
                                                 distsim.cossim_dense),
                            start=1):
    print("{}: {} ({})".format(i, word, score))
print 'handsome'
for i, (word,
        score) in enumerate(distsim.show_nearest(word_to_vec_dict,
                                                 word_to_vec_dict['handsome'],
                                                 set(['handsome']),
                                                 distsim.cossim_dense),
                            start=1):
    print("{}: {} ({})".format(i, word, score))
print 'fight'
Ejemplo n.º 20
0
 num_snt = 0  #counting number of sentences
 for i in temp:
     num_snt += 1
     #print "i",i
     words = i.split(" ")
     word1 = word_to_vec_dict[words[0].strip()]
     word2 = word_to_vec_dict[words[1].strip()]
     word3 = words[2].strip()
     word4 = word_to_vec_dict[words[3].strip()]
     new_word = word1 - word2 + word4
     # print "words[0]",words[0]
     # print "words[0]",words[1]
     # print "words[0]",words[3]
     #for i, (word, score) in enumerate(distsim.show_nearest(word_to_vec_dict,new_word,set([words[0].strip(),words[1].strip(),words[3].strip()]),distsim.cossim_dense), start=1):
     ret = distsim.show_nearest(
         word_to_vec_dict, new_word,
         set([words[0].strip(), words[1].strip(), words[3].strip()]),
         distsim.cossim_dense)
     counter = 0
     #print ret[counter][0]
     while counter <= 9:
         if counter == 0 and ret[counter][0] == word3:
             #print "ji",ret[counter][0]
             best_1 += 1
             best_5 += 1
             best_10 += 1
             break
         elif counter > 0 and counter <= 4 and ret[counter][0] == word3:
             best_5 += 1
             best_10 += 1
             break
         elif counter > 4 and counter <= 9 and ret[counter][0] == word3:
Ejemplo n.º 21
0
#!/usr/bin/env python
import distsim

word_to_vec_dict = distsim.load_word2vec("nyt_word2vec.4k")
#===============================================================================
# king = word_to_vec_dict['king']
# man = word_to_vec_dict['man']
# woman = word_to_vec_dict['woman']
# ret = distsim.show_nearest(word_to_vec_dict,
#                            king-man+woman,                      # <-------------------------------  THE CORE OF REASONING
#                            set(['king','man','woman']),
#                            distsim.cossim_dense)
# print("king : man :: {} : woman".format(ret[0][0]))
#===============================================================================

# Analogy query: great : greatest :: ? : biggest
# Variables and the printed label name the words actually used; the original
# kept the king/man/woman names and label from the template.
great = word_to_vec_dict['great']
greatest = word_to_vec_dict['greatest']
biggest = word_to_vec_dict['biggest']
ret = distsim.show_nearest(
    word_to_vec_dict,
    great - greatest + biggest,  # the core of the analogy reasoning
    set(['great', 'greatest', 'biggest']),
    distsim.cossim_dense)
print("great : greatest :: {} : biggest".format(ret[0][0]))
Ejemplo n.º 22
0
#!/usr/bin/env python
import distsim
word_to_vec_dict = distsim.load_word2vec("nyt_word2vec.4k")
# Solve "king : man :: ? : woman" and print the top-ranked answer.
analogy_words = ('king', 'man', 'woman')
king, man, woman = (word_to_vec_dict[name] for name in analogy_words)
query_vector = king - man + woman
ret = distsim.show_nearest(word_to_vec_dict, query_vector,
                           set(analogy_words), distsim.cossim_dense)
top_word = ret[0][0]
print("king : man :: {} : woman".format(top_word))
Ejemplo n.º 23
0
#!/usr/bin/env python
import distsim
word_to_vec_dict = distsim.load_word2vec("nyt_word2vec.4k")
###Provide your answer below

###Answer examples; replace with your choices

# For each chosen word print the word itself and then the entries returned
# by show_nearest, numbered from 0.
for position, query in enumerate(('jack', 'man', 'nice', 'move', 'father')):
    if position:
        # Separator between sections; nothing is printed before the first.
        print('\n')
    print(query)
    neighbours = distsim.show_nearest(word_to_vec_dict,
                                      word_to_vec_dict[query],
                                      set([query]),
                                      distsim.cossim_dense)
    for i, (word, score) in enumerate(neighbours):
        print("{}: {} ({})".format(i, word, score))
Ejemplo n.º 24
0
#!/usr/bin/env python
import distsim
# you may have to replace this line if it is too slow
word_to_vec_dict = distsim.load_word2vec("nyt_word2vec.4k")

### provide your answer below
# Words to look up; each one found in word_to_vec_dict gets a numbered
# neighbour list followed by a separator line.
experiment_list = ['edward', 'school', 'red', 'saved', 'eyebrows', 'church']

for e_word in experiment_list:
    print('%s %s' % ('Experiment =', e_word))
    if e_word in word_to_vec_dict:
        neighbours = distsim.show_nearest(word_to_vec_dict,
                                          word_to_vec_dict[e_word],
                                          set([e_word]),
                                          distsim.cossim_dense)
        for i, (word, score) in enumerate(neighbours, 1):
            print("{}: {} ({})".format(i, word, score))

        print('\n---------------------------------------------\n')
    else:
        # Unknown word: report it and move on without a separator.
        print('%s %s' % (e_word, ' does not exist in lookup dictionary'))
Ejemplo n.º 25
0
# For every analogy line of word-test.v3.txt record, under its category, the
# 1-based rank at which the expected word (third token) was returned, or 0
# when it was not returned.  word_to_vec_dict and distsim come from outside
# this snippet.
task_accuracies = defaultdict(list)

# The context manager closes the file once the loop is done; the original
# left the handle open.
with open("word-test.v3.txt", "r") as file:
  for line in file:
    line = line.strip('\n')
    if line.startswith('//'):
      continue
    if line.startswith(":"):
      # Category header of the form ": name".
      category = line[2:]
      continue
    words = line.split()
    if not words:
      # Blank line: nothing to evaluate (words[0] would raise IndexError).
      continue
    w1_dict = word_to_vec_dict[words[0]]
    w2_dict = word_to_vec_dict[words[1]]
    w4_dict = word_to_vec_dict[words[3]]
    result = distsim.show_nearest(word_to_vec_dict,
                                  w1_dict - w2_dict + w4_dict,
                                  set([words[0], words[1], words[3]]),
                                  distsim.cossim_dense)
    # i stays 0 unless the expected word is found at 1-based position `rank`.
    i = 0
    for rank, vec in enumerate(result, 1):
      if vec[0] == words[2]:
        i = rank
        break
    task_accuracies[category].append(i)

for key, value in task_accuracies.items():
  top1 = 0
  top5 = 0
  top10 = 0
#!/usr/bin/env python
import distsim
word_to_ccdict = distsim.load_contexts("nytcounts.4k")
#word_to_ccdict = distsim.load_contexts("nytcounts.4k")
# List the entries show_nearest returns for 'dog' (context-count vectors),
# numbered from 1.
dog_neighbours = distsim.show_nearest(word_to_ccdict,
                                      word_to_ccdict['dog'],
                                      set(['dog']),
                                      distsim.cossim_sparse)
for i, (word, score) in enumerate(dog_neighbours, 1):
    print("{}: {} ({})".format(i, word, score))
Ejemplo n.º 27
0
#!/usr/bin/env python
import distsim

word_to_ccdict = distsim.load_contexts("nytcounts.4k")


### provide your answer below


###Answer examples; replace with your choices
# Print a 1-based numbered neighbour list for each chosen word, one list
# directly after the other.
for query in ('florida', 'teachers', 'single', 'buy', 'week', 'china'):
    neighbours = distsim.show_nearest(word_to_ccdict,
                                      word_to_ccdict[query],
                                      set([query]),
                                      distsim.cossim_sparse)
    for i, (word, score) in enumerate(neighbours, 1):
        print("{}: {} ({})".format(i, word, score))
Ejemplo n.º 28
0
#!/usr/bin/env python
import distsim

word_to_ccdict = distsim.load_contexts("nytcounts.4k")

### provide your answer below
# proper-noun america
# common-noun years
# adjective great
# verb run
# word 5 between
# word 6 wife

def _print_neighbours(header, query):
    """Print ``header`` and then the numbered show_nearest entries of ``query``."""
    print(header)
    neighbours = distsim.show_nearest(word_to_ccdict,
                                      word_to_ccdict[query],
                                      set([query]),
                                      distsim.cossim_sparse)
    for i, (word, score) in enumerate(neighbours, 1):
        print("{}: {} ({})".format(i, word, score))


_print_neighbours("Word 1 is::::america", 'america')
print("--------------------")

_print_neighbours("Word 2 is::::years", 'years')
print("--------------------")

_print_neighbours("Word 3 is::::great", 'great')
print("--------------------")

_print_neighbours("Word 4 is::::run", 'run')
Ejemplo n.º 29
0
#!/usr/bin/env python
import distsim
word_to_vec_dict = distsim.load_word2vec("nyt_word2vec.4k")
###Provide your answer below

###Answer examples
# For each word print "<word> <raw show_nearest result>" on one line.
for query in ("jack", "london", "month", "attack", "happy", "jail",
              "fantastic"):
    neighbours = distsim.show_nearest(word_to_vec_dict,
                                      word_to_vec_dict[query],
                                      set([query]),
                                      distsim.cossim_dense)
    # Same space-separated output as the two-item print statement.
    print('%s %s' % (query, neighbours))
Ejemplo n.º 30
0
#!/usr/bin/env python
import distsim
word_to_vec_dict = distsim.load_word2vec("nyt_word2vec.4k")
# Solve "usa : dollar :: ? : won" and print the top-ranked answer.
analogy_words = ('usa', 'dollar', 'won')
usa, dollar, won = (word_to_vec_dict[name] for name in analogy_words)
query_vector = usa - dollar + won
ret = distsim.show_nearest(word_to_vec_dict, query_vector,
                           set(analogy_words), distsim.cossim_dense)
print("usa : dollar :: {} : won".format(ret[0][0]))