def main():
    """Print 1-/5-/10-best analogy accuracy for each group in word-test.v3.txt.

    The embeddings are loaded from "nyt_word2vec.4k".  The first line of the
    test file is skipped.  A line whose first token is ':' starts a new group
    (its second token is printed as the group name); every other non-blank
    line is passed as four words to ``sim_compare``, and elements 0, 1 and 2
    of the result are accumulated as the 1-best, 5-best and 10-best counts.
    The running ratios are printed whenever a new group starts (except for a
    header found on line 2) and once more after the last line.
    """
    def _ratio(hits, seen):
        # float() forces true division even when the counts are ints (Python 2
        # would truncate to 0), and an empty group yields 0.0 instead of
        # raising ZeroDivisionError.
        return float(hits) / seen if seen else 0.0

    def _report(one, five, ten, seen):
        # Single-argument print(...) emits the same text under Python 2 and 3.
        print('1-best: %s' % (_ratio(one, seen),))
        print('5-best: %s' % (_ratio(five, seen),))
        print('10-best: %s' % (_ratio(ten, seen),))

    word_to_vec_dict = distsim.load_word2vec("nyt_word2vec.4k")
    # Alternative embeddings previously tried here: "glove.6B.300d.txt".
    line_count = 0
    total, o_b, f_b, t_b = 0, 0, 0, 0
    with open("word-test.v3.txt") as infile:
        for line in infile:
            line_count += 1
            if line_count == 1:
                continue
            tmpv = line.strip(' \n\t').split()
            if not tmpv:
                # Blank line: nothing to score and no header to read.
                continue
            if tmpv[0] != ':':
                best_v = sim_compare(word_to_vec_dict,
                                     tmpv[0], tmpv[1], tmpv[2], tmpv[3])
                o_b += best_v[0]
                f_b += best_v[1]
                t_b += best_v[2]
                total += 1
            else:
                # A header on line 2 is the first group, so there is no
                # previous group to report yet.
                if line_count != 2:
                    _report(o_b, f_b, t_b, total)
                    print(' ')
                total, o_b, f_b, t_b = 0, 0, 0, 0
                print('For analogy: %s' % (tmpv[1],))
    # Report the group that was still open when the file ended.
    _report(o_b, f_b, t_b, total)
def table(lines):
    """Print a PrettyTable with 1-/5-/10-best analogy accuracy for ``lines``.

    Each element of ``lines`` is split on single spaces into a row; items 0,
    1 and 3 of the row are the query words and item 2 is the expected answer.
    All rows form one group, reported under the first entry of ``title``.

    The embeddings are loaded once up front (the previous version re-read the
    file for every row), and an empty ``lines`` produces a row of 0.0 values
    instead of raising ZeroDivisionError.
    """
    rows = [line.split(' ') for line in lines]
    data = [rows]
    t = PrettyTable(['Class', '1-best', '5-best', '10-best'])
    title = ['adversarial1', 'adversarial2']
    p = [1, 5, 10]
    # Loop-invariant: load the vectors a single time, not once per analogy.
    word_to_vec_dict = distsim.load_word2vec("nyt_word2vec.4k")
    for num, group in enumerate(data):
        total = len(group)
        count = [0, 0, 0]
        for row in group:
            w1 = word_to_vec_dict[row[0]]
            w2 = word_to_vec_dict[row[1]]
            w4 = word_to_vec_dict[row[3]]
            ret = distsim.show_nearest(word_to_vec_dict, w1 - w2 + w4,
                                       set([row[0], row[1], row[3]]),
                                       distsim.cossim_dense)
            true = row[2]
            ranked = [hit[0] for hit in ret]
            for i, cutoff in enumerate(p):
                if true in ranked[:cutoff]:
                    count[i] += 1
        t.add_row([title[num]] +
                  [c / float(total) if total else 0.0 for c in count])
    # Single-argument print(...) emits the same text under Python 2 and 3.
    print(t)
def calculateAccuracies(solutions):
    """Score analogy groups against the "nyt_word2vec.4k" embeddings.

    Args:
        solutions: mapping of group key -> sequence of analogies.  Each
            analogy is indexable; items 0, 1 and 3 are the query words and
            item 2 is the expected answer.

    Returns:
        ``(accuracies, errors)``.  ``accuracies[k]`` is a tuple of three
        floats (1-best, 5-best, 10-best hit rate for group ``k``).
        ``errors[k]`` is ``''`` when every analogy of the group was found,
        otherwise the tuple ``(w0, w1, expected, w3, top_prediction)`` for the
        first analogy whose expected word was missing from the result list.

    An empty group now scores ``(0.0, 0.0, 0.0)`` instead of raising
    ZeroDivisionError, and an empty neighbour list records ``None`` as the
    top prediction instead of raising IndexError.
    """
    word_to_vec_dict = distsim.load_word2vec("nyt_word2vec.4k")
    accuracies = {}
    errors = {}
    # .items() iterates the same pairs as .iteritems() and also exists on
    # Python 3 dicts.
    for k, v in solutions.items():
        errors[k] = ''
        size = len(v)
        top1 = top5 = top10 = 0.0
        hasShownError = False
        for i in v:
            w1 = word_to_vec_dict[i[0]]
            w2 = word_to_vec_dict[i[1]]
            w4 = word_to_vec_dict[i[3]]
            ret = distsim.show_nearest(word_to_vec_dict, w1 - w2 + w4,
                                       set([str(i[0]), str(i[1]), str(i[3])]),
                                       distsim.cossim_dense)
            found = False
            for rank, (word, _score) in enumerate(ret):
                if word != i[2]:
                    continue
                found = True
                # Any position in ret counts towards 10-best.
                # NOTE(review): presumably show_nearest returns at most ten
                # entries -- confirm against distsim.
                top10 += 1.0
                if rank < 5:
                    top5 += 1.0
                if rank == 0:
                    top1 += 1.0
            if not found and not hasShownError:
                best_guess = ret[0][0] if ret else None
                errors[k] = (i[0], i[1], i[2], i[3], best_guess)
                hasShownError = True
        if size:
            accuracies[k] = (top1 / size, top5 / size, top10 / size)
        else:
            accuracies[k] = (0.0, 0.0, 0.0)
    return (accuracies, errors)
#!/usr/bin/env python
import distsim

word_to_vec_dict = distsim.load_word2vec("nyt_word2vec.4k")
###Provide your answer below

###Answer examples
# For each query word, print the word followed by its nearest neighbours under
# dense cosine similarity, with the query word itself excluded.
for query in ("jack", "london", "month", "attack", "happy", "jail",
              "fantastic"):
    neighbours = distsim.show_nearest(word_to_vec_dict,
                                      word_to_vec_dict[query],
                                      set([query]),
                                      distsim.cossim_dense)
    # Same text as the Python 2 statement `print query, neighbours`.
    print("%s %s" % (query, neighbours))
import distsim #word_to_vec_dict = distsim.load_word2vec("nyt_word2vec.4k") word_to_vec_dict = distsim.load_word2vec("glove.6B.50d.txt") #word_to_ccdict = distsim.load_contexts("nytcounts.4k") L = [] relation = [] group = 0 with open('word-test.v3.txt', "r") as f_in: #with open('word-test.v3.txt',"r") as f_in: for line in f_in: if line[0] == ':': group += 1 L.append([group, line]) relation.append(line.strip('\n').replace(':', '')) else: L.append([group, line]) Accuracy_Best1 = [] Accuracy_Best5 = [] Accuracy_Best10 = [] for g in range(1, 9): analogy = [] for i in range(0, len(L)): if L[i][0] == g and len(L[i][1].split()) == 4: analogy.append(L[i][1].split()) best1 = best5 = best10 = 0 for a in range(0, len(analogy)):
#!/usr/bin/env python #from __future__ import division import distsim from collections import defaultdict file = open('q8.txt', 'r') word_to_vec_dict = distsim.load_word2vec("wiki-news-300d-1M.vec4") match_position = defaultdict(list) catorder = [] for line in file: line = line.strip().split() if line[0] == '//': continue if line[0] == ':': cat = line[1] catorder.append(cat) else: word0 = word_to_vec_dict[line[0]] word1 = word_to_vec_dict[line[1]] word3 = word_to_vec_dict[line[3]] ret = distsim.show_nearest(word_to_vec_dict, word0 - word1 + word3, set([line[0], line[1], line[3]]), distsim.cossim_dense) count = 0 while (count < len(ret)): if ret[count][0] == line[2]: break else: count += 1 if count != len(ret):
#!/usr/bin/env python import distsim from collections import defaultdict f = open('q8_data.txt', 'r') word_to_vec_dict = distsim.load_word2vec("glove.twitter.27B.100d.txt") category_list = [] category_num_dict = defaultdict(list) for line in f: line = line.strip('\n') if line[0] == '//': continue elif line[0] == ':': category = line.split(' ')[1] category_list.append(category) word = line.strip().split(' ') if len(word) == 4: word1_dict = word_to_vec_dict[word[0]] word2_dict = word_to_vec_dict[word[1]] word4_dict = word_to_vec_dict[word[3]] ret = distsim.show_nearest(word_to_vec_dict, word1_dict - word2_dict + word4_dict, set([word[0], word[1], word[3]]), distsim.cossim_dense) count = 0 find = False while (count < len(ret)): if ret[count][0] == word[2]: count += 1 find = True break
def q7_answer(reasoningData):
    """Evaluate analogy groups on "nyt_word2vec.4k" and print a report.

    Args:
        reasoningData: mapping of group name -> list of analogies.  Each
            analogy is indexable with four strings; items 0, 1 and 3 are the
            query words and item 2 is the expected answer.

    Prints, in order: one mispredicted example per group (when there is one),
    the groups sorted by top-1 accuracy in descending order, and a table of
    top-1 / top-5 / top-10 hit rates per group.  Returns None.

    Empty input, a group without analogies and an empty neighbour list are
    handled (reported as 0.0 / a miss) instead of raising ValueError,
    ZeroDivisionError or IndexError.
    """
    def _get_n_best_count(analogy_returnedVectors, n_best):
        # Number of analogies whose expected word is among the first n_best
        # returned words.  Slicing already clamps to the list length, so no
        # separate branch is needed for short lists.
        return float(sum(1 for expected, words in analogy_returnedVectors
                         if expected in words[:n_best]))

    word_to_vec_dict = distsim.load_word2vec("nyt_word2vec.4k")
    relation_group = {}
    result_comp = {}
    # Width of the widest group name; max() of an empty sequence would raise.
    col_len = max([len(x) for x in reasoningData]) if reasoningData else 0

    # All output uses single-argument print(...) with format strings that
    # reproduce the Python 2 print-statement spacing exactly (including the
    # missing space after an item ending in '\t').
    print('\n1 NEGATIVE EXAMPLE FROM EACH GROUP( Element3: Incorrect Prediction / Correct Value):\n')
    # .items() iterates the same pairs as .iteritems() and also exists on
    # Python 3 dicts.
    for groupName, list_of_analogies in reasoningData.items():
        relation_group[groupName] = []
        result_comp.setdefault(groupName, {'matched': 0, 'unmatched': 0})
        incorrect_pred_example_shown = False
        for analogy in list_of_analogies:
            returned_vectors = distsim.show_nearest(
                word_to_vec_dict,
                # The core of the reasoning: w0 - w1 + w3 should land near w2.
                word_to_vec_dict[analogy[0]] - word_to_vec_dict[analogy[1]] +
                word_to_vec_dict[analogy[3]],
                set([analogy[0], analogy[1], analogy[3]]),
                distsim.cossim_dense)
            returned_vectors = [x[0] for x in returned_vectors]
            relation_group[groupName].append((analogy[2], returned_vectors))
            # An empty neighbour list is a miss with an empty prediction.
            predicted = returned_vectors[0] if returned_vectors else ''
            if returned_vectors and analogy[2] == predicted:
                result_comp[groupName]['matched'] += 1
            else:
                result_comp[groupName]['unmatched'] += 1
                if not incorrect_pred_example_shown:
                    example = (analogy[0] + " : " + analogy[1] + " :: " +
                               predicted + '/' + analogy[2] + ' : ' +
                               analogy[3])
                    print('%s %s %s' % (groupName.ljust(col_len),
                                        ' Predicted / Actual : ', example))
                    incorrect_pred_example_shown = True
    # The vectors are not needed for the reporting below; release them early.
    del word_to_vec_dict

    ########################### Print analysis
    relation_kind_accuracy = []
    for groupName, match_unmatch_dict in result_comp.items():
        attempts = (match_unmatch_dict['matched'] +
                    match_unmatch_dict['unmatched'])
        if attempts:
            accuracy = round(float(match_unmatch_dict['matched']) / attempts, 3)
        else:
            accuracy = 0.0
        relation_kind_accuracy.append([groupName, accuracy])
    relation_kind_accuracy = sorted(relation_kind_accuracy,
                                    key=lambda x: x[1], reverse=True)
    print('\nGROUPS SORTED BY REASONING ACCURACY:')
    for groupName, accuracy in relation_kind_accuracy:
        print('%s %s %s' % (groupName.ljust(col_len), ' Accuracy:', accuracy))
    print('\n')

    print('%s TOP_1 \tTOP_5 \tTOP_10' % (''.ljust(col_len),))
    for groupName, analogy_returnedVectors in relation_group.items():
        result_row = [groupName]
        n_analogies = len(analogy_returnedVectors)
        for n_best in [1, 5, 10]:
            top_n = _get_n_best_count(analogy_returnedVectors, n_best)
            result_row.append(
                round(top_n / n_analogies, 3) if n_analogies else 0.0)
        print('%s %s \t%s \t%s' % (result_row[0].ljust(col_len),
                                   result_row[1], result_row[2],
                                   result_row[3]))
    print('\n')
cur.append(line.split(' ')) data.append(cur) data = data[1:] t = PrettyTable(['Class', '1-best', '5-best', '10-best']) # capi- tal 0, currency 1, city-in-state 2, family 3, adjective-to-adverb 4, comparative 5, superlative 6, and nationality-adjective 7. title = [ 'capital', 'currency', 'city-in-state', 'family', 'adjective-to-adverb', 'comparative', 'superlative', 'nationality-adjective' ] p = [1, 5, 10] for num in range(len(data)): total = len(data[num]) count = [0, 0, 0] e = [] for row in data[num]: word_to_vec_dict = distsim.load_word2vec("deps.words") w1 = word_to_vec_dict[row[0]] w2 = word_to_vec_dict[row[1]] w4 = word_to_vec_dict[row[3]] ret = distsim.show_nearest(word_to_vec_dict, w1 - w2 + w4, set([row[0], row[1], row[3]]), distsim.cossim_dense) true = row[2] for i in range(len(p)): l = [j[0] for j in ret[:p[i]]] if true in l: count[i] += 1 elif p[i] == 1: e.append([ret[0][0], true]) t.add_row([ title[num], count[0] / float(total), count[1] / float(total),