def create_representation(args):
    rep_type = args['<representation>']
    path = args['<representation_path>']
    neg = int(args['--neg'])
    w_c = args['--w+c']
    eig = float(args['--eig'])
    if rep_type == 'PPMI':
        if w_c:
            raise Exception('w+c is not implemented for PPMI.')
        else:
            return PositiveExplicit(path, True, neg)
    elif rep_type == 'SVD':
        if w_c:
            return EnsembleEmbedding(
                SVDEmbedding(path, False, eig, False),
                SVDEmbedding(path, False, eig, True), True)
        else:
            return SVDEmbedding(path, True, eig)
    elif rep_type == 'GLOVE':
        return GLOVEEmbedding(path, True)
    else:
        if w_c:
            return EnsembleEmbedding(
                Embedding(path + '.words', False),
                Embedding(path + '.contexts', False), True)
        else:
            return Embedding(path + '.words', True)
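# Hedged usage sketch (not from the source): create_representation above expects a
# docopt-style dict whose keys mirror the calling script's usage string; the path and
# option values here are illustrative only.
example_args = {
    '<representation>': 'SVD',
    '<representation_path>': 'embeddings/1990',  # hypothetical path
    '--neg': '1',
    '--w+c': False,
    '--eig': '0.0',
}
rep = create_representation(example_args)  # hits the SVD branch: SVDEmbedding(path, True, 0.0)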
def intersection_align(embed1, embed2, post_normalize=True):
    """
    Get the intersection of two embeddings.
    Returns embeddings with common vocabulary and indices.
    """
    common_vocab = list(filter(set(embed1.iw).__contains__, embed2.iw))
    newvecs1 = np.empty((len(common_vocab), embed1.m.shape[1]))
    newvecs2 = np.empty((len(common_vocab), embed2.m.shape[1]))
    for i in range(len(common_vocab)):
        newvecs1[i] = embed1.m[embed1.wi[common_vocab[i]]]
        newvecs2[i] = embed2.m[embed2.wi[common_vocab[i]]]
    return (Embedding(newvecs1, common_vocab, normalize=post_normalize),
            Embedding(newvecs2, common_vocab, normalize=post_normalize))
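# Hedged usage sketch: intersection_align is typically called before Procrustes alignment
# so that the two matrices share the same row order. The Embedding.load paths below are
# illustrative, not from the source.
emb_a = Embedding.load("embeddings/1990")
emb_b = Embedding.load("embeddings/2000")
common_a, common_b = intersection_align(emb_a, emb_b, post_normalize=False)
assert list(common_a.iw) == list(common_b.iw)  # identical vocabulary in identical order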
def align_cloud(year, rep_type, main_dir, num, dim, wordlist, **rep_args):
    print "Aligning cloud year:", year
    avg_embed_mat = np.zeros((len(wordlist), dim))
    for i in range(1, num + 1):  # Iterates through the embeddings
        print i
        finname = main_dir + "/embedding_" + str(i) + "/noinit/" + str(dim) + "/" + str(year)
        foutname = main_dir + "/embedding_" + str(i) + "/noinit/" + str(dim) + "/aligned/" + str(year)
        # Loads the individual embedding
        other_embed = create_representation(rep_type, finname, **rep_args)
        keep_indices = [other_embed.wi[word] for word in wordlist]
        # Synchronize the order of words
        other_embed = Embedding(other_embed.m[keep_indices, :], wordlist, normalize=False)
        if i == 1:
            base_embed = other_embed
            ortho = np.eye(dim)
        else:
            ortho = alignment.get_procrustes_mat(base_embed, other_embed)
        # Rotates the embedding to the reference
        aligned_embed_mat = (other_embed.m).dot(ortho)
        avg_embed_mat += aligned_embed_mat / num  # Accumulates the average embedding
        np.save(foutname + "-w.npy", aligned_embed_mat)
        write_pickle(other_embed.iw, foutname + "-vocab.pkl")
    foutname = main_dir + "/embedding_avg/" + str(year)
    np.save(foutname + "-w.npy", avg_embed_mat)
    write_pickle(base_embed.iw, foutname + "-vocab.pkl")
def align_years(years, rep_type, main_dir, num, dim, **rep_args):
    print "Aligning years to each other"
    first_iter = True
    base_embed = None
    for year in years:  # Iterates through years
        print year
        # Loads the average embedding for this year
        year_embed = create_representation(rep_type, main_dir + "/embedding_avg/" + str(year), **rep_args)
        if first_iter:
            aligned_embed = year_embed
            first_iter = False
        else:
            ortho = alignment.get_procrustes_mat(base_embed, year_embed)
            # Rotates to the previous year's embedding
            aligned_embed = Embedding((year_embed.m).dot(ortho), year_embed.iw, normalize=False)
            # Align all the individual embeddings the same way as the average
            for i in range(1, num + 1):
                finname = main_dir + "/embedding_" + str(i) + "/noinit/" + str(dim) + "/aligned/" + str(year)
                foutname = main_dir + "/embedding_" + str(i) + "/noinit/" + str(dim) + "/aligned/" + str(year)
                mat = np.load(finname + "-w.npy")
                mat = mat.dot(ortho)
                np.save(foutname + "-w.npy", mat)
        base_embed = aligned_embed
        foutname = main_dir + "/embedding_avg/aligned/" + str(year)
        np.save(foutname + "-w.npy", aligned_embed.m)
        write_pickle(aligned_embed.iw, foutname + "-vocab.pkl")
def linear_align(base_embed, other_embed):
    """
    Align other embedding to base embedding using best linear transform.
    NOTE: Assumes indices are aligned
    """
    basevecs = base_embed.m
    othervecs = other_embed.m
    fixedvecs = othervecs.dot(np.linalg.pinv(othervecs)).dot(basevecs)
    return Embedding(fixedvecs, other_embed.iw)
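# Why the pseudoinverse works (a minimal sanity check, not part of the source): the
# least-squares solution of othervecs @ W ~= basevecs is W = pinv(othervecs) @ basevecs,
# so linear_align returns othervecs @ W, the closest linear image of other_embed to
# base_embed. Toy check with synthetic matrices:
import numpy as np

rng = np.random.RandomState(0)
other = rng.randn(50, 10)               # toy "other" embedding matrix
base = other.dot(rng.randn(10, 10))     # base is an exact linear image of other
W = np.linalg.pinv(other).dot(base)
assert np.allclose(other.dot(W), base)  # recovered up to numerical precision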
def smart_procrustes_align(base_embed, other_embed, post_normalize=True):
    in_base_embed, in_other_embed = intersection_align(base_embed, other_embed,
                                                       post_normalize=False)
    base_vecs = in_base_embed.m
    other_vecs = in_other_embed.m
    m = other_vecs.T.dot(base_vecs)
    u, _, v = np.linalg.svd(m)
    ortho = u.dot(v)
    return Embedding((other_embed.m).dot(ortho), other_embed.iw,
                     normalize=post_normalize)
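# Sanity sketch (not part of the source): with M = other_vecs.T @ base_vecs and numpy's
# svd returning (U, S, Vt), the product U @ Vt is the orthogonal Procrustes solution,
# i.e. the rotation R minimizing ||other_vecs @ R - base_vecs||_F. Toy check:
import numpy as np

rng = np.random.RandomState(1)
base = rng.randn(100, 20)
true_R = np.linalg.qr(rng.randn(20, 20))[0]  # a random orthogonal matrix
other = base.dot(true_R.T)                   # "other" is "base" rotated
u, _, vt = np.linalg.svd(other.T.dot(base))
R = u.dot(vt)
assert np.allclose(other.dot(R), base)       # the rotation is recovered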
def main():
    args = docopt("""
    Usage:
        sgns2text.py [options] <sgns_path> <output_path>

    Options:
        --w+c    Use ensemble of word and context vectors
    """)
    sgns_path = args['<sgns_path>']
    output_path = args['<output_path>']
    w_c = args['--w+c']
    if w_c:
        sgns = EnsembleEmbedding(Embedding(sgns_path + '.words', False),
                                 Embedding(sgns_path + '.contexts', False), True)
    else:
        sgns = Embedding(sgns_path + '.words', True)
    with open(output_path, 'w') as f:
        for i, w in enumerate(sgns.iw):
            print >>f, w, ' '.join([str(x) for x in sgns.m[i]])
def procrustes_align(base_embed, other_embed):
    """
    Align other embedding to base embedding via Procrustes.
    Returns the best distance-preserving aligned version of other_embed.
    NOTE: Assumes indices are aligned
    """
    basevecs = base_embed.m - base_embed.m.mean(0)
    othervecs = other_embed.m - other_embed.m.mean(0)
    m = othervecs.T.dot(basevecs)
    u, _, v = np.linalg.svd(m)
    ortho = u.dot(v)
    fixedvecs = othervecs.dot(ortho)
    return Embedding(fixedvecs, other_embed.iw)
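# Hedged usage sketch: procrustes_align assumes both embeddings already share the same
# vocabulary in the same row order (for example after intersection_align above); the
# paths are illustrative, not from the source.
base = Embedding.load("embeddings/1990")
other = Embedding.load("embeddings/2000")
base_common, other_common = intersection_align(base, other, post_normalize=False)
aligned = procrustes_align(base_common, other_common)  # other rotated into the base space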
def load(cls, path, years, **kwargs):
    embeds = collections.OrderedDict()
    for year in years:
        for file in os.listdir(path):
            if re.match(".*-w.npy", file):
                year_name = str(year)
                break
            elif re.match(".*" + str(year) + ".*", file):
                year_name = file
                break
        else:
            year_name = None
        if year_name:
            embeds[year] = Embedding.load(path + "/" + year_name, **kwargs)
        else:
            print("Couldn't load data of year " + str(year))
    return SequentialEmbedding(embeds)
def main():
    args = docopt("""
    Usage:
        eval_reliability.py [options] <folders>...

    Options:
        --words FILE    Use FILE with list of words (1 per line) to measure reliability
        --ws FILES      Test sets for word similarity evaluation, use "," as separator!
        --ana FILES     Test sets for analogy evaluation, use "," as separator!
        --closest N     Use N closest neighbors to measure reliability [default: 10]
    """)
    folders = args["<folders>"]
    closest = int(args["--closest"])
    word_list = args["--words"]
    ws_test_sets = [read_ws_test_set(path) for path in args["--ws"].split(",")]
    as_test_sets = [read_as_test_set(path) for path in args["--ana"].split(",")]
    as_xi_and_ix = [get_vocab_as(test_set) for test_set in as_test_sets]

    representations = []
    for folder in folders:
        representations.append(Embedding(folder + '/vec', True))  # only works for SGNS!
    if len(representations) < 2:
        raise Exception("Need multiple models for evaluation")

    # Words whose neighborhoods are compared across the models
    words = (words_to_evaluate_file(word_list) if word_list
             else words_to_evaluate(representations))

    # Comparison over all subsets
    evaluated = [
        " ".join([str(evaluate_ws(r, w)) for r in representations])
        for w in ws_test_sets
    ]
    for i, test_set in enumerate(as_test_sets):
        evaluated.append(" ".join([
            str(evaluate_as(r, test_set, as_xi_and_ix[i][0], as_xi_and_ix[i][1]))
            for r in representations
        ]))
    evaluated.append(reliability(representations, words, closest))
    print("\t".join(evaluated))
def wiktionary_eval(src_vecs, trg_vecs, wiktionary_file, b_reverse, b_print=False,
                    b_include_oov=False, eval_dir=None, src_lang_code=None,
                    trg_lang_code=None, precision_at_N=1):
    OOV = None
    INV = None
    if eval_dir is not None and src_lang_code is not None and trg_lang_code is not None:
        # These extra params are to do with outputting OOV and INV words during evaluation
        oov_filename = './' + eval_dir + '/wiktionary-eval/oov-' + src_lang_code + '-' + trg_lang_code + '.txt'
        inv_filename = './' + eval_dir + '/wiktionary-eval/inv-' + src_lang_code + '-' + trg_lang_code + '.txt'
        OOV = set()  # Use sets, so that the words get de-duped
        INV = set()
    Es = Embedding(src_vecs, True)
    Et = Embedding(trg_vecs, True)
    BX = [(l.split("|||")[-1].strip(), l.split("|||")[0].strip())
          for l in open(wiktionary_file).readlines()]
    if b_reverse:  # Reverse the evaluation source/target
        BX = [(t, s) for s, t in BX]
        Es, Et = Et, Es
    BD = []
    for s, t in BX:
        if s in Es.wi and t in Et.wi:
            BD.append((s, t))
            if INV is not None:
                INV.add(s + ' ||| ' + t)
        else:
            if b_include_oov:
                BD.append((s, t))
            if OOV is not None:
                OOV.add(s + ' ||| ' + t)
    p1, tot = 0, 0
    for s, t in BD:
        vs = Es.represent(s)
        scores = vs.dot(Et.m.T)
        if precision_at_N == 1:
            cand = Et.iw[np.nanargmax(scores)]
            if t == cand:
                p1 += 1
        else:
            is_match = False
            indices = np.argsort(scores)[-precision_at_N:]  # positions of the N best scores
            for index in indices:
                cand = Et.iw[index]
                if t == cand:
                    is_match = True
                    break
            if is_match:
                p1 += 1
        tot += 1
    if b_print:
        print '{0:.4f}'.format(p1 / tot)
    if eval_dir is not None and src_lang_code is not None and trg_lang_code is not None:
        inv_f = open(inv_filename, 'w')
        for inv in INV:
            inv_f.write(inv + '\n')
        inv_f.close()
        oov_f = open(oov_filename, 'w')
        for oov in OOV:
            oov_f.write(oov + '\n')
        oov_f.close()
    return p1 / tot
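# Hedged usage sketch: precision@1 and precision@5 over a "target ||| source" dictionary
# file; the vector and dictionary paths are illustrative, not from the source.
p_at_1 = wiktionary_eval('src.words', 'trg.words', 'wiktionary_pairs.txt', b_reverse=False)
p_at_5 = wiktionary_eval('src.words', 'trg.words', 'wiktionary_pairs.txt', b_reverse=False,
                         precision_at_N=5)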
def load(cls, path, years, **kwargs):
    embeds = collections.OrderedDict()
    for year in years:
        embeds[year] = Embedding.load(path + "/" + str(year), **kwargs)
    return SequentialEmbedding(embeds)
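# Hedged usage sketch: the directory layout (one "<path>/<year>" embedding per decade,
# loadable by Embedding.load) is an assumption for illustration, not taken from the source.
seq = SequentialEmbedding.load("embeddings/eng-all", range(1900, 2000, 10))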
def main(): usage = "%prog sgns.words sgns.words.vocab" parser = OptionParser(usage=usage) #parser.add_option('-n', dest='n', default=10, # help='Most similar: default=%default') #parser.add_option('--boolarg', action="store_true", dest="boolarg", default=False, # help='Keyword argument: default=%default') (options, args) = parser.parse_args() vec_file = args[0] vocab_file = args[1] #n = int(options.n) emb = Embedding(vec_file) subset = {} with open(vocab_file, 'r') as f: vocab = f.readlines() vocab = [v.strip() for v in vocab] vocab = [v for v in vocab if v.startswith('msa-')] for word in vocab: subset[word] = emb.represent(word) keys = list(subset.keys()) n_words = len(keys) emb_size = len(subset[keys[0]]) for word in keys: print(word) print("Doing dimensionality reduction") vectors = np.zeros([n_words, emb_size]) for word_i, word in enumerate(keys): vectors[word_i, :] = subset[word] tsne = TSNE(n_components=2) proj = tsne.fit_transform(vectors) #pca = PCA(n_components=2) #proj = pca.fit_transform(vectors) #transformer = np.random.randn(emb_size, 2) #proj = np.dot(vectors, transformer) print("plotting") fig, ax = plt.subplots(figsize=(8, 6)) for i, word in enumerate(keys): ax.scatter([proj[i, 0]], [proj[i, 1]], color='k', alpha=0.6, s=1, edgecolors=None) if word == 'msa-orlando-nightclub-massacre': ax.text(proj[i, 0], proj[i, 1], 'Orlando', fontsize=8, alpha=0.6, ha='center', va='baseline') if word == 'msa-san-bernardino,-california': ax.text(proj[i, 0], proj[i, 1], 'San Bernadino', fontsize=8, alpha=0.6, ha='center', va='baseline') if word == 'msa-amnicola-training-center,-chattanooga': ax.text(proj[i, 0], proj[i, 1], 'Chattanooga', fontsize=8, alpha=0.6, ha='center', va='baseline') if word == 'msa-columbine-high-school': ax.text(proj[i, 0], proj[i, 1], 'Columbine', fontsize=8, alpha=0.6, ha='center', va='baseline') if word == 'msa-westside-middle-school': ax.text(proj[i, 0], proj[i, 1], 'Westside Middle School', fontsize=8, alpha=0.6, ha='center', va='baseline') if word == 'msa-heritage-high-school': ax.text(proj[i, 0], proj[i, 1], 'Heritage High School', fontsize=8, alpha=0.6, ha='center', va='baseline') if word == 'msa-virginia-tech--campus': ax.text(proj[i, 0], proj[i, 1], 'Virginia Tech', fontsize=8, alpha=0.6, ha='center', va='baseline') if word == 'msa-tucson,-arizona': ax.text(proj[i, 0], proj[i, 1], 'Tuscon, Arizona', fontsize=8, alpha=0.6, ha='center', va='baseline') if word == 'msa-movie-theater-in-aurora': ax.text(proj[i, 0], proj[i, 1], 'Aurora', fontsize=8, alpha=0.6, ha='center', va='baseline') if word == 'msa-mother-emanuel-ame-church': ax.text(proj[i, 0], proj[i, 1], 'Mother Emanuel', fontsize=8, alpha=0.6, ha='center', va='baseline') if word == 'Umpqua Community College': ax.text(proj[i, 0], proj[i, 1], 'Umpqua Community College', fontsize=8, alpha=0.6, ha='center', va='baseline') #if word in target_words: # ax.scatter([proj[i, 0]], [proj[i, 1]], color='k', alpha=0.6, s=1, edgecolors=None) # ax.text(proj[i, 0], proj[i, 1], word, fontsize=8, alpha=0.6, ha='center', va='baseline', color='red') #else: # ax.scatter([proj[i, 0]], [proj[i, 1]], color='k', alpha=0.6, s=1, edgecolors=None) # ax.text(proj[i, 0], proj[i, 1], word, fontsize=8, alpha=0.6, ha='center', va='baseline') previous_x = None previous_y = None """ for i, year in enumerate(years): target_word = base_word + '_' + str(year) index = keys.index(target_word) x = proj[index, 0] y = proj[index, 1] if previous_x is not None: plt.plot([previous_x, x], [previous_y, y], c='b', linewidth=1, alpha=(0.8 * i/len(years) + 
0.1)) ax.scatter([x], [y], color='red', alpha=0.5, s=2, edgecolors=None) previous_x = x previous_y = y """ plt.savefig('shooters.pdf', bbox_inches='tight')
parser.add_argument("test_path", help="Path to test data") parser.add_argument("--word-path", help="Path to sorted list of context words", default="") parser.add_argument("--num-context", type=int, help="Number context words to use", default=-1) parser.add_argument("--type", default="PPMI") args = parser.parse_args() if args.type == "PPMI": year = int(args.vec_path.split("/")[-1].split(".")[0]) if args.num_context != -1 and args.word_path == "": raise Exception( "Must specify path to context word file if the context words are to be restricted!" ) elif args.word_path != "": _, context_words = ioutils.load_target_context_words( [year], args.word_path, -1, args.num_context) context_words = context_words[year] else: context_words = None rep = Explicit.load(args.vec_path, restricted_context=context_words) elif args.type == "SVD": rep = SVDEmbedding(args.vec_path, eig=0.0) else: rep = Embedding.load(args.vec_path, add_context=False) data = read_test_set(args.test_path) correlation = evaluate(rep, data) print "Correlation: " + str(correlation)
def __init__(self, model_code):
    model_name = models[model_code]
    year = model_name.split('-')[-1]
    self.embed = Embedding.load('files/sgns/' + year)
    self.embed.normalize()
import os
import random
import itertools as it
from subprocess import check_output

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from representations.embedding import Embedding
from viz.scripts import closest_over_time
from viz import mplot, mcommon

random.seed(1111)

## !! use python2, because github uses python2
model_dir = 'C:/Users/dat/Documents/SemEval2017Task4/4B-English/BertSentiment/'
os.chdir(model_dir)

model_name = 'word_vector768'
tweet2017all = Embedding.load(model_name)
print('model {}'.format(model_name))

word_list = ['misogynistic', 'feminist', 'caitlyn_jenner', 'he', 'she']
for word in word_list:
    print('word {}'.format(word))
    print(tweet2017all.closest(word, 20))
    mplot.plot_one_word(word, tweet2017all)
def alignment_eval(src_vecs, trg_vecs, align_file, src_test_file, trg_test_file,
                   b_reverse, b_print=False):
    Es = Embedding(src_vecs, True)
    Et = Embedding(trg_vecs, True)
    poss_aligns = [[int(x) for x in l.strip().split()[:3]]
                   for l in open(align_file).readlines()]
    sure_aligns = [[int(x) for x in l.strip().split()[:3]]
                   for l in open(align_file).readlines()
                   if l.strip().split()[-1] != "P"]
    ssents = [
        l.strip().split(">")[1].split("<")[0].split()
        for l in codecs.open(src_test_file, 'r', "utf8", errors='ignore').readlines()
    ]
    tsents = [
        l.strip().split(">")[1].split("<")[0].split()
        for l in codecs.open(trg_test_file, 'r', "utf8", errors='ignore').readlines()
    ]
    # swap 3 and 4
    if b_reverse:  # Reverse the evaluation source/target
        poss_aligns = [(sid, twid, swid) for sid, swid, twid in poss_aligns]
        sure_aligns = [(sid, twid, swid) for sid, swid, twid in sure_aligns]
        Es, Et = Et, Es
        ssents, tsents = tsents, ssents
    poss_aligns = gb([(sid - 1, (swid - 1, twid - 1)) for sid, swid, twid in poss_aligns])
    sure_aligns = gb([(sid - 1, (swid - 1, twid - 1)) for sid, swid, twid in sure_aligns])
    size_a = 0.0
    size_s = 0.0
    size_a_and_s = 0.0
    size_a_and_p = 0.0
    for sid, (ssent, tsent) in enumerate(zip(ssents, tsents)):
        alignment = set()
        twords = [tword.split("_")[0].lower() for tword in tsent]
        tvecs = [Et.represent(tword) if tword in Et.wi else None for tword in twords]
        for swid, sword in enumerate(ssent):
            sword = sword.split("_")[0].lower()
            if sword in Es.wi:
                svec = Es.represent(sword)
                sims = [svec.dot(tvec) if tvec is not None else -1.0 for tvec in tvecs]
                alignment.add((swid, np.argmax(sims)))
                #print(sword, twords[np.argmax(sims)])
        sure = sure_aligns[sid]
        poss = poss_aligns[sid]
        size_a += float(len(alignment))
        size_s += float(len(sure))
        s_a = alignment & sure
        p_a = alignment & poss
        size_a_and_s += float(len(s_a))
        size_a_and_p += float(len(p_a))
    if b_print:
        print '{0:.4f}'.format((size_a_and_s + size_a_and_p) / (size_a + size_s))
    return (size_a_and_s + size_a_and_p) / (size_a + size_s)
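# The score returned by alignment_eval, (|A&S| + |A&P|) / (|A| + |S|) with A the predicted
# alignments, S the sure gold links and P the possible gold links, is the complement of the
# standard alignment error rate: AER = 1 - (|A&S| + |A&P|) / (|A| + |S|).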
import sys
import codecs
import glob
import numpy as np
from scipy import stats
from itertools import groupby

sys.path.append('hyperwords')
from representations.embedding import Embedding


def gb(collection):
    keyfunc = lambda x: x[0]
    groups = groupby(sorted(collection, key=keyfunc), keyfunc)
    return {k: set([v for k_, v in g]) for k, g in groups}


Es = Embedding(sys.argv[1], True)
Et = Embedding(sys.argv[2], True)
poss_aligns = [[int(x) for x in l.strip().split()[:3]]
               for l in open(sys.argv[3]).readlines()]
sure_aligns = [[int(x) for x in l.strip().split()[:3]]
               for l in open(sys.argv[3]).readlines()
               if l.strip().split()[-1] != "P"]
ssents = [
    l.strip().split(">")[1].split("<")[0].split()
    for l in codecs.open(sys.argv[4], 'r', "utf8", errors='ignore').readlines()
]
tsents = [
    l.strip().split(">")[1].split("<")[0].split()
    for l in codecs.open(sys.argv[5], 'r', "utf8", errors='ignore').readlines()
]
    actual, expected = zip(*results)
    print "OOV: ", oov
    return spearmanr(actual, expected)[0]


if __name__ == '__main__':
    parser = ArgumentParser("Run word similarity benchmark")
    parser.add_argument("vec_path", help="Path to word vectors")
    parser.add_argument("test_path", help="Path to test data")
    parser.add_argument("--word-path", help="Path to sorted list of context words", default="")
    parser.add_argument("--num-context", type=int, help="Number of context words to use", default=-1)
    parser.add_argument("--type", default="PPMI")
    args = parser.parse_args()
    if args.type == "PPMI":
        year = int(args.vec_path.split("/")[-1].split(".")[0])
        if args.num_context != -1 and args.word_path == "":
            raise Exception("Must specify path to context word file if the context words are to be restricted!")
        elif args.word_path != "":
            _, context_words = ioutils.load_target_context_words([year], args.word_path, -1, args.num_context)
            context_words = context_words[year]
        else:
            context_words = None
        rep = Explicit.load(args.vec_path, restricted_context=context_words)
    elif args.type == "SVD":
        rep = SVDEmbedding(args.vec_path, eig=0.0)
    else:
        rep = Embedding.load(args.vec_path, add_context=False)
    data = read_test_set(args.test_path)
    correlation = evaluate(rep, data)
    print "Correlation: " + str(correlation)
def main(): usage = "%prog sgns.words" parser = OptionParser(usage=usage) #parser.add_option('-n', dest='n', default=10, # help='Most similar: default=%default') #parser.add_option('--boolarg', action="store_true", dest="boolarg", default=False, # help='Keyword argument: default=%default') (options, args) = parser.parse_args() vec_file = args[0] #n = int(options.n) emb = Embedding(vec_file) subset = {} target_words = [] target_vectors = [] words = [] years = list(range(1987, 2008)) print("Collecting vectors") target_word = 'terrorism_pre911' target_words.append(target_word) subset[target_word] = emb.represent(target_word) target_vectors.append(emb.represent(target_word)) target_word = 'terrorism_post911' target_words.append(target_word) subset[target_word] = emb.represent(target_word) target_vectors.append(emb.represent(target_word)) mean_vector = np.mean(target_vectors, axis=0) closest = emb.closest_to_vec(mean_vector, n=50) words = [pair[1] for pair in closest] words = [word for word in words if word != 'terrorism'] words = [word for word in words if not re.search(r'\d', word)] for word in words: subset[word] = emb.represent(word) keys = list(subset.keys()) n_words = len(keys) emb_size = len(subset[keys[0]]) for word in keys: print(word) print("Doing dimensionality reduction") vectors = np.zeros([n_words, emb_size]) dist1 = np.zeros(n_words) dist2 = np.zeros(n_words) for word_i, word in enumerate(keys): vectors[word_i, :] = subset[word] dist1[word_i] = np.abs(cosine_similarity(subset[word].reshape(-1, 1), subset['terrorism_pre911'].reshape(-1, 1))[0][0]) dist2[word_i] = np.abs(cosine_similarity(subset[word].reshape(-1, 1), subset['terrorism_post911'].reshape(-1, 1))[0][0]) tsne = TSNE(n_components=2) proj = tsne.fit_transform(vectors) #pca = PCA(n_components=2) #proj = pca.fit_transform(vectors) #transformer = np.random.randn(emb_size, 2) #proj = np.dot(vectors, transformer) print("plotting") fig, ax = plt.subplots(figsize=(8, 6)) for i, word in enumerate(keys): if word in target_words: ax.scatter([proj[i, 0]], [proj[i, 1]], color='k', alpha=0.6, s=1, edgecolors=None) ax.text(proj[i, 0], proj[i, 1], word, fontsize=8, alpha=0.6, ha='center', va='baseline', color='red') else: ax.scatter([proj[i, 0]], [proj[i, 1]], color='k', alpha=0.6, s=1, edgecolors=None) ax.text(proj[i, 0], proj[i, 1], word, fontsize=8, alpha=0.6, ha='center', va='baseline') previous_x = None previous_y = None """ for i, year in enumerate(years): target_word = base_word + '_' + str(year) index = keys.index(target_word) x = proj[index, 0] y = proj[index, 1] if previous_x is not None: plt.plot([previous_x, x], [previous_y, y], c='b', linewidth=1, alpha=(0.8 * i/len(years) + 0.1)) ax.scatter([x], [y], color='red', alpha=0.5, s=2, edgecolors=None) previous_x = x previous_y = y """ plt.savefig('test.pdf', bbox_inches='tight') fig, ax = plt.subplots(figsize=(8, 6)) for i, word in enumerate(keys): if word not in target_words: ax.scatter(dist1[i], dist2[i], color='k', alpha=0.6, s=1, edgecolors=None) ax.text(dist1[i], dist2[i], word, fontsize=8, alpha=0.6, ha='center', va='baseline') plt.savefig('test2.pdf', bbox_inches='tight')
from __future__ import division
import sys

import numpy as np

sys.path.append('hyperwords')
from representations.embedding import Embedding

Es = Embedding(sys.argv[1], True)
Et = Embedding(sys.argv[2], True)
BX = [(l.split("|||")[-1].strip(), l.split("|||")[0].strip())
      for l in open(sys.argv[3]).readlines()]
if sys.argv[-1] == 'R':  # Reverse the evaluation source/target
    BX = [(t, s) for s, t in BX]
    Es, Et = Et, Es

BD = []
for s, t in BX:
    if s in Es.wi and t in Et.wi:
        BD.append((s, t))

p1, tot = 0, 0
for s, t in BD:
    vs = Es.represent(s)
    scores = vs.dot(Et.m.T)
    cand = Et.iw[np.nanargmax(scores)]
    if t == cand:
        p1 += 1
    tot += 1
print '{0:.4f}'.format(p1 / tot)
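# Hedged example invocation (the script name and vector/dictionary paths are illustrative
# only; only the trailing "R" flag is defined by the sys.argv[-1] check above):
#
#   python2 dict_eval.py src.words trg.words wiktionary_pairs.txt
#   python2 dict_eval.py src.words trg.words wiktionary_pairs.txt R   # reversed direction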