def writeVectors():
    """Compose node vectors for the training sentences and write them to disk.

    Reads the original embeddings ('We' in vars.normalized.100.mat) and the
    trained parameters ('W1', 'W2', 'b1', 'We' in params_rae.mat) from
    config.corpus_path, loads data through RNNDataCorpus.load_data_srl with
    nExamples=5, and for every sentence repeatedly merges two adjacent
    candidates that share a parent in sTree, storing
    tanh(W1.c1 + W2.c2 + b1) as the parent's feature column.  One
    space-separated line per sentence is written to
    config.results_path + "vectors.out".

    Raises:
        IndexError: if the pair search runs off the end of the candidate
            list because no two adjacent candidates share a parent.
    """
    vecFileName = config.results_path + "vectors.out"
    hiddenSize = 100
    nExamples = 5

    # The with-block guarantees the output file is closed even when loading
    # or composing raises part-way through.
    with open(vecFileName, 'w') as vecFile:
        mats = sio.loadmat(config.corpus_path + 'vars.normalized.100.mat')
        We_orig = mats.get('We')

        params = sio.loadmat(config.corpus_path + 'params_rae.mat')
        W1 = params.get('W1')
        W2 = params.get('W2')
        We = params.get('We')
        # Loop-invariant: flatten the bias once instead of once per merge.
        bias = params.get('b1').flatten()

        # Single-argument print(...) emits the same text whether print is a
        # statement or a function.
        print("loading data..")
        rnnData_train = RNNDataCorpus()
        rnnData_train.load_data_srl(load_file=config.train_data_srl,
                                    nExamples=nExamples)

        print("%s %s" % ('writing vectors to: ', vecFileName))
        for sNum, sTree in zip(rnnData_train.allSNum, rnnData_train.allSTree):
            n_nodes = len(sNum)
            # Entries of sNum that are >= 0 are treated as the word nodes.
            words_indexed = np.where(sNum >= 0)[0]

            # L is only the part of the embedding matrix that is relevant
            # for this sentence (L is deltaWe).
            if We.shape[1] != 0:
                L = We[:, words_indexed]
                words_embedded = We_orig[:, words_indexed] + L
            else:
                words_embedded = We_orig[:, words_indexed]

            tree = Tree()
            tree.pp = all  # kept as in the original: the builtin ``all``
            tree.nodeScores = np.zeros(n_nodes)
            tree.kids = np.zeros((n_nodes, 2))
            tree.nodeFeatures = np.zeros((hiddenSize, n_nodes))
            tree.nodeFeatures[:, :len(words_indexed)] = words_embedded

            # Private int32 copy of the word indices; it shrinks by one
            # entry per merge until only one candidate is left.
            toMerge = np.array(words_indexed, dtype='int32')
            while len(toMerge) > 1:
                # Find the first adjacent pair of candidates (initially
                # words) that share a parent.
                i = 0
                while sTree[toMerge[i]] != sTree[toMerge[i + 1]]:
                    i += 1

                newParent = sTree[toMerge[i]]
                kid1 = toMerge[i]
                kid2 = toMerge[i + 1]
                tree.kids[newParent, :] = [kid1, kid2]
                # The new parent becomes a merge candidate itself ...
                toMerge[i] = newParent
                # ... and the second kid is dropped from the candidates.
                toMerge = np.delete(toMerge, i + 1)

                c1 = tree.nodeFeatures[:, kid1]
                c2 = tree.nodeFeatures[:, kid2]
                tree.nodeFeatures[:, newParent] = np.tanh(
                    np.dot(W1, c1) + np.dot(W2, c2) + bias)

            # NOTE(review): [-1] selects the last *row* of nodeFeatures (one
            # value per node), not the last node's column ([:, -1]).
            # Behaviour is kept as-is; confirm which one is intended.
            vec = tree.nodeFeatures[-1]
            vecFile.write(" ".join(str(x) for x in vec) + '\n')

    print("finished! ")
def _sentence_word_vectors(We_orig, We, words_indexed):
    """Return the embedding columns of one sentence's words.

    The columns come from We_orig; when We is non-empty its matching columns
    (the learned delta on the embeddings) are added on top.
    """
    words_embedded = We_orig[:, words_indexed]
    if We.shape[1] != 0:
        words_embedded = words_embedded + We[:, words_indexed]
    return words_embedded


def _merge_bottom_up(tree, sTree, words_indexed, W1, W2, bias):
    """Fill tree.kids and tree.nodeFeatures for every merged parent node.

    Repeatedly takes the first two adjacent candidates that share a parent
    in sTree, records them as that parent's kids and stores
    tanh(W1.c1 + W2.c2 + bias) as the parent's feature column.  Raises
    IndexError if no adjacent pair shares a parent.
    """
    # Private int32 copy: the candidate list shrinks by one entry per merge.
    candidates = np.array(words_indexed, dtype='int32')
    while len(candidates) > 1:
        pos = 0
        while sTree[candidates[pos]] != sTree[candidates[pos + 1]]:
            pos += 1

        parent = sTree[candidates[pos]]
        left = candidates[pos]
        right = candidates[pos + 1]
        tree.kids[parent, :] = [left, right]
        # The parent replaces the left kid as a candidate; the right kid
        # is removed.
        candidates[pos] = parent
        candidates = np.delete(candidates, pos + 1)

        tree.nodeFeatures[:, parent] = np.tanh(
            np.dot(W1, tree.nodeFeatures[:, left])
            + np.dot(W2, tree.nodeFeatures[:, right])
            + bias)


def writeVectors():
    """Write one line of composed node features per training sentence.

    Loads 'We' from vars.normalized.100.mat and 'W1', 'W2', 'b1', 'We' from
    params_rae.mat (both under config.corpus_path), loads the training data
    with RNNDataCorpus.load_data_srl(nExamples=5), builds a Tree per sentence
    and writes one space-separated line per sentence to
    config.results_path + "vectors.out".

    Raises:
        IndexError: propagated from the merge step when no two adjacent
            candidates share a parent in a sentence's sTree.
    """
    hiddenSize = 100
    nExamples = 5
    vecFileName = config.results_path + "vectors.out"

    # Using a context manager so the file handle is released on every exit
    # path, including exceptions raised while loading or merging.
    with open(vecFileName, 'w') as vecFile:
        mats = sio.loadmat(config.corpus_path + 'vars.normalized.100.mat')
        We_orig = mats.get('We')

        params = sio.loadmat(config.corpus_path + 'params_rae.mat')
        W1 = params.get('W1')
        W2 = params.get('W2')
        We = params.get('We')
        bias = params.get('b1').flatten()  # flattened once, reused per merge

        # print(...) with a single argument prints identically as a
        # statement and as a function call.
        print("loading data..")
        rnnData_train = RNNDataCorpus()
        rnnData_train.load_data_srl(load_file=config.train_data_srl,
                                    nExamples=nExamples)

        print("%s %s" % ('writing vectors to: ', vecFileName))
        for sNum, sTree in zip(rnnData_train.allSNum, rnnData_train.allSTree):
            node_count = len(sNum)
            # Entries of sNum that are >= 0 are treated as the word nodes.
            words_indexed = np.where(sNum >= 0)[0]

            tree = Tree()
            tree.pp = all  # kept as in the original: the builtin ``all``
            tree.nodeScores = np.zeros(node_count)
            tree.kids = np.zeros((node_count, 2))
            tree.nodeFeatures = np.zeros((hiddenSize, node_count))
            tree.nodeFeatures[:, :len(words_indexed)] = _sentence_word_vectors(
                We_orig, We, words_indexed)

            _merge_bottom_up(tree, sTree, words_indexed, W1, W2, bias)

            # NOTE(review): this takes the last *row* of nodeFeatures
            # (length = number of nodes) rather than the last node's column
            # ([:, -1]).  Left unchanged; confirm the intended output.
            vec = tree.nodeFeatures[-1]
            vecFile.write(" ".join(str(x) for x in vec) + '\n')

    print("finished! ")