def writeVectors():
    """Write one composed sentence vector per line to ``vectors.out``.

    Reads the base word embeddings (``We`` in ``vars.normalized.100.mat``)
    and the trained parameters ``W1``, ``W2``, ``b1`` and the embedding
    delta ``We`` (``params_rae.mat``) from ``config.corpus_path``, loads
    ``nExamples`` sentences via ``RNNDataCorpus.load_data_srl`` and, for each
    sentence, merges sibling nodes bottom-up with
    ``p = tanh(W1.c1 + W2.c2 + b1)``.  The vector of the last node is then
    written as a space-separated line to
    ``config.results_path + "vectors.out"``.

    NOTE(review): the last node is assumed to be the root of the tree --
    confirm against the corpus loader.

    Raises:
        IndexError: if a tree has no two adjacent merge candidates sharing a
            parent (i.e. ``sTree`` does not describe a binary tree).
    """
    vecFileName = config.results_path + "vectors.out"

    mats = sio.loadmat(config.corpus_path + 'vars.normalized.100.mat')
    We_orig = mats.get('We')

    params = sio.loadmat(config.corpus_path + 'params_rae.mat')
    W1 = params.get('W1')
    W2 = params.get('W2')
    b1 = params.get('b1')
    We = params.get('We')

    # Node vectors live in the same space as the word embeddings, so take the
    # size from the data instead of hard-coding it.
    hiddenSize = We_orig.shape[0]
    nExamples = 5
    # Loop-invariant: flatten the bias once.
    bias = b1.flatten()

    print("loading data..")
    rnnData_train = RNNDataCorpus()
    rnnData_train.load_data_srl(load_file=config.train_data_srl,
                                nExamples=nExamples)

    print("writing vectors to:  " + vecFileName)
    # The context manager closes the file even if a sentence fails.
    with open(vecFileName, 'w') as vecFile:
        for ii in range(len(rnnData_train.allSNum)):
            sNum = rnnData_train.allSNum[ii]
            sTree = rnnData_train.allSTree[ii]

            # Node positions of the leaves; internal nodes have sNum < 0.
            words_indexed = np.where(sNum >= 0)[0]
            # Vocabulary ids of those leaves.  The embedding matrices are
            # indexed by word id (as forwardPropTree does), not by the
            # position of the leaf inside the tree.
            word_ids = sNum[words_indexed]

            # We holds the learned delta on top of the original embeddings;
            # it may be empty, in which case the originals are used as-is.
            if We.shape[1] != 0:
                words_embedded = We_orig[:, word_ids] + We[:, word_ids]
            else:
                words_embedded = We_orig[:, word_ids]

            tree = Tree()
            tree.pp = sTree
            tree.nodeScores = np.zeros(len(sNum))
            tree.kids = np.zeros((len(sNum), 2), dtype='int32')
            tree.nodeFeatures = np.zeros((hiddenSize, len(sNum)))
            # Store each leaf vector at its own node index, because children
            # are looked up by node index during the merges below.
            tree.nodeFeatures[:, words_indexed] = words_embedded

            toMerge = words_indexed.astype('int32')
            while len(toMerge) > 1:
                # Find the first adjacent pair of candidates with a common
                # parent (initially the candidates are the words).
                for i in range(len(toMerge) - 1):
                    if sTree[toMerge[i]] == sTree[toMerge[i + 1]]:
                        break
                else:
                    raise IndexError(
                        "no adjacent nodes share a parent in sentence %d"
                        % ii)

                kid1 = toMerge[i]
                kid2 = toMerge[i + 1]
                newParent = sTree[kid1]
                tree.kids[newParent, :] = [kid1, kid2]

                # The parent replaces its first kid as a merge candidate and
                # the second kid is dropped.
                toMerge[i] = newParent
                toMerge = np.delete(toMerge, i + 1)

                c1 = tree.nodeFeatures[:, kid1]
                c2 = tree.nodeFeatures[:, kid2]
                tree.nodeFeatures[:, newParent] = np.tanh(
                    np.dot(W1, c1) + np.dot(W2, c2) + bias)

            # Column -1 is the feature vector of the last node; row -1 would
            # be a single feature taken across every node.
            vec = tree.nodeFeatures[:, -1]
            vecFile.write(" ".join(str(x) for x in vec) + '\n')

    print("finished! ")
def writeVectors():
    """Write one composed sentence vector per line to ``vectors.out``.

    Reads the base word embeddings (``We`` in ``vars.normalized.100.mat``)
    and the trained parameters ``W1``, ``W2``, ``b1`` and the embedding
    delta ``We`` (``params_rae.mat``) from ``config.corpus_path``, loads
    ``nExamples`` sentences via ``RNNDataCorpus.load_data_srl`` and, for each
    sentence, merges sibling nodes bottom-up with
    ``p = tanh(W1.c1 + W2.c2 + b1)``.  The vector of the last node is then
    written as a space-separated line to
    ``config.results_path + "vectors.out"``.

    NOTE(review): the last node is assumed to be the root of the tree --
    confirm against the corpus loader.

    Raises:
        IndexError: if a tree has no two adjacent merge candidates sharing a
            parent (i.e. ``sTree`` does not describe a binary tree).
    """
    vecFileName = config.results_path + "vectors.out"

    mats = sio.loadmat(config.corpus_path + 'vars.normalized.100.mat')
    We_orig = mats.get('We')

    params = sio.loadmat(config.corpus_path + 'params_rae.mat')
    W1 = params.get('W1')
    W2 = params.get('W2')
    b1 = params.get('b1')
    We = params.get('We')

    # Node vectors live in the same space as the word embeddings, so take the
    # size from the data instead of hard-coding it.
    hiddenSize = We_orig.shape[0]
    nExamples = 5
    # Loop-invariant: flatten the bias once.
    bias = b1.flatten()

    print("loading data..")
    rnnData_train = RNNDataCorpus()
    rnnData_train.load_data_srl(load_file=config.train_data_srl,
                                nExamples=nExamples)

    print("writing vectors to:  " + vecFileName)
    # The context manager closes the file even if a sentence fails.
    with open(vecFileName, 'w') as vecFile:
        for ii in range(len(rnnData_train.allSNum)):
            sNum = rnnData_train.allSNum[ii]
            sTree = rnnData_train.allSTree[ii]

            # Node positions of the leaves; internal nodes have sNum < 0.
            words_indexed = np.where(sNum >= 0)[0]
            # Vocabulary ids of those leaves.  The embedding matrices are
            # indexed by word id (as forwardPropTree does), not by the
            # position of the leaf inside the tree.
            word_ids = sNum[words_indexed]

            # We holds the learned delta on top of the original embeddings;
            # it may be empty, in which case the originals are used as-is.
            if We.shape[1] != 0:
                words_embedded = We_orig[:, word_ids] + We[:, word_ids]
            else:
                words_embedded = We_orig[:, word_ids]

            tree = Tree()
            tree.pp = sTree
            tree.nodeScores = np.zeros(len(sNum))
            tree.kids = np.zeros((len(sNum), 2), dtype='int32')
            tree.nodeFeatures = np.zeros((hiddenSize, len(sNum)))
            # Store each leaf vector at its own node index, because children
            # are looked up by node index during the merges below.
            tree.nodeFeatures[:, words_indexed] = words_embedded

            toMerge = words_indexed.astype('int32')
            while len(toMerge) > 1:
                # Find the first adjacent pair of candidates with a common
                # parent (initially the candidates are the words).
                for i in range(len(toMerge) - 1):
                    if sTree[toMerge[i]] == sTree[toMerge[i + 1]]:
                        break
                else:
                    raise IndexError(
                        "no adjacent nodes share a parent in sentence %d"
                        % ii)

                kid1 = toMerge[i]
                kid2 = toMerge[i + 1]
                newParent = sTree[kid1]
                tree.kids[newParent, :] = [kid1, kid2]

                # The parent replaces its first kid as a merge candidate and
                # the second kid is dropped.
                toMerge[i] = newParent
                toMerge = np.delete(toMerge, i + 1)

                c1 = tree.nodeFeatures[:, kid1]
                c2 = tree.nodeFeatures[:, kid2]
                tree.nodeFeatures[:, newParent] = np.tanh(
                    np.dot(W1, c1) + np.dot(W2, c2) + bias)

            # Column -1 is the feature vector of the last node; row -1 would
            # be a single feature taken across every node.
            vec = tree.nodeFeatures[:, -1]
            vecFile.write(" ".join(str(x) for x in vec) + '\n')

    print("finished! ")
def forwardPropTree(W, WO, Wcat, Wv, Wo, sNum, sTree, sStr=None, sNN=None,
                    indicies=None, params=None):
    """Forward-propagate one sentence through its binary parse tree.

    Every node carries a vector ``a`` (length ``wsz``) and an operator
    matrix ``A`` (``wsz x wsz``, stored flattened).  Leaves take their vector
    from ``Wv`` and build their operator from ``Wo`` as
    ``diag(d) + U.V^T``.  Two siblings ``(a, A)`` and ``(b, B)`` are merged
    into their parent with::

        parent vector   = tanh(W . [B.a ; A.b ; 1])
        parent operator = WO . [A ; B]

    Args:
        W: composition weights, ``wsz x (2*wsz + 1)``; the last column
            multiplies the constant bias input.
        WO: operator composition weights, ``wsz x (2*wsz)``.
        Wcat: unused here; kept for interface compatibility.
        Wv: word vectors, one column per word id, ``wsz`` rows.
        Wo: word operator parameters, one column per word id, laid out as
            ``wsz`` diagonal entries, then ``wsz*r`` entries of ``U``, then
            ``wsz*r`` entries of ``V``.
        sNum: per-node word id; negative for internal nodes.
        sTree: per-node parent index.
        sStr, sNN, indicies: unused here; kept for interface compatibility.
        params: object exposing ``wordSize`` (``wsz``) and ``rankWo``
            (``r``).  Required despite the ``None`` default.

    Returns:
        A ``Tree`` with ``pp``, ``isLeafVec``, ``nodeNames``, ``nodeLabels``,
        ``kids``, ``nodeAct_a``, ``nodeOp_A`` and ``ParIn_a`` filled in.
        ``ParIn_z`` and ``score`` are allocated but left at zero.

    Raises:
        IndexError: if no two adjacent merge candidates share a parent
            (``sTree`` does not describe a binary tree).
    """
    wsz = params.wordSize
    r = params.rankWo

    # Leaves are the nodes that carry a real word id.
    words = np.where(sNum >= 0)[0]
    numTotalNodes = len(sNum)

    allV = Wv[:, sNum[words]]
    allO = Wo[:, sNum[words]]

    thisTree = Tree()
    # set tree structure of tree
    thisTree.pp = sTree
    # set which nodes are leaf nodes
    thisTree.isLeafVec = np.zeros(numTotalNodes)
    thisTree.isLeafVec[words] = 1
    thisTree.nodeNames = np.arange(len(sTree))
    thisTree.nodeLabels = sNum

    # the inputs to the parent (stay empty for the top node)
    thisTree.ParIn_z = np.zeros((wsz, numTotalNodes))
    thisTree.ParIn_a = np.zeros((wsz, numTotalNodes))
    # node vectors
    thisTree.nodeAct_a = np.zeros((wsz, numTotalNodes))
    # node operators, one flattened wsz x wsz matrix per column
    thisTree.nodeOp_A = np.zeros((wsz ** 2, numTotalNodes))
    # the scores for each decision
    thisTree.score = np.zeros(numTotalNodes)
    # the children of each node (for speed)
    thisTree.kids = np.zeros((numTotalNodes, 2), dtype='int32')

    # initialize the vectors and operators of the words (leaf nodes)
    thisTree.nodeAct_a[:, words] = allV
    for col, node in enumerate(words):
        diag_a = np.diag(allO[:wsz, col])
        U = allO[wsz:wsz * (1 + r), col].reshape(wsz, r)
        V = allO[wsz * (1 + r):, col].reshape(wsz, r)
        # Store at the leaf's node index so it matches nodeAct_a and the
        # kid lookups below, even when leaves are not the first columns.
        thisTree.nodeOp_A[:, node] = (
            diag_a + np.dot(U, np.transpose(V))).reshape(wsz ** 2)

    # Constant bias input.  It has to be a real 1: np.ndarray([1]) would only
    # allocate one element without initialising it.
    bias = np.ones(1)

    toMerge = words.astype('int32')
    while len(toMerge) > 1:
        # find the first adjacent pair of candidates (initially words) that
        # share a parent
        for i in range(len(toMerge) - 1):
            if sTree[toMerge[i]] == sTree[toMerge[i + 1]]:
                break
        else:
            raise IndexError("no adjacent nodes share a parent in sTree")

        kid1 = toMerge[i]
        kid2 = toMerge[i + 1]
        newParent = sTree[kid1]
        thisTree.kids[newParent, :] = [kid1, kid2]

        # the new parent becomes a merge candidate; the other kid is dropped
        toMerge[i] = newParent
        toMerge = np.delete(toMerge, i + 1)

        a = thisTree.nodeAct_a[:, kid1]
        A = thisTree.nodeOp_A[:, kid1].reshape(wsz, wsz)
        b = thisTree.nodeAct_a[:, kid2]
        B = thisTree.nodeOp_A[:, kid2].reshape(wsz, wsz)

        # Each child's vector is transformed by its sibling's operator.
        l_a = np.dot(B, a)
        r_a = np.dot(A, b)
        C = np.concatenate((l_a, r_a, bias))
        thisTree.nodeAct_a[:, newParent] = np.tanh(np.dot(W, C))
        P_A = np.dot(WO, np.vstack((A, B))).reshape(wsz ** 2)

        # save all this for backprop:
        thisTree.ParIn_a[:, kid1] = l_a
        thisTree.ParIn_a[:, kid2] = r_a
        thisTree.nodeOp_A[:, newParent] = P_A

    return thisTree
def forwardPropTree(W, WO, Wcat, Wv, Wo, sNum, sTree, sStr=None, sNN=None,
                    indicies=None, params=None):
    """Forward-propagate one sentence through its binary parse tree.

    Every node carries a vector ``a`` (length ``wsz``) and an operator
    matrix ``A`` (``wsz x wsz``, stored flattened).  Leaves take their vector
    from ``Wv`` and build their operator from ``Wo`` as
    ``diag(d) + U.V^T``.  Two siblings ``(a, A)`` and ``(b, B)`` are merged
    into their parent with::

        parent vector   = tanh(W . [B.a ; A.b ; 1])
        parent operator = WO . [A ; B]

    Args:
        W: composition weights, ``wsz x (2*wsz + 1)``; the last column
            multiplies the constant bias input.
        WO: operator composition weights, ``wsz x (2*wsz)``.
        Wcat: unused here; kept for interface compatibility.
        Wv: word vectors, one column per word id, ``wsz`` rows.
        Wo: word operator parameters, one column per word id, laid out as
            ``wsz`` diagonal entries, then ``wsz*r`` entries of ``U``, then
            ``wsz*r`` entries of ``V``.
        sNum: per-node word id; negative for internal nodes.
        sTree: per-node parent index.
        sStr, sNN, indicies: unused here; kept for interface compatibility.
        params: object exposing ``wordSize`` (``wsz``) and ``rankWo``
            (``r``).  Required despite the ``None`` default.

    Returns:
        A ``Tree`` with ``pp``, ``isLeafVec``, ``nodeNames``, ``nodeLabels``,
        ``kids``, ``nodeAct_a``, ``nodeOp_A`` and ``ParIn_a`` filled in.
        ``ParIn_z`` and ``score`` are allocated but left at zero.

    Raises:
        IndexError: if no two adjacent merge candidates share a parent
            (``sTree`` does not describe a binary tree).
    """
    wsz = params.wordSize
    r = params.rankWo

    # Leaves are the nodes that carry a real word id.
    words = np.where(sNum >= 0)[0]
    numTotalNodes = len(sNum)

    allV = Wv[:, sNum[words]]
    allO = Wo[:, sNum[words]]

    thisTree = Tree()
    # set tree structure of tree
    thisTree.pp = sTree
    # set which nodes are leaf nodes
    thisTree.isLeafVec = np.zeros(numTotalNodes)
    thisTree.isLeafVec[words] = 1
    thisTree.nodeNames = np.arange(len(sTree))
    thisTree.nodeLabels = sNum

    # the inputs to the parent (stay empty for the top node)
    thisTree.ParIn_z = np.zeros((wsz, numTotalNodes))
    thisTree.ParIn_a = np.zeros((wsz, numTotalNodes))
    # node vectors
    thisTree.nodeAct_a = np.zeros((wsz, numTotalNodes))
    # node operators, one flattened wsz x wsz matrix per column
    thisTree.nodeOp_A = np.zeros((wsz ** 2, numTotalNodes))
    # the scores for each decision
    thisTree.score = np.zeros(numTotalNodes)
    # the children of each node (for speed)
    thisTree.kids = np.zeros((numTotalNodes, 2), dtype='int32')

    # initialize the vectors and operators of the words (leaf nodes)
    thisTree.nodeAct_a[:, words] = allV
    for col, node in enumerate(words):
        diag_a = np.diag(allO[:wsz, col])
        U = allO[wsz:wsz * (1 + r), col].reshape(wsz, r)
        V = allO[wsz * (1 + r):, col].reshape(wsz, r)
        # Store at the leaf's node index so it matches nodeAct_a and the
        # kid lookups below, even when leaves are not the first columns.
        thisTree.nodeOp_A[:, node] = (
            diag_a + np.dot(U, np.transpose(V))).reshape(wsz ** 2)

    # Constant bias input.  It has to be a real 1: np.ndarray([1]) would only
    # allocate one element without initialising it.
    bias = np.ones(1)

    toMerge = words.astype('int32')
    while len(toMerge) > 1:
        # find the first adjacent pair of candidates (initially words) that
        # share a parent
        for i in range(len(toMerge) - 1):
            if sTree[toMerge[i]] == sTree[toMerge[i + 1]]:
                break
        else:
            raise IndexError("no adjacent nodes share a parent in sTree")

        kid1 = toMerge[i]
        kid2 = toMerge[i + 1]
        newParent = sTree[kid1]
        thisTree.kids[newParent, :] = [kid1, kid2]

        # the new parent becomes a merge candidate; the other kid is dropped
        toMerge[i] = newParent
        toMerge = np.delete(toMerge, i + 1)

        a = thisTree.nodeAct_a[:, kid1]
        A = thisTree.nodeOp_A[:, kid1].reshape(wsz, wsz)
        b = thisTree.nodeAct_a[:, kid2]
        B = thisTree.nodeOp_A[:, kid2].reshape(wsz, wsz)

        # Each child's vector is transformed by its sibling's operator.
        l_a = np.dot(B, a)
        r_a = np.dot(A, b)
        C = np.concatenate((l_a, r_a, bias))
        thisTree.nodeAct_a[:, newParent] = np.tanh(np.dot(W, C))
        P_A = np.dot(WO, np.vstack((A, B))).reshape(wsz ** 2)

        # save all this for backprop:
        thisTree.ParIn_a[:, kid1] = l_a
        thisTree.ParIn_a[:, kid2] = r_a
        thisTree.nodeOp_A[:, newParent] = P_A

    return thisTree