def getWordPairVec(w1, w2):
    """Build a single vector for a word pair from the two words' embedding rows.

    The word list and the embedding matrix are both reloaded on every call
    (via ``getWordList()`` and ``getEmbedding.getEmbeddingMat_HLBL()``).

    Args:
        w1: First word; its position in the word list selects a matrix row.
        w2: Second word; looked up the same way.

    Returns:
        The result of ``hstack`` applied to ``[row_of_w1, row_of_w2]``.

    Note:
        A word that is absent from the word list makes ``.index`` fail
        (presumably with ``ValueError`` if the word list is a plain list --
        TODO confirm against ``getWordList``).
    """
    vocabulary = getWordList()
    embedding = getEmbedding.getEmbeddingMat_HLBL()
    # Row i of the matrix is taken to belong to entry i of the word list.
    rows = [embedding[vocabulary.index(word), :] for word in (w1, w2)]
    return hstack(rows)
def getWordPairVec(w1, w2):
    """Return the embedding rows of ``w1`` and ``w2`` joined by ``hstack``.

    Both lookup tables are obtained afresh on each call: the word list from
    ``getWordList()`` and the matrix from
    ``getEmbedding.getEmbeddingMat_HLBL()``.

    Args:
        w1: Word whose row forms the first part of the result.
        w2: Word whose row forms the second part of the result.

    Returns:
        ``hstack([row_of_w1, row_of_w2])``.

    Note:
        ``.index`` is used for the lookup, so a word missing from the word
        list raises instead of returning a placeholder (exception type depends
        on what ``getWordList`` returns -- presumably ``ValueError``).
    """
    words = getWordList()
    matrix = getEmbedding.getEmbeddingMat_HLBL()

    # Resolve each word to its row number, then slice that full row out.
    first_row = words.index(w1)
    first_vec = matrix[first_row, :]
    second_row = words.index(w2)
    second_vec = matrix[second_row, :]

    return hstack([first_vec, second_vec])
# NOTE(review): the statements down to `wfile.writelines(...)` are the tail of
# a definition whose beginning lies outside this view. They are reproduced
# token-for-token; their real nesting level cannot be determined from here.
if item[1] in wordList and item[2] in wordList and item[3] in wordList and item[4] in wordList and item[5] in wordList and item[6] in wordList:
    sampleList.append(line)
rfile.close()
# NOTE(review): no close() for wfile is visible in this fragment -- confirm
# that the output is flushed/closed by the enclosing code.
wfile = open(u"res/emnlp2013_turk_HLBL.txt","w")
wfile.writelines(item for item in sampleList)


def getWordList():
    """Read the word file and return its lines as a list of words.

    Returns:
        list: One entry per line of ``I:/数据/embedings/hlbl/words.txt``, in
        file order, with leading/trailing newline characters stripped. The
        position of a word in this list is what the rest of the module uses
        as its row number in ``WordEmbedding``.
    """
    # `with` releases the handle even when reading raises part-way through.
    with open(u"I:/数据/embedings/hlbl/words.txt") as rfile:
        return [line.strip("\n") for line in rfile]


# Loaded once at import time and shared by the functions below.
WordEmbedding = getEmbedding.getEmbeddingMat_HLBL()
wordList = getWordList()


def sentCombMat_verb(w1, w2, w3):
    """Return the embedding row of the middle word ``w2`` only.

    The original looked up ``w1`` while storing the result in a variable
    named ``w2Mat``; the lookup now uses ``w2`` so that the returned row
    matches the word the name refers to.

    Args:
        w1: First word of the triple (unused).
        w2: Second word of the triple -- presumably the verb, given the
            function name; verify against the caller.
        w3: Third word of the triple (unused).

    Returns:
        The full row of ``WordEmbedding`` at ``wordList.index(w2)``.

    Raises:
        ValueError: If ``w2`` is not present in ``wordList``.
    """
    return WordEmbedding[wordList.index(w2), :]


# NOTE(review): this definition continues past the end of the visible source;
# only its beginning is shown and it is left unchanged.
def vecSim(x, y):
    # result1 = x.T.dot(y)
    # result1 = result1.tolist()[0][0]  # use this for the sparse original matrix before SVD
    # x = x*wArr
    # y = y*wArr
    result1 = np.dot(x,y)
    result2 = np.linalg.norm(x)
rfile.close() wfile = open(u"res/emnlp2013_turk_HLBL.txt", "w") wfile.writelines(item for item in sampleList) def getWordList(): rfile = open(u"I:/数据/embedings/hlbl/words.txt") wordList = [] for line in rfile: wordList.append(line.strip("\n")) rfile.close() # arr = np.array(wordList) return wordList WordEmbedding = getEmbedding.getEmbeddingMat_HLBL() wordList = getWordList() def sentCombMat_Mult(w1, w2, w3): w1Mat = WordEmbedding[wordList.index(w1), :] w2Mat = WordEmbedding[wordList.index(w1), :] w3Mat = WordEmbedding[wordList.index(w1), :] return w1Mat * w2Mat * w3Mat def vecSim(x, y): # result1 = x.T.dot(y) # result1 = result1.tolist()[0][0] #svd 之前的稀疏原始矩阵用这个