def getDelRefMap(start=100000, stop=120000,
                 out_path="res/sennaMap_4739_E.txt"):
    """Map deleted reference words to their best-scoring chosen reference word.

    For every word ``delRefList[i]`` with ``start <= i < stop`` that occurs in
    the list returned by ``getSennaList()``, score it against every word in
    ``getRefList()`` with ``vecSim`` and keep the candidate with the highest
    score (the first one on ties, as ``np.argmax`` does). Candidates that are
    missing from the Senna word list get a score of 0.

    The result lines (``word<TAB>chosen``) are written to ``out_path``; the
    file is rewritten as a checkpoint whenever ``i`` is a multiple of 100 and
    once more after the loop.

    Args:
        start: First index of ``delRefList`` to process.
        stop: One past the last index to process; clamped to the list length
            so a short list no longer aborts the run with ``IndexError``.
        out_path: File that receives the mapping lines.

    Returns:
        None. The mapping is only written to ``out_path`` and echoed to stdout.
    """
    delRefList = getdelRef()
    chosedRef = getRefList()
    WordEmbedding = getEmbedding.getEmbeddingMat_senna()
    SennaList = getSennaList()

    # word -> row index. setdefault keeps the FIRST occurrence, which is what
    # list.index() returned, while making each lookup O(1) instead of O(n).
    senna_index = {}
    for pos, word in enumerate(SennaList):
        senna_index.setdefault(word, pos)

    # The candidate vectors do not depend on the word being mapped, so fetch
    # them once. None marks a candidate that has no embedding row.
    ref_vecs = [
        WordEmbedding[senna_index[word], :] if word in senna_index else None
        for word in chosedRef
    ]

    def _checkpoint(lines):
        # 'with' guarantees the handle is flushed and closed on every write.
        with open(out_path, "w") as wfile:
            wfile.writelines(item + '\n' for item in lines)

    refmap = []
    stop = min(stop, len(delRefList))
    for i in range(start, stop):
        word1 = delRefList[i]
        print(i)
        if i % 100 == 0:
            _checkpoint(refmap)
        if word1 not in senna_index:
            continue
        w1vec = WordEmbedding[senna_index[word1], :]
        sim = [0 if vec is None else vecSim(w1vec, vec) for vec in ref_vecs]
        chosed = chosedRef[np.argmax(sim)]
        item = word1 + "\t" + chosed
        print(item)
        refmap.append(item)
    _checkpoint(refmap)
def getDelRefMap(start=50000, stop=100000,
                 out_path="res/sennaMap_4739_D.txt",
                 seed_path="res/sennaMap_4739_1-20000.txt"):
    """Extend a previously saved word mapping with more deleted reference words.

    The mapping starts from the lines already stored in ``seed_path``. Then,
    for every word ``delRefList[i]`` with ``start <= i < stop`` that occurs in
    the list returned by ``getSennaList()``, the word is scored against every
    word in ``getRefList()`` with ``vecSim`` and the highest-scoring candidate
    is kept (the first one on ties, as ``np.argmax`` does). Candidates missing
    from the Senna word list get a score of 0.

    The accumulated lines (``word<TAB>chosen``) are written to ``out_path``;
    the file is rewritten as a checkpoint whenever ``i`` is a multiple of 100
    and once more after the loop.

    Args:
        start: First index of ``delRefList`` to process.
        stop: One past the last index to process; clamped to the list length
            so a short list no longer aborts the run with ``IndexError``.
        out_path: File that receives the mapping lines.
        seed_path: Existing mapping file whose lines seed the result.

    Returns:
        None. The mapping is only written to ``out_path`` and echoed to stdout.
    """
    delRefList = getdelRef()
    chosedRef = getRefList()
    WordEmbedding = getEmbedding.getEmbeddingMat_senna()
    SennaList = getSennaList()

    # Strip the line terminators while loading: every entry gets '\n' appended
    # again on output, so keeping them would emit a blank line after each
    # seeded entry.
    with open(seed_path) as rfile:
        refmap = [line.rstrip("\n") for line in rfile]

    # word -> row index. setdefault keeps the FIRST occurrence, which is what
    # list.index() returned, while making each lookup O(1) instead of O(n).
    senna_index = {}
    for pos, word in enumerate(SennaList):
        senna_index.setdefault(word, pos)

    # The candidate vectors do not depend on the word being mapped, so fetch
    # them once. None marks a candidate that has no embedding row.
    ref_vecs = [
        WordEmbedding[senna_index[word], :] if word in senna_index else None
        for word in chosedRef
    ]

    def _checkpoint(lines):
        # 'with' guarantees the handle is flushed and closed on every write.
        with open(out_path, "w") as wfile:
            wfile.writelines(item + '\n' for item in lines)

    stop = min(stop, len(delRefList))
    for i in range(start, stop):
        word1 = delRefList[i]
        print(i)
        if i % 100 == 0:
            _checkpoint(refmap)
        if word1 not in senna_index:
            continue
        w1vec = WordEmbedding[senna_index[word1], :]
        sim = [0 if vec is None else vecSim(w1vec, vec) for vec in ref_vecs]
        chosed = chosedRef[np.argmax(sim)]
        item = word1 + "\t" + chosed
        print(item)
        refmap.append(item)
    _checkpoint(refmap)
def getWordPairVec(w1,w2):
    """Return the embedding rows of ``w1`` and ``w2`` joined with ``hstack``.

    Each word is located in the list returned by ``getWordList()`` and the row
    at that position is taken from the matrix returned by
    ``getEmbedding.getEmbeddingMat_senna()``. The row of ``w1`` comes first.

    NOTE(review): assuming ``getWordList()`` returns a plain list, a word that
    is not in it makes ``.index`` raise ``ValueError`` -- confirm with callers.
    """
    vocabulary = getWordList()
    embedding = getEmbedding.getEmbeddingMat_senna()
    # Look the words up in argument order so w1's row precedes w2's.
    rows = [embedding[vocabulary.index(token), :] for token in (w1, w2)]
    return hstack(rows)
def getWordPairVec(w1, w2):
    """Build one vector for a word pair by stacking both embedding rows.

    The position of each word in the list from ``getWordList()`` selects a row
    of the matrix from ``getEmbedding.getEmbeddingMat_senna()``; the two rows
    are passed to ``hstack`` with ``w1``'s row first.

    NOTE(review): assuming ``getWordList()`` returns a plain list, a word that
    is not in it makes ``.index`` raise ``ValueError`` -- confirm with callers.
    """
    vocab = getWordList()
    matrix = getEmbedding.getEmbeddingMat_senna()

    def row_of(word):
        # Row of the embedding matrix at the word's position in the word list.
        return matrix[vocab.index(word), :]

    left = row_of(w1)
    right = row_of(w2)
    return hstack([left, right])
def getDelRefMap(out_path="res/sennaMap6830.txt"):
    """Map every deleted reference word to its best-scoring chosen reference word.

    Each word from ``getdelRef()`` that occurs in the list returned by
    ``getSennaList()`` is scored against every word in ``getRefList()`` with
    ``vecSim``; the highest-scoring candidate is kept (the first one on ties,
    as ``np.argmax`` does). Candidates missing from the Senna word list get a
    score of 0. Words without an embedding row are skipped.

    Args:
        out_path: File that receives one ``word<TAB>chosen`` line per mapped
            word, written once after all words are processed.

    Returns:
        None. The mapping is only written to ``out_path`` and echoed to stdout.
    """
    delRefList = getdelRef()
    chosedRef = getRefList()
    WordEmbedding = getEmbedding.getEmbeddingMat_senna()
    SennaList = getSennaList()

    # word -> row index. setdefault keeps the FIRST occurrence, which is what
    # list.index() returned, while making each lookup O(1) instead of O(n).
    senna_index = {}
    for pos, word in enumerate(SennaList):
        senna_index.setdefault(word, pos)

    # The candidate vectors do not depend on the word being mapped, so fetch
    # them once. None marks a candidate that has no embedding row.
    ref_vecs = [
        WordEmbedding[senna_index[word], :] if word in senna_index else None
        for word in chosedRef
    ]

    # Named 'mapping' rather than 'map' so the builtin is not shadowed.
    mapping = []
    for word1 in delRefList:
        if word1 not in senna_index:
            continue
        w1vec = WordEmbedding[senna_index[word1], :]
        sim = [0 if vec is None else vecSim(w1vec, vec) for vec in ref_vecs]
        chosed = chosedRef[np.argmax(sim)]
        print(word1 + "\t" + chosed)
        mapping.append(word1 + "\t" + chosed)

    # 'with' guarantees the handle is flushed and closed.
    with open(out_path, "w") as wfile:
        wfile.writelines(item + '\n' for item in mapping)
def getDelRefMap(out_path="res/sennaMap6830.txt"):
    """Map every deleted reference word to its best-scoring chosen reference word.

    Each word from ``getdelRef()`` that occurs in the list returned by
    ``getSennaList()`` is scored against every word in ``getRefList()`` with
    ``vecSim``; the highest-scoring candidate is kept (the first one on ties,
    as ``np.argmax`` does). Candidates missing from the Senna word list get a
    score of 0. Words without an embedding row are skipped.

    Args:
        out_path: File that receives one ``word<TAB>chosen`` line per mapped
            word, written once after all words are processed.

    Returns:
        None. The mapping is only written to ``out_path`` and echoed to stdout.
    """
    delRefList = getdelRef()
    chosedRef = getRefList()
    WordEmbedding = getEmbedding.getEmbeddingMat_senna()
    SennaList = getSennaList()

    # word -> row index. setdefault keeps the FIRST occurrence, which is what
    # list.index() returned, while making each lookup O(1) instead of O(n).
    senna_index = {}
    for pos, word in enumerate(SennaList):
        senna_index.setdefault(word, pos)

    # The candidate vectors do not depend on the word being mapped, so fetch
    # them once. None marks a candidate that has no embedding row.
    ref_vecs = [
        WordEmbedding[senna_index[word], :] if word in senna_index else None
        for word in chosedRef
    ]

    # Named 'mapping' rather than 'map' so the builtin is not shadowed.
    mapping = []
    for word1 in delRefList:
        if word1 not in senna_index:
            continue
        w1vec = WordEmbedding[senna_index[word1], :]
        sim = [0 if vec is None else vecSim(w1vec, vec) for vec in ref_vecs]
        chosed = chosedRef[np.argmax(sim)]
        print(word1 + "\t" + chosed)
        mapping.append(word1 + "\t" + chosed)

    # 'with' guarantees the handle is flushed and closed.
    with open(out_path, "w") as wfile:
        wfile.writelines(item + '\n' for item in mapping)
def vecSim(x, y): result1 = np.dot(x,y) result2 = np.linalg.norm(x) result3 = np.linalg.norm(y) if(result2*result3 == 0): return 1 cos = result1/(result2*result3) sim = 0.5+0.5*cos return sim delRefList = getdelRef() chosedRef = getRefList() WordEmbedding = getEmbedding.getEmbeddingMat_senna() SennaList = getSennaList() refmap = [] # count = 0 def getDelRefMap(lock,index): count = 0 for i in range(index,index*30000): word1 = delRefList[i] # for word1 in delRefList: # print(count) # count += 1 print(count) if word1 not in SennaList:
rfile.close() wfile = open(u"res/GS2011_SENNA.txt", "w") wfile.writelines(item for item in sampleList) def getWordList(): rfile = open(u"I:/数据/embedings/senna/words.txt") wordList = [] for line in rfile: wordList.append(line.strip("\n")) rfile.close() # arr = np.array(wordList) return wordList WordEmbedding = getEmbedding.getEmbeddingMat_senna() wordList = getWordList() def sentCombMat_Mult(w1, w2, w3): w1Mat = WordEmbedding[wordList.index(w1), :] w2Mat = WordEmbedding[wordList.index(w1), :] w3Mat = WordEmbedding[wordList.index(w1), :] return w1Mat * w2Mat * w3Mat def vecSim(x, y): result1 = np.dot(x, y)