def WordAveraging(sent, WV, dim):
    """Return the component-wise mean of the embeddings of the words in ``sent``.

    ``sent`` is stripped of newlines, parentheses and square brackets, passed
    through ``preprocessor1`` and split on single spaces.  Every resulting
    token that is a key of ``WV`` contributes its vector to the average;
    tokens missing from ``WV`` are ignored.

    Args:
        sent: Raw sentence text.
        WV: Mapping from word to a sequence of numeric (or numeric-string)
            components.
        dim: Length of the returned vector.

    Returns:
        A list of ``dim`` floats.  It is all zeros when no token of ``sent``
        is found in ``WV``.

    NOTE(review): this file defines ``WordAveraging`` a second time further
    down; the later definition is the one bound at import time.  Confirm which
    of the two is meant to survive.
    """
    summ = [0.0] * dim
    matched = 0.0
    words = preprocessor1(re.sub(r"[\n(\[\])]", "", sent)).split(" ")
    for word in words:
        if word not in WV:
            continue
        matched += 1.0
        # zip() stops at the shorter of the two sequences, so an embedding
        # longer than ``dim`` is truncated instead of indexing past the end
        # of ``summ``, and a shorter one simply leaves the tail untouched.
        for i, component in zip(range(dim), WV[word]):
            summ[i] += float(component)
    if matched != 0:
        summ = [total / matched for total in summ]
    return summ
def WordAveraging(sent, WV, dim):
    """Average the first ``dim`` embedding components over the known words of ``sent``.

    The sentence has newlines, parentheses and square brackets removed, is run
    through ``preprocessor1`` and is split on single spaces.  Tokens that are
    not keys of ``WV`` do not take part in the average.

    Args:
        sent: Raw sentence text.
        WV: Mapping from word to an indexable vector with at least ``dim``
            components (a shorter vector raises ``IndexError``).
        dim: Number of components to average and length of the result.

    Returns:
        A list of ``dim`` floats; all zeros when no token is found in ``WV``.
    """
    totals = [0.0] * dim
    hits = 0.0
    cleaned = re.sub(r"[\n(\[\])]", "", sent)
    tokens = preprocessor1(cleaned).split(" ")

    for token in tokens:
        if token not in WV:
            continue
        hits += 1.0
        embedding = WV[token]
        for pos in range(dim):
            totals[pos] += float(embedding[pos])

    # Only divide when at least one token contributed, to avoid 0/0.
    if hits != 0:
        totals = [component / hits for component in totals]
    return totals
def readTree_att_Scalar(filePath, W1, WV, dim, hierarchyType, attScaler, activationFunc):
    """Build a dict of tree nodes from a span file, scaling selected node vectors.

    Each line of the file is parsed after removing newlines, parentheses and
    square brackets:

    * the first two comma-separated fields are the integer start and end of
      the span;
    * the first single-quoted field is the span text;
    * the last single-quoted field is the relation and the one before it is
      the hierarchy label (presumably "Nucleus"/"Satellite" -- confirm against
      the data files).

    A line whose start equals its end creates a leaf whose vector is
    ``WordAveraging`` of its text.  Any other line creates an internal node
    whose vector is ``feedforward_act`` applied to the concatenation of its
    (left, right) child vectors.  Whenever a node's hierarchy label equals
    ``hierarchyType`` its vector is multiplied by ``attScaler``.  After all
    lines are read, if more than one node is still unattached, the first two
    (in sorted key order) are merged into a root node.

    Keys are strings: a leaf index is prefixed with "9" when
    ``index // 10 != 0``, and an internal node's key is the concatenation of
    the (prefixed) indices in its span.

    Args:
        filePath: Path of the tree file.
        W1: Weights forwarded to ``feedforward_act``.
        WV: Word-to-vector mapping forwarded to ``WordAveraging``.
        dim: Embedding size forwarded to ``WordAveraging``.
        hierarchyType: Hierarchy label whose nodes get scaled.
        attScaler: Multiplier applied via ``np.multiply``.
        activationFunc: Activation selector forwarded to ``feedforward_act``.

    Returns:
        Dict mapping node key to ``ClassNode.Node``.  It is empty when the
        file is missing or has zero size.

    Raises:
        KeyError: if an internal line does not resolve to two known children.
    """
    EDUs = {}        # keys of nodes that do not have a parent yet
    EDUs_main = {}   # every node created so far, by key

    # A missing or empty file yields the empty mapping on every path, so
    # callers never receive an implicit None.
    if not os.path.exists(filePath) or os.stat(filePath).st_size == 0:
        return EDUs_main

    # The context manager closes the handle even if parsing raises.
    with open(filePath, "r") as tree_file:
        for line in tree_file:
            line = re.sub(r"[\n(\[\])]", "", line)
            arr = re.split(r"\s*,\s*", line)
            arr2 = line.split("'")
            relation = arr2[len(arr2) - 2]
            hierarchy = arr2[len(arr2) - 4]

            if arr[0] == arr[1]:
                # Leaf node.  Floor division keeps the integer semantics the
                # key scheme relies on regardless of interpreter version.
                if int(arr[0]) // 10 != 0:
                    arr[0] = "9" + arr[0]
                text = re.sub(r"[\n,.:'(\[\])]", "", arr2[1])
                vector = WordAveraging(preprocessor1(text), WV, dim)
                if hierarchy == hierarchyType:
                    vector = np.multiply(attScaler, vector)
                EDUs_main[arr[0]] = ClassNode.Node(
                    True, False, 0, 0, "", hierarchy, relation, vector, "")
                EDUs[arr[0]] = "hi"
            else:
                # Internal node: build the concatenated key of its span.
                numconcat = ""
                for num in range(int(arr[0]), int(arr[1]) + 1):
                    if num // 10 != 0:
                        num = "9" + str(num)
                    numconcat += repr(int(num))

                childs = {}
                i = 1
                numconcat2 = numconcat
                EDUitem = sortEduKey(list(EDUs_main.keys()), reverse=True)
                for key in EDUitem:
                    key = str(key)
                    if numconcat != '' and key in numconcat:
                        childs[i] = EDUs_main[key].vector
                        if i == 1:
                            rightChild = key
                            EDUs_main[key].child = "right"
                        elif i == 2:
                            leftChild = key
                            EDUs_main[key].child = "left"
                        # Every match after the first lands in slot 2.
                        i = 2
                        # Consume the matched part so it cannot match again.
                        numconcat = numconcat.replace(key, "")
                        del EDUs[key]

                EDUs[numconcat2] = "hi"
                vector = feedforward_act(
                    np.concatenate([childs[2], childs[1]], 0), W1, activationFunc)
                if hierarchy == hierarchyType:
                    vector = np.multiply(attScaler, vector)
                EDUs_main[numconcat2] = ClassNode.Node(
                    False, False, leftChild, rightChild, "", hierarchy, relation,
                    vector, "")

    # Join the first two still-unattached nodes under a root.
    eduKey = sorted(EDUs.keys())
    if len(eduKey) > 1:
        left_key, right_key = eduKey[0], eduKey[1]
        vector = feedforward_act(
            np.concatenate(
                [EDUs_main[left_key].vector, EDUs_main[right_key].vector], 0),
            W1, activationFunc)
        EDUs[left_key + right_key] = "hi"
        EDUs_main[left_key].child = "left"
        EDUs_main[right_key].child = "right"
        # NOTE(review): ``hierarchy`` here is the label of the last line read,
        # not a property of the root -- confirm this is intended.
        if hierarchy == hierarchyType:
            vector = np.multiply(attScaler, vector)
        EDUs_main[left_key + right_key] = ClassNode.Node(
            False, True, left_key, right_key, "", "", "", vector, "")
        del EDUs[left_key]
        del EDUs[right_key]
    return EDUs_main
def readTree_att_NSWeight(filePath, W1, WV, dim, WSat, WNu, activationFunc):
    """Build a dict of tree nodes from a span file, applying attention to children.

    Each line of the file is parsed after removing newlines, parentheses and
    square brackets:

    * the first two comma-separated fields are the integer start and end of
      the span;
    * the first single-quoted field is the span text;
    * the last single-quoted field is the relation and the one before it is
      the hierarchy label (presumably "Nucleus"/"Satellite" -- confirm against
      the data files).

    A line whose start equals its end creates a leaf whose vector is
    ``WordAveraging`` of its text.  Any other line creates an internal node:
    each child vector is first passed through ``apply_attention`` together
    with the child's ``nodeHierarchy``, ``WNu`` and ``WSat``, and the parent
    vector is ``feedforward_act`` of the concatenated (left, right) results.
    After all lines are read, if more than one node is still unattached, the
    first two (in sorted key order) are merged into a root node the same way.

    Keys are strings: a leaf index is prefixed with "9" when
    ``index // 10 != 0``, and an internal node's key is the concatenation of
    the (prefixed) indices in its span.

    Diagnostic text is printed to stdout when a leaf's largest component
    exceeds 2 in magnitude, or when the mean absolute component of a leaf or
    attended child vector reaches 0.8.

    Args:
        filePath: Path of the tree file.
        W1: Weights forwarded to ``feedforward_act``.
        WV: Word-to-vector mapping forwarded to ``WordAveraging``.
        dim: Embedding size; also the divisor of the diagnostic mean.
        WSat: Weights forwarded to ``apply_attention``.
        WNu: Weights forwarded to ``apply_attention``.
        activationFunc: Activation selector forwarded to the helpers.

    Returns:
        Dict mapping node key to ``ClassNode.Node``.  It is empty when the
        file is missing or has zero size.

    Raises:
        KeyError: if an internal line does not resolve to two known children.
    """
    EDUs = {}        # keys of nodes that do not have a parent yet
    EDUs_main = {}   # every node created so far, by key

    # A missing or empty file yields the empty mapping on every path, so
    # callers never receive an implicit None.
    if not os.path.exists(filePath) or os.stat(filePath).st_size == 0:
        return EDUs_main

    # The context manager closes the handle even if parsing raises.
    with open(filePath, "r") as tree_file:
        for line in tree_file:
            line = re.sub(r"[\n(\[\])]", "", line)
            arr = re.split(r"\s*,\s*", line)
            arr2 = line.split("'")
            relation = arr2[len(arr2) - 2]
            hierarchy = arr2[len(arr2) - 4]

            if arr[0] == arr[1]:
                # Leaf node.  Floor division keeps the integer semantics the
                # key scheme relies on regardless of interpreter version.
                if int(arr[0]) // 10 != 0:
                    arr[0] = "9" + arr[0]
                text = re.sub(r"[\n,.:'(\[\])]", "", arr2[1])
                vector = WordAveraging(preprocessor1(text), WV, dim)

                # Diagnostics for unexpectedly large leaf vectors.  The
                # single-argument print form emits the same text on
                # Python 2 and Python 3.
                if np.absolute(max(vector)) > 2:
                    print("vayyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy")
                if np.sum(np.absolute(vector)) / dim >= 0.8:
                    for item in EDUs_main.keys():
                        print("%s   %s" % (item, EDUs_main[item].vector))
                    print("khodaaaaaaaaaaaaaaaaaa")

                EDUs_main[arr[0]] = ClassNode.Node(
                    True, False, 0, 0, "", hierarchy, relation, vector, "")
                EDUs[arr[0]] = "hi"
            else:
                # Internal node: build the concatenated key of its span.
                numconcat = ""
                for num in range(int(arr[0]), int(arr[1]) + 1):
                    if num // 10 != 0:
                        num = "9" + str(num)
                    numconcat += repr(int(num))

                childs = {}
                i = 1
                numconcat2 = numconcat
                EDUitem = sortEduKey(list(EDUs_main.keys()), reverse=True)
                for key in EDUitem:
                    key = str(key)
                    if numconcat != '' and key in numconcat:
                        childs[i] = apply_attention(
                            EDUs_main[key].vector, EDUs_main[key].nodeHierarchy,
                            WNu, WSat, activationFunc)

                        # Diagnostics for unexpectedly large attended vectors.
                        if np.sum(np.absolute(childs[i])) / dim >= 0.8:
                            for item in EDUs_main.keys():
                                print("%s   %s" % (item, EDUs_main[item].vector))
                            print("%s   %s" % (key, childs[i]))
                            print("khodaaaaaaaaaaaaaaaaaa")

                        if i == 1:
                            rightChild = key
                            EDUs_main[key].child = "right"
                        elif i == 2:
                            leftChild = key
                            EDUs_main[key].child = "left"
                        # Every match after the first lands in slot 2.
                        i = 2
                        # Consume the matched part so it cannot match again.
                        numconcat = numconcat.replace(key, "")
                        del EDUs[key]

                EDUs[numconcat2] = "hi"
                vector = feedforward_act(
                    np.concatenate([childs[2], childs[1]], 0), W1, activationFunc)
                EDUs_main[numconcat2] = ClassNode.Node(
                    False, False, leftChild, rightChild, "", hierarchy, relation,
                    vector, "")

    # Join the first two still-unattached nodes under a root.
    eduKey = sorted(EDUs.keys())
    if len(eduKey) > 1:
        left_key, right_key = eduKey[0], eduKey[1]
        EDU_0 = apply_attention(
            EDUs_main[left_key].vector, EDUs_main[left_key].nodeHierarchy,
            WNu, WSat, activationFunc)
        EDU_1 = apply_attention(
            EDUs_main[right_key].vector, EDUs_main[right_key].nodeHierarchy,
            WNu, WSat, activationFunc)
        vector = feedforward_act(
            np.concatenate([EDU_0, EDU_1], 0), W1, activationFunc)
        EDUs[left_key + right_key] = "hi"
        EDUs_main[left_key].child = "left"
        EDUs_main[right_key].child = "right"
        EDUs_main[left_key + right_key] = ClassNode.Node(
            False, True, left_key, right_key, "", "", "", vector, "")
        del EDUs[left_key]
        del EDUs[right_key]
    return EDUs_main