def calculateError_validation(path_Folder, mode, WV, dim, W1 , W2, OutputFile, WSat, WNu, iteration, activationFunc):
    """Compute, log and return the mean error over paired pos/neg tree files.

    Files are taken from ``path_Folder + "pos/"`` and ``path_Folder + "neg/"``;
    only as many files as the smaller folder holds are used from each side.
    Each file is turned into a tree with ``readTree_att_NSWeight``, the vector
    of the node whose key sorts first under ``sortEduKey(..., reverse=True)``
    (presumably the root -- confirm against ``sortEduKey``) is pushed through
    ``feedforward_act`` with ``W2``, and ``MSE`` against the target
    (``[1.0, 0]`` for pos, ``[0, 1.0]`` for neg) is accumulated.

    The result is printed and appended to ``OutputFile`` as
    ``iteration,mode,error`` before being returned.  (Previously the
    ``return`` came first, so nothing was ever logged.)

    NOTE(review): another ``calculateError_validation`` with a different
    signature is defined later in the visible source; if both live in the
    same module the later definition replaces this one.

    Raises:
        ZeroDivisionError: if either folder is empty (no samples to average).
    """
    posFileList_test = os.listdir(path_Folder + "pos/")
    negFileList_test = os.listdir(path_Folder + "neg/")
    numberofSamples_test = min(len(posFileList_test), len(negFileList_test))

    def _sample_error(path_File_test, y):
        # Error of one file against its target vector.
        EDUs = readTree_att_NSWeight(path_File_test, W1, WV, dim, WSat, WNu, activationFunc)
        eduKeys = sortEduKey(EDUs.keys(), reverse=True)
        input2 = EDUs[str(eduKeys[0])].vector
        output = feedforward_act(input2, W2, activationFunc)
        return MSE(y, output)

    sumErr = 0.0
    # zip() stops at the shorter list, i.e. after numberofSamples_test pairs.
    for pos_name, neg_name in zip(posFileList_test, negFileList_test):
        sumErr += _sample_error(path_Folder + "pos/" + pos_name, [1.0, 0])
        sumErr += _sample_error(path_Folder + "neg/" + neg_name, [0, 1.0])

    totall_Err = sumErr / (2 * numberofSamples_test)
    # Single-argument print() produces the same text under Python 2 and 3.
    print("   ".join(str(v) for v in (iteration, mode, totall_Err)))
    OutputFile.write("%s,%s,%s\n" % (iteration, mode, totall_Err))
    return totall_Err
def calculateError_validation_EDUs(allEDUs, mode, WV, dim, W1 , W2, OutputFile, WSat, WNu, iteration, activationFunc):
    """Compute, log and return the MSE over already-loaded trees.

    ``allEDUs`` maps an id to a tree (dict of nodes).  Every tree is refreshed
    with ``update_EDU``; non-empty trees are then labelled by the sign of
    their id: ``id > 0`` gets target ``[1.0, 0]``, ``id < 0`` gets
    ``[0, 1.0]``, and an id that is neither is skipped.  The vector of the
    node whose key sorts first under ``sortEduKey(..., reverse=True)`` is fed
    through ``feedforward_act`` with ``W2`` and the module-level
    ``global_outputActivation`` (``activationFunc`` is only passed to
    ``update_EDU``).

    The error ``MSE(targets, outputs)`` is printed, appended to ``OutputFile``
    as ``iteration,mode,error`` and returned.  With no usable trees, ``MSE``
    receives two arrays of shape (0, 2).
    """
    targets = []
    outputs = []
    for EDUid in list(allEDUs.keys()):
        EDUs = update_EDU(allEDUs[EDUid], W1, WSat, WNu, dim, activationFunc)
        if not len(EDUs) > 0:
            continue
        if EDUid > 0:
            y = [1.0, 0]
        elif EDUid < 0:
            y = [0, 1.0]
        else:
            continue
        eduKeys = sortEduKey(EDUs.keys(), reverse=True)
        input2 = EDUs[str(eduKeys[0])].vector
        output = feedforward_act(input2, W2, global_outputActivation)
        targets.append(y)
        outputs.append(output)

    # Build the arrays once instead of re-copying them on every iteration.
    # Starting from a (0, 2) array keeps the original empty-result shape and
    # still rejects outputs that are not length 2.
    target_list = np.zeros([0, 2])
    output_list = np.zeros([0, 2])
    if targets:
        target_list = np.concatenate((target_list, targets), 0)
        output_list = np.concatenate((output_list, outputs), 0)

    totall_Err = MSE(target_list, output_list)
    print(iteration, " ", mode , " ", totall_Err)
    OutputFile.write("%s,%s,%s\n" % (iteration, mode, totall_Err))
    return totall_Err
def test_AttWeight(path_Folder, mode, WV, dim, W1 , W2, OutputFile, WSat, WNu, iteration, activationFunc): posFileList_test = os.listdir(path_Folder + "pos/") negFileList_test = os.listdir(path_Folder + "neg/") numberofSamples_test = min(len(posFileList_test), len(negFileList_test)) #numberofSamples_test=100 tp = 0 fp = 0 tn = 0 fn = 0 for k in range(0, numberofSamples_test): path_File_test = path_Folder + "pos/" + posFileList_test[k] #print posFileList_test[k] EDUs = readTree_att_NSWeight(path_File_test, W1, WV, dim, WSat, WNu, activationFunc) y = [1.0, 0] eduKeys = sortEduKey(EDUs.keys(), reverse=True) input2 = EDUs[str(eduKeys[0])].vector # print "pos" # print input2 y_in = feedforward(input2, W2) output = feedforward_act(input2, W2, activationFunc) #print output if output[0] > output[1]: tp += 1 else: fn += 1 path_File_test = path_Folder + "neg/" + negFileList_test[k] #print negFileList_test[k] EDUs = readTree_att_NSWeight(path_File_test, W1, WV, dim, WSat, WNu, activationFunc) eduKeys = sortEduKey(EDUs.keys(), reverse=True) input2 = EDUs[str(eduKeys[0])].vector # print "neg" # print input2 y_in = feedforward(input2, W2) output = feedforward_act(input2, W2, activationFunc) if output[0] < output[1]: tn += 1 else: fp += 1 accuracy = float(tp + tn) / (tp + tn + fp + fn) if (tp+fp) == 0: precision = 0 else: precision = float(tp) / (tp + fp) recall = float(tp) / (tp + fn) if (precision + recall) == 0: F1 =0 else: F1 = 2 * (float(precision * recall)) / (precision + recall) print iteration, " ", mode , " ", tp, " ", tn, " ", fp, " ", fn, " ", accuracy, " ", precision, " ", recall, " ", F1 OutputFile.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n" % (iteration, mode, tp, tn, fp, fn, accuracy, precision, recall, F1))
def train_for_each_Sample_AttWeight (EDUs, EDUs_test, y, W1, W21, W22, eta, dim, activationFunc, dropOutPercent):
    """Run one training step on a pair of trees and return updated weights.

    The vectors of the first-sorted node of ``EDUs`` and ``EDUs_test`` (per
    ``sortEduKey(..., reverse=True)``) are concatenated, passed through a
    hidden layer (``W21``, ``activationFunc``) and an output layer (``W22``,
    module-level ``global_outputActivation``).  Errors are propagated back
    through both layers and, via ``BpthroughTree``, through both trees; the
    two tree gradients are averaged into a single update for ``W1``.

    Drop-out: indices chosen by ``dropOut`` are applied to ``W1`` and ``W21``
    with ``dropcolrow`` for the forward/backward pass and to the gradients
    afterwards.  The weight updates themselves are applied to copies of the
    weights taken before drop-out.

    Returns:
        The tuple ``(W1, W21, W22)`` of updated weights.
    """
    # Snapshots taken before drop-out; update_weight is applied to these.
    pristine_W1 = W1.copy()
    pristine_W21 = W21.copy()
    pristine_W22 = W22.copy()

    dropped = dropOut(len(W1[0]), dropOutPercent)
    thinned_W1 = dropcolrow(W1, dropped, False)

    # NOTE(review): `dropped * 2` repeats a list but doubles a numpy array;
    # which one is intended depends on what dropOut returns -- confirm.
    dropped_W21 = list(dropped) + list(dropped * 2)
    thinned_W21 = dropcolrow(W21, dropped_W21, True)

    test_keys = sortEduKey(EDUs_test.keys(), reverse=True)
    test_vector = EDUs_test[str(test_keys[0])].vector
    tree_keys = sortEduKey(EDUs.keys(), reverse=True)
    tree_vector = EDUs[str(tree_keys[0])].vector
    pair_vector = np.concatenate([tree_vector, test_vector], 0)

    # Forward pass.
    hidden = feedforward_act(pair_vector, thinned_W21, activationFunc)
    y_in = feedforward(hidden, W22)
    output = feedforward_act(hidden, W22, global_outputActivation)

    # Backward pass through the two dense layers.
    error_soft = softmax_error(y, output, y_in, global_outputActivation)
    grad_W22 = calculate_deltaW(error_soft, hidden)
    error_hidden = non_softmax_error(error_soft, W22, pair_vector, thinned_W21, activationFunc)
    grad_W21 = calculate_deltaW(error_hidden, pair_vector)

    # Backward pass through each tree (last flag: True for EDUs, False for EDUs_test).
    grad_tree = BpthroughTree(EDUs, error_hidden, thinned_W1, thinned_W21, dim, activationFunc, True)
    grad_test = BpthroughTree(EDUs_test, error_hidden, thinned_W1, thinned_W21, dim, activationFunc, False)

    grad_tree = dropcolrow(grad_tree, dropped, False)
    grad_test = dropcolrow(grad_test, dropped, False)
    grad_W21 = dropcolrow(grad_W21, dropped_W21, True)
    grad_W1 = np.divide(np.add(grad_tree, grad_test), 2)

    new_W21 = update_weight(eta, pristine_W21, grad_W21)
    new_W22 = update_weight(eta, pristine_W22, grad_W22)
    new_W1 = update_weight(eta, pristine_W1, grad_W1)
    return new_W1, new_W21, new_W22
def calculateError_validation_pair(allEDUs, mode, WV, dim, W1, W21, W22, OutputFile, iteration, activationFunc, pairs):
    """Compute, log and return the MSE of the pair classifier over ``pairs``.

    Each entry of ``pairs`` is a string ``"<nameA> <nameB>"``; both names are
    keys of ``allEDUs``.  Each tree is refreshed with ``update_EDU`` and the
    vector of its first-sorted node (``sortEduKey(..., reverse=True)``) is
    taken.  The vector for the second name is concatenated in front of the
    one for the first name, then passed through the hidden layer (``W21``)
    and the output layer (``W22``, module-level ``global_outputActivation``).

    Two names are a positive pair when the second ``-``-separated field of
    each name is equal.  Targets are ``[1.0]`` / ``[-1.0]`` when ``W22[0]``
    has length 1, otherwise ``[0.8, -0.8]`` / ``[-0.8, 0.8]``.

    The value ``MSE(outputs, targets)`` is printed, appended to ``OutputFile``
    as ``iteration,mode,error`` and returned.
    """
    def _first_node_vector(name):
        # Refresh the stored tree, then pick its first-sorted node's vector.
        tree = update_EDU(allEDUs[name], W1, dim, activationFunc)
        ordered = sortEduKey(tree.keys(), reverse=True)
        return tree[str(ordered[0])].vector

    target_list = []
    output_list = []
    for pair in pairs:
        filenames = pair.split(' ')
        first_vector = _first_node_vector(filenames[0])
        second_vector = _first_node_vector(filenames[1])
        joined = np.concatenate([second_vector, first_vector], 0)
        hidden = feedforward_act(joined, W21, activationFunc)
        output = feedforward_act(hidden, W22, global_outputActivation)

        same_label = filenames[0].split("-")[1] == filenames[1].split("-")[1]
        single_output = len(W22[0]) == 1
        if same_label:
            y = [1.0] if single_output else [0.8, -0.8]
        else:
            y = [-1.0] if single_output else [-0.8, 0.8]
        target_list.append(y)
        output_list.append(output)

    totall_Err = MSE(output_list, target_list)
    print(iteration, " ", mode , " ", totall_Err)
    OutputFile.write("%s,%s,%s\n" % (iteration, mode, totall_Err))
    return totall_Err
def train_for_each_Sample (EDUs, y, W1, W2, eta, activationFunc):
    """Run one training step on a single tree and return ``(W1, W2)``.

    The vector of the first-sorted node (``sortEduKey(..., reverse=True)``)
    is fed through the output layer ``W2``.  The output error from
    ``softmax_error`` gives the ``W2`` gradient directly and the ``W1``
    gradient via ``BpthroughTree``.  Both gradients are computed with the
    weights as passed in; the updates are applied afterwards with
    ``update_weight``.
    """
    ordered_keys = sortEduKey(EDUs.keys(), reverse=True)
    top_vector = EDUs[str(ordered_keys[0])].vector

    pre_activation = feedforward(top_vector, W2)
    prediction = feedforward_act(top_vector, W2, activationFunc)
    output_error = softmax_error(y, prediction, pre_activation, activationFunc)

    grad_W2 = calculate_deltaW(output_error, top_vector)
    grad_W1 = BpthroughTree(EDUs, output_error, W1, W2, activationFunc)

    new_W2 = update_weight(eta, W2, grad_W2)
    new_W1 = update_weight(eta, W1, grad_W1)
    return new_W1, new_W2
def calculateError_validation(path_Folder, mode, WV, dim, W1_doc, W1_query, W2, OutputFile, iteration, activationFunc):
    """Compute, log and return the cross-entropy over file pairs in a folder.

    Every file in ``path_Folder`` is used once as a query (tree built with
    ``W1_query``) and compared with every file as a document (tree built with
    ``W1_doc``).  The document vector is concatenated in front of the query
    vector and passed through ``feedforward_act`` with ``W2`` and the
    module-level ``global_outputActivation``.

    Two files match when the second ``-``-separated field of their names is
    equal; matching pairs get target ``[1.0]``.  Non-matching pairs are kept
    (target ``[0.0]``) only when the document index is a multiple of 5.

    Document trees are built once up front instead of once per query, and
    outputs are computed only for the pairs that are kept.  This assumes
    ``readTree_att_NSWeight`` and ``feedforward_act`` have no side effects
    the caller relies on.

    The value ``cross_entropy(targets, outputs)`` is printed, appended to
    ``OutputFile`` as ``iteration,mode,error`` and returned.

    NOTE(review): ``readTree_att_NSWeight`` is called here with five
    arguments, while the definition visible in this file takes seven
    (``WSat``, ``WNu``); confirm which version this function is meant to use.
    NOTE(review): an earlier ``calculateError_validation`` with a different
    signature is visible in this source; this definition replaces it if both
    are in the same module.
    """
    FileList = os.listdir(path_Folder)

    def _first_node_vector(file_name, W1):
        # Build the tree for one file and return its first-sorted node's vector.
        EDUs = readTree_att_NSWeight(path_Folder + file_name, W1, WV, dim, activationFunc)
        eduKeys = sortEduKey(EDUs.keys(), reverse=True)
        return EDUs[str(eduKeys[0])].vector

    labels = [file_name.split("-")[1] for file_name in FileList]
    # Loop-invariant: the document side does not depend on the query.
    doc_vectors = [_first_node_vector(file_name, W1_doc) for file_name in FileList]

    target_list = []
    output_list = []
    for j, query_name in enumerate(FileList):
        query_vector = _first_node_vector(query_name, W1_query)
        for k, doc_vector in enumerate(doc_vectors):
            if labels[j] == labels[k]:
                y = [1.0]
            elif k % 5 == 0:
                # Keep only every fifth non-matching pair.
                y = [0.0]
            else:
                continue
            pair_vector = np.concatenate([doc_vector, query_vector], 0)
            target_list.append(y)
            output_list.append(feedforward_act(pair_vector, W2, global_outputActivation))

    totall_Err = cross_entropy(target_list, output_list)
    print(iteration, " ", mode , " ", totall_Err)
    OutputFile.write("%s,%s,%s\n" % (iteration, mode, totall_Err))
    return totall_Err
def test_AttWeight_DrHarati_pair(allEDUs, mode, WV, dim, W1, W21, W22, OutputFile, iteration, activationFunc, pairs): numberofSamples = len(pairs) #numberofSamples_test = 50 tp = 0 fp = 0 tn = 0 fn = 0 sim = 0 notsim = 0 for pair in pairs: #j in range(0, numberofSamples): filenames = pair.split(' ') EDUs_test = allEDUs[filenames[0]] EDUs_test = update_EDU(EDUs_test, W1, dim, activationFunc) eduKeys_test = sortEduKey(EDUs_test.keys(), reverse=True) input2_test = EDUs_test[str(eduKeys_test[0])].vector EDUs = allEDUs[filenames[1]] EDUs = update_EDU(EDUs, W1, dim, activationFunc) eduKeys = sortEduKey(EDUs.keys(), reverse=True) input2 = EDUs[str(eduKeys[0])].vector input = np.concatenate([input2, input2_test], 0) output1 = feedforward_act(input, W21, activationFunc) output = feedforward_act(output1, W22, global_outputActivation) if (filenames[0].split("-")[1] == filenames[1].split("-")[1]): sim += 1 if (sim % 500 == 0): print("Similar ", filenames[0].split("-")[1], filenames[1].split("-")[1], output) #if output[0] > output[1]: if (len(W22[0]) == 1): if output[0] > 0: # print("Similar ", filenames[0].split("-")[1], filenames[1].split("-")[1], output) tp += 1 else: # print("Similar ", filenames[0].split("-")[1], filenames[1].split("-")[1], output) fn += 1 else: if output[0] > output[1]: # print("Similar ", filenames[0].split("-")[1], filenames[1].split("-")[1], output) tp += 1 else: # print("Similar ", filenames[0].split("-")[1], filenames[1].split("-")[1], output) fn += 1 else: notsim += 1 if (notsim % 500 == 0): print("Not-Similar ", filenames[0].split("-")[1], filenames[1].split("-")[1], output) #if output[0] < output[1]: if (len(W22[0]) == 1): if output[0] < 0: # print("Not-Similar ", filenames[0].split("-")[1], filenames[1].split("-")[1], output) tn += 1 else: # print("Not-Similar ", filenames[0].split("-")[1], filenames[1].split("-")[1], output) fp += 1 else: if output[0] < output[1]: # print("Not-Similar ", filenames[0].split("-")[1], filenames[1].split("-")[1], 
output) tn += 1 else: # print("Not-Similar ", filenames[0].split("-")[1], filenames[1].split("-")[1], output) fp += 1 print (sim, notsim) accuracy = float(tp + tn) / (tp + tn + fp + fn) precision, recall, F1 = calculate_eval_metrics(tp, tn, fp, fn) print(iteration, " ", mode , " ", tp, " ", tn, " ", fp, " ", fn, " ", accuracy, " ", precision, " ", recall, " ", F1) OutputFile.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n" % (iteration, mode, tp, tn, fp, fn, accuracy, precision, recall, F1))
def readTree_att_Scalar(filePath, W1, WV, dim, hierarchyType, attScaler, activationFunc):
    """Build a tree of ``ClassNode.Node`` objects from a span file.

    Each line is parsed twice: split on commas (the first two fields are the
    integer span bounds) and split on single quotes (second-to-last quoted
    field is the relation, fourth-to-last the hierarchy, first quoted field
    the text).  The exact line layout is inferred from this parsing --
    confirm against the data files.

    * A line whose bounds are equal is a leaf: its vector is
      ``WordAveraging`` of the pre-processed text.
    * Any other line is an internal node: its children are the existing
      nodes whose key occurs in the concatenated span key, and its vector is
      ``feedforward_act(concat(left, right), W1, activationFunc)``.
    * Vectors of nodes whose hierarchy equals ``hierarchyType`` are
      multiplied by ``attScaler``.
    * After all lines, if more than one unmerged node remains, the two
      smallest keys (string order) are merged into a final node created with
      the root flag set.

    Changes from the previous version: the file is closed deterministically,
    and the Python-2-only constructs (backticks, ``keys().sort()``, ``/`` used
    as integer division) are replaced by equivalents that behave the same
    under Python 2 and 3.

    Returns:
        dict mapping node key (str) to ``ClassNode.Node``; an empty dict
        when the file is missing or empty.
        NOTE(review): the original layout was ambiguous about whether a
        missing/empty file returned the empty dict or None; callers call
        ``.keys()`` on the result, so a dict is returned on every path.
    """
    EDUs = {}       # keys of nodes not yet merged into a parent
    EDUs_main = {}  # every node created so far, by key

    if not os.path.exists(filePath) or not os.stat(filePath).st_size > 0:
        return EDUs_main

    with open(filePath, "r") as file_1:
        for line in file_1:
            line = re.sub(r"[\n(\[\])]", "", line)
            arr = re.split(r"\s*,\s*", line)
            arr2 = re.split("'", line)
            relation = arr2[len(arr2) - 2]
            hierarchy = arr2[len(arr2) - 4]

            if arr[0] == arr[1]:
                # Leaf.  Numbers with two or more digits get a "9" prefix
                # (presumably to keep keys unambiguous inside a concatenated
                # span key -- TODO confirm).
                if int(arr[0]) // 10 != 0:
                    arr[0] = "9" + arr[0]
                vector = WordAveraging(preprocessor1(re.sub(r"[\n,.:'(\[\])]", "", arr2[1])), WV, dim)
                if hierarchy == hierarchyType:
                    vector = np.multiply(attScaler, vector)
                EDUs_main[arr[0]] = ClassNode.Node(True, False, 0, 0, "", hierarchy, relation, vector, "")
                EDUs[arr[0]] = "hi"
            else:
                # Internal node: key is the concatenation of all (prefixed)
                # numbers in the span.
                numconcat = ""
                for num in range(int(arr[0]), int(arr[1]) + 1):
                    if num // 10 != 0:
                        num = "9" + str(num)
                    numconcat += str(int(num))
                childs = {}
                i = 1
                numconcat2 = numconcat
                EDUitem = sortEduKey(EDUs_main.keys(), reverse=True)
                for key in EDUitem:
                    key = str(key)
                    if numconcat != '' and key in numconcat:
                        childs[i] = EDUs_main[key].vector
                        # First match becomes the right child, later ones the left.
                        if i == 1:
                            rightChild = key
                            EDUs_main[key].child = "right"
                        elif i == 2:
                            leftChild = key
                            EDUs_main[key].child = "left"
                        i = 2
                        # Consume the matched part so it cannot match again.
                        numconcat = numconcat.replace(key, "")
                        del EDUs[key]
                EDUs[numconcat2] = "hi"
                vector = feedforward_act(np.concatenate([childs[2], childs[1]], 0), W1, activationFunc)
                if hierarchy == hierarchyType:
                    vector = np.multiply(attScaler, vector)
                EDUs_main[numconcat2] = ClassNode.Node(False, False, leftChild, rightChild, "", hierarchy, relation, vector, "")

    # Join whatever is still unmerged into a final root node.
    eduKey = sorted(EDUs.keys())
    if len(eduKey) > 1:
        vector = feedforward_act(np.concatenate([EDUs_main[eduKey[0]].vector, EDUs_main[eduKey[1]].vector], 0), W1, activationFunc)
        EDUs[eduKey[0] + eduKey[1]] = "hi"
        EDUs_main[eduKey[0]].child = "left"
        EDUs_main[eduKey[1]].child = "right"
        # `hierarchy` here is the value parsed from the last line of the file.
        if hierarchy == hierarchyType:
            vector = np.multiply(attScaler, vector)
        EDUs_main[eduKey[0] + eduKey[1]] = ClassNode.Node(False, True, eduKey[0], eduKey[1], "", "", "", vector, "")
        del EDUs[eduKey[0]]
        del EDUs[eduKey[1]]
    return EDUs_main
def test_AttWeight_DrHarati(path_Folder, mode, WV, dim, W1 , W2, OutputFile, WSat, WNu, iteration, activationFunc):
    """Evaluate the classifier on paired pos/neg tree files and log the metrics.

    Files come from ``path_Folder + "pos/"`` and ``path_Folder + "neg/"``;
    only as many files as the smaller folder holds are used from each side.
    For each file the tree is built with ``readTree_att_NSWeight`` and the
    vector of the node whose key sorts first under
    ``sortEduKey(..., reverse=True)`` is fed through ``feedforward_act`` with
    ``W2``.  When both outputs are exactly equal the input vector is printed
    as a diagnostic.

    A pos file is a true positive when ``output[0] > output[1]``, otherwise a
    false negative.  A neg file is a true negative when
    ``output[0] < output[1]``, otherwise a false positive.

    Accuracy plus the values from ``calculate_eval_metrics`` are printed and
    appended to ``OutputFile`` as
    ``iteration,mode,tp,tn,fp,fn,accuracy,precision,recall,F1``.  Accuracy is
    reported as 0 when there are no samples; how ``calculate_eval_metrics``
    handles all-zero counts is not visible here.  Returns None.
    """
    posFileList_test = os.listdir(path_Folder + "pos/")
    negFileList_test = os.listdir(path_Folder + "neg/")

    def _predict(path_File_test):
        # Network output for the tree stored in one file.
        EDUs = readTree_att_NSWeight(path_File_test, W1, WV, dim, WSat, WNu, activationFunc)
        eduKeys = sortEduKey(EDUs.keys(), reverse=True)
        input2 = EDUs[str(eduKeys[0])].vector
        output = feedforward_act(input2, W2, activationFunc)
        if output[0] == output[1]:
            # Single-argument print() gives the same text on Python 2 and 3.
            print("input %s" % (input2,))
        return output

    tp = 0
    fp = 0
    tn = 0
    fn = 0
    # zip() stops at the shorter list, keeping both classes balanced.
    for pos_name, neg_name in zip(posFileList_test, negFileList_test):
        output = _predict(path_Folder + "pos/" + pos_name)
        if output[0] > output[1]:
            tp += 1
        else:
            fn += 1

        output = _predict(path_Folder + "neg/" + neg_name)
        if output[0] < output[1]:
            tn += 1
        else:
            fp += 1

    total = tp + tn + fp + fn
    accuracy = float(tp + tn) / total if total != 0 else 0
    precision, recall, F1 = calculate_eval_metrics(tp, tn, fp, fn)
    print("   ".join(str(v) for v in (iteration, mode, tp, tn, fp, fn, accuracy, precision, recall, F1)))
    OutputFile.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n" % (iteration, mode, tp, tn, fp, fn, accuracy, precision, recall, F1))
def readTree_att_NSWeight(filePath, W1, WV, dim, WSat, WNu, activationFunc):
    """Build a tree of ``ClassNode.Node`` objects, applying attention to children.

    Each line is parsed twice: split on commas (the first two fields are the
    integer span bounds) and split on single quotes (second-to-last quoted
    field is the relation, fourth-to-last the hierarchy, first quoted field
    the text).  The exact line layout is inferred from this parsing --
    confirm against the data files.

    * A line whose bounds are equal is a leaf: its vector is
      ``WordAveraging`` of the pre-processed text.
    * Any other line is an internal node: its children are the existing
      nodes whose key occurs in the concatenated span key.  Each child vector
      is passed through ``apply_attention`` (with the child's
      ``nodeHierarchy``, ``WNu`` and ``WSat``) before
      ``feedforward_act(concat(left, right), W1, activationFunc)``.
    * After all lines, if more than one unmerged node remains, the two
      smallest keys (string order) are merged the same way into a final node
      created with the root flag set.

    Diagnostic messages are printed when a vector looks unusually large
    (thresholds 2 and 0.8 as in the original code).

    Changes from the previous version: the file is closed deterministically,
    and the Python-2-only constructs (print statements, backticks,
    ``keys().sort()``, ``/`` used as integer division) are replaced by
    equivalents that behave the same under Python 2 and 3.

    Returns:
        dict mapping node key (str) to ``ClassNode.Node``; an empty dict
        when the file is missing or empty.
        NOTE(review): the original layout was ambiguous about whether a
        missing/empty file returned the empty dict or None; callers call
        ``.keys()`` on the result, so a dict is returned on every path.
    """
    EDUs = {}       # keys of nodes not yet merged into a parent
    EDUs_main = {}  # every node created so far, by key

    if not os.path.exists(filePath) or not os.stat(filePath).st_size > 0:
        return EDUs_main

    with open(filePath, "r") as file_1:
        for line in file_1:
            line = re.sub(r"[\n(\[\])]", "", line)
            arr = re.split(r"\s*,\s*", line)
            arr2 = re.split("'", line)
            relation = arr2[len(arr2) - 2]
            hierarchy = arr2[len(arr2) - 4]

            if arr[0] == arr[1]:
                # Leaf.  Numbers with two or more digits get a "9" prefix
                # (presumably to keep keys unambiguous inside a concatenated
                # span key -- TODO confirm).
                if int(arr[0]) // 10 != 0:
                    arr[0] = "9" + arr[0]
                vector = WordAveraging(preprocessor1(re.sub(r"[\n,.:'(\[\])]", "", arr2[1])), WV, dim)
                # Debug diagnostics for suspiciously large leaf vectors.
                if np.absolute(max(vector)) > 2:
                    print("vayyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy")
                if np.sum(np.absolute(vector)) / dim >= 0.8:
                    for item in EDUs_main.keys():
                        print("%s   %s" % (item, EDUs_main[item].vector))
                    print("khodaaaaaaaaaaaaaaaaaa")
                EDUs_main[arr[0]] = ClassNode.Node(True, False, 0, 0, "", hierarchy, relation, vector, "")
                EDUs[arr[0]] = "hi"
            else:
                # Internal node: key is the concatenation of all (prefixed)
                # numbers in the span.
                numconcat = ""
                for num in range(int(arr[0]), int(arr[1]) + 1):
                    if num // 10 != 0:
                        num = "9" + str(num)
                    numconcat += str(int(num))
                childs = {}
                i = 1
                numconcat2 = numconcat
                EDUitem = sortEduKey(EDUs_main.keys(), reverse=True)
                for key in EDUitem:
                    key = str(key)
                    if numconcat != '' and key in numconcat:
                        childs[i] = EDUs_main[key].vector
                        childs[i] = apply_attention(childs[i], EDUs_main[key].nodeHierarchy, WNu, WSat, activationFunc)
                        # Debug diagnostics for suspiciously large child vectors.
                        if np.sum(np.absolute(childs[i])) / dim >= 0.8:
                            for item in EDUs_main.keys():
                                print("%s   %s" % (item, EDUs_main[item].vector))
                            print("%s   %s" % (key, childs[i]))
                            print("khodaaaaaaaaaaaaaaaaaa")
                        # First match becomes the right child, later ones the left.
                        if i == 1:
                            rightChild = key
                            EDUs_main[key].child = "right"
                        elif i == 2:
                            leftChild = key
                            EDUs_main[key].child = "left"
                        i = 2
                        # Consume the matched part so it cannot match again.
                        numconcat = numconcat.replace(key, "")
                        del EDUs[key]
                EDUs[numconcat2] = "hi"
                vector = feedforward_act(np.concatenate([childs[2], childs[1]], 0), W1, activationFunc)
                EDUs_main[numconcat2] = ClassNode.Node(False, False, leftChild, rightChild, "", hierarchy, relation, vector, "")

    # Join whatever is still unmerged into a final root node.
    eduKey = sorted(EDUs.keys())
    if len(eduKey) > 1:
        EDU_0 = apply_attention(EDUs_main[eduKey[0]].vector, EDUs_main[eduKey[0]].nodeHierarchy, WNu, WSat, activationFunc)
        EDU_1 = apply_attention(EDUs_main[eduKey[1]].vector, EDUs_main[eduKey[1]].nodeHierarchy, WNu, WSat, activationFunc)
        vector = feedforward_act(np.concatenate([EDU_0, EDU_1], 0), W1, activationFunc)
        EDUs[eduKey[0] + eduKey[1]] = "hi"
        EDUs_main[eduKey[0]].child = "left"
        EDUs_main[eduKey[1]].child = "right"
        EDUs_main[eduKey[0] + eduKey[1]] = ClassNode.Node(False, True, eduKey[0], eduKey[1], "", "", "", vector, "")
        del EDUs[eduKey[0]]
        del EDUs[eduKey[1]]
    return EDUs_main