def IR_Evaluation_Test(EDUs_Test, EDUs_Train, path_file_Test, path_file_Train, W1, WV, dim, activationFunc, iteration, OutputFile, numberofDoc):
    """Report the average retrieval precision of test documents against the training set.

    Each file name found in ``path_file_Train`` / ``path_file_Test`` is looked
    up in ``EDUs_Train`` / ``EDUs_Test``, refreshed through ``update_EDU`` and
    reduced to one vector: the ``.vector`` of the entry whose key comes first
    in ``sortEduKey(keys, reverse=True)``.  For every test document the
    ``numberofDoc`` most cosine-similar training documents are retrieved and
    the share of them carrying the test document's label is recorded.  A label
    is the second ``-``-separated field of the file name.

    Args:
        EDUs_Test: Mapping of test file name -> EDU collection.
        EDUs_Train: Mapping of training file name -> EDU collection.
        path_file_Test: Directory whose listing enumerates the test documents.
        path_file_Train: Directory whose listing enumerates the training documents.
        W1: Weights handed to ``update_EDU``.
        WV: Unused by this function.
        dim: Dimension argument handed to ``update_EDU``.
        activationFunc: Activation handed to ``update_EDU``.
        iteration: Value written in the first column of the output record.
        OutputFile: Writable file-like object, or ``None`` to skip writing.
        numberofDoc: Number of nearest training documents to retrieve; also
            the denominator of each per-document precision.

    Returns:
        None.  The average is printed and, when ``OutputFile`` is given,
        appended as ``"<iteration>,<average>\\n"``.  With no test documents the
        average is reported as 0.0 instead of dividing by zero.
    """
    precisions = []
    TestFileList = os.listdir(path_file_Test)
    TrainFileList = os.listdir(path_file_Train)

    # Build the training matrix (one row per document) and its label vector.
    trainDocuments = []
    trainDocuments_label = []
    for train_name in TrainFileList:
        EDUs_tr = update_EDU(EDUs_Train[train_name], W1, dim, activationFunc)
        eduKeys = sortEduKey(EDUs_tr.keys(), reverse=True)
        trainDocuments.append(EDUs_tr[str(eduKeys[0])].vector)
        trainDocuments_label.append(train_name.split("-")[1])
    trainDocuments = np.array(trainDocuments)
    trainDocuments_label = np.array(trainDocuments_label)

    for test_name in TestFileList:
        EDUs_te = update_EDU(EDUs_Test[test_name], W1, dim, activationFunc)
        eduKeys = sortEduKey(EDUs_te.keys(), reverse=True)
        testRep = np.array(EDUs_te[str(eduKeys[0])].vector).reshape(1, -1)
        if np.isnan(testRep).any():
            # Surface documents whose representation degenerated to NaN.
            print(test_name)

        distances = cosine_similarity(testRep, trainDocuments)
        main_label = test_name.split("-")[1]
        # argsort is ascending: take the last `numberofDoc` indices and flip
        # them so the most similar training document comes first.
        nearest = distances.argsort()[0][-1 * numberofDoc:][::-1]
        predicted_labels = trainDocuments_label[nearest]
        print(predicted_labels)

        predicted_labels = sorted(predicted_labels)
        predicts = {x: predicted_labels.count(x) for x in predicted_labels}
        main_predict = max(predicts.items(), key=operator.itemgetter(1))[0]
        print(main_label, main_predict)

        if main_label in predicts:
            precisions.append(float(predicts[main_label]) / numberofDoc)
        else:
            precisions.append(0)

    # An empty test folder used to raise ZeroDivisionError here.
    avg_pre = float(sum(precisions)) / len(precisions) if precisions else 0.0
    print("average", avg_pre)
    if OutputFile is not None:
        OutputFile.write("%s,%s\n" % (iteration, avg_pre))
def calculateError_validation(path_Folder, mode, WV, dim, W1 , W2, OutputFile, WSat, WNu, iteration, activationFunc):
    """Compute the mean squared error over paired positive/negative validation files.

    Files are taken pairwise from ``path_Folder + "pos/"`` (target
    ``[1.0, 0]``) and ``path_Folder + "neg/"`` (target ``[0, 1.0]``); only the
    first ``min(#pos, #neg)`` files of each listing are used.  Each file is
    encoded with ``readTree_att_NSWeight`` and the ``.vector`` of the entry
    whose key comes first in ``sortEduKey(keys, reverse=True)`` is fed through
    ``feedforward_act``.

    NOTE(review): a second function named ``calculateError_validation`` with a
    different signature appears later in the visible source; if both live in
    the same module the later definition replaces this one.

    Args:
        path_Folder: Directory prefix; must end with a path separator because
            ``"pos/"`` / ``"neg/"`` are appended by plain concatenation.
        mode: Tag written in the second column of the log record.
        WV, dim, W1, WSat, WNu: Forwarded unchanged to ``readTree_att_NSWeight``.
        W2: Output-layer weights handed to ``feedforward_act``.
        OutputFile: Writable file-like object, or ``None`` to skip writing.
        iteration: Value written in the first column of the log record.
        activationFunc: Activation used for both the tree encoder and the
            output layer.

    Returns:
        The summed ``MSE`` divided by ``2 * min(#pos, #neg)``.

    Raises:
        ZeroDivisionError: If either sub-folder is empty.
    """
    posFileList_test = os.listdir(path_Folder + "pos/")
    negFileList_test = os.listdir(path_Folder + "neg/")
    numberofSamples_test = min(len(posFileList_test), len(negFileList_test))

    sumErr = 0.0
    # zip() stops at the shorter listing, i.e. after numberofSamples_test pairs.
    for pos_name, neg_name in zip(posFileList_test, negFileList_test):
        for relative_path, y in (("pos/" + pos_name, [1.0, 0]),
                                 ("neg/" + neg_name, [0, 1.0])):
            EDUs = readTree_att_NSWeight(path_Folder + relative_path, W1, WV, dim, WSat, WNu, activationFunc)
            eduKeys = sortEduKey(EDUs.keys(), reverse=True)
            input2 = EDUs[str(eduKeys[0])].vector
            output = feedforward_act(input2, W2, activationFunc)
            sumErr += MSE(y, output)

    totall_Err = sumErr / (2 * numberofSamples_test)

    # The log/print used to sit after `return` and never ran; report first,
    # then return.  A single pre-joined string prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print("   ".join(str(value) for value in (iteration, mode, totall_Err)))
    if OutputFile is not None:
        OutputFile.write("%s,%s,%s\n" % (iteration, mode, totall_Err))
    return totall_Err
def calculateError_validation_EDUs(allEDUs, mode, WV, dim, W1 , W2, OutputFile, WSat, WNu, iteration, activationFunc):
    """Compute, log and return the MSE over pre-loaded EDU collections.

    Every value of ``allEDUs`` is refreshed with ``update_EDU``.  Non-empty
    collections stored under a key ``> 0`` get the target ``[1.0, 0]``, those
    under a key ``< 0`` get ``[0, 1.0]``; a key equal to 0 or an empty
    collection contributes nothing.  The network output is
    ``feedforward_act`` applied, with ``global_outputActivation``, to the
    ``.vector`` of the entry whose key comes first in
    ``sortEduKey(keys, reverse=True)``.

    NOTE(review): ``update_EDU`` is called here with six arguments while other
    call sites in this file pass four -- confirm against its definition.

    Args:
        allEDUs: Mapping of numeric id -> EDU collection.
        mode: Tag written in the second column of the log record.
        WV: Unused by this function.
        dim, W1, WSat, WNu, activationFunc: Forwarded to ``update_EDU``.
        W2: Output-layer weights.
        OutputFile: Writable file-like object receiving the log record.
        iteration: Value written in the first column of the log record.

    Returns:
        ``MSE(targets, outputs)`` over all contributing collections.
    """
    targets = np.zeros([0, 2])
    outputs = np.zeros([0, 2])

    for edu_id in allEDUs.keys():
        tree = update_EDU(allEDUs[edu_id], W1, WSat, WNu, dim, activationFunc)
        if not len(tree) > 0:
            continue

        # The sign of the id selects the class; id 0 is ignored.
        if edu_id > 0:
            expected = [1.0, 0]
        elif edu_id < 0:
            expected = [0, 1.0]
        else:
            continue

        ordered_keys = sortEduKey(tree.keys(), reverse=True)
        top_vector = tree[str(ordered_keys[0])].vector
        predicted = feedforward_act(top_vector, W2, global_outputActivation)
        targets = np.concatenate((targets, [expected]), 0)
        outputs = np.concatenate((outputs, [predicted]), 0)

    totall_Err = MSE(targets, outputs)
    print(iteration, " ", mode , " ", totall_Err)
    OutputFile.write("%s,%s,%s\n" % (iteration, mode, totall_Err))
    return totall_Err
def test_AttWeight(path_Folder, mode, WV, dim, W1 , W2, OutputFile, WSat, WNu, iteration, activationFunc):
    """Classify paired positive/negative files and log a confusion-matrix summary.

    Files are taken pairwise from ``path_Folder + "pos/"`` and
    ``path_Folder + "neg/"`` (only the first ``min(#pos, #neg)`` of each
    listing).  Each file is encoded with ``readTree_att_NSWeight`` and the
    ``.vector`` of the entry whose key comes first in
    ``sortEduKey(keys, reverse=True)`` is fed through ``feedforward_act``.
    A positive file counts as correct when ``output[0] > output[1]``, a
    negative file when ``output[0] < output[1]``; ties are counted as errors
    for both classes.

    Args:
        path_Folder: Directory prefix; must end with a path separator because
            ``"pos/"`` / ``"neg/"`` are appended by plain concatenation.
        mode: Tag written in the second column of the log record.
        WV, dim, W1, WSat, WNu: Forwarded unchanged to ``readTree_att_NSWeight``.
        W2: Output-layer weights handed to ``feedforward_act``.
        OutputFile: Writable file-like object receiving the CSV record.
        iteration: Value written in the first column of the log record.
        activationFunc: Activation used for both the tree encoder and the
            output layer.

    Returns:
        None.  Prints and writes
        ``iteration, mode, tp, tn, fp, fn, accuracy, precision, recall, F1``.
        Any ratio whose denominator is zero is reported as 0.
    """
    posFileList_test = os.listdir(path_Folder + "pos/")
    negFileList_test = os.listdir(path_Folder + "neg/")

    tp = fp = tn = fn = 0
    # zip() stops at the shorter listing, i.e. after min(#pos, #neg) pairs.
    for pos_name, neg_name in zip(posFileList_test, negFileList_test):
        EDUs = readTree_att_NSWeight(path_Folder + "pos/" + pos_name, W1, WV, dim, WSat, WNu, activationFunc)
        eduKeys = sortEduKey(EDUs.keys(), reverse=True)
        output = feedforward_act(EDUs[str(eduKeys[0])].vector, W2, activationFunc)
        if output[0] > output[1]:
            tp += 1
        else:
            fn += 1

        EDUs = readTree_att_NSWeight(path_Folder + "neg/" + neg_name, W1, WV, dim, WSat, WNu, activationFunc)
        eduKeys = sortEduKey(EDUs.keys(), reverse=True)
        output = feedforward_act(EDUs[str(eduKeys[0])].vector, W2, activationFunc)
        if output[0] < output[1]:
            tn += 1
        else:
            fp += 1

    # Guard every ratio: with empty folders all counters are zero and the
    # unguarded divisions used to raise ZeroDivisionError.
    total = tp + tn + fp + fn
    accuracy = float(tp + tn) / total if total else 0
    precision = float(tp) / (tp + fp) if (tp + fp) else 0
    recall = float(tp) / (tp + fn) if (tp + fn) else 0
    if (precision + recall) == 0:
        F1 = 0
    else:
        F1 = 2 * (float(precision * recall)) / (precision + recall)

    # A single pre-joined string prints identically under the Python 2 print
    # statement and the Python 3 print function.
    summary = (iteration, mode, tp, tn, fp, fn, accuracy, precision, recall, F1)
    print("   ".join(str(value) for value in summary))
    OutputFile.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n" % summary)
def train_for_each_Sample_AttWeight (EDUs, EDUs_test, y, W1, W21, W22, eta, dim, activationFunc, dropOutPercent):
    """Run one training step on a (document, query) pair and return the updated weights.

    The forward pass concatenates the document vector with the query vector
    (each the ``.vector`` of the entry whose key comes first in
    ``sortEduKey(keys, reverse=True)``), feeds it through ``W21`` with
    ``activationFunc`` and then through ``W22`` with
    ``global_outputActivation``.  ``W1`` and ``W21`` are first passed through
    ``dropcolrow`` using indices chosen by ``dropOut``; the resulting
    gradients are passed through ``dropcolrow`` again and applied to pristine
    copies of the weights taken on entry.

    Args:
        EDUs: EDU collection of the document side.
        EDUs_test: EDU collection of the query side.
        y: Target handed to ``softmax_error``.
        W1: Tree weights shared by both sides.
        W21: First dense-layer weights.
        W22: Output-layer weights.
        eta: Learning rate handed to ``update_weight``.
        dim: Dimension argument handed to ``BpthroughTree``.
        activationFunc: Hidden-layer / tree activation.
        dropOutPercent: Drop-out amount handed to ``dropOut``.

    Returns:
        Tuple ``(W1, W21, W22)`` of updated weights.
    """
    # Pristine copies: the final update is applied to these, not to the
    # drop-out-masked matrices used for the forward/backward pass.
    pristine_W1 = W1.copy()
    pristine_W21 = W21.copy()
    pristine_W22 = W22.copy()

    dropped = dropOut(len(W1[0]), dropOutPercent)
    W1 = dropcolrow(W1, dropped, False)

    # NOTE(review): `dropped * 2` repeats the indices when `dropped` is a list
    # and doubles each index when it is a numpy array; neither looks like an
    # offset into the second half of the concatenated input -- confirm the
    # intent against dropOut/dropcolrow.
    dropped_w21 = list(dropped)
    dropped_w21.extend(dropped * 2)
    W21 = dropcolrow(W21, dropped_w21, True)

    query_keys = sortEduKey(EDUs_test.keys(), reverse=True)
    query_vector = EDUs_test[str(query_keys[0])].vector
    doc_keys = sortEduKey(EDUs.keys(), reverse=True)
    doc_vector = EDUs[str(doc_keys[0])].vector
    joint_input = np.concatenate([doc_vector, query_vector], 0)

    # Forward pass.
    hidden = feedforward_act(joint_input, W21, activationFunc)
    pre_activation = feedforward(hidden, W22)
    prediction = feedforward_act(hidden, W22, global_outputActivation)

    # Backward pass through the two dense layers, then through both trees.
    output_error = softmax_error(y, prediction, pre_activation, global_outputActivation)
    grad_W22 = calculate_deltaW(output_error, hidden)
    hidden_error = non_softmax_error(output_error, W22, joint_input, W21, activationFunc)
    grad_W21 = calculate_deltaW(hidden_error, joint_input)
    grad_W1_doc = BpthroughTree(EDUs, hidden_error, W1, W21, dim, activationFunc, True)
    grad_W1_query = BpthroughTree(EDUs_test, hidden_error, W1, W21, dim, activationFunc, False)

    grad_W1_doc = dropcolrow(grad_W1_doc, dropped, False)
    grad_W1_query = dropcolrow(grad_W1_query, dropped, False)
    grad_W21 = dropcolrow(grad_W21, dropped_w21, True)
    # W1 is shared by both sides, so its gradient is the mean of the two.
    grad_W1 = np.divide(np.add(grad_W1_doc, grad_W1_query), 2)

    W21 = update_weight(eta, pristine_W21, grad_W21)
    W22 = update_weight(eta, pristine_W22, grad_W22)
    W1 = update_weight(eta, pristine_W1, grad_W1)
    return W1, W21, W22
def calculateError_validation_pair(allEDUs, mode, WV, dim, W1, W21, W22, OutputFile, iteration, activationFunc, pairs):
    """Compute, log and return the MSE of the pair network over ``pairs``.

    Each element of ``pairs`` is a string ``"<query name> <document name>"``
    split on a single space; both names are looked up in ``allEDUs`` and
    refreshed with ``update_EDU``.  The document vector followed by the query
    vector is fed through ``W21`` (``activationFunc``) and ``W22``
    (``global_outputActivation``).  Two names match when their second
    ``-``-separated fields are equal.  Targets are ``[1.0]`` / ``[-1.0]`` when
    ``W22`` has a single output column and ``[0.8, -0.8]`` / ``[-0.8, 0.8]``
    otherwise.

    Args:
        allEDUs: Mapping of file name -> EDU collection.
        mode: Tag written in the second column of the log record.
        WV: Unused by this function.
        dim, W1, activationFunc: Forwarded to ``update_EDU``.
        W21: First dense-layer weights.
        W22: Output-layer weights.
        OutputFile: Writable file-like object receiving the log record.
        iteration: Value written in the first column of the log record.
        pairs: Iterable of ``"name_a name_b"`` strings.

    Returns:
        ``MSE(outputs, targets)`` over all pairs.
    """
    targets = []
    outputs = []

    for pair in pairs:
        filenames = pair.split(' ')

        query_tree = update_EDU(allEDUs[filenames[0]], W1, dim, activationFunc)
        query_keys = sortEduKey(query_tree.keys(), reverse=True)
        query_vector = query_tree[str(query_keys[0])].vector

        doc_tree = update_EDU(allEDUs[filenames[1]], W1, dim, activationFunc)
        doc_keys = sortEduKey(doc_tree.keys(), reverse=True)
        doc_vector = doc_tree[str(doc_keys[0])].vector

        joint_input = np.concatenate([doc_vector, query_vector], 0)
        hidden = feedforward_act(joint_input, W21, activationFunc)
        prediction = feedforward_act(hidden, W22, global_outputActivation)

        same_label = filenames[0].split("-")[1] == filenames[1].split("-")[1]
        single_output = len(W22[0]) == 1
        if same_label:
            expected = [1.0] if single_output else [0.8, -0.8]
        else:
            expected = [-1.0] if single_output else [-0.8, 0.8]

        targets.append(expected)
        outputs.append(prediction)

    totall_Err = MSE(outputs, targets)
    print(iteration, " ", mode , " ", totall_Err)
    OutputFile.write("%s,%s,%s\n" % (iteration, mode, totall_Err))
    return totall_Err
def calculateError_validation(path_Folder, mode, WV, dim, W1_doc, W1_query, W2, OutputFile, iteration, activationFunc):
    """Compute, log and return the cross-entropy over all file pairs in a folder.

    Every file in ``path_Folder`` is encoded twice with
    ``readTree_att_NSWeight``: once with ``W1_query`` (query side) and once
    with ``W1_doc`` (document side).  For each ordered pair ``(j, k)`` the
    document vector followed by the query vector is fed through
    ``feedforward_act`` with ``global_outputActivation``.  Pairs whose file
    names share the second ``-``-separated field get target ``[1.0]``; of the
    remaining pairs only those with ``k % 5 == 0`` are kept, with target
    ``[0.0]``.

    The document-side encodings do not depend on the query, so they are
    computed once up front instead of once per pair, and the network output
    is only evaluated for pairs that are kept.  This assumes
    ``readTree_att_NSWeight`` and ``feedforward_act`` are deterministic and
    free of side effects -- TODO confirm.

    NOTE(review): an earlier function with the same name but a different
    signature appears in the visible source; if both live in the same module
    this definition replaces it.

    Args:
        path_Folder: Directory prefix; file names are appended by plain
            concatenation, so it must end with a path separator.
        mode: Tag written in the second column of the log record.
        WV, dim, activationFunc: Forwarded to ``readTree_att_NSWeight``.
        W1_doc: Tree weights for the document side.
        W1_query: Tree weights for the query side.
        W2: Output-layer weights.
        OutputFile: Writable file-like object receiving the log record.
        iteration: Value written in the first column of the log record.

    Returns:
        ``cross_entropy(targets, outputs)`` over the kept pairs.
    """
    def _top_vector(tree):
        # Vector of the entry whose key sorts first in reverse order.
        ordered_keys = sortEduKey(tree.keys(), reverse=True)
        return tree[str(ordered_keys[0])].vector

    FileList = os.listdir(path_Folder)

    # Loop-invariant work: encode each document once (N reads, not N * N).
    doc_vectors = [
        _top_vector(readTree_att_NSWeight(path_Folder + name, W1_doc, WV, dim, activationFunc))
        for name in FileList
    ]

    target_list = []
    output_list = []
    for query_name in FileList:
        query_tree = readTree_att_NSWeight(path_Folder + query_name, W1_query, WV, dim, activationFunc)
        input2_test = _top_vector(query_tree)
        query_label = query_name.split("-")[1]

        for k, (doc_name, input2) in enumerate(zip(FileList, doc_vectors)):
            if query_label == doc_name.split("-")[1]:
                y = [1.0]
            elif k % 5 == 0:
                # Keep only every fifth non-matching pair as a negative sample.
                y = [0.0]
            else:
                continue

            pair_input = np.concatenate([input2, input2_test], 0)
            output = feedforward_act(pair_input, W2, global_outputActivation)
            target_list.append(y)
            output_list.append(output)

    totall_Err = cross_entropy(target_list, output_list)
    print(iteration, " ", mode , " ", totall_Err)
    OutputFile.write("%s,%s,%s\n" % (iteration, mode, totall_Err))
    return totall_Err
def train_for_each_Sample (EDUs, y, W1, W2, eta, activationFunc):
    """Run one training step on a single EDU collection and return ``(W1, W2)``.

    The ``.vector`` of the entry whose key comes first in
    ``sortEduKey(keys, reverse=True)`` is fed through ``W2``; the output error
    from ``softmax_error`` yields the ``W2`` gradient directly and the ``W1``
    gradient through ``BpthroughTree``.  Both gradients are computed from the
    incoming weights before either matrix is updated.

    NOTE(review): ``BpthroughTree`` is called here with five arguments while
    another call site in this file passes seven -- confirm against its
    definition.

    Args:
        EDUs: EDU collection of the sample.
        y: Target handed to ``softmax_error``.
        W1: Tree weights.
        W2: Output-layer weights.
        eta: Learning rate handed to ``update_weight``.
        activationFunc: Activation used for the output layer and the tree.

    Returns:
        Tuple ``(W1, W2)`` of updated weights.
    """
    ordered_keys = sortEduKey(EDUs.keys(), reverse=True)
    top_vector = EDUs[str(ordered_keys[0])].vector

    pre_activation = feedforward(top_vector, W2)
    prediction = feedforward_act(top_vector, W2, activationFunc)

    output_error = softmax_error(y, prediction, pre_activation, activationFunc)
    grad_W2 = calculate_deltaW(output_error, top_vector)
    grad_W1 = BpthroughTree(EDUs, output_error, W1, W2, activationFunc)

    new_W2 = update_weight(eta, W2, grad_W2)
    new_W1 = update_weight(eta, W1, grad_W1)
    return new_W1, new_W2
def plotDocs(EDUs_Test, name, labelColor):
    """Save a 2-D t-SNE scatter of document labels to ``'fig' + name + '.png'``.

    One vector is taken per entry of ``EDUs_Test``: the ``.vector`` of the
    element whose key comes first in ``sortEduKey(keys, reverse=True)``.  The
    vectors are embedded with ``TSNE(n_components=2)``, rescaled with
    ``MinMaxScaler`` and each document's label (second ``-``-separated field
    of its key) is drawn as a text annotation at its embedded position.

    Args:
        EDUs_Test: Mapping of document name -> EDU collection.
        name: Suffix inserted into the output file name.
        labelColor: Mapping of label -> colour used for the annotation.

    Returns:
        None.
    """
    labels = []
    doc_vectors = []
    for doc_name in EDUs_Test.keys():
        labels.append(doc_name.split("-")[1])
        tree = EDUs_Test[doc_name]
        ordered_keys = sortEduKey(tree.keys(), reverse=True)
        doc_vectors.append(tree[str(ordered_keys[0])].vector)

    scaler = MinMaxScaler()
    embedded = TSNE(n_components=2).fit_transform(doc_vectors)
    scaler.fit(embedded)
    embedded = scaler.transform(embedded)

    xs = embedded[:, 0]
    ys = embedded[:, 1]
    for label, x, y in zip(labels, xs, ys):
        plt.annotate(label, xy=(x, y), color=labelColor[label])

    pylab.savefig('fig' + name + '.png')
    pylab.close()
    # NOTE(review): show() runs after the figure has been closed -- confirm
    # whether an interactive window was ever intended here.
    plt.show()
def test_AttWeight_DrHarati_pair(allEDUs, mode, WV, dim, W1, W21, W22, OutputFile, iteration, activationFunc, pairs):
    """Classify document pairs as similar / not similar and log the metrics.

    Each element of ``pairs`` is a string ``"<query name> <document name>"``
    split on a single space; both names are looked up in ``allEDUs`` and
    refreshed with ``update_EDU``.  The document vector followed by the query
    vector is fed through ``W21`` (``activationFunc``) and ``W22``
    (``global_outputActivation``).  A pair is truly similar when the second
    ``-``-separated fields of the two names are equal.

    Decision rule: with a single output column in ``W22`` the score
    ``output[0]`` is compared against 0, otherwise against ``output[1]``.
    A similar pair is a true positive only when the score is strictly
    greater, a dissimilar pair a true negative only when it is strictly
    smaller, so a tie counts as an error in both classes.

    Args:
        allEDUs: Mapping of file name -> EDU collection.
        mode: Tag written in the second column of the log record.
        WV: Unused by this function.
        dim, W1, activationFunc: Forwarded to ``update_EDU``.
        W21: First dense-layer weights.
        W22: Output-layer weights.
        OutputFile: Writable file-like object receiving the CSV record.
        iteration: Value written in the first column of the log record.
        pairs: Iterable of ``"name_a name_b"`` strings.

    Returns:
        None.  Prints and writes
        ``iteration, mode, tp, tn, fp, fn, accuracy, precision, recall, F1``.
        Accuracy is reported as 0 when ``pairs`` is empty.
    """
    tp = fp = tn = fn = 0
    sim = notsim = 0

    for pair in pairs:
        filenames = pair.split(' ')

        EDUs_test = update_EDU(allEDUs[filenames[0]], W1, dim, activationFunc)
        eduKeys_test = sortEduKey(EDUs_test.keys(), reverse=True)
        input2_test = EDUs_test[str(eduKeys_test[0])].vector

        EDUs = update_EDU(allEDUs[filenames[1]], W1, dim, activationFunc)
        eduKeys = sortEduKey(EDUs.keys(), reverse=True)
        input2 = EDUs[str(eduKeys[0])].vector

        pair_input = np.concatenate([input2, input2_test], 0)
        output1 = feedforward_act(pair_input, W21, activationFunc)
        output = feedforward_act(output1, W22, global_outputActivation)

        label_a = filenames[0].split("-")[1]
        label_b = filenames[1].split("-")[1]
        # Single-output networks are thresholded at 0; two-output networks
        # compare the first score against the second.
        reference = 0 if len(W22[0]) == 1 else output[1]

        if label_a == label_b:
            sim += 1
            if sim % 500 == 0:
                # Periodic progress sample.
                print("Similar ", label_a, label_b, output)
            if output[0] > reference:
                tp += 1
            else:
                fn += 1
        else:
            notsim += 1
            if notsim % 500 == 0:
                print("Not-Similar ", label_a, label_b, output)
            if output[0] < reference:
                tn += 1
            else:
                fp += 1

    print(sim, notsim)
    # An empty `pairs` used to raise ZeroDivisionError here.
    total = tp + tn + fp + fn
    accuracy = float(tp + tn) / total if total else 0
    precision, recall, F1 = calculate_eval_metrics(tp, tn, fp, fn)
    print(iteration, " ", mode , " ", tp, " ", tn, " ", fp, " ", fn, " ", accuracy, " ", precision, " ", recall, " ", F1)
    OutputFile.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n" % (iteration, mode, tp, tn, fp, fn, accuracy, precision, recall, F1))
def test_AttWeight_DrHarati(path_Folder, mode, WV, dim, W1 , W2, OutputFile, WSat, WNu, iteration, activationFunc):
    """Classify paired positive/negative files and log the evaluation metrics.

    Files are taken pairwise from ``path_Folder + "pos/"`` and
    ``path_Folder + "neg/"`` (only the first ``min(#pos, #neg)`` of each
    listing).  Each file is encoded with ``readTree_att_NSWeight`` and the
    ``.vector`` of the entry whose key comes first in
    ``sortEduKey(keys, reverse=True)`` is fed through ``feedforward_act``.
    A positive file counts as correct when ``output[0] > output[1]``, a
    negative file when ``output[0] < output[1]``.  When both outputs are
    equal the input vector is printed for inspection and the sample is
    counted as an error.

    Args:
        path_Folder: Directory prefix; must end with a path separator because
            ``"pos/"`` / ``"neg/"`` are appended by plain concatenation.
        mode: Tag written in the second column of the log record.
        WV, dim, W1, WSat, WNu: Forwarded unchanged to ``readTree_att_NSWeight``.
        W2: Output-layer weights handed to ``feedforward_act``.
        OutputFile: Writable file-like object receiving the CSV record.
        iteration: Value written in the first column of the log record.
        activationFunc: Activation used for both the tree encoder and the
            output layer.

    Returns:
        None.  Prints and writes
        ``iteration, mode, tp, tn, fp, fn, accuracy, precision, recall, F1``.
        Accuracy is reported as 0 when no sample was evaluated.
    """
    posFileList_test = os.listdir(path_Folder + "pos/")
    negFileList_test = os.listdir(path_Folder + "neg/")

    def _classify(relative_path):
        # Encode one file and return the two network outputs; a tie is
        # reported together with the offending input vector.
        EDUs = readTree_att_NSWeight(path_Folder + relative_path, W1, WV, dim, WSat, WNu, activationFunc)
        eduKeys = sortEduKey(EDUs.keys(), reverse=True)
        input2 = EDUs[str(eduKeys[0])].vector
        output = feedforward_act(input2, W2, activationFunc)
        if output[0] == output[1]:
            # One pre-built string prints the same under Python 2 and 3.
            print("input " + str(input2))
        return output

    tp = fp = tn = fn = 0
    # zip() stops at the shorter listing, i.e. after min(#pos, #neg) pairs.
    for pos_name, neg_name in zip(posFileList_test, negFileList_test):
        output = _classify("pos/" + pos_name)
        if output[0] > output[1]:
            tp += 1
        else:
            fn += 1

        output = _classify("neg/" + neg_name)
        if output[0] < output[1]:
            tn += 1
        else:
            fp += 1

    # Empty folders used to raise ZeroDivisionError here.
    total = tp + tn + fp + fn
    accuracy = float(tp + tn) / total if total else 0
    precision, recall, F1 = calculate_eval_metrics(tp, tn, fp, fn)

    summary = (iteration, mode, tp, tn, fp, fn, accuracy, precision, recall, F1)
    print("   ".join(str(value) for value in summary))
    OutputFile.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n" % summary)