Beispiel #1
0
def IR_Evaluation_Test(EDUs_Test, EDUs_Train, path_file_Test, path_file_Train, W1, WV, dim, activationFunc, iteration, OutputFile, numberofDoc):
    """Score retrieval quality with a nearest-neighbour label vote.

    Each document is represented by the ``vector`` of the EDU whose key comes
    first in ``sortEduKey(keys, reverse=True)``.  For every test document the
    ``numberofDoc`` training documents with the highest cosine similarity are
    retrieved; the share of them carrying the test document's label is that
    document's precision.  A document's label is the second ``-``-separated
    field of its file name.

    Args:
        EDUs_Test: Mapping from test file name to its EDU collection.
        EDUs_Train: Mapping from training file name to its EDU collection.
        path_file_Test: Directory whose listing selects the test documents.
        path_file_Train: Directory whose listing selects the training documents.
        W1: Forwarded to ``update_EDU``.
        WV: Not used by this function.
        dim: Forwarded to ``update_EDU``.
        activationFunc: Forwarded to ``update_EDU``.
        iteration: Value written as the first column of the output line.
        OutputFile: Object with a ``write`` method, or ``None`` to skip writing.
        numberofDoc: Number of nearest training documents to retrieve.

    Returns:
        None.  The average precision is printed and, when ``OutputFile`` is
        not ``None``, written as ``"<iteration>,<average>\\n"``.  With no test
        documents the average is reported as ``0.0``.
    """
    test_files = os.listdir(path_file_Test)
    train_files = os.listdir(path_file_Train)

    train_vectors = []
    train_labels = []
    for train_name in train_files:
        EDUs_tr = update_EDU(EDUs_Train[train_name], W1, dim, activationFunc)
        eduKeys = sortEduKey(EDUs_tr.keys(), reverse=True)
        train_vectors.append(EDUs_tr[str(eduKeys[0])].vector)
        train_labels.append(train_name.split("-")[1])

    # numpy arrays so the labels can be fancy-indexed by the argsort result.
    train_vectors = np.array(train_vectors)
    train_labels = np.array(train_labels)

    precisions = []
    for test_name in test_files:
        EDUs_te = update_EDU(EDUs_Test[test_name], W1, dim, activationFunc)
        eduKeys = sortEduKey(EDUs_te.keys(), reverse=True)
        testRep = np.array(EDUs_te[str(eduKeys[0])].vector).reshape(1, -1)

        # Diagnostic only: name the document whose representation contains NaN.
        if np.isnan(testRep).any():
            print(test_name)

        similarities = cosine_similarity(testRep, train_vectors)
        main_label = test_name.split("-")[1]

        # argsort is ascending, so take the tail and reverse it to get the
        # most similar training documents first.
        nearest = similarities.argsort()[0][-1 * numberofDoc:][::-1]
        predicted_labels = train_labels[nearest]
        print(predicted_labels)

        predicted_labels = sorted(predicted_labels)
        predicts = {x: predicted_labels.count(x) for x in predicted_labels}
        main_predict = max(predicts.items(), key=operator.itemgetter(1))[0]
        print(main_label, main_predict)

        if main_label in predicts:
            precisions.append(float(predicts[main_label]) / numberofDoc)
        else:
            precisions.append(0)

    # Guard the empty test set instead of dividing by zero.
    if precisions:
        avg_pre = float(sum(precisions)) / len(precisions)
    else:
        avg_pre = 0.0
    print("average", avg_pre)

    if OutputFile is not None:
        OutputFile.write("%s,%s\n" % (iteration, avg_pre))
Beispiel #2
0
def calculateError_validation(path_Folder, mode, WV, dim,  W1 , W2, OutputFile, WSat, WNu, iteration, activationFunc):
    """Compute the mean MSE over a balanced set of positive/negative files.

    Files are read from ``path_Folder + "pos/"`` and ``path_Folder + "neg/"``.
    Only the first ``min(#pos, #neg)`` entries of each listing are used, so
    both classes contribute the same number of samples.  Positive files are
    scored against the target ``[1.0, 0]`` and negative files against
    ``[0, 1.0]``.

    Args:
        path_Folder: Path prefix that is concatenated with ``"pos/"`` and
            ``"neg/"`` (so it needs its own trailing separator).
        mode: Label written to the second column of the log line.
        WV, dim, W1, WSat, WNu: Forwarded to ``readTree_att_NSWeight``.
        W2: Weights passed to ``feedforward_act``.
        OutputFile: Object with a ``write`` method receiving the log line.
        iteration: Value written to the first column of the log line.
        activationFunc: Forwarded to ``readTree_att_NSWeight`` and
            ``feedforward_act``.

    Returns:
        The summed MSE divided by ``2 * min(#pos, #neg)``.

    Raises:
        ZeroDivisionError: If either directory is empty.
    """
    posFileList_test = os.listdir(path_Folder + "pos/")
    negFileList_test = os.listdir(path_Folder + "neg/")
    numberofSamples_test = min(len(posFileList_test), len(negFileList_test))

    labelled_sets = (
        ("pos/", posFileList_test, (1.0, 0)),
        ("neg/", negFileList_test, (0, 1.0)),
    )

    sumErr = 0.0
    for k in range(numberofSamples_test):
        # Positive then negative for the same index, one pair per iteration.
        for subdir, file_names, target in labelled_sets:
            path_File_test = path_Folder + subdir + file_names[k]
            EDUs = readTree_att_NSWeight(path_File_test, W1, WV, dim, WSat, WNu, activationFunc)
            eduKeys = sortEduKey(EDUs.keys(), reverse=True)
            input2 = EDUs[str(eduKeys[0])].vector
            output = feedforward_act(input2, W2, activationFunc)
            sumErr += MSE(list(target), output)

    totall_Err = sumErr / (2 * numberofSamples_test)

    # Log before returning; a single formatted string prints the same text
    # under both the Python 2 print statement and the Python 3 function.
    print("%s   %s   %s" % (iteration, mode, totall_Err))
    OutputFile.write("%s,%s,%s\n" % (iteration, mode, totall_Err))
    return totall_Err
Beispiel #3
0
def calculateError_validation_EDUs(allEDUs, mode, WV, dim,  W1 , W2, OutputFile, WSat, WNu, iteration, activationFunc):
    """Compute the MSE over pre-loaded EDU collections keyed by a signed id.

    Each collection in ``allEDUs`` is refreshed with ``update_EDU``.  Non-empty
    collections with a key greater than zero are scored against the target
    ``[1.0, 0]``, those with a key less than zero against ``[0, 1.0]``; empty
    collections and a key equal to zero are skipped.

    Args:
        allEDUs: Mapping from a key comparable with ``0`` to an EDU collection.
        mode: Label written to the second column of the log line.
        WV: Not used by this function.
        dim, W1, WSat, WNu, activationFunc: Forwarded to ``update_EDU``.
        W2: Weights passed to ``feedforward_act``.
        OutputFile: Object with a ``write`` method receiving the log line.
        iteration: Value written to the first column of the log line.

    Returns:
        ``MSE(targets, outputs)`` over all scored collections.
    """
    targets = np.zeros([0, 2])
    outputs = np.zeros([0, 2])

    for doc_id in allEDUs.keys():
        doc_edus = update_EDU(allEDUs[doc_id], W1, WSat, WNu, dim, activationFunc)

        # Length is checked before the key so an empty collection never
        # triggers the key comparison.
        if len(doc_edus) == 0:
            continue

        if doc_id > 0:
            expected = [1.0, 0]
        elif doc_id < 0:
            expected = [0, 1.0]
        else:
            continue

        ordered_keys = sortEduKey(doc_edus.keys(), reverse=True)
        doc_vector = doc_edus[str(ordered_keys[0])].vector

        # The output layer uses the module-level activation, not activationFunc.
        prediction = feedforward_act(doc_vector, W2, global_outputActivation)
        targets = np.concatenate((targets, [expected]), 0)
        outputs = np.concatenate((outputs, [prediction]), 0)

    totall_Err = MSE(targets, outputs)
    print(iteration, " ", mode , " ", totall_Err)
    OutputFile.write("%s,%s,%s\n" % (iteration, mode, totall_Err))
    return totall_Err
Beispiel #4
0
def test_AttWeight(path_Folder, mode, WV, dim,  W1 , W2, OutputFile, WSat, WNu, iteration, activationFunc):
    posFileList_test = os.listdir(path_Folder + "pos/")
    negFileList_test = os.listdir(path_Folder + "neg/")
    numberofSamples_test = min(len(posFileList_test), len(negFileList_test))
    #numberofSamples_test=100
    tp = 0
    fp = 0
    tn = 0
    fn = 0

    for k in range(0, numberofSamples_test):
        path_File_test = path_Folder + "pos/" + posFileList_test[k]
        #print posFileList_test[k]
        EDUs = readTree_att_NSWeight(path_File_test, W1, WV, dim, WSat, WNu, activationFunc)
        y = [1.0, 0]
        eduKeys = sortEduKey(EDUs.keys(), reverse=True)
        input2 = EDUs[str(eduKeys[0])].vector
        # print "pos"
        # print input2
        y_in = feedforward(input2, W2)
        output = feedforward_act(input2, W2, activationFunc)
        #print output

        if output[0] > output[1]:
            tp += 1
        else:
            fn += 1

        path_File_test = path_Folder + "neg/" + negFileList_test[k]
        #print negFileList_test[k]
        EDUs = readTree_att_NSWeight(path_File_test, W1, WV, dim, WSat, WNu, activationFunc)
        eduKeys = sortEduKey(EDUs.keys(), reverse=True)
        input2 = EDUs[str(eduKeys[0])].vector
        # print "neg"
        # print input2
        y_in = feedforward(input2, W2)
        output = feedforward_act(input2, W2, activationFunc)

        if output[0] < output[1]:

            tn += 1
        else:
            fp += 1

    accuracy = float(tp + tn) / (tp + tn + fp + fn)
    if (tp+fp) == 0:
        precision = 0
    else:
        precision = float(tp) / (tp + fp)
    recall = float(tp) / (tp + fn)
    if (precision + recall) == 0:
        F1 =0
    else:
        F1 = 2 * (float(precision * recall)) / (precision + recall)

    print  iteration, " ", mode , " ", tp, " ", tn, " ", fp, " ", fn, " ", accuracy, " ", precision, " ", recall, " ", F1
    OutputFile.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n" % (iteration, mode, tp, tn, fp, fn, accuracy, precision, recall, F1))
Beispiel #5
0
def train_for_each_Sample_AttWeight (EDUs, EDUs_test, y, W1, W21, W22, eta, dim, activationFunc, dropOutPercent):
    """Run one training step on a (document, query) pair.

    The document vector and the query vector (each taken from the first key of
    the reverse-sorted EDU keys) are concatenated, pushed through ``W21`` and
    ``W22``, and the resulting error is propagated back into ``W21``, ``W22``
    and, through both trees, into ``W1``.  The forward/backward pass uses the
    matrices returned by ``dropcolrow``; the final updates are applied to
    copies of the incoming matrices taken before that call.

    Args:
        EDUs: EDU collection of the document.
        EDUs_test: EDU collection of the query.
        y: Target passed to ``softmax_error``.
        W1, W21, W22: Weight matrices (must provide ``.copy()``).
        eta: Learning rate passed to ``update_weight``.
        dim: Forwarded to ``BpthroughTree``.
        activationFunc: Activation used for the ``W21`` layer and the trees.
        dropOutPercent: Forwarded to ``dropOut``.

    Returns:
        Tuple ``(W1, W21, W22)`` as returned by ``update_weight``.
    """
    # Copies of the incoming matrices; the updates at the end start from these.
    full_W1, full_W21, full_W22 = W1.copy(), W21.copy(), W22.copy()

    dropped = dropOut(len(W1[0]), dropOutPercent)
    masked_W1 = dropcolrow(W1, dropped, False)

    # NOTE(review): `dropped*2` repeats the sequence if dropOut returns a list
    # but doubles every index if it returns a numpy array -- confirm which one
    # is intended for the concatenated (document + query) input.
    dropped_W21 = list(dropped) + list(dropped*2)
    masked_W21 = dropcolrow(W21, dropped_W21, True)

    query_keys = sortEduKey(EDUs_test.keys(), reverse=True)
    query_vector = EDUs_test[str(query_keys[0])].vector

    doc_keys = sortEduKey(EDUs.keys(), reverse=True)
    doc_vector = EDUs[str(doc_keys[0])].vector

    # Forward pass: document first, query second.
    pair_vector = (np.concatenate([doc_vector, query_vector], 0))
    hidden = feedforward_act(pair_vector, masked_W21, activationFunc)
    y_in = feedforward(hidden, W22)
    output = feedforward_act(hidden, W22, global_outputActivation)

    # Backward pass through the two dense layers.
    error_soft = softmax_error(y, output, y_in, global_outputActivation)
    delta_W22 = calculate_deltaW(error_soft, hidden)

    error_hidden = non_softmax_error(error_soft, W22, pair_vector, masked_W21, activationFunc)
    delta_W21 = calculate_deltaW(error_hidden, pair_vector)

    # Backward pass through both trees; the final flag distinguishes the
    # document call (True) from the query call (False).
    delta_doc = BpthroughTree(EDUs, error_hidden, masked_W1, masked_W21, dim, activationFunc, True)
    delta_query = BpthroughTree(EDUs_test, error_hidden, masked_W1, masked_W21, dim, activationFunc, False)

    delta_doc = dropcolrow(delta_doc, dropped, False)
    delta_query = dropcolrow(delta_query, dropped, False)
    delta_W21 = dropcolrow(delta_W21, dropped_W21, True)

    # W1 is shared by both trees, so its update is the mean of the two deltas.
    delta_W1 = np.divide(np.add(delta_doc, delta_query), 2)

    new_W21 = update_weight(eta, full_W21, delta_W21)
    new_W22 = update_weight(eta, full_W22, delta_W22)
    new_W1 = update_weight(eta, full_W1, delta_W1)

    return new_W1, new_W21, new_W22
Beispiel #6
0
def calculateError_validation_pair(allEDUs, mode, WV, dim,  W1, W21, W22, OutputFile, iteration, activationFunc, pairs):
    """Compute the MSE of the pair network over a list of file-name pairs.

    Each entry of ``pairs`` is a string ``"<query file> <document file>"``.
    Two files count as similar when the second ``-``-separated field of their
    names is equal.  The target depends on the width of ``W22[0]``:
    ``[1.0]`` / ``[-1.0]`` for a single output, ``[0.8, -0.8]`` /
    ``[-0.8, 0.8]`` otherwise (similar / not similar).

    Args:
        allEDUs: Mapping from file name to its EDU collection.
        mode: Label written to the second column of the log line.
        WV: Not used by this function.
        dim, W1: Forwarded to ``update_EDU``.
        W21, W22: Weights of the two layers applied to the concatenated pair.
        OutputFile: Object with a ``write`` method receiving the log line.
        iteration: Value written to the first column of the log line.
        activationFunc: Forwarded to ``update_EDU`` and used for the ``W21``
            layer.
        pairs: Iterable of space-separated file-name pairs.

    Returns:
        ``MSE(outputs, targets)`` over all pairs.
    """
    targets = []
    outputs = []

    for pair in pairs:
        names = pair.split(' ')

        query_edus = update_EDU(allEDUs[names[0]], W1, dim, activationFunc)
        query_keys = sortEduKey(query_edus.keys(), reverse=True)
        query_vector = query_edus[str(query_keys[0])].vector

        doc_edus = update_EDU(allEDUs[names[1]], W1, dim, activationFunc)
        doc_keys = sortEduKey(doc_edus.keys(), reverse=True)
        doc_vector = doc_edus[str(doc_keys[0])].vector

        # Document first, query second.
        pair_vector = np.concatenate([doc_vector, query_vector], 0)
        hidden = feedforward_act(pair_vector, W21, activationFunc)
        prediction = feedforward_act(hidden, W22, global_outputActivation)

        same_label = names[0].split("-")[1] == names[1].split("-")[1]
        if len(W22[0]) == 1:
            expected = [1.0] if same_label else [-1.0]
        else:
            expected = [0.8, -0.8] if same_label else [-0.8, 0.8]

        targets.append(expected)
        outputs.append(prediction)

    totall_Err = MSE(outputs, targets)
    print(iteration, " ", mode , " ", totall_Err)
    OutputFile.write("%s,%s,%s\n" % (iteration, mode, totall_Err))
    return totall_Err
Beispiel #7
0
def calculateError_validation(path_Folder, mode, WV, dim,  W1_doc, W1_query, W2, OutputFile, iteration, activationFunc):
    """Compute the cross-entropy of the pair scorer over all file pairs.

    Every file in ``path_Folder`` is used once as a query (read with
    ``W1_query``) and paired with every file as a document (read with
    ``W1_doc``).  Two files count as similar when the second ``-``-separated
    field of their names is equal.  Similar pairs get the target ``[1.0]``.
    Of the dissimilar pairs only those whose document index is a multiple of
    5 are kept, with the target ``[0.0]``.

    Document vectors are computed once up front rather than once per query;
    this assumes ``readTree_att_NSWeight`` returns the same result for the
    same arguments.

    Args:
        path_Folder: Directory containing the files to evaluate.
        mode: Label written to the second column of the log line.
        WV, dim: Forwarded to ``readTree_att_NSWeight``.
        W1_doc: Tree weights used when a file acts as a document.
        W1_query: Tree weights used when a file acts as a query.
        W2: Weights passed to ``feedforward_act``.
        OutputFile: Object with a ``write`` method receiving the log line.
        iteration: Value written to the first column of the log line.
        activationFunc: Forwarded to ``readTree_att_NSWeight``.

    Returns:
        ``cross_entropy(targets, outputs)`` over the kept pairs.
    """
    FileList = os.listdir(path_Folder)

    def read_vector(file_name, tree_weights):
        # Vector stored under the first key of the reverse-sorted EDU keys.
        EDUs = readTree_att_NSWeight(os.path.join(path_Folder, file_name), tree_weights, WV, dim, activationFunc)
        eduKeys = sortEduKey(EDUs.keys(), reverse=True)
        return EDUs[str(eduKeys[0])].vector

    labels = [file_name.split("-")[1] for file_name in FileList]

    # Hoisted out of the query loop: n tree reads instead of n * n.
    doc_vectors = [read_vector(file_name, W1_doc) for file_name in FileList]

    target_list = []
    output_list = []

    for j, query_name in enumerate(FileList):
        query_vector = read_vector(query_name, W1_query)

        for k, doc_vector in enumerate(doc_vectors):
            if labels[j] == labels[k]:
                y = [1.0]
            elif k % 5 == 0:
                # Dissimilar pairs are subsampled to every fifth document.
                y = [0.0]
            else:
                continue

            # Document first, query second; only kept pairs are scored.
            pair_vector = np.concatenate([doc_vector, query_vector], 0)
            output = feedforward_act(pair_vector, W2, global_outputActivation)
            target_list.append(y)
            output_list.append(output)

    totall_Err = cross_entropy(target_list, output_list)
    print(iteration, " ", mode , " ", totall_Err)
    OutputFile.write("%s,%s,%s\n" % (iteration, mode, totall_Err))
    return totall_Err
Beispiel #8
0
def train_for_each_Sample (EDUs, y, W1, W2, eta, activationFunc):
    """Run one training step on a single EDU collection.

    The vector stored under the first key of the reverse-sorted EDU keys is
    fed through ``W2``; the resulting error yields a delta for ``W2`` directly
    and a delta for ``W1`` via ``BpthroughTree``.

    Args:
        EDUs: EDU collection of the sample.
        y: Target passed to ``softmax_error``.
        W1: Tree weights.
        W2: Output-layer weights.
        eta: Learning rate passed to ``update_weight``.
        activationFunc: Activation used for the output layer and the tree.

    Returns:
        Tuple ``(W1, W2)`` as returned by ``update_weight``.
    """
    top_key = sortEduKey(EDUs.keys(), reverse=True)[0]
    sample_vector = EDUs[str(top_key)].vector

    pre_activation = feedforward(sample_vector, W2)
    prediction = feedforward_act(sample_vector, W2, activationFunc)
    output_error = softmax_error(y, prediction, pre_activation, activationFunc)

    # Both deltas are derived from the incoming weights before any update.
    step_W2 = calculate_deltaW(output_error, sample_vector)
    step_W1 = BpthroughTree(EDUs, output_error, W1, W2, activationFunc)

    new_W2 = update_weight(eta, W2, step_W2)
    new_W1 = update_weight(eta, W1, step_W1)
    return new_W1, new_W2
Beispiel #9
0
def plotDocs(EDUs_Test, name, labelColor):
    """Save a 2-D t-SNE plot of the document vectors, annotated by label.

    Each document contributes the vector stored under the first key of its
    reverse-sorted EDU keys.  The vectors are embedded with
    ``TSNE(n_components=2)``, rescaled with ``MinMaxScaler`` and drawn as text
    annotations coloured by label.  A document's label is the second
    ``-``-separated field of its key in ``EDUs_Test``.

    Args:
        EDUs_Test: Mapping from document name to its EDU collection.
        name: Inserted into the output file name ``'fig' + name + '.png'``.
        labelColor: Mapping from label to the colour used for its annotations.

    Returns:
        None.  The figure is saved to disk.
    """
    labels = []
    doc_vectors = []
    for doc_name in EDUs_Test.keys():
        labels.append(doc_name.split("-")[1])
        doc_edus = EDUs_Test[doc_name]
        top_key = sortEduKey(doc_edus.keys(), reverse=True)[0]
        doc_vectors.append(doc_edus[str(top_key)].vector)

    embedded = TSNE(n_components=2).fit_transform(doc_vectors)
    # Rescale with MinMaxScaler before annotating; presumably this keeps the
    # points inside the default axes limits -- TODO confirm.
    embedded = MinMaxScaler().fit(embedded).transform(embedded)

    for label, point in zip(labels, embedded):
        plt.annotate(label, xy=(point[0], point[1]), color=labelColor[label])

    pylab.savefig('fig' + name + '.png')
    pylab.close()

    # NOTE(review): the figure was closed on the line above, so this call
    # presumably has nothing left to display -- confirm whether it is needed.
    plt.show()
0
def test_AttWeight_DrHarati_pair(allEDUs, mode, WV, dim,  W1, W21, W22, OutputFile, iteration, activationFunc, pairs):
    """Classify file-name pairs as similar / not similar and log the metrics.

    Each entry of ``pairs`` is a string ``"<query file> <document file>"``.
    Two files count as similar when the second ``-``-separated field of their
    names is equal.  With a single output unit (``len(W22[0]) == 1``) the
    decision compares ``output[0]`` with 0, otherwise with ``output[1]``.
    A similar pair needs a strictly greater value to count as a true
    positive, a dissimilar pair a strictly smaller value to count as a true
    negative, so a tie is always counted as an error.

    Args:
        allEDUs: Mapping from file name to its EDU collection.
        mode: Label written to the second column of the log line.
        WV: Not used by this function.
        dim, W1: Forwarded to ``update_EDU``.
        W21, W22: Weights of the two layers applied to the concatenated pair.
        OutputFile: Object with a ``write`` method receiving the log line.
        iteration: Value written to the first column of the log line.
        activationFunc: Forwarded to ``update_EDU`` and used for the ``W21``
            layer.
        pairs: Iterable of space-separated file-name pairs.

    Returns:
        None.  One line ``iteration,mode,tp,tn,fp,fn,accuracy,precision,
        recall,F1`` is printed and written to ``OutputFile``.
    """
    tp = fp = tn = fn = 0
    sim = notsim = 0

    def reference_for(scores):
        # Value that scores[0] is compared against for the current layout.
        return 0 if len(W22[0]) == 1 else scores[1]

    for pair in pairs:
        names = pair.split(' ')

        query_edus = update_EDU(allEDUs[names[0]], W1, dim, activationFunc)
        query_keys = sortEduKey(query_edus.keys(), reverse=True)
        query_vector = query_edus[str(query_keys[0])].vector

        doc_edus = update_EDU(allEDUs[names[1]], W1, dim, activationFunc)
        doc_keys = sortEduKey(doc_edus.keys(), reverse=True)
        doc_vector = doc_edus[str(doc_keys[0])].vector

        # Document first, query second.
        pair_vector = np.concatenate([doc_vector, query_vector], 0)
        hidden = feedforward_act(pair_vector, W21, activationFunc)
        output = feedforward_act(hidden, W22, global_outputActivation)

        query_label = names[0].split("-")[1]
        doc_label = names[1].split("-")[1]

        if query_label == doc_label:
            sim += 1
            # Progress sample: every 500th similar pair.
            if sim % 500 == 0:
                print("Similar ", query_label, doc_label, output)
            if output[0] > reference_for(output):
                tp += 1
            else:
                fn += 1
        else:
            notsim += 1
            # Progress sample: every 500th dissimilar pair.
            if notsim % 500 == 0:
                print("Not-Similar ", query_label, doc_label, output)
            if output[0] < reference_for(output):
                tn += 1
            else:
                fp += 1

    print (sim, notsim)
    accuracy = float(tp + tn) / (tp + tn + fp + fn)
    precision, recall, F1 = calculate_eval_metrics(tp, tn, fp, fn)
    print(iteration, " ", mode , " ", tp, " ", tn, " ", fp, " ", fn, " ", accuracy, " ", precision, " ", recall, " ", F1)
    OutputFile.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n" % (iteration, mode, tp, tn, fp, fn, accuracy, precision, recall, F1))
Beispiel #11
0
def test_AttWeight_DrHarati(path_Folder, mode, WV, dim,  W1 , W2, OutputFile, WSat, WNu, iteration, activationFunc):
    """Classify a balanced pos/neg file set and log the confusion metrics.

    Files are read from ``path_Folder + "pos/"`` and ``path_Folder + "neg/"``;
    only the first ``min(#pos, #neg)`` entries of each listing are used.  A
    positive file counts as a true positive when ``output[0] > output[1]``
    (otherwise a false negative); a negative file counts as a true negative
    when ``output[0] < output[1]`` (otherwise a false positive).  When both
    outputs are equal the input vector is printed for inspection and the
    sample is counted as an error.

    Args:
        path_Folder: Path prefix that is concatenated with ``"pos/"`` and
            ``"neg/"`` (so it needs its own trailing separator).
        mode: Label written to the second column of the log line.
        WV, dim, W1, WSat, WNu: Forwarded to ``readTree_att_NSWeight``.
        W2: Weights passed to ``feedforward_act``.
        OutputFile: Object with a ``write`` method receiving the log line.
        iteration: Value written to the first column of the log line.
        activationFunc: Forwarded to ``readTree_att_NSWeight`` and
            ``feedforward_act``.

    Returns:
        None.  One line ``iteration,mode,tp,tn,fp,fn,accuracy,precision,
        recall,F1`` is printed (space separated) and written (comma
        separated).

    Raises:
        ZeroDivisionError: If either directory is empty.
    """
    posFileList_test = os.listdir(path_Folder + "pos/")
    negFileList_test = os.listdir(path_Folder + "neg/")
    numberofSamples_test = min(len(posFileList_test), len(negFileList_test))

    def score(subdir, file_name):
        # Network output for one file; a tie between the two outputs is
        # reported together with the input that produced it.
        EDUs = readTree_att_NSWeight(path_Folder + subdir + file_name, W1, WV, dim, WSat, WNu, activationFunc)
        eduKeys = sortEduKey(EDUs.keys(), reverse=True)
        input2 = EDUs[str(eduKeys[0])].vector
        output = feedforward_act(input2, W2, activationFunc)
        if output[0] == output[1]:
            # Single-string print: same text under Python 2 and Python 3.
            print("input %s" % (input2,))
        return output

    tp = fp = tn = fn = 0
    for k in range(numberofSamples_test):
        output = score("pos/", posFileList_test[k])
        if output[0] > output[1]:
            tp += 1
        else:
            fn += 1

        output = score("neg/", negFileList_test[k])
        if output[0] < output[1]:
            tn += 1
        else:
            fp += 1

    accuracy = float(tp + tn) / (tp + tn + fp + fn)
    precision, recall, F1 = calculate_eval_metrics(tp, tn, fp, fn)

    fields = (iteration, mode, tp, tn, fp, fn, accuracy, precision, recall, F1)
    print("   ".join(str(value) for value in fields))
    OutputFile.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n" % fields)