Esempio n. 1
0
def giveFindDepth(listR_input, listN_input, nameInput):
    """Return how deep in the score ranking the candidate matching a name sits.

    Candidates are examined from the highest value of ``listR_input``
    downward; equal values are taken in their original order. A candidate
    matches when its name equals ``nameInput`` or when
    ``sC.discriminateSimilarWord(nameInput, name)`` returns a truthy value.

    Args:
        listR_input: scores, one per candidate. Copied, never modified.
        listN_input: candidate names, parallel to ``listR_input``. Copied,
            never modified.
        nameInput: the name to look for.

    Returns:
        ``(depth, score)`` where ``depth`` is the number of higher-ranked
        candidates rejected before the match and ``score`` is the matching
        candidate's value, or ``None`` (after printing a message) when no
        candidate matches.
    """
    # Work on copies: rejected candidates are deleted as the search proceeds.
    listR = listR_input.copy()
    listN = listN_input.copy()
    depth = 0
    while listR:
        # Scan for the maximum once per round instead of once per use.
        best = max(listR)
        indexMatch = listR.index(best)
        candidate = listN[indexMatch]
        if nameInput == candidate or sC.discriminateSimilarWord(
                nameInput, candidate):
            return depth, best
        del listR[indexMatch]
        del listN[indexMatch]
        depth += 1
    print('nothing is found')
    return None
def knn_hurdlingDict_without():
    """Evaluate the kNN -> hurdlingDict two-stage prediction over 32 data sets.

    For each numbered sub-folder '01'..'32' of ``folderTrainBase`` and
    ``folderTestBase``, every test sample is predicted in two steps:

    1. ``pre.predict_knn_listAnswer`` returns a list of candidate names.
    2. ``hDM.matchBest_without`` receives the sample image and those
       candidates and returns the final answer.

    A prediction counts as right when it equals the test label or when
    ``sC.discriminateSimilarWord`` accepts the pair. The per-set right rate,
    an estimate of the remaining time, and finally the overall, best and
    worst right rates are printed. Nothing is returned.

    Raises:
        ZeroDivisionError: if a set's test label list is empty.
    """
    list_knn_hD = []
    rightRate = []

    t0 = time.time()
    for i in range(32):
        list_knn_hD_tmp = []

        # Sub-folders are named with two digits: '01' .. '32'.
        strNum = str(i + 1).zfill(2)
        trainDir = folderTrainBase + '/' + strNum + '/'
        testDir = folderTestBase + '/' + strNum + '/'

        # Context managers close each file even if unpickling fails.
        # NOTE: pickle.load must only be used on trusted files.
        # take big mat and name list of train set
        with open(trainDir + 'bigMat.pkl', 'rb') as bigMat_file:
            bigMat_train = pickle.load(bigMat_file)
        with open(trainDir + 'listLabel.pkl', 'rb') as listName_file:
            listName_train = pickle.load(listName_file)

        # take big mat and name list of test set
        with open(testDir + 'bigMat.pkl', 'rb') as bigMat_file_:
            bigMat_test = pickle.load(bigMat_file_)
        with open(testDir + 'listLabel.pkl', 'rb') as listName_file_:
            listName_test = pickle.load(listName_file_)

        # predict each data in test set
        for j, realName in enumerate(listName_test):
            # normalization
            testData, trainData = normalization.normalizator(
                bigMat_test[j], bigMat_train)
            # first predict method: knn => return a list of names
            preName_knn_list = pre.predict_knn_listAnswer(
                th_knn, testData, trainData, listName_train)
            # second predict method: hurdlingDictMatch => prediction result
            img = getImageMatFromName(realName + strNum + '.png')
            answer = hDM.matchBest_without(img, strNum + '/', preName_knn_list)

            if realName == answer or sC.discriminateSimilarWord(
                    realName, answer):
                outcome = 1
            else:
                outcome = 0
            list_knn_hD.append(outcome)
            list_knn_hD_tmp.append(outcome)

            # Release the per-sample arrays before the next iteration.
            del testData, trainData, img
            gc.collect()

        setRate = list_knn_hD_tmp.count(1) / len(list_knn_hD_tmp)
        rightRate.append(setRate)
        print('right rate for ' + str(i) + ' >>>' + str(setRate))

        # Release the per-set data before loading the next set.
        del list_knn_hD_tmp, bigMat_train, bigMat_test
        del listName_train, listName_test
        gc.collect()

        # Extrapolate the remaining time from the average time per set so far.
        t11 = time.time()
        t_res = ((t11 - t0) / (i + 1)) * (32 - (i + 1))
        print('rest time: ' + str(t_res / 60) + ' min')
        print('---------------')

    t1 = time.time()
    print('time: ' + str((t1 - t0) / 60) + ' min')
    print('right rate:')
    print(list_knn_hD.count(1) / len(list_knn_hD))
    print('best right rate:')
    print(max(rightRate))
    print('worst right rate:')
    print(min(rightRate))
def hurdlingDict2_knn_without():
    """Evaluate the hurdlingDict -> kNN two-stage prediction over 32 data sets.

    For each numbered sub-folder '01'..'32' of ``folderTrainBase`` and
    ``folderTestBase``, every test sample is predicted in two steps:

    1. ``hDM.MatchList_threshold_without`` returns a list of candidate names
       for the sample image.
    2. ``filterForTrainData`` reduces the training data using that list and
       ``pre.predict_knn_BestMatch_nearest`` returns the final answer from
       the reduced, normalized data.

    A prediction counts as right when it equals the test label or when
    ``sC.discriminateSimilarWord`` accepts the pair. The per-set right rate,
    an estimate of the remaining time, and finally the overall, best and
    worst right rates are printed. Nothing is returned.

    Raises:
        ZeroDivisionError: if a set's test label list is empty.
    """
    list_hD_knn = []
    rightRate = []

    t0 = time.time()
    for i in range(32):
        list_hD_knn_tmp = []

        # Sub-folders are named with two digits: '01' .. '32'.
        strNum = str(i + 1).zfill(2)
        trainDir = folderTrainBase + '/' + strNum + '/'
        testDir = folderTestBase + '/' + strNum + '/'

        # Context managers close each file even if unpickling fails.
        # NOTE: pickle.load must only be used on trusted files.
        # take big mat and name list of train set
        with open(trainDir + 'bigMat.pkl', 'rb') as bigMat_file:
            bigMat_train = pickle.load(bigMat_file)
        with open(trainDir + 'listLabel.pkl', 'rb') as listName_file:
            listName_train = pickle.load(listName_file)

        # take big mat and name list of test set
        with open(testDir + 'bigMat.pkl', 'rb') as bigMat_file_:
            bigMat_test = pickle.load(bigMat_file_)
        with open(testDir + 'listLabel.pkl', 'rb') as listName_file_:
            listName_test = pickle.load(listName_file_)

        # predict each data in test set
        for j, realName in enumerate(listName_test):
            # first predict method: hurdlingDictMatch => return a list of names
            img = getImageMatFromName(realName + strNum + '.png')
            preName_hD_list = hDM.MatchList_threshold_without(
                img, th_hurdling, strNum + '/')
            # second predict method: knn nearest => return prediction result
            trainData_hD, listName_train_hD = filterForTrainData(
                preName_hD_list, listName_train, bigMat_train)
            testData, trainData = normalization.normalizator(
                bigMat_test[j], trainData_hD)
            preName_nearst = pre.predict_knn_BestMatch_nearest(
                testData, trainData, listName_train_hD)

            if preName_nearst == realName or sC.discriminateSimilarWord(
                    preName_nearst, realName):
                outcome = 1
            else:
                outcome = 0
            list_hD_knn.append(outcome)
            list_hD_knn_tmp.append(outcome)

            # Release the per-sample data before the next iteration.
            del img, preName_hD_list, trainData_hD, listName_train_hD
            del testData, trainData
            gc.collect()

        setRate = list_hD_knn_tmp.count(1) / len(list_hD_knn_tmp)
        rightRate.append(setRate)
        print('right rate for ' + str(i) + ' >>>' + str(setRate))

        # Release the per-set data before loading the next set.
        del list_hD_knn_tmp, bigMat_train, bigMat_test
        del listName_train, listName_test
        gc.collect()

        # Extrapolate the remaining time from the average time per set so far.
        t11 = time.time()
        t_res = ((t11 - t0) / (i + 1)) * (32 - (i + 1))
        print('rest time: ' + str(t_res / 60) + ' min')
        print('---------------')

    t1 = time.time()
    print('-------------------')
    print('time: ' + str((t1 - t0) / 60) + ' min')
    print('right rate:')
    print(list_hD_knn.count(1) / len(list_hD_knn))
    print('best right rate:')
    print(max(rightRate))
    print('worst right rate:')
    print(min(rightRate))
    listName_file_ = open(folderTestBase + '/' + strNum + '/' + 'listLabel.pkl', 'rb')
    listName_test = pickle.load(listName_file_)
    listName_file_.close()

    listAnswer_nearst_tmp = []
    listAnswer_cluster_tmp = []

    for j in range(len(listName_test)):
        testData, trainData = normalization.normalizator(bigMat_test[j], bigMat_train)

        preName_nearst = pre.predict_knn_BestMatch_nearest(testData, trainData, listName_train)
        if listName_test[j] == preName_nearst:
            listAnswer_nearst.append(1)
            listAnswer_nearst_tmp.append(1)
        elif sC.discriminateSimilarWord(listName_test[j], preName_nearst):
            listAnswer_nearst.append(1)
            listAnswer_nearst_tmp.append(1)
        else:
            listAnswer_nearst.append(0)
            listAnswer_nearst_tmp.append(0)

        centerMat, centerNamelist = pre.clusterCenterGenerator(trainData, listName_train)
        preName_cluster = pre.predict_clusterCenter_BestMatch(testData, centerMat, centerNamelist)
        if listName_test[j] == preName_cluster:
            listAnswer_cluster.append(1)
            listAnswer_cluster_tmp.append(1)
        elif sC.discriminateSimilarWord(listName_test[j], preName_cluster):
            listAnswer_cluster.append(1)
            listAnswer_cluster_tmp.append(1)
        else:
Esempio n. 5
0
    # Outcomes (1 = right, 0 = wrong) for the images handled in this pass only;
    # listAnswer_hurdlingDict accumulates the same outcomes across passes.
    listAnswer_hurdlingDict_tmp = []

    # Two-character form of i: a leading '0' is added when i < 10.
    num = ('0' if i < 10 else '') + str(i)

    for imageName in imageList:
        # Only handle files whose two characters before the last four equal
        # num (presumably the set number ahead of the extension; verify
        # against the naming scheme).
        if imageName[-6:-4] != num:
            continue

        image = cv2.imread(folderImage + imageName, cv2.IMREAD_GRAYSCALE)
        realName = imageName[:-6]
        answerName = hDM.matchBest_without(image, num + '/')

        # Right when the names are equal or sC.discriminateSimilarWord
        # accepts the pair.
        if answerName == realName or sC.discriminateSimilarWord(
                answerName, realName):
            listAnswer_hurdlingDict.append(1)
            listAnswer_hurdlingDict_tmp.append(1)
        else:
            listAnswer_hurdlingDict.append(0)
            listAnswer_hurdlingDict_tmp.append(0)

        # Drop the image before reading the next one.
        del image
        gc.collect()

    # Right rate of this pass; raises ZeroDivisionError when no image matched.
    rightRate_hurdlingDict.append(
        listAnswer_hurdlingDict_tmp.count(1) /
        len(listAnswer_hurdlingDict_tmp))
    del listAnswer_hurdlingDict_tmp
    gc.collect()
Esempio n. 6
0
# Load the test feature matrix and its labels. The context managers close
# each file even if unpickling fails; the file-handle names stay bound to the
# (closed) file objects afterwards, as before.
# NOTE: pickle.load must only be used on trusted files.
with open(folderTest + 'bigMat.pkl', 'rb') as bigMat_file_:
    bigMat_test = pickle.load(bigMat_file_)

with open(folderTest + 'listLabel.pkl', 'rb') as listName_file_:
    listName_test = pickle.load(listName_file_)

# One entry per test sample: 1 when predicted right, 0 otherwise.
list_right = []

for j in range(len(listName_test)):
    testData, trainData = normalization.normalizator(bigMat_test[j], bigMat_train)
    # Reshape the single sample into one row before passing it to linear_SVC.
    testData_ = np.reshape(testData, (1, -1))
    preName = linear_SVC(trainData, listName_train, testData_)
    # Right when the prediction equals the label or
    # sC.discriminateSimilarWord accepts the pair.
    if preName == listName_test[j] or sC.discriminateSimilarWord(listName_test[j], preName):
        list_right.append(1)
    else:
        list_right.append(0)

    # Rewrite the same console line with the running right rate.
    sys.stdout.write("\rright rate {:.2f} %".format((list_right.count(1)/len(list_right)) * 100))
    sys.stdout.flush()

# Release the train and test data now that evaluation is finished.
del bigMat_train
del listName_train
del bigMat_test
del listName_test
gc.collect()

t2 = time.time()
print('\n')