Example #1
def index_imgs():
    # Build one bucket index per hash layer: images whose feature
    # descriptors hash to the same bucket in a layer are grouped together.
    global database
    global hash_rep
    database = []
    json_files = os.listdir(config.FEATURES_FOLDER)
    json_files = ret_files_without_ext(json_files)

    ctr = 0
    total_iter = len(hash_rep) * len(json_files)
    for i in range(len(hash_rep)):
        layer_index = {}

        for img in json_files:
            with open(path.join(config.FEATURES_FOLDER, img + '.json'),
                      'r',
                      encoding='utf-8') as f:
                feat_desc = json.load(f)
            img_bucket = img_index(feat_desc[img], i)

            layer_index.setdefault(img_bucket, []).append(img)
            ctr += 1
            progress(ctr, total_iter)

        database.append(layer_index)

    return database
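A minimal sketch of how this layered bucket index might be queried, assuming `img_index` is the same per-layer bucket hash used during indexing (the lookup helper below is hypothetical, not part of the original code):

def query_index(query_desc, database):
    # Hypothetical lookup: collect every image that shares a bucket with
    # the query descriptor in at least one hash layer.
    candidates = set()
    for i, layer_index in enumerate(database):
        bucket = img_index(query_desc, i)  # same hash as at index time
        candidates.update(layer_index.get(bucket, []))
    return candidates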
Example #2
def LBPFeatureDescriptorForImageSubset(self, imageSet):
    # Iterate over the images in the given subset and compute the LBP
    # feature descriptor for each of them.
    storeLbpFD = []
    lbp = LBP()
    number_files = len(imageSet)
    i = 0
    for filename in imageSet:
        lbp_fd = lbp.LBPForSingleImage(
            join(str(config.IMAGE_FOLDER), filename))
        storeLbpFD.append(lbp_fd.tolist())
        i += 1
        progress(i, number_files)
    print(len(storeLbpFD))
    return storeLbpFD
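All of these examples report status through a shared `progress` helper that is not shown in the snippets. A minimal sketch of what it is assumed to do, redrawing a single console line with a percentage (the implementation below is an assumption, not the original):

import sys

def progress(count, total):
    # Redraw one console line with a bar and percentage; the bare print()
    # calls at the end of several examples then move past this line.
    percent = 100.0 * count / total
    sys.stdout.write('\r[{:<50}] {:.1f}%'.format('=' * int(percent / 2), percent))
    sys.stdout.flush()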
Example #3
def CMFeatureDescriptor(self):
    # Iterate over all the images in the selected folder and compute the
    # color-moments (CM) feature descriptor for each of them.
    storeCmFD = []
    cm = CM()
    files = os.listdir(str(config.IMAGE_FOLDER))
    number_files = len(files)  # counts every entry, not just .jpg files
    i = 0
    for file in files:
        filename = os.fsdecode(file)
        if filename.endswith(".jpg"):
            cm_fd = cm.CMForSingleImage(
                join(str(config.IMAGE_FOLDER), filename))
            storeCmFD.append(cm_fd.tolist())
            i += 1
            progress(i, number_files)
    print()  # move past the single-line progress output
    return storeCmFD
Example #4
def LBPFeatureDescriptorForImageSubset(self, imageSet):
    # Iterate over the .jpg images in the folder that also appear in the
    # given subset and compute the LBP feature descriptor for each.
    storeLbpFD = []
    lbp = LBP()
    files = os.listdir(str(config.IMAGE_FOLDER))
    number_files = len(files)  # counts every entry, not just matching files
    i = 0
    for file in files:
        filename = os.fsdecode(file)
        if filename.endswith(".jpg") and filename in imageSet.imageName.values:
            lbp_fd = lbp.LBPForSingleImage(
                join(str(config.IMAGE_FOLDER), filename))
            storeLbpFD.append(lbp_fd.tolist())
            i += 1
            progress(i, number_files)
    print()  # move past the single-line progress output
    return storeLbpFD
Example #5
def train_k_means_clustering(data, k, epochs=20):
    # Alternate between re-centering each cluster on its mean and
    # reassigning points to their nearest center for `epochs` rounds.
    print('\nRunning K means Clustering.')
    datapoints = len(data)
    features = len(data[0])

    centers = random_centers(data, datapoints, k)
    clustered_data = point_clustering(data,
                                      centers,
                                      features,
                                      first_cluster=True)

    for i in range(epochs):
        centers = mean_center(clustered_data, centers, features)
        clustered_data = point_clustering(data,
                                          centers,
                                          features,
                                          first_cluster=False)
        progress(i, epochs - 1)
    return centers
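The helpers `random_centers`, `point_clustering`, and `mean_center` are defined elsewhere in the project. Purely as an illustration of the same assign-then-re-center loop, here is a self-contained NumPy sketch (not the original implementation):

import numpy as np

def kmeans_sketch(data, k, epochs=20, seed=0):
    rng = np.random.default_rng(seed)
    data = np.asarray(data, dtype=float)
    # Pick k distinct data points as the initial centers.
    centers = data[rng.choice(len(data), size=k, replace=False)]
    for _ in range(epochs):
        # Assign each point to its nearest center ...
        labels = np.argmin(
            np.linalg.norm(data[:, None, :] - centers[None, :, :], axis=2),
            axis=1)
        # ... then move each center to the mean of its assigned points.
        for j in range(k):
            if np.any(labels == j):
                centers[j] = data[labels == j].mean(axis=0)
    return centers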
Example #6
def LBPFeatureDescriptor(self):
    # Iterate over all the images in the selected folder, compute the LBP
    # feature descriptor for each, and persist it as one JSON file per image.
    lbp = LBP()
    files = os.listdir(str(config.FULL_IMAGESET_FOLDER))
    number_files = len(files)
    i = 0
    for file in files:
        filename = os.fsdecode(file)
        if filename.endswith(".jpg"):
            storeLbpFD = {}  # one single-entry dict per image
            lbp_fd = lbp.LBPForSingleImage(
                join(str(config.FULL_IMAGESET_FOLDER), filename))
            storeLbpFD[filename] = lbp_fd.tolist()
            with open(join(config.FEATURES_FOLDER_LBP, filename + ".json"),
                      'w',
                      encoding='utf-8') as f:
                json.dump(storeLbpFD, f, ensure_ascii=True)
            i += 1
            progress(i, number_files)
Example #7
def load_features(folder_path):
    hog_feature_map = {}
    counter = 1
    training_files = os.listdir(folder_path)
    print("Extracting features for the training images!")
    for trainingFile in training_files:
        # Cached features live in FEATURES_FOLDER as "<image name>.json".
        trainingFileJson = os.fsdecode(trainingFile) + '.json'
        fileExists = os.path.exists(
            join(config.FEATURES_FOLDER, trainingFileJson))
        if fileExists:
            with open(join(config.FEATURES_FOLDER, trainingFileJson),
                      "r") as f:
                data = json.load(f)
                hog_feature_map.update(data)
        else:
            # Cache miss: compute the HOG descriptor from the image itself.
            data = HOG().HOGForSingleImage(folder_path, trainingFile)
            hog_feature_map.update(data)

        progress(counter, len(training_files))
        counter += 1
    hog_values = list(hog_feature_map.values())
    return hog_values, training_files
Example #8
def startTask4():
    print("starting task4")
    print("Enter the folder path containing the labeled images")
    training_folder = input()

    print("Choose one of the below classifier")
    print("1. SVM classifer\n2. Decision-Tree classifier\n3. PPR based classifier")
    classifier = int(input())

    print("Enter the folder path containing the test images")
    test_folder = input()

    hog_feature_map = {}
    counter = 1
    training_files = os.listdir(training_folder)
    print("Extracting features for the training images!")
    for trainingFile in training_files:
        trainingFileJson = os.fsdecode(trainingFile) + '.json'
        fileExists = os.path.exists(join(config.FEATURES_FOLDER, trainingFileJson))
        if fileExists:
            with open(join(config.FEATURES_FOLDER, trainingFileJson), "r") as f:
                data = json.load(f)
                hog_feature_map.update(data)
        else:
            data = HOG().HOGForSingleImage(training_folder, trainingFile)
            hog_feature_map.update(data)
        progress(counter, len(training_files))
        counter = counter + 1
    reducer_object = list(hog_feature_map.values())
    print("Performing PCA!")
    pca = PCA_Reducer(reducer_object)
    data = pca.reduceDimension(pca.featureDescriptor)
    print("Done performing PCA!")
    if classifier == 1:
        # image labels are added to the imageLabels list. -1 for dorsal and 1 for palmar
        metadata = pd.read_csv(config.METADATA_FOLDER)
        image_labels = get_labels(training_folder, metadata)
        svm_object = SVM()
        print("Training SVM")
        svm_object.svm_fit(data, image_labels)
        print("Done Training SVM")
        test_labels_map = {}
        predicted_values = []
        actual_values = get_labels(test_folder, metadata)
        for file in os.listdir(test_folder):
            test_file = file
            test_file_json = file + '.json'
            file_exists = os.path.exists(join(config.FEATURES_FOLDER, test_file_json))
            if file_exists:
                with open(join(config.FEATURES_FOLDER, test_file_json), "r") as f:
                    data = json.load(f)
            else:
                data = HOG().HOGForSingleImage(test_folder, test_file)
            pca_output = pca.reduceDimension(list(data.values()))
            output_label = np.asarray(svm_object.predict(pca_output))[0]
            predicted_values.append(output_label)
            if output_label == -1:
                test_labels_map[test_file] = "dorsal"
            else:
                test_labels_map[test_file] = "palmar"
        print(test_labels_map)
        accuracy = accuracy_score(actual_values, predicted_values)
        plotInChromeForTask4(test_labels_map, "Task_4_SVM", accuracy)
        print("Test Accuracy: ", accuracy)

    if classifier == 2:
        data = data.values.tolist()  # the decision tree expects the data as a 2-D list
        class_labels = [-1, 1]
        i = 0
        metadata = pd.read_csv(config.METADATA_FOLDER)
        for file in os.listdir(training_folder):
            training_file = os.fsdecode(file)
            label = metadata.loc[metadata['imageName'] == training_file]['aspectOfHand'].iloc[0]
            if "dorsal" in label:
                data[i].append(-1)
            else:
                data[i].append(1)
            i = i + 1
        dtree_object = decisionTree()
        root = dtree_object.construct_dt(data, class_labels, 5, 2)
        test_labels_map = {}
        predicted_values = []
        actual_values = get_labels(test_folder, metadata)
        for file in os.listdir(test_folder):
            test_file = os.fsdecode(file)
            test_file_json = test_file + '.json'
            file_exists = os.path.exists(join(config.FEATURES_FOLDER, test_file_json))
            if file_exists:
                with open(join(config.FEATURES_FOLDER, test_file_json), "r") as f:
                    data = json.load(f)
            else:
                data = HOG().HOGForSingleImage(test_folder, test_file)
            pca_output = pca.reduceDimension(list(data.values()))
            pca_output = pca_output.values.tolist()[0]
            output_label = dtree_object.predict(root, pca_output)
            predicted_values.append(output_label)
            if output_label == -1:
                test_labels_map[test_file] = "dorsal"
            else:
                test_labels_map[test_file] = "palmar"
        accuracy = accuracy_score(actual_values, predicted_values)
        plotInChromeForTask4(test_labels_map, "Task_4_DECISION", accuracy)
        print("Test Accuracy: ", accuracy)

    if classifier == 3:
        pca_for_all = data

        i = 0
        imageNames = []
        latentFeatureDict = {}

        # Preprocessing for UnLabelled set
        ppr_hog_map = {}
        for test_file in os.listdir(test_folder):
            testFileJson = str(test_file) + '.json'
            fileExists = os.path.exists(join(config.FEATURES_FOLDER, testFileJson))
            if fileExists:
                with open(join(config.FEATURES_FOLDER, testFileJson), "r") as f:
                    data = json.load(f)
                    ppr_hog_map.update(data)
            else:
                data = HOG().HOGForSingleImage(test_folder, test_file)
                ppr_hog_map.update(data)
        # Append the unlabelled (test) image data to the labelled image data
        reducer_object = list(hog_feature_map.values())
        pp_reducer_object = list(ppr_hog_map.values())
        pp_reducer_object = reducer_object + pp_reducer_object
        pca = PCA_Reducer(pp_reducer_object)
        unlabelled_ppr_data = pca.reduceDimension(pca.featureDescriptor)
        pca_for_all = unlabelled_ppr_data

        for file in os.listdir(str(training_folder)):
            filename = os.fsdecode(file)
            latent = pca_for_all.iloc[i][:]
            imageNames.append(filename)
            latentFeatureDict[filename] = latent
            i = i + 1

        for file in os.listdir(join(test_folder)):
            filename = os.fsdecode(file)
            latent = pca_for_all.iloc[i][:]
            imageNames.append(filename)
            latentFeatureDict[filename] = latent
            i = i + 1

        print("Generating Adjacency Matrix..")
        adjacency_matrix = [[0 for _ in range(len(latentFeatureDict))] for _ in range(len(latentFeatureDict))]
        for i in range(len(latentFeatureDict)):
            distances = []
            for j in range(len(latentFeatureDict)):
                distances.append(find_distance_2_vectors(latentFeatureDict[imageNames[i]],
                                                         latentFeatureDict[imageNames[j]]))

            distances = np.asarray(distances)
            ind = np.argpartition(distances, 20)[:20]
            total = 0
            for distance_index in ind:
                if distances[distance_index] != 0:
                    total += 1 / distances[distance_index]
            for distance_index in ind:
                # Keep only the k nearest neighbours in the matrix, normalised
                # so each column forms a probabilistic transition.
                if distances[distance_index] != 0:
                    adjacency_matrix[distance_index][i] = 1 / distances[distance_index] / total

        rowDict = {i: image for i, image in enumerate(imageNames)}

        df = pd.DataFrame(adjacency_matrix, columns=imageNames)
        df.rename(index=rowDict, inplace=True)

        df.to_csv(join(config.DATABASE_FOLDER, "adjacency_matrix_for_task_4.csv"))

        I = np.identity(df.shape[1])
        seed = pd.Series(0, index=imageNames)
        metadata = pd.read_csv(config.METADATA_FOLDER)
        image_labels = get_labels(training_folder, metadata)
        count = image_labels.count(-1)
        val = 1 / count
        # Seed vector for the dorsal class: uniform mass on dorsal images.
        for i in range(len(os.listdir(training_folder))):
            if image_labels[i] == -1:
                seed.loc[imageNames[i]] = val
        seed2 = pd.Series(0, index=imageNames)
        count2 = image_labels.count(1)
        val2 = 1 / count2
        # Seed vector for the palmar class: uniform mass on palmar images.
        for i in range(len(os.listdir(training_folder))):
            if image_labels[i] == 1:
                seed2.loc[imageNames[i]] = val2

        # Closed-form personalized PageRank with damping factor 0.75:
        # pi = 0.25 * (I - 0.75 * P)^-1 * seed
        page_rank = np.matmul(np.linalg.inv(I - .75 * df), 0.25 * seed)
        page_rank2 = np.matmul(np.linalg.inv(I - .75 * df), 0.25 * seed2)
        steady_state = pd.Series(page_rank, index=df.index)
        steady_state2 = pd.Series(page_rank2, index=df.index)
        test_labels_map = {}
        predicted_values = []
        for file in os.listdir(join(test_folder)):
            if steady_state[file] >= steady_state2[file]:
                test_labels_map[file] = "dorsal"
                predicted_values.append(-1)
            else:
                test_labels_map[file] = "palmer"
                predicted_values.append(1)

        actual_values = get_labels(test_folder, metadata)
        accuracy = accuracy_score(actual_values, predicted_values)
        plotInChromeForTask4(test_labels_map, "Task_4_PPR", accuracy)
        print("Test Accuracy: ", accuracy)
        steady_state = steady_state.sort_values(ascending=True)
        steady_state.to_csv(join(config.DATABASE_FOLDER, "steady_state_matrix_for_task_4.csv"))
        steady_state.plot()
        plt.show()
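The PPR branch above relies on the closed-form personalized PageRank pi = (1 - alpha) * (I - alpha * P)^-1 * s with damping alpha = 0.75, computed once per class seed. A self-contained sketch of that formula on a made-up 3-node transition matrix (illustrative only):

import numpy as np

# Hypothetical column-stochastic transition matrix for a 3-node graph.
P = np.array([[0.0, 0.5, 0.5],
              [0.5, 0.0, 0.5],
              [0.5, 0.5, 0.0]])
alpha = 0.75
seed = np.array([1.0, 0.0, 0.0])  # restart distribution: all mass on node 0

# pi = (1 - alpha) * (I - alpha * P)^-1 * seed
pi = (1 - alpha) * np.linalg.inv(np.eye(3) - alpha * P) @ seed
print(pi)  # steady-state relevance of each node to the seed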