def index_imgs():
    # Build the index: one bucket map per hash layer. Every image's feature
    # descriptor is hashed into a bucket for each layer in hash_rep.
    global database
    global hash_rep
    database = []
    json_files = os.listdir(config.FEATURES_FOLDER)
    json_files = ret_files_without_ext(json_files)
    ctr = 0
    total_iter = len(hash_rep) * len(json_files)
    for i in range(len(hash_rep)):
        layer_index = {}
        for img in json_files:
            with open(path.join(config.FEATURES_FOLDER, img + '.json'), 'r', encoding='utf-8') as f:
                feat_desc = json.load(f)
            img_bucket = img_index(feat_desc[img], i)
            if img_bucket not in layer_index:
                layer_index[img_bucket] = [img]
            else:
                layer_index[img_bucket].append(img)
            ctr += 1
            progress(ctr, total_iter)
        database.append(layer_index)
    return database
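# The progress(...) helper called throughout this module is assumed to be a
# simple console progress indicator; its definition is not shown in this
# section. A minimal sketch of what such a helper might look like (an
# illustration, not necessarily this repo's actual implementation):
import sys

def progress(count, total):
    # Draw a one-line text progress bar that overwrites itself on each call.
    bar_len = 40
    filled_len = int(round(bar_len * count / float(total)))
    percents = round(100.0 * count / float(total), 1)
    bar = '=' * filled_len + '-' * (bar_len - filled_len)
    sys.stdout.write('[%s] %s%%\r' % (bar, percents))
    sys.stdout.flush()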
def LBPFeatureDescriptorForImageSubset(self, imageSet):
    # Iterate over the images in the given subset and compute the LBP FD for each image
    storeLbpFD = []
    lbp = LBP()
    number_files = len(imageSet)
    i = 0
    for filename in imageSet:
        lbpnp = lbp.LBPForSingleImage(join(str(config.IMAGE_FOLDER), filename))
        storeLbpFD.append(lbpnp.tolist())
        i += 1
        progress(i, number_files)
    print(len(storeLbpFD))
    return storeLbpFD
def CMFeatureDescriptor(self):
    # Iterate over all .jpg images in the image folder and compute the color-moments (CM) FD for each
    storeCmFD = []
    cm = CM()
    files = os.listdir(str(config.IMAGE_FOLDER))
    number_files = len(files)
    i = 0
    for file in files:
        filename = os.fsdecode(file)
        if filename.endswith(".jpg"):
            cmnp = cm.CMForSingleImage(join(str(config.IMAGE_FOLDER), filename))
            storeCmFD.append(cmnp.tolist())
            i += 1
            progress(i, number_files)
    print()
    return storeCmFD
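# "CM" here stands for color moments. The CM class is defined elsewhere in the
# repo; a common formulation (an assumption about, not a copy of, this repo's
# CMForSingleImage) computes the mean, standard deviation, and skew of each
# color channel:
import numpy as np

def color_moments_for_channel(channel):
    # channel: 2-D numpy array of pixel values for one color channel.
    mean = np.mean(channel)
    std = np.std(channel)
    # Signed cube root of the mean cubed deviation as the skew measure.
    skew = np.cbrt(np.mean((channel - mean) ** 3))
    return [mean, std, skew]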
def LBPFeatureDescriptorForImageSubset(self, imageSet):
    # Iterate over the image folder and compute the LBP FD for each .jpg image
    # that also appears in the given subset (a DataFrame with an imageName column)
    storeLbpFD = []
    lbp = LBP()
    files = os.listdir(str(config.IMAGE_FOLDER))
    number_files = len(files)
    i = 0
    for file in files:
        filename = os.fsdecode(file)
        if filename.endswith(".jpg") and filename in imageSet.imageName.values:
            lbpnp = lbp.LBPForSingleImage(join(str(config.IMAGE_FOLDER), filename))
            storeLbpFD.append(lbpnp.tolist())
            i += 1
            progress(i, number_files)
    print()
    return storeLbpFD
def train_k_means_clustering(data, k, epochs=20):
    print('\nRunning K means Clustering.')
    datapoints = len(data)
    features = len(data[0])
    centers = random_centers(data, datapoints, k)
    clustered_data = point_clustering(data, centers, features, first_cluster=True)
    for i in range(epochs):
        centers = mean_center(clustered_data, centers, features)
        clustered_data = point_clustering(data, centers, features, first_cluster=False)
        progress(i, epochs - 1)
    return centers
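if __name__ == "__main__":
    # A minimal usage sketch on synthetic 2-D data; it assumes the helpers used
    # above (random_centers, point_clustering, mean_center, progress) are
    # available in this module. The data here is made up for illustration.
    import random
    demo_data = [[random.uniform(0, 10), random.uniform(0, 10)] for _ in range(200)]
    demo_centers = train_k_means_clustering(demo_data, k=3)
    print(demo_centers)  # the k learned cluster centers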
def LBPFeatureDescriptor(self):
    # Iterate over all .jpg images in the full image set, compute the LBP FD for
    # each image, and persist every descriptor to its own JSON file keyed by image name
    lbp = LBP()
    files = os.listdir(str(config.FULL_IMAGESET_FOLDER))
    number_files = len(files)
    i = 0
    for file in files:
        filename = os.fsdecode(file)
        if filename.endswith(".jpg"):
            lbpnp = lbp.LBPForSingleImage(join(str(config.FULL_IMAGESET_FOLDER), filename))
            storeLbpFD = {filename: lbpnp.tolist()}
            with open(join(config.FEATURES_FOLDER_LBP, filename + ".json"), 'w', encoding='utf-8') as f:
                json.dump(storeLbpFD, f, ensure_ascii=True)
            i += 1
            progress(i, number_files)
def load_features(folder_path):
    # Load the HOG feature descriptor for every image in folder_path, reusing the
    # cached JSON file when one exists and computing the descriptor otherwise.
    hog_feature_map = {}
    counter = 1
    training_files = os.listdir(folder_path)
    print("Extracting features for the training images!")
    for trainingFile in training_files:
        # e.g. "<name>.jpg" -> "<name>.jpg.json"
        trainingFileJson = os.fsdecode(trainingFile) + '.json'
        fileExists = os.path.exists(join(config.FEATURES_FOLDER, trainingFileJson))
        if fileExists:
            with open(join(config.FEATURES_FOLDER, trainingFileJson), "r") as f:
                data = json.load(f)
        else:
            data = HOG().HOGForSingleImage(folder_path, trainingFile)
        hog_feature_map.update(data)
        progress(counter, len(training_files))
        counter += 1
    hog_values = list(hog_feature_map.values())
    return hog_values, training_files
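# A hedged usage sketch for load_features: it pairs with the JSON cache layout
# written by the descriptor functions above, where each cached "<name>.jpg.json"
# file holds a one-entry mapping {"<name>.jpg": <feature vector>}:
#
# hog_values, training_files = load_features(config.IMAGE_FOLDER)
# print(len(hog_values), "descriptors loaded for", len(training_files), "files")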
def startTask4():
    print("starting task4")
    print("Enter the folder path containing the labeled images")
    training_folder = input()
    print("Choose one of the classifiers below")
    print("1. SVM classifier\n2. Decision-Tree classifier\n3. PPR based classifier")
    classifier = int(input())
    print("Enter the folder path containing the test images")
    test_folder = input()
    hog_feature_map = {}
    counter = 1
    training_files = os.listdir(training_folder)
    print("Extracting features for the training images!")
    for trainingFile in training_files:
        # e.g. "<name>.jpg" -> "<name>.jpg.json"
        trainingFileJson = os.fsdecode(trainingFile) + '.json'
        fileExists = os.path.exists(join(config.FEATURES_FOLDER, trainingFileJson))
        if fileExists:
            with open(join(config.FEATURES_FOLDER, trainingFileJson), "r") as f:
                data = json.load(f)
        else:
            data = HOG().HOGForSingleImage(training_folder, trainingFile)
        hog_feature_map.update(data)
        progress(counter, len(training_files))
        counter += 1
    reducer_object = list(hog_feature_map.values())
    print("Performing PCA!")
    pca = PCA_Reducer(reducer_object)
    data = pca.reduceDimension(pca.featureDescriptor)
    print("Done performing PCA!")

    if classifier == 1:
        # Image labels: -1 for dorsal and 1 for palmar
        metadata = pd.read_csv(config.METADATA_FOLDER)
        image_labels = get_labels(training_folder, metadata)
        svm_object = SVM()
        print("Training SVM")
        svm_object.svm_fit(data, image_labels)
        print("Done Training SVM")
        test_labels_map = {}
        predicted_values = []
        actual_values = get_labels(test_folder, metadata)
        for file in os.listdir(test_folder):
            test_file = file
            test_file_json = file + '.json'
            file_exists = os.path.exists(join(config.FEATURES_FOLDER, test_file_json))
            if file_exists:
                with open(join(config.FEATURES_FOLDER, test_file_json), "r") as f:
                    data = json.load(f)
            else:
                data = HOG().HOGForSingleImage(test_folder, test_file)
            # Project the test descriptor into the PCA space learned on the training set
            pca_output = pca.reduceDimension(list(data.values()))
            output_label = np.asarray(svm_object.predict(pca_output))[0]
            predicted_values.append(output_label)
            if output_label == -1:
                test_labels_map[test_file] = "dorsal"
            else:
                test_labels_map[test_file] = "palmar"
        print(test_labels_map)
        accuracy = accuracy_score(actual_values, predicted_values)
        plotInChromeForTask4(test_labels_map, "Task_4_SVM", accuracy)
        print("Test Accuracy: ", accuracy)

    if classifier == 2:
        data = data.values.tolist()  # the decision tree takes its data as a 2-D list
        class_labels = [-1, 1]
        i = 0
        metadata = pd.read_csv(config.METADATA_FOLDER)
        for file in os.listdir(training_folder):
            training_file = os.fsdecode(file)
            label = metadata.loc[metadata['imageName'] == training_file]['aspectOfHand'].iloc[0]
            # Append the class label (-1 dorsal, 1 palmar) as the last column of each row
            if "dorsal" in label:
                data[i].append(-1)
            else:
                data[i].append(1)
            i += 1
        dtree_object = decisionTree()
        root = dtree_object.construct_dt(data, class_labels, 5, 2)
        test_labels_map = {}
        predicted_values = []
        actual_values = get_labels(test_folder, metadata)
        for file in os.listdir(test_folder):
            test_file = os.fsdecode(file)
            test_file_json = test_file + '.json'
            file_exists = os.path.exists(join(config.FEATURES_FOLDER, test_file_json))
            if file_exists:
                with open(join(config.FEATURES_FOLDER, test_file_json), "r") as f:
                    data = json.load(f)
            else:
                data = HOG().HOGForSingleImage(test_folder, test_file)
            pca_output = pca.reduceDimension(list(data.values()))
            pca_output = pca_output.values.tolist()[0]
            output_label = dtree_object.predict(root, pca_output)
            predicted_values.append(output_label)
            if output_label == -1:
                test_labels_map[test_file] = "dorsal"
            else:
                test_labels_map[test_file] = "palmar"
        accuracy = accuracy_score(actual_values, predicted_values)
        plotInChromeForTask4(test_labels_map, "Task_4_DECISION", accuracy)
        print("Test Accuracy: ", accuracy)

    if classifier == 3:
        i = 0
        imageNames = []
        latentFeatureDict = {}
        # Preprocessing for the unlabelled (test) set
        ppr_hog_map = {}
        for test_file in os.listdir(test_folder):
            testFileJson = str(test_file) + '.json'
            fileExists = os.path.exists(join(config.FEATURES_FOLDER, testFileJson))
            if fileExists:
                with open(join(config.FEATURES_FOLDER, testFileJson), "r") as f:
                    data = json.load(f)
            else:
                data = HOG().HOGForSingleImage(test_folder, test_file)
            ppr_hog_map.update(data)
        # Append the unlabelled image descriptors to the labelled ones and re-run PCA on the union
        reducer_object = list(hog_feature_map.values())
        pp_reducer_object = reducer_object + list(ppr_hog_map.values())
        pca = PCA_Reducer(pp_reducer_object)
        pca_for_all = pca.reduceDimension(pca.featureDescriptor)
        # Latent rows are ordered: training images first, then test images
        for file in os.listdir(str(training_folder)):
            filename = os.fsdecode(file)
            imageNames.append(filename)
            latentFeatureDict[filename] = pca_for_all.iloc[i][:]
            i += 1
        for file in os.listdir(join(test_folder)):
            filename = os.fsdecode(file)
            imageNames.append(filename)
            latentFeatureDict[filename] = pca_for_all.iloc[i][:]
            i += 1
        print("Generating Adjacency Matrix..")
        adjacency_matrix = [[0 for _ in range(len(latentFeatureDict))] for _ in range(len(latentFeatureDict))]
        for i in range(len(latentFeatureDict)):
            distances = []
            for j in range(len(latentFeatureDict)):
                distances.append(find_distance_2_vectors(latentFeatureDict[imageNames[i]],
                                                         latentFeatureDict[imageNames[j]]))
            distances = np.asarray(distances)
            # Keep only the 20 nearest neighbours and normalise their inverse distances
            # so that each column of the transition matrix sums to 1
            ind = np.argpartition(distances, 20)[:20]
            total = 0
            for distance_index in ind:
                if distances[distance_index] != 0:
                    total += 1 / distances[distance_index]
            for distance_index in ind:
                if distances[distance_index] != 0:
                    adjacency_matrix[distance_index][i] = 1 / distances[distance_index] / total
        rowDict = {index: image for index, image in enumerate(imageNames)}
        df = pd.DataFrame(adjacency_matrix, columns=imageNames)
        df.rename(index=rowDict, inplace=True)
        df.to_csv(join(config.DATABASE_FOLDER, "adjacency_matrix_for_task_4.csv"))
        I = np.identity(df.shape[1])
        metadata = pd.read_csv(config.METADATA_FOLDER)
        image_labels = get_labels(training_folder, metadata)
        # Seed vector for the dorsal class: uniform restart mass over the dorsal training images
        seed = pd.Series(0, index=imageNames)
        count = image_labels.count(-1)
        val = 1 / count
        for i in range(len(os.listdir(training_folder))):
            if image_labels[i] == -1:
                seed.loc[imageNames[i]] = val
        # Seed vector for the palmar class
        seed2 = pd.Series(0, index=imageNames)
        count2 = image_labels.count(1)
        val2 = 1 / count2
        for i in range(len(os.listdir(training_folder))):
            if image_labels[i] == 1:
                seed2.loc[imageNames[i]] = val2
        # Closed-form personalised PageRank with damping 0.75: pi = (I - 0.75*M)^-1 * 0.25 * seed
        page_rank = np.matmul(np.linalg.inv(I - .75 * df), 0.25 * seed)
        page_rank2 = np.matmul(np.linalg.inv(I - .75 * df), 0.25 * seed2)
        steady_state = pd.Series(page_rank, index=df.index)
        steady_state2 = pd.Series(page_rank2, index=df.index)
        test_labels_map = {}
        predicted_values = []
        for file in os.listdir(join(test_folder)):
            # Label each test image by whichever class assigns it the higher PageRank score
            if steady_state[file] >= steady_state2[file]:
                test_labels_map[file] = "dorsal"
                predicted_values.append(-1)
            else:
                test_labels_map[file] = "palmar"
                predicted_values.append(1)
        actual_values = get_labels(test_folder, metadata)
        accuracy = accuracy_score(actual_values, predicted_values)
        plotInChromeForTask4(test_labels_map, "Task_4_PPR", accuracy)
        print("Test Accuracy: ", accuracy)
        steady_state = steady_state.sort_values(ascending=True)
        steady_state.to_csv(join(config.DATABASE_FOLDER, "steady_state_matrix_for_task_4.csv"))
        steady_state.plot()
        plt.show()
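# The closed-form personalised PageRank used above, pi = 0.25 * (I - 0.75*M)^-1 * seed,
# can be sanity-checked on a tiny hand-built transition matrix. A self-contained
# sketch (the 3-node matrix below is made up purely for illustration):
import numpy as np

def personalised_page_rank(M, seed, restart=0.25):
    # M must be column-stochastic; seed must sum to 1 over the restart nodes.
    n = M.shape[0]
    return restart * np.linalg.inv(np.identity(n) - (1 - restart) * M) @ seed

M = np.array([[0.0, 0.5, 0.5],
              [0.5, 0.0, 0.5],
              [0.5, 0.5, 0.0]])
seed = np.array([1.0, 0.0, 0.0])        # restart only at node 0
print(personalised_page_rank(M, seed))  # node 0 receives the highest score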