def get_database_image_features(self, test_folder=None, decomposition=None, reduced_dimension=False, metadata_pickle=None):
    """Return a (test-image features, dataset features) pair for the query image.

    With ``reduced_dimension`` False, features are (re)computed for the whole
    test folder via :class:`FeaturesImages` and the query image's features are
    computed directly from its file.  With ``reduced_dimension`` True, features
    come from the Phase2 reduced-dimension pickles — either the explicit
    ``metadata_pickle`` or the ``<model>_<decomposition>`` pickle, running the
    dimensionality reduction first if that pickle does not exist yet.
    """
    folder_path = os.path.join(Path(os.path.dirname(__file__)).parent, test_folder)
    query_image_path = os.path.join(folder_path, self.test_image_id)
    feature = self.model_name

    if not reduced_dimension:
        # Full-dimension path: recompute folder features, then read them back
        # from the pickle written next to this module.
        extractor = FeaturesImages(self.model_name, folder_path)
        extractor.compute_features_images_folder()
        query_features = extractor.compute_image_features(query_image_path)
        dataset_features = misc.load_from_pickle(os.path.dirname(__file__), feature)
        return query_features, dataset_features

    reduced_pickle_dir = os.path.join(
        Path(os.path.dirname(__file__)).parent, 'Phase2', 'pickle_files')

    if metadata_pickle:
        # Caller named a specific metadata-filtered pickle; the query image's
        # features are looked up inside it.
        dataset_features = misc.load_from_pickle(reduced_pickle_dir, metadata_pickle)
        return dataset_features[self.test_image_id], dataset_features

    pickle_name = feature + '_' + decomposition.decomposition_name
    if not os.path.exists(os.path.join(reduced_pickle_dir, pickle_name + '.pkl')):
        # The (model, reduction) combination has never been computed — run the
        # reduction task so the pickle exists before loading it.
        print(
            'Pickle file not found for the Particular (model,Reduction)'
        )
        print(
            'Runnning Task1 for the Particular (model,Reduction) to get the pickle file'
        )
        decomposition.dimensionality_reduction()

    dataset_features = misc.load_from_pickle(reduced_pickle_dir, pickle_name, self.k)
    return dataset_features[self.test_image_id], dataset_features
def save_label_decomposed_features(self, label, decomposed_label):
    """Persist the subset of a decomposed-feature pickle matching ``label``.

    Loads the ``decomposed_label`` pickle, filters the image metadata to rows
    whose ``aspectOfHand`` contains ``label`` (restricted to
    ``self.test_images_list`` when set), and saves the matching features as
    ``<decomposed_label>_<label>``.
    """
    features = misc.load_from_pickle(self.reduced_dimension_pickle_path, decomposed_label)
    if self.images_metadata is None:
        self.set_images_metadata()
    metadata = self.images_metadata
    if self.test_images_list is not None:
        metadata = metadata[metadata['imageName'].isin(self.test_images_list)]
    metadata = metadata[metadata['aspectOfHand'].str.contains(label)]
    label_features_dict = {
        image_id: features[image_id]
        for image_id in metadata['imageName'].tolist()
    }
    misc.save2pickle(label_features_dict,
                     self.reduced_dimension_pickle_path,
                     feature=(decomposed_label + '_' + label))
def sub_sub_list(self, sub1):
    """Rank every other subject by similarity to subject ``sub1``.

    Returns a list of ``(subject_id, similarity)`` tuples sorted by the
    similarity value ascending, or ``[(-1, -1)]`` when ``sub1`` is not among
    the (optionally test-list-filtered) subject ids.
    """
    if self.images_metadata is None:
        self.set_images_metadata()
    metadata = self.images_metadata
    if self.test_images_list is not None:
        metadata = metadata[metadata['imageName'].isin(self.test_images_list)]

    sub_ids = metadata['id'].unique().tolist()
    # Per-subject slice of the metadata frame, keyed by subject id.
    subject_map = {sid: metadata[metadata['id'] == sid] for sid in sub_ids}

    phase1_dir = os.path.join(Path(os.path.dirname(__file__)).parent, 'Phase1')
    dataset_features = misc.load_from_pickle(phase1_dir, 'SIFT_OLD')

    if sub1 not in sub_ids:
        return [(-1, -1)]

    pairs = []
    for sub2 in tqdm(sub_ids):
        if sub1 != sub2:
            score = self.subject_subject_similarity(subject_map[sub1],
                                                    subject_map[sub2],
                                                    dataset_features,
                                                    is_single_subject=True)
            pairs.append((sub2, score))
    return sorted(pairs, key=lambda pair: pair[1])
def set_unlabeled_image_features(self, model, test_image_id, decomposition):
    """Project one phase-1 feature vector into the decomposition's latent space.

    Looks the image's full-dimension features up in the Phase1 ``model``
    pickle, then stores their latent-space projection on
    ``self.unlabeled_image_features``.
    """
    phase1_dir = os.path.join(Path(os.path.dirname(__file__)).parent, 'Phase1')
    # The projection helper expects a list of feature vectors, so wrap the
    # single image's vector.
    raw_features = [misc.load_from_pickle(phase1_dir, model)[test_image_id]]
    self.unlabeled_image_features = (
        decomposition.decomposition_model
        .get_new_image_features_in_latent_space(raw_features))
def get_image_dataset_features(self):
    """Compute the test folder's features and cache them on the instance.

    After computing, the pickled feature map is loaded and deep copies are
    kept so later destructive operations leave the originals intact.
    """
    extractor = FeaturesImages(self.feature_name, self.test_folder_path)
    extractor.compute_features_images_folder()
    self.image_feature_map = misc.load_from_pickle(self.pickle_file_folder,
                                                   self.feature_name)
    self.images_list = list(self.image_feature_map.keys())
    # Pristine snapshots — deep-copied so mutation of the working map/list
    # cannot leak back.
    self.original_feature_map = copy.deepcopy(self.image_feature_map)
    self.original_image_list = copy.deepcopy(self.images_list)
def get_sift_features():
    """Convert variable-length SIFT keypoint sets into fixed-length histograms.

    Loads the Phase1 'SIFT' pickle (per image: a list of keypoint rows whose
    first four entries are x, y, scale, orientation and the rest the
    descriptor), clusters all descriptors with k-means (k = average keypoint
    count per image), and saves, per image, the histogram of cluster
    assignments as the 'SIFT_NEW' pickle.
    """
    parent_directory_path = Path(os.path.dirname(__file__)).parent
    pickle_file_directory = os.path.join(parent_directory_path, 'Phase1')
    dataset_images_features = misc.load_from_pickle(pickle_file_directory, 'SIFT')

    # Stack every keypoint descriptor of every image into one
    # (k1+k2+...+kn) x 128 matrix.  x, y, scale and orientation are skipped.
    input_k_means = []
    total_keypoints = 0  # was `sum` — renamed so the builtin is not shadowed
    images_num = 0
    for image_id, feature_vector in dataset_images_features.items():
        for feature_descriptor in feature_vector:
            input_k_means.append(feature_descriptor[4:])
        total_keypoints += len(feature_vector)
        images_num += 1

    # One cluster per average keypoint count, so histograms are comparable.
    n_clusters = int(total_keypoints / images_num)
    kmeans = KMeans(n_clusters)
    print(
        'Applying k-means algorithm on all the keypoint descriptors of all images'
    )
    # BUG FIX: the original wrapped the *return value* of fit() in tqdm(),
    # which produced no progress bar and no effect — just call fit().
    kmeans.fit(input_k_means)

    image_features = {}
    print('Equating the number of features for all the images : ')
    row_s = 0
    for image_id, feature_vector in tqdm(dataset_images_features.items()):
        # Each image owns a contiguous slice [row_s:row_e) of the stacked
        # descriptor matrix; predict the cluster of each of its keypoints.
        row_e = row_s + len(feature_vector)
        closest_cluster = kmeans.predict(input_k_means[row_s:row_e])
        row_s = row_e
        histogram = [0] * n_clusters
        for cluster_num in closest_cluster:
            histogram[cluster_num] += 1
        image_features[image_id] = histogram

    # The original copied image_features into a second dict over the same
    # keys before saving — it is saved directly here.
    print(len(image_features))
    reduced_pickle_file_folder = os.path.join(os.path.dirname(__file__),
                                              'pickle_files')
    misc.save2pickle(image_features, reduced_pickle_file_folder, 'SIFT_NEW')
def get_features(self, label):
    """Return ``{image_id: feature_vector}`` for labelled images matching ``label``.

    For SIFT the full-dimension database pickle is filtered by the metadata's
    ``aspectOfHand``; for every other feature the label-specific decomposed
    pickle is (re)built via the metadata helper and loaded.
    """
    test_dataset_folder_path = os.path.abspath(
        os.path.join(Path(os.getcwd()).parent, self.labelled_dataset_path))
    images_list = list(
        misc.get_images_in_directory(test_dataset_folder_path).keys())
    metadata = Metadata(images_list)

    if self.feature_name == 'SIFT':
        # SIFT has no decomposed pickle: select matching images straight from
        # the main database features.
        database_features = misc.load_from_pickle(self.main_pickle_file_folder,
                                                  self.feature_name)
        label_images = metadata.get_specific_metadata_images_list(
            feature_dict={'aspectOfHand': label})
        return {image: database_features[image] for image in label_images}

    metadata.save_label_decomposed_features(label, self.decomposed_feature)
    return misc.load_from_pickle(self.reduced_pickle_file_folder,
                                 self.decomposed_feature + '_' + label)
def subject_matrix(self):
    """Build and pretty-print the pairwise subject-subject similarity matrix.

    Subjects come from the (optionally test-list-filtered) metadata; features
    come from the Phase1 'SIFT_OLD' pickle.  The diagonal is 0.  The matrix is
    printed with subject ids as row/column labels and returned as a list of
    rows.
    """
    if self.images_metadata is None:
        self.set_images_metadata()
    filtered_images_metadata = self.images_metadata
    if self.test_images_list is not None:
        filtered_images_metadata = filtered_images_metadata[(
            filtered_images_metadata['imageName'].isin(
                self.test_images_list))]

    sub_ids_list = filtered_images_metadata['id'].unique().tolist()
    sub_ids_list.sort()  # sorted order of subject ids in the printed matrix
    # Per-subject slice of the metadata frame.
    subject_map = {}
    for sub_id in sub_ids_list:
        subject_map[sub_id] = filtered_images_metadata[
            filtered_images_metadata['id'] == sub_id]

    parent_directory_path = Path(os.path.dirname(__file__)).parent
    pickle_file_directory = os.path.join(parent_directory_path, 'Phase1')
    dataset_images_features = misc.load_from_pickle(
        pickle_file_directory, 'SIFT_OLD')

    similarity_matrix = []
    for sub1 in tqdm(sub_ids_list):
        # BUG FIX: dropped the unused `similarity_row_pair = [0]` local and
        # replaced list concatenation with append.
        similarity_row = []
        for sub2 in sub_ids_list:
            if sub1 == sub2:
                similarity_row.append(0)
            else:
                similarity_row.append(
                    self.subject_subject_similarity(subject_map[sub1],
                                                    subject_map[sub2],
                                                    dataset_images_features))
        similarity_matrix.append(similarity_row)

    # Header row first, then one labelled row per subject; printed without
    # PrettyTable's own header/border so the label row acts as the header.
    p = PrettyTable()
    p.add_row(['SUBJECT/SUBJECT'] + sub_ids_list)
    for i, row in enumerate(similarity_matrix):
        p.add_row([sub_ids_list[i]] + row)
    print(p.get_string(header=False, border=False))
    return similarity_matrix
def set_database_matrix(self):
    """Fill ``self.database_matrix`` / ``self.database_image_id`` from Phase1 pickles.

    Features for the model are (re)computed, loaded from the Phase1 pickle,
    and appended in metadata order when ``self.metadata_images_list`` is set,
    otherwise in the pickle's own iteration order.
    """
    phase1_dir = os.path.join(Path(os.path.dirname(__file__)).parent, 'Phase1')
    print('Getting the Model Features from Phase1')
    self.feature_extraction_object.compute_features_images_folder()
    database_images_features = misc.load_from_pickle(
        phase1_dir, self.feature_extraction_model_name)

    if self.metadata_images_list is not None:
        print("Taking images based on metadata")
        pairs = ((image_id, database_images_features[image_id])
                 for image_id in self.metadata_images_list)
    else:
        pairs = database_images_features.items()

    for image_id, feature_vector in pairs:
        self.database_matrix.append(feature_vector)
        self.database_image_id.append(image_id)
def svm():
    """Train or evaluate an SVM (HOG features) classifying dorsal vs. palmar hands.

    In the 'train' phase, features of the labelled set are computed, labels are
    derived from the metadata CSV and the fitted model is pickled as
    'HOG_svm'.  In the 'test' phase, the pickled model predicts each
    unlabelled image and the image->class mapping is printed.
    """
    model = "HOG"
    phase = input("Choose from \n. 1. Train \n. 2. Test \n.")
    if (phase == "train"):
        training_features, images_list = test.compute_features_folder(
            labelled_set_path, phase)
        metadata_filepath = metadata_file_path + "/" + metadata_file_name
        csv_labels = test.make_labels(metadata_filepath)
        # BUG FIX: the original encoded dorsal as 0 and palmar as 1, but the
        # test branch maps a prediction of -1 to "dorsal".  A sign-based SVM
        # predict never returns 0, so dorsal must be encoded as -1 and palmar
        # as +1 for training and prediction to agree.
        binary_labels = [-1 if "dorsal" in row else 1 for row in csv_labels]
        my_model = svmc(.001, .01, 1000)
        my_model.fit(training_features, binary_labels)
        misc.save2pickle(my_model, reduced_pickle_file_folder,
                         feature=(model + '_svm'))
    if (phase == "test"):
        testing_features, images_list = test.compute_features_folder(
            unlabelled_set_path, phase)
        my_model = misc.load_from_pickle(reduced_pickle_file_folder,
                                         feature=(model + '_svm'))
        value = my_model.predict(testing_features)
        # -1 -> dorsal, +1 -> palmar (matches the training encoding above).
        ans = ["dorsal" if v == -1 else "palmar" for v in value]
        svm_dict = dict(zip(images_list, ans))
        print(svm_dict)
def set_metadata_image_features(self, pickle_file_path):
    """Load a metadata-filtered feature pickle onto the instance.

    The pickle named ``pickle_file_path`` is read from
    ``self.reduced_dimension_pickle_path`` and stored as
    ``self.metadata_images_features``.
    """
    pickle_dir = self.reduced_dimension_pickle_path
    self.metadata_images_features = misc.load_from_pickle(pickle_dir,
                                                          pickle_file_path)
def get_main_features(feature_name, dataset_folder_path):
    """Compute (if needed) and load full-dimension features for a dataset folder.

    Runs the feature extraction for the folder, then returns the feature map
    from the Phase1 pickle named after ``feature_name``.
    """
    phase1_dir = os.path.join(Path(os.path.dirname(__file__)).parent, 'Phase1')
    extractor = FeaturesImages(feature_name, dataset_folder_path)
    extractor.compute_features_images_folder()
    return misc.load_from_pickle(phase1_dir, feature_name)
def get_unlabelled_dataset_features(self):
    """Load, cache and return decomposed features of the unlabelled dataset."""
    pickle_name = 'unlabelled_' + self.decomposed_feature
    self.unlabelled_dataset_features = misc.load_from_pickle(
        self.reduced_pickle_file_folder, pickle_name)
    return self.unlabelled_dataset_features
def set_task5_result(self):
    """Load the cached 'Task_5_Result' pickle onto the instance."""
    result = misc.load_from_pickle(self.reduced_pickle_file_folder,
                                   'Task_5_Result')
    self.task5_result = result
def ppr_classifier():
    """Classify unlabelled hand images as dorsal/palmar via Personalized PageRank.

    Train phase: reduces the labelled set with NMF over CM features, builds an
    image-image L2-distance DataFrame over all labelled-image pairs and
    pickles it.  Test phase: for each unlabelled image, extends that DataFrame
    with the image's distances to every labelled image, runs the k-NN graph +
    steady-state (PPR) helpers to get the K most dominant images, and votes
    their metadata classes; finally prints the predictions and their accuracy
    against the unlabelled metadata.

    NOTE(review): relies on module-level globals (labelled_set_path,
    unlabelled_set_path, unlabelled_images_list, metadata_df,
    unlabelled_metadata_df, reduced_pickle_file_folder) and on hard-coded
    absolute Windows pickle paths.
    """
    model = "CM"
    decomposition_model = "NMF"
    k = 8
    phase = input("Choose from \n. 1. Train \n. 2. Test \n.")
    if (phase == "train"):
        decomposition = Decomposition(decomposition_model, k, model, labelled_set_path, phase)
        decomposition.dimensionality_reduction()
        reduced_dim_folder_images_dict = misc.load_from_pickle("D:\MS_1\MWDB-project\Phase2\pickle_files", feature=(model + '_' + decomposition_model + '_' + phase))
        image_image_graph_keys = ()
        columns = list(reduced_dim_folder_images_dict.keys())
        # Every unordered pair of labelled images gets a distance entry.
        image_image_graph_keys = list(it.combinations(reduced_dim_folder_images_dict.keys(), 2))
        image_image_df = pd.DataFrame(0.00, columns=reduced_dim_folder_images_dict.keys(), index=reduced_dim_folder_images_dict.keys())
        image_image_df_top_features = pd.DataFrame(0.00, columns=reduced_dim_folder_images_dict.keys(), index=reduced_dim_folder_images_dict.keys())
        # Fill the frame with pairwise L2 (p=2) distances, then persist it for
        # the test phase.
        image_image_df = norm_distance(image_image_df, reduced_dim_folder_images_dict, image_image_graph_keys, 2)
        misc.save2pickle(image_image_df, reduced_pickle_file_folder, feature=(model + '_' + decomposition_model + '_image_image_df'))
    if (phase == "test"):
        phase = "test"
        decomposition = Decomposition(decomposition_model, k, model, unlabelled_set_path, phase)
        decomposition.dimensionality_reduction()
        labelled_images_feature_dict = misc.load_from_pickle("D:\MS_1\MWDB-project\Phase2\pickle_files", feature=(model + '_' + decomposition_model + '_train'))
        unlabelled_images_feature_dict = misc.load_from_pickle("D:\MS_1\MWDB-project\Phase2\pickle_files", feature=(model + '_' + decomposition_model + '_test'))
        # K = int(input("Enter the number of dominant images - "))
        K = 9  # number of dominant (PPR steady-state) images used for voting
        prediction = {}
        image_image_df = misc.load_from_pickle(reduced_pickle_file_folder, feature=(model + '_' + decomposition_model + "_image_image_df"))
        for unlabelled_img in unlabelled_images_list:
            # Distances from this unlabelled image to every labelled image.
            new_col = []
            for labelled_img in labelled_images_feature_dict.keys():
                features1 = unlabelled_images_feature_dict.get(unlabelled_img)
                features2 = labelled_images_feature_dict.get(labelled_img)
                ind_distance = 0.00
                distance = 0.00
                # Euclidean distance between the two reduced feature vectors.
                for i in range(len(features1)):
                    ind_distance = abs(features1[i] - features2[i])
                    distance += (ind_distance ** 2)
                distance = distance ** (1/float(2))
                new_col.append(distance)
            # Append the distances as a new row, then (with a trailing 0 for
            # the self-distance) as a new column named after the image.
            image_image_df = image_image_df.append(pd.Series(new_col, index=image_image_df.columns, name=unlabelled_img))
            new_col.append(0)
            image_image_df = image_image_df.assign(unlabelled_img=new_col)
            image_image_df = image_image_df.rename({'unlabelled_img': unlabelled_img}, axis=1)
            # Drop any duplicated columns/rows introduced by repeated runs.
            image_image_df = image_image_df.loc[:, ~image_image_df.columns.duplicated()]
            image_image_df = image_image_df[~image_image_df.index.duplicated(keep='first')]
            # PPR: restrict to each node's 8 nearest neighbours, then take the
            # K highest steady-state images seeded at the unlabelled image.
            image_image_features_df = k_neighbour_graph(image_image_df, image_image_df.columns, 8)
            dominant_img_list = steady_state([unlabelled_img], image_image_features_df, image_image_features_df.columns, K)
            palmar = 0
            dorsal = 0
            for img in dominant_img_list:
                # Skip the query image itself (substring test acts as an
                # equality check here since image names are same-length).
                if img not in unlabelled_img:
                    class_list = metadata_df['aspectOfHand'].where(metadata_df['imageName'] == img)
                    class_list = [class_l for class_l in class_list if str(class_l) != 'nan']
                    if (class_list[0].split()[0] == "palmar"):
                        palmar += 1
                    if (class_list[0].split()[0] == "dorsal"):
                        dorsal += 1
            # Majority vote; ties go to dorsal.
            if (dorsal >= palmar):
                prediction[unlabelled_img] = "dorsal"
            else:
                prediction[unlabelled_img] = "palmar"
            # Reload the pristine labelled-only frame so the next unlabelled
            # image starts from an unmodified graph.
            image_image_df = misc.load_from_pickle(reduced_pickle_file_folder, feature=(model + '_' + decomposition_model + "_image_image_df"))
        print(prediction)
        # Accuracy against the unlabelled metadata's true aspectOfHand.
        correct = 0
        class_list = unlabelled_metadata_df['imageName'].tolist()
        actual_class_list = unlabelled_metadata_df['aspectOfHand'].tolist()
        for image_name in prediction.keys():
            class_list = unlabelled_metadata_df['aspectOfHand'].where(unlabelled_metadata_df['imageName'] == image_name)
            class_list = [class_l for class_l in class_list if str(class_l) != 'nan']
            if (class_list[0].split()[0] == prediction.get(image_name)):
                correct += 1
        print(correct / len(prediction.keys()))
def decision_tree_input():
    """Train or evaluate a decision tree on NMF-reduced CM features.

    Train phase: reduces the labelled set, stacks features with their labels
    into one array, builds a depth-6 tree (min size 1) and pickles it as
    'CM_NMF_tree'.  Test phase: reduces the unlabelled set, predicts each
    image with the pickled tree (1 -> palmar, else dorsal), prints the
    predictions and their accuracy against the unlabelled metadata.

    NOTE(review): relies on module-level globals (labelled_set_path,
    unlabelled_set_path, metadata_df, unlabelled_metadata_df,
    reduced_pickle_file_folder) and hard-coded absolute Windows pickle paths.
    """
    model = "CM"
    decomposition_model = "NMF"
    k = 8
    phase = input("Choose from \n. 1. Train \n. 2. Test \n.")
    if (phase == "train"):
        decomposition = Decomposition(decomposition_model, k, model, labelled_set_path, phase)
        decomposition.dimensionality_reduction()
        labelled_images_feature_dict = misc.load_from_pickle(r"D:\MS_1\MWDB-project\Phase2\pickle_files", feature=(model + '_' + decomposition_model + '_train'))
        y_train = get_labels(labelled_images_feature_dict.keys(), metadata_df)
        X_train = np.vstack(labelled_images_feature_dict.values())
        # Labels become the last column of the training matrix.
        y_train = np.asarray(y_train).reshape(len(y_train), 1)
        dataset = np.concatenate((X_train, y_train), axis=1)
        tree = build_tree(dataset, 6, 1)
        misc.save2pickle(tree, reduced_pickle_file_folder, feature=(model + '_' + decomposition_model + '_tree'))
    if (phase == "test"):
        tree = misc.load_from_pickle(reduced_pickle_file_folder, feature=(model + '_' + decomposition_model + '_tree'))
        decomposition = Decomposition(decomposition_model, k, model, unlabelled_set_path, phase)
        decomposition.dimensionality_reduction()
        unlabelled_images_feature_dict = misc.load_from_pickle("D:\MS_1\MWDB-project\Phase2\pickle_files", feature=(model + '_' + decomposition_model + '_test'))
        y_test = get_labels(unlabelled_images_feature_dict.keys(), unlabelled_metadata_df)
        X_test = np.vstack(unlabelled_images_feature_dict.values())
        prediction = {}
        for key in unlabelled_images_feature_dict.keys():
            # Tree output 1 encodes palmar; anything else is dorsal.
            est_val = predict(tree, unlabelled_images_feature_dict[key])
            if (est_val == 1):
                prediction[key] = "palmar"
            else:
                prediction[key] = "dorsal"
        print(prediction)
        # Accuracy against the unlabelled metadata's true aspectOfHand.
        correct = 0
        class_list = unlabelled_metadata_df['imageName'].tolist()
        actual_class_list = unlabelled_metadata_df['aspectOfHand'].tolist()
        for image_name in prediction.keys():
            class_list = unlabelled_metadata_df['aspectOfHand'].where(unlabelled_metadata_df['imageName'] == image_name)
            class_list = [class_l for class_l in class_list if str(class_l) != 'nan']
            if (class_list[0].split()[0] == prediction.get(image_name)):
                correct += 1
        print(correct / len(prediction.keys()))