def execute_task5(request):
    """Django view for task 5a: build and persist an LSH index over HOG features.

    Reads the number of layers and hashes-per-layer from the POST body, loads
    (or computes and caches) the HOG feature matrix and its 400-dimensional
    SVD projection, indexes every image into the LSH structure, and pickles
    the trained model for reuse by task 6.
    """
    num_layers = int(request.POST.get('number_of_layers'))
    hashes_per_layer = int(request.POST.get('number_of_hashes_per_layer'))
    lsh = LSH(k=hashes_per_layer, l=num_layers)
    dbconnection = DatabaseConnection()

    # Load cached HOG features if available, otherwise fetch from the DB and
    # cache.  Read the pickle once (the original read it twice: once for the
    # None check and again for the value).
    all_image_hog_features = read_from_pickle('all_img_features_LSH.pickle')
    if all_image_hog_features is None:
        all_image_hog_features = dbconnection.get_object_feature_matrix_from_db(
            tablename='histogram_of_gradients')
        save_to_pickle(all_image_hog_features, 'all_img_features_LSH.pickle')

    # SVD on HOG features (cached; shared with task 6).
    svd_obj = read_from_pickle('svd_hog_lsh.pickle')
    if svd_obj is not None:
        transformed_data = svd_obj['data_matrix']
        vt = svd_obj['vt']
    else:
        svd = SingularValueDecomposition()
        transformed_data, vt = svd.get_transformed_data_copy(
            all_image_hog_features['data_matrix'], 400)
        save_to_pickle(
            {"data_matrix": transformed_data,
             "images": all_image_hog_features['images'],
             "vt": vt},
            'svd_hog_lsh.pickle')

    bit_map = lsh.generate_representation_for_all_layers(
        transformed_data, all_image_hog_features['images'])
    save_to_pickle(lsh, 'lsh_model')
    return render(request, 'task5a_output.html')
def get_predicted_labels(self, labelled_folder_path, unlabelled_folder_path):
    """Predict dorsal/palmar labels for the unlabelled folder via PageRank.

    Builds a joint SVD latent space over labelled + unlabelled HOG features,
    runs personalized PageRank seeded once with the known dorsal images and
    once with the known palmar images, and assigns each unlabelled image the
    label whose PageRank score is higher.

    Returns:
        (images_with_labels, acc): a list of (image_name, "dorsal"/"palmar")
        tuples and either an accuracy percentage or a message string when the
        images are not in the 11K metadata.
    """
    labelled_hog_data_table_name = convert_folder_path_to_table_name(
        labelled_folder_path, "histogram_of_gradients")
    unlabelled_hog_data_table_name = convert_folder_path_to_table_name(
        unlabelled_folder_path, "histogram_of_gradients")
    labelled_metadata_table_name = convert_folder_path_to_table_name(
        labelled_folder_path, "metadata")
    db_conn = DatabaseConnection()
    labelled_data_dict = db_conn.get_object_feature_matrix_from_db(
        labelled_hog_data_table_name)
    unlabelled_data_dict = db_conn.get_object_feature_matrix_from_db(
        unlabelled_hog_data_table_name)
    labeled_image_names, labeled_data = labelled_data_dict[
        "images"], labelled_data_dict["data_matrix"]
    unlabeled_image_names, unlabeled_data = unlabelled_data_dict[
        "images"], unlabelled_data_dict["data_matrix"]
    # Stack labelled rows first, unlabelled after — name order below must
    # match this row order for the seed vectors to line up.
    total_data = np.concatenate((labeled_data, unlabeled_data), axis=0)
    total_image_names = labeled_image_names + unlabeled_image_names
    svd_obj = SingularValueDecomposition()
    # Joint 20-dimensional latent space over all images.
    svd_image_data = svd_obj.get_transformed_data(total_data, k=20)
    labelled_images = db_conn.get_correct_labels_for_given_images(
        tablename=labelled_metadata_table_name, label_type="aspectOfHand")
    dorsal_images = get_filtered_images_by_label(labelled_images, "dorsal")
    palmer_images = get_filtered_images_by_label(labelled_images, "palmar")
    pgr_obj = PageRank()
    # Similarity graph keeping the top-6 neighbours per image
    # (presumably k-nearest-neighbour sparsification — verify in PageRank).
    image_similarity_matrix = pgr_obj.get_image_similarity_matrix_for_top_k_images(
        6, svd_image_data)
    seed_vector_for_dorsal = pgr_obj.get_seed_vector(
        dorsal_images, total_image_names)
    seed_vector_for_palmer = pgr_obj.get_seed_vector(
        palmer_images, total_image_names)
    # Personalized PageRank once per label.
    pie_with_dorsal = pgr_obj.get_page_rank_eigen_vector(
        image_similarity_matrix, seed_vector_for_dorsal)
    pie_with_palmer = pgr_obj.get_page_rank_eigen_vector(
        image_similarity_matrix, seed_vector_for_palmer)
    ranked_images_using_dorsal = self.get_ranked_images(
        pie_with_dorsal, total_image_names)
    ranked_images_using_palmer = self.get_ranked_images(
        pie_with_palmer, total_image_names)
    # Higher dorsal-seeded score wins; ties go to "palmar".
    images_with_labels = [
        (img, "dorsal")
        if ranked_images_using_dorsal[img] > ranked_images_using_palmer[img]
        else (img, "palmar")
        for img in unlabeled_image_names
    ]
    # No tablename argument: falls back to the default (11K) metadata table,
    # hence the message in the else branch below.
    correct_labels = db_conn.get_correct_labels_for_given_images(
        image_names=unlabeled_image_names, label_type="aspectOfHand")
    if correct_labels:
        acc = calculate_classification_accuracy(
            convert_tuple_to_dict(images_with_labels),
            convert_tuple_to_dict(correct_labels))
        print("********************************************")
        print("Accuracy = ", acc)
    else:
        acc = "Images not presented in 11 K dataset"
    return images_with_labels, acc
class Image_Clustering:
    """Clusters dorsal and palmar hand images with k-means in an SVD latent
    space, then labels query images by their nearest cluster centre."""

    def __init__(self):
        self.db_conn = DatabaseConnection()
        # Number of SVD latent dimensions used to represent each image.
        self.no_of_dimensions = 10

    def intialize_cluster_centres(self, points, no_of_clusters):
        """Pick `no_of_clusters` initial centres deterministically.

        Starts with the first point, then repeatedly adds the point with the
        largest average distance to the centres chosen so far (a
        k-means++-like farthest-point seeding without randomness).

        BUGFIX: the original looped `range(2, no_of_clusters)` and therefore
        returned only `no_of_clusters - 1` centres; it also shadowed the
        outer loop variable `centre` in the inner loop.

        Args:
            points: list of points (each a list of coordinates).
            no_of_clusters: number of centres to select.
        Returns:
            List of selected centre points (length <= no_of_clusters).
        """
        list_of_centre = [points[0]]
        for _ in range(1, no_of_clusters):
            max_avg_dist = 0
            point_as_centre = None
            for point in points:
                if point in list_of_centre:
                    continue
                avg_dist = sum(
                    np.linalg.norm(np.subtract(np.array(c), np.array(point)))
                    for c in list_of_centre) / len(list_of_centre)
                if avg_dist > max_avg_dist:
                    max_avg_dist = avg_dist
                    point_as_centre = point
            if point_as_centre is None:
                # Fewer distinct points than requested clusters; the original
                # would have raised UnboundLocalError here.
                break
            list_of_centre.append(point_as_centre)
        return list_of_centre

    def k_means(self, points, no_of_centres):
        """Run Lloyd's k-means until the centroids stop moving.

        Args:
            points: 2-D numpy array, one row per point.
            no_of_centres: number of clusters.
        Returns:
            Dict mapping cluster id -> centroid (list of coordinates).
        """
        points = points.tolist()
        list_of_centre = self.intialize_cluster_centres(points, no_of_centres)
        # BUGFIX: the original used `points[int(list_of_centre[index][0])]`,
        # i.e. it treated the first *coordinate* of a chosen centre as an
        # index into `points` (a leftover from a deleted random-index
        # seeding).  The chosen centres themselves are the initial centroids.
        clusters_centroid = dict(enumerate(list_of_centre))
        old_clusters_centroid = {}
        while clusters_centroid != old_clusters_centroid:
            # Assign each point to its nearest centroid.
            clusters_points = {}
            for point in points:
                min_distance = np.inf
                point_in_cluster = -1
                for cluster_id, centroid in clusters_centroid.items():
                    distance = np.linalg.norm(
                        np.subtract(np.array(point), np.array(centroid)))
                    if distance < min_distance:
                        point_in_cluster = cluster_id
                        min_distance = distance
                clusters_points.setdefault(point_in_cluster, []).append(point)
            # Recompute each centroid as the mean of its assigned points.
            old_clusters_centroid = clusters_centroid.copy()
            clusters_centroid = {
                cluster_id: np.mean(np.array(pts), axis=0).tolist()
                for cluster_id, pts in clusters_points.items()
            }
        return clusters_centroid

    def cluster_images(self, no_of_clusters, relative_input_folder_path,
                       relative_output_folder_path):
        """Cluster labelled images per aspect and classify query images.

        Builds an SVD latent space from the input folder's HOG features,
        k-means-clusters the dorsal and palmar images separately, assigns
        every input image to its nearest cluster, then labels each query
        image dorsal/palmar by its nearest cluster centre, reporting accuracy
        when ground truth is available.

        Returns:
            (points_in_cluster, prediction): cluster assignments for the
            input images (sorted by cluster id) and (image, label) pairs for
            the query images (sorted by image name).
        """
        base_tablename = "histogram_of_gradients"
        input_tablename = convert_folder_path_to_table_name(
            relative_input_folder_path, base_tablename)
        output_tablename = convert_folder_path_to_table_name(
            relative_output_folder_path, base_tablename)
        input_metadata_tablename = convert_folder_path_to_table_name(
            relative_input_folder_path)
        image_dict = self.db_conn.get_object_feature_matrix_from_db(input_tablename)
        data_matrix = image_dict['data_matrix']
        image_names = image_dict['images']
        svd_obj = SingularValueDecomposition()
        U, S, Vt = svd_obj.get_latent_semantics(data_matrix, self.no_of_dimensions)
        # Project the images into the latent space (U * S).
        latent_semantic = np.matmul(U, S).tolist()
        list_of_labels = self.db_conn.get_correct_labels_for_given_images(
            image_names, "aspectofhand", input_metadata_tablename)
        # Split the latent vectors by ground-truth aspect.
        dorsal_data_matrix = []
        palmar_data_matrix = []
        for image, label in list_of_labels:
            label_update = label.split(' ')[0]
            if label_update == DORSAL:
                dorsal_data_matrix.append(latent_semantic[image_names.index(image)])
            elif label_update == PALMAR:
                palmar_data_matrix.append(latent_semantic[image_names.index(image)])
        palmer_list_of_centers = self.k_means(np.array(palmar_data_matrix), no_of_clusters)
        dorsal_list_of_centers = self.k_means(np.array(dorsal_data_matrix), no_of_clusters)
        # Assign every input image to its globally nearest cluster; dorsal
        # clusters are numbered 1..k, palmar clusters k+1..2k.
        points_in_cluster = []
        for image_name, image_vector in zip(image_names, latent_semantic):
            min_distance = np.inf
            row = ()
            for center in dorsal_list_of_centers:
                distance = np.linalg.norm(np.subtract(
                    np.array(image_vector),
                    np.array(dorsal_list_of_centers[center])))
                if distance < min_distance:
                    row = (image_name, int(center) + 1)
                    min_distance = distance
            for center in palmer_list_of_centers:
                distance = np.linalg.norm(np.subtract(
                    np.array(image_vector),
                    np.array(palmer_list_of_centers[center])))
                if distance < min_distance:
                    row = (image_name, no_of_clusters + int(center) + 1)
                    min_distance = distance
            points_in_cluster.append(row)
        # Project the query images into the same latent space via Vt.
        query_image_dict = self.db_conn.get_object_feature_matrix_from_db(output_tablename)
        query_data_matrix = query_image_dict['data_matrix']
        query_image_names = query_image_dict['images']
        query_latent = np.matmul(query_data_matrix, np.transpose(Vt))
        # Label each query image by its nearest cluster centre overall.
        prediction = []
        for image, image_vector in zip(query_image_names, query_latent.tolist()):
            min_distance = np.inf
            labelled = ""
            for cluster_centre in palmer_list_of_centers.values():
                distance = np.linalg.norm(np.subtract(
                    np.array(image_vector), np.array(cluster_centre)))
                if distance < min_distance:
                    labelled = PALMAR
                    min_distance = distance
            for cluster_centre in dorsal_list_of_centers.values():
                distance = np.linalg.norm(np.subtract(
                    np.array(image_vector), np.array(cluster_centre)))
                if distance < min_distance:
                    labelled = DORSAL
                    min_distance = distance
            prediction.append((image, labelled))
        query_list_of_labels = self.db_conn.get_correct_labels_for_given_images(
            query_image_names, "aspectofhand", "metadata")
        prediction = sorted(prediction, key=lambda k: k[0])
        if query_list_of_labels:
            # Compare prediction vs ground truth pairwise after sorting both
            # by image name.
            query_list_of_labels = sorted(query_list_of_labels, key=lambda k: k[0])
            accuracy = 0.0
            for (image_name, predicted_label), (image2, correct_label) in zip(
                    prediction, query_list_of_labels):
                correct_label = correct_label.split(' ')[0]
                if image_name == image2 and correct_label == predicted_label:
                    accuracy += 1
            accuracy = 100 * accuracy / float(len(query_image_names))
            print("The accuracy is " + str(accuracy))
        else:
            print("Images not presented in 11 K dataset")
        points_in_cluster = sorted(points_in_cluster, key=lambda k: k[1])
        return points_in_cluster, prediction
def execute_task6(request):
    """Django view for task 6: LSH-based similar-image search with optional
    relevance feedback, using the model pickled by task 5.

    Finds the top-k images similar to the query image, then retrieves a
    larger candidate pool (10 extra for probabilistic feedback, 200 extra
    otherwise) and pickles it for the relevance-feedback stage.
    """
    query_image = request.POST.get('query_image')
    most_similar_images = int(request.POST.get('most_similar_images'))
    query_image_folder_name = request.POST.get('query_image_folder_name')
    relevance_feedback = request.POST.get('relevance_feedback')
    lsh = read_from_pickle('lsh_model')
    db_connection = DatabaseConnection()
    image_vector = db_connection.get_feature_data_for_image(
        'histogram_of_gradients', query_image)
    image_vector = np.asarray(image_vector.flatten())

    # Cached HOG features.  Read the pickle once (the original read it twice:
    # once for the None check and again for the value).
    all_image_hog_features = read_from_pickle('all_img_features_LSH.pickle')
    if all_image_hog_features is None:
        all_image_hog_features = db_connection.get_object_feature_matrix_from_db(
            tablename='histogram_of_gradients')
        save_to_pickle(all_image_hog_features, 'all_img_features_LSH.pickle')

    # Cached SVD of the HOG features (shared with task 5).
    svd_obj = read_from_pickle('svd_hog_lsh.pickle')
    if svd_obj is not None:
        transformed_data = svd_obj['data_matrix']
        vt = svd_obj['vt']
    else:
        svd = SingularValueDecomposition()
        transformed_data, vt = svd.get_transformed_data_copy(
            all_image_hog_features['data_matrix'], 400)
        save_to_pickle(
            {"data_matrix": transformed_data,
             "images": all_image_hog_features['images'],
             "vt": vt},
            'svd_hog_lsh.pickle')

    if query_image_folder_name != '':
        # Query image lives in a custom folder: fetch its HOG vector from
        # that folder's table and project it into the cached SVD space.
        # NOTE(review): when no folder is given the raw (unprojected) HOG
        # vector from above is used — confirm that is intended.
        table_name = convert_folder_path_to_table_name(
            query_image_folder_name, 'histogram_of_gradients')
        image_vector = db_connection.get_feature_data_for_image(
            table_name, query_image)
        image_vector = np.dot(image_vector.astype(float), np.transpose(vt))

    new_obj = {
        'data_matrix': transformed_data,
        'images': all_image_hog_features['images'],
    }
    (sorted_k_values, result_stats) = lsh.find_ksimilar_images(
        k=most_similar_images,
        image_vector=image_vector,
        all_image_hog_features=new_obj)

    # Bigger candidate pool for relevance feedback; the two original branches
    # differed only in the pool size, so collapse them.
    extra_candidates = 10 if relevance_feedback == "Probabilistic" else 200
    (test_dataset, result_stats) = lsh.find_ksimilar_images(
        k=extra_candidates + most_similar_images,
        image_vector=image_vector,
        all_image_hog_features=new_obj)
    save_to_pickle(test_dataset, 'test_dataset.pickle')
    print(sorted_k_values[:most_similar_images])
    return render(
        request, 'visualize_images.html', {
            'images': sorted_k_values[:most_similar_images],
            "from_task": "task5",
            'rel_type': relevance_feedback,
            "q": query_image,
            "t": most_similar_images,
            "num_total": result_stats['total'],
            "num_unique": result_stats['unique']
        })
def get_sorted_k_values(self, num_similar_images, similar_images,
                        all_image_hog_features, image_vector):
    """Rank candidate images by Euclidean distance to the query vector.

    Args:
        num_similar_images: number of results to return.
        similar_images: candidate image names (LSH bucket matches).
        all_image_hog_features: dict with 'images' (names) and 'data_matrix'
            (rows aligned with 'images').
        image_vector: query feature vector.
    Returns:
        The `num_similar_images` closest candidates as (name, distance)
        pairs sorted by ascending distance; an empty list when fewer
        candidates than requested are available (preserving the original
        guard's behaviour).
    """
    ranking = {}
    if num_similar_images <= len(similar_images):
        # Build a name -> row-index map once instead of calling list.index()
        # per candidate (O(n*m) -> O(n+m)).
        index_by_name = {
            name: i for i, name in enumerate(all_image_hog_features['images'])
        }
        data_matrix = all_image_hog_features['data_matrix']
        for image_name in similar_images:
            comp_vector = data_matrix[index_by_name[image_name]]
            ranking[image_name] = np.linalg.norm(image_vector - comp_vector)
    sorted_k_values = sorted(ranking.items(), key=lambda kv: kv[1])
    return sorted_k_values[:num_similar_images]


if __name__ == "__main__":
    # Manual smoke test against the project database (not run on import).
    lsh = LSH(k=9, l=10)
    dbconnection = DatabaseConnection()
    all_image_hog_features = dbconnection.get_object_feature_matrix_from_db(
        tablename='histogram_of_gradients')
    bit_map = lsh.generate_representation_for_all_layers(
        all_image_hog_features['data_matrix'], all_image_hog_features['images'])
    image_vector = dbconnection.get_feature_data_for_image(
        'histogram_of_gradients', 'Hand_0000012.jpg')
    image_vector = np.asarray(image_vector.flatten())
    num_similar_images = 6
    print(lsh.find_ksimilar_images(k=num_similar_images,
                                   image_vector=image_vector,
                                   all_image_hog_features=all_image_hog_features))
class Task1_Classifier:
    """Labels hand images as dorsal or palmar using per-label PCA latent
    spaces: an image is assigned the label whose latent-space projection has
    the larger norm."""

    def __init__(self):
        self.db_conn = DatabaseConnection()

    def calculate_latent_semantic_for_label(self, no_of_components, label,
                                            tablename, metadata):
        """Fit a PCA latent space on the images carrying `label`.

        Returns (image_names, Vt) where Vt spans the latent space.
        """
        feature_dict = self.db_conn.get_object_feature_matrix_from_db(
            tablename, label, "aspectofhand", metadata)
        reducer = PrincipleComponentAnalysis()
        _, _, components = reducer.get_latent_semantics(
            feature_dict["data_matrix"], no_of_components)
        return feature_dict["images"], components

    def classify_images_folder(self, image_names, data_matrix,
                               dorsal_semantics, palmar_semantics):
        """Project every image onto both latent spaces and label each one by
        the space in which its projection norm is larger."""
        dorsal_distance = np.linalg.norm(
            np.matmul(data_matrix, np.transpose(dorsal_semantics)),
            axis=1, keepdims=True)
        palmar_distance = np.linalg.norm(
            np.matmul(data_matrix, np.transpose(palmar_semantics)),
            axis=1, keepdims=True)
        label_flags = (dorsal_distance > palmar_distance).tolist()
        print(dorsal_distance, label_flags)
        return [
            (name, DORSAL if flag[0] else PALMAR)
            for name, flag in zip(image_names, label_flags)
        ]

    def get_label_for_folder(self, relative_input_folder_path,
                             relative_output_folder_path, no_of_components=20):
        """Predict dorsal/palmar for every image in the output folder using
        latent spaces fitted on the input folder; print accuracy when ground
        truth exists and return the sorted (image, label) predictions."""
        input_tablename = convert_folder_path_to_table_name(
            relative_input_folder_path, "histogram_of_gradients")
        output_tablename = convert_folder_path_to_table_name(
            relative_output_folder_path, "histogram_of_gradients")
        metadata_tablename = convert_folder_path_to_table_name(
            relative_input_folder_path)
        image_names = get_image_names_in_a_folder(relative_input_folder_path)
        labelled_images = self.db_conn.get_correct_labels_for_given_images(
            image_names, "aspectofhand")
        print(len(labelled_images))
        # One latent space per aspect, both fitted on the input folder.
        dorsal_images, dorsal_semantics = self.calculate_latent_semantic_for_label(
            no_of_components, DORSAL, input_tablename, metadata_tablename)
        palmar_images, palmar_semantics = self.calculate_latent_semantic_for_label(
            no_of_components, PALMAR, input_tablename, metadata_tablename)
        query_image_names = get_image_names_in_a_folder(
            relative_output_folder_path)
        query_image_dict = self.db_conn.get_object_feature_matrix_from_db(
            output_tablename)
        query_data_matrix = query_image_dict['data_matrix']
        print(len(query_image_names))
        predicted_labels = self.classify_images_folder(
            query_image_names, query_data_matrix,
            dorsal_semantics, palmar_semantics)
        query_list_of_labels = self.db_conn.get_correct_labels_for_given_images(
            query_image_names, "aspectofhand", "metadata")
        prediction = sorted(predicted_labels, key=lambda entry: entry[0])
        if query_list_of_labels:
            # Compare predictions with ground truth pairwise after sorting
            # both lists by image name.
            query_list_of_labels = sorted(query_list_of_labels,
                                          key=lambda entry: entry[0])
            hits = 0.0
            for (name, predicted), (other_name, actual) in zip(
                    prediction, query_list_of_labels):
                if name == other_name and actual.split(' ')[0] == predicted:
                    hits += 1
            accuracy = 100 * hits / float(len(query_image_names))
            print("The accuracy is " + str(accuracy))
        else:
            print("Images not presented in 11 K dataset")
        print(prediction)
        return prediction
def get_train_and_test_dataframes_from_db(train_table, train_table_metadata,
                                          test_table, num_dims=None,
                                          algo="svd"):
    """Build train/test dataframes of (optionally dimension-reduced) HOG data.

    Args:
        train_table: DB table with the training HOG features.
        train_table_metadata: metadata table holding training labels.
        test_table: DB table with the test HOG features.
        num_dims: target dimensionality, or None to skip reduction.
        algo: "pca" or "svd" — reduction algorithm used when num_dims is set.
    Returns:
        (train_df, test_df, images_not_present): dataframes with columns
        [imagename, hog_svd_descriptor, label] and [imagename,
        hog_svd_descriptor, expected_label, predicted_label]; the flag is
        True when the test images have no labels in the 11K metadata.
    Raises:
        ValueError: if num_dims is given but algo is neither "pca" nor "svd"
            (the original fell through and later crashed with a NameError).
    """
    images_not_present = False
    # dorsal -> -1, palmar -> +1 for the downstream classifiers.
    label_map = {"dorsal": -1, "palmar": 1}

    db = DatabaseConnection()
    train_dataset = db.get_object_feature_matrix_from_db(train_table)
    test_dataset = db.get_object_feature_matrix_from_db(test_table)
    train_data = train_dataset['data_matrix']
    train_images = train_dataset['images']
    test_data = test_dataset['data_matrix']
    test_images = test_dataset['images']

    # Optional dimensionality reduction: fit on train, apply to test.
    if num_dims is None:
        tf_train_data = train_data
        tf_test_data = test_data
    elif algo == "pca":
        reducer = PCA(n_components=num_dims)
        tf_train_data = reducer.fit_transform(train_data)
        tf_test_data = reducer.transform(test_data)
    elif algo == "svd":
        reducer = SingularValueDecomposition(num_dims)
        tf_train_data = reducer.fit_transform(train_data)
        tf_test_data = reducer.transform(test_data)
    else:
        raise ValueError(
            "Unknown algo: expected 'pca' or 'svd', got %r" % (algo,))

    # Convert the (image, label) tuples from the DB into dicts.
    train_labels_map = dict(
        db.get_correct_labels_for_given_images(train_images, 'aspectOfHand',
                                               train_table_metadata))
    result_from_db = db.get_correct_labels_for_given_images(
        test_images, 'aspectOfHand')
    exp_test_labels_map = dict(result_from_db) if result_from_db else None

    # train_df: one row per training image.
    train_df = pd.DataFrame(
        columns=['imagename', 'hog_svd_descriptor', 'label'])
    for i, image in enumerate(train_images):
        label = train_labels_map[image].split(' ')[0]
        train_df.loc[len(train_df)] = [
            image, tf_train_data[i], label_map[label]
        ]

    # test_df: expected label filled when known, predicted always 'null'.
    test_df = pd.DataFrame(columns=[
        'imagename', 'hog_svd_descriptor', 'expected_label', 'predicted_label'
    ])
    if exp_test_labels_map:
        for i, image in enumerate(test_images):
            label = exp_test_labels_map[image].split(' ')[0]
            test_df.loc[len(test_df)] = [
                image, tf_test_data[i], label_map[label], 'null'
            ]
    else:
        # Flag is only raised when there is at least one unlabelled test
        # image, matching the original set-inside-the-loop behaviour.
        images_not_present = bool(test_images)
        for i, image in enumerate(test_images):
            test_df.loc[len(test_df)] = [
                image, tf_test_data[i], 'null', 'null'
            ]

    return train_df, test_df, images_not_present