def starter(k):
    """Task driver: build the binary image-metadata matrix, derive k latent
    semantics from it via NMF (choice=3) both in image space and in metadata
    space, and print the top-50 features of every latent topic.
    """
    result = Database().retrieve_metadata_with_labels(None, None)
    metadata_query_output = [
        [
            item["image_id"],
            item["male"],
            item["dorsal"],
            item["left_hand"],
            item["accessories"],
        ]
        for item in result
    ]

    # Feature rows of the matrix (one column per image):
    #   0 Male, 1 Female, 2 Dorsal, 3 Palmar,
    #   4 Left, 5 Right, 6 With accessories, 7 Without accessories
    # Each queried label is binary, so its complement row is simply 1 - value.
    image_metadata_matrix = numpy.zeros(shape=(8, len(metadata_query_output)))
    for col, row in enumerate(metadata_query_output):
        for feature_row, label_value in zip((0, 2, 4, 6), row[1:]):
            image_metadata_matrix[feature_row][col] = label_value
            image_metadata_matrix[feature_row + 1][col] = 1 - label_value

    # Metadata space: one row per image, columns = the 8 binary features.
    metadataspace = image_metadata_matrix.transpose()

    print("K latent_symantics in image_space")
    imagespace_NMF_model, imagespace_latent_symantics = LatentSymantics(
        image_metadata_matrix, k, choice=3
    ).latent_symantics
    for index, latent_feature in enumerate(imagespace_NMF_model.components_):
        print("top 50 features for latent_topic #", index)
        print(list(latent_feature.argsort()[-50:]))
        print("\n")

    print("K latent_symantics in metadata_space")
    metadataspace_NMF_model, metadataspace_latent_symantics = LatentSymantics(
        metadataspace, k, choice=3
    ).latent_symantics
    for index, latent_feature in enumerate(metadataspace_NMF_model.components_):
        print("top 50 features for latent_topic #", index)
        print(list(latent_feature.argsort()[-50:]))
        print("\n")
def starter(k):
    """Task driver: reduce the subject-subject similarity matrix to k latent
    semantics (NMF, choice=3), print the term-weight table, and describe each
    latent topic by its top-50 contributing features.
    """
    subject_ids = Database().retrieve_all_subject_ids()
    subject_similarities = [
        np.array(Database().retrieve_subject_similarities(subject_id))
        for subject_id in subject_ids
    ]
    print(np.array(subject_similarities))

    latent_symantics_model, latent_symantics = LatentSymantics(
        np.array(subject_similarities), k, 3
    ).latent_symantics

    # Pair the i-th latent semantic (a column of the semantics matrix) with the
    # i-th component weight vector of the reduction model.
    weights = latent_symantics_model.components_
    term_weight_pairs = [
        [term, weights[i]]
        for i, term in enumerate(latent_symantics.transpose())
    ]
    print(tabulate(term_weight_pairs, headers=["Term", "Weight"]))

    print("Latent topics are described in terms of top 50 features.")
    for index, latent_feature in enumerate(latent_symantics_model.components_):
        print("top 50 features for latent_topic #", index)
        print(list(latent_feature.argsort()[-50:]))
        print("\n")
def starter(feature_model, dimension_reduction, k, visualizer):
    """Task driver: compute k latent semantics of the image feature matrix and
    visualize either data semantics (visualizer == 1) or feature semantics
    (visualizer == 2).

    Fixes over the original:
      * ``for k, val in enumerate(col)`` shadowed — and clobbered — the ``k``
        parameter in function scope; the loop variable is renamed.
      * ``key=lambda x: x[1]`` shadowed the feature matrix ``x``; renamed.
      * in the visualizer == 2 branch the LatentSymantics *model* was bound to
        a name suggesting it was the semantics matrix; renamed for clarity.
    """
    path, pos = Config().read_path(), None
    descriptor_type = DescriptorType(feature_model).descriptor_type
    # SIFT additionally returns per-image keypoint counts (pos).
    if DescriptorType(feature_model).check_sift():
        x, ids, pos = functions.process_files(path, feature_model, dimension_reduction)
    else:
        x, ids = functions.process_files(path, feature_model, dimension_reduction)
    symantics_type = LatentSymanticsType(dimension_reduction).symantics_type

    if visualizer == 1:
        _, latent_symantics = LatentSymantics(
            x, k, dimension_reduction
        ).latent_symantics
        k_th_eigenvector_all = []
        for i in range(k):
            col = latent_symantics[:, i]
            # (image file name, weight) pairs, sorted by descending weight.
            arr = [(str(ids[j] + ".jpg"), val) for j, val in enumerate(col)]
            arr.sort(key=lambda pair: pair[1], reverse=True)
            k_th_eigenvector_all.append(arr)
            print("Printing term-weight pair for latent Semantic {}:".format(i + 1))
            print(arr)
        k_th_eigenvector_all = pd.DataFrame(k_th_eigenvector_all)
        Visualizer.visualize_data_symantics(
            k_th_eigenvector_all, symantics_type, descriptor_type
        )
    elif visualizer == 2:
        latent_symantics_model, _ = LatentSymantics(
            x, k, dimension_reduction
        ).latent_symantics
        k_th_eigenvector_all = []
        for j in range(k):
            # Project every image onto the j-th latent feature direction.
            arr = [
                (str(ids[i] + ".jpg"), np.dot(x[i], latent_symantics_model.components_[j]))
                for i in range(len(ids))
            ]
            arr.sort(key=lambda pair: pair[1], reverse=True)
            # Only the top-ranked image per latent feature is visualized.
            k_th_eigenvector_all.append(arr[:1])
            print(arr[0])
        k_th_eigenvector_all = pd.DataFrame(k_th_eigenvector_all)
        Visualizer.visualize_feature_symantics(
            k_th_eigenvector_all, symantics_type, descriptor_type
        )
def concatenate_latent_symantics(subject, k, choice):
    """Build a subject descriptor by concatenating the flattened k latent
    semantics of its dorsal images with those of its palmar images, both
    stored as JSON blobs in GridFS and addressed by the subject record.
    """
    connection = MongoClient(Config().mongo_url())
    database = connection[Config().database_name()]
    grid_fs = GridFS(
        database=database,
        collection=Config().subjects_metadata_collection_name(),
    )

    with grid_fs.get(subject["dorsal"]) as dorsal_file:
        dorsal_image_vectors = json.loads(dorsal_file.read().decode("utf-8"))
    with grid_fs.get(subject["palmar"]) as palmar_file:
        palmar_image_vectors = json.loads(palmar_file.read().decode("utf-8"))

    _, dorsal_latent_symantics = LatentSymantics(
        np.transpose(dorsal_image_vectors), k, choice
    ).latent_symantics
    _, palmar_latent_symantics = LatentSymantics(
        np.transpose(palmar_image_vectors), k, choice
    ).latent_symantics

    # Flatten each semantics matrix row-major and glue the two halves together.
    return np.concatenate(
        (np.ravel(dorsal_latent_symantics), np.ravel(palmar_latent_symantics))
    )
def store_in_db(
    feature_model,
    dimension_reduction,
    k,
    task,
    filtered_image_ids=None,
    label=None,
    value=None,
):
    """Extract features for the (optionally filtered) image set, reduce them to
    k latent semantics, persist the resulting records, and return the reduction
    model together with the latent semantics matrix.
    """
    path = Config().read_path()
    pos = None
    descriptor_type = DescriptorType(feature_model).descriptor_type
    symantics_type = LatentSymanticsType(dimension_reduction).symantics_type

    # SIFT descriptors additionally report per-image keypoint counts (pos).
    if DescriptorType(feature_model).check_sift():
        x, ids, pos = process_files(
            path, feature_model, dimension_reduction, filtered_image_ids
        )
    else:
        x, ids = process_files(
            path, feature_model, dimension_reduction, filtered_image_ids
        )

    latent_symantics_model, latent_symantics = LatentSymantics(
        x, k, dimension_reduction
    ).latent_symantics

    records = set_records(
        ids,
        descriptor_type,
        symantics_type,
        k,
        latent_symantics,
        pos,
        task,
        label,
        value,
    )
    Database().insert_many(records)

    return latent_symantics_model, latent_symantics
def helper(feature_model, dimension_reduction, k, label_choice, image_id):
    """Stack the query image with both label-filtered image sets, reduce the
    combined feature matrix to k latent semantics, tag each stored record with
    its known label value, and persist everything as task-5 records.
    """
    path = Config().read_path()
    pos = None
    descriptor_type = DescriptorType(feature_model).descriptor_type
    symantics_type = LatentSymanticsType(dimension_reduction).symantics_type
    label, value, complementary_value = Labels(label_choice).label

    image = cv2.imread("{}{}{}".format(Config().read_all_path(), image_id, ".jpg"))
    image_feature_vector = Descriptor(
        image, feature_model, dimension_reduction
    ).feature_descriptor

    label_filtered_image_ids = [
        item["image_id"]
        for item in Database().retrieve_metadata_with_labels(label, value)
    ]
    complementary_label_filtered_image_ids = [
        item["image_id"]
        for item in Database().retrieve_metadata_with_labels(label, complementary_value)
    ]

    if DescriptorType(feature_model).check_sift():
        label_feature_vector, label_ids, label_pos = functions.process_files(
            path, feature_model, dimension_reduction, label_filtered_image_ids
        )
        (
            complementary_label_feature_vector,
            complementary_label_ids,
            complementary_label_pos,
        ) = functions.process_files(
            path,
            feature_model,
            dimension_reduction,
            complementary_label_filtered_image_ids,
        )
        feature_vector = np.concatenate(
            (
                label_feature_vector,
                complementary_label_feature_vector,
                image_feature_vector,
            )
        )
        # SIFT keypoint counts: the query image contributes its own row count.
        pos = label_pos + complementary_label_pos + [image_feature_vector.shape[0]]
    else:
        label_feature_vector, label_ids = functions.process_files(
            path, feature_model, dimension_reduction, label_filtered_image_ids
        )
        complementary_label_feature_vector, complementary_label_ids = functions.process_files(
            path,
            feature_model,
            dimension_reduction,
            complementary_label_filtered_image_ids,
        )
        feature_vector = np.concatenate(
            (
                label_feature_vector,
                complementary_label_feature_vector,
                np.array([image_feature_vector]),
            )
        )

    ids = label_ids + complementary_label_ids + [image_id]
    _, latent_symantics = LatentSymantics(
        feature_vector, k, dimension_reduction
    ).latent_symantics

    records = functions.set_records(
        ids, descriptor_type, symantics_type, k, latent_symantics, pos, 5
    )
    for record in records:
        record_id = record["image_id"]
        if record_id == image_id:
            # The query image itself carries no label.
            continue
        if record_id in label_ids:
            record[label] = value
        elif record_id in complementary_label_ids:
            record[label] = complementary_value
    Database().insert_many(records)
def helper(
    self,
    feature_model,
    dimension_reduction,
    k,
    unlabelled_path="C:/Users/himan/OneDrive/Desktop/MWDB/phase3_sample_data/Unlabelled/Set 1/",
):
    """Label-propagation helper for dorsal/palmar classification (task 5).

    Extracts features for every image in ``unlabelled_path``, stacks them with
    the labelled dorsal and palmar feature sets, reduces the combined matrix to
    k latent semantics, and stores one record per image: labelled records are
    tagged with their known dorsal value, unlabelled ones are left untagged.

    Improvement over the original: the unlabelled directory is now a keyword
    parameter (defaulting to the previously hard-coded, machine-specific path)
    instead of a constant buried in the body.
    """
    files = os.listdir(unlabelled_path)
    path, pos = Config().read_path(), None
    descriptor_type = DescriptorType(feature_model).descriptor_type
    symantics_type = LatentSymanticsType(dimension_reduction).symantics_type
    label, value, complementary_value = ("dorsal", 1, 0)

    unlabelled_image_feature_vector = []
    unlabelled_image_ids = []
    for file_name in files:
        print(file_name)
        image = cv2.imread("{}{}".format(unlabelled_path, file_name))
        feature_descriptor = Descriptor(
            image, feature_model, dimension_reduction
        ).feature_descriptor
        unlabelled_image_feature_vector.append(feature_descriptor)
        unlabelled_image_ids.append(file_name)

    label_filtered_image_ids = [
        item["image_id"]
        for item in Database().retrieve_metadata_with_labels(label, value)
    ]
    complementary_label_filtered_image_ids = [
        item["image_id"]
        for item in Database().retrieve_metadata_with_labels(label, complementary_value)
    ]

    if DescriptorType(feature_model).check_sift():
        label_feature_vector, label_ids, label_pos = functions_phase2.process_files(
            path, feature_model, dimension_reduction, label_filtered_image_ids
        )
        (
            complementary_label_feature_vector,
            complementary_label_ids,
            complementary_label_pos,
        ) = functions_phase2.process_files(
            path,
            feature_model,
            dimension_reduction,
            complementary_label_filtered_image_ids,
        )
        feature_vector = np.concatenate(
            (
                label_feature_vector,
                complementary_label_feature_vector,
                unlabelled_image_feature_vector,
            )
        )
        # NOTE(review): pos is intentionally left None here (as in the original,
        # which had the SIFT keypoint bookkeeping commented out) — confirm
        # set_records tolerates pos=None for SIFT descriptors.
    else:
        label_feature_vector, label_ids = functions_phase2.process_files(
            path, feature_model, dimension_reduction, label_filtered_image_ids
        )
        complementary_label_feature_vector, complementary_label_ids = functions_phase2.process_files(
            path,
            feature_model,
            dimension_reduction,
            complementary_label_filtered_image_ids,
        )
        feature_vector = np.concatenate(
            (
                label_feature_vector,
                complementary_label_feature_vector,
                unlabelled_image_feature_vector,
            )
        )

    ids = label_ids + complementary_label_ids + unlabelled_image_ids
    _, latent_symantics = LatentSymantics(
        feature_vector, k, dimension_reduction
    ).latent_symantics

    records = functions_phase2.set_records(
        ids, descriptor_type, symantics_type, k, latent_symantics, pos, 5
    )
    for record in records:
        # Unlabelled images match neither id list and are stored untagged.
        if record["image_id"] in label_ids:
            record[label] = value
        elif record["image_id"] in complementary_label_ids:
            record[label] = complementary_value
    Database().insert_many(records)
def _run_kmeans(latent_semantics, image_ids, c, tolerance):
    """Run k-means with c clusters over the rows of ``latent_semantics``.

    Centroids are seeded with the first c rows; iteration stops after at most
    500 passes or once every centroid's summed percentage change is within
    ``tolerance``. Returns ``(centroids, classes, img_classes)`` where
    ``classes[i]`` holds the feature rows and ``img_classes[i]`` the image ids
    assigned to cluster i.
    """
    centroids = [latent_semantics[i] for i in range(c)]
    prev_centroids = [latent_semantics[i] for i in range(c)]
    classes, img_classes = [], []
    iterations = 1
    optimal = False
    while not optimal and iterations < 501:
        classes = [[] for _ in range(c)]
        img_classes = [[] for _ in range(c)]
        # Assign every row to its nearest centroid (Euclidean distance).
        for i in range(latent_semantics.shape[0]):
            features = latent_semantics[i]
            distances = [euclidean(features, centroid) for centroid in centroids]
            classification = distances.index(min(distances))
            classes[classification].append(features)
            img_classes[classification].append(image_ids[i])
        # Recompute centroids as cluster means.
        for i in range(len(classes)):
            centroids[i] = np.mean(classes[i], axis=0)
        optimal = True
        for i in range(len(centroids)):
            # NOTE(review): divides by the previous centroid componentwise —
            # blows up if any component is exactly zero; confirm the latent
            # semantics are strictly positive (NMF output suggests so).
            if sum((centroids[i] - prev_centroids[i]) / prev_centroids[i] * 100.0) > tolerance:
                optimal = False
                break
            prev_centroids[i] = centroids[i]
        iterations += 1
    return centroids, classes, img_classes


def clustering(path, c):
    """Cluster labelled dorsal and palmar hand images into c clusters each
    (k-means over 2 latent semantics), then classify every unlabelled image as
    dorsal/palmar by its distance to the nearest centroid and report accuracy.

    Fixes over the original:
      * Python-2 ``print`` statements converted to ``print()`` calls — the
        originals were syntax errors under Python 3.
      * ``"Cluster ".join(str(i))`` yielded just ``"0"`` (``str.join``
        interleaves the characters of its argument); replaced with plain
        concatenation.
      * the duplicated dorsal/palmar k-means loops are factored into
        ``_run_kmeans``.

    NOTE(review): ``test_path`` and ``tolerance`` are read as module-level
    globals, exactly as the original did — confirm they are defined elsewhere
    in this module, otherwise this raises NameError at runtime.
    """
    mongo_url = "mongodb://localhost:27017/"
    database_name = "mwdb_phase3"
    lbld_collection_name = "labelled_hands"
    unlbld_collection_name = "unlabelled_hands"
    meta_collection_name = "metadata"
    lbld_csv = "C:/Users/priya/Documents/images/Phase 3/phase3_sample_data/labelled_set1.csv"
    unlabelled_csv = "C:/Users/priya/Documents/images/Phase 3/phase3_sample_data/Unlabelled/unlablled_set1.csv"
    try:
        connection = MongoClient(mongo_url)
        database = connection[database_name]
        lbld_collection = database[lbld_collection_name]
        unlbld_collection = database[unlbld_collection_name]
        meta_collection = database[meta_collection_name]

        # Refresh the labelled-image collection from CSV.
        df = pd.read_csv(lbld_csv)
        lbld_records = df.to_dict(orient='records')
        lbld_collection.remove()
        lbld_collection.insert_many(lbld_records)

        # Refresh the unlabelled-image collection from CSV.
        df = pd.read_csv(unlabelled_csv)
        unlbld_records = df.to_dict(orient='records')
        unlbld_collection.remove()
        unlbld_collection.insert_many(unlbld_records)

        ids1, ids2 = [], []
        feature_vector1, feature_vector2, feature_vector3 = [], [], []
        # Styling for the (commented-out) scatter-plot visualization below.
        colors = ['red', 'blue', 'green', 'cyan', 'magenta']
        markers = ['o', '<', 's', '+', 'v', '^', '.', '>', ',', 'd']
        clust_labels = ["Cluster " + str(i) for i in range(c)]
        cent_labels = ["Centroid " + str(i) for i in range(c)]

        # ---- dorsal: extract min-max-normalized features and cluster --------
        for subject in lbld_collection.find(
            {"aspectOfHand": {"$regex": "dorsal"}}, {"imageName": 1}
        ):
            image_id = subject['imageName']
            image = cv2.imread(path + image_id)
            ids1.append(image_id.replace(".jpg", ""))
            feature_descriptor = Descriptor(image, 1).feature_descriptor
            features_norm = (feature_descriptor - feature_descriptor.min()) / (
                feature_descriptor.max() - feature_descriptor.min())
            feature_vector1.append(features_norm)
        _, d_latent_semantics = LatentSymantics(
            np.array(feature_vector1), 2, 1
        ).latent_symantics

        centroids, classes, d_img_classes = _run_kmeans(
            d_latent_semantics, ids1, c, tolerance
        )

        # # Visualize clusters -- takes longer time to show so commented
        # for i in range(c):
        #     plt.scatter(centroids[i][0], centroids[i][1], s=300, c="black",
        #                 marker="x", label=cent_labels[i])
        #     for features in classes[i]:
        #         plt.scatter(features[0], features[1], color=colors[i], s=30,
        #                     marker=markers[i], label=clust_labels[i])
        # plt.show()

        print("Dorsal CLusters: ")
        for i in range(len(d_img_classes)):
            print("Cluster %d: " % i)
            print(d_img_classes[i])

        # ---- palmar: same pipeline ------------------------------------------
        for subject in lbld_collection.find(
            {"aspectOfHand": {"$regex": "palmar"}}, {"imageName": 1}
        ):
            image_id = subject['imageName']
            image = cv2.imread(path + image_id)
            ids2.append(image_id.replace(".jpg", ""))
            feature_descriptor = Descriptor(image, 1).feature_descriptor
            features_norm = (feature_descriptor - feature_descriptor.min()) / (
                feature_descriptor.max() - feature_descriptor.min())
            feature_vector2.append(features_norm)
        _, p_latent_semantics = LatentSymantics(
            np.array(feature_vector2), 2, 1
        ).latent_symantics

        p_centroids, p_classes, p_img_classes = _run_kmeans(
            p_latent_semantics, ids2, c, tolerance
        )

        print("Palmar CLusters: ")
        for i in range(len(p_img_classes)):
            print("Cluster %d" % i)
            print(p_img_classes[i])

        # ---- classification of unlabelled images ----------------------------
        image_name = []
        dorsal_cnt, palmar_cnt = 0, 0
        d_cnt, p_cnt = 0, 0
        for image_path in glob.glob(test_path):
            image = cv2.imread(image_path)
            image_name.append(os.path.basename(image_path))
            feature_descriptor = Descriptor(image, 1).feature_descriptor
            features_norm = (feature_descriptor - feature_descriptor.min()) / (
                feature_descriptor.max() - feature_descriptor.min())
            feature_vector3.append(features_norm)
        _, latent_semantics = LatentSymantics(
            np.array(feature_vector3), 2, 1
        ).latent_symantics

        for i in range(len(latent_semantics)):
            ddistances = [euclidean(latent_semantics[i], centroid) for centroid in centroids]
            pdistances = [euclidean(latent_semantics[i], centroid) for centroid in p_centroids]
            subject_img = unlbld_collection.find_one(
                {"imageName": image_name[i]}, {"aspectOfHand": 1}
            )
            # Ground-truth tallies for the accuracy denominators.
            if "dorsal" in subject_img['aspectOfHand']:
                d_cnt += 1
            else:
                p_cnt += 1
            # Predicted label = side of the nearest centroid.
            if min(ddistances) < min(pdistances):
                if "dorsal" in subject_img['aspectOfHand']:
                    dorsal_cnt += 1
                print("Image ID: %s, %s" % (image_name[i], "dorsal"))
            else:
                if "palmar" in subject_img['aspectOfHand']:
                    palmar_cnt += 1
                print("Image ID: %s, %s" % (image_name[i], "palmar"))

        print("Dorsal Accuracy %d" % ((dorsal_cnt * 100) / d_cnt))
        print("Palmar Accuracy %d" % ((palmar_cnt * 100) / p_cnt))
    except Exception:
        traceback.print_exc()
        print("Connection refused... ")
def insert_images_in_database(feature_model, dimension_reduction, k, identifier,
                              set1_dir=True, set2_dir=True):
    """
    :param feature_model: 1 - CM, 2 - LBP, 3 - HOG, 4 - SIFT
    :param dimension_reduction: 1 - PCA, 2 - SVD, 3 - NMF, 4 - LDA
    :param k: reduced dimension value
    :param identifier: 0 - Read all, 1 - Read from Labelled, 2 - Read from Unlabelled
    :param set1_dir (Optional): True - Read from Set1 folder of Labelled/Unlabelled, False otherwise
    :param set2_dir (Optional): True - Read from Set2 folder of Labelled/Unlabelled, False otherwise
    :return None
    :raises ValueError: if identifier is 1 or 2 and both set1_dir and set2_dir
        are False — the original code fell through with ``ids``/``x`` unbound
        and died with a NameError in that case.

    Default case: Read from both Set1 and Set2 folders
    """
    # Guard against the unbound-variable fall-through described above.
    if identifier != 0 and not (set1_dir or set2_dir):
        raise ValueError("At least one of set1_dir/set2_dir must be True")

    # Read images and feature extraction
    if identifier == 0:
        read_all_path = Config().read_all_path()
        files = os.listdir(read_all_path)
        connection = Database().open_connection()
        db = connection[Config().database_name()]
        collection = db[Config().collection_name()]
        for i, file_name in enumerate(files):
            print("Reading file: {} | {} % Done".format(
                file_name, ((i + 1) * 100.0) / len(files)))
            image = cv2.imread("{}{}".format(read_all_path, file_name))
            feature_descriptor = Descriptor(
                image, feature_model, dimension_reduction).feature_descriptor
            image_id = file_name.replace(".jpg", "")
            collection.insert_one({
                "image_id": image_id,
                "vector": feature_descriptor.tolist()
            })
        connection.close()
        # Re-read everything back so ids/x line up with what was stored.
        query_results = Database().retrieve_many()
        ids = [item["image_id"] for item in query_results]
        x = np.array([item["vector"] for item in query_results])
    elif identifier == 1:
        if set1_dir and set2_dir:
            ids1, x1 = functions.process_files(
                Config().read_training_set1_path(), feature_model, dimension_reduction)
            ids2, x2 = functions.process_files(
                Config().read_training_set2_path(), feature_model, dimension_reduction)
            ids = ids1 + ids2
            x = np.concatenate((x1, x2))
        elif set1_dir:
            ids, x = functions.process_files(
                Config().read_training_set1_path(), feature_model, dimension_reduction)
        else:
            ids, x = functions.process_files(
                Config().read_training_set2_path(), feature_model, dimension_reduction)
    else:
        if set1_dir and set2_dir:
            ids1, x1 = functions.process_files(
                Config().read_testing_set1_path(), feature_model, dimension_reduction)
            ids2, x2 = functions.process_files(
                Config().read_testing_set2_path(), feature_model, dimension_reduction)
            ids = ids1 + ids2
            x = np.concatenate((x1, x2))
        elif set1_dir:
            ids, x = functions.process_files(
                Config().read_testing_set1_path(), feature_model, dimension_reduction)
        else:
            ids, x = functions.process_files(
                Config().read_testing_set2_path(), feature_model, dimension_reduction)

    # Find Latent_symantics
    _, latent_symantics = LatentSymantics(x, k, dimension_reduction).latent_symantics

    # inserting data into Database
    if identifier == 0:
        records = functions.set_records(ids, latent_symantics)
        Database().insert_many(records)
    elif identifier == 1:
        records = functions.set_records(ids, latent_symantics, training=True)
        Database().insert_many(records, collection_type="training")
    else:
        records = functions.set_records(ids, latent_symantics)
        Database().insert_many(records, collection_type="testing")
    print("Done... ")