Example #1
def starter(k):
    result = Database().retrieve_metadata_with_labels(None, None)
    metadata_query_output = [
        [
            item["image_id"],
            item["male"],
            item["dorsal"],
            item["left_hand"],
            item["accessories"],
        ]
        for item in result
    ]

    """
    Column index - Feature:
    0 - Male
    1 - Female
    2 - Dorsal
    3 - Palmar
    4 - Left
    5 - Right
    6 - With accessories
    7 - Without accessories
    
    Image-metadata matrix: 8 rows (one per feature above), one column per image
    """
    image_metadata_matrix = numpy.zeros(shape=(8, len(metadata_query_output)))
    for i in range(len(metadata_query_output)):

        image_metadata_matrix[0][i] = metadata_query_output[i][1]
        image_metadata_matrix[1][i] = 1 - metadata_query_output[i][1]
        image_metadata_matrix[2][i] = metadata_query_output[i][2]
        image_metadata_matrix[3][i] = 1 - metadata_query_output[i][2]
        image_metadata_matrix[4][i] = metadata_query_output[i][3]
        image_metadata_matrix[5][i] = 1 - metadata_query_output[i][3]
        image_metadata_matrix[6][i] = metadata_query_output[i][4]
        image_metadata_matrix[7][i] = 1 - metadata_query_output[i][4]

    metadataspace = image_metadata_matrix.transpose()

    print("K latent_symantics in image_space")
    imagespace_NMF_model, imagespace_latent_symantics = LatentSymantics(
        image_metadata_matrix, k, choice=3
    ).latent_symantics  # [8 X 11k] to [8 X 4] ((11k)d to 4d)
    for index, latent_feature in enumerate(imagespace_NMF_model.components_):
        print("top 50 features for latent_topic #", index)
        print([i for i in latent_feature.argsort()[-50:]])
        print("\n")

    print("K latent_symantics in metadata_space")
    metadataspace_NMF_model, metadataspace_latent_symantics = LatentSymantics(
        metadataspace, k, choice=3
    ).latent_symantics  # [11k X 8] to [11k X 4] (8d to 4d)
    for index, latent_feature in enumerate(metadataspace_NMF_model.components_):
        print("top 50 features for latent_topic #", index)
        print([i for i in latent_feature.argsort()[-50:]])
        print("\n")
Example #2
def starter(k):
    subject_similarities = []
    subject_ids = Database().retrieve_all_subject_ids()

    for subject_id in subject_ids:
        subject_similarities.append(
            np.array(Database().retrieve_subject_similarities(subject_id)))

    print(np.array(subject_similarities))

    latent_symantics_model, latent_symantics = LatentSymantics(
        np.array(subject_similarities), k, 3).latent_symantics

    term_weight_pairs = []
    latent_symantics_transpose = latent_symantics.transpose()  # row i: topic i's score per subject
    weights = latent_symantics_model.components_  # row i: topic i's weight per original feature
    for i in range(len(latent_symantics_transpose)):
        term_weight_pairs.append([latent_symantics_transpose[i], weights[i]])
    print(tabulate(term_weight_pairs, headers=["Term", "Weight"]))

    print("Latent topics are described in terms of top 50 features.")
    for index, latent_feature in enumerate(latent_symantics_model.components_):
        print("top 50 features for latent_topic #", index)
        print([i for i in latent_feature.argsort()[-50:]])
        print("\n")
Example #3
def starter(feature_model, dimension_reduction, k, visualizer):
    path, pos = Config().read_path(), None
    descriptor_type = DescriptorType(feature_model).descriptor_type
    if DescriptorType(feature_model).check_sift():
        x, ids, pos = functions.process_files(path, feature_model,
                                              dimension_reduction)
    else:
        x, ids = functions.process_files(path, feature_model,
                                         dimension_reduction)

    symantics_type = LatentSymanticsType(dimension_reduction).symantics_type
    if visualizer == 1:
        _, latent_symantics = LatentSymantics(
            x, k, dimension_reduction).latent_symantics
        k_th_eigenvector_all = []
        for i in range(k):
            col = latent_symantics[:, i]
            arr = []
            for j, val in enumerate(col):  # j indexes images; avoid shadowing k
                arr.append((str(ids[j] + ".jpg"), val))
            arr.sort(key=lambda pair: pair[1], reverse=True)
            k_th_eigenvector_all.append(arr)
            print(
                "Printing term-weight pair for latent Semantic {}:".format(i +
                                                                           1))
            print(arr)
        k_th_eigenvector_all = pd.DataFrame(k_th_eigenvector_all)
        Visualizer.visualize_data_symantics(k_th_eigenvector_all,
                                            symantics_type, descriptor_type)
    elif visualizer == 2:
        latent_symantics_model, _ = LatentSymantics(
            x, k, dimension_reduction).latent_symantics  # first element is the fitted model
        k_th_eigenvector_all = []
        for j in range(k):
            arr = []
            for i in range(len(ids)):
                arr.append((
                    str(ids[i] + ".jpg"),
                    np.dot(x[i], latent_symantics_model.components_[j]),
                ))
            arr.sort(key=lambda pair: pair[1], reverse=True)
            k_th_eigenvector_all.append(arr[:1])
            print(arr[0])
        k_th_eigenvector_all = pd.DataFrame(k_th_eigenvector_all)
        Visualizer.visualize_feature_symantics(k_th_eigenvector_all,
                                               symantics_type, descriptor_type)
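The inner scoring loop in the visualizer == 2 branch computes one dot product per image; it can be vectorized. A sketch, assuming x is an (n_images, n_features) ndarray whose rows line up with ids:

scores = x @ latent_symantics_model.components_[j]  # one score per image
order = scores.argsort()[::-1]                      # highest-scoring image first
print(str(ids[order[0]] + ".jpg"), scores[order[0]])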
Example #4
def concatenate_latent_symantics(subject, k, choice):
    connection = MongoClient(Config().mongo_url())
    database = connection[Config().database_name()]

    grid_fs = GridFS(database=database,
                     collection=Config().subjects_metadata_collection_name())
    with grid_fs.get(subject["dorsal"]) as dorsal_file:
        dorsal_image_vectors = json.loads(dorsal_file.read().decode("utf-8"))
    with grid_fs.get(subject["palmar"]) as palmar_file:
        palmar_image_vectors = json.loads(palmar_file.read().decode("utf-8"))

    _, dorsal_latent_symantics = LatentSymantics(
        np.transpose(dorsal_image_vectors), k, choice).latent_symantics
    _, palmar_latent_symantics = LatentSymantics(
        np.transpose(palmar_image_vectors), k, choice).latent_symantics

    dorsal_latent_symantics = [
        x for item in dorsal_latent_symantics.tolist() for x in item
    ]
    palmar_latent_symantics = [
        x for item in palmar_latent_symantics.tolist() for x in item
    ]
    return np.concatenate(
        (np.array(dorsal_latent_symantics), np.array(palmar_latent_symantics)))
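The two flattening comprehensions can also be written with numpy directly; an equivalent sketch (d and p stand for the un-flattened dorsal and palmar latent matrices):

def concatenate_flat(d, p):
    # ravel() flattens row by row, matching the nested comprehensions above.
    return np.concatenate((np.asarray(d).ravel(), np.asarray(p).ravel()))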
Example #5
def store_in_db(
    feature_model,
    dimension_reduction,
    k,
    task,
    filtered_image_ids=None,
    label=None,
    value=None,
):
    path, pos = Config().read_path(), None
    descriptor_type = DescriptorType(feature_model).descriptor_type
    symantics_type = LatentSymanticsType(dimension_reduction).symantics_type

    if DescriptorType(feature_model).check_sift():
        x, ids, pos = process_files(path, feature_model, dimension_reduction,
                                    filtered_image_ids)
    else:
        x, ids = process_files(path, feature_model, dimension_reduction,
                               filtered_image_ids)

    latent_symantics_model, latent_symantics = LatentSymantics(
        x, k, dimension_reduction).latent_symantics

    records = set_records(
        ids,
        descriptor_type,
        symantics_type,
        k,
        latent_symantics,
        pos,
        task,
        label,
        value,
    )

    Database().insert_many(records)

    return latent_symantics_model, latent_symantics
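A hypothetical call, assuming the parameter encodings documented in Example #9 also apply here (feature_model 3 = HOG, dimension_reduction 1 = PCA):

# Store HOG features reduced to 20 dimensions with PCA for task 1.
model, semantics = store_in_db(feature_model=3, dimension_reduction=1, k=20, task=1)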
Example #6
def helper(feature_model, dimension_reduction, k, label_choice, image_id):
    path, pos = Config().read_path(), None
    descriptor_type = DescriptorType(feature_model).descriptor_type
    symantics_type = LatentSymanticsType(dimension_reduction).symantics_type
    label, value, complementary_value = Labels(label_choice).label

    image = cv2.imread("{}{}{}".format(Config().read_all_path(), image_id,
                                       ".jpg"))
    image_feature_vector = Descriptor(image, feature_model,
                                      dimension_reduction).feature_descriptor

    label_filtered_image_ids = [
        item["image_id"]
        for item in Database().retrieve_metadata_with_labels(label, value)
    ]
    complementary_label_filtered_image_ids = [
        item["image_id"] for item in Database().retrieve_metadata_with_labels(
            label, complementary_value)
    ]

    if DescriptorType(feature_model).check_sift():
        label_feature_vector, label_ids, label_pos = functions.process_files(
            path, feature_model, dimension_reduction, label_filtered_image_ids)
        (complementary_label_feature_vector, complementary_label_ids,
         complementary_label_pos) = functions.process_files(
             path,
             feature_model,
             dimension_reduction,
             complementary_label_filtered_image_ids,
         )
        feature_vector = np.concatenate((
            label_feature_vector,
            complementary_label_feature_vector,
            image_feature_vector,
        ))
        pos = label_pos + complementary_label_pos + [
            image_feature_vector.shape[0]
        ]
    else:
        label_feature_vector, label_ids = functions.process_files(
            path, feature_model, dimension_reduction, label_filtered_image_ids)
        (complementary_label_feature_vector,
         complementary_label_ids) = functions.process_files(
             path,
             feature_model,
             dimension_reduction,
             complementary_label_filtered_image_ids,
         )
        feature_vector = np.concatenate((
            label_feature_vector,
            complementary_label_feature_vector,
            np.array([image_feature_vector]),
        ))

    ids = label_ids + complementary_label_ids + [image_id]

    _, latent_symantics = LatentSymantics(feature_vector, k,
                                          dimension_reduction).latent_symantics

    records = functions.set_records(ids, descriptor_type, symantics_type, k,
                                    latent_symantics, pos, 5)

    for record in records:
        if record["image_id"] == image_id:
            continue
        elif record["image_id"] in label_ids:
            record[label] = value
        elif record["image_id"] in complementary_label_ids:
            record[label] = complementary_value

    Database().insert_many(records)
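helper only stores the records; a sketch of how the query image could then be ranked against the labelled images in the reduced space, assuming the rows of latent_symantics line up with ids:

from scipy.spatial.distance import euclidean

# Rank every other image by distance to the query image's latent vector.
query_vector = latent_symantics[ids.index(image_id)]
ranked = sorted(
    ((other_id, euclidean(query_vector, latent_symantics[i]))
     for i, other_id in enumerate(ids) if other_id != image_id),
    key=lambda pair: pair[1],
)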
Example #7
    def helper(self, feature_model, dimension_reduction, k):
        unlabelled_path = "C:/Users/himan/OneDrive/Desktop/MWDB/phase3_sample_data/Unlabelled/Set 1/"
        files = os.listdir(unlabelled_path)
        path, pos = Config().read_path(), None
        descriptor_type = DescriptorType(feature_model).descriptor_type
        symantics_type = LatentSymanticsType(dimension_reduction).symantics_type
        label, value, complementary_value = ("dorsal", 1, 0)
        unlabelled_image_feature_vector = []
        unlabelled_image_ids = []

        for i, file in enumerate(files):
            print(file)

            image = cv2.imread("{}{}".format(unlabelled_path, file))
            image_feature_vector = Descriptor(
                image, feature_model, dimension_reduction
            ).feature_descriptor
            unlabelled_image_feature_vector.append(image_feature_vector)
            unlabelled_image_ids.append(file)

        label_filtered_image_ids = [
            item["image_id"]
            for item in Database().retrieve_metadata_with_labels(label, value)
        ]
        complementary_label_filtered_image_ids = [
            item["image_id"]
            for item in Database().retrieve_metadata_with_labels(label, complementary_value)
        ]

        if DescriptorType(feature_model).check_sift():
            label_feature_vector, label_ids, label_pos = functions_phase2.process_files(
                path, feature_model, dimension_reduction, label_filtered_image_ids
            )
            (complementary_label_feature_vector, complementary_label_ids,
             complementary_label_pos) = functions_phase2.process_files(
                 path,
                 feature_model,
                 dimension_reduction,
                 complementary_label_filtered_image_ids,
             )
            feature_vector = np.concatenate(
                (
                    label_feature_vector,
                    complementary_label_feature_vector,
                    unlabelled_image_feature_vector,
                )
            )
            # pos = label_pos + complementary_label_pos + [image_feature_vector.shape[0]]
        else:
            label_feature_vector, label_ids = functions_phase2.process_files(
                path, feature_model, dimension_reduction, label_filtered_image_ids
            )
            (complementary_label_feature_vector,
             complementary_label_ids) = functions_phase2.process_files(
                 path,
                 feature_model,
                 dimension_reduction,
                 complementary_label_filtered_image_ids,
             )

            feature_vector = np.concatenate(
                (
                    label_feature_vector,
                    complementary_label_feature_vector,
                    unlabelled_image_feature_vector,
                )
            )

        ids = label_ids + complementary_label_ids + unlabelled_image_ids

        _, latent_symantics = LatentSymantics(
            feature_vector, k, dimension_reduction
        ).latent_symantics

        # for i, ids in unlabelled_image_ids:
        #     _, latent_symantics = LatentSymantics(
        #         unlabelled_image_feature_vector[i], k, dimension_reduction
        #     ).latent_symantics

        records = functions_phase2.set_records(
            ids, descriptor_type, symantics_type, k, latent_symantics, pos, 5
        )

        for record in records:
            if record["image_id"] in label_ids:
                record[label] = value
            elif record["image_id"] in complementary_label_ids:
                record[label] = complementary_value
            else:
                continue

        Database().insert_many(records)
Example #8
def clustering(path, c):
    mongo_url = "mongodb://localhost:27017/"
    database_name = "mwdb_phase3"
    lbld_collection_name = "labelled_hands"
    unlbld_collection_name = "unlabelled_hands"
    meta_collection_name = "metadata"
    lbld_csv = "C:/Users/priya/Documents/images/Phase 3/phase3_sample_data/labelled_set1.csv"
    unlabelled_csv = "C:/Users/priya/Documents/images/Phase 3/phase3_sample_data/Unlabelled/unlablled_set1.csv"
    try:
        connection = MongoClient(mongo_url)
        database = connection[database_name]
        lbld_collection = database[lbld_collection_name]
        unlbld_collection = database[unlbld_collection_name]
        meta_collection = database[meta_collection_name]
        # storing labelled images
        df = pd.read_csv(lbld_csv)
        lbld_records = df.to_dict(orient='records')
        lbld_collection.delete_many({})  # clear previous run; Collection.remove() no longer exists in PyMongo
        lbld_collection.insert_many(lbld_records)

        # storing unlabelled images
        df = pd.read_csv(unlabelled_csv)
        unlbld_records = df.to_dict(orient='records')
        unlbld_collection.delete_many({})
        unlbld_collection.insert_many(unlbld_records)

        ids1, ids2, feature_vector1, feature_vector2, feature_vector3 = [], [], [], [], []
        colors = ['red', 'blue', 'green', 'cyan', 'magenta']
        markers = ['o', '<', 's', '+', 'v', '^', '.', '>', ',', 'd']
        clust_labels = []
        cent_labels = []
        cluster = "Cluster "
        cent = "Centroid "
        for i in range(c):
            clust_labels.append(cluster + str(i))  # str.join here would interleave, not concatenate
            cent_labels.append(cent + str(i))
        # extracting features
        # dorsal
        for subject in lbld_collection.find({"aspectOfHand": {"$regex": "dorsal"}}, {"imageName": 1}):
            image_id = subject['imageName']
            img_path = path + image_id
            image = cv2.imread(img_path)
            ids1.append(image_id.replace(".jpg", ""))
            feature_descriptor = Descriptor(image, 1).feature_descriptor
            # normalize features
            features_norm = (feature_descriptor - feature_descriptor.min()) / (
                    feature_descriptor.max() - feature_descriptor.min())
            feature_vector1.append(features_norm)

        _, d_latent_semantics = LatentSymantics(
            np.array(feature_vector1), 2, 1
        ).latent_symantics
        # K means
        centroids, prev_centroids, classes, X, centroid_norm, d_img_classes = [], [], [], [], [], []
        iteration = 1
        tolerance = 0.0001  # assumed convergence threshold; undefined in the original snippet
        isOptimal = False
        for i in range(c):
            centroids.append(d_latent_semantics[i])
            prev_centroids.append(d_latent_semantics[i])
        while not isOptimal and iteration < 501:  # at most 500 update rounds
            d_distances = []
            classes = []
            d_img_classes = []
            for i in range(c):
                classes.append([])
                d_img_classes.append([])
            # Calculating clusters for each feature
            for i in range(d_latent_semantics.shape[0]):
                features = d_latent_semantics[i]
                d_distances = [euclidean(features, centroid) for centroid in centroids]
                classification = d_distances.index(min(d_distances))
                classes[classification].append(features)
                d_img_classes[classification].append(ids1[i])
            # Recalculating centroids
            for i in range(len(classes)):
                centroids[i] = np.mean(classes[i], axis=0)
            isOptimal = True
            for i in range(len(centroids)):
                if sum((centroids[i] - prev_centroids[i]) / prev_centroids[i] * 100.0) > tolerance:
                    isOptimal = False
                    break
                prev_centroids[i] = centroids[i]
            iteration += 1
        # # Visualize clusters -- takes longer time to show so commented
        # for i in range(c):
        #     plt.scatter(centroids[i][0], centroids[i][1], s=300, c="black", marker="x", label=cent_labels[i])
        #     for features in classes[i]:
        #         plt.scatter(features[0], features[1], color=colors[i], s=30, marker=markers[i], label=clust_labels[i])
        # plt.show()
        print "Dorsal CLusters: "
        for i in range(len(d_img_classes)):
            print ("Cluster %d: " % i)
            print d_img_classes[i]
        # ---------------------------------------------------------------------------------------------------------------------
        # extracting features
        # palmar
        for subject in lbld_collection.find({"aspectOfHand": {"$regex": "palmar"}}, {"imageName": 1}):
            image_id = subject['imageName']
            img_path = path + image_id
            image = cv2.imread(img_path)
            ids2.append(image_id.replace(".jpg", ""))
            # normalize features
            feature_descriptor = Descriptor(image, 1).feature_descriptor
            features_norm = (feature_descriptor - feature_descriptor.min()) / (
                    feature_descriptor.max() - feature_descriptor.min())
            feature_vector2.append(features_norm)
        _, p_latent_semantics = LatentSymantics(
            np.array(feature_vector2), 2, 1
        ).latent_symantics
        # K means
        p_centroids, p_prev_centroids, p_classes, p_X, p_centroid_norm, p_img_classes = [], [], [], [], [], []
        p_iteration = 1
        p_isOptimal = False
        for i in range(c):
            p_centroids.append(p_latent_semantics[i])
            p_prev_centroids.append(p_latent_semantics[i])
            p_classes.append([])
            p_img_classes.append([])
        while not p_isOptimal and p_iteration < 501:
            p_distances = []
            p_classes = []
            p_img_classes = []
            for i in range(c):
                p_classes.append([])
                p_img_classes.append([])
            # Calculating clusters for each feature
            for i in range(p_latent_semantics.shape[0]):
                features = p_latent_semantics[i]
                p_distances = [euclidean(features, centroid) for centroid in p_centroids]
                classification = p_distances.index(min(p_distances))
                p_classes[classification].append(features)
                p_img_classes[classification].append(ids2[i])
            # Recalculating centroids
            for i in range(len(p_classes)):
                p_centroids[i] = np.mean(p_classes[i], axis=0)
            p_isOptimal = True
            for i in range(len(p_centroids)):
                if sum((p_centroids[i] - p_prev_centroids[i]) / p_prev_centroids[i] * 100.0) > tolerance:
                    p_isOptimal = False
                    break
                p_prev_centroids[i] = p_centroids[i]
            p_iteration += 1

        # # Visualize clusters -- takes longer time to show so commented
        # for i in range(c):
        #     plt.scatter(p_centroids[i][0], p_centroids[i][1], s=130, marker="x")
        #     for features in p_classes[i]:
        #         plt.scatter(features[0], features[1], color=colors[i], s=30, marker=markers[i])
        # plt.show()
        print "Palmar CLusters: "
        for i in range(len(p_img_classes)):
            print ("Cluster %d" % i)
            print p_img_classes[i]
        # ----------------------------------------------------------------------------------------------------------------------
        # Classification
        # mean_dorsal = np.mean(centroids, axis=0)
        # mean_palmar = np.mean(p_centroids, axis=0)
        image_name = []
        dorsal_cnt = 0
        palmar_cnt = 0
        d_cnt = 0
        p_cnt = 0
        for image_path in glob.glob(test_path):  # test_path: assumed module-level glob pattern for unlabelled images
            image = cv2.imread(image_path)
            # get filename
            image_name.append(os.path.basename(image_path))
            feature_descriptor = Descriptor(image, 1).feature_descriptor
            # normalize features
            features_norm = (feature_descriptor - feature_descriptor.min()) / (
                    feature_descriptor.max() - feature_descriptor.min())
            feature_vector3.append(features_norm)
        _, latent_semantics = LatentSymantics(np.array(feature_vector3), 2, 1).latent_symantics
        for i in range(len(latent_semantics)):
            ddistances = [euclidean(latent_semantics[i], centroid) for centroid in centroids]
            pdistances = [euclidean(latent_semantics[i], centroid) for centroid in p_centroids]

            subject_img = unlbld_collection.find_one({"imageName": image_name[i]}, {"aspectOfHand": 1})
            if "dorsal" in subject_img['aspectOfHand']:
                d_cnt += 1
            else:
                p_cnt += 1
            if min(ddistances) < min(pdistances):
                if "dorsal" in subject_img['aspectOfHand']:
                    dorsal_cnt += 1
                print("Image ID: %s, %s" % (image_name[i], "dorsal"))
            else:
                if "palmar" in subject_img['aspectOfHand']:
                    palmar_cnt += 1
                print("Image ID: %s, %s" % (image_name[i], "palmar"))
        print("Dorsal Accuracy %d" % ((dorsal_cnt * 100) / d_cnt))
        print("Palmar Accuracy %d" % ((palmar_cnt * 100) / p_cnt))

    except Exception as e:
        traceback.print_exc()
        print("Connection refused... ")
Example #9
def insert_images_in_database(feature_model,
                              dimension_reduction,
                              k,
                              identifier,
                              set1_dir=True,
                              set2_dir=True):
    """
    :param feature_model: 1 - CM, 2 - LBP, 3 - HOG, 4 - SIFT
    :param dimension_reduction: 1 - PCA, 2 - SVD, 3 - NMF, 4 - LDA
    :param k: reduced dimension value
    :param identifier: 0 - Read all, 1 - Read from Labelled, 2 - Read from Unlabelled
    :param set1_dir (Optional): True - Read from Set1 folder of Labelled/Unlabelled, False otherwise
    :param set2_dir (Optional): True - Read from Set2 folder of Labelled/Unlabelled, False otherwise
    :return None

    Default case: Read from both Set1 and Set2 folders
    """

    # Read images and feature extraction
    if identifier == 0:
        read_all_path = Config().read_all_path()
        files = os.listdir(read_all_path)
        connection = Database().open_connection()
        db = connection[Config().database_name()]
        collection = db[Config().collection_name()]

        for i, file in enumerate(files):
            print("Reading file: {} | {} % Done".format(
                file, ((i + 1) * 100.0) / len(files)))
            image = cv2.imread("{}{}".format(read_all_path, file))

            feature_descriptor = Descriptor(
                image, feature_model, dimension_reduction).feature_descriptor
            image_id = file.replace(".jpg", "")
            collection.insert_one({
                "image_id": image_id,
                "vector": feature_descriptor.tolist()
            })

        connection.close()
        query_results = Database().retrieve_many()
        ids = [item["image_id"] for item in query_results]
        x = np.array([item["vector"] for item in query_results])

    elif identifier == 1:
        if set1_dir and set2_dir:
            ids1, x1 = functions.process_files(
                Config().read_training_set1_path(), feature_model,
                dimension_reduction)
            ids2, x2 = functions.process_files(
                Config().read_training_set2_path(), feature_model,
                dimension_reduction)
            ids = ids1 + ids2
            x = np.concatenate((x1, x2))
        elif set1_dir:
            ids, x = functions.process_files(
                Config().read_training_set1_path(), feature_model,
                dimension_reduction)
        elif set2_dir:
            ids, x = functions.process_files(
                Config().read_training_set2_path(), feature_model,
                dimension_reduction)
    else:  # identifier == 2: read from Unlabelled
        if set1_dir and set2_dir:
            ids1, x1 = functions.process_files(
                Config().read_testing_set1_path(), feature_model,
                dimension_reduction)
            ids2, x2 = functions.process_files(
                Config().read_testing_set2_path(), feature_model,
                dimension_reduction)
            ids = ids1 + ids2
            x = np.concatenate((x1, x2))
        elif set1_dir:
            ids, x = functions.process_files(Config().read_testing_set1_path(),
                                             feature_model,
                                             dimension_reduction)
        elif set2_dir:
            ids, x = functions.process_files(Config().read_testing_set2_path(),
                                             feature_model,
                                             dimension_reduction)

    # Find Latent_symantics
    _, latent_symantics = LatentSymantics(x, k,
                                          dimension_reduction).latent_symantics

    # inserting data into Database
    if identifier == 0:
        records = functions.set_records(ids, latent_symantics)
        Database().insert_many(records)
    elif identifier == 1:
        records = functions.set_records(ids, latent_symantics, training=True)
        Database().insert_many(records, collection_type="training")
    else:
        records = functions.set_records(ids, latent_symantics)
        Database().insert_many(records, collection_type="testing")
    print("Done... ")