Example No. 1
0
def main():
    """Driver for Task 7.

    Builds a subject-subject similarity matrix by reusing Task 6's
    similarity machinery, then applies NMF with the user-supplied k and
    reports the latent semantics (terminal output + CSV under output/).
    """
    # Reuse the feature/reduction models configured in the task6 module.
    feature_extraction_model = task6.feature_extraction_model
    dimension_reduction_model = task6.dimension_reduction_model
    # Fixed k used to fit the subject-subject similarity model
    # (distinct from the user-supplied k used for the final NMF).
    k_value_for_ss_similarity = 10

    given_k_value = get_input_k()
    print(global_constants.LINE_SEPARATOR)
    print("User Inputs summary")
    print(global_constants.LINE_SEPARATOR)
    print("k-value: {}".format(given_k_value))
    print(global_constants.LINE_SEPARATOR)

    dim_reduction = DimensionReduction(feature_extraction_model, dimension_reduction_model, k_value_for_ss_similarity)
    # original feature vectors
    obj_feat_matrix = dim_reduction.get_object_feature_matrix()
    # get the img IDs from the database for images in the fit model
    img_set = pd.DataFrame({"imageId": obj_feat_matrix['imageId']})
    # get the metadata for each image with given subject id
    subject_data = dim_reduction.get_metadata("imageName", list(set(img_set["imageId"].tolist())))
    # unique subject IDs in dataset
    dataset_subject_ids = set((subject_data)["id"])
    subject_subject_matrix = []
    # rank each subject against every image in the fit model
    m_value = len(img_set)
    starttime = time.time()
    model = task6.load_model(dim_reduction, feature_extraction_model, dimension_reduction_model,
                             k_value_for_ss_similarity)
    # dataset folder name inferred from the path of the first image in the model
    folder = path.basename(path.dirname(obj_feat_matrix['path'][0]))

    # One row of similarity scores per subject in the dataset.
    for i, subjectid in enumerate(dataset_subject_ids):
        given_subject_images = dim_reduction.get_metadata("id", list([subjectid]))["imageName"].tolist()
        # restrict to this subject's images that are actually in the fit model
        image_list_for_subject = list(set(given_subject_images).intersection(set(img_set["imageId"].tolist())))
        similar_subjects = task6.find_similar_subjects(subjectid, image_list_for_subject, model,
                                                       img_set, dim_reduction, m_value, folder)
        subject_subject_matrix.append(np.asarray(list(similar_subjects.values())))

    print("\nTime taken to create subject subject matrix: {}\n".format(time.time() - starttime))
    # perform nmf on subject_subject_matrix
    # given_k_value = 1
    # NOTE(review): the 'imageId' column actually holds subject ids here;
    # the name is kept only to satisfy DimensionReduction's expected schema.
    matrix = pd.DataFrame(data={'imageId': list(dataset_subject_ids), 'featureVector': subject_subject_matrix})
    dim_red = DimensionReduction(None, "NMF", given_k_value, subject_subject=True, matrix=matrix)
    w, h, model = dim_red.execute()

    # display latent semantics
    # printing the term weight
    print_tw(w, h, subject_subject=True)
    # save to csv
    filename = "task7" + '_' + str(given_k_value)
    CSVReader().save_to_csv(w, None, filename, subject_subject=True)
    print("Please check the CSV file: output/{}.csv".format(filename))
Example No. 2
0
def run_task3(feature_extraction_model, dimension_reduction_model, label,
              k_value):
    """Main function for the Task3.

    Fits a dimensionality reduction over the feature vectors of images
    carrying the given label and persists the fitted model to disk.

    Args:
        feature_extraction_model: name of the feature extractor (e.g. "HOG").
        dimension_reduction_model: name of the reduction technique (e.g. "PCA").
        label: metadata label used to filter the image set.
        k_value: number of latent semantics to retain.
    """
    # Performs the dimensionality reduction
    dim_reduction = DimensionReduction(feature_extraction_model,
                                       dimension_reduction_model, k_value,
                                       label)
    # (removed an unused `obj_feature = dim_reduction.get_object_feature_matrix()`
    #  fetch — its result was never read)
    obj_lat, feat_lat, model = dim_reduction.execute()

    # Saves the returned model under a name that encodes all the inputs,
    # so run_task4 can reconstruct the same filename and reload it.
    filename = "{0}_{1}_{2}_{3}".format(feature_extraction_model,
                                        dimension_reduction_model,
                                        label.replace(" ", ''), str(k_value))
    model_interact.save_model(model=model, filename=filename)
Example No. 3
0
def run_task4(feature_extraction_model, dimension_reduction_model, folder,
              image_name, dist_func, label, k_value, m_value):
    """Main function for the Task4.

    Loads the model persisted by Task 3, projects the query image into the
    same latent space and returns the m most similar images.

    Args:
        feature_extraction_model: name of the feature extractor (e.g. "HOG").
        dimension_reduction_model: name of the reduction technique (e.g. "PCA").
        folder: folder containing the query image.
        image_name: file name of the query image.
        dist_func: distance/similarity function used for ranking.
        label: metadata label the Task 3 model was fitted on.
        k_value: number of latent semantics of the fitted model.
        m_value: number of similar images to return.

    Returns:
        The ranked list of the m most similar images, as produced by
        DimensionReduction.find_m_similar_images.
    """
    # Reconstruct the filename Task 3 used, then load the saved model.
    filename = "{0}_{1}_{2}_{3}".format(feature_extraction_model,
                                        dimension_reduction_model,
                                        label.replace(" ", ''), str(k_value))
    model = model_interact.load_model(filename=filename)

    # Compute the reduced dimensions for the new query image and find m similar images
    dim_reduction = DimensionReduction(feature_extraction_model,
                                       dimension_reduction_model, k_value,
                                       label)
    # (removed an unused `obj_feature = dim_reduction.get_object_feature_matrix()`
    #  fetch — its result was never read)
    result = dim_reduction.find_m_similar_images(model, m_value, folder,
                                                 image_name, dist_func)
    return result
def main():
    """Main function for the task 1.

    Prompts the user for a feature extractor, a dimensionality-reduction
    technique and k; fits and saves the reduction, prints the term-weight
    pairs, writes them to a CSV under output/, and renders visualizations
    of the latent semantics.
    """
    feature_extraction_model = get_input_feature_extractor_model()
    dimension_reduction_model = get_input_dimensionality_reduction_model()
    k_value = get_input_k()

    print(global_constants.LINE_SEPARATOR)
    print("User Inputs summary")
    print(global_constants.LINE_SEPARATOR)
    print(
        "Feature Extraction Model: {}\nDimensionality Reduction Model: {}\nk-value: {}"
        .format(feature_extraction_model, dimension_reduction_model, k_value))
    print(global_constants.LINE_SEPARATOR)

    # Performs the dimensionality reduction
    dim_reduction = DimensionReduction(feature_extraction_model,
                                       dimension_reduction_model, k_value)
    # Fit and persist the model; returns the object-latent and
    # feature-latent matrices.
    obj_lat, feat_lat = save_model(dim_reduction, feature_extraction_model,
                                   dimension_reduction_model, k_value)

    # Print term weight pairs to terminal
    data_tw = print_tw(obj_lat, feat_lat)

    # save term weight pairs to csv
    filename = "task1" + '_' + feature_extraction_model + '_' + dimension_reduction_model + '_' + str(
        k_value)
    CSVReader().save_to_csv(obj_lat, feat_lat, filename)
    print("Please check the CSV file: output/{}.csv".format(filename))

    # original object-feature matrix, needed by the visualizers
    data = dim_reduction.get_object_feature_matrix()

    title = {
        "Feature Extraction": feature_extraction_model,
        "Dimensionality Reduction": dimension_reduction_model,
        "k": k_value,
    }
    # Data-in-latent-space plot is only drawn for small k, presumably to
    # keep the figure readable.
    if k_value <= 20:
        print("Generating Visualization ...")
        show_data_ls(data, data_tw, title)
    print("Generating Visualization ...")
    show_feature_ls(data, feat_lat, title)
def main():
    """Main function for the script.

    Runs Personalized Page Rank over a cosine-similarity graph built from
    HOG+PCA features of a user-supplied folder, and displays the top-K
    ranked images for the given query images.
    """
    feature_extraction_model = "HOG"
    dimension_reduction_model = "PCA"
    k_value = get_input_k("k")  # outgoing edges kept per node in the similarity graph
    K_value = get_input_k("K")  # number of top-ranked images to report
    folder = get_input_folder("Folder")
    dim_k_value = 40  # latent dimensions for the PCA model

    query_images = get_input_image_list(folder)
    start = time.time()
    dim_red = DimensionReduction(feature_extraction_model,
                                 dimension_reduction_model,
                                 dim_k_value,
                                 folder_metadata=folder,
                                 metadata_collection="labelled")
    obj_feat = dim_red.get_object_feature_matrix()
    features_list = np.array(obj_feat['featureVector'].tolist())
    images_list = np.array(obj_feat['imageId'])
    # pairwise cosine similarity between all image feature vectors
    cos_sim = cosine_similarity(features_list)

    # k-NN similarity graph, then PPR personalized on the query images
    sim_graph = sim_graph_from_sim_max(cos_sim, images_list, k_value)
    results = ppr(sim_graph, images_list, query_images)
    results = results[:K_value]

    print("Top {} images from Personalized page Rank are:".format(K_value))
    for r in results:
        r["path"] = os.path.abspath(os.path.join(folder, r['imageId']))
        print(r)

    query_images_list = [
        os.path.abspath(os.path.join(folder, img)) for img in query_images
    ]
    title = {"Model": "Personalized Page Rank", "k": k_value, "K": K_value}
    show_images_ppr(query_images_list, title, results)
    print("Execution time: {} seconds".format(time.time() - start))
def main():
    """Main function for the script.

    Classifies each unlabelled image as dorsal/palmar via per-image
    Personalized Page Rank: the image's combined HOG+CM feature vector is
    appended to the labelled set, a k-NN cosine-similarity graph is built
    over the combined set, PPR is personalized on the image, and the label
    is decided by majority vote over its first 5 labelled neighbours in
    the ranking. Finally prints per-image predictions and accuracy.
    """
    start = time.time()
    feature_extraction_model = "HOG"
    # feature_extraction_models = ["CM", "HOG"]
    feature_extraction_model_1 = "CM"
    dimension_reduction_model = "PCA"
    k_value = 5  # outgoing edges kept per node in the similarity graph
    dim_k_value = 40  # latent dimensions for the reduction model
    # K_value = 20
    # hard-coded dataset locations for this experiment
    lab_folder = "Dataset3/Labelled/Set1"
    unlab_folder = "Dataset3/Unlabelled/Set 2"

    # ================================================================================================================
    # labelled Images (HOG features)
    dim_red = DimensionReduction(feature_extraction_model,
                                 dimension_reduction_model,
                                 dim_k_value,
                                 folder_metadata=lab_folder,
                                 metadata_collection="labelled")
    obj_feat_lab = dim_red.get_object_feature_matrix()
    features_list_lab = np.array(obj_feat_lab['featureVector'].tolist())
    images_list_lab = np.array(obj_feat_lab['imageId'])
    # filtering the labelled set
    dorsal_list, palmar_list = filter_images_by_label(images_list_lab)

    # unlabelled images (only their ids are needed here; features are
    # fetched per-image from Mongo inside the loop below)
    dim_red = DimensionReduction(feature_extraction_model,
                                 dimension_reduction_model,
                                 dim_k_value,
                                 folder_metadata=unlab_folder,
                                 metadata_collection="unlabelled")
    obj_feat_unlab = dim_red.get_object_feature_matrix()
    # features_list_unlab = np.array(obj_feat_unlab['featureVector'].tolist())
    images_list_unlab = np.array(obj_feat_unlab['imageId'])

    # ================================================================================================================
    # labelled Images (CM features, concatenated with HOG below)
    dim_red = DimensionReduction(feature_extraction_model_1,
                                 dimension_reduction_model,
                                 dim_k_value,
                                 folder_metadata=lab_folder,
                                 metadata_collection="labelled")
    obj_feat_lab_1 = dim_red.get_object_feature_matrix()
    features_list_lab_1 = np.array(obj_feat_lab_1['featureVector'].tolist())
    # images_list_lab = np.array(obj_feat_lab_1['imageId'])
    # filtering the labelled set

    # # unlabelled images
    # dim_red = DimensionReduction(feature_extraction_model_1, dimension_reduction_model, dim_k_value,
    #                              folder_metadata=unlab_folder,
    #                              metadata_collection="unlabelled")
    # obj_feat_unlab_1 = dim_red.get_object_feature_matrix()
    # features_list_unlab_1 = np.array(obj_feat_unlab_1['featureVector'].tolist())
    # # images_list_unlab = np.array(obj_feat_unlab['imageId'])

    # ================================================================================================================

    # concatenate HOG and CM features per labelled image (columns side by side)
    features_list_lab = np.concatenate(
        (features_list_lab, features_list_lab_1), axis=1)
    # features_list_unlab = np.concatenate((features_list_unlab, features_list_unlab_1), axis=1)
    # mongo_wrap = MongoWrapper()
    # res = mongo_wrap.find(feature_extraction_model.lower(), {"imageId": que})
    # res1 = mongo_wrap.find(feature_extraction_model_1.lower(), )

    dorsal_list, palmar_list = filter_images_by_label(images_list_lab)
    # features_list = np.concatenate((features_list_lab, features_list_unlab))
    # print(features_list.shape)
    feature_list = features_list_lab
    # images_list = np.concatenate((images_list_lab, images_list_unlab))
    # images_list = list(images_list)
    # Finding Similarity Matrix

    mongo_wrap = MongoWrapper()
    final_results = {}
    # Classify one unlabelled image at a time: graph is rebuilt per image.
    for img in images_list_unlab:
        # print(len(images_list_lab))
        images_list = np.concatenate((images_list_lab, [img]))
        # print(images_list)
        # raw HOG and CM feature vectors of the query image from Mongo
        res = mongo_wrap.find(feature_extraction_model.lower(),
                              {"imageId": img})[0]
        res1 = mongo_wrap.find(feature_extraction_model_1.lower(),
                               {"imageId": img})[0]
        feature_query = np.concatenate(
            (np.array(res["featureVector"]), np.array(res1["featureVector"])))
        # labelled features + the query as the last row
        features_list = np.vstack((feature_list, feature_query))
        cos_sim = cosine_similarity(features_list)
        sim_graph = np.empty((0, len(cos_sim)))
        # keep only each node's k most similar neighbours (skip self at index 0)
        for row in cos_sim:
            k_largest = np.argsort(-np.array(row))[1:k_value + 1]
            # sim_graph_row = [d if i in k_largest else 0 for i, d in enumerate(row)]
            sim_graph_row = [
                d if i in k_largest else 0 for i, d in enumerate(row)
            ]
            sim_graph = np.append(sim_graph, np.array([sim_graph_row]), axis=0)

        # row-normalize so each row is a transition-probability distribution
        row_sums = sim_graph.sum(axis=1)
        sim_graph = sim_graph / row_sums[:, np.newaxis]
        idx = 0  # NOTE(review): unused leftover

        # PPR personalized on the single query image
        results = ppr(sim_graph, images_list, [img])
        dorsal_count = 0
        palmar_count = 0
        # print("{}: {}".format(img, results))
        # Majority vote over the first 5 labelled images in the ranking.
        for r in results:
            if r != img:
                # print("{} {}".format(" " * 10, r))
                if r in dorsal_list:
                    dorsal_count += 1
                elif r in palmar_list:
                    palmar_count += 1
                if dorsal_count + palmar_count >= 5:
                    if dorsal_count > palmar_count:
                        final_results[img] = "dorsal"
                    else:
                        final_results[img] = "palmar"
                    break

    # results_dorsal = ppr(sim_graph, images_list, dorsal_list)
    # results_palmar = ppr(sim_graph, images_list, palmar_list)

    # for img in images_list_unlab:
    #     if results_dorsal[img] < results_palmar[img]:
    #         final_results[img] = "dorsal"
    #     else:
    #         final_results[img] = "palmar"

    actual_labels = fetch_actual_labels(final_results.keys())
    print("Classification")
    no_correct = 0
    print(len(final_results))
    for r in final_results:
        print("Image Id: {}, Label:{} Actual Label: {}".format(
            r, final_results[r], actual_labels[r]))
        if final_results[r] == actual_labels[r]:
            no_correct += 1

    # NOTE(review): raises ZeroDivisionError if no image got classified
    print("Classification Accuracy: {}".format(
        (no_correct / len(final_results)) * 100))

    # for palm in
    # print("Clustering Results")
    # for r in results:
    #     r["path"] = os.path.abspath(os.path.join(lab_folder, r['imageId']))
    #     print(r)

    # query_images_list = [os.path.abspath(os.path.join(folder, img)) for img in query_images]
    # title = {"Model": "Personalized Page Rank", "k": k_value, "K": K_value}
    # show_images_ppr(query_images_list, title, results)

    print("Execution time: {} seconds".format(time.time() - start))
def main():
    """Driver for Task 6.

    Finds the 3 subjects most similar to a user-supplied subject id,
    prints their similarity scores, and renders a side-by-side image plot
    of the given subject and its most similar subjects.

    NOTE(review): relies on module-level globals not defined in this
    function (feature_extraction_model, dimension_reduction_model,
    dist_func, load_model, find_similar_subjects, imgvwr) — confirm they
    are set at import time.
    """
    # given subject id
    given_subject_id = get_input_subject_id()
    k_value = 40  # latent dimensions used when the Task 1 model was fitted
    master_folder = "Hands"  # master dataset folder for the query subject's images
    dim_reduction = DimensionReduction(feature_extraction_model,
                                       dimension_reduction_model, k_value)
    # original feature vectors
    obj_feat_matrix = dim_reduction.get_object_feature_matrix()
    # extract model saved from task 1
    model = load_model(dim_reduction, feature_extraction_model,
                       dimension_reduction_model, k_value)
    # get the img IDs from the database for images in the fit model
    img_set = pd.DataFrame({"imageId": obj_feat_matrix['imageId']})
    # image count to rank against current image
    m_value = len(img_set)
    print(global_constants.LINE_SEPARATOR)
    print("User Inputs summary")
    print(global_constants.LINE_SEPARATOR)
    print("Query Subject Id: {}".format(given_subject_id))
    print(global_constants.LINE_SEPARATOR)
    # given_subject_id = 55
    # similar subjects to find
    similar_subject_count = 3
    # get metadata for given subject's images
    metadata = dim_reduction.get_metadata("id", list([given_subject_id]))
    # get a list of img IDs for the particular subject in the dataset
    # (5 of the subject's images picked at random)
    image_list_for_given_subject = random.sample(
        list(set(metadata["imageName"].tolist())), 5)
    image_list = list(set(img_set["imageId"].tolist()))

    starttime = time.time()

    # method call to find similar subjects
    subject_similarity = find_similar_subjects(given_subject_id,
                                               image_list_for_given_subject,
                                               model, img_set, dim_reduction,
                                               m_value, master_folder)
    # sort the similarity scores in descending order
    sorted_subject_similarity = sorted(subject_similarity.items(),
                                       key=operator.itemgetter(1),
                                       reverse=True)

    print()
    print("Subject  :   Score")
    list_subjects = []
    # NOTE(review): `max` shadows the builtin; it is the (sliding) number of
    # ranked entries to scan — extended by one whenever the query subject
    # itself appears in the ranking, so 3 *other* subjects are printed.
    max = similar_subject_count
    counter = 0
    while counter < max:
        subject = sorted_subject_similarity[counter]
        if subject[0] != given_subject_id:
            print(subject[0], "  :   ", subject[1])
            list_subjects.append([subject[0], subject[1]])
        else:
            max += 1
        counter += 1
    print()
    # print(sorted_subject_similarity)

    image_list_for_similar_subjects_abs_path = []
    similarity_scores = []
    # folder containing the images that were in the fit model
    folder_path = os.path.dirname(obj_feat_matrix['path'][0])
    # create list of images for each subject to visualize most similar subjects
    for subject in (sorted_subject_similarity):
        if subject[0] != given_subject_id:
            metadata = dim_reduction.get_metadata("id", list([subject[0]]))
            similarity_scores.append(subject[1])
            # only this subject's images that are present in the fit model
            image_list_for_similar_subject = list(
                set(metadata["imageName"].tolist()).intersection(
                    set(img_set["imageId"].tolist())))
            image_list_for_one_similar_subject_abs_path = []
            for image in image_list_for_similar_subject:
                image_list_for_one_similar_subject_abs_path.append(
                    (os.path.join(folder_path, image)))
            image_list_for_similar_subjects_abs_path.append(
                image_list_for_one_similar_subject_abs_path)
            similar_subject_count -= 1
            if (similar_subject_count <= 0):
                break

    # Create image list for given subject
    image_list_for_given_subject_abs_path = []
    # pick 5 images of given subject at random from master dataset
    for image in image_list_for_given_subject:
        image_list_for_given_subject_abs_path.append(
            os.path.abspath(os.path.join(master_folder, image)))

    # ensure output/ exists before saving the figure
    output_path = os.path.abspath(os.path.join("output"))
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    fig_filename = os.path.join(
        output_path,
        "task6_{0}_{1}_{2}_{3}_{4}.png".format(feature_extraction_model,
                                               dimension_reduction_model,
                                               str(k_value), dist_func,
                                               given_subject_id))
    # show images on a plot
    imgvwr.show_subjectwise_images(given_subject_id,
                                   image_list_for_given_subject_abs_path,
                                   list_subjects,
                                   image_list_for_similar_subjects_abs_path,
                                   fig_filename)

    print("\nTime taken for task 6: {}\n".format(time.time() - starttime))
def main():
    """Main function for the script.

    Classifies unlabelled images as dorsal/palmar with a single combined
    Personalized Page Rank run: builds one k-NN cosine-similarity graph
    over the HOG+CM features of labelled *and* unlabelled images, runs PPR
    personalized on the dorsal seeds and on the palmar seeds, and labels
    each unlabelled image by whichever run ranks it better. Prints the
    predictions, correct/incorrect lists and accuracy.
    """
    feature_extraction_model = "HOG"
    # feature_extraction_models = ["CM", "HOG"]
    feature_extraction_model_1 = "CM"
    dimension_reduction_model = "PCA"
    k_value = 10  # outgoing edges kept per node in the similarity graph
    dim_k_value = 40  # latent dimensions for the reduction model
    # K_value = 20
    # lab_folder = "Dataset3/Labelled/Set1"
    # unlab_folder = "Dataset3/Unlabelled/Set 2"
    lab_folder = get_input_folder("Labelled Folder")
    unlab_folder = get_input_folder("Classify")
    start = time.time()
    # ================================================================================================================
    # labelled Images (HOG features)
    dim_red = DimensionReduction(feature_extraction_model,
                                 dimension_reduction_model,
                                 dim_k_value,
                                 folder_metadata=lab_folder,
                                 metadata_collection="labelled")
    obj_feat_lab = dim_red.get_object_feature_matrix()
    features_list_lab = np.array(obj_feat_lab['featureVector'].tolist())
    images_list_lab = np.array(obj_feat_lab['imageId'])
    # filtering the labelled set
    dorsal_list, palmar_list = filter_images_by_label(images_list_lab)

    # unlabelled images (HOG features)
    dim_red = DimensionReduction(feature_extraction_model,
                                 dimension_reduction_model,
                                 dim_k_value,
                                 folder_metadata=unlab_folder,
                                 metadata_collection="unlabelled")
    obj_feat_unlab = dim_red.get_object_feature_matrix()
    features_list_unlab = np.array(obj_feat_unlab['featureVector'].tolist())
    images_list_unlab = np.array(obj_feat_unlab['imageId'])

    # ================================================================================================================
    # labelled Images (CM features)
    dim_red = DimensionReduction(feature_extraction_model_1,
                                 dimension_reduction_model,
                                 dim_k_value,
                                 folder_metadata=lab_folder,
                                 metadata_collection="labelled")
    obj_feat_lab_1 = dim_red.get_object_feature_matrix()
    features_list_lab_1 = np.array(obj_feat_lab_1['featureVector'].tolist())
    # images_list_lab = np.array(obj_feat_lab_1['imageId'])
    # filtering the labelled set

    # unlabelled images (CM features)
    dim_red = DimensionReduction(feature_extraction_model_1,
                                 dimension_reduction_model,
                                 dim_k_value,
                                 folder_metadata=unlab_folder,
                                 metadata_collection="unlabelled")
    obj_feat_unlab_1 = dim_red.get_object_feature_matrix()
    features_list_unlab_1 = np.array(
        obj_feat_unlab_1['featureVector'].tolist())
    # images_list_unlab = np.array(obj_feat_unlab['imageId'])
    # concatenate HOG and CM features per image (columns side by side)
    features_list_lab = np.concatenate(
        (features_list_lab, features_list_lab_1), axis=1)
    features_list_unlab = np.concatenate(
        (features_list_unlab, features_list_unlab_1), axis=1)

    # ================================================================================================================

    dorsal_list, palmar_list = filter_images_by_label(images_list_lab)
    # stack labelled rows above unlabelled rows; images_list keeps the same order
    features_list = np.concatenate((features_list_lab, features_list_unlab))
    images_list = np.concatenate((images_list_lab, images_list_unlab))
    images_list = list(images_list)
    # Finding Similarity Matrix
    cos_sim = cosine_similarity(features_list)
    sim_graph = np.empty((0, len(cos_sim)))
    # keep only each node's k most similar neighbours (skip self at index 0)
    for row in cos_sim:
        k_largest = np.argsort(-np.array(row))[1:k_value + 1]
        sim_graph_row = [d if i in k_largest else 0 for i, d in enumerate(row)]
        sim_graph = np.append(sim_graph, np.array([sim_graph_row]), axis=0)

    # row-normalize so each row is a transition-probability distribution
    row_sums = sim_graph.sum(axis=1)
    sim_graph = sim_graph / row_sums[:, np.newaxis]
    idx = 0  # NOTE(review): unused leftover
    # two PPR runs: seeded on dorsal images, then on palmar images
    results_dorsal = ppr(sim_graph, images_list, dorsal_list)
    results_palmar = ppr(sim_graph, images_list, palmar_list)
    final_results = {}

    # Lower value wins — presumably ppr returns rank positions here
    # (smaller = better); verify against ppr's return contract.
    for img in images_list_unlab:
        if results_dorsal[img] < results_palmar[img]:
            final_results[img] = "dorsal"
        else:
            final_results[img] = "palmar"

    actual_labels = fetch_actual_labels(images_list_unlab)
    print("Classification")
    no_correct = 0
    correctly_classified = []
    incorrectly_classified = []
    print("|   ImageId          | Prediction |  Actual |")
    for r in final_results:
        print("|   {} |   {}   |  {} |".format(r, final_results[r],
                                               actual_labels[r]))
        if final_results[r] == actual_labels[r]:
            correctly_classified.append(r)
            no_correct += 1
        else:
            incorrectly_classified.append(r)

    print("Correctly classified: {}\n".format(correctly_classified))
    print("InCorrectly classified: {}\n".format(incorrectly_classified))

    # NOTE(review): raises ZeroDivisionError if the unlabelled folder is empty
    print("Classification Accuracy: {}%".format(no_correct /
                                                len(images_list_unlab) * 100))
    print("Execution time: {} seconds".format(time.time() - start))