def main():
    """Main function for the Task 8.

    Asks for k (asking again while the answer is above 8) and for a folder,
    echoes both, runs an NMF ``DimensionReduction`` with ``image_metadata=True``
    on that folder, prints the term-weight pairs and hands them to
    ``CSVReader.save_to_csv`` under the name ``task8_<k>``.
    """
    # Keep prompting until the supplied k is not above 8.
    while True:
        k_value = get_input_k()
        if not k_value > 8:
            break
        print("Please enter a value of k within 8")
    folder = get_input_folder()

    separator = global_constants.LINE_SEPARATOR
    summary = "k-value: {}\nFolder: {}".format(k_value, folder)
    for line in (separator, "User Inputs summary", separator, summary,
                 separator):
        print(line)

    reducer = DimensionReduction(None,
                                 "NMF",
                                 k_value,
                                 image_metadata=True,
                                 folder_metadata=folder)
    # The fitted model returned as the third element is not needed here.
    w, h, _ = reducer.execute()

    # Term-weight pairs go to the terminal first, then to the CSV file.
    print_tw(w, h, image_metadata=True)

    filename = "_".join(("task8", str(k_value)))
    CSVReader().save_to_csv(w, h, filename, image_metadata=True)
    print("Please check the CSV file: output/{}.csv".format(filename))
Esempio n. 2
0
def main():
    """Main function for the task 4.

    Collects the user inputs, loads the model stored under
    ``<feature>_<reduction>_<label-without-spaces>_<k>``, asks
    ``DimensionReduction.find_m_similar_images`` for the m most similar images
    to the query image, prints them and shows them with ``show_images``.
    Exits with status 1 when no stored model is found.
    """
    feature_extraction_model = get_input_feature_extractor_model()
    dimension_reduction_model = get_input_dimensionality_reduction_model()
    k_value = get_input_k()
    label = get_input_image_label()
    folder = get_input_folder()
    image_name = get_input_image(folder)
    m_value = get_input_m()

    # "nvsc1" is used only for NMF combined with CM or LBP features;
    # every other combination falls back to euclidean distance.
    nmf_on_cm_or_lbp = (dimension_reduction_model == "NMF"
                        and feature_extraction_model in ["CM", "LBP"])
    dist_func = "nvsc1" if nmf_on_cm_or_lbp else "euclidean"

    separator = global_constants.LINE_SEPARATOR
    summary_template = ("Feature Extraction Model: {}\n"
                        "Dimensionality Reduction Model: {}\n"
                        "k-value: {}\nLabel: {}\nFolder: {}\n"
                        "Image: {}\nm-value: {}")
    print(separator)
    print("User Inputs summary")
    print(separator)
    print(summary_template.format(feature_extraction_model,
                                  dimension_reduction_model, k_value, label,
                                  folder, image_name, m_value))
    print(separator)

    # Load the previously stored model for this input combination.
    filename = "{0}_{1}_{2}_{3}".format(feature_extraction_model,
                                        dimension_reduction_model,
                                        label.replace(" ", ''),
                                        str(k_value))
    model = model_interact.load_model(filename=filename)
    if not model:
        print("Please run Task 3 for {}, {}, {} and {}".format(
            feature_extraction_model, dimension_reduction_model, label,
            k_value))
        sys.exit(1)

    # Compute the reduced dimensions for the new query image and find m similar images
    dim_reduction = DimensionReduction(feature_extraction_model,
                                       dimension_reduction_model, k_value,
                                       label)
    result = dim_reduction.find_m_similar_images(model, m_value, folder,
                                                 image_name, dist_func)
    print(separator)
    print("Similar Images")
    print(separator)
    for similar_image in result:
        print(similar_image)
    print(separator)

    title = {
        "Feature Extraction": feature_extraction_model,
        "Dimension Reduction": dimension_reduction_model,
        "k": k_value,
        "Label": label,
        "Distance": dist_func,
    }
    query_image_path = os.path.abspath(os.path.join(folder, image_name))
    show_images(query_image_path, result, title)
Esempio n. 3
0
def dimension_reduction():
    """Run a PCA reduction with k=500 over the stored 'cm_np' features.

    Loads the matrix saved under the name 'cm_np' through ``Model`` and
    executes a ``DimensionReduction`` on it; the result of ``execute()`` is
    discarded.
    """
    constants = GlobalConstants()
    features = Model().load_model('cm_np')
    reducer = DimensionReduction(dimension_reduction_model=constants.PCA,
                                 extractor_model=constants.CM,
                                 matrix=features,
                                 conversion=True,
                                 k_value=500)
    reducer.execute()
Esempio n. 4
0
def get_x_train(fea_ext_mod, dim_red_mod, k_value, train_set):
    """Reduce the labelled training folder and persist the fitted model.

    Args:
        fea_ext_mod: feature extraction model name.
        dim_red_mod: dimensionality reduction model name.
        k_value: number of latent dimensions.
        train_set: path of the training folder; its basename becomes part of
            the saved model's filename.

    Returns:
        The first element returned by ``DimensionReduction.execute()``
        (the object/latent-semantics data).
    """
    model_store = Model()
    reducer = DimensionReduction(fea_ext_mod,
                                 dim_red_mod,
                                 k_value,
                                 folder_metadata=train_set,
                                 metadata_collection="labelled")
    object_latents, _, fitted_model = reducer.execute()

    # Same naming scheme as get_x_test, so the model can be loaded back there.
    saved_name = "{0}_{1}_{2}_{3}".format(fea_ext_mod, dim_red_mod,
                                          str(k_value),
                                          os.path.basename(train_set))
    model_store.save_model(model=fitted_model, filename=saved_name)
    return object_latents
Esempio n. 5
0
def get_y(fea_ext_mod,
          dim_red_mod,
          k_value,
          collection_name,
          red_dim=None,
          obj_lat=None):
    """Return the first word of 'aspectOfHand' for each image's metadata row.

    Args:
        fea_ext_mod: feature extraction model name.
        dim_red_mod: dimensionality reduction model name.
        k_value: number of latent dimensions.
        collection_name: 'unlabelled' reads image ids from ``red_dim`` via
            ``get_metadata``; any other value reads them from ``obj_lat`` via
            ``get_metadata_collection`` with that collection name.
        red_dim: frame with an 'imageId' column (used for 'unlabelled').
        obj_lat: frame with an 'imageId' column (used otherwise).

    Returns:
        list of str: the text before the first space of every
        'aspectOfHand' value.
    """
    reducer = DimensionReduction(fea_ext_mod, dim_red_mod, k_value)
    if collection_name == 'unlabelled':
        image_ids = red_dim['imageId'].tolist()
        metadata = reducer.get_metadata("imageName", image_ids)
    else:
        image_ids = obj_lat['imageId'].tolist()
        metadata = reducer.get_metadata_collection("imageName", image_ids,
                                                   collection_name)
    aspects = metadata['aspectOfHand'].tolist()
    return [aspect.split(' ', 1)[0] for aspect in aspects]
Esempio n. 6
0
def run_task3(feature_extraction_model, dimension_reduction_model, label,
              k_value):
    """Main function for the Task3.

    Runs the dimensionality reduction for the given label and stores the
    fitted model under ``<feature>_<reduction>_<label-without-spaces>_<k>``.
    Nothing is returned.
    """
    reducer = DimensionReduction(feature_extraction_model,
                                 dimension_reduction_model, k_value, label)
    # The returned matrix is not used; the call is kept as in the original.
    # NOTE(review): confirm whether this call is needed for a side effect.
    reducer.get_object_feature_matrix()
    _, _, fitted_model = reducer.execute()

    # Persist the fitted model
    name_parts = (feature_extraction_model, dimension_reduction_model,
                  label.replace(" ", ''), str(k_value))
    model_interact.save_model(model=fitted_model,
                              filename="{0}_{1}_{2}_{3}".format(*name_parts))
Esempio n. 7
0
def get_x_test(fea_ext_mod, dim_red_mod, k_value, train_set, test_set):
    """Project every file of ``test_set`` with the model saved for ``train_set``.

    Args:
        fea_ext_mod: feature extraction model name.
        dim_red_mod: dimensionality reduction model name.
        k_value: number of latent dimensions.
        train_set: training folder whose basename identifies the saved model.
        test_set: folder whose directory entries are treated as query images.

    Returns:
        pandas.DataFrame with columns 'imageId' (the directory entry names)
        and 'reducedDimensions' (element 0 of each ``compute_query_image``
        result).
    """
    model_store = Model()
    reducer = DimensionReduction(fea_ext_mod, dim_red_mod, k_value)
    saved_name = "{0}_{1}_{2}_{3}".format(fea_ext_mod, dim_red_mod,
                                          str(k_value),
                                          os.path.basename(train_set))
    fitted_model = model_store.load_model(filename=saved_name)

    image_names = os.listdir(test_set)
    reduced_vectors = [
        reducer.compute_query_image(fitted_model, test_set, name)[0]
        for name in image_names
    ]
    return pd.DataFrame({
        "imageId": image_names,
        "reducedDimensions": reduced_vectors
    })
def compute_latent_semantic_for_label(fea_ext_mod, dim_red_mod, label, k_value,
                                      folder):
    """Compute and store the latent semantics of one label in a labelled folder.

    Args:
        fea_ext_mod: feature extraction model name.
        dim_red_mod: dimensionality reduction model name.
        label: image label passed to ``DimensionReduction``.
        k_value: number of latent dimensions.
        folder: labelled folder; its basename is the last part of the saved
            model's filename.

    Returns:
        The ``(obj_lat, feat_lat, model)`` triple produced by
        ``DimensionReduction.execute()``.
    """
    reducer = DimensionReduction(fea_ext_mod,
                                 dim_red_mod,
                                 k_value,
                                 label,
                                 folder_metadata=folder,
                                 metadata_collection="labelled")
    latent_semantics = reducer.execute()

    # Persist the fitted model (third element of the triple).
    saved_name = "{0}_{1}_{2}_{3}_{4}".format(fea_ext_mod, dim_red_mod, label,
                                              str(k_value),
                                              os.path.basename(folder))
    obj_lat, feat_lat, model = latent_semantics
    model_interact.save_model(model=model, filename=saved_name)
    return obj_lat, feat_lat, model
Esempio n. 9
0
def run_task4(feature_extraction_model, dimension_reduction_model, folder,
              image_name, dist_func, label, k_value, m_value):
    """Main function for the Task4.

    Loads the model stored under
    ``<feature>_<reduction>_<label-without-spaces>_<k>`` and returns whatever
    ``DimensionReduction.find_m_similar_images`` yields for the query image.
    """
    # Load the model stored for this input combination
    name_parts = (feature_extraction_model, dimension_reduction_model,
                  label.replace(" ", ''), str(k_value))
    stored_model = model_interact.load_model(
        filename="{0}_{1}_{2}_{3}".format(*name_parts))

    # Compute the reduced dimensions for the new query image and find m similar images
    reducer = DimensionReduction(feature_extraction_model,
                                 dimension_reduction_model, k_value, label)
    # The returned matrix is not used; the call is kept as in the original.
    # NOTE(review): confirm whether this call is needed for a side effect.
    reducer.get_object_feature_matrix()
    return reducer.find_m_similar_images(stored_model, m_value, folder,
                                         image_name, dist_func)
def main():
    """Main function for the Task3.

    Collects the feature extractor, reduction technique, k and image label,
    saves the hand metadata CSV to MongoDB, runs the dimensionality reduction
    for that label, stores the fitted model, prints the term-weight pairs and
    saves them to a CSV file.
    """
    feature_extraction_model = get_input_feature_extractor_model()
    dimension_reduction_model = get_input_dimensionality_reduction_model()
    k_value = get_input_k()
    label = get_input_image_label()

    # The summary now echoes the label as well, so every value the user
    # entered is shown back (it was previously left out).
    print(global_constants.LINE_SEPARATOR)
    print("User Inputs summary")
    print(global_constants.LINE_SEPARATOR)
    print(
        "Feature Extraction Model: {}\nDimensionality Reduction Model: {}\nk-value: {}\nLabel: {}"
        .format(feature_extraction_model, dimension_reduction_model, k_value,
                label))
    print(global_constants.LINE_SEPARATOR)

    print(global_constants.LINE_SEPARATOR)
    print("Saving the metadata to MongoDB")
    print(global_constants.LINE_SEPARATOR)
    csv_reader.save_hand_csv_mongo("HandInfo.csv")
    print(global_constants.LINE_SEPARATOR)

    # Performs the dimensionality reduction
    dim_reduction = DimensionReduction(feature_extraction_model,
                                       dimension_reduction_model, k_value,
                                       label)
    obj_lat, feat_lat, model = dim_reduction.execute()

    # Saves the returned model; spaces are stripped from the label so the
    # name matches the one task 4 builds when loading it.
    filename = "{0}_{1}_{2}_{3}".format(feature_extraction_model,
                                        dimension_reduction_model,
                                        label.replace(" ", ''), str(k_value))
    model_interact.save_model(model=model, filename=filename)

    # Printing the term weight pairs (the return value is not needed here)
    print_tw(obj_lat, feat_lat)

    # save term weight pairs to csv
    filename = "task3_{}_{}_{}_{}".format(feature_extraction_model,
                                          dimension_reduction_model, label,
                                          k_value)
    csv_reader.save_to_csv(obj_lat, feat_lat, filename)
    print("Please check the CSV file: output/{}.csv".format(filename))
def main():
    """Main function for the task 1.

    Collects the feature extractor, reduction technique and k, computes and
    stores the latent semantics through ``save_model``, prints and saves the
    term-weight pairs, then shows the visualizations (the data-latent one
    only when k is at most 20).
    """
    feature_extraction_model = get_input_feature_extractor_model()
    dimension_reduction_model = get_input_dimensionality_reduction_model()
    k_value = get_input_k()

    separator = global_constants.LINE_SEPARATOR
    summary = (
        "Feature Extraction Model: {}\nDimensionality Reduction Model: {}\nk-value: {}"
        .format(feature_extraction_model, dimension_reduction_model, k_value))
    print(separator)
    print("User Inputs summary")
    print(separator)
    print(summary)
    print(separator)

    # Reduce the dimensions and persist the fitted model
    dim_reduction = DimensionReduction(feature_extraction_model,
                                       dimension_reduction_model, k_value)
    obj_lat, feat_lat = save_model(dim_reduction, feature_extraction_model,
                                   dimension_reduction_model, k_value)

    # Term-weight pairs: shown on the terminal, then written to CSV
    data_tw = print_tw(obj_lat, feat_lat)
    filename = "_".join(("task1", feature_extraction_model,
                         dimension_reduction_model, str(k_value)))
    CSVReader().save_to_csv(obj_lat, feat_lat, filename)
    print("Please check the CSV file: output/{}.csv".format(filename))

    data = dim_reduction.get_object_feature_matrix()
    title = {
        "Feature Extraction": feature_extraction_model,
        "Dimensionality Reduction": dimension_reduction_model,
        "k": k_value,
    }

    # The data-latent visualization is skipped for k above 20.
    if k_value <= 20:
        print("Generating Visualization ...")
        show_data_ls(data, data_tw, title)
    print("Generating Visualization ...")
    show_feature_ls(data, feat_lat, title)
Esempio n. 12
0
def main():
    """Build a subject-subject similarity matrix and factorise it with NMF.

    Uses task 6's feature extraction / reduction models with a fixed k of 10
    to score every subject against the others, then runs NMF with the
    user-supplied k on the resulting matrix, prints the term-weight pairs and
    saves them under the name ``task7_<k>``.
    """
    feature_extraction_model = task6.feature_extraction_model
    dimension_reduction_model = task6.dimension_reduction_model
    k_value_for_ss_similarity = 10

    given_k_value = get_input_k()
    separator = global_constants.LINE_SEPARATOR
    print(separator)
    print("User Inputs summary")
    print(separator)
    print("k-value: {}".format(given_k_value))
    print(separator)

    dim_reduction = DimensionReduction(feature_extraction_model,
                                       dimension_reduction_model,
                                       k_value_for_ss_similarity)
    # original feature vectors
    obj_feat_matrix = dim_reduction.get_object_feature_matrix()
    # image ids present in the feature matrix
    img_set = pd.DataFrame({"imageId": obj_feat_matrix['imageId']})
    # metadata rows of those images, looked up by image name
    unique_image_ids = list(set(img_set["imageId"].tolist()))
    subject_data = dim_reduction.get_metadata("imageName", unique_image_ids)
    # unique subject ids in the dataset
    dataset_subject_ids = set(subject_data["id"])
    m_value = len(img_set)
    starttime = time.time()
    model = task6.load_model(dim_reduction, feature_extraction_model,
                             dimension_reduction_model,
                             k_value_for_ss_similarity)
    folder = path.basename(path.dirname(obj_feat_matrix['path'][0]))

    subject_subject_matrix = []
    for subjectid in dataset_subject_ids:
        subject_metadata = dim_reduction.get_metadata("id", [subjectid])
        given_subject_images = subject_metadata["imageName"].tolist()
        # keep only the subject's images that are in the feature matrix
        image_list_for_subject = list(
            set(given_subject_images).intersection(
                set(img_set["imageId"].tolist())))
        similar_subjects = task6.find_similar_subjects(
            subjectid, image_list_for_subject, model, img_set, dim_reduction,
            m_value, folder)
        similarity_row = np.asarray(list(similar_subjects.values()))
        subject_subject_matrix.append(similarity_row)

    print("\nTime taken to create subject subject matrix: {}\n".format(
        time.time() - starttime))

    # perform nmf on subject_subject_matrix
    matrix = pd.DataFrame(data={
        'imageId': list(dataset_subject_ids),
        'featureVector': subject_subject_matrix
    })
    dim_red = DimensionReduction(None,
                                 "NMF",
                                 given_k_value,
                                 subject_subject=True,
                                 matrix=matrix)
    w, h, model = dim_red.execute()

    # display the latent semantics as term-weight pairs
    print_tw(w, h, subject_subject=True)
    # save to csv
    filename = "_".join(("task7", str(given_k_value)))
    CSVReader().save_to_csv(w, None, filename, subject_subject=True)
    print("Please check the CSV file: output/{}.csv".format(filename))
def main():
    """Main function for the script.

    Builds a cosine-similarity graph over the HOG feature vectors of a
    labelled folder, runs ``ppr`` from the user's query images, prints the
    first K results and displays them with ``show_images_ppr``.
    """
    feature_extraction_model = "HOG"
    dimension_reduction_model = "PCA"
    k_value = get_input_k("k")
    K_value = get_input_k("K")
    folder = get_input_folder("Folder")
    dim_k_value = 40

    query_images = get_input_image_list(folder)
    start = time.time()

    def absolute_path(image_name):
        # Absolute location of an image inside the chosen folder.
        return os.path.abspath(os.path.join(folder, image_name))

    dim_red = DimensionReduction(feature_extraction_model,
                                 dimension_reduction_model,
                                 dim_k_value,
                                 folder_metadata=folder,
                                 metadata_collection="labelled")
    obj_feat = dim_red.get_object_feature_matrix()
    features_list = np.array(obj_feat['featureVector'].tolist())
    images_list = np.array(obj_feat['imageId'])

    similarity = cosine_similarity(features_list)
    sim_graph = sim_graph_from_sim_max(similarity, images_list, k_value)
    # Only the first K entries returned by ppr are reported.
    results = ppr(sim_graph, images_list, query_images)[:K_value]

    print("Top {} images from Personalized page Rank are:".format(K_value))
    for entry in results:
        entry["path"] = absolute_path(entry['imageId'])
        print(entry)

    query_images_list = [absolute_path(img) for img in query_images]
    title = {"Model": "Personalized Page Rank", "k": k_value, "K": K_value}
    show_images_ppr(query_images_list, title, results)
    print("Execution time: {} seconds".format(time.time() - start))
Esempio n. 14
0
def main():
    """Classify unlabelled hand images as dorsal/palmar with an SVM.

    Computes HOG/PCA (k=30) latent semantics separately for the dorsal and
    palmar images of the labelled folder, projects the folder to classify,
    trains ``SupportVectorMachine`` with a gaussian kernel on the shuffled
    training data and prints the accuracy plus a per-image
    prediction/actual listing.
    """
    fea_ext_mod = "HOG"
    dim_red_mod = "PCA"
    k_value = 30
    csv_name = "p3task1_{0}_{1}_{2}_{3}"

    training_set = os.path.abspath(get_input_folder("Labelled"))
    test_set = os.path.abspath(get_input_folder("Classify"))

    def to_sign(aspects):
        # 1 for "dorsal" data points, -1 for everything else.
        return [1 if aspect == "dorsal" else -1 for aspect in aspects]

    def to_name(signs):
        # Inverse of to_sign: 1 -> "dorsal", anything else -> "palmar".
        return ["dorsal" if sign == 1 else "palmar" for sign in signs]

    # Latent semantics for the dorsal images
    label = "dorsal"
    obj_lat, feat_lat, model = compute_latent_semantic_for_label(
        fea_ext_mod, dim_red_mod, label, k_value, training_set)
    csv_reader.save_to_csv(
        obj_lat, feat_lat,
        csv_name.format(fea_ext_mod, dim_red_mod, label, str(k_value)))

    # Latent semantics for the palmar images
    label_p = 'palmar'
    obj_lat_p, feat_lat_p, model_p = compute_latent_semantic_for_label(
        fea_ext_mod, dim_red_mod, label_p, k_value, training_set)
    csv_reader.save_to_csv(
        obj_lat_p, feat_lat_p,
        csv_name.format(fea_ext_mod, dim_red_mod, label_p, str(k_value)))

    # Training vectors: dorsal rows first, palmar rows after them
    x_train = (obj_lat['reducedDimensions'].tolist() +
               obj_lat_p['reducedDimensions'].tolist())
    red_dim_unlabelled_images = reduced_dimensions_for_unlabelled_folder(
        fea_ext_mod, dim_red_mod, k_value, label, training_set, test_set)
    x_test = red_dim_unlabelled_images['reducedDimensions'].tolist()

    dim_red = DimensionReduction(fea_ext_mod, dim_red_mod, k_value)

    def aspect_words(frame, collection):
        # First word of 'aspectOfHand' for every image id in the frame.
        metadata = dim_red.get_metadata_collection(
            "imageName", frame['imageId'].tolist(), collection)
        return [
            aspect.split(' ')[0]
            for aspect in metadata['aspectOfHand'].tolist()
        ]

    y_train = aspect_words(obj_lat, "labelled")
    y_train += aspect_words(obj_lat_p, "labelled")
    y_test = aspect_words(red_dim_unlabelled_images, "unlabelled")

    # makes into arrays and transforms the training labels into 1 / -1
    x_train = np.array(x_train)
    y_train = np.array(to_sign(y_train))

    # shuffling the training data with one shared permutation
    indices = np.arange(x_train.shape[0])
    np.random.shuffle(indices)
    x_train, y_train = x_train[indices], y_train[indices]

    x_test = np.array(x_test)

    # creates the SVM classifier
    clf = SupportVectorMachine(gaussian_kernel, C=500)
    clf.fit(x_train, y_train)
    predictions = clf.predict(x_test)

    # transforms the testing labels the same way as the training labels
    y_test = to_sign(y_test)

    # calculates and prints the results onto the console
    correct = np.sum(predictions == y_test)
    print("---------------------------")
    accuracy = (correct / len(predictions)) * 100
    print("Accuracy: " + str(accuracy) + "%")
    unlabelled_images = red_dim_unlabelled_images['imageId']
    predicted_labels = to_name(predictions)
    actual_labels = to_name(y_test)
    print("---------------------------")
    print("Results:")
    print("Image ID, Prediction, Actual")
    for image_id, predicted, actual in zip(unlabelled_images,
                                           predicted_labels, actual_labels):
        print("(" + image_id + ", " + predicted + ", " + actual + ")")
def main():
    """Classify unlabelled hand images as dorsal/palmar with logistic regression.

    Computes HOG/SVD latent semantics separately for the dorsal and palmar
    images of the labelled folder, projects the folder to classify, fits a
    ``LogisticRegression`` on the shuffled training data and prints the
    score plus a per-image prediction/actual table.
    """
    fea_ext_mod = "HOG"
    dim_red_mod = "SVD"
    csv_name = "p3task1_{0}_{1}_{2}_{3}"
    k_value = get_input_k("k-value")
    training_set = os.path.abspath(get_input_folder("Labelled"))
    test_set = os.path.abspath(get_input_folder("Classify"))

    # Dorsal latent semantics
    label = "dorsal"
    obj_lat, feat_lat, model = compute_latent_semantic_for_label(
        fea_ext_mod, dim_red_mod, label, k_value, training_set)
    csv_reader.save_to_csv(
        obj_lat, feat_lat,
        csv_name.format(fea_ext_mod, dim_red_mod, label, str(k_value)))
    x_train = obj_lat['reducedDimensions'].tolist()

    red_dim_unlabelled_images = reduced_dimensions_for_unlabelled_folder(
        fea_ext_mod, dim_red_mod, k_value, label, training_set, test_set)
    x_test = red_dim_unlabelled_images['reducedDimensions'].tolist()

    dim_red = DimensionReduction(fea_ext_mod, dim_red_mod, k_value)

    def aspect_words(frame, collection):
        # First word of 'aspectOfHand' for every image id in the frame.
        metadata = dim_red.get_metadata_collection(
            "imageName", frame['imageId'].tolist(), collection)
        return [
            aspect.split(' ')[0]
            for aspect in metadata['aspectOfHand'].tolist()
        ]

    y_train = aspect_words(obj_lat, "labelled")

    # Palmar latent semantics, appended after the dorsal rows
    label_p = 'palmar'
    obj_lat_p, feat_lat_p, model_p = compute_latent_semantic_for_label(
        fea_ext_mod, dim_red_mod, label_p, k_value, training_set)
    csv_reader.save_to_csv(
        obj_lat_p, feat_lat_p,
        csv_name.format(fea_ext_mod, dim_red_mod, label_p, str(k_value)))
    x_train += obj_lat_p['reducedDimensions'].tolist()
    y_train += aspect_words(obj_lat_p, "labelled")

    # Shuffle vectors and labels together so the pairs stay aligned.
    paired = list(zip(x_train, y_train))
    random.shuffle(paired)
    x_train, y_train = zip(*paired)

    y_test = aspect_words(red_dim_unlabelled_images, "unlabelled")

    lr = LogisticRegression(penalty='l2',
                            random_state=np.random.RandomState(42),
                            solver='lbfgs',
                            max_iter=300,
                            multi_class='ovr',
                            class_weight='balanced',
                            n_jobs=-1,
                            l1_ratio=0)
    lr.fit(x_train, y_train)
    predictions = lr.predict(x_test)

    unlabelled_images = red_dim_unlabelled_images['imageId'].tolist()
    predicted_labels = list(predictions)
    actual_labels = list(y_test)

    divider = "---------------------------"
    print(divider)
    print("     Results:")
    print(divider)
    print("     Accuracy:", lr.score(x_test, y_test))
    print(divider)
    print("     Image ID             |   Prediction    |   Actual")
    for image_id, predicted, actual in zip(unlabelled_images,
                                           predicted_labels, actual_labels):
        print("     " + image_id + "     |   " + predicted + "        |   " +
              actual)
def main():
    """Find and visualise the subjects most similar to a given subject (task 6).

    Reads a subject id from the user, scores every subject against up to five
    randomly chosen images of that subject through ``find_similar_subjects``,
    prints the three best-scoring other subjects and hands their images to
    ``imgvwr.show_subjectwise_images``.

    Relies on the module-level ``feature_extraction_model``,
    ``dimension_reduction_model`` and ``dist_func``.

    Raises:
        ValueError: if no image is recorded for the given subject id.
    """
    # given subject id
    given_subject_id = get_input_subject_id()
    k_value = 40
    master_folder = "Hands"
    # number of similar subjects to report
    similar_subject_count = 3
    # number of the given subject's images used as queries
    query_image_count = 5

    dim_reduction = DimensionReduction(feature_extraction_model,
                                       dimension_reduction_model, k_value)
    # original feature vectors
    obj_feat_matrix = dim_reduction.get_object_feature_matrix()
    # extract model saved from task 1
    model = load_model(dim_reduction, feature_extraction_model,
                       dimension_reduction_model, k_value)
    # get the img IDs from the database for images in the fit model
    img_set = pd.DataFrame({"imageId": obj_feat_matrix['imageId']})
    # image count to rank against current image
    m_value = len(img_set)
    print(global_constants.LINE_SEPARATOR)
    print("User Inputs summary")
    print(global_constants.LINE_SEPARATOR)
    print("Query Subject Id: {}".format(given_subject_id))
    print(global_constants.LINE_SEPARATOR)

    # get metadata for given subject's images
    metadata = dim_reduction.get_metadata("id", [given_subject_id])
    subject_images = list(set(metadata["imageName"].tolist()))
    if not subject_images:
        raise ValueError(
            "No images found for subject id {}".format(given_subject_id))
    # Sample at most query_image_count images; a subject with fewer images
    # previously made random.sample raise "Sample larger than population".
    image_list_for_given_subject = random.sample(
        subject_images, min(query_image_count, len(subject_images)))

    starttime = time.time()

    # method call to find similar subjects
    subject_similarity = find_similar_subjects(given_subject_id,
                                               image_list_for_given_subject,
                                               model, img_set, dim_reduction,
                                               m_value, master_folder)
    # sort the similarity scores in descending order
    sorted_subject_similarity = sorted(subject_similarity.items(),
                                       key=operator.itemgetter(1),
                                       reverse=True)

    # Best-scoring subjects other than the query subject. Slicing copes with
    # fewer candidates than requested instead of indexing past the end.
    top_subjects = [
        entry for entry in sorted_subject_similarity
        if entry[0] != given_subject_id
    ][:similar_subject_count]

    print()
    print("Subject  :   Score")
    list_subjects = []
    for subject_id, score in top_subjects:
        print(subject_id, "  :   ", score)
        list_subjects.append([subject_id, score])
    print()

    # create list of images for each subject to visualize most similar subjects
    folder_path = os.path.dirname(obj_feat_matrix['path'][0])
    # Built once: the ids of the images that are part of the fitted model.
    known_images = set(img_set["imageId"].tolist())
    image_list_for_similar_subjects_abs_path = []
    for subject_id, _ in top_subjects:
        metadata = dim_reduction.get_metadata("id", [subject_id])
        image_list_for_similar_subject = list(
            set(metadata["imageName"].tolist()).intersection(known_images))
        image_list_for_similar_subjects_abs_path.append([
            os.path.join(folder_path, image)
            for image in image_list_for_similar_subject
        ])

    # Absolute paths of the sampled images of the given subject, taken from
    # the master dataset folder.
    image_list_for_given_subject_abs_path = [
        os.path.abspath(os.path.join(master_folder, image))
        for image in image_list_for_given_subject
    ]

    output_path = os.path.abspath(os.path.join("output"))
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_path, exist_ok=True)
    fig_filename = os.path.join(
        output_path,
        "task6_{0}_{1}_{2}_{3}_{4}.png".format(feature_extraction_model,
                                               dimension_reduction_model,
                                               str(k_value), dist_func,
                                               given_subject_id))
    # show images on a plot
    imgvwr.show_subjectwise_images(given_subject_id,
                                   image_list_for_given_subject_abs_path,
                                   list_subjects,
                                   image_list_for_similar_subjects_abs_path,
                                   fig_filename)

    print("\nTime taken for task 6: {}\n".format(time.time() - starttime))
def main():
    """Main function for the script.

    Classifies the images of an unlabelled folder as dorsal or palmar:
    HOG and CM feature vectors of the labelled and unlabelled folders are
    concatenated per image, a k-nearest-neighbour graph (k=10) is built from
    their cosine similarity, ``ppr`` is run once from the dorsal and once from
    the palmar labelled images, and each unlabelled image gets the label
    whose ``ppr`` value is lower. Prints the per-image result, the correctly
    and incorrectly classified images, the accuracy and the execution time.
    """
    feature_extraction_model = "HOG"
    feature_extraction_model_1 = "CM"
    dimension_reduction_model = "PCA"
    k_value = 10
    dim_k_value = 40
    lab_folder = get_input_folder("Labelled Folder")
    unlab_folder = get_input_folder("Classify")
    start = time.time()

    def load_object_features(extractor, folder, collection):
        # Object-feature matrix of one folder for one feature extractor.
        dim_red = DimensionReduction(extractor,
                                     dimension_reduction_model,
                                     dim_k_value,
                                     folder_metadata=folder,
                                     metadata_collection=collection)
        return dim_red.get_object_feature_matrix()

    def feature_vectors(obj_feat):
        # 'featureVector' column as a 2-D array, one row per image.
        return np.array(obj_feat['featureVector'].tolist())

    # ================================================================================================================
    # HOG features: labelled images
    obj_feat_lab = load_object_features(feature_extraction_model, lab_folder,
                                        "labelled")
    images_list_lab = np.array(obj_feat_lab['imageId'])
    # filtering the labelled set (done once; it was previously repeated on
    # the same, unchanged image list)
    dorsal_list, palmar_list = filter_images_by_label(images_list_lab)

    # HOG features: unlabelled images
    obj_feat_unlab = load_object_features(feature_extraction_model,
                                          unlab_folder, "unlabelled")
    images_list_unlab = np.array(obj_feat_unlab['imageId'])

    # ================================================================================================================
    # CM features: labelled and unlabelled images
    obj_feat_lab_1 = load_object_features(feature_extraction_model_1,
                                          lab_folder, "labelled")
    obj_feat_unlab_1 = load_object_features(feature_extraction_model_1,
                                            unlab_folder, "unlabelled")

    # Concatenate both feature sets column-wise.
    # NOTE(review): this assumes both extractors return the images in the
    # same row order - confirm against get_object_feature_matrix.
    features_list_lab = np.concatenate(
        (feature_vectors(obj_feat_lab), feature_vectors(obj_feat_lab_1)),
        axis=1)
    features_list_unlab = np.concatenate(
        (feature_vectors(obj_feat_unlab), feature_vectors(obj_feat_unlab_1)),
        axis=1)

    # ================================================================================================================
    features_list = np.concatenate((features_list_lab, features_list_unlab))
    images_list = list(np.concatenate((images_list_lab, images_list_unlab)))

    # Finding Similarity Matrix
    cos_sim = cosine_similarity(features_list)
    # Keep, per row, only the k largest similarities after the first entry of
    # the descending sort. Filling a pre-allocated matrix avoids re-copying
    # the whole graph on every np.append.
    sim_graph = np.zeros(cos_sim.shape)
    for row_index, row in enumerate(cos_sim):
        k_largest = np.argsort(-row)[1:k_value + 1]
        sim_graph[row_index, k_largest] = row[k_largest]

    # Row-normalise; rows without any non-zero neighbour are left as zeros
    # instead of turning into NaN through 0/0.
    row_sums = sim_graph.sum(axis=1)
    row_sums[row_sums == 0] = 1
    sim_graph = sim_graph / row_sums[:, np.newaxis]

    results_dorsal = ppr(sim_graph, images_list, dorsal_list)
    results_palmar = ppr(sim_graph, images_list, palmar_list)

    # NOTE(review): the label with the LOWER ppr value wins, which suggests
    # ppr returns a rank rather than a score here - confirm.
    final_results = {}
    for img in images_list_unlab:
        if results_dorsal[img] < results_palmar[img]:
            final_results[img] = "dorsal"
        else:
            final_results[img] = "palmar"

    actual_labels = fetch_actual_labels(images_list_unlab)
    print("Classification")
    no_correct = 0
    correctly_classified = []
    incorrectly_classified = []
    print("|   ImageId          | Prediction |  Actual |")
    for image_id, predicted in final_results.items():
        actual = actual_labels[image_id]
        print("|   {} |   {}   |  {} |".format(image_id, predicted, actual))
        if predicted == actual:
            correctly_classified.append(image_id)
            no_correct += 1
        else:
            incorrectly_classified.append(image_id)

    print("Correctly classified: {}\n".format(correctly_classified))
    print("InCorrectly classified: {}\n".format(incorrectly_classified))

    print("Classification Accuracy: {}%".format(no_correct /
                                                len(images_list_unlab) * 100))
    print("Execution time: {} seconds".format(time.time() - start))
def _knn_transition_matrix(features, k):
    """Build the row-normalised k-nearest-neighbour cosine similarity graph.

    For every row of the pairwise cosine-similarity matrix only the entries
    ranked 2nd to (k+1)-th largest are kept; the top-ranked entry is skipped
    (it is normally the vector's similarity with itself). All other entries
    are zero. Each row is then divided by its own sum.

    :param features: 2-D array-like, one feature vector per row
    :param k: number of neighbours kept per row
    :return: square ``numpy.ndarray`` with one row/column per feature vector
    """
    cos_sim = cosine_similarity(features)
    # Pre-allocate instead of growing the matrix with np.append, which copies
    # the whole graph once per row.
    graph = np.zeros(cos_sim.shape)
    for i, row in enumerate(cos_sim):
        nearest = np.argsort(-row)[1:k + 1]
        graph[i, nearest] = row[nearest]
    return graph / graph.sum(axis=1)[:, np.newaxis]


def main():
    """Classify unlabelled images as dorsal/palmar with Personalized PageRank.

    The HOG and CM feature vectors of every labelled image are concatenated.
    For each unlabelled image its own HOG+CM vector (read from MongoDB) is
    appended to the labelled set, a k-nearest-neighbour similarity graph is
    built, and ``ppr`` is run with the unlabelled image as the only seed.
    The results are walked in the order ``ppr`` yields them and the first
    five labelled images vote on the class. Predictions, actual labels, the
    accuracy and the execution time are printed.
    """
    start = time.time()
    feature_extraction_model = "HOG"
    feature_extraction_model_1 = "CM"
    dimension_reduction_model = "PCA"
    k_value = 5
    dim_k_value = 40
    # Number of labelled images that must vote before a class is assigned.
    votes_needed = 5
    lab_folder = "Dataset3/Labelled/Set1"
    unlab_folder = "Dataset3/Unlabelled/Set 2"

    # Labelled images, first feature model.
    dim_red = DimensionReduction(feature_extraction_model,
                                 dimension_reduction_model,
                                 dim_k_value,
                                 folder_metadata=lab_folder,
                                 metadata_collection="labelled")
    obj_feat_lab = dim_red.get_object_feature_matrix()
    features_list_lab = np.array(obj_feat_lab['featureVector'].tolist())
    images_list_lab = np.array(obj_feat_lab['imageId'])

    # Unlabelled images: only the ids are needed here, their feature vectors
    # are fetched one at a time from MongoDB below.
    dim_red = DimensionReduction(feature_extraction_model,
                                 dimension_reduction_model,
                                 dim_k_value,
                                 folder_metadata=unlab_folder,
                                 metadata_collection="unlabelled")
    obj_feat_unlab = dim_red.get_object_feature_matrix()
    images_list_unlab = np.array(obj_feat_unlab['imageId'])

    # Labelled images, second feature model.
    dim_red = DimensionReduction(feature_extraction_model_1,
                                 dimension_reduction_model,
                                 dim_k_value,
                                 folder_metadata=lab_folder,
                                 metadata_collection="labelled")
    obj_feat_lab_1 = dim_red.get_object_feature_matrix()
    features_list_lab_1 = np.array(obj_feat_lab_1['featureVector'].tolist())

    # One combined vector per labelled image: [first model | second model].
    features_list_lab = np.concatenate(
        (features_list_lab, features_list_lab_1), axis=1)

    dorsal_list, palmar_list = filter_images_by_label(images_list_lab)

    mongo_wrap = MongoWrapper()
    final_results = {}
    for img in images_list_unlab:
        images_list = np.concatenate((images_list_lab, [img]))
        res = mongo_wrap.find(feature_extraction_model.lower(),
                              {"imageId": img})[0]
        res1 = mongo_wrap.find(feature_extraction_model_1.lower(),
                               {"imageId": img})[0]
        feature_query = np.concatenate(
            (np.array(res["featureVector"]), np.array(res1["featureVector"])))
        features_list = np.vstack((features_list_lab, feature_query))
        sim_graph = _knn_transition_matrix(features_list, k_value)

        results = ppr(sim_graph, images_list, [img])
        dorsal_count = 0
        palmar_count = 0
        for r in results:
            if r == img:
                continue
            if r in dorsal_list:
                dorsal_count += 1
            elif r in palmar_list:
                palmar_count += 1
            if dorsal_count + palmar_count >= votes_needed:
                if dorsal_count > palmar_count:
                    final_results[img] = "dorsal"
                else:
                    final_results[img] = "palmar"
                break
        # NOTE(review): an image whose results contain fewer than
        # `votes_needed` labelled images gets no prediction and is therefore
        # left out of the accuracy below.

    actual_labels = fetch_actual_labels(final_results.keys())
    print("Classification")
    no_correct = 0
    print(len(final_results))
    for r in final_results:
        print("Image Id: {}, Label:{} Actual Label: {}".format(
            r, final_results[r], actual_labels[r]))
        if final_results[r] == actual_labels[r]:
            no_correct += 1

    if final_results:
        print("Classification Accuracy: {}".format(
            (no_correct / len(final_results)) * 100))
    else:
        # Nothing was classified; avoid dividing by zero.
        print("Classification Accuracy: N/A (no images were classified)")

    print("Execution time: {} seconds".format(time.time() - start))
def _kmeans_clusters(rows, k, max_iter=500):
    """Cluster ``rows`` into ``k`` groups with Lloyd's k-means.

    Each row is indexable, with the feature vector at position 0. Initial
    centroids are ``k`` distinct rows picked with ``np.random.choice``.
    Iteration stops once no centroid changes, or after ``max_iter`` rounds.

    :param rows: sequence of rows whose first element is the feature vector
    :param k: number of clusters
    :param max_iter: upper bound on the number of refinement rounds
    :return: ``(classes, centroid)`` where ``classes`` maps a cluster index to
        the list of rows assigned to it and ``centroid`` maps a cluster index
        to its centre vector
    """
    seeds = np.random.choice(len(rows), size=k, replace=False)
    centroid = {i: np.asarray(rows[seeds[i]][0]) for i in range(k)}
    classes = {i: [] for i in range(k)}

    for _ in range(max_iter):
        classes = {i: [] for i in range(k)}
        members = {i: [] for i in range(k)}
        for row in rows:
            point = np.asarray(row[0])
            distances = [
                np.linalg.norm(point - np.asarray(centroid[i]))
                for i in range(k)
            ]
            nearest = int(np.argmin(distances))
            classes[nearest].append(row)
            members[nearest].append(row[0])

        previous = dict(centroid)
        for i, vectors in members.items():
            # An empty cluster has no mean; keep its previous centre rather
            # than replacing it with NaN, which would block convergence.
            if vectors:
                centroid[i] = np.average(vectors, axis=0)

        if all(np.array_equal(centroid[i], previous[i]) for i in range(k)):
            break

    return classes, centroid


def _nearest_centroid_distance(vector, centroids):
    """Return the Euclidean distance from ``vector`` to its closest centroid.

    :param vector: feature vector of one image
    :param centroids: mapping of cluster index to centre vector
    """
    point = np.asarray(vector)
    return min(
        np.linalg.norm(point - np.asarray(centre))
        for centre in centroids.values())


def main():
    """Cluster dorsal/palmar training images and classify a test folder.

    Reads the cluster count and the two folders from the user, computes the
    latent semantics of the dorsal and of the palmar training images, and
    runs k-means separately on each set. Every image of the test folder is
    labelled with the class whose nearest cluster centre is closer. The
    number of correct and wrong predictions is printed, then the routes for
    browsing the clusters and the classification result are registered on
    the module-level ``app`` and the server is started.
    """
    k = get_input_k("C")
    training_set = get_input_folder("Labelled")
    test_set = get_input_folder("Classify")
    # Number of latent dimensions used for the reduction.
    k_value = 30

    dim_reduction = DimensionReduction(feature_extraction_model,
                                       dimension_reduction_model, k_value)

    label = 'dorsal'
    obj_lat, _, _ = p3task1.compute_latent_semantic_for_label(
        feature_extraction_model, dimension_reduction_model, label, k_value,
        training_set)
    label_p = 'palmar'
    obj_lat_p, _, _ = p3task1.compute_latent_semantic_for_label(
        feature_extraction_model, dimension_reduction_model, label_p, k_value,
        training_set)
    # NOTE(review): the test folder is reduced with the dorsal label only,
    # yet it is also compared against the palmar centroids below - confirm
    # both live in the same latent space.
    red_dim = p3task1.reduced_dimensions_for_unlabelled_folder(
        feature_extraction_model, dimension_reduction_model, k_value, label,
        training_set, test_set)

    # Rows of [reduced vector, image id] for each class.
    X = obj_lat[['reducedDimensions', 'imageId']].values
    Y = obj_lat_p[['reducedDimensions', 'imageId']].values

    unlabelled_aspect = dim_reduction.get_metadata_collection(
        "imageName", red_dim['imageId'].tolist(),
        "unlabelled")['aspectOfHand'].tolist()
    # Keep the first word of each aspect string; it is compared with the
    # 'dorsal' / 'palmar' labels below.
    y_test = [aspect.split(' ')[0] for aspect in unlabelled_aspect]

    classes, centroid = _kmeans_clusters(X, k)
    classes_p, centroid_p = _kmeans_clusters(Y, k)

    correct = 0
    wrong = 0
    dorsal_images = []
    palmar_images = []
    query_ids = red_dim['imageId'].tolist()
    query_vectors = red_dim['reducedDimensions'].tolist()
    for ind, (image_name, vector) in enumerate(zip(query_ids, query_vectors)):
        # Compare against each class's own centroids (the palmar side used
        # to be matched against the dorsal centroids by mistake).
        dorsal_distance = _nearest_centroid_distance(vector, centroid)
        palmar_distance = _nearest_centroid_distance(vector, centroid_p)

        if dorsal_distance < palmar_distance:
            dorsal_images.append(image_name)
            predicted = label
        else:
            palmar_images.append(image_name)
            predicted = label_p

        if y_test[ind] == predicted:
            correct += 1
        else:
            wrong += 1

    print("correct" + str(correct))
    print("wrong" + str(wrong))

    print("\nClick here: http://localhost:{0}/result\n".format(port_g))
    print("\nClick here: http://localhost:{0}/dorsal\n".format(port_g))
    print("\nClick here: http://localhost:{0}/palmar\n".format(port_g))

    @app.route('/Dataset2/<filename>')
    def send_image(filename):
        """Serve an image file from the training folder."""
        return send_from_directory((training_set), filename)

    @app.route('/test_set/<filename>')
    def send_image_result(filename):
        """Serve an image file from the test folder."""
        return send_from_directory((test_set), filename)

    @app.route('/dorsal')
    def get_gallery():
        """Render the dorsal clusters."""
        image_names = [classes, k]

        return render_template("demo.html", image_names=image_names)

    @app.route('/palmar')
    def get_gallery_p():
        """Render the palmar clusters."""
        image_names_p = [classes_p, k]

        return render_template("demo_p.html", image_names_p=image_names_p)

    @app.route('/result')
    def get_gallery_result():
        """Render the test images grouped by predicted class."""
        results = [dorsal_images, palmar_images]

        return render_template("task2.html", results=results)

    app.run(port=port_g)