Example #1
import time
from os import path

import numpy as np
import pandas as pd

# Project-level names used below (task6, global_constants, DimensionReduction,
# CSVReader, get_input_k, print_tw) are assumed to be imported from the
# surrounding package.


def main():
    feature_extraction_model = task6.feature_extraction_model
    dimension_reduction_model = task6.dimension_reduction_model
    k_value_for_ss_similarity = 10

    given_k_value = get_input_k()
    print(global_constants.LINE_SEPARATOR)
    print("User Inputs summary")
    print(global_constants.LINE_SEPARATOR)
    print("k-value: {}".format(given_k_value))
    print(global_constants.LINE_SEPARATOR)

    dim_reduction = DimensionReduction(feature_extraction_model, dimension_reduction_model,
                                       k_value_for_ss_similarity)
    # original feature vectors
    obj_feat_matrix = dim_reduction.get_object_feature_matrix()
    # get the img IDs from the database for images in the fit model
    img_set = pd.DataFrame({"imageId": obj_feat_matrix['imageId']})
    # get the metadata (including subject id) for every image in the fit model
    subject_data = dim_reduction.get_metadata("imageName", list(set(img_set["imageId"].tolist())))
    # unique subject IDs in dataset
    dataset_subject_ids = set(subject_data["id"])
    subject_subject_matrix = []
    m_value = len(img_set)
    starttime = time.time()
    model = task6.load_model(dim_reduction, feature_extraction_model, dimension_reduction_model,
                             k_value_for_ss_similarity)
    folder = path.basename(path.dirname(obj_feat_matrix['path'][0]))

    for i, subjectid in enumerate(dataset_subject_ids):
        given_subject_images = dim_reduction.get_metadata("id", list([subjectid]))["imageName"].tolist()
        image_list_for_subject = list(set(given_subject_images).intersection(set(img_set["imageId"].tolist())))
        similar_subjects = task6.find_similar_subjects(subjectid, image_list_for_subject, model,
                                                       img_set, dim_reduction, m_value, folder)
        subject_subject_matrix.append(np.asarray(list(similar_subjects.values())))

    print("\nTime taken to create subject subject matrix: {}\n".format(time.time() - starttime))
    # perform NMF on the subject-subject matrix
    matrix = pd.DataFrame(data={'imageId': list(dataset_subject_ids),
                                'featureVector': subject_subject_matrix})
    dim_red = DimensionReduction(None, "NMF", given_k_value, subject_subject=True, matrix=matrix)
    w, h, model = dim_red.execute()

    # display latent semantics
    # printing the term weight
    print_tw(w, h, subject_subject=True)
    # save to csv
    filename = "task7" + '_' + str(given_k_value)
    CSVReader().save_to_csv(w, None, filename, subject_subject=True)
    print("Please check the CSV file: output/{}.csv".format(filename))
Example #2
import operator
import os
import random
import time

import pandas as pd

# Project-level names used below (DimensionReduction, load_model, find_similar_subjects,
# get_input_subject_id, global_constants, imgvwr, feature_extraction_model,
# dimension_reduction_model, dist_func) are assumed to be imported or defined at
# module level in the surrounding package.


def get_y(fea_ext_mod,
          dim_red_mod,
          k_value,
          collection_name,
          red_dim=None,
          obj_lat=None):
    """Return the first token of each image's 'aspectOfHand' value (e.g. dorsal/palmar)."""
    dim_red = DimensionReduction(fea_ext_mod, dim_red_mod, k_value)
    if collection_name == 'unlabelled':
        aspect = dim_red.get_metadata(
            "imageName", red_dim['imageId'].tolist())['aspectOfHand'].tolist()
    else:
        aspect = dim_red.get_metadata_collection(
            "imageName", obj_lat['imageId'].tolist(),
            collection_name)['aspectOfHand'].tolist()
    return [i.split(' ')[0] for i in aspect]


def main():
    # given subject id
    given_subject_id = get_input_subject_id()
    k_value = 40
    master_folder = "Hands"
    dim_reduction = DimensionReduction(feature_extraction_model,
                                       dimension_reduction_model, k_value)
    # original feature vectors
    obj_feat_matrix = dim_reduction.get_object_feature_matrix()
    # extract model saved from task 1
    model = load_model(dim_reduction, feature_extraction_model,
                       dimension_reduction_model, k_value)
    # get the img IDs from the database for images in the fit model
    img_set = pd.DataFrame({"imageId": obj_feat_matrix['imageId']})
    # image count to rank against current image
    m_value = len(img_set)
    print(global_constants.LINE_SEPARATOR)
    print("User Inputs summary")
    print(global_constants.LINE_SEPARATOR)
    print("Query Subject Id: {}".format(given_subject_id))
    print(global_constants.LINE_SEPARATOR)
    # number of similar subjects to find
    similar_subject_count = 3
    # get metadata for given subject's images
    metadata = dim_reduction.get_metadata("id", list([given_subject_id]))
    # pick 5 of the given subject's image IDs at random from the dataset
    image_list_for_given_subject = random.sample(
        list(set(metadata["imageName"].tolist())), 5)
    image_list = list(set(img_set["imageId"].tolist()))

    starttime = time.time()

    # method call to find similar subjects
    subject_similarity = find_similar_subjects(given_subject_id,
                                               image_list_for_given_subject,
                                               model, img_set, dim_reduction,
                                               m_value, master_folder)
    # sort the similarity scores in descending order
    sorted_subject_similarity = sorted(subject_similarity.items(),
                                       key=operator.itemgetter(1),
                                       reverse=True)

    print()
    print("Subject  :   Score")
    list_subjects = []
    # print the top N most similar subjects, skipping the query subject itself
    max_count = similar_subject_count
    counter = 0
    while counter < max_count and counter < len(sorted_subject_similarity):
        subject = sorted_subject_similarity[counter]
        if subject[0] != given_subject_id:
            print(subject[0], "  :   ", subject[1])
            list_subjects.append([subject[0], subject[1]])
        else:
            max_count += 1
        counter += 1
    print()

    image_list_for_similar_subjects_abs_path = []
    similarity_scores = []
    folder_path = os.path.dirname(obj_feat_matrix['path'][0])
    # collect the image lists for the most similar subjects so they can be visualized
    for subject in sorted_subject_similarity:
        if subject[0] != given_subject_id:
            metadata = dim_reduction.get_metadata("id", list([subject[0]]))
            similarity_scores.append(subject[1])
            image_list_for_similar_subject = list(
                set(metadata["imageName"].tolist()).intersection(
                    set(img_set["imageId"].tolist())))
            image_list_for_one_similar_subject_abs_path = []
            for image in image_list_for_similar_subject:
                image_list_for_one_similar_subject_abs_path.append(
                    (os.path.join(folder_path, image)))
            image_list_for_similar_subjects_abs_path.append(
                image_list_for_one_similar_subject_abs_path)
            similar_subject_count -= 1
            if similar_subject_count <= 0:
                break

    # build absolute paths for the given subject's randomly sampled images
    image_list_for_given_subject_abs_path = []
    for image in image_list_for_given_subject:
        image_list_for_given_subject_abs_path.append(
            os.path.abspath(os.path.join(master_folder, image)))

    output_path = os.path.abspath("output")
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    fig_filename = os.path.join(
        output_path,
        "task6_{0}_{1}_{2}_{3}_{4}.png".format(feature_extraction_model,
                                               dimension_reduction_model,
                                               str(k_value), dist_func,
                                               given_subject_id))
    # show images on a plot
    imgvwr.show_subjectwise_images(given_subject_id,
                                   image_list_for_given_subject_abs_path,
                                   list_subjects,
                                   image_list_for_similar_subjects_abs_path,
                                   fig_filename)

    print("\nTime taken for task 6: {}\n".format(time.time() - starttime))