예제 #1
0
def ex_dimensionality_reduction(data_matrix,
                                image_ids,
                                model,
                                lsa_model,
                                k_features,
                                label=None,
                                viz=False):
    """Reduce ``data_matrix`` with the selected technique and store the result.

    Args:
        data_matrix: Feature matrix handed to the selected reducer.
        image_ids: Identifiers passed, together with the reduced data, to
            ``insert_reduced_features``.
        model: Feature-model name. ``"CM"`` is special-cased: NMF is
            rejected for it and LDA runs on a converted matrix.
        lsa_model: Reduction technique: ``"SVD"``, ``"PCA"``, ``"NMF"`` or
            ``"LDA"``.
        k_features: Forwarded to the reducer (presumably the number of
            latent features to keep -- TODO confirm against the reducers).
        label: Forwarded unchanged to ``insert_reduced_features``.
        viz: Forwarded unchanged to the reducer.

    Returns:
        ``(reduced_data, image_ids)`` when the ``extra_credit`` name (not
        defined in this function; expected at module scope) is truthy,
        otherwise ``None``. ``None`` is also returned, without storing
        anything, when NMF is requested for the ``"CM"`` model.

    Raises:
        ValueError: If ``lsa_model`` is not one of the supported techniques.
    """
    if lsa_model == "SVD":
        reduced_data = reduce_dimensions_svd(data_matrix, k_features, viz)
    elif lsa_model == "PCA":
        reduced_data = reduce_dimensions_pca(data_matrix, k_features, viz)
    elif lsa_model == "NMF":
        if model == "CM":
            print("NMF is not applicable to Color Moments")
            return None
        reduced_data = reduce_dimensions_nmf(data_matrix, k_features, viz)
    elif lsa_model == "LDA":
        if model == "CM":
            print("LDA is modified and applicable to Color Moments")
            # Color Moments are converted first; the raw matrix is not
            # passed to LDA for this model.
            data_matrix_cmlda = convert_data_matrix_cmlda(data_matrix)
            reduced_data = reduce_dimensions_lda(data_matrix_cmlda, k_features,
                                                 viz)
        else:
            reduced_data = reduce_dimensions_lda(data_matrix, k_features, viz)
    else:
        # Fail loudly instead of hitting an unbound ``reduced_data`` below.
        raise ValueError(
            "Unsupported lsa_model: {!r}; expected one of "
            "'SVD', 'PCA', 'NMF', 'LDA'".format(lsa_model))

    insert_reduced_features(model, reduced_data, image_ids, label)
    if extra_credit:
        return reduced_data, image_ids
    return None
예제 #2
0
def dimensionality_reduction(data_matrix,
                             image_ids,
                             args,
                             label=None,
                             viz=False):
    """Reduce ``data_matrix`` as configured in ``args`` and store the result.

    Args:
        data_matrix: Feature matrix handed to the selected reducer.
        image_ids: Identifiers forwarded to the reducer and to
            ``insert_reduced_features``.
        args: Object exposing ``lsa_model`` (``"SVD"``, ``"PCA"``, ``"NMF"``
            or ``"LDA"``), ``model`` (feature-model name) and ``k_features``
            (forwarded to the reducer).
        label: Forwarded unchanged to ``insert_reduced_features``.
        viz: Forwarded unchanged to the reducer.

    Returns:
        ``(reduced_data, image_ids)`` on success. ``None`` (nothing stored)
        when NMF is requested for the ``"CM"`` model.

    Raises:
        ValueError: If ``args.lsa_model`` is not a supported technique.
    """
    technique = args.lsa_model
    k_features = args.k_features

    if technique == "SVD":
        reduced_data = reduce_dimensions_svd(data_matrix, k_features,
                                             image_ids, viz)
    elif technique == "PCA":
        reduced_data = reduce_dimensions_pca(data_matrix, k_features,
                                             image_ids, viz)
    elif technique == "NMF":
        if args.model == "CM":
            print("NMF is not applicable to Color Moments")
            return None
        reduced_data = reduce_dimensions_nmf(data_matrix, k_features,
                                             image_ids, viz)
    elif technique == "LDA":
        if args.model == "CM":
            print("LDA is applicable only to a modified Color Moments.")
            # Color Moments are converted first; the raw matrix is not
            # passed to LDA for this model.
            data_matrix_cmlda = convert_data_matrix_cmlda(data_matrix)
            reduced_data = reduce_dimensions_lda(data_matrix_cmlda,
                                                 k_features, image_ids, viz)
        else:
            reduced_data = reduce_dimensions_lda(data_matrix, k_features,
                                                 image_ids, viz)
    else:
        # Fail loudly instead of hitting an unbound ``reduced_data`` below.
        raise ValueError(
            "Unsupported lsa_model: {!r}; expected one of "
            "'SVD', 'PCA', 'NMF', 'LDA'".format(technique))

    insert_reduced_features(args.model, reduced_data, image_ids, label)
    return reduced_data, image_ids
예제 #3
0
def get_V_matrix(data_matrix, image_ids, args, label=None, viz=False):
    """Reduce ``data_matrix`` and also return the reducer's V matrix.

    Each reducer is called with ``get_v=True`` and its two return values
    are passed straight back to the caller. Nothing is written to storage.

    Args:
        data_matrix: Feature matrix handed to the selected reducer.
        image_ids: Unused; kept so the signature stays compatible with
            existing callers.
        args: Object exposing ``lsa_model`` (``"SVD"``, ``"PCA"``, ``"NMF"``
            or ``"LDA"``), ``model`` (feature-model name) and ``k_features``
            (forwarded to the reducer).
        label: Unused; kept for interface compatibility.
        viz: Unused; kept for interface compatibility.

    Returns:
        ``(reduced_data, V_matrix)`` on success. ``None`` when the ``"CM"``
        model is combined with NMF or LDA, which this function rejects.

    Raises:
        ValueError: If ``args.lsa_model`` is not a supported technique.
    """
    technique = args.lsa_model
    k_features = args.k_features

    if technique == "SVD":
        reduced_data, V_matrix = reduce_dimensions_svd(data_matrix,
                                                       k_features,
                                                       get_v=True)
    elif technique == "PCA":
        reduced_data, V_matrix = reduce_dimensions_pca(data_matrix,
                                                       k_features,
                                                       get_v=True)
    elif technique == "NMF":
        if args.model == "CM":
            print("NMF is not applicable to Color Moments")
            return None
        reduced_data, V_matrix = reduce_dimensions_nmf(data_matrix,
                                                       k_features,
                                                       get_v=True)
    elif technique == "LDA":
        if args.model == "CM":
            print("LDA is NOT applicable because of the Bag Of Word model.")
            return None
        reduced_data, V_matrix = reduce_dimensions_lda(data_matrix,
                                                       k_features,
                                                       get_v=True)
    else:
        # Fail loudly instead of hitting unbound locals at the return below.
        raise ValueError(
            "Unsupported lsa_model: {!r}; expected one of "
            "'SVD', 'PCA', 'NMF', 'LDA'".format(technique))

    return reduced_data, V_matrix
def main():
    """Classify test images as dorsal or palmar and print the accuracy.

    Workflow, as visible in this function:

    1. Parse command-line arguments and call ``populate_database``.
    2. Fetch the "CM" data matrices for the dorsal and palmar filters and
       stack them, labelling dorsal rows 0 and palmar rows 1.
    3. Reduce the stacked matrix with ``reduce_dimensions_svd`` (second
       argument 20, ``get_v=True``) and project the test features with
       the returned ``v_matrix``.
    4. Predict with the classifier named by ``args.classifier`` (``"DT"``,
       ``"SVM"`` or ``"PPR"``); any other value yields no predictions.
    5. Print the value returned by ``get_accuracy``.

    Raises:
        ValueError: If the decision tree emits a class other than 0 or 1.
    """
    parser = setup_arg_parse()
    args = parser.parse_args()
    populate_database(args)
    # Kept separate from the classifier instance created further down.
    feature_model = "CM"

    dorsal_data_matrix, _ = get_data_matrix(
        feature_model, convert_label_to_filterstring("dorsal"))
    palmar_data_matrix, _ = get_data_matrix(
        feature_model, convert_label_to_filterstring("palmar"))

    # Class encoding used throughout: 0 = dorsal, 1 = palmar.
    dorsal_labels = np.zeros((dorsal_data_matrix.shape[0], 1))
    palmar_labels = np.ones((palmar_data_matrix.shape[0], 1))
    labels = np.append(dorsal_labels, palmar_labels, axis=0)

    combined_data = np.append(dorsal_data_matrix, palmar_data_matrix, axis=0)

    reduced_data, v_matrix = reduce_dimensions_svd(combined_data,
                                                   20,
                                                   get_v=True)

    # Both halves of the split are re-joined right away, so no sample is
    # held out. Presumably this only shuffles rows and labels together in a
    # reproducible way -- TODO confirm this is the intent.
    dx, ddx, labels, d_labels = train_test_split(reduced_data,
                                                 labels,
                                                 test_size=0.1,
                                                 random_state=42)
    reduced_data = np.append(dx, ddx, axis=0)
    labels = np.append(labels, d_labels, axis=0)

    # Features with the label appended as the last column (used by "DT").
    labeled_data = np.append(reduced_data, labels, axis=1)

    testing_images, test_image_ids = enumerate_files_in_dir(args.test_folder)

    # zip() is kept so that the number of processed images matches the
    # original pairing with ``test_image_ids``.
    test_dataset = np.array([
        np.array(extract_color_moments(test_image))
        for test_image, _ in zip(testing_images, test_image_ids)
    ])

    # Project the test features with the V matrix from the training SVD.
    reduced_test_dataset = np.matmul(test_dataset, v_matrix)

    mongo_client = connect_to_db()
    try:
        actual_labels = get_actual_labels_from_csv(args.labels_csv,
                                                   test_image_ids)
        predicted = []

        if args.classifier == "DT":
            classifier = DecisionTreeClassifier()
            classifier.fit(labeled_data)
            results = classifier.transform(reduced_test_dataset)
            for test_image_id, result in zip(test_image_ids, results):
                if result == 0:
                    label = "dorsal"
                elif result == 1:
                    label = "palmar"
                else:
                    # Previously this silently reused the prior image's
                    # label (or hit an unbound name on the first image).
                    raise ValueError(
                        "Unexpected decision tree output {!r} for image "
                        "{!r}".format(result, test_image_id))
                predicted.append((test_image_id, label))
                print("{0} - {1}".format(test_image_id, label))

        elif args.classifier == "SVM":
            clf = SupportVectorMachine(kernel=rbf_kernel, power=4, coef=1)
            # Real copy: slicing an ndarray gives a view, and the in-place
            # relabelling below would otherwise also rewrite ``labels``.
            training_labels = labels.copy()
            # SVM needs labels to be 1, and -1
            training_labels[training_labels == 0] = -1
            clf.fit(reduced_data, training_labels)
            values = clf.predict(reduced_test_dataset)
            print(values)
            for test_image_id, result in zip(test_image_ids, values):
                label = "palmar" if result == 1 else "dorsal"
                predicted.append((test_image_id, label))
                print("{0} - {1}".format(test_image_id, label))

        elif args.classifier == "PPR":
            args.k = 15
            function_val = "manhattan"

            outgoing_img_graph, image_ids = create_similarity_graph(
                args.k, function_val, "CM")
            transition_matrix = get_transition_matrix(outgoing_img_graph,
                                                      args.k)

            # One pagerank run per class, each with its own seed matrix.
            dorsal_pagerank = compute_pagerank(transition_matrix,
                                               get_seed_matrix("dorsal"))
            palmar_pagerank = compute_pagerank(transition_matrix,
                                               get_seed_matrix("palmar"))

            dorsal_pagerank_dict = dict(zip(image_ids, dorsal_pagerank))
            palmar_pagerank_dict = dict(zip(image_ids, palmar_pagerank))

            predicted = label_images(dorsal_pagerank_dict,
                                     palmar_pagerank_dict, test_image_ids)

        print(get_accuracy(actual_labels, predicted))
    finally:
        # Release the DB connection even if classification fails.
        mongo_client.close()