Example #1
def get_y(fea_ext_mod,
          dim_red_mod,
          k_value,
          collection_name,
          red_dim=None,
          obj_lat=None):
    # Fetch the 'aspectOfHand' metadata for each image and keep only its
    # first word ("dorsal" or "palmar") as the class label.
    dim_red = DimensionReduction(fea_ext_mod, dim_red_mod, k_value)
    if collection_name == 'unlabelled':
        aspect = dim_red.get_metadata(
            "imageName", red_dim['imageId'].tolist())['aspectOfHand'].tolist()
    else:
        aspect = dim_red.get_metadata_collection(
            "imageName", obj_lat['imageId'].tolist(),
            collection_name)['aspectOfHand'].tolist()
    return [i.split(' ')[0] for i in aspect]
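
A hypothetical call, assuming red_dim is the DataFrame returned by reduced_dimensions_for_unlabelled_folder (as in the examples below):

# hypothetical usage: class labels for an unlabelled query folder,
# where red_dim carries an 'imageId' column
y_test = get_y("HOG", "PCA", 30, "unlabelled", red_dim=red_dim)
# e.g. ["dorsal", "palmar", ...]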
Example #2
def main():
    fea_ext_mod = "HOG"
    dim_red_mod = "PCA"
    dist_func = "euclidean"
    k_value = 30

    training_set = os.path.abspath(get_input_folder("Labelled"))
    test_set = os.path.abspath(get_input_folder("Classify"))
    # training_set = os.path.abspath(r'Dataset3\Labelled\Set1')
    # test_set = os.path.abspath(r'Dataset3\Unlabelled\Set 1')
    label = "dorsal"
    obj_lat, feat_lat, model = compute_latent_semantic_for_label(
        fea_ext_mod, dim_red_mod, label, k_value, training_set)
    filename = "p3task1_{0}_{1}_{2}_{3}".format(fea_ext_mod, dim_red_mod,
                                                label, str(k_value))
    csv_reader.save_to_csv(obj_lat, feat_lat, filename)

    label_p = 'palmar'
    obj_lat_p, feat_lat_p, model_p = compute_latent_semantic_for_label(
        fea_ext_mod, dim_red_mod, label_p, k_value, training_set)
    filename = "p3task1_{0}_{1}_{2}_{3}".format(fea_ext_mod, dim_red_mod,
                                                label_p, str(k_value))
    csv_reader.save_to_csv(obj_lat_p, feat_lat_p, filename)

    x_train = obj_lat['reducedDimensions'].tolist()
    x_train += (obj_lat_p['reducedDimensions'].tolist())
    red_dim_unlabelled_images = reduced_dimensions_for_unlabelled_folder(
        fea_ext_mod, dim_red_mod, k_value, label, training_set, test_set)
    x_test = red_dim_unlabelled_images['reducedDimensions'].tolist()

    dim_red = DimensionReduction(fea_ext_mod, dim_red_mod, k_value)
    labelled_aspect = dim_red.get_metadata_collection(
        "imageName", obj_lat['imageId'].tolist(),
        "labelled")['aspectOfHand'].tolist()
    y_train = [i.split(' ')[0] for i in labelled_aspect]

    labelled_aspect = dim_red.get_metadata_collection(
        "imageName", obj_lat_p['imageId'].tolist(),
        "labelled")['aspectOfHand'].tolist()
    y_train += ([i.split(' ')[0] for i in labelled_aspect])

    unlabelled_aspect = dim_red.get_metadata_collection(
        "imageName", red_dim_unlabelled_images['imageId'].tolist(),
        "unlabelled")['aspectOfHand'].tolist()
    y_test = [i.split(' ')[0] for i in unlabelled_aspect]

    # converts to arrays and maps the training labels to 1 for "dorsal"
    # and -1 for "palmar"
    x_train = np.array(x_train)
    y_train = list(map(lambda x: 1 if x == "dorsal" else -1, y_train))
    y_train = np.array(y_train)

    # shuffling the training data
    indices = np.arange(x_train.shape[0])
    np.random.shuffle(indices)
    x_train = x_train[indices]
    y_train = y_train[indices]

    x_test = np.array(x_test)

    # creates the SVM classifier
    clf = SupportVectorMachine(gaussian_kernel, C=500)
    clf.fit(x_train, y_train)
    predictions = clf.predict(x_test)

    # maps the test labels to 1 for "dorsal" and -1 for "palmar"
    y_test = list(map(lambda x: 1 if x == "dorsal" else -1, y_test))

    # calculates and prints the results to the console
    correct = np.sum(predictions == y_test)
    print("---------------------------")
    accuracy = (correct / len(predictions)) * 100
    print("Accuracy: " + str(accuracy) + "%")
    unlabelled_images = red_dim_unlabelled_images['imageId']
    predicted_labels = list(
        map(lambda x: "dorsal" if x == 1 else "palmar", predictions))
    actual_labels = list(
        map(lambda x: "dorsal" if x == 1 else "palmar", y_test))
    print("---------------------------")
    print("Results:")
    print("Image ID, Prediction, Actual")
    for image_id, p, a in zip(unlabelled_images, predicted_labels,
                              actual_labels):
        print("(" + image_id + ", " + p + ", " + a + ")")
Example #3

def main():
    fea_ext_mod = "HOG"
    dim_red_mod = "SVD"
    dist_func = "euclidean"
    k_value = get_input_k("k-value")
    training_set = os.path.abspath(get_input_folder("Labelled"))
    test_set = os.path.abspath(get_input_folder("Classify"))
    label = "dorsal"
    obj_lat, feat_lat, model = compute_latent_semantic_for_label(
        fea_ext_mod, dim_red_mod, label, k_value, training_set)
    filename = "p3task1_{0}_{1}_{2}_{3}".format(fea_ext_mod, dim_red_mod,
                                                label, str(k_value))
    csv_reader.save_to_csv(obj_lat, feat_lat, filename)
    x_train = obj_lat['reducedDimensions'].tolist()

    red_dim_unlabelled_images = reduced_dimensions_for_unlabelled_folder(
        fea_ext_mod, dim_red_mod, k_value, label, training_set, test_set)
    x_test = red_dim_unlabelled_images['reducedDimensions'].tolist()

    dim_red = DimensionReduction(fea_ext_mod, dim_red_mod, k_value)
    labelled_aspect = dim_red.get_metadata_collection(
        "imageName", obj_lat['imageId'].tolist(),
        "labelled")['aspectOfHand'].tolist()
    y_train = [i.split(' ')[0] for i in labelled_aspect]

    label_p = 'palmar'
    obj_lat_p, feat_lat_p, model_p = compute_latent_semantic_for_label(
        fea_ext_mod, dim_red_mod, label_p, k_value, training_set)
    filename = "p3task1_{0}_{1}_{2}_{3}".format(fea_ext_mod, dim_red_mod,
                                                label_p, str(k_value))
    csv_reader.save_to_csv(obj_lat_p, feat_lat_p, filename)
    x_train += (obj_lat_p['reducedDimensions'].tolist())
    labelled_aspect = dim_red.get_metadata_collection(
        "imageName", obj_lat_p['imageId'].tolist(),
        "labelled")['aspectOfHand'].tolist()
    y_train += ([i.split(' ')[0] for i in labelled_aspect])

    zip_train = list(zip(x_train, y_train))
    random.shuffle(zip_train)
    x_train, y_train = zip(*zip_train)

    unlabelled_aspect = dim_red.get_metadata_collection(
        "imageName", red_dim_unlabelled_images['imageId'].tolist(),
        "unlabelled")['aspectOfHand'].tolist()
    y_test = [i.split(' ')[0] for i in unlabelled_aspect]
    # l1_ratio only applies with penalty='elasticnet', so it is omitted here
    lr = LogisticRegression(penalty='l2',
                            random_state=np.random.RandomState(42),
                            solver='lbfgs',
                            max_iter=300,
                            multi_class='ovr',
                            class_weight='balanced',
                            n_jobs=-1)
    lr.fit(x_train, y_train)
    predictions = lr.predict(x_test)
    unlabelled_images = red_dim_unlabelled_images['imageId'].tolist()
    predicted_labels = list(predictions)
    actual_labels = list(y_test)
    print("---------------------------")
    print("     Results:")
    print("---------------------------")
    print("     Accuracy:", lr.score(x_test, y_test))
    print("---------------------------")
    print("     Image ID             |   Prediction    |   Actual")
    for image_id, p, a in zip(unlabelled_images, predicted_labels,
                              actual_labels):
        print("     " + image_id + "     |   " + p + "        |   " + a)
Example #4

def main():

    k = get_input_k("C")
    training_set = get_input_folder("Labelled")
    test_set = get_input_folder("Classify")
    k_value = 30

    # feature_extraction_model and dimension_reduction_model are module-level
    # settings in this project (not defined in this snippet)
    dim_reduction = DimensionReduction(feature_extraction_model,
                                       dimension_reduction_model, k_value)

    label = 'dorsal'
    obj_lat, feat_lat, model = p3task1.compute_latent_semantic_for_label(
        feature_extraction_model, dimension_reduction_model, label, k_value,
        training_set)
    label_p = 'palmar'
    obj_lat_p, feat_lat_p, model_p = p3task1.compute_latent_semantic_for_label(
        feature_extraction_model, dimension_reduction_model, label_p, k_value,
        training_set)
    red_dim = p3task1.reduced_dimensions_for_unlabelled_folder(
        feature_extraction_model, dimension_reduction_model, k_value, label,
        training_set, test_set)

    # input for our own k-means implementation
    df = obj_lat[['reducedDimensions', 'imageId']]
    df_p = obj_lat_p[['reducedDimensions', 'imageId']]
    # input for scikit-learn
    tf = obj_lat['reducedDimensions']
    tf_p = obj_lat_p['reducedDimensions']

    a = list(tf)
    a_p = list(tf_p)

    X = df.values
    Y = df_p.values

    # fit one k-means model (k clusters) per label
    # precompute_distances and n_jobs were removed in scikit-learn 1.0
    km = KMeans(n_clusters=k,
                random_state=0,
                n_init=30,
                init='k-means++').fit(a)
    km_p = KMeans(n_clusters=k,
                  random_state=0,
                  n_init=30,
                  init='k-means++').fit(a_p)

    # cluster sizes per label (np.bincount replaces the manual counting loops)
    counter = np.bincount(km.labels_, minlength=k)
    counter_p = np.bincount(km_p.labels_, minlength=k)

    # scikit-learn cluster assignments for the query images (kept for reference)
    d_cluster = km.predict(red_dim['reducedDimensions'].tolist())
    p_cluster = km_p.predict(red_dim['reducedDimensions'].tolist())

    unlabelled_aspect = dim_reduction.get_metadata_collection(
        "imageName", red_dim['imageId'].tolist(),
        "unlabelled")['aspectOfHand'].tolist()
    y_test = [i.split(' ')[0] for i in unlabelled_aspect]

    # our own k-means (Lloyd's algorithm); each row of X is a
    # (reducedDimensions, imageId) pair
    def kmeans_implementation(X):
        # initialise centroids from k randomly chosen feature vectors
        init_idx = np.random.choice(len(X), size=k, replace=False)

        centroid = {}
        classes = {}
        classes2 = {}

        for i in range(k):
            centroid[i] = X[init_idx[i]][0]

        for _ in range(500):
            for i in range(k):
                classes[i] = []
                classes2[i] = []

            # assignment step: attach each point to its nearest centroid
            for x in X:
                distance = [
                    np.linalg.norm(
                        np.asarray(x[0]) - np.asarray(centroid[ind]))
                    for ind in range(len(centroid))
                ]
                classification = distance.index(min(distance))
                classes[classification].append(x)
                classes2[classification].append(x[0])
            previous = dict(centroid)

            # update step: move each centroid to the mean of its members
            for classification in classes2:
                centroid[classification] = np.average(classes2[classification],
                                                      axis=0)

            # stop once no centroid moved during this iteration
            opti = 0
            for c in centroid:
                if np.array_equal(centroid[c], previous[c]):
                    opti += 1
            if opti == k:
                break

        return classes, centroid

    classes, centroid = kmeans_implementation(X)
    classes_p, centroid_p = kmeans_implementation(Y)

    # prediction loop: red_dim holds the query (unlabelled) folder

    def predict_class(red_dim, centroid):
        query_classes = {}
        for i in range(k):
            query_classes[i] = []

        for ind in range(len(red_dim['reducedDimensions'])):
            # distance from the query vector to every centroid
            cluster_distance = [
                np.linalg.norm(red_dim['reducedDimensions'][ind] -
                               np.asarray(centroid[q]))
                for q in range(len(centroid))
            ]
            query_classification = cluster_distance.index(
                min(cluster_distance))
            query_classes[query_classification].append(red_dim['imageId'][ind])
        return query_classes

    query_classes_dorsal = predict_class(red_dim, centroid)
    # palmar predictions use the palmar centroids
    query_classes_palmar = predict_class(red_dim, centroid_p)

    correct = 0
    wrong = 0

    def centroid_mean(centroid):
        # element-wise mean of the k centroids
        res_list = [0] * k_value
        for i in range(k):
            res_list = [a + b for a, b in zip(res_list, centroid[i])]
        return [x / k for x in res_list]

    # mean centroid per label (computed for reference; unused below)
    mean_centroid_dorsal = centroid_mean(centroid)
    mean_centroid_palmar = centroid_mean(centroid_p)

    # assign each query image to whichever label's matched centroid is closer
    dorsal_images = []
    palmar_images = []
    for ind in range(len(red_dim['reducedDimensions'])):
        image_center_dorsal = 0
        image_center_palmar = 0
        image_name = red_dim['imageId'][ind]

        for i in range(k):
            if (image_name in query_classes_dorsal[i]):
                image_center_dorsal = i
            if (image_name in query_classes_palmar[i]):
                image_center_palmar = i

        dorsal_distance = np.linalg.norm(red_dim['reducedDimensions'][ind] -
                                         centroid[image_center_dorsal])
        palmar_distance = np.linalg.norm(red_dim['reducedDimensions'][ind] -
                                         centroid_p[image_center_palmar])

        if dorsal_distance < palmar_distance:
            dorsal_images.append(red_dim['imageId'][ind])
            if y_test[ind] == label:
                correct += 1
            else:
                wrong += 1
        else:
            palmar_images.append(red_dim['imageId'][ind])
            if y_test[ind] == label_p:
                correct += 1
            else:
                wrong += 1

    print("correct" + str(correct))
    print("wrong" + str(wrong))

    print("\nClick here: http://localhost:{0}/result\n".format(port_g))
    print("\nClick here: http://localhost:{0}/dorsal\n".format(port_g))
    print("\nClick here: http://localhost:{0}/palmar\n".format(port_g))

    @app.route('/Dataset2/<filename>')
    def send_image(filename):
        return send_from_directory(training_set, filename)

    @app.route('/test_set/<filename>')
    def send_image_result(filename):
        return send_from_directory(test_set, filename)

    @app.route('/dorsal')
    def get_gallery():
        image_names = [classes, k]

        return render_template("demo.html", image_names=image_names)

    @app.route('/palmar')
    def get_gallery_p():
        image_names_p = [classes_p, k]

        return render_template("demo_p.html", image_names_p=image_names_p)

    @app.route('/result')
    def get_gallery_result():
        results = [dorsal_images, palmar_images]

        return render_template("task2.html", results=results)

    app.run(port=port_g)
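
The decision rule in this example boils down to comparing a query vector's distance to its nearest dorsal centroid against its nearest palmar centroid; a minimal stand-alone sketch of that rule (the function and argument names are illustrative, not from the project):

import numpy as np

def nearest_centroid_label(vec, dorsal_centroids, palmar_centroids):
    # distance to the closest centroid of each label
    d_dorsal = min(np.linalg.norm(vec - c) for c in dorsal_centroids)
    d_palmar = min(np.linalg.norm(vec - c) for c in palmar_centroids)
    return "dorsal" if d_dorsal < d_palmar else "palmar"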