# Example #1
# 0
def train_model(model_name, X_train, Y_train, X_val, Y_val):
    """
    Trains a supervised classifier using the training data provided, and scores
    it using the validation dataset.

    Param:
        - model_name: one of 'svm', 'random_forest', 'neural_network', 'knn'
        - Train data:
            - X_train: feature matrix (samples x features)
            - Y_train: labels
        - Validation data:
            - X_val
            - Y_val

    Return:
        - model: the trained classifier, to be used for testing; if
          model_name is not recognized, an error string is returned
          instead (kept for backward compatibility with existing callers).
    """

    if model_name == 'svm':
        model = SVM()
    elif model_name == "random_forest":
        max_depth = 2  # shallow forest; tune as needed
        model = RandomForest(max_depth)
    elif model_name == "neural_network":
        # Input layer sized from the data; 2 output classes, 30 hidden units.
        out_size = 2
        hidden_size = 30
        in_size = X_train.shape[1]
        model = NeuralNetwork(in_size, hidden_size, out_size)
    elif model_name == "knn":
        n_neighbors = 50
        model = KNearestNeighbors(n_neighbors)
    else:
        return "Error: Model not yet implemented..."

    print("Training %s..." % model_name)

    # model.train is assumed to fit in place and return the training
    # accuracy; model.score returns accuracy on held-out data.
    train_score = model.train(X_train, Y_train)
    valid_score = model.score(X_val, Y_val)

    print("Training Accuracy: %s" % train_score)
    print("Validation Accuracy: %s" % valid_score)

    return model
__author__ = "danil.gizdatullin"

import config as conf
from k_nearest_neighbors import KNearestNeighbors

# Build the KNN model from the similarity data configured for the project.
# snake_case name: this is an instance, not a class (PEP 8).
k_nearest_neighbors = KNearestNeighbors(file_name=conf.path_to_similarity_data)

# Python 3 print() call; the original used the Python 2 print statement,
# which is a SyntaxError on Python 3.
print(k_nearest_neighbors.statistics(conf.number_of_nearest_neighbors,
                                     histogram=True, number_of_bins=200))
# Example #3
# 0
    # NOTE(review): fragment of a larger function — the enclosing `def` is
    # outside this chunk; presumably X_train/y_train/X_test/y_test and the
    # np/plt/sklearn imports come from the surrounding file — confirm.

    # Sweep k over {1, 5, 10} with sklearn's KNN and record the error rate.
    knn_misclassification_rate = np.zeros(3)
    for index, k in enumerate([1, 5, 10]):
        knn = sklearn.neighbors.KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_train, y_train)
        knn_result = knn.predict(X_test)
        # Fraction of test points whose predicted label differs from truth.
        knn_misclassification_rate[index] = np.mean(knn_result != y_test)
    # Plot error rate against 1/k (larger 1/k = more flexible model).
    plt.figure()
    plt.plot(1 / np.array([1, 5, 10]), knn_misclassification_rate, "-bo")
    plt.suptitle("Misclassification rate v.s. 1/k of KNN")
    plt.show()
    print(knn_misclassification_rate)

    # Question 5 #
    # Same evaluation with the hand-rolled KNearestNeighbors at k = 1.
    k_nearest_neighbors = KNearestNeighbors(1)
    k_nearest_neighbors.fit(X_train, y_train)
    knn_result = k_nearest_neighbors.predict(X_test)
    knn_naive_misclassification_rate = np.mean(knn_result != y_test)
    print(knn_naive_misclassification_rate)

    # Question 7 #
    # Repeat the k sweep with the distance-weighted KNN variant.
    w_knn_misclassification_rate = np.zeros(3)
    for index, k in enumerate([1, 5, 10]):
        knn = WeightedKNearestNeighbors(n_neighbors=k)
        knn.fit(X_train, y_train)
        knn_result = knn.predict(X_test)
        w_knn_misclassification_rate[index] = np.mean(knn_result != y_test)
    plt.figure()
    plt.plot(1 / np.array([1, 5, 10]), w_knn_misclassification_rate, "-bo")
    plt.suptitle("Misclassification rate v.s. 1/k of weighted KNN")
    # NOTE(review): no plt.show()/print here — the fragment looks truncated.
__author__ = 'danil.gizdatullin'

# import numpy as np
import matplotlib.pyplot as plt
import time

from k_nearest_neighbors import KNearestNeighbors


def special_print(a):
    """
    Pretty-print a mapping whose values expose a ``dict`` attribute.

    Param:
        - a: dict with integer keys; each value carries a ``dict``
          attribute whose contents are printed under the key.

    Return:
        - None (output goes to stdout)
    """
    # Iterating the dict directly yields its keys and works on both
    # Python 2 and 3; the original called dict.iterkeys(), which was
    # removed in Python 3 (AttributeError).
    for key in a:
        print("Key = %i" % key)
        print(a[key].dict)
        print("#####################")
# Script body: build the KNN model, compute neighbor distributions, and
# plot a histogram of the reverse-nearest-neighbor counts, timing the run.
start_time = time.time()

# KNearestNeighbors() with no arguments presumably loads its data from a
# default/configured source — confirm against the class definition.
k_nn = KNearestNeighbors()
# special_print(k_nn.k_nearest_dict)
# special_print(k_nn.reverse_k_nearest_dict)
distr = k_nn.return_distribution_k_nearest_neighbors()
distr_reverse = k_nn.return_distribution_reverse_k_nearest_neighbors()

# Wall-clock time for model construction + distribution computation.
print("--- %s seconds ---" % (time.time() - start_time))
# plt.hist(distr)
# Only the reverse-KNN distribution is plotted; the forward-KNN histogram
# is deliberately left commented out above.
plt.hist(distr_reverse, bins=100)
plt.show()