def train(instances):
    """Build the classifier selected by ``args.algorithm``, train it on
    *instances*, and return it (None for an unknown algorithm name)."""
    predictor = None
    if args.algorithm == 'knn':
        predictor = Knn(args.knn, max_max_index)
    elif args.algorithm == 'adaboost':
        predictor = Adaboost(max_max_index, args.num_boosting_iterations)
    elif args.algorithm == 'distance_knn':
        # Same Knn class, but told to use distance weighting.
        predictor = Knn(args.knn, max_max_index, args.algorithm)
    if predictor is not None:
        predictor.train(instances)
    return predictor
예제 #2
0
def main():
    """Run the diabetes k-NN demo: build the classifier, set k=20, and
    report its confusion matrix."""
    classifier = Knn("diabetes.csv")
    classifier.set_k(20)
    classifier.get_confusion_matrix()
예제 #3
0
def main():
    """Train a k-NN classifier (k=3) on the iris data set and print its
    accuracy on a random 20% hold-out split."""
    col_names = [
        'sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'
    ]
    iris = pd.read_csv('./iris.data', header=None, names=col_names)
    # Map the textual species to integer class labels.
    iris_class = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
    iris['species_num'] = [iris_class[i] for i in iris.species]

    X = iris.drop(['species', 'species_num'], axis=1).to_numpy()
    y = iris.species_num.to_numpy()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    model = Knn(3)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # Fraction of test samples whose prediction matches the true label.
    outcomes = [pred == truth for pred, truth in zip(predictions, y_test)]
    print("acc :", sum(outcomes) / len(outcomes))
예제 #4
0
 def test_minkowski_distance(self):
     """Test to check that minkowski distance is correct"""
     model = Knn(n_neighbors=3, p=5)
     model.fit(np.array(little_X), little_Y)
     dist = model._minkowski_distance(np.array([3, 4]))
     expected = [2.01234, 6.419382]
     assert np.allclose(dist, expected), "Minkowski Distance is not correct"
    def clasificate_iris_to_test(self, sl, sw, pl, pw, n_neighbours):
        """Classify a flower with the given measurements and neighbour count.

        All arguments arrive as strings; they are validated, converted and
        range-checked (each measurement must lie in [0, 8]) before the k-NN
        classifier is run and the plot is saved.  Returns True on success,
        False when any input is invalid.
        """
        def _is_number(text):
            # Accepts non-negative decimals like "1" or "2.5".
            return text.replace('.', '', 1).isdigit()

        if not (_is_number(sl) and _is_number(sw) and _is_number(pl)
                and _is_number(pw) and n_neighbours.isdigit()):
            return False

        sl, sw, pl, pw = float(sl), float(sw), float(pl), float(pw)
        n_neighbours = int(n_neighbours)

        if not all(0 <= value <= 8 for value in (sl, sw, pl, pw)):
            return False

        knn = Knn(n_neighbours)
        sample = np.array([[pw, pl, sw, sl]])
        knn.knn_test(sample, self.data, self.target)
        knn.save_plot(sample, self.data, self.target)
        return True
예제 #6
0
def get_best_k_for_data_error_rate_normal(data_set_path, number_of_runs):
    """
    Does a search over the k space to find the best k value for classification based k-NN

    Runs the cross validation experiment once per run for every k in 1..10,
    accumulates each k's error rate across runs, averages the totals, and
    plots the resulting validation curve.
    """
    k_list = range(1, 11)
    totals = [0] * len(k_list)

    for _ in range(number_of_runs):
        for index, k in enumerate(k_list):
            knn = Knn(k, True)
            totals[index] += Experiments.run_classification_experiment(
                data_set_path, k, knn)

    # Average each k's accumulated error rate over the number of runs.
    averages = [total / number_of_runs for total in totals]

    generate_validation_curves(k_list,
                               averages,
                               "Average Error Rate",
                               title="Number of K's vs Average Error Rate",
                               x_axis_label="# of k's",
                               y_axis_label="Error Rate")
예제 #7
0
def run_knn(n, p, k):
    """Time a k-NN build on random data and print a CSV-style summary line.

    Generates an 80/20 random train/dev split (labels half 0s, half 1s),
    times construction of the Knn classifier, estimates its theoretical
    cost, and prints: n, p, k, cost, elapsed seconds.

    Parameters
    ----------
    n : int   -- total number of examples to generate (must be > 0)
    p : int   -- number of features per example (must be > 0)
    k : int   -- number of neighbors

    Raises
    ------
    ValueError -- if n or p is not positive (previously this surfaced later
    as a confusing NameError on X_train).
    """
    if n <= 0 or p <= 0:
        raise ValueError("n and p must be positive")

    X_train = np.random.rand(int(n * .8), p)
    y_train = np.concatenate((np.zeros(int(
        (n / 2) * .8)), np.ones(int((n / 2) * .8))))
    X_dev = np.random.rand(int(n * .2), p)

    start = datetime.datetime.now()
    knn = Knn(k, X_dev, X_train, y_train)
    end = datetime.datetime.now()

    # KNN cost = run() --> get_neighbors() + get_majority_vote()
    # = (for each X_dev example) * { [get its neighbors] + [then get the top vote] }
    # = (# of X_dev examples) * ...
    # ...{ [ (# of X_train * ) + (sort # of X_train) + (k)] + [ (k*2) + (k) ] }
    len_X_train = len(X_train)
    cost = len(X_dev) * (((len_X_train * (2 + 4 + p * 2)) +
                          (len_X_train * math.log(len_X_train)) + (k)) +
                         (k * 2 + k))

    # Elapsed wall-clock seconds.  The old approach split str(time()) on
    # ":" and subtracted fields, which broke whenever the run crossed an
    # hour (or midnight) boundary.
    time_diff = (end - start).total_seconds()

    print(
        str(n) + ", " + str(p) + ", " + str(k) + ", " + str(int(cost)) + ", " +
        str(time_diff))
예제 #8
0
def loopPcaKnn(loop=1):
    """Average 3-NN accuracy on PCA-projected iris data over *loop* random
    train/test splits."""
    accuracy = 0
    for _ in range(loop):
        iris_data = IrisPCA('iris_data_set/iris.data')
        iris_data.randomSplit(35)
        # Normalise the training data with its own mean / standard
        # deviation, then learn a 2-component projection from it.
        iris_data.calTrainMeanSd(iris_data.train_data)
        iris_data.zScoreNormalize(iris_data.train_data)
        iris_data.calProjectionMatrixW(number_of_conponent=2)
        # The test data is normalised and projected with the same matrix.
        iris_data.zScoreNormalize(iris_data.test_data)
        projected_train = iris_data.getProjectedData(iris_data.train_data)
        projected_test = iris_data.getProjectedData(iris_data.test_data)
        accuracy += Knn().kNearestNeighbors(projected_train, projected_test)
    return accuracy / loop
예제 #9
0
def knn_train_test(k, xTrain, yTrain, xTest, yTest):
    """
    Train a k-NN model with the given k and score it on the test data.

    Parameters
    ----------
    k : int
        The number of neighbors.
    xTrain : nd-array with shape n x d
        Training data.
    yTrain : structured array / frame with a 'label' column of length n
        Labels associated with the training data.
    xTest : nd-array with shape m x d
        Test data.
    yTest : structured array / frame with a 'label' column of length m
        Labels associated with the test data.

    Returns
    -------
    acc : float
        The accuracy of the trained knn model on the test data.
    """
    classifier = Knn(k)
    classifier.train(xTrain, yTrain['label'])
    predictions = classifier.predict(xTest)
    return accuracy(predictions, yTest['label'])
예제 #10
0
 def test_uniform_weight_with_0(self):
     """Test to see that _distance_weights return a weight of 1/d for each distance"""
     model = Knn(n_neighbors=3)
     dists = np.array([0, .3, 4])
     result = model._distance_weights(dists)
     # A distance of exactly 0 should map to weight 1 (not a division error).
     expected = np.array([[1, 0], [1 / .3, .3], [1 / 4, 4]])
     assert np.allclose(result, expected), "distance_weights are not correct when we have distances of 0"
예제 #11
0
 def test_distance_weight(self):
     """Test to see that _distance_weights return a weight of 1/d for each distance"""
     model = Knn(n_neighbors=3)
     dists = np.array([2, .3, 4])
     result = model._distance_weights(dists)
     expected = np.array([[1 / 2, 2], [1 / .3, .3], [1 / 4, 4]])
     assert np.allclose(result, expected), "distance_weights are not correct"
예제 #12
0
파일: main.py 프로젝트: AlexNik11/IT-master
def main():
    """Demo of the Moscow metro data set: line lookup, a k-NN query, and a
    path-length query."""
    metro = Metro("list_of_moscow_metro_stations.csv",
                  "list_of_moscow_metro_stations_changes.csv")

    line = metro.get_line('Сокольническая')
    print([station.name for station in line.stations])
    print(metro.get_line('D3'))

    knn = Knn(metro, ['Окружная', 'Кофе'], 0, 0)
    print(knn.compute('Окружная'))
    print(metro.get_length('Ховрино', 'Сокол'))
    return 0
예제 #13
0
def icaKnnTest():
    """Project iris data with ICA, keep the two most energetic components,
    and print the 3-NN accuracy on a 70/30 split."""
    iris_data = IrisICA('iris_data_set/iris.data')
    iris_data.plotIrisData('iris data before ica')
    iris_data.applyIcaFromFullIris(number_components=4)
    top_components = iris_data.getSortedComponentEnergy()[:2]
    train_data, test_data = iris_data.getTrainTestSet(top_components,
                                                      train_size=0.7)
    iris_data.plotIrisData('iris data after ica')
    print(Knn().kNearestNeighbors(train_data, test_data))
    plt.show()
예제 #14
0
    def __init__(self):
        """Build the Tk window and widgets for the KNN demo, then block in
        the Tk main loop until the window is closed."""
        # Collected training points, their class labels, and drawn line ids.
        self.points = []
        self.labels = []
        self.lines = []
        self.knn = Knn()

        # Fixed-size, non-resizable main window.
        self.root = Tk()
        self.root.title("KNN Demo")
        self.root.geometry('800x520')
        self.root.resizable(False, False)

        # Drawing canvas; a left-click is handled by clickCallback.
        self.axis = Canvas(self.root, width=720, height=370)
        self.axis.grid(column=0, row=0, columnspan=5)
        self.axis.configure(bg='white')
        self.axis.bind('<Button 1>', self.clickCallback)
        self.axis_points = []
        self.axis_predictions = []

        # Message label pair (both placed on row 1).
        self.debug_lbl = Label(self.root, text='Mensaje ==>')
        self.debug_lbl.grid(column=0, row=1)

        self.debug_msg = Label(self.root, text='-')
        self.debug_msg.grid(column=0, row=1, columnspan=5)

        # Reset button, wired to cleanHandler.
        self.init_btn = Button(self.root,
                               text='Reiniciar',
                               command=self.cleanHandler)
        self.init_btn.grid(column=5, row=2)

        self.class_lbl = Label(self.root, text='Clase ==>')
        self.class_lbl.grid(column=0, row=2)

        # Button showing the current class mark (first entry of self.MARKS);
        # classButtonHandler presumably cycles it — confirm in its definition.
        self.current_mark = self.MARKS[0]
        self.class_btn = Button(self.root,
                                text=self.current_mark,
                                command=self.classButtonHandler)
        self.class_btn.grid(column=1, row=2)

        self.mode_lbl = Label(self.root, text='MODO ==>')
        self.mode_lbl.grid(column=2, row=2)

        # Button showing the current interaction mode (first of self.MODES).
        self.current_mode = self.MODES[0]
        self.mode_btn = Button(self.root,
                               text=self.current_mode,
                               command=self.modeButtonHandler)
        self.mode_btn.grid(column=3, row=2)

        self.k_lbl = Label(self.root, text='K ==>')
        self.k_lbl.grid(column=2, row=3)

        # Entry widget for the k value.  NOTE(review): Entry's 'text' option
        # looks intended as a default of '3' — confirm it takes effect.
        self.k_txt = Entry(self.root, width=2, text='3')
        self.k_txt.grid(column=3, row=3)

        # Blocks until the window is closed.
        self.root.mainloop()
예제 #15
0
    def test_k_5_distance_minkowski(self):
        """Test to compare our knn with Sklearn knn when k=5 and distance is minkowski with p=3"""
        reference = KNeighborsClassifier(n_neighbors=5, metric="minkowski",
                                         p=3)
        reference.fit(X_train, y_train)
        expected = reference.predict(X_test)

        ours = Knn(n_neighbors=5, metric="minkowski", p=3)
        ours.fit(X_train, y_train)
        actual = ours.predict(X_test)

        assert np.alltrue(expected == actual
                          ), "Error testing knn (minkowski) with k=5 and p=3"
예제 #16
0
    def test_k_5(self):
        """Test to compare our knn with Sklearn knn when k=5 and distance is euclidean"""
        reference = KNeighborsClassifier(n_neighbors=5)
        reference.fit(X_train, y_train)
        expected = reference.predict(X_test)

        ours = Knn(n_neighbors=5)
        ours.fit(X_train, y_train)
        actual = ours.predict(X_test)

        assert np.alltrue(expected == actual), "Error testing knn with k=5"
예제 #17
0
    def test_distance_weight_2(self):
        """Test to compare our knn with Sklearn when k=5 and weights are the inverse of distance"""
        reference = KNeighborsClassifier(n_neighbors=5, weights='distance')
        reference.fit(X_train, y_train)
        expected = reference.predict(X_test)

        ours = Knn(n_neighbors=5, weights='distance')
        ours.fit(X_train, y_train)
        actual = ours.predict(X_test)

        assert np.alltrue(expected == actual
                          ), "Error testing knn with k=5 and weights=distance"
예제 #18
0
def icaKnnLoop(loop=10):
    """Average ICA + 3-NN accuracy over *loop* random 70/30 splits, printing
    each round's result."""
    accuracy = 0
    for round_index in range(loop):
        iris_data = IrisICA('iris_data_set/iris.data')
        iris_data.applyIcaFromFullIris(number_components=4)
        top_components = iris_data.getSortedComponentEnergy()[:2]
        train_data, test_data = iris_data.getTrainTestSet(
            top_components, train_size=0.7)
        round_accuracy = Knn().kNearestNeighbors(train_data, test_data)
        accuracy += round_accuracy
        print('round ', round_index + 1, ' accuracy: ', round_accuracy)
    return accuracy / loop
예제 #19
0
파일: main.py 프로젝트: mles2/UM
def display_f_test(inputs_from_feature_selection, dataset):
    """Print combined 5x2cv F-test results for each pair of classifiers
    (NN vs KNN, NN vs SVM, KNN vs SVM)."""
    def _ftest(first_clf, second_clf):
        # One F-test between two already-constructed classifiers.
        return combined_ftest_5x2cv(first_clf,
                                    second_clf,
                                    inputs_from_feature_selection,
                                    dataset.target,
                                    random_seed=1)

    print("ftest results: (f, p) =")
    print(" NN, KNN: ", _ftest(NeuralNet(10000).clf, Knn(2).clf))
    print(" NN, SVM: ", _ftest(NeuralNet(10000).clf, Svm().clf))
    print(" KNN, SVM: ", _ftest(Knn(2).clf, Svm().clf))
예제 #20
0
def loopFaKnn(loop=1):
    """Average 3-NN accuracy on factor-analysis-projected iris data over
    *loop* random splits."""
    accuracy = 0
    for _ in range(loop):
        iris_data = IrisFA('iris_data_set/iris.data')
        iris_data.randomSplit(35)
        iris_data.getProjectionMatrixW(k=2)
        projected_train = iris_data.getProjectedData(iris_data.train_data)
        projected_test = iris_data.getProjectedData(iris_data.test_data)
        accuracy += Knn().kNearestNeighbors(projected_train, projected_test)
    return accuracy / loop
예제 #21
0
def main():
    """Fit k-NN on MNIST, report accuracy, and plot the first 20 test
    digits labelled with their predicted class."""
    X_train, y_train, X_test, y_test = load_mnist()

    knn = Knn()
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    correct = sum((y_test - y_pred) == 0)

    print('==> correct:', correct)
    print('==> total:', len(X_test))
    print('==> acc:', correct / len(X_test))

    # Show the first 20 test digits titled with their predicted label.
    axes = plt.subplots(nrows=4, ncols=5, sharex='all',
                        sharey='all')[1].flatten()
    for i in range(20):
        axes[i].set_title(y_pred[i])
        axes[i].imshow(X_test[i], cmap='Greys', interpolation='nearest')
    axes[0].set_xticks([])
    axes[0].set_yticks([])
    plt.tight_layout()
    plt.show()
    def run(self, x, nbit, resolution, error):
        """Dispatch one experiment on the selected model:
        x == 1 -> CNN, 2 -> SVM, 3 -> KNN, 4 -> Caliente.

        Loads the pre-generated train/test image arrays for the given bit
        depth and resolution, optionally corrupts them with random
        pixel-bit errors, and runs the chosen model (prompting for any
        model-specific parameters)."""
        bits = str(nbit)
        # Both files share the same directory and name prefix.
        prefix = ("./data/" + bits + "bit" + "/" + bits + "bit" + "_" +
                  str(resolution) + "x" + str(resolution))
        test_data = np.load(prefix + "_test_images.npy")
        train_data = np.load(prefix + "_train_images.npy")

        x = int(x)
        # The test set is always corrupted when an error rate is given.
        if error != 0:
            test_data = np.array(
                [self.pixel_bit_error(error, i, nbit) for i in test_data])

        if x == 1:
            generations = int(input("enter the number of generations"))
            batch_size = int(input("enter the size of each batch"))
            model = Cnn()
            model.run(train_data, test_data, resolution, error, generations,
                      batch_size)
        elif x == 2:
            if error != 0:
                train_data = np.array(
                    [self.pixel_bit_error(error, i, nbit) for i in train_data])
            model = Svm()
            model.run(train_data, test_data, resolution)
        elif x == 3:
            if error != 0:
                train_data = np.array(
                    [self.pixel_bit_error(error, i, nbit) for i in train_data])
            k = int(input("k ?"))
            model = Knn(k)
            model.run(train_data, test_data, resolution)
        elif x == 4:
            model = Caliente([], error)
            batch_size = int(input("enter the size of each batch"))
            generations = int(input("enter the number of generations"))
            model.run(train_data, test_data, resolution, generations,
                      batch_size)
def processImage(request):
    """Classify the digit in a fixed sample image with k-NN, SVM and ANN,
    reconcile the three votes, and render the result page.

    NOTE(review): this is Python 2 code (print statements), and the image
    path is hard-coded to ../testsamples/cleantha.png rather than taken
    from `request` — confirm whether that is intended.
    """
    source = str(
        os.path.join(os.path.dirname(__file__),
                     '../testsamples/cleantha.png').replace('\\', '/'))
    imagefilter = ImageFilter(source)
    # Features WITHOUT contour extraction:
    vectorTarget = imagefilter.getVectorNormal()
    arrayTarget = imagefilter.getArrayNormal()
    # Features WITH contour extraction (currently disabled):
    #vectorTarget = imagefilter.getVectorNew()
    #arrayTarget = imagefilter.getArrayNew()

    knnmachine = Knn(3)
    knnmachine.test = vectorTarget
    knnresult, flag = knnmachine.getNumber()

    svmresult = svmdecision(arrayTarget)
    annresult = anndecision(vectorTarget)
    #pcaresult = pcadecision(vectorTarget)
    #ldaresult = ldadecision(vectorTarget)

    # Perceived reliability ordering: svm > knn > ann.

    #resultList = [knnresult, svmresult, annresult, pcaresult, ldaresult]
    resultList = [knnresult, svmresult, annresult]
    print resultList
    print flag
    result = -1
    # All three classifiers disagree: fall back to their integer average.
    if len(list(set(resultList))) == len(resultList):
        #result = resultList[0]
        result = int(sum(resultList) / len(resultList))
    # svm and ann agree but knn differs: trust knn only when its confidence
    # flag is set, otherwise take the majority vote.
    elif resultList[0] != resultList[1] and resultList[1] == resultList[2]:
        if flag:
            result = resultList[0]
        else:
            result = max(set(resultList), key=resultList.count)
    # knn and ann agree but svm differs: the flag chooses knn over svm.
    elif resultList[0] == resultList[2] and resultList[0] != resultList[1]:
        if flag:
            result = resultList[0]
        else:
            result = resultList[1]
    else:
        result = max(set(resultList), key=resultList.count)
    return render_to_response("uploadnew.html", {'result': '%d' % int(result)})
예제 #24
0
def loopLdaKnn(loop=1):
    """Average 3-NN accuracy on LDA-projected iris data over *loop* random
    splits, printing each round's accuracy.

    Parameters
    ----------
    loop : int
        Number of random splits to average over (default 1).

    Returns
    -------
    float -- mean accuracy across all rounds.
    """
    accuracy = 0
    for i in range(loop):
        iris_data = IrisLDA('iris_data_set/iris.data')
        iris_data.randomSplit(35)
        iris_data.getMeansForEachClass(iris_data.train_data)
        iris_data.getScatterMatrices()
        iris_data.getTranformMatrixW()
        new_train_data = iris_data.getProjectedData(iris_data.train_data)
        new_test_data = iris_data.getProjectedData(iris_data.test_data)
        knn = Knn()
        # Run the classifier once and reuse the result; the original code
        # ran the full (expensive) k-NN pass twice per round.
        round_accuracy = knn.kNearestNeighbors(new_train_data, new_test_data)
        print("Round ", i + 1, " 3-NN accuracy: ",
              format(round_accuracy, ".3f"))
        accuracy += round_accuracy
    return accuracy / loop
def main():
    """Train a k-NN classifier (k=3) on the iris data set and demo it.

    Loads the iris data, plots it, splits it 80/20, fits the classifier,
    reports accuracy on the held-out set, and classifies one new sample.
    """
    # Each row is a 4-feature measurement; the target encodes the species.
    iris = datasets.load_iris()
    iris_df = pd.DataFrame(iris['data'], columns=iris['feature_names'])
    X = iris_df.to_numpy()
    y = iris['target']
    iris_df['species'] = iris['target']
    plot_chart(iris_df)

    # splitting data into training and testing subsets (80% / 20%)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=1234)

    clf = Knn(k=3)
    clf.fit(X_train,
            y_train)  # fitting model with features and corresponding labels
    predictions = clf.predict(X_test)

    print('Test samples shape: ' + str(X_test.shape))  # 30 samples x 4 features
    print(X_test)
    print('')
    print('Predictions shape: ' + str(predictions.shape))  # 30 labels
    print(predictions)

    print('')
    calculate_accuracy(predictions,
                       y_test)  # comparing predictions outcome with y_test

    new_features = np.asarray([[6.2, 2.8, 5.7, 1.8]])
    predicted_label = clf.predict(np.asarray(new_features))

    print('')
    print('New Features: ' + str(new_features))
    print('Predicted label: ' + str(predicted_label))
    # Typo fixed in the user-facing text ("speices" -> "species").
    print('Predicted species: ' + str(species[int(predicted_label[0])]))
예제 #26
0
def main():
    """Run every classifier (KNN, MLP, KMeans, OCKRA) through Examen2 and
    print a formatted report block for each.

    The four hand-copied report blocks of the original are factored into a
    single helper; the printed output is unchanged.
    """
    examen = Examen2()

    def _report(nombre, clasificador):
        # Evaluate one classifier and print its metrics block.
        auc, tEntrenamiento, tClasificacion = examen.runClasificator(
            clasificador)
        print("-------------------------------------------")
        print("Resultados de " + nombre + ":")
        print("")
        print("AUC = " + str(auc))
        print("Tiempo promedio de entrenamiento = " + str(tEntrenamiento))
        print("Tiempo promedio de clasificacion = " + str(tClasificacion))
        print("-------------------------------------------")
        print("")

    _report("KNN", Knn())
    _report("MLP", MLP())
    _report("KMeans", KM())
    _report("OCKRA", OCKRA())
예제 #27
0
def run_knn(n, p, k, parallel, num_procs):
    """Benchmark serial vs. parallel k-NN on random data and print a CSV row.

    Generates a reproducible 80/20 random train/dev split (labels half 0s,
    half 1s), times construction of the chosen classifier, computes dev
    accuracy and a theoretical cost estimate, then prints:
    n, p, k, cost, time, parallel, num_procs, accuracy.

    Parameters
    ----------
    n : int        -- total number of examples (must be > 0)
    p : int        -- number of features per example (must be > 0)
    k : int        -- number of neighbors
    parallel : bool -- use KnnParallel instead of Knn
    num_procs : int -- worker count for the parallel implementation

    Raises
    ------
    ValueError -- if n or p is not positive (previously this surfaced later
    as a confusing NameError on X_train).
    """
    if n <= 0 or p <= 0:
        raise ValueError("n and p must be positive")

    np.random.seed(42)  # reproducible data across serial/parallel runs
    X_train = np.random.rand(int(n * .8), p)
    y_train = np.concatenate((np.zeros(int(
        (n / 2) * .8)), np.ones(int((n / 2) * .8))))
    X_dev = np.random.rand(int(n * .2), p)
    y_dev = np.concatenate((np.zeros(int(
        (n / 2) * .2)), np.ones(int((n / 2) * .2))))

    start = time.time()

    if parallel:
        knn = KnnParallel(k, X_dev, X_train, y_train, num_procs)
    else:
        knn = Knn(k, X_dev, X_train, y_train)

    end = time.time()

    predictions = knn.predictions
    correct = 0
    for pred, actual in zip(predictions, y_dev):
        if int(pred) == int(actual):
            correct += 1

    accuracy = correct / len(predictions)

    # KNN cost = run() --> get_neighbors() + get_majority_vote()
    # = (for each X_dev example) * { [get its neighbors] + [then get the top vote] }
    # = (# of X_dev examples) * ...
    # ...{ [ (# of X_train * ) + (sort # of X_train) + (k)] + [ (k*2) + (k) ] }
    len_X_train = len(X_train)
    cost = int( len(X_dev) * ( ( (len_X_train*(2 + 4 + p*2)) + \
     (len_X_train * math.log(len_X_train)) + (k) ) + (k*2 + k) ) )

    time_diff = end - start

    print("{}, {}, {}, {}, {}, {}, {}, {}".format(n, p, k, cost, time_diff,
                                                  parallel, num_procs,
                                                  accuracy))
예제 #28
0
def get_best_k_for_data_mse(data_set_path, number_of_runs):
    """
    Does a search over the k space to find the best k value for regression based k-NN

    Runs the cross validation experiment once per run for every k in 1..10,
    accumulates each k's MSE across runs, averages the totals, and plots
    the resulting validation curve.
    """
    k_list = range(1, 11)
    totals = [0] * len(k_list)

    for _ in range(number_of_runs):
        # compute performance for each k value and accumulate into totals
        for index, k in enumerate(k_list):
            knn = Knn(k, False)
            totals[index] += Experiments.run_experiment_regression(
                data_set_path, k, knn)

    # average each k's accumulated MSE over the number of times it was run
    averages = [value / number_of_runs for value in totals]

    generate_validation_curves(k_list,
                               averages,
                               "Average Mean Squared Error",
                               title="Number of K's vs AMSE",
                               x_axis_label="# of k's",
                               y_axis_label="MSE")
예제 #29
0
def main():
    """Fit k-NN on MNIST, print accuracy, and plot the first 20 test digits
    labelled with their predicted class."""
    X_train, y_train, X_test, y_test = load_mnist()

    model = Knn()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    correct = sum((y_test - y_pred) == 0)

    print('==> correct:', correct)
    print('==> total:', len(X_test))
    print('==> acc:', correct / len(X_test))

    # Display the first 20 test digits titled with their predicted label.
    axes = plt.subplots(nrows=4, ncols=5, sharex='all',
                        sharey='all')[1].flatten()
    for i in range(20):
        axes[i].set_title(y_pred[i])
        axes[i].imshow(X_test[i], cmap='Greys', interpolation='nearest')
    axes[0].set_xticks([])
    axes[0].set_yticks([])
    plt.tight_layout()
    plt.show()
예제 #30
0
import flask
from flask import request, jsonify, render_template, make_response
import numpy as np
import cv2
import sys
import base64
import io
from knn import Knn
from PIL import Image
from flask_cors import CORS

# Flask application with cross-origin requests enabled and debug mode on.
app = flask.Flask(__name__)
CORS(app)
app.config["DEBUG"] = True

# Shared k-NN classifier instance (k = 100) used by the request handlers.
knn = Knn()
knn.set_k(100)


@app.route('/getMathAnswer', methods=['POST'])
def getMathAnswer():
    if 'base64' in request.json:
        try:
            data = request.json['base64']
            imgdata = base64.b64decode(data)
            filename = 'some_image.png'
            # print(request.json)
            with open(filename, 'wb') as f:
                f.write(imgdata)
            im = cv2.imread(filename)
            # customHeight = int(request.json['height'])