Example #1
0
def run_with_sample() -> None:
    """Estimate KNN accuracy over repeated random hold-out rounds.

    Runs ``number_test`` independent rounds. Each round pulls fresh
    training data, holds out 25 rows from each of the two classes,
    refits the shared ``knn`` model on the remainder, and scores the
    held-out rows (k=3). Prints per-round hits/total/accuracy and the
    mean accuracy over all rounds.
    """
    total_point = 0.0
    number_test = 100
    for _ in range(number_test):
        data = get_train_data()
        samples: List[FrameOrSeries] = []
        # Hold out 25 rows from each class frame; the class index doubles
        # as the expected prediction label below.
        for class_idx in (0, 1):
            data[class_idx], sample = get_sample_data(data[class_idx], 25)
            samples.append(sample)

        # Refit the shared model on the remaining training rows.
        knn.clear()
        knn.fit(data[0].values.tolist(), data[1].values.tolist())

        point = 0
        total = 0
        for expected_label, held_out in enumerate(samples):
            for row in held_out.values:
                if knn.predict(row.tolist(), k=3) == expected_label:
                    point += 1
                total += 1

        print(f'point: {point}')
        print(f'total: {total}')
        accuracy = point / total * 100
        total_point += accuracy
        print(accuracy)

    print(f'total_point: {total_point / number_test}')
Example #2
0
 def test__predict_with_tans_fun(self):
     """A KNN built with the standard transform function still predicts
     each training point's own label for nearby queries (k=1)."""
     features = array([[2, 4], [6, 8]])
     labels = array(['a', 'b'])
     classifier = KNN(trans_fun=KNN.STD_TRANS)
     classifier.fit(features, labels)
     # Each query sits closest to one training row; expect that row's label.
     self.assertEqual(classifier.predict(array([4, 5]), 1), 'a')
     self.assertEqual(classifier.predict(array([5, 6]), 1), 'b')
Example #3
0
 def test__predict_with_tans_fun(self):
     """Nearest-neighbour (k=1) prediction works when the model is
     constructed with the standard transform function."""
     training_points = array([[2, 4], [6, 8]])
     training_labels = array(['a', 'b'])
     model = KNN(trans_fun=KNN.STD_TRANS)
     model.fit(training_points, training_labels)
     # Queries near each training row must take that row's label.
     for query, expected in (([4, 5], 'a'), ([5, 6], 'b')):
         self.assertEqual(model.predict(array(query), 1), expected)
Example #4
0
def run() -> None:
    """Fit the shared ``knn`` model on the training data, predict a label
    for every test row, and write ``[id, prediction]`` pairs to CSV.

    Each test row is expected to carry its id in column 0 followed by the
    feature values.
    """
    test_data = get_test_data()
    data = get_train_data()
    knn.clear()
    knn.fit(data[0].values.tolist(), data[1].values.tolist())
    results = []
    # Unpack id/features instead of slicing + in-place `del`; also avoids
    # shadowing the builtin `id` as the original did.
    for row in test_data.values.tolist():
        row_id, *features = row
        results.append([int(row_id), knn.predict(features)])
    write_to_csv(results)
Example #5
0
def gridsearchKNN2(parametros, xTrain, yTrain, Xval, yVal):
    """Exhaustive grid search for a KNeighborsClassifier.

    Fits one classifier per combination in ``parametros`` (expects
    'n_neighbors' and 'metric' keys), scores it on the validation split
    with weighted F1, and returns the best-scoring parameter dict.

    Tracks the running best instead of the original parallel-list +
    ``index(max(...))`` pattern; strict '>' keeps the first of tied
    scores, matching the original tie-breaking.
    """
    best_params = None
    best_f1 = float('-inf')
    for params in ParameterGrid(parametros):
        model = KNeighborsClassifier(n_neighbors=params['n_neighbors'],
                                     metric=params['metric'])
        model.fit(xTrain, yTrain)
        pred = model.predict(Xval)
        f1 = metrics.f1_score(yVal, pred, average='weighted')
        print(f1)  # per-combination progress output, kept from the original
        if f1 > best_f1:
            best_f1 = f1
            best_params = params
    if best_params is None:
        # Preserve the original failure mode: an empty grid raised ValueError
        # via max([]) — don't silently return None.
        raise ValueError('empty parameter grid')
    return best_params
Example #6
0
        y = dataset['y']
        Xtest = dataset['Xtest']
        ytest = dataset['ytest']

        # part 1: implement knn.predict
        # part 2: print training and test errors for k=1,3,10 (use utils.classification_error)
        # part 3: plot classification boundaries for k=1 (use utils.plot_2dclassifier)
        # NOTE(review): the initializations below are overwritten on the first
        # loop iteration; they only keep the names bound if the loop were empty.
        model = None
        predict = None
        yhat = None
        Yhat = None
        tr_err = 0
        te_err = 0

        # Fit and score a KNN model for each neighbourhood size.
        for k in [1, 3, 10]:
            model = knn.fit(X, y, k)
            # The fitted model dict exposes its prediction function under 'predict'.
            predict = model['predict']
            # Predictions on the training set and the held-out test set.
            yhat = predict(model, X)
            Yhat = predict(model, Xtest)
            tr_err = utils.classification_error(y, yhat)
            te_err = utils.classification_error(ytest, Yhat)
            print("Training error for k =", k, "is =", tr_err)
            print("Testing error for k =", k, "is =", te_err)

        # Refit with k=1 solely for the decision-boundary plot (part 3).
        utils.plot_2dclassifier(knn.fit(X, y, 1), Xtest, ytest)
        plt.show()

    if question == '1.2':
        # Larger cities dataset; this branch continues beyond the visible chunk.
        dataset = utils.load_dataset('citiesBig1')
        X = dataset['X']
        y = dataset['y']
Example #7
0
import EEG_feature_extraction
import EEG_load

# main - train&test
#dataset = EEG_load.load_data("s16",20)

# Build the feature dataset for subject "s16"; the meaning of the second
# argument (20) is defined by generate_feature_data — confirm in that module.
dataset = EEG_feature_extraction.generate_feature_data("s16", 20)

X = dataset['X_train']
y = dataset['Y_train']
Xtest = dataset['X_test']
ytest = dataset['Y_test']

# Report train/test error for each neighbourhood size.
k = [1, 3, 10]
for i in range(3):
    model = knn.fit(X, y, k[i])
    y_pred = knn.predict(model, X)
    # Misclassification rate on the training split.
    train_error = np.mean(y_pred.flatten() != y)
    print("The current training error is: %r" % train_error)

    y_pred = knn.predict(model, Xtest)
    # Misclassification rate on the test split.
    test_error = np.mean(y_pred.flatten() != ytest)
    print("The current test error is: %r" % test_error)

# part 3: plot classification boundaries for k=1 (use utils.plot_2dclassifier)
# NOTE(review): despite the comment above, k[2] == 10 is used here, not k=1 —
# confirm which value is intended.
model1 = knn.fit(X, y, k[2])
utils.plot_2dclassifier(model1, X, y)
#plt.show()

# save figure
fname = "../s16-c20-mean.png"
Example #8
0
                            '1.1', '1.2', '2.1', '2.2', '3.1', '3.2', '3.3',
                            '4.1', '4.2', '4.3'
                        ])

    io_args = parser.parse_args()
    question = io_args.question

    if question == '1.1':
        # Small cities dataset: fit KNN with k=10 and report train/test error.
        dataset = utils.load_dataset('citiesSmall')
        X = dataset['X']
        y = dataset['y']
        Xtest = dataset['Xtest']
        ytest = dataset['ytest']
        #model = knn.fit(X,y,3)
        #model = knn.fit(X,y,1)
        model = knn.fit(X, y, 10)

        y_pred_tr = knn.predict(model, X)
        y_pred_te = knn.predict(model, Xtest)
        # NOTE(review): arguments are passed as (predictions, truth) here;
        # classification_error is presumably symmetric, but verify against
        # its definition.
        trerror = utils.classification_error(y_pred_tr, y)
        teerror = utils.classification_error(y_pred_te, ytest)

        print(trerror)
        print(teerror)

        # Decision-boundary plot on the test split.
        utils.plot_2dclassifier(model, Xtest, ytest)

        # part 1: implement knn.predict
        # part 2: print training and test errors for k=1,3,10 (use utils.classification_error)
        # part 3: plot classification boundaries for k=1 (use utils.plot_2dclassifier)
Example #9
0
print(feat_vec.shape)

# Keep the label column (column 0) and append the mean-normalised extra
# features from `add` (defined earlier, outside this chunk).
feat_vec = np.concatenate((feat_vec[:, :1], add / np.mean(add)), axis=1)

# Monte-Carlo cross-validation: 100 random 80/20 shuffle-and-split rounds.
accuracy = 0
for i in range(0, 100):

    np.random.shuffle(feat_vec)

    # Split index for the 80% training share.
    sli = 0.8 * len(feat_vec)

    X = np.split(feat_vec, [int(sli)], axis=0)

    # Column 0 holds the labels; remaining columns are features.
    y_train = np.transpose(X[0][:, :1])[0]
    X_train = X[0][:, 1:]

    y_test = np.transpose(X[1][:, :1])[0]
    X_test = X[1][:, 1:]

    # A fresh classifier per round; distance-weighted Manhattan 4-NN.
    knn = neighbors.KNeighborsClassifier(n_neighbors=4,
                                         weights='distance',
                                         metric='manhattan')
    knn.fit(X_train, y_train)

    test = knn.predict(X_test)

    # Accumulate the fraction of correct predictions for this split.
    acc = test == y_test
    accuracy += acc.sum() / len(acc)

# Mean accuracy over the 100 random splits.
print(accuracy / 100)