def classify_nn(X, y, k):
    """Fit a nearest-neighbor classifier on a random split and print its accuracy.

    The rows of ``X`` (and the matching entries of ``y``) are shuffled with
    ``np.random.permutation``; the last 25% (rounded down) become the test
    set and the rest the training set.  The random generator is not seeded
    here, so every call produces a different split.

    Args:
        X: Array of samples, indexed along its first axis.
        y: Array of labels, indexable with the same row indices as ``X``.
        k: Value shown in the ``k=`` field of the summary line.
            NOTE(review): k is not passed to the classifier, which is built
            with its default settings -- confirm whether n_neighbors=k was
            intended.

    Returns:
        None.  The classifier's repr and a one-line accuracy summary are
        printed to stdout.
    """
    # Local import, as in the original block.
    from sklearn.neighbors import NeighborsClassifier

    m = X.shape[0]
    m_test = int(m * 0.25)
    m_train = m - m_test

    # Random permutation of the row indices: head -> train, tail -> test.
    indices = np.random.permutation(m)
    train_idx = indices[:m_train]
    test_idx = indices[m_train:]
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    # Create and fit a nearest-neighbor classifier.
    knn = NeighborsClassifier()
    knn.fit(X_train, y_train)
    # Single parenthesised argument: prints identically on Python 2 and 3.
    print('knn=%s' % knn)

    y_pred = knn.predict(X_test)
    correct = y_pred == y_test
    num_tests = correct.shape[0]
    num_correct = correct.sum()
    # Floor division keeps the truncated integer percentage regardless of
    # which Python version's "/" semantics are in effect.
    percent = int(100 * num_correct // num_tests)
    print('k=%2d: Num tests=%6d correct=%6d = %2d%%'
          % (k, num_tests, num_correct, percent))

    # Disabled debugging aid: per-sample dump of prediction vs. truth.
    # NOTE(review): the source formatting was lost; exit() is assumed to
    # belong to this disabled block -- confirm.
    if False:
        for predicted, actual, is_correct in zip(y_pred, y_test, correct):
            print(' %d==%d => %d' % (predicted, actual, is_correct))
        exit()
def run_tests(classifier, test_data, c_type, name, k_kmeans, knn_ks, t_shelf):
    """Label the test data once per value in ``knn_ks`` and record the outcome.

    For every ``k_knn`` in ``knn_ks`` the test samples are stacked with
    ``np.vstack``, predicted, and the prediction is timed.  The timing is
    passed to ``save_time``, the predictions and targets to ``save_results``
    and ``print_results``.

    Args:
        classifier: For ``c_type == 'knn'`` a ``(means, labels)`` pair used
            to fit a fresh ``NeighborsClassifier(n_neighbors=k_knn)`` on each
            iteration; for ``c_type == 'svm'`` an object whose ``predict``
            method is called directly.
        test_data: Sequence of arrays accepted by ``np.vstack``; also passed
            to ``make_targets`` to build the expected labels.
        c_type: Either ``'knn'`` or ``'svm'``.
        name: Forwarded unchanged to ``save_time`` and ``save_results``.
        k_kmeans: Forwarded unchanged to ``save_time`` and ``save_results``.
        knn_ks: Iterable of neighbor counts; one full prediction run is made
            per element (for ``'svm'`` the same classifier is simply re-run).
        t_shelf: Forwarded unchanged to ``save_time``.

    Raises:
        ValueError: If ``c_type`` is neither ``'knn'`` nor ``'svm'`` (raised
            on the first iteration, so an empty ``knn_ks`` remains a no-op).
    """
    for k_knn in knn_ks:
        if c_type == 'knn':
            means, labels = classifier
            model = NeighborsClassifier(n_neighbors=k_knn)
            model.fit(means, labels)
        elif c_type == 'svm':
            model = classifier
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError at the predict() call.
            raise ValueError(
                "unknown c_type %r; expected 'knn' or 'svm'" % (c_type,))

        # results: every test sample is labeled by the classifier
        X = np.vstack(test_data)

        # Only the predict() call itself is timed.
        start = time.time()
        results = model.predict(X)
        elapsed_time = time.time() - start
        save_time(elapsed_time, len(X), t_shelf, name, k_kmeans, k_knn)

        targets = make_targets(test_data)
        save_results(results, targets, name, k_kmeans, k_knn)
        print_results(results, targets)