id = np.random.randint(0, nsamp, ntest) d_test = dat[id, :] c_test_true = clas[id] d_train = np.delete(dat, id, 0) c_train = np.delete(clas, id) print 'extracting subsample of points for training and classification test' for i in range(ntest): print id[i], d_test[i, :] print '' print 'fitting k nearest neighbour' #perform a k nearest neighbour algorithm on the test data to predict which level of heart #disease will be present in a test patient c_test = hdp.k_test(d_test, d_train, c_train, distance=2, k=3) print 'comparing the test with true classifications' for i in range(ntest): print 'test class: true class ', c_test[i], c_test_true[i] idmatch = np.where(c_test == c_test_true)[0] nmatch = np.shape(idmatch)[0] print 'accuracy...', 1. * nmatch / ntest #plot the individual pairs of dimensions in the parameter space to investigate the correlations if (diagplots == 1): os.system('rm -rf diagplots') os.system('mkdir diagplots') idclas = np.unique(clas) nclass = np.shape(idclas)[0]
a[:, :] = 1. * X_transformed XT_train = np.delete(a, itest, 0) classifier.fit(XT_train[:, :k], trainclas) # transform new data using already fitted pca # (don't re-fit the pca) newdata_transformed = pca.transform(testdat) # predict labels using the trained classifier pred_labels = classifier.predict(newdata_transformed[:, :k]) #now use my k nearest neighbour knn_test = hdp.k_test(newdata_transformed[:, :k], XT_train[:, :k], trainclas, distance=2, k=3) print 'pca aided classification' fracgood = np.shape(np.where(pred_labels == testclas)[0])[0] / 1. / ntest fracgood_k = np.shape(np.where(knn_test == testclas)[0])[0] / 1. / ntest for i in range(ntest): print pred_labels[i], testclas[i], knn_test[i] print 'frac good', fracgood, fracgood_k #now without using pca classifier = DecisionTreeClassifier() X_transformed = pca.fit_transform(iris.data) classifier.fit(traindat[:, :k], trainclas)
# Report the accuracy labelled "neural net sklearn" and store its elapsed
# time and accuracy at position (i, iv).
# presumably accuracy, t0 and t1 were set by the section just above -- verify
print('Accuracy neural net sklearn:', round(accuracy, 2), '%.')
cpu_nn_skl[i, iv] = t1 - t0
acc_nn_skl[i, iv] = accuracy

#!!!!!!!!! K_nearest neighbour #!!!!!!!!!!!!!!!
# Only the k_test call sits between the two clock reads.
t0 = time.time()
opknn = cpc.k_test(test_features, train_features, train_labels,
                   distance=2, k=3)
t1 = time.time()

# Percentage of predictions equal to the corresponding test label.
predictions = np.array(opknn)
ntot = predictions.shape[0]
ncorrect = np.nonzero(predictions == test_labels)[0].size
# Divide first, then scale, so the floating-point result matches exactly.
accuracy = 100 * (float(ncorrect) / ntot)
print('Accuracy knn:', round(accuracy, 2), '%.')

# Store elapsed wall-clock seconds and accuracy at position (i, iv).
cpu_knn[i, iv] = t1 - t0
acc_knn[i, iv] = accuracy