def test_project_data_pca(self):
    """Check the first projected value of the normalized ``ex7data1`` sample.

    The data set is normalized with ``ml.feature_normalize``, the first
    element returned by ``ml.pca`` is used as the projection basis, and
    the top-left entry of the projection must round to 1.496.
    """
    normalized = ml.feature_normalize(self.ex7data1['X'])[0]
    # Only the first element of the pca() result is passed on; presumably
    # this is the matrix of principal directions -- confirm against ml.pca.
    components = ml.pca(normalized)[0]
    # The trailing 1 is presumably the number of dimensions to keep.
    projected = ml.project_data_pca(normalized, components, 1)
    first_value = round(projected[0][0], 3)
    self.assertEqual(first_value, 1.496)
X = utils.getData() X[All] = machine_learning.norm(X[All]) print "Done." print "Hierarchial clustering..." hierarchy = machine_learning.recursiveCluster(X[dAll], size=500) Y = machine_learning.flatten(hierarchy, min=40) X = X[Y >= 0] # Eliminating outliers Y = Y[Y >= 0] y_values = np.unique(Y) for i in range(0, len(y_values)): Y[Y == y_values[i]] = i print "Done." print "Visualizing..." machine_learning.pca(X[dAll], Y) machine_learning.hist(X[["time"]], Y) print "Done." print "Shifting and randomizing..." shift = 10 X, Y = utils.shiftLabels(X, Y, shift) X, Y = utils.randomize(X, Y) print 'Done.' print "Choosing best parameters for classifier..." clf, clf_acc, test_acc = machine_learning.best_classifier(X[All], Y) print "Done." print "Classifier accuracy: ", clf_acc print "Test data accuracy: ", test_acc print "Classifier model: ", clf
def test_pca(self):
    """Check the first element returned by ``ml.pca`` for ``ex7data1``.

    The data set is normalized with ``ml.feature_normalize`` and passed to
    ``ml.pca``; element 0 of the result must match the expected 2x2 matrix
    entry by entry, to the six decimal places the reference values carry.
    """
    x1 = ml.feature_normalize(self.ex7data1['X'])[0]
    ex1 = ml.pca(x1)
    expected = np.array([[-0.707107, -0.707107],
                         [-0.707107, 0.707107]])
    # Compare element-wise. The previous check compared `ex1[0].all()` with
    # `expected.all()`, i.e. two booleans, so it passed for any result that
    # merely had no zero entries and never looked at the actual values.
    # atol matches the precision of the rounded reference values above.
    np.testing.assert_allclose(ex1[0], expected, atol=1e-6)