def train_and_run_dtree(decision_tree, X_train, X_test, y_train, y_test,
                        format_string, should_print_tree):
    """Fit a decision tree, score it on the test split and graph the result.

    Args:
        decision_tree: Object exposing ``fit``, ``predict`` and
            ``print_tree``.
        X_train: Training features passed to ``fit``.
        X_test: Test features passed to ``predict`` and to the graph helper.
        y_train: Training labels passed to ``fit``.
        y_test: True test labels, used for scoring and graphing.
        format_string: ``str.format`` template for the graph title; it
            receives the accuracy scaled by 100 as its only positional
            argument.
        should_print_tree: When truthy, ``decision_tree.print_tree()`` is
            called after scoring.
    """
    decision_tree.fit(X_train, y_train)

    # Score the fitted tree on the held-out test data.
    predictions = decision_tree.predict(X_test)
    test_accuracy = accuracy(predictions, y_test)

    if should_print_tree:
        decision_tree.print_tree()

    # The leading 2 sits where other callers pass their class count.
    title = format_string.format(100 * test_accuracy)
    class_estimation_graph(2, X_test, y_test, predictions, title)
def linearly_separable():
    """Train an SVM on the linearly separable two-class set and graph it.

    Data comes from ``create_linearly_separable_two_class``; 20% of it is
    requested as the test split. The accuracy on that split is embedded in
    the graph title.
    """
    features, labels = create_linearly_separable_two_class()
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_proportion=0.2)

    # NOTE(review): the title below says "SVM linear" but the kernel passed
    # in is Gaussian with sigma=1 -- confirm which one is intended.
    svm = SVM(Kernel.gaussian_kernel(sigma=1))
    svm.fit(X_train, y_train)

    predictions = svm.predict(X_test)
    test_accuracy = accuracy(predictions, y_test)

    title = "SVM linear %.2f%% Accuracy on Linearly Separable" % (test_accuracy * 100)
    class_estimation_graph(2, X_test, y_test, predictions, title)
def main():
    """Fit a k-nearest-neighbours classifier on synthetic data and graph it.

    Generates a 200-sample, four-class dataset, requests a 20% test split,
    fits ``KNN_Classification`` with ``k=4`` and passes the test
    predictions plus an accuracy title to ``class_estimation_graph``.
    """
    n_classes = 4
    neighbour_count = 4

    # Two informative features and no redundant ones; flip_y=0.1 is the
    # label-noise setting handed to make_classification.
    features, labels = datasets.make_classification(
        n_samples=200,
        n_features=2,
        n_informative=2,
        n_redundant=0,
        n_clusters_per_class=1,
        flip_y=0.1,
        n_classes=n_classes,
    )
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_proportion=0.2)

    knn = KNN_Classification(k=neighbour_count)
    knn.fit(X_train, y_train)

    predictions = knn.predict(X_test)
    test_accuracy = accuracy(predictions, y_test)

    title = "KNN %.2f%% Accuracy" % (test_accuracy * 100)
    class_estimation_graph(n_classes, X_test, y_test, predictions, title)
def with_data_error_with_slack():
    """Train an SVM with ``C=1`` on noisy two-class data and graph the result.

    The synthetic dataset has 200 samples, two informative features, two
    clusters per class and ``flip_y=0.1``. A 20% test split is requested
    and the accuracy on it is embedded in the graph title.
    """
    n_classes = 2

    features, labels = datasets.make_classification(
        n_samples=200,
        n_features=2,
        n_informative=2,
        n_redundant=0,
        n_clusters_per_class=2,
        flip_y=0.1,
        n_classes=n_classes,
    )
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_proportion=0.2)

    # NOTE(review): the title below says "SVM linear" but the kernel passed
    # in is Gaussian with sigma=1 -- confirm which one is intended.
    svm = SVM(Kernel.gaussian_kernel(sigma=1), C=1)
    svm.fit(X_train, y_train)

    predictions = svm.predict(X_test)
    test_accuracy = accuracy(predictions, y_test)

    title = "SVM linear %.2f%% Accuracy" % (test_accuracy * 100)
    class_estimation_graph(n_classes, X_test, y_test, predictions, title)
def main(_=None):
    """Fit ``LogisticRegressionTF`` on 1-D synthetic data and plot the output.

    The model's ``predict`` is called twice on the test split: once before
    and once after ``set_classification_boundary(0.5)``. The first result is
    scored with ``mean_square_error`` and the second with ``accuracy``; both
    are scattered against the single input feature together with the true
    labels.

    Args:
        _: Ignored. NOTE(review): presumably present so the function can be
            invoked by a runner that passes argv -- confirm against caller.
    """
    # A single feature keeps the data plottable as x versus label.
    X, y = datasets.make_classification(
        n_samples=200,
        n_features=1,
        n_informative=1,
        n_redundant=0,
        n_clusters_per_class=1,
        flip_y=0.1,
    )
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_proportion=0.2)

    logistic_reg = LogisticRegressionTF()
    logistic_reg.fit(X_train, y_train)

    # Prediction before a classification boundary is set; squeezed so it
    # lines up with y_test.
    y_pred_probability = np.squeeze(logistic_reg.predict(X_test))
    mse = mean_square_error(y_pred_probability, y_test)

    # Prediction again once the 0.5 boundary has been configured.
    logistic_reg.set_classification_boundary(0.5)
    y_pred_classified = np.squeeze(logistic_reg.predict(X_test))
    acc = accuracy(y_pred_classified, y_test)

    plt.figure()
    plt.scatter(X_test, y_test, color="Black", label="Actual")
    plt.scatter(X_test, y_pred_probability, color="Red",
                label="Classification Probability")
    plt.scatter(X_test, y_pred_classified, color="Blue",
                label="Rounded Prediction")
    plt.legend(loc='center right', fontsize=8)
    # The original title ended with an unmatched ")"; removed here.
    plt.title("Logistic Regression %.2f MSE, %.2f%% Accuracy" % (mse, acc * 100))
    plt.show()
def runBinaryClassification(self, algorithm, expected_accuracy=0.95):
    """
    Fit `algorithm` on the binary-class fixture and assert its test accuracy.

    The algorithm should already be set up with whatever extra checkers its
    learning method supports (for example an optimizer wrapped by
    NumericGradientChecker). The task is a single yes/no class decision.

    `expected_accuracy` is the value the measured accuracy must strictly
    exceed for the assertion to pass.
    """
    # Fixture described by its author as 2 classes, 2 features, no noise.
    train_features = binary_class_X['train']
    train_labels = binary_class_y['train']
    algorithm.fit(train_features, train_labels)

    test_features = binary_class_X['test']
    test_labels = binary_class_y['test']

    # Rounding covers algorithms that return likelihoods rather than labels.
    raw_output = algorithm.predict(test_features)
    predicted_labels = np.round(raw_output)

    measured = accuracy(predicted_labels, test_labels)
    self.assertGreater(measured, expected_accuracy)
def runMultiClassClassification(self, algorithm, expected_accuracy=0.90):
    """
    Fit `algorithm` on the multi-class fixture and assert its test accuracy.

    The algorithm should already be set up with whatever extra checkers its
    learning method supports (for example an optimizer wrapped by
    NumericGradientChecker), and it MUST accept multi-class targets.

    `expected_accuracy` is the value the measured accuracy must strictly
    exceed for the assertion to pass. Predictions are used as returned,
    without rounding.
    """
    # Fixture described by its author as 4 classes, 2 features, no noise.
    train_features = multi_class_X['train']
    train_labels = multi_class_y['train']
    algorithm.fit(train_features, train_labels)

    test_features = multi_class_X['test']
    test_labels = multi_class_y['test']
    predicted_labels = algorithm.predict(test_features)

    measured = accuracy(predicted_labels, test_labels)
    self.assertGreater(measured, expected_accuracy)
def main():
    """Fit one-vs-all logistic regression on synthetic data and graph it.

    Generates a 200-sample, four-class dataset, requests a 20% test split,
    fits ``OneVsAllClassification`` built from
    ``CreateDefaultLogisticRegression`` and passes the test predictions plus
    an accuracy title to ``class_estimation_graph``.
    """
    n_classes = 4

    # Two informative features and no redundant ones.
    dataset_options = {
        "n_samples": 200,
        "n_features": 2,
        "n_informative": 2,
        "n_redundant": 0,
        "n_clusters_per_class": 1,
        "flip_y": 0.1,
        "n_classes": n_classes,
    }
    features, labels = datasets.make_classification(**dataset_options)
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_proportion=0.2)

    one_vs_all = OneVsAllClassification(CreateDefaultLogisticRegression)
    one_vs_all.fit(X_train, y_train)

    predictions = one_vs_all.predict(X_test)
    test_accuracy = accuracy(predictions, y_test)

    title = (
        "Logistic Regression %.2f%% Accuracy.\nShape is true class, color is estimate"
        % (test_accuracy * 100)
    )
    class_estimation_graph(n_classes, X_test, y_test, predictions, title)