def initializeDataMaps(X, y, X_map, y_map):
    """Split ``X``/``y`` and store the pieces under 'train' and 'test' keys.

    Args:
        X: Feature data handed to ``train_test_split``.
        y: Target data handed to ``train_test_split``.
        X_map: Mapping that receives the feature splits (mutated in place).
        y_map: Mapping that receives the target splits (mutated in place).
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_proportion=0.2)

    # Fill 'train' first and 'test' second, features before targets.
    splits = (
        ('train', X_train, y_train),
        ('test', X_test, y_test),
    )
    for split_name, features, targets in splits:
        X_map[split_name] = features
        y_map[split_name] = targets
def main(num_samples=50, points_per_dimension=20):
    """Fit several classifiers on one synthetic data set and graph each one.

    Every fitted model is passed to ``decision_boundary_graph`` together with
    the held-out split. The SVM models are only built when the module-level
    flag ``svm_able_to_run`` is truthy; otherwise a warning is printed.

    Args:
        num_samples: Passed to ``datasets.make_classification`` as
            ``n_samples``.
        points_per_dimension: Forwarded unchanged to every
            ``decision_boundary_graph`` call.
    """
    X, y = datasets.make_classification(n_samples=num_samples, n_features=2,
                                        n_informative=2, n_redundant=0,
                                        n_clusters_per_class=2, flip_y=0.1)
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_proportion=0.2)

    logistic_reg = LogisticRegression(
        optimizer=GradientDescent(num_iterations=20000))
    logistic_reg.fit(X_train, y_train)
    decision_boundary_graph(X_test, y_test, logistic_reg,
                            "Logistic Regression",
                            points_per_dimension=points_per_dimension)

    if svm_able_to_run:
        linear_svm = SVM(Kernel.linear_kernel(), C=1)
        linear_svm.fit(X_train, y_train)
        decision_boundary_graph(X_test, y_test, linear_svm,
                                "SVM - Linear Kernel",
                                points_per_dimension=points_per_dimension)

        gaussian_svm = SVM(Kernel.gaussian_kernel(sigma=2), C=1)
        gaussian_svm.fit(X_train, y_train)
        decision_boundary_graph(X_test, y_test, gaussian_svm,
                                "SVM - Gaussian Kernel",
                                points_per_dimension=points_per_dimension)
    else:
        print("WARNING: cvxopt not installed, SVM will not work.")

    # NOTE(review): unlike the models above, both KNN models are fitted on
    # the full (X, y), which includes the points later graphed as the test
    # split. Confirm whether this is intentional.
    knn_k1 = KNN_Classification(k=1)
    knn_k1.fit(X, y)
    knn_k3 = KNN_Classification(k=3)
    knn_k3.fit(X, y)
    decision_boundary_graph(X_test, y_test, knn_k1, "KNN K=1",
                            points_per_dimension=points_per_dimension)
    # Title previously read "KKN K=3"; corrected to match the K=1 title.
    decision_boundary_graph(X_test, y_test, knn_k3, "KNN K=3",
                            points_per_dimension=points_per_dimension)
def main(should_print_tree=False):
    """Run the ID3 decision-tree regression demo on 1-D categorical data.

    Args:
        should_print_tree: Forwarded as the last argument of
            ``train_and_run_dtree``.
    """
    features, targets = create_1d_categorical_feature_regression()
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_proportion=0.2)

    # Only ID3 is exercised: CART can't be run with regression when the
    # features are categorical variables.
    regressor = DecisionTreeRegression(algorithm_to_use='ID3')
    title_template = 'Decision Tree ID3 (MSE {:.2f})'
    train_and_run_dtree(regressor, X_train, X_test, y_train, y_test,
                        title_template, should_print_tree)
def linearly_separable():
    """Fit a Gaussian-kernel SVM on linearly separable data and graph it.

    The data comes from ``create_linearly_separable_two_class``. The model is
    fitted on the training split, and the held-out predictions are scored
    with ``accuracy`` and drawn with ``class_estimation_graph``.
    """
    X, y = create_linearly_separable_two_class()
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_proportion=0.2)

    classifier = SVM(Kernel.gaussian_kernel(sigma=1))
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    acc = accuracy(y_pred, y_test)

    # The title used to say "SVM linear" although the kernel above is
    # Gaussian; it now names the kernel that is actually used.
    class_estimation_graph(
        2, X_test, y_test, y_pred,
        "SVM Gaussian Kernel %.2f%% Accuracy on Linearly Separable"
        % (acc * 100))
def main(should_print_tree=False):
    """Run the ID3 and CART decision-tree classifier demos on one split.

    Args:
        should_print_tree: Forwarded as the last argument of each
            ``train_and_run_dtree`` call.
    """
    features, labels = create_2d_categorical_feature_two_class()
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_proportion=0.2)

    # Both algorithms see the same split; ID3 runs first, then CART.
    runs = (
        ('ID3', 'Decision Tree ID3 (accuracy {:.2f}%)'),
        ('CART', 'Decision Tree CART (accuracy {:.2f}%)'),
    )
    for algorithm, title_template in runs:
        tree = DecisionTreeClassifier(algorithm_to_use=algorithm)
        train_and_run_dtree(tree, X_train, X_test, y_train, y_test,
                            title_template, should_print_tree)
def main():
    """Classify a four-class synthetic data set with KNN (k=4) and graph it.

    The model is fitted on the training split. Predictions for the held-out
    split are scored with ``accuracy`` and passed to
    ``class_estimation_graph``.
    """
    class_count = 4
    # Two informative features, one cluster per class.
    dataset_options = dict(n_samples=200, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1,
                           flip_y=0.1, n_classes=class_count)
    features, labels = datasets.make_classification(**dataset_options)
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_proportion=0.2)

    neighbour_count = 4
    model = KNN_Classification(k=neighbour_count)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    score = accuracy(predictions, y_test)

    title = "KNN %.2f%% Accuracy" % (score * 100)
    class_estimation_graph(class_count, X_test, y_test, predictions, title)
def main(num_iterations=200, iterations_per_update=20):
    """Fit logistic regression through an ``OptimizerCostGraph`` wrapper.

    Args:
        num_iterations: Passed to ``GradientDescent`` as ``num_iterations``.
        iterations_per_update: Passed to ``OptimizerCostGraph`` as
            ``iterations_per_update``.
    """
    # A single feature, as in the other one-dimensional demos.
    features, labels = datasets.make_classification(
        n_samples=200, n_features=1, n_informative=1, n_redundant=0,
        n_clusters_per_class=1, flip_y=0.1)
    # Only the training part of the split is used here.
    X_train, _X_test, y_train, _y_test = train_test_split(
        features, labels, test_proportion=0.2)

    descent = GradientDescent(num_iterations=num_iterations)
    graphing_optimizer = OptimizerCostGraph(
        descent, iterations_per_update=iterations_per_update)
    model = LogisticRegression(optimizer=graphing_optimizer)
    model.fit(X_train, y_train)
def with_data_error_force_accurate():
    """Check that a linear SVM built without ``C`` fails on noisy data.

    Raises:
        RuntimeError: If ``fit`` completes without raising
            ``AssertionError``.
    """
    class_count = 2
    # Two features, with flip_y=0.1 passed to the generator.
    features, labels = datasets.make_classification(
        n_samples=200, n_features=2, n_informative=2, n_redundant=0,
        n_clusters_per_class=1, flip_y=0.1, n_classes=class_count)
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_proportion=0.2)

    model = SVM(Kernel.linear_kernel())
    try:
        model.fit(X_train, y_train)
    except AssertionError:
        # We expect an AssertionError since the problem is non-separable.
        return
    raise RuntimeError("Should not have successfully fit the model")
def with_data_error_with_slack():
    """Fit a Gaussian-kernel SVM with ``C=1`` on noisy data and graph it.

    The model is fitted on the training split. Predictions for the held-out
    split are scored with ``accuracy`` and drawn with
    ``class_estimation_graph``.
    """
    n_classes = 2
    # Two features, two clusters per class, flip_y=0.1.
    X, y = datasets.make_classification(n_samples=200, n_features=2,
                                        n_informative=2, n_redundant=0,
                                        n_clusters_per_class=2, flip_y=0.1,
                                        n_classes=n_classes)
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_proportion=0.2)

    classifier = SVM(Kernel.gaussian_kernel(sigma=1), C=1)
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    acc = accuracy(y_pred, y_test)

    # The title used to say "SVM linear" although the kernel above is
    # Gaussian; it now names the kernel that is actually used.
    class_estimation_graph(n_classes, X_test, y_test, y_pred,
                           "SVM Gaussian Kernel %.2f%% Accuracy"
                           % (acc * 100))
def main(_=None):
    """Fit ``LogisticRegressionTF`` on 1-D data and plot its predictions.

    The model predicts twice on the held-out split: once before and once
    after ``set_classification_boundary(0.5)``. The first result is scored
    with ``mean_square_error`` and the second with ``accuracy``; both are
    plotted against the actual labels.

    Args:
        _: Unused.
    """
    # Just has one feature to make it easy to graph.
    X, y = datasets.make_classification(n_samples=200, n_features=1,
                                        n_informative=1, n_redundant=0,
                                        n_clusters_per_class=1, flip_y=0.1)
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_proportion=0.2)

    logistic_reg = LogisticRegressionTF()
    logistic_reg.fit(X_train, y_train)

    # Predictions made before the classification boundary is set.
    y_pred_probability = np.squeeze(logistic_reg.predict(X_test))
    mse = mean_square_error(y_pred_probability, y_test)

    # Predictions made after setting the boundary to 0.5.
    logistic_reg.set_classification_boundary(0.5)
    y_pred_classified = np.squeeze(logistic_reg.predict(X_test))
    acc = accuracy(y_pred_classified, y_test)

    plt.figure()
    plt.scatter(X_test, y_test, color="Black", label="Actual")
    plt.scatter(X_test, y_pred_probability, color="Red",
                label="Classification Probability")
    plt.scatter(X_test, y_pred_classified, color="Blue",
                label="Rounded Prediction")
    plt.legend(loc='center right', fontsize=8)
    # The title previously ended in ")" with no opening parenthesis.
    plt.title("Logistic Regression (%.2f MSE, %.2f%% Accuracy)"
              % (mse, acc * 100))
    plt.show()
def main(_=None):
    """Fit ``LinearRegressionTF`` on 1-D data and plot its estimate.

    Predictions for the held-out split are scored with
    ``mean_square_error`` and plotted over the actual targets.

    Args:
        _: Unused.
    """
    # Just has one feature to make it easy to graph.
    X, y = datasets.make_regression(n_samples=200, n_features=1,
                                    bias=random.uniform(-10, 10), noise=5)
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_proportion=0.2)

    linear_reg = LinearRegressionTF()
    linear_reg.fit(X_train, y_train)
    y_pred = np.squeeze(linear_reg.predict(X_test))
    mse = mean_square_error(y_pred, y_test)

    plt.figure()
    plt.scatter(X_test, y_test, color="Black", label="Actual")
    plt.plot(X_test, y_pred, label="Estimate")
    plt.legend(loc='lower right', fontsize=8)
    # The title previously ended in ")" with no opening parenthesis.
    plt.title("Linear Regression (%.2f MSE)" % mse)
    plt.show()
def main():
    """Run one-vs-all logistic regression on four classes and graph it.

    ``OneVsAllClassification`` is built from
    ``CreateDefaultLogisticRegression`` and fitted on the training split.
    Predictions for the held-out split are scored with ``accuracy`` and
    passed to ``class_estimation_graph``.
    """
    class_count = 4
    # Two informative features, one cluster per class.
    dataset_options = dict(n_samples=200, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1,
                           flip_y=0.1, n_classes=class_count)
    features, labels = datasets.make_classification(**dataset_options)
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_proportion=0.2)

    one_vs_all = OneVsAllClassification(CreateDefaultLogisticRegression)
    one_vs_all.fit(X_train, y_train)
    predictions = one_vs_all.predict(X_test)
    score = accuracy(predictions, y_test)

    title = ("Logistic Regression %.2f%% Accuracy.\nShape is true class, color is estimate"
             % (score * 100))
    class_estimation_graph(class_count, X_test, y_test, predictions, title)
def main():
    """Fit KNN regression (k=4) on 1-D data and scatter-plot the result.

    Predictions for the held-out split are scored with
    ``mean_square_error`` and plotted alongside the actual targets.
    """
    # A single feature keeps the data graphable.
    features, targets = datasets.make_regression(
        n_samples=200, n_features=1, bias=150, noise=4)
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_proportion=0.2)

    knn = KNN_Regression(k=4)
    knn.fit(X_train, y_train)
    predictions = knn.predict(X_test)
    error = mean_square_error(predictions, y_test)

    # Actual values are drawn first, predictions second.
    series = (
        (y_test, "Black", "Actual"),
        (predictions, "Red", "Prediction"),
    )
    for values, colour, caption in series:
        plt.scatter(X_test, values, color=colour, label=caption)
    plt.legend(loc='lower right', fontsize=8)
    plt.title("KNN Regression (%.2f MSE)" % error)
    plt.show()
def main():
    """Compare default ``LinearRegression`` with a gradient-descent variant.

    Two models are fitted on the same training split: one built with no
    arguments and one built with ``GradientDescent(num_iterations=2500)`` as
    its optimizer. Both are scored with ``mean_square_error`` on the
    held-out split and plotted over the actual targets.
    """
    # Just has one feature to make it easy to graph.
    X, y = datasets.make_regression(n_samples=200, n_features=1,
                                    bias=random.uniform(-10, 10), noise=5)
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_proportion=0.2)

    # Model built with default arguments (labelled "Normal Eq" in the title).
    linear_reg = LinearRegression()
    linear_reg.fit(X_train, y_train)
    y_pred = linear_reg.predict(X_test)
    mse = mean_square_error(y_pred, y_test)

    # Same model class, built with an explicit gradient-descent optimizer.
    linear_reg_w_grad_desc = LinearRegression(
        optimizer=GradientDescent(num_iterations=2500))
    linear_reg_w_grad_desc.fit(X_train, y_train)
    y_pred_w_grad_desc = linear_reg_w_grad_desc.predict(X_test)
    mse_w_grad_desc = mean_square_error(y_pred_w_grad_desc, y_test)

    plt.figure()
    plt.scatter(X_test, y_test, color="Black", label="Actual")
    plt.plot(X_test, y_pred, label="Estimate")
    plt.plot(X_test, y_pred_w_grad_desc, label="Estimate using Optimizer")
    plt.legend(loc='lower right', fontsize=8)
    # The title previously ended in ")" with no opening parenthesis.
    plt.title(
        "Linear Regression (%.2f MSE Normal Eq, %.2f MSE Gradient Descent)"
        % (mse, mse_w_grad_desc))
    plt.show()
""" self.fit(X, y) return self.transform(X) if __name__ == "__main__": # Just include one relevant feature, which we will graph upon. # Given far too many features with not enough samples, so will often # overfit when not pruning. X, y = datasets.make_regression(n_samples=100, n_features=30, n_informative=1, noise=5) X_train, X_test, y_train, y_test = train_test_split(X, y, test_proportion=0.2) # Without any pruning reg_orig = linear_regression.LinearRegression() reg_orig.fit(X_train, y_train) y_pred_orig = reg_orig.predict(X_test) orig_mse = mean_square_error(y_pred_orig, y_test) # Setup pruner and prune the # of feautres features down to 1 pruner = FeaturePruner(linear_regression.LinearRegression(), 1) X_train_pruned = pruner.fit_transform(X_train, y_train) X_test_pruned = pruner.transform(X_test) reg_pruned = linear_regression.LinearRegression() reg_pruned.fit(X_train_pruned, y_train)