def testMSE(self):
    X = np.fromstring('1 2 3 4', dtype=int, sep=" ")
    Y = np.fromstring('2 5 3 7', dtype=int, sep=" ")
    # Differences: 1 3 0 3, squared: 1 9 0 9, so MSE = 19/4
    self.assertEqual(mean_square_error(X, X), 0)
    self.assertEqual(mean_square_error(X, Y), 19/4)
def incorrect_cost_function_v4(X, theta, y):
    """ Incorrectly computes the cost function with a major increase in cost """
    pred = estimator_function(X, theta)
    cost = mean_square_error(pred, y) / 2 + 100000
    m = len(y)
    gradient = 1 / m * np.dot(X.T, pred - y)
    return (cost, gradient)
def correct_cost_function(X, theta, y):
    """ Correctly computes cost and gradient for MSE """
    pred = estimator_function(X, theta)
    cost = mean_square_error(pred, y) / 2
    m = len(y)
    gradient = 1 / m * np.dot(X.T, pred - y)
    return (cost, gradient)
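# A minimal self-contained sketch (not part of the repo) of the numeric gradient
# check that correct_cost_function should pass: the analytic gradient of the
# half-MSE cost is compared against a central finite-difference estimate.
# The local helper stands in for estimator_function / mean_square_error and
# assumes a plain linear estimator pred = np.dot(X, theta).
def _sketch_numeric_gradient_check():
    import numpy as np

    def cost_and_gradient(X, theta, y):
        pred = np.dot(X, theta)
        cost = np.mean((pred - y) ** 2) / 2
        gradient = np.dot(X.T, pred - y) / len(y)
        return cost, gradient

    rng = np.random.default_rng(0)
    X = np.hstack([np.ones((20, 1)), rng.normal(size=(20, 2))])  # bias column + 2 features
    theta = rng.normal(size=3)
    y = rng.normal(size=20)

    _, analytic = cost_and_gradient(X, theta, y)
    eps = 1e-6
    numeric = np.zeros_like(theta)
    for i in range(len(theta)):
        step = np.zeros_like(theta)
        step[i] = eps
        # Central difference of the cost along coordinate i
        numeric[i] = (cost_and_gradient(X, theta + step, y)[0]
                      - cost_and_gradient(X, theta - step, y)[0]) / (2 * eps)
    assert np.allclose(analytic, numeric, atol=1e-5)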
def incorrect_cost_function_v5(X, theta, y):
    """ Incorrectly computes the cost function, has a minor decrease in gradient """
    pred = estimator_function(X, theta)
    cost = mean_square_error(pred, y) / 2
    m = len(y)
    gradient = 1 / m * np.dot(X.T, pred - y)
    gradient[0] -= 1e-5
    return (cost, gradient)
def incorrect_cost_function_v3(X, theta, y):
    """ Incorrectly computes cost and gradient for MSE,
    forgets to divide the MSE by 2.
    """
    pred = estimator_function(X, theta)
    cost = mean_square_error(pred, y)
    m = len(y)
    gradient = 1 / m * np.dot(X.T, pred - y)
    return (cost, gradient)
def incorrect_cost_function_v2(X, theta, y):
    """ Incorrectly computes cost and gradient for MSE,
    returns the negative value of the gradient, which would cause
    the algorithm to increase the error.
    """
    pred = estimator_function(X, theta)
    cost = mean_square_error(pred, y) / 2
    m = len(y)
    gradient = -1 / m * np.dot(X.T, pred - y)
    return (cost, gradient)
def incorrect_cost_function_v1(X, theta, y):
    """ Incorrectly computes cost and gradient for MSE,
    forgets to subtract y from the prediction when calculating the gradient.
    """
    pred = estimator_function(X, theta)
    cost = mean_square_error(pred, y) / 2
    m = len(y)
    gradient = 1 / m * np.dot(X.T, pred)
    return (cost, gradient)
def train_and_run_dtree(decision_tree, X_train, X_test, y_train, y_test,
                        format_title, should_print_tree):
    decision_tree.fit(X_train, y_train)
    y_pred = decision_tree.predict(X_test)
    if should_print_tree:
        decision_tree.print_tree()

    mse = mean_square_error(y_pred, y_test)
    display_2d_regression(X_test[:, 0], X_test[:, 1], y_pred, y_test,
                          format_title.format(mse))
def main():
    # Just has one feature to make it easy to graph.
    X, y = datasets.make_regression(n_samples=200, n_features=1,
                                    bias=random.uniform(-10, 10), noise=5)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_proportion=0.2)

    linear_reg = LinearRegression()
    linear_reg.fit(X_train, y_train)
    y_pred = linear_reg.predict(X_test)
    mse = mean_square_error(y_pred, y_test)

    linear_reg_w_grad_desc = LinearRegression(optimizer=GradientDescent(num_iterations=2500))
    linear_reg_w_grad_desc.fit(X_train, y_train)
    y_pred_w_grad_desc = linear_reg_w_grad_desc.predict(X_test)
    mse_w_grad_desc = mean_square_error(y_pred_w_grad_desc, y_test)

    plt.figure()
    plt.scatter(X_test, y_test, color="Black", label="Actual")
    plt.plot(X_test, y_pred, label="Estimate")
    plt.plot(X_test, y_pred_w_grad_desc, label="Estimate using Optimizer")
    plt.legend(loc='lower right', fontsize=8)
    plt.title("Linear Regression (%.2f MSE Normal Eq, %.2f MSE Gradient Descent)"
              % (mse, mse_w_grad_desc))
    plt.show()
def main(_=None):
    # Just has one feature to make it easy to graph.
    X, y = datasets.make_classification(n_samples=200, n_features=1, n_informative=1,
                                        n_redundant=0, n_clusters_per_class=1,
                                        flip_y=0.1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_proportion=0.2)

    logistic_reg = LogisticRegressionTF()
    logistic_reg.fit(X_train, y_train)
    y_pred_probability = logistic_reg.predict(X_test)
    y_pred_probability = np.squeeze(y_pred_probability)
    mse = mean_square_error(y_pred_probability, y_test)

    logistic_reg.set_classification_boundary(0.5)
    y_pred_classified = logistic_reg.predict(X_test)
    y_pred_classified = np.squeeze(y_pred_classified)
    acc = accuracy(y_pred_classified, y_test)

    plt.figure()
    plt.scatter(X_test, y_test, color="Black", label="Actual")
    plt.scatter(X_test, y_pred_probability, color="Red", label="Classification Probability")
    plt.scatter(X_test, y_pred_classified, color="Blue", label="Rounded Prediction")
    plt.legend(loc='center right', fontsize=8)
    plt.title("Logistic Regression (%.2f MSE, %.2f%% Accuracy)" % (mse, acc * 100))
    plt.show()
def runSingleLinearRegression(self, algorithm, max_mse):
    """
    The algorithm should have been initialized with any additional checkers
    available for its learning method - like having the optimizer wrapped by
    NumericGradientChecker.

    Will be regressing with a single output variable per sample.
    """
    # Very simple dataset: 2 features and no noise.
    X_train = simple_linear_X['train']
    y_train = simple_linear_y['train']
    algorithm.fit(X_train, y_train)

    X_test = simple_linear_X['test']
    y_test = simple_linear_y['test']
    y_pred = algorithm.predict(X_test)

    # Expect a very good MSE, since there is no noise.
    self.assertGreater(max_mse, mean_square_error(y_pred, y_test))
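# A hypothetical usage sketch for the helper above; the exact constructor
# signatures are assumptions based on how LinearRegression, GradientDescent,
# and NumericGradientChecker are referenced elsewhere in these tests.
def testLinearRegressionWithGradientChecking(self):
    # NumericGradientChecker wrapping the optimizer is an assumption taken
    # from the docstring of runSingleLinearRegression; max_mse is an
    # illustrative threshold, not a value from the repo.
    optimizer = NumericGradientChecker(GradientDescent(num_iterations=2500))
    self.runSingleLinearRegression(LinearRegression(optimizer=optimizer),
                                   max_mse=1e-2)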
def main(_=None):
    # Just has one feature to make it easy to graph.
    X, y = datasets.make_regression(n_samples=200, n_features=1,
                                    bias=random.uniform(-10, 10), noise=5)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_proportion=0.2)

    linear_reg = LinearRegressionTF()
    linear_reg.fit(X_train, y_train)
    y_pred = linear_reg.predict(X_test)
    y_pred = np.squeeze(y_pred)
    mse = mean_square_error(y_pred, y_test)

    plt.figure()
    plt.scatter(X_test, y_test, color="Black", label="Actual")
    plt.plot(X_test, y_pred, label="Estimate")
    plt.legend(loc='lower right', fontsize=8)
    plt.title("Linear Regression (%.2f MSE)" % mse)
    plt.show()
def main():
    # Just using one feature to make it graphable.
    X, y = datasets.make_regression(n_samples=200, n_features=1, bias=150, noise=4)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_proportion=0.2)

    reg = KNN_Regression(k=4)
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)
    mse = mean_square_error(y_pred, y_test)

    plt.scatter(X_test, y_test, color="Black", label="Actual")
    plt.scatter(X_test, y_pred, color="Red", label="Prediction")
    plt.legend(loc='lower right', fontsize=8)
    plt.title("KNN Regression (%.2f MSE)" % mse)
    plt.show()
# Given far too many features with not enough samples, so it will often
# overfit when not pruning.
X, y = datasets.make_regression(n_samples=100, n_features=30, n_informative=1, noise=5)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_proportion=0.2)

# Without any pruning
reg_orig = linear_regression.LinearRegression()
reg_orig.fit(X_train, y_train)
y_pred_orig = reg_orig.predict(X_test)
orig_mse = mean_square_error(y_pred_orig, y_test)

# Set up the pruner and prune the number of features down to 1
pruner = FeaturePruner(linear_regression.LinearRegression(), 1)
X_train_pruned = pruner.fit_transform(X_train, y_train)
X_test_pruned = pruner.transform(X_test)
reg_pruned = linear_regression.LinearRegression()
reg_pruned.fit(X_train_pruned, y_train)
y_pred_pruned = reg_pruned.predict(X_test_pruned)
pruned_mse = mean_square_error(y_pred_pruned, y_test)

actual = plt.scatter(X_test_pruned, y_test, color="Black", label="Actual Value")
def _cost_function(X, theta, y):
    pred = np.dot(X, theta)
    cost = mean_square_error(pred, y) / 2
    m = len(y)
    gradient = 1 / m * np.dot(X.T, pred - y)
    return (cost, gradient)
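# For reference, the standard least-squares algebra this implements: with m
# samples, the cost is J(theta) = (1/(2m)) * sum_i (x_i . theta - y_i)^2, and
# its gradient is dJ/dtheta = (1/m) * X^T (X theta - y), which is exactly the
# (cost, gradient) pair returned above.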