Example #1
def testMSE(self):
    X = np.fromstring('1 2 3 4', dtype=int, sep=" ")
    Y = np.fromstring('2 5 3 7', dtype=int, sep=" ")
    # Difference: 1 3 0 3

    self.assertEqual(mean_square_error(X, X), 0)
    self.assertEqual(mean_square_error(X, Y),
                     19/4)
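The test above pins down the expected behaviour of mean_square_error: identical arrays score 0, and X vs. Y scores 19/4 (squared differences 1, 9, 0, 9 averaged over four samples). A minimal implementation consistent with those assertions could look like the sketch below; the exact signature used in the repository is an assumption.

import numpy as np

def mean_square_error(y_pred, y_true):
    # Mean of the squared element-wise differences between prediction and target.
    return np.mean((np.asarray(y_pred) - np.asarray(y_true)) ** 2)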
def incorrect_cost_function_v4(X, theta, y):
    """
    Incorrectly computes the cost function: adds a large constant to the cost.
    """
    pred = estimator_function(X, theta)

    cost = mean_square_error(pred, y) / 2 + 100000
    m = len(y)
    gradient = 1 / m * np.dot(X.T, pred - y)
    return (cost, gradient)
def correct_cost_function(X, theta, y):
    """
    Correctly computes cost and gradient for MSE
    """
    pred = estimator_function(X, theta)

    cost = mean_square_error(pred, y) / 2
    m = len(y)
    gradient = 1 / m * np.dot(X.T, pred - y)
    return (cost, gradient)
def incorrect_cost_function_v5(X, theta, y):
    """
    Incorrectly computes the cost function: the first gradient component is
    nudged down slightly.
    """
    pred = estimator_function(X, theta)

    cost = mean_square_error(pred, y) / 2
    m = len(y)
    gradient = 1 / m * np.dot(X.T, pred - y)
    gradient[0] -= 1e-5
    return (cost, gradient)
def incorrect_cost_function_v3(X, theta, y):
    """
    Incorrectly computes cost and gradient for MSE: forgets to divide the MSE
    by 2.
    """
    pred = estimator_function(X, theta)

    cost = mean_square_error(pred, y)
    m = len(y)
    gradient = 1 / m * np.dot(X.T, pred - y)
    return (cost, gradient)
def incorrect_cost_function_v2(X, theta, y):
    """
    Incorrectly computes cost and gradient for MSE: returns the negated
    gradient, which would cause the algorithm to increase the error.
    """
    pred = estimator_function(X, theta)

    cost = mean_square_error(pred, y) / 2
    m = len(y)
    gradient = -1 / m * np.dot(X.T, pred - y)
    return (cost, gradient)
def incorrect_cost_function_v1(X, theta, y):
    """
    Incorrectly computes cost and gradient for MSE: forgets to subtract y from
    the predictions when calculating the gradient.
    """
    pred = estimator_function(X, theta)

    cost = mean_square_error(pred, y) / 2
    m = len(y)
    gradient = 1 / m * np.dot(X.T, pred)
    return (cost, gradient)
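All of the cost-function variants above rely on an estimator_function helper and are clearly meant to exercise a gradient checker (the NumericGradientChecker referenced further down). A hedged sketch of both, assuming the estimator is a plain linear map of X and theta, is shown below; the names and signatures are assumptions, not the repository's actual code.

import numpy as np

def estimator_function(X, theta):
    # Assumed linear estimator: predictions are the dot product of X and theta.
    return np.dot(X, theta)

def numeric_gradient(cost_function, X, theta, y, eps=1e-6):
    # Central finite differences of the returned cost with respect to theta.
    grad = np.zeros_like(theta, dtype=float)
    for i in range(len(theta)):
        step = np.zeros_like(theta, dtype=float)
        step[i] = eps
        cost_plus, _ = cost_function(X, theta + step, y)
        cost_minus, _ = cost_function(X, theta - step, y)
        grad[i] = (cost_plus - cost_minus) / (2 * eps)
    return grad

Comparing each variant's returned gradient against the finite-difference gradient of its own cost would flag v1, v2, and v3; v4's constant offset leaves the gradient untouched, and v5's 1e-5 tweak probes the checker's tolerance.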
Example #8
def train_and_run_dtree(decision_tree, X_train, X_test, y_train, y_test,
                        format_title, should_print_tree):
    decision_tree.fit(X_train, y_train)
    y_pred = decision_tree.predict(X_test)

    if should_print_tree:
        decision_tree.print_tree()

    mse = mean_square_error(y_pred, y_test)

    display_2d_regression(X_test[:, 0], X_test[:, 1], y_pred, y_test,
                          format_title.format(mse))
def main():
    # Just has one feature to make it easy to graph.
    X, y = datasets.make_regression(n_samples=200, n_features=1,
                                    bias=random.uniform(-10, 10), noise=5)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_proportion=0.2)
    
    linear_reg = LinearRegression()
    linear_reg.fit(X_train, y_train)
    y_pred = linear_reg.predict(X_test)
    mse = mean_square_error(y_pred, y_test)
    
    linear_reg_w_grad_desc = LinearRegression(optimizer=GradientDescent(num_iterations=2500))
    linear_reg_w_grad_desc.fit(X_train, y_train)
    y_pred_w_grad_desc = linear_reg_w_grad_desc.predict(X_test)
    mse_w_grad_desc = mean_square_error(y_pred_w_grad_desc, y_test)
    
    plt.figure()
    plt.scatter(X_test, y_test, color="Black", label="Actual")
    plt.plot(X_test, y_pred, label="Estimate")
    plt.plot(X_test, y_pred_w_grad_desc, label="Estimate using Optimizer")
    plt.legend(loc='lower right', fontsize=8)
    plt.title("Linear Regression %.2f MSE Normal Eq, %.2f MSE Gradient Descent)" % (mse, mse_w_grad_desc))
    plt.show()
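These examples call train_test_split(X, y, test_proportion=0.2), which is not scikit-learn's signature (scikit-learn's helper takes test_size), so a project-local helper is presumably in play. A minimal sketch under that assumption:

import numpy as np

def train_test_split(X, y, test_proportion=0.2):
    # Shuffle the indices, then carve off the requested proportion as the test split.
    indices = np.random.permutation(len(X))
    test_size = int(len(X) * test_proportion)
    test_idx, train_idx = indices[:test_size], indices[test_size:]
    return X[train_idx], X[test_idx], y[train_idx], y[test_idx]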
Example #10
def main(_=None):
    # Just has one feature to make it easy to graph.
    X, y = datasets.make_classification(n_samples=200,
                                        n_features=1,
                                        n_informative=1,
                                        n_redundant=0,
                                        n_clusters_per_class=1,
                                        flip_y=0.1)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_proportion=0.2)

    logistic_reg = LogisticRegressionTF()
    logistic_reg.fit(X_train, y_train)

    y_pred_probability = logistic_reg.predict(X_test)
    y_pred_probability = np.squeeze(y_pred_probability)
    mse = mean_square_error(y_pred_probability, y_test)

    logistic_reg.set_classification_boundary(0.5)
    y_pred_classified = logistic_reg.predict(X_test)
    y_pred_classified = np.squeeze(y_pred_classified)
    acc = accuracy(y_pred_classified, y_test)

    plt.figure()
    plt.scatter(X_test, y_test, color="Black", label="Actual")
    plt.scatter(X_test,
                y_pred_probability,
                color="Red",
                label="Classification Probability")
    plt.scatter(X_test,
                y_pred_classified,
                color="Blue",
                label="Rounded Prediction")
    plt.legend(loc='center right', fontsize=8)
    plt.title("Logistic Regression %.2f MSE, %.2f%% Accuracy)" %
              (mse, acc * 100))
    plt.show()
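The logistic-regression example also relies on an accuracy helper. Assuming it is the usual fraction-of-correct-predictions metric, a minimal version would be:

import numpy as np

def accuracy(y_pred, y_true):
    # Fraction of predictions that exactly match the labels.
    return np.mean(np.asarray(y_pred) == np.asarray(y_true))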
    def runSingleLinearRegression(self, algorithm, max_mse):
        """
        The algorithm should have been initialized with any additional checkers
        available for its learning method - like have the optimizer wrapped by
        NumericGradientChecker.
        
        Will be regressing with a single output variable per sample.
        """
        # Very simple dataset: 2 features and no noise
        X_train = simple_linear_X['train']
        y_train = simple_linear_y['train']

        algorithm.fit(X_train, y_train)

        X_test = simple_linear_X['test']
        y_test = simple_linear_y['test']

        # Predict on the held-out test data
        y_pred = algorithm.predict(X_test)

        # Expect a very low MSE, since the data has no noise
        self.assertGreater(max_mse, mean_square_error(y_pred, y_test))
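The test reads its data from simple_linear_X / simple_linear_y dictionaries that are not shown here. The fixture below is purely hypothetical, but illustrates the kind of noise-free, two-feature linear data the docstring and comments describe (the coefficients, bias, and split sizes are made up):

import numpy as np

# Hypothetical fixture: two features, an exact linear relationship, no noise.
_X_all = np.random.uniform(-10, 10, size=(100, 2))
_y_all = np.dot(_X_all, np.array([2.0, -3.0])) + 5.0

simple_linear_X = {'train': _X_all[:80], 'test': _X_all[80:]}
simple_linear_y = {'train': _y_all[:80], 'test': _y_all[80:]}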
def main(_=None):
    # Just has one feature to make it easy to graph.
    X, y = datasets.make_regression(n_samples=200,
                                    n_features=1,
                                    bias=random.uniform(-10, 10),
                                    noise=5)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_proportion=0.2)

    linear_reg = LinearRegressionTF()
    linear_reg.fit(X_train, y_train)
    y_pred = linear_reg.predict(X_test)
    y_pred = np.squeeze(y_pred)
    mse = mean_square_error(y_pred, y_test)

    plt.figure()
    plt.scatter(X_test, y_test, color="Black", label="Actual")
    plt.plot(X_test, y_pred, label="Estimate")
    plt.legend(loc='lower right', fontsize=8)
    plt.title("Linear Regression %.2f MSE)" % (mse))
    plt.show()
def main():
    # Just using one feature to make it graphable
    X, y = datasets.make_regression(n_samples=200,
                                    n_features=1,
                                    bias=150,
                                    noise=4)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_proportion=0.2)

    reg = KNN_Regression(k=4)

    reg.fit(X_train, y_train)

    y_pred = reg.predict(X_test)

    mse = mean_square_error(y_pred, y_test)

    plt.scatter(X_test, y_test, color="Black", label="Actual")
    plt.scatter(X_test, y_pred, color="Red", label="Prediction")
    plt.legend(loc='lower right', fontsize=8)
    plt.title("KNN Regression (%.2f MSE)" % mse)
    plt.show()
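KNN_Regression here is the project's own class rather than scikit-learn's KNeighborsRegressor. A hedged, minimal sketch of what such a regressor typically does (mean target of the k nearest training points under Euclidean distance) might look like this; the real implementation may differ:

import numpy as np

class KNN_Regression:
    def __init__(self, k=4):
        self.k = k

    def fit(self, X, y):
        # Lazy learner: just memorize the training data.
        self.X, self.y = np.asarray(X), np.asarray(y)

    def predict(self, X):
        preds = []
        for x in np.asarray(X):
            # Average the targets of the k closest training points.
            dists = np.linalg.norm(self.X - x, axis=1)
            nearest = np.argsort(dists)[:self.k]
            preds.append(self.y[nearest].mean())
        return np.array(preds)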
    # Deliberately far more features than the samples warrant, so the model
    # will often overfit without pruning.
    X, y = datasets.make_regression(n_samples=100,
                                    n_features=30,
                                    n_informative=1,
                                    noise=5)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_proportion=0.2)

    # Without any pruning
    reg_orig = linear_regression.LinearRegression()
    reg_orig.fit(X_train, y_train)
    y_pred_orig = reg_orig.predict(X_test)
    orig_mse = mean_square_error(y_pred_orig, y_test)

    # Set up the pruner and reduce the number of features down to 1
    pruner = FeaturePruner(linear_regression.LinearRegression(), 1)
    X_train_pruned = pruner.fit_transform(X_train, y_train)
    X_test_pruned = pruner.transform(X_test)

    reg_pruned = linear_regression.LinearRegression()
    reg_pruned.fit(X_train_pruned, y_train)
    y_pred_pruned = reg_pruned.predict(X_test_pruned)
    pruned_mse = mean_square_error(y_pred_pruned, y_test)

    actual = plt.scatter(X_test_pruned,
                         y_test,
                         color="Black",
                         label="Actual Value")
def _cost_function(X, theta, y):
    pred = np.dot(X, theta)
    cost = mean_square_error(pred, y) / 2
    m = len(y)
    gradient = 1 / m * np.dot(X.T, pred - y)
    return (cost, gradient)
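Finally, _cost_function returns a (cost, gradient) pair, which is the shape an optimizer such as the GradientDescent(num_iterations=2500) seen earlier would consume. A hypothetical driver loop (assuming X already includes a bias column) could look like:

import numpy as np

def gradient_descent(X, y, learning_rate=0.01, num_iterations=2500):
    # Repeatedly step theta against the returned gradient; the cost could be
    # logged each iteration to monitor convergence.
    theta = np.zeros(X.shape[1])
    for _ in range(num_iterations):
        cost, gradient = _cost_function(X, theta, y)
        theta -= learning_rate * gradient
    return theta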