Example #1
def validationCurve(X, y, Xval, yval):
    # VALIDATIONCURVE Generate the train and validation errors needed to
    # plot a validation curve that we can use to select lambda
    #       [lambda_vec, error_train, error_val] = ...
    #             VALIDATIONCURVE(X, y, Xval, yval) returns the train
    #             and validation errors (in error_train, error_val)
    #             for different values of lambda. You are given the training set (X,
    #             y) and validation set (Xval, yval).

    #
    # Selected values of lambda (you should not change this)
    lambda_vec = [0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10]

    # You need to return these variables correctly.
    error_train = []
    error_val = []

    from ex5_regularized_linear_regressionand_bias_vs_variance.trainLinearReg import trainLinearReg
    from ex5_regularized_linear_regressionand_bias_vs_variance.linearRegCostFunction import linearRegCostFunction
    for l in lambda_vec:
        _, theta = trainLinearReg(X, y, l)
        error_train.append(linearRegCostFunction(X, y, theta, 0)[0])
        error_val.append(linearRegCostFunction(Xval, yval, theta, 0)[0])

    return lambda_vec, error_train, error_val
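A minimal usage sketch for validationCurve (a sketch only; it assumes X, y, Xval, yval are preloaded NumPy arrays prepared as in the exercise, with a bias column where required). The "best" lambda is simply the one with the lowest cross validation error:

import numpy as np

# Hypothetical usage: X, y, Xval, yval are assumed to already be loaded.
lambda_vec, error_train, error_val = validationCurve(X, y, Xval, yval)

# Pick the lambda with the smallest cross validation error.
best_lambda = lambda_vec[int(np.argmin(error_val))]
print('Best lambda: {best}'.format(best=best_lambda))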
Example #2
    def test_regularized_linear_regression_cost_and_grad(self):
        # m = Number of examples
        theta = np.array([[1], [1]])
        X_padded = np.column_stack((np.ones((self.m, 1)), self.X))
        from ex5_regularized_linear_regressionand_bias_vs_variance.linearRegCostFunction import linearRegCostFunction
        J, grad = linearRegCostFunction(X_padded, self.y, theta, 1)
        self.assertAlmostEqual(J, 303.993, delta=0.001)
        print('Cost at theta = [1 ; 1]: {cost} \n'
              '(this value should be about 303.993192)'.format(cost=J))

        # =========== Part 3: Regularized Linear Regression Gradient =============
        # You should now implement the gradient for regularized linear
        # regression.
        self.assertAlmostEqual(grad[0], -15.303016, delta=0.0001)
        self.assertAlmostEqual(grad[1], 598.250744, delta=0.0001)
        print('Gradient at theta = [1 ; 1]:  [{grad_0}; {grad_1}] \n'
              '(this value should be about [-15.303016; 598.250744])\n'.format(grad_0=grad[0], grad_1=grad[1]))
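The tests above call linearRegCostFunction, which is not listed among these examples. Below is a minimal sketch of what such a function might look like, assuming X already contains the bias column, y has m rows, and the bias term theta[0] is not regularized (a sketch under those assumptions, not the repository's implementation):

import numpy as np

def linearRegCostFunction(X, y, theta, _lambda):
    # Sketch only: regularized linear regression cost and gradient.
    m = y.shape[0]
    theta = np.asarray(theta).reshape(-1, 1)
    diff = X.dot(theta) - np.asarray(y).reshape(-1, 1)

    # Squared-error cost plus L2 penalty (the bias term is not regularized).
    J = np.sum(diff ** 2) / (2.0 * m) + (_lambda / (2.0 * m)) * np.sum(theta[1:] ** 2)

    # Gradient, with the regularization term added to every entry except the bias.
    grad = X.T.dot(diff) / m
    grad[1:] = grad[1:] + (_lambda / m) * theta[1:]
    return J, grad.flatten()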
Example #3
import numpy as np

def learningCurve(X, y, Xval, yval, _lambda):
    # LEARNINGCURVE Generates the train and cross validation set errors needed
    # to plot a learning curve
    # [error_train, error_val] = ...
    # LEARNINGCURVE(X, y, Xval, yval, lambda) returns the train and
    # cross validation set errors for a learning curve. In particular,
    # it returns two vectors of the same length - error_train and
    # error_val. Then, error_train(i) contains the training error for
    # i examples (and similarly for error_val(i)).
    #
    # In this function, you will compute the train and test errors for
    # dataset sizes from 1 up to m. In practice, when working with larger
    # datasets, you might want to do this in larger intervals.
    #

    # Number of training examples
    m = np.shape(X)[0]

    # You need to return these values correctly
    error_train = []
    error_val = []

    # ====================== YOUR CODE HERE ======================
    # Instructions: Fill in this function to return training errors in
    #               error_train and the cross validation errors in error_val.
    #               i.e., error_train(i) and
    #               error_val(i) should give you the errors
    #               obtained after training on i examples.
    #
    # Note: You should evaluate the training error on the first i training
    #       examples (i.e., X(1:i, :) and y(1:i)).
    #
    #       For the cross-validation error, you should instead evaluate on
    #       the _entire_ cross validation set (Xval and yval).
    #
    # Note: If you are using your cost function (linearRegCostFunction)
    #       to compute the training and cross validation error, you should
    #       call the function with the lambda argument set to 0.
    #       Do note that you will still need to use lambda when running
    #       the training to obtain the theta parameters.
    #
    # Hint: You can loop over the examples with the following:
    #
    #       for i = 1:m
    #           # Compute train/cross validation errors using training examples
    #           # X(1:i, :) and y(1:i), storing the result in
    #           # error_train(i) and error_val(i)
    #           ....
    #
    #       end
    #

    # ---------------------- Sample Solution ----------------------

    from ex5_regularized_linear_regressionand_bias_vs_variance.linearRegCostFunction import linearRegCostFunction
    from ex5_regularized_linear_regressionand_bias_vs_variance.trainLinearReg import trainLinearReg

    for i in range(m):
        _, theta = trainLinearReg(X[:i + 1, :], y[:i + 1], _lambda)
        error_train.append(
            linearRegCostFunction(X[:i + 1, :], y[:i + 1], theta, 0)[0])
        error_val.append(linearRegCostFunction(Xval, yval, theta, 0)[0])

    return error_train, error_val
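learningCurve (and validationCurve above) trains with trainLinearReg, which is also not listed here. A minimal sketch, assuming it minimizes linearRegCostFunction with scipy.optimize.minimize and returns the final cost together with the learned theta:

import numpy as np
from scipy.optimize import minimize
from ex5_regularized_linear_regressionand_bias_vs_variance.linearRegCostFunction import linearRegCostFunction

def trainLinearReg(X, y, _lambda):
    # Sketch only: fit theta by minimizing the regularized linear regression cost.
    initial_theta = np.zeros(X.shape[1])

    def cost(theta):
        return linearRegCostFunction(X, y, theta, _lambda)[0]

    def grad(theta):
        return np.asarray(linearRegCostFunction(X, y, theta, _lambda)[1]).ravel()

    res = minimize(cost, initial_theta, jac=grad, method='CG', options={'maxiter': 200})
    return res.fun, res.x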
Example #4
    def test_feature_mapping_for_polynomial_regression(self):
        p = 8
        # Map X onto Polynomial Features and Normalize
        from ex5_regularized_linear_regressionand_bias_vs_variance.polyFeatures import polyFeatures
        X_poly = polyFeatures(self.X, p)
        X_poly_m, X_poly_n = np.shape(X_poly)
        self.assertEqual(X_poly_m, self.m)
        self.assertEqual(X_poly_n, p)

        from ex5_regularized_linear_regressionand_bias_vs_variance.featureNormalize import featureNormalize
        X_poly, mu, sigma = featureNormalize(X_poly)
        X_poly = np.column_stack((np.ones((self.m, 1)), X_poly))

        X_poly_test = polyFeatures(self.Xtest, p)
        X_poly_test_m, X_poly_test_n = np.shape(X_poly_test)
        self.assertEqual(X_poly_test_m, np.shape(self.Xtest)[0])
        self.assertEqual(X_poly_test_n, p)
        X_poly_test = X_poly_test - mu
        X_poly_test = X_poly_test / sigma
        X_poly_test = np.column_stack((np.ones((X_poly_test.shape[0], 1)), X_poly_test))

        X_poly_val = polyFeatures(self.Xval, p)
        X_poly_val_m, X_poly_val_n = np.shape(X_poly_val)
        self.assertEqual(X_poly_val_m, np.shape(self.Xval)[0])
        self.assertEqual(X_poly_val_n, p)
        X_poly_val = X_poly_val - mu
        X_poly_val = X_poly_val / sigma
        X_poly_val = np.column_stack((np.ones((X_poly_val.shape[0], 1)), X_poly_val))

        print('Normalized Training Example 1:\n'
              '  {X_poly}  '.format(X_poly=X_poly[0, :]))

        # =========== Part 7: Learning Curve for Polynomial Regression =============
        # Now, you will get to experiment with polynomial regression with multiple
        # values of lambda. The code below runs polynomial regression with
        # lambda = 0. You should try running the code with different values of
        # lambda to see how the fit and learning curve change.
        #
        _lambda = 0
        from ex5_regularized_linear_regressionand_bias_vs_variance.trainLinearReg import trainLinearReg
        cost, theta = trainLinearReg(X_poly, self.y, _lambda)
        self.assertIsNotNone(cost)
        self.assertIsNotNone(theta)

        import matplotlib.pyplot as plt
        plt.figure(1)
        plt.scatter(self.X, self.y, marker='x', c='r', s=30, linewidth=2)
        plt.xlim([-80, 80])
        plt.ylim([-20, 60])
        plt.xlabel('Change in water level (x)')
        plt.ylabel('Water flowing out of the dam (y)')
        plt.title('Polynomial Regression Fit (lambda = {:f})'.format(_lambda))

        # plt.plot(self.X, self.y, 'rx', markersize=10, linewidth=1.5)
        from ex5_regularized_linear_regressionand_bias_vs_variance.plotFit import plotFit
        plotFit(min(self.X), max(self.X), mu, sigma, theta, p)
        plt.show(block=False)

        plt.figure(2)
        from ex5_regularized_linear_regressionand_bias_vs_variance.learningCurve import learningCurve
        error_train, error_val = learningCurve(X_poly, self.y, X_poly_val, self.yval, 0)
        p1, p2 = plt.plot(range(1, self.m + 1), error_train, range(1, self.m + 1), error_val)
        plt.legend((p1, p2), ('Train', 'Cross Validation'))
        plt.show(block=False)

        print('Polynomial Regression (lambda = {_lambda})'.format(_lambda=_lambda))
        print('# Training Examples\tTrain Error\tCross Validation Error')
        for i in range(0, self.m):
            print('\t{i}\t\t{error_train}\t{error_val}'.format(i=i + 1, error_train=error_train[i], error_val=error_val[i]))

        # =========== Part 8: Validation for Selecting Lambda =============
        #  You will now implement validationCurve to test various values of
        #  lambda on a validation set. You will then use this to select the
        #  "best" lambda value.
        #

        from ex5_regularized_linear_regressionand_bias_vs_variance.validationCurve import validationCurve
        lambda_vec, error_train, error_val = validationCurve(X_poly, self.y, X_poly_val, self.yval)
        self.assertEqual(len(error_train), len(lambda_vec))
        self.assertEqual(len(error_val), len(lambda_vec))

        plt.close('all')
        p1, p2 = plt.plot(lambda_vec, error_train, lambda_vec, error_val)
        plt.legend((p1, p2), ('Train', 'Cross Validation'))
        plt.xlabel('lambda')
        plt.ylabel('Error')
        plt.show(block=False)

        print('lambda\t\tTrain Error\tValidation Error')
        for i in range(len(lambda_vec)):
            print(
                '{lambda_vec}\t{error_train}\t{error_val}'.format(lambda_vec=lambda_vec[i], error_train=error_train[i],
                                                                  error_val=error_val[i]))
        # =========== Part 9: Computing test set error =============
        # Use the best lambda value from the previous step.
        lambda_val = 3

        # note that we're using X_poly - linear regression with polynomial features
        from ex5_regularized_linear_regressionand_bias_vs_variance.trainLinearReg import trainLinearReg
        _, theta = trainLinearReg(X_poly, self.y, lambda_val)

        # because we're using X_poly, we also have to use X_poly_test with polynomial features
        from ex5_regularized_linear_regressionand_bias_vs_variance.linearRegCostFunction import linearRegCostFunction
        error_test, _ = linearRegCostFunction(X_poly_test, self.ytest, theta, 0)
        print('Test set error: {error_test}'.format(error_test=error_test))  # expected about 3.859
        # TODO: the computed value does not match the expected 3.859, so the assertion stays disabled.
        # self.assertAlmostEqual(error_test, 3.859, delta=0.01)

        # =========== Part 10: Plot learning curves with randomly selected examples =============
        #

        # lambda_val value for this step
        lambda_val = 0.01

        times = 50

        error_train_rand = np.zeros((self.m, times))
        error_val_rand = np.zeros((self.m, times))

        for i in range(self.m):
            for k in range(times):
                rand_sample_train = np.random.permutation(X_poly.shape[0])
                rand_sample_train = rand_sample_train[:i + 1]

                rand_sample_val = np.random.permutation(X_poly_val.shape[0])
                rand_sample_val = rand_sample_val[:i + 1]

                X_poly_train_rand = X_poly[rand_sample_train, :]
                y_train_rand = self.y[rand_sample_train]
                X_poly_val_rand = X_poly_val[rand_sample_val, :]
                y_val_rand = self.yval[rand_sample_val]

                _, theta = trainLinearReg(X_poly_train_rand, y_train_rand, lambda_val)
                cost, _ = linearRegCostFunction(X_poly_train_rand, y_train_rand, np.asarray(theta), 0)
                error_train_rand[i, k] = cost
                cost, _ = linearRegCostFunction(X_poly_val_rand, y_val_rand, theta, 0)
                error_val_rand[i, k] = cost

        error_train = np.mean(error_train_rand, axis=1)
        error_val = np.mean(error_val_rand, axis=1)

        p1, p2 = plt.plot(range(1, self.m + 1), error_train, range(1, self.m + 1), error_val)
        plt.title('Polynomial Regression Learning Curve (lambda = {:f})'.format(lambda_val))
        plt.legend((p1, p2), ('Train', 'Cross Validation'))
        plt.xlabel('Number of training examples')
        plt.ylabel('Error')
        plt.axis([0, 13, 0, 150])
        plt.show(block=False)
from ex5_regularized_linear_regressionand_bias_vs_variance.linearRegCostFunction import linearRegCostFunction

def grad(theta, _X, _y, __lambda):
    # Gradient-only wrapper around linearRegCostFunction (e.g. for an optimizer's jac argument).
    _, _grad = linearRegCostFunction(_X, _y, theta, __lambda)
    return _grad

def costFunc(theta, _X, _y, __lambda):
    # Cost-only wrapper around linearRegCostFunction (e.g. for an optimizer's objective).
    cost, _ = linearRegCostFunction(_X, _y, theta, __lambda)
    return cost
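Example #4 above also relies on polyFeatures and featureNormalize, which are not listed here either. A minimal sketch of what they are assumed to do: polyFeatures expands a single feature column into its powers 1..p, and featureNormalize standardizes every column to zero mean and unit standard deviation (a sketch under those assumptions, not the repository's code):

import numpy as np

def polyFeatures(X, p):
    # Sketch only: column j holds the (j + 1)-th power of the input feature.
    X = np.asarray(X).reshape(-1)
    return np.column_stack([X ** (j + 1) for j in range(p)])

def featureNormalize(X):
    # Sketch only: standardize each column; ddof=1 mirrors MATLAB's std.
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0, ddof=1)
    return (X - mu) / sigma, mu, sigma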