def validationCurve(X, y, Xval, yval):
    """Generates the train and validation errors needed to plot a validation
    curve that we can use to select lambda.

    VALIDATIONCURVE(X, y, Xval, yval) returns the train and validation errors
    (in error_train, error_val) for different values of lambda. You are given
    the training set (X, y) and validation set (Xval, yval).
    """
    # Selected values of lambda (you should not change this)
    lambda_vec = np.array([0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10]).reshape(-1, 1)

    # You need to return these variables correctly.
    error_train = np.zeros((len(lambda_vec), 1))
    error_val = np.zeros((len(lambda_vec), 1))

    for i in range(len(lambda_vec)):
        curr_lambda = lambda_vec[i]
        # Train with regularization, but evaluate both errors with lambda = 0
        theta = trainLinearReg(X, y, curr_lambda)
        error_train[i], _ = linearRegCostFunction(X, y, theta, 0)
        error_val[i], _ = linearRegCostFunction(Xval, yval, theta, 0)

    return lambda_vec, error_train, error_val
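# A minimal usage sketch (an illustration, not part of the original exercise
# code): run the curve, then pick the lambda with the lowest validation error.
# It assumes X/Xval already include the intercept column and that the helper
# functions above are importable.
import numpy as np

lambda_vec, error_train, error_val = validationCurve(X, y, Xval, yval)
best_idx = int(np.argmin(error_val))       # index of lowest validation error
best_lambda = float(lambda_vec[best_idx])  # typically 3 on the ex5 dataset
print('Best lambda on the validation set: %g' % best_lambda)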
def polynomialDegreeCurve(X, y, Xval, yval, reg_lambda):
    """Error curve as a function of the polynomial degree d."""
    dimensions = np.arange(1, 80).reshape(-1, 1)

    # You need to return these variables correctly.
    error_train = np.zeros((len(dimensions), 1))
    error_val = np.zeros((len(dimensions), 1))

    m_train_set = X.shape[0]
    m_val_set = Xval.shape[0]

    for i in range(len(dimensions)):
        dimension = dimensions[i]

        X_poly = polyFeatures(X, dimension)
        X_poly, mu, sigma = featureNormalize(X_poly)  # Normalize
        X_poly = np.c_[np.ones((m_train_set, 1)), X_poly]

        # Normalize the validation set with the training-set mu and sigma
        X_poly_val = polyFeatures(Xval, dimension)
        X_poly_val = (X_poly_val - mu) / sigma
        X_poly_val = np.c_[np.ones((m_val_set, 1)), X_poly_val]

        theta = trainLinearReg(X_poly, y, reg_lambda)
        error_train[i], _ = linearRegCostFunction(X_poly, y, theta, 0)
        error_val[i], _ = linearRegCostFunction(X_poly_val, yval, theta, 0)

    return dimensions, error_train, error_val
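# polynomialDegreeCurve assumes two helpers that are not shown in this chunk.
# A minimal sketch of how they are commonly implemented (an assumption, not
# the original exercise code):
import numpy as np

def polyFeatures(X, p):
    """Map a column vector X onto powers 1..p: [X, X^2, ..., X^p]."""
    p = int(p)  # accept a 1-element array, as passed by the loop above
    return np.hstack([X.reshape(-1, 1) ** (k + 1) for k in range(p)])

def featureNormalize(X):
    """Zero-mean, unit-variance normalization; returns (X_norm, mu, sigma)."""
    mu = X.mean(axis=0)
    sigma = X.std(axis=0, ddof=1)  # ddof=1 matches Octave's std
    return (X - mu) / sigma, mu, sigma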
def validationCurve(X, y, Xval, yval):
    #VALIDATIONCURVE Generate the train and validation errors needed to
    #plot a validation curve that we can use to select lambda
    #   [lambda_vec, error_train, error_val] = ...
    #       VALIDATIONCURVE(X, y, Xval, yval) returns the train
    #       and validation errors (in error_train, error_val)
    #       for different values of lambda. You are given the training set
    #       (X, y) and validation set (Xval, yval).
    #

    # Selected values of lambda (you should not change this)
    lambda_vec = np.array([0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10])

    # You need to return these variables correctly.
    error_train = np.zeros((len(lambda_vec), 1))
    error_val = np.zeros((len(lambda_vec), 1))

    # ====================== YOUR CODE HERE ======================
    # Instructions: Fill in this function to return training errors in
    #               error_train and the validation errors in error_val:
    #               error_train[i] and error_val[i] should give you the
    #               errors obtained after training with lambda = lambda_vec[i].

    for i in range(len(lambda_vec)):  # range, not the Python 2 xrange
        lambda_val = lambda_vec[i]

        # learn theta parameters with current lambda value
        theta = tlr.trainLinearReg(X, y, lambda_val)

        # fill in error_train[i] and error_val[i]
        # note that for error computation, we set lambda = 0 in the last argument
        error_train[i] = lrcf.linearRegCostFunction(X, y, theta, 0)
        error_val[i] = lrcf.linearRegCostFunction(Xval, yval, theta, 0)

    return lambda_vec, error_train, error_val
def validationCurve(X, y, Xval, yval):
    lambda_vec = np.array([0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10])
    error_train = np.zeros(lambda_vec.size)
    error_val = np.zeros(lambda_vec.size)

    for i in range(lambda_vec.size):
        lamda = lambda_vec[i]
        theta = trainLinearReg(X, y, lamda)
        error_train[i], _ = linearRegCostFunction(X, y, theta, 0)
        error_val[i], _ = linearRegCostFunction(Xval, yval, theta, 0)

    return lambda_vec, error_train, error_val
def learningCurve(X, y, Xval, yval, lamda):
    m = y.size
    error_train = np.zeros(m)
    error_val = np.zeros(m)

    for i in range(m):
        # Train on the first i+1 examples; evaluate both errors with lambda = 0
        Xtrain = X[:i + 1, :]
        ytrain = y[:i + 1, :]
        theta = trainLinearReg(Xtrain, ytrain, lamda)
        error_train[i], _ = linearRegCostFunction(Xtrain, ytrain, theta, 0)
        error_val[i], _ = linearRegCostFunction(Xval, yval, theta, 0)

    return error_train, error_val
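# A short usage sketch (illustrative; X, y, Xval, yval are assumed to be the
# ex5 arrays with an intercept column): plot both error curves against the
# training-set size to diagnose bias vs. variance.
import matplotlib.pyplot as plt

error_train, error_val = learningCurve(X, y, Xval, yval, 0)
m = y.size
plt.plot(range(1, m + 1), error_train, label='Train')
plt.plot(range(1, m + 1), error_val, label='Cross Validation')
plt.xlabel('Number of training examples')
plt.ylabel('Error')
plt.legend()
plt.show()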
def validationCurve(X, y, Xval, yval):
    """Returns the train and validation errors (in error_train, error_val)
    for different values of lambda. You are given the training set (X, y)
    and validation set (Xval, yval).
    """
    # Selected values of lambda (you should not change this)
    lambda_vec = np.array([0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10])
    m = lambda_vec.size

    # You need to return these variables correctly.
    error_train = np.zeros(m)
    error_val = np.zeros(m)

    # ====================== YOUR CODE HERE ======================
    # Instructions: Fill in this function to return training errors in
    #               error_train and the validation errors in error_val:
    #               error_train[i] and error_val[i] should give you the
    #               errors obtained after training with lambda = lambda_vec[i].
    # =============================================================

    for i, Lambda in enumerate(lambda_vec):
        theta = trainLinearReg(X, y, Lambda)
        error_train[i] = linearRegCostFunction(X, y, theta, 0)[0]
        error_val[i] = linearRegCostFunction(Xval, yval, theta, 0)[0]

    return lambda_vec, error_train, error_val
def learningCurve(X, y, Xval, yval, lmbda):
    # Number of training examples
    m = X.shape[0]

    # You need to return these values correctly
    error_train = np.zeros((m, 1))
    error_val = np.zeros((m, 1))

    # ====================== YOUR CODE HERE ======================
    # Instructions: Fill in this function to return training errors in
    #               error_train and the cross validation errors in error_val,
    #               i.e., error_train[i] and error_val[i] should give you the
    #               errors obtained after training on i examples.
    #
    # Note: You should evaluate the training error on the first i training
    #       examples (i.e., X[:i+1, :] and y[:i+1]).
    #
    #       For the cross-validation error, you should instead evaluate on
    #       the _entire_ cross validation set (Xval and yval).
    #
    # Note: If you are using your cost function (linearRegCostFunction)
    #       to compute the training and cross validation error, you should
    #       call the function with the lmbda argument set to 0.
    #       Do note that you will still need to use lmbda when running
    #       the training to obtain the theta parameters.
    #
    # ---------------------- Sample Solution ----------------------

    for i in range(m):
        theta = trainLinearReg(X[:i + 1, :], y[:i + 1, :], lmbda)
        error_train[i] = linearRegCostFunction(theta, X[:i + 1, :], y[:i + 1, :], 0)[0]
        error_val[i] = linearRegCostFunction(theta, Xval, yval, 0)[0]

    # -------------------------------------------------------------
    # =============================================================
    return error_train, error_val
def validationCurve(X, y, Xval, yval):
    """VALIDATIONCURVE Generate the train and validation errors needed to
    plot a validation curve that we can use to select lambda.

    lambda_vec, error_train, error_val = VALIDATIONCURVE(X, y, Xval, yval)
    returns the train and validation errors (in error_train, error_val) for
    different values of lambda. You are given the training set (X, y) and
    validation set (Xval, yval).
    """
    # Selected values of lambda (you should not change this)
    lmbda_vec = [0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10]

    # You need to return these variables correctly.
    error_train = np.zeros((len(lmbda_vec), 1))
    error_val = np.zeros((len(lmbda_vec), 1))

    # ====================== YOUR CODE HERE ======================
    # Instructions: Fill in this function to return training errors in
    #               error_train and the validation errors in error_val:
    #               error_train[i] and error_val[i] should give you the
    #               errors obtained after training with lmbda = lmbda_vec[i].

    for i in range(len(lmbda_vec)):
        lmbda = lmbda_vec[i]
        theta = trainLinearReg(X, y, lmbda)
        error_train[i] = linearRegCostFunction(theta, X, y, 0)[0]
        error_val[i] = linearRegCostFunction(theta, Xval, yval, 0)[0]

    # =============================================================
    return (lmbda_vec, error_train, error_val)
def validationCurve(X, y, Xval, yval):
    lambda_vec = np.array([0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10])
    l_len = np.size(lambda_vec)
    error_train = np.zeros((l_len, 1))
    error_val = np.zeros((l_len, 1))

    for i in range(l_len):
        _lambda = lambda_vec[i]
        theta = trainLinearReg(X, y, _lambda)
        error_train[i] = linearCostFunctionReg(theta, X, y, 0)
        error_val[i] = linearCostFunctionReg(theta, Xval, yval, 0)

    return lambda_vec, error_train, error_val
def validationCurve(X, y, Xval, yval):
    # Selected values of lambda (you should not change this)
    lambda_vec = np.array([0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10]).reshape(-1, 1)

    # You need to return these variables correctly.
    error_train = np.zeros(lambda_vec.shape[0])
    error_val = np.zeros(lambda_vec.shape[0])

    for i in range(lambda_vec.shape[0]):
        _lambda = lambda_vec[i]
        theta = trainLinearReg(X, y, _lambda)
        error_train[i], _ = linearRegCostFunction(X, y, theta, 0)
        error_val[i], _ = linearRegCostFunction(Xval, yval, theta, 0)

    return lambda_vec, error_train, error_val
def learningCurve(X, y, Xval, yval, lambda_val):
    #LEARNINGCURVE Generates the train and cross validation set errors needed
    #to plot a learning curve
    #   [error_train, error_val] = ...
    #       LEARNINGCURVE(X, y, Xval, yval, lambda_val) returns the train and
    #       cross validation set errors for a learning curve. In particular,
    #       it returns two vectors of the same length - error_train and
    #       error_val. Then, error_train(i) contains the training error for
    #       i examples (and similarly for error_val(i)).
    #
    #   In this function, you will compute the train and test errors for
    #   dataset sizes from 1 up to m. In practice, when working with larger
    #   datasets, you might want to do this in larger intervals.
    #

    # Number of training examples
    m = len(X)

    # You need to return these values correctly
    error_train = np.zeros((m, 1))
    error_val = np.zeros((m, 1))

    for i in range(1, m + 1):  # range, not the Python 2 xrange
        # define training variables for this loop
        X_train = X[:i]
        y_train = y[:i]

        # learn theta parameters with current X_train and y_train
        theta = tlr.trainLinearReg(X_train, y_train, lambda_val)

        # fill in error_train(i) and error_val(i)
        # note that for error computation, we set lambda_val = 0 in the last argument
        error_train[i - 1] = lrcf.linearRegCostFunction(X_train, y_train, theta, 0)
        error_val[i - 1] = lrcf.linearRegCostFunction(Xval, yval, theta, 0)

    return error_train, error_val
pause()

""" =========== Part 4: Train Linear Regression =============
Once you have implemented the cost and gradient correctly, the
trainLinearReg function will use your cost function to train
regularized linear regression.

Write Up Note: The data is non-linear, so this will not give a great fit.
"""

# Train linear regression with lambda = 0
reg_lambda = 0
new_input = np.c_[np.ones((m, 1)), X]
theta = trainLinearReg(new_input, y, reg_lambda)

# Plot fit over the data
p = new_input.dot(theta)
plt.plot(X, p, color='blue')
plt.draw()
plt.show(block=False)

print('Program paused. Press enter to continue.\n')
pause()

""" =========== Part 5: Learning Curve for Linear Regression =============
Next, you should implement the learningCurve function.
"""
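# trainLinearReg is called throughout this section but not shown in these
# chunks. A minimal sketch of one common implementation, assuming
# linearRegCostFunction(X, y, theta, lambda) returns the (cost, gradient)
# pair used elsewhere here; this is an assumption, not the course's verbatim
# code.
import numpy as np
from scipy import optimize

def trainLinearReg(X, y, reg_lambda, maxiter=200):
    """Fit theta by minimizing the regularized linear regression cost."""
    initial_theta = np.zeros(X.shape[1])
    cost_fn = lambda t: linearRegCostFunction(X, y, t, reg_lambda)
    res = optimize.minimize(cost_fn, initial_theta, jac=True,
                            method='CG', options={'maxiter': maxiter})
    return res.x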
def learningCurve(X, y, Xval, yval, Lambda):
    """Returns the train and cross validation set errors for a learning
    curve. In particular, it returns two vectors of the same length -
    error_train and error_val. Then, error_train(i) contains the training
    error for i examples (and similarly for error_val(i)).

    In this function, you will compute the train and test errors for
    dataset sizes from 1 up to m. In practice, when working with larger
    datasets, you might want to do this in larger intervals.
    """
    # Number of training examples
    m, _ = X.shape

    # You need to return these values correctly
    error_train = np.zeros(m)
    error_val = np.zeros(m)

    # ====================== YOUR CODE HERE ======================
    # Instructions: Fill in this function to return training errors in
    #               error_train and the cross validation errors in error_val,
    #               i.e., error_train(i) and error_val(i) should give you the
    #               errors obtained after training on i examples.
    #
    # Note: You should evaluate the training error on the first i training
    #       examples (i.e., X(1:i, :) and y(1:i)).
    #
    #       For the cross-validation error, you should instead evaluate on
    #       the _entire_ cross validation set (Xval and yval).
    #
    # Note: If you are using your cost function (linearRegCostFunction)
    #       to compute the training and cross validation error, you should
    #       call the function with the lambda argument set to 0.
    #       Do note that you will still need to use lambda when running
    #       the training to obtain the theta parameters.
    #
    # ---------------------- Sample Solution ----------------------

    #X = np.column_stack((np.ones(m), X))
    #Xval = np.column_stack((np.ones(Xval.shape[0]), Xval))

    for i in range(m):
        theta = trainLinearReg(X[:i + 1], y[:i + 1], Lambda)
        error_train[i] = linearRegCostFunction(X[:i + 1], y[:i + 1], theta, 0)[0]
        error_val[i] = linearRegCostFunction(Xval, yval, theta, 0)[0]

    # -------------------------------------------------------------
    # =============================================================
    return error_train, error_val
print('Gradient at theta = [1 ; 1]: [%f; %f]'
      '\n(this value should be about [-15.303016; 598.250744])\n'
      % (grad[0], grad[1]))

## =========== Part 4: Train Linear Regression =============
#  Once you have implemented the cost and gradient correctly, the
#  trainLinearReg function will use your cost function to train
#  regularized linear regression.
#
#  Write Up Note: The data is non-linear, so this will not give a great fit.
#

# Train linear regression with lambda = 0
lamda = 0
theta = trainLinearReg(X, y, lamda)

# Plot fit over the data
pred = X @ theta
plt.plot(X[:, 1:], pred, 'b--')
# plt.show()

## =========== Part 5: Learning Curve for Linear Regression =============
#  Next, you should implement the learningCurve function.
#
#  Write Up Note: Since the model is underfitting the data, we expect to
#  see a graph with "high bias" -- Figure 3 in ex5.pdf
#

error_train, error_val = learningCurve(X, y, np.insert(Xval, 0, 1, axis=1),
                                       yval, 0)
print('(this value should be about 303.993192)')

# =========== 1.3 Regularized linear regression gradient =============
theta = np.array([1, 1])
_, grad = linearRegCostFunction(theta, np.hstack((np.ones((m, 1)), X)), y, 1)
print('Gradient at theta = [1 ; 1]:', grad.ravel())
print('(this value should be about [-15.303016; 598.250744])')

# ================== 1.4 Fitting linear regression ===================
print('\nPart 4: Train Linear Regression')

# Train linear regression with lambda = 0
l = 0.0
theta = trainLinearReg(np.hstack((np.ones((m, 1)), X)), y, l)
pred = np.hstack((np.ones((m, 1)), X)).dot(theta)

plt.figure()
plt.plot(X, y, linestyle='', marker='x', color='r')
plt.plot(X, pred, linestyle='--', marker='', color='b')
plt.xlabel('Change in water level (x)')
plt.ylabel('Water flowing out of the dam (y)')
#plt.show()

# ===================== 2. Bias-variance ==============================
# ===================== 2.1 Learning curves ===========================
l = 0.0
error_train, error_val = learningCurve(np.hstack((np.ones((m, 1)), X)), y,
# =========== Part 2: Regularized Linear Regression Cost =============
theta = np.ones((2, 1))
J = linearRegCostFunction(theta, np.column_stack((np.ones((m, 1)), X)), y, 1)
print('Cost at theta = [1 ; 1] - (this value should be about 303.993192)\n', J)

# =========== Part 3: Regularized Linear Regression Gradient =============
J, grad = linearRegCostFunction(theta, np.column_stack((np.ones((m, 1)), X)), y, 1, True)
print('Gradient at theta = [1 ; 1] - (this value should be about [-15.303016; 598.250744])\n',
      grad)

# =========== Part 4: Train Linear Regression =============
_lambda = 0
result = trainLinearReg(np.column_stack((np.ones((m, 1)), X)), y, _lambda)

plt.plot(X, y, marker='x', linestyle='None')
plt.ylabel('Water flowing out of the dam (y)')
plt.xlabel('Change in water level (x)')
plt.xticks(np.arange(-50, 50, 10.0))
plt.plot(X, np.dot(np.column_stack((np.ones((m, 1)), X)), result.x))
plt.show()

# =========== Part 5: Learning Curve for Linear Regression =============
_lambda = 0
error_train, error_val = learningCurve(np.column_stack((np.ones((m, 1)), X)), y,
                                       np.column_stack((np.ones((Xval.shape[0], 1)), Xval)),
                                       yval, _lambda)

# x-axis is the number of training examples (1..m), not the 0-based index
plt.plot(range(1, m + 1), error_train, label="Training Error")
plt.plot(range(1, m + 1), error_val, label="Validation Error")
plt.legend()
plt.xlabel('Number of training examples')
print('Gradient at theta = [1, 1]: [%f, %f]' % (grad[0], grad[1]))
print('(this value should be about [-15.303016, 598.250744])')

input('Program paused. Press enter to continue.')

# =========== Part 4: Train Linear Regression =============
# Once you have implemented the cost and gradient correctly, the
# trainLinearReg function will use your cost function to train
# regularized linear regression.
#
# Write Up Note: The data is non-linear, so this will not give a great fit.

# Train linear regression with lambda = 0
lmbda = 0
theta = trainLinearReg(np.hstack((np.ones((m, 1)), X)), y, lmbda)

# Plot fit over the data
plt.plot(X, np.hstack((np.ones((m, 1)), X)).dot(theta).T, '-', linewidth=2)

input('Program paused. Press enter to continue.')

# =========== Part 5: Learning Curve for Linear Regression =============
# Next, you should implement the learningCurve function.
#
# Write Up Note: Since the model is underfitting the data, we expect to
# see a graph with "high bias" -- slide 8 in ML-advice.pdf
#
lmbda = 0
error_train, error_val = learningCurve(np.hstack((np.ones((m, 1)), X)), y,
def learningCurve(X, y, Xval, yval, Lambda):
    """Returns the train and cross validation set errors for a learning
    curve. In particular, it returns two vectors of the same length -
    error_train and error_val. Then, error_train(i) contains the training
    error for i examples (and similarly for error_val(i)).

    In this function, you will compute the train and test errors for
    dataset sizes from 1 up to m. In practice, when working with larger
    datasets, you might want to do this in larger intervals.
    """
    # Number of training examples
    m, _ = X.shape

    # You need to return these values correctly
    error_train = np.zeros(m)
    error_val = np.zeros(m)

    # ====================== YOUR CODE HERE ======================
    # Instructions: Fill in this function to return training errors in
    #               error_train and the cross validation errors in error_val,
    #               i.e., error_train(i) and error_val(i) should give you the
    #               errors obtained after training on i examples.
    #
    # Note: You should evaluate the training error on the first i training
    #       examples (i.e., X(1:i, :) and y(1:i)).
    #
    #       For the cross-validation error, you should instead evaluate on
    #       the _entire_ cross validation set (Xval and yval).
    #
    # Note: If you are using your cost function (linearRegCostFunction)
    #       to compute the training and cross validation error, you should
    #       call the function with the lambda argument set to 0.
    #       Do note that you will still need to use lambda when running
    #       the training to obtain the theta parameters.
    #
    # ---------------------- Sample Solution ----------------------

    for i in range(m):
        theta = trainLinearReg(X[0:i + 1, :], y[0:i + 1], Lambda,
                               method='CG', maxiter=200)
        error_train[i] = linearRegCostFunction(X[0:i + 1, :], y[0:i + 1], theta, 0)[0]
        error_val[i] = linearRegCostFunction(Xval, yval, theta, 0)[0]

    # -------------------------------------------------------------
    # =============================================================
    return error_train, error_val
# 1.2, 1.3: regularized cost and gradient
m = len(y)
ones = np.ones((m, 1))
X = np.hstack((ones, X))
lam = 0
theta = np.ones([2, 1])
J = costFunctionReg(theta, X, y, lam)
print(J)
grad = gradientReg(theta, X, y, lam)
print(grad)

# 1.4: fit linear regression
lam = 0
thetaOpt = trainLinearReg(X, y, lam)
print(thetaOpt)
hypothesis = np.dot(X, thetaOpt)
#plt.plot(X[:,1],hypothesis,'b')
#plt.show()

# 2.1: learning curve
lam = 1
#learningCurve(X, y, Xval, yval, lam)

# 3: polynomial features
p = 8
# use the raw feature column, not the bias column that was prepended above
# (normalizing a constant column would divide by a zero standard deviation)
Xpoly = polyFeatures(X[:, 1:], p)
Xpoly, mu, sigma = featureNormalize(Xpoly)
Xpoly = np.hstack((ones, Xpoly))
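# For reference, a minimal sketch of the regularized cost and gradient the
# checks above expect (J ~ 303.993192 and grad ~ [-15.303016; 598.250744] at
# theta = [1; 1] with lambda = 1 on the ex5 data). This is an assumed
# implementation consistent with the calls in this section, not the course's
# verbatim code.
import numpy as np

def costFunctionReg(theta, X, y, lam):
    """J = 1/(2m) * ||X@theta - y||^2 + lam/(2m) * ||theta[1:]||^2"""
    m = y.size
    err = X.dot(theta.reshape(-1, 1)) - y.reshape(-1, 1)
    return float(err.T.dot(err) / (2 * m)
                 + lam * np.sum(theta.reshape(-1)[1:] ** 2) / (2 * m))

def gradientReg(theta, X, y, lam):
    """grad = 1/m * X.T @ (X@theta - y) + lam/m * [0; theta[1:]]"""
    m = y.size
    theta = theta.reshape(-1, 1)
    grad = X.T.dot(X.dot(theta) - y.reshape(-1, 1)) / m
    grad[1:] += lam * theta[1:] / m  # do not regularize the bias term
    return grad.ravel()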
      .format(grad[0], grad[1]))

input('Program paused. Press enter to continue.\n')

## =========== Part 4: Train Linear Regression =============
#  Once you have implemented the cost and gradient correctly, the
#  trainLinearReg function will use your cost function to train
#  regularized linear regression.
#
#  Write Up Note: The data is non-linear, so this will not give a great
#  fit.
#

# Train linear regression with lambda = 0
lambda_val = 0
theta = tlr.trainLinearReg(X_padded, y, lambda_val)

# resets plot
plt.close()

# Plot fit over the data (matplotlib holds by default; plt.hold is removed)
plt.plot(X, y, 'rx', markersize=10, linewidth=1.5)
plt.xlabel('Change in water level (x)')
plt.ylabel('Water flowing out of the dam (y)')
plt.plot(X, np.dot(np.column_stack((np.ones((m, 1)), X)), theta), '--', linewidth=2)
plt.show(block=False)
print('Gradient at theta = [1 1]: [%f %f] \n(this value should be about [-15.303016 598.250744])\n'
      % (grad[0], grad[1]))

## =========== Part 4: Train Linear Regression =============
#  Once you have implemented the cost and gradient correctly, the
#  trainLinearReg function will use your cost function to train
#  regularized linear regression.
#
#  Write Up Note: The data is non-linear, so this will not give a great
#  fit.
#

# Train linear regression with Lambda = 0
Lambda = 0
theta = trainLinearReg(np.column_stack((np.ones(m), X)), y, Lambda)  # was hard-coded to 1

# Plot fit over the data
plt.scatter(X, y, marker='x', s=20, color='r', lw=1.5)
plt.ylabel('Water flowing out of the dam (y)')  # Set the y-axis label
plt.xlabel('Change in water level (x)')  # Set the x-axis label
plt.plot(X, np.column_stack((np.ones(m), X)).dot(theta), '--', lw=2.0)
plt.show()

input('Program paused. Press <Enter> to continue...')

## =========== Part 5: Learning Curve for Linear Regression =============
#  Next, you should implement the learningCurve function.
#
#  Write Up Note: Since the model is underfitting the data, we expect to
#  see a graph with "high bias"
      .format(float(grad[0]), float(grad[1])))

input('Program paused. Press enter to continue.\n')

# =========== Part 4: Train Linear Regression =============
# Once you have implemented the cost and gradient correctly, the
# trainLinearReg function will use your cost function to train
# regularized linear regression.

# Write Up Note: The data is non-linear, so this will not give a great
# fit.

# Train linear regression with lambda = 0
lambda_par = 0
theta = trainLinearReg(np.concatenate((np.ones(m).reshape(m, 1), X), axis=1),
                       y, lambda_par)

print('Visualizing Data and Trained Linear Regression ...\n')

# Plot fit over the data
plt.plot(X, y, 'rx', markersize=10, linewidth=1.5)
plt.plot(X, np.concatenate((np.ones(m).reshape(m, 1), X), axis=1) @ theta,
         '--', linewidth=2)
plt.xlabel('Change in water level (x)')
plt.ylabel('Water flowing out of the dam (y)')
plt.show()

input('Program paused. Press enter to continue.\n')

# =========== Part 5: Learning Curve for Linear Regression =============
# Next, you should implement the learningCurve function.
def learningCurve(X, y, Xval, yval, lambda_value):
    #LEARNINGCURVE Generates the train and cross validation set errors needed
    #to plot a learning curve
    #   [error_train, error_val] = ...
    #       LEARNINGCURVE(X, y, Xval, yval, lambda) returns the train and
    #       cross validation set errors for a learning curve. In particular,
    #       it returns two vectors of the same length - error_train and
    #       error_val. Then, error_train(i) contains the training error for
    #       i examples (and similarly for error_val(i)).
    #
    #   In this function, you will compute the train and test errors for
    #   dataset sizes from 1 up to m. In practice, when working with larger
    #   datasets, you might want to do this in larger intervals.
    #

    # Number of training examples
    m = X.shape[0]

    # You need to return these values correctly
    error_train = np.zeros((m, 1))
    error_val = np.zeros((m, 1))

    # ====================== YOUR CODE HERE ======================
    # Instructions: Fill in this function to return training errors in
    #               error_train and the cross validation errors in error_val,
    #               i.e., error_train(i) and error_val(i) should give you the
    #               errors obtained after training on i examples.
    #
    # Note: You should evaluate the training error on the first i training
    #       examples (i.e., X(1:i, :) and y(1:i)).
    #
    #       For the cross-validation error, you should instead evaluate on
    #       the _entire_ cross validation set (Xval and yval).
    #
    # Note: If you are using your cost function (linearRegCostFunction)
    #       to compute the training and cross validation error, you should
    #       call the function with the lambda argument set to 0.
    #       Do note that you will still need to use lambda when running
    #       the training to obtain the theta parameters.
    #
    # ---------------------- Sample Solution ----------------------

    for i in range(m):
        X_i = X[:i + 1, :]
        y_i = y[:i + 1]
        theta_i = trainLinearReg(X_i, y_i, lambda_value)
        error_train[i], _ = linearRegCostFunction(X_i, y_i, theta_i, 0)
        error_val[i], _ = linearRegCostFunction(Xval, yval, theta_i, 0)

    # -------------------------------------------------------------
    # =============================================================
    return (error_train, error_val)
print('Program paused. Press enter to continue.')
input()

## =========== Part 4: Train Linear Regression =============
#  Once you have implemented the cost and gradient correctly, the
#  trainLinearReg function will use your cost function to train
#  regularized linear regression.
#
#  Write Up Note: The data is non-linear, so this will not give a great
#  fit.
#

# Train linear regression with lambda = 0
lambda_ = 0.
theta = trainLinearReg(addOnes(X), y, lambda_)
print(theta)

# Plot fit over the data (matplotlib holds by default; hold() is removed)
fig = figure()
plot(X, y, 'rx', markersize=10, linewidth=1.5)
xlabel('Change in water level (x)')
ylabel('Water flowing out of the dam (y)')
plot(X, dot(addOnes(X), theta), '--', linewidth=2)
fig.show()

print('Program paused. Press enter to continue.')
input()
      'Gradient at theta = [1 ; 1] \n(this value should be about [-15.303016; 598.250744])\n',
      grad[0], grad[1])

'''
%% =========== Part 4: Train Linear Regression =============
%  Once you have implemented the cost and gradient correctly, the
%  trainLinearReg function will use your cost function to train
%  regularized linear regression.
%
%  Write Up Note: The data is non-linear, so this will not give a great
%  fit.
%
%  Train linear regression with lambda = 0
'''
lambda_ = 0
theta = trainLinearReg(ones_X, y, lambda_)

# Plot fit over the data
plt.plot(X, y, 'rx')
plt.xlabel('Change in water level (x)')
plt.ylabel('Water flowing out of the dam (y)')
y_new = ones_X.dot(theta)
plt.plot(X, y_new, '--')
plt.show()

'''
%% =========== Part 5: Learning Curve for Linear Regression =============
%  Next, you should implement the learningCurve function.
%
%  Write Up Note: Since the model is underfitting the data, we expect to
%  see a graph with "high bias" -- slide 8 in ML-advice.pdf
%
'''
# You should now implement the gradient for regularized linear
# regression.

theta = np.array([1, 1])
_, grad = linearRegCostFunction(X_ones, y, theta, 1)
print('Gradient at theta = [1 ; 1]:', grad)
print('(this value should be about [-15.303016; 598.250744])\n')

# =========== Part 4: Train Linear Regression =============
# Once you have implemented the cost and gradient correctly, the
# trainLinearReg function will use your cost function to train
# regularized linear regression.
#
# Write Up Note: The data is non-linear, so this will not give a great
# fit.

lamda = 0
theta = trainLinearReg(X_ones, y, lamda)

plt.scatter(X, y, marker='x', c='r', s=60)
plt.plot(X, X_ones.dot(theta))
plt.xlabel('Change in water level (x)')
plt.ylabel('Water flowing out of the dam (y)')
plt.show()

# =========== Part 5: Learning Curve for Linear Regression =============
# Next, you should implement the learningCurve function.
#
# Write Up Note: Since the model is underfitting the data, we expect to
# see a graph with "high bias" -- slide 8 in ML-advice.pdf

lamda = 0
error_train, error_val = learningCurve(X_ones, y, Xval_ones, yval, lamda)
      'Gradient at theta = [1 1]: [%f %f] \n(this value should be about [-15.303016 598.250744])\n'
      % (grad[0], grad[1]))

# %% =========== Part 4: Train Linear Regression =============
# Once you have implemented the cost and gradient correctly, the
# trainLinearReg function will use your cost function to train
# regularized linear regression.
#
# Write Up Note: The data is non-linear, so this will not give a great
# fit.
#

# Train linear regression with Lambda = 0
Lambda = 0
X_stack = np.column_stack((np.ones(m), X))
theta = trainLinearReg(X_stack, y, Lambda)

# Prediction from the learned model
pred = X_stack.dot(theta)

# Plot fit over the data
plt.figure()
plt.scatter(X, y, marker='x', s=20, edgecolor='r', lw=1.5)
plt.ylabel('Water flowing out of the dam (y)')  # Set the y-axis label
plt.xlabel('Change in water level (x)')  # Set the x-axis label
plt.plot(X, pred, '--r', lw=2.0)
plt.grid()
plt.show()

# %% =========== Part 5: Learning Curve for Linear Regression =============
# Next, you should implement the learningCurve function.
data = loadmat('../data/ex5data1.mat')
y_train = data['y']
X_train = np.c_[np.ones_like(data['X']), data['X']]
yval = data['yval']
Xval = np.c_[np.ones_like(data['Xval']), data['Xval']]

plt.scatter(X_train[:, 1], y_train, s=30, c='r', marker='x', linewidths=1)
plt.xlabel("Change in water level (x)")
plt.ylabel("Water flowing out of the dam (y)")
plt.show()

# Fit with scipy.optimize and compare against scikit-learn's LinearRegression
fit = trainLinearReg(X_train, y_train, 0)
regr = LinearRegression(fit_intercept=False)
regr.fit(X_train, y_train.ravel())

plt.plot(np.linspace(-50, 40), (fit.x[0] + (fit.x[1] * np.linspace(-50, 40))),
         label='Scipy optimize')
plt.plot(np.linspace(-50, 40), (regr.coef_[0] + (regr.coef_[1] * np.linspace(-50, 40))),
         label='Scikit-learn')
plt.scatter(X_train[:, 1], y_train, s=50, c='r', marker='x', linewidths=1)
plt.xlabel('Change in water level (x)')
plt.ylabel('Water flowing out of the dam (y)')
plt.legend(loc=4)
plt.show()

t_error, v_error = learningCurve(X_train, y_train, Xval, yval, 0)
      (grad[0], grad[1]))

input('Program paused. Press Enter to continue...')

# =========== Part 4: Train Linear Regression =============
# Once you have implemented the cost and gradient correctly, the
# trainLinearReg function will use your cost function to train
# regularized linear regression.
#
# Write Up Note: The data is non-linear, so this will not give a great
# fit.
#

# Train linear regression with lambda = 0
lambda_ = 0
theta = trainLinearReg(np.c_[np.ones(m), X], y, lambda_)

# Plot fit over the data
plt.figure()
plt.scatter(X, y, marker='x', s=60, edgecolor='r', color='r', lw=1.5)
plt.xlabel('Change in water level (x)')
plt.ylabel('Water flowing out of the dam (y)')
plt.xlim(-50, 40)
plt.ylim(-5, 40)
plt.plot(X, np.c_[np.ones(m), X] @ theta.T, '--', lw=2.0)
plt.show(block=False)

input('Program paused. Press Enter to continue...')

# =========== Part 5: Learning Curve for Linear Regression =============
raw_input("Program paused. Press Enter to continue...") ## =========== Part 4: Train Linear Regression ============= # Once you have implemented the cost and gradient correctly, the # trainLinearReg function will use your cost function to train # regularized linear regression. # # Write Up Note: The data is non-linear, so this will not give a great # fit. # # Train linear regression with Lambda = 0 Lambda = 0 theta = trainLinearReg(np.column_stack((np.ones(m), X)), y, 1) # Plot fit over the data plt.scatter(X, y, marker='x', s=20, edgecolor='r', lw=1.5) plt.ylabel('Water flowing out of the dam (y)') # Set the y-axis label plt.xlabel('Change in water level (x)') # Set the x-axis label plt.plot(X, np.column_stack((np.ones(m), X)).dot(theta), '--', lw=2.0) raw_input("Program paused. Press Enter to continue...") ## =========== Part 5: Learning Curve for Linear Regression ============= # Next, you should implement the learningCurve function. # # Write Up Note: Since the model is underfitting the data, we expect to # see a graph with "high bias" -- slide 8 in ML-advice.pdf
print('Cost at theta = [1 ; 1]: {:f}\n(this value should be about 303.993192)\n'.format(J))
input('Program paused. Press enter to continue.\n')

theta = np.array([[1], [1]])
J, grad = lrcf.linearRegCostFunction(X_padded, y, theta, 1, True)
print('Gradient at theta = [1 ; 1]: [{:f}; {:f}] \n(this value should be about [-15.303016; 598.250744])'.format(grad[0], grad[1]))
input('Program paused. Press enter to continue.\n')

print("Part 4: Train Linear Regression")
lambda_val = 0
theta = tlr.trainLinearReg(X_padded, y, lambda_val)

# display (matplotlib holds by default; plt.hold is removed)
plt.close()
plt.plot(X, y, 'rx', markersize=10, linewidth=1.5)
plt.xlabel('Change in water level (x)')
plt.ylabel('Water flowing out of the dam (y)')
plt.plot(X, np.dot(np.column_stack((np.ones((m, 1)), X)), theta), '--', linewidth=2)
plt.show()

input('Program paused. Press enter to continue.\n')

## =========== Part 5: Learning Curve for Linear Regression =============
lambda_val = 0
input('Program paused. Press enter to continue.\n')

## =========== Part 3: Regularized Linear Regression Gradient =============
#  You should now implement the gradient for regularized linear regression.

print('Gradient at theta = [1 ; 1]: [%f; %f] (this value should be about [-15.303016; 598.250744])\n'
      % (grad[0], grad[1]))

input('Program paused. Press enter to continue.\n')

## =========== Part 4: Train Linear Regression =============
#  Once you have implemented the cost and gradient correctly, the
#  trainLinearReg function will use your cost function to train
#  regularized linear regression.
#  Write Up Note: The data is non-linear, so this will not give a great fit.

# Train linear regression with lambda = 0
xlambda = 0
theta = tLR.trainLinearReg(np.c_[(np.ones([m, 1]), X)], y, xlambda)

# Plot fit over the data
plt = plot.plotNormal(X, y)
y_pred = np.dot(np.c_[(np.ones([m, 1]), X)], theta)
plt.plot(X, y_pred, color='b')
plt.show()

# =========== Part 5: Learning Curve for Linear Regression =============
#  Next, you should implement the learningCurve function.
#  Write Up Note: Since the model is underfitting the data, we expect to
#  see a graph with "high bias" -- Figure 3 in ex5.pdf

xlambda = 0
error_train, error_val = lC.learningCurve(np.c_[(np.ones([m, 1]), X)], y,
                                          np.c_[np.ones([Xval.shape[0], 1]), Xval],
                                          yval, xlambda)

# plot the learning curve
def ex5():
    ## Machine Learning Online Class
    #  Exercise 5 | Regularized Linear Regression and Bias-Variance
    #
    #  Instructions
    #  ------------
    #
    #  This file contains code that helps you get started on the
    #  exercise. You will need to complete the following functions:
    #
    #     linearRegCostFunction
    #     learningCurve
    #     validationCurve
    #
    #  For this exercise, you will not need to change any code in this file,
    #  or any other files other than those mentioned above.
    #

    ## Initialization
    #clear ; close all; clc

    ## =========== Part 1: Loading and Visualizing Data =============
    #  We start the exercise by first loading and visualizing the dataset.
    #  The following code will load the dataset into your environment and plot
    #  the data.
    #

    # Load Training Data
    print('Loading and Visualizing Data ...')

    # Load from ex5data1:
    # You will have X, y, Xval, yval, Xtest, ytest in your environment
    mat = scipy.io.loadmat('ex5data1.mat')
    X = mat['X']
    y = mat['y'].ravel()
    Xval = mat['Xval']
    yval = mat['yval'].ravel()
    Xtest = mat['Xtest']
    ytest = mat['ytest'].ravel()

    # m = Number of examples
    m = X.shape[0]

    # Plot training data
    plt.plot(X, y, marker='x', linestyle='None', ms=10, lw=1.5)
    plt.xlabel('Change in water level (x)')
    plt.ylabel('Water flowing out of the dam (y)')
    plt.savefig('figure1.png')

    print('Program paused. Press enter to continue.')
    #pause;

    ## =========== Part 2: Regularized Linear Regression Cost =============
    #  You should now implement the cost function for regularized linear
    #  regression.
    #

    theta = np.array([1, 1])
    J, _ = linearRegCostFunction(np.concatenate([np.ones((m, 1)), X], axis=1),
                                 y, theta, 1)

    print('Cost at theta = [1 ; 1]: %f \n(this value should be about 303.993192)' % J)

    print('Program paused. Press enter to continue.')
    #pause;

    ## =========== Part 3: Regularized Linear Regression Gradient =============
    #  You should now implement the gradient for regularized linear
    #  regression.
    #

    theta = np.array([1, 1])
    J, grad = linearRegCostFunction(np.concatenate([np.ones((m, 1)), X], axis=1),
                                    y, theta, 1)

    print('Gradient at theta = [1 ; 1]: [%f; %f] \n(this value should be about [-15.303016; 598.250744])'
          % (grad[0], grad[1]))

    print('Program paused. Press enter to continue.')
    #pause;

    ## =========== Part 4: Train Linear Regression =============
    #  Once you have implemented the cost and gradient correctly, the
    #  trainLinearReg function will use your cost function to train
    #  regularized linear regression.
    #
    #  Write Up Note: The data is non-linear, so this will not give a great
    #  fit.
    #

    fig = plt.figure()

    # Train linear regression with lambda = 0
    lambda_value = 0
    theta = trainLinearReg(np.concatenate([np.ones((m, 1)), X], axis=1), y,
                           lambda_value)

    # Plot fit over the data
    plt.plot(X, y, marker='x', linestyle='None', ms=10, lw=1.5)
    plt.xlabel('Change in water level (x)')
    plt.ylabel('Water flowing out of the dam (y)')
    plt.plot(X, np.dot(np.concatenate([np.ones((m, 1)), X], axis=1), theta),
             '--', lw=2)
    plt.savefig('figure2.png')

    print('Program paused. Press enter to continue.')
    #pause;

    ## =========== Part 5: Learning Curve for Linear Regression =============
    #  Next, you should implement the learningCurve function.
    #
    #  Write Up Note: Since the model is underfitting the data, we expect to
    #  see a graph with "high bias" -- slide 8 in ML-advice.pdf
    #

    fig = plt.figure()

    lambda_value = 0
    error_train, error_val = learningCurve(
        np.concatenate([np.ones((m, 1)), X], axis=1), y,
        np.concatenate([np.ones((yval.size, 1)), Xval], axis=1), yval,
        lambda_value)

    plt.plot(np.arange(1, m + 1), error_train, np.arange(1, m + 1), error_val)
    plt.title('Learning curve for linear regression')
    plt.legend(['Train', 'Cross Validation'])
    plt.xlabel('Number of training examples')
    plt.ylabel('Error')
    plt.axis([0, 13, 0, 150])

    print('# Training Examples\tTrain Error\tCross Validation Error')
    for i in range(m):
        # error_train[i] was computed on i + 1 examples
        print(' \t%d\t\t%f\t%f' % (i + 1, error_train[i], error_val[i]))
    plt.savefig('figure3.png')

    print('Program paused. Press enter to continue.')
    #pause;

    ## =========== Part 6: Feature Mapping for Polynomial Regression =============
    #  One solution to this is to use polynomial regression. You should now
    #  complete polyFeatures to map each example into its powers
    #

    p = 8

    # Map X onto Polynomial Features and Normalize
    X_poly = polyFeatures(X, p)
    X_poly, mu, sigma = featureNormalize(X_poly)  # Normalize
    X_poly = np.concatenate([np.ones((m, 1)), X_poly], axis=1)  # Add Ones

    # Map X_poly_test and normalize (using mu and sigma)
    X_poly_test = polyFeatures(Xtest, p)
    X_poly_test -= mu
    X_poly_test /= sigma
    X_poly_test = np.concatenate([np.ones((X_poly_test.shape[0], 1)), X_poly_test],
                                 axis=1)  # Add Ones

    # Map X_poly_val and normalize (using mu and sigma)
    X_poly_val = polyFeatures(Xval, p)
    X_poly_val -= mu
    X_poly_val /= sigma
    X_poly_val = np.concatenate([np.ones((X_poly_val.shape[0], 1)), X_poly_val],
                                axis=1)  # Add Ones

    print('Normalized Training Example 1:')
    print(formatter(' %f \n', X_poly[0, :]))

    print('\nProgram paused. Press enter to continue.')
    #pause;

    ## =========== Part 7: Learning Curve for Polynomial Regression =============
    #  Now, you will get to experiment with polynomial regression with multiple
    #  values of lambda. The code below runs polynomial regression with
    #  lambda = 0. You should try running the code with different values of
    #  lambda to see how the fit and learning curve change.
    #

    fig = plt.figure()

    lambda_value = 0
    theta = trainLinearReg(X_poly, y, lambda_value)

    # Plot training data and fit
    plt.plot(X, y, marker='x', linestyle='None', ms=10, lw=1.5)
    plotFit(np.min(X), np.max(X), mu, sigma, theta, p)
    plt.xlabel('Change in water level (x)')
    plt.ylabel('Water flowing out of the dam (y)')
    plt.title('Polynomial Regression Fit (lambda = %f)' % lambda_value)

    plt.figure()
    error_train, error_val = learningCurve(X_poly, y, X_poly_val, yval,
                                           lambda_value)
    plt.plot(np.arange(1, 1 + m), error_train, np.arange(1, 1 + m), error_val)
    plt.title('Polynomial Regression Learning Curve (lambda = %f)' % lambda_value)
    plt.xlabel('Number of training examples')
    plt.ylabel('Error')
    plt.axis([0, 13, 0, 100])
    plt.legend(['Train', 'Cross Validation'])

    print('Polynomial Regression (lambda = %f)\n' % lambda_value)
    print('# Training Examples\tTrain Error\tCross Validation Error')
    for i in range(m):
        print(' \t%d\t\t%f\t%f' % (i + 1, error_train[i], error_val[i]))
    plt.savefig('figure4.png')

    print('Program paused. Press enter to continue.')
    #pause;

    ## =========== Part 8: Validation for Selecting Lambda =============
    #  You will now implement validationCurve to test various values of
    #  lambda on a validation set. You will then use this to select the
    #  "best" lambda value.
    fig = plt.figure()

    lambda_vec, error_train, error_val = validationCurve(X_poly, y,
                                                         X_poly_val, yval)

    plt.plot(lambda_vec, error_train, lambda_vec, error_val)
    plt.legend(['Train', 'Cross Validation'])
    plt.xlabel('lambda')
    plt.ylabel('Error')

    print('lambda\t\tTrain Error\tValidation Error')
    for i in range(lambda_vec.size):
        print(' %f\t%f\t%f' % (lambda_vec[i], error_train[i], error_val[i]))
    plt.savefig('figure5.png')

    print('Program paused. Press enter to continue.')
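# plotFit is called in Part 7 above but not defined in these chunks. A
# minimal sketch of a compatible implementation, inferred from how it is
# called (plotFit(min_x, max_x, mu, sigma, theta, p)) and therefore an
# assumption rather than the course's verbatim code:
import numpy as np
import matplotlib.pyplot as plt

def plotFit(min_x, max_x, mu, sigma, theta, p):
    """Plot the learned polynomial fit over a padded range of x values."""
    x = np.arange(min_x - 15, max_x + 25, 0.05).reshape(-1, 1)
    X_poly = polyFeatures(x, p)
    X_poly = (X_poly - mu) / sigma  # normalize with the training-set stats
    X_poly = np.hstack((np.ones((x.shape[0], 1)), X_poly))
    plt.plot(x, X_poly.dot(theta), '--', lw=2)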
      % (grad[0], grad[1]))

input('Program paused. Press enter to continue.\n')

# =========== Part 4: Train Linear Regression =============
# Once you have implemented the cost and gradient correctly, the
# trainLinearReg function will use your cost function to train
# regularized linear regression.
#
# Write Up Note: The data is non-linear, so this will not give a great
# fit.
#

# Train linear regression with lambda = 0
_lambda = 0
theta = trainLinearReg(_X, y, _lambda)

# Plot fit over the data
plt.plot(X, np.dot(_X, theta), '--', linewidth=2)

input('Program paused. Press enter to continue.\n')

# =========== Part 5: Learning Curve for Linear Regression =============
# Next, you should implement the learningCurve function.
#
# Write Up Note: Since the model is underfitting the data, we expect to
# see a graph with "high bias" -- slide 8 in ML-advice.pdf
#
_lambda = 0
error_train, error_val = learningCurve(_X, y, _Xval, yval, _lambda)
      % (grad[0], grad[1]))

input("Program paused. Press Enter to continue...")

## =========== Part 4: Train Linear Regression =============
#  Once you have implemented the cost and gradient correctly, the
#  trainLinearReg function will use your cost function to train
#  regularized linear regression.
#
#  Write Up Note: The data is non-linear, so this will not give a great
#  fit.
#

# Train linear regression with Lambda = 0
Lambda = 0
theta = trainLinearReg(np.column_stack((np.ones(m), X)), y, Lambda)  # was hard-coded to 1

# Plot fit over the data
plt.scatter(X, y, marker='x', s=20, edgecolor='r', lw=1.5)
plt.ylabel('Water flowing out of the dam (y)')  # Set the y-axis label
plt.xlabel('Change in water level (x)')  # Set the x-axis label
plt.plot(X, np.column_stack((np.ones(m), X)).dot(theta), '--', lw=2.0)

input("Program paused. Press Enter to continue...")

## =========== Part 5: Learning Curve for Linear Regression =============
#  Next, you should implement the learningCurve function.
#
#  Write Up Note: Since the model is underfitting the data, we expect to
#  see a graph with "high bias" -- slide 8 in ML-advice.pdf
#