def ridge_regression_gradient_descent(self, feature_matrix, output, initial_weights,
                                      step_size, l2_penalty, max_iterations=100,
                                      debug=True):
    """Learn ridge-regression weights with a fixed number of gradient steps.

    Every iteration computes the prediction errors once and then moves each
    weight against its own derivative, as returned by
    ``np_utils.feature_derivative_ridge``. Index 0 is flagged to that helper
    as the constant (intercept) term.

    Args:
        feature_matrix: 2-D array; column ``i`` is the feature paired with
            ``weights[i]``.
        output: observed target values; subtracted from the predictions to
            form the errors.
        initial_weights: starting weights. They are copied, so the caller's
            sequence is never modified.
        step_size: multiplier applied to every derivative.
        l2_penalty: L2 penalty forwarded to the derivative and cost helpers.
        max_iterations: number of descent iterations to run.
        debug: when true, print the iteration number and the cost function
            on every iteration below 10, every 10th from 10 on and every
            100th from 100 on.

    Returns:
        Tuple ``(iteration, weights)``: the number of iterations performed
        and the learned weights as a float NumPy array.
    """
    # Force a float copy: with an integer array every in-place update below
    # would be silently truncated back to an integer.
    weights = np_utils.np.array(initial_weights, dtype=float)
    iteration = 0
    print_frequency = 1  # how often debugging output is emitted

    while iteration < max_iterations:
        iteration += 1
        if debug and iteration in (10, 100):
            # Thin out the debugging output as the run gets longer.
            print_frequency = iteration

        predictions = np_utils.predict_output(feature_matrix, weights)
        errors = predictions - output

        if debug and iteration % print_frequency == 0:
            print('Iteration: %s' % iteration)
            print('Cost function: %s' % np_utils.compute_cost_function(errors, weights, l2_penalty))

        for i in range(len(weights)):
            # feature_matrix[:, i] is the column associated with weights[i];
            # i == 0 marks the constant term for the derivative helper.
            derivative = np_utils.feature_derivative_ridge(
                errors, feature_matrix[:, i], weights[i], l2_penalty, i == 0)
            weights[i] -= step_size * derivative

    return iteration, weights
def compute_ro(self, i, feature_matrix, output, weights):
    """Return ro[i] for the feature in column ``i``.

    ro[i] = SUM[ [feature_i] * (output - prediction + w[i] * [feature_i]) ]

    Whenever ro[i] falls between -l1_penalty/2 and l1_penalty/2 the
    corresponding weight w[i] is sent to zero; this function only computes
    ro[i] and leaves that decision to its caller.
    """
    column = feature_matrix[:, i]
    predicted = np_utils.predict_output(feature_matrix, weights)
    # Residual with the contribution of feature i added back in.
    residual_plus_own_term = output - predicted + weights[i] * column
    return (column * residual_plus_own_term).sum()
def quiz_2_ridge_grandient_descent(sales):
    """Print the ridge gradient-descent quiz answers computed from ``sales``.

    The data is split 80/20 (seed 0) into train and test sets. Two models are
    trained with an L2 penalty of 0 and of 1e11: one on ``sqft_living`` only
    and one on ``sqft_living`` plus ``sqft_living15``. The function prints the
    learned coefficients, test RSS values and the prediction for the first
    test house; it returns nothing.

    Args:
        sales: dataset exposing ``random_split(fraction, seed=...)`` whose
            parts are accepted by ``np_utils.get_numpy_data`` and support
            ``['price'][index]`` lookups.
    """
    print("\n**********************************")
    print("* Ridge Gradient Descent *")
    print("**********************************\n")

    my_output = 'price'
    train_data, test_data = sales.random_split(.8, seed=0)

    # ---- model with a single feature -------------------------------------
    simple_features = ['sqft_living']
    simple_feature_matrix, output = np_utils.get_numpy_data(
        train_data, simple_features, my_output)
    simple_test_feature_matrix, _ = np_utils.get_numpy_data(
        test_data, simple_features, my_output)

    ridge = RidgeRegression()
    l2_no_reg, l2_high_reg = 0, 1e11
    initial_weights = np_utils.np.array([0., 0.])

    print("\nQ1 & Q2 coefficients with features: %s" % (simple_features))
    ridge_weights = compute_ridge_regression(
        ridge, simple_feature_matrix, output,
        [l2_no_reg, l2_high_reg], initial_weights)

    print("\nQ3: Line fit with no regularization (l2_penalty=0) is steeper")

    print("\nQ4: high regularization (l2_penalty=1e11)")
    compute_ridge_rss([ridge_weights[l2_high_reg]], simple_test_feature_matrix, test_data)
    print("\t- Between 5e14 and 8e14")

    # ---- model with two features -----------------------------------------
    more_features = ['sqft_living', 'sqft_living15']
    initial_w_morefeatures = np_utils.np.array([0.0, 0.0, 0.0])
    more_feature_matrix, output_more_features = np_utils.get_numpy_data(
        train_data, more_features, my_output)
    more_test_feature_matrix, _ = np_utils.get_numpy_data(
        test_data, more_features, my_output)

    print("\nQ5 & Q6 coefficients with features: %s" % (more_features))
    ridge_morefeatures = compute_ridge_regression(
        ridge, more_feature_matrix, output_more_features,
        [l2_no_reg, l2_high_reg], initial_w_morefeatures)

    # The all-zero baseline is evaluated on the two-feature matrix, so the
    # label reports that feature list.
    print("\nQ7: using all zero weights with features: %s" % (more_features))
    compute_ridge_rss([initial_w_morefeatures], more_test_feature_matrix, test_data)
    print("\t-Between 1e15 and 3e15")

    num_of_house = 1  # number of leading test houses to compare
    print("\nQ8: Which model makes better predictions for 1st house:")
    for l2_penalty in [l2_no_reg, l2_high_reg]:
        print("L2:%s:" % l2_penalty)
        current_predictions = np_utils.predict_output(
            more_test_feature_matrix, ridge_morefeatures[l2_penalty])
        for house_predict in range(num_of_house):
            pred = current_predictions[house_predict]
            real = test_data['price'][house_predict]
            print('\t\t(predict) %s vs %s (real) diff: %s' % (pred, real, real - pred))
def more_features_with_lasso_coordinate(lasso, sales):
    """Fit lasso models at three L1 penalties and print their summaries.

    The data is split 80/20 (seed 0). For each ``(penalty, tolerance)`` pair
    the model is trained on the normalized training features, the learned
    weights are divided by the training norms, and the names of all weights
    that are not zero are printed. Afterwards the RSS of every model on the
    (un-normalized) test features is printed. Nothing is returned.

    Args:
        lasso: object providing ``lasso_cyclical_coordinate_descent(
            feature_matrix, output, initial_weights, l1_penalty, tolerance)``.
        sales: dataset exposing ``random_split(fraction, seed=...)``.
    """
    train_data, test_data = sales.random_split(.8, seed=0)
    all_features = [
        'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors',
        'waterfront', 'view', 'condition', 'grade', 'sqft_above',
        'sqft_basement', 'yr_built', 'yr_renovated',
    ]
    # One name per weight, in weight order (index 0 is the constant term).
    weight_names = ['constant'] + all_features

    feature_matrix_norm, train_output, train_norms = np_utils.get_normalized_data(
        train_data, all_features, 'price')
    initial_weights = np_utils.np.zeros(len(all_features) + 1)

    weights_info, nnz_features = {}, {}
    penalty_tolerance = [[1e7, 1.0], [1e8, 1.0], [1e4, 5e5]]
    penalty_str = {1e7: '1e7', 1e8: '1e8', 1e4: '1e4'}

    print("\nFeatures assigned for Q5,Q6,Q7:")
    for penalty, tolerance in penalty_tolerance:
        weights = lasso.lasso_cyclical_coordinate_descent(
            feature_matrix_norm, train_output, initial_weights, penalty, tolerance)
        # Presumably rescales the weights so they apply to un-normalized
        # features (they are used that way on the test set below).
        weights_normalized = weights / train_norms
        weights_info[penalty] = weights_normalized
        # "Non-zero" must include negative weights, hence != 0 rather than > 0.
        nnz_features[penalty] = [
            name for name, weight in zip(weight_names, weights_normalized)
            if weight != 0
        ]
        print("\n\tL1_penalty_%s: %s" % (penalty_str[penalty], nnz_features[penalty]))

    print("\nQ8: three models RSS on the TEST data:")
    test_feature_matrix, test_output = np_utils.get_numpy_data(
        test_data, all_features, 'price')
    for penalty, _ in penalty_tolerance:
        current_predictions = np_utils.predict_output(
            test_feature_matrix, weights_info[penalty])
        RSS = reg.compute_RSS(current_predictions, test_output)
        print("\n\tL1_penalty_%s: %s" % (penalty_str[penalty], RSS))
def compute_ridge_regression(ridge, feature_matrix, output, l2_penalties, init_weights):
    """Train one ridge model per L2 penalty and print a coefficient of each.

    Each model is fitted with ``ridge.ridge_regression_gradient_descent``
    using a step size of 1e-12 and 1000 iterations, with debugging output
    switched off. For every penalty the weight at index 1, rounded to one
    decimal, is printed.

    Args:
        ridge: object providing ``ridge_regression_gradient_descent``.
        feature_matrix: training features passed through to ``ridge``.
        output: training targets passed through to ``ridge``.
        l2_penalties: iterable of L2 penalties; each must be hashable because
            it becomes a key of the returned dict.
        init_weights: starting weights handed to every fit.

    Returns:
        dict mapping each L2 penalty to the weights learned for it.
    """
    step_size = 1e-12
    max_iterations = 1000
    reported_weight = 1  # index of the weight that is printed

    ridge_info = {}
    for l2_penalty in l2_penalties:
        _, weights = ridge.ridge_regression_gradient_descent(
            feature_matrix, output, init_weights, step_size, l2_penalty,
            max_iterations, debug=False)
        ridge_info[l2_penalty] = weights
        print("\n\tcoefficients with regularization (L2:%s) is: %s"
              % (l2_penalty, round(weights[reported_weight], 1)))
    return ridge_info
def evaluate_lasso_coordinate(lasso, sales):
    """Fit a two-feature lasso model on ``sales`` and print its quiz answers.

    The features ``sqft_living`` and ``bedrooms`` are normalized, a model is
    trained by cyclical coordinate descent (L1 penalty 1e7, tolerance 1.0,
    all-zero starting weights), and the RSS on that same normalized data is
    printed together with the learned weights.

    NOTE: the feature named in the Q4 line is always the last entry of the
    feature list; it is not derived from the learned weights.
    """
    features = ['sqft_living', 'bedrooms']
    target = 'price'
    penalty, tol = 1e7, 1.0
    start = np_utils.np.zeros(3)

    normalized_matrix, observed, _norms = np_utils.get_normalized_data(
        sales, features, target)
    learned = lasso.lasso_cyclical_coordinate_descent(
        normalized_matrix, observed, start, penalty, tol)

    fitted = np_utils.predict_output(normalized_matrix, learned)
    rss = reg.compute_RSS(fitted, observed)

    print("\nQ3: Lasso-coordinate with normalized dataset RSS is: %s" % rss)
    print("\nQ4: Features assigned a zero weight at convergence: %s" % features[-1])
    print("\t->%s" % learned)
def regression_gradient_descent(self, feature_matrix, output, initial_weights, step_size, tolerance):
    """Run gradient descent until the gradient magnitude drops below ``tolerance``.

    Each pass computes the prediction errors once, then updates every weight
    with twice the value returned by ``np_utils.feature_derivative`` for its
    feature column.

    NOTE: there is no iteration cap. If the gradient magnitude never falls
    below ``tolerance`` (for example because ``step_size`` is too large) the
    loop does not terminate.

    Args:
        feature_matrix: 2-D array; column ``i`` is the feature paired with
            ``weights[i]``.
        output: observed target values.
        initial_weights: starting weights. They are copied, so the caller's
            sequence is never modified.
        step_size: multiplier applied to every derivative.
        tolerance: convergence threshold for the gradient magnitude.

    Returns:
        The learned weights as a float NumPy array.
    """
    # Force a float copy: with an integer array every in-place update below
    # would be silently truncated back to an integer.
    weights = np_utils.np.array(initial_weights, dtype=float)

    while True:
        predictions = np_utils.predict_output(feature_matrix, weights)
        errors = predictions - output

        gradient_sum_squares = 0
        for i in range(len(weights)):
            # feature_matrix[:, i] is the column associated with weights[i].
            derivative = 2 * np_utils.feature_derivative(errors, feature_matrix[:, i])
            gradient_sum_squares += derivative ** 2
            weights[i] -= step_size * derivative

        # The gradient magnitude is the square root of the summed squares.
        if sqrt(gradient_sum_squares) < tolerance:
            return weights
def get_predictions(dataset, features, output, weights):
    """Return the predictions of ``weights`` for ``features`` of ``dataset``.

    ``output`` names the target passed to ``np_utils.get_numpy_data``; the
    target values that helper returns are not used here.
    """
    matrix, _ = np_utils.get_numpy_data(dataset, features, output)
    return np_utils.predict_output(matrix, weights)
def compute_ridge_rss(weights_list, feature_matrix, test_data):
    """Print the RSS on ``test_data['price']`` for each weight vector.

    Args:
        weights_list: iterable of weight vectors to evaluate.
        feature_matrix: features the predictions are computed from.
        test_data: mapping-like dataset whose ``'price'`` entry holds the
            observed values.
    """
    for candidate in weights_list:
        predicted = np_utils.predict_output(feature_matrix, candidate)
        rss = reg.compute_RSS(predicted, test_data['price'])
        print("\n\tTEST error (RSS) is: %s" % (rss))