Example #1
	def ridge_regression_gradient_descent(self, feature_matrix, output, initial_weights,
											step_size, l2_penalty, max_iterations=100, debug=True):

		weights = np_utils.np.array(initial_weights)
		iteration = 0 # iteration counter
		print_frequency = 1  # for adjusting frequency of debugging output

		#while not reached maximum number of iterations:
		while iteration < max_iterations:
			iteration += 1  # increment iteration counter
			if (iteration in [10, 100]) and debug:
				print_frequency = iteration

			# compute the predictions as the dot product of feature_matrix and weights
			predictions = np_utils.predict_output(feature_matrix,weights)

			# compute the errors as predictions - output
			errors = predictions - output

			# from time to time, print the value of the cost function
			if (iteration % print_frequency == 0) and debug:
				print('Iteration: %s' % iteration)
				print('Cost function: %s'% np_utils.compute_cost_function(errors, weights, l2_penalty))

			for i in xrange(len(weights)): # loop over each weight
				# Recall that feature_matrix[:,i] is the feature column associated with weights[i]
				# compute the derivative for weight[i].
				#(Remember: when i=0, you are computing the derivative of the constant!)
			is_constant = (i == 0)
				feature = feature_matrix[:,i]
				derivative = np_utils.feature_derivative_ridge(errors,feature,weights[i],l2_penalty,is_constant)
				# subtract the step size times the derivative from the current weight
				weights[i] -= step_size * derivative

		return iteration, weights
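The method above leans on two helpers from np_utils that the snippet does not show. A minimal sketch of what they plausibly compute, assuming the standard ridge objective RSS(w) + l2_penalty * ||w||^2 with the intercept left unregularized (the function names and signatures are taken from the calls above; the bodies are assumptions):

import numpy as np

def feature_derivative_ridge(errors, feature, weight, l2_penalty, feature_is_constant):
	# partial derivative of the ridge cost with respect to one weight:
	# 2 * (errors . feature), plus 2 * l2_penalty * weight for regularized weights
	derivative = 2 * np.dot(errors, feature)
	if not feature_is_constant:  # leave the intercept unregularized
		derivative += 2 * l2_penalty * weight
	return derivative

def compute_cost_function(errors, weights, l2_penalty):
	# ridge cost: RSS plus the L2 penalty (intercept excluded, matching the derivative)
	return np.dot(errors, errors) + l2_penalty * np.dot(weights[1:], weights[1:])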
Example #2
	def compute_ro(self, i, feature_matrix, output, weights):
		""" whenever ro[i] falls between -l1_penalty/2 and l1_penalty/2,
		 the corresponding weight w[i] is sent to zero 
		 ro[i] = SUM[ [feature_i]*(output - prediction + w[i]*[feature_i]) ] """
		prediction = np_utils.predict_output(feature_matrix,weights)
		# Numpy vector for feature_i
		feature_i = feature_matrix[:,i]
		ro_i = (feature_i * (output - prediction + weights[i] * feature_i)).sum()

		return ro_i
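For context, ro[i] feeds the soft-thresholding step of cyclical coordinate descent. A sketch of the per-coordinate update that would consume it, assuming features normalized to unit 2-norm (as get_normalized_data suggests later); lasso_coordinate_descent_step is a hypothetical companion method, not shown in the original:

	def lasso_coordinate_descent_step(self, i, feature_matrix, output, weights, l1_penalty):
		ro_i = self.compute_ro(i, feature_matrix, output, weights)
		if i == 0:  # intercept: no L1 shrinkage
			new_weight_i = ro_i
		elif ro_i < -l1_penalty / 2.:
			new_weight_i = ro_i + l1_penalty / 2.
		elif ro_i > l1_penalty / 2.:
			new_weight_i = ro_i - l1_penalty / 2.
		else:  # ro_i within [-l1_penalty/2, l1_penalty/2] -> weight is set to zero
			new_weight_i = 0.
		return new_weight_i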
def quiz_2_ridge_gradient_descent(sales):
	print "\n**********************************"
	print "*     Ridge Gradient Descent     *"
	print "**********************************\n"

	simple_features = ['sqft_living']
	my_output = 'price'
	train_data,test_data = sales.random_split(.8,seed=0)
	(simple_feature_matrix,output) = np_utils.get_numpy_data(train_data,simple_features,my_output)
	(simple_test_feature_matrix,test_output) = np_utils.get_numpy_data(test_data,simple_features,my_output)

	ridge = RidgeRegression()
	l2_no_reg,l2_high_reg = 0,1e11
	initial_weights = np_utils.np.array([0.,0.])
	print "\nQ1 & Q2 coefficients with features: %s" % (simple_features)
	ridge_weights = compute_ridge_regression(ridge,simple_feature_matrix,output,[l2_no_reg,l2_high_reg],initial_weights)
	# print ridge_weights

	print "\nQ3: Line fit with no regularization (l2_penalty=0) is steeper"
	print "\nQ4: high regularization (l2_penalty=1e11)"
	compute_ridge_rss([ridge_weights[l2_high_reg]],simple_test_feature_matrix,test_data)
	print "\t- Between 5e14 and 8e14"

	more_features = ['sqft_living','sqft_living15']
	initial_w_morefeatures = np_utils.np.array([0.0,0.0,0.0])
	(more_feature_matrix,output_more_features) = np_utils.get_numpy_data(train_data,more_features,my_output)
	(more_test_feature_matrix,test_output_more) = np_utils.get_numpy_data(test_data,more_features,my_output)

	print "\nQ5 & Q6 coefficients with features: %s" % (more_features)
	ridge_morefeatures = compute_ridge_regression(ridge,more_feature_matrix,output_more_features,
		[l2_no_reg,l2_high_reg],initial_w_morefeatures)

	print "\nQ7: using all zero weights with features: %s" % (simple_features)
	compute_ridge_rss([initial_w_morefeatures],more_test_feature_matrix,test_data)
	print "\t-Between 1e15 and 3e15"

	num_of_house = 1  # number of houses to compare (raise to 5 to inspect more)
	print "\nQ8: Which model makes better predictions for 1st house:"
	for l2_penalty in [l2_no_reg,l2_high_reg]:
		print "L2:%s:" % l2_penalty
		current_predictions = np_utils.predict_output(more_test_feature_matrix,ridge_morefeatures[l2_penalty])
		for house_predict in range(num_of_house):
			pred,real = current_predictions[house_predict],test_data['price'][house_predict]
			print '\t\t(predict) %s vs %s (real)  diff: %s' % (pred,real,real - pred)
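The quiz driver above assumes np_utils.get_numpy_data converts a (Graph)Lab SFrame into a numpy feature matrix with a leading constant column for the intercept. A plausible sketch, under that assumption (to_numpy is the SFrame/SArray conversion method):

def get_numpy_data(data_sframe, features, output):
	# prepend a constant column of 1s so weights[0] acts as the intercept
	data_sframe['constant'] = 1
	features = ['constant'] + features
	feature_matrix = data_sframe[features].to_numpy()
	output_array = data_sframe[output].to_numpy()
	return (feature_matrix, output_array)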
def more_features_with_lasso_coordinate(lasso, sales):
    train_data, test_data = sales.random_split(.8, seed=0)

    all_features = [
        'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors',
        'waterfront', 'view', 'condition', 'grade', 'sqft_above',
        'sqft_basement', 'yr_built', 'yr_renovated'
    ]

    feature_matrix_norm, train_output, train_norms = np_utils.get_normalized_data(
        train_data, all_features, 'price')

    initial_weights = np_utils.np.zeros(len(all_features) + 1)

    weights_info, nnz_features = {}, {}
    penalty_tolerance = [[1e7, 1.0], [1e8, 1.0], [1e4, 5e5]]
    penalty_str = {1e7: '1e7', 1e8: '1e8', 1e4: '1e4'}
    print "\nFeatures assigned for Q5,Q6,Q7:"

    for penalty, tolerance in penalty_tolerance:
        weights = lasso.lasso_cyclical_coordinate_descent(
            feature_matrix_norm, train_output, initial_weights, penalty,
            tolerance)
        # print weights
        weights_normalized = weights / train_norms
        weights_info[penalty] = weights_normalized
        dict_weights = dict(
            zip(['constant'] + all_features, weights_normalized))
        nnz_features[penalty] = filter(lambda x: dict_weights[x] != 0,
                                       dict_weights)
        print "\n\tL1_penalty_%s: %s" % (penalty_str[penalty],
                                         nnz_features[penalty])

    print "\nQ8: three models RSS on the TEST data:"
    test_feature_matrix, test_output = np_utils.get_numpy_data(
        test_data, all_features, 'price')
    for penalty, tolerance in penalty_tolerance:
        current_predictions = np_utils.predict_output(test_feature_matrix,
                                                      weights_info[penalty])
        RSS = reg.compute_RSS(current_predictions, test_output)
        print "\n\tL1_penalty_%s: %s" % (penalty_str[penalty], RSS)
def compute_ridge_regression(ridge, feature_matrix, output, l2_penalties, init_weights):
	step_size = 1e-12
	max_iterations = 1000
	first_feature = 1  # index of the first non-constant coefficient
	y_axis = []
	ridge_info = {}
	for l2_penalty in l2_penalties:
		iteration,weights = ridge.ridge_regression_gradient_descent(feature_matrix,output, init_weights,
			step_size,l2_penalty,max_iterations,debug=False)
		# print 'L2(%s)\tIteration:%s & Learned weights:%s' % (l2_penalty,iteration,weights)
		y_axis.append(np_utils.predict_output(feature_matrix,weights))
		ridge_info[l2_penalty]=weights
		# print weights
		print "\n\tcoefficients with regularization (L2:%s) is: %s" % (l2_penalty,round(weights[fist_house],1))

	# x_axis = feature_matrix
	# plt.plot(x_axis,y_axis[0],x_axis,y_axis[1])
	# plt.savefig('../graphs/ridge_reg_learned_weights.png')
	# plt.close()

	return ridge_info
def evaluate_lasso_coordinate(lasso, sales):
    simple_features = ['sqft_living', 'bedrooms']
    my_output = 'price'
    initial_weights = np_utils.np.zeros(3)
    l1_penalty = 1e7
    tolerance = 1.0

    feature_matrix_norm, output, norms = np_utils.get_normalized_data(
        sales, simple_features, my_output)

    weights = lasso.lasso_cyclical_coordinate_descent(feature_matrix_norm,
                                                      output, initial_weights,
                                                      l1_penalty, tolerance)
    # print weights

    current_predictions = np_utils.predict_output(feature_matrix_norm, weights)
    RSS = reg.compute_RSS(current_predictions, output)
    print "\nQ3: Lasso-coordinate with normalized dataset RSS is: %s" % RSS

    print "\nQ4: Features assigned a zero weight at convergence: %s" % simple_features[
        -1]
    print "\t->%s" % weights
Example #7
	def regression_gradient_descent(self, feature_matrix, output, initial_weights, step_size, tolerance):
		converged = False
		weights = np_utils.np.array(initial_weights) # make sure it's a numpy array
		while not converged:
			# compute the predictions based on feature_matrix and weights using your predict_output() function
			predictions = np_utils.predict_output(feature_matrix,weights)
			# compute the errors as predictions - output
			errors = predictions - output
			gradient_sum_squares = 0 # initialize the gradient sum of squares
			# update each feature's weight by moving against its gradient component
			for i in range(len(weights)): # loop over each weight
				# Recall that feature_matrix[:, i] is the feature column associated with weights[i]
				# compute the derivative for weight[i]:
				feature = feature_matrix[:,i]
				derivative = 2 * np_utils.feature_derivative(errors,feature)
				# add the squared value of the derivative to the gradient sum of squares (for assessing convergence)
				gradient_sum_squares += derivative ** 2
				# subtract the step size times the derivative from the current weight
				weights[i] -= step_size * derivative
			# compute the square-root of the gradient sum of squares to get the gradient magnitude:
			gradient_magnitude = np_utils.np.sqrt(gradient_sum_squares)
			if gradient_magnitude < tolerance:
				converged = True
		return weights
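Since the loop above multiplies by 2 itself, np_utils.feature_derivative presumably returns just the dot product of the errors with one feature column. A sketch under that assumption:

import numpy as np

def feature_derivative(errors, feature):
	# derivative of RSS with respect to one weight, up to the factor of 2 applied by the caller
	return np.dot(errors, feature)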
def get_predictions(dataset, features, output, weights):
	feature_matrix, output = np_utils.get_numpy_data(dataset, features, output)
	predictions = np_utils.predict_output(feature_matrix, weights)
	return predictions
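predict_output appears throughout these snippets; it is presumably a single matrix-vector product, sketched here as an assumption:

import numpy as np

def predict_output(feature_matrix, weights):
	# each prediction is the dot product of one row of features with the weights
	return np.dot(feature_matrix, weights)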
def compute_ridge_rss(weights_list, feature_matrix, test_data):
	for weights_vals in weights_list:
		current_predictions = np_utils.predict_output(feature_matrix,weights_vals)
		RSS1 = reg.compute_RSS(current_predictions,test_data['price'])
		# print 'RSS1: %s' % (RSS1)
		print "\n\tTEST error (RSS) is: %s" % (RSS1)