def gradient_descent(X, y, theta, alpha, iterations):
    temp = np.matrix(np.zeros(theta.shape))
    parameters = int(theta.ravel().shape[1])
    m = len(y)
    cost = np.zeros(iterations)

    # Set up an interactive plot that traces the cost per iteration
    plt.ion()
    fig, ax = plt.subplots()
    xdata, ydata = [], []
    plot = ax.scatter(0, compute_cost(X, y, theta))
    plt.xlim(0, iterations)
    plt.ylim(0, compute_cost(X, y, theta))
    plt.draw()

    for i in range(iterations):
        error = X * theta.T - y
        for j in range(parameters):
            der = np.multiply(error, X[:, j])
            temp[0, j] = theta[0, j] - ((alpha / m) * np.sum(der))
        theta = temp
        cost[i] = compute_cost(X, y, theta)
        # Append the new (iteration, cost) point once and refresh the live plot
        xdata.append(i)
        ydata.append(cost[i])
        plot.set_offsets(np.c_[xdata, ydata])
        fig.canvas.draw_idle()
        plt.pause(0.1)

    plt.show()
    return theta, cost
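# =====================================================================
# None of the snippets in this collection include compute_cost itself.
# Below is a minimal sketch of the assumed helper -- the standard
# squared-error cost J(theta) = 1/(2m) * sum((X.theta - y)^2).
# Conventions differ between the variants here (some keep theta as a
# 1xN row matrix and compute X * theta.T, some index the returned
# value), so treat this as an illustration, not the canonical version.
# =====================================================================
import numpy as np

def compute_cost(X, y, theta):
    """Squared-error cost for linear regression (column-vector theta)."""
    m = len(y)
    residual = np.dot(X, theta) - y
    return np.sum(np.square(residual)) / (2 * m)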
def part2_4():
    data = np.loadtxt('./data/ex1data1.txt', delimiter=',')
    x = data[:, 0]
    y = data[:, 1]
    m = len(y)
    y = y.reshape(m, 1)
    X = np.c_[np.ones((m, 1)), x]

    # Evaluate the cost over a grid of (theta0, theta1) values
    theta0_vals = np.linspace(-10, 10, 100)
    theta1_vals = np.linspace(-1, 4, 100)
    J_vals = np.zeros((len(theta0_vals), len(theta1_vals)))
    for i, v0 in enumerate(theta0_vals):
        for j, v1 in enumerate(theta1_vals):
            t = np.array((v0, v1))
            J_vals[i, j] = compute_cost(X, y, t)[0]

    # Surface plot; J_vals is transposed to match meshgrid's
    # (row, column) convention (see the check below)
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')  # fig.gca(projection='3d') was removed in Matplotlib >= 3.6
    R, P = np.meshgrid(theta0_vals, theta1_vals)
    ax.plot_surface(R, P, J_vals.T)
    plt.savefig('2-4_surface.png')

    # Contour plot with log-spaced levels
    fig = plt.figure()
    plt.contour(R, P, J_vals.T, np.logspace(-2, 3, 20))
    plt.xlabel(r'${\Theta}_0$')
    plt.ylabel(r'${\Theta}_1$')
    plt.savefig('2-4_contour.png')
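# =====================================================================
# Why J_vals is transposed above: with the default 'xy' indexing,
# np.meshgrid(a, b) returns arrays of shape (len(b), len(a)) with
# R[i, j] == a[j] and P[i, j] == b[i]. A grid filled row-major as
# J_vals[i, j] = J(a[i], b[j]) therefore has to be transposed before
# plotting against R and P. A minimal check:
# =====================================================================
import numpy as np

a = np.array([0.0, 1.0, 2.0])
b = np.array([10.0, 20.0])
R, P = np.meshgrid(a, b)
assert R.shape == (2, 3)
assert R[0, 1] == a[1] and P[1, 0] == b[1]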
def gradient_descent(X, y, theta, alpha, num_iters):
    m = y.shape[0]
    cost_history = np.zeros((num_iters, 1))
    for i in range(num_iters):
        # Vectorised gradient: X^T (X.theta - y)
        sigma = np.dot(X.T, (np.dot(X, theta) - y))
        theta = theta - alpha / m * sigma
        cost_history[i] = cc.compute_cost(X, y, theta)
    return theta, cost_history
def gradient_descent(x, y, theta, alpha, iterations):
    m = len(y)
    j_history = []  # collected per iteration, but not returned by this variant
    while iterations:
        temp_a = np.dot(x, theta) - y
        theta = theta - (alpha / m) * np.dot(x.T, temp_a)
        j_history.append(compute_cost(x, y, theta))
        iterations -= 1
    return theta
def gradient_descent(x_data, y_data, theta, alpha, iterations):
    # Compute the gradient of theta each step and return the cost history
    cost_history = np.zeros((iterations, 1))
    for i in range(iterations):
        delta_theta = np.dot(x_data.T, np.dot(x_data, theta) - y_data) / y_data.shape[0]
        theta -= alpha * delta_theta
        cost_history[i, 0] = computeCost.compute_cost(x_data, y_data, theta)
        # if (i + 1) % 100 == 0:
        #     print("{}th iteration, cost: {}".format(i + 1, cost_history[i, 0]))
    return theta, cost_history
def gradient_descent(X, y, thetas, alpha, iterations):
    m = len(y)
    t = thetas.copy()  # copy so as to not modify the original thetas variable
    j_history = []
    for _ in range(iterations):
        new_t = t.copy()
        j_history.append(compute_cost(X, y, new_t))  # record cost before making changes
        for j in range(len(new_t)):
            x = np.array(X[:, j]).reshape(len(X), 1)  # slice feature j of X
            new_t[j] = t[j] - ((alpha / m) * np.sum((h(X, t) - y) * x))
        t = new_t
    return t, j_history
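# =====================================================================
# The variant above calls a hypothesis helper h(X, t) that is not
# shown in this collection. A minimal sketch under the usual
# linear-regression assumption h(x) = X.theta:
# =====================================================================
def h(X, theta):
    """Linear hypothesis: one prediction per row of X."""
    return np.dot(X, theta)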
def gradient_descent(x, y, theta, alpha, num_iters):
    """Performs gradient descent to learn theta.

    Updates theta by taking num_iters gradient steps with learning rate alpha.
    """
    m = len(y)  # Number of training examples
    j_hist = np.zeros([num_iters, 1])
    for ind in range(num_iters):
        h_minus_y = np.dot(x, theta) - y
        theta = theta - (alpha / m) * np.dot(x.T, h_minus_y)
        j_hist[ind] = compute_cost(x, y, theta)
    return theta, j_hist
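# =====================================================================
# A quick smoke test of the vectorised variant above on the toy line
# y = 1 + 2x (names and data invented for the example; assumes a
# compute_cost like the sketch near the top of this collection):
# =====================================================================
x_toy = np.c_[np.ones(5), np.arange(5.0)]            # bias column + one feature
y_toy = (1.0 + 2.0 * np.arange(5.0)).reshape(5, 1)   # exact targets, no noise
theta_fit, hist = gradient_descent(x_toy, y_toy, np.zeros((2, 1)), 0.1, 2000)
print(theta_fit)   # should approach [[1.], [2.]]
print(hist[-1])    # final cost near zero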
def gradient_descent(X, Y, theta_init, alpha, iter_num):
    # Number of training examples
    m = Y.shape[0]
    # History of cost values
    J_history = np.zeros(iter_num)
    theta = theta_init
    # Iterate
    for num in range(0, iter_num):
        # Record the cost of the current theta (before this step's update)
        J_history[num] = compute_cost(X, Y, theta)
        # Update theta from the gradient formula
        hyp = np.dot(X, np.transpose(theta))
        theta = theta - alpha * np.dot(np.transpose(hyp - Y), X) / m
    return theta, J_history
def gradient_descent(X, y, theta, alpha, num_iters):
    """Run gradient descent."""
    # Initialize some useful values
    m = y.size
    J_history = np.zeros(num_iters)

    for i in range(0, num_iters):
        # ===================== Your Code Here =====================
        # Instructions: Perform a single gradient step on the parameter vector theta
        # Hint: X.shape = (97, 2), y.shape = (97,), theta.shape = (2,)
        error = np.dot(X, theta).flatten() - y
        theta -= (alpha / m) * np.sum(X * error[:, np.newaxis], 0)

        J_history[i] = compute_cost(X, y, theta)

    return theta, J_history
def gradient_descent(X, y, theta, alpha, num_iters):
    # Initialize some useful values; also handles more than two thetas
    m = len(X)
    theta_len = len(theta)
    J_history = np.zeros(num_iters)

    for i in range(0, num_iters):
        # ===================== Your Code Here =====================
        # Instructions: Perform a single gradient step on the parameter vector theta
        #
        # Hint: X.shape = (97, 2), y.shape = (97,), theta.shape = (2,)
        inner = np.array(X).dot(theta) - y  # residuals as a DataFrame with column 'Price'
        for j in range(theta_len):
            theta[j, 0] = theta[j, 0] - (alpha / m * (np.sum(inner.multiply(np.array(X.iloc[:, j:j + 1])))))['Price']
        # ===========================================================

        # Save the cost every iteration
        J_history[i] = compute_cost(X, y, theta)

    return theta, J_history
def part2_2():
    data = np.loadtxt('ex1data1.txt', delimiter=',')
    x = data[:, 0]
    y = data[:, 1]
    m = len(y)
    X = np.c_[np.ones((m, 1)), x]
    theta = np.zeros((2, 1))
    iterations = 1500
    alpha = 0.01

    cost = compute_cost(X, y, theta)
    theta = gradient_descent(X, y, theta, alpha, iterations)
    print('initial cost: {0}'.format(cost))
    print('theta: {0}'.format(theta))

    # Predict profits for populations of 35,000 and 70,000
    predict1 = np.array([1, 3.5]).dot(theta)
    predict2 = np.array([1, 7]).dot(theta)
    print('predict1: {0}'.format(predict1))
    print('predict2: {0}'.format(predict2))

    # Plot the fitted line
    x = np.arange(5, 22, 0.1)
    y = [theta[0] + theta[1] * xi for xi in x]
    plt.plot(x, y)
    plt.savefig('2-2.png')
input('Program paused. Press ENTER to continue')

# ===================== Part 2: Gradient descent =====================
print('Running Gradient Descent...')

X = np.c_[np.ones(m), X]  # Add a column of ones to X
theta = np.zeros(2)  # initialize fitting parameters

# Some gradient descent settings
iterations = 1500
alpha = 0.01

# Compute and display initial cost
print('Initial cost : ' + str(compute_cost(X, y, theta)) + ' (This value should be about 32.07)')

theta, J_history = gradient_descent(X, y, theta, alpha, iterations)
print('Theta found by gradient descent: ' + str(theta.reshape(2)))

# Plot the linear fit
plt.figure(0)
line1, = plt.plot(X[:, 1], np.dot(X, theta), label='Linear Regression')
plt.legend(handles=[line1])

input('Program paused. Press ENTER to continue')

# Predict values for population sizes of 35,000 and 70,000
predict1 = np.dot(np.array([1, 3.5]), theta)
print('For population = 35,000, we predict a profit of {:0.3f} (This value should be about 4519.77)'.format(predict1 * 10000))
# Append a column of ones to X
z = np.ones((m, 1), dtype=int)
X = np.append(z, X, axis=1)

# Gradient descent settings
iterations = 1500
alpha = 0.01

# Initialize fitting parameters; theta is kept as a 1x2 row matrix
X = np.matrix(X)
y = np.matrix(y)
theta = np.matrix(np.array([0, 0]))

# Compute the cost function
cost = compute_cost(X, y, theta)
print(cost)

# Minimize the error
theta, cost = gradient_descent(X, y, theta, alpha, iterations)

# Print the resulting parameter vector and the final (minimized) cost
print(theta)
print(cost[len(cost) - 1])

# Predict profit for 35K and 70K people
predict1 = np.matrix([1, 3.5]) * theta.T
print(predict1)
predict2 = np.matrix([1, 7]) * theta.T
print(predict2)
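# =====================================================================
# Note: np.matrix is deprecated in NumPy. A sketch of the same
# predictions with plain ndarrays (where '*' is elementwise, so the
# matrix product must be spelled with '@' or np.dot):
# =====================================================================
theta_arr = np.asarray(theta)                 # 1x2 row, as above
predict1 = np.array([1, 3.5]) @ theta_arr.T
predict2 = np.array([1, 7]) @ theta_arr.T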
Xdata = dataset[:, 0]
Ydata = dataset[:, 1]

# ======== 2. Compute cost and gradient ========
print('Running gradient descent...\n')

# Concatenate the two arrays along the second axis:
# add a bias column to the input data
X = np.c_[np.ones(Xdata.shape[0]), Xdata]
Y = Ydata

# Initialize parameters: theta, iter_num, alpha
theta_init = np.zeros(X.shape[1])
iter_num = 1500
alpha = 0.01

# Compute the initial cost
print('Initial cost:', str(compute_cost(X, Y, theta_init)), '\nThis value should be 32.07')

# Optimize with gradient descent
theta_fin, J_history = gradient_descent(X, Y, theta_init, alpha, iter_num)
print('Theta found by gradient descent:', str(theta_fin.reshape(2)))

# Plot the data scatter and the fitted regression line
plt.figure(0)
plt.scatter(Xdata, Ydata, c='red', marker='o', s=20)
plt.plot(X[:, 1], np.dot(X, theta_fin), 'b-', lw=3)
plt.xlabel('Population of City in 10,000s', fontsize=10)
plt.ylabel('Profit of City in $10,000', fontsize=10)
plt.legend(['Data Point', 'Linear Regression'])
plt.show()

# Predict on unseen data
Xtest1 = [1, 3.5]
file.close()
plotData.plot(x_data, y_data)

# =================== Part 3: Cost and Gradient descent ===================
# Initialize some useful variables
m = len(x_data)
theta = np.zeros((2, 1))
x_data = np.array(x_data)
y_data = np.array(y_data)
x_data = x_data[:, np.newaxis]
y_data = y_data[:, np.newaxis]
x_data = np.column_stack((np.ones(m), x_data))

# Some gradient descent settings
iterations = 1500
alpha = 0.01

# Compute the initial cost
J = computeCost.compute_cost(x_data, y_data, theta)
print("with theta {}, cost is {}".format(theta, J))
print("expected cost is 32.07")

# Begin gradient descent (a closed-form sanity check follows this snippet)
theta, cost_history = gradientDescent.gradient_descent(x_data, y_data, theta, alpha, iterations)
print("after {} iterations, theta is {}".format(iterations, theta))
print("expected theta is [-3.6303, 1.1664]")

# Plot the linear fit
plt.plot(x_data[:, 1], np.dot(x_data, theta))
plt.scatter(x_data[:, 1], y_data, marker="*", edgecolors="red")
plt.show()

# ============= Part 4: Visualizing J(theta_0, theta_1) =============
theta0_vals = np.linspace(-10, 10, 100)
theta1_vals = np.linspace(-1, 4, 100)
J_vals = np.zeros((theta0_vals.shape[0], theta1_vals.shape[0]))
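# =====================================================================
# Sanity check for the fit above: compare against the closed-form
# least-squares solution. np.linalg.lstsq minimises ||X.theta - y||^2
# exactly, while gradient descent after 1500 steps lands near, but
# not exactly at, that optimum, so small differences are expected.
# =====================================================================
theta_exact, *_ = np.linalg.lstsq(x_data, y_data, rcond=None)
print("closed-form theta:", theta_exact.ravel())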
plt.ion()
plt.figure()
plt.plot(X, y, 'rx', label="Training data")
plt.xlabel("Population of City in 10,000s")
plt.ylabel("Profit in $10,000s")
# pause_func()

# =================== Part 3: Cost and Gradient descent ===================
X = np.append(np.ones((m, 1)), X, axis=1)  # Add a column of ones to x
theta = np.zeros((2, 1))  # initialize fitting parameters

# Some gradient descent settings
iterations = 1500
alpha = 0.01

print('\nTesting the cost function ...\n')

# Compute and display initial cost
J = compute_cost(X, y, theta)
print('With theta = [0 ; 0]\nCost computed = %f\n' % J[0])
print('Expected cost value (approx) 32.07\n')

J = compute_cost(X, y, np.array(([-1], [2])))
print('\nWith theta = [-1 ; 2]\nCost computed = %f\n' % J[0])
print('Expected cost value (approx) 54.24\n')

print('Program paused. Press enter to continue.\n')
# pause_func()

print('\nRunning Gradient Descent ...\n')

# Run gradient descent
theta = gradient_descent(X, y, theta, alpha, iterations)

# Print theta to screen
print('Theta found by gradient descent:\n')
print(theta)
print('Expected theta values (approx)\n')
input('Program paused. Press <ENTER> to continue.\n')

# =================== Part 3: Cost and Gradient descent ===================
x = np.concatenate((np.ones([m, 1]), x), axis=1)  # Add a column of ones to x as the first column
theta = np.zeros([2, 1])  # Initialize fitting parameters

# Some gradient descent settings
num_iters = 1500
alpha = 0.01

print('\nTesting the cost function ...\n')

# Compute and display initial cost
J = compute_cost(x, y, theta)
print('With theta = [0 0]\nCost computed =', J[0][0], '\n')
print('Expected cost value (approx) 32.07\n')

# Further testing of the cost function
theta = np.array([[-1], [2]])
J = compute_cost(x, y, theta)
print('\nWith theta = [-1 2]\nCost computed =', J[0][0], '\n')
print('Expected cost value (approx) 54.24\n')

input('Program paused. Press enter to continue.\n')

print('\nRunning Gradient Descent ...\n')

# Run gradient descent (reset theta first; it was overwritten by the test above)
theta = np.zeros([2, 1])
theta, j_hist = gradient_descent(x, y, theta, alpha, num_iters)
y = np.array(data[:, 1], ndmin=2).reshape(len(data), 1)
m = len(data)

# Plot the data
plt.plot(x, y, 'rx')
plt.ylabel('Profit in $10,000\'s')
plt.xlabel('Population of City in 10,000s')
plt.axis([0, 25, -5, 25])
plt.show()

''' ==================== Part 3: Cost and Gradient descent ========= '''
# Add x-sub-0 (vector of 1's)
X = np.concatenate((np.ones((m, 1)), x), axis=1)

# Test thetas
thetas = np.zeros((2, 1))
cost = compute_cost(X, y, thetas)
print('Cost using thetas (0, 0): ', cost)

# Test different thetas
thetas = np.array([[-1.0], [2.0]])
cost = compute_cost(X, y, thetas)
print('Cost using thetas (-1, 2): ', cost)

# Find optimal thetas using gradient descent
# (reset thetas first; they were overwritten by the test above)
iterations = 1500
alpha = 0.01
thetas = np.zeros((2, 1))
thetas, j_history = gradient_descent(X, y, thetas, alpha, iterations)
# print(thetas)

# Show best fit line
plt.figure()