def run_linear_regression():
    """Plot the food-truck data and run gradient descent on it.

    Obtains the data from ``setup()``, labels its two columns, plots them,
    prints the cost for an all-zero ``theta``, then runs gradient descent
    and prints the ``theta`` it returns.
    """
    # Hyper-parameters for the gradient-descent run.
    alpha = 0.01
    iterations = 1500

    print('Plotting data\n')
    features = setup()
    features.columns = ['Profits', 'CityPopulation']
    X, y = features.Profits, features.CityPopulation

    # Set the initial theta value.
    # NOTE(review): theta gets one entry per sample (len(y)) rather than one
    # per model parameter -- confirm this is what LinearRegression expects.
    m = len(y)
    theta = np.zeros(m)

    lr = LinearRegression(X, y, iterations, alpha)
    lr.plot_data(
        X,
        y,
        'Profits',
        'City Population',
        'Food Truck Profit v. City Pop',
    )

    print('Testing gradient descent algorithm...\n')

    # Adding a column of ones to X is currently disabled:
    # X.bias = np.ones((m, 1))
    initial_cost = lr.cost_function(X, y, theta)
    print('Initial cost: {}'.format(initial_cost))

    # Run the gradient descent; cost_history is not used in the visible code.
    theta, cost_history = lr.gradient_descent(X, y, theta, alpha, iterations)
    print('Optimum theta found by gradient descent: {}'.format(theta))
# NOTE(review): the next four statements use ``ax`` and ``plt`` that are set
# up outside this chunk; they look like the tail of a plotting routine whose
# beginning is not visible here -- re-indent them under that definition.
ax.set_ylabel('Cost')
ax.set_title('Error vs. Number of Iterations')
plt.show(block=False)
input('Press <Enter> to continue')


if __name__ == '__main__':
    # Script entry point: load the data, plot it, fit by gradient descent,
    # plot the fitted line and the cost history, then print two predictions.
    xlabel, ylabel, title = (
        "Population of City in 10,000s",
        "Profit in $10,000s",
        "Population of City vs. Profit",
    )
    iterations, alpha = 1000, 0.01
    theta = np.zeros((2, 1))

    x, y = load_data('ex1data1.txt')
    # Plot the raw data before x is modified below.
    plot_data(x, y, xlabel, ylabel, title)

    # Insert a constant column 'X0' (all 1) at position 0 of x, in place.
    # presumably x and y are pandas DataFrames -- verify against load_data
    x.insert(0, 'X0', 1)
    X = x.to_numpy()
    Y = np.array(y)

    lin_reg = LinearRegression()
    gradient, cost_history = lin_reg.gradient_descent(
        X, Y, theta, iterations, alpha
    )

    # Column 0 of x is now the constant X0 = 1, so the trendline is drawn
    # against column 1.
    feature_values = x.iloc[:, 1].to_numpy()
    target_values = y.iloc[:, 0].to_numpy()
    plot_trendline(
        feature_values, target_values, gradient, xlabel, ylabel, title
    )
    plot_computeCost(cost_history, iterations)

    intuition_prediction = lin_reg.predict_intuition(3.5, gradient)
    print(intuition_prediction)
    vectorized_prediction = lin_reg.predict_vectorized([1, 3.5], gradient)
    print(vectorized_prediction)