Esempio n. 1
0
def run_linear_regression():
    """Plot the data set and run gradient descent on it.

    Obtains the feature table from ``setup()``, relabels its columns as
    ``'Profits'`` and ``'CityPopulation'``, plots one against the other,
    prints the cost for an all-zero theta, and finally prints the theta
    returned by ``LinearRegression.gradient_descent``.
    """
    print('Plotting data\n')

    # Settings for the gradient descent run.
    n_iters = 1500
    learning_rate = 0.01

    table = setup()
    table.columns = ['Profits', 'CityPopulation']
    profits, population = table.Profits, table.CityPopulation
    n_samples = len(population)

    # NOTE(review): theta gets one entry per sample (n_samples) rather than
    # one per model parameter -- confirm this is what LinearRegression expects.
    initial_theta = np.zeros(n_samples)  # all-zero starting point

    model = LinearRegression(profits, population, n_iters, learning_rate)
    model.plot_data(profits, population, 'Profits', 'City Population',
                    'Food Truck Profit v. City Pop')

    print('Testing gradient descent algorithm...\n')

    # No bias column of ones is added to the inputs before the cost is
    # evaluated; the values are passed exactly as read from the table.
    initial_cost = model.cost_function(profits, population, initial_theta)
    print('Initial cost: {}'.format(initial_cost))

    # Run the gradient descent; the second return value (cost history) is
    # not used in this function.
    fitted_theta, _cost_history = model.gradient_descent(
        profits, population, initial_theta, learning_rate, n_iters)

    print('Optimum theta found by gradient descent: {}'.format(fitted_theta))
Esempio n. 2
0
    ax.set_ylabel('Cost')
    ax.set_title('Error vs. Number of Iterations')
    plt.show(block=False)
    input('Press <Enter> to continue')


if __name__ == '__main__':
    # Axis labels and title shared by the scatter plot and the trendline plot.
    xlabel, ylabel, title = (
        "Population of City in 10,000s",
        "Profit in $10,000s",
        "Population of City vs. Profit",
    )

    # Load the data set and show the raw points first.
    x, y = load_data('ex1data1.txt')
    plot_data(x, y, xlabel, ylabel, title)

    # Gradient descent settings and the all-zero (2, 1) starting theta.
    iterations, alpha = 1000, 0.01
    theta = np.zeros((2, 1))

    # Prepend a constant column named 'X0' filled with 1 (this mutates x in
    # place), then convert both inputs to NumPy arrays.
    # NOTE(review): x and y are used through a DataFrame-style API
    # (.insert / .to_numpy / .iloc) -- presumably pandas; confirm in load_data.
    x.insert(0, 'X0', 1)
    X, Y = x.to_numpy(), np.array(y)

    lin_reg = LinearRegression()
    gradient, cost_history = lin_reg.gradient_descent(
        X, Y, theta, iterations, alpha)

    # Column 1 of x is used for plotting so that the inserted X0 column of
    # ones (column 0) is left out.
    plot_trendline(
        x.iloc[:, 1].to_numpy(),
        y.iloc[:, 0].to_numpy(),
        gradient,
        xlabel,
        ylabel,
        title,
    )
    plot_computeCost(cost_history, iterations)

    # Print the results of both prediction helpers for the input 3.5; the
    # vectorized call is additionally given a leading 1 in its input list.
    print(lin_reg.predict_intuition(3.5, gradient))
    print(lin_reg.predict_vectorized([1, 3.5], gradient))