Code example #1
def testLinreg():
    train_x, train_y, dev_x, dev_y, test_x, test_y, x_max, y_max = util.loadLinRegData()
    LR = LinearRegression()
    LR.fit_stochastic(train_x, train_y, eta=.01, eps=10**-12, max_iters=10**8)
    preds = LR.predict(train_x)
    train_linreg_rmse = util.findRMSE(preds, train_y) * y_max[0]
    preds = LR.predict(test_x)
    test_linreg_rmse = util.findRMSE(preds, test_y) * y_max[0]
    print('Linreg RMSE: \t', test_linreg_rmse)
    return train_linreg_rmse, test_linreg_rmse
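util.findRMSE is not shown in this excerpt; since the data come back scaled by x_max/y_max, multiplying by y_max[0] converts the error back to the original units. A minimal sketch of a plausible findRMSE, assuming it returns the root-mean-square error (this helper is an assumption, not the project's actual code):

import numpy as np

def find_rmse(preds, targets):
    # Root-mean-square error; assumed equivalent to util.findRMSE above.
    preds = np.asarray(preds).ravel()
    targets = np.asarray(targets).ravel()
    return np.sqrt(np.mean((preds - targets) ** 2))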
Code example #2
    # Load the comma-separated data file into a single array
    allData = np.loadtxt(filePath, delimiter=',')

    X = np.matrix(allData[:,:-1])
    y = np.matrix((allData[:,-1])).T

    n,d = X.shape
    
    # Standardize
    mean = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mean) / std
    
    # Add a column of ones for the bias term
    X = np.c_[np.ones((n,1)), X]
    
    # Initialize theta randomly
    init_theta = np.matrix(np.random.randn(d + 1)).T
    n_iter = 2000
    alpha = 0.01

    # Instantiate objects
    lr_model = LinearRegression(init_theta = init_theta, alpha = alpha, n_iter = n_iter)
    lr_model.fit(X,y)

    # Compute the closed form solution in one line of code
    thetaClosedForm = linalg.inv(X.T * X) * X.T * y  # normal equation: theta = (X^T X)^{-1} X^T y
    print("thetaClosedForm: ", thetaClosedForm)


Code example #3
        # Python slice ends are exclusive, so no "- 1" is needed: these
        # splits partition the data without dropping a row at each boundary.
        Xtrain = X[train_indice:valid_indice_one]
        ytrain = y[train_indice:valid_indice_one]

        Xvalidate_one = X[valid_indice_one:valid_indice_two]
        yvalidate_one = y[valid_indice_one:valid_indice_two]

        Xvalidate_two = X[valid_indice_two:]
        yvalidate_two = y[valid_indice_two:]

        num_of_polynomials = 5
        best_poly = 1
        best_err = np.inf
        # range() is exclusive at the top, so "+ 1" is needed to try all five degrees
        for poly in range(1, num_of_polynomials + 1):
            print("Polynomial degree: %d" % poly)
            model = LinearRegression(poly)
            model.fit(Xtrain, ytrain)
            y_hat_train = model.predict(Xtrain)
            y_hat_validate_one = model.predict(Xvalidate_one)
            y_hat_validate_two = model.predict(Xvalidate_two)

            tr_err = np.mean((y_hat_train - ytrain)**2)
            va_err_one = np.mean((y_hat_validate_one - yvalidate_one)**2)
            va_err_two = np.mean((y_hat_validate_two - yvalidate_two)**2)
            print("Train error: %.3f" % tr_err)
            print("Val1 error: %.3f" % va_err_one)
            print("Val2 error: %.3f \n" % va_err_two)

            avg_err = (va_err_one + va_err_two) / 2  # mean of the two validation errors
            if avg_err < best_err:
                best_err = avg_err
                best_poly = poly  # remember the best-performing degree
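LinearRegression(poly) above presumably expands the inputs into a degree-poly polynomial basis before fitting; that implementation is not shown. A minimal sketch of such an expansion for a single input column (this helper is hypothetical; the model's own intercept handling is assumed elsewhere):

import numpy as np

def poly_basis(X, degree):
    # Map a column vector X (n x 1) to the basis [X, X^2, ..., X^degree].
    X = np.asarray(X).reshape(-1, 1)
    return np.hstack([X ** d for d in range(1, degree + 1)])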
Code example #4
    y = np.matrix((allData[:, -1])).T

    n, d = X.shape

    # Add a column of ones for the bias term
    X = np.c_[np.ones((n, 1)), X]

    # Initialize the model. Theta really should start near zero, but
    # starting near [10, 10] makes gradient descent easier to visualize
    # for this particular problem.
    init_theta = np.matrix(np.ones((d + 1, 1))) * 10
    n_iter = 1500
    alpha = 0.01

    # Instantiate objects
    lr_model = LinearRegression(init_theta=init_theta,
                                alpha=alpha,
                                n_iter=n_iter)
    plotData1D(X[:, 1], y)
    lr_model.fit(X, y)
    plotRegLine1D(lr_model, X, y)

    # Visualize the objective function convex shape
    theta1_vals = np.linspace(-10, 10, 100)
    theta2_vals = np.linspace(-10, 10, 100)
    visualizeObjective(lr_model, theta1_vals, theta2_vals, X, y)

    # Compute the closed form solution in one line of code
    # (the normal equation, as in code example #2)
    theta_closed_form = np.linalg.inv(X.T * X) * X.T * y
    print("theta_closed_form: ", theta_closed_form)
Code example #5
# eps_vals (the convergence thresholds) is defined above this excerpt in the original script
eta_vals = [10**-4, 10**-3, 10**-2]
max_iters = [10**8, 10**7, 10**6]

results = []

for eps_val in eps_vals:
    for eta_val in eta_vals:
        for max_iter in max_iters:
            print('Fitting regression with eta', eta_val, 'eps', eps_val,
                  'max iter', max_iter)
            # Record the hyperparameters, then append train/dev RMSE below
            cur_result = [eps_val, eta_val, max_iter]

            LR = LinearRegression()
            LR.fit_stochastic(train_x,
                              train_y,
                              eta=eta_val,
                              eps=eps_val,
                              max_iters=max_iter)

            preds = LR.predict(train_x)
            rmse = util.findRMSE(preds, train_y)
            cur_result.append(rmse * y_max[0])

            preds = LR.predict(dev_x)
            rmse = util.findRMSE(preds, dev_y)
            cur_result.append(rmse * y_max[0])
            results.append(cur_result)
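Each row of results now holds [eps, eta, max_iters, train RMSE, dev RMSE], so selecting the winning configuration is a one-liner over the last column, e.g.:

# Pick the setting with the lowest dev-set RMSE (the last entry per row).
best = min(results, key=lambda row: row[-1])
print('best eps/eta/max_iters:', best[:3], 'dev RMSE:', best[-1])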
Code example #6
File: test_linreg.py Project: zkan/linreg
    def setUp(self):
        self.linreg = LinearRegression()
Code example #7
File: test_linreg.py Project: zkan/linreg
class LinearRegressionTest(unittest.TestCase):
    def setUp(self):
        self.linreg = LinearRegression()

    def test_dot_product_of_two_vectors_should_get_dot_product_result(self):
        a = [1, 2, 3]
        b = [4, 5, 6]

        result = self.linreg.dot_product(a, b)

        expected = 32
        self.assertEqual(result, expected)

    def test_dot_product_should_get_negative_if_vectors_have_different_length(
        self
    ):
        a = [1, 2, 3]
        b = [4, 5, 6, 7]

        result = self.linreg.dot_product(a, b)

        expected = -1
        self.assertEqual(result, expected)

    def test_compute_cost_for_single_example_should_return_cost(self):
        X = [[1, 2]]
        y = [2]
        theta = [0.1, 0.2]

        result = self.linreg.compute_cost(X, y, theta)

        expected = 1.125
        self.assertEqual(result, expected)

    def test_compute_cost_for_entire_examples_should_return_cost(self):
        X = [[1, 2], [3, 4]]
        y = [2, 2.5]
        theta = [0.1, 0.2]

        result = self.linreg.compute_cost(X, y, theta)

        expected = 2.105
        self.assertEqual(result, expected)

    def test_run_gradient_descent_with_one_iteration_should_change_theta(self):
        X = [[1, 2], [3, 4]]
        y = [2, 2.5]
        theta = [0, 0]
        number_of_iterations = 1

        result = self.linreg.run_gradient_descent(X, y, theta, number_of_iterations)

        expected = (
            [2.6572999999999993],
            [0.07, 0.16]
        )
        self.assertEqual(result, expected)

    def test_run_gradient_descent_with_ten_iteration_should_change_theta(self):
        X = [[1, 2], [3, 4]]
        y = [2, 2.5]
        theta = [0, 0]
        number_of_iterations = 10

        result = self.linreg.run_gradient_descent(X, y, theta, number_of_iterations)

        expected = (
            [
                2.6572999999999993,
                1.4340467700000001,
                0.8330420285729998,
                0.5415905512402678,
                0.4029957986666608,
                0.339058852804416,
                0.31098737943231103,
                0.2997051253564717,
                0.2959527141041505,
                0.2953245352067722
            ],
            [0.21850056185143937, 0.5469413946894416]
        )
        self.assertEqual(result, expected)
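The tests above compare floats with exact equality, which only passes because the expected values were copied verbatim from the implementation's output. A small sketch of the more robust unittest idiom (not part of the original tests):

import unittest

class ToleranceExample(unittest.TestCase):
    def test_cost_is_close(self):
        # assertAlmostEqual tolerates floating-point rounding
        # (7 decimal places by default), unlike exact assertEqual.
        self.assertAlmostEqual(0.1 + 0.2, 0.3, places=7)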
Code example #8
        # Normalize each column of the image to [0, 1]
        for i in range(my):
            img[:, i] = img[:, i] / np.max(img[:, i])

        xs = []
        ys = []

        # Flatten the counts mapping (avalanche size -> degrees) into paired samples
        for x in counts:
            for y in counts[x]:
                xs.append(x)
                ys.append(y)
        
        xs = np.array(xs)
        ys = np.array(ys)
        xs = xs.reshape((len(xs), 1))
        ys = ys.reshape((len(ys), 1))
        print(ys.shape, xs.shape)
        # Fit a line through the (size, degree) samples
        model = LinearRegression()
        model.fit(xs, ys)  # optional sample weights omitted


        fig = pplot.figure()
        ax = fig.add_subplot(111)
        ax.imshow(img, interpolation='nearest', cmap='Reds', vmin=0, vmax=1, origin='lower')
        ax.set_title("Distribution of degree per avalanche size")
        ax.set_xlabel("Avalanche size s")
        ax.set_ylabel("Degree of first defaulting node")
        pplot.show()
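If LinearRegression here is scikit-learn's, fit returns the estimator itself, so the fitted slope and intercept can be read off afterwards. A follow-up sketch under that assumption:

# Assuming sklearn.linear_model.LinearRegression: with ys of shape
# (n, 1), coef_ has shape (1, 1) and intercept_ has shape (1,).
print('slope:', model.coef_[0][0], 'intercept:', model.intercept_[0])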


Code example #9
# The first two columns are the size of the house and the number of bedrooms,
# and the third column is the price of the house, which we want to predict.
file_name = 'dataset/ex1data2.txt'
with open(file_name, 'r') as f:
    house_data = np.loadtxt(f, delimiter=',')

num_sample = house_data.shape[0]  # number of all the samples
X = house_data[:, :2]
y = house_data[:, 2].reshape((-1, 1))

# Inspect the data
print('X shape: ', X.shape)
print('y shape: ', y.shape)
print('First 10 examples from the dataset')
print(house_data[0:10, :])

# Normalize
X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
# Add bias dimension
X = np.hstack((X, np.ones((num_sample, 1))))

lr_bgd = LinearRegression()
tic = time.time()
losses_bgd = lr_bgd.train(X,
                          y,
                          method='sgd',
                          learning_rate=1e-2,
                          num_iters=1000,
                          verbose=True)
toc = time.time()
print('Training time for BGD with vectorized version is %f \n' % (toc - tic))
print(lr_bgd.W)
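As a cross-check, the closed-form solution from code examples #2 and #4 applies to this normalized, bias-augmented X as well; a short sketch using the numerically stable solver (an addition, not part of the original script):

import numpy as np

# Closed-form least squares on the same X and y as above.
W_closed, *_ = np.linalg.lstsq(X, y, rcond=None)
print('closed-form weights:\n', W_closed)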