Esempio n. 1
0
def fitting():
    """Fit a linear model on the house-price data and plot features and loss."""
    df = pd.read_csv('house_data.txt', names=['area', 'bedroom', 'price'])

    features = df[['area', 'bedroom']]
    # Mean-normalize each feature by its range.
    features = (features - features.mean()) / (features.max() - features.min())
    prices = df['price']

    plt.figure(figsize=(10, 6))

    plt.subplot(2, 2, 1)
    plt.plot(features['area'], prices, 'rx')
    plt.title('area-price')

    plt.subplot(2, 2, 2)
    plt.plot(features['bedroom'], prices, 'bx')
    plt.title('bedroom-price')

    learning_rate = 10
    n_iters = 50

    regressor = LinearRegression(learning_rate, n_iters)
    losses, _ = regressor.fit(features.values, prices.values)

    # Loss curve across the bottom half of the figure.
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(1, n_iters + 1), losses)

    plt.subplots_adjust(hspace=0.4)
    plt.show()
Esempio n. 2
0
File: main.py Progetto: L4v/ori
def main():
    """Run linear regression and 2-means clustering on the skin-cancer data."""
    data = pd.read_csv("data/skincancer.csv", delimiter=',', index_col=0)
    mort = data.Mort.values
    lat = data.Lat.values
    lon = data.Long.values

    # Fit latitude -> mortality and predict for Hawaii (lat = 20).
    lin_reg = LinearRegression(lat, mort)
    hawaii = lin_reg.predict(20)
    print("Prediction for hawaii[lat=20]:", hawaii)

    # Cluster the (lat, lon) points into two groups.
    k_means = KMeans()
    for latitude, longitude in zip(lat, lon):
        k_means.points.append(Point(latitude, longitude))
    k_means.split(2, 0.01)

    fig = plt.figure()
    ax = fig.add_axes([0, 0, 1, 1])
    colors = ("#ff0000", "#00ff00")

    # Scatter each cluster's points in its own color.
    for cluster, color in zip(k_means._clusters[:2], colors):
        for point in cluster.points:
            ax.scatter(point.x, point.y, c=color)

    # Then mark the two cluster centers with a filled-plus marker.
    for cluster, color in zip(k_means._clusters[:2], colors):
        ax.scatter(cluster.center.x, cluster.center.y, marker="P", c=color)

    plt.show()
Esempio n. 3
0
def test_l2_regularization_gradient():
    """L2 gradient must equal the weights (or twice them, depending on convention)."""
    from linear_regression import LinearRegression

    model = LinearRegression(input_dimensions=2)
    weights = np.float32([[1, 2, 4]]).T
    model.weights = weights
    gradient = model._l2_regularization_gradient()
    # Accept either d/dw(w^2) = 2w or the halved convention d/dw(w^2 / 2) = w.
    matches_w = np.allclose(gradient, weights, rtol=1e-3, atol=1e-3)
    matches_2w = np.allclose(gradient, 2 * weights, rtol=1e-3, atol=1e-3)
    assert matches_w or matches_2w
Esempio n. 4
0
def visual_3d():
    """Visualize the squared-error cost surface and the gradient-descent path in 3-D."""
    data = pd.read_csv('food_profit.txt', names=['population', 'profit'])
    # .values: multi-dimensional indexing on a Series was removed in pandas >= 1.0.
    x = data['population'].values
    y = data['profit'].values

    # Prepend the bias column so theta = (theta0, theta1) acts on [1, x].
    x = x[:, np.newaxis]
    x = np.hstack((np.ones_like(x), x))

    # Evaluate the cost over a grid of (theta0, theta1) values.  The grids are
    # sized from the ranges; the original hard-coded (40, 50), which left
    # all-zero columns that distorted the surface plot.
    t0_range = np.arange(-10, 10, .5)
    t1_range = np.arange(-1, 4, .5)
    theta0 = np.zeros((t0_range.size, t1_range.size))
    theta1 = np.zeros((t0_range.size, t1_range.size))
    jvals = np.zeros((t0_range.size, t1_range.size))
    for i, t0 in enumerate(t0_range):
        for j, t1 in enumerate(t1_range):
            theta0[i, j] = t0
            theta1[i, j] = t1
            jvals[i, j] = 0.5 * np.mean((x.dot(np.array([t0, t1])) - y) ** 2)

    import mpl_toolkits.mplot3d  # noqa: F401 -- registers the '3d' projection
    # plt.gca(projection=...) was removed in Matplotlib 3.6; create the axes explicitly.
    ax = plt.figure().add_subplot(projection='3d')
    ax.plot_surface(theta0, theta1, jvals, cmap=plt.get_cmap('BuPu_r'))

    alpha = 0.01
    max_iter = 1500
    model = LinearRegression(alpha, max_iter)
    loss, w_list = model.fit(x, y)
    w_list = np.array(w_list)

    # Descent start (red x), path (dots), and end (green x) on the surface.
    plt.plot([w_list[0, 0]], [w_list[0, 1]], [loss[0]], 'rx')
    plt.plot(w_list[:, 0], w_list[:, 1], loss, 'o')
    plt.plot([w_list[-1, 0]], [w_list[-1, 1]], [loss[-1]], 'gx')
    plt.show()
def test_fit_functional():
    """End-to-end fit on a noiseless plane must recover the generating weights.

    The target is z = 0.1*x + 0.2*y + 0.4, so with the bias trick the exact
    solution is weights [0.1, 0.2, 0.4] and essentially zero training error.
    """
    import sklearn.model_selection
    import numpy as np

    from linear_regression import LinearRegression
    X = np.zeros((900, 3), dtype=np.float32)
    num_samples = 30

    xx = np.linspace(-5, 5, num_samples)
    XX, YY = np.meshgrid(xx, xx)
    X[:, 0] = XX.flatten()
    X[:, 1] = YY.flatten()
    X[:, -1] = 1  # a column of 1's for the bias trick
    Z = 0.1 * XX + 0.2 * YY + 0.4
    y = Z.reshape(-1, 1)
    X_train, X_val, y_train, y_val = sklearn.model_selection.train_test_split(
        X, y)
    model = LinearRegression(input_dimensions=2)
    train_mse, val_mse = model.fit(X_train,
                                   y_train,
                                   X_val,
                                   y_val,
                                   num_epochs=20,
                                   batch_size=4,
                                   alpha=0.1,
                                   _lambda=0.0)
    final_train_mse = train_mse[-1]
    desired_weights = np.float32([[0.1, 0.2, 0.4]]).T
    np.testing.assert_allclose(model.weights,
                               desired_weights,
                               rtol=1e-3,
                               atol=1e-3)
    # The original asserted < 0.001, < 0.00001 and < 1e-10 in sequence; the
    # weaker bounds are implied by the tightest one, so only it is kept.
    assert final_train_mse < 1e-10
def test_predict():
    """predict() must compute X @ weights for a bias-augmented input."""
    from linear_regression import LinearRegression

    model = LinearRegression(input_dimensions=2)
    model.weights = np.float32([[1, 2, 4]]).T
    inputs = np.float32([[1, 2, 1], [0, 0, 1]])
    # Row 1: 1*1 + 2*2 + 1*4 = 9; row 2: bias only = 4.
    expected = np.float32([[9, 4]]).T
    np.testing.assert_allclose(model.predict(inputs), expected,
                               rtol=1e-3, atol=1e-3)
 def test_trains(self):
     '''
     Trains returns new model with trained thetas
     '''
     # train() is expected to return a new model rather than mutate in place.
     model = LinearRegression('SquaredError')
     model_trained = model.train(self.training_set, range(-20, 20),
                                 range(-20, 20))
     # NOTE(review): expects the identity line y = x (theta_0 = 0, theta_1 = 1)
     # for self.training_set -- confirm against the fixture data.
     self.assertEqual(model_trained.theta_0, 0)
     self.assertEqual(model_trained.theta_1, 1)
def test_mse_gradient():
    """Check the analytic MSE gradient against hand-computed values."""
    from linear_regression import LinearRegression

    model = LinearRegression(input_dimensions=2)
    model.weights = np.float32([[1, 2, 4]]).T
    features = np.float32([[1, 2, 1], [0, 0, 1]])
    targets = np.float32([[10, 2]]).T
    actual = model._mse_gradient(features, targets)
    expected = np.float32([[-0.5, -1.0, 0.5]]).T
    np.testing.assert_allclose(actual, expected, rtol=1e-3, atol=1e-3)
class TestLinearRegression(unittest.TestCase):
    """Tests for the scratch-built LinearRegression (W, b via gradient descent)."""

    def setUp(self):
        # Fresh single-feature and two-feature models for each test.
        self.model_simple = LinearRegression(1)
        self.model_multiple = LinearRegression(2)

    def test_mean_squared_error(self):
        predictions = np.array([0, 0])

        # NOTE(review): assertTrue(value, msg) only checks truthiness -- the
        # intended check was probably assertEqual; behavior kept as-is.
        first_err = self.model_simple._mean_squared_error(predictions,
                                                          np.array([1, 1]))
        self.assertTrue(first_err, 1)

        second_err = self.model_simple._mean_squared_error(predictions,
                                                           np.array([2, 4]))
        self.assertTrue(second_err, 10)

    def test_predict(self):
        # W and b start at zero after initialization, so predictions are zero.
        single_in = np.array([1, 2])
        self.assertTrue(np.array_equal(self.model_simple.predict(single_in),
                                       np.array([0, 0])))

        multi_in = np.array([[1, 1],
                             [2, 2]])
        self.assertTrue(np.array_equal(self.model_multiple.predict(multi_in),
                                       np.array([0, 0])))

    def test_train(self):
        # Single feature, data on the line y = x.
        simple = LinearRegression(1)
        simple.train(np.array([2, 4]), np.array([2, 4]), n_iter=1, lr=0.1)
        # Expected W and b after exactly one update with lr = 0.1.
        self.assertTrue(np.array_equal(simple.W, np.array([1.0])))
        self.assertAlmostEqual(simple.b[0], 0.3)

        # Two features, y = x1 + x2.
        multi = LinearRegression(2)
        multi.train(np.array([[2, 2],
                              [4, 4]]), np.array([4, 8]), n_iter=1, lr=0.1)
        self.assertTrue(np.array_equal(multi.W, np.array([2.0, 2.0])))
        self.assertAlmostEqual(multi.b[0], 0.6)
Esempio n. 10
0
def main():
    """Run a prediction with a file-less model, reporting failures in color."""
    def fail(label, err):
        # Bright red label, then the error text with styling reset.
        print(Style.BRIGHT + Fore.RED + label + Style.RESET_ALL + Fore.RESET + str(err))

    try:
        model = LinearRegression(filename=None, mode='No_file')
        model.predict_data_value()
    except IOError as e:
        fail('I/O Error: ', e)
    except ParserException as e:
        fail('ParserException: ', e)
    except LogisticRegressionException as e:
        fail('Logistic Regression Exception: ', e)
def learning_curve(X, y, Xval, yval, ilambda):
    """Compute train/validation errors for growing training-set sizes.

    For each prefix X[:i+1] a regularized linear regression is fit with the
    given ilambda, then both errors are evaluated with lambda = 0 so the
    regularization term does not inflate the reported costs.

    Args:
        X, y: training design matrix and targets.
        Xval, yval: cross-validation design matrix and targets.
        ilambda: regularization strength used during fitting.

    Returns:
        (error_train, error_val): two (m, 1) arrays of costs, one entry per
        training-set size.
    """
    # (Removed: a large commented-out Monte-Carlo averaging variant and its
    # unused `num_items` constant.)
    m = X.shape[0]

    error_train = np.zeros((m, 1))
    error_val = np.zeros((m, 1))

    for i in range(m):
        # Fit on the first i+1 training examples only.
        my_lr = LR(X[0:i + 1, :], y[0:i + 1])
        my_lr.gradient_descent_reg(0.001, ilambda, 5000)
        theta = my_lr.theta
        # Evaluate without regularization (lambda = 0).  The [i:] slice
        # overwrites the tail each pass; the final pass leaves per-size
        # values in place, so the result matches per-index assignment.
        error_train[i:], _ = my_lr.compute_cost_reg(theta,
                                                    0,
                                                    X=X[:i + 1, :],
                                                    y=y[:i + 1])
        error_val[i:], _ = my_lr.compute_cost_reg(theta, 0, X=Xval, y=yval)

    return error_train, error_val
Esempio n. 12
0
def test_plot_data():
    """Smoke-test plot_data(); print 'pass' unless it raises."""
    X = [1, 2, 3, 4]
    y = [2, 3, 4, 5]
    lr = LinearRegression(X, y, 50, 0.01)
    try:
        lr.plot_data(X, y, 'X_VALS', 'Y_VALS', 'TEST_CHART')
    except Exception as e:
        # Fixed message wording ("failed it exception" -> "failed with exception").
        print('Test failed with exception: {}'.format(e))
        return

    print('pass')
Esempio n. 13
0
def test_load_data():
    """Smoke-test load(); 'pass' when it returns features, 'fail' on None."""
    lr = LinearRegression([], [], 50, 0.01)
    try:
        features = lr.load('./data/test_data1.txt')
    except Exception as e:
        # Fixed message wording ("failed it exception" -> "failed with exception").
        print('Test failed with exception: {}'.format(e))
        return

    if features is not None:
        print('pass')
    else:
        print('fail')
def test_train_on_batch():
    """One step with lr=0.3 and _lambda=0.001 must move the weights by a known delta."""
    from linear_regression import LinearRegression

    model = LinearRegression(input_dimensions=2)
    initial_weights = np.float32([[1, 2, 4]]).T
    model.weights = np.float32([[1, 2, 4]]).T

    features = np.float32([[1, 2, 1], [0, 0, 1]])
    targets = np.float32([[10, 2]]).T
    model._train_on_batch(features, targets, 0.3, _lambda=0.001)

    expected_delta = np.float32([[-0.14970, -0.29940, 0.15120]]).T
    actual_delta = initial_weights - model.weights
    np.testing.assert_allclose(actual_delta, expected_delta, rtol=1e-3, atol=1e-3)
Esempio n. 15
0
def main():
    """Train, plot, validate and export test predictions for a 1-D linear model."""
    # Training matrices.
    x, y = get_train_matrices()

    model = LinearRegression(x, y)
    # Learning rate 0.01, 1000 iterations, no L2 regularization.
    model.fit(0.01, 1000, 0)

    predictions = model.predict(x)

    # Training points as a scatter plot with the fitted line on top.
    plt.scatter(x, y)
    plt.plot(x, predictions)
    plt.show()

    # Learned parameters.
    print("Weights: {}\nBiases: {}".format(model.w, model.c))

    # Performance metrics on the training data.
    model.validate()

    # Score the held-out inputs and persist predictions to the test folder.
    x_test = pd.read_csv('test/input.csv')['x'].values.reshape(-1, 1)
    y_test = model.predict(x_test)
    pd.DataFrame({'y': y_test.reshape(-1)}).to_csv('test/output.csv')
Esempio n. 16
0
def main():
    """Train a model from the CSV named on the command line, reporting errors in color."""
    if len(sys.argv) != 2:
        print('usage: ' + Fore.RED + 'python' + Fore.BLUE + ' train.py ' + Fore.RESET + 'data_file.csv')
        sys.exit(-1)

    def fail(label, err):
        # Bright red label, then the error text with styling reset.
        print(Style.BRIGHT + Fore.RED + label + Style.RESET_ALL + Fore.RESET + str(err))

    data_file = sys.argv[1]
    try:
        model = LinearRegression(data_file)
        model.train_model()
    except IOError as e:
        fail('I/O Error: ', e)
    except ParserException as e:
        fail('ParserException: ', e)
    except LogisticRegressionException as e:
        fail('Logistic Regression Exception: ', e)
Esempio n. 17
0
def main():
    """Fit a univariate linear model on ex1data1 and save the plot to test.pdf."""
    trainfile = r"data/ex1data1.txt"
    train_X, train_y = loadDataSet(trainfile)
    clf = LinearRegression()
    # NOTE(review): `weigh` is whatever fit() returns -- presumably the learned
    # slope; confirm it is scalar, since scalar arithmetic is applied below.
    weigh = clf.fit(train_X, train_y, alpha=0.01, maxCycles=500)
    Fig = plt.figure(figsize=(8, 4))  # Create a `figure' instance
    Ax = Fig.add_subplot(111)  # Create a `axes' instance in the figure
    Ax.plot(train_X, train_y, 'o')  # Create a Line2D instance in the axes
    #Ax.plot(a1,a2)
    # Two segments through the origin with slope `weigh`, spanning
    # x in [0, 1] and x in [0, 25].
    a1 = [0, 1]
    a2 = [0, 1 * weigh]
    b1 = [0, 25]
    b2 = [0, 25 * weigh]
    Ax.plot(a1, a2, b1, b2)
    Fig.savefig("test.pdf")
def test_fit():
    """fit() on a two-feature design matrix must recover known weights."""
    targets = np.array([
        0.09459717, 0.50650243, 1.03329565, 0.52587828, 0.49264871,
        -0.64896441, -0.86499999, -1.00885329, -0.80418399, 0.57436388
    ]).reshape(-1, 1)
    design = np.array([
        0., 0., 0.6981317, 0.48738787, 1.3962634, 1.94955149, 2.0943951,
        4.38649084, 2.7925268, 7.79820595, 3.4906585, 12.18469679, 4.1887902,
        17.54596338, 4.88692191, 23.88200571, 5.58505361, 31.19282379,
        6.28318531, 39.4784176
    ]).reshape(-1, 2)

    model = LinearRegression()
    model.fit(design, targets)

    # Reference weights for this fixture (bias plus two coefficients).
    expected = np.array([[0.77483422], [-0.42288373], [0.03914334]])
    np.testing.assert_almost_equal(model.weights_, expected, decimal=8)
    def test_simple_regression(self):
        """Feed an online regressor 1000 lagged samples and print its diagnostics."""
        # The target (index 0) is a linear combination of the PREVIOUS
        # sample's components, so the regressor sees a lag-1 dependency.
        r = LinearRegression(order=1, stds=[100, 100, 100, 1000])
        coeffs = np.array([1, 10, 100, 1000])
        old_value = np.array([0, 0, 0, 0])
        for datum_id in range(1000):
            value = np.random.uniform(-1000, 1000, 4)
            value[0] = np.sum(old_value * coeffs)
            old_value = value
            r.feed(value)

        # NOTE(review): this test only prints and asserts nothing, so it can
        # fail only by raising -- consider adding assertions on r.ranking.
        print("With penalty_range: {0}".format(r.penalty_range))
        print()
        print(r.ranking[0])
        print()
        print(r.means)
        print(r.stds)
        print()
Esempio n. 20
0
    def test_cross_val_regression(self):
        """5-fold cross-validation on the wine data must yield a score in [0, 1]."""
        wine_path = "../src/datasets/wine.data"
        # First 13 columns are features, column 13 the label.
        data = np.genfromtxt(wine_path, delimiter=",", dtype=float,
                             usecols=np.arange(13))
        labels = np.genfromtxt(wine_path, delimiter=",", dtype=float, usecols=13)

        lr = LinearRegression()
        folds = 5
        score = GeneralUtilities.cross_val(lr, data, labels, folds)
        self.assertTrue(0 <= score <= 1)
Esempio n. 21
0
def main():
    """Train by the chosen method, predict the test set and write an id/prediction CSV."""
    args = parse_args()

    train_X, train_y, valid_X, valid_y, col_name = preprocess_training_set()

    linreg = LinearRegression()
    if args.method == 'pseudo_inverse':
        linreg.train_by_pseudo_inverse(
            train_X, train_y, alpha=0.5, validate_data=(valid_X, valid_y))
    elif args.method == 'gradient_descent':
        linreg.train_by_gradient_descent(
            train_X, train_y, epoch=1000, rate=0.000001, batch=100, alpha=0.00000001,
            validate_data=(valid_X, valid_y))
    else:
        raise Exception('wrong method')

    test_X, ids = preprocess_testing_set(col_name)
    pred_y = linreg.predict(test_X)

    # Collect (id, prediction) rows, then emit one "<id>,<pred>" line each.
    rows = [(ids[i], pred_y[i]) for i in range(ids.shape[0])]
    with open(args.output, 'w') as fw:
        for sample_id, prediction in rows:
            fw.write('{id},{pred}\n'.format(id=sample_id, pred=prediction))
    def setUp(self):
        """Train a model via SGD from the training CSV and prepare normalized test arrays."""
        # create and train the model
        self.model = LinearRegression.from_csv(TRAIN_CSV)
        self.weights, self.cost_history = self.model.fit_sgd(epochs=5)

        # import test data
        # Columns 0-4 are features (bias column prepended after normalizing);
        # column 5 is the target, normalized separately.
        test = np.loadtxt(TEST_CSV, delimiter=",")
        self.x_test = add_one_bias(normalize(test[:, :5]))
        self.y_test = normalize(test[:, 5])
    def setUp(self):
        """Load the airfoil-self-noise dataset and split it for the tests."""
        # Columns 0-4 are the inputs; column 5 is the label column.
        self.data = np.genfromtxt("../src/airfoil_self_noise.txt",
                                  dtype=float,
                                  usecols=np.arange(5))
        self.labels = np.genfromtxt("../src/airfoil_self_noise.txt",
                                    dtype=float,
                                    usecols=5)
        self.train_data, self.train_labels, self.test_data, self.test_labels = GeneralUtilities.dataset_split(
            self.data, self.labels)

        # Fresh model instance for each test.
        self.lr = LinearRegression()
def validation_curve(X, y, Xval, yval):
    """Select lambda automatically: return the candidate lambdas with their
    training and cross-validation errors."""
    lambda_vec = np.array([0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10])
    lambda_vec = lambda_vec.reshape(lambda_vec.size, 1)

    error_train = np.zeros((lambda_vec.shape[0], 1))
    error_val = np.zeros((lambda_vec.shape[0], 1))

    for i in range(lambda_vec.size):
        # For each candidate lambda, fit and compute both errors.
        ilambda = lambda_vec[i]

        # Build the model and learn parameters with this lambda, then
        # evaluate with lambda = 0 so the reported errors are unregularized.
        my_lr = LR(X, y)
        my_lr.gradient_descent_reg(0.001, ilambda, 5000)
        theta = my_lr.theta
        error_train[i:], _ = my_lr.compute_cost_reg(theta, 0, X=X, y=y)
        error_val[i:], _ = my_lr.compute_cost_reg(theta, 0, X=Xval, y=yval)

    return lambda_vec, error_train, error_val
Esempio n. 25
0
def test2():
    """Fit LinearRegression on MNIST pixels and write a Kaggle-style submission CSV."""
    import pandas as pd
    from linear_regression import LinearRegression

    data_path = '../mnist/data/'
    df_train = pd.read_csv(data_path + "train.csv")
    # Column 0 is the digit label; the remaining columns are pixel values.
    pixels = df_train.iloc[:, 1:].to_numpy()
    digits = df_train.iloc[:, 0].to_numpy()
    lg = LinearRegression().fit(pixels, digits)

    df_test = pd.read_csv(data_path + "test.csv").to_numpy()
    predictions = lg.predict(df_test)
    # Round the continuous regression outputs to integer digit labels.
    rounded = [int(round(value)) for value in predictions]
    submission = pd.DataFrame({
        "ImageId": range(1, 1 + len(predictions)),
        "Label": rounded,
    })
    submission.to_csv("mnist-submission9.csv", index=False)
Esempio n. 26
0
    def test_folds_parameter(self):
        """cross_val must reject fold counts that are negative or exceed the sample count."""
        wine_path = "../src/datasets/wine.data"
        data = np.genfromtxt(wine_path, delimiter=",", dtype=float,
                             usecols=np.arange(13))
        labels = np.genfromtxt(wine_path, delimiter=",", dtype=float, usecols=13)

        lr = LinearRegression()
        # Both a negative count and an absurdly large one must raise.
        for bad_folds in (-5, 1000):
            self.assertRaises(ValueError, GeneralUtilities.cross_val,
                              lr, data, labels, bad_folds)
Esempio n. 27
0
    def test_train_and_predict(self):
        """Fitting y = x gives slope 1, intercept 0, and predicts 4.0 at x = 4."""
        # The training points lie exactly on the identity line:
        #
        #   |
        # 3 |         x
        #   |
        # 2 |     x
        #   |
        # 1 | x
        #   |________________
        #     1   2   3   4
        xs = np.array([1, 2, 3])
        ys = np.array([1, 2, 3])
        clf = LinearRegression()
        slope, intercept = clf.train(xs, ys)
        assert slope == 1.0
        assert intercept == 0.0

        # The trained model must extrapolate to an unseen x.
        assert clf.predict(4) == 4.0
Esempio n. 28
0
def run_linear_regression():
    """Plot the food-truck data, then run gradient descent on its cost function."""
    print('Plotting data\n')
    features = setup()
    features.columns = ['Profits', 'CityPopulation']
    # NOTE(review): Profits is used as X and CityPopulation as y, while the
    # chart labels below imply the opposite -- confirm the intended axes.
    X = features.Profits
    y = features.CityPopulation
    m = len(y)
    iterations = 1500
    alpha = 0.01
    # NOTE(review): theta sized by the number of samples (m) rather than the
    # number of parameters looks suspicious -- verify against cost_function.
    theta = np.zeros(m)  # Set the initial theta value
    lr = LinearRegression(X, y, iterations, alpha)

    lr.plot_data(X, y, 'Profits', 'City Population',
                 'Food Truck Profit v. City Pop')

    print('Testing gradient descent algorithm...\n')
    # Add a column of ones to X
    # X.bias = np.ones((m, 1))

    print('Initial cost: {}'.format(lr.cost_function(X, y, theta)))

    # Run the gradient descent
    theta, cost_history = lr.gradient_descent(X, y, theta, alpha, iterations)

    print('Optimum theta found by gradient descent: {}'.format(theta))
    def test_train(self):
        """One gradient step with lr = 0.1 must produce known W and b values."""
        # Single-feature case: data on the line y = x.
        simple = LinearRegression(1)
        simple.train(np.array([2, 4]), np.array([2, 4]), n_iter=1, lr=0.1)
        # Expected parameters after exactly one update.
        self.assertTrue(np.array_equal(simple.W, np.array([1.0])))
        self.assertAlmostEqual(simple.b[0], 0.3)

        # Two-feature case: y = x1 + x2.
        multi = LinearRegression(2)
        multi.train(np.array([[2, 2],
                              [4, 4]]), np.array([4, 8]), n_iter=1, lr=0.1)
        self.assertTrue(np.array_equal(multi.W, np.array([2.0, 2.0])))
        self.assertAlmostEqual(multi.b[0], 0.6)
 def test_score(self):
     """r_squared read after fit/predict must land in [0, 1]."""
     train_data, train_labels, test_data, test_labels = GeneralUtilities.dataset_split(
         self.data, self.labels)
     lr = LinearRegression()
     lr.fit(train_data, train_labels)
     # lr.r_squared is read right after predict(), so predict() is presumably
     # expected to populate it as a side effect -- confirm in LinearRegression.
     lr.predict(test_data, test_labels)
     self.assertTrue(0 <= lr.r_squared <= 1)
    def __init__(self, numpy_rng, theano_rng=None, n_ins=100,
                 hidden_layers_size=None, n_outs=1, L1_reg=0.00,
                 L2_reg=0.0001):
        """Build a stack of sigmoid/RBM layers topped by a linear regression layer.

        Args:
            numpy_rng: numpy RandomState used to initialize layer weights.
            theano_rng: optional Theano RNG; derived from numpy_rng when None.
            n_ins: size of the input vector.
            hidden_layers_size: widths of the hidden layers (default [100, 100]).
            n_outs: number of regression outputs.
            L1_reg, L2_reg: penalty coefficients folded into finetune_cost.
        """
        # Avoid a shared mutable default for the layer sizes.
        if hidden_layers_size is None:
            hidden_layers_size = [100, 100]

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_size)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = MRG_RandomStreams(numpy_rng.randint(2 ** 30))

        # Symbolic inputs: a matrix of samples and a vector of targets.
        self.x = T.matrix('x')
        self.y = T.vector('y')

        for i in range(self.n_layers):
            # Layer i reads the raw input when i == 0, else the previous layer.
            if i == 0:
                input_sizes = n_ins
            else:
                input_sizes = hidden_layers_size[i - 1]

            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input,
                                        n_in=input_sizes, n_out=hidden_layers_size[i],
                                        activation=T.nnet.sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)

            # The RBM shares W and the hidden bias with its sigmoid layer, so
            # pre-training the RBM initializes that layer's parameters.
            rbm_layer = RBM(numpy_rng=numpy_rng, theano_rng=theano_rng,
                            input=layer_input, n_visible=input_sizes,
                            n_hidden=hidden_layers_size[i], W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        self.linearRegressionLayer = LinearRegression(input=self.sigmoid_layers[-1].output,
                                                      n_in=hidden_layers_size[-1],
                                                      n_out=n_outs)
        # Penalties cover only the top hidden layer and the output layer.
        self.L1 = abs(self.sigmoid_layers[-1].W).sum() + abs(self.linearRegressionLayer.W).sum()
        self.L2_sqr = (self.sigmoid_layers[-1].W ** 2).sum() + (self.linearRegressionLayer.W ** 2).sum()
        self.squared_errors = self.linearRegressionLayer.squared_errors(self.y)
        self.finetune_cost = self.squared_errors + L1_reg * self.L1 + L2_reg * self.L2_sqr
        self.y_pred = self.linearRegressionLayer.p_y_given_x
        self.params = self.params + self.linearRegressionLayer.params
def main():
    """Compare a hand-rolled regularized regression with sklearn equivalents by MSE."""
    X, y = datasets.make_regression(n_samples=500, n_features=1, noise=20)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    # Pair each sklearn reference model with our implementation configured to match.
    models = {
        'linear1': linear_model.LinearRegression(),
        'linear': LinearRegression(),
        'lasso1': linear_model.Lasso(alpha=0.1),
        'lasso2': LinearRegression(l1=0.1),
        'ridge1': linear_model.Ridge(alpha=0.5),
        'ridge2': LinearRegression(l2=0.5),
        'elasticnet1': linear_model.ElasticNet(alpha=0.5, l1_ratio=0.5),
        'elasticnet2': LinearRegression(l1=0.25, l2=0.25 * 0.5),
    }
    for model_name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        # Align shapes before scoring (our model may return a column vector).
        y_pred = np.reshape(y_pred, y_test.shape)
        mse = mean_squared_error(y_test, y_pred)
        print('{}: {}'.format(model_name, mse))
Esempio n. 33
0
def fitting():
    """Fit profit-vs-population with gradient descent; plot the loss curve and fit."""
    data = pd.read_csv('food_profit.txt', names=['population', 'profit'])
    x = data['population']
    y = data['profit']

    alpha = 0.01
    max_iter = 1500
    model = LinearRegression(alpha, max_iter)
    loss, _ = model.fit(x, y)
    p = model.predict(x)

    plt.figure(figsize=(10, 6))
    plt.subplot(2, 1, 1)
    # Derive the x axis from max_iter instead of hard-coding 1501.
    plt.plot(np.arange(1, max_iter + 1), loss)
    plt.title('Loss Curve')

    plt.subplot(2, 1, 2)
    # Fixed legend typo: 'Traing Data' -> 'Training Data'.
    plt.plot(x, y, 'rx', markersize=10, label='Training Data')
    plt.plot(x, p, 'b', label='Linear Regression')
    plt.xlabel('Population of City in 10,000s')
    plt.ylabel('Profit in $10,000s')
    plt.grid(True)
    plt.legend()
    plt.show()
    def test_r_calcuation(self):
        """Adjusted R^2 must be computed and differ from plain R^2 on multi-attribute data."""
        # check that the adjusted_r_squared value is calculated and different to r_squared
        # when using multiple attributes
        # it is possible that the adjusted value can drop below 0 which is why the test doesn't check for that
        lr = LinearRegression()
        lr.fit(self.train_data, self.train_labels)
        # predict() is presumably what populates the R^2 attributes read below.
        lr.predict(self.test_data, self.test_labels)

        self.assertTrue(lr.adj_r_squared <= 1)
        self.assertTrue(lr.adj_r_squared != lr.r_squared)
 def setUp(self):
     """Create one single-feature and one two-feature model for the tests."""
     self.model_simple = LinearRegression(1)
     self.model_multiple = LinearRegression(2)
 def test_feeding_data(self):
     """Feeding four 4-component samples into an order-2 regressor must not raise."""
     # target_index=0: the first component is the value being predicted.
     r = LinearRegression(order=2, target_index=0)
     r.feed([0, 0, 0, 0])
     r.feed([1, 1, 0, 0])
     r.feed([2, 0, 1, 0])
     r.feed([4, 0, 0, 1])
Esempio n. 37
0
if __name__ == '__main__':
    pd_train = pd.read_csv('./data/train.csv', sep=';')
    pd_test = pd.read_csv('./data/test.csv', sep=';')
    pd_validate = pd.read_csv('./data/validate.csv', sep=';')

    # 1. Min-max (uniform) normalization of the raw features.
    trn_X, trn_X_max, trn_X_min = uniform_norm(pd_train.drop('quality', axis=1).values)
    trn_y = pd_train['quality'].values

    # Scale validation/test features with the TRAINING min/max (no leakage).
    val_X = (pd_validate.drop('quality', axis=1).values - trn_X_min) / (trn_X_max - trn_X_min)
    val_y = pd_validate['quality'].values

    test_X = (pd_test.drop('quality', axis=1).values - trn_X_min) / (trn_X_max - trn_X_min)
    test_y = pd_test['quality'].values

    model_1 = LinearRegression()
    train_costs = model_1.fit(trn_X, trn_y, alpha=0.5, lmbda=0, algorithm="batch_gd", verbose=True)
    val_pred = model_1.predict(val_X)
    test_pred = model_1.predict(test_X)

    # Halved mean squared error on each split.
    print("Validate Error %f" % (sum((val_pred - val_y) ** 2) * 0.5 / val_X.shape[0]))
    print("Test Error %f" % (sum((test_pred - test_y) ** 2) * 0.5 / test_X.shape[0]))
    print("\n\n")

    # 2. Gaussian (z-score) normalization of the raw features.
    trn_X, trn_X_mean, trn_X_std = gaussian_norm(pd_train.drop('quality', axis=1).values)
    trn_y = pd_train['quality'].values

    # Standardize the validation features with the TRAINING mean/std.
    val_X = (pd_validate.drop('quality', axis=1).values - trn_X_mean) / trn_X_std
    val_y = pd_validate['quality'].values
housing = fetch_california_housing(data_home='/home/bdol/data')
train_data, test_data, train_target, test_target = split_train_test(
    housing.data, housing.target
)

# Normalize the data
train_data = preprocessing.scale(train_data)
test_data = preprocessing.scale(test_data)

# Append bias feature (a constant 1.0 column at the end of each matrix).
train_data = np.hstack((train_data, np.ones((train_data.shape[0], 1),
                                            dtype=train_data.dtype)))
test_data = np.hstack((test_data, np.ones((test_data.shape[0], 1),
                                          dtype=test_data.dtype)))

# Targets as column vectors.
train_target = train_target[:, None]
test_target = test_target[:, None]

lin_reg = LinearRegression()
# Converted Python 2 print statements to print() calls for Python 3.
print("Training...")
# lin_reg.train(train_data, train_target)
lin_reg.train_closed_form_unregularized(train_data, train_target)
print()
print("Done!")

# Get training error
train_preds = lin_reg.test(train_data)
test_preds = lin_reg.test(test_data)
print("Train error:", RMSE(train_preds, train_target))
print("Test error:", RMSE(test_preds, test_target))
Esempio n. 39
0
# between 0.5 and 1.0) and 20 smaller ones (between 0.0 and 0.1)
true_w = 0.3*np.random.rand(20, 1)
true_w = np.append(true_w, 0.5*np.random.rand(10, 1) + 0.5)

# Now generate the dataset using the true weights
N = 50
train_data = np.random.rand(N, 30)
train_target = train_data.dot(true_w)[:, None]+np.random.randn(N, 1)
test_data = np.random.rand(N, 30)
test_target = test_data.dot(true_w)[:, None]+np.random.randn(N, 1)

# Sweep the ridge penalty over two decades on a log grid.
lam_range = np.logspace(-1, 1, 100)
unreg_results = np.zeros((len(lam_range), 1))
reg_results = np.zeros((len(lam_range), 1))

lin_reg = LinearRegression()
i = 0
for l in lam_range:
    # NOTE(review): the unregularized fit does not depend on l, so this
    # retrains an identical model every iteration; it could be hoisted out
    # of the loop if the two train_* calls do not share model state.
    lin_reg.train_closed_form_unregularized(train_data, train_target)
    yhat = lin_reg.test(test_data)
    unreg_results[i] = RMSE(yhat, test_target)

    lin_reg.train_closed_form_ridge(train_data, train_target, l)
    yhat = lin_reg.test(test_data)
    reg_results[i] = RMSE(yhat, test_target)

    i += 1

# Log-scaled x axis for the lambda sweep plot.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_xscale("log")
class DBNR(object):
    """Deep belief network regressor.

    A stack of sigmoid hidden layers (each pre-trainable as an RBM that
    shares its weights) feeding a linear regression output layer.
    Fine-tuning minimizes squared error plus L1/L2 penalties on the top
    layers.
    """

    def __init__(self, numpy_rng, theano_rng=None, n_ins=100,
                 hidden_layers_size=None, n_outs=1, L1_reg=0.00,
                 L2_reg=0.0001):
        """Build the symbolic graph for the layer stack.

        Args:
            numpy_rng: numpy RandomState for weight initialization.
            theano_rng: optional Theano RNG; seeded from numpy_rng when None.
            n_ins: input vector size.
            hidden_layers_size: widths of the hidden layers (default [100, 100]).
            n_outs: number of regression outputs.
            L1_reg, L2_reg: penalty coefficients in the fine-tune cost.
        """
        # Avoid a shared mutable default for the layer sizes.
        if hidden_layers_size is None:
            hidden_layers_size = [100, 100]

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_size)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = MRG_RandomStreams(numpy_rng.randint(2 ** 30))

        # Symbolic inputs: a matrix of samples and a vector of targets.
        self.x = T.matrix('x')
        self.y = T.vector('y')

        for i in range(self.n_layers):
            # Layer i reads the raw input when i == 0, else the previous layer.
            input_sizes = n_ins if i == 0 else hidden_layers_size[i - 1]
            layer_input = self.x if i == 0 else self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input,
                                        n_in=input_sizes, n_out=hidden_layers_size[i],
                                        activation=T.nnet.sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)

            # The RBM shares W and the hidden bias with its sigmoid layer, so
            # pre-training the RBM initializes that layer's parameters.
            rbm_layer = RBM(numpy_rng=numpy_rng, theano_rng=theano_rng,
                            input=layer_input, n_visible=input_sizes,
                            n_hidden=hidden_layers_size[i], W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        self.linearRegressionLayer = LinearRegression(input=self.sigmoid_layers[-1].output,
                                                      n_in=hidden_layers_size[-1],
                                                      n_out=n_outs)
        # Penalties cover only the top hidden layer and the output layer.
        self.L1 = abs(self.sigmoid_layers[-1].W).sum() + abs(self.linearRegressionLayer.W).sum()
        self.L2_sqr = (self.sigmoid_layers[-1].W ** 2).sum() + (self.linearRegressionLayer.W ** 2).sum()
        self.squared_errors = self.linearRegressionLayer.squared_errors(self.y)
        self.finetune_cost = self.squared_errors + L1_reg * self.L1 + L2_reg * self.L2_sqr
        self.y_pred = self.linearRegressionLayer.p_y_given_x
        self.params = self.params + self.linearRegressionLayer.params

    def pretraining_function(self, train_set_x, batch_size, k):
        """Return one CD-k pre-training Theano function per RBM layer."""
        index = T.lscalar('index')
        learning_rate = T.scalar('lr')
        # (Removed an unused n_batches local that was computed here.)
        batch_begin = index * batch_size
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for rbm in self.rbm_layers:
            cost, updates = rbm.get_cost_updates(learning_rate, persistent=None, k=k)
            fn = theano.function(
                inputs=[index, theano.In(learning_rate, value=0.1)],
                outputs=cost, updates=updates, givens={
                    self.x: train_set_x[batch_begin:batch_end]
                }
            )
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        """Compile the fine-tuning train function and validation/test scorers.

        Args:
            datasets: [(train_x, train_y), (valid_x, valid_y), (test_x, test_y)]
                as Theano shared variables.
            batch_size: minibatch size.
            learning_rate: SGD step size for fine-tuning.

        Returns:
            (train_fn, valid_score, test_score); the scorers return a list of
            per-batch squared errors.
        """
        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        # Floor division: under Python 3, "/" yields floats, which would break
        # the range() calls in the scorers below.
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

        index = T.lscalar('index')

        # Plain SGD updates on every parameter of the stack.
        gparams = T.grad(self.finetune_cost, self.params)
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - gparam * learning_rate))

        train_fn = theano.function(
            inputs=[index], outputs=self.finetune_cost, updates=updates,
            givens={
                self.x: train_set_x[index * batch_size: (index + 1) * batch_size],
                self.y: train_set_y[index * batch_size: (index + 1) * batch_size]
            }
        )

        test_score_i = theano.function(
            inputs=[index], outputs=self.squared_errors, givens={
                self.x: test_set_x[index * batch_size: (index + 1) * batch_size],
                self.y: test_set_y[index * batch_size: (index + 1) * batch_size]
            }
        )

        valid_score_i = theano.function(
            inputs=[index], outputs=self.squared_errors, givens={
                self.x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size: (index + 1) * batch_size]
            }
        )

        def valid_score():
            return [valid_score_i(i) for i in range(n_valid_batches)]

        def test_score():
            return [test_score_i(i) for i in range(n_test_batches)]

        return train_fn, valid_score, test_score
    dummy_x = list(range(200, 2100, 100))
    dummy_y = model.W * dummy_x + model.b

    plt.scatter(x, y)
    plt.plot(dummy_x, dummy_y)

    # automatically close plot after 2 seconds
    plt.show(block=False)
    plt.pause(2)
    plt.close()

if __name__=="__main__":
    # Split the module-level `data` pairs into separate x and y arrays.
    x = np.array([d[0] for d in data])
    y = np.array([d[1] for d in data])

    model = LinearRegression(1)
    # without this initial parameter, model couldn't be optimized...
    model.b = 17
    display_data(x, y, model)

    # learning rate higher than this would increase error after each iter...
    # perhaps, data is not suitable for linear slope and intercept
    # or, there are too few data points
    model.train(x, y, n_iter=10, lr=0.000001)
    display_data(x, y, model)

    # NOTE(review): `testing` is defined elsewhere in the module; from its use
    # here it is presumably an (input, expected-output) pair -- confirm.
    test_x, test_y = testing
    pred = model.predict(test_x)
    print('Dist from Dublin to Gdansk: {}\n'
            'answer  {}\n'
            'predict {}'.format(test_x, test_y, pred))
Esempio n. 42
0
File: main.py Progetto: ruivaz/my_ml
import numpy as np
from linear_regression import LinearRegression
file = '/Users/ruivaz/workspace/git/c_apps/data.csv'

if __name__ == "__main__":
    # Load the CSV (skipping the header row) and run the full pipeline.
    # Use a context manager: the original opened the file and never closed it.
    with open(file, "rb") as handle:
        data_table = np.loadtxt(handle, delimiter=",", skiprows=1)
    lr = LinearRegression(data_table)
    lr.run_gradient_descent()
    lr.plot_solution()