Example 1
def main():

    # Get training matrices for linear regression model
    x, y = get_train_matrices()

    # Create instance of LinearRegression with the training matrices
    linear_regression = LinearRegression(x, y)

    # Fit with learning rate, number of iterations, and L2 regularization parameter
    linear_regression.fit(0.01, 1000, 0)

    # Predict for all the input values
    y_pred = linear_regression.predict(x)

    # Plot the scatter plots of training data and graph of our linear model
    plt.scatter(x, y)
    plt.plot(x, y_pred)
    plt.show()

    # Print the weights and biases of the model
    print("Weights: {}\nBiases: {}".format(linear_regression.w,
                                           linear_regression.c))

    # Validate the model by printing the performance metrics
    linear_regression.validate()

    # Predict for the input data in test folder and save as output.csv in test folder
    x_test = pd.read_csv('test/input.csv')['x'].values.reshape(-1, 1)
    y_test = linear_regression.predict(x_test)
    df_predict = pd.DataFrame({'y': y_test.reshape(-1)})
    df_predict.to_csv('test/output.csv')
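
The `LinearRegression` class itself is not shown above. For reference, here is a minimal sketch of a constructor and `fit(learning_rate, n_iters, l2)` consistent with this usage, assuming plain batch gradient descent with an L2 penalty; the attribute names `w` and `c` come from the example, but the update rule is an assumption, not the original implementation:

import numpy as np


class LinearRegression:
    # hypothetical sketch consistent with the usage above; not the original class

    def __init__(self, x, y):
        self.x = np.asarray(x, dtype=float)               # (n_samples, n_features)
        self.y = np.asarray(y, dtype=float).reshape(-1, 1)
        self.w = np.zeros((self.x.shape[1], 1))           # weights
        self.c = 0.0                                      # bias

    def fit(self, learning_rate, n_iters, l2):
        n = self.x.shape[0]
        for _ in range(n_iters):
            err = self.predict(self.x) - self.y           # (n_samples, 1)
            grad_w = self.x.T @ err / n + l2 * self.w     # L2 penalty on weights only
            grad_c = float(err.mean())
            self.w -= learning_rate * grad_w
            self.c -= learning_rate * grad_c

    def predict(self, x):
        return np.asarray(x, dtype=float) @ self.w + self.c
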
    def test_score(self):
        train_data, train_labels, test_data, test_labels = GeneralUtilities.dataset_split(
            self.data, self.labels)
        lr = LinearRegression()
        lr.fit(train_data, train_labels)
        lr.predict(test_data, test_labels)
        self.assertTrue(0 <= lr.r_squared <= 1)
    def test_r_calculation(self):
        # check that adjusted_r_squared is calculated and differs from
        # r_squared when multiple attributes are used; the adjusted value
        # can drop below 0, which is why the test has no lower bound
        lr = LinearRegression()
        lr.fit(self.train_data, self.train_labels)
        lr.predict(self.test_data, self.test_labels)

        self.assertTrue(lr.adj_r_squared <= 1)
        self.assertTrue(lr.adj_r_squared != lr.r_squared)
    def test_ridge_equal_least_squares(self):
        lr = LinearRegression(model="Ridge Regression", lam=0)
        lr.fit(self.train_data, self.train_labels)
        lr.predict(self.test_data, self.test_labels)
        ridge_score = lr.r_squared

        lr = LinearRegression(model=None)
        lr.fit(self.train_data, self.train_labels)
        lr.predict(self.test_data, self.test_labels)
        least_squares_score = lr.r_squared
        self.assertTrue(ridge_score == least_squares_score)
    def test_ridge_scores(self):
        lr = LinearRegression(model=None)
        lr.fit(self.train_data, self.train_labels)
        lr.predict(self.test_data, self.test_labels)
        least_squares_score = lr.r_squared

        lam_values = [10, 1, 0.1, 0.01, 0.001]
        for lam in lam_values:
            lr = LinearRegression(model="Ridge Regression", lam=lam)
            lr.fit(self.train_data, self.train_labels)
            lr.predict(self.test_data, self.test_labels)
            self.assertTrue(least_squares_score != lr.r_squared)
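
The `r_squared` and `adj_r_squared` attributes these tests exercise are standard metrics. A minimal sketch of the textbook definitions (how the class stores them internally is not shown in the source):

import numpy as np


def r_squared(y_true, y_pred):
    # coefficient of determination: 1 - SS_res / SS_tot
    ss_res = np.sum((y_true - y_pred) ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
    return 1.0 - ss_res / ss_tot


def adjusted_r_squared(r2, n_samples, n_features):
    # penalizes additional attributes; can drop below 0, as the test comment notes
    return 1.0 - (1.0 - r2) * (n_samples - 1) / (n_samples - n_features - 1)
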
def linearRegression_Model(learning_rate, n_iters, split_test_ratio):

    X, y = datasets.make_regression(n_samples=100,
                                    n_features=1,
                                    noise=20,
                                    random_state=4)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=split_test_ratio, random_state=1234)

    regressor = LinearRegression(learning_rate=learning_rate, n_iters=n_iters)
    regressor.fit(X_train, y_train)
    predictions = regressor.predict(X_test)

    mse = mean_squared_error(y_test, predictions)
    print("MSE:", mse)

    y_pred_line = regressor.predict(X)
    cmap = plt.get_cmap('viridis')
    fig = plt.figure(figsize=(8, 6))
    m1 = plt.scatter(X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(X, y_pred_line, color='black', linewidth=2, label="Prediction")
    plt.show()

    return predictions, mse, plt
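
A hypothetical invocation (argument values chosen purely for illustration; the function generates its own synthetic data):

preds, mse, _ = linearRegression_Model(learning_rate=0.01,
                                       n_iters=1000,
                                       split_test_ratio=0.2)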


def test_fit():
    y = np.array([
        0.09459717, 0.50650243, 1.03329565, 0.52587828, 0.49264871,
        -0.64896441, -0.86499999, -1.00885329, -0.80418399, 0.57436388
    ]).reshape(-1, 1)
    x_mat = np.array([
        0., 0., 0.6981317, 0.48738787, 1.3962634, 1.94955149, 2.0943951,
        4.38649084, 2.7925268, 7.79820595, 3.4906585, 12.18469679, 4.1887902,
        17.54596338, 4.88692191, 23.88200571, 5.58505361, 31.19282379,
        6.28318531, 39.4784176
    ]).reshape(-1, 2)
    model = LinearRegression()
    model.fit(x_mat, y)
    fitted_weights = model.weights_
    correct_weights = np.array([[0.77483422], [-0.42288373], [0.03914334]])
    np.testing.assert_almost_equal(fitted_weights, correct_weights, decimal=8)
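
Since `test_fit` is a plain least-squares problem, the expected weights can be cross-checked against NumPy's closed-form solver. A sketch, assuming the model prepends a column of ones for the intercept (which the three-entry weight vector for two features suggests):

import numpy as np


def closed_form_weights(x_mat, y):
    # ordinary least squares with an intercept column prepended,
    # useful for cross-checking an iteratively fitted model
    X_aug = np.hstack([np.ones((x_mat.shape[0], 1)), x_mat])
    w, *_ = np.linalg.lstsq(X_aug, y, rcond=None)
    return w
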
def test_fit_functional():
    import sklearn.model_selection
    import numpy as np

    from linear_regression import LinearRegression
    X = np.zeros((900, 3), dtype=np.float32)
    num_samples = 30

    xx = np.linspace(-5, 5, num_samples)
    XX, YY = np.meshgrid(xx, xx)
    X[:, 0] = XX.flatten()
    X[:, 1] = YY.flatten()
    X[:, -1] = 1  # a column of 1's for the bias trick
    Z = 0.1 * XX + 0.2 * YY + 0.4
    y = Z.reshape(-1, 1)
    X_train, X_val, y_train, y_val = sklearn.model_selection.train_test_split(
        X, y)
    model = LinearRegression(input_dimensions=2)
    train_mse, val_mse = model.fit(X_train,
                                   y_train,
                                   X_val,
                                   y_val,
                                   num_epochs=20,
                                   batch_size=4,
                                   alpha=0.1,
                                   _lambda=0.0)
    final_train_mse = train_mse[-1]
    desired_weights = np.float32([[0.1, 0.2, 0.4]]).T
    np.testing.assert_allclose(model.weights,
                               desired_weights,
                               rtol=1e-3,
                               atol=1e-3)
    assert final_train_mse < 0.001
Example 9
def visual_3d():
    data = pd.read_csv('food_profit.txt', names=['population', 'profit'])
    x = data['population']
    y = data['profit']

    x = x[:, np.newaxis]
    x = np.hstack((np.ones_like(x), x))

    theta0 = np.zeros((40, 10))
    theta1 = np.zeros((40, 10))
    jvals = np.zeros((40, 10))
    for i, t0 in enumerate(np.arange(-10, 10, .5)):
        for j, t1 in enumerate(np.arange(-1, 4, .5)):
            theta0[i, j] = t0
            theta1[i, j] = t1
            jvals[i, j] = 0.5*np.mean((x.dot(np.array([t0, t1])) - y)**2)

    import mpl_toolkits.mplot3d  # noqa: F401 -- registers the '3d' projection
    ax = plt.figure().add_subplot(projection='3d')
    ax.plot_surface(theta0, theta1, jvals, cmap=plt.get_cmap('BuPu_r'))

    alpha = 0.01
    max_iter = 1500
    model = LinearRegression(alpha, max_iter)
    loss, w_list = model.fit(x, y)
    w_list = np.array(w_list)

    ax.plot([w_list[0, 0]], [w_list[0, 1]], [loss[0]], 'rx')
    ax.plot(w_list[:, 0], w_list[:, 1], loss, 'o')
    ax.plot([w_list[-1, 0]], [w_list[-1, 1]], [loss[-1]], 'gx')
    plt.show()
Example 10
def main():
    xs, ys = create_dataset(50, 10, 2, correlation='neg')
    train_xs, train_ys = xs[:25], ys[:25]
    test_xs, test_ys = xs[25:], ys[25:]

    # ml model
    model = LinearRegression()
    model.fit(train_xs, train_ys)
    preds = model.predictions(xs)
    print("{:1.2f}".format(model.score(ys, preds)))

    # plot the dataset
    plt.scatter(train_xs, train_ys)
    plt.scatter(test_xs, test_ys)
    plt.plot(xs, preds)
    plt.show()
Example 11
def fitting():
    data = pd.read_csv('house_data.txt', names=['area', 'bedroom', 'price'])

    x_data = data[['area', 'bedroom']]
    x_data = (x_data - x_data.mean())/(x_data.max() - x_data.min())
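    # note: this is mean normalization (center on the mean, scale by the
    # range), not min-max scaling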
    y_data = data['price']
    plt.figure(figsize=(10, 6))

    plt.subplot(2, 2, 1)
    plt.plot(x_data['area'], y_data, 'rx')
    plt.title('area-price')

    plt.subplot(2, 2, 2)
    plt.plot(x_data['bedroom'], y_data, 'bx')
    plt.title('bedroom-price')


    alpha = 10
    max_iter = 50

    model = LinearRegression(alpha, max_iter)
    loss, _ = model.fit(x_data.values, y_data.values)
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(1, max_iter+1), loss)

    plt.subplots_adjust(hspace=0.4)
    plt.show()
Example 12
def main():
    X, y = make_regression(n_samples=100, n_features=1, noise=20)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
    n_samples, n_features = np.shape(X)

    model = LinearRegression(n_iterations=3000,
                             regularization=l1_l2(alpha=0.5))
    model.fit(X_train, y_train)

    # Training error plot
    n = len(model.training_errors)
    training, = plt.plot(range(n),
                         model.training_errors,
                         label="Training Error")
    plt.legend(handles=[training])
    plt.title("Error Plot")
    plt.ylabel('Mean Squared Error')
    plt.xlabel('Iterations')
    plt.show()

    y_pred = model.predict(X_test)
    y_pred = np.reshape(y_pred, y_test.shape)

    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s" % (mse))

    y_pred_line = model.predict(X)

    # Plot the result
    cmap = plt.get_cmap('viridis')
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X,
             y_pred_line,
             color='black',
             linewidth=2,
             label="Prediction")
    plt.suptitle("Linear Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celsius')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.show()

    print('Done!')
Example 13
def test1():
    X1 = np.array([[1], [2], [3], [4], [5]])
    y1 = np.array([4, 7, 10, 13, 16])

    X2 = np.array([[1], [2], [3], [4], [5]])
    y2 = np.array([2, 4, 6, 8, 10])

    X3 = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
    y3 = np.dot(X3, np.array([1, 2])) + 3

    reg = LinearRegression().fit(X1, y1)
    print(reg.predict(np.array([[9], [10]])))

    reg = reg.fit(X2, y2)
    print(reg.predict(np.array([[9], [10]])))

    reg = reg.fit(X3, y3)
    print(reg.predict(np.array([[3, 5]])))
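
If each fit recovers the exact coefficients (all three training sets are noise-free linear data, so any correct implementation should), the expected outputs are approximately [28, 31] for y = 3x + 1, [18, 20] for y = 2x, and [16] for y = x1 + 2*x2 + 3.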
Example 14
def main():

    print("Loading data...")

    # Get training matrices for linear regression model
    x, y = get_train_matrices()

    print("Data loaded.\n")

    # Create instance of LinearRegression with the training matrices
    linear_regression = LinearRegression(x, y)

    print("Fitting the model with data...")

    # Fit with learning rate, number of iterations, and L2 regularization parameter
    linear_regression.fit(0.01, 1000, 0)

    print("Model fitted.\n")

    # Predict for all the input values
    y_pred = linear_regression.predict(x)

    if x.shape[1] == 1:
        print("The model is fitted as shown in the figure.\n")
        # Plot the scatter plots of training data and graph of our linear model
        plt.scatter(x, y)
        plt.plot(x, y_pred)
        plt.show()

    # Print the weights and biases of the model
    print("The weights and biases are printed as:\n"
          "Weights: {}\nBiases: {}\n".format(linear_regression.w,
                                             linear_regression.c))

    print("Performance statistics:")
    # Validate the model by printing the performance metrics
    linear_regression.validate()

    # Predict for the input data in test folder and save as output.csv in test folder
    x_test = pd.read_csv('test/input.csv').values[:, 1:]
    y_test = linear_regression.predict(x_test)
    df_predict = pd.DataFrame({'y': y_test.reshape(-1)})
    df_predict.to_csv('test/output.csv')
Example 15
def main():
    trainfile = r"data/ex1data1.txt"
    train_X, train_y = loadDataSet(trainfile)
    clf = LinearRegression()
    weigh = clf.fit(train_X, train_y, alpha=0.01, maxCycles=500)
    Fig = plt.figure(figsize=(8, 4))  # create a 'figure' instance
    Ax = Fig.add_subplot(111)  # create an 'axes' instance in the figure
    Ax.plot(train_X, train_y, 'o')  # create a Line2D instance in the axes
    #Ax.plot(a1,a2)
    a1 = [0, 1]
    a2 = [0, 1 * weigh]
    b1 = [0, 25]
    b2 = [0, 25 * weigh]
    Ax.plot(a1, a2, b1, b2)
    Fig.savefig("test.pdf")
Example 16
def fitting():
    data = pd.read_csv('food_profit.txt', names=['population', 'profit'])
    x = data['population']
    y = data['profit']

    alpha = 0.01
    max_iter = 1500
    model = LinearRegression(alpha, max_iter)
    loss, _ = model.fit(x, y)
    p = model.predict(x)

    plt.figure(figsize=(10, 6))
    plt.subplot(2, 1, 1)
    plt.plot(np.arange(1, max_iter + 1), loss)
    plt.title('Loss Curve')

    plt.subplot(2, 1, 2)
    plt.plot(x, y, 'rx', markersize=10, label='Training Data')
    plt.plot(x, p, 'b', label='Linear Regression')
    plt.xlabel('Population of City in 10,000s')
    plt.ylabel('Profit in $10,000s')
    plt.grid(True)
    plt.legend()
    plt.show()
Example 17
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import make_regression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

from linear_regression import LinearRegression


def calculate_mean_squared_error(y_true, y_pred):
    return np.mean(np.square(y_true - y_pred))


if __name__ == '__main__':
    data = make_regression(n_samples=200, n_features=1, n_targets=1, random_state=42)
    X, y = data[0], data[1]

    X_train, X_test, Y_train, Y_test = train_test_split(X, y, shuffle=True, test_size=0.2)

    regressor = LinearRegression(lr=0.01, n_iters=500)
    regressor.fit(X_train, Y_train)

    predictions = regressor.predict(X_test)

    mse = calculate_mean_squared_error(Y_test, predictions)
    r_score = r2_score(Y_test, predictions)
    print("The mean squared error of the regressor is: {}".format(mse))
    print("The R-squared loss of the regressor is: {}".format(r2_score))

    y_pred_line = regressor.predict(X)

    assert X.shape[1] == 1
    cmap = plt.get_cmap('viridis')
    fig = plt.figure(figsize=(8, 6))
    plt.scatter(X_train[:, 0], Y_train, color=cmap(0.9), s=10)
    plt.scatter(X_test[:, 0], Y_test, color=cmap(0.5), s=10)
    plt.plot(X, y_pred_line, color='black', linewidth=2, label="Prediction")
    plt.show()
Example 18
import numpy.linalg as la
import matplotlib.pyplot as plt

from sklearn import preprocessing
from sklearn.datasets import load_boston

from linear_regression import LinearRegression


# load the dataset
X_train, Y_train = load_boston(return_X_y=True)

# create the linear regression model object
model = LinearRegression()

# create the list of the regularisation parameters to be used
reg_param = [0.1, 1.0, 10.0, 100.0, 1000.0]
num_epochs = 100000

model_params_ls = model.fit('ls', X_train, Y_train)
print(model_params_ls)

# compute the lasso parameters for different reg param values
# and plot the values in a graph
plt.figure(1)
plt.title('LASSO parameter profiles at different regularisation params')
plt.xlabel('parameter index')
plt.ylabel('parameter values')
for i in range(len(reg_param)):
    model_params_lasso = model.fit('lasso', X_train, Y_train,
                                   regularization_param=reg_param[i],
                                   num_epochs=num_epochs)
    plt.plot(model_params_lasso, label=str(reg_param[i]))
Example 19
    binary_labels = metrics.make_binary(labels)

    cm = metrics.confusion_matrix(testt, binary_labels)
    a = metrics.accuracy(testt, binary_labels)
    p, r = metrics.precision_and_recall(testt, binary_labels)
    f = metrics.f1_measure(testt, binary_labels)
    print(binary_labels)
    print("Accuracy = %f\n" % a)
    print("Precision = %f, Recall = %f\n" % (p, r))
    print("F1 measure = %f\n" % f)
    print(sum(binary_labels) / len(binary_labels))

elif model == "linear_regression":

    lr = LinearRegression()
    lr.fit(trainf, traint)
    lrc = lr.score(testf, testt)
    labels = lr.predict(testf)
    binary_labels = metrics.make_binary(labels)
    print(binary_labels)
    a = metrics.accuracy(testt, binary_labels)
    print("Accuracy = %f\n" % a)
    print("R2 sore:", lrc)

    # GOOGL_closing_data = features[:,5].reshape(-1,1)
    # n = 3
    #
    # #Data Processing
    # data0 = features[:,5]
    # example0 = data0[:-n].reshape(-1,1)
    #
Example 20
    encoder = utilities.OneHotEncoder()
    scaler = utilities.StandardScaler()

    encoder.fit(X_train[:, bin_feat_reg])

    X_train_new = np.hstack(
        (encoder.transform(X_train[:, bin_feat_reg]), X_train[:, con_feat_reg]))

    X_test_new = np.hstack(
        (encoder.transform(X_test[:, bin_feat_reg]), X_test[:, con_feat_reg]))

    scaler.fit(X_train_new)
    X_train_scaled = scaler.transform(X_train_new)
    X_test_scaled = scaler.transform(X_test_new)

    model = LinearRegression(learning_rate=10e-5, penalty='l2')
    model.fit(X_train_scaled, y_train)

    print('Train metrics')
    utilities.regression_report(y_train, model.predict(X_train_scaled))
    print('Test metrics')
    utilities.regression_report(y_test, model.predict(X_test_scaled))

    print('Feature importances')

    args = np.argsort(np.fabs(model.w))[::-1]
    for i in args[:5]:
        print(name_features_insurance[i], model.w[i])
Example 21
    X = np.array([X[i] for i in features]).T
    X = normalize_features(X)

    # Fitting model
    reg = LinearRegression(learning_rate=0.01, n_iters=300)

    # Train, test split with 80/20 ratio
    X_train, y_train, X_test, y_test = train_test_split(X, y, 0.20)

    # Quick hack to get around tuple to array conversion
    X_train = np.array(X_train)
    y_train = np.array(y_train)
    X_test = np.array(X_test)
    y_test = np.array(y_test)

    costs = reg.fit(X_train, y_train)
    weights = reg.weights_
    bias = reg.bias_
    pred_train = reg.predict(X_train)
    pred = reg.predict(X_test)

    dimension_check()

    # Mean absolute error / mean absolute deviation of training set
    print("""\n### Mean absolute error / mean absolute deviation
          of training set ###""")
    print(abs(pred_train - y_train).mean())

    # Mean absolute error / mean absolute deviation of test set
    print("""\n### Mean absolute error / mean absolute deviation
          of test set ###""")
    print(abs(pred - y_test).mean())
Example 22
FILE_NAME = "data.csv"
data = np.genfromtxt(FILE_NAME, delimiter=",", dtype=np.float32, skip_header=1)

# split data
n_samples, n_features = data.shape
n_features -= 1
X = data[:, 0:n_features]
y = data[:, n_features]
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)

# fit
model = LinearRegression(X_train, y_train)
model.fit()

# plot
y_pred_line = model.predict(X)
cmap = plt.get_cmap('viridis')
fig = plt.figure(figsize=(8, 6))
scat = plt.scatter(X, y, color=cmap(0.9), s=10)
plt.plot(X, y_pred_line, color='black', linewidth=2, label="Prediction")
plt.show()

# accuracy
train_accuracy = model.score()
test_accuracy = model.score(X_test, y_test)

table_accuracy = pd.DataFrame([[train_accuracy], [test_accuracy]],
                              ['Train Accuracy', 'Test Accuracy'],
                              ['Accuracy'])
print(table_accuracy)
Example 23
import pandas as pd
import matplotlib.pyplot as plt

from linear_regression import LinearRegression


def load_boston():
    dataset = pd.read_csv('boston.csv')

    X = dataset.iloc[:, 0:13].to_numpy()
    y = dataset.iloc[:, 13].to_numpy()

    return X, y


def plot_losses(losses):
    plt.plot(losses)
    plt.xlabel('iterations')
    plt.ylabel('loss')
    plt.tight_layout()
    plt.show()


if __name__ == '__main__':
    X, y = load_boston()

    linear_regression = LinearRegression(X, y)
    losses = linear_regression.fit()

    plot_losses(losses)
Example 24
import matplotlib.pyplot as plt

from sklearn.metrics import r2_score

from linear_regression import LinearRegression


def min_max_normalization_inverse(x, ma, mi):
    return x * (ma - mi) + mi


if __name__ == '__main__':
    # Load the data
    auto_mpg_data = Data()
    max_mpg = auto_mpg_data.max_value
    min_mpg = auto_mpg_data.min_value
    X_train, t_train, X_test, t_test = auto_mpg_data.get_data()
    # Gaussian basis function model
    lr = LinearRegression()
    lr.fit(X_train, t_train)
    gbf_prediction = lr.predict(X_test)
    print('gaussian_basis_function r2_score:',
          r2_score(t_test, gbf_prediction))
    # Sigmoid function model
    lr = LinearRegression(func_name='sigmoid')
    lr.fit(X_train, t_train)
    sigmoid_prediction = lr.predict(X_test)
    print('sigmoid r2score:', r2_score(t_test, sigmoid_prediction))
    # Plot the results
    plt.scatter(min_max_normalization_inverse(t_test, max_mpg, min_mpg),
                min_max_normalization_inverse(gbf_prediction, max_mpg,
                                              min_mpg),
                c="cyan",
                label="gbf")
    plt.scatter(min_max_normalization_inverse(t_test, max_mpg, min_mpg),
                min_max_normalization_inverse(sigmoid_prediction, max_mpg,
                                              min_mpg),
                c="magenta",  # placeholder colour; the original styling is cut off
                label="sigmoid")
    plt.legend()
    plt.show()
Example 25
import os
import sys

sys.path.append(os.path.join('..', 'algorithms'))

# import linear regression class
from linear_regression import LinearRegression
import pandas as pd


# create Linear regression object
lr = LinearRegression()

# read csv data from text file
df = pd.read_csv('data/dummy_data_linear_regression.txt')

# convert pandas dataframe to a NumPy array
data = df.to_numpy()

# pick out feature values from the matrix
X = data[:, :2]

# pick out regression values
y = data[:, 2]

# fit to linear regression model with different parameters (make normalize = true if the dataset has too much variance)
lr.fit(X[:30], y[:30], learning_rate=0.1, n_iter=200, normalize=True)

# plot cost in different iterations to tweak the learning rate
lr.plot_costs()

# check the R^2 score used in sklearn models
print(lr.score(X[31:46], y[31:46]))
Example 26
X, y = datasets.make_regression(n_samples=100,
                                n_features=1,
                                noise=20,
                                random_state=24)
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=24)


def mse(y_pred, y_true):
    return np.mean((y_pred - y_true)**2)


# plt.figure(figsize=(8,6))
# plt.scatter(X_train[:,0],y_train,color='r',marker='o')
# plt.show()

regressor = LinearRegression(n_iterations=10000)
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)

error = mse(y_pred, y_test)
print(error)

y_pred_line = regressor.predict(X)

if X.shape[1] == 1:
    plt.figure(figsize=(8, 6))
    plt.scatter(X_train, y_train, color='r')
    plt.scatter(X_test, y_test, color='b')
    plt.plot(X, y_pred_line, color='black', label='Prediction')
    plt.show()
Example 27
    pd_train = pd.read_csv('./data/train.csv', sep=';')
    pd_test = pd.read_csv('./data/test.csv', sep=';')
    pd_validate = pd.read_csv('./data/validate.csv', sep=';')

    # 1. Uniform (min-max) normalization of the raw features
    trn_X, trn_X_max, trn_X_min = uniform_norm(pd_train.drop('quality', axis=1).values)
    trn_y = pd_train['quality'].values

    val_X = (pd_validate.drop('quality', axis=1).values - trn_X_min) / (trn_X_max - trn_X_min)
    val_y = pd_validate['quality'].values

    test_X = (pd_test.drop('quality', axis=1).values - trn_X_min) / (trn_X_max - trn_X_min)
    test_y = pd_test['quality'].values

    model_1 = LinearRegression()
    train_costs = model_1.fit(trn_X, trn_y, alpha=0.5, lmbda=0, algorithm="batch_gd", verbose=True)
    val_pred = model_1.predict(val_X)
    test_pred = model_1.predict(test_X)

    print("Validate Error %f" % (sum((val_pred - val_y) ** 2) * 0.5 / val_X.shape[0]))
    print("Test Error %f" % (sum((test_pred - test_y) ** 2) * 0.5 / test_X.shape[0]))
    print("\n\n")

    # 2. Gaussian (z-score) normalization of the raw features
    trn_X, trn_X_mean, trn_X_std = gaussian_norm(pd_train.drop('quality', axis=1).values)
    trn_y = pd_train['quality'].values

    val_X = (pd_validate.drop('quality', axis=1).values - trn_X_mean) / trn_X_std
    val_y = pd_validate['quality'].values

    test_X = (pd_test.drop('quality', axis=1).values - trn_X_mean) / trn_X_std
Example 28
X, y = datasets.make_regression(n_samples=100,
                                n_features=1,
                                noise=20,
                                random_state=4)
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1234)

# fig = plt.figure(figsize=(8,6))
# plt.scatter(X[:, 0], y, color="b", marker = "o", s = 30)
# plt.show()

from linear_regression import LinearRegression

regressor = LinearRegression(lr=0.01)
regressor.fit(X_train, y_train)
predicted = regressor.predict(X_test)

# def mse(y_true, y_predicted):
# 	return np.mean((y_true-y_predicted)**2)

# mse_value = mse(y_test, predicted)
# print(mse_value)
# print(y_test)

y_pred_line = regressor.predict(X)
cmap = plt.get_cmap('viridis')
fig = plt.figure(figsize=(8, 6))
m1 = plt.scatter(X_train, y_train, color=cmap(0.9), s=10)
m2 = plt.scatter(X_test, y_test, color=cmap(0.5), s=10)
plt.plot(X, y_pred_line, color='black', linewidth=2, label="Prediction")
plt.show()
Example 29
import numpy as np
from linear_regression import LinearRegression
from sklearn.datasets import load_boston

X, y = load_boston(return_X_y=True)
X = X[:, 4]
X = X[:, np.newaxis]

model = LinearRegression()
model.fit(X, y, 0.1, 300)
Example 30
def test_linear_regression():
    X = np.random.normal(size=(100, 2))
    y = X[:, 0] * 5
    lr = LinearRegression()
    lr.fit(X, y)
    assert pytest.approx(lr.predict(X)[-1], rel=1e-3) == y[-1]
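
Since y = 5 * X[:, 0] with no noise, an exact least-squares fit reproduces the targets (the coefficient on the second, irrelevant feature goes to zero), so the approximate-equality check should pass for any correct implementation.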
Example 31
    plot_decision_regions(X=X_combined,
                          y=y_combined,
                          classifier=logr_clf,
                          test_idx=range(X_train.shape[0],
                                         X_train.shape[0] + X_test.shape[0]),
                          ax=ax[1])
    ax[1].set_xlabel("x1", fontsize="large")
    ax[1].set_ylabel("x2", fontsize="large")
    ax[1].legend(loc="upper right", fontsize="large")
    ax[1].set_title("Logistic regression (IRLS) on dataset %s" % l,
                    fontsize="x-large",
                    fontweight="bold")
    """
    Run linear regression fitted by solving the normal equation
    """
    linr_clf = LinearRegression()
    linr_clf.fit(X_train, y_train)

    linr_train_error = np.mean(linr_clf.predict(X_train) != y_train)
    linr_test_error = np.mean(linr_clf.predict(X_test) != y_test)

    plot_decision_regions(X=X_combined,
                          y=y_combined,
                          classifier=linr_clf,
                          test_idx=range(X_train.shape[0],
                                         X_train.shape[0] + X_test.shape[0]),
                          ax=ax[2])
    ax[2].set_xlabel("x1", fontsize="large")
    ax[2].set_ylabel("x2", fontsize="large")
    ax[2].legend(loc="upper right", fontsize="large")
    ax[2].set_title("Linear regression (normal equation) on dataset %s" % l,
                    fontsize="x-large",