Beispiel #1
0
 def __build_model(self, var_idx, method='forward'):
     """Fit a linear model on a candidate feature subset and return its R^2.

     Args:
         var_idx: Column index of ``self.X`` to add to (``'forward'``) or
             remove from (``'backward'``) the current best-feature list.
         method: ``'forward'`` or ``'backward'``.

     Returns:
         Whatever ``calculate_r2(self.y, y_preds)`` returns for the
         predictions of the model fitted on the candidate columns.

     Raises:
         ValueError: If ``method`` is not ``'forward'`` or ``'backward'``,
             or if ``var_idx`` is not in the best-feature list when
             removing it in ``'backward'`` mode.
     """
     if method == 'forward':
         # Builds a new list; the stored best-feature list is not mutated.
         candidate_features = self.__best_features + [var_idx]
     elif method == 'backward':
         # Work on a copy so removing the index leaves the stored list intact.
         candidate_features = deepcopy(self.__best_features)
         candidate_features.remove(var_idx)
     else:
         # Previously an unknown method fell through and failed later with
         # an UnboundLocalError on `candidate_features`.
         raise ValueError(
             "method must be 'forward' or 'backward', got {!r}".format(method))

     linear_reg = LinearRegression(gradient_descent=False)
     X = self.X[:, candidate_features]
     linear_reg.fit(X, self.y)
     # Predictions are requested one row at a time, as the model is used
     # elsewhere in this file.
     y_preds = [linear_reg.predict(x) for x in X]
     return calculate_r2(self.y, y_preds)
Beispiel #2
0
    def run(self):
        """Fit a ``LinearRegression`` on the loaded data and return its error.

        Reads the data via ``self.__readData()``, fits the model, computes
        one prediction per input row with ``model.sumForRow``, shows a
        scatter plot of targets against predictions, and returns
        ``model.error(predictions, targets)``.
        """
        features, targets = self.__readData()

        regressor = LinearRegression()
        regressor.fit(features, targets)

        predictions = []
        for sample in features:
            predictions.append(regressor.sumForRow(sample))

        # Visual sanity check only: the points should look like values of a
        # linear function.
        plt.scatter(targets, predictions, c='r')
        plt.show()

        return regressor.error(predictions, targets)
Beispiel #3
0
 def build_models(self, df):
     """Fit a linear model on every predictor subset and tabulate criteria.

     The last column of ``df`` is used as the response and all other
     columns as candidate predictors. For each subset size ``k`` from 1 to
     ``p - 1`` and each combination of ``k`` predictor columns, a fresh
     ``LinearRegression`` is fitted and scored with
     ``self.__calculate_criterions``.

     Side effects:
         Sets ``self.n``, ``self.p``, ``self.y`` and ``self.models_summary``
         (one row per subset), then calls
         ``self.__visualize_best_subset_performance()``.

     Args:
         df: DataFrame whose last column is the response.
     """
     self.n, self.p = df.shape
     # The response does not depend on the subset, so extract it once
     # instead of on every inner iteration.
     self.y = np.asarray(df.iloc[:, -1])
     col_names = [
         'subset', 'num_of_variables', 'aic', 'bic', 'rss', 'r2', 'adj_r2'
     ]
     performances = []
     for k in range(1, self.p):
         for var_combo in itertools.combinations(df.columns[:-1], k):
             linear_reg = LinearRegression()
             X = np.asarray(df[list(var_combo)])
             linear_reg.fit(X, self.y)
             # Predictions are requested one row at a time.
             y_preds = [linear_reg.predict(x) for x in X]
             adj_r2, aic, bic, r2, rss = self.__calculate_criterions(
                 y_preds, k)
             performances.append([var_combo, k, aic, bic, rss, r2, adj_r2])
     # Build the summary once, after all subsets are scored. Rebuilding it
     # per iteration was quadratic and left the attribute unset when the
     # loop body never ran.
     self.models_summary = pd.DataFrame(performances, columns=col_names)
     self.__visualize_best_subset_performance()
Beispiel #4
0
def main():
    """Fit a linear regression on synthetic 1-D data and plot the results.

    Generates 100 noisy samples with ``make_regression``, holds out 40% of
    them as a test set, and fits ``LinearRegression(n_iterations=100)`` on
    the remainder. Two figures are shown: ``model.training_errors`` against
    its index, and the fitted line over the train/test scatter. The test-set
    mean squared error is printed and shown in the second figure's title.
    """
    X, y = make_regression(n_samples=100, n_features=1, noise=20)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    model = LinearRegression(n_iterations=100)

    model.fit(X_train, y_train)

    # Training error plot
    n = len(model.training_errors)
    training, = plt.plot(range(n), model.training_errors, label="Training Error")
    plt.legend(handles=[training])
    plt.title("Error Plot")
    plt.ylabel('Mean Squared Error')
    plt.xlabel('Iterations')
    plt.show()

    # Held-out error
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print ("Mean squared error: %s" % (mse))

    # Predictions over the full input range, used to draw the fitted line
    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results
    # NOTE(review): the 366 factor presumably rescales x to a day-of-year
    # axis to match the 'Day' label -- confirm.
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X, y_pred_line, color='b', linewidth=2, label="Prediction")
    plt.suptitle("Linear Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.show()
def main():
    """Fit a linear regression on synthetic 1-D data and plot the results.

    Generates 100 noisy samples with ``make_regression``, splits them with
    ``DataManipulation().train_test_split`` (40% test), and fits a default
    ``LinearRegression`` on the training part. Two figures are shown:
    ``model.errors`` against its index, and the fitted line over the
    train/test scatter.
    """
    X, y = make_regression(n_samples=100, n_features=1, noise=20)

    x_train, x_test, y_train, y_test = DataManipulation().train_test_split(
        X, y, test_size=0.4)
    model = LinearRegression()
    model.fit(x_train, y_train)

    # Training error curve
    n = len(model.errors)
    plt.plot(range(n), model.errors, label='Training Errors')
    plt.title('Error plot')
    plt.xlabel('Iteration')
    plt.ylabel('Mean Squared Error')
    plt.show()

    # Predictions over the full input range, used to draw the fitted line
    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results
    # NOTE(review): the 366 factor presumably rescales x to a day-of-year
    # axis to match the 'Day' label -- confirm.
    m1 = plt.scatter(366 * x_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * x_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X,
             y_pred_line,
             color='black',
             linewidth=2,
             label="Prediction")
    plt.suptitle("Linear Regression")
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.show()
# Script fragment: scale the floor columns, fit a LinearRegression on the
# house features, plot it, and print two predictions.
#
# Scale the 'floor' column by `max`.
# NOTE(review): `max` is not assigned anywhere above this line in the visible
# part of the file; presumably it was set to data['floor'].max() earlier --
# confirm. The name also shadows the built-in max().
data['floor'] = data['floor'].apply(lambda x: x / max)

# Largest value of 'top_floor', used to scale that column the same way.
max = data['top_floor'].max()

data['top_floor'] = data['top_floor'].apply(lambda x: x / max)

# Regression target.
price = data['price'].values

# Design matrix: a leading column of ones followed by the five feature
# columns (size, room, year, floor, top_floor); .T puts one sample per row.
X = np.array([np.ones(len(price)), data['size'].values, data['room'].values, data['year'].values, data['floor'].values, data['top_floor'].values]).T

Y = np.array(price)

# feature_count=5 matches the five non-constant columns of X.
regression = LinearRegression(alpha=0.000001, iteration=300, feature_count=5)

regression.fit(X, Y)

regression.plot()

# Each pair prints a fixed reference price string followed by the model's
# prediction for a hand-written sample (presumably the known price of that
# listing -- verify).
# NOTE(review): the last two entries of each sample (floor, top_floor) are
# passed unscaled, while the training columns were divided by their maxima
# above -- confirm whether these inputs should be scaled too.
print("price:", "13800000")
print("price:", int(regression.predict(np.array([1, 45,2,1977,5,5]))))
print()
print("price:", "15333333")
print("price:", int(regression.predict(np.array([1, 40,2,1967,1,4]))))

# Disabled interactive loop; it builds a three-element sample
# (bias, size, bedroom), which does not match the six-column X used above.
# while (True):
#     size = int(input("Enter size of house:"))
#     bedroom = int(input("Enter number of bedroom:"))
#     print("price:", int(regression.predict(np.array([1, size, bedroom]))))

# 14700000
"""
  @author Victor I. Afolabi
  A.I. Engineer & Software developer
  [email protected]
  Created on 26 August, 2017 @ 9:33 PM.
  Copyright (c) 2017. victor. All rights reserved.
"""

# Create a LinearRegression object
from regression import LinearRegression
import numpy as np

# Number of iterations passed to the fit and reported in the summary line.
num_iter = 1000

# Load the comma-separated training data.
data = np.genfromtxt('data.csv', delimiter=',')

# Fit the model, then report the fitted `m` and `b` attributes.
clf = LinearRegression(learning_rate=1e-4)
clf.fit(data=data, num_iter=num_iter)

summary = 'After {:,} iterations. m = {:.2f} and b = {:.2f}'.format(
    num_iter, clf.m, clf.b)
print(summary)
def linear_solve(x_data, y_data):
    """Fit a default ``LinearRegression`` and return its in-sample predictions.

    Args:
        x_data: Inputs passed to ``fit`` and then to ``predict``.
        y_data: Targets passed to ``fit``.

    Returns:
        The result of ``predict(x_data)`` on the fitted model.
    """
    regressor = LinearRegression()
    regressor.fit(x_data, y_data)
    fitted_values = regressor.predict(x_data)
    return fitted_values
Beispiel #9
0
def __test_cross_validation_methods():
    """Smoke-test the cross-validation helpers on noisy synthetic data.

    Draws 100 random inputs, builds noisy targets, and fits a plain
    ``LinearRegression`` on a design matrix with columns (1, x, x^2). It then
    runs ``kFoldCrossValidation``, ``kkFoldCrossValidation`` and
    ``MCCrossValidation`` on the same data, printing R2, MSE, bias and
    variance for each and checking numerically how close ``bias + var`` is
    to the MSE. Finally it plots the data, the plain prediction and the
    cross-validated predictions with error bars.
    """
    # A small implementation of a test case
    from regression import LinearRegression
    import matplotlib.pyplot as plt

    # Initial values
    n = 100
    N_bs = 1000
    k_splits = 4
    test_percent = 0.2
    noise = 0.3
    np.random.seed(1234)

    # Sets up random matrices
    x = np.random.rand(n, 1)

    def func_excact(_x):
        # Deterministic signal plus Gaussian noise scaled by `noise`.
        return (2*_x*_x + np.exp(-2*_x)
                + noise * np.random.randn(_x.shape[0], _x.shape[1]))

    y = func_excact(x)

    def design_matrix(_x):
        # Columns: intercept, x, x^2.
        return np.c_[np.ones(_x.shape), _x, _x * _x]

    # Sets up design matrix
    X = design_matrix(x)

    # Performs regression
    reg = LinearRegression()
    reg.fit(X, y)
    y = y.ravel()
    y_predict = reg.predict(X).ravel()
    print("Regular linear regression")
    print("R2:    {:-20.16f}".format(reg.score(y, y_predict)))
    print("MSE:   {:-20.16f}".format(metrics.mse(y, y_predict)))
    # print (metrics.bias(y, y_predict))
    print("Bias^2:{:-20.16f}".format(metrics.bias2(y, y_predict)))

    # Small plotter
    plt.plot(x, y, "o", label="data")
    plt.plot(x,
             y_predict,
             "o",
             label=r"Pred, $R^2={:.4f}$".format(reg.score(y, y_predict)))

    print("k-fold Cross Validation")
    kfcv = kFoldCrossValidation(x, y, LinearRegression, design_matrix)
    # The fold count is the local `k_splits`; the previous `k_fold_size`
    # was never defined and raised NameError here.
    kfcv.cross_validate(k_splits=k_splits, test_percent=test_percent)
    print("R2:    {:-20.16f}".format(kfcv.R2))
    print("MSE:   {:-20.16f}".format(kfcv.MSE))
    print("Bias^2:{:-20.16f}".format(kfcv.bias))
    print("Var(y):{:-20.16f}".format(kfcv.var))
    print("MSE = Bias^2 + Var(y) = ")
    print("{} = {} + {} = {}".format(kfcv.MSE, kfcv.bias, kfcv.var,
                                     kfcv.bias + kfcv.var))
    print("Diff: {}".format(abs(kfcv.bias + kfcv.var - kfcv.MSE)))

    plt.errorbar(kfcv.x_pred_test,
                 kfcv.y_pred,
                 yerr=np.sqrt(kfcv.y_pred_var),
                 fmt="o",
                 label=r"k-fold CV, $R^2={:.4f}$".format(kfcv.R2))

    print("kk Cross Validation")
    kkcv = kkFoldCrossValidation(x, y, LinearRegression, design_matrix)
    kkcv.cross_validate(k_splits=k_splits, test_percent=test_percent)
    print("R2:    {:-20.16f}".format(kkcv.R2))
    print("MSE:   {:-20.16f}".format(kkcv.MSE))
    print("Bias^2:{:-20.16f}".format(kkcv.bias))
    print("Var(y):{:-20.16f}".format(kkcv.var))
    print("MSE = Bias^2 + Var(y) = ")
    print("{} = {} + {} = {}".format(kkcv.MSE, kkcv.bias, kkcv.var,
                                     kkcv.bias + kkcv.var))
    print("Diff: {}".format(abs(kkcv.bias + kkcv.var - kkcv.MSE)))

    plt.errorbar(kkcv.x_pred_test.ravel(),
                 kkcv.y_pred.ravel(),
                 yerr=np.sqrt(kkcv.y_pred_var.ravel()),
                 fmt="o",
                 label=r"kk-fold CV, $R^2={:.4f}$".format(kkcv.R2))

    print("Monte Carlo Cross Validation")
    mccv = MCCrossValidation(x, y, LinearRegression, design_matrix)
    mccv.cross_validate(N_bs, k_splits=k_splits, test_percent=test_percent)
    print("R2:    {:-20.16f}".format(mccv.R2))
    print("MSE:   {:-20.16f}".format(mccv.MSE))
    print("Bias^2:{:-20.16f}".format(mccv.bias))
    print("Var(y):{:-20.16f}".format(mccv.var))
    print("MSE = Bias^2 + Var(y) = ")
    print("{} = {} + {} = {}".format(mccv.MSE, mccv.bias, mccv.var,
                                     mccv.bias + mccv.var))
    print("Diff: {}".format(abs(mccv.bias + mccv.var - mccv.MSE)))

    print("\nCross Validation methods tested.")

    plt.errorbar(mccv.x_pred_test,
                 mccv.y_pred,
                 yerr=np.sqrt(mccv.y_pred_var),
                 fmt="o",
                 label=r"MC CV, $R^2={:.4f}$".format(mccv.R2))

    plt.xlabel(r"$x$")
    plt.ylabel(r"$y$")
    plt.title(r"$y=2x^2$")
    plt.legend()
    plt.show()
Beispiel #10
0
def __test_bootstrap_fit():
    """Compare a plain linear fit with a bootstrap fit on synthetic data.

    Draws 200 random inputs, builds noisy targets, and fits a
    ``LinearRegression`` on a design matrix with columns (1, x, x^2),
    printing its R2, MSE, coefficients and coefficient variances. It then
    runs ``BootstrapRegression`` with 1000 bootstraps on the same data,
    prints the corresponding statistics plus how close ``bias + var`` is to
    the MSE, and plots the data, the plain prediction and the bootstrap
    prediction with error bars.
    """
    # A small implementation of a test case
    from regression import LinearRegression

    # Experiment settings
    num_bootstraps = 1000
    num_samples = 200
    noise_level = 0.2
    np.random.seed(1234)
    holdout_fraction = 0.35

    # Random inputs, one column
    x = np.random.rand(num_samples, 1)

    def noisy_target(_x):
        # Deterministic signal plus Gaussian noise scaled by `noise_level`.
        signal = 2*_x*_x + np.exp(-2*_x)
        gaussian = np.random.randn(_x.shape[0], _x.shape[1])
        return signal + noise_level * gaussian

    y = noisy_target(x)

    def design_matrix(_x):
        # Columns: intercept, x, x^2.
        intercept = np.ones(_x.shape)
        return np.c_[intercept, _x, _x*_x]

    X = design_matrix(x)

    # Ordinary fit on the full data set
    reg = LinearRegression()
    reg.fit(X, y)
    y = y.ravel()
    y_predict = reg.predict(X).ravel()

    print("Regular linear regression")
    print("R2:  {:-20.16f}".format(reg.score(y_predict, y)))
    print("MSE: {:-20.16f}".format(metrics.mse(y, y_predict)))
    print("Beta:      ", reg.coef_.ravel())
    print("var(Beta): ", reg.coef_var.ravel())
    print("")

    # Bootstrap fit on the raw inputs; the design matrix is built by the
    # callable handed to BootstrapRegression.
    print("Bootstrapping")
    bs_reg = BootstrapRegression(x, y, LinearRegression, design_matrix)
    bs_reg.bootstrap(num_bootstraps, test_percent=holdout_fraction)

    print("R2:    {:-20.16f}".format(bs_reg.R2))
    print("MSE:   {:-20.16f}".format(bs_reg.MSE))
    print("Bias^2:{:-20.16f}".format(bs_reg.bias))
    print("Var(y):{:-20.16f}".format(bs_reg.var))
    print("Beta:      ", bs_reg.coef_.ravel())
    print("var(Beta): ", bs_reg.coef_var.ravel())
    print("MSE = Bias^2 + Var(y) = ")
    decomposition = "{} = {} + {} = {}".format(
        bs_reg.MSE, bs_reg.bias, bs_reg.var, bs_reg.bias + bs_reg.var)
    print(decomposition)
    residual = abs(bs_reg.bias + bs_reg.var - bs_reg.MSE)
    print("Diff: {}".format(residual))

    import matplotlib.pyplot as plt
    x_flat = x.ravel()
    plt.plot(x_flat, y, "o", label="Data")
    plain_label = r"Pred, R^2={:.4f}".format(reg.score(y_predict, y))
    plt.plot(x_flat, y_predict, "o", label=plain_label)
    print (bs_reg.y_pred.shape, bs_reg.y_pred_var.shape)
    bootstrap_label = r"Bootstrap Prediction, $R^2={:.4f}$".format(bs_reg.R2)
    plt.errorbar(
        bs_reg.x_pred_test,
        bs_reg.y_pred,
        yerr=np.sqrt(bs_reg.y_pred_var),
        fmt="o",
        label=bootstrap_label,
    )
    plt.xlabel(r"$x$")
    plt.ylabel(r"$y$")
    plt.title(r"$2x^2 + \sigma^2$")
    plt.legend()
    plt.show()
Beispiel #11
0
import pandas as pd
import numpy as np
from regression import LinearRegression

# Load the headerless dataset: column 0 is the label, columns 1-2 the features.
df = pd.read_csv(
    "/Users/yliang/data/trunk1/spark/assembly/target/tmp/LinearRegressionSuite/datasetWithDenseFeature2/part-00000",
    header=None,
)
feature_columns = df.columns[1:3]
label_column = df.columns[0]
X = np.array(df[feature_columns])
y = np.array(df[label_column])

# Fit a linear regression with the given lower/upper bound lists.
lir = LinearRegression(
    fit_intercept=True,
    alpha=2.3,
    max_iter=100,
    tol=1e-06,
    standardization=False,
    lower_bound=[-np.inf, 6.0, -np.inf],
    upper_bound=[0.0, 10.0, np.inf],
)
lir.fit(X, y)

# Report the fitted parameters.
print("coefficients = " + str(lir.coef_))
print("intercept = " + str(lir.intercept_))
Beispiel #12
0
 def __get_full_model_r2(self):
     """Return the R^2 of a linear model fitted on all of ``self.X``.

     Fits ``LinearRegression(gradient_descent=False)`` on ``self.X`` and
     ``self.y``, predicts each row of ``self.X`` individually, and passes
     the targets and predictions to ``calculate_r2``.
     """
     full_model = LinearRegression(gradient_descent=False)
     full_model.fit(self.X, self.y)

     predictions = []
     for row in self.X:
         predictions.append(full_model.predict(row))

     return calculate_r2(self.y, predictions)