Ejemplo n.º 1
0
def test_sanity():
    """Sanity check: data following y = x1 + x2 + 1 exactly should be
    recovered to within 10% by LinearRegression on its own training set."""
    X = np.array([[1, 1], [2, 2], [3, 3]], dtype=np.float32).reshape(-1, 2)
    y = np.array([3, 5, 7], dtype=np.float32).reshape(-1, 1)
    lin_reg = LinearRegression()
    lin_reg.fit(X, y)
    y_pred = lin_reg.predict(X)
    # BUG FIX: the original used .any(), which passes even when all but one
    # prediction is wrong; every prediction must be close.
    assert np.isclose(y, y_pred, rtol=0.1).all()
Ejemplo n.º 2
0
class ChanceModel:
    """Maps a score margin to an empirical win chance, with an optional
    regression fitted over the (margin, win-fraction) pairs."""

    def __init__(self, margin_mapping):
        # margin -> object exposing getWinFraction()
        self.margin_mapping = margin_mapping
        # BUG FIX: the original assigned a LogisticRegression and then
        # immediately overwrote it with a LinearRegression; the dead
        # assignment is removed. NOTE(review): getChanceLog() calls
        # predict_log_proba, which a LinearRegression does not provide —
        # confirm which model is actually intended there.
        self.reg = LinearRegression()

    def getChance(self, margin):
        """Empirical win fraction for this exact margin, or 0 if unseen."""
        # Idiomatic membership test (no .keys() needed).
        if margin not in self.margin_mapping:
            return 0
        return self.margin_mapping[margin].getWinFraction()

    def getChanceLinear(self, margin):
        """Win chance predicted by the fitted regression."""
        return self.reg.predict(margin)

    def getChanceLog(self, X):
        """Log-probabilities from the model (requires predict_log_proba)."""
        return self.reg.predict_log_proba(X)

    def fitRegression(self):
        """Fit the regression on all (margin, win fraction) pairs."""
        x_list = []
        y_list = []
        for margin, record in self.margin_mapping.items():
            x_list.append(margin)
            y_list.append(record.getWinFraction())
        self.reg.fit(x_list, y_list)
Ejemplo n.º 3
0
def output(part_id):
    """Build random trig-based train/validation data, fit a LinearRegression,
    and return the formatted grader string for the requested part (1-5).

    Returns None for any other part_id, matching the original behavior.
    """
    # Random Test Cases
    train_X = np.column_stack((np.ones(10),
                               np.sin(np.arange(1, 16, 1.5)),
                               np.cos(np.arange(1, 16, 1.5))))
    train_y = np.sin(np.arange(1, 30, 3))

    Xval = np.column_stack((np.ones(10),
                            np.sin(np.arange(0, 14, 1.5)),
                            np.cos(np.arange(0, 14, 1.5))))
    yval = np.sin(np.arange(1, 11))

    lr = LinearRegression()
    lr.fit(train_X, train_y)

    # Fixed theta/lambda used by the cost-function grader parts.
    theta = np.array([0.1, 0.2, 0.3])
    if part_id == 1:
        # Regularized cost only.
        J, _ = lr.costFunction(train_X, train_y, theta, 0.5)
        return sprintf('%0.5f ', J)
    if part_id == 2:
        # Gradient only.
        _, grad = lr.costFunction(train_X, train_y, theta, 0.5)
        return sprintf('%0.5f ', grad)
    if part_id == 3:
        error_train, error_val = lr.learningCurve(train_X, train_y, Xval, yval, 1)
        return sprintf('%0.5f ', np.hstack((error_train, error_val)))
    if part_id == 4:
        X_poly = lr.polyFeatures(train_X[1, :].T, 8)
        return sprintf('%0.5f ', X_poly)
    if part_id == 5:
        lambda_vec, error_train, error_val = lr.validationCurve(train_X, train_y,
                                                                Xval, yval)
        return sprintf('%0.5f', np.hstack((lambda_vec, error_train, error_val)))
def best_params():
    """Grid-search learning rate and iteration count for LinearRegression,
    returning the (learning_rate, n_iters) pair with the lowest test MSE.

    Relies on module-level X_train, Y_train, X_test, Y_test and mse().
    """
    lr_list = [0.1, 0.01, 0.05, 0.001, 0.005, 0.0001, 0.0005]
    # BUG FIX: start from +inf rather than an arbitrary 10_000_000 so the
    # search cannot silently return (0, 0) when every MSE exceeds the magic
    # number.
    mse_min = float('inf')
    lr_min = 0
    n_iter_min = 0
    for lr_val in lr_list:
        for iteration in range(1000, 10000, 10):
            reg = LinearRegression(learning_rate=lr_val, n_iters=iteration)
            reg.fit(X_train, Y_train)
            predicted = reg.predict(X_test)
            mse_val = mse(Y_test, predicted)
            if mse_val < mse_min:
                mse_min = mse_val
                lr_min = lr_val
                n_iter_min = iteration
    return (lr_min, n_iter_min)
Ejemplo n.º 5
0
def get_multiple_estimates(X, y, learning_rate=None, max_iter=1000, iteration_threshold=100, plotlabels=None,
                           reg_strength=0, regularization="Ridge", method="GD", minibatch_size=1, plot_by_lr=True, plot_by_mb=False,
                           learning_rate_decay=False, cost_threshold=None):
    """Fit several LinearRegression estimators and collect their cost curves.

    One estimator is fitted per learning rate (plot_by_lr), per minibatch
    size (plot_by_mb), or per method named in plotlabels (fallback branch).

    Returns (cost_by_lr, iterations, plotlabels) — parallel lists with one
    entry per fitted estimator.
    """
    # BUG FIX: a mutable list default ([1e-10]) is shared across calls; use
    # a None sentinel instead (same effective default).
    if learning_rate is None:
        learning_rate = [0.0000000001]
    cost_by_lr = []
    iterations = []
    if plotlabels is None:
        plotlabels = []

    def _collect(estimator):
        # Fit and append this estimator's cost curve and iteration counts.
        estimator.fit(X, y)
        cost_by_lr.append(estimator.cost_by_iteration.tolist())
        iterations.append(estimator.iterations.tolist())

    if plot_by_lr:
        for lr in learning_rate:
            _collect(LinearRegression(learning_rate=lr, reg_strength=reg_strength, regularization=regularization,
                                      max_iter=max_iter, iteration_threshold=iteration_threshold, method=method,
                                      learning_rate_decay=learning_rate_decay, cost_threshold=cost_threshold))
            plotlabels.append("Learning rate = " + str(lr))
    elif plot_by_mb:
        for mb in minibatch_size:
            # NOTE(review): the whole learning_rate sequence (not a single
            # value) is forwarded here, as in the original — confirm this is
            # intentional.
            _collect(LinearRegression(learning_rate=learning_rate, reg_strength=reg_strength, regularization=regularization,
                                      max_iter=max_iter, iteration_threshold=iteration_threshold,
                                      method=method, minibatch_size=mb, learning_rate_decay=learning_rate_decay,
                                      cost_threshold=cost_threshold))
            plotlabels.append("Minibatch size = " + str(mb))
    else:
        # One estimator per label; the label doubles as the method name.
        for cnt, pl in enumerate(plotlabels):
            _collect(LinearRegression(learning_rate=learning_rate[cnt], reg_strength=reg_strength,
                                      regularization=regularization,
                                      max_iter=max_iter, iteration_threshold=iteration_threshold,
                                      method=pl, minibatch_size=minibatch_size, learning_rate_decay=learning_rate_decay,
                                      cost_threshold=cost_threshold))

    return cost_by_lr, iterations, plotlabels
Ejemplo n.º 6
0
from LinearRegression import LinearRegression

if __name__ == '__main__':
    # Constructor args 12345 and 1e-2 are positional; their meaning (seed?
    # learning rate?) is not visible here — confirm in LinearRegression.
    lr = LinearRegression(12345, 1e-2)
    # lr.cv([1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 2], 2)
    # fit(1e-6, 2): positional args whose semantics are defined by the
    # project class — presumably a regularization strength and a degree.
    lr.fit(1e-6, 2)
import sys
sys.path.append('../models')

# include the OLS class
from LinearRegression import LinearRegression

# header=None so columns are integer-indexed.
data = pd.read_csv('data/machine.data.txt', header=None)

# let's keep 9 attributes: column 9 is the target, 0/1/9 are dropped from X
y = data[9]
X = data.drop([0, 1, 9], axis=1)

# Work with raw numpy arrays from here on.
X = X.values
y = y.values

# Baseline: scikit-learn's OLS.
# NOTE(review): normalize= was removed in scikit-learn >= 1.2 — confirm the
# pinned version.
reg = linear_model.LinearRegression(normalize=True)
reg.fit(X, y)
skout = reg.predict(X)
plt.scatter(y, skout, color='r', alpha=0.4, label='scikit-learn')
plt.plot([-200, 0], [0, 0], 'k--', lw=1)
plt.plot([0, 0], [-200, 0], 'k--', lw=1)


# Same data through the local implementation for visual comparison.
regr = LinearRegression()
regr.fit(X, y)
outs = regr.predict(X)
plt.scatter(y, outs, color='g', alpha=0.5, label='pyLinear')

plt.legend()
plt.show()
Ejemplo n.º 8
0
import numpy as np
from LinearRegression import LinearRegression

# EXTRACT DATA FROM FILE AND CHANGE IT TO THE RIGHT FORMAT
matrix = np.loadtxt("text", delimiter=',', skiprows=1).T
X, Y = matrix[0], matrix[1]
m = X.shape[0]  # number of samples
# Reshape both to (m, 1) column vectors as the model expects.
X = X.reshape((m, 1))
Y = Y.reshape((m, 1))

model = LinearRegression()

# visualize=True presumably plots training progress — see LinearRegression.fit.
model.fit(X, Y, visualize=True)

# Persist the trained parameters.
model.save()
Ejemplo n.º 9
0
# Forward-fill missing ages, then back to a numpy array.
# NOTE(review): fillna(method='pad') is deprecated in pandas 2.x (use ffill).
age = np.array(pd.DataFrame(age).fillna(method='pad'))

# Encode sex as 1 = male, 0 = anything else.
for i in range(len(sex)):
    if (sex[i] == 'male'):
        sex[i] = 1
    else:
        sex[i] = 0

# Feature matrix: one row per passenger after transpose.
X = [list(pclass), list(sex), list(sibsp), list(parch), list(age)]
X = np.transpose(X)

########################## TRAINING REGRESSION MODEL ##########################

# max_epoch / al (learning rate?) are project-specific — confirm in LinearRegression.
regr = LinearRegression(max_epoch=30, al=0.5)
regr.fit(X, y)

######################### IMPORTING AND PROCESSING TEST DATASET ##########################

test = pd.read_csv('../data/test.csv')

test_keys = test.keys()

# Pull feature columns by index — presumably the Titanic test.csv layout.
p_id = np.array(test[test_keys[0]])
pclass = np.array(test[test_keys[1]])
sex = np.array(test[test_keys[3]])
age = np.array(test[test_keys[4]])
sibsp = np.array(test[test_keys[5]])
parch = np.array(test[test_keys[6]])
fare = np.array(test[test_keys[8]])
Ejemplo n.º 10
0
from Dataset import Dataset
from LinearRegression import LinearRegression
from LinearRegressionB import LinearRegressionB
from MSE import MSE
import matplotlib.pyplot as plt
import numpy as np

# Import dataset and split in training and test sets (80% train)
ds = Dataset('data/income')
training_x, training_y, test_x, test_y = Dataset.split_dataset(ds.data['income'], ds.data['happiness'], 0.8)

# Create and fit a Linear Regression model
lr_model = LinearRegression()
lr_model.fit(training_x, training_y)

# Create and fit the alternative Linear Regression implementation
lrb_model = LinearRegressionB()
lrb_model.fit(training_x, training_y)

# Predict test set
lr_results = lr_model.predict(test_x)
lrb_results = lrb_model.predict(test_x)

# Evaluate MSE error (MSE instances are callable)
mse = MSE()
lr_mse = mse(test_y, lr_results)
lrb_mse = mse(test_y, lrb_results)
print("Linear regression MSE: " + str(round(lr_mse, 4)))
print("Linear regression B MSE: " + str(round(lrb_mse, 4)))

# Plot results
Ejemplo n.º 11
0
from LinearRegression import LinearRegression
import pandas as pd 
import numpy as np 

# load the data
data = pd.read_csv(r'datasets\MoviesSales.csv')

# get ys (target: total sales)
ys = data[['Total Sales']].to_numpy()

# get predictors and add 1 column with 1's (intercept term)
xs = data[['First Year Gains', 'Total Production Cost', 'Total Promotional Cost']].to_numpy()
xs = np.hstack((np.ones((xs.shape[0],1)), xs))

# calculate the coefficients
L = LinearRegression()
L.fit(xs, ys)

# Report the fitted intercept, coefficients and in-sample R^2.
print('Intercept is {0}'.format(L.intercept))
print('Coefficients are: {0}'.format(L.coefficients))
print('R^2 is: {0:4f}'.format(L.R_squared(ys, xs)))
Ejemplo n.º 12
0
import numpy as np
from LinearRegression import LinearRegression

# Synthetic data: y = 4 + 8x plus uniform noise in [-amp/2, amp/2).
amp = 5
X = np.random.rand(100, 1)
y = 4 + 8 * X + np.random.rand(100, 1) * amp - 0.5 * amp
lr = LinearRegression()
# Compare the three gradient-descent variants on the same data.
lr.fit(X, y, gradient_method='MBGD')  # mini-batch
print(lr.weights)
print(lr.rmse(X, y))
lr.fit(X, y, gradient_method='BGD', learning_rate=0.1)  # batch
print(lr.weights)
print(lr.rmse(X, y))
lr.fit(X, y, gradient_method='SGD')  # stochastic
print(lr.weights)
print(lr.rmse(X, y))
            predicted = reg.predict(X_test)
            mse_val = mse(Y_test, predicted)
            if mse_val < mse_min:
                mse_min = mse_val
                lr_min = lr_val
                n_iter_min = iteration
    return (lr_min, n_iter_min)


best_lr, best_n_iter = best_params()

print("Best Learning Rate:", best_lr)
print("Best Number Of Iterations:", best_n_iter)
# Best LR = 0.001
# Best N_Iter = 2550

# Refit with the best hyper-parameters found by the grid search.
reg = LinearRegression(learning_rate=best_lr, n_iters=best_n_iter)
reg.fit(X_train, Y_train)
predictions = reg.predict(X_test)

mse_val = mse(Y_test, predictions)
print("MSE:", mse_val)

# Plot train/test points and the fitted regression line over all X.
y_pred_line = reg.predict(X)
cmap = plt.get_cmap('viridis')
fig = plt.figure(figsize=(8, 6))
m1 = plt.scatter(X_train, Y_train, color=cmap(0.9), s=10)
m2 = plt.scatter(X_test, Y_test, color=cmap(0.5), s=10)
plt.plot(X, y_pred_line, color='black', linewidth=2, label="Prediction")
plt.show()
Ejemplo n.º 14
0
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.datasets import load_boston
from sklearn.metrics import mean_squared_error

# Load data (Boston housing).
# NOTE(review): load_boston was removed in scikit-learn >= 1.2 — confirm the
# pinned version or switch dataset.
boston = load_boston()

# Split into train/test sets.
x_train, x_test, y_train, y_test = train_test_split(boston.data,
                                                    boston.target,
                                                    random_state=8)

# 1> Create a scaler (transformer).
transfer = StandardScaler()

# 2> Standardize: fit on train, reuse the same statistics on test.
x_train = transfer.fit_transform(x_train)
x_test = transfer.transform(x_test)

# Train the model.
reg2 = LinearRegression()
reg2.fit(x_train, y_train)
print(reg2.coef_)
print(reg2.intercept_)

# Evaluate the model. BUG FIX: score() returns R^2, not predictions — the
# original stored it in a variable misleadingly named y_predict.
r2 = reg2.score(x_test, y_test)
print(r2)
Ejemplo n.º 15
0
from sklearn.metrics import mean_squared_error
# BUG FIX: train_test_split was used below without being imported.
from sklearn.model_selection import train_test_split
from sklearn import datasets
import matplotlib.pyplot as plt

from LinearRegression import LinearRegression

# Small 1-feature synthetic regression problem.
X, y = datasets.make_regression(n_samples=50,
                                n_features=1,
                                noise=20,
                                random_state=4)
print('X shape: ', X.shape)

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)

# Fit the project's gradient-descent linear regression.
LR = LinearRegression(learning_rate=0.01, n_iters=1000)
LR.fit(X_train, y_train)
y_pred = LR.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print("MSE: ", mse)

# Visualize: train/test scatter plus the fitted line, saved to PNG.
y_pred_line = LR.predict(X)
cmap = plt.get_cmap('viridis')
fig = plt.figure(figsize=(8, 6))
m1 = plt.scatter(X_train, y_train, color=cmap(0.2), s=10)
m2 = plt.scatter(X_test, y_test, color=cmap(0.9), s=10)
plt.plot(X, y_pred_line, color='black', linewidth=2, label="Prediction")
plt.savefig('LinearRegression_visulization.png')
Ejemplo n.º 16
0
from Matrix import Matrix
from LinearRegression import LinearRegression
from functions import train_test_split
from functions import rmse

if __name__ == "__main__":
    # NOTE(review): make_regression is not imported in this excerpt —
    # presumably sklearn.datasets.make_regression; confirm the missing import.
    X, y = make_regression(n_samples=20, n_features=3, noise=10)
    X, y = X.tolist(), y.tolist()

    # Wrap in the project's Matrix type.
    X = Matrix(X)
    y = Matrix(y)

    # 70/30 train/test split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, 0.7)
    lnrReg = LinearRegression()
    lnrReg.fit(X_train, y_train)
    prediction = lnrReg.predict(X_test)

    print('RMSE = ', rmse(y_test, prediction))

    beg = 0
    end = 100

    # Map index i to a value: i for i > 0, -1/i for i < 0, 1 for i == 0.
    func = lambda i: i if i > 0 else (-1 / i if i < 0 else 1)
    lmbd = [func(i) for i in range(beg, end, 1)]
    # One trace per model coefficient.
    # NOTE(review): [[]] * n aliases a single list, but every slot is
    # reassigned in the loop below, so no aliasing bug manifests here.
    coefs = [[]] * len(lnrReg.coef.arr)
    for col in range(0, len(lnrReg.coef.arr)):
        coefs[col] = [0] * len(lmbd)

    # Loop body continues beyond this excerpt.
    for i in range(beg, end, 1):
Ejemplo n.º 17
0
from LinearRegression import LinearRegression

# Toy 1-D dataset (plain Python lists).
x = [5, 7, 8, 7, 2, 17, 2, 9, 4, 11, 12, 9, 6]
y = [99, 86, 87, 88, 111, 86, 103, 87, 94, 78, 77, 85, 86]

model = LinearRegression()
model.fit(x, y)
# Predict for a single new x value.
result = model.predict([10])
print(result)
Ejemplo n.º 18
0
import Data_Utils
from LinearRegression import LinearRegression
import sys

if __name__ == "__main__":
    # Modernized: Python-2 print statements -> print() calls; output is
    # identical under both interpreters for these single-argument prints,
    # and this matches the print() style used elsewhere in the project.
    print("Loading Data ...")
    X, Y = Data_Utils.Load_Dataset("labels.csv")
    print("Splitting Data ...")
    train_x, test_x, train_y, test_y = Data_Utils.Split_Data(X, Y)
    print("Total: {}".format(len(X)))
    print("Train split: {}".format(len(train_x)))
    print("Test split: {}".format(len(test_x)))
    # Mini-batch gradient-descent linear regression (project class).
    clf = LinearRegression(batch_size=64, max_iter=10000, learning_rate=1e-3, convergence_threshold=1e-5)
    # Log file / model file names are derived from the first CLI argument.
    clf.fit(train_x, train_y, test_x, test_y, verbose=True, log_filename=sys.argv[1] + ".log")
    clf.save_weights(sys.argv[1] + ".mdl")
    print(clf.score(test_x, test_y))

def runplt():
    """Set up an empty pizza-price-vs-diameter figure and return pyplot."""
    plt.figure()
    plt.grid(True)
    plt.title("Pizza price plotted against diameter")
    plt.xlabel('Diameter')
    plt.ylabel('Price')
    # Equivalent to xlim(0, 25) + ylim(0, 25).
    plt.axis([0, 25, 0, 25])
    return plt


# Load the pizza data (diameter vs price).
pizza = pd.read_csv("data/pizza.csv", index_col='Id')
dia = pizza.loc[:, 'Diameter'].values
price = pizza.loc[:, 'Price'].values

# Create and fit the model
model = LinearRegression()
model.fit(dia, price)

x2 = np.array([0., 25.])  # two x values spanning the plot range
y2 = model.predict(x2)  # predict prices for them
print(y2)  # inspect the predictions

runplt()
plt.plot(dia, price, 'k.')
plt.plot(x2, y2, 'g-')  # draw the fitted line
plt.show()
Ejemplo n.º 20
0
# Load dataset (you can also load your own with pandas but sklearn offers
# a range of different datasets)
# NOTE(review): load_boston was removed in scikit-learn >= 1.2.
(X, y) = datasets.load_boston(return_X_y=True)

# Preprocess it with sklearn (not necessary, but improves gradient descent)
X = preprocessing.scale(X)

# Divide dataset into train and test data (65% train)
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, train_size=0.65)

# Instantiate a new LinearRegression and call its fit-method with the
# train data
lr = LinearRegression()
lr.fit(X=X_train, y=y_train)

# Predict the result with the test data and calculate the absolute costs
# between the prediction and the target values
y_pred = lr.predict(X_test)
costs = np.absolute(y_pred - y_test).sum()

# Print interesting information (total cost, mean cost per sample, weights)
print(costs)
print(costs / len(X_test))
print(lr.weights)

# Plot costs per iteration
plt.plot(lr.costs_per_iter, marker='o')
plt.xlabel('Iteration')
plt.ylabel('Total costs')
Ejemplo n.º 21
0
# Evaluate the target function on the test inputs (func/x_test defined
# earlier, outside this excerpt).
y_test = func(x_test)

# plt.scatter(x_train, y_train, facecolor='None', edgecolors='b', s=50, label='training data')
# plt.plot(x_test, y_test, c='g', label='$\sin(2\pi x)$')
# plt.legend()
# plt.show()

# Fit polynomial models of increasing degree and plot each in a 2x2 grid.
for i, degree in enumerate([0, 1, 3, 9]):
    plt.subplot(2, 2, i + 1)
    features = PolynomialFeatures(degree)
    X_train = features.transform(x_train)
    X_test = features.transform(x_test)
    print("X_train shape", X_train.shape)
    print("X_test shape", X_test.shape)
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Predictions on the (polynomial-expanded) test inputs.
    y = model.predict(X_test)

    plt.scatter(x_train, y_train, edgecolors='b', s=50, label='training data')
    plt.plot(x_test, y_test, c='g', label='sin(2*pi*x)')
    plt.plot(x_test, y, c='r', label='fitting')
    plt.ylim(-1.5, 1.5)
plt.legend()

plt.show()


# rmse curve
def rmse(a, b):
    """Root-mean-square error between `a` and `b`."""
    err = a - b
    return np.sqrt(np.mean(err * err))
Ejemplo n.º 22
0
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
import matplotlib.pyplot as plt

# Synthetic 1-feature regression data with an 80/20 split.
X,y = datasets.make_regression(n_samples = 100,n_features=1,noise=20,random_state=4)
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=1234)

from LinearRegression import LinearRegression

# lr = learning rate for gradient descent.
regresor = LinearRegression(lr = 0.01)
regresor.fit(X_train,y_train)

predikcije = regresor.predict(X_test)
# mean squared error
def MSE(y_true,y_pred):
    """Average squared difference between targets and predictions."""
    residuals = y_true - y_pred
    return np.mean(residuals ** 2)

mse=MSE(y_test,predikcije)

print(mse)  

y_pred_linija = regresor.predict(X)
fig = plt.figure(figsize=(8,6))
m1=plt.scatter(X_train,y_train,s=10)
m2=plt.scatter(X_test,y_test,s=10)
plt.plot(X,y_pred_linija,color="black",linewidth=2,label="predikcija")
plt.show()