def plotCostHistory(X, y, standardization=False, iterations=50, learningRate=1e-7, regularization=0.0):
    lr = r.LinearRegression(X, y, standardization)
    costHistory = lr.gradientDescent(iterations=iterations, learningRate=learningRate, regularization=regularization)

    plt.figure()
    ax = plt.gca()
    plt.subplots_adjust(top=0.95, bottom=0.13, left=0.13, right=0.98)
    plt.rcParams.update({"font.size": 18})
    plt.plot(np.arange(1, iterations + 1), costHistory, color="black")
    plt.text(0.4, 0.9, "learning rate = " + str(learningRate), transform=ax.transAxes)
    plt.text(0.4, 0.8, "regularization = " + str(regularization), transform=ax.transAxes)
    plt.xlabel("number of iterations")
    plt.ylabel(r"cost function $J$")
    plt.grid(color="lightgray")
    plt.show()
def plotCostFunction(X, y, standardization=False):
    # Visualize the cost J(theta0, theta1) as a 3D surface and as a contour plot.
    lr = r.LinearRegression(X, y, standardization=standardization)
    theta0 = np.arange(-5.0, 15.0 + 1e-4, 0.1)
    theta1 = np.arange(-5.0, 15.0 + 1e-4, 0.1)
    Theta0, Theta1, Cost = lr.getCost(theta0, theta1)

    # surface plot
    plt.figure()
    ax = plt.axes(projection="3d")
    plt.rcParams.update({"font.size": 18})
    plt.subplots_adjust(top=1.05, bottom=0.05, left=0.01, right=0.99)
    ax.plot_surface(Theta0, Theta1, Cost, cmap="viridis")
    ax.set_xlabel(r"$\theta_0$", fontsize=18, labelpad=15.0)
    ax.set_ylabel(r"$\theta_1$", fontsize=18, labelpad=15.0)
    ax.set_zlabel(r"cost $J$", fontsize=18, labelpad=15.0)
    plt.show()

    # contour plot
    plt.figure()
    plt.subplots_adjust(top=0.97, bottom=0.14, left=0.13, right=0.96)
    plt.rcParams.update({"font.size": 18})
    plt.contour(Theta0, Theta1, Cost, 100)
    plt.xlabel(r"$\theta_0$")
    plt.ylabel(r"$\theta_1$")
    plt.show()
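# lr.getCost is defined elsewhere; the sketch below is a hypothetical stand-in,
# assuming the usual mean-squared-error cost of a univariate model
# y ~ theta0 + theta1 * x, just to illustrate the shape of the
# (Theta0, Theta1, Cost) grids being plotted above.
import numpy as np

def getCostGridSketch(X, y, theta0, theta1):
    Theta0, Theta1 = np.meshgrid(theta0, theta1)
    m = len(y)
    Cost = np.zeros_like(Theta0)
    for i in range(Theta0.shape[0]):
        for j in range(Theta0.shape[1]):
            predictions = Theta0[i, j] + Theta1[i, j] * np.asarray(X)
            Cost[i, j] = np.sum((predictions - np.asarray(y)) ** 2) / (2 * m)
    return Theta0, Theta1, Cost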
def plotData(X, y, standardization=False, iterations=1500, learningRate=0.01, regularization=0.0):
    lr = r.LinearRegression(X, y, standardization=standardization)
    lr.gradientDescent(iterations=iterations, learningRate=learningRate, regularization=regularization)

    plt.figure()
    ax = plt.gca()
    plt.subplots_adjust(top=0.98, bottom=0.14, left=0.13, right=0.97)
    plt.rcParams.update({"font.size": 18})
    plt.scatter(X, y, color="black", marker="x", zorder=2)
    plt.plot(X, lr.predict(X), color="red")
    plt.text(0.65, 0.15, r"$\theta_0$ = " + str(np.around(lr.theta[0], 4)), transform=ax.transAxes)
    plt.text(0.65, 0.05, r"$\theta_1$ = " + str(np.around(lr.theta[1], 4)), transform=ax.transAxes)
    plt.xlabel("population of city in 10,000s")
    plt.ylabel("profit in $10,000s")
    plt.xlim(0.0, 25.0)
    plt.grid(color="lightgray")
    plt.show()
import numpy as np
import matplotlib.pyplot as plt

import dataset
import regression

# Fit a linear model on cubic polynomial features of a nonlinear toy dataset.
X, Y = dataset.load_nonlinear_example1()
ex_X = dataset.polynomial3_features(X)

model = regression.LinearRegression()
model.fit(ex_X, Y)

# evaluate the fitted model on a dense grid for plotting
samples = np.arange(0, 4, 0.1)
x_samples = np.c_[np.ones(len(samples)), samples]
ex_x_samples = dataset.polynomial3_features(x_samples)

plt.scatter(X[:, 1], Y)
plt.plot(samples, model.predict(ex_x_samples))
plt.show()
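# dataset.polynomial3_features is not shown in this snippet; the function below
# is a minimal, hypothetical sketch of such a cubic feature expansion, assuming
# the input design matrix already holds a bias column followed by the raw
# feature (as np.c_[np.ones(len(samples)), samples] above suggests).
import numpy as np

def polynomial3_features_sketch(X):
    x = X[:, 1]
    # expand [1, x] into [1, x, x^2, x^3]
    return np.c_[X[:, 0], x, x ** 2, x ** 3]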
def GetData(fileName):
    file = open(fileName, "r")
    data_str = file.read()
    file.close()
    data_arr = data_str.split("\n")
    # remove empty lines (filter into a new list; deleting from a list while
    # iterating over it skips entries)
    data_arr = [line for line in data_arr if line != ""]
    # remove whitespace (str.replace returns a new string, so reassign it)
    data_arr = [line.replace(" ", "") for line in data_arr]
    data = [[0.0 for i in range(2)] for j in range(len(data_arr))]
    for i in range(len(data_arr)):
        point_str = data_arr[i].split(",")
        data[i][0] = float(point_str[0])
        data[i][1] = float(point_str[1])
    return data


data = GetData("data")
res = regression.LinearRegression(data)
print("a: {:.10f}\nb: {:.10f}".format(res[0], res[1]))
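# regression.LinearRegression is defined elsewhere; judging from the print
# statement it returns two coefficients. Below is a minimal sketch of a
# closed-form least-squares fit with the same call shape, assuming res[0] is
# the slope a and res[1] the intercept b of y ~ a * x + b.
import numpy as np

def linear_regression_sketch(data):
    points = np.asarray(data, dtype=float)
    x, y = points[:, 0], points[:, 1]
    A = np.c_[x, np.ones_like(x)]                   # design matrix [x, 1]
    coeffs, *_ = np.linalg.lstsq(A, y, rcond=None)  # solves min ||A @ coeffs - y||
    return coeffs                                   # [a, b]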
def plotData(X, y, size=True, bedrooms=True, standardization=False, iterations=50, learningRate=1e-7, regularization=0.0):
    lr = r.LinearRegression(X, y, standardization)
    lr.gradientDescent(iterations=iterations, learningRate=learningRate, regularization=regularization)

    if size:
        plt.figure()
        ax = plt.gca()
        plt.subplots_adjust(top=0.98, bottom=0.14, left=0.23, right=0.97)
        plt.rcParams.update({"font.size": 18})
        plt.scatter(X[:, 0], y, color="black", marker="x", zorder=2)
        plt.scatter(X[:, 0], lr.predict(X), color="red", zorder=3)
        plt.text(0.60, 0.25, r"$\theta_0$ = " + str(np.around(lr.theta[0], 1)), transform=ax.transAxes)
        plt.text(0.60, 0.15, r"$\theta_1$ = " + str(np.around(lr.theta[1], 1)), transform=ax.transAxes)
        plt.text(0.60, 0.05, r"$\theta_2$ = " + str(np.around(lr.theta[2], 1)), transform=ax.transAxes)
        plt.xlabel(r"house size in ft$^2$")
        plt.ylabel("house price in $")
        plt.grid(color="lightgray")
        plt.show()

    if bedrooms:
        plt.figure()
        ax = plt.gca()
        plt.subplots_adjust(top=0.98, bottom=0.14, left=0.23, right=0.97)
        plt.rcParams.update({"font.size": 18})
        plt.scatter(X[:, 1], y, color="black", marker="x", zorder=2)
        plt.scatter(X[:, 1], lr.predict(X), color="red", zorder=3)
        plt.text(0.05, 0.90, r"$\theta_0$ = " + str(np.around(lr.theta[0], 1)), transform=ax.transAxes)
        plt.text(0.05, 0.80, r"$\theta_1$ = " + str(np.around(lr.theta[1], 1)), transform=ax.transAxes)
        plt.text(0.05, 0.70, r"$\theta_2$ = " + str(np.around(lr.theta[2], 1)), transform=ax.transAxes)
        plt.xlabel("number of bedrooms")
        plt.ylabel("house price in $")
        plt.grid(color="lightgray")
        plt.show()
if BFinalPrediction == 0:
    X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.33, random_state=42)

## Preprocessing
if BTask1bTransformation == 1:
    trafo = preprocessing.task1btransformation()
    X_train = trafo.transform(X_train)
    if BFinalPrediction == 0:
        X_test = trafo.transform(X_test)

## Linear Regression
if BLinearRegression == 1:
    LinReg = regression.LinearRegression()
    LinReg.fit(X_train, y_train)
    if BFinalPrediction == 0:
        y_pred = LinReg.predict(X_test)
    w = LinReg.getcoeff()

if BRidgeRegression == 1:
    l = 15
    RidgeReg = regression.RidgeRegression(alpha=l)
    RidgeReg.fit(X_train, y_train)
    if BFinalPrediction == 0:
        y_pred = RidgeReg.predict(X_test)
    w = RidgeReg.getcoeff()

if BLassoRegression == 1:
    l = 1
test = all_data[len_train:]

# --------------------------- Model ----------------------------------
# Validation function
n_folds = 5

random_grid = {'alpha': [0.0001, 0.0001, 0.001, 0.01, 0.1, 1.0, 1.1]}
lasso = utils.cros_validation(Lasso(random_state=1), train.values, train_label, n_folds, random_grid)

# My implementation of l2 regularization
random_grid = {'regularization_factor': [0.0001, 0.0001, 0.001, 0.01, 0.1, 1.0, 1.1]}
multi = utils.cros_validation(regression.LinearRegression(regularization_factor=1.0), train.values, train_label, n_folds, random_grid)

# Univariate case
X = [train['GrLivArea'].values, np.ones(len(train))]
X = np.transpose(X)
uni = utils.cros_validation(regression.LinearRegression(regularization_factor=1.0), X, train_label, n_folds, random_grid)

random_grid = {'n_estimators': [100, 200, 300, 400, 500, 600, 700, 720, 740, 760, 780, 800]}
lgb = lgb.LGBMRegressor(objective='regression', num_leaves=5,
                        learning_rate=0.05, n_estimators=800,
                        max_bin=60, bagging_fraction=0.8,
                        bagging_freq=5, feature_fraction=0.2319,
                        feature_fraction_seed=9, bagging_seed=9,
                        min_data_in_leaf=6, min_sum_hessian_in_leaf=11)
# build a design matrix with shifted copies of the impulse function
from numpy import roll, size, zeros

maxshift = 20
num_stacks = size(events)
designmatrix = zeros((maxshift, num_stacks))
for i in range(0, maxshift):
    designmatrix[i, :] = roll(events, i)

# image(designmatrix)  # just plotting function


# In[12]:

## construct the model
# print dir(regression)
model = regression.LinearRegression(fit_intercept=False)


# In[13]:

# ## before fitting to all voxels, we'll fit to the mean
# ## there's a decent signal in the mean,
# ## and this will be an easy way to see what the regression is doing
# y = data.mean()
# ## apply the model to this one signal
# from thunder.regression.estimators import PseudoInv
# estimator = PseudoInv(designmatrix.T)
# b_example = estimator.estimate(y)
# plt.plot(b_example)


# In[14]:
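# The commented-out cell above fits the mean signal with thunder's PseudoInv
# estimator. Below is a self-contained NumPy sketch of that pseudo-inverse
# estimate, using a synthetic impulse train and signal because `events` and
# `data` are not defined in this excerpt.
import numpy as np

events_demo = np.zeros(100)
events_demo[::25] = 1.0                  # impulse every 25 samples

design_demo = np.stack([np.roll(events_demo, i) for i in range(20)])  # (maxshift, n)

# fake signal: a delayed, noisy copy of the impulses
rng = np.random.default_rng(0)
y_demo = np.roll(events_demo, 5) + 0.1 * rng.standard_normal(events_demo.size)

# least-squares coefficients via the pseudo-inverse of the (n, maxshift) design
b_demo = np.linalg.pinv(design_demo.T) @ y_demo
print(b_demo.argmax())                   # peaks near shift 5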