import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

from models import LinearRegression  # project-local model exposing .theta and .bias


def linear_test():
    # synthetic 1-D regression problem
    X, y = make_regression(n_features=1, noise=20, random_state=1234)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=1234)

    lr = LinearRegression()
    lr.fit(X_train, y_train)
    y_pred = lr.predict(X_test)

    plt.figure(1, figsize=(5, 4))
    plt.scatter(X_test, y_test, c="black")
    # the fitted line, reconstructed from the learned parameters
    plt.plot(X_test, lr.theta * X_test + lr.bias, linewidth=1, c="red")
    plt.axhline(0.5, color=".5")
    plt.ylabel("y")
    plt.xlabel("X")
    plt.legend(
        ("Linear Regression Model",),
        loc="lower right",
        fontsize="small",
    )
    plt.tight_layout()
    plt.show()
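# linear_test assumes a models.LinearRegression that learns .theta and
# .bias; models.py is not shown in this section. A minimal gradient-descent
# sketch consistent with that interface (an assumption, not the repo's
# actual implementation):

import numpy as np


class LinearRegressionSketch:
    def __init__(self, learning_rate=0.01, n_iters=1000):
        self.learning_rate = learning_rate
        self.n_iters = n_iters
        self.theta = None
        self.bias = 0.0

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.theta = np.zeros(n_features)
        for _ in range(self.n_iters):
            error = X @ self.theta + self.bias - y
            # gradient step on the squared error (the constant factor 2
            # is folded into the learning rate)
            self.theta -= self.learning_rate * (X.T @ error) / n_samples
            self.bias -= self.learning_rate * error.mean()
        return self

    def predict(self, X):
        return X @ self.theta + self.bias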
def one_hot_regression(proc, data):
    """Ridge regression on a one-hot representation.

    :param proc: processing object (unused here)
    :param data: tuple of train/test splits in the one-hot encoded space
    :return: fitted linear regression object
    """
    print('one hot regression...')

    # unpack train and test splits
    trainOneHotX, trainY, testOneHotX, testY = data

    # ridge regression on the one-hot features
    print('ridge regression with one hot representation...')
    linReg = LinearRegression(model='ridge')
    linReg.fit(trainOneHotX, trainY)
    preds = linReg.predict(testOneHotX)
    print('test r2 score: ', metrics.r2_score(testY, preds))
    print('test mse: ', metrics.mse(testY, preds))

    return linReg
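# one_hot_regression calls metrics.mse, which is not sklearn's name for
# mean squared error (sklearn exposes mean_squared_error), so metrics here
# is presumably a project-local module. A minimal sketch of that helper,
# under that assumption:

import numpy as np


def mse(y_true, y_pred):
    """Mean squared error between targets and predictions."""
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return float(np.mean((y_true - y_pred) ** 2))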
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.special import expit
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# project-local models exposing .theta and .bias (assumed import path)
from models import LinearRegression, LogisticRegression


def logistic_test():
    # synthetic 1-D binary classification problem
    X, y = make_classification(
        n_features=1,
        n_classes=2,
        n_redundant=0,
        n_informative=1,
        n_clusters_per_class=1,
        class_sep=0.75,
        shuffle=True,
        random_state=0,
    )
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=0)

    df_test = pd.DataFrame(data=[X_test.flatten(), y_test]).T
    df_test.columns = ["X", "y"]

    lr = LogisticRegression()
    lr.fit(X_train, y_train)
    y_pred = lr.predict(X_test)

    # accuracy on the test split
    score = [1 if yi == yi_pred else 0 for yi, yi_pred in zip(y_test, y_pred)]
    print(np.sum(score) / len(score))

    # plot the result
    plt.figure(1, figsize=(4, 3))
    plt.clf()
    plt.scatter(X_train.ravel(), y_train, color="black", zorder=20)

    # sigmoid of the learned decision function
    df_test["loss"] = expit(X_test * lr.theta + lr.bias).ravel()
    df_test = df_test.sort_values("X")
    plt.plot(df_test["X"], df_test["loss"], color="red", linewidth=3)

    # plain linear fit for comparison
    ols = LinearRegression()
    ols.fit(X_train, y_train)
    plt.plot(X_test, ols.theta * X_test + ols.bias, linewidth=1)
    plt.axhline(0.5, color=".5")

    plt.ylabel("y")
    plt.xlabel("X")
    plt.xticks(range(-5, 10))
    plt.yticks([0, 0.5, 1])
    plt.ylim(-0.25, 1.25)
    plt.xlim(-2, 2)
    plt.legend(
        ("Logistic Regression Model", "Linear Regression Model"),
        loc="lower right",
        fontsize="small",
    )
    plt.tight_layout()
    plt.show()
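# The logistic counterpart to the linear sketch above, again assuming the
# models module is unavailable: a hypothetical LogisticRegression trained
# with batch gradient descent on the log-loss, exposing the same .theta
# and .bias attributes that logistic_test reads.

import numpy as np
from scipy.special import expit


class LogisticRegressionSketch:
    def __init__(self, learning_rate=0.1, n_iters=1000):
        self.learning_rate = learning_rate
        self.n_iters = n_iters
        self.theta = None
        self.bias = 0.0

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.theta = np.zeros(n_features)
        for _ in range(self.n_iters):
            # gradient of the log-loss: X^T (sigmoid(Xw + b) - y) / n
            error = expit(X @ self.theta + self.bias) - y
            self.theta -= self.learning_rate * (X.T @ error) / n_samples
            self.bias -= self.learning_rate * error.mean()
        return self

    def predict(self, X):
        return (expit(X @ self.theta + self.bias) >= 0.5).astype(float)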
import sys

from models import LinearRegression

# read training hyperparameters from the command line, falling back to
# defaults when they are missing or malformed
try:
    passes = int(sys.argv[1])
    split = float(sys.argv[2])
except (IndexError, ValueError):
    print("Using default number of passes and split ratio")
    passes = 2
    split = 0.72

model = LinearRegression('data/iris.data')
model.fit(passes, split)
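# Invoked from the shell with the number of passes and the train/test
# split ratio as positional arguments; the script's file name is not given
# in the source, so train.py below is a placeholder:
#
#     python train.py 5 0.8
#
# With no arguments (or non-numeric ones) it falls back to 2 passes and a
# 0.72 split.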
import numpy as np
import matplotlib.pyplot as plt

# local least-squares model storing its coefficients in .model (assumed import path)
from models import LinearRegression


def sin_fitting_example():
    # fit polynomials of increasing degree to a noisy sine wave
    amt_points = 36
    x = np.linspace(0, 360, num=amt_points)
    y = np.sin(x * np.pi / 180.)
    noise = np.random.normal(0, .1, y.shape)
    noisy_y = y + noise
    X_train = x
    y_train = noisy_y

    regression = LinearRegression()

    # linear: design matrix [x, 1]
    X_linear = np.vstack((X_train, np.ones(len(X_train)))).T
    regression.fit(X_linear, y_train.reshape(-1, 1))
    W_linear = regression.model
    y_linear = W_linear[0] * x + W_linear[1]

    # quadratic: design matrix [x^2, x, 1]
    X_quadratic = np.vstack(
        (np.power(X_train, 2), X_train, np.ones(len(X_train)))).T
    regression.fit(X_quadratic, y_train.reshape(-1, 1))
    W_quadratic = regression.model
    y_quadratic = W_quadratic[0] * np.power(x, 2) + W_quadratic[1] * x + W_quadratic[2]

    # cubic: design matrix [x^3, x^2, x, 1]
    X_cubic = np.vstack(
        (np.power(X_train, 3), np.power(X_train, 2), X_train,
         np.ones(len(X_train)))).T
    regression.fit(X_cubic, y_train.reshape(-1, 1))
    W_cubic = regression.model
    y_cubic = (W_cubic[0] * np.power(x, 3) + W_cubic[1] * np.power(x, 2)
               + W_cubic[2] * x + W_cubic[3])

    # degree 10: design matrix [x^10, x^9, ..., x, 1]
    X_10 = np.vstack(
        (np.power(X_train, 10), np.power(X_train, 9), np.power(X_train, 8),
         np.power(X_train, 7), np.power(X_train, 6), np.power(X_train, 5),
         np.power(X_train, 4), np.power(X_train, 3), np.power(X_train, 2),
         X_train, np.ones(len(X_train)))).T
    regression.fit(X_10, y_train.reshape(-1, 1))
    W_10 = regression.model
    y_10 = (W_10[0] * np.power(x, 10) + W_10[1] * np.power(x, 9)
            + W_10[2] * np.power(x, 8) + W_10[3] * np.power(x, 7)
            + W_10[4] * np.power(x, 6) + W_10[5] * np.power(x, 5)
            + W_10[6] * np.power(x, 4) + W_10[7] * np.power(x, 3)
            + W_10[8] * np.power(x, 2) + W_10[9] * x + W_10[10])

    # plots
    plt.figure()
    plt.subplot(1, 1, 1)
    plt.gca().set_title('Sin(x) - Fitting curves')
    plt.plot(x, noisy_y, 'o')      # noisy samples
    plt.plot(x, y_linear, '-')     # linear fit
    plt.plot(x, y_quadratic, '-')  # quadratic fit
    plt.plot(x, y_cubic, '-')      # cubic fit
    plt.plot(x, y_10, '-')         # 10th-degree fit
    plt.legend(['noisy signal', 'linear', 'quadratic', 'cubic', '10th power'])
    plt.show()
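# sin_fitting_example assumes a LinearRegression whose fit() solves an
# ordinary least-squares problem for an explicit design matrix and stores
# the coefficient vector in .model. Since that class is not shown, here is
# a hypothetical sketch using the normal equations with a pseudo-inverse:

import numpy as np


class LeastSquaresSketch:
    def __init__(self):
        self.model = None

    def fit(self, X, y):
        # W = (X^T X)^+ X^T y; the pseudo-inverse keeps this stable when
        # X^T X is ill-conditioned, as with high-degree polynomial features
        self.model = np.linalg.pinv(X.T @ X) @ X.T @ y
        return self

    def predict(self, X):
        return X @ self.model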
import numpy as np
import matplotlib.pyplot as plt

from dataset_income import Data
from metrics import MSE
from models import ConstantModel, LinearRegression, LinearRegressionWithB
from gradient_descent import stochastic_gradient_descent, gradient_descent, mini_batch_gradient_descent

if __name__ == '__main__':
    dataset = Data(
        r'C:\Users\Lautaro\PycharmProjects\ceia_intro_a_IA\clase_3\ejercicios\data\income.csv'
    )
    X_train, X_test, y_train, y_test = dataset.split(0.8)

    linear_regression = LinearRegression()
    linear_regression.fit(X_train, y_train)
    lr_y_hat = linear_regression.predict(X_test)

    linear_regression_b = LinearRegressionWithB()
    linear_regression_b.fit(X_train, y_train)
    lrb_y_hat = linear_regression_b.predict(X_test)

    constant_model = ConstantModel()
    constant_model.fit(X_train, y_train)
    ct_y_hat = constant_model.predict(X_test)

    mse = MSE()
    lr_mse = mse(y_test, lr_y_hat)
    lrb_mse = mse(y_test, lrb_y_hat)
    ct_mse = mse(y_test, ct_y_hat)

    # report test error for each model
    print('LinearRegression MSE: ', lr_mse)
    print('LinearRegressionWithB MSE: ', lrb_mse)
    print('ConstantModel MSE: ', ct_mse)
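# ConstantModel is a project-local baseline whose source is not included.
# A minimal sketch, assuming it predicts the mean of the training targets;
# any fitted regression should beat this MSE on the test split:

import numpy as np


class ConstantModelSketch:
    def fit(self, X, y):
        self.constant = float(np.mean(y))
        return self

    def predict(self, X):
        return np.full(len(X), self.constant)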
from sklearn import datasets
from sklearn import model_selection
from sklearn import metrics

from models import LinearRegression, LinearRegression2

if __name__ == '__main__':
    iris = datasets.load_iris()
    train_x, test_x, train_y, test_y = model_selection.train_test_split(
        iris.data, iris.target, test_size=0.4)

    regressor = LinearRegression()
    regressor.fit(train_x, train_y)
    pred_y = regressor.predict(test_x)
    mse = metrics.mean_squared_error(test_y, pred_y)
    print(mse)

    regressor2 = LinearRegression2(train_x.shape[1])
    regressor2.fit(train_x, train_y)
    pred_y = regressor2.predict(test_x)
    mse = metrics.mean_squared_error(test_y, pred_y)
    print(mse)
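# A quick sanity check for either custom regressor is to compare its test
# MSE against sklearn's own implementation on the same kind of split. This
# comparison is an addition, not part of the original script:

from sklearn import datasets, linear_model, metrics, model_selection

iris = datasets.load_iris()
train_x, test_x, train_y, test_y = model_selection.train_test_split(
    iris.data, iris.target, test_size=0.4, random_state=0)

baseline = linear_model.LinearRegression()
baseline.fit(train_x, train_y)
print(metrics.mean_squared_error(test_y, baseline.predict(test_x)))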