def linear_test():
    """Fit LinearRegression on synthetic 1-D data and plot the fitted line."""
    features, targets = make_regression(n_features=1, noise=20, random_state=1234)
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.33, random_state=1234
    )

    model = LinearRegression()
    model.fit(X_train, y_train)
    model.predict(X_test)  # predictions are not used below; call kept for parity

    # Line implied by the learned slope/intercept, evaluated on the test inputs.
    fitted_line = model.theta * X_test + model.bias

    plt.figure(1, figsize=(5, 4))
    plt.scatter(X_test, y_test, c="black")
    plt.plot(X_test, fitted_line, linewidth=1, c="red")
    plt.axhline(0.5, color=".5")
    plt.ylabel("y")
    plt.xlabel("X")
    plt.legend(
        ("Linear Regression Model", ),
        loc="lower right",
        fontsize="small",
    )
    plt.tight_layout()
    plt.show()
def test_LinearRegression(dim):
    """Run the shared regression check against a LinearRegression model.

    :param dim: number of input features for the synthetic dataset.
    """
    features, labels = make_regression(n_samples=1000, n_features=dim)
    check_model(
        LinearRegression(dim),
        "LinearRegression",
        features,
        labels,
        category="regression",
    )
def test_linreg():
    '''
    Helper function that tests LinearRegression.
    @param: None
    @return: None
    '''
    # Removed leftover debug scaffolding (unused toy arrays `m`/`mm`, the
    # debug print loop, and throwaway `n`/`s` locals) that ran before the
    # actual test and only produced noise on stdout.
    X_train, X_test, Y_train, Y_test = import_wine(WINE_FILE_PATH)
    num_features = X_train.shape[1]

    # Padding the inputs with a bias
    X_train_b = np.append(X_train, np.ones((len(X_train), 1)), axis=1)
    X_test_b = np.append(X_test, np.ones((len(X_test), 1)), axis=1)

    #### Matrix Inversion ######
    print('---- LINEAR REGRESSION w/ Matrix Inversion ---')
    solver_model = LinearRegression(num_features)
    solver_model.train(X_train_b, Y_train)
    print('Average Training Loss:', solver_model.average_loss(X_train_b, Y_train))
    print('Average Testing Loss:', solver_model.average_loss(X_test_b, Y_test))
def one_hot_regression(proc, data):
    """
    linear regression using a one hot representation
    :proc processing object
    :data tuple containing train/test (one hot encoded space)
    return linear regression object
    """
    print('one hot regression...')

    # Unpack the pre-encoded train/test splits.
    train_X, train_y, test_X, test_y = data

    # Ridge-flavoured linear regression on the one-hot features.
    print('ridge regression with one hot representation...')
    ridge_reg = LinearRegression(model='ridge')
    ridge_reg.fit(train_X, train_y)

    predictions = ridge_reg.predict(test_X)
    print('test r2 score: ', metrics.r2_score(test_y, predictions))
    print('test mse: ', metrics.mse(test_y, predictions))
    return ridge_reg
def _initialize_models(self, data_generator):
    """Initializes models prior to training.

    Returns a name -> fresh model instance mapping; NaiveBayes is seeded
    with the generator's noise/mean parameters and kNN with this runner's
    fold count.
    """
    naive_bayes = NaiveBayes(
        std_X=data_generator.std_X,
        m0=data_generator.m0s,
        m1=data_generator.m1s,
    )
    return {
        "Linear Regression": LinearRegression(),
        "Logistic Regression": LogisticRegression(),
        "Quadratic Regression": QuadraticRegression(),
        "Naive Bayes'": naive_bayes,
        "kNN CV": kNNCV(n_folds=self.n_folds),
    }
def test_models(dataset, epochs, test_size=0.2):
    '''
    Tests LinearRegression, OneLayerNN, TwoLayerNN on a given dataset.
    :param dataset The path to the dataset
    :return None
    '''
    # Bail out early when the dataset file is missing.
    if not os.path.exists(dataset):
        print('The file {} does not exist'.format(dataset))
        exit()

    # First column is the target; the rest are features.
    raw = np.loadtxt(dataset, skiprows=1)
    targets = raw[:, 0]
    features = raw[:, 1:]

    # Standardize features to zero mean / unit variance.
    features = (features - np.mean(features, axis=0)) / np.std(features, axis=0)

    X_train, X_test, Y_train, Y_test = train_test_split(
        features, targets, test_size=test_size)

    print('Running models on {} dataset'.format(dataset))

    #### Linear Regression ######
    print('----- LINEAR REGRESSION -----')
    # Append a bias column of ones for the models without a learned bias.
    X_train_b = np.append(X_train, np.ones((len(X_train), 1)), axis=1)
    X_test_b = np.append(X_test, np.ones((len(X_test), 1)), axis=1)
    regmodel = LinearRegression()
    regmodel.train(X_train_b, Y_train)
    print('Average Training Loss:', regmodel.average_loss(X_train_b, Y_train))
    print('Average Testing Loss:', regmodel.average_loss(X_test_b, Y_test))

    #### 1-Layer NN ######
    print('----- 1-Layer NN -----')
    one_layer = OneLayerNN()
    one_layer.train(X_train_b, Y_train, epochs=epochs, print_loss=False)
    print('Average Training Loss:', one_layer.average_loss(X_train_b, Y_train))
    print('Average Testing Loss:', one_layer.average_loss(X_test_b, Y_test))

    #### 2-Layer NN ######
    print('----- 2-Layer NN -----')
    two_layer = TwoLayerNN(5)
    # Use X without a bias, since we learn a bias in the 2 layer NN.
    two_layer.train(X_train, Y_train, epochs=epochs, print_loss=False)
    print('Average Training Loss:', two_layer.average_loss(X_train, Y_train))
    print('Average Testing Loss:', two_layer.average_loss(X_test, Y_test))
def main(grid):
    """Run every model experiment on the cleaned dataset.

    Each model is attempted independently: a failure in one model is
    printed and the remaining experiments still run (same behavior as the
    original's seven copy-pasted try/except blocks, now expressed once).

    :param grid: hyper-parameter grid forwarded to every model runner.
    """
    # Get Clean Data
    X, Y = read_clean_data()

    # Linear Regression is the only model using the continuous target.
    try:
        LinearRegression(X, Y, grid)
    except Exception as e:
        print(e)

    # All remaining models use a binarized target.
    Y_binary = BinaryY(Y)
    classifier_runners = (
        LogisticRegression,
        DecisionTree,
        SVM,
        RandomForest,
        Bagging,
        NeuralNet,
    )
    for run_model in classifier_runners:
        try:
            run_model(X, Y_binary, grid)
        except Exception as e:
            print(e)
def test_linreg():
    '''
    Helper function that tests LinearRegression.
    @param: None
    @return: None
    '''
    # Load the wine dataset splits.
    X_train, X_test, Y_train, Y_test = import_wine(WINE_FILE_PATH)
    num_features = X_train.shape[1]

    # Padding the inputs with a bias
    train_bias = np.ones((len(X_train), 1))
    test_bias = np.ones((len(X_test), 1))
    X_train_b = np.append(X_train, train_bias, axis=1)
    X_test_b = np.append(X_test, test_bias, axis=1)

    #### Matrix Inversion ######
    print('---- LINEAR REGRESSION w/ Matrix Inversion ---')
    model = LinearRegression(num_features)
    model.train(X_train_b, Y_train)
    print('Average Training Loss:', model.average_loss(X_train_b, Y_train))
    print('Average Testing Loss:', model.average_loss(X_test_b, Y_test))
import models.LinearRegression as lr
# Import pandas/matplotlib directly instead of reaching through the model
# module's internals (`lr.pd`, `lr.plt`), which coupled this script to
# incidental imports inside models.LinearRegression.
import pandas as pd
import matplotlib.pyplot as plt

# Load the house dataset: one feature (size) and one target (price).
df = pd.read_csv('Datasets/house.csv')
df.columns = ['size', 'price']
x = df['size']
y = df['price']

# Fit via gradient descent: y = theta0 + theta1 * x.
thetas = lr.gradient(x, y)
y_fit = thetas[0] + thetas[1] * x

# Plot the data and the fitted line.
plt.figure(figsize=(20, 3))
plt.scatter(x, y, s=20)
plt.plot(x, y_fit, color="red")
plt.show()
if __name__ == "__main__":
    # N: number of training points; D_in / D_out: input and output dimensions.
    N, D_in, D_out = 64, 1, 1

    # Observation noise level.
    noise_var = 0.5

    # Latin-hypercube inputs with linear targets plus Gaussian noise.
    X = lhs(D_in, N)
    y = 5 * X + noise_var * np.random.randn(N, D_out)

    # Build the model and a momentum-SGD optimizer over its parameters.
    model = LinearRegression(X, y)
    optimizer = SGD(model.num_params, lr=1e-3, momentum=0.9)
    # optimizer = Adam(model.num_params, lr = 1e-3)
    # optimizer = RMSprop(model.num_params, lr = 1e-3)

    # Train, report the learned weight and noise variance, then predict.
    model.train(10000, optimizer)
    print('w = %e, sigma_sq = %e' % (model.theta[:-1], np.exp(model.theta[-1])))
    y_pred = model.predict(X)
def main():
    """Train and evaluate a linear model on 784-dim (28x28) image inputs.

    Fixes applied to the original:
    - ``loss.data[0]`` -> ``loss.item()`` (the indexing form was removed in
      PyTorch >= 0.5 and raises on modern versions).
    - ``Variable(..., volatile=True)`` -> ``torch.no_grad()`` for inference
      (``volatile`` was removed in PyTorch 0.4).
    - ``net.eval()`` before evaluation, and removal of the stray debug
      ``print(use_cuda)`` in the test loop.
    """
    use_cuda = torch.cuda.is_available()
    opt = parser.parse_args()
    print(opt)

    input_size = 784
    output_size = 10  # number of classes

    net = LinearRegression(input_size, output_size)
    if use_cuda:
        net.cuda()
        net = torch.nn.DataParallel(net, device_ids=range(
            torch.cuda.device_count()))
        cudnn.benchmark = True

    criterion = loss_fn(opt)
    train_loader, test_loader = data_set(opt)

    epoch_step = json.loads(opt.epoch_step)
    optimizer = get_optimizer(net, opt, opt.lr)

    # Random run identifier used by the progress tracker.
    base_ = str(random.randint(1, 100000)) + '_' + str(
        random.randint(1, 100000))
    progress = KeepProgress(net, opt, base=base_)
    print(base_)

    def train(epoch):
        # Step the learning rate down at the configured epochs.
        if epoch in epoch_step:
            lr_adjust(optimizer, epoch, opt, epoch_step)
        net.train()
        train_loss = 0
        correct = 0
        total = 0
        for t, (images, labels) in enumerate(train_loader):
            inputs, targets = Variable(images.view(-1, 28 * 28)), Variable(labels)
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            # Closure form supports optimizers (e.g. LBFGS) that re-evaluate
            # the loss during a step.
            def closure():
                net.zero_grad()
                outputs = net(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                return loss

            loss = optimizer.step(closure)
            outputs = net(inputs)
            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets.data).cpu().sum()
            if t % 100 == 0:
                print(t)
        progress.train_progress({
            'train_accuracy': 100 * correct / total,
            'train_loss': loss.item()
        })

    def test(epoch):
        net.eval()
        correct = 0
        total = 0
        test_loss = 0
        # no_grad replaces the removed volatile=True inference flag.
        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs = Variable(inputs.view(-1, 28 * 28))
                targets = Variable(targets)
                if use_cuda:
                    inputs, targets = inputs.cuda(), targets.cuda()
                outputs = net(inputs)
                loss = criterion(outputs, targets)
                test_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += targets.size(0)
                correct += predicted.eq(targets.data).cpu().sum()
        print('Accuracy of the model on the 10000 test images: %d %%' %
              (100 * correct / total))
        progress.test_progress({
            'test_loss': test_loss / len(test_loader.dataset),
            'test_accuracy': 100 * correct / total
        })

    for epoch in range(opt.epochs):
        train(epoch)
        test(epoch)
import numpy as np
from helperFunctions import *
import matplotlib.pyplot as plt
from models import LinearRegression

# Test script for the custom LinearRegression model.
lm = LinearRegression()
n_points = 50

# Generate some noisy train data
noise = np.random.randn(n_points)
x = np.linspace(0, 20, n_points)
y = 32.3 + 5.2 * (x + noise)

# Scale both variables, then make x a column vector for the model.
x = scaleFeature(x)
y = scaleFeature(y)
x = x.reshape((-1, 1))

# Train the lm, print out the parameters, plot the fit
lm.train(x, y, 0.5, 100, 0)
print(lm.parameters)

# Predict with the trained model and plot data vs. fitted line.
y_hat = lm.predict(x)
plt.plot(x, y, 'o')
plt.plot(x, y_hat)
plt.ylabel("y")
plt.xlabel("x")
import time
import numpy as np
import matplotlib.pyplot as plt
from dataset_income import Data
from metrics import MSE
from models import ConstantModel, LinearRegression, LinearRegressionWithB
from gradient_descent import stochastic_gradient_descent, gradient_descent, mini_batch_gradient_descent

if __name__ == '__main__':
    # NOTE(review): hard-coded absolute path ties this script to one machine.
    dataset = Data(
        r'C:\Users\Lautaro\PycharmProjects\ceia_intro_a_IA\clase_3\ejercicios\data\income.csv'
    )
    X_train, X_test, y_train, y_test = dataset.split(0.8)

    # Fit each model on the training split and predict on the test split.
    fitted_models = (LinearRegression(), LinearRegressionWithB(), ConstantModel())
    predictions = []
    for model in fitted_models:
        model.fit(X_train, y_train)
        predictions.append(model.predict(X_test))
    lr_y_hat, lrb_y_hat, ct_y_hat = predictions

    # Score every model with mean squared error.
    mse = MSE()
    lr_mse = mse(y_test, lr_y_hat)
    lrb_mse = mse(y_test, lrb_y_hat)
    ct_mse = mse(y_test, ct_y_hat)
def main():
    """Run the error-generation benchmark over every configured dataset.

    For each dataset and hyper-parameter combination: split the data,
    train and validate the black-box pipeline, corrupt the test set with
    the configured error generators, then train and evaluate a meta
    classifier that predicts pipeline performance on corrupted data.
    """
    path = get_resource_path()
    # Candidate pipeline classifiers; all but LogRegression are disabled.
    classifiers = [
        # DecisionTree(),
        # RandomForest(size=40),
        # ExtremelyRandomizedTrees(size=40),
        # XGB(),
        # SVM(),
        # LinearSVM(),
        # KNN(n_neighbors=7),
        LogRegression(),
        # GausNB(),
        # BaggingRandomForest(size=40),
        # MLPC(input_size=[16, 32, 16, 8])
    ]
    # Data-corruption strategies applied to the test/target sets.
    error_generators = [
        Anomalies(), Typos(), ExplicitMissingValues(),
        ImplicitMissingValues(), SwapFields()
    ]
    # TODO: dataset size as a hyperparameter
    # TODO: random_state as a hyperparameter
    hyperparams = {
        'train_ratio': .7,
        'val_ratio': .1,
        'test_ratio': .1,
        'target_ratio': .1,
        'random_state': [0],
        # 'row_fraction': [0.01, 0.05, 0.1, 0.2, 0.3, 0.5, 0.8],
        'row_fraction': [0.2],
        'classifier': classifiers,
        # Ordering of error generators
        # 'mask': [(0, 0, 1, 0, 0), (0, 0, 0, 1, 0), (0, 0, 0, 0, 1),
        # (0, 2, 0, 0, 1)],
        'mask': [(0, 0, 0, 1, 0)],
        'testset_size': 100
    }
    # datasets.csv rows: filepath, name, target feature, task kind.
    datasets = pd.read_csv(os.path.join(path, 'datasets.csv'))
    for dataset_info in datasets.values:
        filepath, name, target_feature, task = tuple(dataset_info)
        data = pd.read_csv(os.path.join(path, 'data', filepath))
        for state in HyperParameterHolder(hyperparams):
            print("HyperParam : %s" % str(state))
            # Dataset Split
            (X_train, y_train, X_val, y_val, X_test, y_test, X_target,
             y_target) = split_dataset(data, target_feature, state)
            # Single-pass "tuning" loop: trains once and accepts the model.
            tuning_done = False
            while not tuning_done:
                # ML Pipeline Training Procedure
                model = BlackBox().train(state['classifier'], X_train,
                                         y_train)
                # ML Pipeline Validation Procedures
                predicted = model.predict(X_val)
                score = performance_metric(y_val, predicted)
                print("Validation : accuracy = %.4f" % round(score, 4))
                tuning_done = True
            # ML Pipeline final performance score
            predicted = model.predict(X_test)
            score = performance_metric(y_test, predicted)
            print("Test : accuracy = %.4f" % round(score, 4))
            # Meta Classifier Training Procedure
            error_gen_strat = ErrorGenerationStrategy(error_generators, state)
            # TODO: so far, X_test/y_test is used for training
            # prepare a dataset based on X_test and repeated error generation
            # NB: returns a python list, not a numpy array or pandas dataframe
            list_of_corrupted_X_test = error_gen_strat.on(X_test, state)
            try:
                # Meta classifier maps corrupted inputs to predicted scores.
                meta_classifier = MetaClassifier(model, LinearRegression())
                print(str(meta_classifier))
                meta_classifier.fit(list_of_corrupted_X_test, y_test)
                # Meta Classifier Evaluation Procedure
                list_of_corrupted_X_target = error_gen_strat.on(
                    X_target, state)
                predicted_scores = meta_classifier.predict(
                    list_of_corrupted_X_target)
                # Ground-truth score of the pipeline on each corrupted copy.
                actual_scores = [
                    performance_metric(y_target, model.predict(x))
                    for x in list_of_corrupted_X_target
                ]
                plt.plot(range(len(actual_scores)), actual_scores, 'g^')
                plt.plot(range(len(predicted_scores)), predicted_scores, 'ro')
                plt.gca().legend(('ground truth', 'predicted scores'))
                plt.grid(True)
                plt.show()
                result = distance_metric(actual_scores, predicted_scores)
                print("Evaluation : distance metric = %.4f" %
                      round(result, 4))
                print()
            except Exception as e:
                print("\nException : %s\n%s\n" % (str(error_gen_strat), e))
def linear_regression_model():
    """Factory returning a fresh, unfitted LinearRegression instance."""
    model = LinearRegression()
    return model
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
import config_loader

# Sliding-window length and forecast horizon come from the config file.
window_size = config_loader.get_window_size()
num_days = config_loader.get_num_predicted_days()

# Load the training data with n days held out
print("Loading training data...")
X_train, y_train, y_test = DataLoader.get_date_separated_testing_data(
    window_size,
    num_days,
)

# train the model (and save it to file)
print("Training the model...")
model = LinearRegression()
model.train(X_train, y_train)

# get metrics for the multi-day predictions in each state
states = DataLoader.get_states()
# Accumulators: one row per state, num_days columns each.
all_errors = np.empty((0, num_days))
all_predictions = np.empty((0, num_days))
all_actual = np.empty((0, num_days))
all_control = np.empty((0, num_days))
for state_name, state_abbrev in states.items():
    if state_abbrev != "US":  # skip the national aggregate
        # get the multi-day predictions
        # Drop the final num_days rows so they serve as the held-out horizon.
        case_df = DataLoader.get_daily_cases_df(state_abbrev)[:-num_days]
        vax_df = DataLoader.get_daily_vaccinations_df(state_abbrev)[:-num_days]
        future_vaccinations = DataLoader.get_assumed_vaccinations_dict(
            vax_df, num_days, multiplier=1)
        # NOTE(review): the loop body appears truncated in this span — the
        # accumulators above are never filled here; confirm against the
        # full file.
import sys
from models import LinearRegression

# Read the number of passes and the train/test split ratio from argv,
# falling back to defaults when they are missing or malformed.
try:
    passes = int(sys.argv[1])
    split = float(sys.argv[2])
except (IndexError, ValueError):
    # Narrowed from a bare `except:` so real failures (KeyboardInterrupt,
    # SystemExit, programming errors) are no longer silently swallowed.
    print("Using default number of passes and split ratio")
    passes = 2
    split = 0.72

model = LinearRegression('data/iris.data')
model.fit(passes, split)
def sin_fitting_example():
    """Fit polynomials of degree 1, 2, 3 and 10 to noisy sin(x) and plot them.

    The original hand-built each design matrix with np.vstack/np.power and
    spelled out each fitted curve term by term; both are now generated from
    the polynomial degree, removing ~30 lines of duplicated boilerplate
    while producing identical fits and plots.
    """
    # y = sin(x), sampled on [0, 360] degrees with Gaussian noise.
    amt_points = 36
    x = np.linspace(0, 360, num=amt_points)
    y = np.sin(x * np.pi / 180.)
    noise = np.random.normal(0, .1, y.shape)
    noisy_y = y + noise

    X_train = x
    y_train = noisy_y

    regression = LinearRegression()

    # np.vander(x, d + 1) has columns [x^d, ..., x, 1] — the same layout the
    # hand-built np.vstack(...).T design matrices used.
    fitted_curves = []
    for degree in (1, 2, 3, 10):
        design = np.vander(X_train, degree + 1)
        regression.fit(design, y_train.reshape(-1, 1))
        coeffs = np.ravel(regression.model)
        # np.polyval expects coefficients highest-power first, matching the
        # column order above.
        fitted_curves.append(np.polyval(coeffs, x))

    # PLOTS
    plt.figure()
    plt.subplot(1, 1, 1)
    plt.gca().set_title('Sin(x) - Fitting curves')
    # original noisy samples
    plt.plot(x, noisy_y, 'o')
    # one curve per fitted degree, in increasing-degree order
    for y_fit in fitted_curves:
        plt.plot(x, y_fit, '-')
    plt.legend(['noisy signal', 'linear', 'quadratic', 'cubic', '10th power'])
    plt.show()
def logistic_test():
    """Compare a LogisticRegression fit against an OLS line on 1-D data."""
    n_samples = 100
    np.random.seed(0)

    # NOTE: this hand-built dataset is immediately replaced by the
    # make_classification split below; it is kept so the global NumPy RNG
    # state evolves exactly as before.
    X_train = np.random.normal(size=n_samples)
    y_train = (X_train > 0).astype(float)
    X_train[X_train > 0] *= 4
    X_train += 0.3 * np.random.normal(size=n_samples)
    X_train = X_train[:, np.newaxis]

    X, y = make_classification(
        n_features=1,
        n_classes=2,
        n_redundant=0,
        n_informative=1,
        n_clusters_per_class=1,
        class_sep=0.75,
        shuffle=True,
        random_state=0,
    )
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=0)

    df_test = pd.DataFrame(data=[X_test.flatten(), y_test]).T
    df_test.columns = ["X", "y"]

    lr = LogisticRegression()
    lr.fit(X_train, y_train)
    y_pred = lr.predict(X_test)

    # Fraction of correct predictions on the test set.
    hits = [1 if actual == predicted else 0
            for actual, predicted in zip(y_test, y_pred)]
    print(np.sum(hits) / len(hits))

    # and plot the result
    plt.figure(1, figsize=(4, 3))
    plt.clf()
    plt.scatter(X_train.ravel(), y_train, color="black", zorder=20)

    # Sigmoid of the fitted logistic model, sorted by X for a clean curve.
    df_test["loss"] = expit(X_test * lr.theta + lr.bias).ravel()
    df_test = df_test.sort_values("X")
    plt.plot(df_test["X"], df_test["loss"], color="red", linewidth=3)

    # Ordinary least squares line for comparison.
    ols = LinearRegression()
    ols.fit(X_train, y_train)
    plt.plot(X_test, ols.theta * X_test + ols.bias, linewidth=1)

    plt.axhline(0.5, color=".5")
    plt.ylabel("y")
    plt.xlabel("X")
    plt.xticks(range(-5, 10))
    plt.yticks([0, 0.5, 1])
    plt.ylim(-0.25, 1.25)
    plt.xlim(-2, 2)
    plt.legend(
        ("Logistic Regression Model", "Linear Regression Model"),
        loc="lower right",
        fontsize="small",
    )
    plt.tight_layout()
    plt.show()
import numpy as np
from models import LinearRegression, LinearRegression2
from sklearn import datasets
from sklearn import model_selection
from sklearn import linear_model
from sklearn import metrics

if __name__ == '__main__':
    # 60/40 train/test split of iris, regressing on the class label.
    iris = datasets.load_iris()
    train_x, test_x, train_y, test_y = model_selection.train_test_split(
        iris.data, iris.target, test_size=0.4)

    regressor = LinearRegression()
    regressor.fit(train_x, train_y)
    # NOTE(review): `predit` looks like a typo for `predict` — confirm the
    # method name against the models.LinearRegression implementation before
    # renaming either side.
    pred_y = regressor.predit(test_x)
    acc = metrics.mean_squared_error(test_y, pred_y)  # MSE despite the name
    print(acc)

    regressor2 = LinearRegression2(train_x.shape[1])
    regressor2.fit(train_x, train_y)
    pred_y = regressor2.predict(test_x)
    acc = metrics.mean_squared_error(test_y, pred_y)
    print(acc)