def init_data():
    """Fit an SGD-driven linear regressor on the power-plant data and plot its error curve.

    The data set is split 80/20 into train and test parts with a fixed seed,
    the regressor is fitted on the training part, and the per-iteration
    values returned by ``fit`` are plotted against the iteration number.
    """
    n_epochs = 10000

    features, targets = import_power_plant_data()
    features, targets = features.to_numpy(), targets.to_numpy()

    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.2, shuffle=True, random_state=1234)
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

    optimizer = SGD(lr=0.01)
    regressor = LinearRegression(optimizer, epoch=n_epochs)

    # One x-axis tick per training iteration, counted from 1.
    iterations = list(range(1, n_epochs + 1))
    mse_history = regressor.fit(X_train, y_train)

    # Error on the held-out part; computed but neither printed nor returned.
    test_predictions = regressor.predict(X_test)
    mse_value = Metrics.mse(y_test, test_predictions)

    Plot.plot_time_series(
        iterations,
        mse_history,
        "mse_plot",
        "number of iterations",
        "Mean Square Error (MSE)",
        "MSE vs Number of iterations",
    )
    plt.show()
def main(_):
    """High level pipeline.

    Trains the linear model on ``data/train.csv`` and reports its loss on
    the training, validation and test files. Settings come from ``FLAGS``.
    """
    learning_rate = FLAGS.learning_rate
    w_decay_factor = FLAGS.w_decay_factor  # read from FLAGS but not used below
    num_steps = FLAGS.num_steps
    opt_method = FLAGS.opt_method
    feature_columns = FLAGS.feature_columns.split(',')

    def load_split(csv_path):
        """Read one CSV file and preprocess it with the selected columns."""
        raw = read_dataset(csv_path)
        return preprocess_data(raw, feature_columns=feature_columns,
                               squared_features=True)

    # Training data and a zero-initialised model sized to its feature count.
    train_set = load_split("data/train.csv")
    ndim = train_set[0].shape[1]
    model = LinearRegression(ndim, 'zeros')

    # Anything other than 'iter' selects the closed-form solution.
    if opt_method != 'iter':
        train_model_analytic(train_set, model)
        print('Closed form solution.')
    else:
        train_model(train_set, model, learning_rate,
                    num_steps=num_steps, shuffle=True)
        print('Performed gradient descent.')

    train_loss = eval_model(train_set, model)
    print("Train loss: %s" % train_loss)

    # The x-vs-y plot is only drawn for a single feature dimension.
    if train_set[0].shape[1] == 1:
        plot_x_vs_y(train_set, model)

    # Validation first, then test, each reported with its own label.
    for report_format, csv_path in (("Eval loss: %s", "data/val.csv"),
                                    ("Test loss: %s", "data/test.csv")):
        split_loss = eval_model(load_split(csv_path), model)
        print(report_format % split_loss)
def train_model(args):
    """Train the regression model selected by ``args.model`` and save it.

    ``./saved_models`` is created if missing. For ``"linear"`` and ``"svr"``
    the first ``args.regression_n`` rows of the stock's price CSV are read,
    the model is trained on them and saved under ``./saved_models/<model>``.

    Args:
        args: Namespace with ``model`` plus the ``regression_*`` options;
            for ``"svr"`` also the SVR hyper-parameters read below.

    Returns:
        None. Any other ``args.model`` value is ignored after the
        directory has been created.
    """
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs("./saved_models", exist_ok=True)

    if args.model not in ("linear", "svr"):
        return None

    # Options shared by both model types.
    model_options = {
        "stock_code": args.regression_stock_code,
        "use_stock_price": args.regression_use_stock_price,
        "n": args.regression_n,
    }

    if args.model == "linear":
        model = LinearRegression(model_options)
    else:
        model_options.update({
            "kernel": args.kernel,
            "degree": args.degree,
            # -1 on the command line stands for the "auto" gamma setting.
            "gamma": args.gamma if args.gamma != -1 else "auto",
            "coef0": args.coef0,
            "tol": args.tol,
            "C": args.C,
            "epsilon": args.epsilon,
            "shrinking": args.shrinking,
            "cache_size": args.cache_size,
            "verbose": args.verbose,
            "max_iter": args.max_iter,
        })
        model = SupportVectorRegression(model_options)

    stock_prices = pd.read_csv(
        "./data/stock_prices/" + args.regression_stock_code + ".csv",
        nrows=args.regression_n)
    model.train(stock_prices)
    model.save("./saved_models/" + args.model)
    return None
def update_plot(self, plot_state):
    """Refit the regressor from ``plot_state`` and refresh the fitted line.

    Args:
        plot_state: Mapping with ``'inputs'`` (holding ``'x'`` and ``'y'``),
            ``'Polynomial Degree'`` and ``'L2 Weight Penalty'`` entries.
    """
    samples = plot_state['inputs']
    X = np.array(samples['x'])
    y = np.array(samples['y'])

    basis = ScalarBasisFunctions.Polynomial(plot_state['Polynomial Degree'])
    regressor = LinearRegression(basis_function=basis,
                                 l2_cost=plot_state['L2 Weight Penalty'])
    regressor.fit(X, y)

    # Evaluate the fit on an even grid over X_RANGE and push it to the plot.
    grid = np.linspace(*X_RANGE, self.PLOT_POINTS)
    self.fit_line.data_source.data = {'x': grid, 'y': regressor.predict(grid)}
def generate_regression_predictions():
    """Predict a finishing time for every test row.

    Returns:
        A list with one entry per test row: the predicted number of seconds
        rendered as a ``datetime.timedelta`` string, or the integer ``-1``
        for rows whose third feature equals ``-1``.
    """
    X, Y = get_regression_training_data()
    test_X = get_regression_testing_data()

    model = LinearRegression()
    model.fit(X, Y)

    # Every raw prediction is truncated to whole seconds and formatted.
    predictions = []
    for seconds in model.predict(test_X):
        predictions.append(str(datetime.timedelta(seconds=int(seconds))))

    # Rows flagged with -1 in the third feature have no full marathon
    # (per the original note), so their prediction is replaced by -1.
    for row_index, row in enumerate(test_X):
        if row[2] == -1:
            predictions[row_index] = -1

    return predictions
def main():
    """Train the model on the standardized breast-cancer data set.

    The data is split 70/30 (stratified by label), both parts are cut down
    to a whole number of batches, and the model is trained for 3000 epochs
    with the test part used as validation data.
    """
    def drop_partial_batch(array, count):
        """Keep only the leading rows that fill complete batches."""
        return array[:count - (count % BATCH_SIZE)]

    dataset = datasets.load_breast_cancer()
    features = StandardScaler().fit_transform(dataset.data)
    num_features = features.shape[1]
    labels = dataset.target

    train_features, test_features, train_labels, test_labels = train_test_split(
        features, labels, test_size=0.3, stratify=labels)

    raw_train_size = train_features.shape[0]
    raw_test_size = test_features.shape[0]

    # E.g. 1000 rows with a batch size of 256 keep 768 rows (3 full batches).
    train_features = drop_partial_batch(train_features, raw_train_size)
    train_labels = drop_partial_batch(train_labels, raw_train_size)
    train_size = train_features.shape[0]  # size handed to model.train()

    test_features = drop_partial_batch(test_features, raw_test_size)
    test_labels = drop_partial_batch(test_labels, raw_test_size)
    test_size = test_features.shape[0]

    model = LinearRegression(
        alpha=LEARNING_RATE,
        batch_size=BATCH_SIZE,
        num_classes=NUM_CLASSES,
        sequence_length=num_features,
    )

    model.train(
        epochs=3000,
        log_path="./log_path/linear_regression/",
        train_data=[train_features, train_labels],
        train_size=train_size,
        validation_data=[test_features, test_labels],
        validation_size=test_size,
        result_path="./results/linear_regression/",
    )
def main(_):
    """High level pipeline.

    Performs the training, evaluation and testing stages of the model and
    writes the predicted test labels back to ``data/test_lab.txt``.

    Raises:
        ValueError: If ``model_type`` is not 'linear', 'logistic' or 'svm'.
    """
    # NOTE: settings are hard-coded here instead of being read from FLAGS.
    feature_type = 'default'
    model_type = 'svm'

    # Load dataset.
    data = read_dataset('data/train_lab.txt', 'data/image_data')

    # Data processing. Uses feature_type (not a literal) so the training,
    # validation and test sets are always preprocessed the same way.
    data = preprocess_data(data, feature_type)
    print("Finish preprocessing...")

    # Initialize model.
    ndim = data['image'].shape[1]
    if model_type == 'linear':
        model = LinearRegression(ndim, 'uniform')
    elif model_type == 'logistic':
        model = LogisticRegression(ndim, 'uniform')
    elif model_type == 'svm':
        model = SupportVectorMachine(ndim, 'uniform')
    else:
        # Fail here rather than with an unbound `model` further down.
        raise ValueError("Unsupported model_type: %r" % (model_type,))

    # Train model.
    print("Start to train the model...")
    model = train_model(data, model)

    # Eval model.
    print("Start to evaluate the model...")
    data_val = read_dataset('data/val_lab.txt', 'data/image_data')
    data_val = preprocess_data(data_val, feature_type)
    loss, acc = eval_model(data_val, model)
    print(loss, acc)

    # Test model.
    print("Start doing the test")
    data_test = read_dataset('data/test_lab.txt', 'data/image_data')
    print("Start preprocess testing data")
    data_test = preprocess_data(data_test, feature_type)
    print("Making predictions")
    data_test['label'] = model.predict(model.forward(data_test['image']))
    print("Output the results to csv file")
    # NOTE(review): this writes to the same path the test set was read from.
    write_dataset('data/test_lab.txt', data_test)

    # Generate Kaggle output.
    print("Finished!")
def test_input_output(self):
    """Check the result shapes of forward, backward and loss on zero inputs."""
    n_samples, n_features = 4, 10
    model = LinearRegression(n_features)
    x = np.zeros([n_samples, n_features])
    y = np.zeros([n_samples])

    # Forward pass: one score per sample.
    scores = model.forward(x)
    self.assertEqual(scores.shape, (n_samples,))

    # Backward pass: one gradient entry per feature plus one extra.
    gradient = model.backward(scores, y)
    self.assertEqual(gradient.shape, (n_features + 1,))

    # Loss: a scalar.
    loss = model.loss(scores, y)
    self.assertEqual(loss.shape, ())
def main():
    """Fit the linear regression model on synthetic data and show diagnostics.

    Command line options select the fitting method (``-m``) and the
    iteration count (``-n``). The residual plot is always drawn; the
    cost-per-iteration plot only for ``grad_descent``.
    """
    parser = argparse.ArgumentParser(description='Linear Regression test')
    parser.add_argument('-m', '--method', type=str, default='ols',
                        help='model method: ols or grad_descent')
    parser.add_argument('-n', '--n_iter', type=int, default=50,
                        help='number of iterations for grad_descent')
    options = parser.parse_args()

    separator = "-" * 20

    X, y, m, bias = generate_linear_data(n_samples=1000, n_features=10,
                                         bias=10)
    X_train, X_test, y_train, y_test = split_dataset(X, y)
    print("Training size: %s, Test size %s" % (len(X_train), len(X_test)))
    print(separator)

    # Fit and predict
    model = LinearRegression(n_iter=options.n_iter)
    model.fit(X_train, y_train, options.method)
    y_pred = model.predict(X_test)
    print(separator)

    # Scoring; the return value of score() is not used here.
    model.score(y_test, y_pred)
    print(separator)

    # The true coefficient vector is shown with the bias as first entry.
    print("True coefs: ", np.insert(m, 0, bias))
    print("Model coefs:", model.beta_hat)
    print(separator)

    # Plotting
    plot_regression_residual(y_test, y_pred, bins=int(len(X_train) / 20))
    if options.method == 'grad_descent':
        plot_iteration_vs_cost(options.n_iter, model.cost_h)
def linear_predict(stock_code):
    """Return predictions of the saved linear model for ``stock_code`` as JSON.

    Reads ``useStockPrice`` and ``n`` from the request's query arguments.

    Args:
        stock_code: Code of the stock whose saved model is loaded.

    Returns:
        A JSON response: ``{"success": True, "predictions": [...]}`` on
        success, or ``{"success": False, "error": {"code":
        "invalid-argument"}}`` when an argument is missing, ``n`` is not an
        integer, or no saved model could be loaded.
    """
    def invalid_argument():
        """Build the error response shared by all rejection paths."""
        return jsonify({
            "success": False,
            "error": {"code": "invalid-argument"},
        })

    if "useStockPrice" not in request.args or "n" not in request.args:
        return invalid_argument()

    try:
        n = int(request.args.get("n"))
    except ValueError:
        # A non-numeric "n" is a client error, not an unhandled exception.
        return invalid_argument()

    model_options = {
        "stock_code": stock_code,
        # Only the exact string "true" enables stock-price mode.
        "use_stock_price": request.args.get("useStockPrice") == "true",
        "n": n,
    }

    model = LinearRegression(model_options, load=True,
                             saved_model_dir="./saved_models/linear")
    if model.model is None:
        return invalid_argument()

    if not model_options["use_stock_price"]:
        # NOTE(review): stock_code is concatenated into a file path; if it
        # comes straight from the URL it should be validated -- confirm.
        stock_prices = pd.read_csv(
            "./data/stock_prices/" + stock_code + ".csv", nrows=1)
        predictions = model.predict(stock_prices.loc[0, "adjusted_close"])
    else:
        predictions = model.predict()

    return jsonify({"success": True, "predictions": predictions.tolist()})
def main(_):
    """High level pipeline.

    Builds the model selected by ``FLAGS.model_type``, trains it and
    evaluates it. Settings are read from ``FLAGS``.

    Raises:
        ValueError: If ``FLAGS.model_type`` is not 'linear', 'logistic'
            or 'svm'.
    """
    learning_rate = FLAGS.learning_rate
    feature_type = FLAGS.feature_type
    model_type = FLAGS.model_type
    num_steps = FLAGS.num_steps

    # Load dataset.
    # NOTE(review): training reads 'data/val_lab.txt' -- confirm this is the
    # intended training split.
    data = read_dataset('data/val_lab.txt', 'data/image_data')

    # Data processing.
    data = preprocess_data(data, feature_type)

    # Initialize model.
    ndim = data['image'].shape[1]
    if model_type == 'linear':
        model = LinearRegression(ndim, 'ones')
    elif model_type == 'logistic':
        model = LogisticRegression(ndim, 'zeros')
    elif model_type == 'svm':
        model = SupportVectorMachine(ndim, 'zeros')
    else:
        # Previously an unknown type left `model` unbound and failed later
        # with an unrelated-looking error.
        raise ValueError("Unsupported model_type: %r" % (model_type,))

    # Train model.
    model = train_model(data, model, learning_rate, num_steps=num_steps)

    # Eval model. The returned values are not used further.
    data_test = read_dataset('data/test_lab.txt', 'data/image_data')
    data_test = preprocess_data(data_test, feature_type)
    acc, loss = eval_model(data_test, model)

    # Test model.
    # NOTE(review): the test data is loaded again here and then left unused.
    data_test = read_dataset('data/test_lab.txt', 'data/image_data')
    data_test = preprocess_data(data_test, feature_type)
def main():
    """Compare the normal-equation solution with gradient descent.

    Generates a one-feature regression problem, standardizes the feature,
    prints theta and cost for the normal-equation solution and for the
    gradient-descent result, then hands everything to ``plotAndSaveGraphs``.
    """
    args = getArguments()
    print('[DEBUG]', args)

    x, y = make_regression(
        n_samples=args.n_samples,
        n_features=1,
        noise=args.noise,
        bias=np.random.uniform(-200, 200),
        random_state=42,
    )

    scaler = StandardScaler()
    x = scaler.fit_transform(x)

    lr = LinearRegression(
        x,
        y.reshape(-1, 1),
        alpha=args.lr,
        max_epochs=args.max_epochs,
        epsilon=args.epsilon,
        batch_size=args.batch_size,
    )

    # Reference solution from the normal equations.
    best_theta = lr.getThetaByNormalEquations()
    best_cost = lr.getCost(lr.getPrediction(lr.x, best_theta), lr.y)
    print(f'[DEBUG] Best Theta: {best_theta.tolist()}')
    print(f'[DEBUG] Best Cost: {best_cost}')

    # Iterative solution; the result is read back from lr.theta.
    lr.runGradientDescent()
    optimized_theta = lr.theta
    optimized_cost = lr.getCost(lr.getPrediction(lr.x, optimized_theta), lr.y)
    print(f'[DEBUG] Optimized Theta: {optimized_theta.tolist()}')
    print(f'[DEBUG] Optimized Cost: {optimized_cost}')

    plotAndSaveGraphs(lr, args, scaler)
from data.datasets import StatsDatasetRegression
from models.linear_regression import LinearRegression
from trainer.regression_trainer import RegressionTrainer
import visualizer
import pandas as pd
import torch

# ---------------------------------------------------------------------------------------------
# This file trains and tests performance of the linear regression model on the advanced dataset
# ---------------------------------------------------------------------------------------------

# MODEL VARIABLES
# Everything below is evaluated at import time, including both CSV reads.
# NOTE(review): the two positional arguments are presumably the input and
# output sizes of the model -- confirm against models.linear_regression.
MODEL = LinearRegression(18, 2)
# Training and testing data, each wrapped from a processed CSV file.
TRAINING_SET = StatsDatasetRegression(
    pd.read_csv("../../data/datasets/processed/adv_train_data.csv"))
TESTING_SET = StatsDatasetRegression(
    pd.read_csv("../../data/datasets/processed/adv_test_data.csv"))
EPOCHS = 500
LEARNING_RATE = 0.001
# Plain SGD over the model's parameters with a mean-squared-error loss.
OPTIMIZER = torch.optim.SGD(MODEL.parameters(), lr=LEARNING_RATE)
LOSS = torch.nn.MSELoss()

# Train, report the best results and save the accuracy/loss graphs only when
# this file is run as a script.
if __name__ == '__main__':
    trainer = RegressionTrainer(MODEL, TRAINING_SET, TESTING_SET, EPOCHS,
                                OPTIMIZER, LOSS)
    trainer.train()
    trainer.print_best_results()
    # Both graphs are drawn from the trainer's validation series.
    visualizer.plot_accuracy(trainer.epochs, trainer.val_accuracy,
                             "../../results/graphs/accuracy/adv_reg_acc.png")
    visualizer.plot_loss(trainer.epochs, trainer.val_loss,
                         "../../results/graphs/loss/adv_reg_loss.png")
def train_models(train_models_data):
    """Trains and saves every model described in ``train_models_data``.

    Args:
        train_models_data: Iterable of model descriptions. Each item is
            indexed directly, so this is the list of descriptions itself
            (presumably the "models" list of train_models_sample.json --
            confirm against the caller). Item format:

            {
                "model": "model type, matches MODEL in a model class",
                "stockCode": "the predicting stock",
                "modelOptions": "model options dict",
                "inputOptions": "input options dict"
            }

            The key "stock_code" is accepted as a fallback for "stockCode".

    Raises:
        ValueError: If a description names an unknown model type.
        KeyError: If a description lacks a required key.
    """
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    makedirs(SAVED_MODELS_DIR, exist_ok=True)

    for position, spec in enumerate(train_models_data, start=1):
        print("Model {}".format(position))

        model_type = spec["model"]
        model_options = spec["modelOptions"]
        input_options = spec["inputOptions"]
        # The documented key is "stockCode"; the index-regression branches
        # used to read "stock_code" instead, so both spellings are accepted.
        if "stockCode" in spec:
            stock_code = spec["stockCode"]
        else:
            stock_code = spec["stock_code"]

        # initialize the model
        if model_type == LinearRegression.MODEL:
            model = LinearRegression(model_options, input_options,
                                     stock_code=stock_code)
        elif model_type == SupportVectorRegression.MODEL:
            model = SupportVectorRegression(model_options, input_options,
                                            stock_code=stock_code)
        elif model_type == LinearIndexRegression.MODEL:
            model = LinearIndexRegression(model_options, input_options,
                                          stock_code)
        elif model_type == SupportVectorIndexRegression.MODEL:
            model = SupportVectorIndexRegression(model_options, input_options,
                                                 stock_code)
        elif model_type == DenseNeuralNetwork.MODEL:
            model = DenseNeuralNetwork(model_options, input_options,
                                       stock_code=stock_code)
        else:
            # Without this, an unknown type silently reused the model built
            # in the previous iteration.
            raise ValueError(
                "Unknown model type: {!r}".format(model_type))

        # prepare the data
        x, y, other_data = build_training_dataset(
            input_options, model.model_options["predict_n"])

        if model_type in [
                LinearRegression.MODEL, SupportVectorRegression.MODEL,
                DenseNeuralNetwork.MODEL
        ]:
            # get the training set: the last 100 samples are left out
            x = x[:-100]
            y = y[:-100]

        if "normalize" in input_options:
            model.input_options["normalize_data"] = other_data[
                "normalize_data"]

        # train the model
        model.train(x, y)

        # save the model
        model.save(SAVED_MODELS_DIR_MAP[model_type])
######################### main #########################
### load in the test set ###
# NOTE: pickle.load can execute code embedded in the file; only load
# feature files from a trusted source.
with open("../data_augmentation_models/data/all_early_stages/pkl/test_features.pkl", "rb") as fin:
    test_features = pickle.load(fin)

### define the model ###
# to get the arguments
params = load_config('./models/config.yaml')
# define the model
bootstrap_model = LinearRegression(params["feature_dim"], params["output_dim"])

# Pick the device before loading so the checkpoint can be mapped onto it.
# Without map_location, a checkpoint saved from CUDA tensors cannot be
# loaded on a CPU-only machine, although this script supports that fallback.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# load in the pre-trained model
PATH_pretrained = "./models/model_6.pth"
print("read in", PATH_pretrained)
bootstrap_model.load_state_dict(
    torch.load(PATH_pretrained, map_location=device))

# move the model onto the selected device
print("the device is", device)
bootstrap_model.to(device)

### bootstrap to get confidence interval ###
except: scores = pd.DataFrame( columns=['model', 'target_month', 'window_length', 'score']) scores = pd.DataFrame( columns=['model', 'target_month', 'window_length', 'score']) dataset = 'datasets/r_non_stationary.pkl' models = [ _LSTM("LSTM"), CONVNET("CONVNET"), _Prophet("Prophet"), MEAN("MEAN"), LAST("LAST"), LinearRegression("Linear Regression", False), SES("SES") ] lr_u = LinearRegression("Univariate Linear Regression", True) lr_m = LinearRegression("Multivariate Linear Regression", False) mean = MEAN("MEAN") models = [mean, lr_u, lr_m, _Prophet("Prophet")] models = add_xgb(models) models = [Gauss("Gauss-1"), Gauss("Gauss-2"), lr_m, mean] data = get_data_from_dataset(dataset) source_data = preprocess(data)
from models.linear_regression import LinearRegression

# Apply the shared plot style before anything is drawn.
matplotlib.rc_file('../plotstyle')

# Synthetic data: x = 0..19 with a leading column of ones, and y = x plus
# uniform noise in [-2, 2]. The seed keeps the noise reproducible.
random.seed(0)
X = np.arange(20, dtype='float32').reshape(20, 1)
X = np.concatenate((np.ones((20, 1), dtype='float32'), X), axis=1)
y = np.array([i + random.uniform(-2, 2) for i in range(20)],
             dtype='float32').reshape(20, 1)

# Fit the model; fit() returns the weights used for the line below.
model = LinearRegression(data=X, labels=y)
weights = model.fit()

# Line of best fit: weights[0][0] is used as intercept, weights[1][0] as slope.
x_bf = np.linspace(0, 20, dtype='float32')
intercept, slope = weights[0][0], weights[1][0]
y_bf = np.array([intercept + x * slope for x in x_bf], dtype='float32')

# Samples as a scatter plot with the fitted line on top.
plt.scatter(X[:, 1], y, color='b', s=50, label='Samples')
plt.plot(x_bf, y_bf, color='r', label='Fitted Model')
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.title('Linear Regression')
plt.legend()
plt.show()
from data.datasets import StatsDatasetRegression
from models.linear_regression import LinearRegression
from trainer.regression_trainer import RegressionTrainer
import pandas as pd
import visualizer
import torch

# -------------------------------------------------------------------------------------------
# This file trains and tests performance of the linear regression model on the simple dataset
# -------------------------------------------------------------------------------------------

# MODEL VARIABLES
# Everything below is evaluated at import time, including both CSV reads.
# NOTE(review): the two positional arguments are presumably the input and
# output sizes of the model -- confirm against models.linear_regression.
MODEL = LinearRegression(10, 2)
# Training and testing data, each wrapped from a processed CSV file.
TRAINING_SET = StatsDatasetRegression(pd.read_csv("../../data/datasets/processed/simple_train_data.csv"))
TESTING_SET = StatsDatasetRegression(pd.read_csv("../../data/datasets/processed/simple_test_data.csv"))
EPOCHS = 500
LEARNING_RATE = 0.002
# Plain SGD over the model's parameters with a mean-squared-error loss.
OPTIMIZER = torch.optim.SGD(MODEL.parameters(), lr=LEARNING_RATE)
LOSS = torch.nn.MSELoss()

# Train, report the best results and save the accuracy/loss graphs only when
# this file is run as a script.
if __name__ == '__main__':
    trainer = RegressionTrainer(MODEL, TRAINING_SET, TESTING_SET, EPOCHS, OPTIMIZER, LOSS)
    trainer.train()
    trainer.print_best_results()
    # Both graphs are drawn from the trainer's validation series.
    visualizer.plot_accuracy(trainer.epochs, trainer.val_accuracy, "../../results/graphs/accuracy/simple_reg_acc.png")
    visualizer.plot_loss(trainer.epochs, trainer.val_loss, "../../results/graphs/loss/simple_reg_loss.png")
y_batch(numpy.ndarray): label data of dimension (N, 1). model(LinearModel): Initialized linear model. """ f = LinearRegression.forward(model, x_batch) grad = learning_rate * LinearRegression.backward(model, f, y_batch) model.w = model.w - learning_rate * grad dataset = io_tools.read_dataset('train.csv') # print(dataset) data = data_tools.preprocess_data(dataset) ndim = data[0].shape[1] print('data[0]', data[0]) print('ndim', ndim) # print(data) train_model(data, LinearRegression(ndim)) def train_model_analytic(processed_dataset, model): """Computes and sets the optimal model weights (model.w). Args: processed_dataset(list): List of [x,y] processed from utils.data_tools.preprocess_data. model(LinearRegression): LinearRegression model. """ # model.w = model.w - def eval_model(processed_dataset, model): """Performs evaluation on a dataset.
from data.datasets import StatsDatasetRegression
from models.linear_regression import LinearRegression
from trainer.regression_trainer import RegressionTrainer
import pandas as pd
import visualizer
import torch

# ----------------------------------------------------------------------------------------------
# This file trains and tests performance of the linear regression model on the optimized dataset
# ----------------------------------------------------------------------------------------------

# MODEL VARIABLES
# Everything below is evaluated at import time, including both CSV reads.
# NOTE(review): the two positional arguments are presumably the input and
# output sizes of the model -- confirm against models.linear_regression.
MODEL = LinearRegression(4, 2)
# Training and testing data, each wrapped from a processed CSV file.
TRAINING_SET = StatsDatasetRegression(pd.read_csv("../../data/datasets/processed/opt_reg_train.csv"))
TESTING_SET = StatsDatasetRegression(pd.read_csv("../../data/datasets/processed/opt_reg_test.csv"))
EPOCHS = 500
LEARNING_RATE = 0.007
# Plain SGD over the model's parameters with a mean-squared-error loss.
OPTIMIZER = torch.optim.SGD(MODEL.parameters(), lr=LEARNING_RATE)
LOSS = torch.nn.MSELoss()

# Train, report the best results and save the accuracy/loss graphs only when
# this file is run as a script.
if __name__ == '__main__':
    trainer = RegressionTrainer(MODEL, TRAINING_SET, TESTING_SET, EPOCHS, OPTIMIZER, LOSS)
    trainer.train()
    trainer.print_best_results()
    # Both graphs are drawn from the trainer's validation series.
    visualizer.plot_accuracy(trainer.epochs, trainer.val_accuracy, "../../results/graphs/accuracy/opt_reg_acc.png")
    visualizer.plot_loss(trainer.epochs, trainer.val_loss, "../../results/graphs/loss/opt_reg_loss.png")