Example #1
def init_data():
    # Load the power plant dataset and convert it to numpy arrays.
    X, y = import_power_plant_data()
    X, y = X.to_numpy(), y.to_numpy()

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=True, random_state=1234)
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

    # Fit a linear regression with SGD, recording the training MSE per epoch.
    opt = SGD(lr=0.01)
    epoch = 10000
    regressor = LinearRegression(opt, epoch=epoch)
    x_plot = list(range(1, epoch + 1))
    all_mse = regressor.fit(X_train, y_train)

    # Evaluate on the held-out test set.
    predicted = regressor.predict(X_test)
    mse_value = Metrics.mse(y_test, predicted)
    print("Test MSE:", mse_value)

    # Plot the training MSE against the iteration count.
    Plot.plot_time_series(x_plot, all_mse, "mse_plot", "number of iterations",
                          "Mean Square Error (MSE)", "MSE vs Number of iterations")
    plt.show()
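`Metrics.mse` is a project helper that is not shown in this snippet; a minimal sketch consistent with how it is called above:

import numpy as np

class Metrics:
    @staticmethod
    def mse(y_true, y_pred):
        # Mean squared error between targets and predictions.
        return np.mean((np.asarray(y_true) - np.asarray(y_pred)) ** 2)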
Example #2
def main(_):
    """High level pipeline.
    This script performs the trainsing, evaling and testing state of the model.
    """
    learning_rate = FLAGS.learning_rate
    w_decay_factor = FLAGS.w_decay_factor
    num_steps = FLAGS.num_steps
    opt_method = FLAGS.opt_method
    feature_columns = FLAGS.feature_columns.split(',')

    # Load dataset.
    dataset = read_dataset("data/train.csv")

    # Data processing.
    train_set = preprocess_data(dataset,
                                feature_columns=feature_columns,
                                squared_features=True)

    # Initialize model.
    ndim = train_set[0].shape[1]
    model = LinearRegression(ndim, 'zeros')

    # Train model.
    if opt_method == 'iter':
        # Perform gradient descent.
        train_model(train_set,
                    model,
                    learning_rate,
                    num_steps=num_steps,
                    shuffle=True)
        print('Performed gradient descent.')
    else:
        # Compute closed form solution.
        train_model_analytic(train_set, model)
        print('Closed form solution.')

    train_loss = eval_model(train_set, model)
    print("Train loss: %s" % train_loss)

    # Plot the x vs. y if one dimension.
    if train_set[0].shape[1] == 1:
        plot_x_vs_y(train_set, model)

    # Eval model.
    raw_eval = read_dataset("data/val.csv")
    eval_set = preprocess_data(raw_eval,
                               feature_columns=feature_columns,
                               squared_features=True)
    eval_loss = eval_model(eval_set, model)
    print("Eval loss: %s" % eval_loss)

    # Test model.
    raw_test = read_dataset("data/test.csv")
    test_set = preprocess_data(raw_test,
                               feature_columns=feature_columns,
                               squared_features=True)
    test_loss = eval_model(test_set, model)
    print("Test loss: %s" % test_loss)
Example #3
def train_model(args):
    if not os.path.isdir("./saved_models"):
        os.makedirs("./saved_models")
    if args.model == "linear":
        model = LinearRegression({
            "stock_code": args.regression_stock_code,
            "use_stock_price": args.regression_use_stock_price,
            "n": args.regression_n
        })
        stock_prices = pd.read_csv("./data/stock_prices/" +
                                   args.regression_stock_code + ".csv",
                                   nrows=args.regression_n)
        model.train(stock_prices)

        model.save("./saved_models/linear")
    elif args.model == "svr":
        model = SupportVectorRegression({
            "stock_code": args.regression_stock_code,
            "use_stock_price": args.regression_use_stock_price,
            "n": args.regression_n,
            "kernel": args.kernel,
            "degree": args.degree,
            "gamma": args.gamma if args.gamma != -1 else "auto",
            "coef0": args.coef0,
            "tol": args.tol,
            "C": args.C,
            "epsilon": args.epsilon,
            "shrinking": args.shrinking,
            "cache_size": args.cache_size,
            "verbose": args.verbose,
            "max_iter": args.max_iter
        })

        stock_prices = pd.read_csv("./data/stock_prices/" +
                                   args.regression_stock_code + ".csv",
                                   nrows=args.regression_n)

        model.train(stock_prices)

        model.save("./saved_models/svr")
    else:
        return
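The `args` namespace is built elsewhere; a hypothetical minimal parser covering just the attributes the "linear" path reads (argument names inferred from the code above, defaults made up):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--model", choices=["linear", "svr"], default="linear")
parser.add_argument("--regression_stock_code", default="0001")
parser.add_argument("--regression_use_stock_price", action="store_true")
parser.add_argument("--regression_n", type=int, default=10)
train_model(parser.parse_args())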
Example #4
def update_plot(self, plot_state):
    data = plot_state['inputs']
    X, y = np.array(data['x']), np.array(data['y'])
    regressor = LinearRegression(
        basis_function=ScalarBasisFunctions.Polynomial(
            plot_state['Polynomial Degree']),
        l2_cost=plot_state['L2 Weight Penalty'])
    regressor.fit(X, y)
    inputs = np.linspace(*X_RANGE, self.PLOT_POINTS)
    self.fit_line.data_source.data = dict(x=inputs, y=regressor.predict(inputs))
Example #5
def generate_regression_predictions():
  X, Y = get_regression_training_data()
  test_X = get_regression_testing_data()

  lr = LinearRegression()
  lr.fit(X, Y)
  predictions = [str(datetime.timedelta(seconds=int(s))) for s in lr.predict(test_X)]

  for i, x in enumerate(test_X):
    # set those who don't have a full marathon to -1
    if x[2] == -1:
      predictions[i] = -1

  return predictions
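For reference, the `datetime.timedelta` conversion above renders raw seconds as H:MM:SS strings:

import datetime

str(datetime.timedelta(seconds=9000))  # '2:30:00'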
Example #6
def main():
    dataset = datasets.load_breast_cancer()

    features = dataset.data

    features = StandardScaler().fit_transform(features)

    num_features = features.shape[1]

    labels = dataset.target

    train_features, test_features, train_labels, test_labels = train_test_split(
        features, labels, test_size=0.3, stratify=labels)

    train_size = train_features.shape[0]
    test_size = test_features.shape[0]

    # slice the dataset to be exact as per the batch size
    # e.g. train_size = 1898322, batch_size = 256
    # [:1898322-(1898322%256)] = [:1898240]
    # 1898322 // 256 = 7415; 7415 * 256 = 1898240
    train_features = train_features[:train_size - (train_size % BATCH_SIZE)]
    train_labels = train_labels[:train_size - (train_size % BATCH_SIZE)]

    # modify the size of the dataset to be passed on model.train()
    train_size = train_features.shape[0]

    # slice the dataset to be exact as per the batch size
    test_features = test_features[:test_size - (test_size % BATCH_SIZE)]
    test_labels = test_labels[:test_size - (test_size % BATCH_SIZE)]

    test_size = test_features.shape[0]

    model = LinearRegression(
        alpha=LEARNING_RATE,
        batch_size=BATCH_SIZE,
        num_classes=NUM_CLASSES,
        sequence_length=num_features,
    )

    model.train(
        epochs=3000,
        log_path="./log_path/linear_regression/",
        train_data=[train_features, train_labels],
        train_size=train_size,
        validation_data=[test_features, test_labels],
        validation_size=test_size,
        result_path="./results/linear_regression/",
    )
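The slicing pattern repeated above is easy to factor out; a small sketch of an equivalent helper:

def truncate_to_batches(array, batch_size):
    # Drop the tail rows so len(array) is an exact multiple of batch_size.
    return array[:len(array) - (len(array) % batch_size)]

# e.g. train_features = truncate_to_batches(train_features, BATCH_SIZE)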
Example #7
def main(_):
    """High level pipeline.

    This script performs the training, evaluation, and testing stages of the model.
    """
    # Hyperparameters are hard-coded here rather than read from FLAGS.
    feature_type = 'default'
    model_type = 'svm'
    # Load dataset.
    data = read_dataset('data/train_lab.txt', 'data/image_data')

    # Data Processing.
    data = preprocess_data(data, feature_type)
    print("Finish preprocessing...")

    # Initialize model.
    ndim = data['image'].shape[1]
    if model_type == 'linear':
        model = LinearRegression(ndim, 'uniform')
    elif model_type == 'logistic':
        model = LogisticRegression(ndim, 'uniform')
    elif model_type == 'svm':
        model = SupportVectorMachine(ndim, 'uniform')

    # Train Model.
    print("Start to train the model...")
    model = train_model(data, model)

    # Eval Model.
    print("Start to evaluate the model...")
    data_val = read_dataset('data/val_lab.txt', 'data/image_data')
    data_val = preprocess_data(data_val, feature_type)
    loss, acc = eval_model(data_val, model)
    print(loss, acc)

    # Test Model.
    print("Start doing the test")
    data_test = read_dataset('data/test_lab.txt', 'data/image_data')
    print("Start preprocess testing data")
    data_test = preprocess_data(data_test, feature_type)
    print("Making predictions")
    data_test['label'] = model.predict(model.forward(data_test['image']))
    print("Output the results to csv file")
    write_dataset('data/test_lab.txt', data_test)
    # Generate Kaggle output.
    print("Finished!")
Example #8
def test_input_output(self):
    model = LinearRegression(10)

    x = np.zeros([4, 10])
    y = np.zeros([4, ])

    # Check forward shape.
    f = model.forward(x)
    self.assertEqual(f.shape, (4,))

    # Check backward shape.
    gradient = model.backward(f, y)
    self.assertEqual(gradient.shape, (11,))

    # Check loss shape.
    loss = model.loss(f, y)
    self.assertEqual(loss.shape, ())
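A minimal `LinearRegression` consistent with the shapes this test asserts, assuming the model keeps ndims + 1 weights with the extra entry acting as a bias (the real class is not shown):

import numpy as np

class LinearRegression:
    def __init__(self, ndims):
        self.w = np.zeros(ndims + 1)
        self._x_hat = None

    def forward(self, x):
        # Append a ones column so the last weight is the bias; cache for backward.
        self._x_hat = np.hstack([x, np.ones((x.shape[0], 1))])
        return self._x_hat.dot(self.w)  # shape (N,)

    def backward(self, f, y):
        # Gradient of the mean squared error with respect to w, shape (ndims + 1,).
        return 2 * self._x_hat.T.dot(f - y) / len(y)

    def loss(self, f, y):
        # Scalar mean squared error, shape ().
        return np.mean((f - y) ** 2)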
Example #9
def main():
    parser = argparse.ArgumentParser(description='Linear Regression test')
    parser.add_argument('-m',
                        '--method',
                        type=str,
                        default='ols',
                        help='model method: ols or grad_descent')
    parser.add_argument('-n',
                        '--n_iter',
                        type=int,
                        default=50,
                        help='number of iterations for grad_descent')
    args = parser.parse_args()
    method = args.method
    n_iter = args.n_iter

    X, y, m, bias = \
        generate_linear_data(n_samples=1000, n_features=10, bias=10)
    X_train, X_test, y_train, y_test = split_dataset(X, y)
    print("Training size: %s, Test size %s" % (len(X_train), len(X_test)))
    print("-" * 20)

    # Fit and predict
    model = LinearRegression(n_iter=n_iter)
    model.fit(X_train, y_train, method)
    y_pred = model.predict(X_test)
    print("-" * 20)

    # Scoring
    model.score(y_test, y_pred)
    print("-" * 20)
    print("True coefs: ", np.insert(m, 0, bias))
    print("Model coefs:", model.beta_hat)
    print("-" * 20)

    # Plotting
    plot_regression_residual(y_test, y_pred, bins=int(len(X_train) / 20))
    if method == 'grad_descent':
        plot_iteration_vs_cost(n_iter, model.cost_h)
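Run from the command line (the script name is an assumption; the flags match the parser above):

python linear_regression_test.py --method grad_descent --n_iter 100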
Example #10
def linear_predict(stock_code):
    if "useStockPrice" not in request.args or "n" not in request.args:
        return jsonify({
            "success": False,
            "error": {
                "code": "invalid-argument"
            }
        })

    model_options = {
        "stock_code": stock_code,
        "use_stock_price": request.args.get("useStockPrice") == "true",
        "n": int(request.args.get("n"))
    }

    model = LinearRegression(model_options,
                             load=True,
                             saved_model_dir="./saved_models/linear")
    if model.model is None:
        return jsonify({
            "success": False,
            "error": {
                "code": "invalid-argument"
            }
        })

    if not model_options["use_stock_price"]:
        stock_prices = pd.read_csv("./data/stock_prices/" + stock_code +
                                   ".csv",
                                   nrows=1)
        predictions = model.predict(stock_prices.loc[0, "adjusted_close"])
    else:
        predictions = model.predict()

    return jsonify({"success": True, "predictions": predictions.tolist()})
Example #11
def main(_):
    """High level pipeline.

    This script performs the training, evaluation, and testing stages of the model.
    """
    learning_rate = FLAGS.learning_rate
    feature_type = FLAGS.feature_type
    model_type = FLAGS.model_type
    num_steps = FLAGS.num_steps

    # Load dataset.
    data = read_dataset('data/val_lab.txt', 'data/image_data')


    # Data Processing.
    data = preprocess_data(data, feature_type)

    # Initialize model.
    ndim = data['image'].shape[1]

    if model_type == 'linear':
        model = LinearRegression(ndim, 'ones')
    elif model_type == 'logistic':
        model = LogisticRegression(ndim, 'zeros')
    elif model_type == 'svm':
        model = SupportVectorMachine(ndim, 'zeros')

    # Train Model.
    model = train_model(data, model, learning_rate, num_steps=num_steps)

    # Eval Model.
    data_test = read_dataset('data/test_lab.txt', 'data/image_data')
    data_test = preprocess_data(data_test, feature_type)
    acc, loss = eval_model(data_test, model)

    # Test Model.
    data_test = read_dataset('data/test_lab.txt', 'data/image_data')
    data_test = preprocess_data(data_test, feature_type)
Example #12
def main():
    args = getArguments()
    print('[DEBUG]', args)

    x, y = make_regression(n_samples=args.n_samples,
                           n_features=1,
                           noise=args.noise,
                           bias=np.random.uniform(-200, 200),
                           random_state=42)

    scaler = StandardScaler()
    x = scaler.fit_transform(x)

    lr = LinearRegression(x,
                          y.reshape(-1, 1),
                          alpha=args.lr,
                          max_epochs=args.max_epochs,
                          epsilon=args.epsilon,
                          batch_size=args.batch_size)

    bestTheta = lr.getThetaByNormalEquations()
    bestPredictions = lr.getPrediction(lr.x, bestTheta)
    bestCost = lr.getCost(bestPredictions, lr.y)

    print(f'[DEBUG] Best Theta: {bestTheta.tolist()}')
    print(f'[DEBUG] Best Cost: {bestCost}')

    lr.runGradientDescent()
    optimizedTheta = lr.theta
    optimizedPredictions = lr.getPrediction(lr.x, optimizedTheta)
    optimizedCost = lr.getCost(optimizedPredictions, lr.y)

    print(f'[DEBUG] Optimized Theta: {optimizedTheta.tolist()}')
    print(f'[DEBUG] Optimized Cost: {optimizedCost}')

    plotAndSaveGraphs(lr, args, scaler)
Example #13
from data.datasets import StatsDatasetRegression
from models.linear_regression import LinearRegression
from trainer.regression_trainer import RegressionTrainer
import visualizer
import pandas as pd
import torch

# ---------------------------------------------------------------------------------------------
# This file trains and tests performance of the linear regression model on the advanced dataset
# ---------------------------------------------------------------------------------------------

# MODEL VARIABLES
MODEL = LinearRegression(18, 2)
TRAINING_SET = StatsDatasetRegression(
    pd.read_csv("../../data/datasets/processed/adv_train_data.csv"))
TESTING_SET = StatsDatasetRegression(
    pd.read_csv("../../data/datasets/processed/adv_test_data.csv"))
EPOCHS = 500
LEARNING_RATE = 0.001
OPTIMIZER = torch.optim.SGD(MODEL.parameters(), lr=LEARNING_RATE)
LOSS = torch.nn.MSELoss()

if __name__ == '__main__':
    trainer = RegressionTrainer(MODEL, TRAINING_SET, TESTING_SET, EPOCHS,
                                OPTIMIZER, LOSS)
    trainer.train()
    trainer.print_best_results()
    visualizer.plot_accuracy(trainer.epochs, trainer.val_accuracy,
                             "../../results/graphs/accuracy/adv_reg_acc.png")
    visualizer.plot_loss(trainer.epochs, trainer.val_loss,
                         "../../results/graphs/loss/adv_reg_loss.png")
Example #14
def train_models(train_models_data):
    """Trains models.

    Args:
        train_models_data: Train models data.
            Format:
            {
                models: [
                    {
                        "model": "model type, matches MODEL in a model class",
                        "stockCode": "the predicting stock",
                        "modelOptions": "model options dict",
                        "inputOptions": "input options dict"
                    }
                ]
            }
            Refer to train_models_sample.json.

    """

    if not path.isdir(SAVED_MODELS_DIR):
        makedirs(SAVED_MODELS_DIR)

    for train_model_data_idx, train_model_data in enumerate(train_models_data):
        print("Model {}".format(train_model_data_idx + 1))

        # initialize the model
        if train_model_data["model"] == LinearRegression.MODEL:
            model = LinearRegression(train_model_data["modelOptions"],
                                     train_model_data["inputOptions"],
                                     stock_code=train_model_data["stockCode"])
        elif train_model_data["model"] == SupportVectorRegression.MODEL:
            model = SupportVectorRegression(
                train_model_data["modelOptions"],
                train_model_data["inputOptions"],
                stock_code=train_model_data["stockCode"])
        elif train_model_data["model"] == LinearIndexRegression.MODEL:
            model = LinearIndexRegression(train_model_data["modelOptions"],
                                          train_model_data["inputOptions"],
                                          train_model_data["stock_code"])
        elif train_model_data["model"] == SupportVectorIndexRegression.MODEL:
            model = SupportVectorIndexRegression(
                train_model_data["modelOptions"],
                train_model_data["inputOptions"],
                train_model_data["stock_code"])
        elif train_model_data["model"] == DenseNeuralNetwork.MODEL:
            model = DenseNeuralNetwork(
                train_model_data["modelOptions"],
                train_model_data["inputOptions"],
                stock_code=train_model_data["stockCode"])

        # prepare the data
        x, y, other_data = build_training_dataset(
            train_model_data["inputOptions"], model.model_options["predict_n"])
        if train_model_data["model"] in [
                LinearRegression.MODEL, SupportVectorRegression.MODEL,
                DenseNeuralNetwork.MODEL
        ]:
            # get the training set
            x = x[:-100]
            y = y[:-100]
        if "normalize" in train_model_data["inputOptions"]:
            model.input_options["normalize_data"] = other_data[
                "normalize_data"]
        # train the model
        model.train(x, y)

        # save the model
        model.save(SAVED_MODELS_DIR_MAP[train_model_data["model"]])
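A hand-written input matching the documented format (the option values and the "linear" MODEL string are illustrative, not taken from train_models_sample.json):

train_models_data = [
    {
        "model": "linear",  # assumed to match LinearRegression.MODEL
        "stockCode": "0001",
        "modelOptions": {"predict_n": 10},
        "inputOptions": {"normalize": True},
    },
]
train_models(train_models_data)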
Example #15

import pickle

import torch

# (LinearRegression and load_config come from project modules not shown here.)

######################### main #########################

### load in the test set ###
with open("../data_augmentation_models/data/all_early_stages/pkl/test_features.pkl", "rb") as fin:
    test_features = pickle.load(fin)


### define the model ###

# Load the configuration arguments (feature and output dimensions, etc.).
params = load_config('./models/config.yaml')

# Define the model.
bootstrap_model = LinearRegression(params["feature_dim"], params["output_dim"])

# load in the pre-trained model
PATH_pretrained = "./models/model_6.pth"
print("read in", PATH_pretrained)

bootstrap_model.load_state_dict(torch.load(PATH_pretrained))

# define the device and move the model into the device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("the device is", device)

bootstrap_model.to(device)


### bootstrap to get confidence interval ###
Example #16
except:
    scores = pd.DataFrame(
        columns=['model', 'target_month', 'window_length', 'score'])

scores = pd.DataFrame(
    columns=['model', 'target_month', 'window_length', 'score'])

dataset = 'datasets/r_non_stationary.pkl'

models = [
    _LSTM("LSTM"),
    CONVNET("CONVNET"),
    _Prophet("Prophet"),
    MEAN("MEAN"),
    LAST("LAST"),
    LinearRegression("Linear Regression", False),
    SES("SES")
]

lr_u = LinearRegression("Univariate Linear Regression", True)
lr_m = LinearRegression("Multivariate Linear Regression", False)
mean = MEAN("MEAN")

models = [mean, lr_u, lr_m, _Prophet("Prophet")]
models = add_xgb(models)

models = [Gauss("Gauss-1"), Gauss("Gauss-2"), lr_m, mean]

data = get_data_from_dataset(dataset)
source_data = preprocess(data)
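The bare `except:` at the top of this snippet lost its `try:` block in extraction; it presumably guarded loading a previously saved scores table, along the lines of this sketch (the file name is an assumption):

try:
    scores = pd.read_pickle("scores.pkl")  # hypothetical path
except Exception:
    scores = pd.DataFrame(
        columns=['model', 'target_month', 'window_length', 'score'])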
Example #17
import random

import matplotlib
import matplotlib.pyplot as plt
import numpy as np

from models.linear_regression import LinearRegression

# Use custom styling from file
matplotlib.rc_file('../plotstyle')

# Generate data
random.seed(0)
X = np.array([i for i in range(20)], dtype='float32')
X = np.reshape(X, (20, 1))
X = np.concatenate((np.ones((20, 1), dtype='float32'), X), axis=1)

y = np.array([(i + random.uniform(-2, 2)) for i in range(20)], dtype='float32')
y = np.reshape(y, (20, 1))

# Fit model to data
model = LinearRegression(data=X, labels=y)
weights = model.fit()

# Generate line of best fit
x_bf = np.linspace(0, 20, dtype='float32')
y_bf = np.array([(weights[0][0] + x * weights[1][0]) for x in x_bf],
                dtype='float32')

plt.scatter(X[:, 1], y, color='b', s=50, label='Samples')
plt.plot(x_bf, y_bf, color='r', label='Fitted Model')
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.title('Linear Regression')
plt.legend()
plt.show()
Example #18
from data.datasets import StatsDatasetRegression
from models.linear_regression import LinearRegression
from trainer.regression_trainer import RegressionTrainer
import pandas as pd
import visualizer
import torch

# -------------------------------------------------------------------------------------------
# This file trains and tests performance of the linear regression model on the simple dataset
# -------------------------------------------------------------------------------------------

# MODEL VARIABLES
MODEL = LinearRegression(10, 2)
TRAINING_SET = StatsDatasetRegression(pd.read_csv("../../data/datasets/processed/simple_train_data.csv"))
TESTING_SET = StatsDatasetRegression(pd.read_csv("../../data/datasets/processed/simple_test_data.csv"))
EPOCHS = 500
LEARNING_RATE = 0.002
OPTIMIZER = torch.optim.SGD(MODEL.parameters(), lr=LEARNING_RATE)
LOSS = torch.nn.MSELoss()

if __name__ == '__main__':
    trainer = RegressionTrainer(MODEL, TRAINING_SET, TESTING_SET, EPOCHS, OPTIMIZER, LOSS)
    trainer.train()
    trainer.print_best_results()
    visualizer.plot_accuracy(trainer.epochs, trainer.val_accuracy, "../../results/graphs/accuracy/simple_reg_acc.png")
    visualizer.plot_loss(trainer.epochs, trainer.val_loss, "../../results/graphs/loss/simple_reg_loss.png")


Example #19
def update_step(x_batch, y_batch, model, learning_rate):
    """Performs one gradient-descent update step on the model weights.

    Args:
        x_batch(numpy.ndarray): input data of dimension (N, ndims).
        y_batch(numpy.ndarray): label data of dimension (N, 1).
        model(LinearModel): Initialized linear model.
    """
    f = LinearRegression.forward(model, x_batch)
    # Scale the gradient by the learning rate exactly once.
    grad = LinearRegression.backward(model, f, y_batch)
    model.w = model.w - learning_rate * grad


dataset = io_tools.read_dataset('train.csv')
data = data_tools.preprocess_data(dataset)
ndim = data[0].shape[1]
print('data[0]', data[0])
print('ndim', ndim)
train_model(data, LinearRegression(ndim))


def train_model_analytic(processed_dataset, model):
    """Computes and sets the optimal model weights (model.w).

    Args:
        processed_dataset(list): List of [x,y] processed
            from utils.data_tools.preprocess_data.
        model(LinearRegression): LinearRegression model.
    """
    # Closed-form least squares via the pseudo-inverse: w = (X^T X)^+ X^T y.
    x, y = processed_dataset
    model.w = np.linalg.pinv(x.T.dot(x)).dot(x.T).dot(y)

def eval_model(processed_dataset, model):
    """Performs evaluation on a dataset."""
Example #20
from data.datasets import StatsDatasetRegression
from models.linear_regression import LinearRegression
from trainer.regression_trainer import RegressionTrainer
import pandas as pd
import visualizer
import torch

# ----------------------------------------------------------------------------------------------
# This file trains and tests performance of the linear regression model on the optimized dataset
# ----------------------------------------------------------------------------------------------

# MODEL VARIABLES
MODEL = LinearRegression(4, 2)
TRAINING_SET = StatsDatasetRegression(pd.read_csv("../../data/datasets/processed/opt_reg_train.csv"))
TESTING_SET = StatsDatasetRegression(pd.read_csv("../../data/datasets/processed/opt_reg_test.csv"))
EPOCHS = 500
LEARNING_RATE = 0.007
OPTIMIZER = torch.optim.SGD(MODEL.parameters(), lr=LEARNING_RATE)
LOSS = torch.nn.MSELoss()

if __name__ == '__main__':
    trainer = RegressionTrainer(MODEL, TRAINING_SET, TESTING_SET, EPOCHS, OPTIMIZER, LOSS)
    trainer.train()
    trainer.print_best_results()
    visualizer.plot_accuracy(trainer.epochs, trainer.val_accuracy, "../../results/graphs/accuracy/opt_reg_acc.png")
    visualizer.plot_loss(trainer.epochs, trainer.val_loss, "../../results/graphs/loss/opt_reg_loss.png")