Example no. 1
def read_data(dataset_path, normalization_method, past_history_factor):
    # read normalization params
    norm_params = None
    with open(
            os.path.normpath(dataset_path) +
            "/{}/norm_params.json".format(normalization_method),
            "r",
    ) as read_file:
        norm_params = json.load(read_file)

    # read training / validation data
    tmp_data_path = os.path.normpath(dataset_path) + "/{}/{}/".format(
        normalization_method, past_history_factor)

    # np.save appends ".npy" to the "*.np" targets written by generate_dataset
    # below, hence the double extension in these file names.
    x_train = np.load(tmp_data_path + "x_train.np.npy")
    y_train = np.load(tmp_data_path + "y_train.np.npy")
    x_test = np.load(tmp_data_path + "x_test.np.npy")
    y_test = np.load(tmp_data_path + "y_test.np.npy")
    y_test_denorm = np.asarray([
        denormalize(y_test[i], norm_params[i], normalization_method)
        for i in range(y_test.shape[0])
    ])
    print("TRAINING DATA")
    print("Input shape", x_train.shape)
    print("Output_shape", y_train.shape)
    print("TEST DATA")
    print("Input shape", x_test.shape)
    print("Output_shape", y_test.shape)

    return x_train, y_train, x_test, y_test, y_test_denorm, norm_params
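
A minimal usage sketch of read_data follows; the dataset name, normalization method, and past-history factor are hypothetical and simply assume the directory layout written by generate_dataset further below.

# Hypothetical call, for illustration only: assumes ../data/ExampleDataset/zscore/1.25/
# holds the .npy arrays and ../data/ExampleDataset/zscore/norm_params.json the
# per-series normalization parameters.
x_train, y_train, x_test, y_test, y_test_denorm, norm_params = read_data(
    "../data/ExampleDataset", "zscore", 1.25
)
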
Example no. 2
def train_trees(model_name, iter_params, x_train, y_train, x_test, norm_params,
                normalization_method):
    model = create_model_ml(model_name, iter_params)

    x_train2 = x_train.reshape(x_train.shape[0],
                               x_train.shape[1] * x_train.shape[2])
    print('x_train: {} -> {}'.format(x_train.shape, x_train2.shape))
    training_time_0 = time.time()
    model.fit(x_train2, y_train)
    training_time = time.time() - training_time_0

    x_test2 = x_test.reshape(x_test.shape[0],
                             x_test.shape[1] * x_test.shape[2])
    print('x_test: {} -> {}'.format(x_test.shape, x_test2.shape))
    test_time_0 = time.time()
    test_forecast = model.predict(x_test2)
    test_time = time.time() - test_time_0

    for i in range(test_forecast.shape[0]):
        nparams = norm_params[0]
        test_forecast[i] = denormalize(
            test_forecast[i],
            nparams,
            method=normalization_method,
        )

    return test_forecast, training_time, test_time
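
The reshape in train_trees flattens each (past_history, n_features) window into a single feature vector, since estimators with a scikit-learn style fit/predict API expect 2-D input. A tiny self-contained illustration of that flattening:

import numpy as np

x = np.arange(24, dtype=np.float32).reshape(4, 6, 1)  # 4 windows, past_history 6, 1 feature
x2 = x.reshape(x.shape[0], x.shape[1] * x.shape[2])
print(x.shape, "->", x2.shape)  # (4, 6, 1) -> (4, 6)
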
Example no. 3
def _run_experiment(
        gpu_device,
        dataset,
        dataset_path,
        results_path,
        csv_filepath,
        metrics,
        epochs,
        normalization_method,
        past_history_factor,
        max_steps_per_epoch,
        batch_size,
        learning_rate,
        model_name,
        model_index,
        model_args,
):
    import gc
    import tensorflow as tf
    from models import create_model

    tf.keras.backend.clear_session()

    def select_gpu_device(gpu_number):
        gpus = tf.config.experimental.list_physical_devices("GPU")
        if len(gpus) >= 2 and gpu_number is not None:
            device = gpus[gpu_number]
            tf.config.experimental.set_memory_growth(device, True)
            tf.config.experimental.set_visible_devices(device, "GPU")

    select_gpu_device(gpu_device)

    results = read_results_file(csv_filepath, metrics)

    x_train, y_train, x_test, y_test, y_test_denorm, norm_params = read_data(
        dataset_path, normalization_method, past_history_factor
    )
    x_train = tf.convert_to_tensor(x_train)
    y_train = tf.convert_to_tensor(y_train)
    x_test = tf.convert_to_tensor(x_test)
    y_test = tf.convert_to_tensor(y_test)
    y_test_denorm = tf.convert_to_tensor(y_test_denorm)

    forecast_horizon = y_test.shape[1]
    past_history = x_test.shape[1]
    steps_per_epoch = min(
        int(np.ceil(x_train.shape[0] / batch_size)), max_steps_per_epoch,
    )

    optimizer = tf.optimizers.Adam(learning_rate=learning_rate)
    model = create_model(
        model_name,
        x_train.shape,
        output_size=forecast_horizon,
        optimizer=optimizer,
        loss="mae",
        **model_args
    )
    model.summary()

    training_time_0 = time.time()
    history = model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        validation_data=(x_test, y_test),
        shuffle=True,
    )
    training_time = time.time() - training_time_0

    # Get validation metrics
    test_time_0 = time.time()
    test_forecast = model(x_test).numpy()
    test_time = time.time() - test_time_0

    for i in range(test_forecast.shape[0]):
        nparams = norm_params[0]
        test_forecast[i] = denormalize(
            test_forecast[i], nparams, method=normalization_method,
        )
    if metrics:
        test_metrics = evaluate(y_test_denorm, test_forecast, metrics)
    else:
        test_metrics = {}

    # Save results
    predictions_path = "{}/{}/{}/{}/{}/{}/{}/{}/".format(
        results_path,
        dataset,
        normalization_method,
        past_history_factor,
        epochs,
        batch_size,
        learning_rate,
        model_name,
    )
    if not os.path.exists(predictions_path):
        os.makedirs(predictions_path)
    np.save(
        predictions_path + str(model_index) + ".npy", test_forecast,
    )
    results = results.append(
        {
            "DATASET": dataset,
            "MODEL": model_name,
            "MODEL_INDEX": model_index,
            "MODEL_DESCRIPTION": str(model_args),
            "FORECAST_HORIZON": forecast_horizon,
            "PAST_HISTORY_FACTOR": past_history_factor,
            "PAST_HISTORY": past_history,
            "BATCH_SIZE": batch_size,
            "EPOCHS": epochs,
            "STEPS": steps_per_epoch,
            "OPTIMIZER": "Adam",
            "LEARNING_RATE": learning_rate,
            "NORMALIZATION": normalization_method,
            "TEST_TIME": test_time,
            "TRAINING_TIME": training_time,
            **test_metrics,
            "LOSS": str(history.history["loss"]),
            "VAL_LOSS": str(history.history["val_loss"]),
        },
        ignore_index=True,
    )

    results.to_csv(
        csv_filepath, sep=";",
    )

    gc.collect()
    del model, x_train, x_test, y_train, y_test, y_test_denorm, test_forecast
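
The denormalize helper used above is imported from the project's utilities and is not shown here; the sketch below is a hypothetical reimplementation for the two usual methods, assuming norm_params stores mean/std and min/max per series.

def denormalize(data, norm_params, method="zscore"):
    # Illustrative sketch only; the real helper lives elsewhere in the
    # repository and may support additional methods.
    if method == "zscore":
        return data * norm_params["std"] + norm_params["mean"]
    if method == "minmax":
        return data * (norm_params["max"] - norm_params["min"]) + norm_params["min"]
    raise ValueError("Unknown normalization method: {}".format(method))
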
Example no. 4
def generate_dataset(args):
    dataset, norm_method, past_history_factor = args

    train_url = DATASETS[dataset]["train"]
    test_url = DATASETS[dataset]["test"]
    if not os.path.exists(
            "../data/{}/train.csv".format(dataset)) or not os.path.exists(
                "../data/{}/test.csv".format(dataset)):
        if not os.path.exists("../data/{}".format(dataset)):
            os.system("mkdir -p ../data/{}".format(dataset))
        os.system("wget -O ../data/{}/train.csv {}".format(dataset, train_url))
        os.system("wget -O ../data/{}/test.csv  {}".format(dataset, test_url))

    if not os.path.exists("../data/{}/{}/{}/".format(dataset, norm_method,
                                                     past_history_factor)):
        os.system("mkdir -p ../data/{}/{}/{}/".format(dataset, norm_method,
                                                      past_history_factor))

    # Read data
    train = read_ts_dataset("../data/{}/train.csv".format(dataset))
    test = read_ts_dataset("../data/{}/test.csv".format(dataset))
    print("Shape test", test.shape)
    forecast_horizon = test.shape[1]

    print(
        dataset,
        {
            "Number of time series": train.shape[0],
            "Max length": np.max([ts.shape[0] for ts in train]),
            "Min length": np.min([ts.shape[0] for ts in train]),
            "Forecast Horizon": forecast_horizon,
        },
    )

    # Format training and test input/output data using the moving window
    # strategy (a simplified sketch follows this function).
    past_history = int(forecast_horizon * past_history_factor)

    # Normalize data
    train, test, norm_params = normalize_dataset(train,
                                                 test,
                                                 norm_method,
                                                 dtype="float32")

    norm_params_json = [{k: float(p[k]) for k in p} for p in norm_params]
    norm_params_json = json.dumps(norm_params_json)

    with open("../data/{}/{}/norm_params.json".format(dataset, norm_method),
              "w") as f:
        f.write(norm_params_json)

    invalidParams = []
    for i in range(len(train)):
        if len(train[i]) < past_history:
            invalidParams.append(i)

    x_train, y_train, x_test, y_test = moving_windows_preprocessing(
        train,
        test,
        past_history,
        forecast_horizon,
        np.float32,
        n_cores=NUM_CORES)

    y_test_denorm = np.copy(y_test)

    j = 0
    for i, nparams in enumerate(norm_params):
        if i not in invalidParams:
            y_test_denorm[j] = denormalize(y_test[j],
                                           nparams,
                                           method=norm_method)
            j += 1

    print("TRAINING DATA")
    print("Input shape", x_train.shape)
    print("Output_shape", y_train.shape)
    print()
    print("TEST DATA")
    print("Input shape", x_test.shape)
    print("Output_shape", y_test.shape)

    np.save(
        "../data/{}/{}/{}/x_train.np".format(dataset, norm_method,
                                             past_history_factor),
        x_train,
    )
    np.save(
        "../data/{}/{}/{}/y_train.np".format(dataset, norm_method,
                                             past_history_factor),
        y_train,
    )
    np.save(
        "../data/{}/{}/{}/x_test.np".format(dataset, norm_method,
                                            past_history_factor),
        x_test,
    )
    np.save(
        "../data/{}/{}/{}/y_test.np".format(dataset, norm_method,
                                            past_history_factor),
        y_test,
    )
    np.save(
        "../data/{}/{}/{}/y_test_denorm.np".format(dataset, norm_method,
                                                   past_history_factor),
        y_test_denorm,
    )

    # Save indices of invalid normalization parameters
    if invalidParams != []:
        invalidParams = np.asarray(invalidParams)
        np.save(
            "../data/{}/{}/{}/invalidParams.np".format(dataset, norm_method,
                                                       past_history_factor),
            invalidParams,
        )
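
moving_windows_preprocessing is imported from the project's preprocessing utilities. The sketch below is a simplified, single-core illustration of the moving-window strategy mentioned in the comment above; the real implementation also builds the test windows and parallelizes the work over n_cores.

import numpy as np

def moving_windows(series_list, past_history, forecast_horizon, dtype=np.float32):
    # For every series, slide a window of past_history inputs followed by
    # forecast_horizon targets, one step at a time.
    x, y = [], []
    for ts in series_list:
        for i in range(past_history, len(ts) - forecast_horizon + 1):
            x.append(ts[i - past_history:i])
            y.append(ts[i:i + forecast_horizon])
    # Shapes: (n_windows, past_history, 1) and (n_windows, forecast_horizon)
    return np.asarray(x, dtype=dtype)[..., np.newaxis], np.asarray(y, dtype=dtype)
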
Example no. 5
def generate_dataset(args):
    dataset, norm_method, past_history_factor = args

    train_url = DATASETS[dataset]["train"]
    test_url = DATASETS[dataset]["test"]

    train = read_ts_dataset("../data/{}/train.csv".format(dataset))
    test = read_ts_dataset("../data/{}/test.csv".format(dataset))

    forecast_horizon = 24  #test.shape[1]

    print(
        dataset,
        {
            "Number of time series": train.shape[0],
            "Max length": np.max([ts.shape[0] for ts in train]),
            "Min length": np.min([ts.shape[0] for ts in train]),
            "Forecast Horizon": forecast_horizon,
        },
    )

    # Normalize data
    train, test, norm_params = normalize_dataset(train,
                                                 test,
                                                 norm_method,
                                                 dtype="float32")

    norm_params_json = [{k: float(p[k]) for k in p} for p in norm_params]
    norm_params_json = json.dumps(norm_params_json)

    with open("../data/{}/{}/norm_params.json".format(dataset, norm_method),
              "w") as f:
        f.write(norm_params_json)

    # Format training and test input/output data using the moving window strategy
    past_history = int(forecast_horizon * past_history_factor)

    x_train, y_train, x_test, y_test = moving_windows_preprocessing(
        train,
        test,
        past_history,
        forecast_horizon,
        np.float32,
        n_cores=NUM_CORES)

    y_test_denorm = np.copy(y_test)
    #i = 0
    for i in range(y_test.shape[0]):
        y_test_denorm[i] = denormalize(y_test[i],
                                       norm_params[0],
                                       method=norm_method)

    print("TRAINING DATA")
    print("Input shape", x_train.shape)
    print("Output_shape", y_train.shape)
    print()
    print("TEST DATA")
    print("Input shape", x_test.shape)
    print("Output_shape", y_test.shape)

    np.save(
        "../data/{}/{}/{}/x_train.np".format(dataset, norm_method,
                                             past_history_factor),
        x_train,
    )
    np.save(
        "../data/{}/{}/{}/y_train.np".format(dataset, norm_method,
                                             past_history_factor),
        y_train,
    )
    np.save(
        "../data/{}/{}/{}/x_test.np".format(dataset, norm_method,
                                            past_history_factor),
        x_test,
    )
    np.save(
        "../data/{}/{}/{}/y_test.np".format(dataset, norm_method,
                                            past_history_factor),
        y_test,
    )
    np.save(
        "../data/{}/{}/{}/y_test_denorm.np".format(dataset, norm_method,
                                                   past_history_factor),
        y_test_denorm,
    )
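
normalize_dataset is likewise imported from the preprocessing utilities. A hedged sketch of per-series z-score normalization that would yield norm_params entries compatible with the JSON dump above; the real function supports several methods and scales the test split with the training parameters.

import numpy as np

def normalize_series_zscore(ts):
    # Illustrative only: normalize one series and return its parameters.
    params = {"mean": float(np.mean(ts)), "std": float(np.std(ts))}
    return (ts - params["mean"]) / (params["std"] + 1e-8), params
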
Example no. 6
def _run_experiment_transformer(
    gpu_device,
    dataset,
    dataset_path,
    results_path,
    csv_filepath,
    metrics,
    epochs,
    normalization_method,
    past_history_factor,
    max_steps_per_epoch,
    batch_size,
    learning_rate,
    model_name,
    model_index,
    model_args,
):
    print("Start _run_experiment_transformer")
    import gc
    from models import create_model

    import torch
    from pytorch_lightning import Trainer, seed_everything
    from torch.utils.data import DataLoader, TensorDataset

    results = read_results_file(csv_filepath, metrics)

    x_train, y_train, x_test, y_test, y_test_denorm, norm_params = read_data(
        dataset_path, normalization_method, past_history_factor)

    forecast_horizon = y_test.shape[1]
    past_history = x_test.shape[1]

    steps_per_epoch = min(
        int(np.ceil(x_train.shape[0] / batch_size)),
        max_steps_per_epoch,
    )

    x_train = torch.from_numpy(x_train).float()
    y_train = torch.from_numpy(y_train).float().unsqueeze(-1)

    x_test = torch.from_numpy(x_test).float()
    y_test = torch.from_numpy(y_test).float().unsqueeze(-1)

    train_dataset = TensorDataset(x_train, y_train)
    val_dataset = TensorDataset(x_test, y_test)

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True)

    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    test_loader = DataLoader(val_dataset, batch_size=256, shuffle=False)

    # seed_everything(42, workers=True)

    trainer = Trainer(max_epochs=epochs,
                      gpus=[gpu_device],
                      checkpoint_callback=False)

    model = create_model(model_name,
                         x_train.shape,
                         output_size=forecast_horizon,
                         **model_args)

    training_time_0 = time.time()
    trainer.fit(model, train_loader, val_loader)
    training_time = time.time() - training_time_0

    print("End training")

    train_loss = float(trainer.callback_metrics["train_loss"].to("cpu"))
    val_loss = float(trainer.callback_metrics["val_loss"].to("cpu"))
    print("loss saved")

    def predictMultiStepRegresive(model, x_test, nSteps):
        '''
        Inference for autoregressive models.
        '''
        with torch.no_grad():
            encoderInput = x_test
            decoderInput = x_test[:, -1, :]
            decoderInput = decoderInput.unsqueeze(-1)
            # Append the prediction from step 0 to the decoder input to obtain
            # the input for step 1, and so on for the remaining steps.
            for _ in range(nSteps):
                # No mask is needed at evaluation time: the model is never
                # given any future input.
                out = model.forward(encoderInput, decoderInput)
                # detach() matters here; otherwise "out" stays referenced and
                # eventually causes an out-of-memory error.
                lastPrediction = out[:, -1].detach()
                lastPrediction = lastPrediction.unsqueeze(-1)
                decoderInput = torch.cat((decoderInput, lastPrediction), 1)

            decoderInput = decoderInput.squeeze(-1)
        return decoderInput[:, 1:]

    # Only use "device" for inference, as Pytorch Lighting already handles it for training
    device = torch.device(
        "cuda:" + str(gpu_device) if torch.cuda.is_available() else "cpu")

    model.to(device)
    model.eval()
    x_test = x_test.to(device)
    if model_name.endswith("AR"):
        '''
        Autoregresive inference
        '''
        test_time_0 = time.time()
        test_forecast = predictMultiStepRegresive(model, x_test,
                                                  y_test.shape[1])
        test_time = time.time() - test_time_0
    else:
        '''
        Non-Autoregresive inference
        '''

        test_time_0 = time.time()
        test_forecast = model(x_test)
        test_time = time.time() - test_time_0
    print("End test")
    test_forecast = test_forecast.detach().to("cpu").numpy()

    for i, nparams in enumerate(norm_params):
        test_forecast[i] = denormalize(
            test_forecast[i],
            nparams,
            method=normalization_method,
        )

    if metrics:
        test_metrics = evaluate(y_test_denorm, test_forecast, metrics)
        print(test_metrics)
    else:
        print("Metrics empty")
        test_metrics = {}

    # Save results
    predictions_path = "{}/{}/{}/{}/{}/{}/{}/{}/".format(
        results_path,
        dataset,
        normalization_method,
        past_history_factor,
        epochs,
        batch_size,
        learning_rate,
        model_name,
    )
    if not os.path.exists(predictions_path):
        os.makedirs(predictions_path)
    np.save(
        predictions_path + str(model_index) + ".npy",
        test_forecast,
    )

    results = results.append(
        {
            "DATASET": dataset,
            "MODEL": model_name,
            "MODEL_INDEX": model_index,
            "MODEL_DESCRIPTION": str(model_args),
            "FORECAST_HORIZON": forecast_horizon,
            "PAST_HISTORY_FACTOR": past_history_factor,
            "PAST_HISTORY": past_history,
            "BATCH_SIZE": batch_size,
            "EPOCHS": epochs,
            "STEPS": steps_per_epoch,
            "OPTIMIZER": "CustomAdam",
            "LEARNING_RATE": learning_rate,
            "NORMALIZATION": normalization_method,
            "TEST_TIME": test_time,
            "TRAINING_TIME": training_time,
            **test_metrics,
            "LOSS": str(train_loss),
            "VAL_LOSS": str(val_loss),
        },
        ignore_index=True,
    )

    results.to_csv(
        csv_filepath,
        sep=";",
    )

    gc.collect()
    del model, x_train, x_test, y_train, y_test, y_test_denorm, test_forecast
Example no. 7
def _run_experiment_transformer(
    gpu_device,
    dataset,
    dataset_path,
    results_path,
    csv_filepath,
    metrics,
    epochs,
    normalization_method,
    past_history_factor,
    max_steps_per_epoch,
    batch_size,
    learning_rate,
    model_name,
    model_index,
    model_args,
):
    print("Start _run_experiment_transformer")
    import gc
    from models import create_model
    from transformerOptimizer import get_std_opt
    from transformerTraining import train
    from transformerTraining import predictMultiStep
    from transformerTraining import predictMultiStepBatching
    import torch
    from torch.nn import L1Loss

    results = read_results_file(csv_filepath, metrics)

    x_train, y_train, x_test, y_test, y_test_denorm, norm_params = read_data(
        dataset_path, normalization_method, past_history_factor)

    forecast_horizon = y_test.shape[1]
    past_history = x_test.shape[1]

    device = torch.device(
        "cuda:" + str(gpu_device) if torch.cuda.is_available() else "cpu")

    steps_per_epoch = min(
        int(np.ceil(x_train.shape[0] / batch_size)),
        max_steps_per_epoch,
    )

    x_train = torch.from_numpy(x_train).float().to(device)
    y_train = torch.from_numpy(y_train).float().to(device)

    x_test = torch.from_numpy(x_test).float().to(device)
    y_test = torch.from_numpy(y_test).float().to(device)

    y_test_denorm = torch.from_numpy(y_test_denorm).float()

    model = create_model(model_name, x_train.shape, **model_args)
    if learning_rate == "Noam":
        model_opt = get_std_opt(model)
        optimizerName = "Noam"
    else:
        model_opt = torch.optim.Adam(model.parameters(), lr=learning_rate)
        optimizerName = "Adam"
    model.to(device)
    criterion = L1Loss()  # mean absolute error

    training_time_0 = time.time()
    trainLoss, valLoss = train(model, x_train, y_train, x_test, y_test, epochs,
                               steps_per_epoch, criterion, model_opt,
                               batch_size)
    training_time = time.time() - training_time_0

    # Get validation metrics
    test_time_0 = time.time()

    # Batch inference to avoid out-of-memory errors (a sketch of the batching
    # idea follows this function).
    if y_test.shape[0] > 256:
        test_forecast = predictMultiStepBatching(x_test, model,
                                                 y_test.shape[1])
    else:
        test_forecast = predictMultiStep(x_test, model, y_test.shape[1])

    test_time = time.time() - test_time_0
    test_time = test_time / x_train.shape[0]

    normalized_test_forecast = test_forecast.numpy().copy()

    for i, nparams in enumerate(norm_params):
        test_forecast[i] = denormalize(
            test_forecast[i],
            nparams,
            method=normalization_method,
        )

    if metrics:
        print("y_test: ", y_test_denorm.numpy())
        print("test_forecast: ", test_forecast)
        test_metrics = evaluate(y_test_denorm.numpy(), test_forecast.numpy(),
                                metrics)
        print(test_metrics)
    else:
        print("Metrics empty")
        test_metrics = {}

    # Save results
    predictions_path = "{}/{}/{}/{}/{}/{}/{}/{}/".format(
        results_path,
        dataset,
        normalization_method,
        past_history_factor,
        epochs,
        batch_size,
        learning_rate,
        model_name,
    )
    if not os.path.exists(predictions_path):
        os.makedirs(predictions_path)
    np.save(
        predictions_path + str(model_index) + ".npy",
        test_forecast,
    )
    np.save(
        predictions_path + "Normalize" + str(model_index) + ".npy",
        normalized_test_forecast,
    )

    results = results.append(
        {
            "DATASET": dataset,
            "MODEL": model_name,
            "MODEL_INDEX": model_index,
            "MODEL_DESCRIPTION": str(model_args),
            "FORECAST_HORIZON": forecast_horizon,
            "PAST_HISTORY_FACTOR": past_history_factor,
            "PAST_HISTORY": past_history,
            "BATCH_SIZE": batch_size,
            "EPOCHS": epochs,
            "STEPS": steps_per_epoch,
            "OPTIMIZER": optimizerName,
            "LEARNING_RATE": learning_rate,
            "NORMALIZATION": normalization_method,
            "TEST_TIME": test_time,
            "TRAINING_TIME": training_time,
            **test_metrics,
            "LOSS": str(trainLoss),
            "VAL_LOSS": str(valLoss),
        },
        ignore_index=True,
    )

    results.to_csv(
        csv_filepath,
        sep=";",
    )

    gc.collect()
    del model, x_train, x_test, y_train, y_test, y_test_denorm, test_forecast
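
predictMultiStepBatching is imported from transformerTraining and is not shown here. A hedged sketch of the batching idea referenced in the comment above: split x_test into chunks, run the multi-step prediction per chunk, and concatenate the results (assuming predictMultiStep has the signature used in this experiment).

import torch

def predict_multi_step_batching(x_test, model, n_steps, batch_size=256):
    # Illustrative only: keep at most batch_size sequences on the GPU at once.
    forecasts = []
    with torch.no_grad():
        for start in range(0, x_test.shape[0], batch_size):
            chunk = x_test[start:start + batch_size]
            forecasts.append(predictMultiStep(chunk, model, n_steps).cpu())
    return torch.cat(forecasts, dim=0)
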