def generate_dataset(args):
    """Download (if needed), normalize and window one dataset, then save it.

    Args:
        args: A 3-item sequence ``(dataset, norm_method, past_history_factor)``.
            ``dataset`` is a key of ``DATASETS``; ``norm_method`` is forwarded
            to ``normalize_dataset``/``denormalize``; ``past_history_factor``
            is multiplied by the forecast horizon to get the input window
            length.

    Side effects:
        * Downloads ``train.csv``/``test.csv`` into ``../data/<dataset>/``
          when either file is missing.
        * Writes ``norm_params.json`` into ``../data/<dataset>/<norm_method>/``.
        * Saves ``x_train``, ``y_train``, ``x_test``, ``y_test``,
          ``y_test_denorm`` (and ``invalidParams`` when non-empty) with
          ``np.save`` into
          ``../data/<dataset>/<norm_method>/<past_history_factor>/``.
          The file names end in ``.np``, so NumPy appends ``.npy`` to them.

    Raises:
        KeyError: If ``dataset`` is not present in ``DATASETS``.
        OSError, subprocess.CalledProcessError: If a download fails.
    """
    dataset, norm_method, past_history_factor = args

    train_url = DATASETS[dataset]["train"]
    test_url = DATASETS[dataset]["test"]

    dataset_dir = "../data/{}".format(dataset)
    train_csv = "{}/train.csv".format(dataset_dir)
    test_csv = "{}/test.csv".format(dataset_dir)
    if not (os.path.exists(train_csv) and os.path.exists(test_csv)):
        os.makedirs(dataset_dir, exist_ok=True)
        _download_file(train_url, train_csv)
        _download_file(test_url, test_csv)

    # Trailing slash kept so the output file paths are built exactly as before.
    output_dir = "../data/{}/{}/{}/".format(dataset, norm_method,
                                            past_history_factor)
    os.makedirs(output_dir, exist_ok=True)

    # Read data
    train = read_ts_dataset(train_csv)
    test = read_ts_dataset(test_csv)
    print("Shape test", test.shape)
    forecast_horizon = test.shape[1]

    print(
        dataset,
        {
            "Number of time series": train.shape[0],
            "Max length": np.max([ts.shape[0] for ts in train]),
            "Min length": np.min([ts.shape[0] for ts in train]),
            "Forecast Horizon": forecast_horizon,
        },
    )

    # Input window length used by the moving window strategy
    past_history = int(forecast_horizon * past_history_factor)

    # Normalize data
    train, test, norm_params = normalize_dataset(train,
                                                 test,
                                                 norm_method,
                                                 dtype="float32")

    # Cast every parameter to a plain float so it is JSON serializable.
    norm_params_json = json.dumps([{k: float(p[k]) for k in p}
                                   for p in norm_params])
    with open("../data/{}/{}/norm_params.json".format(dataset, norm_method),
              "w") as f:
        f.write(norm_params_json)

    # Indexes of the series that are shorter than the input window.
    invalid_params = [
        i for i, ts in enumerate(train) if len(ts) < past_history
    ]

    x_train, y_train, x_test, y_test = moving_windows_preprocessing(
        train,
        test,
        past_history,
        forecast_horizon,
        np.float32,
        n_cores=NUM_CORES)

    y_test_denorm = np.copy(y_test)

    # ``row`` only advances for series that are not flagged as invalid.
    # NOTE(review): this assumes y_test holds one row per valid series, in the
    # original order - confirm against moving_windows_preprocessing.
    invalid_lookup = set(invalid_params)
    row = 0
    for i, nparams in enumerate(norm_params):
        if i in invalid_lookup:
            continue
        y_test_denorm[row] = denormalize(y_test[row],
                                         nparams,
                                         method=norm_method)
        row += 1

    print("TRAINING DATA")
    print("Input shape", x_train.shape)
    print("Output_shape", y_train.shape)
    print()
    print("TEST DATA")
    print("Input shape", x_test.shape)
    print("Output_shape", y_test.shape)

    outputs = (
        ("x_train", x_train),
        ("y_train", y_train),
        ("x_test", x_test),
        ("y_test", y_test),
        ("y_test_denorm", y_test_denorm),
    )
    for name, array in outputs:
        np.save("{}{}.np".format(output_dir, name), array)

    # Save indexes of invalid normalization parameters
    if invalid_params:
        np.save("{}invalidParams.np".format(output_dir),
                np.asarray(invalid_params))


def _download_file(url, destination):
    """Fetch ``url`` into ``destination`` with wget, removing partial output on failure."""
    import subprocess  # local import: the file's import header is not part of this block

    try:
        # Argument list instead of a shell string: characters such as '&' or
        # '?' in the URL are passed to wget verbatim.
        subprocess.check_call(["wget", "-O", destination, url])
    except (OSError, subprocess.CalledProcessError):
        # A failed transfer can leave an empty or truncated file behind, which
        # would make the "file already exists" check skip the download on the
        # next run.
        if os.path.exists(destination):
            os.remove(destination)
        raise
# Example #2
0
def generate_dataset(args):
    """Normalize and window an already-downloaded dataset, then save it.

    Args:
        args: A 3-item sequence ``(dataset, norm_method, past_history_factor)``.
            ``dataset`` selects ``../data/<dataset>/train.csv`` and
            ``test.csv``; ``norm_method`` is forwarded to
            ``normalize_dataset``/``denormalize``; ``past_history_factor`` is
            multiplied by the forecast horizon to get the input window length.

    Side effects:
        * Writes ``norm_params.json`` into ``../data/<dataset>/<norm_method>/``.
        * Saves ``x_train``, ``y_train``, ``x_test``, ``y_test`` and
          ``y_test_denorm`` with ``np.save`` into
          ``../data/<dataset>/<norm_method>/<past_history_factor>/``, creating
          that directory when it does not exist. The file names end in
          ``.np``, so NumPy appends ``.npy`` to them.
    """
    dataset, norm_method, past_history_factor = args

    train = read_ts_dataset("../data/{}/train.csv".format(dataset))
    test = read_ts_dataset("../data/{}/test.csv".format(dataset))

    # NOTE(review): the horizon is fixed at 24 instead of being taken from
    # test.shape[1] - confirm this is intended for the datasets used here.
    forecast_horizon = 24

    print(
        dataset,
        {
            "Number of time series": train.shape[0],
            "Max length": np.max([ts.shape[0] for ts in train]),
            "Min length": np.min([ts.shape[0] for ts in train]),
            "Forecast Horizon": forecast_horizon,
        },
    )

    # Normalize data
    train, test, norm_params = normalize_dataset(train,
                                                 test,
                                                 norm_method,
                                                 dtype="float32")

    # Trailing slash kept so the output file paths are built exactly as before.
    output_dir = "../data/{}/{}/{}/".format(dataset, norm_method,
                                            past_history_factor)
    # Nothing else in this function creates the output tree; without this the
    # writes below fail on a fresh checkout.
    os.makedirs(output_dir, exist_ok=True)

    # Cast every parameter to a plain float so it is JSON serializable.
    norm_params_json = json.dumps([{k: float(p[k]) for k in p}
                                   for p in norm_params])
    with open("../data/{}/{}/norm_params.json".format(dataset, norm_method),
              "w") as f:
        f.write(norm_params_json)

    # Format training and test input/output data using the moving window strategy
    past_history = int(forecast_horizon * past_history_factor)

    x_train, y_train, x_test, y_test = moving_windows_preprocessing(
        train,
        test,
        past_history,
        forecast_horizon,
        np.float32,
        n_cores=NUM_CORES)

    y_test_denorm = np.copy(y_test)
    # NOTE(review): every row is denormalized with the parameters of the first
    # series (norm_params[0]) - presumably the dataset holds a single series;
    # confirm before using this with multi-series data.
    first_params = norm_params[0]
    for row in range(y_test.shape[0]):
        y_test_denorm[row] = denormalize(y_test[row],
                                         first_params,
                                         method=norm_method)

    print("TRAINING DATA")
    print("Input shape", x_train.shape)
    print("Output_shape", y_train.shape)
    print()
    print("TEST DATA")
    print("Input shape", x_test.shape)
    print("Output_shape", y_test.shape)

    outputs = (
        ("x_train", x_train),
        ("y_train", y_train),
        ("x_test", x_test),
        ("y_test", y_test),
        ("y_test_denorm", y_test_denorm),
    )
    for name, array in outputs:
        np.save("{}{}.np".format(output_dir, name), array)