Beispiel #1
0
def var_predict(df, n_forwards=(1, 3), n_lags=4, test_ratio=0.2):
    """
    Fit a VAR model on the leading rows of ``df``, forecast the trailing rows
    and dump the results to CSV files in the current directory.

    The last ``round(n_rows * test_ratio)`` rows form the test set. Values are
    standardized with a single scalar mean/std computed over all training
    values. Every window of ``n_lags`` consecutive rows is fed to the fitted
    model, and the forecast for each requested horizon is stored at the test
    row it lands on.

    :param df: pandas.DataFrame of observations, one row per step and one
        column per series (assumed to be in time order -- confirm with caller).
    :param n_forwards: tuple of forecast horizons, counted as 1-based steps ahead.
    :param n_lags: order of the VAR model and length of each input window.
    :param test_ratio: fraction of rows, taken from the end, held out for testing.
    :return: (list with one prediction DataFrame per horizon, test DataFrame)
    """
    num_rows, num_series = df.shape
    num_test = int(round(num_rows * test_ratio))
    num_train = num_rows - num_test
    train_frame = df[:num_train]
    test_frame = df[num_train:]

    # One global mean/std over every training value, not one per column.
    scaler = StandardScaler(mean=train_frame.values.mean(),
                            std=train_frame.values.std())
    fitted = VAR(scaler.transform(train_frame.values)).fit(n_lags)
    longest_horizon = np.max(n_forwards)

    # forecasts[slot, row, :] is the prediction for test row `row` at the
    # horizon n_forwards[slot].
    forecasts = np.zeros(shape=(len(n_forwards), num_test, num_series))

    # Start early enough that the longest horizon already reaches test row 0.
    first_window = num_train - n_lags - longest_horizon + 1
    for window_start in range(first_window, num_rows - n_lags):
        window = df.values[window_start:window_start + n_lags]
        steps_ahead = fitted.forecast(scaler.transform(window),
                                      longest_horizon)
        for slot, horizon in enumerate(n_forwards):
            target_row = window_start - num_train + n_lags + horizon - 1
            if target_row < 0 or target_row >= num_test:
                # This forecast falls outside the test set.
                continue
            forecasts[slot, target_row, :] = steps_ahead[horizon - 1, :]

    df_predicts = [
        pd.DataFrame(scaler.inverse_transform(forecasts[slot]),
                     index=test_frame.index,
                     columns=test_frame.columns)
        for slot in range(len(n_forwards))
    ]

    # NOTE(review): only the predictions of the last horizon in n_forwards are
    # written to disk -- confirm this is intended.
    df_predicts[-1].to_csv("./df_predict.csv", sep=',', index=False)
    test_frame.to_csv("./df_test.csv", sep=',', index=False)
    return df_predicts, test_frame
def var_predict(df, n_forwards=(1, 3), n_lags=4, test_ratio=0.2):
    """
    Multivariate time series forecasting using a Vector Auto-Regressive model.

    The model is fitted on the standardized leading rows of ``df``; the
    trailing ``test_ratio`` share of rows is predicted from sliding windows of
    ``n_lags`` rows, once per requested horizon.

    :param df: pandas.DataFrame, index: time, columns: sensor id, content: data.
    :param n_forwards: a tuple of horizons (1-based number of steps ahead).
    :param n_lags: the order of the VAR model, also the input window length.
    :param test_ratio: fraction of samples, taken from the end, used for testing.
    :return: [list of predictions, one DataFrame per horizon], df_test
    """
    total_rows, n_columns = df.shape
    test_rows = int(round(total_rows * test_ratio))
    train_rows = total_rows - test_rows
    df_train = df[:train_rows]
    df_test = df[train_rows:]

    # The scaler uses one scalar mean/std taken over all training values.
    scaler = StandardScaler(mean=df_train.values.mean(), std=df_train.values.std())
    train_scaled = scaler.transform(df_train.values)
    fitted_var = VAR(train_scaled).fit(n_lags)
    horizon_cap = np.max(n_forwards)

    # One (test_rows, n_columns) slab of predictions per horizon.
    predicted = np.zeros(shape=(len(n_forwards), test_rows, n_columns))

    # The earliest window is chosen so the longest horizon hits test row 0.
    window_begin = train_rows - n_lags - horizon_cap + 1
    window_stop = total_rows - n_lags
    while window_begin < window_stop:
        history = scaler.transform(df.values[window_begin: window_begin + n_lags])
        forecast = fitted_var.forecast(history, horizon_cap)
        for slot, horizon in enumerate(n_forwards):
            row = window_begin - train_rows + n_lags + horizon - 1
            if 0 <= row < test_rows:
                predicted[slot, row, :] = forecast[horizon - 1, :]
        window_begin += 1

    df_predicts = []
    for slab in predicted:
        restored = scaler.inverse_transform(slab)
        df_predicts.append(
            pd.DataFrame(restored, index=df_test.index, columns=df_test.columns))
    return df_predicts, df_test
Beispiel #3
0
    def test_transform_df(self):
        """transform() on a DataFrame with mean=35, std=35 yields (x - 35) / 35."""
        frame = pd.DataFrame([[35., 0.], [0., 17.5], [70., 35.]])
        scaled = StandardScaler(mean=35., std=35.).transform(frame)

        # The result is compared through .values, so it must expose that attribute.
        wanted = np.array([[0., -1.], [-1, -0.5], [1., 0.]])
        self.assertTrue(np.array_equal(wanted, scaled.values))
Beispiel #4
0
 def test_transform(self):
     """transform() on an ndarray with mean=35, std=35 yields (x - 35) / 35."""
     raw = np.array([[35., 0.], [0., 17.5], [70., 35.]])
     scaled = StandardScaler(mean=35., std=35.).transform(raw)

     wanted = np.array([[0., -1.], [-1, -0.5], [1., 0.]])
     self.assertTrue(np.array_equal(wanted, scaled))
Beispiel #5
0
def setup_dataloader(
    arr3d,
    seq_len,
    horizon,
    length_dict,
    train_batch_size,
    val_batch_size,
    test_batch_size,
    scale,
    features,
    logger,
    seq_sampling,
):
    """
    Split ``arr3d`` along its first axis into train/val/test parts, standardize
    channel 0 of each part and wrap the parts in SpatioTemporalDataLoader objects.

    The scaler is built from the mean/std of channel 0 of the training part
    only. When ``val_length`` is not positive, the test part doubles as the
    validation part.

    :param arr3d: 3-d array sliced along axis 0; ``[:, :, 0]`` is the channel
        that gets scaled (presumably (time, node, feature) -- confirm).
    :param seq_len: forwarded to every SpatioTemporalDataLoader.
    :param horizon: forwarded to every SpatioTemporalDataLoader.
    :param length_dict: dict with 'train_length', 'val_length', 'test_length'.
    :param train_batch_size: batch size of the (shuffled) training loader.
    :param val_batch_size: batch size of the validation loader.
    :param test_batch_size: batch size of the test loader.
    :param scale: forwarded to StandardScaler as ``scale``.
    :param features: forwarded to every SpatioTemporalDataLoader.
    :param logger: object whose ``info`` method receives the split summaries.
    :param seq_sampling: forwarded to every SpatioTemporalDataLoader.
    :return: dict with 'test_loader', 'val_loader', 'train_loader', 'scaler'.
    """
    n_train = length_dict['train_length']
    n_val = length_dict['val_length']
    n_test = length_dict['test_length']

    # NOTE(review): a test_length of 0 makes this slice the whole array.
    test_part = arr3d[-n_test:]
    if n_val > 0:
        val_part = arr3d[n_train:n_train + n_val]
    else:
        val_part = test_part
    train_part = arr3d[:n_train]

    train_channel = train_part[:, :, 0]
    scaler = StandardScaler(mean=train_channel.mean(),
                            std=train_channel.std(),
                            scale=scale)

    def _standardized(part):
        # Copy so the caller's array stays untouched; only channel 0 is scaled.
        scaled = part.copy()
        scaled[:, :, 0] = scaler.transform(part[:, :, 0])
        return scaled

    train_scaled = _standardized(train_part)
    val_scaled = _standardized(val_part)
    test_scaled = _standardized(test_part)

    def _loader(data, batch_size, shuffle):
        return SpatioTemporalDataLoader(data, batch_size, seq_len, horizon, shuffle=shuffle,
                                        features=features, seq_sampling=seq_sampling)

    # The loaders are built in the order test, val, train.
    dataloaders = {}
    dataloaders['test_loader'] = _loader(test_scaled, test_batch_size, False)
    assert dataloaders[
        'test_loader'].num_batch > 0, 'num_batch for test dataset should be > 0'
    dataloaders['val_loader'] = _loader(val_scaled, val_batch_size, False)
    dataloaders['train_loader'] = _loader(train_scaled, train_batch_size, True)
    dataloaders['scaler'] = scaler

    summaries = (
        ('[train]      | # timesteps: {:06d} | # samples: {:06d} | # batches: {:06d}',
         n_train, 'train_loader'),
        ('[validation] | # timesteps: {:06d} | # samples: {:06d} | # batches: {:06d}',
         n_val, 'val_loader'),
        ('[test]       | # timesteps: {:06d} | # samples: {:06d} | # batches: {:06d}',
         n_test, 'test_loader'),
    )
    for template, n_steps, key in summaries:
        loader = dataloaders[key]
        logger.info(template.format(n_steps, loader.size, loader.num_batch))

    return dataloaders