def train(file_path, P, frac):
    target, df = create_dataset(file_path)
    i = 0
    rolling_test = []
    train_size = int(frac * df.shape[0])
    starts = [pd.Timestamp(df.index[0]) for _ in range(len(target))]
    delay = 0
    grouper_train = MultivariateGrouper(max_target_dim=df.shape[1])
    grouper_test = MultivariateGrouper(max_target_dim=df.shape[1])

    train_ds = ListDataset([{
        FieldName.TARGET: targets,
        FieldName.START: start
    } for (targets, start) in zip(target[:, 0:train_size - P], starts)],
                           freq='1B')
    train_ds = grouper_train(train_ds)

    while train_size + delay < df.shape[0]:
        delay = int(P) * i
        test_ds = ListDataset([{
            FieldName.TARGET: targets,
            FieldName.START: start
        } for (targets, start) in zip(target[:, 0:train_size + delay], starts)
                               ],
                              freq='1B')
        test_ds = grouper_test(test_ds)
        rolling_test.append(test_ds)
        i += 1
    estimator = GPVAREstimator(prediction_length=P,
                               context_length=6,
                               freq='1B',
                               target_dim=df.shape[1],
                               trainer=Trainer(ctx="cpu", epochs=200))
    return train_ds, rolling_test, estimator, train_size
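A minimal driver sketch for the function above; the file path, parameters, and the evaluation step are assumptions, not part of the original:

# Hypothetical usage; "prices.csv", P, and frac are made-up values.
train_ds, rolling_test, estimator, train_size = train("prices.csv", P=5, frac=0.8)
predictor = estimator.train(train_ds)
for test_ds in rolling_test:
    # Evaluate each rolling window with GluonTS's backtest helper.
    forecast_it, ts_it = make_evaluation_predictions(
        test_ds, predictor=predictor, num_samples=100)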
def make_predictions(predictor, region_df_dict, test_date, regions_list, target_col, feat_dynamic_cols=None,
                     num_eval_samples=100):
    if feat_dynamic_cols is not None:
        test_data = ListDataset(
            [{"item_id": region,
              "start": region_df_dict[region].index[0],
              "target": region_df_dict[region][target_col][:test_date + timedelta(hours=md.NB_HOURS_PRED)],
              "feat_dynamic_real": [
                  region_df_dict[region][feat_dynamic_col][:test_date + timedelta(hours=md.NB_HOURS_PRED)]
                  for feat_dynamic_col in feat_dynamic_cols]
              }
             for region in regions_list],
            freq=md.FREQ
        )
    else:
        test_data = ListDataset(
            [{"item_id": region,
              "start": region_df_dict[region].index[0],
              "target": region_df_dict[region][target_col][:test_date + timedelta(hours=md.NB_HOURS_PRED)],
              }
             for region in regions_list],
            freq=md.FREQ
        )

    forecast_it, ts_it = make_evaluation_predictions(test_data, predictor=predictor, num_eval_samples=num_eval_samples)

    return list(forecast_it), list(ts_it)
Example #3
def test_estimator_with_features(estimator_constructor):
    freq = "1h"
    prediction_length = 12

    training_dataset = ListDataset(
        [
            {
                "start": "2021-01-01 00:00:00",
                "target": [1.0] * 200,
                "feat_static_cat": [0, 1],
                "feat_static_real": [42.0],
                "feat_dynamic_real": [[1.0] * 200] * 3,
            },
            {
                "start": "2021-02-01 00:00:00",
                "target": [1.0] * 100,
                "feat_static_cat": [1, 0],
                "feat_static_real": [1.0],
                "feat_dynamic_real": [[1.0] * 100] * 3,
            },
        ],
        freq=freq,
    )

    prediction_dataset = ListDataset(
        [
            {
                "start": "2021-01-01 00:00:00",
                "target": [1.0] * 200,
                "feat_static_cat": [0, 1],
                "feat_static_real": [42.0],
                "feat_dynamic_real": [[1.0] * (200 + prediction_length)] * 3,
            },
            {
                "start": "2021-02-01 00:00:00",
                "target": [1.0] * 100,
                "feat_static_cat": [1, 0],
                "feat_static_real": [1.0],
                "feat_dynamic_real": [[1.0] * (100 + prediction_length)] * 3,
            },
        ],
        freq=freq,
    )

    estimator = estimator_constructor(freq, prediction_length)

    predictor = estimator.train(
        training_data=training_dataset,
        validation_data=training_dataset,
        shuffle_buffer_length=5,
    )

    with tempfile.TemporaryDirectory() as td:
        predictor.serialize(Path(td))
        predictor_copy = Predictor.deserialize(Path(td))

    forecasts = predictor_copy.predict(prediction_dataset)

    for f in islice(forecasts, 5):
        f.mean
Example #4
def test_from_gluonts_list_dataset():
    number_of_ts = 10  # number of time series
    ts_length = 100  # number of timesteps
    prediction_length = 24
    freq = "D"
    custom_dataset = np.random.normal(size=(number_of_ts, ts_length))
    start = pd.Timestamp("01-01-2019", freq=freq)  # type: ignore

    gluonts_list_dataset = ListDataset(
        [{
            "target": x,
            "start": start
        } for x in custom_dataset[:, :-prediction_length]],
        freq=freq,
    )
    TimeSeriesDataFrame(gluonts_list_dataset)

    ts_df = TimeSeriesDataFrame(ListDataset(SAMPLE_ITERABLE, freq=freq))
    pd.testing.assert_frame_equal(ts_df,
                                  SAMPLE_TS_DATAFRAME,
                                  check_dtype=False)

    empty_list_dataset = ListDataset([], freq=freq)
    with pytest.raises(ValueError):
        TimeSeriesDataFrame(empty_list_dataset)
def train_predictor(df_dict, end_train_date, regions_list, target_col, feat_dynamic_cols=None):
    estimator = DeepAREstimator(freq=data_freq,
                                prediction_length=nb_hours_pred,
                                trainer=Trainer(epochs=max_epochs, learning_rate=learning_rate,
                                                learning_rate_decay_factor=0.01, patience=patience),
                                use_feat_dynamic_real=feat_dynamic_cols is not None)
    if feat_dynamic_cols is not None:
        training_data = ListDataset(
            [{"item_id": region,
              "start": df_dict[region].index[0],
              "target": df_dict[region][target_col][:end_train_date],
              "feat_dynamic_real": [df_dict[region][feat_dynamic_col][:end_train_date]
                                    for feat_dynamic_col in feat_dynamic_cols]
              }
             for region in regions_list],
            freq=data_freq
        )
    else:
        training_data = ListDataset(
            [{"item_id": region,
              "start": df_dict[region].index[0],
              "target": df_dict[region][target_col][:end_train_date]
              }
             for region in regions_list],
            freq=data_freq
        )

    predictor = estimator.train(training_data=training_data)
    
    return predictor
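train_predictor pairs naturally with make_predictions above; a hedged end-to-end sketch (df_dict, the dates, and the column name are assumptions):

# Hypothetical call chain; all inputs here are made up.
predictor = train_predictor(df_dict, end_train_date, regions_list,
                            target_col="load", feat_dynamic_cols=None)
forecasts, tss = make_predictions(predictor, df_dict, test_date,
                                  regions_list, target_col="load")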
    def preprocess_by_single_team(self, data):
        self.__log.info(
            "Starting preprocessing time series by a single team before training routine starts"
        )

        team_initials = self.team_initials
        custom_dataset = data[data.index.str.contains(team_initials)]
        custom_dataset = custom_dataset.goals.values
        custom_dataset = custom_dataset.reshape(1, -1)
        prediction_length = self.prediction_length
        start_date = self.start_date
        freq = self.freq

        start = pd.Timestamp(start_date, freq=freq)

        # train dataset: cut the last window of length "prediction_length", add "target" and "start" fields
        train_ds = ListDataset([{
            'target': x,
            'start': start
        } for x in custom_dataset[:, :-prediction_length]],
                               freq=freq)

        # test dataset: use the whole dataset, add "target" and "start" fields
        test_ds = ListDataset([{
            'target': x,
            'start': start
        } for x in custom_dataset],
                              freq=freq)
        self.__log.info("Finished preprocessing time series by single team")
        return train_ds, test_ds
Example #7
def train_and_test(training_data,
                   test_data,
                   freq,
                   num_test_windows,
                   model,
                   require_train=False):
    forecasts = []
    tss = []
    training_data = ListDataset(training_data, freq=freq)
    test_data = ListDataset(test_data, freq=freq)
    if require_train:
        predictor = model.train(training_data=training_data)
    else:
        predictor = model

    # Save the model locally for later deployment.
    model_name = model.__class__.__name__
    model_path = Path(f"models/{model_name}")
    os.makedirs(model_path, exist_ok=True)
    predictor.serialize(model_path)

    # Do the forecast on the test set.
    forecast_it, ts_it = make_evaluation_predictions(test_data,
                                                     predictor=predictor,
                                                     num_samples=100)
    forecasts.extend(list(forecast_it))
    tss.extend(list(ts_it))

    return forecasts, tss
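A sketch of how train_and_test might be driven; the data entries and estimator settings below are assumptions:

# Hypothetical inputs: one constant hourly series, 24-step horizon.
train_entries = [{"start": "2021-01-01 00:00:00", "target": [1.0] * 200}]
test_entries = [{"start": "2021-01-01 00:00:00", "target": [1.0] * 224}]
model = DeepAREstimator(freq="1H", prediction_length=24,
                        trainer=Trainer(epochs=5))
forecasts, tss = train_and_test(train_entries, test_entries, freq="1H",
                                num_test_windows=1, model=model,
                                require_train=True)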
Example #8
def GluonTS():
    #from pts.dataset import ListDataset
    #from pts.model.deepar import DeepAREstimator
    #from pts import Trainer
    #from pts.dataset import to_pandas
    # The pts imports above crashed on my system, so gluonts is used instead.
    from gluonts.dataset.common import ListDataset
    from gluonts.model.deepar import DeepAREstimator
    from gluonts.trainer import Trainer
    # df and forecast_size come from the enclosing script's globals.
    training_data = ListDataset([{
        "start": df.index[0],
        "target": df.value[:"2015-03-08 23:22:53"]
    }],
                                freq="5min")
    #estimator = DeepAREstimator(freq="5min",input_size = 43, prediction_length=forecast_size, trainer=Trainer(epochs=20))
    estimator = DeepAREstimator(freq="5min",
                                prediction_length=forecast_size,
                                trainer=Trainer(epochs=20))
    predictor = estimator.train(training_data=training_data)
    test_data = ListDataset([{
        "start": df.index[0],
        "target": df.value[:"2015-03-08 23:22:53"]
    }],
                            freq="5min")
    GluonTS_prediction = next(predictor.predict(test_data))
    GluonTS_mean_yhat = GluonTS_prediction.mean
    GluonTS_median_yhat = GluonTS_prediction.median
    return GluonTS_mean_yhat.tolist(), GluonTS_median_yhat.tolist(), GluonTS_prediction
Example #9
def prepare(df, P, frac, ep):
    rolling_test = []
    train_size = int(frac * df.shape[0])
    i = 0
    delay = 0
    train_ds = ListDataset([{
        "start": pd.Timestamp(df.index[0]),
        "target": df.Diff[0:train_size - P],
        'feat_dynamic_real': [
            df.fear[0:train_size - P], df.anger[0:train_size - P],
            df.anticipation[0:train_size - P], df.trust[0:train_size - P],
            df.suprise[0:train_size - P], df.positive[0:train_size - P],
            df.negative[0:train_size - P], df.sadness[0:train_size - P],
            df.disgust[0:train_size - P], df.joy[0:train_size - P],
            df.Volume_of_tweets[0:train_size - P],
            df.Retweet[0:train_size - P], df.Replies[0:train_size - P],
            df.Likes[0:train_size - P]
        ]
    }],
                           freq='1B')
    while train_size + delay < df.shape[0]:
        delay = int(P) * i
        test_ds = ListDataset([
            dict(start=pd.Timestamp(df.index[0]),
                 target=df.Diff[0:train_size + delay],
                 feat_dynamic_real=[
                     df.fear[0:train_size + delay],
                     df.anger[0:train_size + delay],
                     df.anticipation[0:train_size + delay],
                     df.trust[0:train_size + delay],
                     df.suprise[0:train_size + delay],
                     df.positive[0:train_size + delay],
                     df.negative[0:train_size + delay],
                     df.sadness[0:train_size + delay],
                     df.disgust[0:train_size + delay],
                     df.joy[0:train_size + delay],
                     df.Volume_of_tweets[0:train_size + delay],
                     df.Retweet[0:train_size + delay],
                     df.Replies[0:train_size + delay],
                     df.Likes[0:train_size + delay]
                 ])
        ],
                              freq='1B')
        i += 1
        rolling_test.append(test_ds)

    print("We have 1 training set of", train_size, "days and then ",
          len(rolling_test), "testing sets of ", delay, " days total")
    estimator = DeepAREstimator(prediction_length=P,
                                context_length=5,
                                freq='1B',
                                use_feat_dynamic_real=True,
                                trainer=Trainer(
                                    ctx="cpu",
                                    epochs=ep,
                                ))
    return train_ds, rolling_test, estimator, train_size, i
Example #10
def get_custom_dataset(name, horizon):
    """
    """
    if name == "electricity":
        csv_path = r'/scratch/project_2002244/DeepAR/data/elect/electricity.csv'
        df = pd.read_csv(csv_path,
                         sep=",",
                         index_col=0,
                         parse_dates=True,
                         decimal='.').astype(float)
        df.fillna(0, inplace=True)
        train_start = '2012-01-01 00:00:00'
        train_end = '2014-05-26 23:00:00'
        test_start = '2014-05-27 00:00:00'
        test_end = '2014-12-31 23:00:00'
    elif name == "europe_power_system":
        csv_path = r'/scratch/project_2002244/DeepAR/data/elect/europe_power_system.csv'
        df = pd.read_csv(csv_path,
                         sep=",",
                         index_col=0,
                         parse_dates=True,
                         decimal='.').astype(float)
        df.fillna(0, inplace=True)
        train_start = '2015-01-01 00:00:00'
        train_end = '2017-06-23 23:00:00'
        test_start = '2017-06-24 00:00:00'
        test_end = '2017-11-30 23:00:00'
    else:
        raise ValueError("unknown dataset name: {}".format(name))

    train_target_values = df[:train_end].T.values
    test_target_values = df[:(pd.Timestamp(test_start) -
                              timedelta(hours=1))].T.values
    start_dates = np.array([
        pd.Timestamp(df.index[0], freq='1H')
        for _ in range(train_target_values.shape[0])
    ])

    train_ds = ListDataset([{
        FieldName.TARGET: target,
        FieldName.START: start
    } for (target, start) in zip(train_target_values, start_dates)],
                           freq="1H")

    test_ds = ListDataset(
        [{
            FieldName.TARGET: target,
            FieldName.START: start
        } for index in pd.date_range(start=(pd.Timestamp(test_start) -
                                            timedelta(hours=1) +
                                            timedelta(hours=horizon)),
                                     end=pd.Timestamp(test_end),
                                     freq='{}H'.format(horizon))
         for (target, start) in zip(df[:index].T.values, start_dates)],
        freq="1H")
    return train_ds, test_ds
Example #11
def anomaly_gluonts(lista_datos,
                    num_fut,
                    desv_mse=0,
                    train=True,
                    name='model-name'):
    lista_puntos = np.arange(0, len(lista_datos), 1)
    df, df_train, df_test = create_train_test(lista_puntos, lista_datos)

    data_list = [{
        "start": "01-01-2012 04:05:00",
        "target": df_train['valores'].values
    }]

    dataset = ListDataset(data_iter=data_list, freq="5min")

    trainer = Trainer(epochs=15)
    estimator = deepar.DeepAREstimator(freq="5min",
                                       prediction_length=len(
                                           df_test['valores']),
                                       trainer=trainer)
    predictor = estimator.train(training_data=dataset)

    prediction = next(predictor.predict(dataset))

    engine = engine_output_creation('gluonts')
    engine.alerts_creation(prediction.mean.tolist(), df_test)
    engine.debug_creation(prediction.mean.tolist(), df_test)
    print('test length ' + str(df_test.shape) + ' vs prediction length ' +
          str(len(prediction.mean.tolist())))
    engine.metrics_generation(df_test['valores'].values,
                              prediction.mean.tolist())

    ############## ANOMALY FINISHED,
    print("Anomaly finished. Start forecasting")
    ############## FORECAST START

    data_list = [{
        "start": "01-01-2012 04:05:00",
        "target": df['valores'].values
    }]

    dataset = ListDataset(data_iter=data_list, freq="5min")

    trainer = Trainer(epochs=15)
    estimator = deepar.DeepAREstimator(freq="5min",
                                       prediction_length=num_fut,
                                       trainer=trainer)
    predictor = estimator.train(training_data=dataset)

    prediction = next(predictor.predict(dataset))

    engine.forecast_creation(prediction.mean.tolist(), len(lista_datos),
                             num_fut)
    return engine.engine_output
def train(epochs, prediction_length, num_layers, dropout_rate):

    #create train dataset
    df = pd.read_csv(filepath_or_buffer=os.environ['SM_CHANNEL_TRAIN'] +
                     "/train.csv",
                     header=0,
                     index_col=0)

    training_data = ListDataset([{
        "start": df.index[0],
        "target": df.value[:]
    }],
                                freq="5min")

    #define DeepAR estimator
    deepar_estimator = DeepAREstimator(freq="5min",
                                       prediction_length=prediction_length,
                                       dropout_rate=dropout_rate,
                                       num_layers=num_layers,
                                       trainer=Trainer(epochs=epochs))

    #train the model
    deepar_predictor = deepar_estimator.train(training_data=training_data)

    #create test dataset
    df = pd.read_csv(filepath_or_buffer=os.environ['SM_CHANNEL_TEST'] +
                     "/test.csv",
                     header=0,
                     index_col=0)

    test_data = ListDataset([{
        "start": df.index[0],
        "target": df.value[:]
    }],
                            freq="5min")

    #evaluate trained model on test data
    forecast_it, ts_it = make_evaluation_predictions(test_data,
                                                     deepar_predictor,
                                                     num_samples=100)
    forecasts = list(forecast_it)
    tss = list(ts_it)
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss),
                                          iter(forecasts),
                                          num_series=len(test_data))

    print("MSE:", agg_metrics["MSE"])

    #save the model
    deepar_predictor.serialize(pathlib.Path(os.environ['SM_MODEL_DIR']))

    return deepar_predictor
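Outside a real SageMaker job the SM_* environment variables are not set; a local smoke test might stub them first (the paths and hyperparameters below are assumptions):

# Hypothetical local stand-ins for what SageMaker injects at training time.
os.environ.setdefault("SM_CHANNEL_TRAIN", "./data/train")
os.environ.setdefault("SM_CHANNEL_TEST", "./data/test")
os.environ.setdefault("SM_MODEL_DIR", "./model")
train(epochs=5, prediction_length=12, num_layers=2, dropout_rate=0.1)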
Example #13
    def predict(self, test_frame, pred_intervals):
        """ makes in-sample, out-of-sample, or both in-sample and out-of-sample 
            predictions using test_frame for all timesteps included in pred_intervals
        """

        if not self.train_dataset.has_group_cols():
            interval = pred_intervals[0]
            feat_df = self.train_dataset.get_features(self.train_frame)
            targets = self.train_dataset.get_targets(self.train_frame)
            min_interval = np.min(interval)
            max_interval = np.max(interval)
            self.max_intervals.append(max_interval)
            if max_interval >= targets.shape[0]:
                feat_df = pd.concat((feat_df, test_frame))
            self._iterate_over_series(
                0,
                feat_df,
                targets,
                min_interval,
                max_interval,
            )
        else:
            group_cols = self.train_dataset.get_group_names()
            for series_idx, ((group, test_df), interval) in enumerate(
                    zip(test_frame.groupby(group_cols, sort=False),
                        pred_intervals)):
                if len(group_cols) == 1:
                    group = [group]
                query_list = [
                    f'{grp_col}=="{grp}"'
                    for grp_col, grp in zip(group_cols, group)
                ]
                train_df = self.train_frame.query(' & '.join(query_list))
                min_interval = np.min(interval)
                max_interval = np.max(interval)
                self.max_intervals.append(max_interval)
                if not train_df.shape[0]:
                    self.series_idxs.append(-1)
                    self.pre_pad_lens.append(0)
                    self.total_in_samples.append(0)
                else:
                    feat_df = self.train_dataset.get_features(train_df)
                    targets = self.train_dataset.get_targets(train_df)
                    if max_interval >= targets.shape[0]:
                        feat_df = pd.concat((feat_df, test_df))
                    self._iterate_over_series(series_idx, feat_df, targets,
                                              min_interval, max_interval)
        self.series_idxs = np.array(self.series_idxs)
        self.data = ListDataset(self.data, freq=self.train_dataset.get_freq())
        forecasts = self._forecast()
        forecasts = self._pad(forecasts)
        return forecasts  # Num Series, Quantiles, Horizon
Example #14
def test_multivariate_grouper_train(univariate_ts, multivariate_ts,
                                    train_fill_rule) -> None:
    univariate_ds = ListDataset(univariate_ts, freq="1D")
    multivariate_ds = ListDataset(multivariate_ts,
                                  freq="1D",
                                  one_dim_target=False)

    grouper = MultivariateGrouper(train_fill_rule=train_fill_rule)
    assert (list(grouper(univariate_ds))[0]["target"] == list(multivariate_ds)
            [0]["target"]).all()

    assert (list(grouper(univariate_ds))[0]["start"] == list(multivariate_ds)
            [0]["start"])
Example #15
    def apply(self):
        train_scale = map(self._max_normalize, iter(self.datasets.train))
        unzip_train_scale = list(zip(*train_scale))
        train = ListDataset(unzip_train_scale[0], freq=self.freq)
        scales = unzip_train_scale[1]
        test = None
        if self.datasets.test is not None:
            test_scale = zip(iter(self.datasets.test), scales)
            test = ListDataset(
                map(lambda x: self._max_normalize(x[0], x[1])[0], test_scale), freq=self.freq,
            )

        self.datasets = TrainDatasets(self.datasets.metadata, train, test)
        return self
Example #16
def constant_dataset() -> Tuple[DatasetInfo, Dataset, Dataset]:
    metadata = MetaData(
        freq='1H',
        feat_static_cat=[
            CategoricalFeatureInfo(
                name='feat_static_cat_000', cardinality='10'
            )
        ],
        feat_static_real=[BasicFeatureInfo(name='feat_static_real_000')],
    )

    start_date = '2000-01-01 00:00:00'

    train_ds = ListDataset(
        data_iter=[
            {
                'item': str(i),
                'start': start_date,
                'target': [float(i)] * 24,
                'feat_static_cat': [i],
                'feat_static_real': [float(i)],
            }
            for i in range(10)
        ],
        freq=metadata.freq,
    )

    test_ds = ListDataset(
        data_iter=[
            {
                'item': str(i),
                'start': start_date,
                'target': [float(i)] * 30,
                'feat_static_cat': [i],
                'feat_static_real': [float(i)],
            }
            for i in range(10)
        ],
        freq=metadata.freq,
    )

    info = DatasetInfo(
        name='constant_dataset',
        metadata=metadata,
        prediction_length=2,
        train_statistics=calculate_dataset_statistics(train_ds),
        test_statistics=calculate_dataset_statistics(test_ds),
    )

    return info, train_ds, test_ds
Example #17
def make_dummy_datasets_with_features(
    num_ts: int = 5,
    start: str = "2018-01-01",
    freq: str = "D",
    min_length: int = 5,
    max_length: int = 10,
    prediction_length: int = 3,
    cardinality: List[int] = [],
    num_feat_dynamic_real: int = 0,
    num_past_feat_dynamic_real: int = 0,
) -> Tuple[ListDataset, ListDataset]:

    data_iter_train = []
    data_iter_test = []

    for k in range(num_ts):
        ts_length = randint(min_length, max_length)
        data_entry_train = {
            FieldName.START: start,
            FieldName.TARGET: [0.0] * ts_length,
        }
        if len(cardinality) > 0:
            data_entry_train[FieldName.FEAT_STATIC_CAT] = [
                randint(0, c) for c in cardinality
            ]
        if num_past_feat_dynamic_real > 0:
            data_entry_train[FieldName.PAST_FEAT_DYNAMIC_REAL] = [
                [float(1 + k)] * ts_length
                for k in range(num_past_feat_dynamic_real)
            ]
        # Since used directly in predict and not in make_evaluate_predictions,
        # where the test target would be chopped, test and train target have
        # the same lengths
        data_entry_test = data_entry_train.copy()
        if num_feat_dynamic_real > 0:
            data_entry_train[FieldName.FEAT_DYNAMIC_REAL] = [
                [float(1 + k)] * ts_length
                for k in range(num_feat_dynamic_real)
            ]
            data_entry_test[FieldName.FEAT_DYNAMIC_REAL] = [
                [float(1 + k)] * (ts_length + prediction_length)
                for k in range(num_feat_dynamic_real)
            ]
        data_iter_train.append(data_entry_train)
        data_iter_test.append(data_entry_test)

    return (
        ListDataset(data_iter=data_iter_train, freq=freq),
        ListDataset(data_iter=data_iter_test, freq=freq),
    )
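For example, a call like the following (argument values are arbitrary) yields a train/test pair in which only the test set's dynamic features extend past the target:

# Three series with two static categorical and two dynamic real features.
train_ds, test_ds = make_dummy_datasets_with_features(
    num_ts=3, cardinality=[3, 2], num_feat_dynamic_real=2,
    prediction_length=3)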
Example #19
def constant_dataset() -> Tuple[DatasetInfo, Dataset, Dataset]:
    metadata = MetaData(
        freq="1H",
        feat_static_cat=[
            CategoricalFeatureInfo(
                name="feat_static_cat_000", cardinality="10"
            )
        ],
        feat_static_real=[BasicFeatureInfo(name="feat_static_real_000")],
    )

    start_date = "2000-01-01 00:00:00"

    train_ds = ListDataset(
        data_iter=[
            {
                FieldName.ITEM_ID: str(i),
                FieldName.START: start_date,
                FieldName.TARGET: [float(i)] * 24,
                FieldName.FEAT_STATIC_CAT: [i],
                FieldName.FEAT_STATIC_REAL: [float(i)],
            }
            for i in range(10)
        ],
        freq=metadata.freq,
    )

    test_ds = ListDataset(
        data_iter=[
            {
                FieldName.ITEM_ID: str(i),
                FieldName.START: start_date,
                FieldName.TARGET: [float(i)] * 30,
                FieldName.FEAT_STATIC_CAT: [i],
                FieldName.FEAT_STATIC_REAL: [float(i)],
            }
            for i in range(10)
        ],
        freq=metadata.freq,
    )

    info = DatasetInfo(
        name="constant_dataset",
        metadata=metadata,
        prediction_length=2,
        train_statistics=calculate_dataset_statistics(train_ds),
        test_statistics=calculate_dataset_statistics(test_ds),
    )

    return info, train_ds, test_ds
Example #20
def easy_train():
    import pandas as pd
    import matplotlib.pyplot as plt
    df = pd.read_csv("optiver_hacktheburgh/sp.csv",
                     header=0,
                     index_col=0,
                     usecols=[0, 2],
                     skiprows=lambda x: x % 5 != 0)
    # df[:100].plot(linewidth=2)
    print("Showing")
    # plt.show()
    from gluonts.dataset.common import ListDataset
    # The dataset frequency must match the estimator's freq below.
    training_data = ListDataset([{
        "start": df.index[0],
        "target": df.values.flatten()
    }],
                                freq="1min")
    #from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
    from gluonts.model.deepar import DeepAREstimator
    from gluonts.trainer import Trainer
    estimator = DeepAREstimator(freq="1min",
                                prediction_length=100,
                                trainer=Trainer(epochs=20))
    predictor = estimator.train(training_data=training_data)
    test_data = ListDataset([{
        "start": df.index[0],
        "target": df.values.flatten()[:1000]
    }],
                            freq="1min")
    full_test_data = ListDataset([{
        "start": df.index[0],
        "target": df.values.flatten()
    }],
                                 freq="1min")

    means = []
    for i, (test_entry, forecast) in enumerate(
            zip(full_test_data, predictor.predict(test_data))):
        # if i > 0:
        #  break
        print(forecast.dim())
        plt.plot(test_entry["target"])
        #forecast.plot(color='g', prediction_intervals=[], output_file="test.png")
        means.extend(list(forecast.mean))
        print(forecast.mean)
    l = len(test_entry["target"])
    plt.axhline(y=means[0], xmin=0, xmax=l, linewidth=2, color='r')
    plt.axvline(x=5000, color='b')
    plt.grid(which='both')
    plt.show()
Example #21
    def format_input(df, freq, target=None):
        if target:
            return ListDataset(
                [{
                    "start": df.index[0],
                    "target": df.to_frame().Target[:target]
                }],
                freq=freq,
            )

        return ListDataset([{
            "start": df.index[0],
            "target": df.to_frame().Target
        }],
                           freq=freq)
Example #22
def get_dataset():

    data_entry_list = [
        {
            "target": np.c_[
                np.array([0.2, 0.7, 0.2, 0.5, 0.3, 0.3, 0.2, 0.1]),
                np.array([0, 1, 2, 0, 1, 2, 2, 2]),
            ].T,
            "start": pd.Timestamp("2011-01-01 00:00:00", freq="H"),
            "end": pd.Timestamp("2011-01-01 03:00:00", freq="H"),
        },
        {
            "target": np.c_[
                np.array([0.2, 0.1, 0.2, 0.5, 0.4]), np.array([0, 1, 2, 1, 1])
            ].T,
            "start": pd.Timestamp("2011-01-01 00:00:00", freq="H"),
            "end": pd.Timestamp("2011-01-01 03:00:00", freq="H"),
        },
        {
            "target": np.c_[
                np.array([0.2, 0.7, 0.2, 0.5, 0.1, 0.2, 0.1]),
                np.array([0, 1, 2, 0, 1, 0, 2]),
            ].T,
            "start": pd.Timestamp("2011-01-01 00:00:00", freq="H"),
            "end": pd.Timestamp("2011-01-01 03:00:00", freq="H"),
        },
    ]

    return ListDataset(data_entry_list, freq="H", one_dim_target=False)
Example #23
def test_related_time_series_fail():
    params = dict(freq="1D", prediction_length=3, prophet={})

    dataset = ListDataset(
        data_iter=[
            {
                'start': '2017-01-01',
                'target': np.array([1.0, 2.0, 3.0, 4.0]),
                'feat_dynamic_real': np.array(
                    [
                        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                    ]
                ),
            }
        ],
        freq=params['freq'],
    )

    with pytest.raises(AssertionError) as excinfo:
        predictor = ProphetPredictor(**params)
        list(predictor.predict(dataset))

    assert str(excinfo.value) == (
        'Length mismatch for dynamic real-valued feature #0: '
        'expected 7, got 6'
    )
Example #24
def test_parallelized_predictor():
    dataset = ListDataset(
        data_iter=[{
            "start": "2012-01-01",
            "target": (np.zeros(20) + i).tolist()
        } for i in range(300)],
        freq="1H",
    )

    base_predictor = IdentityPredictor(freq="1H",
                                       prediction_length=10,
                                       num_samples=100)

    predictor = ParallelizedPredictor(base_predictor=base_predictor,
                                      num_workers=10,
                                      chunk_size=2)

    predictions = list(base_predictor.predict(dataset))
    parallel_predictions = list(predictor.predict(dataset))

    assert len(predictions) == len(parallel_predictions)

    for p, pp in zip(predictions, parallel_predictions):
        assert np.all(p.samples == pp.samples)
        assert np.all(p.index == pp.index)
Example #25
    def invocations() -> Response:
        request_data = request.data.decode("utf8").strip()
        instances = list(map(json.loads, request_data.splitlines()))
        predictions = []

        # we have to take this as the initial start-time since the first
        # forecast is produced before the loop in predictor.predict
        start = time.time()

        forecast_iter = predictor.predict(
            ListDataset(instances, predictor.freq),
            num_samples=configuration.num_samples,
        )

        for forecast in forecast_iter:
            end = time.time()
            prediction = forecast.as_json_dict(configuration)

            if DEBUG:
                prediction["debug"] = {"timing": end - start}

            predictions.append(prediction)

            start = time.time()

        lines = list(map(json.dumps, map(jsonify_floats, predictions)))
        return Response("\n".join(lines), mimetype="application/jsonlines")
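The handler expects one JSON object per line in the request body; a hedged example payload (field values are made up):

# Hypothetical request body: one GluonTS data entry per JSON line.
request_body = "\n".join([
    json.dumps({"start": "2020-01-01 00:00:00", "target": [1.0, 2.0, 3.0]}),
    json.dumps({"start": "2020-01-01 00:00:00", "target": [4.0, 5.0, 6.0]}),
])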
Example #26
def predict_fn(input_data, model):
    print('[DEBUG] input_data type:', type(input_data), input_data)
    if 'freq' in input_data:
        freq = input_data['freq']
    else:
        freq = '1H'
    if 'target_quantile' in input_data:
        target_quantile = float(input_data['target_quantile'])
    else:
        target_quantile = 0.5
    if 'use_log1p' in input_data:
        use_log1p = input_data['use_log1p']
    else:
        use_log1p = False
    if 'instances' in input_data:
        instances = input_data['instances']
    else:
        if isinstance(input_data, list):
            instances = input_data
        elif isinstance(input_data, dict):
            instances = [input_data]

    ds = ListDataset(parse_data(instances), freq=freq)
    
    inference_result = model.predict(ds)
    
    if use_log1p:
        result = [np.expm1(resulti.quantile(target_quantile)).tolist() for resulti in inference_result]
    else:
        result = [resulti.quantile(target_quantile).tolist() for resulti in inference_result]
        
    return result
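A hedged example of the input_data this handler accepts; the exact entry layout depends on parse_data, which is not shown, and model is assumed to be a deserialized predictor:

# Hypothetical payload; freq, target_quantile, and use_log1p fall back to
# '1H', 0.5, and False when omitted.
input_data = {
    "freq": "1H",
    "target_quantile": "0.9",
    "use_log1p": False,
    "instances": [{"start": "2020-01-01 00:00:00",
                   "target": [1.0, 2.0, 3.0]}],
}
result = predict_fn(input_data, model)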
Example #27
def test_feat_dynamic_real_success():
    params = dict(
        freq="1D", prediction_length=3, prophet_params=dict(n_changepoints=20)
    )

    dataset = ListDataset(
        data_iter=[
            {
                "start": "2017-01-01",
                "target": np.array([1.0, 2.0, 3.0, 4.0]),
                "feat_dynamic_real": np.array(
                    [
                        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
                        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
                    ]
                ),
            }
        ],
        freq=params["freq"],
    )

    predictor = ProphetPredictor(**params)

    act_fcst = next(predictor.predict(dataset))
    exp_fcst = np.arange(5.0, 5.0 + params["prediction_length"])

    assert np.all(np.isclose(act_fcst.quantile(0.1), exp_fcst, atol=0.02))
    assert np.all(np.isclose(act_fcst.quantile(0.5), exp_fcst, atol=0.02))
    assert np.all(np.isclose(act_fcst.quantile(0.9), exp_fcst, atol=0.02))
    def load(self, frequency: str, subset_filter: str,
             training: bool) -> ListDataset:
        """
        Load electricity dataset.

        :param frequency:
        :param subset_filter: dates as "from_date:to_date" in "YYYY-mm-dd H" format.
        :param training: If False then to_date will be extended to 7 days in future.
        :return:
        """
        dates = subset_filter.split(':')
        from_date = pd.to_datetime(dates[0])
        to_date = pd.to_datetime(dates[1])
        if not training:
            to_date = to_date + relativedelta(hours=24 * 7)

        items_all = [{
            'item_id': i,
            'start': from_date,
            'horizon': 24,
            'target': values
        } for i, values in enumerate(
            self.values[:, self._dates_to_index(from_date, to_date)])]

        return ListDataset(items_all, freq=frequency)
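Per the docstring, subset_filter carries both endpoints; a sketch of a call (the loader instance is an assumption):

# Hypothetical call; dates follow the "YYYY-mm-dd H" format described above.
ds = loader.load(frequency="1H",
                 subset_filter="2014-01-01 00:2014-03-01 00",
                 training=True)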
Example #29
def initialize_model() -> nn.HybridBlock:
    # dummy training data
    N = 10  # number of time series
    T = 100  # number of timesteps
    prediction_length = 24
    freq = "1H"
    custom_dataset = np.zeros(shape=(N, T))
    start = pd.Timestamp("01-01-2019",
                         freq=freq)  # can be different for each time series
    train_ds = ListDataset(
        [{
            "target": x,
            "start": start
        } for x in custom_dataset[:, :-prediction_length]],
        freq=freq,
    )
    # create a simple model
    estimator = SimpleFeedForwardEstimator(
        num_hidden_dimensions=[10],
        prediction_length=prediction_length,
        context_length=T,
        freq=freq,
        trainer=Trainer(
            ctx="cpu",
            epochs=1,
            learning_rate=1e-3,
            num_batches_per_epoch=1,
        ),
    )

    # train model
    predictor = estimator.train(train_ds)

    return predictor.prediction_net
Example #30
def test_feat_dynamic_real_bad_size():
    params = dict(freq="1D", prediction_length=3, prophet_params={})

    dataset = ListDataset(
        data_iter=[
            {
                "start": "2017-01-01",
                "target": np.array([1.0, 2.0, 3.0, 4.0]),
                "feat_dynamic_real": np.array(
                    [
                        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                    ]
                ),
            }
        ],
        freq=params["freq"],
    )

    with pytest.raises(AssertionError) as excinfo:
        predictor = ProphetPredictor(**params)
        list(predictor.predict(dataset))

    assert str(excinfo.value) == (
        "Length mismatch for dynamic real-valued feature #0: "
        "expected 7, got 6"
    )