Example 1
def test_callbacks():
    n_epochs = 4

    history = TrainingHistory()
    iter_avg = ModelIterationAveraging(avg_strategy=NTA(epochs=2 * n_epochs))

    dataset = "m4_hourly"
    dataset = get_dataset(dataset)
    prediction_length = dataset.metadata.prediction_length
    freq = dataset.metadata.freq

    estimator = SimpleFeedForwardEstimator(
        prediction_length=prediction_length,
        freq=freq,
        trainer=Trainer(epochs=n_epochs, callbacks=[history, iter_avg]),
    )

    predictor = estimator.train(dataset.train, num_workers=None)

    assert len(history.loss_history) == n_epochs

    ws = WarmStart(predictor=predictor)

    estimator = SimpleFeedForwardEstimator(
        prediction_length=prediction_length,
        freq=freq,
        trainer=Trainer(epochs=n_epochs, callbacks=[history, iter_avg, ws]),
    )
    predictor = estimator.train(dataset.train, num_workers=None)

    assert len(history.loss_history) == n_epochs * 2
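The `history` callback keeps accumulating per-epoch losses across both runs, which is what the final assertion checks. As a small illustrative sketch (not part of the original test), the recorded losses can be inspected directly:

# Sketch: print the loss curve captured by the TrainingHistory callback.
# `loss_history` is the same attribute the assertions above rely on.
for epoch_no, loss in enumerate(history.loss_history):
    print(f"epoch {epoch_no}: training loss {loss:.4f}")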
Example 2
def train(file_path, P, frac):
    target, df = create_dataset(file_path)
    i = 0
    rolling_test = []
    train_size = int(frac * df.shape[0])
    starts = [pd.Timestamp(df.index[0]) for _ in range(len(target))]
    delay = 0
    grouper_train = MultivariateGrouper(max_target_dim=df.shape[0])
    grouper_test = MultivariateGrouper(max_target_dim=df.shape[0])

    train_ds = ListDataset(
        [{FieldName.TARGET: targets, FieldName.START: start}
         for targets, start in zip(target[:, 0:train_size - P], starts)],
        freq="1B",
    )
    train_ds = grouper_train(train_ds)

    while train_size + delay < df.shape[0]:
        delay = int(P) * i
        test_ds = ListDataset(
            [{FieldName.TARGET: targets, FieldName.START: start}
             for targets, start in zip(target[:, 0:train_size + delay], starts)],
            freq="1B",
        )
        test_ds = grouper_test(test_ds)
        rolling_test.append(test_ds)
        i += 1
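    # NOTE: `create_dataset` (used above) and `pred_len` (used below) are
    # defined at module level in the original source and are not shown here.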
    estimator = GPVAREstimator(prediction_length=pred_len,
                               context_length=6,
                               freq='1B',
                               target_dim=df.shape[1],
                               trainer=Trainer(ctx="cpu", epochs=200))
    return train_ds, rolling_test, estimator, train_size
Example 3
def test_listing_1():
    """
    Test GluonTS paper examples from arxiv paper:
    https://arxiv.org/abs/1906.05264

    Listing 1
    """
    from gluonts.dataset.repository.datasets import get_dataset
    from gluonts.evaluation import backtest_metrics, Evaluator
    from gluonts.model.deepar import DeepAREstimator
    from gluonts.mx.trainer import Trainer

    # We use electricity in the paper but that would take too long to run in
    # the unit test
    dataset_info, train_ds, test_ds = constant_dataset()

    meta = dataset_info.metadata

    estimator = DeepAREstimator(
        freq=meta.freq,
        prediction_length=1,
        trainer=Trainer(epochs=1, batch_size=32),
    )
    predictor = estimator.train(train_ds)

    evaluator = Evaluator(quantiles=(0.1, 0.5, 0.9))
    agg_metrics, item_metrics = backtest_metrics(
        test_dataset=test_ds,
        predictor=predictor,
        evaluator=evaluator,
    )
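For reference, the paper's Listing 1 runs on the electricity dataset, which is why `get_dataset` is imported above even though the test substitutes `constant_dataset()`. A rough reconstruction of the paper's setup, with hyperparameters left at their defaults, would look like this:

# Rough reconstruction of the paper's Listing 1 (assumption: default
# trainer settings; far too slow for a unit test).
dataset = get_dataset("electricity")
estimator = DeepAREstimator(
    freq=dataset.metadata.freq,
    prediction_length=dataset.metadata.prediction_length,
)
predictor = estimator.train(dataset.train)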
Example 4
def assert_valid_param(param_name: str, param_values: List[Any]) -> None:
    try:
        for x in param_values:
            Trainer(**{param_name: x})
    except Exception as e:
        # pytest.fail raises its own exception, so no re-raise is needed
        pytest.fail(f'Unexpected exception when initializing Trainer: "{e}"')
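A couple of hypothetical invocations, for illustration only; `learning_rate` and `epochs` are genuine Trainer arguments, but the value lists are made up:

assert_valid_param("learning_rate", [1e-4, 1e-3, 1e-2])
assert_valid_param("epochs", [1, 10, 100])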
Example 5
    def __init__(
            self,
            freq: str,
            context_length: int,
            prediction_length: int,
            trainer: Trainer = Trainer(),
            num_layers: int = 1,
            num_cells: int = 50,
            cell_type: str = "lstm",
            num_parallel_samples: int = 100,
            cardinality: List[int] = list([1]),
            embedding_dimension: int = 10,
            distr_output: DistributionOutput = StudentTOutput(),
    ) -> None:
        model = RNN(mode=cell_type,
                    num_layers=num_layers,
                    num_hidden=num_cells)

        super().__init__(
            model=model,
            is_sequential=True,
            freq=freq,
            context_length=context_length,
            prediction_length=prediction_length,
            trainer=trainer,
            num_parallel_samples=num_parallel_samples,
            cardinality=cardinality,
            embedding_dimension=embedding_dimension,
            distr_output=distr_output,
        )
Example 6
def test_quantile_levels():
    from gluonts.dataset.common import ListDataset
    from gluonts.model.tft import TemporalFusionTransformerEstimator
    from gluonts.mx.trainer import Trainer

    dataset = ListDataset(
        [{"start": "2020-01-01", "target": [10.0] * 50}], freq="D"
    )

    estimator = TemporalFusionTransformerEstimator(
        freq="D", prediction_length=2, trainer=Trainer(epochs=1)
    )
    predictor = estimator.train(training_data=dataset)

    forecast = next(iter(predictor.predict(dataset)))

    assert isinstance(forecast, QuantileForecast)
    assert isinstance(predictor, GluonPredictor)
    assert isinstance(
        predictor.prediction_net, TemporalFusionTransformerPredictionNetwork
    )
    assert all(
        float(k) == q
        for k, q in zip(
            forecast.forecast_keys, predictor.prediction_net.output.quantiles
        )
    )
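Since the forecast is a QuantileForecast, individual quantile tracks can be looked up by level. A minimal sketch, assuming the median (0.5) is among the levels the network was trained to output:

# Sketch: look up the median track by quantile level. Assumes 0.5 is one
# of the predicted quantile levels.
median = forecast.quantile(0.5)
assert median.shape == (2,)  # == prediction_length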
Example 7
    def __init__(
            self,
            freq: str,
            context_length: int,
            prediction_length: int,
            trainer: Trainer = Trainer(),
            hidden_dim_sequence=list([50]),
            num_parallel_samples: int = 100,
            cardinality: List[int] = list([1]),
            embedding_dimension: int = 10,
            distr_output: DistributionOutput = StudentTOutput(),
    ) -> None:
        model = nn.HybridSequential()

        for layer, layer_dim in enumerate(hidden_dim_sequence):
            model.add(
                nn.Dense(
                    layer_dim,
                    flatten=False,
                    activation="relu",
                    prefix="mlp_%d_" % layer,
                ))

        super().__init__(
            model=model,
            is_sequential=False,
            freq=freq,
            context_length=context_length,
            prediction_length=prediction_length,
            trainer=trainer,
            num_parallel_samples=num_parallel_samples,
            cardinality=cardinality,
            embedding_dimension=embedding_dimension,
            distr_output=distr_output,
        )
Example 8
def test_smoke(
    hybridize: bool, target_dim_sample: int, use_marginal_transformation: bool
):
    num_batches_per_epoch = 1
    estimator = GPVAREstimator(
        distr_output=LowrankGPOutput(rank=2),
        num_cells=1,
        num_layers=1,
        pick_incomplete=True,
        prediction_length=metadata.prediction_length,
        target_dim=target_dim,
        target_dim_sample=target_dim_sample,
        freq=metadata.freq,
        use_marginal_transformation=use_marginal_transformation,
        trainer=Trainer(
            epochs=2,
            batch_size=10,
            learning_rate=1e-4,
            num_batches_per_epoch=num_batches_per_epoch,
            hybridize=hybridize,
        ),
    )

    predictor = estimator.train(training_data=dataset.train)

    agg_metrics, _ = backtest_metrics(
        test_dataset=dataset.test,
        predictor=predictor,
        num_samples=10,
        evaluator=MultivariateEvaluator(
            quantiles=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
        ),
    )
    assert agg_metrics["ND"] < 2.5
Example 9
def test_symbol_and_array(hybridize: bool):
    # Tests for cases like the one presented in issue 1211, in which the Inflated
    # Beta outputs used a method only available to arrays and not to symbols.
    # We simply go through a short training to ensure no exceptions are raised.
    data = [
        {
            "target": [0, 0.0460043, 0.263906, 0.4103112, 1],
            "start": pd.to_datetime("1999-01-04"),
        },
        {
            "target": [1, 0.65815564, 0.44982578, 0.58875054, 0],
            "start": pd.to_datetime("1999-01-04"),
        },
    ]
    dataset = common.ListDataset(data, freq="W-MON", one_dim_target=True)

    trainer = Trainer(epochs=1, num_batches_per_epoch=2, hybridize=hybridize)

    estimator = deepar.DeepAREstimator(
        freq="W",
        prediction_length=2,
        trainer=trainer,
        distr_output=ZeroAndOneInflatedBetaOutput(),
        context_length=2,
        batch_size=1,
        scaling=False,
    )

    estimator.train(dataset)
Example 10
    def __init__(
            self,
            freq: str,
            prediction_length: int,
            num_hidden_global: int = 50,
            num_layers_global: int = 1,
            num_factors: int = 10,
            num_hidden_local: int = 5,
            num_layers_local: int = 1,
            cell_type: str = "lstm",
            trainer: Trainer = Trainer(),
            context_length: Optional[int] = None,
            num_parallel_samples: int = 100,
            cardinality: List[int] = list([1]),
            embedding_dimension: int = 10,
            distr_output: DistributionOutput = StudentTOutput(),
    ) -> None:
        super().__init__(trainer=trainer)

        assert (prediction_length >
                0), "The value of `prediction_length` should be > 0"
        assert (context_length is None or context_length > 0
                ), "The value of `context_length` should be > 0"
        assert num_layers_global > 0, "The value of `num_layers_global` should be > 0"
        assert num_hidden_global > 0, "The value of `num_hidden_global` should be > 0"
        assert num_factors > 0, "The value of `num_factors` should be > 0"
        assert (num_hidden_local >
                0), "The value of `num_hidden_local` should be > 0"
        assert (num_layers_local >
                0), "The value of `num_layers_local` should be > 0"
        assert all(
            c > 0 for c in cardinality
        ), "Elements of `cardinality` should be > 0"
        assert (embedding_dimension >
                0), "The value of `embedding_dimension` should be > 0"
        assert (num_parallel_samples >
                0), "The value of `num_parallel_samples` should be > 0"

        self.freq = freq
        self.context_length = (context_length if context_length is not None
                               else prediction_length)
        self.prediction_length = prediction_length
        self.distr_output = distr_output
        self.num_parallel_samples = num_parallel_samples
        self.cardinality = cardinality
        self.embedding_dimensions = [embedding_dimension for _ in cardinality]

        self.global_model = RNNModel(
            mode=cell_type,
            num_hidden=num_hidden_global,
            num_layers=num_layers_global,
            num_output=num_factors,
        )

        # TODO: Allow the local model to be defined as an arbitrary local model, e.g. DF-GP and DF-LDS
        self.local_model = RNNModel(
            mode=cell_type,
            num_hidden=num_hidden_local,
            num_layers=num_layers_local,
            num_output=1,
        )
Example 11
def initialize_model() -> nn.HybridBlock:
    # dummy training data
    N = 10  # number of time series
    T = 100  # number of timesteps
    prediction_length = 24
    freq = "1H"
    custom_dataset = np.zeros(shape=(N, T))
    start = pd.Timestamp("01-01-2019",
                         freq=freq)  # can be different for each time series
    train_ds = ListDataset(
        [{
            "target": x,
            "start": start
        } for x in custom_dataset[:, :-prediction_length]],
        freq=freq,
    )
    # create a simple model
    estimator = SimpleFeedForwardEstimator(
        num_hidden_dimensions=[10],
        prediction_length=prediction_length,
        context_length=T,
        freq=freq,
        trainer=Trainer(
            ctx="cpu",
            epochs=1,
            learning_rate=1e-3,
            num_batches_per_epoch=1,
        ),
    )

    # train model
    predictor = estimator.train(train_ds)

    return predictor.prediction_net
Example 12
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        context_length: Optional[int] = None,
        trainer: Trainer = Trainer(),
        hidden_dim: int = 32,
        variable_dim: Optional[int] = None,
        num_heads: int = 4,
        num_outputs: int = 3,
        num_instance_per_series: int = 100,
        dropout_rate: float = 0.1,
        time_features: List[TimeFeature] = [],
        static_cardinalities: Dict[str, int] = {},
        dynamic_cardinalities: Dict[str, int] = {},
        static_feature_dims: Dict[str, int] = {},
        dynamic_feature_dims: Dict[str, int] = {},
        past_dynamic_features: List[str] = [],
        batch_size: int = 32,
    ) -> None:
        super(TemporalFusionTransformerEstimator,
              self).__init__(trainer=trainer, batch_size=batch_size)
        assert (prediction_length >
                0), "The value of `prediction_length` should be > 0"
        assert (context_length is None or context_length > 0
                ), "The value of `context_length` should be > 0"
        assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"

        self.freq = freq
        self.prediction_length = prediction_length
        self.context_length = context_length or prediction_length
        self.dropout_rate = dropout_rate
        self.hidden_dim = hidden_dim
        self.variable_dim = variable_dim or hidden_dim
        self.num_heads = num_heads
        self.num_outputs = num_outputs
        self.num_instance_per_series = num_instance_per_series

        if not time_features:
            self.time_features = time_features_from_frequency_str(self.freq)
        else:
            self.time_features = time_features
        self.static_cardinalities = static_cardinalities
        self.dynamic_cardinalities = dynamic_cardinalities
        self.static_feature_dims = static_feature_dims
        self.dynamic_feature_dims = dynamic_feature_dims
        self.past_dynamic_features = past_dynamic_features

        self.past_dynamic_cardinalities = {}
        self.past_dynamic_feature_dims = {}
        for name in self.past_dynamic_features:
            if name in self.dynamic_cardinalities:
                self.past_dynamic_cardinalities[name] = (
                    self.dynamic_cardinalities.pop(name)
                )
            elif name in self.dynamic_feature_dims:
                self.past_dynamic_feature_dims[name] = (
                    self.dynamic_feature_dims.pop(name)
                )
            else:
                raise ValueError(
                    f"Feature name {name} is not provided in feature dicts"
                )
Example 13
    def __init__(
        self,
        model: HybridBlock,
        is_sequential: bool,
        freq: str,
        context_length: int,
        prediction_length: int,
        trainer: Trainer = Trainer(),
        num_parallel_samples: int = 100,
        cardinality: List[int] = list([1]),
        embedding_dimension: int = 10,
        distr_output: DistributionOutput = StudentTOutput(),
        batch_size: int = 32,
    ) -> None:
        super().__init__(trainer=trainer, batch_size=batch_size)

        # TODO: error checking
        self.freq = freq
        self.context_length = context_length
        self.prediction_length = prediction_length
        self.distr_output = distr_output
        self.num_parallel_samples = num_parallel_samples
        self.cardinality = cardinality
        self.embedding_dimensions = [embedding_dimension for _ in cardinality]
        self.model = model
        self.is_sequential = is_sequential
Example 14
def test_lstnet(
    skip_size,
    ar_window,
    lead_time,
    prediction_length,
    hybridize,
    scaling,
    dtype,
):
    estimator = LSTNetEstimator(
        skip_size=skip_size,
        ar_window=ar_window,
        num_series=NUM_SERIES,
        channels=6,
        kernel_size=2,
        context_length=4,
        freq=freq,
        lead_time=lead_time,
        prediction_length=prediction_length,
        trainer=Trainer(epochs=1,
                        batch_size=2,
                        learning_rate=0.01,
                        hybridize=hybridize),
        scaling=scaling,
        dtype=dtype,
    )

    predictor = estimator.train(dataset.train)

    with tempfile.TemporaryDirectory() as directory:
        predictor.serialize(Path(directory))
        predictor_copy = Predictor.deserialize(Path(directory))
        assert predictor == predictor_copy

    forecast_it, ts_it = make_evaluation_predictions(dataset=dataset.test,
                                                     predictor=predictor,
                                                     num_samples=NUM_SAMPLES)
    forecasts = list(forecast_it)
    tss = list(ts_it)
    assert len(forecasts) == len(tss) == len(dataset.test)
    test_ds = dataset.test.list_data[0]
    for fct in forecasts:
        assert fct.freq == freq
        assert fct.samples.shape == (
            NUM_SAMPLES,
            prediction_length,
            NUM_SERIES,
        )
        assert (fct.start_date == pd.period_range(
            start=test_ds["start"],
            periods=test_ds["target"].shape[1],  # number of test periods
            freq=freq,
        )[-prediction_length])

    evaluator = MultivariateEvaluator(
        quantiles=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss),
                                          iter(forecasts),
                                          num_series=len(dataset.test))
    assert agg_metrics["ND"] < 1.0
Example 15
def test_dynamic_integration(
    train_length: int,
    test_length: int,
    prediction_length: int,
    target_start: str,
    rolling_start: str,
    num_dynamic_feat: int,
):
    """
    Trains an estimator on a rolled dataset with dynamic features.
    Tests https://github.com/awslabs/gluon-ts/issues/1390
    """
    train_ds = create_dynamic_dataset(target_start, train_length,
                                      num_dynamic_feat)
    rolled_ds = generate_rolling_dataset(
        dataset=create_dynamic_dataset(target_start, test_length,
                                       num_dynamic_feat),
        strategy=StepStrategy(prediction_length=prediction_length),
        start_time=pd.Timestamp(rolling_start),
    )
    estimator = DeepAREstimator(
        freq="D",
        prediction_length=prediction_length,
        context_length=2 * prediction_length,
        use_feat_dynamic_real=True,
        trainer=Trainer(epochs=1),
    )
    predictor = estimator.train(training_data=train_ds)
    forecast_it, ts_it = make_evaluation_predictions(rolled_ds,
                                                     predictor=predictor,
                                                     num_samples=100)
    training_agg_metrics, _ = Evaluator(num_workers=0)(ts_it, forecast_it)
    # it should have failed by this point if the dynamic features were wrong
    assert training_agg_metrics
Example 16
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        cardinality: List[int],
        embedding_dimension: int,
        encoder_mlp_layer: List[int],
        decoder_mlp_layer: List[int],
        decoder_mlp_static_dim: int,
        scaler: Scaler = NOPScaler(),
        context_length: Optional[int] = None,
        quantiles: Optional[List[float]] = None,
        trainer: Trainer = Trainer(),
        num_parallel_samples: int = 100,
    ) -> None:
        encoder = MLPEncoder(layer_sizes=encoder_mlp_layer)
        super(MLP2QRForecaster, self).__init__(
            freq=freq,
            prediction_length=prediction_length,
            encoder=encoder,
            cardinality=cardinality,
            embedding_dimension=embedding_dimension,
            decoder_mlp_layer=decoder_mlp_layer,
            decoder_mlp_static_dim=decoder_mlp_static_dim,
            context_length=context_length,
            scaler=scaler,
            quantiles=quantiles,
            trainer=trainer,
            num_parallel_samples=num_parallel_samples,
        )
Example 17
def evaluate(dataset_name, estimator):
    dataset = get_dataset(dataset_name)
    estimator = estimator(
        prediction_length=dataset.metadata.prediction_length,
        freq=dataset.metadata.freq,
        use_feat_static_cat=True,
        cardinality=[
            feat_static_cat.cardinality
            for feat_static_cat in dataset.metadata.feat_static_cat
        ],
        trainer=Trainer(
            epochs=epochs,
            num_batches_per_epoch=num_batches_per_epoch,
        ),
    )

    print(f"evaluating {estimator} on {dataset}")

    predictor = estimator.train(dataset.train)

    forecast_it, ts_it = make_evaluation_predictions(dataset.test,
                                                     predictor=predictor,
                                                     num_samples=100)

    agg_metrics, item_metrics = Evaluator()(ts_it,
                                            forecast_it,
                                            num_series=len(dataset.test))

    pprint.pprint(agg_metrics)

    eval_dict = agg_metrics
    eval_dict["dataset"] = dataset_name
    eval_dict["estimator"] = type(estimator).__name__
    return eval_dict
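A hypothetical driver for the helper above; `epochs` and `num_batches_per_epoch` are module-level globals in this snippet, and the dataset/estimator pairing here is illustrative:

# Illustrative only: m4_hourly ships feat_static_cat metadata, which the
# helper requires since it sets use_feat_static_cat=True.
results = [evaluate("m4_hourly", DeepAREstimator)]
pprint.pprint(results)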
Example 18
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        cardinality: List[int],
        embedding_dimension: int,
        encoder: Seq2SeqEncoder,
        decoder_mlp_layer: List[int],
        decoder_mlp_static_dim: int,
        scaler: Scaler = NOPScaler(),
        context_length: Optional[int] = None,
        quantiles: Optional[List[float]] = None,
        trainer: Trainer = Trainer(),
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
        num_parallel_samples: int = 100,
        batch_size: int = 32,
    ) -> None:
        assert (
            prediction_length > 0
        ), "The value of `prediction_length` should be > 0"
        assert (
            context_length is None or context_length > 0
        ), "The value of `context_length` should be > 0"
        assert quantiles is None or all(
            0 <= d <= 1 for d in quantiles
        ), "Elements of `quantiles` should be >= 0 and <= 1"

        super().__init__(trainer=trainer, batch_size=batch_size)

        self.context_length = (
            context_length if context_length is not None else prediction_length
        )
        self.prediction_length = prediction_length
        self.freq = freq
        self.quantiles = (
            quantiles if quantiles is not None else [0.1, 0.5, 0.9]
        )
        self.encoder = encoder
        self.decoder_mlp_layer = decoder_mlp_layer
        self.decoder_mlp_static_dim = decoder_mlp_static_dim
        self.scaler = scaler
        self.embedder = FeatureEmbedder(
            cardinalities=cardinality,
            embedding_dims=[embedding_dimension for _ in cardinality],
        )
        self.train_sampler = (
            train_sampler
            if train_sampler is not None
            else ExpectedNumInstanceSampler(
                num_instances=1.0, min_future=prediction_length
            )
        )
        self.validation_sampler = (
            validation_sampler
            if validation_sampler is not None
            else ValidationSplitSampler(min_future=prediction_length)
        )
        self.num_parallel_samples = num_parallel_samples
Example 19
def prepare(df, P, frac, ep):
    rolling_test = []
    train_size = int(frac * df.shape[0])
    i = 0
    delay = 0
    feature_columns = [
        "fear", "anger", "anticipation", "trust", "suprise", "positive",
        "negative", "sadness", "disgust", "joy", "Volume_of_tweets",
        "Retweet", "Replies", "Likes",
    ]  # "suprise" [sic] is the column's actual name in the source data

    train_ds = ListDataset(
        [{
            "start": pd.Timestamp(df.index[0]),
            "target": df.Diff[0:train_size - P],
            "feat_dynamic_real": [
                df[col][0:train_size - P] for col in feature_columns
            ],
        }],
        freq="1B",
    )
    while train_size + delay < df.shape[0]:
        delay = int(P) * i
        test_ds = ListDataset(
            [{
                "start": pd.Timestamp(df.index[0]),
                "target": df.Diff[0:train_size + delay],
                "feat_dynamic_real": [
                    df[col][0:train_size + delay] for col in feature_columns
                ],
            }],
            freq="1B",
        )
        i += 1
        rolling_test.append(test_ds)

    print("We have 1 training set of", train_size, "days and",
          len(rolling_test), "rolling test sets; the last one extends",
          delay, "days past the training window")
    estimator = DeepAREstimator(
        prediction_length=P,
        context_length=5,
        freq="1B",
        use_feat_dynamic_real=True,
        trainer=Trainer(ctx="cpu", epochs=ep),
    )
    return train_ds, rolling_test, estimator, train_size, i
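How the returned pieces are presumably consumed (assumed driver code, not part of the original): train once on the fixed window, then evaluate the same predictor against each progressively longer test set.

# Assumed usage of prepare(); `df` is the sentiment/price dataframe the
# snippet expects, P the prediction length, and ep the epoch count.
train_ds, rolling_test, estimator, train_size, n_windows = prepare(
    df, P=5, frac=0.8, ep=10
)
predictor = estimator.train(train_ds)
for test_ds in rolling_test:
    forecast_it, ts_it = make_evaluation_predictions(
        test_ds, predictor=predictor, num_samples=100
    )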
Example 20
    def __init__(
        self,
        prediction_length: int,
        freq: str,
        context_length: Optional[int] = None,
        decoder_mlp_dim_seq: Optional[List[int]] = None,
        trainer: Trainer = Trainer(),
        quantiles: Optional[List[float]] = None,
        scaling: bool = False,
        scaling_decoder_dynamic_feature: bool = False,
    ) -> None:

        assert (prediction_length >
                0), f"Invalid prediction length: {prediction_length}."
        assert decoder_mlp_dim_seq is None or all(
            d > 0 for d in decoder_mlp_dim_seq
        ), "Elements of `decoder_mlp_dim_seq` should be > 0"
        assert quantiles is None or all(
            0 <= d <= 1 for d in
            quantiles), "Elements of `quantiles` should be >= 0 and <= 1"

        self.decoder_mlp_dim_seq = (decoder_mlp_dim_seq if decoder_mlp_dim_seq
                                    is not None else [30])
        self.quantiles = (quantiles if quantiles is not None else
                          [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

        # `use_static_feat` and `use_dynamic_feat` are always True because the
        # network always receives some input, either from the data or from
        # constants
        encoder = RNNEncoder(
            mode="gru",
            hidden_size=50,
            num_layers=1,
            bidirectional=True,
            prefix="encoder_",
            use_static_feat=True,
            use_dynamic_feat=True,
        )

        decoder = ForkingMLPDecoder(
            dec_len=prediction_length,
            final_dim=self.decoder_mlp_dim_seq[-1],
            hidden_dimension_sequence=self.decoder_mlp_dim_seq[:-1],
            prefix="decoder_",
        )

        quantile_output = QuantileOutput(self.quantiles)

        super().__init__(
            encoder=encoder,
            decoder=decoder,
            quantile_output=quantile_output,
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            trainer=trainer,
            scaling=scaling,
            scaling_decoder_dynamic_feature=scaling_decoder_dynamic_feature,
        )
Example 21
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        context_length: int,
        num_series: int,
        skip_size: int,
        ar_window: int,
        channels: int,
        lead_time: int = 0,
        kernel_size: int = 6,
        trainer: Trainer = Trainer(),
        dropout_rate: Optional[float] = 0.2,
        output_activation: Optional[str] = None,
        rnn_cell_type: str = "gru",
        rnn_num_cells: int = 100,
        rnn_num_layers: int = 3,
        skip_rnn_cell_type: str = "gru",
        skip_rnn_num_layers: int = 1,
        skip_rnn_num_cells: int = 10,
        scaling: bool = True,
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
        batch_size: int = 32,
        dtype: DType = np.float32,
    ) -> None:
        super().__init__(
            trainer=trainer,
            lead_time=lead_time,
            batch_size=batch_size,
            dtype=dtype,
        )
        self.freq = freq
        self.num_series = num_series
        self.skip_size = skip_size
        self.ar_window = ar_window
        self.prediction_length = prediction_length
        self.context_length = context_length
        self.channels = channels
        self.kernel_size = kernel_size
        self.dropout_rate = dropout_rate
        self.output_activation = output_activation
        self.rnn_cell_type = rnn_cell_type
        self.rnn_num_layers = rnn_num_layers
        self.rnn_num_cells = rnn_num_cells
        self.skip_rnn_cell_type = skip_rnn_cell_type
        self.skip_rnn_num_layers = skip_rnn_num_layers
        self.skip_rnn_num_cells = skip_rnn_num_cells
        self.scaling = scaling
        self.train_sampler = (
            train_sampler
            if train_sampler is not None
            else ExpectedNumInstanceSampler(
                num_instances=1.0, min_future=prediction_length + lead_time
            )
        )
        self.validation_sampler = (
            validation_sampler
            if validation_sampler is not None
            else ValidationSplitSampler(
                min_future=prediction_length + lead_time
            )
        )
        self.dtype = dtype
Example 22
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        meta_context_length: Optional[List[int]] = None,
        meta_loss_function: Optional[List[str]] = None,
        meta_bagging_size: int = 10,
        trainer: Trainer = Trainer(),
        num_stacks: int = 30,
        widths: Optional[List[int]] = None,
        num_blocks: Optional[List[int]] = None,
        num_block_layers: Optional[List[int]] = None,
        expansion_coefficient_lengths: Optional[List[int]] = None,
        sharing: Optional[List[bool]] = None,
        stack_types: Optional[List[str]] = None,
        **kwargs,
    ) -> None:
        super().__init__()

        assert (prediction_length >
                0), "The value of `prediction_length` should be > 0"

        self.freq = freq
        self.prediction_length = prediction_length

        assert meta_loss_function is None or all(
            [
                loss_function in VALID_LOSS_FUNCTIONS
                for loss_function in meta_loss_function
            ]
        ), f"Each loss function has to be one of the following: {VALID_LOSS_FUNCTIONS}."
        assert meta_context_length is None or all([
            context_length > 0 for context_length in meta_context_length
        ]), "The value of each `context_length` should be > 0"
        assert (meta_bagging_size is None or meta_bagging_size > 0
                ), "The value of `meta_bagging_size` should be > 0"

        self.meta_context_length = (
            meta_context_length if meta_context_length is not None else
            [multiplier * prediction_length for multiplier in range(2, 8)])
        self.meta_loss_function = (meta_loss_function if meta_loss_function
                                   is not None else VALID_LOSS_FUNCTIONS)
        self.meta_bagging_size = meta_bagging_size

        # The following arguments are validated in the NBEATSEstimator:
        self.trainer = trainer
        print(f"TRAINER:{str(trainer)}")
        self.num_stacks = num_stacks
        self.widths = widths
        self.num_blocks = num_blocks
        self.num_block_layers = num_block_layers
        self.expansion_coefficient_lengths = expansion_coefficient_lengths
        self.sharing = sharing
        self.stack_types = stack_types

        # Actually instantiate the different models
        self.estimators = self._estimator_factory(**kwargs)
Example 23
def evaluate_nn(config):
    """ Pass a simple neural network to evaluate_gluon"""
    from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
    model = SimpleFeedForwardEstimator(
        freq=config['freq'],
        prediction_length=config['prediction_length'],
        trainer=Trainer(epochs=config['params'].get('epochs', 10)))

    evaluate_gluon(config, model)
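A hypothetical config for the helper above; the keys match exactly what evaluate_nn reads:

config = {
    "freq": "30min",
    "prediction_length": 48,
    "params": {"epochs": 5},  # falls back to 10 if omitted
}
evaluate_nn(config)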
Example 24
    def __init__(
        self,
        encoder: Seq2SeqEncoder,
        decoder: Seq2SeqDecoder,
        quantile_output: QuantileOutput,
        freq: str,
        prediction_length: int,
        context_length: Optional[int] = None,
        use_feat_dynamic_real: bool = False,
        use_feat_static_cat: bool = False,
        cardinality: List[int] = None,
        embedding_dimension: List[int] = None,
        add_time_feature: bool = False,
        add_age_feature: bool = False,
        enable_decoder_dynamic_feature: bool = False,
        trainer: Trainer = Trainer(),
        scaling: bool = False,
        dtype: DType = np.float32,
    ) -> None:
        super().__init__(trainer=trainer)

        assert (context_length is None or context_length > 0
                ), "The value of `context_length` should be > 0"
        assert (prediction_length >
                0), "The value of `prediction_length` should be > 0"
        assert (
            use_feat_static_cat or not cardinality
        ), "You should set `cardinality` if and only if `use_feat_static_cat=True`"
        assert cardinality is None or all(
            c > 0
            for c in cardinality), "Elements of `cardinality` should be > 0"
        assert embedding_dimension is None or all(
            e > 0 for e in embedding_dimension
        ), "Elements of `embedding_dimension` should be > 0"

        self.encoder = encoder
        self.decoder = decoder
        self.quantile_output = quantile_output
        self.freq = freq
        self.prediction_length = prediction_length
        self.context_length = (context_length if context_length is not None
                               else 4 * self.prediction_length)
        self.use_feat_dynamic_real = use_feat_dynamic_real
        self.use_feat_static_cat = use_feat_static_cat
        self.cardinality = (cardinality
                            if cardinality and use_feat_static_cat else [1])
        self.embedding_dimension = (
            embedding_dimension if embedding_dimension is not None else
            [min(50, (cat + 1) // 2) for cat in self.cardinality])
        self.add_time_feature = add_time_feature
        self.add_age_feature = add_age_feature
        self.use_dynamic_feat = (use_feat_dynamic_real or add_age_feature
                                 or add_time_feature)
        self.enable_decoder_dynamic_feature = enable_decoder_dynamic_feature
        self.scaling = scaling
        self.dtype = dtype
Example 25
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        cardinalities: List[int] = [],
        context_length: Optional[int] = None,
        trainer: Trainer = Trainer(),
        model_dim: int = 64,
        ffn_dim_multiplier: int = 2,
        num_heads: int = 4,
        num_layers: int = 3,
        num_outputs: int = 3,
        kernel_sizes: List[int] = [3, 5, 7, 9],
        distance_encoding: Optional[str] = "dot",
        pre_layer_norm: bool = False,
        dropout: float = 0.1,
        temperature: float = 1.0,
        time_features: Optional[List[TimeFeature]] = None,
        use_feat_dynamic_real: bool = True,
        use_feat_dynamic_cat: bool = False,
        use_feat_static_real: bool = False,
        use_feat_static_cat: bool = True,
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
        batch_size: int = 32,
    ):
        super().__init__(trainer=trainer, batch_size=batch_size)
        self.freq = freq
        self.prediction_length = prediction_length
        self.context_length = context_length or prediction_length
        self.model_dim = model_dim
        self.ffn_dim_multiplier = ffn_dim_multiplier
        self.num_heads = num_heads
        self.num_layers = num_layers
        self.num_outputs = num_outputs
        self.cardinalities = cardinalities
        self.kernel_sizes = kernel_sizes
        self.distance_encoding = distance_encoding
        self.pre_layer_norm = pre_layer_norm
        self.dropout = dropout
        self.temperature = temperature

        self.time_features = time_features or time_features_from_frequency_str(
            self.freq)
        self.use_feat_dynamic_cat = use_feat_dynamic_cat
        self.use_feat_dynamic_real = use_feat_dynamic_real
        self.use_feat_static_cat = use_feat_static_cat
        self.use_feat_static_real = use_feat_static_real
        self.train_sampler = (train_sampler if train_sampler is not None else
                              ExpectedNumInstanceSampler(
                                  num_instances=1.0,
                                  min_future=prediction_length))
        self.validation_sampler = (validation_sampler if validation_sampler
                                   is not None else ValidationSplitSampler(
                                       min_future=prediction_length))
Example 26
def test_deepar():
    from gluonts.model.deepar import DeepAREstimator

    config = {}
    config['directory'] = 'results/deepar'

    model = DeepAREstimator(freq="30min", 
                        prediction_length=48, 
                        trainer=Trainer(epochs=3))

    evaluate_model(model, config)
Example 27
def test_nn():
    from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator

    config = {}
    config['directory'] = 'results/nn'

    model = SimpleFeedForwardEstimator(freq="30min", 
                                prediction_length=48, 
                                trainer=Trainer(epochs=3))

    evaluate_model(model, config)
Example 28
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        sampling: bool = True,
        trainer: Trainer = Trainer(),
        num_hidden_dimensions: Optional[List[int]] = None,
        context_length: Optional[int] = None,
        distr_output: DistributionOutput = StudentTOutput(),
        imputation_method: Optional[MissingValueImputation] = None,
        batch_normalization: bool = False,
        mean_scaling: bool = True,
        num_parallel_samples: int = 100,
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
        batch_size: int = 32,
    ) -> None:
        """
        Defines an estimator. All parameters should be serializable.
        """
        super().__init__(trainer=trainer, batch_size=batch_size)

        assert (prediction_length >
                0), "The value of `prediction_length` should be > 0"
        assert (context_length is None or context_length > 0
                ), "The value of `context_length` should be > 0"
        assert num_hidden_dimensions is None or all(
            d > 0 for d in num_hidden_dimensions
        ), "Elements of `num_hidden_dimensions` should be > 0"
        assert (num_parallel_samples >
                0), "The value of `num_parallel_samples` should be > 0"

        self.num_hidden_dimensions = (num_hidden_dimensions
                                      if num_hidden_dimensions is not None else
                                      list([40, 40]))
        self.prediction_length = prediction_length
        self.context_length = (context_length if context_length is not None
                               else prediction_length)
        self.freq = freq
        self.distr_output = distr_output
        self.batch_normalization = batch_normalization
        self.mean_scaling = mean_scaling
        self.num_parallel_samples = num_parallel_samples
        self.sampling = sampling
        self.imputation_method = (imputation_method if imputation_method
                                  is not None else DummyValueImputation(
                                      self.distr_output.value_in_support))
        self.train_sampler = (train_sampler if train_sampler is not None else
                              ExpectedNumInstanceSampler(
                                  num_instances=1.0,
                                  min_future=prediction_length))
        self.validation_sampler = (validation_sampler if validation_sampler
                                   is not None else ValidationSplitSampler(
                                       min_future=prediction_length))
Example 29
    def __init__(
        self,
        prediction_interval_length: float,
        context_interval_length: Optional[float],
        num_marks: int,
        time_distr_output: TPPDistributionOutput = WeibullOutput(),
        embedding_dim: int = 5,
        trainer: Trainer = Trainer(hybridize=False),
        num_hidden_dimensions: int = 10,
        num_parallel_samples: int = 100,
        num_training_instances: int = 100,
        freq: str = "H",
        batch_size: int = 32,
    ) -> None:
        assert (
            not trainer.hybridize
        ), "DeepTPP currently only supports the non-hybridized training"

        super().__init__(trainer=trainer, batch_size=batch_size)

        assert (
            prediction_interval_length > 0
        ), "The value of `prediction_interval_length` should be > 0"
        assert (
            context_interval_length is None or context_interval_length > 0
        ), "The value of `context_interval_length` should be > 0"
        assert (
            num_hidden_dimensions > 0
        ), "The value of `num_hidden_dimensions` should be > 0"
        assert (
            num_parallel_samples > 0
        ), "The value of `num_parallel_samples` should be > 0"
        assert num_marks > 0, "The value of `num_marks` should be > 0"
        assert (
            num_training_instances > 0
        ), "The value of `num_training_instances` should be > 0"

        self.num_hidden_dimensions = num_hidden_dimensions
        self.prediction_interval_length = prediction_interval_length
        self.context_interval_length = (
            context_interval_length
            if context_interval_length is not None
            else prediction_interval_length
        )
        self.num_marks = num_marks
        self.time_distr_output = time_distr_output
        self.embedding_dim = embedding_dim
        self.num_parallel_samples = num_parallel_samples
        self.num_training_instances = num_training_instances
        self.freq = freq
Example 30
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        cardinality: int,
        trainer: Trainer = Trainer(),
        context_length: Optional[int] = None,
        kernel_output: KernelOutput = RBFKernelOutput(),
        params_scaling: bool = True,
        dtype: DType = np.float64,
        max_iter_jitter: int = 10,
        jitter_method: str = "iter",
        sample_noise: bool = True,
        time_features: Optional[List[TimeFeature]] = None,
        num_parallel_samples: int = 100,
        batch_size: int = 32,
    ) -> None:
        self.float_type = dtype
        super().__init__(
            trainer=trainer, batch_size=batch_size, dtype=self.float_type
        )

        assert (
            prediction_length > 0
        ), "The value of `prediction_length` should be > 0"
        assert cardinality > 0, "The value of `cardinality` should be > 0"
        assert (
            context_length is None or context_length > 0
        ), "The value of `context_length` should be > 0"
        assert (
            num_parallel_samples > 0
        ), "The value of `num_parallel_samples` should be > 0"

        self.freq = freq
        self.prediction_length = prediction_length
        self.context_length = (
            context_length if context_length is not None else prediction_length
        )
        self.cardinality = cardinality
        self.kernel_output = kernel_output
        self.params_scaling = params_scaling
        self.max_iter_jitter = max_iter_jitter
        self.jitter_method = jitter_method
        self.sample_noise = sample_noise
        self.time_features = (
            time_features
            if time_features is not None
            else time_features_from_frequency_str(self.freq)
        )
        self.num_parallel_samples = num_parallel_samples