Ejemplo n.º 1
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        cardinality: List[int],
        add_trend: bool = False,
        past_length: Optional[int] = None,
        num_periods_to_train: int = 4,
        trainer: Trainer = Trainer(epochs=100,
                                   num_batches_per_epoch=50,
                                   hybridize=False),
        num_layers: int = 2,
        num_cells: int = 40,
        cell_type: str = "lstm",
        num_parallel_samples: int = 100,
        dropout_rate: float = 0.1,
        use_feat_dynamic_real: bool = False,
        use_feat_static_cat: bool = True,
        embedding_dimension: Optional[List[int]] = None,
        issm: Optional[ISSM] = None,
        scaling: bool = True,
        time_features: Optional[List[TimeFeature]] = None,
        noise_std_bounds: ParameterBounds = ParameterBounds(1e-6, 1.0),
        prior_cov_bounds: ParameterBounds = ParameterBounds(1e-6, 1.0),
        innovation_bounds: ParameterBounds = ParameterBounds(1e-6, 0.01),
        batch_size: int = 32,
    ) -> None:
        super().__init__(trainer=trainer, batch_size=batch_size)

        assert (prediction_length >
                0), "The value of `prediction_length` should be > 0"
        assert (past_length is None
                or past_length > 0), "The value of `past_length` should be > 0"
        assert num_layers > 0, "The value of `num_layers` should be > 0"
        assert num_cells > 0, "The value of `num_cells` should be > 0"
        assert (num_parallel_samples >
                0), "The value of `num_parallel_samples` should be > 0"
        assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
        assert not use_feat_static_cat or any(c > 1 for c in cardinality), (
            f"Cardinality of at least one static categorical feature must be larger than 1 "
            f"if `use_feat_static_cat=True`. But cardinality provided is: {cardinality}"
        )
        assert embedding_dimension is None or all(
            e > 0 for e in embedding_dimension
        ), "Elements of `embedding_dimension` should be > 0"

        assert all(
            np.isfinite(p.lower) and np.isfinite(p.upper) and p.lower > 0
            for p in [noise_std_bounds, prior_cov_bounds, innovation_bounds]
        ), "All parameter bounds should be finite, and lower bounds should be positive"

        self.freq = freq
        self.past_length = (past_length if past_length is not None else
                            num_periods_to_train *
                            longest_period_from_frequency_str(freq))
        self.prediction_length = prediction_length
        self.add_trend = add_trend
        self.num_layers = num_layers
        self.num_cells = num_cells
        self.cell_type = cell_type
        self.num_parallel_samples = num_parallel_samples
        self.scaling = scaling
        self.dropout_rate = dropout_rate
        self.use_feat_dynamic_real = use_feat_dynamic_real
        self.use_feat_static_cat = use_feat_static_cat
        self.cardinality = (cardinality
                            if cardinality and use_feat_static_cat else [1])
        self.embedding_dimension = (
            embedding_dimension if embedding_dimension is not None else
            [min(50, (cat + 1) // 2) for cat in self.cardinality])

        self.issm = (issm if issm is not None else CompositeISSM.get_from_freq(
            freq, add_trend))

        self.time_features = (time_features if time_features is not None else
                              time_features_from_frequency_str(self.freq))

        self.noise_std_bounds = noise_std_bounds
        self.prior_cov_bounds = prior_cov_bounds
        self.innovation_bounds = innovation_bounds
Ejemplo n.º 2
0
def test_composite_issm_h_default():
    issm = CompositeISSM.get_from_freq(freq="H")

    assert issm.latent_dim() == 2 + 24 + 7
    assert issm.output_dim() == 1

    time_features = issm.time_features()

    assert len(time_features) == 3

    time_indices = [
        pd.date_range("2020-01-01 21:00:00", periods=10, freq="H"),
        pd.date_range("2020-01-02 03:00:00", periods=10, freq="H"),
    ]

    features = mx.nd.array(
        np.stack(
            [
                np.stack([f(time_index) for f in time_features], axis=-1)
                for time_index in time_indices
            ],
            axis=0,
        ))

    emission_coeff, transition_coeff, innovation_coeff = issm.get_issm_coeff(
        features)

    season_indices_hod = [
        [21, 22, 23, 0, 1, 2, 3, 4, 5, 6],
        [3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    ]

    season_indices_dow = [[2] * 3 + [3] * 7, [3] * 10]

    for item in range(2):
        for time in range(10):
            expected_coeff = mx.nd.concat(
                mx.nd.ones((1, 2)),
                mx.nd.one_hot(mx.nd.array([season_indices_hod[item][time]]),
                              24),
                mx.nd.one_hot(mx.nd.array([season_indices_dow[item][time]]),
                              7),
                dim=-1,
            )

            sliced_emission_coeff = mx.nd.slice(
                emission_coeff,
                begin=(item, time, None, None),
                end=(item + 1, time + 1, None, None),
            )
            assert (sliced_emission_coeff == expected_coeff).asnumpy().all()

            sliced_transition_coeff = mx.nd.slice(
                transition_coeff,
                begin=(item, time, None, None),
                end=(item + 1, time + 1, None, None),
            )
            expected_transition_coeff = mx.nd.eye(2 + 24 + 7)
            expected_transition_coeff[0, 1] = 1
            assert ((sliced_transition_coeff == expected_transition_coeff
                     ).asnumpy().all())

            sliced_innovation_coeff = mx.nd.slice(
                innovation_coeff,
                begin=(item, time, None),
                end=(item + 1, time + 1, None),
            )
            assert (sliced_innovation_coeff == expected_coeff).asnumpy().all()
Ejemplo n.º 3
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        add_trend: bool = False,
        past_length: Optional[int] = None,
        num_periods_to_train: int = 4,
        trainer: Trainer = Trainer(epochs=25, hybridize=False),
        num_layers: int = 2,
        num_cells: int = 40,
        cell_type: str = "lstm",
        num_eval_samples: int = 100,
        dropout_rate: float = 0.1,
        use_feat_dynamic_real: bool = False,
        use_feat_static_cat: bool = False,
        cardinality: Optional[List[int]] = None,
        embedding_dimension: int = 20,
        issm: Optional[ISSM] = None,
        scaling: bool = True,
        time_features: Optional[List[TimeFeature]] = None,
    ) -> None:
        super().__init__(trainer=trainer)

        assert (
            prediction_length > 0
        ), "The value of `prediction_length` should be > 0"
        assert (
            past_length is None or past_length > 0
        ), "The value of `past_length` should be > 0"
        assert num_layers > 0, "The value of `num_layers` should be > 0"
        assert num_cells > 0, "The value of `num_cells` should be > 0"
        assert (
            num_eval_samples > 0
        ), "The value of `num_eval_samples` should be > 0"
        assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
        assert (
            cardinality is not None or not use_feat_static_cat
        ), "You must set `cardinality` if `use_feat_static_cat=True`"
        assert cardinality is None or [
            c > 0 for c in cardinality
        ], "Elements of `cardinality` should be > 0"
        assert (
            embedding_dimension > 0
        ), "The value of `embedding_dimension` should be > 0"

        self.freq = freq
        self.past_length = (
            past_length
            if past_length is not None
            else num_periods_to_train * longest_period_from_frequency_str(freq)
        )
        self.prediction_length = prediction_length
        self.add_trend = add_trend
        self.num_layers = num_layers
        self.num_cells = num_cells
        self.cell_type = cell_type
        self.num_sample_paths = num_eval_samples
        self.scaling = scaling
        self.dropout_rate = dropout_rate
        self.use_feat_dynamic_real = use_feat_dynamic_real
        self.use_feat_static_cat = use_feat_static_cat
        self.cardinality = cardinality if use_feat_static_cat else [1]
        self.embedding_dimension = embedding_dimension

        self.issm = (
            issm
            if issm is not None
            else CompositeISSM.get_from_freq(freq, add_trend)
        )

        self.time_features = (
            time_features
            if time_features is not None
            else time_features_from_frequency_str(self.freq)
        )
Ejemplo n.º 4
0
                past_length=28,
                n_steps_forecast=14,
            )
            freq = dataset.metadata.freq
            cardinalities = get_cardinalities(
                dataset=dataset, add_trend=add_trend
            )
            batch = next(iter(inference_loader))

            gts_seasonal_indicators = mx.ndarray.concat(
                batch["past_seasonal_indicators"],
                batch["future_seasonal_indicators"],
                dim=1,
            )
            gts_issm = GtsCompositeISSM.get_from_freq(
                freq=freq, add_trend=add_trend
            )
            (
                emission_coeff,
                transition_coeff,
                innovation_coeff,
            ) = gts_issm.get_issm_coeff(gts_seasonal_indicators)

            data = transform_gluonts_to_pytorch(
                batch=batch,
                device="cuda",
                dtype=torch.float32,
                time_features=TimeFeatType.seasonal_indicator,
                **cardinalities,
            )