Example #1
0
def test_smoke(
    hybridize: bool, target_dim_sample: int, use_marginal_transformation: bool
):
    """Train a minimal GPVAR model end to end and sanity-check its ND metric.

    The estimator is deliberately tiny (one layer, one cell, two epochs of a
    single batch each) so that the test only verifies that training,
    prediction and evaluation run together, not that the model is accurate.

    Parameters
    ----------
    hybridize
        Forwarded to ``Trainer(hybridize=...)``.
    target_dim_sample
        Forwarded to ``GPVAREstimator(target_dim_sample=...)``.
    use_marginal_transformation
        Forwarded to ``GPVAREstimator(use_marginal_transformation=...)``.
    """
    # NOTE(review): `metadata`, `dataset` and `target_dim` come from module
    # scope that is not visible in this excerpt.
    gp_output = LowrankGPOutput(rank=2)
    smoke_trainer = Trainer(
        epochs=2,
        batch_size=10,
        learning_rate=1e-4,
        num_batches_per_epoch=1,
        hybridize=hybridize,
    )
    estimator = GPVAREstimator(
        freq=metadata.freq,
        prediction_length=metadata.prediction_length,
        target_dim=target_dim,
        target_dim_sample=target_dim_sample,
        distr_output=gp_output,
        num_layers=1,
        num_cells=1,
        pick_incomplete=True,
        use_marginal_transformation=use_marginal_transformation,
        trainer=smoke_trainer,
    )

    predictor = estimator.train(training_data=dataset.train)

    # Evaluate on the deciles 0.1 .. 0.9.
    decile_evaluator = MultivariateEvaluator(
        quantiles=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
    )
    backtest_result = backtest_metrics(
        test_dataset=dataset.test,
        predictor=predictor,
        num_samples=10,
        evaluator=decile_evaluator,
    )
    # Only the aggregated metrics are inspected; the second element of the
    # returned pair is ignored, exactly as in a two-name unpacking.
    agg_metrics, _ = backtest_result

    # Loose bound: catches a badly broken pipeline, not a weak model.
    assert agg_metrics["ND"] < 2.5
Example #2
0
def test_gp_output():
    """Check that ``LowrankGPOutput``'s projection yields the expected shapes.

    An all-ones input of shape ``(batch, dim, hidden_size)`` is pushed through
    the args projection, which must return exactly three arrays ``mu``, ``D``
    and ``W`` of shapes ``(batch, dim)``, ``(batch, dim)`` and
    ``(batch, dim, rank)`` respectively.
    """
    batch, hidden_size, dim, rank = 1, 3, 4, 2

    states = mx.ndarray.ones(shape=(batch, dim, hidden_size))

    args_proj = LowrankGPOutput(dim=dim, rank=rank).get_args_proj()
    args_proj.initialize()

    # Strict three-way unpacking: fails if the projection returns any other
    # number of outputs.
    mu, D, W = args_proj(states)

    shape_checks = (
        (mu, (batch, dim)),
        (D, (batch, dim)),
        (W, (batch, dim, rank)),
    )
    for array, expected_shape in shape_checks:
        assert array.shape == expected_shape
Example #3
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        target_dim: int,
        trainer: Trainer = Trainer(),
        context_length: Optional[int] = None,
        num_layers: int = 2,
        num_cells: int = 40,
        cell_type: str = "lstm",
        num_parallel_samples: int = 100,
        dropout_rate: float = 0.1,
        # number of dimensions to sample at training time
        target_dim_sample: Optional[int] = None,
        distr_output: Optional[DistributionOutput] = None,
        rank: Optional[int] = 2,
        scaling: bool = True,
        pick_incomplete: bool = False,
        lags_seq: Optional[List[int]] = None,
        shuffle_target_dim: bool = True,
        time_features: Optional[List[TimeFeature]] = None,
        conditioning_length: int = 100,
        use_marginal_transformation: bool = False,
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
        batch_size: int = 32,
    ) -> None:
        """Validate the hyper-parameters and store the estimator configuration.

        Parameters
        ----------
        freq
            Frequency string; also used to derive the default ``lags_seq``
            and ``time_features``.
        prediction_length
            Forecast horizon; must be > 0.
        target_dim
            Dimensionality of the multivariate target.
        trainer
            Trainer passed on to the parent constructor.
        context_length
            Must be > 0 when given; defaults to ``prediction_length``.
        num_layers
            Must be > 0; stored as ``self.num_layers``.
        num_cells
            Must be > 0; stored as ``self.num_cells``.
        cell_type
            Stored as ``self.cell_type``.
        num_parallel_samples
            Must be > 0; stored as ``self.num_parallel_samples``.
        dropout_rate
            Must be >= 0; stored as ``self.dropout_rate``.
        target_dim_sample
            Number of dimensions to sample at training time. Defaults to
            ``target_dim`` and is capped at ``target_dim``.
        distr_output
            Distribution output to use; defaults to
            ``LowrankGPOutput(rank=rank)``.
        rank
            Only used to build the default ``distr_output``.
        scaling
            Stored as ``self.scaling``.
        pick_incomplete
            When true, the default samplers use ``min_past=0``; otherwise
            they require ``history_length`` past observations.
        lags_seq
            Lag indices; defaults to ``get_lags_for_frequency(freq_str=freq)``.
        shuffle_target_dim
            Stored as ``self.shuffle_target_dim``.
        time_features
            Defaults to ``time_features_from_frequency_str(freq)``.
        conditioning_length
            Stored as ``self.conditioning_length``.
        use_marginal_transformation
            When true, ``self.output_transform`` is set to
            ``cdf_to_gaussian_forward_transform``; otherwise to ``None``.
        train_sampler
            Sampler for training instances; defaults to an
            ``ExpectedNumInstanceSampler`` with ``num_instances=1.0``.
        validation_sampler
            Sampler for validation instances; defaults to a
            ``ValidationSplitSampler``.
        batch_size
            Passed on to the parent constructor.

        Raises
        ------
        AssertionError
            If one of the numeric hyper-parameters is out of range.
        """
        super().__init__(trainer=trainer, batch_size=batch_size)

        assert (
            prediction_length > 0
        ), "The value of `prediction_length` should be > 0"
        assert (
            context_length is None or context_length > 0
        ), "The value of `context_length` should be > 0"
        assert num_layers > 0, "The value of `num_layers` should be > 0"
        assert num_cells > 0, "The value of `num_cells` should be > 0"
        # The message names the parameter actually being checked (it used to
        # mention a non-existent `num_eval_samples`).
        assert (
            num_parallel_samples > 0
        ), "The value of `num_parallel_samples` should be > 0"
        assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"

        if distr_output is not None:
            self.distr_output = distr_output
        else:
            self.distr_output = LowrankGPOutput(rank=rank)
        self.freq = freq
        self.context_length = (
            context_length if context_length is not None else prediction_length
        )
        self.prediction_length = prediction_length
        self.target_dim = target_dim
        # Never sample more dimensions than the target actually has.
        self.target_dim_sample = (
            target_dim
            if target_dim_sample is None
            else min(target_dim_sample, target_dim)
        )
        self.shuffle_target_dim = shuffle_target_dim
        self.num_layers = num_layers
        self.num_cells = num_cells
        self.cell_type = cell_type
        self.num_parallel_samples = num_parallel_samples
        self.dropout_rate = dropout_rate

        self.lags_seq = (
            lags_seq
            if lags_seq is not None
            else get_lags_for_frequency(freq_str=freq)
        )
        self.time_features = (
            time_features
            if time_features is not None
            else time_features_from_frequency_str(self.freq)
        )

        # The history window covers the context plus the largest lag.
        self.history_length = self.context_length + max(self.lags_seq)
        self.pick_incomplete = pick_incomplete
        self.scaling = scaling
        self.conditioning_length = conditioning_length
        self.use_marginal_transformation = use_marginal_transformation
        self.output_transform = (
            cdf_to_gaussian_forward_transform
            if self.use_marginal_transformation
            else None
        )

        # Shared by both default samplers: with `pick_incomplete` no minimum
        # amount of past data is required.
        default_min_past = 0 if pick_incomplete else self.history_length

        self.train_sampler = (
            train_sampler
            if train_sampler is not None
            else ExpectedNumInstanceSampler(
                num_instances=1.0,
                min_past=default_min_past,
                min_future=prediction_length,
            )
        )
        self.validation_sampler = (
            validation_sampler
            if validation_sampler is not None
            else ValidationSplitSampler(
                min_past=default_min_past,
                min_future=prediction_length,
            )
        )