Ejemplo n.º 1
0
def test_neg_binomial(mu_alpha: Tuple[float, float], hybridize: bool) -> None:
    """
    Check that maximum-likelihood fitting via SGD recovers the parameters
    of a negative binomial distribution from samples drawn from it.

    ``mu_alpha`` is the ``(mu, alpha)`` pair used to generate the samples;
    ``hybridize`` is forwarded unchanged to the estimation routine.
    """
    mu, alpha = mu_alpha

    # Draw NUM_SAMPLES observations, every one of them using the same
    # (mu, alpha) pair.
    sample_shape = (NUM_SAMPLES, )
    samples = NegativeBinomial(
        mu=mx.nd.zeros(sample_shape) + mu,
        alpha=mx.nd.zeros(sample_shape) + alpha,
    ).sample()

    # Start the optimisation away from the true values: mu is shifted down
    # and alpha up by the same relative offset. The shifted values go
    # through inv_softplus (presumably because the output layer applies a
    # softplus to its biases -- confirm against the distribution output).
    relative_offset = START_TOL_MULTIPLE * TOL
    init_biases = [
        inv_softplus(mu - relative_offset * mu),
        inv_softplus(alpha + relative_offset * alpha),
    ]

    mu_hat, alpha_hat = maximum_likelihood_estimate_sgd(
        NegativeBinomialOutput(),
        samples,
        hybridize=hybridize,
        init_biases=init_biases,
        num_epochs=PositiveInt(15),
    )

    # Both estimates must land within a relative tolerance TOL of the truth.
    mu_error = np.abs(mu_hat - mu)
    assert mu_error < TOL * mu, \
        f"mu did not match: mu = {mu}, mu_hat = {mu_hat}"

    alpha_error = np.abs(alpha_hat - alpha)
    assert alpha_error < TOL * alpha, \
        f"alpha did not match: alpha = {alpha}, alpha_hat = {alpha_hat}"
 def get_distribution_type(self):
     """Return the distribution output object to use for the target column.

     An explicit ``self.count_data`` setting wins: a truthy value selects
     ``NegativeBinomialOutput`` and a value equal to ``False`` selects
     ``StudentTOutput``. Any other value (e.g. ``None``) falls through to
     inference from ``self.target_semantic_types``: integer targets use
     ``NegativeBinomialOutput`` when their minimum is non-negative and
     ``StudentTOutput`` otherwise; float targets use ``StudentTOutput``.

     Raises:
         ValueError: if the target is tagged as neither Integer nor Float.
     """
     if self.count_data:
         return NegativeBinomialOutput()
     # Equality (not identity/truthiness) is intentional here: values such
     # as None must not match and have to reach the inference below.
     if self.count_data == False:  # noqa: E712
         return StudentTOutput()

     semantic_types = self.target_semantic_types
     if "http://schema.org/Integer" in semantic_types:
         target_values = self.frame.iloc[:, self.target_col]
         is_non_negative = np.min(target_values) >= 0
         return NegativeBinomialOutput() if is_non_negative else StudentTOutput()
     if "http://schema.org/Float" in semantic_types:
         return StudentTOutput()

     raise ValueError(
         "Target column is not of type 'Integer' or 'Float'")
 (
     DirichletMultinomialOutput(dim=5, n_trials=10),
     mx.nd.random.gamma(shape=(3, 4, 5)),
     [None],
     (3, 4),
     (5, ),
 ),
 (
     LaplaceOutput(),
     mx.nd.random.normal(shape=(3, 4, 5, 6)),
     [None, mx.nd.ones(shape=(3, 4, 5))],
     (3, 4, 5),
     (),
 ),
 (
     NegativeBinomialOutput(),
     mx.nd.random.normal(shape=(3, 4, 5, 6)),
     [None, mx.nd.ones(shape=(3, 4, 5))],
     (3, 4, 5),
     (),
 ),
 (
     UniformOutput(),
     mx.nd.random.normal(shape=(3, 4, 5, 6)),
     [None, mx.nd.ones(shape=(3, 4, 5))],
     (3, 4, 5),
     (),
 ),
 (
     PiecewiseLinearOutput(num_pieces=3),
     mx.nd.random.normal(shape=(3, 4, 5, 6)),
Ejemplo n.º 4
0
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        trainer: Optional[Trainer] = Trainer(),
        context_length: Optional[int] = None,
        num_layers: Optional[int] = 2,
        num_cells: Optional[int] = 40,
        cell_type: Optional[str] = "lstm",
        dropout_rate: Optional[float] = 0.1,
        use_feat_dynamic_real: Optional[bool] = False,
        use_feat_static_cat: Optional[bool] = False,
        use_feat_static_real: Optional[bool] = False,
        cardinality: Optional[List[int]] = None,
        embedding_dimension: Optional[List[int]] = None,
        scaling: Optional[bool] = True,
        lags_seq: Optional[List[int]] = None,
        time_features: Optional[List[TimeFeature]] = None,
        num_parallel_samples: Optional[int] = 100,
        forecast_type: Optional[str] = "flat",
        dtype: Optional[DType] = np.float32,
    ) -> None:
        """Validate the hyper-parameters and store them on the instance.

        ``trainer`` and ``dtype`` are handed to the parent constructor; all
        other arguments are checked with assertions and then saved as
        attributes. Arguments left as ``None`` get derived defaults:

        * ``context_length`` -> ``prediction_length``
        * ``embedding_dimension`` -> ``min(50, (c + 1) // 2)`` per cardinality
        * ``lags_seq`` -> ``get_lags_for_frequency(freq_str=freq)``
        * ``time_features`` -> ``time_features_from_frequency_str(freq)``

        ``cardinality`` must be given if and only if ``use_feat_static_cat``
        is set; when it is not used, ``[1]`` is stored instead.
        """
        super().__init__(trainer=trainer, dtype=dtype)

        # ---- argument validation -------------------------------------
        assert prediction_length > 0, (
            "The value of `prediction_length` should be > 0"
        )
        assert context_length is None or context_length > 0, (
            "The value of `context_length` should be > 0"
        )
        assert num_layers > 0, "The value of `num_layers` should be > 0"
        assert num_cells > 0, "The value of `num_cells` should be > 0"
        assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
        cardinality_given = cardinality is not None
        assert cardinality_given == bool(use_feat_static_cat), (
            "You should set `cardinality` if and only if `use_feat_static_cat=True`"
        )
        assert cardinality is None or all(
            [card > 0 for card in cardinality]
        ), "Elements of `cardinality` should be > 0"
        assert embedding_dimension is None or all(
            [dim > 0 for dim in embedding_dimension]
        ), "Elements of `embedding_dimension` should be > 0"
        assert num_parallel_samples > 0, (
            "The value of `num_parallel_samples` should be > 0"
        )

        # ---- horizon / frequency -------------------------------------
        self.freq = freq
        if context_length is None:
            context_length = prediction_length
        self.context_length = context_length
        self.prediction_length = prediction_length

        # ---- distribution outputs ------------------------------------
        # Two independent negative binomial outputs sharing the same dtype
        # (presumably one per modelled quantity -- confirm with the network).
        self.distr_output_m = NegativeBinomialOutput()
        self.distr_output_q = NegativeBinomialOutput()
        for distr_output in (self.distr_output_m, self.distr_output_q):
            distr_output.dtype = dtype

        # ---- network shape -------------------------------------------
        self.num_layers = num_layers
        self.num_cells = num_cells
        self.cell_type = cell_type
        self.dropout_rate = dropout_rate

        # ---- feature switches and embeddings -------------------------
        self.use_feat_dynamic_real = use_feat_dynamic_real
        self.use_feat_static_cat = use_feat_static_cat
        self.use_feat_static_real = use_feat_static_real
        if cardinality and use_feat_static_cat:
            self.cardinality = cardinality
        else:
            self.cardinality = [1]
        if embedding_dimension is None:
            embedding_dimension = [
                min(50, (card + 1) // 2) for card in self.cardinality
            ]
        self.embedding_dimension = embedding_dimension
        self.scaling = scaling

        # ---- lags and time features ----------------------------------
        if lags_seq is None:
            lags_seq = get_lags_for_frequency(freq_str=freq)
        self.lags_seq = lags_seq
        if time_features is None:
            time_features = time_features_from_frequency_str(self.freq)
        self.time_features = time_features

        # The history window must also cover the largest lag.
        self.history_length = self.context_length + max(self.lags_seq)

        # ---- forecasting ---------------------------------------------
        self.num_parallel_samples = num_parallel_samples
        self.forecast_type = forecast_type
Ejemplo n.º 5
0
def fit_deepar(
    training_data: ListDataset,
    validation_data: ListDataset = None,
    freq: str = 'M',
    pred_length: int = 12,
    num_layers: int = 2,
    hidden_dim: int = 40,
    use_cat_var: bool = False,
    cardinality: Optional[List[int]] = None,
    epochs: int = 25,
    batch_size: int = 32,
    sampler: Optional[InstanceSampler] = None,
    self_supervised_penalty: float = 0.0,
    embedding_agg_penalty: float = 0,
    embedding_dist_metric: str = 'cosine',
    embedding_dim_ratio: float = 1,
    hierarchy_agg_dict: Optional[Dict[int, List[int]]] = None,
    ignore_future_targets: bool = False,
    print_rec_penalty: bool = True,
) -> Tuple[GluonPredictor, HybridBlock]:
    """ fits DeepARRecPenaltyEstimator with optional reconciliation penalties to training dataset

    Arguments:
        training_data {ListDataset} -- training data

    Keyword Arguments:
        validation_data {ListDataset} -- optional validation data.
            NOTE(review): this function does not currently forward it to the
            estimator, so it has no effect here -- confirm whether it should be
            passed to `train_model` (default: {None})
        freq {str} -- frequency (default: {'M'})
        pred_length {int} -- prediction length (default: {12})
        num_layers {int} -- number of RNN layers to include in estimator (default: {2})
        hidden_dim {int} -- dimension of hidden state of each RNN (default: {40})
        use_cat_var {bool} -- whether to include a categorical variable for
            series in this model (default: {False})
        cardinality {Optional[List[int]]} -- cardinality of each categorical variable if including
            in model; ignored (reset to None) when `use_cat_var` is falsy (default: {None})
        epochs {int} -- number of training epochs (default: {25})
        batch_size {int} -- if self-supervised reconciliation penalty > 0, will be set to training set size
            (default: {32})
        sampler {Optional[InstanceSampler]} -- GluonTS sampler object containing logic for how to sample
            mini-batches; replaced by `FixedUnitSampler()` if self-supervised reconciliation
            penalty > 0 (default: {None})
        self_supervised_penalty {float} -- lambda for self-supervised reconciliation penalty
            (default: {0.0})
        embedding_agg_penalty {float} -- lambda for embedding rec. penalty
            (default: {0})
        embedding_dist_metric {str} -- distance metric for embedding rec. penalty, either
            'cosine' or 'l2' (default: {'cosine'})
        embedding_dim_ratio {float} -- ratio between embedding dim and RNN hidden state dim
            (default: {1})
        hierarchy_agg_dict {Optional[Dict[int, List[int]]]} -- mapping from individual series to
            columns that represent other series that aggregate to this individual series, necessary
            if self_supervised_penalty > 0 or embedding_agg_penalty > 0 (default: {None})
        ignore_future_targets {bool} -- whether to include future targets in forecasting loss
            and past targets in self-supervised reconciliation penalty (default: {False})
        print_rec_penalty {bool} -- whether to print the reconciliation penalty at each step
            of every epoch (default: {True})

    Raises:
        ValueError -- if a penalty > 0 is requested without `hierarchy_agg_dict`, or if
            `embedding_dist_metric` is not 'cosine' or 'l2'

    Returns:
        Tuple[GluonPredictor, HybridBlock] -- the predictor and the trained network
            returned by the estimator's `train_model`
    """

    if self_supervised_penalty > 0 and hierarchy_agg_dict is None:
        raise ValueError("Must supply 'hierarchy_agg_dict' argument if 'self_supervised_penalty' > 0")

    if embedding_agg_penalty > 0 and hierarchy_agg_dict is None:
        raise ValueError("Must supply 'hierarchy_agg_dict' argument if 'embedding_agg_penalty' > 0")

    if embedding_dist_metric not in ('cosine', 'l2'):
        raise ValueError("Embedding distance metric must be either 'cosine' or 'l2'")

    # The self-supervised penalty is computed over the whole training set in
    # one batch, so batch size and sampler are overridden in that case.
    if self_supervised_penalty > 0:
        batch_size = len(training_data)
        sampler = FixedUnitSampler()

    # Truthiness rather than `is False`: falsy flags that are not the `False`
    # singleton (e.g. numpy.bool_(False), 0) must also drop the cardinality,
    # otherwise the estimator receives cardinality with categoricals disabled.
    if not use_cat_var:
        cardinality = None

    estimator = DeepARRecPenaltyEstimator(
        freq=freq,
        prediction_length=pred_length,
        use_feat_static_cat=use_cat_var,
        cardinality=cardinality,
        embedding_dimension=[int(hidden_dim * embedding_dim_ratio)],
        distr_output=NegativeBinomialOutput(),
        trainer=Trainer(
            epochs=epochs,
            batch_size=batch_size,
            hybridize=False,
        ),
        num_layers=num_layers,
        num_cells=hidden_dim,
        sampler=sampler,
        self_supervised_penalty=self_supervised_penalty,
        embedding_agg_penalty=embedding_agg_penalty,
        embedding_dist_metric=embedding_dist_metric,
        hierarchy_agg_dict=hierarchy_agg_dict,
        ignore_future_targets=ignore_future_targets,
        print_rec_penalty=print_rec_penalty,
    )

    # train_model yields a 3-tuple; its first element is not needed here.
    _, trained_net, predictor = estimator.train_model(training_data=training_data)

    return predictor, trained_net