Example #1
    def __init__(
        self,
        num_layers: int,
        num_cells: int,
        cell_type: str,
        history_length: int,
        context_length: int,
        prediction_length: int,
        distr_output: DistributionOutput,
        dropout_rate: float,
        cardinality: List[int],
        embedding_dimension: List[int],
        lags_seq: List[int],
        dropoutcell_type: str = "ZoneoutCell",
        scaling: bool = True,
        dtype: DType = np.float32,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.num_layers = num_layers
        self.num_cells = num_cells
        self.cell_type = cell_type
        self.history_length = history_length
        self.context_length = context_length
        self.prediction_length = prediction_length
        self.dropoutcell_type = dropoutcell_type
        self.dropout_rate = dropout_rate
        self.cardinality = cardinality
        self.embedding_dimension = embedding_dimension
        self.num_cat = len(cardinality)
        self.scaling = scaling
        self.dtype = dtype

        assert len(cardinality) == len(
            embedding_dimension
        ), "embedding_dimension should be a list with the same size as cardinality"

        assert len(set(lags_seq)) == len(
            lags_seq
        ), "no duplicated lags allowed!"
        lags_seq.sort()

        self.lags_seq = lags_seq

        self.distr_output = distr_output
        RnnCell = {
            "lstm": mx.gluon.rnn.LSTMCell,
            "gru": mx.gluon.rnn.GRUCell
        }[self.cell_type]

        self.target_shape = distr_output.event_shape

        # TODO: is the following restriction needed?
        assert (
            len(self.target_shape) <= 1
        ), "Argument `target_shape` should be a tuple with 1 element at most"

        Dropout = {
            "ZoneoutCell": ZoneoutCell,
            "RNNZoneoutCell": RNNZoneoutCell,
            "VariationalDropoutCell": VariationalDropoutCell,
            "VariationalZoneoutCell": VariationalZoneoutCell,
        }[self.dropoutcell_type]

        with self.name_scope():
            self.proj_distr_args = distr_output.get_args_proj()
            self.rnn = mx.gluon.rnn.HybridSequentialRNNCell()
            for k in range(num_layers):
                cell = RnnCell(hidden_size=num_cells)
                cell = mx.gluon.rnn.ResidualCell(cell) if k > 0 else cell
                # we found that adding dropout to outputs doesn't improve the performance, so we only drop states
                if "Zoneout" in self.dropoutcell_type:
                    cell = (Dropout(cell, zoneout_states=dropout_rate)
                            if dropout_rate > 0.0 else cell)
                elif "Dropout" in self.dropoutcell_type:
                    cell = (Dropout(cell, drop_states=dropout_rate)
                            if dropout_rate > 0.0 else cell)
                self.rnn.add(cell)
            self.rnn.cast(dtype=dtype)
            self.embedder = FeatureEmbedder(
                cardinalities=cardinality,
                embedding_dims=embedding_dimension,
                dtype=self.dtype,
            )
            if scaling:
                self.scaler = MeanScaler(keepdims=True)
            else:
                self.scaler = NOPScaler(keepdims=True)
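
For context, here is a minimal, self-contained sketch (not part of the source above; layer sizes and the dropout rate are made-up values) of the same residual-plus-zoneout RNN stack, built with the ZoneoutCell that ships with MXNet instead of GluonTS's own dropout wrappers:

import mxnet as mx

num_layers, num_cells, dropout_rate = 2, 40, 0.1

rnn = mx.gluon.rnn.SequentialRNNCell()
for k in range(num_layers):
    cell = mx.gluon.rnn.LSTMCell(hidden_size=num_cells)
    if k > 0:  # residual connections on every layer except the first
        cell = mx.gluon.rnn.ResidualCell(cell)
    if dropout_rate > 0.0:  # zoneout applied to the recurrent states only
        cell = mx.gluon.rnn.ZoneoutCell(cell, zoneout_states=dropout_rate)
    rnn.add(cell)
rnn.initialize()

x = mx.nd.random.normal(shape=(8, 24, 10))  # (batch, time, features)
outputs, states = rnn.unroll(length=24, inputs=x, layout="NTC", merge_outputs=True)
print(outputs.shape)  # (8, 24, 40)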
Example #2
def maximum_likelihood_estimate_sgd(
    distr_output: DistributionOutput,
    samples: mx.nd.NDArray,
    init_biases: Optional[List[mx.nd.NDArray]] = None,
    num_epochs: PositiveInt = PositiveInt(5),
    learning_rate: PositiveFloat = PositiveFloat(1e-2),
    hybridize: bool = True,
) -> List[np.ndarray]:
    model_ctx = mx.cpu()

    arg_proj = distr_output.get_args_proj()
    arg_proj.initialize()

    if hybridize:
        arg_proj.hybridize()

    if init_biases is not None:
        for param, bias in zip(arg_proj.proj, init_biases):
            param.params[param.prefix + "bias"].initialize(
                mx.initializer.Constant(bias), force_reinit=True)

    trainer = mx.gluon.Trainer(
        arg_proj.collect_params(),
        "sgd",
        {
            "learning_rate": learning_rate,
            "clip_gradient": 10.0
        },
    )

    # The input data to our model is one-dimensional
    dummy_data = mx.nd.array(np.ones((len(samples), 1)))

    train_data = mx.gluon.data.DataLoader(
        mx.gluon.data.ArrayDataset(dummy_data, samples),
        batch_size=BATCH_SIZE,
        shuffle=True,
    )

    for e in range(num_epochs):
        cumulative_loss = 0
        num_batches = 0
        # inner loop
        for i, (data, sample_label) in enumerate(train_data):
            data = data.as_in_context(model_ctx)
            sample_label = sample_label.as_in_context(model_ctx)
            with mx.autograd.record():
                distr_args = arg_proj(data)
                distr = distr_output.distribution(distr_args)
                loss = distr.loss(sample_label)
                if not hybridize:
                    assert loss.shape == distr.batch_shape
            loss.backward()

            trainer.step(BATCH_SIZE)
            num_batches += 1

            cumulative_loss += mx.nd.mean(loss).asscalar()

            assert not np.isnan(cumulative_loss)
        print("Epoch %s, loss: %s" % (e, cumulative_loss / num_batches))

    if len(distr_args[0].shape) == 1:
        return [
            param.asnumpy() for param in arg_proj(mx.nd.array(np.ones((1, 1))))
        ]

    # the alpha parameter of the zero-inflated Neg Bin is not returned when using param[0],
    # so flatten all parameter arrays instead
    ls = [[p.asnumpy() for p in param]
          for param in arg_proj(mx.nd.array(np.ones((1, 1))))]
    return reduce(lambda x, y: x + y, ls)
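
The helper above relies on a module-level BATCH_SIZE constant and the usual GluonTS imports. A minimal, hypothetical way to call it (the import path and all values are assumptions; GaussianOutput lives under gluonts.mx.distribution in recent versions and under gluonts.distribution in older ones) could look like this:

import mxnet as mx
from gluonts.mx.distribution import GaussianOutput

BATCH_SIZE = 32  # module-level constant consumed inside the helper

# draw synthetic samples from N(2.0, 1.5) and try to recover mu and sigma via SGD
samples = mx.nd.random.normal(loc=2.0, scale=1.5, shape=(3000,))
mu_hat, sigma_hat = maximum_likelihood_estimate_sgd(GaussianOutput(), samples)
print(mu_hat, sigma_hat)  # should land near 2.0 and 1.5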
Example #3
    def __init__(
        self,
        num_layers: int,
        num_cells: int,
        cell_type: str,
        history_length: int,
        context_length: int,
        prediction_length: int,
        distr_output: DistributionOutput,
        dropout_rate: float,
        lags_seq: List[int],
        target_dim: int,
        cardinality: List[int] = [1],
        embedding_dimension: int = 1,
        scaling: bool = True,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.num_layers = num_layers
        self.num_cells = num_cells
        self.cell_type = cell_type
        self.history_length = history_length
        self.context_length = context_length
        self.prediction_length = prediction_length
        self.dropout_rate = dropout_rate
        self.cardinality = cardinality
        self.embedding_dimension = embedding_dimension
        self.num_cat = len(cardinality)
        self.target_dim = target_dim
        self.scaling = scaling
        self.target_dim_sample = target_dim

        assert len(set(lags_seq)) == len(
            lags_seq
        ), "no duplicated lags allowed!"
        lags_seq.sort()

        self.lags_seq = lags_seq

        self.distr_output = distr_output

        with self.name_scope():
            self.proj_dist_args = distr_output.get_args_proj()

            residual = True

            self.rnn = make_rnn_cell(
                cell_type=cell_type,
                num_cells=num_cells,
                num_layers=num_layers,
                residual=residual,
                dropout_rate=dropout_rate,
            )

            self.embed_dim = 1
            self.embed = mx.gluon.nn.Embedding(
                input_dim=self.target_dim, output_dim=self.embed_dim
            )

            if scaling:
                self.scaler = MeanScaler(keepdims=True)
            else:
                self.scaler = NOPScaler(keepdims=True)
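
All three constructors set up the same projection pattern. The sketch below is hypothetical and not from the source (StudentTOutput, the import path, and all shapes are assumptions; the distribution classes live under gluonts.mx.distribution in recent versions); it shows how the members created above are typically used in a forward pass: project the RNN outputs to distribution parameters, then build the likelihood object from them.

import mxnet as mx
from gluonts.mx.distribution import StudentTOutput

distr_output = StudentTOutput()
proj_dist_args = distr_output.get_args_proj()
proj_dist_args.initialize()

rnn_outputs = mx.nd.random.normal(shape=(8, 24, 40))  # (batch, time, num_cells)
distr_args = proj_dist_args(rnn_outputs)              # e.g. (mu, sigma, nu), each (8, 24)
distr = distr_output.distribution(distr_args)

target = mx.nd.random.normal(shape=(8, 24))
nll = distr.loss(target)                              # negative log-likelihood per time step
print(nll.mean().asscalar())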