def __init__(
    self,
    num_layers: int,
    num_cells: int,
    cell_type: str,
    history_length: int,
    context_length: int,
    prediction_length: int,
    distr_output: DistributionOutput,
    dropout_rate: float,
    cardinality: List[int],
    embedding_dimension: List[int],
    lags_seq: List[int],
    dropoutcell_type: str = "ZoneoutCell",
    scaling: bool = True,
    dtype: DType = np.float32,
    **kwargs,
) -> None:
    super().__init__(**kwargs)

    self.num_layers = num_layers
    self.num_cells = num_cells
    self.cell_type = cell_type
    self.history_length = history_length
    self.context_length = context_length
    self.prediction_length = prediction_length
    self.dropoutcell_type = dropoutcell_type
    self.dropout_rate = dropout_rate
    self.cardinality = cardinality
    self.embedding_dimension = embedding_dimension
    self.num_cat = len(cardinality)
    self.scaling = scaling
    self.dtype = dtype

    assert len(cardinality) == len(
        embedding_dimension
    ), "embedding_dimension should be a list with the same size as cardinality"
    assert len(set(lags_seq)) == len(
        lags_seq
    ), "no duplicated lags allowed!"
    lags_seq.sort()
    self.lags_seq = lags_seq

    self.distr_output = distr_output
    RnnCell = {
        "lstm": mx.gluon.rnn.LSTMCell,
        "gru": mx.gluon.rnn.GRUCell,
    }[self.cell_type]

    self.target_shape = distr_output.event_shape

    # TODO: is the following restriction needed?
    assert (
        len(self.target_shape) <= 1
    ), "Argument `target_shape` should be a tuple with 1 element at most"

    Dropout = {
        "ZoneoutCell": ZoneoutCell,
        "RNNZoneoutCell": RNNZoneoutCell,
        "VariationalDropoutCell": VariationalDropoutCell,
        "VariationalZoneoutCell": VariationalZoneoutCell,
    }[self.dropoutcell_type]

    with self.name_scope():
        self.proj_distr_args = distr_output.get_args_proj()
        self.rnn = mx.gluon.rnn.HybridSequentialRNNCell()
        for k in range(num_layers):
            cell = RnnCell(hidden_size=num_cells)
            # Wrap every layer except the first in a residual connection.
            cell = mx.gluon.rnn.ResidualCell(cell) if k > 0 else cell
            # We found that adding dropout to outputs doesn't improve the
            # performance, so we only drop states.
            if "Zoneout" in self.dropoutcell_type:
                cell = (
                    Dropout(cell, zoneout_states=dropout_rate)
                    if dropout_rate > 0.0
                    else cell
                )
            elif "Dropout" in self.dropoutcell_type:
                cell = (
                    Dropout(cell, drop_states=dropout_rate)
                    if dropout_rate > 0.0
                    else cell
                )
            self.rnn.add(cell)
        self.rnn.cast(dtype=dtype)
        # Embed each categorical feature; one embedding size per cardinality.
        self.embedder = FeatureEmbedder(
            cardinalities=cardinality,
            embedding_dims=embedding_dimension,
            dtype=self.dtype,
        )
        # Scale the target by its context-window mean, or pass it through.
        if scaling:
            self.scaler = MeanScaler(keepdims=True)
        else:
            self.scaler = NOPScaler(keepdims=True)
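# A hedged usage sketch (not from the source): instantiating the network
# defined by the __init__ above. The class name `DeepARNetwork` and the
# `StudentTOutput` import path are assumptions based on GluonTS conventions;
# only the keyword arguments come from the signature above.
from gluonts.mx.distribution import StudentTOutput  # assumed import path

net = DeepARNetwork(  # hypothetical name for the class owning this __init__
    num_layers=2,
    num_cells=40,
    cell_type="lstm",
    history_length=24 + 7,  # context_length plus the largest lag
    context_length=24,
    prediction_length=12,
    distr_output=StudentTOutput(),
    dropout_rate=0.1,
    cardinality=[5],          # one categorical feature taking 5 values
    embedding_dimension=[3],  # one embedding size per cardinality entry
    lags_seq=[1, 2, 3, 7],
)
net.initialize()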
def maximum_likelihood_estimate_sgd(
    distr_output: DistributionOutput,
    samples: mx.nd.NDArray,
    init_biases: List[mx.nd.NDArray] = None,
    num_epochs: PositiveInt = PositiveInt(5),
    learning_rate: PositiveFloat = PositiveFloat(1e-2),
    hybridize: bool = True,
) -> List[np.ndarray]:
    model_ctx = mx.cpu()

    arg_proj = distr_output.get_args_proj()
    arg_proj.initialize()

    if hybridize:
        arg_proj.hybridize()

    if init_biases is not None:
        for param, bias in zip(arg_proj.proj, init_biases):
            param.params[param.prefix + "bias"].initialize(
                mx.initializer.Constant(bias), force_reinit=True
            )

    trainer = mx.gluon.Trainer(
        arg_proj.collect_params(),
        "sgd",
        {"learning_rate": learning_rate, "clip_gradient": 10.0},
    )

    # The input data to our model is one-dimensional.
    dummy_data = mx.nd.array(np.ones((len(samples), 1)))

    # BATCH_SIZE is a module-level constant defined alongside this helper.
    train_data = mx.gluon.data.DataLoader(
        mx.gluon.data.ArrayDataset(dummy_data, samples),
        batch_size=BATCH_SIZE,
        shuffle=True,
    )

    for e in range(num_epochs):
        cumulative_loss = 0
        num_batches = 0
        # inner loop
        for i, (data, sample_label) in enumerate(train_data):
            data = data.as_in_context(model_ctx)
            sample_label = sample_label.as_in_context(model_ctx)
            with mx.autograd.record():
                distr_args = arg_proj(data)
                distr = distr_output.distribution(distr_args)
                loss = distr.loss(sample_label)
                if not hybridize:
                    assert loss.shape == distr.batch_shape
            loss.backward()
            trainer.step(BATCH_SIZE)
            num_batches += 1

            cumulative_loss += mx.nd.mean(loss).asscalar()
        assert not np.isnan(cumulative_loss)
        print("Epoch %s, loss: %s" % (e, cumulative_loss / num_batches))

    if len(distr_args[0].shape) == 1:
        return [
            param.asnumpy()
            for param in arg_proj(mx.nd.array(np.ones((1, 1))))
        ]

    # alpha parameter of zero inflated Neg Bin was not returned using param[0]
    # `reduce` here is functools.reduce, imported at module level.
    ls = [
        [p.asnumpy() for p in param]
        for param in arg_proj(mx.nd.array(np.ones((1, 1))))
    ]
    return reduce(lambda x, y: x + y, ls)
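# A hedged usage sketch (not from the source): recovering Gaussian parameters
# from synthetic samples with the helper above. `GaussianOutput` follows the
# GluonTS DistributionOutput API (import path assumed), and BATCH_SIZE is
# assumed to be a module-level constant such as 32.
from gluonts.mx.distribution import GaussianOutput  # assumed import path

samples = mx.nd.random.normal(loc=2.0, scale=0.5, shape=(5000,))
mu_hat, sigma_hat = maximum_likelihood_estimate_sgd(
    GaussianOutput(),
    samples,
    learning_rate=PositiveFloat(0.05),
    num_epochs=PositiveInt(10),
)
# With enough samples the estimates should land near (2.0, 0.5).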
def __init__(
    self,
    num_layers: int,
    num_cells: int,
    cell_type: str,
    history_length: int,
    context_length: int,
    prediction_length: int,
    distr_output: DistributionOutput,
    dropout_rate: float,
    lags_seq: List[int],
    target_dim: int,
    cardinality: List[int] = [1],
    embedding_dimension: int = 1,
    scaling: bool = True,
    **kwargs,
) -> None:
    super().__init__(**kwargs)

    self.num_layers = num_layers
    self.num_cells = num_cells
    self.cell_type = cell_type
    self.history_length = history_length
    self.context_length = context_length
    self.prediction_length = prediction_length
    self.dropout_rate = dropout_rate
    self.cardinality = cardinality
    self.embedding_dimension = embedding_dimension
    self.num_cat = len(cardinality)
    self.target_dim = target_dim
    self.scaling = scaling
    self.target_dim_sample = target_dim

    assert len(set(lags_seq)) == len(
        lags_seq
    ), "no duplicated lags allowed!"
    lags_seq.sort()
    self.lags_seq = lags_seq

    self.distr_output = distr_output

    with self.name_scope():
        self.proj_dist_args = distr_output.get_args_proj()

        residual = True

        self.rnn = make_rnn_cell(
            cell_type=cell_type,
            num_cells=num_cells,
            num_layers=num_layers,
            residual=residual,
            dropout_rate=dropout_rate,
        )

        # Embed the index of each target dimension into a single scalar.
        self.embed_dim = 1
        self.embed = mx.gluon.nn.Embedding(
            input_dim=self.target_dim, output_dim=self.embed_dim
        )

        # Scale the target by its context-window mean, or pass it through.
        if scaling:
            self.scaler = MeanScaler(keepdims=True)
        else:
            self.scaler = NOPScaler(keepdims=True)
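# `make_rnn_cell` is not shown in this file. Below is a plausible sketch of
# it, mirroring the per-layer construction in the first __init__ above
# (LSTM/GRU stack, residual connections after the first layer, zoneout on
# the states); the actual helper may differ.
def make_rnn_cell(
    num_layers: int,
    num_cells: int,
    cell_type: str,
    residual: bool,
    dropout_rate: float,
) -> mx.gluon.rnn.HybridSequentialRNNCell:
    RnnCell = {
        "lstm": mx.gluon.rnn.LSTMCell,
        "gru": mx.gluon.rnn.GRUCell,
    }[cell_type]
    rnn = mx.gluon.rnn.HybridSequentialRNNCell()
    for k in range(num_layers):
        cell = RnnCell(hidden_size=num_cells)
        if k > 0 and residual:
            cell = mx.gluon.rnn.ResidualCell(cell)
        # Drop states only, as in the DeepAR-style network above.
        if dropout_rate > 0.0:
            cell = mx.gluon.rnn.ZoneoutCell(cell, zoneout_states=dropout_rate)
        rnn.add(cell)
    return rnn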