def maximum_likelihood_estimate_sgd(
    distr_output: DistributionOutput,
    samples: mx.ndarray,
    init_biases: List[mx.ndarray.NDArray] = None,
    num_epochs: PositiveInt = PositiveInt(5),
    learning_rate: PositiveFloat = PositiveFloat(1e-2),
    hybridize: bool = True,
) -> Iterable[float]:
    model_ctx = mx.cpu()

    arg_proj = distr_output.get_args_proj()
    arg_proj.initialize()

    if hybridize:
        arg_proj.hybridize()

    if init_biases is not None:
        for param, bias in zip(arg_proj.proj, init_biases):
            param.params[param.prefix + "bias"].initialize(
                mx.initializer.Constant(bias), force_reinit=True
            )

    trainer = mx.gluon.Trainer(
        arg_proj.collect_params(),
        "sgd",
        {"learning_rate": learning_rate, "clip_gradient": 10.0},
    )

    # The input data to our model is one-dimensional
    dummy_data = mx.nd.array(np.ones((len(samples), 1)))

    train_data = mx.gluon.data.DataLoader(
        mx.gluon.data.ArrayDataset(dummy_data, samples),
        batch_size=BATCH_SIZE,
        shuffle=True,
    )

    for e in range(num_epochs):
        cumulative_loss = 0
        num_batches = 0
        # inner loop
        for i, (data, sample_label) in enumerate(train_data):
            data = data.as_in_context(model_ctx)
            sample_label = sample_label.as_in_context(model_ctx)
            with mx.autograd.record():
                distr_args = arg_proj(data)
                distr = distr_output.distribution(distr_args)
                loss = distr.loss(sample_label)
                if not hybridize:
                    assert loss.shape == distr.batch_shape
            loss.backward()
            trainer.step(BATCH_SIZE)
            num_batches += 1

            cumulative_loss += mx.nd.mean(loss).asscalar()
        print("Epoch %s, loss: %s" % (e, cumulative_loss / num_batches))

    return [
        param[0].asnumpy() for param in arg_proj(mx.nd.array(np.ones((1, 1))))
    ]
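# A minimal usage sketch (not part of the function above), assuming GluonTS's
# GaussianOutput (import path may differ by version) and a module-level
# BATCH_SIZE constant, which the function above relies on: fit a Gaussian to
# synthetic samples by SGD and recover its parameters.
import numpy as np
import mxnet as mx
from gluonts.mx.distribution import GaussianOutput

BATCH_SIZE = 32

samples = mx.nd.array(np.random.normal(loc=2.0, scale=0.5, size=(4000,)))

mu_hat, sigma_hat = maximum_likelihood_estimate_sgd(GaussianOutput(), samples)
# mu_hat and sigma_hat should land close to 2.0 and 0.5, respectively.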
def test_distribution_output_shapes(
    distr_out: DistributionOutput,
    data: Tensor,
    loc: List[Union[None, Tensor]],
    scale: List[Union[None, Tensor]],
    expected_batch_shape: Tuple,
    expected_event_shape: Tuple,
):
    args_proj = distr_out.get_args_proj()
    args_proj.initialize()

    args = args_proj(data)

    assert distr_out.event_shape == expected_event_shape

    for l, s in product(loc, scale):
        distr = distr_out.distribution(args, loc=l, scale=s)

        assert distr.batch_shape == expected_batch_shape
        assert distr.event_shape == expected_event_shape

        x = distr.sample()

        assert x.shape == distr.batch_shape + distr.event_shape

        loss = distr.loss(x)

        assert loss.shape == distr.batch_shape

        x1 = distr.sample(num_samples=1)

        assert x1.shape == (1,) + distr.batch_shape + distr.event_shape

        x3 = distr.sample(num_samples=3)

        assert x3.shape == (3,) + distr.batch_shape + distr.event_shape
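# A hypothetical invocation sketch (not from the original test suite), assuming
# GluonTS's GaussianOutput (import path may vary by version): exercise the shape
# checks above on a (3, 4, 5) input, with and without an affine loc/scale
# transformation.
import mxnet as mx
from gluonts.mx.distribution import GaussianOutput

test_distribution_output_shapes(
    distr_out=GaussianOutput(),
    data=mx.nd.random.normal(shape=(3, 4, 5)),
    loc=[None, mx.nd.zeros(shape=(3, 4))],
    scale=[None, mx.nd.ones(shape=(3, 4))],
    expected_batch_shape=(3, 4),
    expected_event_shape=(),
)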
def __init__(
    self,
    encoder: TransformerEncoder,
    decoder: TransformerDecoder,
    history_length: int,
    context_length: int,
    prediction_length: int,
    distr_output: DistributionOutput,
    cardinality: List[int],
    embedding_dimension: int,
    lags_seq: List[int],
    scaling: bool = True,
    **kwargs,
) -> None:
    super().__init__(**kwargs)

    self.history_length = history_length
    self.context_length = context_length
    self.prediction_length = prediction_length
    self.scaling = scaling
    self.cardinality = cardinality
    self.embedding_dimension = embedding_dimension
    self.distr_output = distr_output

    assert len(set(lags_seq)) == len(lags_seq), "no duplicated lags allowed!"
    lags_seq.sort()
    self.lags_seq = lags_seq

    self.target_shape = distr_output.event_shape

    with self.name_scope():
        self.proj_dist_args = distr_output.get_args_proj()

        self.encoder = encoder
        self.decoder = decoder

        self.embedder = FeatureEmbedder(
            cardinalities=cardinality,
            embedding_dims=[embedding_dimension for _ in cardinality],
        )

        if scaling:
            self.scaler = MeanScaler(keepdims=True)
        else:
            self.scaler = NOPScaler(keepdims=True)
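# A brief standalone sketch (assumption: GluonTS's FeatureEmbedder; the import
# path may differ by version) of the categorical embedding configured above:
# each categorical feature column gets its own embedding table, and the
# per-feature embeddings are concatenated along the last axis.
import mxnet as mx
from gluonts.mx.block.feature import FeatureEmbedder

cardinality = [5, 3]          # two categorical features
embedding_dimension = 4
embedder = FeatureEmbedder(
    cardinalities=cardinality,
    embedding_dims=[embedding_dimension for _ in cardinality],
)
embedder.initialize()

feat_static_cat = mx.nd.array([[1.0, 2.0], [4.0, 0.0]])  # (batch, num_features)
print(embedder(feat_static_cat).shape)  # (2, 8): 4 dims per feature, concatenated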
def __init__(
    self,
    num_layers: int,
    num_cells: int,
    cell_type: str,
    history_length: int,
    context_length: int,
    prediction_length: int,
    distr_output: DistributionOutput,
    dropout_rate: float,
    cardinality: List[int],
    embedding_dimension: int,
    lags_seq: List[int],
    scaling: bool = True,
    **kwargs,
) -> None:
    super().__init__(**kwargs)

    self.num_layers = num_layers
    self.num_cells = num_cells
    self.cell_type = cell_type
    self.history_length = history_length
    self.context_length = context_length
    self.prediction_length = prediction_length
    self.dropout_rate = dropout_rate
    self.cardinality = cardinality
    self.embedding_dimension = embedding_dimension
    self.num_cat = len(cardinality)
    self.scaling = scaling

    assert len(set(lags_seq)) == len(lags_seq), "no duplicated lags allowed!"
    lags_seq.sort()
    self.lags_seq = lags_seq

    self.distr_output = distr_output
    RnnCell = {"lstm": mx.gluon.rnn.LSTMCell, "gru": mx.gluon.rnn.GRUCell}[
        self.cell_type
    ]

    self.target_shape = distr_output.event_shape

    # TODO: is the following restriction needed?
    assert (
        len(self.target_shape) <= 1
    ), "Argument `target_shape` should be a tuple with 1 element at most"

    with self.name_scope():
        self.proj_distr_args = distr_output.get_args_proj()
        self.rnn = mx.gluon.rnn.HybridSequentialRNNCell()
        for k in range(num_layers):
            cell = RnnCell(hidden_size=num_cells)
            cell = mx.gluon.rnn.ResidualCell(cell) if k > 0 else cell
            cell = (
                mx.gluon.rnn.ZoneoutCell(cell, zoneout_states=dropout_rate)
                if dropout_rate > 0.0
                else cell
            )
            self.rnn.add(cell)
        self.embedder = FeatureEmbedder(
            cardinalities=cardinality,
            embedding_dims=[embedding_dimension for _ in cardinality],
        )
        if scaling:
            self.scaler = MeanScaler(keepdims=True)
        else:
            self.scaler = NOPScaler(keepdims=True)
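# A standalone sketch (not part of the class above) of the same RNN stack
# construction, using plain MXNet Gluon: layer 0 is a plain LSTM cell, deeper
# layers are wrapped in ResidualCell, and every layer is wrapped in ZoneoutCell
# when dropout_rate > 0. The shapes below are illustrative assumptions.
import mxnet as mx

num_layers, num_cells, dropout_rate = 2, 40, 0.1
rnn = mx.gluon.rnn.HybridSequentialRNNCell()
for k in range(num_layers):
    cell = mx.gluon.rnn.LSTMCell(hidden_size=num_cells)
    cell = mx.gluon.rnn.ResidualCell(cell) if k > 0 else cell
    if dropout_rate > 0.0:
        cell = mx.gluon.rnn.ZoneoutCell(cell, zoneout_states=dropout_rate)
    rnn.add(cell)

rnn.initialize()
inputs = mx.nd.random.normal(shape=(8, 24, num_cells))  # (batch, time, features)
outputs, state = rnn.unroll(
    length=24, inputs=inputs, layout="NTC", merge_outputs=True
)
print(outputs.shape)  # (8, 24, num_cells); residual adds need matching widths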
def maximum_likelihood_estimate_sgd(
    distr_output: DistributionOutput,
    samples: mx.ndarray,
    init_biases: List[mx.ndarray.NDArray] = None,
    num_epochs: PositiveInt = 5,
    learning_rate: PositiveFloat = 1e-2,
    hybridize: bool = True,
) -> Iterable[float]:
    model_ctx = mx.cpu()

    arg_proj = distr_output.get_args_proj()
    arg_proj.initialize()

    if hybridize:
        arg_proj.hybridize()

    if init_biases is not None:
        for param, bias in zip(arg_proj.proj, init_biases):
            param.params[param.prefix + "bias"].initialize(
                mx.initializer.Constant(bias), force_reinit=True
            )

    trainer = mx.gluon.Trainer(
        arg_proj.collect_params(),
        "sgd",
        {"learning_rate": learning_rate, "clip_gradient": 10.0},
    )

    dummy_data = mx.nd.array(np.ones((len(samples), 1)))

    train_data = mx.gluon.data.DataLoader(
        mx.gluon.data.ArrayDataset(dummy_data, samples),
        batch_size=BATCH_SIZE,
        shuffle=True,
    )

    dummy_out = mx.nd.array(np.ones((1, 1)))

    for e in range(num_epochs):
        cumulative_loss = 0
        # inner loop
        for i, (data, sample_label) in enumerate(train_data):
            # data: (batch, 1); the "1" defines the input dimension of the
            # projection layer and should be kept
            data = data.as_in_context(model_ctx)
            # sample_label: (batch,) for univariate and (batch, d) for multivariate
            if sample_label.shape[-1] == 1:
                sample_label = sample_label.as_in_context(model_ctx).squeeze(
                    axis=-1
                )
            else:
                sample_label = sample_label.as_in_context(model_ctx)
            with mx.autograd.record():
                distr_args = arg_proj(data)
                distr = distr_output.distribution(distr_args)
                loss = distr.loss(sample_label)
            loss.backward()
            trainer.step(BATCH_SIZE)
            cumulative_loss += mx.nd.mean(loss).asscalar()
        print("Epoch %s, loss: %s" % (e, cumulative_loss / len(samples)))

    return [param[0].asnumpy() for param in arg_proj(dummy_out)]
def __init__(
    self,
    num_layers: int,
    num_cells: int,
    cell_type: str,
    history_length: int,
    context_length: int,
    prediction_length: int,
    distr_output: DistributionOutput,
    dropout_rate: float,
    lags_seq: List[int],
    target_dim: int,
    conditioning_length: int,
    cardinality: List[int] = [1],
    embedding_dimension: int = 1,
    scaling: bool = True,
    **kwargs,
) -> None:
    super().__init__(**kwargs)

    self.num_layers = num_layers
    self.num_cells = num_cells
    self.cell_type = cell_type
    self.history_length = history_length
    self.context_length = context_length
    self.prediction_length = prediction_length
    self.dropout_rate = dropout_rate
    self.cardinality = cardinality
    self.embedding_dimension = embedding_dimension
    self.num_cat = len(cardinality)
    self.target_dim = target_dim
    self.scaling = scaling
    self.target_dim_sample = target_dim
    self.conditioning_length = conditioning_length

    assert len(set(lags_seq)) == len(lags_seq), "no duplicated lags allowed!"
    lags_seq.sort()
    self.lags_seq = lags_seq

    self.distr_output = distr_output

    with self.name_scope():
        self.proj_dist_args = distr_output.get_args_proj()

        residual = True

        self.rnn = make_rnn_cell(
            cell_type=cell_type,
            num_cells=num_cells,
            num_layers=num_layers,
            residual=residual,
            dropout_rate=dropout_rate,
        )

        self.embed_dim = 1
        self.embed = mx.gluon.nn.Embedding(
            input_dim=self.target_dim, output_dim=self.embed_dim
        )

        if scaling:
            self.scaler = MeanScaler(keepdims=True)
        else:
            self.scaler = NOPScaler(keepdims=True)
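# A small hypothetical sketch (not from the class above) of how the
# per-dimension embedding configured there behaves: each of the target_dim
# series indices 0..target_dim-1 maps to a learned embed_dim-sized vector.
import mxnet as mx

target_dim, embed_dim = 4, 1
embed = mx.gluon.nn.Embedding(input_dim=target_dim, output_dim=embed_dim)
embed.initialize()

index = mx.nd.arange(target_dim)  # one index per target dimension
print(embed(index).shape)         # (4, 1): one embedding vector per series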