def test_weighted_average(x, weights) -> None:
    # x and weights are supplied by pytest parametrization in the full module.
    x = mx.nd.array(x)
    weights = mx.nd.array(weights)

    # By default, zero-weight entries are excluded from the denominator ...
    assert (
        weighted_average(F=mx.nd, x=x, weights=weights, axis=0)
        == mx.nd.array([2.0])
    )
    # ... with include_zeros_in_denominator=True they are counted, lowering
    # the average (2.0 -> 1.0 for this parametrization).
    assert (
        weighted_average(
            F=mx.nd,
            x=x,
            weights=weights,
            axis=0,
            include_zeros_in_denominator=True,
        )
        == mx.nd.array([1.0])
    )
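# For context: a minimal sketch of the weighted_average semantics the test
# above relies on, modeled on the GluonTS implementation (the exact import
# path and signature vary by version; treat this as an illustration, not the
# library source).
def weighted_average_sketch(
    F, x, weights=None, axis=None, include_zeros_in_denominator=False
):
    if weights is None:
        return x.mean(axis=axis)
    # Mask zero-weight entries so NaN/inf values in x cannot leak into the sum.
    weighted_tensor = F.where(
        condition=weights, x=x * weights, y=F.zeros_like(x)
    )
    if include_zeros_in_denominator:
        # Divide by the total number of entries, zero-weight ones included.
        sum_weights = F.maximum(1.0, weights.ones_like().sum(axis=axis))
    else:
        # Divide only by the mass of the nonzero weights.
        sum_weights = F.maximum(1.0, weights.sum(axis=axis))
    return weighted_tensor.sum(axis=axis) / sum_weights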
def hybrid_forward(
    self,
    F,
    past_target: Tensor,
    past_observed_values: Tensor,
    past_is_pad: Tensor,
    future_target: Tensor,
    future_observed_values: Tensor,
    past_feat_dynamic_real: Tensor,
    past_feat_dynamic_cat: Tensor,
    future_feat_dynamic_real: Tensor,
    future_feat_dynamic_cat: Tensor,
    feat_static_real: Tensor,
    feat_static_cat: Tensor,
) -> Tensor:
    (
        past_target,
        past_covariates,
        past_observed_values,
        future_target,
        future_covariates,
        offset,
        scale,
    ) = self._preprocess(
        F,
        past_target,
        past_observed_values,
        past_is_pad,
        past_feat_dynamic_real,
        past_feat_dynamic_cat,
        future_target,
        future_feat_dynamic_real,
        future_feat_dynamic_cat,
        feat_static_real,
        feat_static_cat,
    )

    target = F.concat(past_target, future_target, dim=1)
    covars = F.concat(past_covariates, future_covariates, dim=1)
    observed_values = F.concat(
        past_observed_values, future_observed_values, dim=1
    )

    # drop the last time step from the network inputs
    target = F.slice_axis(target, axis=1, begin=0, end=-1)
    covars = F.slice_axis(covars, axis=1, begin=0, end=-1)
    observed_values = F.slice_axis(observed_values, axis=1, begin=0, end=-1)

    preds = self._forward_step(
        F, self.prediction_length, target, covars, observed_values
    )
    preds = self._postprocess(F, preds, offset, scale)

    # undo the normalization on the ground truth before computing the loss
    future_target = future_target * (scale + self.normalizer_eps) + offset
    loss = self.loss(future_target, preds)
    loss = weighted_average(F, loss, future_observed_values)
    return loss.mean()
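# Hedged sketch of the affine (de)normalization pair implied by _preprocess /
# _postprocess above; the helper names are illustrative, not the model's API.
# normalizer_eps guards against division by zero on constant or all-zero series.
def _normalize_sketch(target, offset, scale, eps):
    return (target - offset) / (scale + eps)

def _denormalize_sketch(preds, offset, scale, eps):
    # exact inverse of _normalize_sketch, matching the rescaling applied to
    # future_target before the loss is computed
    return preds * (scale + eps) + offset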
def hybrid_forward(
    self,
    F,
    past_target: Tensor,
    past_observed_values: Tensor,
    future_target: Tensor,
    future_observed_values: Tensor,
    past_feat_dynamic_real: Tensor,
    past_feat_dynamic_cat: Tensor,
    feat_dynamic_real: Tensor,
    feat_dynamic_cat: Tensor,
    feat_static_real: Tensor,
    feat_static_cat: Tensor,
) -> Tensor:
    (
        past_covariates,
        future_covariates,
        static_covariates,
        offset,
        scale,
    ) = self._preprocess(
        F,
        past_target,
        past_observed_values,
        past_feat_dynamic_real,
        past_feat_dynamic_cat,
        feat_dynamic_real,
        feat_dynamic_cat,
        feat_static_real,
        feat_static_cat,
    )

    preds = self._forward(
        F,
        past_observed_values,
        past_covariates,
        future_covariates,
        static_covariates,
    )
    preds = self._postprocess(F, preds, offset, scale)

    # average the loss over the prediction window, masking unobserved values
    loss = self.loss(future_target, preds)
    loss = weighted_average(F, loss, future_observed_values)
    return loss.mean()
def hybrid_forward(
    self,
    F,
    past_target: Tensor,
    future_target: Tensor,
    future_observed_values: Tensor,
) -> Tensor:
    """
    Computes a probability distribution for future data given the past,
    and returns the loss associated with the actual future observations.

    Parameters
    ----------
    F
    past_target
        Tensor with past observations.
        Shape: (batch_size, context_length, target_dim).
    future_target
        Tensor with future observations.
        Shape: (batch_size, prediction_length, target_dim).
    future_observed_values
        Tensor indicating which values in the target are observed, and
        which ones are imputed instead.

    Returns
    -------
    Tensor
        Loss tensor. Shape: (batch_size, ).
    """
    distr_args, loc, scale = self.get_distr_args(F, past_target)
    distr = self.distr_output.distribution(distr_args, loc=loc, scale=scale)

    # (batch_size, prediction_length, target_dim)
    loss = distr.loss(future_target)

    # (batch_size, )
    weighted_loss = weighted_average(
        F=F, x=loss, weights=future_observed_values, axis=1
    )

    return weighted_loss
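# Toy illustration of the axis=1 reduction performed above, using the
# weighted_average_sketch defined earlier (shapes simplified to 2-D):
# a (batch, prediction_length) loss collapses to a (batch, ) loss, and
# unobserved (weight-0) steps are excluded from the denominator.
_loss = mx.nd.array([[1.0, 3.0], [2.0, 2.0]])
_observed = mx.nd.array([[1.0, 1.0], [1.0, 0.0]])
print(weighted_average_sketch(F=mx.nd, x=_loss, weights=_observed, axis=1))
# -> [2. 2.]  (row 1 averages only its single observed step)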
def hybrid_forward(
    self,
    F,
    feat_static_cat: Tensor,
    past_observed_values: Tensor,
    past_seasonal_indicators: Tensor,
    past_time_feat: Tensor,
    past_target: Tensor,
) -> Tensor:
    lds, _ = self.compute_lds(
        F,
        feat_static_cat=feat_static_cat,
        seasonal_indicators=past_seasonal_indicators.slice_axis(
            axis=1, begin=-self.past_length, end=None
        ),
        time_feat=past_time_feat.slice_axis(
            axis=1, begin=-self.past_length, end=None
        ),
        length=self.past_length,
    )

    _, scale = self.scaler(past_target, past_observed_values)

    observed_context = past_observed_values.slice_axis(
        axis=1, begin=-self.past_length, end=None
    )

    ll, _, _ = lds.log_prob(
        x=past_target.slice_axis(
            axis=1, begin=-self.past_length, end=None
        ),
        observed=observed_context.min(axis=-1, keepdims=False),
        scale=scale,
    )

    return weighted_average(
        F=F, x=-ll, axis=1, weights=observed_context.squeeze(axis=-1)
    )
def hybrid_forward(
    self,
    F,
    past_target: Tensor,
    future_target: Tensor,
    past_feat_dynamic: Tensor,
    future_feat_dynamic: Tensor,
    feat_static_cat: Tensor,
    past_observed_values: Tensor,
    future_observed_values: Tensor,
) -> Tensor:
    """
    Parameters
    ----------
    F
        mx.symbol or mx.ndarray, the Gluon function space.
    past_target : Tensor
        Shape (batch_size, encoder_length, 1).
    future_target : Tensor
        Shape (batch_size, num_forking, decoder_length).
    past_feat_dynamic
        Shape (batch_size, encoder_length, num_past_feat_dynamic).
    future_feat_dynamic
        Shape (batch_size, num_forking, decoder_length, num_feat_dynamic).
    feat_static_cat
        Shape (batch_size, num_feat_static_cat).
    past_observed_values : Tensor
        Shape (batch_size, encoder_length, 1).
    future_observed_values : Tensor
        Shape (batch_size, num_forking, decoder_length).

    Returns
    -------
    Tensor
        Loss with shape (batch_size, prediction_length).
    """
    # shape: (batch_size, num_forking, decoder_length, decoder_mlp_dim_seq[0])
    dec_output, scale = self.get_decoder_network_output(
        F,
        past_target,
        past_feat_dynamic,
        future_feat_dynamic,
        feat_static_cat,
        past_observed_values,
    )

    if self.quantile_output is not None:
        # shape: (batch_size, num_forking, decoder_length, len(quantiles))
        dec_dist_output = self.quantile_proj(dec_output)
        # shape: (batch_size, num_forking, decoder_length = prediction_length)
        loss = self.loss(future_target, dec_dist_output)
    else:
        assert self.distr_output is not None
        distr_args = self.distr_args_proj(dec_output)
        distr = self.distr_output.distribution(
            distr_args, scale=scale.expand_dims(axis=1)
        )
        loss = distr.loss(future_target)

    # mask the loss based on the observed indicator
    # shape: (batch_size, decoder_length)
    weighted_loss = weighted_average(
        F=F, x=loss, weights=future_observed_values, axis=1
    )
    return weighted_loss
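# When quantile_output is set, self.loss computes a quantile (pinball) loss.
# A minimal standalone sketch for a single quantile level q (broadcasting over
# the quantile dimension is omitted for clarity; this is an illustration, not
# the network's loss definition).
import mxnet as mx

def pinball_loss_sketch(y_true, y_pred, q):
    # under-prediction (positive error) is weighted by q,
    # over-prediction (negative error) by (1 - q)
    error = y_true - y_pred
    return mx.nd.maximum(q * error, (q - 1) * error)

# q = 0.9 penalizes under-prediction nine times as hard as over-prediction:
print(pinball_loss_sketch(mx.nd.array([10.0]), mx.nd.array([8.0]), 0.9))   # [1.8]
print(pinball_loss_sketch(mx.nd.array([10.0]), mx.nd.array([12.0]), 0.9))  # [0.2]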
def train_hybrid_forward(
    self,
    F,
    target_dimension_indicator: Tensor,
    past_time_feat: Tensor,
    past_target_cdf: Tensor,
    past_observed_values: Tensor,
    past_is_pad: Tensor,
    future_time_feat: Tensor,
    future_target_cdf: Tensor,
    future_observed_values: Tensor,
) -> Tuple[Tensor, ...]:
    """
    Computes the loss for training DeepVAR; all input tensors representing
    time series have NTC layout.

    Parameters
    ----------
    F
    target_dimension_indicator
        Indices of the target dimension (batch_size, target_dim)
    past_time_feat
        Dynamic features of past time series
        (batch_size, history_length, num_features)
    past_target_cdf
        Past marginal CDF transformed target values
        (batch_size, history_length, target_dim)
    past_observed_values
        Indicator whether or not the values were observed
        (batch_size, history_length, target_dim)
    past_is_pad
        Indicator whether the past target values have been padded
        (batch_size, history_length)
    future_time_feat
        Future time features (batch_size, prediction_length, num_features)
    future_target_cdf
        Future marginal CDF transformed target values
        (batch_size, prediction_length, target_dim)
    future_observed_values
        Indicator whether or not the future values were observed
        (batch_size, prediction_length, target_dim)

    Returns
    -------
    loss
        Loss with shape (batch_size, 1)
    likelihoods
        Likelihoods for each time step
        (batch_size, context + prediction_length, 1)
    distr_args
        Distribution arguments
        (context + prediction_length, number_of_arguments)
    """
    seq_len = self.context_length + self.prediction_length

    # unroll the decoder in "training mode", i.e. by providing future data
    # as well
    rnn_outputs, _, scale, lags_scaled, inputs = self.unroll_encoder(
        F=F,
        past_time_feat=past_time_feat,
        past_target_cdf=past_target_cdf,
        past_observed_values=past_observed_values,
        past_is_pad=past_is_pad,
        future_time_feat=future_time_feat,
        future_target_cdf=future_target_cdf,
        target_dimension_indicator=target_dimension_indicator,
    )

    # put together target sequence
    # (batch_size, seq_len, target_dim)
    target = F.concat(
        past_target_cdf.slice_axis(
            axis=1, begin=-self.context_length, end=None
        ),
        future_target_cdf,
        dim=1,
    )
    # assert_shape(target, (-1, seq_len, self.target_dim))

    distr, distr_args = self.distr(
        time_features=inputs,
        rnn_outputs=rnn_outputs,
        scale=scale,
        lags_scaled=lags_scaled,
        target_dimension_indicator=target_dimension_indicator,
        seq_len=seq_len,
    )

    # we sum the last axis to have the same shape for all likelihoods
    # (batch_size, subseq_length, 1)
    likelihoods = -distr.log_prob(target).expand_dims(axis=-1)
    assert_shape(likelihoods, (-1, seq_len, 1))

    past_observed_values = F.broadcast_minimum(
        past_observed_values, 1 - past_is_pad.expand_dims(axis=-1)
    )

    # (batch_size, subseq_length, target_dim)
    observed_values = F.concat(
        past_observed_values.slice_axis(
            axis=1, begin=-self.context_length, end=None
        ),
        future_observed_values,
        dim=1,
    )

    # mask the loss at one time step if one or more observations is missing
    # in the target dimensions
    # (batch_size, subseq_length, 1)
    loss_weights = observed_values.min(axis=-1, keepdims=True)
    assert_shape(loss_weights, (-1, seq_len, 1))

    loss = weighted_average(
        F=F, x=likelihoods, weights=loss_weights, axis=1
    )
    assert_shape(loss, (-1, -1, 1))

    self.distribution = distr

    return (loss, likelihoods) + distr_args
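# Quick illustration of the min-based masking above: a time step contributes
# to the loss only if every target dimension is observed (indicators are 0/1).
import mxnet as mx

# (batch=1, time=3, target_dim=2): step 0 fully observed, steps 1-2 not
_observed_values = mx.nd.array([[[1, 1], [1, 0], [0, 0]]])
print(_observed_values.min(axis=-1, keepdims=True))
# -> [[[1.], [0.], [0.]]]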
def hybrid_forward(
    self,
    F,
    feat_static_cat: Tensor,
    feat_static_real: Tensor,
    past_time_feat: Tensor,
    past_target: Tensor,
    past_observed_values: Tensor,
    past_is_pad: Optional[Tensor],
    future_time_feat: Tensor,
    future_target: Tensor,
    future_observed_values: Tensor,
) -> Tuple[Tensor, Tensor]:
    """
    Computes the loss for training DeepAR; all input tensors representing
    time series have NTC layout.

    Parameters
    ----------
    F
    feat_static_cat : (batch_size, num_features)
    feat_static_real : (batch_size, num_features)
    past_time_feat : (batch_size, history_length, num_features)
    past_target : (batch_size, history_length, *target_shape)
    past_observed_values : (batch_size, history_length, *target_shape)
    future_time_feat : (batch_size, prediction_length, num_features)
    future_target : (batch_size, prediction_length, *target_shape)
    future_observed_values : (batch_size, prediction_length, *target_shape)

    Returns
    -------
    Tensor
        Weighted loss, averaged over the sequence; shape (batch_size, ).
    Tensor
        Element-wise loss over the context + prediction_length time steps;
        shape (batch_size, seq_len).
    """
    outputs = self.distribution(
        feat_static_cat=feat_static_cat,
        feat_static_real=feat_static_real,
        past_time_feat=past_time_feat,
        past_target=past_target,
        past_observed_values=past_observed_values,
        past_is_pad=past_is_pad,
        future_time_feat=future_time_feat,
        future_target=future_target,
        future_observed_values=future_observed_values,
        return_rnn_outputs=True,
    )
    # since return_rnn_outputs=True, assert:
    assert isinstance(outputs, tuple)
    distr, rnn_outputs = outputs

    # put together target sequence
    # (batch_size, seq_len, *target_shape)
    target = F.concat(
        past_target.slice_axis(
            axis=1,
            begin=self.history_length - self.context_length,
            end=None,
        ),
        future_target,
        dim=1,
    )

    # (batch_size, seq_len)
    loss = distr.loss(target)

    # (batch_size, seq_len, *target_shape)
    observed_values = F.concat(
        past_observed_values.slice_axis(
            axis=1,
            begin=self.history_length - self.context_length,
            end=self.history_length,
        ),
        future_observed_values,
        dim=1,
    )

    # mask the loss at one time step iff one or more observations is missing
    # in the target dimensions
    # (batch_size, seq_len)
    loss_weights = (
        observed_values
        if (len(self.target_shape) == 0)
        else observed_values.min(axis=-1, keepdims=False)
    )

    weighted_loss = weighted_average(
        F=F,
        x=loss,
        weights=loss_weights,
        axis=1,
        include_zeros_in_denominator=self.include_zeros_in_denominator,
    )

    # need to mask possible nans and -inf
    loss = F.where(condition=loss_weights, x=loss, y=F.zeros_like(loss))

    # rnn_outputs is already merged into a single tensor
    assert not isinstance(rnn_outputs, list)

    # it seems that the trainer only uses the first return value for backward,
    # so we only add regularization to weighted_loss
    if self.alpha:
        ar_loss = self.ar_loss(rnn_outputs)
        weighted_loss = weighted_loss + ar_loss
    if self.beta:
        tar_loss = self.tar_loss(rnn_outputs)
        weighted_loss = weighted_loss + tar_loss

    return weighted_loss, loss
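# The ar_loss / tar_loss terms follow the activation-regularization scheme of
# Merity et al. (2017). A hedged sketch of what such terms typically compute;
# the function names and exact reductions here are assumptions, not the
# network's definitions.
import mxnet as mx

def activation_reg_sketch(rnn_outputs, alpha):
    # AR: L2 penalty on the magnitude of the RNN activations
    return alpha * mx.nd.mean(mx.nd.square(rnn_outputs))

def temporal_activation_reg_sketch(rnn_outputs, beta):
    # TAR: penalize large changes between consecutive hidden states
    diff = rnn_outputs.slice_axis(
        axis=1, begin=1, end=None
    ) - rnn_outputs.slice_axis(axis=1, begin=0, end=-1)
    return beta * mx.nd.mean(mx.nd.square(diff))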