def forward(
    self,
    feat_static_cat: torch.Tensor,
    feat_static_real: torch.Tensor,
    past_time_feat: torch.Tensor,
    past_target: torch.Tensor,
    past_observed_values: torch.Tensor,
    future_time_feat: torch.Tensor,
    future_target: torch.Tensor,
    future_observed_values: torch.Tensor,
) -> torch.Tensor:
    distr = self.distribution(
        feat_static_cat=feat_static_cat,
        feat_static_real=feat_static_real,
        past_time_feat=past_time_feat,
        past_target=past_target,
        past_observed_values=past_observed_values,
        future_time_feat=future_time_feat,
        future_target=future_target,
        future_observed_values=future_observed_values,
    )

    # put together target sequence
    # (batch_size, seq_len, *target_shape)
    target = torch.cat(
        (
            past_target[:, self.history_length - self.context_length :, ...],
            future_target,
        ),
        dim=1,
    )

    # (batch_size, seq_len)
    loss = -distr.log_prob(target)

    # (batch_size, seq_len, *target_shape)
    observed_values = torch.cat(
        (
            past_observed_values[
                :, self.history_length - self.context_length :, ...
            ],
            future_observed_values,
        ),
        dim=1,
    )

    # mask the loss at one time step iff one or more observations is missing
    # in the target dimensions
    # (batch_size, seq_len)
    loss_weights = (
        observed_values
        if (len(self.target_shape) == 0)
        else observed_values.min(dim=-1, keepdim=False).values
    )

    weighted_loss = weighted_average(loss, weights=loss_weights)
    return weighted_loss
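
# Minimal sketch (an assumption, not the library's actual `weighted_average`
# implementation): a masked weighted average in the same spirit as the helper
# used above. Positions with weight 0 contribute nothing, and the denominator
# counts only the observed positions, so missing steps do not dilute the loss.
import torch


def masked_weighted_average(
    x: torch.Tensor, weights: torch.Tensor, dim: int = -1
) -> torch.Tensor:
    # zero-out unobserved entries, then normalize by the number of observed ones
    weighted = torch.where(weights != 0, x * weights, torch.zeros_like(x))
    denom = weights.sum(dim=dim).clamp(min=1.0)
    return weighted.sum(dim=dim) / denom


# example: a (batch=1, seq_len=4) loss where the last two steps are unobserved
loss = torch.tensor([[1.0, 3.0, 100.0, 100.0]])
mask = torch.tensor([[1.0, 1.0, 0.0, 0.0]])
print(masked_weighted_average(loss, mask))  # tensor([2.])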
def forward(
    self,
    past_target: torch.Tensor,
    past_observed_values: torch.Tensor,
    future_target: torch.Tensor,
    future_observed_values: torch.Tensor,
    past_feat_dynamic_real: torch.Tensor,
    past_feat_dynamic_cat: torch.Tensor,
    feat_dynamic_real: torch.Tensor,
    feat_dynamic_cat: torch.Tensor,
    feat_static_real: torch.Tensor,
    feat_static_cat: torch.Tensor,
) -> torch.Tensor:
    # scale the target and assemble past/future/static covariates
    (
        past_covariates,
        future_covariates,
        static_covariates,
        offset,
        scale,
    ) = self._preprocess(
        past_target,
        past_observed_values,
        past_feat_dynamic_real,
        past_feat_dynamic_cat,
        feat_dynamic_real,
        feat_dynamic_cat,
        feat_static_real,
        feat_static_cat,
    )

    # run the base network on the preprocessed inputs
    preds = super().forward(
        past_observed_values,
        past_covariates,
        future_covariates,
        static_covariates,
    )
    # map predictions back to the original target scale
    preds = self._postprocess(preds, offset, scale)

    # average the loss over observed future steps only
    loss = self.loss(future_target, preds)
    loss = weighted_average(loss, future_observed_values)
    return loss.mean()
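
# Hypothetical sketch of the offset/scale round trip implied by `_preprocess` /
# `_postprocess`: the target is shifted and rescaled using statistics of the
# observed past, and predictions are mapped back to the original scale. The
# helper names and the exact statistics below are illustrative assumptions,
# not the model's actual methods.
import torch


def scale_past_target(past_target, past_observed_values, eps=1e-10):
    # per-series mean and std computed over observed past values only
    denom = past_observed_values.sum(dim=-1, keepdim=True).clamp(min=1.0)
    offset = (past_target * past_observed_values).sum(dim=-1, keepdim=True) / denom
    var = (
        (past_target - offset) ** 2 * past_observed_values
    ).sum(dim=-1, keepdim=True) / denom
    scale = var.sqrt() + eps
    return (past_target - offset) / scale, offset, scale


def unscale_preds(preds, offset, scale):
    # map network outputs back to the original target scale
    return preds * scale + offset


past_target = torch.tensor([[10.0, 12.0, 14.0, 0.0]])
past_observed = torch.tensor([[1.0, 1.0, 1.0, 0.0]])
scaled, offset, scale = scale_past_target(past_target, past_observed)
assert torch.allclose(unscale_preds(scaled, offset, scale), past_target)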
def forward(
    self,
    target_dimension_indicator: torch.Tensor,
    past_time_feat: torch.Tensor,
    past_target_cdf: torch.Tensor,
    past_observed_values: torch.Tensor,
    past_is_pad: torch.Tensor,
    future_time_feat: torch.Tensor,
    future_target_cdf: torch.Tensor,
    future_observed_values: torch.Tensor,
) -> Tuple[torch.Tensor, ...]:
    """
    Computes the loss for training DeepVAR. All input tensors representing
    time series have NTC layout.

    Parameters
    ----------
    target_dimension_indicator
        Indices of the target dimension (batch_size, target_dim)
    past_time_feat
        Dynamic features of past time series
        (batch_size, history_length, num_features)
    past_target_cdf
        Past marginal CDF transformed target values
        (batch_size, history_length, target_dim)
    past_observed_values
        Indicator whether or not the values were observed
        (batch_size, history_length, target_dim)
    past_is_pad
        Indicator whether the past target values have been padded
        (batch_size, history_length)
    future_time_feat
        Future time features
        (batch_size, prediction_length, num_features)
    future_target_cdf
        Future marginal CDF transformed target values
        (batch_size, prediction_length, target_dim)
    future_observed_values
        Indicator whether or not the future values were observed
        (batch_size, prediction_length, target_dim)

    Returns
    -------
    loss
        Loss with shape (batch_size, 1)
    likelihoods
        Likelihoods for each time step
        (batch_size, context + prediction_length, 1)
    distr_args
        Distribution arguments
        (context + prediction_length, number_of_arguments)
    """
    seq_len = self.context_length + self.prediction_length

    # unroll the decoder in "training mode", i.e. by providing future data
    # as well
    rnn_outputs, _, scale, _, inputs = self.unroll_encoder(
        past_time_feat=past_time_feat,
        past_target_cdf=past_target_cdf,
        past_observed_values=past_observed_values,
        past_is_pad=past_is_pad,
        future_time_feat=future_time_feat,
        future_target_cdf=future_target_cdf,
        target_dimension_indicator=target_dimension_indicator,
    )

    # put together target sequence
    # (batch_size, seq_len, target_dim)
    target = torch.cat(
        (past_target_cdf[:, -self.context_length :, ...], future_target_cdf),
        dim=1,
    )

    # assert_shape(target, (-1, seq_len, self.target_dim))

    distr, distr_args = self.distr(rnn_outputs=rnn_outputs, scale=scale)

    # we sum the last axis to have the same shape for all likelihoods
    # (batch_size, subseq_length, 1)
    likelihoods = -distr.log_prob(target).unsqueeze(-1)

    # assert_shape(likelihoods, (-1, seq_len, 1))

    # treat padded past steps as unobserved
    past_observed_values = torch.min(
        past_observed_values, 1 - past_is_pad.unsqueeze(-1)
    )

    # (batch_size, subseq_length, target_dim)
    observed_values = torch.cat(
        (
            past_observed_values[:, -self.context_length :, ...],
            future_observed_values,
        ),
        dim=1,
    )

    # mask the loss at one time step if one or more observations is missing
    # in the target dimensions (batch_size, subseq_length, 1)
    loss_weights, _ = observed_values.min(dim=-1, keepdim=True)

    # assert_shape(loss_weights, (-1, seq_len, 1))

    loss = weighted_average(likelihoods, weights=loss_weights, dim=1)

    # assert_shape(loss, (-1, -1, 1))

    self.distribution = distr

    return (loss.mean(), likelihoods) + distr_args
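
# Small illustration (not part of the model) of how the loss mask above is
# built: padded past steps are treated as unobserved via `torch.min`, and a
# multivariate time step is dropped entirely if any target dimension is
# missing. The tensors below are toy values chosen for the example.
import torch

past_observed = torch.tensor([[[1.0, 1.0], [1.0, 0.0], [1.0, 1.0]]])  # (1, 3, 2)
past_is_pad = torch.tensor([[1.0, 0.0, 0.0]])                         # (1, 3)

# a padded step counts as unobserved in every target dimension
past_observed = torch.min(past_observed, 1 - past_is_pad.unsqueeze(-1))

# one weight per time step: 1 only if all target dimensions were observed
loss_weights, _ = past_observed.min(dim=-1, keepdim=True)
print(loss_weights.squeeze(-1))  # tensor([[0., 0., 1.]])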