def sampling_decoder(
    self,
    F,
    static_feat: Tensor,
    past_target: Tensor,
    time_feat: Tensor,
    scale: Tensor,
    enc_out: Tensor,
) -> Tensor:
    """
    Computes sample paths by unrolling the decoder one step at a time.

    At every future step the lagged (scaled) targets, the time features of
    that step and the static features are fed to ``self.decoder`` together
    with the encoder output; a sample is drawn from the resulting
    distribution and appended to the history used by the next step.

    Parameters
    ----------
    F
        Array namespace providing ``broadcast_div``, ``reshape``, ``concat``
        and ``transpose`` (presumably ``mx.nd`` or ``mx.sym`` -- confirm
        against the caller).
    static_feat : Tensor
        static features. Shape: (batch_size, num_static_features).
    past_target : Tensor
        target history. Shape: (batch_size, history_length, 1).
    time_feat : Tensor
        time features. Shape: (batch_size, prediction_length, num_time_features).
    scale : Tensor
        tensor containing the scale of each element in the batch.
        Shape: (batch_size, ).
    enc_out : Tensor
        output of the encoder. Shape: (batch_size, num_cells)

    Returns
    -------
    Tensor
        a tensor containing sampled paths.
        Shape: (batch_size, num_sample_paths, *target_shape, prediction_length),
        i.e. (batch_size, num_sample_paths, prediction_length) for scalar
        targets.
    """
    num_paths = self.num_sample_paths
    target_shape = self.target_shape

    # Repeat every input num_sample_paths times along the batch axis so that
    # all sample paths are unrolled in parallel.
    repeated_past_target = past_target.repeat(repeats=num_paths, axis=0)
    repeated_time_feat = time_feat.repeat(repeats=num_paths, axis=0)
    repeated_static_feat = static_feat.repeat(
        repeats=num_paths, axis=0
    ).expand_dims(axis=1)
    repeated_enc_out = enc_out.repeat(
        repeats=num_paths, axis=0
    ).expand_dims(axis=1)
    repeated_scale = scale.repeat(repeats=num_paths, axis=0)

    # Loop invariants: the divisor for the lags and the flattened lag width.
    scale_for_lags = repeated_scale.expand_dims(axis=-1)
    lag_feature_size = prod(target_shape) * len(self.lags_seq)

    future_samples = []

    # For each future time step draw new samples and extend the history.
    for k in range(self.prediction_length):
        lags = self.get_lagged_subsequences(
            F=F,
            sequence=repeated_past_target,
            sequence_length=self.history_length + k,
            indices=self.shifted_lags,
            subsequences_length=1,
        )

        # (batch_size * num_samples, 1, *target_shape, num_lags)
        lags_scaled = F.broadcast_div(lags, scale_for_lags)

        # from (batch_size * num_samples, 1, *target_shape, num_lags)
        # to (batch_size * num_samples, 1, prod(target_shape) * num_lags)
        input_lags = F.reshape(
            data=lags_scaled,
            shape=(-1, 1, lag_feature_size),
        )

        # (batch_size * num_samples, 1,
        #  prod(target_shape) * num_lags + num_time_features + num_static_features)
        dec_input = F.concat(
            input_lags,
            repeated_time_feat.slice_axis(axis=1, begin=k, end=k + 1),
            repeated_static_feat,
            dim=-1,
        )

        dec_output = self.decoder(dec_input, repeated_enc_out, None, False)

        distr_args = self.proj_dist_args(dec_output)

        # distribution of the target given the predicted parameters
        distr = self.distr_output.distribution(
            distr_args, scale=repeated_scale
        )

        # (batch_size * num_samples, 1, *target_shape)
        new_samples = distr.sample()

        # (batch_size * num_samples, seq_len, *target_shape)
        repeated_past_target = F.concat(
            repeated_past_target, new_samples, dim=1
        )
        future_samples.append(new_samples)

    # reset cache of the decoder
    self.decoder.cache_reset()

    # (batch_size * num_samples, prediction_length, *target_shape)
    samples = F.concat(*future_samples, dim=1)

    # (batch_size, num_samples, prediction_length, *target_shape)
    samples = samples.reshape(
        shape=(-1, num_paths, self.prediction_length) + target_shape
    )
    if not target_shape:
        # Scalar targets: already (batch_size, num_samples, prediction_length).
        return samples

    # A reshape alone cannot move the time axis behind the target axes (it
    # would interleave time steps and target components), so permute the
    # axes explicitly to obtain
    # (batch_size, num_samples, *target_shape, prediction_length).
    event_axes = tuple(range(3, 3 + len(target_shape)))
    return F.transpose(samples, axes=(0, 1) + event_axes + (2,))
def sampling_decoder(
    self,
    F,
    static_feat: Tensor,
    past_target: Tensor,
    time_feat: Tensor,
    scale: Tensor,
    begin_states: List,
) -> Tensor:
    """
    Computes sample paths by unrolling the LSTM starting with an initial
    input and state.

    The target carries two channels on its last axis (index 0 is referred to
    as ``m`` and index 1 as ``q`` in this method). Lags are computed per
    channel, both lag sets are fed to the shared RNN, and each channel is
    sampled from its own projected distribution.

    Parameters
    ----------
    F
        Array namespace providing ``broadcast_div``, ``reshape`` and
        ``concat`` (presumably ``mx.nd`` -- confirm against the caller).
    static_feat : Tensor
        static features. Shape: (batch_size, num_static_features).
    past_target : Tensor
        target history. Shape: (batch_size, history_length, 2); axis 2 is
        sliced into the ``m`` and ``q`` channels.
    time_feat : Tensor
        time features. Shape: (batch_size, prediction_length, num_time_features).
    scale : Tensor
        tensor containing the scale of each element in the batch. The same
        scale is applied to both channels.
        NOTE(review): the previous docstring stated (batch_size, 1, 1);
        confirm against the caller.
    begin_states : List
        list of initial states for the LSTM layers.
        the shape of each tensor of the list should be (batch_size, num_cells)

    Returns
    -------
    Tensor
        A tensor containing sampled paths.
        Shape: (batch_size, num_parallel_samples, prediction_length, C), where
        C is the size of the last axis of the concatenated samples.

    Notes
    -----
    ``past_target.shape`` and ``samples.shape`` are read directly, so the
    inputs must expose a concrete ``shape`` attribute.
    """
    num_samples = self.num_parallel_samples

    # Repeat every input num_parallel_samples times along the batch axis so
    # that all sample paths are unrolled in parallel.
    repeated_past_target = past_target.repeat(repeats=num_samples, axis=0)
    repeated_time_feat = time_feat.repeat(repeats=num_samples, axis=0)
    repeated_static_feat = static_feat.repeat(
        repeats=num_samples, axis=0
    ).expand_dims(axis=1)
    repeated_scale = scale.repeat(repeats=num_samples, axis=0)
    repeated_states = [
        s.repeat(repeats=num_samples, axis=0) for s in begin_states
    ]

    future_samples = []
    batch_size = past_target.shape[0]

    # Loop invariants: the divisor for the lags and the flattened lag width.
    scale_for_lags = repeated_scale.expand_dims(axis=-1)
    lag_feature_size = prod(self.target_shape) * len(self.lags_seq)

    # For each future time step draw new samples and update the state.
    for k in range(self.prediction_length):
        per_channel_lags = []
        for channel in (0, 1):  # 0 -> "m", 1 -> "q"
            # Squeeze only the channel axis: an axis-less squeeze would also
            # drop the batch or time axis whenever its length happens to be 1.
            channel_history = repeated_past_target.slice_axis(
                axis=2, begin=channel, end=channel + 1
            ).squeeze(axis=2)

            # (batch_size * num_samples, 1, *target_shape, num_lags)
            lags = self.get_lagged_subsequences(
                F=F,
                sequence=channel_history,
                sequence_length=self.history_length + k,
                indices=self.shifted_lags,
                subsequences_length=1,
            )
            lags_scaled = F.broadcast_div(lags, scale_for_lags)

            # from (batch_size * num_samples, 1, *target_shape, num_lags)
            # to (batch_size * num_samples, 1, prod(target_shape) * num_lags)
            per_channel_lags.append(
                F.reshape(data=lags_scaled, shape=(-1, 1, lag_feature_size))
            )
        input_lags_m, input_lags_q = per_channel_lags

        # (batch_size * num_samples, 1,
        #  2 * prod(target_shape) * num_lags + num_time_features + num_static_features)
        decoder_input = F.concat(
            input_lags_m,
            input_lags_q,
            repeated_time_feat.slice_axis(axis=1, begin=k, end=k + 1),
            repeated_static_feat,
            dim=-1,
        )

        # output shape: (batch_size * num_samples, 1, num_cells)
        # state shape: (batch_size * num_samples, num_cells)
        rnn_outputs, repeated_states = self.rnn.unroll(
            inputs=decoder_input,
            length=1,
            begin_state=repeated_states,
            layout="NTC",
            merge_outputs=True,
        )

        distr_args_m = self.proj_distr_args_m(rnn_outputs)
        distr_args_q = self.proj_distr_args_q(rnn_outputs)

        # distributions of each channel given the predicted parameters
        distr_m = self.distr_output_m.distribution(
            distr_args_m, scale=repeated_scale
        )
        distr_q = self.distr_output_q.distribution(
            distr_args_q, scale=repeated_scale
        )

        # (batch_size * num_samples, 1, *target_shape); "m" is drawn before
        # "q" so the random stream is consumed in a fixed order.
        new_samples_m = distr_m.sample(dtype=self.dtype)
        new_samples_q = distr_q.sample(dtype=self.dtype)

        # Stack the two channels and re-insert the time axis so the result
        # can be appended to the history along axis 1.
        new_samples = F.concat(new_samples_m, new_samples_q, dim=1)
        new_samples = new_samples.expand_dims(axis=1)

        # (batch_size * num_samples, seq_len, num_channels)
        repeated_past_target = F.concat(
            repeated_past_target, new_samples, dim=1
        )
        future_samples.append(new_samples)

    # (batch_size * num_samples, prediction_length, num_channels)
    samples = F.concat(*future_samples, dim=1)

    # (batch_size, num_samples, prediction_length, num_channels)
    return samples.reshape(
        shape=(
            batch_size,
            num_samples,
            self.prediction_length,
            samples.shape[-1],
        )
    )
def sampling_decoder(
    self,
    F,
    static_feat: Tensor,
    past_target: Tensor,
    time_feat: Tensor,
    scale: Tensor,
    begin_states: List,
) -> Tensor:
    """
    Computes sample paths by unrolling the LSTM starting with an initial
    input and state, while recording the computation with autograd.

    At every step the gradients of the distribution parameters ``mu``,
    ``sigma`` and ``nu`` (read from ``distr.base_distribution``) with respect
    to ``time_feat`` are computed and written to the file ``gradients.npy``
    in the current working directory, in the order mu, nu, sigma.

    Parameters
    ----------
    F
        Array namespace providing ``broadcast_div``, ``reshape`` and
        ``concat``.
    static_feat : Tensor
        static features. Shape: (batch_size, num_static_features).
    past_target : Tensor
        target history. Shape: (batch_size, history_length).
        A gradient buffer is attached to it.
    time_feat : Tensor
        time features. Shape: (batch_size, prediction_length, num_time_features).
        A gradient buffer is attached to it; all dumped gradients are taken
        with respect to this tensor.
    scale : Tensor
        tensor containing the scale of each element in the batch.
        Shape: (batch_size, 1, 1).
    begin_states : List
        list of initial states for the LSTM layers.
        the shape of each tensor of the list should be (batch_size, num_cells)

    Returns
    -------
    Tensor
        A tensor containing sampled paths.
        Shape: (batch_size, num_parallel_samples, prediction_length, *target_shape).
    """
    n_paths = self.num_parallel_samples

    time_feat.attach_grad()
    past_target.attach_grad()

    with autograd.record():
        # Tile every input n_paths times along the batch axis so all sample
        # paths are unrolled in parallel.
        history = past_target.repeat(repeats=n_paths, axis=0)
        tiled_time_feat = time_feat.repeat(repeats=n_paths, axis=0)
        tiled_static_feat = static_feat.repeat(repeats=n_paths, axis=0)
        tiled_static_feat = tiled_static_feat.expand_dims(axis=1)
        tiled_scale = scale.repeat(repeats=n_paths, axis=0)
        states = [s.repeat(repeats=n_paths, axis=0) for s in begin_states]

        drawn = []

        # One iteration per future time step: build the decoder input, advance
        # the RNN by one step, dump the gradients, sample, extend the history.
        for step in range(self.prediction_length):
            # (batch_size * num_samples, 1, *target_shape, num_lags)
            raw_lags = self.get_lagged_subsequences(
                F=F,
                sequence=history,
                sequence_length=self.history_length + step,
                indices=self.shifted_lags,
                subsequences_length=1,
            )
            normalised_lags = F.broadcast_div(
                raw_lags, tiled_scale.expand_dims(axis=-1)
            )

            # flatten to (batch_size * num_samples, 1, prod(target_shape) * num_lags)
            flat_lags = F.reshape(
                data=normalised_lags,
                shape=(-1, 1, prod(self.target_shape) * len(self.lags_seq)),
            )

            step_time_feat = tiled_time_feat.slice_axis(
                axis=1, begin=step, end=step + 1
            )

            # (batch_size * num_samples, 1,
            #  prod(target_shape) * num_lags + num_time_features + num_static_features)
            step_input = F.concat(
                flat_lags, step_time_feat, tiled_static_feat, dim=-1
            )

            # output shape: (batch_size * num_samples, 1, num_cells)
            # state shape: (batch_size * num_samples, num_cells)
            step_output, states = self.rnn.unroll(
                inputs=step_input,
                length=1,
                begin_state=states,
                layout="NTC",
                merge_outputs=True,
            )

            step_args = self.proj_distr_args(step_output)

            # distribution of the target given the predicted parameters
            distr = self.distr_output.distribution(
                step_args, scale=tiled_scale
            )

            # The base distribution must expose mu, sigma and nu (a Gaussian
            # only has mu and a standard deviation; a Student-t has all three).
            grad_mu = autograd.grad(
                distr.base_distribution.mu, [time_feat], create_graph=True
            )
            grad_sigma = autograd.grad(
                distr.base_distribution.sigma, [time_feat], create_graph=True
            )
            grad_nu = autograd.grad(
                distr.base_distribution.nu, [time_feat], create_graph=True
            )

            # (batch_size * num_samples, 1, *target_shape)
            step_samples = distr.sample(dtype=self.dtype)

            # NOTE(review): the file is reopened in 'wb' mode on every
            # iteration, so only the gradients of the last step remain on
            # disk -- confirm that this is intended.
            with open('gradients.npy', 'wb') as dump:
                for grad in (grad_mu, grad_nu, grad_sigma):
                    np.save(dump, grad[0].asnumpy())

            # (batch_size * num_samples, seq_len, *target_shape)
            history = F.concat(history, step_samples, dim=1)
            drawn.append(step_samples)

        # (batch_size * num_samples, prediction_length, *target_shape)
        samples = F.concat(*drawn, dim=1)

        # (batch_size, num_samples, prediction_length, *target_shape)
        out_shape = (-1, n_paths) + (self.prediction_length,) + self.target_shape
        return samples.reshape(shape=out_shape)