Exemple #1
0
    def sampling_decoder(
        self,
        F,
        static_feat: Tensor,
        past_target: Tensor,
        time_feat: Tensor,
        scale: Tensor,
        enc_out: Tensor,
    ) -> Tensor:
        """
        Computes sample paths by running the decoder one step at a time, feeding every
        newly drawn sample back in as history for the following step.

        Parameters
        ----------
        F
            array API used for the tensor operations (``broadcast_div``, ``reshape``, ``concat``).
        static_feat : Tensor
            static features. Shape: (batch_size, num_static_features).
        past_target : Tensor
            target history. Shape: (batch_size, history_length, 1).
        time_feat : Tensor
            time features. Shape: (batch_size, prediction_length, num_time_features).
        scale : Tensor
            tensor containing the scale of each element in the batch. Shape: (batch_size, ).
        enc_out: Tensor
            output of the encoder. Shape: (batch_size, num_cells)

        Returns
        --------
        sample_paths : Tensor
            a tensor containing sampled paths.
            Shape: (batch_size, num_sample_paths, prediction_length, *target_shape), i.e.
            (batch_size, num_sample_paths, prediction_length) when ``target_shape`` is empty.
        """
        num_paths = self.num_sample_paths

        # blows-up the dimension of each tensor to batch_size * self.num_sample_paths for increasing parallelism
        repeated_past_target = past_target.repeat(repeats=num_paths, axis=0)
        repeated_time_feat = time_feat.repeat(repeats=num_paths, axis=0)
        repeated_static_feat = static_feat.repeat(
            repeats=num_paths, axis=0).expand_dims(axis=1)
        repeated_enc_out = enc_out.repeat(
            repeats=num_paths, axis=0).expand_dims(axis=1)
        repeated_scale = scale.repeat(repeats=num_paths, axis=0)

        # loop invariants: the divisor used to scale the lags and the flattened lag width
        lag_divisor = repeated_scale.expand_dims(axis=-1)
        flat_lag_size = prod(self.target_shape) * len(self.lags_seq)

        future_samples = []

        try:
            # for each future time-units we draw new samples for this time-unit and update the state
            for k in range(self.prediction_length):
                lags = self.get_lagged_subsequences(
                    F=F,
                    sequence=repeated_past_target,
                    sequence_length=self.history_length + k,
                    indices=self.shifted_lags,
                    subsequences_length=1,
                )

                # (batch_size * num_samples, 1, *target_shape, num_lags)
                lags_scaled = F.broadcast_div(lags, lag_divisor)

                # from (batch_size * num_samples, 1, *target_shape, num_lags)
                # to (batch_size * num_samples, 1, prod(target_shape) * num_lags)
                input_lags = F.reshape(
                    data=lags_scaled,
                    shape=(-1, 1, flat_lag_size),
                )

                # (batch_size * num_samples, 1, prod(target_shape) * num_lags + num_time_features + num_static_features)
                dec_input = F.concat(
                    input_lags,
                    repeated_time_feat.slice_axis(axis=1, begin=k, end=k + 1),
                    repeated_static_feat,
                    dim=-1,
                )

                dec_output = self.decoder(dec_input, repeated_enc_out, None,
                                          False)

                distr_args = self.proj_dist_args(dec_output)

                # distribution of the next target value given the predicted parameters
                distr = self.distr_output.distribution(distr_args,
                                                       scale=repeated_scale)

                # (batch_size * num_samples, 1, *target_shape)
                new_samples = distr.sample()

                # (batch_size * num_samples, seq_len, *target_shape)
                # the fresh sample becomes part of the history seen by the next step
                repeated_past_target = F.concat(repeated_past_target,
                                                new_samples,
                                                dim=1)
                future_samples.append(new_samples)
        finally:
            # reset cache of the decoder, also when a step failed, so that a
            # partially filled cache cannot leak into the next call
            self.decoder.cache_reset()

        # (batch_size * num_samples, prediction_length, *target_shape)
        samples = F.concat(*future_samples, dim=1)

        # (batch_size, num_samples, prediction_length, *target_shape)
        # reshape does not move data, so the trailing axes must stay in the order
        # they have in `samples`; only the leading axis is split in two.
        return samples.reshape(shape=((-1, num_paths) +
                                      (self.prediction_length, ) +
                                      self.target_shape))
Exemple #2
0
    def sampling_decoder(
        self,
        F,
        static_feat: Tensor,
        past_target: Tensor,
        time_feat: Tensor,
        scale: Tensor,
        begin_states: List,
    ) -> Tensor:
        """
        Computes sample paths for two target channels (``m`` at index 0 and ``q`` at
        index 1 of the last axis of ``past_target``) by unrolling the RNN one step at a
        time, starting from the given initial states.

        Parameters
        ----------
        F
            array API used for the tensor operations (``broadcast_div``, ``reshape``, ``concat``).
        static_feat : Tensor
            static features. Shape: (batch_size, num_static_features).
        past_target : Tensor
            target history. Indexed on axis 2 with entries 0 (``m``) and 1 (``q``),
            so the expected shape is (batch_size, history_length, 2).
        time_feat : Tensor
            time features. Shape: (batch_size, prediction_length, num_time_features).
        scale : Tensor
            tensor containing the scale of each element in the batch. Shape: (batch_size, 1, 1).
        begin_states : List
            list of initial states for the LSTM layers.
            the shape of each tensor of the list should be (batch_size, num_cells)

        Returns
        --------
        Tensor
            A tensor containing sampled paths.
            Shape: (batch_size, num_parallel_samples, prediction_length, C), where C is
            the size of the last axis of the per-step samples (``m`` and ``q``
            concatenated).
        """
        num_paths = self.num_parallel_samples

        # blows-up the dimension of each tensor to batch_size * self.num_parallel_samples for increasing parallelism
        repeated_past_target = past_target.repeat(repeats=num_paths, axis=0)
        repeated_time_feat = time_feat.repeat(repeats=num_paths, axis=0)
        repeated_static_feat = static_feat.repeat(
            repeats=num_paths, axis=0).expand_dims(axis=1)
        repeated_scale = scale.repeat(repeats=num_paths, axis=0)
        repeated_states = [
            s.repeat(repeats=num_paths, axis=0) for s in begin_states
        ]

        # loop invariants: the divisor used to scale the lags and the flattened lag width
        lag_divisor = repeated_scale.expand_dims(axis=-1)
        flat_lag_size = prod(self.target_shape) * len(self.lags_seq)

        future_samples = []
        batch_size = past_target.shape[0]
        # for each future time-units we draw new samples for this time-unit and update the state
        for k in range(self.prediction_length):
            # one entry per channel, in the order m (index 0), q (index 1)
            input_lags = []
            for channel in (0, 1):
                # repeated_past_target[:, :, channel]; squeeze only the sliced axis so
                # that a batch or time axis of size 1 is never dropped by accident
                channel_history = repeated_past_target.slice_axis(
                    axis=2, begin=channel, end=channel + 1).squeeze(axis=2)

                # (batch_size * num_samples, 1, *target_shape, num_lags)
                lags = self.get_lagged_subsequences(
                    F=F,
                    sequence=channel_history,
                    sequence_length=self.history_length + k,
                    indices=self.shifted_lags,
                    subsequences_length=1,
                )

                # (batch_size * num_samples, 1, *target_shape, num_lags)
                lags_scaled = F.broadcast_div(lags, lag_divisor)

                # from (batch_size * num_samples, 1, *target_shape, num_lags)
                # to (batch_size * num_samples, 1, prod(target_shape) * num_lags)
                input_lags.append(
                    F.reshape(
                        data=lags_scaled,
                        shape=(-1, 1, flat_lag_size),
                    ))

            # lags of m, lags of q, time features of step k, static features
            decoder_input = F.concat(
                *input_lags,
                repeated_time_feat.slice_axis(axis=1, begin=k, end=k + 1),
                repeated_static_feat,
                dim=-1,
            )

            # output shape: (batch_size * num_samples, 1, num_cells)
            # state shape: (batch_size * num_samples, num_cells)
            rnn_outputs, repeated_states = self.rnn.unroll(
                inputs=decoder_input,
                length=1,
                begin_state=repeated_states,
                layout="NTC",
                merge_outputs=True,
            )

            # both heads read the same RNN output but have their own projection
            # and their own output distribution
            distr_args_m = self.proj_distr_args_m(rnn_outputs)
            distr_args_q = self.proj_distr_args_q(rnn_outputs)
            distr_m = self.distr_output_m.distribution(distr_args_m,
                                                       scale=repeated_scale)
            distr_q = self.distr_output_q.distribution(distr_args_q,
                                                       scale=repeated_scale)

            # (batch_size * num_samples, 1, *target_shape)
            new_samples_m = distr_m.sample(dtype=self.dtype)
            new_samples_q = distr_q.sample(dtype=self.dtype)

            # put m and q side by side, then insert a time axis of length 1 so the
            # result can be appended to the history along axis 1
            new_samples = F.concat(new_samples_m, new_samples_q, dim=1)
            new_samples = new_samples.expand_dims(axis=1)

            # (batch_size * num_samples, seq_len, *target_shape)
            repeated_past_target = F.concat(repeated_past_target,
                                            new_samples,
                                            dim=1)
            future_samples.append(new_samples)

        # (batch_size * num_samples, prediction_length, *target_shape)
        samples = F.concat(*future_samples, dim=1)
        # (batch_size, num_samples, prediction_length, *target_shape)
        return samples.reshape(shape=((batch_size, num_paths) +
                                      (self.prediction_length, ) +
                                      (samples.shape[-1], )))
Exemple #3
0
    def sampling_decoder(
        self,
        F,
        static_feat: Tensor,
        past_target: Tensor,
        time_feat: Tensor,
        scale: Tensor,
        begin_states: List,
    ) -> Tensor:
        """
        Computes sample paths by unrolling the LSTM starting with a initial
        input and state, and dumps the gradients of the final step's distribution
        parameters with respect to ``time_feat`` to ``gradients.npy``.

        Side effects
        ------------
        * ``attach_grad()`` is called on ``time_feat`` and ``past_target``.
        * ``gradients.npy`` (relative to the current working directory) is
          overwritten with three arrays written back to back with ``np.save``, in
          this order: gradient of ``mu``, gradient of ``nu``, gradient of ``sigma``.
          They belong to the last prediction step.
        * The output distribution must expose ``base_distribution.mu``,
          ``base_distribution.sigma`` and ``base_distribution.nu``; a distribution
          without one of them raises ``AttributeError``.

        Parameters
        ----------
        F
            array API used for the tensor operations (``broadcast_div``, ``reshape``, ``concat``).
        static_feat : Tensor
            static features. Shape: (batch_size, num_static_features).
        past_target : Tensor
            target history. Shape: (batch_size, history_length).
        time_feat : Tensor
            time features. Shape: (batch_size, prediction_length, num_time_features).
        scale : Tensor
            tensor containing the scale of each element in the batch. Shape: (batch_size, 1, 1).
        begin_states : List
            list of initial states for the LSTM layers.
            the shape of each tensor of the list should be (batch_size, num_cells)

        Returns
        --------
        Tensor
            A tensor containing sampled paths.
            Shape: (batch_size, num_parallel_samples, prediction_length, *target_shape).
        """
        time_feat.attach_grad()
        # NOTE(review): no gradient is taken w.r.t. past_target below; the call is
        # kept because it is a visible side effect on the caller's tensor.
        past_target.attach_grad()

        num_paths = self.num_parallel_samples
        final_step = self.prediction_length - 1

        with autograd.record():
            # blows-up the dimension of each tensor to batch_size * self.num_parallel_samples for increasing parallelism
            repeated_past_target = past_target.repeat(repeats=num_paths,
                                                      axis=0)
            repeated_time_feat = time_feat.repeat(repeats=num_paths, axis=0)
            repeated_static_feat = static_feat.repeat(
                repeats=num_paths, axis=0).expand_dims(axis=1)
            repeated_scale = scale.repeat(repeats=num_paths, axis=0)
            repeated_states = [
                s.repeat(repeats=num_paths, axis=0) for s in begin_states
            ]

            future_samples = []

            # for each future time-units we draw new samples for this time-unit and update the state
            for k in range(self.prediction_length):
                # (batch_size * num_samples, 1, *target_shape, num_lags)
                lags = self.get_lagged_subsequences(
                    F=F,
                    sequence=repeated_past_target,
                    sequence_length=self.history_length + k,
                    indices=self.shifted_lags,
                    subsequences_length=1,
                )

                # (batch_size * num_samples, 1, *target_shape, num_lags)
                lags_scaled = F.broadcast_div(
                    lags, repeated_scale.expand_dims(axis=-1))

                # from (batch_size * num_samples, 1, *target_shape, num_lags)
                # to (batch_size * num_samples, 1, prod(target_shape) * num_lags)
                input_lags = F.reshape(
                    data=lags_scaled,
                    shape=(-1, 1,
                           prod(self.target_shape) * len(self.lags_seq)),
                )

                # (batch_size * num_samples, 1, prod(target_shape) * num_lags + num_time_features + num_static_features)
                decoder_input = F.concat(
                    input_lags,
                    repeated_time_feat.slice_axis(axis=1, begin=k, end=k + 1),
                    repeated_static_feat,
                    dim=-1,
                )

                # output shape: (batch_size * num_samples, 1, num_cells)
                # state shape: (batch_size * num_samples, num_cells)
                rnn_outputs, repeated_states = self.rnn.unroll(
                    inputs=decoder_input,
                    length=1,
                    begin_state=repeated_states,
                    layout="NTC",
                    merge_outputs=True,
                )
                distr_args = self.proj_distr_args(rnn_outputs)

                # distribution of the next target value given the predicted parameters
                distr = self.distr_output.distribution(distr_args,
                                                       scale=repeated_scale)

                # (batch_size * num_samples, 1, *target_shape)
                new_samples = distr.sample(dtype=self.dtype)

                if k == final_step:
                    # The file is opened in 'wb' mode, so a dump made at an earlier
                    # step would be truncated by the next one. Only the final step's
                    # gradients can ever be read back, hence they are the only ones
                    # computed and written.
                    # gaussian has mu and stddev, student T has mu sigma and nu
                    gradient_mu_feat = autograd.grad(
                        distr.base_distribution.mu, [time_feat],
                        create_graph=True)
                    gradient_sigma_feat = autograd.grad(
                        distr.base_distribution.sigma, [time_feat],
                        create_graph=True)
                    gradient_nu_feat = autograd.grad(
                        distr.base_distribution.nu, [time_feat],
                        create_graph=True)
                    # write order is mu, nu, sigma; readers must np.load in that order
                    with open('gradients.npy', 'wb') as f:
                        np.save(f, gradient_mu_feat[0].asnumpy())
                        np.save(f, gradient_nu_feat[0].asnumpy())
                        np.save(f, gradient_sigma_feat[0].asnumpy())

                # (batch_size * num_samples, seq_len, *target_shape)
                # the fresh sample becomes part of the history seen by the next step
                repeated_past_target = F.concat(repeated_past_target,
                                                new_samples,
                                                dim=1)
                future_samples.append(new_samples)

            # (batch_size * num_samples, prediction_length, *target_shape)
            samples = F.concat(*future_samples, dim=1)

        # (batch_size, num_samples, prediction_length, *target_shape)
        return samples.reshape(shape=((-1, num_paths) +
                                      (self.prediction_length, ) +
                                      self.target_shape))