Example #1
    def sampling_decoder(
        self,
        F,
        static_feat: Tensor,
        past_target: Tensor,
        time_feat: Tensor,
        scale: Tensor,
        enc_out: Tensor,
    ) -> Tensor:
        """
        Computes sample paths by unrolling the decoder, starting with an
        initial input and the encoder output.

        Parameters
        ----------
        static_feat : Tensor
            static features. Shape: (batch_size, num_static_features).
        past_target : Tensor
            target history. Shape: (batch_size, history_length, 1).
        time_feat : Tensor
            time features. Shape:
            (batch_size, prediction_length, num_time_features).
        scale : Tensor
            tensor containing the scale of each element in the batch.
            Shape: (batch_size, ).
        enc_out : Tensor
            output of the encoder. Shape: (batch_size, num_cells).

        Returns
        -------
        sample_paths : Tensor
            a tensor containing sampled paths.
            Shape: (batch_size, num_sample_paths, prediction_length).
        """

        # blow up the batch dimension of each tensor to
        # batch_size * self.num_parallel_samples to increase parallelism
        repeated_past_target = past_target.repeat(
            repeats=self.num_parallel_samples, axis=0)
        repeated_time_feat = time_feat.repeat(
            repeats=self.num_parallel_samples, axis=0)
        repeated_static_feat = static_feat.repeat(
            repeats=self.num_parallel_samples, axis=0).expand_dims(axis=1)
        repeated_enc_out = enc_out.repeat(repeats=self.num_parallel_samples,
                                          axis=0).expand_dims(axis=1)
        repeated_scale = scale.repeat(repeats=self.num_parallel_samples,
                                      axis=0)

        future_samples = []

        # for each future time unit, draw new samples and update the state
        for k in range(self.prediction_length):
            lags = self.get_lagged_subsequences(
                F=F,
                sequence=repeated_past_target,
                sequence_length=self.history_length + k,
                indices=self.shifted_lags,
                subsequences_length=1,
            )

            # (batch_size * num_samples, 1, *target_shape, num_lags)
            lags_scaled = F.broadcast_div(lags,
                                          repeated_scale.expand_dims(axis=-1))

            # from (batch_size * num_samples, 1, *target_shape, num_lags)
            # to (batch_size * num_samples, 1, prod(target_shape) * num_lags)
            input_lags = F.reshape(
                data=lags_scaled,
                shape=(-1, 1, prod(self.target_shape) * len(self.lags_seq)),
            )

            # (batch_size * num_samples, 1, prod(target_shape) * num_lags +
            # num_time_features + num_static_features)
            dec_input = F.concat(
                input_lags,
                repeated_time_feat.slice_axis(axis=1, begin=k, end=k + 1),
                repeated_static_feat,
                dim=-1,
            )

            dec_output = self.decoder(dec_input, repeated_enc_out, None, False)

            distr_args = self.proj_dist_args(dec_output)

            # construct the output distribution from the projected parameters
            distr = self.distr_output.distribution(distr_args,
                                                   scale=repeated_scale)

            # (batch_size * num_samples, 1, *target_shape)
            new_samples = distr.sample()

            # (batch_size * num_samples, seq_len, *target_shape)
            repeated_past_target = F.concat(repeated_past_target,
                                            new_samples,
                                            dim=1)
            future_samples.append(new_samples)

        # reset cache of the decoder
        self.decoder.cache_reset()

        # (batch_size * num_samples, prediction_length, *target_shape)
        samples = F.concat(*future_samples, dim=1)

        # (batch_size, num_samples, *target_shape, prediction_length)
        return samples.reshape(shape=((-1, self.num_parallel_samples) +
                                      self.target_shape +
                                      (self.prediction_length, )))
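
The repeat/reshape pattern above is easy to verify in isolation. Below is a minimal, self-contained sketch (plain mx.nd, independent of any model class; all names are illustrative) showing how tiling the batch axis by num_parallel_samples and then folding it back yields a (batch_size, num_samples, prediction_length) tensor:

    import mxnet as mx

    batch_size, num_parallel_samples, prediction_length = 2, 3, 4

    # tile each batch element num_parallel_samples times along axis 0;
    # the copies of a given element occupy consecutive rows
    past_target = mx.nd.arange(batch_size).reshape((batch_size, 1))
    repeated = past_target.repeat(repeats=num_parallel_samples, axis=0)
    print(repeated.shape)  # (6, 1)

    # stand-in for the decoder's samples over all prediction steps
    samples = mx.nd.random.normal(
        shape=(batch_size * num_parallel_samples, prediction_length))

    # fold the tiled axis back into a dedicated samples axis
    sample_paths = samples.reshape(
        (-1, num_parallel_samples, prediction_length))
    print(sample_paths.shape)  # (2, 3, 4)
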
Example #2
    def create_network_input(
            self,
            F,
            feat_static_cat: Tensor,  # (batch_size, num_features)
            past_time_feat: Tensor,  # (batch_size, history_length, num_features)
            past_target: Tensor,  # (batch_size, history_length, 1)
            past_observed_values: Tensor,  # (batch_size, history_length)
            future_time_feat: Optional[
                Tensor],  # (batch_size, prediction_length, num_features)
            future_target: Optional[Tensor],  # (batch_size, prediction_length)
    ) -> Tuple[Tensor, Tensor, Tensor]:
        """
        Creates inputs for the transformer network.

        All tensor arguments should have NTC layout.
        """

        if future_time_feat is None or future_target is None:
            time_feat = past_time_feat.slice_axis(
                axis=1,
                begin=self.history_length - self.context_length,
                end=None,
            )
            sequence = past_target
            sequence_length = self.history_length
            subsequences_length = self.context_length
        else:
            time_feat = F.concat(
                past_time_feat.slice_axis(
                    axis=1,
                    begin=self.history_length - self.context_length,
                    end=None,
                ),
                future_time_feat,
                dim=1,
            )
            sequence = F.concat(past_target, future_target, dim=1)
            sequence_length = self.history_length + self.prediction_length
            subsequences_length = self.context_length + self.prediction_length

        # (batch_size, sub_seq_len, *target_shape, num_lags)
        lags = self.get_lagged_subsequences(
            F=F,
            sequence=sequence,
            sequence_length=sequence_length,
            indices=self.lags_seq,
            subsequences_length=subsequences_length,
        )

        # scale is computed over the last context_length units of the past target
        # scale shape is (batch_size, 1, *target_shape)
        _, scale = self.scaler(
            past_target.slice_axis(axis=1,
                                   begin=-self.context_length,
                                   end=None),
            past_observed_values.slice_axis(axis=1,
                                            begin=-self.context_length,
                                            end=None),
        )
        embedded_cat = self.embedder(feat_static_cat)

        # in addition to embedding features, use the log scale as it can help
        # prediction too
        # (batch_size, num_features + prod(target_shape))
        static_feat = F.concat(
            embedded_cat,
            F.log(scale) if len(self.target_shape) == 0 else F.log(
                scale.squeeze(axis=1)),
            dim=1,
        )

        repeated_static_feat = static_feat.expand_dims(axis=1).repeat(
            axis=1, repeats=subsequences_length)

        # (batch_size, sub_seq_len, *target_shape, num_lags)
        lags_scaled = F.broadcast_div(lags, scale.expand_dims(axis=-1))

        # from (batch_size, sub_seq_len, *target_shape, num_lags)
        # to (batch_size, sub_seq_len, prod(target_shape) * num_lags)
        input_lags = F.reshape(
            data=lags_scaled,
            shape=(
                -1,
                subsequences_length,
                len(self.lags_seq) * prod(self.target_shape),
            ),
        )

        # (batch_size, sub_seq_len, input_dim)
        inputs = F.concat(input_lags, time_feat, repeated_static_feat, dim=-1)

        return inputs, scale, static_feat
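
The call to get_lagged_subsequences is not shown above. The following is a hypothetical, simplified stand-in (not the actual GluonTS helper) that illustrates the idea: for each lag l in indices, slice a window of length subsequences_length ending l steps before the end of the sequence, then stack the windows along a trailing lag axis:

    import mxnet as mx

    def lagged_subsequences(sequence, sequence_length, indices,
                            subsequences_length):
        # assumes max(indices) + subsequences_length <= sequence_length
        lagged = []
        for lag in indices:
            begin = sequence_length - lag - subsequences_length
            end = sequence_length - lag
            lagged.append(sequence.slice_axis(axis=1, begin=begin, end=end))
        # (batch_size, subsequences_length, num_lags)
        return mx.nd.stack(*lagged, axis=2)

    seq = mx.nd.arange(20).reshape((2, 10))
    lags = lagged_subsequences(seq, sequence_length=10, indices=[1, 2, 7],
                               subsequences_length=3)
    print(lags.shape)  # (2, 3, 3)
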
Example #3
    def sampling_decoder(
        self,
        F,
        static_feat: Tensor,
        past_target: Tensor,
        time_feat: Tensor,
        scale: Tensor,
        begin_states: List,
    ) -> Tensor:
        """
        Computes sample paths by unrolling the LSTM, starting with an initial
        input and state.

        Parameters
        ----------
        static_feat : Tensor
            static features. Shape: (batch_size, num_static_features).
        past_target : Tensor
            target history. Shape: (batch_size, history_length).
        time_feat : Tensor
            time features. Shape: (batch_size, prediction_length, num_time_features).
        scale : Tensor
            tensor containing the scale of each element in the batch. Shape: (batch_size, 1, 1).
        begin_states : List
            list of initial states for the LSTM layers.
            The shape of each tensor in the list should be (batch_size, num_cells).

        Returns
        -------
        Tensor
            A tensor containing sampled paths.
            Shape: (batch_size, num_sample_paths, prediction_length).
        """

        # blow up the batch dimension of each tensor to
        # batch_size * self.num_parallel_samples to increase parallelism
        repeated_past_target = past_target.repeat(
            repeats=self.num_parallel_samples, axis=0)
        repeated_time_feat = time_feat.repeat(
            repeats=self.num_parallel_samples, axis=0)
        repeated_static_feat = static_feat.repeat(
            repeats=self.num_parallel_samples, axis=0).expand_dims(axis=1)
        repeated_scale = scale.repeat(repeats=self.num_parallel_samples,
                                      axis=0)
        repeated_states = [
            s.repeat(repeats=self.num_parallel_samples, axis=0)
            for s in begin_states
        ]

        future_samples = []

        # for each future time unit, draw new samples and update the state
        for k in range(self.prediction_length):
            # (batch_size * num_samples, 1, *target_shape, num_lags)
            lags = self.get_lagged_subsequences(
                F=F,
                sequence=repeated_past_target,
                sequence_length=self.history_length + k,
                indices=self.shifted_lags,
                subsequences_length=1,
            )

            # (batch_size * num_samples, 1, *target_shape, num_lags)
            lags_scaled = F.broadcast_div(lags,
                                          repeated_scale.expand_dims(axis=-1))

            # from (batch_size * num_samples, 1, *target_shape, num_lags)
            # to (batch_size * num_samples, 1, prod(target_shape) * num_lags)
            input_lags = F.reshape(
                data=lags_scaled,
                shape=(-1, 1, prod(self.target_shape) * len(self.lags_seq)),
            )

            # (batch_size * num_samples, 1, prod(target_shape) * num_lags + num_time_features + num_static_features)
            decoder_input = F.concat(
                input_lags,
                repeated_time_feat.slice_axis(axis=1, begin=k, end=k + 1),
                # observed_values.expand_dims(axis=1),
                repeated_static_feat,
                dim=-1,
            )

            # output shape: (batch_size * num_samples, 1, num_cells)
            # state shape: (batch_size * num_samples, num_cells)
            rnn_outputs, repeated_states = self.rnn.unroll(
                inputs=decoder_input,
                length=1,
                begin_state=repeated_states,
                layout="NTC",
                merge_outputs=True,
            )

            distr_args = self.proj_distr_args(rnn_outputs)

            # construct the output distribution from the projected parameters
            distr = self.distr_output.distribution(distr_args,
                                                   scale=repeated_scale)

            # (batch_size * num_samples, 1, *target_shape)
            new_samples = distr.sample(dtype=self.dtype)

            # (batch_size * num_samples, seq_len, *target_shape)
            repeated_past_target = F.concat(repeated_past_target,
                                            new_samples,
                                            dim=1)

            future_samples.append(new_samples)

        # (batch_size * num_samples, prediction_length, *target_shape)
        samples = F.concat(*future_samples, dim=1)

        # (batch_size, num_samples, prediction_length, *target_shape)
        return samples.reshape(shape=((-1, self.num_parallel_samples) +
                                      (self.prediction_length, ) +
                                      self.target_shape))
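
Stripped of the RNN, the parameter projection, and the scaling, the loop above reduces to a simple autoregressive pattern: sample one step, append the sample to the history so later lags can see it, and collect the per-step samples. A toy version, with the model replaced by a Gaussian random-walk stand-in:

    import mxnet as mx

    batch, prediction_length = 2, 5
    history = mx.nd.ones((batch, 3))

    future_samples = []
    for k in range(prediction_length):
        # stand-in for the RNN + distribution: sample around the last value
        last = history.slice_axis(axis=1, begin=-1, end=None)  # (batch, 1)
        new_samples = last + 0.1 * mx.nd.random.normal(shape=(batch, 1))

        # append so the next step's "lags" include this sample
        history = mx.nd.concat(history, new_samples, dim=1)
        future_samples.append(new_samples)

    samples = mx.nd.concat(*future_samples, dim=1)
    print(samples.shape)  # (2, 5)
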
Example #4
    def unroll_encoder_default(
        self,
        F,
        feat_static_cat: Tensor,  # (batch_size, num_features)
        feat_static_real: Tensor,  # (batch_size, num_features)
        past_time_feat: Tensor,  # (batch_size, history_length, num_features)
        past_target: Tensor,  # (batch_size, history_length, *target_shape)
        past_observed_values: Tensor,  # (batch_size, history_length, *target_shape)
        past_is_pad: Tensor,
        future_observed_values: Optional[Tensor],
        future_time_feat: Optional[
            Tensor],  # (batch_size, prediction_length, num_features)
        future_target: Optional[
            Tensor],  # (batch_size, prediction_length, *target_shape)
    ) -> Tuple[Tensor, List, Tensor, Tensor, Tensor]:
        """
        Unrolls the LSTM encoder over past and, if present, future data.
        Returns outputs and state of the encoder, plus the scale of past_target
        and a vector of static features that was constructed and fed as input
        to the encoder.

        All tensor arguments should have NTC layout.
        """

        if future_time_feat is None or future_target is None:
            time_feat = past_time_feat.slice_axis(
                axis=1,
                begin=self.history_length - self.context_length,
                end=None,
            )

            is_padded_indicator = past_is_pad.slice_axis(
                axis=1,
                begin=self.history_length - self.context_length,
                end=None,
            )

            sequence = past_target
            sequence_length = self.history_length
            subsequences_length = self.context_length
        else:
            time_feat = F.concat(
                past_time_feat.slice_axis(
                    axis=1,
                    begin=self.history_length - self.context_length,
                    end=None,
                ),
                future_time_feat,
                dim=1,
            )

            is_padded_indicator = F.concat(
                past_is_pad.slice_axis(
                    axis=1,
                    begin=self.history_length - self.context_length,
                    end=None,
                ),
                F.zeros_like(future_observed_values),
                dim=1,
            )

            sequence = F.concat(past_target, future_target, dim=1)
            sequence_length = self.history_length + self.prediction_length
            subsequences_length = self.context_length + self.prediction_length

        # (batch_size, sub_seq_len, *target_shape, num_lags)
        lags = self.get_lagged_subsequences(
            F=F,
            sequence=sequence,
            sequence_length=sequence_length,
            indices=self.lags_seq,
            subsequences_length=subsequences_length,
        )

        # scale is computed over the last context_length units of the past target
        # scale shape is (batch_size, 1, *target_shape)
        _, scale = self.scaler(
            past_target.slice_axis(axis=1,
                                   begin=-self.context_length,
                                   end=None),
            past_observed_values.slice_axis(axis=1,
                                            begin=-self.context_length,
                                            end=None),
        )

        # (batch_size, num_features)
        embedded_cat = self.embedder(feat_static_cat)

        # in addition to embedding features, use the log scale as it can help
        # prediction too
        # (batch_size, num_features + prod(target_shape))
        static_feat = F.concat(
            embedded_cat,
            feat_static_real,
            F.log(scale) if len(self.target_shape) == 0 else F.log(
                scale.squeeze(axis=1)),
            dim=1,
        )

        # (batch_size, subsequences_length, num_features + 1)
        repeated_static_feat = static_feat.expand_dims(axis=1).repeat(
            axis=1, repeats=subsequences_length)

        # (batch_size, sub_seq_len, *target_shape, num_lags)
        lags_scaled = F.broadcast_div(lags, scale.expand_dims(axis=-1))

        # from (batch_size, sub_seq_len, *target_shape, num_lags)
        # to (batch_size, sub_seq_len, prod(target_shape) * num_lags)
        input_lags = F.reshape(
            data=lags_scaled,
            shape=(
                -1,
                subsequences_length,
                len(self.lags_seq) * prod(self.target_shape),
            ),
        )

        # (batch_size, sub_seq_len, input_dim)
        inputs = F.concat(input_lags, time_feat, repeated_static_feat, dim=-1)

        begin_state = self.rnn.begin_state(
            func=F.zeros,
            dtype=self.dtype,
            batch_size=inputs.shape[0]
            if isinstance(inputs, mx.nd.NDArray) else 0,
        )
        state = begin_state
        # This is a dummy computation to avoid deferred initialization error
        # when past_is_pad is not used in the computation graph in default
        # unrolling mode.
        state = [
            F.where(
                is_padded_indicator.slice_axis(axis=1, begin=0, end=1).repeat(
                    repeats=self.num_cells, axis=1),
                bs,
                s,
            ) for bs, s in zip(begin_state, state)
        ]

        # unroll encoder
        outputs, state = self.rnn.unroll(
            inputs=inputs,
            length=subsequences_length,
            layout="NTC",
            merge_outputs=True,
            begin_state=state,
        )

        # outputs: (batch_size, seq_len, num_cells)
        # state: list of (batch_size, num_cells) tensors
        # scale: (batch_size, 1, *target_shape)
        # static_feat: (batch_size, num_features + prod(target_shape))
        return outputs, state, scale, static_feat, sequence
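
The F.where construction above resets the RNN state, per sample, wherever the padding indicator fires (and, as the comment notes, ties past_is_pad into the computation graph). A minimal sketch of just that selection, with made-up sizes:

    import mxnet as mx

    batch, num_cells = 2, 4
    is_pad = mx.nd.array([[1.0], [0.0]])  # sample 0 is padded at this step

    begin_state = mx.nd.zeros((batch, num_cells))
    carried_state = mx.nd.ones((batch, num_cells))

    # repeat the (batch, 1) indicator to (batch, num_cells) so the selection
    # is elementwise; rows with is_pad == 1 fall back to the begin state
    state = mx.nd.where(is_pad.repeat(repeats=num_cells, axis=1),
                        begin_state, carried_state)
    print(state.asnumpy())  # row 0 is all zeros, row 1 is all ones
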
Example #5
    def unroll_encoder_imputation(
        self,
        F,
        feat_static_cat: Tensor,  # (batch_size, num_features)
        feat_static_real: Tensor,  # (batch_size, num_features)
        past_time_feat: Tensor,  # (batch_size, history_length, num_features)
        past_target: Tensor,  # (batch_size, history_length, *target_shape)
        past_observed_values: Tensor,  # (batch_size, history_length, *target_shape)
        past_is_pad: Tensor,  # (batch_size, history_length)
        future_observed_values: Optional[
            Tensor],  # (batch_size, prediction_length, *target_shape)
        future_time_feat: Optional[
            Tensor],  # (batch_size, prediction_length, num_features)
        future_target: Optional[
            Tensor],  # (batch_size, prediction_length, *target_shape)
    ) -> Tuple[Tensor, List, Tensor, Tensor, Tensor]:
        """
        Unrolls the RNN encoder in "imputation mode", which fills in missing
        values with samples from the DeepAR model.
        """

        if future_time_feat is None or future_target is None:
            time_feat = past_time_feat.slice_axis(
                axis=1,
                begin=self.history_length - self.context_length,
                end=None,
            )

            is_padded_indicator = past_is_pad.slice_axis(
                axis=1,
                begin=self.history_length - self.context_length,
                end=None,
            )
            target = past_target.slice_axis(
                axis=1,
                begin=self.history_length - self.context_length,
                end=None,
            )
            target_observed_values = past_observed_values.slice_axis(
                axis=1,
                begin=self.history_length - self.context_length,
                end=None,
            )
            sequence = past_target
            sequence_length = self.history_length
            subsequences_length = self.context_length
        else:
            time_feat = F.concat(
                past_time_feat.slice_axis(
                    axis=1,
                    begin=self.history_length - self.context_length,
                    end=None,
                ),
                future_time_feat,
                dim=1,
            )

            is_padded_indicator = F.concat(
                past_is_pad.slice_axis(
                    axis=1,
                    begin=self.history_length - self.context_length,
                    end=None,
                ),
                F.zeros_like(future_observed_values),
                dim=1,
            )

            target = F.concat(
                past_target.slice_axis(
                    axis=1,
                    begin=self.history_length - self.context_length,
                    end=None,
                ),
                future_target,
                dim=1,
            )

            target_observed_values = F.concat(
                past_observed_values.slice_axis(
                    axis=1,
                    begin=self.history_length - self.context_length,
                    end=None,
                ),
                future_observed_values,
                dim=1,
            )

            sequence = F.concat(past_target, future_target, dim=1)
            sequence_length = self.history_length + self.prediction_length
            subsequences_length = self.context_length + self.prediction_length

        # (batch_size, sub_seq_len, *target_shape, num_lags)
        lags = self.get_lagged_subsequences(
            F=F,
            sequence=sequence,
            sequence_length=sequence_length,
            indices=self.lags_seq,
            subsequences_length=subsequences_length,
        )

        # scale is computed over the last context_length units of the past target
        # scale shape is (batch_size, 1, *target_shape)
        _, scale = self.scaler(
            past_target.slice_axis(axis=1,
                                   begin=-self.context_length,
                                   end=None),
            past_observed_values.slice_axis(axis=1,
                                            begin=-self.context_length,
                                            end=None),
        )

        # (batch_size, num_features)
        embedded_cat = self.embedder(feat_static_cat)

        # in addition to embedding features, use the log scale as it can help
        # prediction too
        # (batch_size, num_features + prod(target_shape))
        static_feat = F.concat(
            embedded_cat,
            feat_static_real,
            F.log(scale) if len(self.target_shape) == 0 else F.log(
                scale.squeeze(axis=1)),
            dim=1,
        )

        # (batch_size, subsequences_length, num_features + 1)
        repeated_static_feat = static_feat.expand_dims(axis=1).repeat(
            axis=1, repeats=subsequences_length)

        # (batch_size, sub_seq_len, *target_shape, num_lags)
        lags_scaled = F.broadcast_div(lags, scale.expand_dims(axis=-1))

        # from (batch_size, sub_seq_len, *target_shape, num_lags)
        # to (batch_size, sub_seq_len, prod(target_shape) * num_lags)
        input_lags = F.reshape(
            data=lags_scaled,
            shape=(
                -1,
                subsequences_length,
                len(self.lags_seq) * prod(self.target_shape),
            ),
        )

        # (batch_size, sub_seq_len, input_dim)
        inputs = F.concat(input_lags, time_feat, repeated_static_feat, dim=-1)

        # Set initial state
        begin_state = self.rnn.begin_state(
            func=F.zeros,
            dtype=self.dtype,
            batch_size=inputs.shape[0]
            if isinstance(inputs, mx.nd.NDArray) else 0,
        )

        unroll_results = self.imputation_rnn_unroll(
            F,
            begin_state=begin_state,
            sequence=sequence,
            sequence_length=sequence_length,
            subsequences_length=subsequences_length,
            scale=scale,
            target=target,
            target_observed_values=target_observed_values,
            time_feat=time_feat,
            repeated_static_feat=repeated_static_feat,
            is_padded_indicator=is_padded_indicator,
        )

        outputs, state, imputed_sequence = unroll_results
        # outputs: (batch_size, seq_len, num_cells)
        # state: list of (batch_size, num_cells) tensors
        # scale: (batch_size, 1, *target_shape)
        # static_feat: (batch_size, num_features + prod(target_shape))
        out = F.concat(*outputs, dim=1)
        return out, state, scale, static_feat, imputed_sequence
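
The scale returned by self.scaler is computed from the observed part of the context window. A rough sketch of what a mean scaler computes (the real GluonTS scaler additionally handles all-missing windows and enforces a minimum scale, omitted here):

    import mxnet as mx

    target = mx.nd.array([[1.0, 2.0, 3.0], [10.0, 0.0, 30.0]])
    observed = mx.nd.array([[1.0, 1.0, 1.0], [1.0, 0.0, 1.0]])

    # mean absolute value over the observed entries, kept as (batch, 1)
    # so it broadcasts against (batch, time) targets
    num_observed = observed.sum(axis=1, keepdims=True)
    scale = (target.abs() * observed).sum(axis=1, keepdims=True) / num_observed
    print(scale.asnumpy())  # [[ 2.], [20.]]
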
Example #6
    def prepare_inputs_imputation_step(
        self,
        F,
        begin_state: List[Tensor],
        imputed_sequence: Tensor,
        sequence_length: int,
        subsequences_length: int,
        scale: Tensor,
        target: Tensor,
        target_observed_values: Tensor,
        time_feat: Tensor,
        repeated_static_feat: Tensor,
        is_padded_indicator: Tensor,
        state,
        i: int,
    ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]:
        """
        Prepares inputs for the next LSTM unrolling step at step i.
        """
        lags = self.get_lagged_subsequences(
            F=F,
            sequence=imputed_sequence,
            sequence_length=sequence_length,
            indices=self.lags_seq,
            subsequences_length=subsequences_length,
        )
        # (batch_size, sub_seq_len, *target_shape, num_lags)
        lags_scaled = F.broadcast_div(lags, scale.expand_dims(axis=-1))
        # from (batch_size, sub_seq_len, *target_shape, num_lags)
        # to (batch_size, sub_seq_len, prod(target_shape) * num_lags)
        input_lags = F.reshape(
            data=lags_scaled,
            shape=(
                -1,
                subsequences_length,
                len(self.lags_seq) * prod(self.target_shape),
            ),
        )
        # (batch_size, sub_seq_len, input_dim)
        inputs = F.concat(input_lags, time_feat, repeated_static_feat, dim=-1)

        is_pad = is_padded_indicator.slice_axis(axis=1, begin=i, end=i + 1)

        current_observed_indicator = target_observed_values.slice_axis(
            axis=1, begin=i, end=i + 1)

        current_target = target.slice_axis(axis=1, begin=i, end=i + 1)

        pre_sequence = imputed_sequence.slice_axis(
            axis=1, begin=0, end=-subsequences_length + i)

        post_sequence = imputed_sequence.slice_axis(
            axis=1, begin=-subsequences_length + i + 1, end=None)
        # Reset the state to the begin state if the current target is padded
        state = [
            F.where(is_pad.repeat(repeats=self.num_cells, axis=1), bs, s)
            for bs, s in zip(begin_state, state)
        ]
        return (
            inputs,
            is_pad,
            current_observed_indicator,
            current_target,
            pre_sequence,
            post_sequence,
            state,
        )
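
The pre_sequence/post_sequence slices exist so that, one level up, the value at step i can be swapped for an imputed sample and the sequence reassembled around it. A small worked example of that splice (the sampled value is a stand-in for a drawn sample):

    import mxnet as mx

    seq = mx.nd.array([[1.0, 2.0, 3.0, 4.0]])
    subsequences_length, i = 3, 1  # unrolling the last 3 positions, step 1

    target = seq.slice_axis(axis=1, begin=-subsequences_length, end=None)
    current_target = target.slice_axis(axis=1, begin=i, end=i + 1)  # 3.0
    observed = mx.nd.array([[0.0]])  # step i was not observed
    sampled = mx.nd.array([[9.9]])   # stand-in for a sample from the model

    # keep the real value where observed, otherwise use the sample
    new_value = mx.nd.where(observed, current_target, sampled)

    pre = seq.slice_axis(axis=1, begin=0, end=-subsequences_length + i)
    post = seq.slice_axis(
        axis=1, begin=-subsequences_length + i + 1, end=None)
    print(mx.nd.concat(pre, new_value, post, dim=1).asnumpy())
    # [[1.  2.  9.9 4. ]]
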