Ejemplo n.º 1
0
    def __init__(
        self,
        encoder: Seq2SeqEncoder,
        enc2dec: Seq2SeqEnc2Dec,
        decoder: Seq2SeqDecoder,
        quantile_output: QuantileOutput,
        context_length: int,
        cardinality: List[int],
        embedding_dimension: List[int],
        input_repr: Representation = Representation(),
        output_repr: Representation = Representation(),
        scaling: bool = False,
        scaling_decoder_dynamic_feature: bool = False,
        dtype: DType = np.float32,
        **kwargs,
    ) -> None:
        """Assemble a sequence-to-sequence quantile-forecasting network.

        Parameters
        ----------
        encoder
            Module that encodes the conditioning range.
        enc2dec
            Module bridging encoder outputs to decoder inputs.
        decoder
            Module that decodes the prediction range.
        quantile_output
            Supplies both the quantile projection layer and the quantile loss.
        context_length
            Number of past time steps the encoder conditions on.
        cardinality
            Cardinality of each categorical feature.
        embedding_dimension
            Embedding size for each categorical feature.
        input_repr, output_repr
            Representations applied to network input / output.
            NOTE(review): the default ``Representation()`` instances are
            created once at definition time and shared across calls —
            confirm the class is stateless.
        scaling
            If True, mean-scale the target before encoding.
        scaling_decoder_dynamic_feature
            If True, mean-scale the decoder's dynamic features as well.
        dtype
            Floating-point dtype used for the feature embeddings.
        """
        super().__init__(**kwargs)

        # Plain attribute wiring; no computation happens here.
        self.encoder = encoder
        self.enc2dec = enc2dec
        self.decoder = decoder
        self.quantile_output = quantile_output
        self.context_length = context_length
        self.cardinality = cardinality
        self.embedding_dimension = embedding_dimension
        self.input_repr = input_repr
        self.output_repr = output_repr
        self.scaling = scaling
        self.scaling_decoder_dynamic_feature = scaling_decoder_dynamic_feature
        self.scaling_decoder_dynamic_feature_axis = 1
        self.dtype = dtype

        # Target scaler: a real mean scaler when requested, a no-op otherwise.
        target_scaler_cls = MeanScaler if self.scaling else NOPScaler
        self.scaler = target_scaler_cls(keepdims=True)

        # Same choice for the decoder's dynamic features, applied along
        # the time axis (axis 1).
        feature_scaler_cls = (
            MeanScaler
            if self.scaling_decoder_dynamic_feature
            else NOPScaler
        )
        self.scaler_decoder_dynamic_feature = feature_scaler_cls(
            keepdims=True, axis=self.scaling_decoder_dynamic_feature_axis
        )

        with self.name_scope():
            self.quantile_proj = quantile_output.get_quantile_proj()
            self.loss = quantile_output.get_loss()
            self.embedder = FeatureEmbedder(
                cardinalities=cardinality,
                embedding_dims=embedding_dimension,
                dtype=self.dtype,
            )
Ejemplo n.º 2
0
    def __init__(
        self,
        embedder: FeatureEmbedder,
        scaler: Scaler,
        encoder: Seq2SeqEncoder,
        enc2dec: Seq2SeqEnc2Dec,
        decoder: Seq2SeqDecoder,
        quantile_output: QuantileOutput,
        **kwargs,
    ) -> None:
        """Store the pre-built components of a seq2seq quantile network.

        Unlike the sibling network that constructs its own scaler and
        embedder, every component here is built by the caller and simply
        kept as an attribute.

        Parameters
        ----------
        embedder
            Embeds the categorical features.
        scaler
            Scales the target before encoding.
        encoder
            Encodes the conditioning range.
        enc2dec
            Bridges encoder outputs to decoder inputs.
        decoder
            Decodes the prediction range.
        quantile_output
            Supplies the quantile projection and the quantile loss.
        """
        super().__init__(**kwargs)

        # Keep references to the caller-supplied components.
        for attr_name, component in (
            ("embedder", embedder),
            ("scaler", scaler),
            ("encoder", encoder),
            ("enc2dec", enc2dec),
            ("decoder", decoder),
            ("quantile_output", quantile_output),
        ):
            setattr(self, attr_name, component)

        with self.name_scope():
            # Projection and loss layers are derived from the quantile spec.
            self.quantile_proj = quantile_output.get_quantile_proj()
            self.loss = quantile_output.get_loss()
Ejemplo n.º 3
0
class TemporalFusionTransformerNetwork(HybridBlock):
    """Core network graph of the Temporal Fusion Transformer forecaster.

    Every input variable (the target, past/future real-valued features,
    categorical features, and static features) is projected or embedded
    into a shared ``d_var``-dimensional space.  Variable-selection
    networks, conditioned on the static context, weight those variables;
    a temporal encoder and an attention-based decoder turn them into
    hidden states; a quantile projection produces the forecasts.
    """

    @validated()
    def __init__(
        self,
        context_length: int,
        prediction_length: int,
        d_var: int,
        d_hidden: int,
        n_head: int,
        n_output: int,
        d_past_feat_dynamic_real: List[int],
        c_past_feat_dynamic_cat: List[int],
        d_feat_dynamic_real: List[int],
        c_feat_dynamic_cat: List[int],
        d_feat_static_real: List[int],
        c_feat_static_cat: List[int],
        dropout: float = 0.0,
        **kwargs,
    ):
        """Construct all sub-modules of the TFT graph.

        Parameters
        ----------
        context_length
            Number of past time steps used as conditioning range.
        prediction_length
            Number of future time steps to predict.
        d_var
            Size of the per-variable embedding space.
        d_hidden
            Hidden size of the temporal encoder/decoder.
        n_head
            Number of attention heads in the temporal decoder.
        n_output
            Number of forecast quantiles (odd, so the median is included).
        d_past_feat_dynamic_real
            Dimensions of real-valued features observed only in the past.
        c_past_feat_dynamic_cat
            Cardinalities of categorical features observed only in the past.
        d_feat_dynamic_real
            Dimensions of real-valued features known over past and future.
        c_feat_dynamic_cat
            Cardinalities of categorical features known over past and future.
        d_feat_static_real
            Dimensions of static real-valued features.
        c_feat_static_cat
            Cardinalities of static categorical features.
        dropout
            Dropout rate shared by the gating/selection sub-networks.
        """
        # Plain super(): consistent with the other networks in this file.
        super().__init__(**kwargs)
        self.context_length = context_length
        self.prediction_length = prediction_length
        self.d_var = d_var
        self.d_hidden = d_hidden
        self.n_head = n_head
        self.n_output = n_output
        # Symmetric quantile levels around the median, e.g. n_output=5
        # yields [0.5, 0.1, 0.9, 0.2, 0.8].
        self.quantiles = sum(
            [[i / 10, 1.0 - i / 10] for i in range(1, (n_output + 1) // 2)],
            [0.5],
        )
        # Guards divisions when standardizing the target in _preprocess.
        self.normalize_eps = 1e-5

        self.d_past_feat_dynamic_real = d_past_feat_dynamic_real
        self.c_past_feat_dynamic_cat = c_past_feat_dynamic_cat
        self.d_feat_dynamic_real = d_feat_dynamic_real
        self.c_feat_dynamic_cat = c_feat_dynamic_cat
        self.d_feat_static_real = d_feat_static_real
        self.c_feat_static_cat = c_feat_static_cat
        # Variable counts per group (real-valued + categorical).
        self.n_past_feat_dynamic = len(self.d_past_feat_dynamic_real) + len(
            self.c_past_feat_dynamic_cat
        )
        self.n_feat_dynamic = len(self.d_feat_dynamic_real) + len(
            self.c_feat_dynamic_cat
        )
        self.n_feat_static = len(self.d_feat_static_real) + len(
            self.c_feat_static_cat
        )

        with self.name_scope():
            # Lifts the scalar target into the d_var embedding space.
            self.target_proj = nn.Dense(
                units=self.d_var,
                in_units=1,
                flatten=False,
                # Plain string: the original f-string had no placeholders.
                prefix="target_projection_",
            )
            # One projector/embedder per feature group; ``None`` when the
            # group is empty so _preprocess can skip it cheaply.
            if self.d_past_feat_dynamic_real:
                self.past_feat_dynamic_proj = FeatureProjector(
                    feature_dims=self.d_past_feat_dynamic_real,
                    embedding_dims=[self.d_var]
                    * len(self.d_past_feat_dynamic_real),
                    prefix="past_feat_dynamic_",
                )
            else:
                self.past_feat_dynamic_proj = None

            if self.c_past_feat_dynamic_cat:
                self.past_feat_dynamic_embed = FeatureEmbedder(
                    cardinalities=self.c_past_feat_dynamic_cat,
                    embedding_dims=[self.d_var]
                    * len(self.c_past_feat_dynamic_cat),
                    prefix="past_feat_dynamic_",
                )
            else:
                self.past_feat_dynamic_embed = None

            if self.d_feat_dynamic_real:
                self.feat_dynamic_proj = FeatureProjector(
                    feature_dims=self.d_feat_dynamic_real,
                    embedding_dims=[self.d_var]
                    * len(self.d_feat_dynamic_real),
                    prefix="feat_dynamic_",
                )
            else:
                self.feat_dynamic_proj = None

            if self.c_feat_dynamic_cat:
                self.feat_dynamic_embed = FeatureEmbedder(
                    cardinalities=self.c_feat_dynamic_cat,
                    embedding_dims=[self.d_var] * len(self.c_feat_dynamic_cat),
                    prefix="feat_dynamic_",
                )
            else:
                self.feat_dynamic_embed = None

            if self.d_feat_static_real:
                self.feat_static_proj = FeatureProjector(
                    feature_dims=self.d_feat_static_real,
                    embedding_dims=[self.d_var] * len(self.d_feat_static_real),
                    prefix="feat_static_",
                )
            else:
                self.feat_static_proj = None

            if self.c_feat_static_cat:
                self.feat_static_embed = FeatureEmbedder(
                    cardinalities=self.c_feat_static_cat,
                    embedding_dims=[self.d_var] * len(self.c_feat_static_cat),
                    prefix="feat_static_",
                )
            else:
                self.feat_static_embed = None

            # Variable-selection networks: static features get their own;
            # the context selector also sees the target (hence the +1);
            # both temporal selectors are conditioned on the static context.
            self.static_selector = VariableSelectionNetwork(
                d_hidden=self.d_var,
                n_vars=self.n_feat_static,
                dropout=dropout,
            )
            self.ctx_selector = VariableSelectionNetwork(
                d_hidden=self.d_var,
                n_vars=self.n_past_feat_dynamic + self.n_feat_dynamic + 1,
                add_static=True,
                dropout=dropout,
            )
            self.tgt_selector = VariableSelectionNetwork(
                d_hidden=self.d_var,
                n_vars=self.n_feat_dynamic,
                add_static=True,
                dropout=dropout,
            )
            # Static context vectors: selection weighting, enrichment, and
            # the encoder's initial hidden/cell states.
            self.selection = GatedResidualNetwork(
                d_hidden=self.d_var, dropout=dropout,
            )
            self.enrichment = GatedResidualNetwork(
                d_hidden=self.d_var, dropout=dropout,
            )
            self.state_h = GatedResidualNetwork(
                d_hidden=self.d_var, d_output=self.d_hidden, dropout=dropout,
            )
            self.state_c = GatedResidualNetwork(
                d_hidden=self.d_var, d_output=self.d_hidden, dropout=dropout,
            )
            self.temporal_encoder = TemporalFusionEncoder(
                context_length=self.context_length,
                prediction_length=self.prediction_length,
                d_input=self.d_var,
                d_hidden=self.d_hidden,
            )
            self.temporal_decoder = TemporalFusionDecoder(
                context_length=self.context_length,
                prediction_length=self.prediction_length,
                d_hidden=self.d_hidden,
                d_var=self.d_var,
                n_head=self.n_head,
                dropout=dropout,
            )
            self.output = QuantileOutput(quantiles=self.quantiles)
            self.output_proj = self.output.get_quantile_proj()
            self.loss = self.output.get_loss()

    def _preprocess(
        self,
        F,
        past_target: Tensor,
        past_observed_values: Tensor,
        past_feat_dynamic_real: Tensor,
        past_feat_dynamic_cat: Tensor,
        feat_dynamic_real: Tensor,
        feat_dynamic_cat: Tensor,
        feat_static_real: Tensor,
        feat_static_cat: Tensor,
    ):
        """Standardize the target and embed all features into d_var space.

        The past target is standardized with an observation-masked mean
        (``offset``) and standard deviation (``scale``); dynamic features
        known over the whole horizon are split at ``context_length`` into
        past and future parts.

        Returns
        -------
        A 5-tuple of (past_covariates, future_covariates,
        static_covariates, offset, scale) where the first three are lists
        of per-variable tensors with a ``d_var``-sized last dimension and
        the last two are kept for ``_postprocess``.
        """
        # Masked moments over the time axis (axis 1).
        obs = F.broadcast_mul(past_target, past_observed_values)
        count = F.sum(past_observed_values, axis=1, keepdims=True)
        offset = F.broadcast_div(
            F.sum(obs, axis=1, keepdims=True), count + self.normalize_eps,
        )
        scale = F.broadcast_div(
            F.sum(obs ** 2, axis=1, keepdims=True), count + self.normalize_eps,
        )
        # Var = E[x^2] - E[x]^2.
        # NOTE(review): this can round slightly negative for near-constant
        # series, making sqrt produce NaN — consider clipping at zero.
        scale = F.broadcast_sub(scale, offset ** 2)
        scale = F.sqrt(scale)
        past_target = F.broadcast_div(
            F.broadcast_sub(past_target, offset), scale + self.normalize_eps,
        )
        past_target = F.expand_dims(past_target, axis=-1)

        past_covariates = []
        future_covariates = []
        static_covariates = []
        # The (normalized) target itself is the first past variable.
        proj = self.target_proj(past_target)
        past_covariates.append(proj)
        if self.past_feat_dynamic_proj is not None:
            projs = self.past_feat_dynamic_proj(past_feat_dynamic_real)
            past_covariates.extend(projs)
        if self.past_feat_dynamic_embed is not None:
            embs = self.past_feat_dynamic_embed(past_feat_dynamic_cat)
            past_covariates.extend(embs)
        if self.feat_dynamic_proj is not None:
            projs = self.feat_dynamic_proj(feat_dynamic_real)
            for proj in projs:
                # Split full-horizon features into context / prediction parts.
                ctx_proj = F.slice_axis(
                    proj, axis=1, begin=0, end=self.context_length
                )
                tgt_proj = F.slice_axis(
                    proj, axis=1, begin=self.context_length, end=None
                )
                past_covariates.append(ctx_proj)
                future_covariates.append(tgt_proj)
        if self.feat_dynamic_embed is not None:
            embs = self.feat_dynamic_embed(feat_dynamic_cat)
            for emb in embs:
                ctx_emb = F.slice_axis(
                    emb, axis=1, begin=0, end=self.context_length
                )
                tgt_emb = F.slice_axis(
                    emb, axis=1, begin=self.context_length, end=None
                )
                past_covariates.append(ctx_emb)
                future_covariates.append(tgt_emb)

        if self.feat_static_proj is not None:
            projs = self.feat_static_proj(feat_static_real)
            static_covariates.extend(projs)
        if self.feat_static_embed is not None:
            embs = self.feat_static_embed(feat_static_cat)
            static_covariates.extend(embs)

        return (
            past_covariates,
            future_covariates,
            static_covariates,
            offset,
            scale,
        )

    def _postprocess(
        self, F, preds: Tensor, offset: Tensor, scale: Tensor,
    ) -> Tensor:
        """Map standardized quantile predictions back to the original scale.

        Inverts the standardization performed in ``_preprocess``; the
        trailing axis is added so offset/scale broadcast over the
        quantile dimension.
        """
        offset = F.expand_dims(offset, axis=-1)
        scale = F.expand_dims(scale, axis=-1)
        preds = F.broadcast_add(
            F.broadcast_mul(preds, (scale + self.normalize_eps)), offset,
        )
        return preds

    def _forward(
        self,
        F,
        past_observed_values: Tensor,
        past_covariates: Tensor,
        future_covariates: Tensor,
        static_covariates: Tensor,
    ):
        """Run selection, temporal encoding/decoding, and quantile projection.

        Returns the quantile predictions produced by ``output_proj``.
        """
        # Static context vectors; the time axis is added so they broadcast
        # against the temporal inputs.
        static_var, _ = self.static_selector(static_covariates)
        c_selection = self.selection(static_var).expand_dims(axis=1)
        c_enrichment = self.enrichment(static_var).expand_dims(axis=1)
        c_h = self.state_h(static_var)
        c_c = self.state_c(static_var)

        # Per-time-step variable weighting, conditioned on the static context.
        ctx_input, _ = self.ctx_selector(past_covariates, c_selection)
        tgt_input, _ = self.tgt_selector(future_covariates, c_selection)

        # [c_h, c_c] initialize the encoder's recurrent state.
        encoding = self.temporal_encoder(ctx_input, tgt_input, [c_h, c_c])
        decoding = self.temporal_decoder(
            encoding, c_enrichment, past_observed_values
        )
        preds = self.output_proj(decoding)

        return preds
Ejemplo n.º 4
0
class SelfAttentionNetwork(HybridBlock):
    @validated()
    def __init__(
        self,
        context_length: int,
        prediction_length: int,
        d_hidden: int,
        m_ffn: int,
        n_head: int,
        n_layers: int,
        n_output: int,
        cardinalities: List[int],
        kernel_sizes: Optional[List[int]],
        dist_enc: Optional[str],
        pre_ln: bool,
        dropout: float,
        temperature: float,
        normalizer_eps: float = 1e-5,
        **kwargs,
    ):
        """Build the self-attention forecasting network.

        Parameters
        ----------
        context_length
            Number of conditioning time steps.
        prediction_length
            Number of time steps to forecast.
        d_hidden
            Model (hidden) dimension used throughout.
        m_ffn
            Width multiplier of the feed-forward part of each block.
        n_head
            Number of attention heads per block.
        n_layers
            Number of stacked self-attention blocks.
        n_output
            Number of forecast quantiles; must be odd and at most 9.
        cardinalities
            Cardinalities of the categorical features; may be empty.
        kernel_sizes
            Convolution kernel sizes inside each block; ``None`` or empty
            falls back to ``(1,)`` (pointwise).
        dist_enc
            Optional distance-encoding scheme passed to the blocks.
        pre_ln
            If True, use pre-layer-norm blocks.
        dropout
            Dropout rate.
        temperature
            Attention softmax temperature.
        normalizer_eps
            Epsilon guarding divisions when standardizing the target.
        """
        super().__init__(**kwargs)
        # Fall back to a pointwise kernel when nothing usable was given.
        if kernel_sizes is None or len(kernel_sizes) == 0:
            self.kernel_sizes = (1, )
        else:
            self.kernel_sizes = kernel_sizes
        self.context_length = context_length
        self.prediction_length = prediction_length
        self.d_hidden = d_hidden
        # NOTE(review): ``assert`` validation disappears under ``python -O``;
        # kept as-is so callers catching AssertionError are unaffected.
        assert (n_output % 2 == 1) and (n_output <= 9)
        # Symmetric quantile levels around the median, e.g. n_output=5
        # yields [0.5, 0.1, 0.9, 0.2, 0.8].
        self.quantiles = sum(
            ([i / 10, 1.0 - i / 10] for i in range(1, (n_output + 1) // 2)),
            [0.5],
        )
        self.normalizer_eps = normalizer_eps

        with self.name_scope():
            self._blocks = []
            for layer in range(n_layers):
                block = SelfAttentionBlock(
                    d_hidden=self.d_hidden,
                    m_ffn=m_ffn,
                    kernel_sizes=self.kernel_sizes,
                    n_head=n_head,
                    dist_enc=dist_enc,
                    pre_ln=pre_ln,
                    dropout=dropout,
                    temperature=temperature,
                )
                # Explicit registration since the list itself is not a Block.
                self.register_child(block=block, name=f"block_{layer+1}")
                self._blocks.append(block)

            # Projects the scalar target into the model dimension.
            self.target_proj = nn.Dense(
                units=self.d_hidden,
                in_units=1,
                use_bias=True,
                flatten=False,
                weight_initializer=init.Xavier(),
                prefix="target_proj_",
            )
            # Projects concatenated real-valued covariates.
            self.covar_proj = nn.Dense(
                units=self.d_hidden,
                use_bias=True,
                flatten=False,
                weight_initializer=init.Xavier(),
                prefix="covar_proj_",
            )
            if cardinalities:
                self.embedder = FeatureEmbedder(
                    cardinalities=cardinalities,
                    embedding_dims=[self.d_hidden] * len(cardinalities),
                    prefix="embedder_",
                )
            else:
                # Always define the attribute so downstream code never hits
                # an AttributeError when no categorical features exist.
                self.embedder = None
            self.output = QuantileOutput(quantiles=self.quantiles)
            self.output_proj = self.output.get_quantile_proj()
            self.loss = self.output.get_loss()

    def _preprocess(
        self,
        F,
        past_target: Tensor,
        past_observed_values: Tensor,
        past_is_pad: Tensor,
        past_feat_dynamic_real: Tensor,
        past_feat_dynamic_cat: Tensor,
        future_target: Optional[Tensor],
        future_feat_dynamic_real: Tensor,
        future_feat_dynamic_cat: Tensor,
        feat_static_real: Tensor,
        feat_static_cat: Tensor,
    ) -> Tuple[Tensor, Optional[Tensor], Tensor, Optional[Tensor],
               Optional[Tensor], Tensor, Tensor, ]:
        """Standardize targets and assemble covariate tensors.

        The past target is standardized with an observation-masked mean
        (``offset``) and standard deviation (``scale``); the future target,
        when given (training only — hence ``Optional``), is standardized
        with the same statistics.  Real covariates are projected and
        categorical covariates embedded into ``d_hidden``, then summed into
        a single covariate tensor per range (``None`` when a range has no
        features at all).

        NOTE(review): this relies on concrete ``.shape`` values, so it
        assumes imperative (non-hybridized) execution — confirm.
        """
        # Masked first/second moments over the time axis.
        obs = past_target * past_observed_values
        count = F.sum(past_observed_values, axis=1, keepdims=True)
        offset = F.sum(obs, axis=1,
                       keepdims=True) / (count + self.normalizer_eps)
        scale = F.sum(obs**2, axis=1,
                      keepdims=True) / (count + self.normalizer_eps)
        # Var = E[x^2] - E[x]^2; can round slightly negative for
        # near-constant series, in which case sqrt yields NaN.
        scale = scale - offset**2
        scale = scale.sqrt()

        past_target = (past_target - offset) / (scale + self.normalizer_eps)
        if future_target is not None:
            future_target = (future_target - offset) / (scale +
                                                        self.normalizer_eps)

        def _assemble_covariates(
            feat_dynamic_real: Tensor,
            feat_dynamic_cat: Tensor,
            feat_static_real: Tensor,
            feat_static_cat: Tensor,
            is_past: bool,
        ) -> Tensor:
            # Real-valued part: concat dynamic + tiled static, then project.
            covariates = []
            if feat_dynamic_real.shape[-1] > 0:
                covariates.append(feat_dynamic_real)
            if feat_static_real.shape[-1] > 0:
                covariates.append(
                    feat_static_real.expand_dims(axis=1).repeat(
                        axis=1,
                        repeats=self.context_length
                        if is_past else self.prediction_length,
                    ))
            if len(covariates) > 0:
                covariates = F.concat(*covariates, dim=-1)
                covariates = self.covar_proj(covariates)
            else:
                covariates = None

            # Categorical part: concat, embed, and sum per-feature
            # embeddings; added onto the projected real covariates.
            categories = []
            if feat_dynamic_cat.shape[-1] > 0:
                categories.append(feat_dynamic_cat)
            if feat_static_cat.shape[-1] > 0:
                categories.append(
                    feat_static_cat.expand_dims(axis=1).repeat(
                        axis=1,
                        repeats=self.context_length
                        if is_past else self.prediction_length,
                    ))
            if len(categories) > 0:
                categories = F.concat(*categories, dim=-1)
                embeddings = self.embedder(categories)
                # Fold the per-feature axis back and sum the embeddings.
                embeddings = F.reshape(embeddings,
                                       shape=(0, 0, -4, self.d_hidden,
                                              -1)).sum(axis=-1)
                if covariates is not None:
                    covariates = covariates + embeddings
                else:
                    covariates = embeddings

            return covariates

        past_covariates = _assemble_covariates(
            past_feat_dynamic_real,
            past_feat_dynamic_cat,
            feat_static_real,
            feat_static_cat,
            is_past=True,
        )
        future_covariates = _assemble_covariates(
            future_feat_dynamic_real,
            future_feat_dynamic_cat,
            feat_static_real,
            feat_static_cat,
            is_past=False,
        )
        # A step counts as observed only if it is not padding.
        past_observed_values = F.broadcast_logical_and(
            past_observed_values,
            F.logical_not(past_is_pad),
        )

        return (
            past_target,
            past_covariates,
            past_observed_values,
            future_target,
            future_covariates,
            offset,
            scale,
        )

    def _postprocess(self, F, preds: Tensor, offset: Tensor,
                     scale: Tensor) -> Tensor:
        """Undo the target standardization applied in ``_preprocess``.

        A trailing axis is added so offset/scale broadcast over the
        quantile dimension of ``preds``.
        """
        rescale = F.expand_dims(scale, axis=-1) + self.normalizer_eps
        shift = F.expand_dims(offset, axis=-1)
        return preds * rescale + shift

    def _forward_step(
        self,
        F,
        horizon: int,
        target: Tensor,
        covars: Optional[Tensor],
        mask: Tensor,
    ) -> Tensor:
        """Run the attention stack once and project the last ``horizon`` steps.

        The target is lifted to shape (..., 1) for the dense projection,
        covariates (when present) are added, every registered block is
        applied in order, and only the final ``horizon`` positions are fed
        to the quantile projection.
        """
        hidden = self.target_proj(F.expand_dims(target, axis=-1))
        if covars is not None:
            hidden = hidden + covars
        attn_mask = F.expand_dims(mask, axis=-1)
        for attn_block in self._blocks:
            hidden = attn_block(hidden, attn_mask)
        tail = F.slice_axis(hidden, axis=1, begin=-horizon, end=None)
        return self.output_proj(tail)