Example 1
    def initialize_blocks(self):
        r""" Helper function to initialize blocks.

        """
        for i in range(self._hparams.num_blocks):
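            # Self-attention sub-layer; only the first block's attention
            # module stores the relative position embeddings.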
            mh_attn = MultiheadRPRAttention(
                self._input_size,
                self._hparams.multihead_attention,
                stores_relative_position=bool(i == 0)
            )
            self.self_attns.append(mh_attn)

            self.self_attn_layer_norm.append(
                T5LayerNorm(self._input_size, eps=self._hparams.eps))
            if self._hparams.dim != mh_attn.hparams.output_dim:
                raise ValueError(
                    'The "dim" in the hparams of '
                    '"multihead_attention" should be equal to the '
                    '"dim" of T5Encoder')

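            # Position-wise feed-forward sub-layer.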
            pw_net = FeedForwardNetwork(
                hparams=self._hparams['poswise_feedforward'])

            final_dim = pw_net.hparams.layers[-1]['kwargs']['out_features']
            if self._hparams.dim != final_dim:
                raise ValueError(
                    'The output dimension of '
                    '"poswise_feedforward" should be equal '
                    'to the "dim" of T5Encoder.')

            self.poswise_networks.append(pw_net)
            self.poswise_layer_norm.append(
                T5LayerNorm(self._input_size, eps=self._hparams.eps))
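
The two ValueError checks above tie three hyperparameters together: the encoder's "dim", the attention module's "output_dim", and the "out_features" of the last "poswise_feedforward" layer. A partial, illustrative hparams sketch that satisfies both checks (field names follow the snippet; the concrete sizes are assumptions, not the library defaults):

hparams = {
    "num_blocks": 12,
    "dim": 768,
    "eps": 1e-6,
    "multihead_attention": {
        "output_dim": 768,  # must equal "dim" (first check)
    },
    "poswise_feedforward": {
        "layers": [
            {"type": "Linear",
             "kwargs": {"in_features": 768, "out_features": 3072}},
            {"type": "ReLU", "kwargs": {}},
            {"type": "Linear",
             "kwargs": {"in_features": 3072, "out_features": 768}},  # must equal "dim" (second check)
        ],
    },
}
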
Example 2
    def initialize_blocks(self):
        r"""Helper function to initialize blocks.
        """
        for i in range(self._hparams.num_blocks):
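            # Self-attention sub-layer; only the first block's attention
            # module stores the relative position embeddings.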
            attn_module = MultiheadRPRAttention(
                self._input_size,
                self._hparams.multihead_attention,
                stores_relative_position=bool(i == 0))
            if self._hparams.dim != attn_module.output_size:
                raise ValueError("The output dimension of "
                                 "MultiheadRPRAttention should be equal "
                                 "to the dim of T5Decoder")
            self.self_attns.append(attn_module)
            self.self_attn_layer_norm.append(
                T5LayerNorm(self._input_size, eps=self._hparams.eps))

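            # Encoder-decoder (cross-attention) sub-layer.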
            attn_module = MultiheadRPRAttention(
                self._input_size,
                self._hparams.multihead_attention,
                stores_relative_position=bool(i == 0))
            if self._hparams.dim != attn_module.output_size:
                raise ValueError("The output dimension of "
                                 "MultiheadRPRAttention should be equal "
                                 "to the dim of T5Decoder")
            self.enc_dec_attns.append(attn_module)
            self.end_dec_attn_layer_norm.append(
                T5LayerNorm(self._input_size, eps=self._hparams.eps))

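            # Position-wise feed-forward sub-layer.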
            poswise_network = FeedForwardNetwork(
                hparams=self._hparams.poswise_feedforward)
            if (poswise_network.hparams.layers[-1]['kwargs']['out_features'] !=
                    self._hparams.dim):
                raise ValueError("The output dimension of "
                                 "FeedForwardNetwork should be equal "
                                 "to the dim of T5Decoder")
            self.poswise_networks.append(poswise_network)
            self.poswise_layer_norm.append(
                T5LayerNorm(self._input_size, eps=self._hparams.eps))
Example 3
    def __init__(self,
                 token_embedder: Optional[TokenEmbedder] = None,
                 token_pos_embedder: Optional[TokenPosEmbedder] = None,
                 vocab_size: Optional[int] = None,
                 output_layer: Optional[Union[nn.Module, torch.Tensor]] = None,
                 hparams=None):
        super().__init__(token_embedder,
                         token_pos_embedder,
                         vocab_size=vocab_size,
                         output_layer=output_layer,
                         hparams=hparams)

        self.final_layer_norm = T5LayerNorm(
            self._input_size,  # type: ignore
            eps=self._hparams.eps)
Example 4
    def __init__(self, hparams=None):
        super().__init__(hparams=hparams)

        self.final_layer_norm = T5LayerNorm(self._input_size,
                                            eps=self._hparams.eps)
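
Both constructors close the stack with T5LayerNorm rather than torch.nn.LayerNorm. T5's layer normalization is conventionally an RMS-style norm: it rescales by the root mean square over the hidden dimension, with no mean subtraction and no bias term. A minimal sketch of that behaviour, assuming T5LayerNorm follows this convention (RMSStyleLayerNorm is a hypothetical name used here for illustration, not the library class):

import torch
from torch import nn


class RMSStyleLayerNorm(nn.Module):
    """Sketch of a T5-style layer norm: scale-only RMS normalization."""

    def __init__(self, hidden_size: int, eps: float = 1e-6):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.eps = eps

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Normalize by the root mean square over the last dimension;
        # unlike nn.LayerNorm, there is no mean subtraction and no bias.
        variance = x.pow(2).mean(dim=-1, keepdim=True)
        return self.weight * x * torch.rsqrt(variance + self.eps)

The eps forwarded from self._hparams.eps plays the same numerical-stability role here as it does in nn.LayerNorm.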