Example #1
 def forward(self, input_ids, token_type_ids=None):
     r'''
     The SqueezeBertForQuestionAnswering forward method, overrides the __call__() special method.
     Args:
         input_ids (Tensor):
             See :class:`SqueezeBertModel`.
         token_type_ids (Tensor, optional):
             See :class:`SqueezeBertModel`.
     Returns:
         tuple: Returns tuple (`start_logits`, `end_logits`).
         With the fields:
         - `start_logits` (Tensor):
             A tensor of the input token classification logits that indicates the start position of the labelled span.
             Its data type should be float32 and its shape is [batch_size, sequence_length].
         - `end_logits` (Tensor):
             A tensor of the input token classification logits that indicates the end position of the labelled span.
             Its data type should be float32 and its shape is [batch_size, sequence_length].
     '''
     sequence_output, _ = self.squeezebert(input_ids,
                                           token_type_ids=token_type_ids,
                                           position_ids=None,
                                           attention_mask=None)
     logits = self.classifier(sequence_output)
     logits = paddle.transpose(logits, perm=[2, 0, 1])
     start_logits, end_logits = paddle.unstack(x=logits, axis=0)
     return start_logits, end_logits
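
The transpose-then-unstack idiom above recurs in most of the question-answering heads in this listing: the classifier emits two scores per token, the size-2 axis is moved to the front, and unstack splits it into separate start and end logits. A minimal, self-contained sketch of the shape arithmetic with dummy tensors (the shapes and names here are illustrative, not taken from the original model):

import paddle

# Hypothetical shapes for illustration only.
batch_size, seq_len = 4, 128
logits = paddle.randn([batch_size, seq_len, 2])     # classifier output: 2 scores per token

# Move the size-2 axis to the front, then split it into two tensors.
logits = paddle.transpose(logits, perm=[2, 0, 1])   # [2, batch_size, seq_len]
start_logits, end_logits = paddle.unstack(logits, axis=0)

print(start_logits.shape, end_logits.shape)         # [4, 128] [4, 128]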
Example #2
    def forward(self, *args, **kwargs):
        """
        Args:
            start_pos (optional, `Variable` of shape [batch_size]):
                token index of start of answer span in `context`
            end_pos (optional, `Variable` of shape [batch_size]):
                token index of end of answer span in `context`
        Returns:
            loss (`Variable` of shape []):
                Cross entropy loss averaged over batch and time; positions where label == -100 are ignored.
                Returns None if `start_pos`/`end_pos` are not provided.
            start_logits (`Variable` of shape [batch_size, seq_len]):
                Output logits of the start position; use argmax(start_logits) to get the start index.
            end_logits (`Variable` of shape [batch_size, seq_len]):
                Output logits of the end position; use argmax(end_logits) to get the end index.
        """

        start_pos = kwargs.pop('start_pos', None)
        end_pos = kwargs.pop('end_pos', None)
        pooled, encoded = super(ErnieModelForQuestionAnswering,
                                self).forward(*args, **kwargs)
        encoded = self.dropout(encoded)
        encoded = self.classifier(encoded)
        start_logits, end_logits = P.unstack(encoded, axis=-1)
        if start_pos is not None and end_pos is not None:
            if len(start_pos.shape) != 1:
                start_pos = start_pos.squeeze()
            if len(end_pos.shape) != 1:
                end_pos = end_pos.squeeze()
            start_loss = F.cross_entropy(start_logits, start_pos)
            end_loss = F.cross_entropy(end_logits, end_pos)
            loss = (start_loss.mean() + end_loss.mean()) / 2.
        else:
            loss = None
        return loss, start_logits, end_logits
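
Unlike Example #1, this head unstacks the classifier output along the last axis directly, which makes the transpose unnecessary; the span loss is then an ordinary cross-entropy over the sequence dimension for each of the two logit tensors. A hedged sketch of that computation with random stand-in data (all names, shapes, and values are illustrative):

import paddle
import paddle.nn.functional as F

batch_size, seq_len = 4, 128
encoded = paddle.randn([batch_size, seq_len, 2])              # classifier output
start_logits, end_logits = paddle.unstack(encoded, axis=-1)   # each [batch_size, seq_len]

start_pos = paddle.randint(0, seq_len, [batch_size])          # gold start indices
end_pos = paddle.randint(0, seq_len, [batch_size])            # gold end indices

# cross_entropy treats the sequence dimension as the class dimension here.
start_loss = F.cross_entropy(start_logits, start_pos)
end_loss = F.cross_entropy(end_logits, end_pos)
loss = (start_loss.mean() + end_loss.mean()) / 2.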
Example #3
 def forward(self, inputs):
     """
     forward
     """
     x = paddle.unstack(
         inputs, axis=self.config['axis'], num=self.config['num'])
     return x
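
For reference, paddle.unstack removes the given axis and returns a plain Python list of tensors; the optional num argument fixes how many slices to expect. A tiny illustration (not part of the original layer):

import paddle

x = paddle.to_tensor([[1., 2., 3.],
                      [4., 5., 6.]])          # shape [2, 3]

rows = paddle.unstack(x, axis=0)              # 2 tensors of shape [3]
cols = paddle.unstack(x, axis=1, num=3)       # 3 tensors of shape [2]

print(len(rows), rows[0].numpy())             # 2 [1. 2. 3.]
print(len(cols), cols[0].numpy())             # 3 [1. 4.]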
Example #4
 def forward(self, input_ids, attention_mask=None):
     sequence_output = self.distilbert(input_ids,
                                       attention_mask=attention_mask)
     sequence_output = self.dropout(sequence_output)
     logits = self.classifier(sequence_output)
     logits = paddle.transpose(logits, perm=[2, 0, 1])
     start_logits, end_logits = paddle.unstack(x=logits, axis=0)
     return start_logits, end_logits
Example #5
    def forward(self, input_ids, token_type_ids=None, attention_mask=None):
        sequence_output, _ = self.nezha(input_ids, token_type_ids,
                                        attention_mask)

        logits = self.classifier(sequence_output)
        logits = paddle.transpose(logits, perm=[2, 0, 1])

        start_logits, end_logits = paddle.unstack(x=logits, axis=0)

        return start_logits, end_logits
Example #6
    def forward(self,
                input_ids,
                token_type_ids=None,
                position_ids=None,
                attention_mask=None):
        r"""
        The BertForQuestionAnswering forward method, overrides the __call__() special method.

        Args:
            input_ids (Tensor):
                See :class:`BertModel`.
            token_type_ids (Tensor, optional):
                See :class:`BertModel`.
            position_ids (Tensor, optional):
                See :class:`BertModel`.
            attention_mask (Tensor, optional):
                See :class:`BertModel`.

        Returns:
            tuple: Returns tuple (`start_logits`, `end_logits`).

            With the fields:

            - `start_logits` (Tensor):
                A tensor of the input token classification logits that indicates the start position of the labelled span.
                Its data type should be float32 and its shape is [batch_size, sequence_length].

            - `end_logits` (Tensor):
                A tensor of the input token classification logits that indicates the end position of the labelled span.
                Its data type should be float32 and its shape is [batch_size, sequence_length].

        Example:
            .. code-block::

                import paddle
                from paddlenlp.transformers.bert.modeling import BertForQuestionAnswering
                from paddlenlp.transformers.bert.tokenizer import BertTokenizer

                tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
                model = BertForQuestionAnswering.from_pretrained('bert-base-cased')

                inputs = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
                inputs = {k:paddle.to_tensor([v]) for (k, v) in inputs.items()}
                outputs = model(**inputs)

                start_logits = outputs[0]
                end_logits = outputs[1]
        """

        sequence_output, _ = self.bert(input_ids,
                                       token_type_ids=token_type_ids,
                                       position_ids=position_ids,
                                       attention_mask=attention_mask)

        logits = self.classifier(sequence_output)
        logits = paddle.transpose(logits, perm=[2, 0, 1])
        start_logits, end_logits = paddle.unstack(x=logits, axis=0)

        return start_logits, end_logits
Example #7
    def forward(self,
                input_ids,
                token_type_ids=None,
                position_ids=None,
                attention_mask=None):
        r"""
        Args:
            input_ids (Tensor):
                See :class:`ErnieGramModel`.
            token_type_ids (Tensor, optional):
                See :class:`ErnieGramModel`.
            position_ids (Tensor, optional):
                See :class:`ErnieGramModel`.
            attention_mask (Tensor, optional):
                See :class:`ErnieGramModel`.


        Returns:
            tuple: Returns tuple (`start_logits`, `end_logits`).

            With the fields:

            - `start_logits` (Tensor):
                A tensor of the input token classification logits that indicates the start position of the labelled span.
                Its data type should be float32 and its shape is [batch_size, sequence_length].

            - `end_logits` (Tensor):
                A tensor of the input token classification logits that indicates the end position of the labelled span.
                Its data type should be float32 and its shape is [batch_size, sequence_length].

        Example:
            .. code-block::

                import paddle
                from paddlenlp.transformers import ErnieGramForQuestionAnswering, ErnieGramTokenizer

                tokenizer = ErnieGramTokenizer.from_pretrained('ernie-gram-zh')
                model = ErnieGramForQuestionAnswering.from_pretrained('ernie-gram-zh')

                inputs = tokenizer("欢迎使用百度飞桨!")
                inputs = {k:paddle.to_tensor([v]) for (k, v) in inputs.items()}
                logits = model(**inputs)
        """

        sequence_output, _ = self.ernie_gram(
            input_ids,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            attention_mask=attention_mask)

        logits = self.classifier(sequence_output)
        logits = paddle.transpose(logits, perm=[2, 0, 1])
        start_logits, end_logits = paddle.unstack(x=logits, axis=0)

        return start_logits, end_logits
Example #8
    def forward(self, input_ids, token_type_ids=None, attention_mask=None):
        r"""
        The RoFormerv2ForQuestionAnswering forward method, overrides the __call__() special method.

        Args:
            input_ids (Tensor):
                See :class:`RoFormerv2Model`.
            token_type_ids (Tensor, optional):
                See :class:`RoFormerv2Model`.
            attention_mask (Tensor, optional):
                See :class:`RoFormerv2Model`.

        Returns:
            tuple: Returns tuple (`start_logits`, `end_logits`).

            With the fields:

            - `start_logits` (Tensor):
                A tensor of the input token classification logits that indicates the start position of the labelled span.
                Its data type should be float32 and its shape is [batch_size, sequence_length].

            - `end_logits` (Tensor):
                A tensor of the input token classification logits that indicates the end position of the labelled span.
                Its data type should be float32 and its shape is [batch_size, sequence_length].

        Example:
            .. code-block::

                import paddle
                from paddlenlp.transformers import RoFormerv2ForQuestionAnswering, RoFormerv2Tokenizer

                tokenizer = RoFormerv2Tokenizer.from_pretrained('roformer_v2_chinese_char_base')
                model = RoFormerv2ForQuestionAnswering.from_pretrained('roformer_v2_chinese_char_base')

                inputs = tokenizer("欢迎使用百度飞桨!")
                inputs = {k:paddle.to_tensor([v], dtype="int64") for (k, v) in inputs.items()}
                outputs = model(**inputs)

                start_logits = outputs[0]
                end_logits = outputs[1]
        """
        sequence_output = self.roformerv2(
            input_ids,
            token_type_ids=token_type_ids,
            attention_mask=attention_mask)

        logits = self.classifier(sequence_output)
        start_logits, end_logits = paddle.unstack(logits, axis=-1)

        return start_logits, end_logits
Example #9
    def forward(self, input_ids, token_type_ids=None):
        r"""
        Args:
            input_ids (Tensor):
                See :class:`RobertaModel`.
            token_type_ids (Tensor, optional):
                See :class:`RobertaModel`.

        Returns:
            tuple: Returns tuple (`start_logits`, `end_logits`).

            With the fields:

            - `start_logits` (Tensor):
                A tensor of the input token classification logits that indicates the start position of the labelled span.
                Its data type should be float32 and its shape is [batch_size, sequence_length].

            - `end_logits` (Tensor):
                A tensor of the input token classification logits that indicates the end position of the labelled span.
                Its data type should be float32 and its shape is [batch_size, sequence_length].

        Example:
            .. code-block::

                import paddle
                from paddlenlp.transformers import RobertaForQuestionAnswering, RobertaTokenizer

                tokenizer = RobertaTokenizer.from_pretrained('roberta-wwm-ext')
                model = RobertaForQuestionAnswering.from_pretrained('roberta-wwm-ext')

                inputs = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
                inputs = {k:paddle.to_tensor([v]) for (k, v) in inputs.items()}
                start_logits, end_logits = model(**inputs)

        """
        sequence_output, _ = self.roberta(
            input_ids,
            token_type_ids=token_type_ids,
            position_ids=None,
            attention_mask=None)

        logits = self.classifier(sequence_output)
        logits = paddle.transpose(logits, perm=[2, 0, 1])
        start_logits, end_logits = paddle.unstack(x=logits, axis=0)

        return start_logits, end_logits
Example #10
def chunk(self, chunks, dim):
    """Split `self` into `chunks` pieces along dimension `dim` (torch.Tensor.chunk-like)."""
    slices = paddle.unstack(self, axis=dim, num=None)
    out_list = []
    # Regroup consecutive slices, ceil-dividing so the last chunk may be smaller.
    step = int(np.ceil(len(slices) / chunks))
    for st in range(0, len(slices), step):
        out_list.append(
            varbase_to_tensor(
                fluid.layers.concat([
                    paddle.unsqueeze(x, dim, name=None)
                    for x in slices[st:(st + step)]
                ],
                                    axis=dim,
                                    name=None)))
    return out_list
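
This helper emulates torch.Tensor.chunk by unstacking along the target dimension, regrouping the slices with ceil division, and re-concatenating each group. Paddle also ships a built-in paddle.chunk; a small sketch of the built-in, assuming the chunk count divides the axis evenly (the uneven-split case handled by the helper above is not shown):

import paddle

x = paddle.randn([6, 4])

# Built-in splitting into 3 equal pieces along axis 0.
pieces = paddle.chunk(x, chunks=3, axis=0)    # 3 tensors of shape [2, 4]
print([p.shape for p in pieces])

# Round trip: concatenating the pieces recovers the original tensor.
print(paddle.allclose(paddle.concat(pieces, axis=0), x))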
Example #11
 def forward(self,
             input_ids,
             attention_mask=None,
             decoder_input_ids=None,
             decoder_attention_mask=None,
             encoder_output=None,
             use_cache=False,
             cache=None):
     output = self.bart(input_ids, attention_mask, decoder_input_ids,
                        decoder_attention_mask, encoder_output, use_cache,
                        cache)
     logits = self.classifier(output[0] if use_cache else output)
     logits = paddle.transpose(logits, perm=[2, 0, 1])
     start_logits, end_logits = paddle.unstack(x=logits, axis=0)
     return start_logits, end_logits
Example #12
    def forward(self, x):
        """Forward network"""
        mask = paddle.any(x != self.pad_index, axis=-1)

        lens = paddle.sum(paddle.cast(mask, 'int32'), axis=-1)
        select = paddle.nonzero(mask)
        masked_x = paddle.gather_nd(x, select)
        char_mask = masked_x != self.pad_index
        emb = self.embed(masked_x)
        word_lens = paddle.sum(paddle.cast(char_mask, 'int32'), axis=-1)
        _, (h, _) = self.lstm(emb, sequence_length=word_lens)
        h = paddle.concat(paddle.unstack(h), axis=-1)

        feat_embed = pad_sequence_paddle(h, lens, pad_index=self.pad_index)

        return feat_embed
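
The concat(unstack(h)) line merges the per-direction final states of the character LSTM into a single feature vector per word. A small sketch of just that step with a dummy state tensor (the shapes are illustrative assumptions, not taken from the original model):

import paddle

# Stand-in for the final LSTM hidden state `h`:
# [num_layers * num_directions, num_words, hidden_size], here 1 layer, 2 directions.
h = paddle.randn([2, 8, 50])

# One tensor per direction, then concatenate them feature-wise.
merged = paddle.concat(paddle.unstack(h), axis=-1)   # [8, 100]
print(merged.shape)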
Example #13
    def forward(self,
                input_ids,
                token_type_ids=None,
                position_ids=None,
                attention_mask=None):
        sequence_output, pooled_output = self.roberta(
            input_ids,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            attention_mask=attention_mask)

        logits = self.classifier(sequence_output)
        logits = paddle.transpose(logits, perm=[2, 0, 1])
        start_logits, end_logits = paddle.unstack(x=logits, axis=0)
        cls_logits = self.classifier_cls(pooled_output)

        return start_logits, end_logits, cls_logits
Example #14
    def forward(self, x, kv_cache=None):
        self.seq_len = x.shape[1]
        x = self.query_key_value(x)
        q, k, v = x.split(num_or_sections=3, axis=2)

        q = self.split_heads(q)
        k = self.split_heads(k)
        v = self.split_heads(v)

        if kv_cache is not None:
            pk, pv = paddle.unstack(kv_cache, axis=1)
            k = paddle.concat([pk, k], axis=-2)
            v = paddle.concat([pv, v], axis=-2)
        cached_kv = paddle.stack([k, v], axis=1)

        attn = paddle.matmul(q, k, transpose_y=True)  # [B, N, L, S]
        attn = attn / math.sqrt(self.size_per_head)

        # [L, S]
        attention_mask = paddle.tril(
            paddle.ones([self.seq_len, self.seq_len], 'float32'))
        attention_mask = attention_mask.reshape(
            [1, 1, self.seq_len, self.seq_len])

        # adding a large negative value before the softmax effectively removes masked positions
        attn = attn * attention_mask - 10000.0 * (1.0 - attention_mask)
        attn = nn.Softmax(axis=-1)(attn)
        attn = self.attn_drop(attn)

        y = paddle.matmul(attn, v)
        # [B, N, L, S] -> [B, L, N, S]
        y = y.transpose((0, 2, 1, 3))
        y = paddle.reshape(y, [-1, self.seq_len, self.embedding_size])
        y = self.resid_drop(self.dense(y))

        return y, cached_kv
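
Here stack and unstack act as a small packing scheme for the incremental-decoding cache: keys and values are stacked along an extra axis when returned, and unpacked again on the next call before being concatenated with the fresh projections. A hedged sketch of that round trip with dummy shapes (every dimension below is an illustrative assumption):

import paddle

batch, heads, past_len, new_len, head_dim = 2, 4, 6, 1, 16
k = paddle.randn([batch, heads, past_len, head_dim])
v = paddle.randn([batch, heads, past_len, head_dim])

kv_cache = paddle.stack([k, v], axis=1)        # pack keys and values together
pk, pv = paddle.unstack(kv_cache, axis=1)      # recover them on the next step

k_new = paddle.randn([batch, heads, new_len, head_dim])
k_ext = paddle.concat([pk, k_new], axis=-2)    # grow the key cache along the time axis
print(k_ext.shape)                             # [2, 4, 7, 16]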
Example #15
    def forward_interpret(self, *args, **kwargs):
        r"""
        Args:
            input_ids (Tensor):
                See :class:`RobertaModel`.
            token_type_ids (Tensor, optional):
                See :class:`RobertaModel`.
            position_ids (Tensor, optional):
                See :class:`RobertaModel`.
            attention_mask (Tensor, optional):
                See :class:`RobertaModel`.

        Returns:
            tuple: Returns tuple (`loss`, `start_logits`, `end_logits`, `cls_logits`, `att_weights_list`, `embedding_output`).

            With the fields:

            - `loss` (Tensor):
                The loss computed by `self.criterion` from the span and classification logits;
                `None` if `start_pos` and `end_pos` are not provided.

            - `start_logits` (Tensor):
                A tensor of the input token classification logits that indicates the start position of the labelled span.
                Its data type should be float32 and its shape is [batch_size, sequence_length].

            - `end_logits` (Tensor):
                A tensor of the input token classification logits that indicates the end position of the labelled span.
                Its data type should be float32 and its shape is [batch_size, sequence_length].

            - `cls_logits` (Tensor):
                The classification logits computed from the pooled output.

            - `att_weights_list` (list of Tensor):
                The attention weights returned by the underlying RoBERTa model.

            - `embedding_output` (Tensor):
                The embedding output of the underlying RoBERTa model.

        Example:
            .. code-block::

                import paddle
                from paddlenlp.transformers import RobertaForSequenceClassification, RobertaTokenizer

                tokenizer = RobertaTokenizer.from_pretrained('roberta-wwm-ext')
                model = RobertaForSequenceClassification.from_pretrained('roberta-wwm-ext')

                inputs = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
                inputs = {k:paddle.to_tensor([v]) for (k, v) in inputs.items()}
                logits = model(**inputs)

        """
        start_pos = kwargs.pop('start_pos', None)
        end_pos = kwargs.pop('end_pos', None)
        cls_label = kwargs.pop('labels', None)

        sequence_output, pooled_output, att_weights_list, embedding_output = self.roberta(
            *args, **kwargs)

        logits = self.classifier(sequence_output)  # (bsz, seq, 2)
        logits = paddle.transpose(logits, perm=[2, 0, 1])  # (2, bsz, seq)
        start_logits, end_logits = paddle.unstack(x=logits, axis=0)
        cls_logits = self.classifier_cls(pooled_output)

        if start_pos is not None and end_pos is not None:
            if len(start_pos.shape) != 1:
                start_pos = start_pos.squeeze()
            if len(end_pos.shape) != 1:
                end_pos = end_pos.squeeze()
            loss = self.criterion((start_logits, end_logits, cls_logits),
                                  (start_pos, end_pos, cls_label))
        else:
            loss = None

        return loss, start_logits, end_logits, cls_logits, att_weights_list, embedding_output
Example #16
    def forward(self,
                input_ids,
                attention_mask=None,
                decoder_input_ids=None,
                decoder_attention_mask=None,
                encoder_output=None,
                use_cache=False,
                cache=None):
        r"""
        The MBartForQuestionAnswering forward method, overrides the __call__() special method.

        Args:
            input_ids (Tensor):
                See :class:`MBartModel`.
            attention_mask (Tensor, optional):
                See :class:`MBartModel`.
            decoder_input_ids (Tensor, optional):
                See :class:`MBartModel`.
            decoder_attention_mask (Tensor, optional):
                See :class:`MBartModel`.
            encoder_output (Tensor, optional):
                See :class:`MBartModel`.
            use_cache (bool, optional):
                See :class:`MBartModel`.
            cache (Tensor, optional):
                See :class:`MBartModel`.

        Returns:
            tuple: Returns tuple (`start_logits`, `end_logits`).

            With the fields:

            - `start_logits` (Tensor):
                A tensor of the input token classification logits that indicates the start position of the labelled span.
                Its data type should be float32 and its shape is [batch_size, sequence_length].

            - `end_logits` (Tensor):
                A tensor of the input token classification logits that indicates the end position of the labelled span.
                Its data type should be float32 and its shape is [batch_size, sequence_length].

        Example:
            .. code-block::

                import paddle
                from paddlenlp.transformers import MBartForQuestionAnswering, MBartTokenizer

                tokenizer = MBartTokenizer.from_pretrained('bart-base')
                model = MBartForQuestionAnswering.from_pretrained('bart-base')

                inputs = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
                inputs = {k:paddle.to_tensor([v]) for (k, v) in inputs.items()}
                outputs = model(**inputs)
                start_logits = outputs[0]
                end_logits = outputs[1]
        """
        output = self.mbart(input_ids, attention_mask, decoder_input_ids,
                            decoder_attention_mask, encoder_output, use_cache,
                            cache)
        logits = self.classifier(output[0] if use_cache else output)
        logits = paddle.transpose(logits, perm=[2, 0, 1])
        start_logits, end_logits = paddle.unstack(x=logits, axis=0)
        return start_logits, end_logits
Example #17
 def _forward_log_det_jacobian(self, x):
     self._check_size(x)
     return paddle.stack([
         t.forward_log_det_jacobian(v)
         for v, t in zip(paddle.unstack(x, self._axis), self._transforms)
     ], self._axis)
Example #18
 def _inverse(self, y):
     self._check_size(y)
     return paddle.stack([
         t.inverse(v)
         for v, t in zip(paddle.unstack(y, self._axis), self._transforms)
     ], self._axis)
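
Both methods follow the same unstack/apply/stack pattern: split the input along the stacking axis, apply each component transform to its own slice, and reassemble the results. A generic sketch of that pattern with plain callables standing in for the Transform objects (purely illustrative):

import paddle

# Hypothetical per-component functions; the real class holds Transform objects.
transforms = [paddle.exp, paddle.tanh, lambda t: t * 2.0]

x = paddle.randn([3, 5])
parts = [f(v) for v, f in zip(paddle.unstack(x, axis=0), transforms)]
y = paddle.stack(parts, axis=0)                # same shape as x: [3, 5]
print(y.shape)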
Example #19
    def forward(
        self,
        input_ids,
        token_type_ids=None,
        position_ids=None,
        attention_mask=None,
        output_hidden_states=False,
    ):
        r"""
        Args:
            input_ids (Tensor):
                See :class:`RobertaModel`.
            token_type_ids (Tensor, optional):
                See :class:`RobertaModel`.
            position_ids (Tensor, optional):
                See :class:`RobertaModel`.
            attention_mask (Tensor, optional):
                See :class:`RobertaModel`.
            output_hidden_states (bool, optional):
                See :class:`RobertaModel`.

        Returns:
            tuple: Returns tuple (`start_logits`, `end_logits`) by default if output_hidden_states is `False`.
            Returns tuple (`start_logits`, `end_logits`, `encoder_outputs`) if output_hidden_states is set to `True`.

            With the fields:

            - `start_logits` (Tensor):
                A tensor of the input token classification logits that indicates the start position of the labelled span.
                Its data type should be float32 and its shape is [batch_size, sequence_length].

            - `end_logits` (Tensor):
                A tensor of the input token classification logits that indicates the end position of the labelled span.
                Its data type should be float32 and its shape is [batch_size, sequence_length].

            - `encoder_outputs` (List(Tensor)):
                A list of Tensor containing hidden-states of the model at each hidden layer in the Transformer encoder.
                The length of the list is `num_hidden_layers`.
                Each Tensor has a data type of float32 and a shape of [batch_size, sequence_length, hidden_size].

        Example:
            .. code-block::

                import paddle
                from paddlenlp.transformers import RobertaForQuestionAnswering, RobertaTokenizer

                tokenizer = RobertaTokenizer.from_pretrained('roberta-wwm-ext')
                model = RobertaForQuestionAnswering.from_pretrained('roberta-wwm-ext')

                inputs = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
                inputs = {k:paddle.to_tensor([v]) for (k, v) in inputs.items()}
                start_logits, end_logits = model(**inputs)

        """
        encoder_outputs, _ = self.roberta(
            input_ids,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            attention_mask=attention_mask,
            output_hidden_states=output_hidden_states,
        )
        sequence_output = encoder_outputs[
            -1] if output_hidden_states else encoder_outputs
        logits = self.classifier(sequence_output)
        logits = paddle.transpose(logits, perm=[2, 0, 1])
        start_logits, end_logits = paddle.unstack(x=logits, axis=0)

        if output_hidden_states:
            return start_logits, end_logits, encoder_outputs
        else:
            return start_logits, end_logits
Example #20
    def forward(self,
                input_ids,
                token_type_ids=None,
                position_ids=None,
                attention_mask=None):
        r"""
        Args:
            input_ids (Tensor):
                Indices of input sequence tokens in the vocabulary. They are
                numerical representations of tokens that build the input sequence.
                Its data type should be `int64` and its shape is [batch_size, sequence_length].
            token_type_ids (Tensor, optional):
                Segment token indices to indicate first and second portions of the inputs.
                Indices can be either 0 or 1:

                - 0 corresponds to a **sentence A** token,
                - 1 corresponds to a **sentence B** token.

                Its data type should be `int64` and its shape is [batch_size, sequence_length].
                Defaults to `None`, which means no segment embeddings are added to the token embeddings.
            position_ids (Tensor, optional):
                Indices of positions of each input sequence token in the position embeddings. Selected in the range
                ``[0, config.max_position_embeddings - 1]``.
                Its shape is `(batch_size, num_tokens)` and its dtype is `int32` or `int64`. Defaults to `None`.
            attention_mask (Tensor, optional):
                Mask to indicate whether to perform attention on each input token or not.
                The values should be either 0 or 1. The attention scores will be set
                to **-infinity** for any positions in the mask that are **0**, and will be
                **unchanged** for positions that are **1**.

                - **1** for tokens that are **not masked**,
                - **0** for tokens that are **masked**.

                Its data type should be `float32` and its shape is [batch_size, sequence_length].
                Defaults to `None`.


        Returns:
            tuple: Returns tuple (``start_logits``, ``end_logits``).

            With the fields:

            - `start_logits` (Tensor): The logits of the start position of the predicted answer.
            - `end_logits` (Tensor): The logits of the end position of the predicted answer.

        Example:
            .. code-block::

                import paddle
                from paddlenlp.transformers import ErnieForQuestionAnswering, ErnieTokenizer

                tokenizer = ErnieTokenizer.from_pretrained('ernie-1.0')
                model = ErnieForQuestionAnswering.from_pretrained('ernie-1.0')

                inputs = tokenizer("这是个测试样例")
                inputs = {k:paddle.to_tensor([v]) for (k, v) in inputs.items()}
                logits = model(**inputs)
        """

        sequence_output, _ = self.ernie(
            input_ids,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            attention_mask=attention_mask)

        logits = self.classifier(sequence_output)
        logits = paddle.transpose(logits, perm=[2, 0, 1])
        start_logits, end_logits = paddle.unstack(x=logits, axis=0)

        return start_logits, end_logits
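
All of the question-answering heads above stop at raw start and end logits; turning them into a predicted span is usually a per-sequence argmax over the token dimension. A hedged post-processing sketch with dummy logits (no decoding constraints such as start <= end are enforced here):

import paddle

batch_size, seq_len = 1, 12
start_logits = paddle.randn([batch_size, seq_len])
end_logits = paddle.randn([batch_size, seq_len])

start_idx = paddle.argmax(start_logits, axis=-1)   # predicted start token index
end_idx = paddle.argmax(end_logits, axis=-1)       # predicted end token index
print(start_idx.numpy(), end_idx.numpy())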