def forward(self, input_ids, token_type_ids=None):
    r'''
    The SqueezeBertForQuestionAnswering forward method, overrides the __call__() special method.

    Args:
        input_ids (Tensor):
            See :class:`SqueezeBertModel`.
        token_type_ids (Tensor, optional):
            See :class:`SqueezeBertModel`.

    Returns:
        tuple: Returns tuple (`start_logits`, `end_logits`).

        With the fields:

        - `start_logits` (Tensor):
            A tensor of the input token classification logits, indicates the start position of the labelled span.
            Its data type should be float32 and its shape is [batch_size, sequence_length].

        - `end_logits` (Tensor):
            A tensor of the input token classification logits, indicates the end position of the labelled span.
            Its data type should be float32 and its shape is [batch_size, sequence_length].
    '''
    sequence_output, _ = self.squeezebert(input_ids,
                                          token_type_ids=token_type_ids,
                                          position_ids=None,
                                          attention_mask=None)
    logits = self.classifier(sequence_output)
    logits = paddle.transpose(logits, perm=[2, 0, 1])
    start_logits, end_logits = paddle.unstack(x=logits, axis=0)

    return start_logits, end_logits
def forward(self, *args, **kwargs):
    """
    Args:
        start_pos (optional, `Variable` of shape [batch_size]):
            token index of the start of the answer span in `context`
        end_pos (optional, `Variable` of shape [batch_size]):
            token index of the end of the answer span in `context`
    Returns:
        loss (`Variable` of shape []):
            Cross entropy loss, averaged over batch and time, ignoring positions where label == -100;
            if labels are not set, returns None
        start_logits (`Variable` of shape [batch_size, sequence_length]):
            output logits of the start position; use argmax(start_logits) to get the start index
        end_logits (`Variable` of shape [batch_size, sequence_length]):
            output logits of the end position; use argmax(end_logits) to get the end index
    """
    start_pos = kwargs.pop('start_pos', None)
    end_pos = kwargs.pop('end_pos', None)
    pooled, encoded = super(ErnieModelForQuestionAnswering, self).forward(*args, **kwargs)
    encoded = self.dropout(encoded)
    encoded = self.classifier(encoded)
    start_logits, end_logits = P.unstack(encoded, axis=-1)
    if start_pos is not None and end_pos is not None:
        if len(start_pos.shape) != 1:
            start_pos = start_pos.squeeze()
        if len(end_pos.shape) != 1:
            end_pos = end_pos.squeeze()
        start_loss = F.cross_entropy(start_logits, start_pos)
        end_loss = F.cross_entropy(end_logits, end_pos)
        loss = (start_loss.mean() + end_loss.mean()) / 2.
    else:
        loss = None
    return loss, start_logits, end_logits
def forward(self, inputs):
    """forward"""
    x = paddle.unstack(
        inputs, axis=self.config['axis'], num=self.config['num'])
    return x
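# A minimal, hypothetical sketch (names and shapes invented for illustration) of what
# paddle.unstack does with the `axis` and `num` arguments the wrapper above forwards
# from its config: it splits a tensor into a list of tensors along the given axis.
import paddle

x = paddle.arange(24, dtype='float32').reshape([2, 3, 4])
parts = paddle.unstack(x, axis=0, num=2)   # list of two [3, 4] tensors
print(len(parts), parts[0].shape)          # 2 [3, 4]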
def forward(self, input_ids, attention_mask=None):
    sequence_output = self.distilbert(input_ids, attention_mask=attention_mask)
    sequence_output = self.dropout(sequence_output)
    logits = self.classifier(sequence_output)
    logits = paddle.transpose(logits, perm=[2, 0, 1])
    start_logits, end_logits = paddle.unstack(x=logits, axis=0)

    return start_logits, end_logits
def forward(self, input_ids, token_type_ids=None, attention_mask=None):
    sequence_output, _ = self.nezha(input_ids, token_type_ids, attention_mask)
    logits = self.classifier(sequence_output)
    logits = paddle.transpose(logits, perm=[2, 0, 1])
    start_logits, end_logits = paddle.unstack(x=logits, axis=0)

    return start_logits, end_logits
def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None): r""" The BertForQuestionAnswering forward method, overrides the __call__() special method. Args: input_ids (Tensor): See :class:`BertModel`. token_type_ids (Tensor, optional): See :class:`BertModel`. Returns: tuple: Returns tuple (`start_logits`, `end_logits`). With the fields: - `start_logits` (Tensor): A tensor of the input token classification logits, indicates the start position of the labelled span. Its data type should be float32 and its shape is [batch_size, sequence_length]. - `end_logits` (Tensor): A tensor of the input token classification logits, indicates the end position of the labelled span. Its data type should be float32 and its shape is [batch_size, sequence_length]. Example: .. code-block:: import paddle from paddlenlp.transformers.bert.modeling import BertForQuestionAnswering from paddlenlp.transformers.bert.tokenizer import BertTokenizer tokenizer = BertTokenizer.from_pretrained('bert-base-cased') model = BertForQuestionAnswering.from_pretrained('bert-base-cased') inputs = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!") inputs = {k:paddle.to_tensor([v]) for (k, v) in inputs.items()} outputs = model(**inputs) start_logits = outputs[0] end_logits = outputs[1] """ sequence_output, _ = self.bert(input_ids, token_type_ids=token_type_ids, position_ids=position_ids, attention_mask=attention_mask) logits = self.classifier(sequence_output) logits = paddle.transpose(logits, perm=[2, 0, 1]) start_logits, end_logits = paddle.unstack(x=logits, axis=0) return start_logits, end_logits
def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None): r""" Args: input_ids (Tensor): See :class:`ErnieGramModel`. token_type_ids (Tensor, optional): See :class:`ErnieGramModel`. position_ids (Tensor, optional): See :class:`ErnieGramModel`. attention_mask (Tensor, optional): See :class:`ErnieGramModel`. Returns: tuple: Returns tuple (`start_logits`, `end_logits`). With the fields: - `start_logits` (Tensor): A tensor of the input token classification logits, indicates the start position of the labelled span. Its data type should be float32 and its shape is [batch_size, sequence_length]. - `end_logits` (Tensor): A tensor of the input token classification logits, indicates the end position of the labelled span. Its data type should be float32 and its shape is [batch_size, sequence_length]. Example: .. code-block:: import paddle from paddlenlp.transformers import ErnieGramForQuestionAnswering, ErnieGramTokenizer tokenizer = ErnieGramTokenizer.from_pretrained('ernie-gram-zh') model = ErnieGramForQuestionAnswering.from_pretrained('ernie-gram-zh') inputs = tokenizer("欢迎使用百度飞桨!") inputs = {k:paddle.to_tensor([v]) for (k, v) in inputs.items()} logits = model(**inputs) """ sequence_output, _ = self.ernie_gram( input_ids, token_type_ids=token_type_ids, position_ids=position_ids, attention_mask=attention_mask) logits = self.classifier(sequence_output) logits = paddle.transpose(logits, perm=[2, 0, 1]) start_logits, end_logits = paddle.unstack(x=logits, axis=0) return start_logits, end_logits
def forward(self, input_ids, token_type_ids=None, attention_mask=None): r""" The RoFormerv2ForQuestionAnswering forward method, overrides the __call__() special method. Args: input_ids (Tensor): See :class:`RoFormerv2Model`. token_type_ids (Tensor, optional): See :class:`RoFormerv2Model`. attention_mask (Tensor, optional): See :class:`RoFormerv2Model`. Returns: tuple: Returns tuple (`start_logits`, `end_logits`). With the fields: - `start_logits` (Tensor): A tensor of the input token classification logits, indicates the start position of the labelled span. Its data type should be float32 and its shape is [batch_size, sequence_length]. - `end_logits` (Tensor): A tensor of the input token classification logits, indicates the end position of the labelled span. Its data type should be float32 and its shape is [batch_size, sequence_length]. Example: .. code-block:: import paddle from paddlenlp.transformers import RoFormerv2ForQuestionAnswering, RoFormerv2Tokenizer tokenizer = RoFormerv2Tokenizer.from_pretrained('roformer_v2_chinese_char_base') model = RoFormerv2ForQuestionAnswering.from_pretrained('roformer_v2_chinese_char_base') inputs = tokenizer("欢迎使用百度飞桨!") inputs = {k:paddle.to_tensor([v], dtype="int64") for (k, v) in inputs.items()} outputs = model(**inputs) start_logits = outputs[0] end_logits = outputs[1] """ sequence_output = self.roformerv2( input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask) logits = self.classifier(sequence_output) start_logits, end_logits = paddle.unstack(logits, axis=-1) return start_logits, end_logits
def forward(self, input_ids, token_type_ids=None): r""" Args: input_ids (Tensor): See :class:`RobertaModel`. token_type_ids (Tensor, optional): See :class:`RobertaModel`. position_ids (Tensor, optional): See :class:`RobertaModel`. attention_mask (Tensor, optional): See :class:`RobertaModel`. Returns: tuple: Returns tuple (`start_logits`, `end_logits`). With the fields: - `start_logits` (Tensor): A tensor of the input token classification logits, indicates the start position of the labelled span. Its data type should be float32 and its shape is [batch_size, sequence_length]. - `end_logits` (Tensor): A tensor of the input token classification logits, indicates the end position of the labelled span. Its data type should be float32 and its shape is [batch_size, sequence_length]. Example: .. code-block:: import paddle from paddlenlp.transformers import RobertaForSequenceClassification, RobertaTokenizer tokenizer = RobertaTokenizer.from_pretrained('roberta-wwm-ext') model = RobertaForSequenceClassification.from_pretrained('roberta-wwm-ext') inputs = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!") inputs = {k:paddle.to_tensor([v]) for (k, v) in inputs.items()} logits = model(**inputs) """ sequence_output, _ = self.roberta( input_ids, token_type_ids=token_type_ids, position_ids=None, attention_mask=None) logits = self.classifier(sequence_output) logits = paddle.transpose(logits, perm=[2, 0, 1]) start_logits, end_logits = paddle.unstack(x=logits, axis=0) return start_logits, end_logits
def chunk(self, chunks, dim):
    # Split `self` into `chunks` pieces along `dim` by unstacking it into single slices
    # and re-concatenating the slices in groups of `step`.
    slices = paddle.unstack(self, axis=dim, num=None)
    out_list = []
    step = int(np.ceil(len(slices) / chunks))
    for st in range(0, len(slices), step):
        out_list.append(
            varbase_to_tensor(
                fluid.layers.concat(
                    [paddle.unsqueeze(x, dim, name=None) for x in slices[st:(st + step)]],
                    axis=dim,
                    name=None)))
    return out_list
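# A standalone sketch of the same idea (splitting a tensor into roughly equal chunks along a
# dimension by unstacking slices and re-concatenating groups of them), written against plain
# paddle, since varbase_to_tensor and fluid come from the surrounding compatibility layer.
import math
import paddle

def chunk_via_unstack(x, chunks, dim):
    # Unstack into single slices along `dim`, then concat groups of them back together.
    slices = paddle.unstack(x, axis=dim)
    step = math.ceil(len(slices) / chunks)
    return [
        paddle.concat([paddle.unsqueeze(s, dim) for s in slices[st:st + step]], axis=dim)
        for st in range(0, len(slices), step)
    ]

x = paddle.arange(12, dtype='float32').reshape([6, 2])
parts = chunk_via_unstack(x, chunks=2, dim=0)
print([p.shape for p in parts])  # [[3, 2], [3, 2]]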
def forward(self,
            input_ids,
            attention_mask=None,
            decoder_input_ids=None,
            decoder_attention_mask=None,
            encoder_output=None,
            use_cache=False,
            cache=None):
    output = self.bart(input_ids, attention_mask, decoder_input_ids,
                       decoder_attention_mask, encoder_output, use_cache, cache)
    logits = self.classifier(output[0] if use_cache else output)
    logits = paddle.transpose(logits, perm=[2, 0, 1])
    start_logits, end_logits = paddle.unstack(x=logits, axis=0)

    return start_logits, end_logits
def forward(self, x):
    """Forward network"""
    mask = paddle.any(x != self.pad_index, axis=-1)
    lens = paddle.sum(paddle.cast(mask, 'int32'), axis=-1)
    select = paddle.nonzero(mask)
    masked_x = paddle.gather_nd(x, select)
    char_mask = masked_x != self.pad_index
    emb = self.embed(masked_x)
    word_lens = paddle.sum(paddle.cast(char_mask, 'int32'), axis=-1)
    _, (h, _) = self.lstm(emb, sequence_length=word_lens)
    h = paddle.concat(paddle.unstack(h), axis=-1)
    feat_embed = pad_sequence_paddle(h, lens, pad_index=self.pad_index)
    return feat_embed
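# A hypothetical shape walk-through of the paddle.unstack(h) + paddle.concat(..., axis=-1)
# step above, assuming a single-layer bidirectional nn.LSTM so that h has shape
# [num_layers * num_directions, batch_size, hidden_size].
import paddle

h = paddle.rand([2, 8, 100])
# Unstack along axis 0 -> two [8, 100] tensors (forward and backward final states),
# then concatenate them feature-wise into a single [8, 200] representation.
joined = paddle.concat(paddle.unstack(h), axis=-1)
print(joined.shape)  # [8, 200]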
def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None):
    sequence_output, pooled_output = self.roberta(input_ids,
                                                  token_type_ids=token_type_ids,
                                                  position_ids=position_ids,
                                                  attention_mask=attention_mask)
    logits = self.classifier(sequence_output)
    logits = paddle.transpose(logits, perm=[2, 0, 1])
    start_logits, end_logits = paddle.unstack(x=logits, axis=0)
    cls_logits = self.classifier_cls(pooled_output)

    return start_logits, end_logits, cls_logits
def forward(self, x, kv_cache=None):
    self.seq_len = x.shape[1]
    x = self.query_key_value(x)
    q, k, v = x.split(num_or_sections=3, axis=2)

    q = self.split_heads(q)
    k = self.split_heads(k)
    v = self.split_heads(v)

    if kv_cache is not None:
        pk, pv = paddle.unstack(kv_cache, axis=1)
        k = paddle.concat([pk, k], axis=-2)
        v = paddle.concat([pv, v], axis=-2)
    cached_kv = paddle.stack([k, v], axis=1)

    attn = paddle.matmul(q, k, transpose_y=True)  # [B, N, L, S]
    attn = attn / math.sqrt(self.size_per_head)

    # causal mask, [L, S]
    attention_mask = paddle.tril(
        paddle.ones([self.seq_len, self.seq_len], 'float32'))
    attention_mask = attention_mask.reshape(
        [1, 1, self.seq_len, self.seq_len])

    # adding a large negative value before softmax -> it's like removing those positions entirely
    attn = attn * attention_mask - 10000.0 * (1.0 - attention_mask)
    attn = nn.Softmax(axis=-1)(attn)
    attn = self.attn_drop(attn)

    y = paddle.matmul(attn, v)

    # [B, N, L, S] -> [B, L, N, S]
    y = y.transpose((0, 2, 1, 3))
    y = paddle.reshape(y, [-1, self.seq_len, self.embedding_size])
    y = self.resid_drop(self.dense(y))

    return y, cached_kv
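# A minimal, hypothetical sketch of the kv_cache round-trip used above: keys and values are
# packed into one tensor with paddle.stack(..., axis=1) and recovered on the next step with
# paddle.unstack(..., axis=1). Shapes are illustrative only.
import paddle

# Per-step keys/values: [batch, num_heads, seq_len, size_per_head]
k = paddle.rand([2, 12, 5, 64])
v = paddle.rand([2, 12, 5, 64])

cached_kv = paddle.stack([k, v], axis=1)    # [batch, 2, num_heads, seq_len, size_per_head]
pk, pv = paddle.unstack(cached_kv, axis=1)  # back to two [2, 12, 5, 64] tensors

# On the next decoding step, new keys/values are appended along the sequence axis.
k_next = paddle.concat([pk, paddle.rand([2, 12, 1, 64])], axis=-2)
print(cached_kv.shape, k_next.shape)  # [2, 2, 12, 5, 64] [2, 12, 6, 64]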
def forward_interpret(self, *args, **kwargs):
    r"""
    Args:
        input_ids (Tensor):
            See :class:`RobertaModel`.
        token_type_ids (Tensor, optional):
            See :class:`RobertaModel`.
        position_ids (Tensor, optional):
            See :class:`RobertaModel`.
        attention_mask (Tensor, optional):
            See :class:`RobertaModel`.
        start_pos (Tensor, optional):
            Token indices of the answer span starts, popped from ``kwargs``. Defaults to `None`.
        end_pos (Tensor, optional):
            Token indices of the answer span ends, popped from ``kwargs``. Defaults to `None`.
        labels (Tensor, optional):
            Classification labels for the pooled output, popped from ``kwargs``. Defaults to `None`.

    Returns:
        tuple: Returns tuple (`loss`, `start_logits`, `end_logits`, `cls_logits`,
        `att_weights_list`, `embedding_output`).

        With the fields:

        - `loss` (Tensor):
            The combined span/classification loss, or `None` if `start_pos` and `end_pos` are not given.

        - `start_logits` (Tensor):
            A tensor of the input token classification logits, indicates the start position of the labelled span.
            Its data type should be float32 and its shape is [batch_size, sequence_length].

        - `end_logits` (Tensor):
            A tensor of the input token classification logits, indicates the end position of the labelled span.
            Its data type should be float32 and its shape is [batch_size, sequence_length].

        - `cls_logits` (Tensor):
            The classification logits computed from the pooled output.

        - `att_weights_list` (List(Tensor)):
            The attention weights returned by the underlying :class:`RobertaModel`.

        - `embedding_output` (Tensor):
            The embedding output returned by the underlying :class:`RobertaModel`.
    """
    start_pos = kwargs.pop('start_pos', None)
    end_pos = kwargs.pop('end_pos', None)
    cls_label = kwargs.pop('labels', None)

    sequence_output, pooled_output, att_weights_list, embedding_output = self.roberta(
        *args, **kwargs)

    logits = self.classifier(sequence_output)  # (bsz, seq, 2)
    logits = paddle.transpose(logits, perm=[2, 0, 1])  # (2, bsz, seq)
    start_logits, end_logits = paddle.unstack(x=logits, axis=0)
    cls_logits = self.classifier_cls(pooled_output)

    if start_pos is not None and end_pos is not None:
        if len(start_pos.shape) != 1:
            start_pos = start_pos.squeeze()
        if len(end_pos.shape) != 1:
            end_pos = end_pos.squeeze()
        loss = self.criterion((start_logits, end_logits, cls_logits),
                              (start_pos, end_pos, cls_label))
    else:
        loss = None

    return loss, start_logits, end_logits, cls_logits, att_weights_list, embedding_output
def forward(self, input_ids, attention_mask=None, decoder_input_ids=None, decoder_attention_mask=None, encoder_output=None, use_cache=False, cache=None): r""" The MBartForQuestionAnswering forward method, overrides the __call__() special method. Args: input_ids (Tensor): See :class:`MBartModel`. attention_mask (Tensor, optional): See :class:`MBartModel`. decoder_input_ids (Tensor, `optional`): See :class:`MBartModel`. decoder_attention_mask (Tensor, optional): See :class:`MBartModel`. encoder_output (Tensor, optonal): See :class:`MBartModel`. use_cache (bool, optional): See :class:`MBartModel`. cache (Tensor, optional): See :class:`MBartModel`. Returns: tuple: Returns tuple (`start_logits`, `end_logits`). With the fields: - `start_logits` (Tensor): A tensor of the input token classification logits, indicates the start position of the labelled span. Its data type should be float32 and its shape is [batch_size, sequence_length]. - `end_logits` (Tensor): A tensor of the input token classification logits, indicates the end position of the labelled span. Its data type should be float32 and its shape is [batch_size, sequence_length]. Example: .. code-block:: import paddle from paddlenlp.transformers import MBartForQuestionAnswering, MBartTokenizer tokenizer = MBartTokenizer.from_pretrained('bart-base') model = MBartForQuestionAnswering.from_pretrained('bart-base') inputs = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!") inputs = {k:paddle.to_tensor([v]) for (k, v) in inputs.items()} outputs = model(**inputs) start_logits = outputs[0] end_logits =outputs[1] """ output = self.mbart(input_ids, attention_mask, decoder_input_ids, decoder_attention_mask, encoder_output, use_cache, cache) logits = self.classifier(output[0] if use_cache else output, ) logits = paddle.transpose(logits, perm=[2, 0, 1]) start_logits, end_logits = paddle.unstack(x=logits, axis=0) return start_logits, end_logits
def _forward_log_det_jacobian(self, x):
    self._check_size(x)
    return paddle.stack([
        t.forward_log_det_jacobian(v)
        for v, t in zip(paddle.unstack(x, self._axis), self._transforms)
    ], self._axis)
def _inverse(self, y):
    self._check_size(y)
    return paddle.stack([
        t.inverse(v)
        for v, t in zip(paddle.unstack(y, self._axis), self._transforms)
    ], self._axis)
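# A standalone sketch of the unstack/apply/stack pattern the two methods above rely on,
# using plain Python callables (paddle.exp, paddle.tanh) in place of the surrounding
# transform objects; apply_per_slice is a hypothetical helper for illustration only.
import paddle

def apply_per_slice(x, fns, axis):
    # Unstack x along `axis`, apply one function per slice, and stack the results back.
    return paddle.stack([fn(v) for v, fn in zip(paddle.unstack(x, axis), fns)], axis)

x = paddle.to_tensor([[1.0, 2.0], [3.0, 4.0]])
y = apply_per_slice(x, [paddle.exp, paddle.tanh], axis=0)  # row 0 -> exp, row 1 -> tanh
print(y.shape)  # [2, 2]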
def forward(
        self,
        input_ids,
        token_type_ids=None,
        position_ids=None,
        attention_mask=None,
        output_hidden_states=False,
):
    r"""
    Args:
        input_ids (Tensor):
            See :class:`RobertaModel`.
        token_type_ids (Tensor, optional):
            See :class:`RobertaModel`.
        position_ids (Tensor, optional):
            See :class:`RobertaModel`.
        attention_mask (Tensor, optional):
            See :class:`RobertaModel`.
        output_hidden_states (bool, optional):
            See :class:`RobertaModel`.

    Returns:
        tuple: Returns tuple (`start_logits`, `end_logits`) by default if `output_hidden_states` is `False`.
        Returns tuple (`start_logits`, `end_logits`, `encoder_outputs`) if `output_hidden_states` is set to `True`.

        With the fields:

        - `start_logits` (Tensor):
            A tensor of the input token classification logits, indicates the start position of the labelled span.
            Its data type should be float32 and its shape is [batch_size, sequence_length].

        - `end_logits` (Tensor):
            A tensor of the input token classification logits, indicates the end position of the labelled span.
            Its data type should be float32 and its shape is [batch_size, sequence_length].

        - `encoder_outputs` (List(Tensor)):
            A list of Tensor containing hidden-states of the model at each hidden layer in the Transformer encoder.
            The length of the list is `num_hidden_layers`.
            Each Tensor has a data type of float32 and a shape of [batch_size, sequence_length, hidden_size].

    Example:
        .. code-block::

            import paddle
            from paddlenlp.transformers import RobertaForQuestionAnswering, RobertaTokenizer

            tokenizer = RobertaTokenizer.from_pretrained('roberta-wwm-ext')
            model = RobertaForQuestionAnswering.from_pretrained('roberta-wwm-ext')

            inputs = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
            inputs = {k:paddle.to_tensor([v]) for (k, v) in inputs.items()}
            start_logits, end_logits = model(**inputs)
    """
    encoder_outputs, _ = self.roberta(
        input_ids,
        token_type_ids=token_type_ids,
        position_ids=position_ids,
        attention_mask=attention_mask,
        output_hidden_states=output_hidden_states,
    )
    sequence_output = encoder_outputs[-1] if output_hidden_states else encoder_outputs
    logits = self.classifier(sequence_output)
    logits = paddle.transpose(logits, perm=[2, 0, 1])
    start_logits, end_logits = paddle.unstack(x=logits, axis=0)

    if output_hidden_states:
        return start_logits, end_logits, encoder_outputs
    else:
        return start_logits, end_logits
def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None): r""" Args: input_ids (Tensor): Indices of input sequence tokens in the vocabulary. They are numerical representations of tokens that build the input sequence. It's data type should be `int64` and has a shape of [batch_size, sequence_length]. token_type_ids (Tensor, optional): Segment token indices to indicate first and second portions of the inputs. Indices can be either 0 or 1: - 0 corresponds to a **sentence A** token, - 1 corresponds to a **sentence B** token. It's data type should be `int64` and has a shape of [batch_size, sequence_length]. Defaults to None, which means no segment embeddings is added to token embeddings. position_ids (Tensor, optional): Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0, config.max_position_embeddings - 1]``. Defaults to `None`. Shape as `(batch_sie, num_tokens)` and dtype as `int32` or `int64`. attention_mask (Tensor, optional): Mask to indicate whether to perform attention on each input token or not. The values should be either 0 or 1. The attention scores will be set to **-infinity** for any positions in the mask that are **0**, and will be **unchanged** for positions that are **1**. - **1** for tokens that are **not masked**, - **0** for tokens that are **masked**. It's data type should be `float32` and has a shape of [batch_size, sequence_length]. Defaults to `None`. Returns: A tuple of shape (``start_logits``, ``end_logits``). With the fields: - start_logits(Tensor): The logits of start position of prediction answer. - end_logits(Tensor): The logits of end position of prediction answer. Example: .. code-block:: import paddle from paddlenlp.transformers import ErnieForQuestionAnswering, ErnieTokenizer tokenizer = ErnieTokenizer.from_pretrained('ernie-1.0') model = ErnieForQuestionAnswering.from_pretrained('ernie-1.0') inputs = tokenizer("这是个测试样例") inputs = {k:paddle.to_tensor([v]) for (k, v) in inputs.items()} logits = model(**inputs) """ sequence_output, _ = self.ernie( input_ids, token_type_ids=token_type_ids, position_ids=position_ids, attention_mask=attention_mask) logits = self.classifier(sequence_output) logits = paddle.transpose(logits, perm=[2, 0, 1]) start_logits, end_logits = paddle.unstack(x=logits, axis=0) return start_logits, end_logits