Example 1
    def predict_minibatch(self, inputs):
        # Preprocess to ids and masks, and make the input batch.
        encoded_input = self.tokenizer.batch_encode_plus(
            [ex["sentence"] for ex in inputs],
            return_tensors="pt",
            add_special_tokens=True,
            max_length=128,
            pad_to_max_length=True)

        # Run a forward pass.
        with torch.no_grad():  # remove this if you need gradients.
            logits, embs, unused_attentions = self.model(**encoded_input)

        # Post-process outputs.
        batched_outputs = {
            "probas": torch.nn.functional.softmax(logits, dim=-1),
            "input_ids": encoded_input["input_ids"],
            "ntok": torch.sum(encoded_input["attention_mask"], dim=1),
            "cls_emb": embs[-1][:, 0],  # last layer, first token
        }
        # Return as NumPy for further processing.
        detached_outputs = {k: v.numpy() for k, v in batched_outputs.items()}
        # Unbatch outputs so we get one record per input example.
        for output in utils.unbatch_preds(detached_outputs):
            ntok = output.pop("ntok")
            output["tokens"] = self.tokenizer.convert_ids_to_tokens(
                output.pop("input_ids")[1:ntok - 1])
            yield output
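
All of these examples hand their batched outputs to utils.unbatch_preds, a LIT helper that splits a dict of batched arrays into one dict per example. A minimal sketch of that behaviour, assuming every value shares the same leading batch dimension (the real LIT implementation may differ in details):

def unbatch_preds(batched_preds):
    """Yield one dict per example from a dict of batched arrays.

    Assumes every value in batched_preds is indexable along a shared
    leading batch dimension.
    """
    batch_size = len(next(iter(batched_preds.values())))
    for i in range(batch_size):
        yield {key: value[i] for key, value in batched_preds.items()}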
Example 2
File: t5.py Project: smesaric/lit
  def _predict_minibatch_internal(self, inputs):
    """Run model on a single batch.

    Args:
      inputs: List[Dict] with fields as described by input_spec()

    Returns:
      outputs: List[Dict] with fields as described by output_spec()
    """
    # Text as a sequence of sentencepiece IDs.
    encoded_inputs = self._encode_texts([
        self.config.input_prefix + ex["input_text"] + " </s>" for ex in inputs
    ])
    encoded_targets = self._encode_texts(
        [ex.get("target_text", "") for ex in inputs])
    ##
    # Force-decode on target text, and also get encoder embs and attention.
    batched_outputs = self._force_decode(encoded_inputs, encoded_targets)
    # Get the conditional generation from the model.
    # Workaround for output_hidden_states not being compatible with generate.
    # See https://github.com/huggingface/transformers/issues/8361
    self.model.encoder.output_hidden_states = False
    self.model.decoder.output_hidden_states = False
    batched_outputs["generated_ids"] = self.model.generate(
        encoded_inputs["input_ids"],
        attention_mask=encoded_inputs["attention_mask"],
        max_length=self.config.max_gen_length)
    self.model.encoder.output_hidden_states = True
    self.model.decoder.output_hidden_states = True

    # Convert to numpy for post-processing.
    detached_outputs = {k: v.numpy() for k, v in batched_outputs.items()}
    # Split up batched outputs, then post-process each example.
    unbatched_outputs = utils.unbatch_preds(detached_outputs)
    return map(self._postprocess, unbatched_outputs)
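
Examples 2 and 12 switch output_hidden_states off around generate() and switch it back afterwards (see the linked HuggingFace issue). A small context manager, sketched here under the assumption that the flag lives on model.config as in Example 12, keeps the restore from being skipped if generate() raises:

from contextlib import contextmanager

@contextmanager
def hidden_states_disabled(model):
    # Temporarily disable hidden-state outputs and restore the original
    # setting even if generation fails.
    original = model.config.output_hidden_states
    model.config.output_hidden_states = False
    try:
        yield model
    finally:
        model.config.output_hidden_states = original

With this, the generation step could read: with hidden_states_disabled(self.model): generated_ids = self.model.generate(...).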
Example 3
    def predict_minibatch(self, inputs):
        """Predict on a single minibatch of examples."""
        tokens_and_offsets = [
            retokenize.subtokenize(ex['tokens'], self.tokenizer.tokenize)
            for ex in inputs
        ]
        tokenized_texts, offsets = zip(*tokens_and_offsets)
        # Process to ids, add special tokens, and compute segment ids and masks.
        encoded_input = self.tokenizer.batch_encode_plus(
            list(tokenized_texts),
            is_split_into_words=True,
            return_tensors='tf',
            add_special_tokens=True,
            max_length=self.max_seq_length,
            padding='longest',
            truncation='longest_first')

        out: transformers.modeling_tf_outputs.TFMaskedLMOutput = \
            self.model(encoded_input)
        batched_outputs = {
            'input_ids': encoded_input['input_ids'].numpy(),
            'ntok': tf.reduce_sum(encoded_input['attention_mask'],
                                  axis=1).numpy(),
            'top_layer_embs':
            out.hidden_states[-1].numpy(),  # last layer, all tokens
        }
        # List of dicts, one per example.
        unbatched_outputs = list(utils.unbatch_preds(batched_outputs))
        # Postprocess to remove padding and add offsets.
        ret = [self._postprocess(ubo) for ubo in unbatched_outputs]
        for preds, offset_indices in zip(ret, offsets):
            preds['offsets'] = offset_indices
        return ret
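
retokenize.subtokenize is a LIT utility; roughly, it splits each pre-tokenized word into subword pieces and records where each original word starts, so predictions can later be aligned back to the source tokens. A rough sketch of that idea, with the offset convention (index of each word's first subtoken) taken as an assumption:

def subtokenize(tokens, tokenize_fn):
    """Split pre-tokenized words into subwords, tracking alignment.

    Returns (subtokens, offsets), where offsets[i] is the index of the
    first subtoken produced from tokens[i]. This offset convention is an
    assumption; LIT's retokenize.subtokenize may differ.
    """
    subtokens, offsets = [], []
    for token in tokens:
        offsets.append(len(subtokens))
        subtokens.extend(tokenize_fn(token))
    return subtokens, offsets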
Example 4
    def predict_minibatch(self, inputs):
        """Predict on a single minibatch of examples."""
        # If input has a 'tokens' field, use that. Otherwise tokenize the text.
        tokenized_texts = [
            ex.get("tokens") or self.tokenizer.tokenize(ex["text"])
            for ex in inputs
        ]
        encoded_input = batch_encode_pretokenized(self.tokenizer,
                                                  tokenized_texts)

        # out.logits is a single tensor
        #    <float32>[batch_size, num_tokens, vocab_size]
        # out.hidden_states is a list of num_layers + 1 tensors, each
        #    <float32>[batch_size, num_tokens, h_dim]
        out: transformers.modeling_tf_outputs.TFMaskedLMOutput = \
            self.model(encoded_input)
        batched_outputs = {
            "probas": tf.nn.softmax(out.logits, axis=-1).numpy(),
            "input_ids": encoded_input["input_ids"].numpy(),
            "ntok": tf.reduce_sum(encoded_input["attention_mask"],
                                  axis=1).numpy(),
            # last layer, first token
            "cls_emb": out.hidden_states[-1][:, 0].numpy(),
        }
        # List of dicts, one per example.
        unbatched_outputs = utils.unbatch_preds(batched_outputs)
        # Postprocess to remove padding and decode predictions.
        return map(self._postprocess, unbatched_outputs)
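
batch_encode_pretokenized is a helper from the same codebase; it is not shown here, but presumably wraps the pre-tokenized inputs in the usual HuggingFace batch encoder, much like Example 3 does. A sketch under that assumption (the max_length default is illustrative):

def batch_encode_pretokenized(tokenizer, tokenized_texts, max_length=128):
    # Sketch only: mirror the batch_encode_plus call from Example 3 for
    # inputs that are already split into tokens.
    return tokenizer.batch_encode_plus(
        list(tokenized_texts),
        is_split_into_words=True,
        return_tensors="tf",
        add_special_tokens=True,
        max_length=max_length,
        padding="longest",
        truncation="longest_first")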
Example 5
    def predict_minibatch(self, inputs):
        # Preprocess to ids and masks, and make the input batch.
        encoded_input = self.tokenizer.batch_encode_plus(
            [ex["sentence"] for ex in inputs],
            return_tensors="tf",
            add_special_tokens=True,
            max_length=128,
            pad_to_max_length=True)

        # Run a forward pass.
        logits, embs, unused_attentions = self.model(encoded_input,
                                                     training=False)

        # Post-process outputs.
        batched_outputs = {
            "probas": tf.nn.softmax(logits, axis=-1).numpy(),
            "input_ids": encoded_input["input_ids"].numpy(),
            "ntok": tf.reduce_sum(encoded_input["attention_mask"],
                                  axis=1).numpy(),
            "cls_emb": embs[-1][:, 0].numpy(),  # last layer, first token
        }
        # Unbatch outputs so we get one record per input example.
        for output in utils.unbatch_preds(batched_outputs):
            ntok = output.pop("ntok")
            output["tokens"] = self.tokenizer.convert_ids_to_tokens(
                output.pop("input_ids")[1:ntok - 1])
            yield output
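
pad_to_max_length=True, used here and in Examples 1, 8, 9, and 10, was deprecated in later transformers releases in favor of the padding and truncation arguments that Examples 3, 6, 7, and 13 already use. A standalone sketch of the equivalent call; the checkpoint name is only illustrative:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # illustrative checkpoint
encoded = tokenizer.batch_encode_plus(
    ["The first sentence.", "The second, slightly longer sentence."],
    return_tensors="tf",
    add_special_tokens=True,
    max_length=128,
    padding="max_length",  # replaces the deprecated pad_to_max_length=True
    truncation=True)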
Example 6
    def predict_minibatch(self, inputs):
        # Preprocess to ids and masks, and make the input batch.
        encoded_input = self.tokenizer.batch_encode_plus(
            [ex["sentence"] for ex in inputs],
            return_tensors="tf",
            add_special_tokens=True,
            max_length=128,
            padding="longest",
            truncation="longest_first")

        # Run a forward pass.
        out: transformers.modeling_tf_outputs.TFSequenceClassifierOutput = \
            self.model(encoded_input, training=False)

        # Post-process outputs.
        batched_outputs = {
            "probas": tf.nn.softmax(out.logits, axis=-1),
            "input_ids": encoded_input["input_ids"],
            "ntok": tf.reduce_sum(encoded_input["attention_mask"], axis=1),
            "cls_emb": out.hidden_states[-1][:, 0],  # last layer, first token
        }
        # Return as NumPy for further processing.
        detached_outputs = {k: v.numpy() for k, v in batched_outputs.items()}
        # Unbatch outputs so we get one record per input example.
        for output in utils.unbatch_preds(detached_outputs):
            ntok = output.pop("ntok")
            output["tokens"] = self.tokenizer.convert_ids_to_tokens(
                output.pop("input_ids")[1:ntok - 1])
            yield output
Example 7
  def predict_minibatch(self, inputs):
    # Preprocess to ids and masks, and make the input batch.
    encoded_input = self.tokenizer.batch_encode_plus(
        [ex["sentence"] for ex in inputs],
        return_tensors="pt",
        add_special_tokens=True,
        max_length=128,
        padding="longest",
        truncation="longest_first")

    # Check and send to cuda (GPU) if available
    if torch.cuda.is_available():
      self.model.cuda()
      for tensor in encoded_input:
        encoded_input[tensor] = encoded_input[tensor].cuda()
    # Run a forward pass.
    with torch.no_grad():  # remove this if you need gradients.
      out: transformers.modeling_outputs.SequenceClassifierOutput = \
          self.model(**encoded_input)

    # Post-process outputs.
    batched_outputs = {
        "probas": torch.nn.functional.softmax(out.logits, dim=-1),
        "input_ids": encoded_input["input_ids"],
        "ntok": torch.sum(encoded_input["attention_mask"], dim=1),
        "cls_emb": out.hidden_states[-1][:, 0],  # last layer, first token
    }
    # Return as NumPy for further processing.
    detached_outputs = {k: v.cpu().numpy() for k, v in batched_outputs.items()}
    # Unbatch outputs so we get one record per input example.
    for output in utils.unbatch_preds(detached_outputs):
      ntok = output.pop("ntok")
      output["tokens"] = self.tokenizer.convert_ids_to_tokens(
          output.pop("input_ids")[1:ntok - 1])
      yield output
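
The .cuda() block above (also in Examples 13 and 14) only moves things when a GPU is present. A device-agnostic variant using torch.device and .to(), sketched here as an alternative rather than what LIT itself does:

import torch

def to_device(model, encoded_input):
    # Pick a GPU if one is available, otherwise stay on CPU, and move the
    # model and every input tensor there in one pass.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    return {name: tensor.to(device) for name, tensor in encoded_input.items()}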
Example 8
    def predict_minibatch(self, inputs, config=None):
        """Predict on a single minibatch of examples."""
        # If input has a 'tokens' field, use that. Otherwise tokenize the text.
        tokenized_texts = [
            ex.get("tokens") or self.tokenizer.tokenize(ex["text"])
            for ex in inputs
        ]
        # Process to ids, add special tokens, and compute segment ids and masks.
        encoded_input = self.tokenizer.batch_encode_plus(
            tokenized_texts,
            is_pretokenized=True,
            return_tensors="tf",
            add_special_tokens=True,
            max_length=self.max_seq_length,
            pad_to_max_length=True)
        # We have to set max_length explicitly above so that
        # max_tokens <= model_max_length, in order to avoid indexing errors. But
        # the combination of max_length=<integer> and pad_to_max_length=True means
        # that if the max is < model_max_length, we end up with extra padding.
        # The lines below strip this off.
        # TODO(lit-dev): submit a PR to make this possible with tokenizer options?
        max_tokens = tf.reduce_max(
            tf.reduce_sum(encoded_input["attention_mask"], axis=1))
        encoded_input = {
            k: v[:, :max_tokens]
            for k, v in encoded_input.items()
        }

        # logits is a single tensor
        #    <float32>[batch_size, num_tokens, vocab_size]
        # embs is a list of num_layers + 1 tensors, each
        #    <float32>[batch_size, num_tokens, h_dim]
        # attentions is a list of num_layers tensors, each
        #    <float32>[batch_size, num_heads, num_tokens, num_tokens]
        logits, embs, unused_attentions = self.model(encoded_input)
        batched_outputs = {
            "probas": tf.nn.softmax(logits, axis=-1).numpy(),
            "input_ids": encoded_input["input_ids"].numpy(),
            "ntok": tf.reduce_sum(encoded_input["attention_mask"],
                                  axis=1).numpy(),
            "cls_emb": embs[-1][:, 0].numpy(),  # last layer, first token
        }
        # List of dicts, one per example.
        unbatched_outputs = utils.unbatch_preds(batched_outputs)
        # Postprocess to remove padding and decode predictions.
        return map(self._postprocess, unbatched_outputs)
Example 9
 def predict_minibatch(self, inputs, config=None):
     """Predict on a single minibatch of examples."""
     # Preprocess inputs.
     texts = [ex["text"] for ex in inputs]
     encoded_inputs = self.tokenizer.batch_encode_plus(
         texts,
         return_tensors="tf",
         add_special_tokens=True,
         add_prefix_space=True,
         pad_to_max_length=True)
     # Get the predictions.
     batched_outputs = self._pred(encoded_inputs)
     # Convert to numpy for post-processing.
     detached_outputs = {k: v.numpy() for k, v in batched_outputs.items()}
     # Split up batched outputs, then post-process each example.
     unbatched_outputs = utils.unbatch_preds(detached_outputs)
     return map(self._postprocess, unbatched_outputs)
Example 10
  def predict_minibatch(self, inputs, config=None):
    """Predict on a single minibatch of examples."""
    tokens_and_offsets = [
        retokenize.subtokenize(ex['tokens'], self.tokenizer.tokenize)
        for ex in inputs
    ]
    tokenized_texts, offsets = zip(*tokens_and_offsets)
    # Process to ids, add special tokens, and compute segment ids and masks.
    encoded_input = self.tokenizer.batch_encode_plus(
        tokenized_texts,
        is_pretokenized=True,
        return_tensors='tf',
        add_special_tokens=True,
        max_length=self.max_seq_length,
        pad_to_max_length=True)
    # We have to set max_length explicitly above so that
    # max_tokens <= model_max_length, in order to avoid indexing errors. But
    # the combination of max_length=<integer> and pad_to_max_length=True means
    # that if the max is < model_max_length, we end up with extra padding.
    # The lines below strip this off.
    # TODO(lit-dev): submit a PR to make this possible with tokenizer options?
    max_tokens = tf.reduce_max(
        tf.reduce_sum(encoded_input['attention_mask'], axis=1))
    encoded_input = {k: v[:, :max_tokens] for k, v in encoded_input.items()}

    # logits is a single tensor
    #    <float32>[batch_size, num_tokens, vocab_size]
    # embs is a list of num_layers + 1 tensors, each
    #    <float32>[batch_size, num_tokens, h_dim]
    unused_logits, embs = self.model(encoded_input)
    batched_outputs = {
        'input_ids': encoded_input['input_ids'].numpy(),
        'ntok': tf.reduce_sum(encoded_input['attention_mask'], axis=1).numpy(),
        'top_layer_embs': embs[-1].numpy(),  # last layer, all tokens
    }
    # List of dicts, one per example.
    unbatched_outputs = list(utils.unbatch_preds(batched_outputs))
    # Postprocess to remove padding and add offsets.
    ret = [self._postprocess(ubo) for ubo in unbatched_outputs]
    for preds, offset_indices in zip(ret, offsets):
      preds['offsets'] = offset_indices
    return ret
Example 11
    def predict_minibatch(self, inputs: Iterable[JsonDict]):
        # Use watch_accessed_variables to save memory by having the tape do nothing
        # if we don't need gradients.
        with tf.GradientTape(
                watch_accessed_variables=self.config.compute_grads) as tape:
            encoded_input = self._preprocess(inputs)
            logits, embs, attentions = self.model(encoded_input,
                                                  training=False)

            batched_outputs = {
                "input_ids": encoded_input["input_ids"],
                "ntok": tf.reduce_sum(encoded_input["attention_mask"], axis=1),
                "cls_emb": embs[-1][:, 0],  # last layer, first token
            }
            assert len(attentions) == self.model.config.num_hidden_layers
            for i, layer_attention in enumerate(attentions):
                batched_outputs[f"layer_{i}/attention"] = layer_attention

            if self.is_regression:
                # <tf.float32>[batch_size]
                batched_outputs["score"] = tf.squeeze(logits, axis=-1)
                scalar_pred_for_gradients = batched_outputs["score"]
            else:
                # <tf.float32>[batch_size, num_labels]
                batched_outputs["probas"] = tf.nn.softmax(logits, axis=-1)
                # <tf.float32>[batch_size]
                scalar_pred_for_gradients = tf.reduce_max(
                    batched_outputs["probas"], axis=-1)

        # Request gradients after the tape is run.
        # Note: embs[0] includes position and segment encodings, as well as subword
        # embeddings.
        if self.config.compute_grads:
            # <tf.float32>[batch_size, num_tokens, emb_dim]
            batched_outputs["input_emb_grad"] = tape.gradient(
                scalar_pred_for_gradients, embs[0])

        detached_outputs = {k: v.numpy() for k, v in batched_outputs.items()}
        # Sequence of dicts, one per example.
        unbatched_outputs = utils.unbatch_preds(detached_outputs)
        return map(self._postprocess, unbatched_outputs)
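
The watch_accessed_variables=self.config.compute_grads trick above means the tape records nothing, and so costs no extra memory, when gradients are not needed. A toy illustration of the same pattern outside the model code:

import tensorflow as tf

compute_grads = True  # stand-in for self.config.compute_grads

w = tf.Variable([[1.0, 2.0], [3.0, 4.0]])  # stand-in for an embedding table
with tf.GradientTape(watch_accessed_variables=compute_grads) as tape:
    h = tf.nn.embedding_lookup(w, [0, 1])  # stand-in for embs[0]
    y = tf.reduce_sum(tf.square(h))

# Gradients w.r.t. the intermediate tensor h are only available when the
# tape was actually watching; otherwise tape.gradient would return None.
input_grad = tape.gradient(y, h) if compute_grads else None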
Example 12
File: t5.py Project: oceanfly/lit
    def predict_minibatch(self, inputs):
        """Run model on a single batch.

    Args:
      inputs: List[Dict] with fields as described by input_spec()

    Returns:
      outputs: List[Dict] with fields as described by output_spec()
    """
        # Text as a sequence of sentencepiece IDs.
        encoded_inputs = self._encode_texts(
            [ex["input_text"] for ex in inputs])
        encoded_targets = self._encode_texts(
            [ex.get("target_text", "") for ex in inputs])

        ##
        # Force-decode on target text, and also get encoder embs and attention.
        batched_outputs = self._force_decode(encoded_inputs, encoded_targets)
        # Get the conditional generation from the model.
        # Workaround for output_hidden_states not being compatible with generate.
        # See https://github.com/huggingface/transformers/issues/8361
        self.model.config.output_hidden_states = False
        generated_ids = self.model.generate(
            encoded_inputs.input_ids,
            num_beams=self.config.beam_size,
            attention_mask=encoded_inputs.attention_mask,
            max_length=self.config.max_gen_length,
            num_return_sequences=self.config.num_to_generate)
        # [batch_size*num_return_sequences, num_steps]
        # -> [batch_size, num_return_sequences, num_steps]
        batched_outputs["generated_ids"] = tf.reshape(
            generated_ids,
            [-1, self.config.num_to_generate, generated_ids.shape[-1]])
        self.model.config.output_hidden_states = True

        # Convert to numpy for post-processing.
        detached_outputs = {k: v.numpy() for k, v in batched_outputs.items()}
        # Split up batched outputs, then post-process each example.
        unbatched_outputs = utils.unbatch_preds(detached_outputs)
        return list(map(self._postprocess, unbatched_outputs))
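
The generated_ids tensor above is reshaped to [batch_size, num_return_sequences, num_steps] before post-processing. A hypothetical decoding step for one example's candidates (the helper name is ours, not LIT's):

def decode_candidates(tokenizer, generated_ids_row):
    # generated_ids_row: <int>[num_return_sequences, num_steps] for one example.
    return [
        tokenizer.decode(ids, skip_special_tokens=True)
        for ids in generated_ids_row
    ]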
Example 13
    def predict_minibatch(self, inputs):

        # Preprocess to ids and masks, and make the input batch.
        encoded_input = self.tokenizer.batch_encode_plus(
            [ex["sentence"] for ex in inputs],
            return_tensors="pt",
            add_special_tokens=True,
            max_length=128,
            padding="longest",
            truncation="longest_first")

        # Check and send to cuda (GPU) if available
        if torch.cuda.is_available():
            self.model.cuda()
            for tensor in encoded_input:
                encoded_input[tensor] = encoded_input[tensor].cuda()

        # Run a forward pass.
        with torch.set_grad_enabled(self.compute_grads):
            out: transformers.modeling_outputs.SequenceClassifierOutput = \
                self.model(**encoded_input)

        # Post-process outputs.
        batched_outputs = {
            "probas": torch.nn.functional.softmax(out.logits, dim=-1),
            "input_ids": encoded_input["input_ids"],
            "ntok": torch.sum(encoded_input["attention_mask"], dim=1),
            "cls_emb": out.hidden_states[-1][:, 0],  # last layer, first token
        }

        # Add attention layers to batched_outputs
        assert len(out.attentions) == self.model.config.num_hidden_layers
        for i, layer_attention in enumerate(out.attentions):
            batched_outputs[f"layer_{i}/attention"] = layer_attention

        # Request gradients after the forward pass.
        # Note: hidden_states[0] includes position and segment encodings, as well as
        # subword embeddings.
        if self.compute_grads:
            # <torch.float32>[batch_size, num_tokens, emb_dim]
            scalar_pred_for_gradients = torch.max(batched_outputs["probas"],
                                                  dim=1,
                                                  keepdim=False,
                                                  out=None)[0]
            batched_outputs["input_emb_grad"] = torch.autograd.grad(
                scalar_pred_for_gradients,
                out.hidden_states[0],
                grad_outputs=torch.ones_like(scalar_pred_for_gradients))[0]

        # Post-process outputs.
        # Return as NumPy for further processing.
        detached_outputs = {
            k: v.cpu().detach().numpy()
            for k, v in batched_outputs.items()
        }

        # Unbatch outputs so we get one record per input example.
        for output in utils.unbatch_preds(detached_outputs):
            ntok = output.pop("ntok")
            output["tokens"] = self.tokenizer.convert_ids_to_tokens(
                output.pop("input_ids")[:ntok])

            # set token gradients
            if self.compute_grads:
                output["token_grad_sentence"] = output["input_emb_grad"][:ntok]

            # Process attention.
            for key in output:
                if not re.match(r"layer_(\d+)/attention", key):
                    continue
                # Select only real tokens, since most of this matrix is padding.
                # <float32>[num_heads, max_seq_length, max_seq_length]
                # -> <float32>[num_heads, num_tokens, num_tokens]
                output[key] = output[key][:, :ntok, :ntok].transpose((0, 2, 1))
                # Make a copy of this array to avoid memory leaks, since NumPy otherwise
                # keeps a pointer around that prevents the source array from being GCed.
                output[key] = output[key].copy()
            yield output
Example 14
 def predict_minibatch(self, inputs):
     """Make predictions for the given batch of inputs."""
     # Preprocess to ids and masks, and make the input batch.
     encoded_input = self.sentiment_model.tokenize([inp["tweet"] for inp in inputs])
     
     # Check and send to cuda (GPU) if available
     if torch.cuda.is_available():
         self.model.cuda()
         for tensor in encoded_input:
             encoded_input[tensor] = encoded_input[tensor].cuda()
     
     # Run a forward pass.
     with torch.set_grad_enabled(self.compute_grads):
         logits, embs, unused_attentions = self.model(**encoded_input).values()
     
     # Post-process outputs.
     batched_outputs = {
         "probas":    softmax(logits, dim=-1),
         "input_ids": encoded_input["input_ids"],
         "ntok":      torch.sum(encoded_input["attention_mask"], dim=1),
         "cls_emb":   embs[-1][:, 0],  # last layer, first token (is the cls token that's used for classification)
     }
     
     # Add attention layers to batched_outputs
     for i, layer_attention in enumerate(unused_attentions):
         batched_outputs[f"layer_{i}/attention"] = layer_attention
     
     # Request gradients after the forward pass.
     # Note: embs[0] includes position and segment encodings, as well as sub-word embeddings.
     if self.compute_grads:
         # <torch.float32>[batch_size, num_tokens, emb_dim]
         scalar_pred_for_gradients = torch.max(
                 batched_outputs["probas"],
                 dim=1,
                 keepdim=False,
                 out=None,
         )[0]
         batched_outputs["input_emb_grad"] = torch.autograd.grad(
                 scalar_pred_for_gradients,
                 embs[0],
                 grad_outputs=torch.ones_like(scalar_pred_for_gradients),
         )[0]
     
     # Return as NumPy for further processing.
     detached_outputs = {k: v.cpu().detach().numpy() for k, v in batched_outputs.items()}
     
     # Unbatch outputs so we get one record per input example.
     for output in utils.unbatch_preds(detached_outputs):
         ntok = output.pop("ntok")
         output["tokens"] = self.tokenizer.convert_ids_to_tokens(
                 output.pop("input_ids")[:ntok])
         
         # set token gradients
         if self.compute_grads:
             output["token_grad_sentence"] = output["input_emb_grad"][:ntok]
         
         # Process attention.
         for key in output:
             if not re.match(r"layer_(\d+)/attention", key):
                 continue
             # Select only real tokens, since most of this matrix is padding.
             # <float32>[num_heads, max_seq_length, max_seq_length]
             # -> <float32>[num_heads, num_tokens, num_tokens]
             output[key] = output[key][:, :ntok, :ntok].transpose((0, 2, 1))
             # Make a copy of this array to avoid memory leaks, since NumPy otherwise
             # keeps a pointer around that prevents the source array from being GCed.
             output[key] = output[key].copy()
         yield output
Example 15
  def predict_minibatch(self, inputs: Iterable[JsonDict]):
    # Use watch_accessed_variables to save memory by having the tape do nothing
    # if we don't need gradients.
    with tf.GradientTape(
        watch_accessed_variables=self.config.compute_grads) as tape:
      encoded_input = self._preprocess(inputs)

      # Gathers word embeddings from BERT model embedding layer using input ids
      # of the tokens.
      input_ids = encoded_input["input_ids"]
      word_embeddings = self.model.bert.embeddings.word_embeddings
      # <tf.float32>[batch_size, num_tokens, emb_size]
      input_embs = tf.gather(word_embeddings, input_ids)

      # Scatter in any passed in embeddings.
      # <tf.float32>[batch_size, num_tokens, emb_size]
      input_embs = self.scatter_all_embeddings(inputs, input_embs)

      tape.watch(input_embs)  # Watch input_embs for gradient calculation.

      model_inputs = encoded_input.copy()
      model_inputs["input_ids"] = None
      out: transformers.modeling_tf_outputs.TFSequenceClassifierOutput = \
          self.model(model_inputs, inputs_embeds=input_embs, training=False,
                     output_hidden_states=True, output_attentions=True,
                     return_dict=True)

      batched_outputs = {
          "input_ids": encoded_input["input_ids"],
          "ntok": tf.reduce_sum(encoded_input["attention_mask"], axis=1),
          "cls_emb": out.hidden_states[-1][:, 0],  # last layer, first token
          "input_embs": input_embs,
      }

      # First entry is embeddings, then output from each transformer layer.
      assert len(out.hidden_states) == self.model.config.num_hidden_layers + 1
      # <float32>[batch_size, num_tokens, 1]
      token_mask = tf.expand_dims(
          tf.cast(encoded_input["attention_mask"], tf.float32), axis=2)
      # <float32>[batch_size, 1]
      denom = tf.reduce_sum(token_mask, axis=1)
      for i, layer_output in enumerate(out.hidden_states):
        # layer_output is <float32>[batch_size, num_tokens, emb_dim]
        # average over tokens to get <float32>[batch_size, emb_dim]
        batched_outputs[f"layer_{i}/avg_emb"] = tf.reduce_sum(
            layer_output * token_mask, axis=1) / denom

      assert len(out.attentions) == self.model.config.num_hidden_layers
      for i, layer_attention in enumerate(out.attentions):
        batched_outputs[f"layer_{i+1}/attention"] = layer_attention

      if self.is_regression:
        # <tf.float32>[batch_size]
        batched_outputs["score"] = tf.squeeze(out.logits, axis=-1)
        scalar_pred_for_gradients = batched_outputs["score"]
      else:
        # <tf.float32>[batch_size, num_labels]
        batched_outputs["probas"] = tf.nn.softmax(out.logits, axis=-1)

        # If a class for the gradients has been specified in the input,
        # calculate gradients for that class. Otherwise, calculate gradients for
        # the arg_max class.
        arg_max = tf.math.argmax(batched_outputs["probas"], axis=-1).numpy()
        grad_classes = [ex.get("grad_class", arg_max[i]) for (i, ex) in
                        enumerate(inputs)]
        # Convert the class names to indices if needed.
        grad_classes = [self.config.labels.index(label)
                        if isinstance(label, str) else label
                        for label in grad_classes]

        gather_indices = list(enumerate(grad_classes))
        # <tf.float32>[batch_size]
        scalar_pred_for_gradients = tf.gather_nd(batched_outputs["probas"],
                                                 gather_indices)
        if self.config.compute_grads:
          batched_outputs["grad_class"] = tf.convert_to_tensor(grad_classes)

    # Request gradients after the tape is run.
    # Note: embs[0] includes position and segment encodings, as well as subword
    # embeddings.
    if self.config.compute_grads:
      # <tf.float32>[batch_size, num_tokens, emb_dim]
      batched_outputs["input_emb_grad"] = tape.gradient(
          scalar_pred_for_gradients, input_embs)

    detached_outputs = {k: v.numpy() for k, v in batched_outputs.items()}
    # Sequence of dicts, one per example.
    unbatched_outputs = utils.unbatch_preds(detached_outputs)
    return map(self._postprocess, unbatched_outputs)
Example 16
 def predict_minibatch(self, inputs):
   features = self._make_feature_columns(inputs)
   probas = self.model(features)  # <tf.float32>[batch_size, 1]
   preds = {'proba': tf.squeeze(probas, axis=-1).numpy()}
   return list(utils.unbatch_preds(preds))
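
Finally, a hypothetical driver showing how a caller might push a dataset through any of these predict_minibatch implementations in fixed-size chunks; this is not part of LIT's API, just a usage sketch:

def predict_all(model, examples, batch_size=16):
    # Feed examples through predict_minibatch one chunk at a time and
    # stream back one output record per input example.
    for start in range(0, len(examples), batch_size):
        yield from model.predict_minibatch(examples[start:start + batch_size])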