Example #1
File: t5.py Project: oceanfly/lit
    def output_spec(self):
        spec = super().output_spec()  # has 'output_text'
        spec.update({
            "input_tokens": lit_types.Tokens(parent="input_text"),
            "encoder_final_embedding": lit_types.Embeddings(),
            # If target text is given, the following will also be populated.
            "target_tokens": lit_types.Tokens(parent="target_text"),
            "pred_tokens": lit_types.TokenTopKPreds(align="target_tokens"),
        })
        if self.config.num_to_generate > 1:
            spec["output_text"] = lit_types.GeneratedTextCandidates(
                parent="target_text")

        if self.config.output_attention:
            # Add attention for each layer.
            for i in range(self.num_layers):
                spec[f"encoder_layer_{i:d}_attention"] = lit_types.AttentionHeads(
                    align_in="input_tokens", align_out="input_tokens")
                spec[f"decoder_layer_{i:d}_attention"] = lit_types.AttentionHeads(
                    align_in="target_tokens", align_out="target_tokens")
        return spec
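
The parent= references above only resolve if the model's input_spec() declares matching fields. As a rough sketch (not from the oceanfly/lit source; the field names input_text and target_text are inferred from the parent= arguments), the counterpart might look like:

    def input_spec(self):
        # Hypothetical counterpart to the output_spec above; field names are
        # inferred from the parent= references, not copied from the repo.
        # (lit_types is lit_nlp.api.types, as elsewhere in these examples.)
        return {
            "input_text": lit_types.TextSegment(),
            # Optional reference text; when present, target_tokens and
            # pred_tokens in the output spec are populated against it.
            "target_text": lit_types.TextSegment(required=False),
        }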
Example #2
    def output_spec(self) -> lit_types.Spec:
        output = {
            "input_tokens": lit_types.Tokens(parent="input_text"),
            "predicted": lit_types.GeneratedText(parent="target_text"),
            "layer_average": lit_types.AttentionHeads(align=("input_tokens", "input_tokens")),
        }
        for layer in range(self.ATTENTION_LAYERS):
            output["layer{}".format(layer)] = lit_types.AttentionHeads(
                align=("input_tokens", "input_tokens"))

        return output
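
Note that these snippets span two versions of the AttentionHeads signature: older lit-nlp releases took a single align tuple, as here, while newer releases split it into align_in/align_out, as in examples #1 and #5. A hedged illustration of the two spellings (use whichever matches your installed version):

    # Older lit-nlp API: one (in, out) tuple.
    spec["attention"] = lit_types.AttentionHeads(align=("input_tokens", "input_tokens"))
    # Newer lit-nlp API: separate keyword arguments.
    spec["attention"] = lit_types.AttentionHeads(
        align_in="input_tokens", align_out="input_tokens")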
Example #3
    def output_spec(self) -> Spec:
        ret = {"tokens": lit_types.Tokens()}
        ret["tokens_" + self.config.text_a_name] = lit_types.Tokens()
        if self.config.text_b_name:
            ret["tokens_" + self.config.text_b_name] = lit_types.Tokens()
        if self.is_regression:
            ret["score"] = lit_types.RegressionScore(
                parent=self.config.label_name)
        else:
            ret["probas"] = lit_types.MulticlassPreds(
                parent=self.config.label_name,
                vocab=self.config.labels,
                null_idx=self.config.null_label_idx)
        ret["cls_emb"] = lit_types.Embeddings()

        # Gradients, if requested.
        if self.config.compute_grads:
            ret["token_grad_" +
                self.config.text_a_name] = lit_types.TokenGradients(
                    align="tokens_" + self.config.text_a_name)
            if self.config.text_b_name:
                ret["token_grad_" +
                    self.config.text_b_name] = lit_types.TokenGradients(
                        align="tokens_" + self.config.text_b_name)

        # Attention heads, one field for each layer.
        for i in range(self.model.config.num_hidden_layers):
            ret[f"layer_{i}/attention"] = lit_types.AttentionHeads(
                align=("tokens", "tokens"))

        return ret
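
Every key in this spec names a field the model's predict function must emit per example. A fabricated prediction that would conform to the classification branch above (illustrative values only; assumes config.text_a_name == "sentence", no text_b, two labels, a 768-d encoder, and 12 attention heads):

    import numpy as np

    prediction = {
        "tokens": ["[CLS]", "great", "movie", "[SEP]"],
        "tokens_sentence": ["great", "movie"],
        "probas": [0.08, 0.92],                     # MulticlassPreds over config.labels
        "cls_emb": np.zeros(768),                   # Embeddings: one fixed-length vector
        "layer_0/attention": np.zeros((12, 4, 4)),  # AttentionHeads: <heads, tokens, tokens>
    }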
Example #4
File: t5.py Project: smesaric/lit
 def output_spec(self):
   spec = {
       "input_tokens": lit_types.Tokens(parent="input_text"),
       "generation": lit_types.GeneratedText(parent="target_text"),
       "encoder_final_embedding": lit_types.Embeddings(),
       # If target text is given, the following will also be populated.
       "target_tokens": lit_types.Tokens(parent="target_text"),
       "pred_tokens": lit_types.TokenTopKPreds(align="target_tokens"),
       "rougeL": lit_types.Scalar(),
   }
   if self.config.output_attention:
     # Add attention for each layer.
     for i in range(self.num_layers):
       spec[f"encoder_layer_{i:d}_attention"] = lit_types.AttentionHeads(
           align=("input_tokens", "input_tokens"))
       spec[f"decoder_layer_{i:d}_attention"] = lit_types.AttentionHeads(
           align=("target_tokens", "target_tokens"))
   return spec
Example #5
 def output_spec(self):
     spec = {
         # the "parent" keyword tells LIT which field in the input spec we should
         # compare this to when computing metrics.
         "pred_tokens": lit_types.TokenTopKPreds(align="tokens"),
         "tokens": lit_types.Tokens(parent="text"),  # all tokens
     }
     # Add attention and embeddings from each layer.
     for i in range(self.num_layers):
         spec[f"layer_{i:d}_attention"] = lit_types.AttentionHeads(
             align_in="tokens", align_out="tokens")
         spec[f"layer_{i:d}_avg_embedding"] = lit_types.Embeddings()
     return spec
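
Because the attention and embedding fields are generated per layer, downstream code usually discovers them by type rather than by hard-coded name. A sketch using lit-nlp's spec utilities (assuming lit_nlp.lib.utils.find_spec_keys behaves as in recent releases; model is an already-instantiated LIT model):

    from lit_nlp.api import types as lit_types
    from lit_nlp.lib import utils

    # Collect every per-layer field the model declared, whatever its
    # numbering scheme, e.g. ["layer_0_attention", "layer_1_attention", ...].
    spec = model.output_spec()
    attention_keys = utils.find_spec_keys(spec, lit_types.AttentionHeads)
    embedding_keys = utils.find_spec_keys(spec, lit_types.Embeddings)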
Example #6
  def output_spec(self) -> Spec:
    ret = {"tokens": lit_types.Tokens()}
    ret["tokens_" + self.config.text_a_name] = lit_types.Tokens(
        parent=self.config.text_a_name)
    if self.config.text_b_name:
      ret["tokens_" + self.config.text_b_name] = lit_types.Tokens(
          parent=self.config.text_b_name)
    if self.is_regression:
      ret["score"] = lit_types.RegressionScore(parent=self.config.label_name)
    else:
      ret["probas"] = lit_types.MulticlassPreds(
          parent=self.config.label_name,
          vocab=self.config.labels,
          null_idx=self.config.null_label_idx)
    ret["cls_emb"] = lit_types.Embeddings()
    # Average embeddings, one per layer (including the embedding layer).
    for i in range(1 + self.model.config.num_hidden_layers):
      ret[f"layer_{i}/avg_emb"] = lit_types.Embeddings()

    ret["cls_grad"] = lit_types.Gradients(
        grad_for="cls_emb", grad_target_field_key="grad_class")

    # The input_embs_ and grad_class fields are used for Integrated Gradients.
    ret["input_embs_" + self.config.text_a_name] = lit_types.TokenEmbeddings(
        align="tokens_" + self.config.text_a_name)
    if self.config.text_b_name:
      ret["input_embs_" + self.config.text_b_name] = lit_types.TokenEmbeddings(
          align="tokens_" + self.config.text_b_name)

    # Gradients, if requested.
    if self.config.compute_grads:
      ret["grad_class"] = lit_types.CategoryLabel(required=False,
                                                  vocab=self.config.labels)
      ret["token_grad_" + self.config.text_a_name] = lit_types.TokenGradients(
          align="tokens_" + self.config.text_a_name,
          grad_for="input_embs_" + self.config.text_a_name,
          grad_target_field_key="grad_class")
      if self.config.text_b_name:
        ret["token_grad_" + self.config.text_b_name] = lit_types.TokenGradients(
            align="tokens_" + self.config.text_b_name,
            grad_for="input_embs_" + self.config.text_b_name,
            grad_target_field_key="grad_class")

    # Attention heads, one field for each layer.
    for i in range(self.model.config.num_hidden_layers):
      ret[f"layer_{i+1}/attention"] = lit_types.AttentionHeads(
          align_in="tokens", align_out="tokens")
    return ret
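
The input_embs_* and grad_class fields only enable Integrated Gradients if input_spec() declares matching optional fields that the backend can feed back into the model. A sketch of those counterpart entries, modeled on LIT's GLUE models rather than copied from this example:

    # Hypothetical input_spec() additions mirroring the output fields above;
    # required=False because LIT only fills them in during salience runs.
    if self.config.compute_grads:
      ret["grad_class"] = lit_types.CategoryLabel(
          required=False, vocab=self.config.labels)
      ret["input_embs_" + self.config.text_a_name] = lit_types.TokenEmbeddings(
          align="tokens_" + self.config.text_a_name, required=False)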
Example #7
 def output_spec(self) -> lit_types.Spec:
     """Give the output specifications."""
     ret = {
         "tokens":  lit_types.Tokens(),
         "probas":  lit_types.MulticlassPreds(parent="label", vocab=self.LABELS),
         "cls_emb": lit_types.Embeddings()
     }
     
     # Gradients, if requested.
     if self.compute_grads:
         ret["token_grad_sentence"] = lit_types.TokenGradients(align="tokens")
     
     # Attention heads, one field for each layer.
     for i in range(self.model.config.num_hidden_layers):
         ret[f"layer_{i}/attention"] = lit_types.AttentionHeads(align=("tokens", "tokens"))
     return ret
Example #8
 def output_spec(self) -> lit_types.Spec:
     spec = {
         "tokens": lit_types.Tokens(),
         "bio_tags": lit_types.SequenceTags(align="tokens"),
         "token_ids": lit_types.SequenceTags(align="tokens"),
         "grads": lit_types.TokenGradients(align="tokens"),
         "probas": lit_types.MulticlassPreds(parent="bio_tags", vocab=self.LABELS),
     }
     for i in range(self.model.config.num_hidden_layers):
         spec[f'layer_{i}/attention'] = lit_types.AttentionHeads(
             align=("tokens", "tokens"))
     return spec
Example #9
File: analyze.py Project: sillsdev/silnlp
 def output_spec(self) -> lit_types.Spec:
     return {
         "src_tokens": lit_types.Tokens(parent="src_text"),
         "trg_text": lit_types.GeneratedText(parent="ref_text"),
         "trg_tokens": lit_types.Tokens(parent="trg_text"),
         "attention": lit_types.AttentionHeads(align_in="src_tokens",
                                               align_out="trg_tokens"),
         "pred_tokens": lit_types.TokenTopKPreds(align="trg_tokens", parent="trg_text"),
         "encoder_final_embedding": lit_types.Embeddings(),
         "ter": lit_types.Scalar(),
         "chrf3": lit_types.Scalar(),
     }
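
Once input_spec(), output_spec(), and the predict function agree, the model can be served in the LIT UI. A minimal wiring sketch (NmtModel and my_dataset are placeholder names, not taken from silnlp):

    from lit_nlp import dev_server
    from lit_nlp import server_flags

    # Placeholder names: substitute your own Model and Dataset subclasses.
    models = {"nmt": NmtModel()}
    datasets = {"eval": my_dataset}

    lit_demo = dev_server.Server(models, datasets, **server_flags.get_flags())
    lit_demo.serve()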