Python Embeddings 예제들, lit_nlp.api.types.Embeddings Python 예제들

예제 #1

0

파일 보기

파일: utils_test.py 프로젝트: byhqsr/PAIR-code-lit

 def test_find_spec_keys(self):
   """Checks find_spec_keys with single types, type tuples and base classes."""
   spec = {
       "score": types.RegressionScore(),
       "scalar_foo": types.Scalar(),
       "text": types.TextSegment(),
       "emb_0": types.Embeddings(),
       "emb_1": types.Embeddings(),
       "tokens": types.Tokens(),
       "generated_text": types.GeneratedText(),
   }
   # Each case pairs the expected keys with the type filter handed to
   # find_spec_keys; cases run in the listed order.
   cases = (
       (["score"], types.RegressionScore),
       (["text", "tokens", "generated_text"],
        (types.TextSegment, types.Tokens)),
       (["emb_0", "emb_1"], types.Embeddings),
       ([], types.AttentionHeads),
       # Check subclasses
       (list(spec.keys()), types.LitType),
       (["text", "generated_text"], types.TextSegment),
       (["score", "scalar_foo"], types.Scalar),
   )
   for expected_keys, type_filter in cases:
     self.assertEqual(expected_keys, utils.find_spec_keys(spec, type_filter))

예제 #2

0

파일 보기

 def spec(self):
   """Returns the spec: two Embeddings fields followed by three Scalar fields."""
   fields = {}
   for emb_key in ('span1_embs', 'span2_embs'):
     fields[emb_key] = lit_types.Embeddings()
   for scalar_key in ('label', 'src_idx', 'edge_idx'):
     fields[scalar_key] = lit_types.Scalar()
   return fields

예제 #3

0

파일 보기

  def output_spec(self) -> Spec:
    """Builds the output spec, one entry per field.

    Per-segment fields are added for text_a and, when config.text_b_name is
    set, for text_b as well. Token gradients and the "grad_class" field are
    only included when config.compute_grads is set.

    Returns:
      A dict mapping field names to lit_types descriptors.
    """
    cfg = self.config
    # text_a always contributes fields; text_b only when a name is configured.
    segments = [cfg.text_a_name]
    if cfg.text_b_name:
      segments.append(cfg.text_b_name)

    ret = {"tokens": lit_types.Tokens()}
    for seg in segments:
      ret["tokens_" + seg] = lit_types.Tokens(parent=seg)

    if self.is_regression:
      ret["score"] = lit_types.RegressionScore(parent=cfg.label_name)
    else:
      ret["probas"] = lit_types.MulticlassPreds(
          parent=cfg.label_name,
          vocab=cfg.labels,
          null_idx=cfg.null_label_idx)

    ret["cls_emb"] = lit_types.Embeddings()
    # Average embeddings, one per layer including embeddings.
    num_layers = self.model.config.num_hidden_layers
    ret.update({
        f"layer_{i}/avg_emb": lit_types.Embeddings()
        for i in range(1 + num_layers)
    })

    ret["cls_grad"] = lit_types.Gradients(
        grad_for="cls_emb", grad_target_field_key="grad_class")

    # The input_embs_ and grad_class fields are used for Integrated Gradients.
    for seg in segments:
      ret["input_embs_" + seg] = lit_types.TokenEmbeddings(
          align="tokens_" + seg)

    # Gradients, if requested.
    if cfg.compute_grads:
      ret["grad_class"] = lit_types.CategoryLabel(required=False,
                                                  vocab=cfg.labels)
      for seg in segments:
        ret["token_grad_" + seg] = lit_types.TokenGradients(
            align="tokens_" + seg,
            grad_for="input_embs_" + seg,
            grad_target_field_key="grad_class")

    # Attention heads, one field for each layer.
    for i in range(num_layers):
      ret[f"layer_{i+1}/attention"] = lit_types.AttentionHeads(
          align_in="tokens", align_out="tokens")
    return ret

예제 #4

0

파일 보기

    def output_spec(self) -> Spec:
        """Builds the output spec, one entry per field.

        Per-segment token fields are added for text_a and, when
        config.text_b_name is set, for text_b. Token gradients are only
        included when config.compute_grads is set.

        Returns:
            A dict mapping field names to lit_types descriptors.
        """
        cfg = self.config
        # text_a always contributes fields; text_b only when configured.
        segments = [cfg.text_a_name]
        if cfg.text_b_name:
            segments.append(cfg.text_b_name)

        ret = {"tokens": lit_types.Tokens()}
        for seg in segments:
            ret["tokens_" + seg] = lit_types.Tokens()

        if self.is_regression:
            ret["score"] = lit_types.RegressionScore(parent=cfg.label_name)
        else:
            ret["probas"] = lit_types.MulticlassPreds(
                parent=cfg.label_name,
                vocab=cfg.labels,
                null_idx=cfg.null_label_idx)
        ret["cls_emb"] = lit_types.Embeddings()

        # Gradients, if requested.
        if cfg.compute_grads:
            for seg in segments:
                ret["token_grad_" + seg] = lit_types.TokenGradients(
                    align="tokens_" + seg)

        # Attention heads, one field for each layer.
        ret.update({
            f"layer_{i}/attention": lit_types.AttentionHeads(
                align=("tokens", "tokens"))
            for i in range(self.model.config.num_hidden_layers)
        })

        return ret

예제 #5

0

파일 보기

 def output_spec(self) -> lit_types.Spec:
     """Returns the output spec with "tokens", "probas" and "cls_emb" fields."""
     class_probs = lit_types.MulticlassPreds(parent="label", vocab=self.LABELS)
     out = {"tokens": lit_types.Tokens()}
     out["probas"] = class_probs
     out["cls_emb"] = lit_types.Embeddings()
     return out

예제 #6

0

파일 보기

파일: t5.py 프로젝트: oceanfly/lit

    def output_spec(self):
        """Extends the parent class's output spec with token-level fields.

        "output_text" is replaced by a GeneratedTextCandidates field when
        config.num_to_generate > 1. Per-layer encoder and decoder attention
        fields are added when config.output_attention is set.
        """
        spec = super().output_spec()  # has 'output_text'
        spec["input_tokens"] = lit_types.Tokens(parent="input_text")
        spec["encoder_final_embedding"] = lit_types.Embeddings()
        # If target text is given, the following will also be populated.
        spec["target_tokens"] = lit_types.Tokens(parent="target_text")
        spec["pred_tokens"] = lit_types.TokenTopKPreds(align="target_tokens")

        if self.config.num_to_generate > 1:
            spec["output_text"] = lit_types.GeneratedTextCandidates(
                parent="target_text")

        if not self.config.output_attention:
            return spec

        # Add attention for each layer.
        for i in range(self.num_layers):
            encoder_attention = lit_types.AttentionHeads(
                align_in="input_tokens", align_out="input_tokens")
            spec[f"encoder_layer_{i:d}_attention"] = encoder_attention
            decoder_attention = lit_types.AttentionHeads(
                align_in="target_tokens", align_out="target_tokens")
            spec[f"decoder_layer_{i:d}_attention"] = decoder_attention
        return spec

예제 #7

0

파일 보기

 def output_spec(self) -> lit_types.Spec:
     """Returns the output spec: tokens, class probabilities, embedding, token gradients."""
     out = {}
     out["tokens"] = lit_types.Tokens()
     out["probas"] = lit_types.MulticlassPreds(
         parent="label", vocab=self._labels)
     out["cls_emb"] = lit_types.Embeddings()
     out["token_grad_sentence"] = lit_types.TokenGradients(align="tokens")
     return out

예제 #8

0

파일 보기

 def output_spec(self) -> lit_types.Spec:
     """Returns the output spec: tokens, a regression score, embedding, token gradients."""
     out = {}
     out["tokens"] = lit_types.Tokens()
     out["logits"] = lit_types.RegressionScore()
     out["cls_emb"] = lit_types.Embeddings()
     out["token_grad_sentence"] = lit_types.TokenGradients(align="tokens")
     return out

예제 #9

0

파일 보기

 def output_spec(self):
   """Returns the output spec: 'probas', 'cls_emb', 'cls_grad' and 'grad_class'."""
   class_probs = lit_types.MulticlassPreds(
       parent='label', vocab=['0', '1'], null_idx=0)
   embedding = lit_types.Embeddings()
   # The gradient field names the embedding it is taken for and the field
   # holding the target class.
   embedding_grad = lit_types.Gradients(
       grad_for='cls_emb', grad_target='grad_class')
   target_class = lit_types.CategoryLabel()
   return {
       'probas': class_probs,
       'cls_emb': embedding,
       'cls_grad': embedding_grad,
       'grad_class': target_class,
   }

예제 #10

0

파일 보기

파일: pretrained_lms.py 프로젝트: oceanfly/lit

 def output_spec(self):
     """Returns the output spec: predicted tokens, tokens, and per-layer fields."""
     spec = {}
     spec["pred_tokens"] = lit_types.TokenTopKPreds(align="tokens")
     # the "parent" keyword tells LIT which field in the input spec we should
     # compare this to when computing metrics.
     spec["tokens"] = lit_types.Tokens(parent="text")  # all tokens
     # Add attention and embeddings from each layer.
     for i in range(self.num_layers):
         spec.update({
             f"layer_{i:d}_attention": lit_types.AttentionHeads(
                 align_in="tokens", align_out="tokens"),
             f"layer_{i:d}_avg_embedding": lit_types.Embeddings(),
         })
     return spec

예제 #11

0

파일 보기

파일: lit_tool.py 프로젝트: vsa-datascience/vlaams-twitter-sentiment-model

 def output_spec(self) -> lit_types.Spec:
     """Return the output spec as a dict of field name to lit_types descriptor.

     Token gradients are included only when self.compute_grads is set; one
     attention field is added per hidden layer of self.model.
     """
     ret = {}
     ret["tokens"] = lit_types.Tokens()
     ret["probas"] = lit_types.MulticlassPreds(parent="label", vocab=self.LABELS)
     ret["cls_emb"] = lit_types.Embeddings()

     # Gradients, if requested.
     if self.compute_grads:
         ret["token_grad_sentence"] = lit_types.TokenGradients(align="tokens")

     # Attention heads, one field for each layer.
     ret.update({
         f"layer_{i}/attention": lit_types.AttentionHeads(
             align=("tokens", "tokens"))
         for i in range(self.model.config.num_hidden_layers)
     })
     return ret

예제 #12

0

파일 보기

파일: t5.py 프로젝트: smesaric/lit

 def output_spec(self):
   """Returns the output spec; attention fields are added per layer on request.

   Encoder and decoder attention fields are included only when
   config.output_attention is set.
   """
   spec = {}
   spec["input_tokens"] = lit_types.Tokens(parent="input_text")
   spec["generation"] = lit_types.GeneratedText(parent="target_text")
   spec["encoder_final_embedding"] = lit_types.Embeddings()
   # If target text is given, the following will also be populated.
   spec["target_tokens"] = lit_types.Tokens(parent="target_text")
   spec["pred_tokens"] = lit_types.TokenTopKPreds(align="target_tokens")
   spec["rougeL"] = lit_types.Scalar()

   if not self.config.output_attention:
     return spec

   # Add attention for each layer.
   for i in range(self.num_layers):
     spec.update({
         f"encoder_layer_{i:d}_attention": lit_types.AttentionHeads(
             align=("input_tokens", "input_tokens")),
         f"decoder_layer_{i:d}_attention": lit_types.AttentionHeads(
             align=("target_tokens", "target_tokens")),
     })
   return spec

예제 #13

0

파일 보기

파일: analyze.py 프로젝트: sillsdev/silnlp

 def output_spec(self) -> lit_types.Spec:
     """Returns the output spec: token, text, attention, embedding and score fields."""
     out = {}
     out["src_tokens"] = lit_types.Tokens(parent="src_text")
     out["trg_text"] = lit_types.GeneratedText(parent="ref_text")
     out["trg_tokens"] = lit_types.Tokens(parent="trg_text")
     # Attention is aligned from source tokens to target tokens.
     out["attention"] = lit_types.AttentionHeads(
         align_in="src_tokens", align_out="trg_tokens")
     out["pred_tokens"] = lit_types.TokenTopKPreds(
         align="trg_tokens", parent="trg_text")
     out["encoder_final_embedding"] = lit_types.Embeddings()
     for score_key in ("ter", "chrf3"):
         out[score_key] = lit_types.Scalar()
     return out

예제 #14

0

파일 보기

 def meta_spec(self) -> types.Spec:
     """Returns the meta spec: a category label and an embeddings field."""
     meta = {}
     meta[CLUSTER_ID_KEY] = types.CategoryLabel()
     meta[REPRESENTATION_KEY] = types.Embeddings()
     return meta

예제 #15

0

파일 보기

 def input_spec(self):
   """Returns the input spec: two Embeddings fields and an optional 'label'."""
   fields = {
       emb_key: lit_types.Embeddings()
       for emb_key in ('span1_embs', 'span2_embs')
   }
   fields['label'] = lit_types.Scalar(required=False)  # in range [0,1]
   return fields

예제 #16

0

파일 보기

 def input_spec(self):
   """Returns the input spec: a single Embeddings field named "x"."""
   # 'x' denotes input features
   input_features = lit_types.Embeddings()
   return {"x": input_features}

예제 #17

0

파일 보기

 def output_spec(self):
   """Returns the output spec: a single Embeddings field named "z"."""
   # 'z' denotes projected embeddings
   projected = lit_types.Embeddings()
   return {"z": projected}

예제 #18

0

파일 보기

파일: pretrained_lms.py 프로젝트: oceanfly/lit

 def output_spec(self):
     """Returns the output spec with "tokens", "pred_tokens" and "cls_emb" fields."""
     out = {}
     out["tokens"] = lit_types.Tokens(parent="text")
     out["pred_tokens"] = lit_types.TokenTopKPreds(align="tokens")
     out["cls_emb"] = lit_types.Embeddings()
     return out