def test_find_spec_keys(self):
  spec = {
      "score": types.RegressionScore(),
      "scalar_foo": types.Scalar(),
      "text": types.TextSegment(),
      "emb_0": types.Embeddings(),
      "emb_1": types.Embeddings(),
      "tokens": types.Tokens(),
      "generated_text": types.GeneratedText(),
  }
  self.assertEqual(["score"],
                   utils.find_spec_keys(spec, types.RegressionScore))
  self.assertEqual(["text", "tokens", "generated_text"],
                   utils.find_spec_keys(spec,
                                        (types.TextSegment, types.Tokens)))
  self.assertEqual(["emb_0", "emb_1"],
                   utils.find_spec_keys(spec, types.Embeddings))
  self.assertEqual([], utils.find_spec_keys(spec, types.AttentionHeads))
  # Check subclasses
  self.assertEqual(
      list(spec.keys()), utils.find_spec_keys(spec, types.LitType))
  self.assertEqual(["text", "generated_text"],
                   utils.find_spec_keys(spec, types.TextSegment))
  self.assertEqual(["score", "scalar_foo"],
                   utils.find_spec_keys(spec, types.Scalar))
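# A minimal sketch of what a find_spec_keys helper could look like, written
# only to be consistent with the test above; the real utils.find_spec_keys in
# lit_nlp.lib.utils may differ in details.
def find_spec_keys(spec, types_to_match):
  """Returns spec keys whose value is an instance of the given type(s).

  isinstance() accepts either a single LitType subclass or a tuple of them,
  and matches subclasses, which is what the subclass checks above rely on.
  """
  return [k for k, v in spec.items() if isinstance(v, types_to_match)]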
def spec(self):
  return {
      'span1_embs': lit_types.Embeddings(),
      'span2_embs': lit_types.Embeddings(),
      'label': lit_types.Scalar(),
      'src_idx': lit_types.Scalar(),
      'edge_idx': lit_types.Scalar(),
  }
def output_spec(self) -> Spec:
  ret = {"tokens": lit_types.Tokens()}
  ret["tokens_" + self.config.text_a_name] = lit_types.Tokens(
      parent=self.config.text_a_name)
  if self.config.text_b_name:
    ret["tokens_" + self.config.text_b_name] = lit_types.Tokens(
        parent=self.config.text_b_name)
  if self.is_regression:
    ret["score"] = lit_types.RegressionScore(parent=self.config.label_name)
  else:
    ret["probas"] = lit_types.MulticlassPreds(
        parent=self.config.label_name,
        vocab=self.config.labels,
        null_idx=self.config.null_label_idx)
  ret["cls_emb"] = lit_types.Embeddings()
  # Average embeddings, one per layer including embeddings.
  for i in range(1 + self.model.config.num_hidden_layers):
    ret[f"layer_{i}/avg_emb"] = lit_types.Embeddings()
  ret["cls_grad"] = lit_types.Gradients(
      grad_for="cls_emb", grad_target_field_key="grad_class")
  # The input_embs_ and grad_class fields are used for Integrated Gradients.
  ret["input_embs_" + self.config.text_a_name] = lit_types.TokenEmbeddings(
      align="tokens_" + self.config.text_a_name)
  if self.config.text_b_name:
    ret["input_embs_" + self.config.text_b_name] = lit_types.TokenEmbeddings(
        align="tokens_" + self.config.text_b_name)
  # Gradients, if requested.
  if self.config.compute_grads:
    ret["grad_class"] = lit_types.CategoryLabel(
        required=False, vocab=self.config.labels)
    ret["token_grad_" + self.config.text_a_name] = lit_types.TokenGradients(
        align="tokens_" + self.config.text_a_name,
        grad_for="input_embs_" + self.config.text_a_name,
        grad_target_field_key="grad_class")
    if self.config.text_b_name:
      ret["token_grad_" + self.config.text_b_name] = lit_types.TokenGradients(
          align="tokens_" + self.config.text_b_name,
          grad_for="input_embs_" + self.config.text_b_name,
          grad_target_field_key="grad_class")
  # Attention heads, one field for each layer.
  for i in range(self.model.config.num_hidden_layers):
    ret[f"layer_{i+1}/attention"] = lit_types.AttentionHeads(
        align_in="tokens", align_out="tokens")
  return ret
def output_spec(self) -> Spec:
  ret = {"tokens": lit_types.Tokens()}
  ret["tokens_" + self.config.text_a_name] = lit_types.Tokens()
  if self.config.text_b_name:
    ret["tokens_" + self.config.text_b_name] = lit_types.Tokens()
  if self.is_regression:
    ret["score"] = lit_types.RegressionScore(parent=self.config.label_name)
  else:
    ret["probas"] = lit_types.MulticlassPreds(
        parent=self.config.label_name,
        vocab=self.config.labels,
        null_idx=self.config.null_label_idx)
  ret["cls_emb"] = lit_types.Embeddings()
  # Gradients, if requested.
  if self.config.compute_grads:
    ret["token_grad_" + self.config.text_a_name] = lit_types.TokenGradients(
        align="tokens_" + self.config.text_a_name)
    if self.config.text_b_name:
      ret["token_grad_" + self.config.text_b_name] = lit_types.TokenGradients(
          align="tokens_" + self.config.text_b_name)
  # Attention heads, one field for each layer.
  for i in range(self.model.config.num_hidden_layers):
    ret[f"layer_{i}/attention"] = lit_types.AttentionHeads(
        align=("tokens", "tokens"))
  return ret
def output_spec(self) -> lit_types.Spec:
  return {
      "tokens": lit_types.Tokens(),
      "probas": lit_types.MulticlassPreds(parent="label", vocab=self.LABELS),
      "cls_emb": lit_types.Embeddings()
  }
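# Hedged usage sketch: given a model whose output_spec() looks like the one
# above, a component can locate compatible fields by type rather than by name.
# `model` is a placeholder; the import paths assume the lit_nlp package layout,
# and find_spec_keys is the helper exercised in the test at the top.
from lit_nlp.api import types as lit_types
from lit_nlp.lib import utils

pred_keys = utils.find_spec_keys(model.output_spec(), lit_types.MulticlassPreds)
emb_keys = utils.find_spec_keys(model.output_spec(), lit_types.Embeddings)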
def output_spec(self):
  spec = super().output_spec()  # has 'output_text'
  spec.update({
      "input_tokens": lit_types.Tokens(parent="input_text"),
      "encoder_final_embedding": lit_types.Embeddings(),
      # If target text is given, the following will also be populated.
      "target_tokens": lit_types.Tokens(parent="target_text"),
      "pred_tokens": lit_types.TokenTopKPreds(align="target_tokens"),
  })
  if self.config.num_to_generate > 1:
    spec["output_text"] = lit_types.GeneratedTextCandidates(
        parent="target_text")
  if self.config.output_attention:
    # Add attention for each layer.
    for i in range(self.num_layers):
      spec[f"encoder_layer_{i:d}_attention"] = lit_types.AttentionHeads(
          align_in="input_tokens", align_out="input_tokens")
      spec[f"decoder_layer_{i:d}_attention"] = lit_types.AttentionHeads(
          align_in="target_tokens", align_out="target_tokens")
  return spec
def output_spec(self) -> lit_types.Spec:
  return {
      "tokens": lit_types.Tokens(),
      "probas": lit_types.MulticlassPreds(parent="label", vocab=self._labels),
      "cls_emb": lit_types.Embeddings(),
      "token_grad_sentence": lit_types.TokenGradients(align="tokens")
  }
def output_spec(self) -> lit_types.Spec:
  return {
      "tokens": lit_types.Tokens(),
      "logits": lit_types.RegressionScore(),
      "cls_emb": lit_types.Embeddings(),
      "token_grad_sentence": lit_types.TokenGradients(align="tokens")
  }
def output_spec(self):
  return {
      'probas': lit_types.MulticlassPreds(
          parent='label', vocab=['0', '1'], null_idx=0),
      'cls_emb': lit_types.Embeddings(),
      'cls_grad': lit_types.Gradients(
          grad_for='cls_emb', grad_target='grad_class'),
      'grad_class': lit_types.CategoryLabel()
  }
def output_spec(self):
  spec = {
      # The "parent" keyword tells LIT which field in the input spec we should
      # compare this to when computing metrics.
      "pred_tokens": lit_types.TokenTopKPreds(align="tokens"),
      "tokens": lit_types.Tokens(parent="text"),  # all tokens
  }
  # Add attention and embeddings from each layer.
  for i in range(self.num_layers):
    spec[f"layer_{i:d}_attention"] = lit_types.AttentionHeads(
        align_in="tokens", align_out="tokens")
    spec[f"layer_{i:d}_avg_embedding"] = lit_types.Embeddings()
  return spec
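# Illustrative only: one prediction record shaped to match the spec above.
# Field names come from the spec; the token strings, head count, and embedding
# width are assumptions for the example, not a contract of the LIT API.
import numpy as np

example_preds = {
    "tokens": ["the", "cat", "sat"],
    # One list of top-k (token, probability) pairs per input token.
    "pred_tokens": [[("a", 0.4), ("the", 0.3)],
                    [("dog", 0.5), ("cat", 0.2)],
                    [("sat", 0.6), ("ran", 0.1)]],
    # Attention per layer: <float32>[num_heads, num_tokens, num_tokens].
    "layer_0_attention": np.zeros((12, 3, 3), dtype=np.float32),
    # Averaged embedding per layer: <float32>[emb_dim].
    "layer_0_avg_embedding": np.zeros((768,), dtype=np.float32),
}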
def output_spec(self) -> lit_types.Spec:
  """Returns the output spec."""
  ret = {
      "tokens": lit_types.Tokens(),
      "probas": lit_types.MulticlassPreds(parent="label", vocab=self.LABELS),
      "cls_emb": lit_types.Embeddings()
  }
  # Gradients, if requested.
  if self.compute_grads:
    ret["token_grad_sentence"] = lit_types.TokenGradients(align="tokens")
  # Attention heads, one field for each layer.
  for i in range(self.model.config.num_hidden_layers):
    ret[f"layer_{i}/attention"] = lit_types.AttentionHeads(
        align=("tokens", "tokens"))
  return ret
def output_spec(self):
  spec = {
      "input_tokens": lit_types.Tokens(parent="input_text"),
      "generation": lit_types.GeneratedText(parent="target_text"),
      "encoder_final_embedding": lit_types.Embeddings(),
      # If target text is given, the following will also be populated.
      "target_tokens": lit_types.Tokens(parent="target_text"),
      "pred_tokens": lit_types.TokenTopKPreds(align="target_tokens"),
      "rougeL": lit_types.Scalar(),
  }
  if self.config.output_attention:
    # Add attention for each layer.
    for i in range(self.num_layers):
      spec[f"encoder_layer_{i:d}_attention"] = lit_types.AttentionHeads(
          align=("input_tokens", "input_tokens"))
      spec[f"decoder_layer_{i:d}_attention"] = lit_types.AttentionHeads(
          align=("target_tokens", "target_tokens"))
  return spec
def output_spec(self) -> lit_types.Spec:
  return {
      "src_tokens": lit_types.Tokens(parent="src_text"),
      "trg_text": lit_types.GeneratedText(parent="ref_text"),
      "trg_tokens": lit_types.Tokens(parent="trg_text"),
      "attention": lit_types.AttentionHeads(
          align_in="src_tokens", align_out="trg_tokens"),
      "pred_tokens": lit_types.TokenTopKPreds(
          align="trg_tokens", parent="trg_text"),
      "encoder_final_embedding": lit_types.Embeddings(),
      "ter": lit_types.Scalar(),
      "chrf3": lit_types.Scalar(),
  }
def meta_spec(self) -> types.Spec:
  return {
      CLUSTER_ID_KEY: types.CategoryLabel(),
      REPRESENTATION_KEY: types.Embeddings(),
  }
def input_spec(self):
  return {
      'span1_embs': lit_types.Embeddings(),
      'span2_embs': lit_types.Embeddings(),
      'label': lit_types.Scalar(required=False),  # in range [0,1]
  }
def input_spec(self):
  # 'x' denotes input features
  return {"x": lit_types.Embeddings()}
def output_spec(self):
  # 'z' denotes projected embeddings
  return {"z": lit_types.Embeddings()}
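# Illustrative sketch of a projector that would satisfy the two specs above:
# it consumes "x" embeddings and emits lower-dimensional "z" embeddings.
# The class name and the PCA-via-SVD projection are assumptions for the
# example, not the actual LIT projector implementation.
import numpy as np


class TruncatedSvdProjector:
  """Projects input embeddings 'x' to n_components-dimensional 'z'."""

  def __init__(self, n_components=2):
    self.n_components = n_components

  def fit_transform(self, inputs):
    # inputs: iterable of {"x": <float32>[emb_dim]} records.
    x = np.stack([np.asarray(d["x"], dtype=np.float32) for d in inputs])
    x -= x.mean(axis=0, keepdims=True)  # center before taking components
    _, _, vt = np.linalg.svd(x, full_matrices=False)
    z = x @ vt[:self.n_components].T
    return [{"z": row} for row in z]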
def output_spec(self):
  return {
      "tokens": lit_types.Tokens(parent="text"),
      "pred_tokens": lit_types.TokenTopKPreds(align="tokens"),
      "cls_emb": lit_types.Embeddings(),
  }