def spec(self): return { "premise": lit_types.TextSegment(), "hypothesis": lit_types.TextSegment(), "label": lit_types.CategoryLabel(vocab=self.LABELS), "language": lit_types.CategoryLabel(), }
def spec(self) -> Spec:
  return {
      'source': lit_types.TextSegment(),
      'source_language': lit_types.CategoryLabel(),
      'target': lit_types.TextSegment(),
      'target_language': lit_types.CategoryLabel(),
  }
def test_compute(self):
  multiclass_paired_metrics = metrics.MulticlassPairedMetrics()

  indices = ['7f7f85', '345ac4', '3a3112', '88bcda']
  metas = [{'parentId': '345ac4'}, {}, {}, {'parentId': '3a3112'}]

  # No swaps.
  result = multiclass_paired_metrics.compute_with_metadata(
      ['1', '1', '0', '0'], [[0, 1], [0, 1], [1, 0], [1, 0]],
      types.CategoryLabel(),
      types.MulticlassPreds(vocab=['0', '1'], null_idx=0), indices, metas)
  self.assertAlmostEqual(result, {
      'mean_jsd': 0.0,
      'num_pairs': 2,
      'swap_rate': 0.0
  })

  # One swap.
  result = multiclass_paired_metrics.compute_with_metadata(
      ['1', '1', '0', '0'], [[0, 1], [1, 0], [1, 0], [1, 0]],
      types.CategoryLabel(),
      types.MulticlassPreds(vocab=['0', '1'], null_idx=0), indices, metas)
  self.assertAlmostEqual(result, {
      'mean_jsd': 0.3465735902799726,
      'num_pairs': 2,
      'swap_rate': 0.5
  })

  # Two swaps.
  result = multiclass_paired_metrics.compute_with_metadata(
      ['1', '1', '0', '0'], [[0, 1], [1, 0], [1, 0], [0, 1]],
      types.CategoryLabel(),
      types.MulticlassPreds(vocab=['0', '1'], null_idx=0), indices, metas)
  self.assertAlmostEqual(result, {
      'mean_jsd': 0.6931471805599452,
      'num_pairs': 2,
      'swap_rate': 1.0
  })

  # Two swaps, no null index.
  result = multiclass_paired_metrics.compute_with_metadata(
      ['1', '1', '0', '0'], [[0, 1], [1, 0], [1, 0], [0, 1]],
      types.CategoryLabel(),
      types.MulticlassPreds(vocab=['0', '1']), indices, metas)
  self.assertAlmostEqual(result, {
      'mean_jsd': 0.6931471805599452,
      'num_pairs': 2,
      'swap_rate': 1.0
  })

  # Empty predictions, indices, and meta.
  result = multiclass_paired_metrics.compute_with_metadata(
      [], [],
      types.CategoryLabel(),
      types.MulticlassPreds(vocab=['0', '1'], null_idx=0), [], [])
  self.assertAlmostEqual(result, {})
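The mean_jsd values asserted above follow from the Jensen-Shannon divergence (natural log) between each parent/child prediction pair: identical distributions give 0, a full class swap gives ln(2) ~= 0.6931, and averaging two pairs with one swap gives ln(2)/2 ~= 0.3466. A minimal sketch of that arithmetic, not the library's own implementation (the helper name jsd is hypothetical):

import numpy as np


def jsd(p, q):
  """Jensen-Shannon divergence with natural log (hypothetical helper)."""
  p, q = np.asarray(p, dtype=float), np.asarray(q, dtype=float)
  m = 0.5 * (p + q)

  def kl(a, b):
    mask = a > 0
    return float(np.sum(a[mask] * np.log(a[mask] / b[mask])))

  return 0.5 * kl(p, m) + 0.5 * kl(q, m)


print(jsd([0, 1], [0, 1]))  # 0.0: the paired predictions agree.
print(jsd([0, 1], [1, 0]))  # ~0.6931 (= ln 2): the prediction swapped classes.
# Two pairs, one swap -> mean_jsd = (0 + ln 2) / 2 ~= 0.3466, swap_rate = 0.5.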
def spec(self) -> lit_types.Spec:
  return {
      'size': lit_types.CategoryLabel(vocab=['small', 'medium', 'large']),
      'weight': lit_types.Scalar(),
      'legs': lit_types.Boolean(),
      'description': lit_types.String(),
      'animal': lit_types.CategoryLabel(vocab=ANIMALS),
  }
def input_spec(self):
  return {
      'body_mass_g': lit_types.Scalar(),
      'culmen_depth_mm': lit_types.Scalar(),
      'culmen_length_mm': lit_types.Scalar(),
      'flipper_length_mm': lit_types.Scalar(),
      'island': lit_types.CategoryLabel(vocab=VOCABS['island']),
      'sex': lit_types.CategoryLabel(vocab=VOCABS['sex']),
  }
def spec(self) -> lit_types.Spec: return { "premise": lit_types.TextSegment(), "hypothesis": lit_types.TextSegment(), # 'label2' for 2-way NLI labels "label2": lit_types.CategoryLabel(vocab=self.LABELS), "heuristic": lit_types.CategoryLabel(), "template": lit_types.CategoryLabel(), }
def test_compute(self):
  multiclass_metrics = metrics.MulticlassMetrics()

  # All correct predictions.
  result = multiclass_metrics.compute(
      ['1', '2', '0', '1'], [[0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 1, 0]],
      types.CategoryLabel(),
      types.MulticlassPreds(vocab=['0', '1', '2'], null_idx=0))
  self.assertAlmostEqual(result, {
      'accuracy': 1.0,
      'f1': 1.0,
      'precision': 1.0,
      'recall': 1.0
  })

  # Some incorrect predictions.
  result = multiclass_metrics.compute(
      ['1', '2', '0', '1'], [[.1, .4, .5], [0, .1, .9], [.1, 0, .9], [0, 1, 0]],
      types.CategoryLabel(),
      types.MulticlassPreds(vocab=['0', '1', '2'], null_idx=0))
  self.assertAlmostEqual(
      result, {
          'accuracy': 0.5,
          'f1': 0.5714285714285715,
          'precision': 0.5,
          'recall': 0.6666666666666666
      })

  # All incorrect predictions.
  result = multiclass_metrics.compute(
      ['1', '2', '0', '1'], [[.1, .4, .5], [.2, .7, .1], [.1, 0, .9], [1, 0, 0]],
      types.CategoryLabel(),
      types.MulticlassPreds(vocab=['0', '1', '2'], null_idx=0))
  self.assertAlmostEqual(result, {
      'accuracy': 0.0,
      'f1': 0.0,
      'precision': 0.0,
      'recall': 0.0
  })

  # No null index.
  result = multiclass_metrics.compute(
      ['1', '2', '0', '1'], [[.1, .4, .5], [0, .1, .9], [.1, 0, .9], [0, 1, 0]],
      types.CategoryLabel(),
      types.MulticlassPreds(vocab=['0', '1', '2']))
  self.assertAlmostEqual(result, {'accuracy': 0.5})

  # Empty labels and predictions.
  result = multiclass_metrics.compute(
      [], [], types.CategoryLabel(),
      types.MulticlassPreds(vocab=['0', '1', '2'], null_idx=0))
  self.assertAlmostEqual(result, {})
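The non-trivial numbers in the "some incorrect" case are consistent with taking the argmax of each prediction and micro-averaging precision/recall/F1 over the non-null classes (null_idx=0 marks '0' as the negative class). A sketch of that check with scikit-learn, offered as an assumption about the averaging rather than the library's actual code path:

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

y_true = ['1', '2', '0', '1']
# argmax of [[.1, .4, .5], [0, .1, .9], [.1, 0, .9], [0, 1, 0]]:
y_pred = ['2', '2', '2', '1']

print(accuracy_score(y_true, y_pred))                                       # 0.5
print(precision_score(y_true, y_pred, labels=['1', '2'], average='micro'))  # 0.5
print(recall_score(y_true, y_pred, labels=['1', '2'], average='micro'))     # 0.666...
print(f1_score(y_true, y_pred, labels=['1', '2'], average='micro'))         # 0.5714...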
def spec(self) -> lit_types.Spec: """Should match MnliModel's input_spec().""" return { "premise": lit_types.TextSegment(), "hypothesis": lit_types.TextSegment(), # 'label' for 3-way NLI labels, 'label2' for binarized. "label": lit_types.CategoryLabel(vocab=self.LABELS3), "label2": lit_types.CategoryLabel(vocab=self.LABELS2), "genre": lit_types.CategoryLabel(), }
def input_spec(self):
  return {
      'input_embs': lit_types.TokenEmbeddings(align='tokens', required=False),
      'segment': lit_types.TextSegment(),
      'grad_class': lit_types.CategoryLabel(vocab=['0', '1'])
  }
def test_compatibility_optionals(self):
  """Test with optionals in the model spec."""
  mspec = model.ModelSpec(
      input={
          "text": types.TextSegment(),
          "tokens": types.Tokens(parent="text", required=False),
          "label": types.CategoryLabel(vocab=["0", "1"], required=False),
      },
      output={})
  dspec = {
      "text": types.TextSegment(),
      "label": types.CategoryLabel(vocab=["0", "1"]),
  }
  self.assertTrue(mspec.is_compatible_with_dataset(dspec))
def input_spec(self):
  return {
      'image': lit_types.ImageBytes(),
      'grad_target': lit_types.CategoryLabel(vocab=self.LABELS, required=False)
  }
def spec(self):
  return {
      'sentence': lit_types.TextSegment(),
      'review_title': lit_types.TextSegment(),
      'product_name': lit_types.TextSegment(),
      'label': lit_types.CategoryLabel(vocab=self.LABELS)
  }
def test_compatibility_optionals_mismatch(self):
  """Test with optionals that don't match metadata."""
  mspec = model.ModelSpec(
      input={
          "text": types.TextSegment(),
          "tokens": types.Tokens(parent="text", required=False),
          "label": types.CategoryLabel(vocab=["0", "1"], required=False),
      },
      output={})
  dspec = {
      "text": types.TextSegment(),
      # This label field doesn't match the one the model expects.
      "label": types.CategoryLabel(vocab=["foo", "bar"]),
  }
  self.assertFalse(mspec.is_compatible_with_dataset(dspec))
def config_spec(self) -> types.Spec:
  return {
      SALIENCE_MAPPER_KEY: types.CategoryLabel(
          required=True, vocab=list(self.salience_mappers.keys())),
      N_CLUSTERS_KEY: types.Scalar(min_val=2, max_val=100, default=2, step=1),
  }
def spec(self): return { "text": lit_types.TextSegment(), "tokens": lit_types.Tokens(parent="text"), "coref": lit_types.EdgeLabels(align="tokens"), # Metadata fields for filtering and analysis. "occupation": lit_types.CategoryLabel(), "participant": lit_types.CategoryLabel(), "answer": lit_types.CategoryLabel(vocab=ANSWER_VOCAB), "someone": lit_types.CategoryLabel(vocab=["True", "False"]), "pronouns": lit_types.CategoryLabel(vocab=list(PRONOUNS_BY_GENDER.values())), "pronoun_type": lit_types.CategoryLabel(vocab=["NOM", "POSS", "ACC"]), "gender": lit_types.CategoryLabel(vocab=[g.name for g in Gender]), "pf_bls": lit_types.Scalar(), }
def input_spec(self):
  return {
      'image': lit_types.ImageBytes(),
      # If `grad_target` is not specified then the label with the highest
      # predicted score is used as the gradient target.
      'grad_target': lit_types.CategoryLabel(vocab=self.labels, required=False)
  }
def output_spec(self):
  return {
      'preds': lit_types.MulticlassPreds(vocab=self.labels, autosort=True),
      'grads': lit_types.ImageGradients(
          align='image', grad_target_field_key='grad_target'),
      'grad_target': lit_types.CategoryLabel(vocab=self.labels)
  }
def spec(self) -> lit_types.Spec: """Dataset spec, which should match the model"s input_spec().""" return { "sentence": lit_types.TextSegment(), "label": lit_types.CategoryLabel(vocab=self.LABELS), "identity_attack": lit_types.Boolean(), "insult": lit_types.Boolean(), "obscene": lit_types.Boolean(), "severe_toxicity": lit_types.Boolean(), "threat": lit_types.Boolean() }
def input_spec(self) -> Spec:
  ret = {}
  ret[self.config.text_a_name] = lit_types.TextSegment()
  if self.config.text_b_name:
    ret[self.config.text_b_name] = lit_types.TextSegment()
  if self.is_regression:
    ret[self.config.label_name] = lit_types.RegressionScore(required=False)
  else:
    ret[self.config.label_name] = lit_types.CategoryLabel(
        required=False, vocab=self.config.labels)
  return ret
def output_spec(self):
  return {
      'probas': lit_types.MulticlassPreds(
          parent='label', vocab=['0', '1'], null_idx=0),
      'cls_emb': lit_types.Embeddings(),
      'cls_grad': lit_types.Gradients(
          grad_for='cls_emb', grad_target='grad_class'),
      'grad_class': lit_types.CategoryLabel()
  }
def output_spec(self):
  return {
      'probas': lit_types.MulticlassPreds(
          parent='label', vocab=['0', '1'], null_idx=0),
      'input_embs': lit_types.TokenEmbeddings(align='tokens'),
      'input_embs_grad': lit_types.TokenGradients(
          align='tokens', grad_for='input_embs', grad_target='grad_class'),
      'tokens': lit_types.Tokens(),
      'grad_class': lit_types.CategoryLabel(vocab=['0', '1'])
  }
def input_spec(self) -> Spec:
  ret = {}
  ret[self.config.text_a_name] = lit_types.TextSegment()
  if self.config.text_b_name:
    ret[self.config.text_b_name] = lit_types.TextSegment()
  if self.is_regression:
    ret[self.config.label_name] = lit_types.RegressionScore(required=False)
  else:
    ret[self.config.label_name] = lit_types.CategoryLabel(
        required=False, vocab=self.config.labels)
  # The input_embs_ and grad_class fields are used for Integrated Gradients.
  ret["input_embs_" + self.config.text_a_name] = lit_types.TokenEmbeddings(
      align="tokens", required=False)
  if self.config.text_b_name:
    ret["input_embs_" + self.config.text_b_name] = lit_types.TokenEmbeddings(
        align="tokens", required=False)
  ret["grad_class"] = lit_types.CategoryLabel(
      required=False, vocab=self.config.labels)
  return ret
def test_compatibility_extrafield(self):
  """Test with an extra field in the dataset."""
  mspec = model.ModelSpec(
      input={
          "text_a": types.TextSegment(),
          "text_b": types.TextSegment(),
      },
      output={})
  dspec = {
      "text_a": types.TextSegment(),
      "text_b": types.TextSegment(),
      "label": types.CategoryLabel(vocab=["0", "1"]),
  }
  self.assertTrue(mspec.is_compatible_with_dataset(dspec))
def create_train_dataset(config: Config) -> lit_dataset.Dataset:
  src_path = config.exp_dir / "train.src.txt"
  trg_path = config.exp_dir / "train.trg.txt"
  default_src_iso = config.default_src_iso
  default_trg_iso = config.default_trg_iso
  examples: List[lit_types.JsonDict] = []
  with src_path.open("r", encoding="utf-8") as src_file, open(
      trg_path, "r", encoding="utf-8") as trg_file:
    for src_line, trg_line in zip(src_file, trg_file):
      src_line = src_line.strip()
      trg_line = trg_line.strip()
      src_iso = default_src_iso
      if len(config.src_isos) > 1:
        src_iso = "?"
      trg_iso = default_trg_iso
      if src_line.startswith("<2"):
        index = src_line.index(">")
        val = src_line[2:index]
        if val != "qaa":
          trg_iso = val
      example: lit_types.JsonDict = {
          "vref": "?",
          "src_text": decode_sp(src_line),
          "ref_text": decode_sp(trg_line),
          "src_iso": src_iso,
          "trg_iso": trg_iso,
      }
      examples.append(example)
      if len(examples) == 2000:
        break

  spec: lit_types.JsonDict = {
      "vref": lit_types.CategoryLabel(),
      "src_text": lit_types.TextSegment(),
      "ref_text": lit_types.TextSegment(),
      "src_iso": lit_types.CategoryLabel(),
      "trg_iso": lit_types.CategoryLabel(),
  }
  return lit_dataset.Dataset(spec, examples, description="train dataset")
def output_spec(self) -> Spec: ret = {"tokens": lit_types.Tokens()} ret["tokens_" + self.config.text_a_name] = lit_types.Tokens( parent=self.config.text_a_name) if self.config.text_b_name: ret["tokens_" + self.config.text_b_name] = lit_types.Tokens( parent=self.config.text_b_name) if self.is_regression: ret["score"] = lit_types.RegressionScore(parent=self.config.label_name) else: ret["probas"] = lit_types.MulticlassPreds( parent=self.config.label_name, vocab=self.config.labels, null_idx=self.config.null_label_idx) ret["cls_emb"] = lit_types.Embeddings() # Average embeddings, one per layer including embeddings. for i in range(1 + self.model.config.num_hidden_layers): ret[f"layer_{i}/avg_emb"] = lit_types.Embeddings() ret["cls_grad"] = lit_types.Gradients( grad_for="cls_emb", grad_target_field_key="grad_class") # The input_embs_ and grad_class fields are used for Integrated Gradients. ret["input_embs_" + self.config.text_a_name] = lit_types.TokenEmbeddings( align="tokens_" + self.config.text_a_name) if self.config.text_b_name: ret["input_embs_" + self.config.text_b_name] = lit_types.TokenEmbeddings( align="tokens_" + self.config.text_b_name) # Gradients, if requested. if self.config.compute_grads: ret["grad_class"] = lit_types.CategoryLabel(required=False, vocab=self.config.labels) ret["token_grad_" + self.config.text_a_name] = lit_types.TokenGradients( align="tokens_" + self.config.text_a_name, grad_for="input_embs_" + self.config.text_a_name, grad_target_field_key="grad_class") if self.config.text_b_name: ret["token_grad_" + self.config.text_b_name] = lit_types.TokenGradients( align="tokens_" + self.config.text_b_name, grad_for="input_embs_" + self.config.text_b_name, grad_target_field_key="grad_class") # Attention heads, one field for each layer. for i in range(self.model.config.num_hidden_layers): ret[f"layer_{i+1}/attention"] = lit_types.AttentionHeads( align_in="tokens", align_out="tokens") return ret
def setUp(self):
  super(ThresholderTest, self).setUp()
  self.thresholder = thresholder.Thresholder()
  self.model = caching.CachingModelWrapper(
      glue_models.SST2Model(BERT_TINY_PATH), 'test')
  examples = [
      {'sentence': 'a', 'label': '1'},
      {'sentence': 'b', 'label': '1'},
      {'sentence': 'c', 'label': '1'},
      {'sentence': 'd', 'label': '1'},
      {'sentence': 'e', 'label': '1'},
      {'sentence': 'f', 'label': '0'},
      {'sentence': 'g', 'label': '0'},
      {'sentence': 'h', 'label': '0'},
      {'sentence': 'i', 'label': '0'},
  ]
  self.indexed_inputs = [{
      'id': caching.input_hash(ex),
      'data': ex
  } for ex in examples]
  self.dataset = lit_dataset.IndexedDataset(
      id_fn=caching.input_hash,
      spec={
          'sentence': lit_types.TextSegment(),
          'label': lit_types.CategoryLabel(vocab=['0', '1'])
      },
      indexed_examples=self.indexed_inputs)
  self.model_outputs = list(
      self.model.predict_with_metadata(
          self.indexed_inputs, dataset_name='test'))
def input_spec(self):
  return {
      'text': lit_types.TextSegment(),
      'tokens': lit_types.Tokens(parent='text'),
      'coref': lit_types.EdgeLabels(align='tokens'),
      # Index of predicted (single) edge for Winogender
      'answer': lit_types.CategoryLabel(
          vocab=winogender.ANSWER_VOCAB, required=False),
      # TODO(b/172975096): allow plotting of scalars from input data,
      # so we don't need to add this to the predictions.
      'pf_bls': lit_types.Scalar(required=False),
  }
def input_spec(self) -> lit_types.Spec: return { "sentence": lit_types.TextSegment(), "label": lit_types.CategoryLabel(vocab=self._labels, required=False) }
def create_test_dataset(config: Config) -> lit_dataset.Dataset:
  vref_file_names: List[str] = []
  features_file_names: List[str] = []
  refs_patterns: List[str] = []
  for src_iso in sorted(config.src_isos):
    prefix = "test" if len(config.src_isos) == 1 else f"test.{src_iso}"
    features_file_name = f"{prefix}.src.txt"
    if (config.exp_dir / features_file_name).is_file():
      # All target data is stored in a single file.
      vref_file_names.append(f"{prefix}.vref.txt")
      features_file_names.append(features_file_name)
      refs_patterns.append(f"{prefix}.trg.detok*.txt")
    else:
      # Target data is split into separate files.
      for trg_iso in sorted(config.trg_isos):
        prefix = f"test.{src_iso}.{trg_iso}"
        vref_file_names.append(f"{prefix}.vref.txt")
        features_file_names.append(f"{prefix}.src.txt")
        refs_patterns.append(f"{prefix}.trg.detok*.txt")

  default_src_iso = config.default_src_iso
  default_trg_iso = config.default_trg_iso

  spec: lit_types.JsonDict = {
      "vref": lit_types.CategoryLabel(),
      "src_text": lit_types.TextSegment(),
      "ref_text": lit_types.TextSegment(),
      "src_iso": lit_types.CategoryLabel(),
      "trg_iso": lit_types.CategoryLabel(),
  }

  examples: List[lit_types.JsonDict] = []
  for vref_file_name, features_file_name, refs_pattern in zip(
      vref_file_names, features_file_names, refs_patterns):
    src_iso = default_src_iso
    if features_file_name != "test.src.txt":
      src_iso = features_file_name.split(".")[1]
    with (config.exp_dir / features_file_name).open(
        "r", encoding="utf-8") as src_file, (
            config.exp_dir / vref_file_name).open(
                "r", encoding="utf-8") as vref_file:
      ref_file_paths = config.exp_dir.glob(refs_pattern)
      ref_files: List[IO] = []
      try:
        for ref_file_path in ref_file_paths:
          ref_files.append(ref_file_path.open("r", encoding="utf-8"))
        for lines in zip(src_file, vref_file, *ref_files):
          src_line = lines[0].strip()
          vref_line = lines[1].strip()
          trg_iso = default_trg_iso
          if src_line.startswith("<2"):
            index = src_line.index(">")
            val = src_line[2:index]
            if val != "qaa":
              trg_iso = val
          example: lit_types.JsonDict = {
              "vref": vref_line,
              "src_text": decode_sp(src_line),
              "src_iso": src_iso,
              "trg_iso": trg_iso,
          }
          for ref_index in range(len(ref_files)):
            ref_line = lines[ref_index + 2].strip()
            ref_key = "ref_text" if ref_index == 0 else f"ref_text_{ref_index}"
            example[ref_key] = ref_line
            if ref_key not in spec:
              spec[ref_key] = lit_types.TextSegment()
          examples.append(example)
      finally:
        for ref_file in ref_files:
          ref_file.close()

  return lit_dataset.Dataset(spec, examples, description="test dataset")
def spec(self) -> lit_types.Spec: """Dataset spec, which should match the model"s input_spec().""" return { "text": lit_types.TextSegment(), "label": lit_types.CategoryLabel(vocab=self.LABELS), }