def test_get_dataset_fn_scoring(self):
  # Setup mock BIG-bench task
  self.mock_bigbench_json_task._ds = self._flatten([[{
      "input": "some input",
      "target": ["some output 1", "some output 2"]
  }, {
      "input": "some more input",
      "target": ["some more output"]
  }] for _ in range(100)])
  self.mock_bigbench_json_task.task_prefix = "prefix: "
  self.mock_bb_json_task_api.return_value = self.mock_bigbench_json_task

  dataset_fn = bb.get_dataset_fn(
      task_name="test_task",
      task_path="test/task/path.json",
      bigbench_task_type=bb.BigBenchTaskType.SCORING)
  ds = dataset_fn(split="all", shuffle_files=False)

  expected = self._flatten([[{
      "inputs": "prefix: some input",
      "targets": "some output 1",
      "idx": np.array([idx, 0])
  }, {
      "inputs": "prefix: some input",
      "targets": "some output 2",
      "idx": np.array([idx, 1])
  }, {
      "inputs": "prefix: some more input",
      "targets": "some more output",
      "idx": np.array([idx + 1, 0])
  }] for idx in range(0, 200, 2)])
  test_utils.assert_dataset(ds, expected)
def test_get_dataset_fn_train_split(self):
  self.mock_bigbench_json_task._ds = [{
      "input": str(i),
      "target": [str(i + 1), str(i + 2)]
  } for i in range(100)]
  self.mock_bigbench_json_task.task_prefix = "prefix: "
  self.mock_bb_json_task_api.return_value = self.mock_bigbench_json_task

  dataset_fn = bb.get_dataset_fn(task_name="test_task",
                                 task_path="test/task/path.json")
  ds = dataset_fn(split="train", shuffle_files=False)

  expected = [
      [
          {
              # multiple targets are split into separate examples.
              "inputs": f"prefix: {i}",
              "targets": str(i + 1),
              "idx": np.array([idx, 0])
          },
          {
              "inputs": f"prefix: {i}",
              "targets": str(i + 2),
              "idx": np.array([idx, 1])
          },
      ] for idx, i in enumerate(self._get_train_split_ids())
  ]
  expected = self._flatten(expected)
  test_utils.assert_dataset(ds, expected)
def register_seqio_task(
    bigbench_task_name: str,
    bigbench_task_path: str,
    bigbench_task_type: bb.BigBenchTaskType,
    vocab: SeqIOVocabulary,
    num_shots: int,
    bigbench_subtask_name: Optional[str] = None,
    max_examples: Optional[int] = None,
    strip_inputs: bool = True,
    strip_targets: bool = True,
    add_inputs_eos: bool = False,
    add_targets_eos: bool = False,
    json_util: json_utils.JsonUtils = json_utils.get_default_json_utils(),
    min_validation_examples: int = _GLOBAL_MIN_VALIDATION_EXAMPLES,
    additional_metrics: Optional[Sequence[seqio.MetricFnCallable]] = None,
) -> str:
  """Registers a BIG-bench SeqIO Task and returns the Task name."""
  seqio_task_name = get_seqio_name(bigbench_task_name, bigbench_task_type,
                                   vocab, num_shots, bigbench_subtask_name,
                                   max_examples)
  if seqio_task_name in seqio.TaskRegistry.names():
    return seqio_task_name

  additional_metrics = additional_metrics or []

  seqio.TaskRegistry.add(
      seqio_task_name,
      source=seqio.FunctionDataSource(
          bb.get_dataset_fn(
              task_name=bigbench_task_name,
              task_path=bigbench_task_path,
              subtask_name=bigbench_subtask_name,
              num_shots=num_shots,
              bigbench_task_type=bigbench_task_type,
              max_examples=max_examples,
              json_util=json_util,
              min_validation_examples=min_validation_examples,
          ),
          splits=["all", "train", "validation"]),
      preprocessors=bb.get_preprocessors(
          strip_inputs=strip_inputs, strip_targets=strip_targets),
      output_features=bb.get_output_features(
          vocab=vocab.vocabulary,
          add_inputs_eos=add_inputs_eos,
          add_targets_eos=add_targets_eos),
      postprocess_fn=bb.get_postprocess_fn(
          task_name=bigbench_task_name,
          task_path=bigbench_task_path,
          subtask_name=bigbench_subtask_name,
          bigbench_task_type=bigbench_task_type,
          json_util=json_util),
      metric_fns=[
          bb.get_metric_fn(
              task_name=bigbench_task_name,
              task_path=bigbench_task_path,
              subtask_name=bigbench_subtask_name,
              bigbench_task_type=bigbench_task_type,
              json_util=json_util)
      ] + additional_metrics)
  return seqio_task_name
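# Illustrative usage sketch (not part of the original module): how a caller
# might register a BIG-bench JSON task via register_seqio_task and read one
# preprocessed example back through SeqIO. The task name and task path below
# are hypothetical placeholders, and `vocab` is assumed to be a SeqIOVocabulary
# whose `.vocabulary` wraps something like seqio.SentencePieceVocabulary;
# adjust to the actual SeqIOVocabulary definition in this codebase.
def _example_register_and_load(vocab: SeqIOVocabulary):
  task_name = register_seqio_task(
      bigbench_task_name="my_bigbench_task",     # hypothetical task name
      bigbench_task_path="/path/to/task.json",   # hypothetical task path
      bigbench_task_type=bb.BigBenchTaskType.SCORING,
      vocab=vocab,
      num_shots=0)
  # The registered Task exposes the "all", "train", and "validation" splits
  # declared in the FunctionDataSource above.
  ds = seqio.get_mixture_or_task(task_name).get_dataset(
      sequence_length={"inputs": 512, "targets": 64},
      split="train",
      shuffle=False)
  for example in ds.take(1):
    print(example["inputs"], example["targets"])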
def test_get_dataset_fn_multiple_choice_gen_targets(self):
  self.mock_bigbench_json_task._ds = self._flatten([[
      {
          "input": "some input 1",
          "target_scores": {
              "ans1": 0,
              "ans2": 1,
          },
          "target": ["some target 1"]
      },
      {
          "input": "some input 2",
          "target_scores": {
              "ans1": 1,
              "ans2": 0,
          },
          "target": ["some target 2", "some target 3"]
      },
  ] for _ in range(100)])
  self.mock_bigbench_json_task.task_prefix = "prefix: "
  self.mock_bb_json_task_api.return_value = self.mock_bigbench_json_task

  dataset_fn = bb.get_dataset_fn(
      task_name="test_task",
      task_path="test/task/path.json",
      bigbench_task_type=bb.BigBenchTaskType.MULTIPLE_CHOICE)
  ds = dataset_fn(split="all", shuffle_files=False)

  expected = [[{
      "inputs": "prefix: some input 1",
      "targets": "ans1",
      "scores": 0.0,
      "idx": [idx, 0],
      "answers": ["some target 1"]
  }, {
      "inputs": "prefix: some input 1",
      "targets": "ans2",
      "scores": 1.0,
      "idx": [idx, 1],
      "answers": ["some target 1"]
  }, {
      "inputs": "prefix: some input 2",
      "targets": "ans1",
      "scores": 1.0,
      "idx": [idx + 1, 0],
      "answers": ["some target 2", "some target 3"]
  }, {
      "inputs": "prefix: some input 2",
      "targets": "ans2",
      "scores": 0.0,
      "idx": [idx + 1, 1],
      "answers": ["some target 2", "some target 3"]
  }] for idx in range(0, 200, 2)]
  expected = self._flatten(expected)
  test_utils.assert_dataset(ds, expected)
def test_get_dataset_fn_ds_too_small(self):
  # Setup mock BIG-bench task
  self.mock_bigbench_json_task._ds = [{
      "input": str(i),
      "target": [str(i + 1)]
  } for i in range(3)]
  self.mock_bigbench_json_task.task_prefix = ""
  self.mock_bb_json_task_api.return_value = self.mock_bigbench_json_task

  dataset_fn = bb.get_dataset_fn(task_name="test_task",
                                 task_path="test/task/path.json")
  with self.assertRaisesRegex(
      ValueError,
      "Task: test_task, subtask: None, has too few examples: 3.*"):
    _ = dataset_fn(split="validation", shuffle_files=False)
def test_get_dataset_fn_invalid_split(self):
  # Setup mock BIG-bench task
  self.mock_bigbench_json_task._ds = [{
      "input": str(i),
      "target": [str(i + 1)]
  } for i in range(10)]
  self.mock_bigbench_json_task.task_prefix = ""
  self.mock_bb_json_task_api.return_value = self.mock_bigbench_json_task

  dataset_fn = bb.get_dataset_fn(
      task_name="test_task",
      task_path="test/task/path.json",
  )
  with self.assertRaisesWithLiteralMatch(
      ValueError,
      "splits supported: ['all', 'train', 'validation'], got: test"):
    _ = dataset_fn(split="test", shuffle_files=False)
def test_get_dataset_fn_validation_split(self):
  self.mock_bigbench_json_task._ds = [{
      "input": str(i),
      "target": [str(i + 1), str(i + 2)]
  } for i in range(100)]
  self.mock_bigbench_json_task.task_prefix = "prefix: "
  self.mock_bb_json_task_api.return_value = self.mock_bigbench_json_task

  dataset_fn = bb.get_dataset_fn(task_name="test_task",
                                 task_path="test/task/path.json")
  ds = dataset_fn(split="validation", shuffle_files=False)

  expected = [{
      # multiple targets are not split into separate examples.
      "inputs": f"prefix: {i}",
      "targets": str(i + 1),
      "answers": [str(i + 1), str(i + 2)],
  } for i in self._get_validation_split_ids()]
  test_utils.assert_dataset(ds, expected)
def test_get_dataset_fn_all_split(self):
  self.mock_bigbench_json_task._ds = [{
      "input": str(i),
      "target": [str(i + 1)]
  } for i in range(100)]
  self.mock_bigbench_json_task.task_prefix = "prefix: "
  self.mock_bb_json_task_api.return_value = self.mock_bigbench_json_task

  dataset_fn = bb.get_dataset_fn(task_name="test_task",
                                 task_path="test/task/path.json")
  ds = dataset_fn(split="all", shuffle_files=False)

  expected = [{
      "inputs": f"prefix: {i}",
      "targets": str(i + 1),
      "answers": [str(i + 1)],
  } for i in range(100)]
  test_utils.assert_dataset(ds, expected)
def test_get_dataset_fn_few_shot(self, mock_nshot_maker):
  self.mock_bigbench_json_task._ds = [{
      "input": "some input",
      "target": ["some output"]
  } for _ in range(100)]
  self.mock_bigbench_json_task.few_shot_example_separator = ","
  self.mock_bigbench_json_task.task_prefix = "prefix: "
  self.mock_bigbench_json_task.rng = None
  self.mock_bb_json_task_api.return_value = self.mock_bigbench_json_task

  mock_nshot_maker.return_value = [{
      "input": "fewshot input",
      "target": ["fewshot output"]
  } for _ in range(100)]

  dataset_fn = bb.get_dataset_fn(task_name="test_task",
                                 task_path="test/task/path.json",
                                 num_shots=1)
  ds = dataset_fn(split="all", shuffle_files=False)

  expected = [{
      "inputs": "prefix: fewshot input",
      "targets": "fewshot output",
      "answers": ["fewshot output"]
  } for _ in range(100)]
  test_utils.assert_dataset(ds, expected)
def test_get_dataset_fn_shuffle_max_examples(self):
  # Setup mock BIG-bench task
  self.mock_bigbench_json_task._ds = [{
      "input": str(i),
      "target": [str(i + 1)]
  } for i in range(100)]
  self.mock_bigbench_json_task.task_prefix = ""
  self.mock_bb_json_task_api.return_value = self.mock_bigbench_json_task

  dataset_fn = bb.get_dataset_fn(task_name="test_task",
                                 task_path="test/task/path.json",
                                 max_examples=50)
  ds_all = dataset_fn(split="all", shuffle_files=True, seed=23)
  ds_train = dataset_fn(split="train", shuffle_files=True, seed=23)
  ds_val = dataset_fn(split="validation", shuffle_files=True, seed=23)

  expected_all = [{
      "inputs": str(i),
      "targets": str(i + 1),
      "answers": [str(i + 1)],
  } for i in self._get_shuffled_sampled_all_split_ids()]
  expected_train = [{
      "inputs": str(i),
      "targets": str(i + 1),
      "idx": np.array([idx, 0])
  } for idx, i in enumerate(self._get_shuffled_sampled_train_split_ids())]
  expected_val = [{
      "inputs": str(i),
      "targets": str(i + 1),
      "answers": [str(i + 1)],
  } for i in self._get_shuffled_sampled_validation_split_ids()]

  test_utils.assert_dataset(ds_all, expected_all)
  test_utils.assert_dataset(ds_train, expected_train)
  test_utils.assert_dataset(ds_val, expected_val)
def _generate_examples(
    self,
    split,  # method parameters are unpacked from `gen_kwargs` as given in `_split_generators`
):
    """Yields examples as (key, example) tuples."""
    validate_task_name(self.config.task_name)
    if self.config.subtask_name:
        # Subtasks are sometimes written in BIG-bench as task_name:subtask_name.
        # We want to remove the task_name from the subtask name:
        self.config.subtask_name = self.config.subtask_name.split(":")[-1]
        validate_subtask_name(self.config.task_name, self.config.subtask_name)

    if split == "all":
        # No cutoff on the number of examples for the 'all' split.
        MIN_VALIDATION_EXAMPLES = 0
    else:
        MIN_VALIDATION_EXAMPLES = 16

    try:
        task_path, json_util = bb_json_paths.get_task_path(self.config.task_name)

        has_subtasks = bb_json_paths.has_subtasks(self.config.task_name)
        if has_subtasks:
            subtask_names = bb_json_paths.get_subtask_names(self.config.task_name)
            num_subtasks = len(subtask_names)
            min_validation_examples_per_subtask = div_or_none(
                MIN_VALIDATION_EXAMPLES, num_subtasks)

        if not has_subtasks:
            ds_fn = bbb.get_dataset_fn(
                task_name=self.config.task_name,
                task_path=task_path,
                subtask_name=None,
                num_shots=self.config.num_shots,
                bigbench_task_type=bbb.BigBenchTaskType.HUGGINGFACE,
                max_examples=self.config.max_examples,
                json_util=json_util,
                min_validation_examples=MIN_VALIDATION_EXAMPLES,
                format_fn=json_task.default_format_fn,
            )
            ds_list = [ds_fn(split)]
        elif self.config.subtask_name is not None:
            ds_fn = bbb.get_dataset_fn(
                task_name=self.config.task_name,
                task_path=task_path,
                subtask_name=self.config.subtask_name,
                num_shots=self.config.num_shots,
                bigbench_task_type=bbb.BigBenchTaskType.HUGGINGFACE,
                max_examples=self.config.max_examples,
                json_util=json_util,
                min_validation_examples=min_validation_examples_per_subtask,
                format_fn=json_task.default_format_fn,
            )
            ds_list = [ds_fn(split)]
        else:
            # Create a mixture of all subtasks.
            ds_list = []
            for subtask_name in subtask_names:
                subtask_name = subtask_name.split(":")[-1]
                logger.info(f"Loading subtask {subtask_name}, {split} split")
                ds_fn = bbb.get_dataset_fn(
                    task_name=self.config.task_name,
                    task_path=task_path,
                    subtask_name=subtask_name,
                    num_shots=self.config.num_shots,
                    bigbench_task_type=bbb.BigBenchTaskType.HUGGINGFACE,
                    max_examples=div_or_none(self.config.max_examples, num_subtasks),
                    json_util=json_util,
                    min_validation_examples=min_validation_examples_per_subtask,
                    format_fn=json_task.default_format_fn,
                )
                ds_list.append(ds_fn(split))
    except ValueError as value_error:
        # BIG-bench requires at least 16 examples to use the train & validation
        # splits, while using 'all'/'default' has no such requirement.
        if "has too few examples" in value_error.args[0] and split != "all":
            logger.warning(
                f"-- WARNING: skipping split {split} because it has too few examples. "
                "Please use 'default' split.")
            logger.warning(value_error)
            return
        raise value_error

    unique_key_counter = 0
    for ds in ds_list:
        for example in ds:
            unique_key_counter += 1
            yield unique_key_counter, {
                "idx": example["idx"],
                "inputs": example["inputs"].numpy().decode().strip(),
                "targets": [
                    target.numpy().decode().strip()
                    for target in example["targets"]
                ],
                "multiple_choice_targets": [
                    targets.decode().strip()
                    for targets in example["multiple_choice_targets"].numpy()
                ],
                "multiple_choice_scores": [
                    scores
                    for scores in example["multiple_choice_scores"].numpy()
                ],
            }
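# Illustrative usage sketch (not part of the builder script), assuming this file
# is loaded as a Hugging Face `datasets` builder: extra keyword arguments passed
# to load_dataset are forwarded to the builder config (subtask_name, num_shots,
# max_examples), which _generate_examples reads via self.config above. The
# script path and task name are hypothetical placeholders, and script-based
# loading depends on the installed `datasets` version.
#
#   from datasets import load_dataset
#
#   ds = load_dataset(
#       "path/to/bigbench.py",   # hypothetical local path to this builder script
#       "my_bigbench_task",      # hypothetical BIG-bench task name
#       num_shots=0)
#   # Tasks with too few validation examples only expose the "default" split
#   # (see the too-few-examples warning above).
#   print(ds["default"][0]["inputs"], ds["default"][0]["targets"])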