Example 1
 def test_get_dataset_fn_scoring(self):
     # Set up a mock BIG-bench task
     self.mock_bigbench_json_task._ds = self._flatten([[{
         "input": "some input",
         "target": ["some output 1", "some output 2"]
     }, {
         "input": "some more input",
         "target": ["some more output"]
     }] for _ in range(100)])
     self.mock_bigbench_json_task.task_prefix = "prefix: "
     self.mock_bb_json_task_api.return_value = self.mock_bigbench_json_task
     dataset_fn = bb.get_dataset_fn(
         task_name="test_task",
         task_path="test/task/path.json",
         bigbench_task_type=bb.BigBenchTaskType.SCORING)
     ds = dataset_fn(split="all", shuffle_files=False)
     expected = self._flatten([[{
         "inputs": "prefix: some input",
         "targets": "some output 1",
         "idx": np.array([idx, 0])
     }, {
         "inputs": "prefix: some input",
         "targets": "some output 2",
         "idx": np.array([idx, 1])
     }, {
         "inputs": "prefix: some more input",
         "targets": "some more output",
         "idx": np.array([idx + 1, 0])
     }] for idx in range(0, 200, 2)])
     test_utils.assert_dataset(ds, expected)
Example 2
 def test_get_dataset_fn_train_split(self):
     self.mock_bigbench_json_task._ds = [{
         "input": str(i),
         "target": [str(i + 1), str(i + 2)]
     } for i in range(100)]
     self.mock_bigbench_json_task.task_prefix = "prefix: "
     self.mock_bb_json_task_api.return_value = self.mock_bigbench_json_task
     dataset_fn = bb.get_dataset_fn(task_name="test_task",
                                    task_path="test/task/path.json")
     ds = dataset_fn(split="train", shuffle_files=False)
     expected = [
         [
             {  # multiple targets are split into separate examples.
                 "inputs": f"prefix: {i}",
                 "targets": str(i + 1),
                 "idx": np.array([idx, 0])
             },
             {
                 "inputs": f"prefix: {i}",
                 "targets": str(i + 2),
                 "idx": np.array([idx, 1])
             }
         ] for idx, i in enumerate(self._get_train_split_ids())
     ]
     expected = self._flatten(expected)
     test_utils.assert_dataset(ds, expected)
Example 3
def register_seqio_task(
    bigbench_task_name: str,
    bigbench_task_path: str,
    bigbench_task_type: bb.BigBenchTaskType,
    vocab: SeqIOVocabulary,
    num_shots: int,
    bigbench_subtask_name: Optional[str] = None,
    max_examples: Optional[int] = None,
    strip_inputs: bool = True,
    strip_targets: bool = True,
    add_inputs_eos: bool = False,
    add_targets_eos: bool = False,
    json_util: json_utils.JsonUtils = json_utils.get_default_json_utils(),
    min_validation_examples: int = _GLOBAL_MIN_VALIDATION_EXAMPLES,
    additional_metrics: Optional[Sequence[seqio.MetricFnCallable]] = None,
) -> str:
    """Registers a BIG-bench SeqIO Task and returns the Task name."""
    seqio_task_name = get_seqio_name(bigbench_task_name, bigbench_task_type,
                                     vocab, num_shots, bigbench_subtask_name,
                                     max_examples)

    if seqio_task_name in seqio.TaskRegistry.names():
        return seqio_task_name

    additional_metrics = additional_metrics or []
    seqio.TaskRegistry.add(
        seqio_task_name,
        source=seqio.FunctionDataSource(
            bb.get_dataset_fn(
                task_name=bigbench_task_name,
                task_path=bigbench_task_path,
                subtask_name=bigbench_subtask_name,
                num_shots=num_shots,
                bigbench_task_type=bigbench_task_type,
                max_examples=max_examples,
                json_util=json_util,
                min_validation_examples=min_validation_examples,
            ),
            splits=["all", "train", "validation"]),
        preprocessors=bb.get_preprocessors(strip_inputs=strip_inputs,
                                           strip_targets=strip_targets),
        output_features=bb.get_output_features(
            vocab=vocab.vocabulary,
            add_inputs_eos=add_inputs_eos,
            add_targets_eos=add_targets_eos),
        postprocess_fn=bb.get_postprocess_fn(
            task_name=bigbench_task_name,
            task_path=bigbench_task_path,
            subtask_name=bigbench_subtask_name,
            bigbench_task_type=bigbench_task_type,
            json_util=json_util),
        metric_fns=[
            bb.get_metric_fn(task_name=bigbench_task_name,
                             task_path=bigbench_task_path,
                             subtask_name=bigbench_subtask_name,
                             bigbench_task_type=bigbench_task_type,
                             json_util=json_util)
        ] + additional_metrics)
    return seqio_task_name
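
For reference, a call to this helper might look like the sketch below. The task name, JSON path, and vocabulary value are placeholders rather than names taken from this codebase, and SCORING is simply one of the task types exercised in the surrounding tests; only standard SeqIO registry calls are relied on.

# Illustrative sketch only: task name, path, and vocabulary are placeholders.
registered_name = register_seqio_task(
    bigbench_task_name="my_bigbench_task",
    bigbench_task_path="/path/to/my_bigbench_task/task.json",
    bigbench_task_type=bb.BigBenchTaskType.SCORING,
    vocab=my_vocab,  # placeholder for a SeqIOVocabulary value
    num_shots=0,
)

# The registered task can then be retrieved and materialized via SeqIO.
task = seqio.get_mixture_or_task(registered_name)
ds = task.get_dataset(sequence_length={"inputs": 512, "targets": 64},
                      split="validation",
                      shuffle=False)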
Example 4
 def test_get_dataset_fn_multiple_choice_gen_targets(self):
     self.mock_bigbench_json_task._ds = self._flatten([[
         {
             "input": "some input 1",
             "target_scores": {
                 "ans1": 0,
                 "ans2": 1,
             },
             "target": ["some target 1"]
         },
         {
             "input": "some input 2",
             "target_scores": {
                 "ans1": 1,
                 "ans2": 0,
             },
             "target": ["some target 2", "some target 3"]
         },
     ] for _ in range(100)])
     self.mock_bigbench_json_task.task_prefix = "prefix: "
     self.mock_bb_json_task_api.return_value = self.mock_bigbench_json_task
     dataset_fn = bb.get_dataset_fn(
         task_name="test_task",
         task_path="test/task/path.json",
         bigbench_task_type=bb.BigBenchTaskType.MULTIPLE_CHOICE)
     ds = dataset_fn(split="all", shuffle_files=False)
     expected = [[{
         "inputs": "prefix: some input 1",
         "targets": "ans1",
         "scores": 0.0,
         "idx": [idx, 0],
         "answers": ["some target 1"]
     }, {
         "inputs": "prefix: some input 1",
         "targets": "ans2",
         "scores": 1.0,
         "idx": [idx, 1],
         "answers": ["some target 1"]
     }, {
         "inputs": "prefix: some input 2",
         "targets": "ans1",
         "scores": 1.0,
         "idx": [idx + 1, 0],
         "answers": ["some target 2", "some target 3"]
     }, {
         "inputs": "prefix: some input 2",
         "targets": "ans2",
         "scores": 0.0,
         "idx": [idx + 1, 1],
         "answers": ["some target 2", "some target 3"]
     }] for idx in range(0, 200, 2)]
     expected = self._flatten(expected)
     test_utils.assert_dataset(ds, expected)
Example 5
    def test_get_dataset_fn_ds_too_small(self):
        # Set up a mock BIG-bench task
        self.mock_bigbench_json_task._ds = [{
            "input": str(i),
            "target": [str(i + 1)]
        } for i in range(3)]
        self.mock_bigbench_json_task.task_prefix = ""
        self.mock_bb_json_task_api.return_value = self.mock_bigbench_json_task

        dataset_fn = bb.get_dataset_fn(task_name="test_task",
                                       task_path="test/task/path.json")

        with self.assertRaisesRegex(
                ValueError,
                "Task: test_task, subtask: None, has too few examples: 3.*"):
            _ = dataset_fn(split="validation", shuffle_files=False)
Example 6
 def test_get_dataset_fn_invalid_split(self):
     # Set up a mock BIG-bench task
     self.mock_bigbench_json_task._ds = [{
         "input": str(i),
         "target": [str(i + 1)]
     } for i in range(10)]
     self.mock_bigbench_json_task.task_prefix = ""
     self.mock_bb_json_task_api.return_value = self.mock_bigbench_json_task
     dataset_fn = bb.get_dataset_fn(
         task_name="test_task",
         task_path="test/task/path.json",
     )
     with self.assertRaisesWithLiteralMatch(
             ValueError,
             "splits supported: ['all', 'train', 'validation'], got: test"):
         _ = dataset_fn(split="test", shuffle_files=False)
Example 7
 def test_get_dataset_fn_validation_split(self):
     self.mock_bigbench_json_task._ds = [{
         "input": str(i),
         "target": [str(i + 1), str(i + 2)]
     } for i in range(100)]
     self.mock_bigbench_json_task.task_prefix = "prefix: "
     self.mock_bb_json_task_api.return_value = self.mock_bigbench_json_task
     dataset_fn = bb.get_dataset_fn(task_name="test_task",
                                    task_path="test/task/path.json")
     ds = dataset_fn(split="validation", shuffle_files=False)
     expected = [{  # multiple targets are not split into separate examples.
         "inputs": f"prefix: {i}",
         "targets": str(i + 1),
         "answers": [str(i + 1), str(i + 2)],
     } for i in self._get_validation_split_ids()]
     test_utils.assert_dataset(ds, expected)
Example 8
 def test_get_dataset_fn_all_split(self):
     self.mock_bigbench_json_task._ds = [{
         "input": str(i),
         "target": [str(i + 1)]
     } for i in range(100)]
     self.mock_bigbench_json_task.task_prefix = "prefix: "
     self.mock_bb_json_task_api.return_value = self.mock_bigbench_json_task
     dataset_fn = bb.get_dataset_fn(task_name="test_task",
                                    task_path="test/task/path.json")
     ds = dataset_fn(split="all", shuffle_files=False)
     expected = [{
         "inputs": f"prefix: {i}",
         "targets": str(i + 1),
         "answers": [str(i + 1)],
     } for i in range(100)]
     test_utils.assert_dataset(ds, expected)
Example 9
 def test_get_dataset_fn_few_shot(self, mock_nshot_maker):
     self.mock_bigbench_json_task._ds = [{
         "input": "some input",
         "target": ["some output"]
     } for _ in range(100)]
     self.mock_bigbench_json_task.few_shot_example_separator = ","
     self.mock_bigbench_json_task.task_prefix = "prefix: "
     self.mock_bigbench_json_task.rng = None
     self.mock_bb_json_task_api.return_value = self.mock_bigbench_json_task
     mock_nshot_maker.return_value = [{
         "input": "fewshot input",
         "target": ["fewshot output"]
     } for _ in range(100)]
     dataset_fn = bb.get_dataset_fn(task_name="test_task",
                                    task_path="test/task/path.json",
                                    num_shots=1)
     ds = dataset_fn(split="all", shuffle_files=False)
     expected = [{
         "inputs": "prefix: fewshot input",
         "targets": "fewshot output",
         "answers": ["fewshot output"]
     } for _ in range(100)]
     test_utils.assert_dataset(ds, expected)
Example 10
    def test_get_dataset_fn_shuffle_max_examples(self):
        # Set up a mock BIG-bench task
        self.mock_bigbench_json_task._ds = [{
            "input": str(i),
            "target": [str(i + 1)]
        } for i in range(100)]
        self.mock_bigbench_json_task.task_prefix = ""
        self.mock_bb_json_task_api.return_value = self.mock_bigbench_json_task

        dataset_fn = bb.get_dataset_fn(task_name="test_task",
                                       task_path="test/task/path.json",
                                       max_examples=50)

        ds_all = dataset_fn(split="all", shuffle_files=True, seed=23)
        ds_train = dataset_fn(split="train", shuffle_files=True, seed=23)
        ds_val = dataset_fn(split="validation", shuffle_files=True, seed=23)

        expected_all = [{
            "inputs": str(i),
            "targets": str(i + 1),
            "answers": [str(i + 1)],
        } for i in self._get_shuffled_sampled_all_split_ids()]
        expected_train = [{
            "inputs": str(i),
            "targets": str(i + 1),
            "idx": np.array([idx, 0])
        } for idx, i in enumerate(
            self._get_shuffled_sampled_train_split_ids())]
        expected_val = [{
            "inputs": str(i),
            "targets": str(i + 1),
            "answers": [str(i + 1)],
        } for i in self._get_shuffled_sampled_validation_split_ids()]

        test_utils.assert_dataset(ds_all, expected_all)
        test_utils.assert_dataset(ds_train, expected_train)
        test_utils.assert_dataset(ds_val, expected_val)
Example 11
    def _generate_examples(
        self,
        split,  # method parameters are unpacked from `gen_kwargs` as given in `_split_generators`
    ):
        """Yields examples as (key, example) tuples."""
        validate_task_name(self.config.task_name)
        if self.config.subtask_name:
            # Subtask names in BIG-bench are sometimes written as
            # task_name:subtask_name; strip the task_name prefix.
            self.config.subtask_name = self.config.subtask_name.split(":")[-1]
            validate_subtask_name(self.config.task_name,
                                  self.config.subtask_name)

        if split == "all":
            # The 'all' split has no minimum-example requirement.
            MIN_VALIDATION_EXAMPLES = 0
        else:
            MIN_VALIDATION_EXAMPLES = 16

        try:
            task_path, json_util = bb_json_paths.get_task_path(
                self.config.task_name)

            has_subtasks = bb_json_paths.has_subtasks(self.config.task_name)
            if has_subtasks:
                subtask_names = bb_json_paths.get_subtask_names(
                    self.config.task_name)
                num_subtasks = len(subtask_names)
                min_validation_examples_per_subtask = div_or_none(
                    MIN_VALIDATION_EXAMPLES, num_subtasks)

            if not has_subtasks:
                ds_fn = bbb.get_dataset_fn(
                    task_name=self.config.task_name,
                    task_path=task_path,
                    subtask_name=None,
                    num_shots=self.config.num_shots,
                    bigbench_task_type=bbb.BigBenchTaskType.HUGGINGFACE,
                    max_examples=self.config.max_examples,
                    json_util=json_util,
                    min_validation_examples=MIN_VALIDATION_EXAMPLES,
                    format_fn=json_task.default_format_fn,
                )
                ds_list = [ds_fn(split)]
            elif self.config.subtask_name is not None:
                ds_fn = bbb.get_dataset_fn(
                    task_name=self.config.task_name,
                    task_path=task_path,
                    subtask_name=self.config.subtask_name,
                    num_shots=self.config.num_shots,
                    bigbench_task_type=bbb.BigBenchTaskType.HUGGINGFACE,
                    max_examples=self.config.max_examples,
                    json_util=json_util,
                    min_validation_examples=min_validation_examples_per_subtask,
                    format_fn=json_task.default_format_fn,
                )
                ds_list = [ds_fn(split)]
            else:
                # Create mixture of all subtasks
                ds_list = []
                for subtask_name in subtask_names:
                    subtask_name = subtask_name.split(":")[-1]
                    logger.info(f"Loading subtask {split} split", subtask_name)
                    ds_fn = bbb.get_dataset_fn(
                        task_name=self.config.task_name,
                        task_path=task_path,
                        subtask_name=subtask_name,
                        num_shots=self.config.num_shots,
                        bigbench_task_type=bbb.BigBenchTaskType.HUGGINGFACE,
                        max_examples=div_or_none(self.config.max_examples,
                                                 num_subtasks),
                        json_util=json_util,
                        min_validation_examples=
                        min_validation_examples_per_subtask,
                        format_fn=json_task.default_format_fn,
                    )
                    ds_list.append(ds_fn(split))
        except ValueError as value_error:
            # BIG-Bench requires at least 16 examples to use the train & validation splits,
            # while using 'all'/'default' does not have such a requirement.
            if "has too few examples" in value_error.args[0] and split != "all":
                logger.warning(
                    f"-- WARNING: skipping split {split} because it has too few examples. Please use 'default' split."
                )
                logger.warning(value_error)
                return
            raise value_error

        unique_key_counter = 0
        for ds in ds_list:
            for example in ds:
                unique_key_counter += 1
                yield unique_key_counter, {
                    "idx": example["idx"],
                    "inputs": example["inputs"].numpy().decode().strip(),
                    "targets": [
                        target.numpy().decode().strip()
                        for target in example["targets"]
                    ],
                    "multiple_choice_targets": [
                        target.decode().strip()
                        for target in example["multiple_choice_targets"].numpy()
                    ],
                    "multiple_choice_scores": [
                        score
                        for score in example["multiple_choice_scores"].numpy()
                    ],
                }
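
For context, a builder whose _generate_examples looks like the above is normally consumed through the Hugging Face datasets library. The snippet below is a minimal sketch under assumptions not stated in the code: the builder script is available locally as bigbench.py, config names follow the BIG-bench task names, and the split without a minimum-example requirement is exposed as "default", as the warning message above suggests.

# Minimal usage sketch; the script path and config name are illustrative.
from datasets import load_dataset

ds = load_dataset("./bigbench.py", name="some_bigbench_task", split="default")
for example in ds:
    print(example["inputs"], example["targets"])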