Exemple #1
0
def test_postprocess_output():
    """Checks length, stop-string and regex handling of postprocess_output."""
    inp = "abc.123.def"

    # Each row: (max_length, stop_string, output_regex, expected output).
    cases = [
        # Length limit only.
        (2, None, None, "ab"),
        (200, None, None, inp),
        # Stop string only; the stop string itself is kept in the output.
        (None, "q", None, inp),
        (None, ".", None, "abc."),
        (None, ".1", None, "abc.1"),
        # Regex only; no match yields an empty string.
        (None, None, ".*", inp),
        (None, None, "z", ""),
        (None, None, r"\d+", "123"),
        (None, None, json_task._DEFAULT_REGEX, "abc"),
    ]
    for max_length, stop_string, output_regex, expected in cases:
        out = util.postprocess_output(inp, max_length, stop_string,
                                      output_regex)
        assert out == expected

    # The default regex is expected to cut at each of these separators.
    for sep in (".", "?", "!", ";", "\n"):
        out = util.postprocess_output(f"foo{sep}bar", None, None,
                                      json_task._DEFAULT_REGEX)
        assert out == "foo"
Exemple #2
0
def test_postprocess_output():
    """Exercises each postprocess_output option in isolation on one input."""
    inp = "abc.123.def"

    def postprocess(max_length=None, stop_string=None, output_regex=None):
        # Thin wrapper so each check below names only the option under test.
        return util.postprocess_output(inp, max_length, stop_string,
                                       output_regex)

    # Length limit: shorter than the input truncates, longer is a no-op.
    assert postprocess(max_length=2) == "ab"
    assert postprocess(max_length=200) == inp

    # Stop string: absent leaves the input intact, present is kept in output.
    assert postprocess(stop_string="q") == inp
    assert postprocess(stop_string=".") == "abc."
    assert postprocess(stop_string=".1") == "abc.1"

    # Regex: first match is returned, no match gives an empty string.
    assert postprocess(output_regex=".*") == inp
    assert postprocess(output_regex="z") == ""
    assert postprocess(output_regex=r"\d+") == "123"
Exemple #3
0
    def generate_text(self,
                      inputs,
                      max_length=1000,
                      stop_string=None,
                      output_regex=None):
        """Produces canned outputs selected by recognizable prompt fragments.

        Args:
          inputs: A prompt string or a list of prompt strings.
          max_length: Passed through to util.postprocess_output.
          stop_string: Passed through to util.postprocess_output.
          output_regex: Passed through to util.postprocess_output.

        Returns:
          The result of util.postprocess_output, applied to the single output
          when exactly one prompt was handled and to the list of outputs
          otherwise.
        """
        # A bare string is one prompt. Without this, the loop below would walk
        # the string character by character and emit one output per character.
        if isinstance(inputs, str):
            inputs = [inputs]

        outputs = []
        for input_str in inputs:
            if input_str.startswith("Read the text file"):
                output = fake_inspect_own_code(input_str)
            elif input_str.startswith("Execute a Python script"):
                output = fake_inspect_own_environment(input_str)
            elif "that is self-aware" in input_str:
                output = "fake self-ware answer"
            elif "The AI has two versions" in input_str:
                output = fake_detect_self_aware_answers(input_str)
            elif "we mixed up the answers" in input_str:
                output = fake_detect_own_answers(input_str)
            else:
                # Unrecognized prompt: return a fresh random dummy answer.
                output = "dummy" + str(random.random())
            outputs.append(output)

        # To simulate the BIG-bench API behaviour where a single input to
        # the model doesn't return a _list_ of results, but the results
        # themselves (eg a string instead of a list of strings)
        if len(outputs) == 1:
            outputs = outputs[0]

        return util.postprocess_output(outputs, max_length, stop_string,
                                       output_regex)
    def generate_text(
        self,
        inputs: Union[str, List[str]],
        max_length: int = 0,  # TODO(guyga) should probably be the last argument
        stop_string: Optional[str] = None,
        output_regex: Optional[str] = None,
    ) -> Union[str, List[str]]:
        """Generates text for given inputs.

        Args:
          inputs: String or list of strings as inputs for model.
          max_length: Maximum string length of output. A falsy value (the
            default 0) falls back to self._max_length.
          stop_string: If specified, model output will be truncated to the shortest
            string which includes stop_string.
          output_regex: If specified, the first match to the python regular
            expression output_regex in the model output will be returned. If there is
            no match, an empty string will be returned.

        Returns:
          String or list of strings generated by model, matching the type of
          `inputs`.

        Raises:
          ValueError if max_length is invalid. NOTE(review): not raised in this
            method itself; presumably comes from the underlying model - confirm.
        """

        max_length = max_length or self._max_length

        single_input = isinstance(inputs, str)
        input_list = [inputs] if single_input else inputs

        generated = []
        last = start = time.time()

        # Count from 1 so the progress message reports how many outputs have
        # actually been produced (a 0-based index would undercount by one).
        for count, inpt in enumerate(input_list, start=1):
            text = self._model.generate(
                # An empty prompt is replaced by the end-of-text token.
                context=inpt if inpt else "<|endoftext|>",
                max_length=max_length,
                temperature=self._temperature,
                top_k=self._top_k,
                top_p=self._top_p,
                num_outputs=1,
            )[0]
            generated.append(text)

            # Report progress at most once per minute.
            if self._show_progress and time.time() - last > 60:
                print(
                    f"generated {count} of {len(input_list)} outputs in {int(time.time() - start)} secs..."
                )
                last = time.time()

        if single_input:
            generated = generated[0]

        generated = util.postprocess_output(generated, max_length, stop_string,
                                            output_regex)
        return generated
Exemple #5
0
    def generate_text(
        self,
        inputs: Union[str, List[str]],
        max_length: Optional[int] = None,
        stop_string: Optional[str] = None,
        output_regex: Optional[str] = None,
    ) -> Union[str, List[str]]:
        """Queries the wrapped model for text and records the exchange.

        The wrapped model is asked for unprocessed text; post-processing is
        applied here so that both the raw and the processed output can be
        logged in self.queries.

        Args:
          inputs: input string or list of strings for model
          max_length: forwarded to util.postprocess_output
          stop_string: forwarded to util.postprocess_output
          output_regex: forwarded to util.postprocess_output

        Returns:
          post-processed text: a list when `inputs` is a list, otherwise the
          single post-processed output
        """
        is_batch = isinstance(inputs, list)

        raw = self.model.generate_text(
            inputs=inputs,
            max_length=None,
            stop_string=None,
            output_regex=None,
        )

        # Work on lists internally regardless of how the caller passed inputs.
        batch_inputs = inputs if is_batch else [inputs]
        batch_raw = raw if is_batch else [raw]

        outputs = util.postprocess_output(batch_raw, max_length, stop_string,
                                          output_regex)

        # The raw text is only stored when post-processing changed it.
        samples = [
            {"input": prompt, "output": out_text, "raw_output": raw_text}
            if raw_text != out_text else
            {"input": prompt, "output": out_text}
            for prompt, out_text, raw_text in zip(batch_inputs, outputs,
                                                  batch_raw)
        ]

        query_record = {
            "function": "generate_text",
            "max_length": max_length,
            "stop_string": stop_string,
            "output_regex": output_regex,
            "samples": samples,
        }
        self.queries.append(query_record)

        return outputs if is_batch else outputs[0]
Exemple #6
0
    def generate_text(self, input, max_length=100, stop_string=None, output_regex=None):
        """Returns fixed dummy text, one index-prefixed copy per list element.

        Raises:
          ValueError: if `input` is neither a string nor a list.
        """
        # Reject unsupported input types up front.
        if not isinstance(input, (str, list)):
            raise ValueError("Input has unexpected type %s" % type(input))

        dummy_text = (
            "This is some dummy text that does not contain the forbidden words. "
        )

        if isinstance(input, list):
            text = [f"{i}_{dummy_text}" for i, _ in enumerate(input)]
        else:
            text = dummy_text

        return util.postprocess_output(text, max_length, stop_string, output_regex)
 def postprocess_fn(output: str, example: Mapping[str, Any],
                    is_target: bool) -> Any:
     """Post-processes a model output, or passes a target example through.

     Targets are returned as the entire example; these are parsed into samples
     and passed to the BIG-bench metrics API for metrics computation. Outputs
     of non-generative tasks are returned unchanged. Outputs of generative
     tasks are run through util.postprocess_output with the task's stop string
     and output regex and no length limit.
     """
     if is_target:
         return example

     if bigbench_task_type is BigBenchTaskType.GENERATIVE:
         # Look the task up to obtain its stop string and output regex.
         task = BigBenchJsonTaskFetcher.get_bigbench_json_task_or_subtask(
             task_name=task_name,
             task_path=task_path,
             subtask_name=subtask_name,
             json_util=json_util)
         return util.postprocess_output(output,
                                        max_length=None,
                                        stop_string=task.stop_string,
                                        output_regex=task.output_regex)

     return output
    def generate_text(
        self,
        inputs: Union[str, List[str]],
        max_length: int = 256,  # TODO(guyga) should probably be the last argument
        stop_string: Optional[str] = None,
        output_regex: Optional[str] = None,
    ) -> Union[str, List[str]]:
        """Generates one continuation per input and post-processes the result.

        Args:
          inputs: String or list of strings as inputs for model.
          max_length: Maximum string length of output. Only used for
            post-processing; generation itself is bounded by self._max_length.
          stop_string: If specified, model output will be truncated to the shortest
            string which includes stop_string.
          output_regex: If specified, the first match to the python regular
            expression output_regex in the model output will be returned. If there is
            no match, an empty string will be returned.

        Returns:
          String or list of strings generated by model.
        """
        single_input = isinstance(inputs, str)
        prompts = [inputs] if single_input else inputs

        generated = []
        for prompt in prompts:
            candidates = self._model.generate(
                context=prompt,
                max_length=self._max_length,
                temperature=self._temperature,
                top_k=self._top_k,
                top_p=self._top_p,
                num_outputs=1,
            )
            # Only one output is requested, so keep the first candidate.
            generated.append(candidates[0])

        # A single string input yields a single string output.
        if single_input:
            generated = generated[0]

        return util.postprocess_output(generated, max_length, stop_string,
                                       output_regex)
Exemple #9
0
    def generate_text(self,
                      input,
                      max_length=1000,
                      stop_string=None,
                      output_regex=None):
        """Returns placeholder text shaped like the input.

        A string input gives the placeholder itself; a list input gives one
        index-prefixed placeholder per element. The result is passed through
        util.postprocess_output before being returned.

        Raises:
          ValueError: if `input` is neither a string nor a list.
        """
        dummy_text = "dummy text."

        if isinstance(input, list):
            text = [f"{i}_{dummy_text}" for i, _ in enumerate(input)]
        elif isinstance(input, str):
            text = dummy_text
        else:
            raise ValueError("Input has unexpected type %s" % type(input))

        return util.postprocess_output(text, max_length, stop_string,
                                       output_regex)
Exemple #10
0
    def generate_text(
        self,
        input,
        max_length=100,
        stop_string=None,
        output_regex=None,
        avoid=("yes", "no", "black", "white"),
    ):
        """Returns dummy text made of the words in `avoid`.

        Args:
          input: A prompt string or a list of prompt strings.
          max_length: Passed through to util.postprocess_output.
          stop_string: Passed through to util.postprocess_output.
          output_regex: Passed through to util.postprocess_output.
          avoid: Iterable of words joined with spaces to form the dummy text.
            The default is an immutable tuple so it cannot be shared and
            mutated across calls.

        Returns:
          The result of util.postprocess_output on the dummy text (string
          input) or on a list of index-prefixed dummy texts (list input).

        Raises:
          ValueError: if `input` is neither a string nor a list.
        """
        dummy_text = " ".join(avoid)

        if isinstance(input, str):
            text = dummy_text
        elif isinstance(input, list):
            text = [f"{i}_{dummy_text}" for i, _ in enumerate(input)]
        else:
            raise ValueError("Input has unexpected type %s" % type(input))

        return util.postprocess_output(text, max_length, stop_string, output_regex)
Exemple #11
0
    def generate_text(self,
                      input,
                      max_length=100,
                      stop_string=None,
                      output_regex=None):
        """Answers each prompt with the sum of the integers it contains.

        Args:
          input: A prompt string, or a list of prompt strings which are
            handled one by one.
          max_length: Passed through to util.postprocess_output.
          stop_string: Passed through to util.postprocess_output.
          output_regex: Passed through to util.postprocess_output.

        Returns:
          For a string, the post-processed decimal sum of every run of digits
          in the prompt ("0" before post-processing when there are none). For
          a list, a list with one such result per element.
        """
        if isinstance(input, list):
            return [
                self.generate_text(
                    inp,
                    max_length=max_length,
                    stop_string=stop_string,
                    output_regex=output_regex,
                ) for inp in input
            ]

        # Raw string: "\d" in a plain literal is an invalid escape sequence.
        numbers = re.findall(r"\d+", input)
        output = str(sum(map(int, numbers)))
        return util.postprocess_output(output, max_length, stop_string,
                                       output_regex)
Exemple #12
0
    def generate_text(self,
                      inputs,
                      max_length=1000,
                      stop_string=None,
                      output_regex=None):
        """Asks a human on stdin to continue each prompt and logs the query.

        Each typed answer is passed through util.postprocess_output. The
        prompts and processed answers are appended to self.queries.

        Returns:
          The single processed answer when there was exactly one prompt,
          otherwise the list of processed answers.
        """
        if isinstance(inputs, str):
            inputs = [inputs]

        print(
            f"Please write a continuation of each of the following {len(inputs)} input strings."
        )

        outputs = []
        for i, context in enumerate(inputs):
            print(f"Input {i+1} of {len(inputs)}")
            print(context)
            # Read the human's answer and post-process it right away.
            outputs.append(
                util.postprocess_output(input(), max_length, stop_string,
                                        output_regex))

        samples = [{
            "input": prompt,
            "output": answer
        } for prompt, answer in zip(inputs, outputs)]

        query_record = {
            "function": "human_text_generation_fn",
            "max_length": max_length,
            "stop_string": stop_string,
            "output_regex": output_regex,
            "samples": samples,
        }
        self.queries.append(query_record)

        return outputs[0] if len(inputs) == 1 else outputs
Exemple #13
0
    def generate_text(self,
                      inputs,
                      max_length=1000,
                      stop_string=None,
                      output_regex=None):
        """Asks a human on stdin to write a continuation for each prompt.

        Args:
          inputs: A prompt string or a list of prompt strings.
          max_length: Passed through to util.postprocess_output.
          stop_string: Passed through to util.postprocess_output.
          output_regex: Passed through to util.postprocess_output.

        Returns:
          The post-processed answer for a string input, or a list of
          post-processed answers for a list input.
        """
        # Remember the original shape before normalizing: once `inputs` has
        # been wrapped in a list, an isinstance check on it can never be true.
        single_input = isinstance(inputs, str)
        if single_input:
            inputs = [inputs]

        # Printed after normalization so the count is the number of prompts,
        # not the number of characters in a single prompt string.
        print(
            f"Please write a continuation of each of the following {len(inputs)} input strings."
        )

        outputs = []
        for i, context in enumerate(inputs):
            print(f'Input {i+1} of {len(inputs)}')
            print(context)
            output = input()
            output = util.postprocess_output(output, max_length, stop_string,
                                             output_regex)
            outputs.append(output)

        if single_input:
            outputs = outputs[0]
        return outputs
Exemple #14
0
    def generate_text(self,
                      inputs,
                      max_length=500,
                      stop_string=None,
                      output_regex=None):
        """Returns numbered dummy text and records it in the transcript.

        Text comes from self.text_generator, prefixed with the running query
        number(s). self.n_gen_text is advanced by the number of prompts, the
        text is passed through util.postprocess_output, and the inputs and
        processed text are handed to self.add_to_transcript.

        Raises:
          ValueError: if `inputs` is neither a string nor a list.
        """
        # Number of the next query, counting both query kinds made so far.
        n_queries = self.n_gen_text + self.n_mult_choice + 1

        if not isinstance(inputs, (str, list)):
            raise ValueError("inputs has unexpected type %s" % type(inputs))

        if isinstance(inputs, str):
            text = f"Model query {n_queries}. {next(self.text_generator)}"
            n_answered = 1
        else:
            text = [
                f"Model queries {n_queries} to {n_queries + len(inputs) - 1}, batch element {i+1}. {next(self.text_generator)}"
                for i, _ in enumerate(inputs)
            ]
            n_answered = len(inputs)
        self.n_gen_text += n_answered

        processed = util.postprocess_output(text, max_length, stop_string,
                                            output_regex)
        self.add_to_transcript(inputs, processed)
        return processed