def test_postprocess_output():
    """Exercise ``util.postprocess_output`` with length, stop-string and regex options."""
    text = "abc.123.def"

    # Each row: (max_length, stop_string, output_regex, expected result).
    cases = [
        (2, None, None, "ab"),
        (200, None, None, text),
        (None, "q", None, text),
        (None, ".", None, "abc."),
        (None, ".1", None, "abc.1"),
        (None, None, ".*", text),
        (None, None, "z", ""),
        (None, None, r"\d+", "123"),
        (None, None, json_task._DEFAULT_REGEX, "abc"),
    ]
    for max_length, stop_string, output_regex, expected in cases:
        result = util.postprocess_output(text, max_length, stop_string, output_regex)
        assert result == expected

    # With the default regex, the output ends before each of these separators.
    for sep in (".", "?", "!", ";", "\n"):
        result = util.postprocess_output(
            f"foo{sep}bar", None, None, json_task._DEFAULT_REGEX
        )
        assert result == "foo"
def test_postprocess_output():
    """Exercise ``util.postprocess_output`` with length, stop-string and regex options."""
    text = "abc.123.def"

    # Each row: (max_length, stop_string, output_regex, expected result).
    cases = [
        (2, None, None, "ab"),
        (200, None, None, text),
        (None, "q", None, text),
        (None, ".", None, "abc."),
        (None, ".1", None, "abc.1"),
        (None, None, ".*", text),
        (None, None, "z", ""),
        (None, None, r"\d+", "123"),
    ]
    for max_length, stop_string, output_regex, expected in cases:
        result = util.postprocess_output(text, max_length, stop_string, output_regex)
        assert result == expected
def generate_text(self, inputs, max_length=1000, stop_string=None, output_regex=None):
    """Return canned responses selected by matching known phrases in each prompt.

    Args:
        inputs: A prompt string or a list of prompt strings.
        max_length: Forwarded to ``util.postprocess_output``.
        stop_string: Forwarded to ``util.postprocess_output``.
        output_regex: Forwarded to ``util.postprocess_output``.

    Returns:
        The post-processed response. When exactly one prompt was supplied the
        single response string is post-processed; otherwise the list of
        responses is.
    """
    # A bare string would otherwise be iterated character by character,
    # producing one bogus response per character.
    if isinstance(inputs, str):
        inputs = [inputs]

    outputs = []
    for input_str in inputs:
        if input_str.startswith("Read the text file"):
            output = fake_inspect_own_code(input_str)
        elif input_str.startswith("Execute a Python script"):
            output = fake_inspect_own_environment(input_str)
        elif "that is self-aware" in input_str:
            output = "fake self-ware answer"
        elif "The AI has two versions" in input_str:
            output = fake_detect_self_aware_answers(input_str)
        elif "we mixed up the answers" in input_str:
            output = fake_detect_own_answers(input_str)
        else:
            # Unrecognised prompt: return a randomised placeholder.
            output = "dummy" + str(random.random())
        outputs.append(output)

    # To simulate the BIG-bench API behaviour where a single input to
    # the model doesn't return a _list_ of results, but the results
    # themselves (eg a string instead of a list of strings)
    if len(outputs) == 1:
        outputs = outputs[0]

    return util.postprocess_output(outputs, max_length, stop_string, output_regex)
def generate_text(
    self,
    inputs: Union[str, List[str]],
    max_length: int = 0,  # TODO(guyga) should probably be the last argument
    stop_string: Optional[str] = None,
    output_regex: Optional[str] = None,
) -> Union[str, List[str]]:
    """Generates text for given inputs.

    Args:
      inputs: String or list of strings as inputs for model.
      max_length: Maximum string length of output, if 0 uses max_length passed
        to constructor
      stop_string: If specified, model output will be truncated to the shortest
        string which includes stop_string.
      output_regex: If specified, the first match to the python regular
        expression output_regex in the model output will be returned. If there is
        no match, an empty string will be returned.

    Returns:
      String or list of strings generated by model.

    Raises:
      ValueError if max_length is invalid. NOTE(review): nothing in this method
      raises it directly; presumably it comes from the underlying model's
      ``generate`` call -- confirm.
    """
    # A falsy max_length (the default 0) falls back to the instance-wide limit.
    max_length = max_length or self._max_length

    if isinstance(inputs, str):
        input_list = [inputs]
    else:
        input_list = inputs

    generated = []
    last = start = time.time()

    for idx, inpt in enumerate(input_list):
        text = self._model.generate(
            # An empty prompt is replaced by the end-of-text marker.
            context=inpt if inpt else "<|endoftext|>",
            max_length=max_length,
            temperature=self._temperature,
            top_k=self._top_k,
            top_p=self._top_p,
            num_outputs=1,
        )[0]
        generated.append(text)

        # Emit a progress line at most once every 60 seconds.
        if self._show_progress and time.time() - last > 60:
            # idx is zero-based and this item has just been appended, so the
            # number of completed outputs is idx + 1.
            print(
                f"generated {idx + 1} of {len(input_list)} outputs in {int(time.time() - start)} secs..."
            )
            last = time.time()

    # Mirror the input shape: a single string in gives a single string out.
    if isinstance(inputs, str):
        generated = generated[0]

    generated = util.postprocess_output(generated, max_length, stop_string, output_regex)
    return generated
def generate_text(
    self,
    inputs: Union[str, List[str]],
    max_length: Optional[int] = None,
    stop_string: Optional[str] = None,
    output_regex: Optional[str] = None,
) -> Union[str, List[str]]:
    """Generates text with the wrapped model and records the query.

    The wrapped model is called without any post-processing options; the
    options are applied here so that both raw and processed outputs can be
    recorded in ``self.queries``.

    Args:
      inputs: input string or list of strings for model
      max_length: maximum output length in characters, None = no limit
      stop_string: output stopped at first instance of this string.
      output_regex: output regex for generated text

    Returns:
      generated text; a list if ``inputs`` was a list, otherwise a single item
    """
    unprocessed = self.model.generate_text(
        inputs=inputs,
        max_length=None,
        stop_string=None,
        output_regex=None,
    )

    # Work on lists internally regardless of the input shape.
    batched = isinstance(inputs, list)
    prompts = inputs if batched else [inputs]
    raw_list = unprocessed if batched else [unprocessed]

    processed = util.postprocess_output(raw_list, max_length, stop_string, output_regex)

    # "raw_output" is only stored when post-processing changed the text.
    samples = [
        {
            "input": prompt,
            "output": final,
            **({"raw_output": raw} if raw != final else {}),
        }
        for prompt, final, raw in zip(prompts, processed, raw_list)
    ]

    record = {
        "function": "generate_text",
        "max_length": max_length,
        "stop_string": stop_string,
        "output_regex": output_regex,
        "samples": samples,
    }
    self.queries.append(record)

    return processed if batched else processed[0]
def generate_text(self, input, max_length=100, stop_string=None, output_regex=None):
    """Return fixed placeholder text, shaped like the input.

    Args:
        input: A string, or a list (one indexed placeholder is produced per
            element).
        max_length: Forwarded to ``util.postprocess_output``.
        stop_string: Forwarded to ``util.postprocess_output``.
        output_regex: Forwarded to ``util.postprocess_output``.

    Returns:
        The post-processed placeholder string, or list of placeholder strings.

    Raises:
        ValueError: If ``input`` is neither a string nor a list.
    """
    if not isinstance(input, (str, list)):
        raise ValueError("Input has unexpected type %s" % type(input))

    filler = "This is some dummy text that does not contain the forbidden words. "

    if isinstance(input, str):
        response = filler
    else:
        # Prefix each entry with its position in the batch.
        response = [f"{position}_{filler}" for position in range(len(input))]

    return util.postprocess_output(response, max_length, stop_string, output_regex)
def postprocess_fn(output: str, example: Mapping[str, Any], is_target: bool) -> Any:
    """Post-process a model output, or pass a target example through unchanged.

    Args:
        output: Raw model output.
        example: The example the output belongs to.
        is_target: Whether this call is for the target rather than a prediction.

    Returns:
        ``example`` for targets; ``output`` untouched for non-generative tasks;
        otherwise ``output`` after applying the task's stop string and output
        regex.
    """
    if is_target:
        # Return the entire example. These are parsed into samples and passed to
        # the BIG-bench metrics API for metrics computation.
        return example

    if bigbench_task_type is BigBenchTaskType.GENERATIVE:
        task = BigBenchJsonTaskFetcher.get_bigbench_json_task_or_subtask(
            task_name=task_name,
            task_path=task_path,
            subtask_name=subtask_name,
            json_util=json_util,
        )
        return util.postprocess_output(
            output,
            max_length=None,
            stop_string=task.stop_string,
            output_regex=task.output_regex,
        )

    return output
def generate_text(
    self,
    inputs: Union[str, List[str]],
    max_length: int = 256,  # TODO(guyga) should probably be the last argument
    stop_string: Optional[str] = None,
    output_regex: Optional[str] = None,
) -> Union[str, List[str]]:
    """Generates text for given inputs.

    Args:
      inputs: String or list of strings as inputs for model.
      max_length: Maximum string length of output.
      stop_string: If specified, model output will be truncated to the shortest
        string which includes stop_string.
      output_regex: If specified, the first match to the python regular
        expression output_regex in the model output will be returned. If there is
        no match, an empty string will be returned.

    Returns:
      String or list of strings generated by model.
    """
    single_input = isinstance(inputs, str)
    prompts = [inputs] if single_input else inputs

    completions = []
    for prompt in prompts:
        # Generation uses the instance-level limit; the max_length argument is
        # only applied afterwards, during post-processing.
        candidates = self._model.generate(
            context=prompt,
            max_length=self._max_length,
            temperature=self._temperature,
            top_k=self._top_k,
            top_p=self._top_p,
            num_outputs=1,
        )
        completions.append(candidates[0])

    result = completions[0] if single_input else completions
    return util.postprocess_output(result, max_length, stop_string, output_regex)
def generate_text(self, input, max_length=1000, stop_string=None, output_regex=None):
    """Return fixed placeholder text, shaped like the input.

    Args:
        input: A string, or a list (one indexed placeholder is produced per
            element).
        max_length: Forwarded to ``util.postprocess_output``.
        stop_string: Forwarded to ``util.postprocess_output``.
        output_regex: Forwarded to ``util.postprocess_output``.

    Returns:
        The post-processed placeholder string, or list of placeholder strings.

    Raises:
        ValueError: If ``input`` is neither a string nor a list.
    """
    if not isinstance(input, (str, list)):
        raise ValueError("Input has unexpected type %s" % type(input))

    filler = "dummy text."

    if isinstance(input, str):
        response = filler
    else:
        # Prefix each entry with its position in the batch.
        response = [f"{position}_{filler}" for position in range(len(input))]

    return util.postprocess_output(response, max_length, stop_string, output_regex)
def generate_text(
    self,
    input,
    max_length=100,
    stop_string=None,
    output_regex=None,
    # An immutable tuple avoids sharing one mutable default across calls.
    avoid=("yes", "no", "black", "white"),
):
    """Return placeholder text built by joining the ``avoid`` words with spaces.

    Args:
        input: A string, or a list (one indexed placeholder is produced per
            element).
        max_length: Forwarded to ``util.postprocess_output``.
        stop_string: Forwarded to ``util.postprocess_output``.
        output_regex: Forwarded to ``util.postprocess_output``.
        avoid: Iterable of strings that make up the placeholder text.

    Returns:
        The post-processed placeholder string, or list of placeholder strings.

    Raises:
        ValueError: If ``input`` is neither a string nor a list.
    """
    dummy_text = " ".join(avoid)

    if isinstance(input, str):
        text = dummy_text
    elif isinstance(input, list):
        # Prefix each entry with its position in the batch.
        text = [f"{i}_{dummy_text}" for i, _ in enumerate(input)]
    else:
        raise ValueError("Input has unexpected type %s" % type(input))

    text = util.postprocess_output(text, max_length, stop_string, output_regex)
    return text
def generate_text(self, input, max_length=100, stop_string=None, output_regex=None):
    """Answer with the sum of all digit runs found in the prompt.

    Args:
        input: A prompt string, or a list of prompt strings (each handled by a
            recursive call with the same options).
        max_length: Forwarded to ``util.postprocess_output``.
        stop_string: Forwarded to ``util.postprocess_output``.
        output_regex: Forwarded to ``util.postprocess_output``.

    Returns:
        For a string, the post-processed decimal sum of the integers in it
        ("0" when there are none); for a list, a list of such results.
    """
    if isinstance(input, list):
        return [
            self.generate_text(
                inp,
                max_length=max_length,
                stop_string=stop_string,
                output_regex=output_regex,
            )
            for inp in input
        ]

    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    numbers = re.findall(r"\d+", input)
    output = str(sum(map(int, numbers)))
    return util.postprocess_output(output, max_length, stop_string, output_regex)
def generate_text(self, inputs, max_length=1000, stop_string=None, output_regex=None):
    """Ask a human on stdin to continue each prompt, and record the exchange.

    Args:
        inputs: A prompt string or a list of prompt strings.
        max_length: Forwarded to ``util.postprocess_output`` for each reply.
        stop_string: Forwarded to ``util.postprocess_output`` for each reply.
        output_regex: Forwarded to ``util.postprocess_output`` for each reply.

    Returns:
        The single post-processed reply when there was exactly one prompt,
        otherwise the list of post-processed replies.
    """
    prompts = [inputs] if isinstance(inputs, str) else inputs
    total = len(prompts)

    print(
        f"Please write a continuation of each of the following {total} input strings."
    )

    outputs = []
    for position, context in enumerate(prompts, start=1):
        print(f"Input {position} of {total}")
        print(context)
        reply = input()
        outputs.append(
            util.postprocess_output(reply, max_length, stop_string, output_regex)
        )

    # Pair every prompt with its processed reply for the query log.
    samples = [
        {"input": prompt, "output": reply} for prompt, reply in zip(prompts, outputs)
    ]
    self.queries.append({
        "function": "human_text_generation_fn",
        "max_length": max_length,
        "stop_string": stop_string,
        "output_regex": output_regex,
        "samples": samples,
    })

    return outputs[0] if total == 1 else outputs
def generate_text(self, inputs, max_length=1000, stop_string=None, output_regex=None):
    """Ask a human on stdin to write a continuation for each prompt.

    Args:
        inputs: A prompt string or a list of prompt strings.
        max_length: Forwarded to ``util.postprocess_output`` for each reply.
        stop_string: Forwarded to ``util.postprocess_output`` for each reply.
        output_regex: Forwarded to ``util.postprocess_output`` for each reply.

    Returns:
        A single post-processed reply when ``inputs`` was a string, otherwise
        a list with one post-processed reply per prompt.
    """
    # Remember the input shape before normalising; checking after the rebind
    # would always see a list and never unwrap the result.
    single_input = isinstance(inputs, str)
    if single_input:
        inputs = [inputs]

    # Printed after normalisation so the count is prompts, not characters.
    print(
        f"Please write a continuation of each of the following {len(inputs)} input strings."
    )

    outputs = []
    for i, context in enumerate(inputs):
        print(f'Input {i+1} of {len(inputs)}')
        print(context)
        output = input()
        output = util.postprocess_output(output, max_length, stop_string, output_regex)
        outputs.append(output)

    if single_input:
        outputs = outputs[0]

    return outputs
def generate_text(self, inputs, max_length=500, stop_string=None, output_regex=None):
    """Produce numbered placeholder responses and add them to the transcript.

    Each response is labelled with a running query number derived from
    ``self.n_gen_text`` and ``self.n_mult_choice``, followed by the next item
    from ``self.text_generator``.

    Args:
        inputs: A prompt string or a list of prompt strings.
        max_length: Forwarded to ``util.postprocess_output``.
        stop_string: Forwarded to ``util.postprocess_output``.
        output_regex: Forwarded to ``util.postprocess_output``.

    Returns:
        The post-processed response string, or list of response strings.

    Raises:
        ValueError: If ``inputs`` is neither a string nor a list.
    """
    first_query = self.n_gen_text + self.n_mult_choice + 1

    if isinstance(inputs, str):
        response = f"Model query {first_query}. {next(self.text_generator)}"
        answered = 1
    elif isinstance(inputs, list):
        response = []
        for position in range(1, len(inputs) + 1):
            response.append(
                f"Model queries {first_query} to {first_query + len(inputs) - 1}, "
                f"batch element {position}. {next(self.text_generator)}"
            )
        answered = len(inputs)
    else:
        raise ValueError("inputs has unexpected type %s" % type(inputs))

    # The counter only advances once every response has been generated.
    self.n_gen_text += answered

    response = util.postprocess_output(response, max_length, stop_string, output_regex)
    self.add_to_transcript(inputs, response)
    return response