# Module-level imports assumed by this method: numpy, tqdm, and the
# BIG-bench task API (import numpy as np; from tqdm import tqdm;
# import bigbench.api.task as task).
def _evaluate_model(
    self, model, number_of_shots=0, max_examples_per_shot=None, random_seed=0
):
    """Evaluate model with a specific number of examples in the prompt."""
    # Use same prompt/example ordering for each number_of_shots.
    np.random.seed(random_seed)
    trials = 0
    score, tasks = 0, 0
    for idx, val_task in enumerate(tqdm(np.random.permutation(self.examples))):
        # Stop once the trial budget is exhausted (None means no limit).
        if max_examples_per_shot is not None and trials >= max_examples_per_shot:
            break
        tasks += 1
        prompt = self._make_prompt(number_of_shots)
        val_prompts, targets = self._make_val_prompts(val_task)
        correct_val = 0
        for val_prompt, target in zip(val_prompts, targets):
            full_prompt = prompt + val_prompt
            if self.verbose:
                print("Task {}".format(idx))
                if idx == 0:
                    print(full_prompt)
            for _ in range(self.trials_per_input):
                trials += 1
                result = model.generate_text(
                    full_prompt, output_regex=r"\d[\d\n]+\d"
                )
                if result == target:
                    correct_val += 1
                    break
            if self.verbose:
                print(
                    f"Problem: {val_prompt}, Answer: {result}, "
                    f"Expected Answer: {target}, Correct: {result == target}"
                )
        # The task counts as solved only if every validation prompt was solved.
        if correct_val == len(val_prompts):
            score += 1
        if self.verbose:
            print("Intermediate score", score, tasks)
    return task.ScoreData(
        {"exact_str_match": score / tasks},
        preferred_score="exact_str_match",
        subtask_description=(
            "Exact string match on Abstraction and Reasoning Corpus with "
            f"{number_of_shots} training samples"
        ),
        low_score=0.0,
        high_score=1.0,
        number_of_shots=number_of_shots,
    )
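# A minimal dry-run sketch of the method above, assuming only the attributes
# it reads (self.examples, self.verbose, self.trials_per_input) exist on the
# enclosing task class; the stub model and its fixed answer are hypothetical,
# not part of the real task.
class _StubModel:
    """Stand-in for a real language model; ignores the prompt."""

    def generate_text(self, inputs, output_regex=None):
        # Always emit the same digit grid, matching the expected output shape.
        return "123\n456"

# Hypothetical usage (the enclosing task class and its constructor are not
# shown in this excerpt, so this is illustrative only):
# score_data = arc_task._evaluate_model(
#     _StubModel(), number_of_shots=1, max_examples_per_shot=100
# )
# print(score_data.score_dict)  # e.g. {"exact_str_match": 0.0}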
def play_round(self, model: model.Model) -> None:
    """Play a single round of the game."""
    prompt = get_prompt(
        self.letter_set,
        self.correct_words,
        self.incorrect_words,
        self.score,
        self.round,
        self.total_rounds,
    )
    logging.info(prompt)
    attempt = model.generate_text(prompt)
    logging.info(attempt)
    self._score_round(attempt)
    self.round += 1
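# A hedged sketch of how play_round might be driven; the game class name,
# its constructor arguments, and the echo model below are assumptions based
# only on the attributes the method reads, not the task's real API.
class _EchoModel:
    """Stand-in for model.Model; always guesses the same word."""

    def generate_text(self, prompt):
        return "example"

# Hypothetical driver loop:
# game = SpellingGame(letter_set="abcdefg", total_rounds=3)  # hypothetical
# while game.round < game.total_rounds:
#     game.play_round(_EchoModel())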
# ----------------------------------------------------------------------------
if __name__ == "__main__":
    """Use this to run a simple test of the HF model types."""

    # Assumes BIGBenchHFModel and set_seed (e.g. transformers.set_seed) are
    # defined or imported earlier in this module.

    # test a few gpt models
    for model_name in ["openai-gpt", "gpt2", "gpt2-medium", "gpt2-large"]:
        print("-" * 80)
        print(f"model: {model_name}")

        set_seed(42)
        model = BIGBenchHFModel(model_name=model_name)

        # Single-prompt generation, truncated at the first period.
        prompt = "It was the best of times, it was"
        response = model.generate_text(
            inputs=prompt,
            max_length=32,
            stop_string=".",
        )
        print(f"prompt: {prompt}")
        print(f"response: {response}")

        # Batched generation: a list of prompts yields a list of responses.
        prompts = ["These are the times that", "Stately, plump Buck Mulligan"]
        responses = model.generate_text(
            inputs=prompts,
            max_length=32,
            stop_string=".",
        )
        for p, r in zip(prompts, responses):
            print(f"prompt: {p}")
            print(f"response: {r}")