async def compare_models_in_dir(
    model_dir: Text, stories_file: Text, output: Text
) -> None:
    """Evaluate multiple trained models in a directory on a test set.

    Args:
        model_dir: path to directory that contains the models to evaluate
        stories_file: path to the story file
        output: output directory to store results to
    """
    number_correct = defaultdict(list)

    for run in io_utils.list_subdirectories(model_dir):
        number_correct_in_run = defaultdict(list)

        for model in sorted(io_utils.list_files(run)):
            if not model.endswith("tar.gz"):
                continue

            # The model files are named like <config-name>PERCENTAGE_KEY<number>.tar.gz
            # Remove the percentage key and number from the name to get the config name
            config_name = os.path.basename(model).split(PERCENTAGE_KEY)[0]
            number_of_correct_stories = await _evaluate_core_model(model, stories_file)
            number_correct_in_run[config_name].append(number_of_correct_stories)

        for k, v in number_correct_in_run.items():
            number_correct[k].append(v)

    io_utils.dump_obj_as_json_to_file(
        os.path.join(output, RESULTS_FILE), number_correct
    )
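# Hedged usage sketch (not from the source): reading the comparison results back.
# It assumes RESULTS_FILE resolves to "results.json" and that the persisted structure
# is {config_name: [[correct stories per exclusion fraction] per run]}, which is what
# compare_models_in_dir above builds via the nested defaultdicts.
import json
import os


def summarise_comparison_results(output: str) -> None:
    """Print the number of correctly predicted stories per config and run."""
    results_path = os.path.join(output, "results.json")  # assumed file name
    with open(results_path, encoding="utf-8") as f:
        number_correct = json.load(f)

    for config_name, runs in number_correct.items():
        for run_index, correct_per_fraction in enumerate(runs):
            print(f"{config_name} run {run_index}: {correct_per_fraction}")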
def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]:
    """Persist this model into the passed directory.

    Return the metadata necessary to load the model again.
    """
    file_name = f"{file_name}.json"
    regex_file = os.path.join(model_dir, file_name)
    io_utils.dump_obj_as_json_to_file(regex_file, self.patterns)

    return {"file": file_name}
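# Hedged counterpart sketch (not taken from the source): loading the patterns that
# persist() above wrote with dump_obj_as_json_to_file. The "file" metadata key and
# io_utils.read_json_file mirror the persist side; the exact import path and load
# signature used by the component are assumptions.
import os
from typing import Any, Dict, List, Text

from rasa.utils import io as io_utils  # assumed location of the io helpers


def load_patterns(meta: Dict[Text, Any], model_dir: Text) -> List[Dict[Text, Text]]:
    """Read the persisted regex patterns back from the model directory."""
    file_name = meta.get("file")
    regex_file = os.path.join(model_dir, file_name)
    if os.path.exists(regex_file):
        return io_utils.read_json_file(regex_file)
    return []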
async def compare_models(models: List[Text], stories_file: Text, output: Text) -> None:
    """Evaluates provided trained models on a test set."""
    number_correct = defaultdict(list)

    for model in models:
        number_of_correct_stories = await _evaluate_core_model(model, stories_file)
        number_correct[os.path.basename(model)].append(number_of_correct_stories)

    io_utils.dump_obj_as_json_to_file(
        os.path.join(output, RESULTS_FILE), number_correct
    )
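# Hedged usage sketch (not from the source): compare_models is a coroutine, so it has
# to be driven by an event loop. The model paths, stories file and output directory
# below are hypothetical placeholders.
import asyncio

asyncio.run(
    compare_models(
        models=["models/config_a.tar.gz", "models/config_b.tar.gz"],
        stories_file="tests/stories.yml",
        output="results/",
    )
)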
def persist(self, path: Text) -> None:
    """Persists the policy metadata, trained model and zero state features to `path`."""
    if self.model:
        self.featurizer.persist(path)

        meta = {"priority": self.priority}
        meta_file = os.path.join(path, "sklearn_policy.json")
        io_utils.dump_obj_as_json_to_file(meta_file, meta)

        filename = os.path.join(path, "sklearn_model.pkl")
        io_utils.pickle_dump(filename, self._state)

        zero_features_filename = os.path.join(path, "zero_state_features.pkl")
        io_utils.pickle_dump(zero_features_filename, self.zero_state_features)
    else:
        rasa.shared.utils.io.raise_warning(
            "Persist called without a trained model present. "
            "Nothing to persist then!"
        )
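# Hedged counterpart sketch (an assumption, not the source's load implementation):
# restoring what persist() above wrote. io_utils.pickle_load and read_json_file are
# assumed to be the inverse helpers of pickle_dump and dump_obj_as_json_to_file.
import os
from typing import Any, Dict, Text

from rasa.utils import io as io_utils  # assumed location of the io helpers


def load_sklearn_policy_state(path: Text) -> Dict[Text, Any]:
    """Read back the persisted meta data, model state and zero state features."""
    meta = io_utils.read_json_file(os.path.join(path, "sklearn_policy.json"))
    state = io_utils.pickle_load(os.path.join(path, "sklearn_model.pkl"))
    zero_state_features = io_utils.pickle_load(
        os.path.join(path, "zero_state_features.pkl")
    )
    return {"meta": meta, "state": state, "zero_state_features": zero_state_features}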
async def test(
    stories: Text,
    agent: "Agent",
    max_stories: Optional[int] = None,
    out_directory: Optional[Text] = None,
    fail_on_prediction_errors: bool = False,
    e2e: bool = False,
    disable_plotting: bool = False,
    successes: bool = False,
    errors: bool = True,
) -> Dict[Text, Any]:
    """Run the evaluation of the stories, optionally plot the results.

    Args:
        stories: the stories to evaluate on
        agent: the agent
        max_stories: maximum number of stories to consider
        out_directory: path to the directory to store the results in
        fail_on_prediction_errors: boolean indicating whether to fail on prediction errors or not
        e2e: boolean indicating whether to use end to end evaluation or not
        disable_plotting: boolean indicating whether to disable plotting or not
        successes: boolean indicating whether to write down successful predictions or not
        errors: boolean indicating whether to write down incorrect predictions or not

    Returns:
        Evaluation summary.
    """
    from rasa.test import get_evaluation_metrics

    completed_trackers = await _generate_trackers(stories, agent, max_stories, e2e)

    story_evaluation, _ = _collect_story_predictions(
        completed_trackers, agent, fail_on_prediction_errors, e2e
    )

    evaluation_store = story_evaluation.evaluation_store

    with warnings.catch_warnings():
        from sklearn.exceptions import UndefinedMetricWarning

        warnings.simplefilter("ignore", UndefinedMetricWarning)

        targets, predictions = evaluation_store.serialise()

        if out_directory:
            report, precision, f1, accuracy = get_evaluation_metrics(
                targets, predictions, output_dict=True
            )

            report_filename = os.path.join(out_directory, REPORT_STORIES_FILE)
            io_utils.dump_obj_as_json_to_file(report_filename, report)
            logger.info(f"Stories report saved to {report_filename}.")
        else:
            report, precision, f1, accuracy = get_evaluation_metrics(
                targets, predictions, output_dict=True
            )

    _log_evaluation_table(
        evaluation_store.action_targets,
        "ACTION",
        report,
        precision,
        f1,
        accuracy,
        story_evaluation.in_training_data_fraction,
        include_report=False,
    )

    if not disable_plotting:
        _plot_story_evaluation(
            evaluation_store.action_targets,
            evaluation_store.action_predictions,
            out_directory,
        )

    if errors:
        _log_stories(
            story_evaluation.failed_stories, FAILED_STORIES_FILE, out_directory
        )
    if successes:
        _log_stories(
            story_evaluation.successful_stories, SUCCESSFUL_STORIES_FILE, out_directory
        )

    return {
        "report": report,
        "precision": precision,
        "f1": f1,
        "accuracy": accuracy,
        "actions": story_evaluation.action_list,
        "in_training_data_fraction": story_evaluation.in_training_data_fraction,
        "is_end_to_end_evaluation": e2e,
    }
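# Hedged usage sketch (not from the source): running the story evaluation against a
# trained agent. Agent.load and the file paths below are assumptions about the
# surrounding Rasa API and project layout.
import asyncio

from rasa.core.agent import Agent

agent = Agent.load("models/20210101-000000.tar.gz")  # hypothetical model path
summary = asyncio.run(
    test(
        stories="tests/stories.yml",
        agent=agent,
        out_directory="results/",
        e2e=False,
    )
)
print(summary["accuracy"], summary["f1"])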
def test_write_json_file(tmp_path: Path):
    expected = {"abc": "dasds", "list": [1, 2, 3, 4], "nested": {"a": "b"}}
    file_path = str(tmp_path / "abc.txt")
    io_utils.dump_obj_as_json_to_file(file_path, expected)

    assert io_utils.read_json_file(file_path) == expected