def run_evaluation(
    data_path,
    model,
    report_folder=None,
    successes_filename=None,
    errors_filename="errors.json",
    confmat_filename=None,
    intent_hist_filename=None,
    component_builder=None,
) -> Dict:  # pragma: no cover
    """Evaluate intent classification and entity extraction."""

    # get the metadata config from the package data
    if isinstance(model, Interpreter):
        interpreter = model
    else:
        interpreter = Interpreter.load(model, component_builder)
    test_data = training_data.load_data(data_path, interpreter.model_metadata.language)
    extractors = get_entity_extractors(interpreter)

    if is_intent_classifier_present(interpreter):
        intent_targets = get_intent_targets(test_data)
    else:
        # one empty target per example so predictions and targets stay aligned
        intent_targets = [None] * len(test_data.training_examples)

    intent_results, entity_predictions, tokens = get_predictions(
        interpreter, test_data, intent_targets
    )

    if duckling_extractors.intersection(extractors):
        entity_predictions = remove_duckling_entities(entity_predictions)
        extractors = remove_duckling_extractors(extractors)

    result = {"intent_evaluation": None, "entity_evaluation": None}

    if report_folder:
        utils.create_dir(report_folder)

    if is_intent_classifier_present(interpreter):
        logger.info("Intent evaluation results:")
        result["intent_evaluation"] = evaluate_intents(
            intent_results,
            report_folder,
            successes_filename,
            errors_filename,
            confmat_filename,
            intent_hist_filename,
        )

    if extractors:
        entity_targets = get_entity_targets(test_data)
        logger.info("Entity evaluation results:")
        result["entity_evaluation"] = evaluate_entities(
            entity_targets, entity_predictions, tokens, extractors, report_folder
        )

    return result
def test_entity_evaluation_report(tmpdir_factory):
    path = tmpdir_factory.mktemp("evaluation").strpath
    report_folder = os.path.join(path, "reports")
    mock_extractors = ["A", "B"]
    report_filename_a = os.path.join(report_folder, "A_report.json")
    report_filename_b = os.path.join(report_folder, "B_report.json")
    ner_filename = os.path.join(report_folder, "ner_filename.json")

    utils.create_dir(report_folder)
    result = evaluate_entities(
        [EN_targets],
        [EN_predicted],
        [EN_tokens],
        mock_extractors,
        report_folder,
        ner_filename,
    )

    report_a = json.loads(rasa.utils.io.read_file(report_filename_a))
    report_b = json.loads(rasa.utils.io.read_file(report_filename_b))

    assert len(report_a) == 8
    assert report_a["datetime"]["support"] == 1.0
    assert report_b["macro avg"]["recall"] == 0.2
    assert result["A"]["accuracy"] == 0.75
def test_intent_evaluation_report(tmpdir_factory):
    path = tmpdir_factory.mktemp("evaluation").strpath
    report_folder = os.path.join(path, "reports")
    report_filename = os.path.join(report_folder, "intent_report.json")

    utils.create_dir(report_folder)

    intent_results = [
        IntentEvaluationResult("", "restaurant_search", "I am hungry", 0.12345),
        IntentEvaluationResult("greet", "greet", "hello", 0.98765),
    ]

    result = evaluate_intents(
        intent_results,
        report_folder,
        successes_filename=None,
        errors_filename=None,
        confmat_filename=None,
        intent_hist_filename=None,
    )

    report = json.loads(utils.read_file(report_filename))

    greet_results = {"precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1}

    prediction = {
        "text": "hello",
        "intent": "greet",
        "predicted": "greet",
        "confidence": 0.98765,
    }

    assert len(report.keys()) == 4
    assert report["greet"] == greet_results
    assert result["predictions"][0] == prediction
def main():
    from rasa.core.agent import Agent
    from rasa.core.interpreter import NaturalLanguageInterpreter
    from rasa.core.utils import AvailableEndpoints, set_default_subparser
    import rasa.nlu.utils as nlu_utils
    import rasa.core.cli
    from rasa.core import utils

    loop = asyncio.get_event_loop()

    # Running as standalone python application
    arg_parser = create_argument_parser()
    set_default_subparser(arg_parser, "default")
    cmdline_arguments = arg_parser.parse_args()

    logging.basicConfig(level=cmdline_arguments.loglevel)
    _endpoints = AvailableEndpoints.read_endpoints(cmdline_arguments.endpoints)

    if cmdline_arguments.output:
        nlu_utils.create_dir(cmdline_arguments.output)

    if not cmdline_arguments.core:
        raise ValueError(
            "you must provide a core model directory to evaluate using -d / --core"
        )

    if cmdline_arguments.mode == "default":
        _interpreter = NaturalLanguageInterpreter.create(
            cmdline_arguments.nlu, _endpoints.nlu
        )

        _agent = Agent.load(cmdline_arguments.core, interpreter=_interpreter)

        stories = loop.run_until_complete(
            rasa.core.cli.train.stories_from_cli_args(cmdline_arguments)
        )

        loop.run_until_complete(
            test(
                stories,
                _agent,
                cmdline_arguments.max_stories,
                cmdline_arguments.output,
                cmdline_arguments.fail_on_prediction_errors,
                cmdline_arguments.e2e,
            )
        )

    elif cmdline_arguments.mode == "compare":
        compare(
            cmdline_arguments.core, cmdline_arguments.stories, cmdline_arguments.output
        )

        story_n_path = os.path.join(cmdline_arguments.core, "num_stories.json")
        number_of_stories = utils.read_json_file(story_n_path)
        plot_curve(cmdline_arguments.output, number_of_stories)

    logger.info("Finished evaluation")
def test_core(
    model: Optional[Text] = None,
    stories: Optional[Text] = None,
    endpoints: Optional[Text] = None,
    output: Text = DEFAULT_RESULTS_PATH,
    kwargs: Optional[Dict] = None,
):
    import rasa.core.test
    import rasa.core.utils as core_utils
    from rasa.nlu import utils as nlu_utils
    from rasa.model import get_model
    from rasa.core.interpreter import NaturalLanguageInterpreter
    from rasa.core.agent import Agent

    _endpoints = core_utils.AvailableEndpoints.read_endpoints(endpoints)

    if kwargs is None:
        kwargs = {}

    if output:
        nlu_utils.create_dir(output)

    unpacked_model = get_model(model)
    if unpacked_model is None:
        print_error(
            "Unable to test: could not find a model. Use 'rasa train' to train a "
            "Rasa model."
        )
        return

    core_path, nlu_path = get_model_subdirectories(unpacked_model)

    if not os.path.exists(core_path):
        print_error(
            "Unable to test: could not find a Core model. Use 'rasa train' to "
            "train a model."
        )
        # bail out, otherwise Agent.load below would fail on the missing model
        return

    use_e2e = kwargs["e2e"] if "e2e" in kwargs else False

    _interpreter = RegexInterpreter()
    if use_e2e:
        if os.path.exists(nlu_path):
            _interpreter = NaturalLanguageInterpreter.create(nlu_path, _endpoints.nlu)
        else:
            print_warning(
                "No NLU model found. Using default 'RegexInterpreter' for end-to-end "
                "evaluation."
            )

    _agent = Agent.load(unpacked_model, interpreter=_interpreter)

    kwargs = minimal_kwargs(kwargs, rasa.core.test, ["stories", "agent"])

    loop = asyncio.get_event_loop()
    loop.run_until_complete(
        rasa.core.test(stories, _agent, out_directory=output, **kwargs)
    )
def test_core(
    model: Optional[Text] = None,
    stories: Optional[Text] = None,
    endpoints: Optional[Text] = None,
    output: Text = DEFAULT_RESULTS_PATH,
    model_path: Optional[Text] = None,
    kwargs: Optional[Dict] = None,
):
    import rasa.core.test
    import rasa.core.utils as core_utils
    from rasa.nlu import utils as nlu_utils
    from rasa.model import get_model
    from rasa.core.interpreter import NaturalLanguageInterpreter
    from rasa.core.agent import Agent

    _endpoints = core_utils.AvailableEndpoints.read_endpoints(endpoints)

    if kwargs is None:
        kwargs = {}

    if output:
        nlu_utils.create_dir(output)

    if os.path.isfile(model):
        model_path = get_model(model)

    if model_path:
        # Single model: Normal evaluation
        loop = asyncio.get_event_loop()
        model_path = get_model(model)
        core_path, nlu_path = get_model_subdirectories(model_path)

        if os.path.exists(core_path) and os.path.exists(nlu_path):
            _interpreter = NaturalLanguageInterpreter.create(nlu_path, _endpoints.nlu)

            _agent = Agent.load(core_path, interpreter=_interpreter)

            kwargs = minimal_kwargs(kwargs, rasa.core.test, ["stories", "agent"])
            loop.run_until_complete(
                rasa.core.test(stories, _agent, out_directory=output, **kwargs)
            )
        else:
            logger.warning(
                "Not able to test. Make sure both models, core and "
                "nlu, are available."
            )
    else:
        from rasa.core.test import compare, plot_curve

        compare(model, stories, output)

        story_n_path = os.path.join(model, "num_stories.json")
        number_of_stories = core_utils.read_json_file(story_n_path)
        plot_curve(output, number_of_stories)
def run_evaluation(
    data_path: Text,
    model_path: Text,
    report: Optional[Text] = None,
    successes: Optional[Text] = None,
    errors: Optional[Text] = "errors.json",
    confmat: Optional[Text] = None,
    histogram: Optional[Text] = None,
    component_builder: Optional[ComponentBuilder] = None,
) -> Dict:  # pragma: no cover
    """
    Evaluate intent classification and entity extraction.

    :param data_path: path to the test data
    :param model_path: path to the model
    :param report: path to folder where reports are stored
    :param successes: path to file that will contain success cases
    :param errors: path to file that will contain error cases
    :param confmat: path to file that will show the confusion matrix
    :param histogram: path to file that will show a histogram
    :param component_builder: component builder

    :return: dictionary containing evaluation results
    """

    # get the metadata config from the package data
    interpreter = Interpreter.load(model_path, component_builder)

    interpreter.pipeline = remove_pretrained_extractors(interpreter.pipeline)
    test_data = training_data.load_data(data_path, interpreter.model_metadata.language)

    result = {
        "intent_evaluation": None,
        "entity_evaluation": None,
    }  # type: Dict[Text, Optional[Dict]]

    if report:
        utils.create_dir(report)

    intent_results, entity_results = get_eval_data(interpreter, test_data)

    if intent_results:
        logger.info("Intent evaluation results:")
        result["intent_evaluation"] = evaluate_intents(
            intent_results, report, successes, errors, confmat, histogram
        )

    if entity_results:
        logger.info("Entity evaluation results:")
        extractors = get_entity_extractors(interpreter)
        result["entity_evaluation"] = evaluate_entities(
            entity_results, extractors, report
        )

    return result
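# Minimal usage sketch for run_evaluation above (added for illustration, not part
# of the original code). All paths are hypothetical placeholders; substitute the
# paths to your own NLU test file and unpacked model directory.
def _run_evaluation_example():
    evaluation = run_evaluation(
        data_path="data/nlu_test.md",           # hypothetical NLU test data file
        model_path="models/nlu-20190101-000000",  # hypothetical unpacked NLU model dir
        report="reports",                        # per-component reports are written here
        errors="errors.json",                    # misclassified examples end up here
    )
    # entries are None when the model has no intent classifier / entity extractor
    print(evaluation["intent_evaluation"])
    print(evaluation["entity_evaluation"])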
def persist(
    self,
    path: Text,
    persistor: Optional[Persistor] = None,
    project_name: Text = None,
    fixed_model_name: Text = None,
) -> Text:
    """Persist all components of the pipeline to the passed path.

    Returns the directory of the persisted model."""

    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    metadata = {
        "language": self.config["language"],
        "adapter": self.config["adapter"],
        "pipeline": [],
    }

    if project_name is None:
        project_name = "default"

    if fixed_model_name:
        model_name = fixed_model_name
    else:
        model_name = "model_" + timestamp

    path = make_path_absolute(path)
    dir_name = os.path.join(path, project_name, model_name)

    create_dir(dir_name)

    if self.training_data:
        metadata.update(self.training_data.persist(dir_name))

    for i, component in enumerate(self.pipeline):
        file_name = self._file_name(i, component.name)
        update = component.persist(file_name, dir_name)
        component_meta = component.component_config
        if update:
            component_meta.update(update)
        component_meta["class"] = utils.module_path_from_object(component)

        metadata["pipeline"].append(component_meta)

    Metadata(metadata, dir_name).persist(dir_name)

    if persistor is not None:
        persistor.persist(dir_name, model_name, project_name)
    logger.info(
        "Successfully saved model into '{}'".format(os.path.abspath(dir_name))
    )
    return dir_name
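# Hedged usage sketch for persist above, assuming it is a method of the NLU
# Trainer (as in rasa.nlu.model.Trainer.persist). The config and data paths are
# hypothetical placeholders, and the config must provide the keys this persist()
# reads ("language" and "adapter" in the version above).
def _train_and_persist_example():
    from rasa.nlu import config, training_data
    from rasa.nlu.model import Trainer

    nlu_config = config.load("config.yml")         # hypothetical pipeline config
    data = training_data.load_data("data/nlu.md")  # hypothetical training data

    trainer = Trainer(nlu_config)
    trainer.train(data)
    # writes models/default/example_model/ and returns that directory
    model_directory = trainer.persist(
        "models/", project_name="default", fixed_model_name="example_model"
    )
    return model_directory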
def test_entity_evaluation_report(tmpdir_factory):
    class EntityExtractorA(EntityExtractor):
        provides = ["entities"]

        def __init__(self, component_config=None) -> None:
            super(EntityExtractorA, self).__init__(component_config)

    class EntityExtractorB(EntityExtractor):
        provides = ["entities"]

        def __init__(self, component_config=None) -> None:
            super(EntityExtractorB, self).__init__(component_config)

    path = tmpdir_factory.mktemp("evaluation").strpath
    report_folder = os.path.join(path, "reports")
    report_filename_a = os.path.join(report_folder, "EntityExtractorA_report.json")
    report_filename_b = os.path.join(report_folder, "EntityExtractorB_report.json")

    utils.create_dir(report_folder)
    mock_interpreter = Interpreter(
        [
            EntityExtractorA({"provides": ["entities"]}),
            EntityExtractorB({"provides": ["entities"]}),
        ],
        None,
    )
    extractors = get_entity_extractors(mock_interpreter)
    result = evaluate_entities([EN_entity_result], extractors, report_folder)

    report_a = json.loads(rasa.utils.io.read_file(report_filename_a))
    report_b = json.loads(rasa.utils.io.read_file(report_filename_b))

    assert len(report_a) == 8
    assert report_a["datetime"]["support"] == 1.0
    assert report_b["macro avg"]["recall"] == 0.2
    assert result["EntityExtractorA"]["accuracy"] == 0.75
def test_core(
    model: Text,
    stories: Text,
    endpoints: Text = None,
    output: Text = DEFAULT_RESULTS_PATH,
    model_path: Text = None,
    **kwargs: Dict
):
    import rasa.core.test
    import rasa.core.utils as core_utils
    from rasa.nlu import utils as nlu_utils
    from rasa.model import get_model
    from rasa.core.interpreter import NaturalLanguageInterpreter
    from rasa.core.agent import Agent

    _endpoints = core_utils.AvailableEndpoints.read_endpoints(endpoints)

    if output:
        nlu_utils.create_dir(output)

    if os.path.isfile(model):
        model_path = get_model(model)

    if model_path:
        # Single model: Normal evaluation
        loop = asyncio.get_event_loop()
        model_path = get_model(model)
        core_path, nlu_path = get_model_subdirectories(model_path)

        _interpreter = NaturalLanguageInterpreter.create(nlu_path, _endpoints.nlu)

        _agent = Agent.load(core_path, interpreter=_interpreter)

        kwargs = minimal_kwargs(kwargs, rasa.core.test)
        loop.run_until_complete(
            rasa.core.test(stories, _agent, out_directory=output, **kwargs)
        )
    else:
        from rasa.core.test import compare, plot_curve

        compare(model, stories, output)

        story_n_path = os.path.join(model, "num_stories.json")
        number_of_stories = core_utils.read_json_file(story_n_path)
        plot_curve(output, number_of_stories)
def test_core(
    model: Optional[Text] = None,
    stories: Optional[Text] = None,
    endpoints: Optional[Text] = None,
    output: Text = DEFAULT_RESULTS_PATH,
    kwargs: Optional[Dict] = None,
):
    import rasa.core.test
    import rasa.core.utils as core_utils
    from rasa.nlu import utils as nlu_utils
    from rasa.model import get_model
    from rasa.core.interpreter import NaturalLanguageInterpreter
    from rasa.core.agent import Agent

    _endpoints = core_utils.AvailableEndpoints.read_endpoints(endpoints)

    if kwargs is None:
        kwargs = {}

    if output:
        nlu_utils.create_dir(output)

    loop = asyncio.get_event_loop()
    model_path = get_model(model)
    core_path, nlu_path = get_model_subdirectories(model_path)

    if os.path.exists(core_path) and os.path.exists(nlu_path):
        _interpreter = NaturalLanguageInterpreter.create(nlu_path, _endpoints.nlu)

        _agent = Agent.load(model_path, interpreter=_interpreter)

        kwargs = minimal_kwargs(kwargs, rasa.core.test, ["stories", "agent"])
        loop.run_until_complete(
            rasa.core.test(stories, _agent, out_directory=output, **kwargs)
        )
    else:
        print_error(
            "Not able to test. Make sure both models - core and nlu - are available."
        )
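# Minimal usage sketch for the test_core variant above (added for illustration).
# The model, stories, endpoints, and output paths are hypothetical placeholders.
def _test_core_example():
    test_core(
        model="models/20190101-000000.tar.gz",  # hypothetical packed Rasa model
        stories="data/test_stories.md",          # hypothetical evaluation stories
        endpoints="endpoints.yml",               # hypothetical endpoints config
        output="results",                        # evaluation results directory
    )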
def cross_validate(
    data: TrainingData,
    n_folds: int,
    nlu_config: Union[RasaNLUModelConfig, Text],
    report: Optional[Text] = None,
    successes: Optional[Text] = None,
    errors: Optional[Text] = "errors.json",
    confmat: Optional[Text] = None,
    histogram: Optional[Text] = None,
) -> Tuple[CVEvaluationResult, CVEvaluationResult]:
    """Stratified cross validation on data.

    Args:
        data: Training Data
        n_folds: integer, number of cv folds
        nlu_config: nlu config file
        report: path to folder where reports are stored
        successes: path to file that will contain success cases
        errors: path to file that will contain error cases
        confmat: path to file that will show the confusion matrix
        histogram: path to file that will show a histogram

    Returns:
        dictionary with key, list structure, where each entry in list
        corresponds to the relevant result for one fold
    """
    from collections import defaultdict

    if isinstance(nlu_config, str):
        nlu_config = config.load(nlu_config)

    if report:
        utils.create_dir(report)

    trainer = Trainer(nlu_config)
    trainer.pipeline = remove_pretrained_extractors(trainer.pipeline)

    intent_train_metrics = defaultdict(list)  # type: IntentMetrics
    intent_test_metrics = defaultdict(list)  # type: IntentMetrics
    entity_train_metrics = defaultdict(lambda: defaultdict(list))  # type: EntityMetrics
    entity_test_metrics = defaultdict(lambda: defaultdict(list))  # type: EntityMetrics

    intent_test_results = []  # type: List[IntentEvaluationResult]
    entity_test_results = []  # type: List[EntityEvaluationResult]
    intent_classifier_present = False
    extractors = set()  # type: Set[Text]

    for train, test in generate_folds(n_folds, data):
        interpreter = trainer.train(train)

        # calculate train accuracy
        combine_result(intent_train_metrics, entity_train_metrics, interpreter, train)
        # calculate test accuracy
        combine_result(
            intent_test_metrics,
            entity_test_metrics,
            interpreter,
            test,
            intent_test_results,
            entity_test_results,
        )

        if not extractors:
            extractors = get_entity_extractors(interpreter)

        if is_intent_classifier_present(interpreter):
            intent_classifier_present = True

    if intent_classifier_present:
        logger.info("Accumulated test folds intent evaluation results:")
        evaluate_intents(
            intent_test_results, report, successes, errors, confmat, histogram
        )

    if extractors:
        logger.info("Accumulated test folds entity evaluation results:")
        evaluate_entities(entity_test_results, extractors, report)

    return (
        CVEvaluationResult(dict(intent_train_metrics), dict(intent_test_metrics)),
        CVEvaluationResult(dict(entity_train_metrics), dict(entity_test_metrics)),
    )
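# Hedged usage sketch for cross_validate above. The training data file, pipeline
# config, and report directory are hypothetical placeholders; 5 folds is an
# arbitrary choice.
def _cross_validate_example():
    from rasa.nlu import training_data

    data = training_data.load_data("data/nlu.md")  # hypothetical NLU training data
    intent_cv, entity_cv = cross_validate(
        data,
        n_folds=5,
        nlu_config="config.yml",  # hypothetical pipeline config, loaded via config.load
        report="reports",         # accumulated fold reports are written here
        errors="errors.json",
    )
    # each CVEvaluationResult holds the train/test metrics accumulated over folds
    print(intent_cv)
    print(entity_cv)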
def test_creation_of_existing_dir(tmpdir):
    # makes sure there is no exception
    assert create_dir(tmpdir.strpath) is None
def run_evaluation(
    data_path: Text,
    model_path: Text,
    report: Optional[Text] = None,
    successes: Optional[Text] = None,
    errors: Optional[Text] = "errors.json",
    confmat: Optional[Text] = None,
    histogram: Optional[Text] = None,
    component_builder: Optional[ComponentBuilder] = None,
) -> Dict:  # pragma: no cover
    """
    Evaluate intent classification and entity extraction.

    :param data_path: path to the test data
    :param model_path: path to the model
    :param report: path to folder where reports are stored
    :param successes: path to file that will contain success cases
    :param errors: path to file that will contain error cases
    :param confmat: path to file that will show the confusion matrix
    :param histogram: path to file that will show a histogram
    :param component_builder: component builder

    :return: dictionary containing evaluation results
    """

    # get the metadata config from the package data
    interpreter = Interpreter.load(model_path, component_builder)
    test_data = training_data.load_data(data_path, interpreter.model_metadata.language)
    extractors = get_entity_extractors(interpreter)

    if is_intent_classifier_present(interpreter):
        intent_targets = get_intent_targets(test_data)
    else:
        intent_targets = [None] * len(test_data.training_examples)

    intent_results, entity_predictions, tokens = get_predictions(
        interpreter, test_data, intent_targets
    )

    if duckling_extractors.intersection(extractors):
        entity_predictions = remove_duckling_entities(entity_predictions)
        extractors = remove_duckling_extractors(extractors)

    result = {"intent_evaluation": None, "entity_evaluation": None}

    if report:
        utils.create_dir(report)

    if is_intent_classifier_present(interpreter):
        logger.info("Intent evaluation results:")
        result["intent_evaluation"] = evaluate_intents(
            intent_results, report, successes, errors, confmat, histogram
        )

    if extractors:
        entity_targets = get_entity_targets(test_data)
        logger.info("Entity evaluation results:")
        result["entity_evaluation"] = evaluate_entities(
            entity_targets, entity_predictions, tokens, extractors, report
        )

    return result