Example #1
def run_evaluation(
    data_path,
    model,
    report_folder=None,
    successes_filename=None,
    errors_filename="errors.json",
    confmat_filename=None,
    intent_hist_filename=None,
    component_builder=None,
) -> Dict:  # pragma: no cover
    """Evaluate intent classification and entity extraction."""

    # get the metadata config from the package data
    if isinstance(model, Interpreter):
        interpreter = model
    else:
        interpreter = Interpreter.load(model, component_builder)
    test_data = training_data.load_data(data_path,
                                        interpreter.model_metadata.language)

    extractors = get_entity_extractors(interpreter)

    if is_intent_classifier_present(interpreter):
        intent_targets = get_intent_targets(test_data)
    else:
        intent_targets = [None] * len(test_data.training_examples)

    intent_results, entity_predictions, tokens = get_predictions(
        interpreter, test_data, intent_targets)

    if duckling_extractors.intersection(extractors):
        entity_predictions = remove_duckling_entities(entity_predictions)
        extractors = remove_duckling_extractors(extractors)

    result = {"intent_evaluation": None, "entity_evaluation": None}

    if report_folder:
        utils.create_dir(report_folder)

    if is_intent_classifier_present(interpreter):

        logger.info("Intent evaluation results:")
        result["intent_evaluation"] = evaluate_intents(
            intent_results,
            report_folder,
            successes_filename,
            errors_filename,
            confmat_filename,
            intent_hist_filename,
        )

    if extractors:
        entity_targets = get_entity_targets(test_data)

        logger.info("Entity evaluation results:")
        result["entity_evaluation"] = evaluate_entities(
            entity_targets, entity_predictions, tokens, extractors,
            report_folder)

    return result
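
A minimal usage sketch for the run_evaluation variant defined above; the data and model locations are placeholders, not values taken from the example:

# Hypothetical call to the run_evaluation shown above; paths are placeholders.
results = run_evaluation(
    data_path="data/test_data.md",       # assumed NLU test file
    model="models/current/nlu",          # assumed trained model directory
    report_folder="reports",             # intent and entity reports are written here
    errors_filename="errors.json",
)
print(results["intent_evaluation"])
print(results["entity_evaluation"])
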
Example #2
def test_entity_evaluation_report(tmpdir_factory):
    path = tmpdir_factory.mktemp("evaluation").strpath
    report_folder = os.path.join(path, "reports")

    mock_extractors = ["A", "B"]
    report_filename_a = os.path.join(report_folder, "A_report.json")
    report_filename_b = os.path.join(report_folder, "B_report.json")
    ner_filename = os.path.join(report_folder, "ner_filename.json")

    utils.create_dir(report_folder)

    result = evaluate_entities(
        [EN_targets],
        [EN_predicted],
        [EN_tokens],
        mock_extractors,
        report_folder,
        ner_filename,
    )

    report_a = json.loads(rasa.utils.io.read_file(report_filename_a))
    report_b = json.loads(rasa.utils.io.read_file(report_filename_b))

    assert len(report_a) == 8
    assert report_a["datetime"]["support"] == 1.0
    assert report_b["macro avg"]["recall"] == 0.2
    assert result["A"]["accuracy"] == 0.75
Example #3
def test_intent_evaluation_report(tmpdir_factory):
    path = tmpdir_factory.mktemp("evaluation").strpath
    report_folder = os.path.join(path, "reports")
    report_filename = os.path.join(report_folder, "intent_report.json")

    utils.create_dir(report_folder)

    intent_results = [
        IntentEvaluationResult("", "restaurant_search",
                               "I am hungry", 0.12345),
        IntentEvaluationResult("greet", "greet",
                               "hello", 0.98765)]

    result = evaluate_intents(intent_results,
                              report_folder,
                              successes_filename=None,
                              errors_filename=None,
                              confmat_filename=None,
                              intent_hist_filename=None)

    report = json.loads(utils.read_file(report_filename))

    greet_results = {"precision": 1.0,
                     "recall": 1.0,
                     "f1-score": 1.0,
                     "support": 1}

    prediction = {'text': 'hello',
                  'intent': 'greet',
                  'predicted': 'greet',
                  'confidence': 0.98765}

    assert len(report.keys()) == 4
    assert report["greet"] == greet_results
    assert result["predictions"][0] == prediction
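
A sketch of how IntentEvaluationResult entries like the two above are typically built from an interpreter's parse output; the import paths, model location, and parse-result layout are assumptions:

# Hypothetical construction of an IntentEvaluationResult from a parsed message.
from rasa.nlu.model import Interpreter
from rasa.nlu.test import IntentEvaluationResult  # assumed import path

interpreter = Interpreter.load("models/nlu")       # assumed model directory
parsed = interpreter.parse("I am hungry")
intent = parsed.get("intent", {})
result = IntentEvaluationResult(
    "restaurant_search",                 # target (gold) intent
    intent.get("name", ""),              # predicted intent
    "I am hungry",                       # original message text
    intent.get("confidence", 0.0),
)
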
Example #4
def main():
    from rasa.core.agent import Agent
    from rasa.core.interpreter import NaturalLanguageInterpreter
    from rasa.core.utils import AvailableEndpoints, set_default_subparser
    import rasa.nlu.utils as nlu_utils
    import rasa.core.cli
    from rasa.core import utils

    loop = asyncio.get_event_loop()

    # Running as standalone python application
    arg_parser = create_argument_parser()
    set_default_subparser(arg_parser, "default")
    cmdline_arguments = arg_parser.parse_args()

    logging.basicConfig(level=cmdline_arguments.loglevel)
    _endpoints = AvailableEndpoints.read_endpoints(cmdline_arguments.endpoints)

    if cmdline_arguments.output:
        nlu_utils.create_dir(cmdline_arguments.output)

    if not cmdline_arguments.core:
        raise ValueError(
            "you must provide a core model directory to evaluate using -d / --core"
        )
    if cmdline_arguments.mode == "default":

        _interpreter = NaturalLanguageInterpreter.create(
            cmdline_arguments.nlu, _endpoints.nlu
        )

        _agent = Agent.load(cmdline_arguments.core, interpreter=_interpreter)

        stories = loop.run_until_complete(
            rasa.core.cli.train.stories_from_cli_args(cmdline_arguments)
        )

        loop.run_until_complete(
            test(
                stories,
                _agent,
                cmdline_arguments.max_stories,
                cmdline_arguments.output,
                cmdline_arguments.fail_on_prediction_errors,
                cmdline_arguments.e2e,
            )
        )

    elif cmdline_arguments.mode == "compare":
        compare(
            cmdline_arguments.core, cmdline_arguments.stories, cmdline_arguments.output
        )

        story_n_path = os.path.join(cmdline_arguments.core, "num_stories.json")

        number_of_stories = utils.read_json_file(story_n_path)
        plot_curve(cmdline_arguments.output, number_of_stories)

    logger.info("Finished evaluation")
Example #5
def test_core(
    model: Optional[Text] = None,
    stories: Optional[Text] = None,
    endpoints: Optional[Text] = None,
    output: Text = DEFAULT_RESULTS_PATH,
    kwargs: Optional[Dict] = None,
):
    import rasa.core.test
    import rasa.core.utils as core_utils
    from rasa.nlu import utils as nlu_utils
    from rasa.model import get_model
    from rasa.core.interpreter import NaturalLanguageInterpreter
    from rasa.core.agent import Agent

    _endpoints = core_utils.AvailableEndpoints.read_endpoints(endpoints)

    if kwargs is None:
        kwargs = {}

    if output:
        nlu_utils.create_dir(output)

    unpacked_model = get_model(model)
    if unpacked_model is None:
        print_error(
            "Unable to test: could not find a model. Use 'rasa train' to train a "
            "Rasa model."
        )
        return

    core_path, nlu_path = get_model_subdirectories(unpacked_model)

    if not os.path.exists(core_path):
        print_error(
            "Unable to test: could not find a Core model. Use 'rasa train' to "
            "train a model."
        )
        return

    use_e2e = kwargs["e2e"] if "e2e" in kwargs else False

    _interpreter = RegexInterpreter()
    if use_e2e:
        if os.path.exists(nlu_path):
            _interpreter = NaturalLanguageInterpreter.create(nlu_path, _endpoints.nlu)
        else:
            print_warning(
                "No NLU model found. Using default 'RegexInterpreter' for end-to-end "
                "evaluation."
            )

    _agent = Agent.load(unpacked_model, interpreter=_interpreter)

    kwargs = minimal_kwargs(kwargs, rasa.core.test, ["stories", "agent"])

    loop = asyncio.get_event_loop()
    loop.run_until_complete(
        rasa.core.test(stories, _agent, out_directory=output, **kwargs)
    )
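
A hedged usage sketch for this test_core variant; the model archive, stories file, and endpoints file are placeholders:

# Hypothetical call to the test_core shown above; all paths are placeholders.
test_core(
    model="models/20190901-120000.tar.gz",  # assumed packed Rasa model
    stories="data/test_stories.md",         # assumed test stories
    endpoints="endpoints.yml",
    output="results",
    kwargs={"e2e": True},                   # request end-to-end evaluation
)
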
Example #6
def test_core(
    model: Optional[Text] = None,
    stories: Optional[Text] = None,
    endpoints: Optional[Text] = None,
    output: Text = DEFAULT_RESULTS_PATH,
    model_path: Optional[Text] = None,
    kwargs: Optional[Dict] = None,
):
    import rasa.core.test
    import rasa.core.utils as core_utils
    from rasa.nlu import utils as nlu_utils
    from rasa.model import get_model
    from rasa.core.interpreter import NaturalLanguageInterpreter
    from rasa.core.agent import Agent

    _endpoints = core_utils.AvailableEndpoints.read_endpoints(endpoints)

    if kwargs is None:
        kwargs = {}

    if output:
        nlu_utils.create_dir(output)

    if os.path.isfile(model):
        model_path = get_model(model)

    if model_path:
        # Single model: Normal evaluation
        loop = asyncio.get_event_loop()
        model_path = get_model(model)
        core_path, nlu_path = get_model_subdirectories(model_path)

        if os.path.exists(core_path) and os.path.exists(nlu_path):
            _interpreter = NaturalLanguageInterpreter.create(nlu_path, _endpoints.nlu)

            _agent = Agent.load(core_path, interpreter=_interpreter)

            kwargs = minimal_kwargs(kwargs, rasa.core.test, ["stories", "agent"])

            loop.run_until_complete(
                rasa.core.test(stories, _agent, out_directory=output, **kwargs)
            )
        else:
            logger.warning(
                "Not able to test. Make sure both models, core and "
                "nlu, are available."
            )

    else:
        from rasa.core.test import compare, plot_curve

        compare(model, stories, output)

        story_n_path = os.path.join(model, "num_stories.json")

        number_of_stories = core_utils.read_json_file(story_n_path)
        plot_curve(output, number_of_stories)
Example #7
def run_evaluation(
    data_path: Text,
    model_path: Text,
    report: Optional[Text] = None,
    successes: Optional[Text] = None,
    errors: Optional[Text] = "errors.json",
    confmat: Optional[Text] = None,
    histogram: Optional[Text] = None,
    component_builder: Optional[ComponentBuilder] = None,
) -> Dict:  # pragma: no cover
    """
    Evaluate intent classification and entity extraction.

    :param data_path: path to the test data
    :param model_path: path to the model
    :param report: path to folder where reports are stored
    :param successes: path to file that will contain success cases
    :param errors: path to file that will contain error cases
    :param confmat: path to file that will show the confusion matrix
    :param histogram: path to file that will show a histogram
    :param component_builder: component builder

    :return: dictionary containing evaluation results
    """

    # get the metadata config from the package data
    interpreter = Interpreter.load(model_path, component_builder)

    interpreter.pipeline = remove_pretrained_extractors(interpreter.pipeline)
    test_data = training_data.load_data(data_path,
                                        interpreter.model_metadata.language)

    result = {
        "intent_evaluation": None,
        "entity_evaluation": None,
    }  # type: Dict[Text, Optional[Dict]]

    if report:
        utils.create_dir(report)

    intent_results, entity_results = get_eval_data(interpreter, test_data)

    if intent_results:
        logger.info("Intent evaluation results:")
        result["intent_evaluation"] = evaluate_intents(intent_results, report,
                                                       successes, errors,
                                                       confmat, histogram)

    if entity_results:
        logger.info("Entity evaluation results:")
        extractors = get_entity_extractors(interpreter)
        result["entity_evaluation"] = evaluate_entities(
            entity_results, extractors, report)

    return result
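
For comparison with Example #1, a sketch of calling this newer signature; the file names and paths are placeholders:

# Hypothetical call using the newer keyword names (report/successes/errors/confmat/histogram).
result = run_evaluation(
    data_path="data/test_data.md",   # assumed test data
    model_path="models/nlu",         # assumed unpacked NLU model directory
    report="reports",
    successes="successes.json",
    errors="errors.json",
    confmat="confmat.png",
    histogram="hist.png",
)
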
Example #8
    def persist(self,
                path: Text,
                persistor: Optional[Persistor] = None,
                project_name: Text = None,
                fixed_model_name: Text = None) -> Text:
        """Persist all components of the pipeline to the passed path.

        Returns the directory of the persisted model."""

        timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
        metadata = {
            "language": self.config["language"],
            "adapter": self.config['adapter'],
            "pipeline": [],
        }

        if project_name is None:
            project_name = "default"

        if fixed_model_name:
            model_name = fixed_model_name
        else:
            model_name = "model_" + timestamp

        path = make_path_absolute(path)
        dir_name = os.path.join(path, project_name, model_name)

        create_dir(dir_name)

        if self.training_data:
            metadata.update(self.training_data.persist(dir_name))

        for i, component in enumerate(self.pipeline):
            file_name = self._file_name(i, component.name)
            update = component.persist(file_name, dir_name)
            component_meta = component.component_config
            if update:
                component_meta.update(update)
            component_meta["class"] = utils.module_path_from_object(component)

            metadata["pipeline"].append(component_meta)

        Metadata(metadata, dir_name).persist(dir_name)

        if persistor is not None:
            persistor.persist(dir_name, model_name, project_name)
        logger.info("Successfully saved model into "
                    "'{}'".format(os.path.abspath(dir_name)))
        return dir_name
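
A sketch of how persist is usually reached from a trained pipeline; the Trainer import path, config file, and data file are assumptions and may differ from the codebase this example comes from:

# Hypothetical train-and-persist flow around the persist method above.
from rasa.nlu import config
from rasa.nlu.model import Trainer
from rasa.nlu.training_data import load_data  # assumed import paths

trainer = Trainer(config.load("config.yml"))   # assumed pipeline config
trainer.train(load_data("data/nlu.md"))        # assumed training data file
model_dir = trainer.persist("models/",
                            project_name="default",
                            fixed_model_name="nlu")
print(model_dir)  # e.g. models/default/nlu
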
Example #9
def test_entity_evaluation_report(tmpdir_factory):
    class EntityExtractorA(EntityExtractor):

        provides = ["entities"]

        def __init__(self, component_config=None) -> None:

            super(EntityExtractorA, self).__init__(component_config)

    class EntityExtractorB(EntityExtractor):

        provides = ["entities"]

        def __init__(self, component_config=None) -> None:

            super(EntityExtractorB, self).__init__(component_config)

    path = tmpdir_factory.mktemp("evaluation").strpath
    report_folder = os.path.join(path, "reports")

    report_filename_a = os.path.join(report_folder,
                                     "EntityExtractorA_report.json")
    report_filename_b = os.path.join(report_folder,
                                     "EntityExtractorB_report.json")

    utils.create_dir(report_folder)
    mock_interpreter = Interpreter(
        [
            EntityExtractorA({"provides": ["entities"]}),
            EntityExtractorB({"provides": ["entities"]}),
        ],
        None,
    )
    extractors = get_entity_extractors(mock_interpreter)
    result = evaluate_entities([EN_entity_result], extractors, report_folder)

    report_a = json.loads(rasa.utils.io.read_file(report_filename_a))
    report_b = json.loads(rasa.utils.io.read_file(report_filename_b))

    assert len(report_a) == 8
    assert report_a["datetime"]["support"] == 1.0
    assert report_b["macro avg"]["recall"] == 0.2
    assert result["EntityExtractorA"]["accuracy"] == 0.75
Example #10
def test_core(model: Text, stories: Text, endpoints: Text = None,
              output: Text = DEFAULT_RESULTS_PATH, model_path: Text = None,
              **kwargs: Dict):
    import rasa.core.test
    import rasa.core.utils as core_utils
    from rasa.nlu import utils as nlu_utils
    from rasa.model import get_model
    from rasa.core.interpreter import NaturalLanguageInterpreter
    from rasa.core.agent import Agent

    _endpoints = core_utils.AvailableEndpoints.read_endpoints(endpoints)

    if output:
        nlu_utils.create_dir(output)

    if os.path.isfile(model):
        model_path = get_model(model)

    if model_path:
        # Single model: Normal evaluation
        loop = asyncio.get_event_loop()
        model_path = get_model(model)
        core_path, nlu_path = get_model_subdirectories(model_path)

        _interpreter = NaturalLanguageInterpreter.create(nlu_path,
                                                         _endpoints.nlu)

        _agent = Agent.load(core_path, interpreter=_interpreter)

        kwargs = minimal_kwargs(kwargs, rasa.core.test)
        loop.run_until_complete(
            rasa.core.test(stories, _agent, out_directory=output, **kwargs))

    else:
        from rasa.core.test import compare, plot_curve

        compare(model, stories, output)

        story_n_path = os.path.join(model, 'num_stories.json')

        number_of_stories = core_utils.read_json_file(story_n_path)
        plot_curve(output, number_of_stories)
Example #11
def test_core(
    model: Optional[Text] = None,
    stories: Optional[Text] = None,
    endpoints: Optional[Text] = None,
    output: Text = DEFAULT_RESULTS_PATH,
    kwargs: Optional[Dict] = None,
):
    import rasa.core.test
    import rasa.core.utils as core_utils
    from rasa.nlu import utils as nlu_utils
    from rasa.model import get_model
    from rasa.core.interpreter import NaturalLanguageInterpreter
    from rasa.core.agent import Agent

    _endpoints = core_utils.AvailableEndpoints.read_endpoints(endpoints)

    if kwargs is None:
        kwargs = {}

    if output:
        nlu_utils.create_dir(output)

    loop = asyncio.get_event_loop()
    model_path = get_model(model)
    core_path, nlu_path = get_model_subdirectories(model_path)

    if os.path.exists(core_path) and os.path.exists(nlu_path):
        _interpreter = NaturalLanguageInterpreter.create(
            nlu_path, _endpoints.nlu)

        _agent = Agent.load(model_path, interpreter=_interpreter)

        kwargs = minimal_kwargs(kwargs, rasa.core.test, ["stories", "agent"])

        loop.run_until_complete(
            rasa.core.test(stories, _agent, out_directory=output, **kwargs))
    else:
        print_error(
            "Not able to test. Make sure both models - core and nlu - are available."
        )
Example #12
def cross_validate(
    data: TrainingData,
    n_folds: int,
    nlu_config: Union[RasaNLUModelConfig, Text],
    report: Optional[Text] = None,
    successes: Optional[Text] = None,
    errors: Optional[Text] = "errors.json",
    confmat: Optional[Text] = None,
    histogram: Optional[Text] = None,
) -> Tuple[CVEvaluationResult, CVEvaluationResult]:
    """Stratified cross validation on data.

    Args:
        data: Training Data
        n_folds: integer, number of cv folds
        nlu_config: nlu config file
        report: path to folder where reports are stored
        successes: path to file that will contain success cases
        errors: path to file that will contain error cases
        confmat: path to file that will show the confusion matrix
        histogram: path to file that will show a histogram

    Returns:
        two CVEvaluationResult tuples, one for intent and one for entity
        metrics; each holds the train and test metrics as dictionaries with a
        key to list structure, where each entry in a list corresponds to one fold
    """
    from collections import defaultdict

    if isinstance(nlu_config, str):
        nlu_config = config.load(nlu_config)

    if report:
        utils.create_dir(report)

    trainer = Trainer(nlu_config)
    trainer.pipeline = remove_pretrained_extractors(trainer.pipeline)

    intent_train_metrics = defaultdict(list)  # type: IntentMetrics
    intent_test_metrics = defaultdict(list)  # type: IntentMetrics
    entity_train_metrics = defaultdict(
        lambda: defaultdict(list))  # type: EntityMetrics
    entity_test_metrics = defaultdict(
        lambda: defaultdict(list))  # type: EntityMetrics

    intent_test_results = []  # type: List[IntentEvaluationResult]
    entity_test_results = []  # type: List[EntityEvaluationResult]
    intent_classifier_present = False
    extractors = set()  # type: Set[Text]

    for train, test in generate_folds(n_folds, data):
        interpreter = trainer.train(train)

        # calculate train accuracy
        combine_result(intent_train_metrics, entity_train_metrics, interpreter,
                       train)
        # calculate test accuracy
        combine_result(
            intent_test_metrics,
            entity_test_metrics,
            interpreter,
            test,
            intent_test_results,
            entity_test_results,
        )

        if not extractors:
            extractors = get_entity_extractors(interpreter)

        if is_intent_classifier_present(interpreter):
            intent_classifier_present = True

    if intent_classifier_present:
        logger.info("Accumulated test folds intent evaluation results:")
        evaluate_intents(intent_test_results, report, successes, errors,
                         confmat, histogram)

    if extractors:
        logger.info("Accumulated test folds entity evaluation results:")
        evaluate_entities(entity_test_results, extractors, report)

    return (
        CVEvaluationResult(dict(intent_train_metrics),
                           dict(intent_test_metrics)),
        CVEvaluationResult(dict(entity_train_metrics),
                           dict(entity_test_metrics)),
    )
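
A sketch of invoking cross_validate; the training-data loader and config path are assumptions:

# Hypothetical cross-validation run using the function above.
from rasa.nlu.training_data import load_data  # assumed import path

data = load_data("data/nlu.md")                # assumed NLU training data
intent_results, entity_results = cross_validate(
    data,
    n_folds=5,
    nlu_config="config.yml",                   # a path or a RasaNLUModelConfig
    report="reports",
)
print(intent_results)  # per-fold train/test intent metrics
print(entity_results)  # per-fold train/test entity metrics
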
Example #13
def test_creation_of_existing_dir(tmpdir):
    # makes sure there is no exception
    assert create_dir(tmpdir.strpath) is None
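
Every example on this page relies on create_dir being safe to call on a directory that already exists, which is what the test above checks. A minimal sketch of such a helper (an assumption about its behavior, not the library source):

import os

def create_dir(dir_path: str) -> None:
    """Create a directory, silently succeeding if it already exists."""
    # exist_ok makes repeated calls (as in the test above) a no-op.
    os.makedirs(dir_path, exist_ok=True)
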
Example #14
def run_evaluation(
    data_path: Text,
    model_path: Text,
    report: Optional[Text] = None,
    successes: Optional[Text] = None,
    errors: Optional[Text] = "errors.json",
    confmat: Optional[Text] = None,
    histogram: Optional[Text] = None,
    component_builder: Optional[ComponentBuilder] = None,
) -> Dict:  # pragma: no cover
    """
    Evaluate intent classification and entity extraction.

    :param data_path: path to the test data
    :param model_path: path to the model
    :param report: path to folder where reports are stored
    :param successes: path to file that will contain success cases
    :param errors: path to file that will contain error cases
    :param confmat: path to file that will show the confusion matrix
    :param histogram: path to file that will show a histogram
    :param component_builder: component builder

    :return: dictionary containing evaluation results
    """

    # get the metadata config from the package data
    interpreter = Interpreter.load(model_path, component_builder)
    test_data = training_data.load_data(data_path,
                                        interpreter.model_metadata.language)

    extractors = get_entity_extractors(interpreter)

    if is_intent_classifier_present(interpreter):
        intent_targets = get_intent_targets(test_data)
    else:
        intent_targets = [None] * len(test_data.training_examples)

    intent_results, entity_predictions, tokens = get_predictions(
        interpreter, test_data, intent_targets)

    if duckling_extractors.intersection(extractors):
        entity_predictions = remove_duckling_entities(entity_predictions)
        extractors = remove_duckling_extractors(extractors)

    result = {"intent_evaluation": None, "entity_evaluation": None}

    if report:
        utils.create_dir(report)

    if is_intent_classifier_present(interpreter):

        logger.info("Intent evaluation results:")
        result["intent_evaluation"] = evaluate_intents(intent_results, report,
                                                       successes, errors,
                                                       confmat, histogram)

    if extractors:
        entity_targets = get_entity_targets(test_data)

        logger.info("Entity evaluation results:")
        result["entity_evaluation"] = evaluate_entities(
            entity_targets, entity_predictions, tokens, extractors, report)

    return result