Esempio n. 1
0
def do_train(
        cfg,  # type: RasaNLUModelConfig
        data,  # type: Text
        path=None,  # type: Optional[Text]
        project=None,  # type: Optional[Text]
        fixed_model_name=None,  # type: Optional[Text]
        storage=None,  # type: Optional[Text]
        component_builder=None,  # type: Optional[ComponentBuilder]
        url=None,  # type: Optional[Text]
        **kwargs  # type: Any
):
    # type: (...) -> Tuple[Trainer, Interpreter, Text]
    """Loads the trainer and the data and runs the training of the model."""

    # Ensure we are training a model that we can save in the end
    # WARN: there is still a race condition if a model with the same name is
    # trained in another subprocess
    trainer = Trainer(cfg, component_builder)
    persistor = create_persistor(storage)
    if url is not None:
        training_data = load_data_from_url(url, cfg.language)
    else:
        training_data = load_data(data, cfg.language)

    interpreter = trainer.train(training_data, **kwargs)

    if path:
        persisted_path = trainer.persist(path, persistor, project,
                                         fixed_model_name)
    else:
        persisted_path = None

    return trainer, interpreter, persisted_path
Esempio n. 2
0
def run_evaluation(data_path,
                   model_path,
                   errors_filename='errors.json',
                   confmat_filename=None,
                   intent_hist_filename=None,
                   component_builder=None):  # pragma: no cover
    """Evaluate intent classification and entity extraction."""

    # get the metadata config from the package data
    interpreter = Interpreter.load(model_path, component_builder)
    test_data = training_data.load_data(data_path,
                                        interpreter.model_metadata.language)
    extractors = get_entity_extractors(interpreter)
    entity_predictions, tokens = get_entity_predictions(interpreter, test_data)
    if duckling_extractors.intersection(extractors):
        entity_predictions = remove_duckling_entities(entity_predictions)
        extractors = remove_duckling_extractors(extractors)

    if is_intent_classifier_present(interpreter):
        intent_targets = get_intent_targets(test_data)
        intent_results = get_intent_predictions(intent_targets, interpreter,
                                                test_data)
        logger.info("Intent evaluation results:")

        evaluate_intents(intent_results, errors_filename, confmat_filename,
                         intent_hist_filename)

    if extractors:
        entity_targets = get_entity_targets(test_data)

        logger.info("Entity evaluation results:")
        evaluate_entities(entity_targets, entity_predictions, tokens,
                          extractors)
Esempio n. 3
0
def convert_training_data(data_file, out_file, output_format, language):
    td = training_data.load_data(data_file, language)

    if output_format == 'md':
        output = td.as_markdown()
    else:
        output = td.as_json(indent=2)

    write_to_file(out_file, output)
Esempio n. 4
0
def main():
    parser = create_argument_parser()
    cmdline_args = parser.parse_args()
    utils.configure_colored_logging(cmdline_args.loglevel)

    if cmdline_args.mode == "crossvalidation":

        # TODO: move parsing into sub parser
        # manual check argument dependency
        if cmdline_args.model is not None:
            parser.error("Crossvalidation will train a new model "
                         "- do not specify external model.")

        if cmdline_args.config is None:
            parser.error("Crossvalidation will train a new model "
                         "you need to specify a model configuration.")

        nlu_config = config.load(cmdline_args.config)
        data = training_data.load_data(cmdline_args.data)
        data = drop_intents_below_freq(data, cutoff=5)
        results, entity_results = run_cv_evaluation(data,
                                                    int(cmdline_args.folds),
                                                    nlu_config)
        logger.info("CV evaluation (n={})".format(cmdline_args.folds))

        if any(results):
            logger.info("Intent evaluation results")
            return_results(results.train, "train")
            return_results(results.test, "test")
        if any(entity_results):
            logger.info("Entity evaluation results")
            return_entity_results(entity_results.train, "train")
            return_entity_results(entity_results.test, "test")

    elif cmdline_args.mode == "evaluation":
        run_evaluation(cmdline_args.data, cmdline_args.model,
                       cmdline_args.errors, cmdline_args.confmat,
                       cmdline_args.histogram)

    logger.info("Finished evaluation")