Example #1
async def _train_core_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    additional_arguments: Optional[Dict] = None,
) -> Optional[Text]:
    """Train Core with validated training and config data."""

    import rasa.core.train

    with ExitStack() as stack:
        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(
                TempDirectoryPath(tempfile.mkdtemp()))

        # normal (not compare) training
        print_color("Training Core model...", color=bcolors.OKBLUE)
        # bf mod
        domain, config = await asyncio.gather(
            file_importer.get_domain(),
            file_importer.get_core_config()  #.get_config()
        )
        # /bf mod
        await rasa.core.train(
            domain_file=domain,
            training_resource=file_importer,
            output_path=os.path.join(_train_path,
                                     DEFAULT_CORE_SUBDIRECTORY_NAME),
            policy_config=config,
            additional_arguments=additional_arguments,
        )
        print_color("Core model training completed.", color=bcolors.OKBLUE)

        if train_path is None:
            # Only Core was trained.
            new_fingerprint = await model.model_fingerprint(file_importer)
            return model.package_model(
                fingerprint=new_fingerprint,
                output_directory=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="core-",
            )

        return _train_path
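The `ExitStack` branch above only cleans up the training directory when the function created it itself; a caller-supplied `train_path` is left untouched. Below is a minimal stdlib-only sketch of that pattern, using `tempfile.TemporaryDirectory` in place of Rasa's `TempDirectoryPath`; the helper name is illustrative, not part of Rasa.

import tempfile
from contextlib import ExitStack
from typing import Optional


def _resolve_train_path(train_path: Optional[str], stack: ExitStack) -> str:
    if train_path:
        # Caller-provided directory: reuse it and register no cleanup.
        return train_path
    # Otherwise create a temporary directory the stack removes on exit.
    return stack.enter_context(tempfile.TemporaryDirectory())


with ExitStack() as stack:
    working_dir = _resolve_train_path(None, stack)
    # ... write training output into `working_dir`; it is deleted on exit
    # only if it was created as a temporary directory.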
Example #2
async def test_rasa_file_importer_with_invalid_domain(tmp_path: Path):
    config_file = tmp_path / "config.yml"
    config_file.write_text("")
    importer = TrainingDataImporter.load_from_dict({}, str(config_file), None, [])

    actual = await importer.get_domain()
    assert actual.as_dict() == Domain.empty().as_dict()
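Outside pytest-asyncio, the same importer call can be driven with an explicit event loop, as the synchronous examples further down do. A small sketch under that assumption; the config path is a placeholder and should point to an existing (possibly empty) config file.

import asyncio

from rasa.importers.importer import TrainingDataImporter


async def _load_domain(config_file: str):
    importer = TrainingDataImporter.load_from_dict({}, config_file, None, [])
    return await importer.get_domain()


# With no domain file given, the importer falls back to an empty domain.
domain = asyncio.get_event_loop().run_until_complete(_load_domain("config.yml"))
print(domain.is_empty())  # True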
Example #3
File: train.py Project: zylhub/rasa
async def _train_nlu_async(
    config: Text,
    nlu_data: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    additional_arguments: Optional[Dict] = None,
):
    if not nlu_data:
        print_error(
            "No NLU data given. Please provide NLU data in order to train "
            "a Rasa NLU model using the '--nlu' argument.")
        return

    # Training NLU only, hence the training files still have to be selected.
    file_importer = TrainingDataImporter.load_nlu_importer_from_config(
        config, training_data_paths=[nlu_data])

    training_datas = await file_importer.get_nlu_data()
    if training_datas.is_empty():
        print_error(f"Path '{nlu_data}' doesn't contain valid NLU data in it. "
                    "Please verify the data format. "
                    "The NLU model training will be skipped now.")
        return

    return await _train_nlu_with_validated_data(
        file_importer,
        output=output,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
        persist_nlu_training_data=persist_nlu_training_data,
        additional_arguments=additional_arguments,
    )
Example #4
def interactive(args: argparse.Namespace) -> None:
    _set_not_required_args(args)
    file_importer = TrainingDataImporter.load_from_config(
        args.config, args.domain, args.data
    )

    if args.model is None:
        loop = asyncio.get_event_loop()
        story_graph = loop.run_until_complete(file_importer.get_stories())
        if not story_graph or story_graph.is_empty():
            utils.print_error_and_exit(
                "Could not run interactive learning without either core data or a model containing core data."
            )

        zipped_model = train.train_core(args) if args.core_only else train.train(args)
        if not zipped_model:
            utils.print_error_and_exit(
                "Could not train an initial model. Either pass paths "
                "to the relevant training files (`--data`, `--config`, `--domain`), "
                "or use 'rasa train' to train a model."
            )
    else:
        zipped_model = get_provided_model(args.model)
        if not (zipped_model and os.path.exists(zipped_model)):
            utils.print_error_and_exit(
                f"Interactive learning process cannot be started as no initial model was "
                f"found at path '{args.model}'.  Use 'rasa train' to train a model."
            )
        if not args.skip_visualization:
            logger.info(f"Loading visualization data from {args.data}.")

    perform_interactive_learning(args, zipped_model, file_importer)
Example #5
async def test_without_additional_e2e_examples(tmp_path: Path):
    domain_path = tmp_path / "domain.yml"
    domain_path.write_text(Domain.empty().as_yaml())

    config_path = tmp_path / "config.yml"
    config_path.touch()

    existing = TrainingDataImporter.load_from_dict({}, str(config_path),
                                                   str(domain_path), [])

    stories = StoryGraph([
        StoryStep(events=[
            UserUttered("greet_from_stories", {"name": "greet_from_stories"}),
            ActionExecuted("utter_greet_from_stories"),
        ])
    ])

    # Patch to return our test stories
    existing.get_stories = asyncio.coroutine(lambda *args: stories)

    importer = E2EImporter(existing)

    training_data = await importer.get_nlu_data()

    assert training_data.training_examples
    assert training_data.is_empty()
    assert not training_data.without_empty_e2e_examples().training_examples
Example #6
async def test_import_nlu_training_data_with_default_actions(project: Text):
    config_path = os.path.join(project, DEFAULT_CONFIG_PATH)
    domain_path = os.path.join(project, DEFAULT_DOMAIN_PATH)
    default_data_path = os.path.join(project, DEFAULT_DATA_PATH)
    importer = TrainingDataImporter.load_from_dict({}, config_path,
                                                   domain_path,
                                                   [default_data_path])

    assert isinstance(importer, E2EImporter)
    importer_without_e2e = importer.importer

    # Check additional NLU training data from domain was added
    nlu_data = await importer.get_nlu_data()

    assert len(nlu_data.training_examples) > len(
        (await importer_without_e2e.get_nlu_data()).training_examples)

    from rasa.core.actions import action

    extended_training_data = await importer.get_nlu_data()
    assert all(
        Message(data={
            ACTION_NAME: action_name,
            ACTION_TEXT: ""
        }) in extended_training_data.training_examples
        for action_name in action.default_action_names())
Example #7
def get_training_data():
    importer = TrainingDataImporter.load_from_config("../config.yml",
                                                     "../base/domain-eng.yml",
                                                     ["../base/data/"])
    loop = asyncio.get_event_loop()
    data = loop.run_until_complete(importer.get_nlu_data())
    return set(i.text.lower().strip() for i in data.intent_examples)
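As a usage note, the helper above returns a deduplicated set of lowercased, whitespace-stripped intent example texts, so membership checks are effectively case-insensitive. A hypothetical call:

examples = get_training_data()
print(len(examples))              # number of distinct normalized NLU examples
print("hello there" in examples)  # membership test against normalized text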
Example #8
async def test_adding_e2e_actions_to_domain(project: Text):
    config_path = os.path.join(project, DEFAULT_CONFIG_PATH)
    domain_path = os.path.join(project, DEFAULT_DOMAIN_PATH)
    default_data_path = os.path.join(project, DEFAULT_DATA_PATH)
    existing = TrainingDataImporter.load_from_dict({}, config_path,
                                                   domain_path,
                                                   [default_data_path])

    additional_actions = ["Hi Joey.", "it's sunny outside."]
    stories = StoryGraph([
        StoryStep(events=[
            UserUttered("greet_from_stories", {"name": "greet_from_stories"}),
            ActionExecuted("utter_greet_from_stories"),
        ]),
        StoryStep(events=[
            UserUttered("how are you doing?", {"name": "greet_from_stories"}),
            ActionExecuted(additional_actions[0],
                           action_text=additional_actions[0]),
            ActionExecuted(additional_actions[1],
                           action_text=additional_actions[1]),
            ActionExecuted(additional_actions[1],
                           action_text=additional_actions[1]),
        ]),
    ])

    # Patch to return our test stories
    existing.get_stories = asyncio.coroutine(lambda *args: stories)

    importer = E2EImporter(existing)
    domain = await importer.get_domain()

    assert all(action_name in domain.action_names
               for action_name in additional_actions)
Example #9
async def _train_nlu_async(
    config: Text,
    nlu_data: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
):
    # Training NLU only, hence the training files still have to be selected.
    file_importer = TrainingDataImporter.load_nlu_importer_from_config(
        config, training_data_paths=[nlu_data])

    training_datas = await file_importer.get_nlu_data()
    if training_datas.is_empty():
        print_error(
            "No NLU data given. Please provide NLU data in order to train "
            "a Rasa NLU model using the '--nlu' argument.")
        return

    return await _train_nlu_with_validated_data(
        file_importer,
        output=output,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
        persist_nlu_training_data=persist_nlu_training_data,
    )
Example #10
async def train_comparison_models(
    story_file: Text,
    domain: Text,
    output_path: Text = "",
    exclusion_percentages: Optional[List] = None,
    policy_configs: Optional[List] = None,
    runs: int = 1,
    dump_stories: bool = False,
    additional_arguments: Optional[Dict] = None,
):
    """Train multiple models for comparison of policies"""
    from rasa import model
    from rasa.importers.importer import TrainingDataImporter

    exclusion_percentages = exclusion_percentages or []
    policy_configs = policy_configs or []

    for r in range(runs):
        logging.info("Starting run {}/{}".format(r + 1, runs))

        for current_run, percentage in enumerate(exclusion_percentages, 1):
            for policy_config in policy_configs:

                file_importer = TrainingDataImporter.load_core_importer_from_config(
                    policy_config, domain, [story_file])

                config_name = os.path.splitext(
                    os.path.basename(policy_config))[0]
                logging.info("Starting to train {} round {}/{}"
                             " with {}% exclusion"
                             "".format(config_name, current_run,
                                       len(exclusion_percentages), percentage))

                with TempDirectoryPath(tempfile.mkdtemp()) as train_path:
                    _, new_fingerprint = await asyncio.gather(
                        train(
                            domain,
                            file_importer,
                            train_path,
                            policy_config=policy_config,
                            exclusion_percentage=percentage,
                            additional_arguments=additional_arguments,
                            dump_stories=dump_stories,
                        ),
                        model.model_fingerprint(file_importer),
                    )

                    output_dir = os.path.join(output_path, "run_" + str(r + 1))
                    model_name = config_name + PERCENTAGE_KEY + str(percentage)
                    model.package_model(
                        fingerprint=new_fingerprint,
                        output_directory=output_dir,
                        train_path=train_path,
                        fixed_model_name=model_name,
                    )
Example #11
async def test_example_bot_training_data_not_raises(config_file: Text,
                                                    domain_file: Text,
                                                    data_folder: Text):

    importer = TrainingDataImporter.load_from_config(config_file, domain_file,
                                                     data_folder)

    with pytest.warns(None) as record:
        await importer.get_nlu_data()
        await importer.get_stories()

    assert not len(record)
Example #12
async def train_core_async(
    domain: Union[Domain, Text],
    config: Text,
    stories: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    additional_arguments: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Core model.

    Args:
        domain: Path to the domain file.
        config: Path to the config file for Core.
        stories: Path to the Core training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary
            directory, otherwise in the provided directory.
        fixed_model_name: Name of model to be stored.
        additional_arguments: Additional training parameters.

    Returns:
        If `train_path` is given it returns the path to the directory with
        the trained model files, otherwise the path to the model archive.

    """

    file_importer = TrainingDataImporter.load_core_importer_from_config(
        config, domain, [stories]
    )
    domain = await file_importer.get_domain()
    if domain.is_empty():
        print_error(
            "Core training was skipped because no valid domain file was found. "
            "Please specify a valid domain using '--domain' argument or check if the provided domain file exists."
        )
        return None

    if not await file_importer.get_stories():
        print_error(
            "No stories given. Please provide stories in order to "
            "train a Rasa Core model using the '--stories' argument."
        )
        return

    return await _train_core_with_validated_data(
        file_importer,
        output=output,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
        additional_arguments=additional_arguments,
    )
Example #13
async def train_async(
    domain: Union[Domain, Text],
    config: Text,
    training_files: Optional[Union[Text, List[Text]]],
    output_path: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    core_additional_arguments: Optional[Dict] = None,
    nlu_additional_arguments: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        persist_nlu_training_data: `True` if the NLU training data should be persisted
                                   with the model.
        core_additional_arguments: Additional training parameters for core training.
        nlu_additional_arguments: Additional training parameters forwarded to training
                                  method of each NLU component.

    Returns:
        Path of the trained model archive.
    """

    file_importer = TrainingDataImporter.load_from_config(
        config, domain, training_files
    )
    with ExitStack() as stack:
        train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))

        domain = await file_importer.get_domain()

        if domain.is_empty():
            return await handle_domain_if_not_exists(
                file_importer, output_path, fixed_model_name
            )

        return await _train_async_internal(
            file_importer,
            train_path,
            output_path,
            force_training,
            fixed_model_name,
            persist_nlu_training_data,
            core_additional_arguments=core_additional_arguments,
            nlu_additional_arguments=nlu_additional_arguments,
        )
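A hedged sketch of driving `train_async` from synchronous code, in the same `run_until_complete` style used by the CLI-facing examples in this listing; all paths are placeholders.

import asyncio

model_archive = asyncio.get_event_loop().run_until_complete(
    train_async(
        domain="domain.yml",
        config="config.yml",
        training_files=["data/"],
        output_path="models/",
    )
)
print(model_archive)  # path to the packaged model archive, or None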
Example #14
def test_load_from_config(tmpdir: Path):
    import rasa.utils.io as io_utils

    config_path = str(tmpdir / "config.yml")

    io_utils.write_yaml({"importers": [{
        "name": "MultiProjectImporter"
    }]}, config_path)

    importer = TrainingDataImporter.load_from_config(config_path)
    assert isinstance(importer, CombinedDataImporter)
    assert isinstance(importer._importers[0], MultiProjectImporter)
Example #15
def test_load_from_config(tmpdir: Path):
    config_path = str(tmpdir / "config.yml")

    rasa.shared.utils.io.write_yaml(
        {"importers": [{
            "name": "MultiProjectImporter"
        }]}, config_path)

    importer = TrainingDataImporter.load_from_config(config_path)
    assert isinstance(importer, E2EImporter)
    assert isinstance(importer.importer, RetrievalModelsDataImporter)
    assert isinstance(importer.importer._importer._importers[0],
                      MultiProjectImporter)
Example #16
def test_load_from_dict(config: Dict,
                        expected: List[Type["TrainingDataImporter"]],
                        project: Text):
    config_path = os.path.join(project, DEFAULT_CONFIG_PATH)
    domain_path = os.path.join(project, DEFAULT_DOMAIN_PATH)
    default_data_path = os.path.join(project, DEFAULT_DATA_PATH)
    actual = TrainingDataImporter.load_from_dict(config, config_path,
                                                 domain_path,
                                                 [default_data_path])

    assert isinstance(actual, CombinedDataImporter)

    actual_importers = [i.__class__ for i in actual._importers]
    assert actual_importers == expected
Example #17
async def test_eval_data(component_builder, tmpdir, project):
    _config = RasaNLUModelConfig({
        "pipeline": [
            {
                "name": "WhitespaceTokenizer"
            },
            {
                "name": "CountVectorsFeaturizer"
            },
            {
                "name": "DIETClassifier",
                "epochs": 2
            },
            {
                "name": "ResponseSelector",
                "epochs": 2
            },
        ],
        "language":
        "en",
    })

    config_path = os.path.join(project, "config.yml")
    data_importer = TrainingDataImporter.load_nlu_importer_from_config(
        config_path,
        training_data_paths=[
            "data/examples/rasa/demo-rasa.md",
            "data/examples/rasa/demo-rasa-responses.md",
        ],
    )

    (_, _, persisted_path) = await train(
        _config,
        path=tmpdir.strpath,
        data=data_importer,
        component_builder=component_builder,
        persist_nlu_training_data=True,
    )

    interpreter = Interpreter.load(persisted_path, component_builder)

    data = await data_importer.get_nlu_data()
    intent_results, response_selection_results, entity_results, = get_eval_data(
        interpreter, data)

    assert len(intent_results) == 46
    assert len(response_selection_results) == 46
    assert len(entity_results) == 46
Example #18
async def test_example_bot_training_on_initial_project(tmp_path: Path):
    # we need to test this one separately, as we can't test it in place
    # configuration suggestions would otherwise change the initial file
    scaffold.create_initial_project(str(tmp_path))

    importer = TrainingDataImporter.load_from_config(
        str(tmp_path / "config.yml"),
        str(tmp_path / "domain.yml"),
        str(tmp_path / "data"),
    )

    with pytest.warns(None) as record:
        await importer.get_nlu_data()
        await importer.get_stories()

    assert not len(record)
Example #19
async def test_use_of_interface():
    importer = TrainingDataImporter()

    functions_to_test = [
        lambda: importer.get_config(),
        lambda: importer.get_stories(),
        lambda: importer.get_nlu_data(),
        lambda: importer.get_domain(),
    ]
    for f in functions_to_test:
        with pytest.raises(NotImplementedError):
            await f()
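Because the bare interface raises `NotImplementedError` for all four methods exercised above, a custom importer only has to override them. A minimal sketch, assuming the Rasa 1.x import layout used by most snippets in this listing (module paths differ in later versions); the class name is illustrative.

from typing import Dict, Optional, Text

from rasa.core.domain import Domain
from rasa.core.training.structures import StoryGraph
from rasa.importers.importer import TrainingDataImporter
from rasa.nlu.training_data import TrainingData


class EmptyDataImporter(TrainingDataImporter):
    """Returns empty objects instead of raising NotImplementedError."""

    async def get_config(self) -> Dict:
        return {}

    async def get_domain(self) -> Domain:
        return Domain.empty()

    async def get_stories(self, *args, **kwargs) -> StoryGraph:
        return StoryGraph([])

    async def get_nlu_data(self, language: Optional[Text] = "en") -> TrainingData:
        return TrainingData()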
Example #20
async def test_nlu_only(project: Text):
    config_path = os.path.join(project, DEFAULT_CONFIG_PATH)
    default_data_path = os.path.join(project, DEFAULT_DATA_PATH)
    actual = TrainingDataImporter.load_nlu_importer_from_config(
        config_path, training_data_paths=[default_data_path])

    assert isinstance(actual, NluDataImporter)

    stories = await actual.get_stories()
    assert stories.is_empty()

    domain = await actual.get_domain()
    assert domain.is_empty()

    config = await actual.get_config()
    assert config

    nlu_data = await actual.get_nlu_data()
    assert not nlu_data.is_empty()
Example #21
async def test_nlu_data_domain_sync_with_retrieval_intents(project: Text):
    config_path = os.path.join(project, DEFAULT_CONFIG_PATH)
    domain_path = "data/test_domains/default_retrieval_intents.yml"
    data_paths = [
        "data/test_nlu/default_retrieval_intents.md",
        "data/test_responses/default.md",
    ]
    base_data_importer = TrainingDataImporter.load_from_dict({}, config_path,
                                                             domain_path,
                                                             data_paths)

    nlu_importer = NluDataImporter(base_data_importer)
    core_importer = CoreDataImporter(base_data_importer)

    importer = RetrievalModelsDataImporter(
        CombinedDataImporter([nlu_importer, core_importer]))
    domain = await importer.get_domain()
    nlu_data = await importer.get_nlu_data()

    assert domain.retrieval_intents == ["chitchat"]
    assert domain.intent_properties["chitchat"].get("is_retrieval_intent")
    assert domain.templates == nlu_data.responses
    assert "utter_chitchat" in domain.action_names
Example #22
async def test_formbot_example():
    sys.path.append("examples/formbot/")
    project = Path("examples/formbot/")
    config = str(project / "config.yml")
    domain = str(project / "domain.yml")
    training_dir = project / "data"
    training_files = [
        str(training_dir / "rules.yml"),
        str(training_dir / "stories.yml"),
    ]
    importer = TrainingDataImporter.load_from_config(config, domain,
                                                     training_files)

    endpoint = EndpointConfig("https://example.com/webhooks/actions")
    endpoints = AvailableEndpoints(action=endpoint)
    agent = await train(
        domain,
        importer,
        str(project / "models" / "dialogue"),
        endpoints=endpoints,
        policy_config="examples/formbot/config.yml",
    )

    async def mock_form_happy_path(input_text, output_text, slot=None):
        if slot:
            form = "restaurant_form"
            template = f"utter_ask_{slot}"
        else:
            form = None
            template = "utter_submit"
        response = {
            "events": [
                {
                    "event": "form",
                    "name": form,
                    "timestamp": None
                },
                {
                    "event": "slot",
                    "timestamp": None,
                    "name": "requested_slot",
                    "value": slot,
                },
            ],
            "responses": [{
                "template": template
            }],
        }
        with aioresponses() as mocked:
            mocked.post("https://example.com/webhooks/actions",
                        payload=response,
                        repeat=True)
            responses = await agent.handle_text(input_text)
            assert responses[0]["text"] == output_text

    async def mock_form_unhappy_path(input_text, output_text, slot):
        response_error = {
            "error":
            f"Failed to extract slot {slot} with action restaurant_form",
            "action_name": "restaurant_form",
        }
        with aioresponses() as mocked:
            # noinspection PyTypeChecker
            mocked.post(
                "https://example.com/webhooks/actions",
                repeat=True,
                exception=ClientResponseError(400, "",
                                              json.dumps(response_error)),
            )
            responses = await agent.handle_text(input_text)
            assert responses[0]["text"] == output_text

    await mock_form_happy_path("/request_restaurant",
                               "what cuisine?",
                               slot="cuisine")
    await mock_form_unhappy_path("/chitchat", "chitchat", slot="cuisine")
    await mock_form_happy_path('/inform{"cuisine": "mexican"}',
                               "how many people?",
                               slot="num_people")
    await mock_form_happy_path('/inform{"number": "2"}',
                               "do you want to seat outside?",
                               slot="outdoor_seating")
    await mock_form_happy_path("/affirm",
                               "please provide additional preferences",
                               slot="preferences")

    responses = await agent.handle_text("/restart")
    assert responses[0]["text"] == "restarted"

    responses = await agent.handle_text("/greet")
    assert (responses[0]["text"] ==
            "Hello! I am restaurant search assistant! How can I help?")

    await mock_form_happy_path("/request_restaurant",
                               "what cuisine?",
                               slot="cuisine")
    await mock_form_happy_path('/inform{"cuisine": "mexican"}',
                               "how many people?",
                               slot="num_people")
    await mock_form_happy_path('/inform{"number": "2"}',
                               "do you want to seat outside?",
                               slot="outdoor_seating")
    await mock_form_unhappy_path("/stop",
                                 "do you want to continue?",
                                 slot="outdoor_seating")
    await mock_form_happy_path("/affirm",
                               "do you want to seat outside?",
                               slot="outdoor_seating")
    await mock_form_happy_path("/affirm",
                               "please provide additional preferences",
                               slot="preferences")
    await mock_form_happy_path(
        "/deny",
        "please give your feedback on your experience so far",
        slot="feedback")
    await mock_form_happy_path('/inform{"feedback": "great"}', "All done!")

    responses = await agent.handle_text("/thankyou")
    assert responses[0]["text"] == "you are welcome :)"
Example #23
async def train_comparison_models(
    story_file: Text,
    domain: Text,
    output_path: Text = "",
    exclusion_percentages: Optional[List] = None,
    policy_configs: Optional[List] = None,
    runs: int = 1,
    dump_stories: bool = False,
    kwargs: Optional[Dict] = None,
):
    """Train multiple models for comparison of policies"""
    from rasa.core import config
    from rasa import model
    from rasa.importers.importer import TrainingDataImporter

    exclusion_percentages = exclusion_percentages or []
    policy_configs = policy_configs or []

    for r in range(runs):
        logging.info("Starting run {}/{}".format(r + 1, runs))

        for current_run, percentage in enumerate(exclusion_percentages, 1):
            for policy_config in policy_configs:
                policies = config.load(policy_config)

                if len(policies) > 1:
                    raise ValueError(
                        "You can only specify one policy per model for comparison"
                    )

                file_importer = TrainingDataImporter.load_core_importer_from_config(
                    policy_config, domain, [story_file])

                policy_name = type(policies[0]).__name__
                logging.info("Starting to train {} round {}/{}"
                             " with {}% exclusion"
                             "".format(policy_name, current_run,
                                       len(exclusion_percentages), percentage))

                with TempDirectoryPath(tempfile.mkdtemp()) as train_path:
                    await train(
                        domain,
                        file_importer,
                        train_path,
                        policy_config=policy_config,
                        exclusion_percentage=percentage,
                        kwargs=kwargs,
                        dump_stories=dump_stories,
                    )

                    new_fingerprint = await model.model_fingerprint(
                        file_importer)

                    output_dir = os.path.join(output_path, "run_" + str(r + 1))
                    model_name = policy_name + str(current_run)
                    model.package_model(
                        fingerprint=new_fingerprint,
                        output_directory=output_dir,
                        train_path=train_path,
                        fixed_model_name=model_name,
                    )
Example #24
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    additional_arguments: Optional[Dict],
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU). Use only from `train_async`.

    Args:
        file_importer: `TrainingDataImporter` which supplies the training data.
        train_path: Directory in which to train the model.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        persist_nlu_training_data: `True` if the NLU training data should be persisted
                                   with the model.
        fixed_model_name: Name of model to be stored.
        additional_arguments: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """

    stories, nlu_data = await asyncio.gather(file_importer.get_stories(),
                                             file_importer.get_nlu_data())

    if stories.is_empty() and nlu_data.is_empty():
        print_error(
            "No training data given. Please provide stories and NLU data in "
            "order to train a Rasa model using the '--data' argument.")
        return

    if stories.is_empty():
        print_warning(
            "No stories present. Just a Rasa NLU model will be trained.")
        return await _train_nlu_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
        )

    if nlu_data.is_empty():
        print_warning(
            "No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            additional_arguments=additional_arguments,
        )

    new_fingerprint = await model.model_fingerprint(file_importer)
    old_model = model.get_latest_model(output_path)
    fingerprint_comparison = FingerprintComparisonResult(
        force_training=force_training)
    if not force_training:
        fingerprint_comparison = model.should_retrain(new_fingerprint,
                                                      old_model, train_path)

    if fingerprint_comparison.is_training_required():
        await _do_training(
            file_importer,
            output_path=output_path,
            train_path=train_path,
            fingerprint_comparison_result=fingerprint_comparison,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            additional_arguments=additional_arguments,
        )

        return model.package_model(
            fingerprint=new_fingerprint,
            output_directory=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
        )

    print_success("Nothing changed. You can use the old model stored at '{}'."
                  "".format(os.path.abspath(old_model)))
    return old_model
Example #25
async def test_import_nlu_training_data_from_e2e_stories(project: Text):
    config_path = os.path.join(project, DEFAULT_CONFIG_PATH)
    domain_path = os.path.join(project, DEFAULT_DOMAIN_PATH)
    default_data_path = os.path.join(project, DEFAULT_DATA_PATH)
    importer = TrainingDataImporter.load_from_dict({}, config_path,
                                                   domain_path,
                                                   [default_data_path])

    # The `E2EImporter` correctly wraps the underlying `CombinedDataImporter`
    assert isinstance(importer, E2EImporter)
    importer_without_e2e = importer.importer

    stories = StoryGraph([
        StoryStep(events=[
            SlotSet("some slot", "doesn't matter"),
            UserUttered("greet_from_stories", {"name": "greet_from_stories"}),
            ActionExecuted("utter_greet_from_stories"),
        ]),
        StoryStep(events=[
            UserUttered("how are you doing?"),
            ActionExecuted("utter_greet_from_stories", action_text="Hi Joey."),
        ]),
    ])

    # Patch to return our test stories
    importer_without_e2e.get_stories = asyncio.coroutine(lambda *args: stories)

    # The wrapping `E2EImporter` simply forwards these method calls
    assert (await importer_without_e2e.get_stories()).as_story_string() == (
        await importer.get_stories()).as_story_string()
    assert (await importer_without_e2e.get_config()) == (await
                                                         importer.get_config())

    # Check additional NLU training data from stories was added
    nlu_data = await importer.get_nlu_data()

    # The `E2EImporter` adds NLU training data based on our training stories
    assert len(nlu_data.training_examples) > len(
        (await importer_without_e2e.get_nlu_data()).training_examples)

    # Check if the NLU training data was added correctly from the story training data
    expected_additional_messages = [
        Message(data={
            TEXT: "greet_from_stories",
            INTENT_NAME: "greet_from_stories"
        }),
        Message(data={
            ACTION_NAME: "utter_greet_from_stories",
            ACTION_TEXT: ""
        }),
        Message(data={
            TEXT: "how are you doing?",
            INTENT_NAME: None
        }),
        Message(data={
            ACTION_NAME: "utter_greet_from_stories",
            ACTION_TEXT: "Hi Joey."
        }),
    ]

    assert all(m in nlu_data.training_examples
               for m in expected_additional_messages)
Example #26
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    kwargs: Optional[Dict],
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU). Use only from `train_async`.

    Args:
        file_importer: `TrainingDataImporter` which supplies the training data.
        train_path: Directory in which to train the model.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        persist_nlu_training_data: `True` if the NLU training data should be persisted
                                   with the model.
        fixed_model_name: Name of model to be stored.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """

    stories, nlu_data = await asyncio.gather(file_importer.get_stories(),
                                             file_importer.get_nlu_data())

    # if stories.is_empty() and nlu_data.is_empty():
    #     print_error(
    #         "No training data given. Please provide stories and NLU data in "
    #         "order to train a Rasa model using the '--data' argument."
    #     )
    #     return

    # if stories.is_empty():
    #     print_warning("No stories present. Just a Rasa NLU model will be trained.")
    #     return await _train_nlu_with_validated_data(
    #         file_importer,
    #         output=output_path,
    #         fixed_model_name=fixed_model_name,
    #         persist_nlu_training_data=persist_nlu_training_data,
    #     )

    # if nlu_data.is_empty():
    #     print_warning("No NLU data present. Just a Rasa Core model will be trained.")
    #     return await _train_core_with_validated_data(
    #         file_importer,
    #         output=output_path,
    #         fixed_model_name=fixed_model_name,
    #         kwargs=kwargs,
    #     )

    new_fingerprint = await model.model_fingerprint(file_importer)
    old_model = model.get_latest_model(output_path)
    fingerprint_comparison = FingerprintComparisonResult(
        force_training=force_training)
    if not force_training:
        fingerprint_comparison = model.should_retrain(new_fingerprint,
                                                      old_model, train_path)

    # bf mod >
    domain = await file_importer.get_domain()
    core_untrainable = domain.is_empty() or stories.is_empty()
    nlu_untrainable = [l for l, d in nlu_data.items() if d.is_empty()]
    fingerprint_comparison.core = fingerprint_comparison.core and not core_untrainable
    fingerprint_comparison.nlu = [
        l for l in fingerprint_comparison.nlu if l not in nlu_untrainable
    ]

    if core_untrainable:
        print_color(
            "Skipping Core training since domain or stories are empty.",
            color=bcolors.OKBLUE)
    for lang in nlu_untrainable:
        print_color(
            "No NLU data found for language <{}>, skipping training...".format(
                lang),
            color=bcolors.OKBLUE)
    # </ bf mod

    if fingerprint_comparison.is_training_required():
        await _do_training(
            file_importer,
            output_path=output_path,
            train_path=train_path,
            fingerprint_comparison_result=fingerprint_comparison,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            kwargs=kwargs,
        )

        return model.package_model(
            fingerprint=new_fingerprint,
            output_directory=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
        )

    print_success("Nothing changed. You can use the old model stored at '{}'."
                  "".format(os.path.abspath(old_model)))
    return old_model