Example #1
0
async def test_eval_data(
    component_builder: ComponentBuilder,
    tmp_path: Path,
    project: Text,
    unpacked_trained_rasa_model: Text,
):
    config_path = os.path.join(project, "config.yml")
    data_importer = TrainingDataImporter.load_nlu_importer_from_config(
        config_path,
        training_data_paths=[
            "data/examples/rasa/demo-rasa.yml",
            "data/examples/rasa/demo-rasa-responses.yml",
        ],
    )

    _, nlu_model_directory = rasa.model.get_model_subdirectories(
        unpacked_trained_rasa_model
    )
    interpreter = Interpreter.load(nlu_model_directory, component_builder)

    data = await data_importer.get_nlu_data()
    (intent_results, response_selection_results, entity_results) = get_eval_data(
        interpreter, data
    )

    assert len(intent_results) == 46
    assert len(response_selection_results) == 0
    assert len(entity_results) == 46
Example #2
0
async def _train_nlu_async(
    config: Text,
    nlu_data: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    additional_arguments: Optional[Dict] = None,
):
    if not nlu_data:
        print_error(
            "No NLU data given. Please provide NLU data in order to train "
            "a Rasa NLU model using the '--nlu' argument.")
        return

    # training NLU only hence the training files still have to be selected
    file_importer = TrainingDataImporter.load_nlu_importer_from_config(
        config, training_data_paths=[nlu_data])

    training_datas = await file_importer.get_nlu_data()
    if training_datas.is_empty():
        print_error(f"Path '{nlu_data}' doesn't contain valid NLU data in it. "
                    f"Please verify the data format. "
                    f"The NLU model training will be skipped now.")
        return

    return await _train_nlu_with_validated_data(
        file_importer,
        output=output,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
        persist_nlu_training_data=persist_nlu_training_data,
        additional_arguments=additional_arguments,
    )
Example #3
0
def train_nlu(
    config: Text,
    nlu_data: Optional[Text],
    output: Text,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    additional_arguments: Optional[Dict] = None,
    domain: Optional[Union[Domain, Text]] = None,
    model_to_finetune: Optional[Text] = None,
    finetuning_epoch_fraction: float = 1.0,
) -> Optional[Text]:
    """Trains an NLU model.

    Args:
        config: Path to the config file for NLU.
        nlu_data: Path to the NLU training data.
        output: Output path.
        fixed_model_name: Name of the model to be stored.
        persist_nlu_training_data: `True` if the NLU training data should be persisted
                                   with the model.
        additional_arguments: Additional training parameters which will be passed to
                              the `train` method of each component.
        domain: Path to the optional domain file/Domain object.
        model_to_finetune: Optional path to a model which should be finetuned or
            a directory in case the latest trained model should be used.
        finetuning_epoch_fraction: The fraction currently specified training epochs
            in the model configuration which should be used for finetuning.

    Returns:
        Path to the model archive.
    """
    if not nlu_data:
        rasa.shared.utils.cli.print_error(
            "No NLU data given. Please provide NLU data in order to train "
            "a Rasa NLU model using the '--nlu' argument.")
        return None

    # training NLU only hence the training files still have to be selected
    file_importer = TrainingDataImporter.load_nlu_importer_from_config(
        config, domain, training_data_paths=[nlu_data])

    training_data = file_importer.get_nlu_data()
    if training_data.contains_no_pure_nlu_data():
        rasa.shared.utils.cli.print_error(
            f"Path '{nlu_data}' doesn't contain valid NLU data in it. "
            f"Please verify the data format. "
            f"The NLU model training will be skipped now.")
        return None

    return _train_graph(
        file_importer,
        training_type=TrainingType.NLU,
        output_path=output,
        model_to_finetune=model_to_finetune,
        fixed_model_name=fixed_model_name,
        finetuning_epoch_fraction=finetuning_epoch_fraction,
        persist_nlu_training_data=persist_nlu_training_data,
        **(additional_arguments or {}),
    ).model
Example #4
0
async def test_eval_data(component_builder, tmpdir, project):
    _config = RasaNLUModelConfig({
        "pipeline": [
            {
                "name": "WhitespaceTokenizer"
            },
            {
                "name": "CountVectorsFeaturizer"
            },
            {
                "name": "DIETClassifier",
                "epochs": 2
            },
            {
                "name": "ResponseSelector",
                "epochs": 2
            },
        ],
        "language":
        "en",
    })

    config_path = os.path.join(project, "config.yml")
    data_importer = TrainingDataImporter.load_nlu_importer_from_config(
        config_path,
        training_data_paths=[
            "data/examples/rasa/demo-rasa.md",
            "data/examples/rasa/demo-rasa-responses.md",
        ],
    )

    (_, _, persisted_path) = await train(
        _config,
        path=tmpdir.strpath,
        data=data_importer,
        component_builder=component_builder,
        persist_nlu_training_data=True,
    )

    interpreter = Interpreter.load(persisted_path, component_builder)

    data = await data_importer.get_nlu_data()
    (
        intent_results,
        response_selection_results,
        entity_results,
    ) = get_eval_data(interpreter, data)

    assert len(intent_results) == 46
    assert len(response_selection_results) == 46
    assert len(entity_results) == 46
Example #5
0
async def train_nlu_async(
    config: Text,
    nlu_data: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    additional_arguments: Optional[Dict] = None,
    domain: Optional[Union[Domain, Text]] = None,
    model_to_finetune: Optional[Text] = None,
    finetuning_epoch_fraction: float = 1.0,
) -> Optional[Text]:
    """Trains an NLU model asynchronously."""
    if not nlu_data:
        rasa.shared.utils.cli.print_error(
            "No NLU data given. Please provide NLU data in order to train "
            "a Rasa NLU model using the '--nlu' argument."
        )
        return

    # training NLU only hence the training files still have to be selected
    file_importer = TrainingDataImporter.load_nlu_importer_from_config(
        config, domain, training_data_paths=[nlu_data]
    )

    training_data = await file_importer.get_nlu_data()
    if training_data.contains_no_pure_nlu_data():
        rasa.shared.utils.cli.print_error(
            f"Path '{nlu_data}' doesn't contain valid NLU data in it. "
            f"Please verify the data format. "
            f"The NLU model training will be skipped now."
        )
        return

    return await _train_nlu_with_validated_data(
        file_importer,
        output=output,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
        persist_nlu_training_data=persist_nlu_training_data,
        additional_arguments=additional_arguments,
        model_to_finetune=model_to_finetune,
        finetuning_epoch_fraction=finetuning_epoch_fraction,
    )
Example #6
0
async def test_nlu_only(project: Text):
    config_path = os.path.join(project, DEFAULT_CONFIG_PATH)
    default_data_path = os.path.join(project, DEFAULT_DATA_PATH)
    actual = TrainingDataImporter.load_nlu_importer_from_config(
        config_path, training_data_paths=[default_data_path])

    assert isinstance(actual, NluDataImporter)
    assert isinstance(actual._importer, RetrievalModelsDataImporter)

    stories = await actual.get_stories()
    assert stories.is_empty()

    domain = await actual.get_domain()
    assert domain.is_empty()

    config = await actual.get_config()
    assert config

    nlu_data = await actual.get_nlu_data()
    assert not nlu_data.is_empty()
Example #7
0
async def test_eval_data(tmp_path: Path, project: Text,
                         trained_rasa_model: Text):
    config_path = os.path.join(project, "config.yml")
    data_importer = TrainingDataImporter.load_nlu_importer_from_config(
        config_path,
        training_data_paths=[
            "data/examples/rasa/demo-rasa.yml",
            "data/examples/rasa/demo-rasa-responses.yml",
        ],
    )

    processor = Agent.load(trained_rasa_model).processor

    data = data_importer.get_nlu_data()
    (intent_results, response_selection_results,
     entity_results) = await get_eval_data(processor, data)

    assert len(intent_results) == 46
    assert len(response_selection_results) == 46
    assert len(entity_results) == 46