Beispiel #1
0
def test_validation_with_missing_nlu_target():
    """Validation must fail when the model configuration has no NLU target."""
    nlu_node = SchemaNode(
        needs={},
        uses=TestNLUTarget,
        eager=True,
        constructor_name="create",
        fn="run",
        config={},
    )
    predict_schema = GraphSchema({"A": nlu_node})

    expected_error = "no target for the 'nlu_target'"
    with pytest.raises(GraphSchemaValidationException, match=expected_error):
        # `nlu_target=None` is the condition under test.
        model_configuration = GraphModelConfiguration(
            train_schema=GraphSchema({}),
            predict_schema=predict_schema,
            training_type=TrainingType.BOTH,
            language=None,
            core_target=None,
            nlu_target=None,
        )
        validation.validate(model_configuration)
Beispiel #2
0
    def graph_config_for_recipe(
        self,
        config: Dict,
        cli_parameters: Dict[Text, Any],
        training_type: TrainingType = TrainingType.BOTH,
        is_finetuning: bool = False,
    ) -> GraphModelConfiguration:
        """Builds the model configuration from a graph recipe config.

        See the interface for the full docstring.

        Args:
            config: Recipe configuration; its "train_schema", "predict_schema" and
                "language" entries are read here.
            cli_parameters: Not used by this recipe; a warning is raised if given.
            training_type: Forwarded to `get_targets` and stored in the result.
            is_finetuning: Not used by this recipe; a warning is raised if `True`.

        Returns:
            The model configuration assembled from the schemas in `config`.
        """
        mark_as_experimental_feature("graph recipe")

        if cli_parameters or is_finetuning:
            # Neither option influences the resulting graphs; tell the user.
            ignored_options_message = (
                "Unlike the Default Recipe, Graph Recipe does not utilize CLI "
                "parameters or finetuning and these configurations will be ignored. "
                "Add configuration to the recipe itself if you want them to be used."
            )
            raise_warning(ignored_options_message, docs=DOCS_URL_GRAPH_RECIPE)

        nlu_target, core_target = self.get_targets(config, training_type)

        train_schema = GraphSchema.from_dict(config.get("train_schema"))
        predict_schema = GraphSchema.from_dict(config.get("predict_schema"))
        language = config.get("language")

        return GraphModelConfiguration(
            train_schema=train_schema,
            predict_schema=predict_schema,
            training_type=training_type,
            language=language,
            core_target=core_target,
            nlu_target=nlu_target,
        )
Beispiel #3
0
def test_validation_with_core_target_wrong_type():
    """Validation must reject a Core target whose return type is invalid."""
    # The same node is used as NLU target and as Core target below.
    shared_target = SchemaNode(
        needs={},
        uses=TestNLUTarget,
        eager=True,
        constructor_name="create",
        fn="run",
        config={},
    )
    predict_schema = GraphSchema({"A": shared_target})

    expected_error = "Core model's .* invalid return type"
    with pytest.raises(GraphSchemaValidationException, match=expected_error):
        model_configuration = GraphModelConfiguration(
            train_schema=GraphSchema({}),
            predict_schema=predict_schema,
            training_type=TrainingType.BOTH,
            language=None,
            core_target="A",
            nlu_target="A",
        )
        validation.validate(model_configuration)
Beispiel #4
0
def test_validate_validates_required_components(
    test_case: RequiredComponentsTestCase,
    is_train_graph: bool,
    test_subclass: bool,
):
    """Checks that validation reports unmet component requirements.

    Args:
        test_case: A single test case; its `node_needs_requires_tuples`, `targets`
            and `num_unmet_requirements` attributes are read here.
        is_train_graph: If `True` the generated schema is used as train schema,
            otherwise as predict schema.
        test_subclass: Forwarded as `use_subclass` to the schema creation helper.
    """
    graph_schema = _create_graph_schema_from_requirements(
        node_needs_requires=test_case.node_needs_requires_tuples,
        targets=test_case.targets,
        use_subclass=test_subclass,
    )

    # The schema which is not under test falls back to a default.
    if is_train_graph:
        train_schema, predict_schema = graph_schema, DEFAULT_PREDICT_SCHEMA
    else:
        train_schema, predict_schema = GraphSchema({}), graph_schema

    graph_config = GraphModelConfiguration(
        train_schema, predict_schema, TrainingType.BOTH, None, None, "nlu_target"
    )

    num_unmet = test_case.num_unmet_requirements
    if num_unmet == 0:
        # All requirements are met, so validation is expected to pass silently.
        validation.validate(graph_config)
        return

    message = f"{num_unmet} components are missing"
    with pytest.raises(GraphSchemaValidationException, match=message):
        validation.validate(graph_config)
Beispiel #5
0
    def inner(
        train_schema: GraphSchema,
        cache: Optional[TrainingCache] = None,
        model_storage: Optional[ModelStorage] = None,
        path: Optional[Path] = None,
        force_retraining: bool = False,
    ) -> Path:
        """Trains `train_schema` with a `GraphTrainer` and returns the archive path.

        Args:
            train_schema: The train schema to run.
            cache: Training cache; created via `local_cache_creator` if not given.
            model_storage: Model storage; a `LocalModelStorage` is created at
                `path` if not given.
            path: Working directory; a temporary directory is created if not given.
            force_retraining: Forwarded to `GraphTrainer.train`.

        Returns:
            Path of the written `model.tar.gz` file.
        """
        # Fill in defaults for everything the caller did not supply.
        path = path or tmp_path_factory.mktemp("model_storage_path")
        model_storage = model_storage or LocalModelStorage.create(path)
        cache = cache or local_cache_creator(path)

        trainer = GraphTrainer(
            model_storage=model_storage,
            cache=cache,
            graph_runner_class=DaskGraphRunner,
        )

        model_configuration = GraphModelConfiguration(
            train_schema=train_schema,
            predict_schema=GraphSchema({}),
            language=None,
            core_target=None,
            nlu_target="nlu",
            training_type=TrainingType.BOTH,
        )
        importer = TrainingDataImporter.load_from_dict(domain_path=str(domain_path))

        output_filename = path / "model.tar.gz"
        trainer.train(
            model_configuration,
            importer=importer,
            output_filename=output_filename,
            force_retraining=force_retraining,
        )

        assert output_filename.is_file()
        return output_filename
Beispiel #6
0
def test_read_unsupported_model(
    monkeypatch: MonkeyPatch,
    tmp_path_factory: TempPathFactory,
    domain: Domain,
):
    """Loading an archive whose metadata reports an old version must fail."""
    train_storage_dir = tmp_path_factory.mktemp("train model storage")
    train_model_storage = LocalModelStorage(train_storage_dir)
    empty_schema = GraphSchema(nodes={})

    archive_path = tmp_path_factory.mktemp("persisted models") / "my-model.tar.gz"

    # Create outdated model meta data
    trained_at = datetime.utcnow()
    model_configuration = GraphModelConfiguration(
        empty_schema, empty_schema, TrainingType.BOTH, None, None, "nlu"
    )
    outdated_model_meta_data = ModelMetadata(
        trained_at=trained_at,
        # The current version is used here and overwritten later to avoid an error.
        rasa_open_source_version=rasa.__version__,
        model_id=uuid.uuid4().hex,
        domain=domain,
        train_schema=model_configuration.train_schema,
        predict_schema=model_configuration.predict_schema,
        training_type=model_configuration.training_type,
        project_fingerprint=rasa.model.project_fingerprint(),
        language=model_configuration.language,
        core_target=model_configuration.core_target,
        nlu_target=model_configuration.nlu_target,
    )
    old_version = "0.0.1"
    outdated_model_meta_data.rasa_open_source_version = old_version

    # Package model - and inject the outdated model meta data
    def _outdated_metadata(*args, **kwargs):
        return outdated_model_meta_data

    monkeypatch.setattr(
        LocalModelStorage, "_create_model_metadata", _outdated_metadata
    )
    train_model_storage.create_model_package(
        model_archive_path=archive_path,
        model_configuration=model_configuration,
        domain=domain,
    )

    # Unpack and inspect packaged model
    load_model_storage_dir = tmp_path_factory.mktemp("load model storage")

    expected_message = (
        f"The model version is trained using Rasa Open Source "
        f"{old_version} and is not compatible with your current "
        f"installation .*"
    )

    # Both ways of reading the archive have to reject the outdated version.
    with pytest.raises(UnsupportedModelVersionError, match=expected_message):
        LocalModelStorage.metadata_from_archive(archive_path)

    with pytest.raises(UnsupportedModelVersionError, match=expected_message):
        LocalModelStorage.from_model_archive(load_model_storage_dir, archive_path)
Beispiel #7
0
def create_test_schema(
    # The unspecified type is on purpose to enable testing of invalid cases
    uses: Type,
    constructor_name: Text = "create",
    run_fn: Text = "run",
    needs: Optional[Dict[Text, Text]] = None,
    eager: bool = True,
    parent: Optional[Type[GraphComponent]] = None,
    language: Optional[Text] = None,
    is_train_graph: bool = True,
) -> GraphModelConfiguration:
    """Builds a model configuration around a single node named "my_node".

    The schema under test contains "my_node", the nodes of
    `DEFAULT_PREDICT_SCHEMA` and, if `parent` is given, a node named "parent".

    Args:
        uses: The component class used by "my_node".
        constructor_name: Constructor name of "my_node".
        run_fn: Name of the function which "my_node" runs.
        needs: Inputs of "my_node"; defaults to no inputs.
        eager: The `eager` flag of "my_node".
        parent: Optional component class for an additional "parent" node.
        language: Language stored in the returned configuration.
        is_train_graph: If `True` the schema is used as train schema, otherwise
            as predict schema.

    Returns:
        The model configuration containing the schema under test.
    """
    optional_nodes = {}
    if parent:
        optional_nodes["parent"] = SchemaNode(
            needs={}, uses=parent, constructor_name="create", fn="run", config={}
        )

    empty_schema = GraphSchema({})

    # noinspection PyTypeChecker
    node_under_test = SchemaNode(
        needs=needs or {},
        uses=uses,
        eager=eager,
        constructor_name=constructor_name,
        fn=run_fn,
        config={},
    )
    schema = GraphSchema(
        {
            "my_node": node_under_test,
            **DEFAULT_PREDICT_SCHEMA.nodes,
            **optional_nodes,
        }
    )

    # The schema which is not under test falls back to a default.
    train_schema = schema if is_train_graph else empty_schema
    predict_schema = DEFAULT_PREDICT_SCHEMA if is_train_graph else schema

    return GraphModelConfiguration(
        train_schema=train_schema,
        predict_schema=predict_schema,
        training_type=TrainingType.BOTH,
        core_target=None,
        nlu_target="nlu_target",
        language=language,
    )
Beispiel #8
0
def test_cycle(is_train_graph: bool):
    """Validation must reject a schema whose nodes need each other in a loop."""

    class MyTestComponent(TestComponentWithoutRun):
        def run(self, training_data: TrainingData) -> TrainingData:
            pass

    def node_needing(dependency, **extra):
        # Every node differs only in the node it needs (and the target flag).
        return SchemaNode(
            needs={"training_data": dependency},
            uses=MyTestComponent,
            eager=True,
            constructor_name="create",
            fn="run",
            config={},
            **extra,
        )

    # A needs B, B needs C and C needs A again.
    schema = GraphSchema(
        {
            "A": node_needing("B", is_target=True),
            "B": node_needing("C"),
            "C": node_needing("A"),
        }
    )

    if is_train_graph:
        train_schema, predict_schema = schema, DEFAULT_PREDICT_SCHEMA
    else:
        train_schema, predict_schema = GraphSchema({}), schema

    with pytest.raises(GraphSchemaValidationException, match="Cycles"):
        model_configuration = GraphModelConfiguration(
            train_schema=train_schema,
            predict_schema=predict_schema,
            training_type=TrainingType.BOTH,
            language=None,
            core_target=None,
            nlu_target="nlu_target",
        )
        validation.validate(model_configuration)
Beispiel #9
0
def test_create_model_package_with_non_existing_dir(
    tmp_path: Path, default_model_storage: ModelStorage
):
    """Packaging into a not yet existing nested directory creates the archive."""
    archive_path = tmp_path / "some_dir" / "another" / "model.tar.gz"
    model_configuration = GraphModelConfiguration(
        GraphSchema({}), GraphSchema({}), TrainingType.BOTH, None, None, "nlu"
    )

    default_model_storage.create_model_package(
        archive_path, model_configuration, Domain.empty()
    )

    assert archive_path.exists()
Beispiel #10
0
def test_create_package_with_non_existing_parent(tmp_path: Path):
    """Packaging below missing parent directories still writes the archive file."""
    storage = LocalModelStorage.create(tmp_path)
    target_file = tmp_path / "new" / "sub" / "dir" / "file.tar.gz"

    empty_configuration = GraphModelConfiguration(
        GraphSchema({}), GraphSchema({}), TrainingType.BOTH, None, None, "nlu"
    )
    storage.create_model_package(target_file, empty_configuration, Domain.empty())

    assert target_file.is_file()
Beispiel #11
0
    def graph_config_for_recipe(
        self,
        config: Dict,
        cli_parameters: Dict[Text, Any],
        training_type: TrainingType = TrainingType.BOTH,
        is_finetuning: bool = False,
    ) -> GraphModelConfiguration:
        """Converts the default config to graphs (see interface for full docstring).

        Args:
            config: Model configuration; its "policies", "pipeline" and "language"
                entries are read here.
            cli_parameters: Forwarded to the creation of the train nodes.
            training_type: Determines which parts (NLU / Core) are used.
            is_finetuning: Stored on the recipe as `_is_finetuning`.

        Returns:
            The model configuration with the created train and predict schemas.

        Raises:
            InvalidConfigException: If NLU training is requested without a pipeline
                or Core training is requested without policies.
        """
        has_policies = bool(config.get("policies"))
        self._use_core = has_policies and training_type != TrainingType.NLU

        has_pipeline = bool(config.get("pipeline"))
        self._use_nlu = has_pipeline and training_type != TrainingType.CORE

        if training_type == TrainingType.NLU and not self._use_nlu:
            raise InvalidConfigException(
                "Can't train an NLU model without a specified pipeline. Please make "
                "sure to specify a valid pipeline in your configuration."
            )

        if training_type == TrainingType.CORE and not self._use_core:
            raise InvalidConfigException(
                "Can't train an Core model without policies. Please make "
                "sure to specify a valid policy in your configuration."
            )

        # End-to-end requires both parts to be in use.
        uses_both_parts = self._use_nlu and self._use_core
        self._use_end_to_end = (
            uses_both_parts and training_type == TrainingType.END_TO_END
        )

        self._is_finetuning = is_finetuning

        train_nodes, preprocessors = self._create_train_nodes(config, cli_parameters)
        predict_nodes = self._create_predict_nodes(config, preprocessors, train_nodes)

        if self._use_core:
            core_target = "select_prediction"
        else:
            core_target = None

        from rasa.nlu.classifiers.regex_message_handler import RegexMessageHandler

        nlu_target = f"run_{RegexMessageHandler.__name__}"

        return GraphModelConfiguration(
            train_schema=GraphSchema(train_nodes),
            predict_schema=GraphSchema(predict_nodes),
            training_type=training_type,
            language=config.get("language"),
            core_target=core_target,
            nlu_target=nlu_target,
        )
Beispiel #12
0
def test_validation_with_core_target_used_by_other_node():
    """Validation must fail if another node takes the Core target as input."""

    class CoreTargetConsumer(TestComponentWithoutRun):
        def run(self, core_target_output: PolicyPrediction) -> PolicyPrediction:
            pass

    def make_node(component, needs):
        # All nodes share the same settings apart from component and inputs.
        return SchemaNode(
            needs=needs,
            uses=component,
            eager=True,
            constructor_name="create",
            fn="run",
            config={},
        )

    predict_schema = GraphSchema(
        {
            "A": make_node(TestNLUTarget, {}),
            "B": make_node(TestCoreTarget, {}),
            # "C" consumes the output of the Core target "B".
            "C": make_node(CoreTargetConsumer, {"core_target_output": "B"}),
        }
    )

    expected_error = "uses the Core target 'B' as input"
    with pytest.raises(GraphSchemaValidationException, match=expected_error):
        model_configuration = GraphModelConfiguration(
            train_schema=GraphSchema({}),
            predict_schema=predict_schema,
            training_type=TrainingType.BOTH,
            language=None,
            core_target="B",
            nlu_target="A",
        )
        validation.validate(model_configuration)
Beispiel #13
0
def test_validation_with_placeholders():
    """A node which needs `PLACEHOLDER_IMPORTER` must pass validation."""

    class MyTestComponent(TestComponentWithoutRun):
        def run(self, training_data: TrainingDataImporter) -> TrainingDataImporter:
            pass

    target_node = SchemaNode(
        needs={"training_data": "B"},
        uses=MyTestComponent,
        eager=True,
        constructor_name="create",
        fn="run",
        is_target=True,
        config={},
    )
    # This node's input is the placeholder instead of another node.
    placeholder_consumer = SchemaNode(
        needs={"training_data": PLACEHOLDER_IMPORTER},
        uses=MyTestComponent,
        eager=True,
        constructor_name="create",
        fn="run",
        config={},
    )
    train_schema = GraphSchema({"A": target_node, "B": placeholder_consumer})

    model_configuration = GraphModelConfiguration(
        train_schema=train_schema,
        predict_schema=DEFAULT_PREDICT_SCHEMA,
        training_type=TrainingType.BOTH,
        language=None,
        core_target=None,
        nlu_target="nlu_target",
    )

    # Does not raise
    validation.validate(model_configuration)
Beispiel #14
0
def test_loader_loads_graph_runner(
    default_model_storage: ModelStorage,
    temp_cache: TrainingCache,
    tmp_path: Path,
    tmp_path_factory: TempPathFactory,
    domain_path: Path,
):
    """Trains a model, loads it via the loader and checks runner and metadata."""
    trainer = GraphTrainer(
        model_storage=default_model_storage,
        cache=temp_cache,
        graph_runner_class=DaskGraphRunner,
    )

    test_value = "test_value"

    train_node = SchemaNode(
        needs={},
        uses=PersistableTestComponent,
        fn="train",
        constructor_name="create",
        config={"test_value": test_value},
        is_target=True,
    )
    load_after_train_node = SchemaNode(
        needs={"resource": "train"},
        uses=PersistableTestComponent,
        fn="run_inference",
        constructor_name="load",
        config={},
    )
    train_schema = GraphSchema({"train": train_node, "load": load_after_train_node})

    # At prediction time the node loads from the resource named "train".
    predict_node = SchemaNode(
        needs={},
        uses=PersistableTestComponent,
        fn="run_inference",
        constructor_name="load",
        config={},
        is_target=True,
        resource=Resource("train"),
    )
    predict_schema = GraphSchema({"load": predict_node})

    output_filename = tmp_path / "model.tar.gz"

    importer = TrainingDataImporter.load_from_dict(
        training_data_paths=[], domain_path=str(domain_path)
    )

    # Freeze the clock so that the `trained_at` assertion at the end can be exact.
    trained_at = datetime.utcnow()
    with freezegun.freeze_time(trained_at):
        model_configuration = GraphModelConfiguration(
            train_schema=train_schema,
            predict_schema=predict_schema,
            training_type=TrainingType.BOTH,
            language=None,
            core_target=None,
            nlu_target=None,
        )
        trained_metadata = trainer.train(
            model_configuration,
            importer=importer,
            output_filename=output_filename,
        )

    assert isinstance(trained_metadata, ModelMetadata)
    assert output_filename.is_file()

    loaded_model_storage_path = tmp_path_factory.mktemp("loaded model storage")

    loaded_metadata, loaded_runner = loader.load_predict_graph_runner(
        storage_path=loaded_model_storage_path,
        model_archive_path=output_filename,
        model_storage_class=LocalModelStorage,
        graph_runner_class=DaskGraphRunner,
    )

    assert loaded_runner.run() == {"load": test_value}

    expected_domain = Domain.from_path(domain_path).as_dict()
    assert loaded_metadata.predict_schema == predict_schema
    assert loaded_metadata.train_schema == train_schema
    assert loaded_metadata.model_id
    assert loaded_metadata.domain.as_dict() == expected_domain
    assert loaded_metadata.rasa_open_source_version == rasa.__version__
    assert loaded_metadata.trained_at == trained_at
Beispiel #15
0
def test_graph_trainer_returns_model_metadata(
    default_model_storage: ModelStorage,
    temp_cache: TrainingCache,
    tmp_path: Path,
    domain_path: Path,
):
    """`GraphTrainer.train` returns metadata matching the trained configuration."""
    trainer = GraphTrainer(
        model_storage=default_model_storage,
        cache=temp_cache,
        graph_runner_class=DaskGraphRunner,
    )

    test_value = "test_value"

    train_node = SchemaNode(
        needs={},
        uses=PersistableTestComponent,
        fn="train",
        constructor_name="create",
        config={"test_value": test_value},
        is_target=True,
    )
    load_after_train_node = SchemaNode(
        needs={"resource": "train"},
        uses=PersistableTestComponent,
        fn="run_inference",
        constructor_name="load",
        config={},
    )
    train_schema = GraphSchema({"train": train_node, "load": load_after_train_node})

    predict_node = SchemaNode(
        needs={},
        uses=PersistableTestComponent,
        fn="run_inference",
        constructor_name="load",
        config={},
        is_target=True,
        resource=Resource("train"),
    )
    predict_schema = GraphSchema({"load": predict_node})

    output_filename = tmp_path / "model.tar.gz"
    model_configuration = GraphModelConfiguration(
        train_schema=train_schema,
        predict_schema=predict_schema,
        language=None,
        core_target=None,
        nlu_target="nlu",
        training_type=TrainingType.BOTH,
    )
    importer = TrainingDataImporter.load_from_dict(domain_path=str(domain_path))

    model_metadata = trainer.train(
        model_configuration,
        importer=importer,
        output_filename=output_filename,
    )

    expected_domain = Domain.from_path(domain_path).as_dict()
    assert model_metadata.model_id
    assert model_metadata.domain.as_dict() == expected_domain
    assert model_metadata.train_schema == train_schema
    assert model_metadata.predict_schema == predict_schema
Beispiel #16
0
def test_create_model_package(tmp_path_factory: TempPathFactory, domain: Domain):
    """Packages a model and checks the metadata and resources read back from it."""
    train_storage_dir = tmp_path_factory.mktemp("train model storage")
    train_model_storage = LocalModelStorage(train_storage_dir)

    def sample_config():
        # A fresh dict per node so that no two nodes share one config object.
        return {"some_config": 123455, "some more config": [{"nested": "hi"}]}

    train_schema = GraphSchema(
        {
            "train": SchemaNode(
                needs={},
                uses=PersistableTestComponent,
                fn="train",
                constructor_name="create",
                config=sample_config(),
            ),
            "load": SchemaNode(
                needs={"resource": "train"},
                uses=PersistableTestComponent,
                fn="run_inference",
                constructor_name="load",
                config={},
                is_target=True,
            ),
        }
    )

    predict_schema = GraphSchema(
        {
            "run": SchemaNode(
                needs={},
                uses=PersistableTestComponent,
                fn="run",
                constructor_name="load",
                config=sample_config(),
            )
        }
    )

    # Fill model Storage
    with train_model_storage.write_to(Resource("resource1")) as directory:
        (directory / "file.txt").write_text("test")

    # Package model
    archive_path = tmp_path_factory.mktemp("persisted models") / "my-model.tar.gz"

    trained_at = datetime.utcnow()
    with freezegun.freeze_time(trained_at):
        model_configuration = GraphModelConfiguration(
            train_schema, predict_schema, TrainingType.BOTH, None, None, "nlu"
        )
        train_model_storage.create_model_package(
            archive_path, model_configuration, domain
        )

    # Unpack and inspect packaged model
    load_model_storage_dir = tmp_path_factory.mktemp("load model storage")

    just_packaged_metadata = LocalModelStorage.metadata_from_archive(archive_path)

    load_model_storage, packaged_metadata = LocalModelStorage.from_model_archive(
        load_model_storage_dir, archive_path
    )

    # Both ways of reading the metadata have to agree.
    assert just_packaged_metadata.trained_at == packaged_metadata.trained_at

    assert packaged_metadata.train_schema == train_schema
    assert packaged_metadata.predict_schema == predict_schema
    assert packaged_metadata.domain.as_dict() == domain.as_dict()

    assert packaged_metadata.rasa_open_source_version == rasa.__version__
    assert packaged_metadata.trained_at == trained_at
    assert packaged_metadata.model_id
    assert packaged_metadata.project_fingerprint

    unpacked_entries = list(load_model_storage_dir.glob("*"))
    assert unpacked_entries == [load_model_storage_dir / "resource1"]