def test_cached_component_replace_schema_node():
    """Verify `PrecomputedValueProvider.replace_schema_node` rewrites a node.

    The node is expected to end up using `PrecomputedValueProvider`, with the
    precomputed output stored in its config, while `needs`, `eager`, `is_input`
    and `resource` keep their original values.
    """
    precomputed_output = 2
    node_under_test = SchemaNode(
        needs={"i1": "first_input", "i2": "second_input"},
        uses=FingerprintComponent,
        fn="add",
        constructor_name="load",
        config={"a": 1},
        eager=False,
        is_input=False,
        resource=Resource("hello"),
    )
    expected_node = SchemaNode(
        needs={"i1": "first_input", "i2": "second_input"},
        uses=PrecomputedValueProvider,
        fn="get_value",
        constructor_name="create",
        config={"output": precomputed_output},
        eager=False,
        is_input=False,
        resource=Resource("hello"),
    )

    PrecomputedValueProvider.replace_schema_node(node_under_test, precomputed_output)

    assert node_under_test == expected_node
def replace_schema_node(cls, node: SchemaNode, cache: TrainingCache) -> None:
    """Updates a `SchemaNode` in place to use a `FingerprintComponent`.

    This is for when we want to do a fingerprint run. During the fingerprint run we
    replace all non-input nodes with `FingerprintComponent`s so we can determine
    whether they are able to be pruned or cached before the next graph run without
    running the actual components.

    Args:
        node: The node to update.
        cache: The cache is needed to determine if there is a cache hit for the
            fingerprint key.
    """
    # Remember what the node originally ran before it gets overwritten below.
    replaced_component_class = node.uses
    wrapped_config = {
        "config_of_replaced_component": node.config,
        "cache": cache,
        "graph_component_class": replaced_component_class,
    }

    node.uses = cls
    # The node has to be "eager" so that `FingerprintComponent.run` sees ALL the
    # inputs to the node. A non-eager node would miss any args that are used by the
    # constructor.
    node.eager = True
    node.constructor_name = cls.create.__name__
    node.fn = cls.run.__name__
    node.config = wrapped_config
def test_target_override(eager: bool, default_model_storage: ModelStorage):
    """Check that `targets` passed to `run` take precedence over schema targets.

    The schema marks "subtract_2" as its target, but the run explicitly requests
    "add", so only the output of "add" is expected in the results.
    """
    add_node = SchemaNode(
        needs={"i1": "first_input", "i2": "second_input"},
        uses=AddInputs,
        fn="add",
        constructor_name="create",
        config={},
        eager=eager,
    )
    subtract_node = SchemaNode(
        needs={"i": "add"},
        uses=SubtractByX,
        fn="subtract_x",
        constructor_name="create",
        config={"x": 3},
        eager=eager,
        is_target=True,
    )
    graph_schema = GraphSchema({"add": add_node, "subtract_2": subtract_node})

    runner = DaskGraphRunner(
        graph_schema=graph_schema,
        model_storage=default_model_storage,
        execution_context=ExecutionContext(graph_schema=graph_schema, model_id="1"),
    )

    graph_inputs = {"first_input": 3, "second_input": 4}
    assert runner.run(inputs=graph_inputs, targets=["add"]) == {"add": 7}
def _add_end_to_end_features_for_training(
    self, preprocessors: List[Text], train_nodes: Dict[Text, SchemaNode]
) -> None:
    """Adds the nodes which provide end-to-end features to the train graph.

    A converter node is added first. After that a copy of every preprocessor node
    is chained behind it under the name `e2e_<preprocessor>`, each one reading its
    `training_data` from the previous node of the chain. Finally a collector node
    consumes the output of the last node in the chain.

    Args:
        preprocessors: Names of the nodes in `train_nodes` to copy, in chain order.
        train_nodes: The train graph nodes. This mapping is updated in place.
    """
    converter_name = "story_to_nlu_training_data_converter"
    train_nodes[converter_name] = SchemaNode(
        needs={
            "story_graph": "story_graph_provider",
            "domain": "domain_for_core_training_provider",
        },
        uses=CoreFeaturizationInputConverter,
        constructor_name="create",
        fn="convert_for_training",
        config={},
        is_input=True,
    )

    upstream_name = converter_name
    for preprocessor in preprocessors:
        # Work on a copy so the original preprocessor node keeps its own inputs.
        e2e_node = copy.deepcopy(train_nodes[preprocessor])
        e2e_node.needs["training_data"] = upstream_name

        upstream_name = f"e2e_{preprocessor}"
        train_nodes[upstream_name] = e2e_node

    train_nodes["end_to_end_features_provider"] = SchemaNode(
        needs={"messages": upstream_name},
        uses=CoreFeaturizationCollector,
        constructor_name="create",
        fn="collect",
        config={},
    )
def test_serialize_graph_schema(tmp_path: Path):
    """Round-trip a `GraphSchema` through its dict form and a YAML file."""
    train_node = SchemaNode(
        needs={},
        uses=PersistableTestComponent,
        fn="train",
        constructor_name="create",
        config={"some_config": 123455, "some more config": [{"nested": "hi"}]},
    )
    load_node = SchemaNode(
        needs={"resource": "train"},
        uses=PersistableTestComponent,
        fn="run_inference",
        constructor_name="load",
        config={},
        is_target=True,
        resource=Resource("test resource"),
    )
    original_schema = GraphSchema({"train": train_node, "load": load_node})

    # Writing the dict form to disk proves that it is really serializable.
    yaml_file = tmp_path / "my_graph.yml"
    rasa.shared.utils.io.write_yaml(original_schema.as_dict(), yaml_file)

    restored_schema = GraphSchema.from_dict(
        rasa.shared.utils.io.read_yaml_file(yaml_file)
    )

    assert restored_schema == original_schema
def _add_end_to_end_features_for_inference(
    self, predict_nodes: Dict[Text, SchemaNode], preprocessors: List[Text]
) -> Text:
    """Adds the nodes which provide end-to-end features to the predict graph.

    A converter node is added first. After that a replica of every preprocessor
    node is chained behind it under the name `e2e_<preprocessor>`, with its `needs`
    replaced so it reads `messages` from the previous node of the chain. Finally a
    collector node consumes the output of the last node in the chain.

    Args:
        predict_nodes: The predict graph nodes. This mapping is updated in place.
        preprocessors: Names of the nodes in `predict_nodes` to replicate, in chain
            order.

    Returns:
        The name of the collector node which was added last.
    """
    converter_name = "tracker_to_message_converter"
    predict_nodes[converter_name] = SchemaNode(
        **DEFAULT_PREDICT_KWARGS,
        needs={"tracker": PLACEHOLDER_TRACKER},
        uses=CoreFeaturizationInputConverter,
        fn="convert_for_inference",
        config={},
    )

    upstream_name = converter_name
    for preprocessor in preprocessors:
        e2e_name = f"e2e_{preprocessor}"
        predict_nodes[e2e_name] = dataclasses.replace(
            predict_nodes[preprocessor], needs={"messages": upstream_name}
        )
        upstream_name = e2e_name

    collector_name = "end_to_end_features_provider"
    predict_nodes[collector_name] = SchemaNode(
        **DEFAULT_PREDICT_KWARGS,
        needs={"messages": upstream_name},
        uses=CoreFeaturizationCollector,
        fn="collect",
        config={},
    )
    return collector_name
def test_unused_node(default_model_storage: ModelStorage):
    """Check that only the target node shows up in the runner's results."""
    target_node = SchemaNode(
        needs={},
        uses=ProvideX,
        fn="provide",
        constructor_name="create",
        config={},
        is_target=True,
    )
    # This node is not a target, so it will not produce an output.
    non_target_node = SchemaNode(
        needs={},
        uses=ProvideX,
        fn="provide",
        constructor_name="create",
        config={},
    )
    graph_schema = GraphSchema(
        {"provide": target_node, "provide_2": non_target_node}
    )

    context = ExecutionContext(graph_schema=graph_schema, model_id="1")
    runner = DaskGraphRunner(
        graph_schema=graph_schema,
        model_storage=default_model_storage,
        execution_context=context,
    )

    assert runner.run() == {"provide": 1}
def test_loop(default_model_storage: ModelStorage):
    """Check that running two nodes which need each other raises an error."""

    def subtract_node(needed_node, is_target):
        # Both nodes are identical except for their input and the target flag.
        return SchemaNode(
            needs={"i": needed_node},
            uses=SubtractByX,
            fn="subtract_x",
            constructor_name="create",
            config={},
            is_target=is_target,
        )

    graph_schema = GraphSchema(
        {
            "subtract_a": subtract_node("subtract_b", is_target=False),
            "subtract_b": subtract_node("subtract_a", is_target=True),
        }
    )
    context = ExecutionContext(graph_schema=graph_schema, model_id="1")
    runner = DaskGraphRunner(
        graph_schema=graph_schema,
        model_storage=default_model_storage,
        execution_context=context,
    )

    with pytest.raises(GraphRunError):
        runner.run()
def test_unused_node(default_model_storage: ModelStorage):
    """Check that a node which is not needed for any target is never run."""
    target_node = SchemaNode(
        needs={},
        uses=ProvideX,
        fn="provide",
        constructor_name="create",
        config={},
        is_target=True,
    )
    # This node will not fail as it will be pruned because it is not a target
    # or a target's ancestor.
    pruned_node = SchemaNode(
        needs={"i": "input"},
        uses=AssertComponent,
        fn="run_assert",
        constructor_name="create",
        config={"value_to_assert": "some_value"},
    )
    graph_schema = GraphSchema(
        {"provide": target_node, "assert_false": pruned_node}
    )

    context = ExecutionContext(graph_schema=graph_schema, model_id="1")
    runner = DaskGraphRunner(
        graph_schema=graph_schema,
        model_storage=default_model_storage,
        execution_context=context,
    )

    assert runner.run(inputs={"input": "some_other_value"}) == {"provide": 1}
def create_test_schema(
    uses: Type,  # The unspecified type is on purpose to enable testing of invalid cases
    constructor_name: Text = "create",
    run_fn: Text = "run",
    needs: Optional[Dict[Text, Text]] = None,
    eager: bool = True,
    parent: Optional[Type[GraphComponent]] = None,
) -> GraphSchema:
    """Builds a small `GraphSchema` around a single node named "my_node".

    Args:
        uses: The class run by "my_node".
        constructor_name: Name of the constructor used for "my_node".
        run_fn: Name of the function run by "my_node".
        needs: Inputs of "my_node". Defaults to no inputs.
        eager: Value of the `eager` flag of "my_node".
        parent: If given, an additional node named "parent" which runs this class
            is added to the schema.

    Returns:
        The schema containing "my_node" and, optionally, "parent".
    """
    # noinspection PyTypeChecker
    nodes = {
        "my_node": SchemaNode(
            needs=needs or {},
            uses=uses,
            eager=eager,
            constructor_name=constructor_name,
            fn=run_fn,
            config={},
        )
    }
    if parent:
        nodes["parent"] = SchemaNode(
            needs={}, uses=parent, constructor_name="create", fn="run", config={}
        )

    return GraphSchema(nodes)
def test_resources_fingerprints_remain_after_being_cached(
    temp_cache: LocalTrainingCache, train_with_schema: Callable
):
    """Check that a `Resource` restored from the cache keeps its fingerprint.

    The fingerprint key of the "process" node incorporates the fingerprint of the
    `Resource` provided by "train". Comparing that key across two trainings shows
    whether the cached `Resource` has a static fingerprint.
    """
    train_schema = GraphSchema(
        {
            "train": SchemaNode(
                needs={},
                uses=PersistableTestComponent,
                fn="train",
                constructor_name="create",
                config={"test_value": "4"},
                is_target=True,
            ),
            "process": SchemaNode(
                needs={"resource": "train"},
                uses=PersistableTestComponent,
                fn="run_inference",
                constructor_name="load",
                config={},
                is_target=True,
            ),
        }
    )

    # First training: everything is run and cached.
    train_with_schema(train_schema, temp_cache)

    import sqlalchemy as sa

    def most_recently_used_entry(session):
        # The most recently used entry is the one of the "process" node.
        newest_first = sa.select(temp_cache.CacheEntry).order_by(
            temp_cache.CacheEntry.last_used.desc()
        )
        return session.execute(newest_first).scalars().first()

    with temp_cache._sessionmaker.begin() as session:
        # Keep the key so it can be compared with the one of the next training.
        fingerprint_key = most_recently_used_entry(session).fingerprint_key

        # Without its entry the "process" node has to be recreated next train.
        session.execute(
            sa.delete(temp_cache.CacheEntry).where(
                temp_cache.CacheEntry.fingerprint_key == fingerprint_key
            )
        )

    # Second training: the `Resource` output of "train" now comes from the cache.
    train_with_schema(train_schema, temp_cache)

    with temp_cache._sessionmaker.begin() as session:
        recreated_entry = most_recently_used_entry(session)

        # An identical key confirms that the `Resource` from the cache has the
        # same fingerprint as the freshly trained one.
        assert recreated_entry.fingerprint_key == fingerprint_key
def test_graph_trainer_returns_prediction_runner(
    default_model_storage: ModelStorage,
    temp_cache: TrainingCache,
    tmp_path: Path,
    domain_path: Path,
):
    """Check that `GraphTrainer.train` packages a model and returns a runner.

    The returned runner has to be a `DaskGraphRunner` whose prediction yields the
    value that was configured for training, and the model archive has to exist.
    """
    graph_trainer = GraphTrainer(
        model_storage=default_model_storage,
        cache=temp_cache,
        graph_runner_class=DaskGraphRunner,
    )

    test_value = "test_value"
    train_nodes = {
        "train": SchemaNode(
            needs={},
            uses=PersistableTestComponent,
            fn="train",
            constructor_name="create",
            config={"test_value": test_value},
            is_target=True,
        ),
        "load": SchemaNode(
            needs={"resource": "train"},
            uses=PersistableTestComponent,
            fn="run_inference",
            constructor_name="load",
            config={},
        ),
    }
    predict_nodes = {
        "load": SchemaNode(
            needs={},
            uses=PersistableTestComponent,
            fn="run_inference",
            constructor_name="load",
            config={},
            is_target=True,
            resource=Resource("train"),
        )
    }

    output_filename = tmp_path / "model.tar.gz"
    predict_graph_runner = graph_trainer.train(
        train_schema=GraphSchema(train_nodes),
        predict_schema=GraphSchema(predict_nodes),
        domain_path=domain_path,
        output_filename=output_filename,
    )

    assert isinstance(predict_graph_runner, DaskGraphRunner)
    assert output_filename.is_file()
    assert predict_graph_runner.run() == {"load": test_value}
def _add_nlu_predict_nodes(
    self,
    last_run_node: Text,
    predict_config: Dict[Text, Any],
    predict_nodes: Dict[Text, SchemaNode],
    train_nodes: Dict[Text, SchemaNode],
) -> Text:
    """Adds a predict node for every NLU component of the pipeline.

    Each pipeline entry is looked up in the registry by its "name" (which is
    popped from the entry) and handled according to its component types:

    - model loaders additionally get a `provide_<name><idx>` node,
    - tokenizers, featurizers and trainable classifiers / extractors are added
      via `_add_nlu_predict_node_from_train`,
    - non-trainable classifiers / extractors get a freshly created node which is
      added via `_add_nlu_predict_node`,
    - all other components add no processing node.

    Args:
        last_run_node: Name of the node whose output the first component consumes.
        predict_config: The configuration holding the "pipeline". Its entries are
            modified in place.
        predict_nodes: The predict graph nodes. This mapping is updated in place.
        train_nodes: The train graph nodes.

    Returns:
        The name of the last node which was added to process messages, or the
        given `last_run_node` if there was none.
    """
    for idx, config in enumerate(predict_config["pipeline"]):
        registry_name = config.pop("name")
        component = self._from_registry(registry_name)
        component_name = f"{registry_name}{idx}"

        if self.ComponentType.MODEL_LOADER in component.types:
            predict_nodes[f"provide_{component_name}"] = SchemaNode(
                **DEFAULT_PREDICT_KWARGS,
                needs={},
                uses=component.clazz,
                fn="provide",
                config=config,
            )

        tokenizes_or_featurizes = component.types.intersection(
            {
                self.ComponentType.MESSAGE_TOKENIZER,
                self.ComponentType.MESSAGE_FEATURIZER,
            }
        )
        if not tokenizes_or_featurizes:
            classifies_or_extracts = component.types.intersection(
                {
                    self.ComponentType.INTENT_CLASSIFIER,
                    self.ComponentType.ENTITY_EXTRACTOR,
                }
            )
            if not classifies_or_extracts:
                # Nothing else to add for this kind of component.
                continue

            if not component.is_trainable:
                # There is no train node to derive from, so create a new node.
                untrained_node = SchemaNode(
                    needs={"messages": last_run_node},
                    uses=component.clazz,
                    constructor_name="create",
                    fn="process",
                    config=config,
                )
                last_run_node = self._add_nlu_predict_node(
                    predict_nodes, untrained_node, component_name, last_run_node
                )
                continue

        # Tokenizers, featurizers and trainable classifiers / extractors.
        last_run_node = self._add_nlu_predict_node_from_train(
            predict_nodes,
            component_name,
            train_nodes,
            last_run_node,
            config,
            from_resource=component.is_trainable,
        )

    return last_run_node
def test_cycle(is_train_graph: bool):
    """Check that validation rejects a schema in which A -> B -> C -> A."""

    class MyTestComponent(TestComponentWithoutRun):
        def run(self, training_data: TrainingData) -> TrainingData:
            pass

    def node_needing(needed_node, **extra_kwargs):
        # All nodes of the cycle only differ in the node they depend on.
        return SchemaNode(
            needs={"training_data": needed_node},
            uses=MyTestComponent,
            eager=True,
            constructor_name="create",
            fn="run",
            config={},
            **extra_kwargs,
        )

    cyclic_schema = GraphSchema(
        {
            "A": node_needing("B", is_target=True),
            "B": node_needing("C"),
            "C": node_needing("A"),
        }
    )

    # Put the cyclic schema in the graph under test; the other one stays valid.
    if is_train_graph:
        train_schema, predict_schema = cyclic_schema, DEFAULT_PREDICT_SCHEMA
    else:
        train_schema, predict_schema = GraphSchema({}), cyclic_schema

    with pytest.raises(GraphSchemaValidationException, match="Cycles"):
        validation.validate(
            GraphModelConfiguration(
                train_schema=train_schema,
                predict_schema=predict_schema,
                training_type=TrainingType.BOTH,
                language=None,
                core_target=None,
                nlu_target="nlu_target",
            )
        )
def create_test_schema(
    uses: Type,  # The unspecified type is on purpose to enable testing of invalid cases
    constructor_name: Text = "create",
    run_fn: Text = "run",
    needs: Optional[Dict[Text, Text]] = None,
    eager: bool = True,
    parent: Optional[Type[GraphComponent]] = None,
    language: Optional[Text] = None,
    is_train_graph: bool = True,
) -> GraphModelConfiguration:
    """Builds a `GraphModelConfiguration` around a single node named "my_node".

    The schema under test consists of "my_node", the nodes of
    `DEFAULT_PREDICT_SCHEMA` and, optionally, a "parent" node.

    Args:
        uses: The class run by "my_node".
        constructor_name: Name of the constructor used for "my_node".
        run_fn: Name of the function run by "my_node".
        needs: Inputs of "my_node". Defaults to no inputs.
        eager: Value of the `eager` flag of "my_node".
        parent: If given, an additional node named "parent" which runs this class
            is added to the schema.
        language: Language of the returned model configuration.
        is_train_graph: If `True` the schema under test is used as train schema and
            `DEFAULT_PREDICT_SCHEMA` as predict schema. Otherwise the schema under
            test is the predict schema and the train schema is empty.

    Returns:
        The model configuration which contains the schema under test.
    """
    # noinspection PyTypeChecker
    nodes = {
        "my_node": SchemaNode(
            needs=needs or {},
            uses=uses,
            eager=eager,
            constructor_name=constructor_name,
            fn=run_fn,
            config={},
        )
    }
    nodes.update(DEFAULT_PREDICT_SCHEMA.nodes)
    if parent:
        nodes["parent"] = SchemaNode(
            needs={}, uses=parent, constructor_name="create", fn="run", config={}
        )
    schema_under_test = GraphSchema(nodes)

    if is_train_graph:
        train_schema, predict_schema = schema_under_test, DEFAULT_PREDICT_SCHEMA
    else:
        train_schema, predict_schema = GraphSchema({}), schema_under_test

    return GraphModelConfiguration(
        train_schema=train_schema,
        predict_schema=predict_schema,
        training_type=TrainingType.BOTH,
        core_target=None,
        nlu_target="nlu_target",
        language=language,
    )
def test_graph_trainer_always_reads_input(
    temp_cache: TrainingCache,
    tmp_path: Path,
    train_with_schema: Callable,
    spy_on_all_components: Callable,
):
    """Check that input nodes are run on every training, even if all is cached."""
    input_file = tmp_path / "input_file.txt"
    input_file.write_text("3")

    reader_node = SchemaNode(
        needs={},
        uses=FileReader,
        fn="read",
        constructor_name="create",
        config={"file_path": str(input_file)},
        is_input=True,
    )
    subtract_node = SchemaNode(
        needs={"i": "read_file"},
        uses=SubtractByX,
        fn="subtract_x",
        constructor_name="create",
        config={"x": 1},
    )
    assert_node = SchemaNode(
        needs={"i": "subtract"},
        uses=AssertComponent,
        fn="run_assert",
        constructor_name="create",
        config={"value_to_assert": 2},
        is_target=True,
    )
    train_schema = GraphSchema(
        {"read_file": reader_node, "subtract": subtract_node, "assert_node": assert_node}
    )

    # First training: every component is called once and its output is cached.
    mocks = spy_on_all_components(train_schema)
    train_with_schema(train_schema, temp_cache)
    expected_counts = {"read_file": 1, "subtract": 1, "assert_node": 1}
    assert node_call_counts(mocks) == expected_counts

    # Second training without any change: only the input node is run (during the
    # fingerprint run), nothing else.
    mocks = spy_on_all_components(train_schema)
    train_with_schema(train_schema, temp_cache)
    expected_counts = {"read_file": 1, "subtract": 0, "assert_node": 0}
    assert node_call_counts(mocks) == expected_counts

    # A changed input file makes all nodes run again, and the assert_node fails
    # because of the new value.
    input_file.write_text("5")
    with pytest.raises(GraphComponentException):
        train_with_schema(train_schema, temp_cache)
def test_graph_trainer_train_logging_with_cached_components(
    tmp_path: Path,
    temp_cache: TrainingCache,
    train_with_schema: Callable,
    caplog: LogCaptureFixture,
):
    """Check the training log messages of a second training using the cache.

    On the second training 'SubtractByX' is expected to be logged as trained again
    while 'CacheableComponent' is expected to be logged as restored from cache.
    """
    input_file = tmp_path / "input_file.txt"
    input_file.write_text("3")

    nodes = {
        "input": SchemaNode(
            needs={},
            uses=ProvideX,
            fn="provide",
            constructor_name="create",
            config={},
        ),
        "subtract": SchemaNode(
            needs={"i": "input"},
            uses=SubtractByX,
            fn="subtract_x",
            constructor_name="create",
            config={"x": 1},
            is_target=True,
            is_input=False,
        ),
        "cache_able_node": SchemaNode(
            needs={"suffix": "input"},
            uses=CacheableComponent,
            fn="run",
            constructor_name="create",
            config={},
            is_target=True,
            is_input=False,
        ),
    }
    train_schema = GraphSchema(nodes)

    # The first training only fills the cache.
    train_with_schema(train_schema, temp_cache)

    # The log output of the second training is what is under test.
    with caplog.at_level(logging.INFO, logger="rasa.engine.training.hooks"):
        train_with_schema(train_schema, temp_cache)

    expected_messages = {
        "Starting to train component 'SubtractByX'.",
        "Finished training component 'SubtractByX'.",
        "Restored component 'CacheableComponent' from cache.",
    }
    assert set(caplog.messages) == expected_messages
def _create_predict_nodes(
    self,
    config: Dict[Text, SchemaNode],
    preprocessors: List[Text],
    train_nodes: Dict[Text, SchemaNode],
) -> Dict[Text, SchemaNode]:
    """Creates the nodes of the predict graph.

    The graph always starts with a message converter node and always contains a
    `RegexMessageHandler` node. NLU nodes are added if `self._use_nlu` is set, Core
    nodes are added if `self._use_core` is set.

    Args:
        config: The model configuration. It is deep-copied, so the given object is
            not modified.
        preprocessors: Names of the preprocessor nodes; forwarded to
            `_add_core_predict_nodes`.
        train_nodes: The train graph nodes.

    Returns:
        The predict graph nodes by node name.
    """
    predict_config = copy.deepcopy(config)

    from rasa.nlu.classifiers.regex_message_handler import RegexMessageHandler

    converter_name = "nlu_message_converter"
    predict_nodes = {
        converter_name: SchemaNode(
            **DEFAULT_PREDICT_KWARGS,
            needs={"messages": PLACEHOLDER_MESSAGE},
            uses=NLUMessageConverter,
            fn="convert_user_message",
            config={},
        )
    }

    last_run_nlu_node = converter_name
    if self._use_nlu:
        last_run_nlu_node = self._add_nlu_predict_nodes(
            last_run_nlu_node, predict_config, predict_nodes, train_nodes
        )

    # The regex handler additionally needs the domain when Core is used.
    regex_handler_needs = {"messages": last_run_nlu_node}
    if self._use_core:
        regex_handler_needs["domain"] = "domain_provider"

    predict_nodes[f"run_{RegexMessageHandler.__name__}"] = SchemaNode(
        **DEFAULT_PREDICT_KWARGS,
        needs=regex_handler_needs,
        uses=RegexMessageHandler,
        fn="process",
        config={},
    )

    if self._use_core:
        self._add_core_predict_nodes(
            predict_config, predict_nodes, train_nodes, preprocessors
        )

    return predict_nodes
def test_validate_after_adding_adding_default_parameter(
    get_validation_method: Callable[..., ValidationMethodType],
    nlu: bool,
    core: bool,
):
    """Check that explicitly passing default values does not break finetuning.

    A schema whose extra nodes have empty configs is validated for training.
    Afterwards a copy whose extra nodes carry their components' default configs is
    validated for finetuning, which must not raise.
    """
    extra_components = {"nlu-node": WhitespaceTokenizer, "core-node": RulePolicy}

    # Empty configs: rasa is expected to fill in the defaults later.
    schema_without_defaults = _get_example_schema()
    for node_name, component_class in extra_components.items():
        schema_without_defaults.nodes[node_name] = SchemaNode(
            needs={}, uses=component_class, constructor_name="", fn="", config={}
        )

    # Training.
    validate_for_training = get_validation_method(
        finetuning=False,
        load=False,
        nlu=nlu,
        core=core,
        graph_schema=schema_without_defaults,
    )
    validate_for_training(importer=EmptyDataImporter())

    # The same schema, except that the default values are passed explicitly.
    schema_with_defaults = copy.deepcopy(schema_without_defaults)
    for node_name, component_class in extra_components.items():
        schema_with_defaults.nodes[node_name] = SchemaNode(
            needs={},
            uses=component_class,
            constructor_name="",
            fn="",
            config=component_class.get_default_config(),
        )

    # Finetuning *does not raise*.
    validate_for_finetuning = get_validation_method(
        finetuning=True,
        load=True,
        nlu=nlu,
        core=core,
        graph_schema=schema_with_defaults,
    )
    validate_for_finetuning(importer=EmptyDataImporter())
def test_core_warn_if_data_but_no_policy(
    monkeypatch: MonkeyPatch, policy_type: Optional[Type[Policy]]
):
    """Check the warning about Core training data without any policy.

    Without a policy exactly one warning about the missing policy is expected.
    With a policy only the "Slot auto-fill" warnings are expected.
    """
    importer = TrainingDataImporter.load_from_dict(
        domain_path="data/test_e2ebot/domain.yml",
        training_data_paths=[
            "data/test_e2ebot/data/nlu.yml",
            "data/test_e2ebot/data/stories.yml",
        ],
    )

    nodes = {
        "tokenizer": SchemaNode({}, WhitespaceTokenizer, "", "", {}),
        "nlu-component": SchemaNode({}, DIETClassifier, "", "", {}),
    }
    no_policy_configured = policy_type is None
    if not no_policy_configured:
        nodes["some-policy"] = SchemaNode({}, policy_type, "", "", {})

    validator = DefaultV1RecipeValidator(GraphSchema(nodes))

    # Silence the checks which are not under test here.
    def accept_anything(*args, **kwargs):
        return None

    monkeypatch.setattr(
        validator,
        "_raise_if_a_rule_policy_is_incompatible_with_domain",
        accept_anything,
    )
    monkeypatch.setattr(validator, "_warn_if_no_rule_policy_is_contained", lambda: None)
    monkeypatch.setattr(
        validator, "_warn_if_rule_based_data_is_unused_or_missing", accept_anything
    )

    if no_policy_configured:
        expected_warning = "Found data for training policies but no policy"
        with pytest.warns(UserWarning, match=expected_warning) as records:
            validator.validate(importer)
        assert len(records) == 1
        return

    with pytest.warns(
        UserWarning, match="Slot auto-fill has been removed in 3.0"
    ) as records:
        validator.validate(importer)
    assert all(
        warn.message.args[0].startswith("Slot auto-fill has been removed")
        for warn in records.list
    )
def test_validation_with_core_target_used_by_other_node():
    """Check that validation rejects a node which consumes the Core target."""

    class CoreTargetConsumer(TestComponentWithoutRun):
        def run(self, core_target_output: PolicyPrediction) -> PolicyPrediction:
            pass

    def eager_node(component_class, needs):
        # All nodes of this schema only differ in their class and their inputs.
        return SchemaNode(
            needs=needs,
            uses=component_class,
            eager=True,
            constructor_name="create",
            fn="run",
            config={},
        )

    graph_config = GraphSchema(
        {
            "A": eager_node(TestNLUTarget, {}),
            "B": eager_node(TestCoreTarget, {}),
            "C": eager_node(CoreTargetConsumer, {"core_target_output": "B"}),
        }
    )

    expected_error = "uses the Core target 'B' as input"
    with pytest.raises(GraphSchemaValidationException, match=expected_error):
        validation.validate(
            GraphModelConfiguration(
                train_schema=GraphSchema({}),
                predict_schema=graph_config,
                training_type=TrainingType.BOTH,
                language=None,
                core_target="B",
                nlu_target="A",
            )
        )
def test_graph_trainer_train_logging(
    tmp_path: Path,
    temp_cache: TrainingCache,
    train_with_schema: Callable,
    caplog: LogCaptureFixture,
):
    """Check the training log messages of a first training.

    Start and finish messages are only expected for 'SubtractByX'.
    """
    input_file = tmp_path / "input_file.txt"
    input_file.write_text("3")

    nodes = {
        "input": SchemaNode(
            needs={},
            uses=ProvideX,
            fn="provide",
            constructor_name="create",
            config={},
        ),
        "subtract 2": SchemaNode(
            needs={},
            uses=ProvideX,
            fn="provide",
            constructor_name="create",
            config={},
            is_target=True,
            is_input=True,
        ),
        "subtract": SchemaNode(
            needs={"i": "input"},
            uses=SubtractByX,
            fn="subtract_x",
            constructor_name="create",
            config={"x": 1},
            is_target=True,
            is_input=False,
        ),
    }
    train_schema = GraphSchema(nodes)

    with caplog.at_level(logging.INFO, logger="rasa.engine.training.hooks"):
        train_with_schema(train_schema, temp_cache)

    expected_messages = [
        "Starting to train component 'SubtractByX'.",
        "Finished training component 'SubtractByX'.",
    ]
    assert caplog.messages == expected_messages
def test_validation_with_core_target_wrong_type():
    """Check that validation rejects a Core target with a wrong return type.

    Node "A" runs `TestNLUTarget` and is configured as both NLU and Core target.
    """
    nlu_target_node = SchemaNode(
        needs={},
        uses=TestNLUTarget,
        eager=True,
        constructor_name="create",
        fn="run",
        config={},
    )
    predict_schema = GraphSchema({"A": nlu_target_node})

    expected_error = "Core model's .* invalid return type"
    with pytest.raises(GraphSchemaValidationException, match=expected_error):
        validation.validate(
            GraphModelConfiguration(
                train_schema=GraphSchema({}),
                predict_schema=predict_schema,
                training_type=TrainingType.BOTH,
                language=None,
                core_target="A",
                nlu_target="A",
            )
        )
def test_core_raise_if_a_rule_policy_is_incompatible_with_domain(
    monkeypatch: MonkeyPatch,
):
    """Check that the config of every `RulePolicy` node is validated.

    `RulePolicy.raise_if_incompatible_with_domain` is replaced by a mock. After
    validating a schema which contains `num_instances` nodes per policy class,
    the mock must have been called once per `RulePolicy` node with that node's
    config, in insertion order.
    """
    num_instances = 2
    nodes = {}
    configs_for_rule_policies = []
    for feature_type in POLICY_CLASSSES:
        for idx in range(num_instances):
            unique_name = f"{feature_type.__name__}-{idx}"
            # A unique config per node allows telling the mock calls apart.
            unique_config = {unique_name: None}
            nodes[unique_name] = SchemaNode({}, feature_type, "", "", unique_config)
            if feature_type == RulePolicy:
                configs_for_rule_policies.append(unique_config)

    mock = Mock()
    monkeypatch.setattr(RulePolicy, "raise_if_incompatible_with_domain", mock)

    validator = DefaultV1RecipeValidator(graph_schema=GraphSchema(nodes))
    # Silence a check which is not under test here.
    monkeypatch.setattr(
        validator,
        "_warn_if_rule_based_data_is_unused_or_missing",
        lambda *args, **kwargs: None,
    )
    importer = DummyImporter()
    validator.validate(importer)

    # Every recorded call is a `(positional args, keyword args)` pair. The config
    # is accepted both as keyword and as first positional argument.
    validated_configs = [
        keyword_args["config"] if "config" in keyword_args else positional_args[0]
        for positional_args, keyword_args in mock.call_args_list
    ]

    # Note: this works because we validate nodes in insertion order
    # NOTE(review): the domain argument is not compared because equality between
    # the importer's domain and `Domain.empty()` cannot be confirmed from here.
    assert validated_configs == configs_for_rule_policies
def _test_validation_warnings_with_default_configs(
    training_data: TrainingData,
    component_types: List[Type],
    warnings: Optional[List[Text]] = None,
):
    """Validates a schema of default-configured components and checks its warnings.

    One node is created per entry of `component_types`, each configured with the
    component's own `get_default_config()`. The schema is then validated with a
    `DefaultV1RecipeValidator` against `training_data`.

    Args:
        training_data: Data served by the `DummyImporter` during validation.
        component_types: Component classes to put into the schema, in order.
        warnings: Expected warnings, in the order in which they should be raised.
            An entry is accepted if the raised message starts with it or if it
            matches the beginning of the message as a regular expression. If empty
            or `None`, no warning at all must be raised.
    """
    dummy_importer = DummyImporter(training_data=training_data)
    graph_schema = GraphSchema(
        {
            f"{idx}": SchemaNode(
                needs={},
                uses=component_type,
                constructor_name="",
                fn="",
                config=component_type.get_default_config(),
            )
            for idx, component_type in enumerate(component_types)
        }
    )
    validator = DefaultV1RecipeValidator(graph_schema)

    # NOTE(review): `pytest.warns(None)` is deprecated in recent pytest versions;
    # confirm the pinned pytest version before upgrading.
    with pytest.warns(None) as records:
        validator.validate(dummy_importer)

    if not warnings:
        assert len(records) == 0, [warning.message for warning in records.list]
        return

    messages = [warning.message.args[0] for warning in records]
    assert len(messages) == len(warnings), ", ".join(messages)

    # Check every pair on its own: asserting on the list of match results would
    # always pass because a non-empty list is truthy.
    for message, expected_warning in zip(messages, warnings):
        assert message.startswith(expected_warning) or re.match(
            expected_warning, message
        ), f"Expected a warning matching {expected_warning!r} but got {message!r}."
def test_validation_with_missing_nlu_target():
    """Check that validation rejects a configuration without an NLU target."""
    only_node = SchemaNode(
        needs={},
        uses=TestNLUTarget,
        eager=True,
        constructor_name="create",
        fn="run",
        config={},
    )
    predict_schema = GraphSchema({"A": only_node})

    expected_error = "no target for the 'nlu_target'"
    with pytest.raises(GraphSchemaValidationException, match=expected_error):
        validation.validate(
            GraphModelConfiguration(
                train_schema=GraphSchema({}),
                predict_schema=predict_schema,
                training_type=TrainingType.BOTH,
                language=None,
                core_target=None,
                nlu_target=None,
            )
        )
def _get_example_schema(num_epochs: int = 5, other_parameter: int = 10) -> GraphSchema:
    """Builds a schema with three `GraphComponent` nodes named "node-<idx>".

    Args:
        num_epochs: Value of "epochs" in the configs of the first two nodes.
        other_parameter: Value of "other-parameter" in the config of the first
            node.

    Returns:
        The schema. The config of its third node does not define "epochs".
    """
    config_with_other_parameter = {
        "epochs": num_epochs,
        "other-parameter": other_parameter,
        "some-parameter": "bla",
    }
    config_with_yet_other_parameter = {
        "epochs": num_epochs,
        "yet-other-parameter": 344,
    }
    config_without_epochs = {"no-epochs-defined-here": None}

    nodes = {}
    for idx, node_config in enumerate(
        (
            config_with_other_parameter,
            config_with_yet_other_parameter,
            config_without_epochs,
        )
    ):
        nodes[f"node-{idx}"] = SchemaNode(
            needs={},
            uses=GraphComponent,
            constructor_name="",
            fn="",
            config=node_config,
        )
    return GraphSchema(nodes=nodes)
def _add_nlu_process_node(
    self,
    train_nodes: Dict[Text, SchemaNode],
    component_class: Type[GraphComponent],
    component_name: Text,
    last_run_node: Text,
    component_config: Dict[Text, Any],
    from_resource: Optional[Text] = None,
) -> Text:
    """Adds a node which lets a component process the training data.

    The node is stored as `run_<component_name>`, is constructed via "load" and
    runs "process_training_data".

    Args:
        train_nodes: The train graph nodes. This mapping is updated in place.
        component_class: The class run by the new node.
        component_name: Name of the component; used to build the node name.
        last_run_node: Name of the node which provides the training data.
        component_config: The config of the new node.
        from_resource: If given, name of the node which provides the "resource"
            of the new node.

    Returns:
        The name of the node which was added.
    """
    needs = {"training_data": last_run_node}
    if from_resource:
        needs["resource"] = from_resource
    needs.update(self._get_model_provider_needs(train_nodes, component_class))

    node_name = f"run_{component_name}"
    train_nodes[node_name] = SchemaNode(
        needs=needs,
        uses=component_class,
        constructor_name="load",
        fn="process_training_data",
        config=component_config,
    )
    return node_name
def test_invalid_module_error_when_deserializing_schemas(tmp_path: Path):
    """Check that loading a schema which uses an unknown class raises."""
    train_node = SchemaNode(
        needs={},
        uses=PersistableTestComponent,
        fn="train",
        constructor_name="create",
        config={"some_config": 123455, "some more config": [{"nested": "hi"}]},
    )
    serialized = GraphSchema({"train": train_node}).as_dict()

    # Pretend that the module is invalid for some reason.
    serialized["nodes"]["train"]["uses"] = "invalid.class"

    # Writing the dict form to disk proves that it is really serializable.
    yaml_file = tmp_path / "my_graph.yml"
    rasa.shared.utils.io.write_yaml(serialized, yaml_file)
    schema_dict_from_file = rasa.shared.utils.io.read_yaml_file(yaml_file)

    with pytest.raises(GraphSchemaException):
        _ = GraphSchema.from_dict(schema_dict_from_file)
def _add_nlu_train_node(
    self,
    train_nodes: Dict[Text, SchemaNode],
    component: Type[GraphComponent],
    component_name: Text,
    last_run_node: Text,
    config: Dict[Text, Any],
    cli_parameters: Dict[Text, Any],
) -> Text:
    """Adds a target node which trains a component.

    The node is stored as `train_<component_name>` and runs "train". It is
    constructed via "load" when finetuning and via "create" otherwise.

    Args:
        train_nodes: The train graph nodes. This mapping is updated in place.
        component: The class run by the new node.
        component_name: Name of the component; used to build the node name.
        last_run_node: Name of the node which provides the training data.
        config: The config of the component.
        cli_parameters: Parameters from which extra config is derived via
            `_extra_config_from_cli`. On conflicts these values take precedence
            over `config`.

    Returns:
        The name of the node which was added.
    """
    config_from_cli = self._extra_config_from_cli(cli_parameters, component, config)

    needs = {"training_data": last_run_node}
    needs.update(self._get_model_provider_needs(train_nodes, component))

    if self._is_finetuning:
        constructor_name = "load"
    else:
        constructor_name = "create"

    merged_config = dict(config)
    merged_config.update(config_from_cli)

    train_node_name = f"train_{component_name}"
    train_nodes[train_node_name] = SchemaNode(
        needs=needs,
        uses=component,
        constructor_name=constructor_name,
        fn="train",
        config=merged_config,
        is_target=True,
    )
    return train_node_name