def from_importer(cls, importer: TrainingDataImporter) -> "Validator":
    """Builds a `Validator` from the domain, NLU and story data of an importer.

    Args:
        importer: importer that supplies domain, stories, NLU data and config.

    Returns:
        A new `Validator` instance populated with the loaded training data.
    """
    loaded_domain = importer.get_domain()
    loaded_stories = importer.get_stories()
    loaded_nlu = importer.get_nlu_data()
    loaded_config = importer.get_config()
    return cls(loaded_domain, loaded_nlu, loaded_stories, loaded_config)
async def test_use_of_interface():
    """Checks the bare `TrainingDataImporter` raises on every accessor method."""
    importer = TrainingDataImporter()
    # Each accessor of the abstract interface must raise `NotImplementedError`
    # when awaited on the base class.
    accessors = [
        importer.get_config,
        importer.get_stories,
        importer.get_nlu_data,
        importer.get_domain,
    ]
    for accessor in accessors:
        with pytest.raises(NotImplementedError):
            await accessor()
async def _train_core_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    additional_arguments: Optional[Dict] = None,
    interpreter: Optional[Interpreter] = None,
) -> Optional[Text]:
    """Train Core with validated training and config data.

    Args:
        file_importer: importer supplying the domain, stories and config.
        output: directory in which the packaged model is placed.
        train_path: if given, training artifacts are written here and the
            caller is responsible for packaging/cleanup; if `None`, a temp
            directory is used and the trained Core model is packaged here.
        fixed_model_name: optional fixed name for the packaged model.
        additional_arguments: extra keyword arguments forwarded to Core training.
        interpreter: optional NLU interpreter used during Core training.

    Returns:
        Path to the packaged model if only Core was trained (no `train_path`
        given), otherwise the provided `train_path`.
    """
    import rasa.core.train

    with ExitStack() as stack:
        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))

        # normal (not compare) training
        print_color("Training Core model...", color=rasa.shared.utils.io.bcolors.OKBLUE)
        # Fetch domain and config concurrently - both are awaitables.
        domain, config = await asyncio.gather(
            file_importer.get_domain(), file_importer.get_config()
        )
        # Track the training run for telemetry while the actual training happens.
        async with telemetry.track_model_training(file_importer, model_type="core"):
            # NOTE(review): this awaits `rasa.core.train` directly (not
            # `rasa.core.train.train`) - presumably the package re-exports the
            # callable; confirm against the `rasa.core` package layout.
            await rasa.core.train(
                domain_file=domain,
                training_resource=file_importer,
                output_path=os.path.join(_train_path, DEFAULT_CORE_SUBDIRECTORY_NAME),
                policy_config=config,
                additional_arguments=additional_arguments,
                interpreter=interpreter,
            )
        print_color(
            "Core model training completed.", color=rasa.shared.utils.io.bcolors.OKBLUE
        )

        if train_path is None:
            # Only Core was trained: fingerprint and package the model now.
            new_fingerprint = await model.model_fingerprint(file_importer)
            return model.package_model(
                fingerprint=new_fingerprint,
                output_directory=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="core-",
            )

        return _train_path
def validate(self, importer: TrainingDataImporter) -> TrainingDataImporter:
    """Validates the current graph schema against the training data and domain.

    Args:
        importer: the training data importer which can also load the domain

    Raises:
        `InvalidConfigException` or `InvalidDomain` in case there is some mismatch
    """
    # NLU side first, then Core (stories + domain), mirroring how the data
    # is consumed during training.
    self._validate_nlu(importer.get_nlu_data())
    self._validate_core(importer.get_stories(), importer.get_domain())
    return importer
def _validate(
    self,
    importer: TrainingDataImporter,
    nlu: bool = True,
    core: bool = True,
) -> None:
    """Validate whether the finetuning setting conflicts with other settings.

    Note that this validation always takes into account the configuration of
    nlu *and* core part, while the validation of aspects of the domain and
    the NLU training data only happen if we request to validate finetuning
    with respect to NLU/Core models, respectively. For more details,
    see docstring of this class.

    Args:
        importer: a training data importer
        nlu: set to `False` if NLU part should not be validated
        core: set to `False` if Core part should not be validated

    Raises:
        `InvalidConfigException` if there is a conflict
    """
    # Finetuning requires fingerprints from a previous (non-finetuning) run.
    if self._is_finetuning and not self._fingerprints:
        raise InvalidConfigException(
            f"Finetuning is enabled but the {self.__class__.__name__} "
            f"does not remember seeing a training run. Ensure that you have "
            f"trained your model at least once (with finetuning disabled) "
            f"and ensure that the {self.__class__.__name__} is part of the "
            f"training graph. "
        )
    rasa_version = rasa.__version__
    if self._is_finetuning:
        # The previously trained model must not be older than the minimum
        # compatible Rasa version, otherwise finetuning is refused.
        old_rasa_version = self._fingerprints[FINGERPRINT_VERSION]
        if version.parse(old_rasa_version) < version.parse(MINIMUM_COMPATIBLE_VERSION):
            # NOTE(review): message is missing a closing ")" after the version.
            raise InvalidConfigException(
                f"The minimum compatible Rasa Version is "
                f"{MINIMUM_COMPATIBLE_VERSION} but the model we attempt to "
                f"finetune has been generated with an older version "
                f"({old_rasa_version}."
            )
    # Always record the current Rasa version for the next run.
    self._fingerprints[FINGERPRINT_VERSION] = rasa_version

    # The config check runs unconditionally: only 'epoch' keys may change
    # between the original training run and the finetuning run.
    config = importer.get_config()
    self._compare_or_memorize(
        fingerprint_key=FINGERPRINT_CONFIG_WITHOUT_EPOCHS_KEY,
        new_fingerprint=self._get_fingerprint_of_config_without_epochs(config),
        error_message=(
            "Cannot finetune because more than just the 'epoch' keys have been "
            "changed in the configuration. "
            "Please revert your configuration and only change "
            "the 'epoch' settings where needed."
        ),
    )
    if core:
        # NOTE: If there's a consistency check between domain and core training data
        # that ensures domain and core training data are consistent, then we can
        # drop this check.
        domain = importer.get_domain()
        # NOTE(review): adjacent string literals below produce
        # "domain.Please" with no separating space - confirm and fix upstream.
        self._compare_or_memorize(
            fingerprint_key=FINGERPRINT_CORE,
            new_fingerprint=self._get_fingerprint_of_domain_without_responses(domain),
            error_message=(
                "Cannot finetune because more than just the responses have been "
                "changed in the domain."
                "Please revert all settings in your domain file (except the "
                "'responses')."
            ),
        )
    if nlu:
        # Finetuning must not introduce new labels or drop known ones.
        nlu_data = importer.get_nlu_data()
        self._compare_or_memorize(
            fingerprint_key=FINGERPRINT_NLU,
            new_fingerprint=nlu_data.label_fingerprint(),
            error_message=(
                "Cannot finetune because NLU training data contains new labels "
                "or does not contain any examples for some known labels. "
                "Please make sure that the NLU data that you use "
                "for finetuning contains at least one example for every label "
                "(i.e. intent, action name, ...) that was included in the NLU "
                "data used for training the model which we attempt to finetune "
                "now. Moreover, you must not add labels that were not included "
                "during training before. "
            ),
        )
    # Persist (possibly updated) fingerprints for subsequent runs.
    self.persist()
async def _train_core_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    additional_arguments: Optional[Dict] = None,
    interpreter: Optional[Interpreter] = None,
    model_to_finetune: Optional["Text"] = None,
    finetuning_epoch_fraction: float = 1.0,
) -> Optional[Text]:
    """Train Core with validated training and config data.

    Args:
        file_importer: importer supplying the domain, stories and config.
        output: directory in which the packaged model is placed.
        train_path: if given, training artifacts are written here and the
            caller handles packaging/cleanup; if `None`, a temp directory is
            used and the trained Core model is packaged into `output`.
        fixed_model_name: optional fixed name for the packaged model.
        additional_arguments: extra keyword arguments forwarded to Core training.
        interpreter: optional NLU interpreter used during Core training.
        model_to_finetune: optional path to a previous model to finetune;
            replaced below by the loaded model object once resolved.
        finetuning_epoch_fraction: fraction of configured epochs used when
            finetuning.

    Returns:
        Path to the packaged model if only Core was trained (no `train_path`
        given), otherwise the provided `train_path`.
    """
    import rasa.core.train

    with ExitStack() as stack:
        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))

        # normal (not compare) training
        rasa.shared.utils.cli.print_color(
            "Training Core model...", color=rasa.shared.utils.io.bcolors.OKBLUE
        )
        # Fetch domain and config concurrently - both are awaitables.
        domain, config = await asyncio.gather(
            file_importer.get_domain(), file_importer.get_config()
        )

        if model_to_finetune:
            rasa.shared.utils.common.mark_as_experimental_feature(
                "Incremental Training feature"
            )
            # Rebinds `model_to_finetune` from a path to the loaded model
            # (or a falsy value if no finetunable model was found).
            model_to_finetune = await _core_model_for_finetuning(
                model_to_finetune,
                file_importer=file_importer,
                finetuning_epoch_fraction=finetuning_epoch_fraction,
            )

            if not model_to_finetune:
                # Aborts the process with an error message.
                rasa.shared.utils.cli.print_error_and_exit(
                    f"No Core model for finetuning found. Please make sure to either "
                    f"specify a path to a previous model or to have a finetunable "
                    f"model within the directory '{output}'."
                )

        # Track the training run for telemetry while the actual training happens.
        async with telemetry.track_model_training(
            file_importer,
            model_type="core",
            is_finetuning=model_to_finetune is not None,
        ):
            await rasa.core.train.train(
                domain_file=domain,
                training_resource=file_importer,
                output_path=os.path.join(_train_path, DEFAULT_CORE_SUBDIRECTORY_NAME),
                policy_config=config,
                additional_arguments=additional_arguments,
                interpreter=interpreter,
                model_to_finetune=model_to_finetune,
            )
        rasa.shared.utils.cli.print_color(
            "Core model training completed.", color=rasa.shared.utils.io.bcolors.OKBLUE
        )

        if train_path is None:
            # Only Core was trained: fingerprint and package the model now.
            new_fingerprint = await model.model_fingerprint(file_importer)
            return model.package_model(
                fingerprint=new_fingerprint,
                output_directory=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="core-",
            )

        return _train_path
def provide_train(self, importer: TrainingDataImporter) -> Domain:
    """Provides domain from training data during training.

    The loaded domain is persisted before being handed back to the graph.
    """
    loaded_domain = importer.get_domain()
    self._persist(loaded_domain)
    return loaded_domain
def train(
    self,
    model_configuration: GraphModelConfiguration,
    importer: TrainingDataImporter,
    output_filename: Path,
    force_retraining: bool = False,
    is_finetuning: bool = False,
) -> ModelMetadata:
    """Trains and packages a model and returns the prediction graph runner.

    Args:
        model_configuration: The model configuration (schemas, language, etc.)
        importer: The importer which provides the training data for the training.
        output_filename: The location to save the packaged model.
        force_retraining: If `True` then the cache is skipped and all components
            are retrained.
        is_finetuning: If `True` the graph is run in finetuning mode (passed
            through to the fingerprint run and the execution context).

    Returns:
        The metadata describing the trained model.
    """
    logger.debug("Starting training.")

    # Retrieve the domain for the model metadata right at the start.
    # This avoids that something during the graph runs mutates it.
    domain = copy.deepcopy(importer.get_domain())

    if force_retraining:
        # Train every node: use the full schema without pruning.
        logger.debug(
            "Skip fingerprint run as a full training of the model was enforced."
        )
        pruned_training_schema = model_configuration.train_schema
    else:
        # Fingerprint run determines which nodes can be served from cache;
        # the schema is then pruned to only the nodes that need retraining.
        fingerprint_run_outputs = self.fingerprint(
            model_configuration.train_schema,
            importer=importer,
            is_finetuning=is_finetuning,
        )
        pruned_training_schema = self._prune_schema(
            model_configuration.train_schema, fingerprint_run_outputs
        )

    # Hooks log node execution and write training results into the cache.
    hooks = [
        LoggingHook(pruned_schema=pruned_training_schema),
        TrainingHook(
            cache=self._cache,
            model_storage=self._model_storage,
            pruned_schema=pruned_training_schema,
        ),
    ]

    # NOTE: the runner executes the pruned schema, while the execution
    # context keeps a reference to the full (unpruned) train schema.
    graph_runner = self._graph_runner_class.create(
        graph_schema=pruned_training_schema,
        model_storage=self._model_storage,
        execution_context=ExecutionContext(
            graph_schema=model_configuration.train_schema,
            is_finetuning=is_finetuning,
        ),
        hooks=hooks,
    )

    logger.debug("Running the pruned train graph with real node execution.")
    graph_runner.run(inputs={PLACEHOLDER_IMPORTER: importer})

    return self._model_storage.create_model_package(
        output_filename, model_configuration, domain
    )