Example #1
0
    def from_importer(cls, importer: TrainingDataImporter) -> "Validator":
        """Build a `Validator` from an importer's domain, NLU and story data."""
        # Load each training artifact in turn, then hand everything to the
        # regular constructor (note the argument order: nlu before stories).
        domain = importer.get_domain()
        stories = importer.get_stories()
        nlu_data = importer.get_nlu_data()
        config = importer.get_config()
        return cls(domain, nlu_data, stories, config)
Example #2
0
async def test_use_of_interface():
    """The bare importer's data accessors must all raise `NotImplementedError`."""
    importer = TrainingDataImporter()

    # Bound methods are awaited directly, so no lambda wrappers are needed.
    accessors = (
        importer.get_config,
        importer.get_stories,
        importer.get_nlu_data,
        importer.get_domain,
    )
    for accessor in accessors:
        with pytest.raises(NotImplementedError):
            await accessor()
Example #3
0
async def _train_core_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    additional_arguments: Optional[Dict] = None,
    interpreter: Optional[Interpreter] = None,
) -> Optional[Text]:
    """Train Core with validated training and config data.

    Args:
        file_importer: Importer that supplies the domain and policy config.
        output: Directory the packaged model is written to when no
            `train_path` was given.
        train_path: Existing training directory to train into; if `None`,
            a temporary directory is used and cleaned up afterwards.
        fixed_model_name: Optional fixed file name for the packaged model.
        additional_arguments: Extra keyword arguments forwarded to Core training.
        interpreter: Optional interpreter passed through to Core training.

    Returns:
        The path of the packaged model if a temporary train path was used,
        otherwise the provided `train_path`.
    """

    import rasa.core.train

    with ExitStack() as stack:
        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(
                TempDirectoryPath(tempfile.mkdtemp()))

        # normal (not compare) training
        print_color("Training Core model...",
                    color=rasa.shared.utils.io.bcolors.OKBLUE)
        # Domain and config are loaded concurrently.
        domain, config = await asyncio.gather(file_importer.get_domain(),
                                              file_importer.get_config())
        async with telemetry.track_model_training(file_importer,
                                                  model_type="core"):
            # NOTE(review): this awaits `rasa.core.train` itself, whereas the
            # finetuning-aware variant of this function calls
            # `rasa.core.train.train(...)`. This only works if the `rasa.core`
            # package re-exports the `train` function under that name —
            # confirm against the installed rasa version.
            await rasa.core.train(
                domain_file=domain,
                training_resource=file_importer,
                output_path=os.path.join(_train_path,
                                         DEFAULT_CORE_SUBDIRECTORY_NAME),
                policy_config=config,
                additional_arguments=additional_arguments,
                interpreter=interpreter,
            )
        print_color("Core model training completed.",
                    color=rasa.shared.utils.io.bcolors.OKBLUE)

        if train_path is None:
            # Only Core was trained; package it into `output`.
            new_fingerprint = await model.model_fingerprint(file_importer)
            return model.package_model(
                fingerprint=new_fingerprint,
                output_directory=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="core-",
            )

        return _train_path
    def validate(self, importer: TrainingDataImporter) -> TrainingDataImporter:
        """Checks the graph schema against NLU data, stories and the domain.

        Args:
            importer: the training data importer which can also load the domain
        Raises:
            `InvalidConfigException` or `InvalidDomain` in case there is some mismatch
        """
        # Validate the NLU side first ...
        self._validate_nlu(importer.get_nlu_data())

        # ... then the Core side (stories together with the domain).
        stories = importer.get_stories()
        domain = importer.get_domain()
        self._validate_core(stories, domain)
        return importer
Example #5
0
    def _validate(
        self,
        importer: TrainingDataImporter,
        nlu: bool = True,
        core: bool = True,
    ) -> None:
        """Validate whether the finetuning setting conflicts with other settings.

        Note that this validation always takes into account the configuration of
        nlu *and* core part, while the validation of aspects of the domain and
        the NLU training data only happen if we request to validate finetuning
        with respect to NLU/Core models, respectively.

        For more details, see docstring of this class.

        Args:
            importer: a training data importer
            nlu: set to `False` if NLU part should not be validated
            core: set to `False` if Core part should not be validated
        Raises:
            `InvalidConfigException` if there is a conflict
        """
        # Finetuning requires fingerprints recorded by a previous training run.
        if self._is_finetuning and not self._fingerprints:
            raise InvalidConfigException(
                f"Finetuning is enabled but the {self.__class__.__name__} "
                f"does not remember seeing a training run. Ensure that you have "
                f"trained your model at least once (with finetuning disabled) "
                f"and ensure that the {self.__class__.__name__} is part of the "
                f"training graph. ")

        rasa_version = rasa.__version__
        if self._is_finetuning:
            # The previously trained model must not predate the minimum
            # compatible Rasa version.
            old_rasa_version = self._fingerprints[FINGERPRINT_VERSION]
            if version.parse(old_rasa_version) < version.parse(
                    MINIMUM_COMPATIBLE_VERSION):
                raise InvalidConfigException(
                    f"The minimum compatible Rasa Version is "
                    f"{MINIMUM_COMPATIBLE_VERSION} but the model we attempt to "
                    f"finetune has been generated with an older version "
                    f"({old_rasa_version}).")
        self._fingerprints[FINGERPRINT_VERSION] = rasa_version

        # The config — ignoring epoch settings — must be unchanged to finetune.
        config = importer.get_config()
        self._compare_or_memorize(
            fingerprint_key=FINGERPRINT_CONFIG_WITHOUT_EPOCHS_KEY,
            new_fingerprint=self._get_fingerprint_of_config_without_epochs(
                config),
            error_message=
            ("Cannot finetune because more than just the 'epoch' keys have been "
             "changed in the configuration. "
             "Please revert your configuration and only change "
             "the 'epoch' settings where needed."),
        )

        if core:
            # NOTE: If there's a consistency check between domain and core training data
            # that ensures domain and core training data are consistent, then we can
            # drop this check.
            domain = importer.get_domain()
            self._compare_or_memorize(
                fingerprint_key=FINGERPRINT_CORE,
                new_fingerprint=self.
                _get_fingerprint_of_domain_without_responses(domain),
                error_message=
                ("Cannot finetune because more than just the responses have been "
                 "changed in the domain. "
                 "Please revert all settings in your domain file (except the "
                 "'responses')."),
            )

        if nlu:
            nlu_data = importer.get_nlu_data()
            self._compare_or_memorize(
                fingerprint_key=FINGERPRINT_NLU,
                new_fingerprint=nlu_data.label_fingerprint(),
                error_message=
                ("Cannot finetune because NLU training data contains new labels "
                 "or does not contain any examples for some known labels. "
                 "Please make sure that the NLU data that you use "
                 "for finetuning contains at least one example for every label "
                 "(i.e. intent, action name, ...) that was included in the NLU "
                 "data used for training the model which we attempt to finetune "
                 "now. Moreover, you must not add labels that were not included "
                 "during training before. "),
            )

        # Record the (possibly updated) fingerprints for the next run.
        self.persist()
Example #6
0
async def _train_core_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    additional_arguments: Optional[Dict] = None,
    interpreter: Optional[Interpreter] = None,
    model_to_finetune: Optional["Text"] = None,
    finetuning_epoch_fraction: float = 1.0,
) -> Optional[Text]:
    """Train Core with validated training and config data.

    Args:
        file_importer: Importer that supplies the domain and policy config.
        output: Directory the packaged model is written to when no
            `train_path` was given; also named in the error shown when no
            finetunable model can be found.
        train_path: Existing training directory to train into; if `None`,
            a temporary directory is used and cleaned up afterwards.
        fixed_model_name: Optional fixed file name for the packaged model.
        additional_arguments: Extra keyword arguments forwarded to Core
            training.
        interpreter: Optional interpreter passed through to Core training.
        model_to_finetune: Optional reference to a previous model; when set,
            training finetunes that model instead of starting from scratch.
        finetuning_epoch_fraction: Fraction of the configured epochs to use
            when finetuning.

    Returns:
        The path of the packaged model if a temporary train path was used,
        otherwise the provided `train_path`.
    """
    import rasa.core.train

    with ExitStack() as stack:
        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(
                TempDirectoryPath(tempfile.mkdtemp()))

        # normal (not compare) training
        rasa.shared.utils.cli.print_color(
            "Training Core model...",
            color=rasa.shared.utils.io.bcolors.OKBLUE)
        # Domain and config are loaded concurrently.
        domain, config = await asyncio.gather(file_importer.get_domain(),
                                              file_importer.get_config())

        if model_to_finetune:
            rasa.shared.utils.common.mark_as_experimental_feature(
                "Incremental Training feature")
            # Resolve the reference into a loaded Core model; falsy result
            # means nothing finetunable was found.
            model_to_finetune = await _core_model_for_finetuning(
                model_to_finetune,
                file_importer=file_importer,
                finetuning_epoch_fraction=finetuning_epoch_fraction,
            )

            if not model_to_finetune:
                rasa.shared.utils.cli.print_error_and_exit(
                    f"No Core model for finetuning found. Please make sure to either "
                    f"specify a path to a previous model or to have a finetunable "
                    f"model within the directory '{output}'.")

        async with telemetry.track_model_training(
                file_importer,
                model_type="core",
                is_finetuning=model_to_finetune is not None,
        ):
            await rasa.core.train.train(
                domain_file=domain,
                training_resource=file_importer,
                output_path=os.path.join(_train_path,
                                         DEFAULT_CORE_SUBDIRECTORY_NAME),
                policy_config=config,
                additional_arguments=additional_arguments,
                interpreter=interpreter,
                model_to_finetune=model_to_finetune,
            )
        rasa.shared.utils.cli.print_color(
            "Core model training completed.",
            color=rasa.shared.utils.io.bcolors.OKBLUE)

        if train_path is None:
            # Only Core was trained; package it into `output`.
            new_fingerprint = await model.model_fingerprint(file_importer)
            return model.package_model(
                fingerprint=new_fingerprint,
                output_directory=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="core-",
            )

        return _train_path
Example #7
0
 def provide_train(self, importer: TrainingDataImporter) -> Domain:
     """Loads the domain from the importer, persists it, and returns it."""
     training_domain = importer.get_domain()
     self._persist(training_domain)
     return training_domain
Example #8
0
    def train(
        self,
        model_configuration: GraphModelConfiguration,
        importer: TrainingDataImporter,
        output_filename: Path,
        force_retraining: bool = False,
        is_finetuning: bool = False,
    ) -> ModelMetadata:
        """Trains and packages a model and returns the prediction graph runner.

        Args:
            model_configuration: The model configuration (schemas, language, etc.)
            importer: The importer which provides the training data for the training.
            output_filename: The location to save the packaged model.
            force_retraining: If `True` then the cache is skipped and all components
                are retrained.
            is_finetuning: If `True` the graph is executed (and fingerprinted)
                in finetuning mode.

        Returns:
            The metadata describing the trained model.
        """
        logger.debug("Starting training.")

        # Retrieve the domain for the model metadata right at the start.
        # This avoids that something during the graph runs mutates it.
        domain = copy.deepcopy(importer.get_domain())

        if force_retraining:
            logger.debug(
                "Skip fingerprint run as a full training of the model was enforced."
            )
            pruned_training_schema = model_configuration.train_schema
        else:
            # The fingerprint run determines which graph nodes are unchanged
            # so the training schema can be pruned accordingly.
            fingerprint_run_outputs = self.fingerprint(
                model_configuration.train_schema,
                importer=importer,
                is_finetuning=is_finetuning,
            )
            pruned_training_schema = self._prune_schema(
                model_configuration.train_schema, fingerprint_run_outputs)

        # Hooks: one logs node execution, one writes training results to the
        # cache via the model storage.
        hooks = [
            LoggingHook(pruned_schema=pruned_training_schema),
            TrainingHook(
                cache=self._cache,
                model_storage=self._model_storage,
                pruned_schema=pruned_training_schema,
            ),
        ]

        # Note: the runner executes the pruned schema, but the execution
        # context carries the full (unpruned) train schema.
        graph_runner = self._graph_runner_class.create(
            graph_schema=pruned_training_schema,
            model_storage=self._model_storage,
            execution_context=ExecutionContext(
                graph_schema=model_configuration.train_schema,
                is_finetuning=is_finetuning,
            ),
            hooks=hooks,
        )

        logger.debug(
            "Running the pruned train graph with real node execution.")

        graph_runner.run(inputs={PLACEHOLDER_IMPORTER: importer})

        return self._model_storage.create_model_package(
            output_filename, model_configuration, domain)