def fit(self, dataset, force_retrain=True):
        """Train the NLU engine on the given dataset.

        Args:
            dataset (dict): A valid Snips dataset
            force_retrain (bool, optional): When *False*, intent parsers
                which are already fitted are left untouched. Defaults to
                *True*.

        Returns:
            The same object, trained.
        """
        dataset = validate_and_format_dataset(dataset)
        language = dataset[LANGUAGE]

        # Without an explicit config, use the language default when one is
        # registered, otherwise a bare instance of the config type.
        if self.config is None:
            language_defaults = DEFAULT_CONFIGS.get(language)
            self.config = (
                self.config_type()
                if language_defaults is None
                else self.config_type.from_dict(language_defaults)
            )

        self.load_resources_if_needed(language)
        self.fit_builtin_entity_parser_if_needed(dataset)
        self.fit_custom_entity_parser_if_needed(dataset)

        trained_parsers = []
        for parser_config in self.config.intent_parsers_configs:
            # Existing parsers with a matching unit name are recycled so
            # that pre-trained parsers can be kept.
            intent_parser = next(
                (candidate for candidate in self.intent_parsers
                 if candidate.unit_name == parser_config.unit_name),
                None,
            )
            if intent_parser is None:
                intent_parser = IntentParser.from_config(
                    parser_config,
                    builtin_entity_parser=self.builtin_entity_parser,
                    custom_entity_parser=self.custom_entity_parser,
                    resources=self.resources,
                    random_state=self.random_state,
                )

            if force_retrain or not intent_parser.fitted:
                intent_parser.fit(dataset, force_retrain)
            trained_parsers.append(intent_parser)

        self.intent_parsers = trained_parsers
        self.dataset_metadata = _get_dataset_metadata(dataset)
        return self
Ejemplo n.º 2
0
    def test_default_configs_should_work(self):
        """Fit and parse with the default config of every language."""
        # Given
        weather_dataset = deepcopy(WEATHER_DATASET)

        for language in get_all_languages():
            # When
            default_config = DEFAULT_CONFIGS.get(language)
            missing_msg = "Missing default config for '%s'" % language
            self.assertIsNotNone(default_config, missing_msg)

            # The same dataset copy is re-labelled for each language
            weather_dataset[LANGUAGE] = language
            nlu_engine = SnipsNLUEngine(default_config)
            nlu_engine = nlu_engine.fit(weather_dataset)
            parsing = nlu_engine.parse("Please give me the weather in Paris")

            # Then
            parsed_intent = parsing[RES_INTENT]
            self.assertEqual("SearchWeatherForecast",
                             parsed_intent[RES_INTENT_NAME])
Ejemplo n.º 3
0
    def fit(self, dataset, force_retrain=True):
        """Train the NLU engine on the given dataset.

        Args:
            dataset (dict): A valid Snips dataset
            force_retrain (bool, optional): When *False*, intent parsers
                which are already fitted are left untouched. Defaults to
                *True*.

        Returns:
            The same object, trained.
        """
        logger.info("Fitting NLU engine...")

        dataset = validate_and_format_dataset(dataset)
        self._dataset_metadata = _get_dataset_metadata(dataset)

        # Without an explicit config, use the language default when one is
        # registered, otherwise a bare instance of the config type.
        if self.config is None:
            language_defaults = DEFAULT_CONFIGS.get(
                self._dataset_metadata["language_code"])
            self.config = (
                self.config_type()
                if language_defaults is None
                else self.config_type.from_dict(language_defaults)
            )

        self.fit_builtin_entity_parser_if_needed(dataset)
        self.fit_custom_entity_parser_if_needed(dataset)

        trained_parsers = []
        for parser_config in self.config.intent_parsers_configs:
            # Existing parsers with a matching unit name are recycled so
            # that pre-trained parsers can be kept.
            intent_parser = next(
                (candidate for candidate in self.intent_parsers
                 if candidate.unit_name == parser_config.unit_name),
                None,
            )
            if intent_parser is None:
                intent_parser = build_processing_unit(parser_config)

            # Recycled and new parsers alike get the engine's entity parsers
            intent_parser.builtin_entity_parser = self.builtin_entity_parser
            intent_parser.custom_entity_parser = self.custom_entity_parser

            if force_retrain or not intent_parser.fitted:
                intent_parser.fit(dataset, force_retrain)
            trained_parsers.append(intent_parser)

        self.intent_parsers = trained_parsers
        return self
Ejemplo n.º 4
0
    def test_default_configs_should_work(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: TurnLightOn
utterances:
- turn on the lights
- please switch on the light
- switch the light on
- can you turn the light on ?
- I need you to turn on the lights

---
type: intent
name: GetWeather
utterances:
- what is the weather today
- What's the weather in tokyo today?
- Can you tell me the weather please ?
- what is the weather forecast for this weekend""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        for language in get_all_languages():
            # When
            config = DEFAULT_CONFIGS.get(language)
            self.assertIsNotNone(config, "Missing default config for '%s'"
                                 % language)
            dataset[LANGUAGE] = language
            shared = self.get_shared_data(dataset)
            engine = SnipsNLUEngine(config, **shared).fit(dataset)
            result = engine.parse("Please give me the weather in Paris")

            # Then
            self.assertIsNotNone(result[RES_INTENT])
            intent_name = result[RES_INTENT][RES_INTENT_NAME]
            self.assertEqual("GetWeather", intent_name)