예제 #1
0
def generate_dataset(language, *files):
    """Build a Snips NLU dataset from text files and print it as JSON.

    When at least one of ``files`` ends with ``.yml`` or ``.yaml``, every
    file is handed to ``Dataset.from_yaml_files``; otherwise they are all
    handed to ``Dataset.from_files``. The resulting ``dataset.json`` is
    written to stdout, indented by 2 spaces with sorted keys.
    """
    paths = list(files)
    has_yaml = any(path.endswith((".yml", ".yaml")) for path in paths)
    loader = Dataset.from_yaml_files if has_yaml else Dataset.from_files
    dataset = loader(language, paths)
    print(json.dumps(dataset.json, indent=2, sort_keys=True))
예제 #2
0
    def test_should_generate_dataset_from_files(self):
        # Given
        intent_file_1 = "intent_whoIsGame.txt"
        intent_file_2 = "intent_getWeather.txt"
        entity_file_1 = "entity_location.txt"

        who_is_game_txt = """
who is the [role:role](president) of [country:country](France)
who is the [role:role](CEO) of [company:company](Google) please
"""

        get_weather_txt = """
what is the weather in [weatherLocation:location](Paris)?
is it raining in [weatherLocation] [weatherDate:snips/datetime]
"""

        location_txt = """
new york,big apple
london
        """

        # pylint:disable=unused-argument
        def mock_open(self_, *args, **kwargs):
            if str(self_) == intent_file_1:
                return io.StringIO(who_is_game_txt)
            if str(self_) == intent_file_2:
                return io.StringIO(get_weather_txt)
            if str(self_) == entity_file_1:
                return io.StringIO(location_txt)
            return None

        # pylint:enable=unused-argument

        dataset_files = [intent_file_1, intent_file_2, entity_file_1]

        # When
        with patch("pathlib.io") as mock_io:
            mock_io.open.side_effect = mock_open
            dataset = Dataset.from_files("en", dataset_files)
        dataset_dict = dataset.json

        # When / Then
        validate_and_format_dataset(dataset_dict)
        self.assertDictEqual(EXPECTED_DATASET_DICT, dataset_dict)