예제 #1
0
 async def get_nlu_data(self, languages=True) -> Dict[Text, TrainingData]:
     """Load NLU training data for one or more configured languages.

     ``languages`` selects what is loaded:

     * a string starting with ``'data_for_'``: the remainder names a single
       language, and that language's training data object is returned
       directly (not wrapped in a dict);
     * a list: the language keys to load;
     * anything else (including the default ``True``): every key of
       ``self.nlu_config``.

     In the dict-returning cases, a language whose loader raises a
     ``ValueError`` beginning with "Unknown data format" is mapped to an
     empty ``TrainingData``; any other ``ValueError`` leaves that language
     out of the result.
     """
     marker = 'data_for_'
     if isinstance(languages, str) and languages.startswith(marker):
         requested = languages.replace(marker, '')
         requested_path = self.nlu_config[requested]['path']
         return utils.training_data_from_paths([requested_path], 'xx')

     if isinstance(languages, list):
         selected = languages
     else:
         selected = self.nlu_config.keys()

     loaded = {}
     for code in selected:
         try:
             loaded[code] = utils.training_data_from_paths(
                 [self.nlu_config[code]['path']], 'xx')
         except ValueError as err:
             if not str(err).startswith("Unknown data format"):
                 # Other value errors are ignored; the language is skipped.
                 continue
             from rasa.nlu.training_data import TrainingData
             loaded[code] = TrainingData()
     return loaded
예제 #2
0
def test_train_test_split(filepaths):
    """Check the loaded data's contents and its ``train_frac=0.8`` split.

    Loads ``filepaths`` as English training data, verifies intents, entities,
    response names and example counts, then splits the data and checks the
    partition sizes and that each partition reports as many keys in
    ``number_of_examples_per_intent`` / ``number_of_examples_per_response``
    as the unsplit data.
    """
    from rasa.importers.utils import training_data_from_paths

    expected_total = 46
    data = training_data_from_paths(filepaths, language="en")

    expected_intents = {
        "affirm", "greet", "restaurant_search", "goodbye", "chitchat"
    }
    assert data.intents == expected_intents
    assert data.entities == {"location", "cuisine"}
    response_names = set(data.responses.keys())
    assert response_names == {"chitchat/ask_name", "chitchat/ask_weather"}

    assert len(data.training_examples) == expected_total
    assert len(data.intent_examples) == expected_total
    assert len(data.response_examples) == 4

    train_part, test_part = data.train_test_split(train_frac=0.8)

    test_size = len(test_part.training_examples)
    train_size = len(train_part.training_examples)
    assert test_size + train_size == expected_total
    assert train_size == 34
    assert test_size == 12

    intent_key_count = len(data.number_of_examples_per_intent.keys())
    assert intent_key_count == len(
        test_part.number_of_examples_per_intent.keys())
    assert intent_key_count == len(
        train_part.number_of_examples_per_intent.keys())

    response_key_count = len(data.number_of_examples_per_response.keys())
    assert response_key_count == len(
        test_part.number_of_examples_per_response.keys())
    assert response_key_count == len(
        train_part.number_of_examples_per_response.keys())
예제 #3
0
def test_demo_data(files):
    """Verify the contents of the training data loaded from ``files``.

    Checks the intent and entity sets, the response names, the sizes of the
    example collections, the entity synonym mapping and the regex features.
    """
    from rasa.importers.utils import training_data_from_paths

    data = training_data_from_paths(files, language="en")

    expected_intents = {
        "affirm", "greet", "restaurant_search", "goodbye", "chitchat"
    }
    assert data.intents == expected_intents
    assert data.entities == {"location", "cuisine"}
    response_names = set(data.responses.keys())
    assert response_names == {"chitchat/ask_name", "chitchat/ask_weather"}

    # Collection name -> expected size, checked in the listed order.
    expected_sizes = (
        ("training_examples", 46),
        ("intent_examples", 46),
        ("response_examples", 4),
        ("entity_examples", 11),
        ("responses", 2),
    )
    for attribute, size in expected_sizes:
        assert len(getattr(data, attribute)) == size

    expected_synonyms = {
        "Chines": "chinese",
        "Chinese": "chinese",
        "chines": "chinese",
        "vegg": "vegetarian",
        "veggie": "vegetarian",
    }
    assert data.entity_synonyms == expected_synonyms

    expected_regexes = [
        {"name": "greet", "pattern": r"hey[^\s]*"},
        {"name": "zipcode", "pattern": r"[0-9]{5}"},
    ]
    assert data.regex_features == expected_regexes
예제 #4
0
 async def save_from_path(self, path: Text, bot: Text,
                          overwrite: bool = True, user="******"):
     """Read a bot's files from ``path`` and save them for ``bot``.

     Loads the story and NLU files, the domain and the config found under
     ``path``, then stores domain, stories, NLU data and config through the
     corresponding ``save_*`` methods. Nothing is saved until every file has
     been read.

     :param path: root directory containing the data, domain and config
     :param bot: bot identifier passed to each ``save_*`` call
     :param overwrite: accepted but not read by this method
     :param user: user identifier passed to each ``save_*`` call
     :raises AppException: with a fixed validation message when the domain
         is invalid, or wrapping any other exception raised while loading
         or saving
     """
     try:
         data_dir = os.path.join(path, DEFAULT_DATA_PATH)
         story_files, nlu_files = get_core_nlu_files(data_dir)
         nlu = utils.training_data_from_paths(nlu_files, "en")

         domain_file = os.path.join(path, DEFAULT_DOMAIN_PATH)
         domain = Domain.from_file(domain_file)
         domain.check_missing_templates()

         story_steps = await StoryFileReader.read_from_files(
             story_files, domain)

         config_file = os.path.join(path, DEFAULT_CONFIG_PATH)
         config = read_config_file(config_file)

         self.save_domain(domain, bot, user)
         self.save_stories(story_steps, bot, user)
         self.save_nlu(nlu, bot, user)
         self.save_config(config, bot, user)
     except InvalidDomain as domain_error:
         logging.info(domain_error)
         raise AppException("""Failed to validate yaml file.
                         Please make sure the file is initial and all mandatory parameters are specified"""
                            )
     except Exception as error:
         logging.info(error)
         raise AppException(error)
예제 #5
0
    def view_file(self, file, lang='en'):
        """Print an overview of the training data stored in a single file.

        Example invocations::

            $ python -m saai.nlu_data_procs view_file ./nlu_multilang/en/nlu_data.md
            $ python -m saai.nlu_data_procs view_file /pi/ws/knowledgebasebot/data/nlu.md

        Output, in order: every training example as an ``(intent, text)``
        pair, the intents, the entities and the lookup tables.

        :param file: path of the file to load
        :param lang: language passed to the loader
        :return: None
        """
        import pprint

        data = training_data_from_paths([file], language=lang)

        print('.. examples')
        intent_text_pairs = (
            (example.get("intent"), example.text)
            for example in data.training_examples
        )
        print(*intent_text_pairs, sep='\n')

        tc.emp('green', '.. intents')
        for intent in data.intents:
            tc.emp('yellow', f"  - {intent}")

        tc.emp('green', '.. entities')
        print(data.entities)

        tc.emp('green', '.. lookup_tables')
        pprint.pprint(data.lookup_tables)
예제 #6
0
def test_train_test_split(filepaths):
    """Check the loaded data and the sizes of its ``train_frac=0.8`` split."""
    from rasa.importers.utils import training_data_from_paths

    expected_total = 46
    data = training_data_from_paths(filepaths, language="en")

    expected_intents = {
        "affirm", "greet", "restaurant_search", "goodbye", "chitchat"
    }
    assert data.intents == expected_intents
    assert data.entities == {"location", "cuisine"}
    assert len(data.training_examples) == expected_total
    assert len(data.intent_examples) == expected_total

    train_part, test_part = data.train_test_split(train_frac=0.8)

    train_size = len(train_part.training_examples)
    assert train_size == 35
    test_size = len(test_part.training_examples)
    assert test_size == 11
예제 #7
0
def test_train_test_split_with_random_seed(filepaths):
    """Two splits with the same ``random_seed`` must yield the same texts.

    Splits the loaded data twice with ``train_frac=0.8, random_seed=1`` and
    compares the ``TEXT`` values of the intent examples, in order, for the
    train partitions and for the test partitions.
    """
    from rasa.importers.utils import training_data_from_paths

    data = training_data_from_paths(filepaths, language="en")

    def texts_of(examples):
        # Ordered list of the TEXT value of each example.
        return [example.get(TEXT) for example in examples]

    first_split, second_split = [
        data.train_test_split(train_frac=0.8, random_seed=1)
        for _ in range(2)
    ]
    first_train, first_test = first_split
    second_train, second_test = second_split

    first_train_texts = texts_of(first_train.intent_examples)
    second_train_texts = texts_of(second_train.intent_examples)
    first_test_texts = texts_of(first_test.intent_examples)
    second_test_texts = texts_of(second_test.intent_examples)

    assert first_train_texts == second_train_texts
    assert first_test_texts == second_test_texts
예제 #8
0
def test_demo_data_filter_out_retrieval_intents(files):
    """Filter examples by presence of ``INTENT_RESPONSE_KEY``.

    Of the 46 loaded examples, 42 are expected to have no response key and
    4 to have one; filtering must leave the source data unchanged.
    """
    from rasa.importers.utils import training_data_from_paths

    total = 46
    data = training_data_from_paths(files, language="en")
    assert len(data.training_examples) == total

    def lacks_response_key(example):
        return example.get(INTENT_RESPONSE_KEY) is None

    def has_response_key(example):
        return example.get(INTENT_RESPONSE_KEY) is not None

    without_key = data.filter_training_examples(lacks_response_key)
    assert len(without_key.training_examples) == 42

    with_key = data.filter_training_examples(has_response_key)
    assert len(with_key.training_examples) == 4

    # Filtering must not have mutated the data it was applied to.
    assert len(data.training_examples) == total
예제 #9
0
 async def get_nlu_data(self, languages=True) -> Dict[Text, TrainingData]:
     """Load NLU training data per language.

     ``languages`` may be:

     * a string: only that language is loaded, and for a non-empty string
       its training data object is returned directly (an empty
       ``TrainingData`` if it could not be loaded);
     * a list: the language keys to load;
     * anything else (including the default ``True``): every key of
       ``self.nlu_config``.

     In the list / default cases the result is a dict keyed by language. A
     language whose loader raises a ``ValueError`` beginning with
     "Unknown data format" is mapped to an empty ``TrainingData``; any other
     ``ValueError`` leaves that language out.
     """
     single = languages if isinstance(languages, str) else None
     if single is not None:
         languages = [single]
     elif not isinstance(languages, list):
         languages = self.nlu_config.keys()

     loaded = {}
     for code in languages:
         try:
             loaded[code] = utils.training_data_from_paths(
                 self.path_for_nlu_lang(code), code)
         except ValueError as err:
             if not str(err).startswith("Unknown data format"):
                 continue
             loaded[code] = TrainingData()

     if not single:
         return loaded
     return loaded.get(single, TrainingData())
예제 #10
0
 def save_from_path(self, path: Text, bot: Text, user="******"):
     """Read a bot's files from ``path`` and save them for ``bot``.

     Loads the story and NLU files, the domain and the config found under
     ``path``, then stores domain, stories, NLU data and config. Stories are
     read by running the asynchronous reader on a private event loop, which
     is always closed afterwards.

     :param path: root directory containing the data, domain and config
     :param bot: bot identifier passed to each save call
     :param user: user identifier passed to each save call
     :raises AppException: with a fixed validation message when the domain
         is invalid, or wrapping any other exception raised while loading
         or saving
     """
     try:
         story_files, nlu_files = get_core_nlu_files(
             os.path.join(path, DEFAULT_DATA_PATH))
         nlu = utils.training_data_from_paths(nlu_files, "en")
         domain = Domain.from_file(os.path.join(path, DEFAULT_DOMAIN_PATH))
         loop = asyncio.new_event_loop()
         try:
             story_steps = loop.run_until_complete(
                 StoryFileReader.read_from_files(story_files, domain))
         finally:
             # The loop is created only for this call; close it even when
             # reading fails so its resources are not leaked.
             loop.close()
         self.save_domain(domain, bot, user)
         self.save_stories(story_steps, bot, user)
         self.save_nlu(nlu, bot, user)
         self.__save_config(
             read_config_file(os.path.join(path, DEFAULT_CONFIG_PATH)), bot,
             user)
     except InvalidDomain as e:
         logging.info(e)
         raise AppException("""Failed to validate yaml file.
                         Please make sure the file is initial and all mandatory parameters are specified"""
                            ) from e
     except Exception as e:
         logging.info(e)
         raise AppException(e) from e
예제 #11
0
 async def get_nlu_data(
         self, language: Optional[Text] = "en") -> TrainingData:
     """Return the training data loaded from ``self._nlu_files``.

     :param language: language passed to the loader (default ``"en"``)
     """
     nlu_data = utils.training_data_from_paths(self._nlu_files, language)
     return nlu_data
예제 #12
0
    async def get_nlu_data(
            self, language: Optional[Text] = "en") -> TrainingData:
        """Return the training data loaded from ``self.nlu_files``.

        :param language: language passed to the loader (default ``"en"``)
        """
        # Imported inside the method, as in the original implementation.
        from rasa.importers import utils as importer_utils

        nlu_data = importer_utils.training_data_from_paths(
            self.nlu_files, language)
        return nlu_data