def test_train_tokenizer(text, expected_tokens, expected_indices, spacy_nlp):
    """Train a ``SpacyTokenizer`` on one message and verify the stored tokens.

    The message carries ``text`` both as its text and as its response, with a
    spaCy doc attached for each. After training, the tokens stored for the
    RESPONSE and TEXT attributes must match the expected texts and offsets.

    Args:
        text: Raw input string to tokenize.
        expected_tokens: Token texts expected for ``text``.
        expected_indices: One indexable entry per expected token; element 0 is
            compared with the token's ``start`` and element 1 with its ``end``.
        spacy_nlp: Callable applied to ``text`` to produce the doc stored under
            ``SPACY_DOCS[...]`` (presumably a pytest fixture; confirm).

    NOTE(review): a second function with this exact name appears later in this
    view. If both are in the same module, the later one shadows this one and
    only that one will be collected; confirm they live in separate files.
    """
    tokenizer = SpacyTokenizer()

    example = Message(text)
    example.set(SPACY_DOCS[TEXT], spacy_nlp(text))
    example.set(RESPONSE, text)
    example.set(SPACY_DOCS[RESPONSE], spacy_nlp(text))

    data = TrainingData()
    data.training_examples = [example]

    tokenizer.train(data)

    # Read the example back through the training data, as the tokenizer saw it.
    trained_example = data.training_examples[0]
    for attribute in (RESPONSE, TEXT):
        tokens = trained_example.get(TOKENS_NAMES[attribute])

        assert [token.text for token in tokens] == expected_tokens
        assert [token.start for token in tokens] == [
            span[0] for span in expected_indices
        ]
        assert [token.end for token in tokens] == [
            span[1] for span in expected_indices
        ]
def test_extract_patterns_use_only_entities_lookup_tables(
    entity: Text, lookup_tables: Dict[Text, Text], expected_patterns: Dict[Text, Text]
):
    """Check ``extract_patterns`` output when ``use_only_entities=True``.

    Builds a ``TrainingData`` that optionally contains one example annotated
    with ``entity`` and optionally one lookup table, then compares the
    extracted patterns with ``expected_patterns``.

    Args:
        entity: Entity name to annotate the single example with; when falsy,
            no training example is added.
        lookup_tables: A single lookup-table definition; when falsy, no lookup
            table is registered.
        expected_patterns: Value that ``extract_patterns`` must return.
    """
    training_data = TrainingData()

    if entity:
        entity_annotation = {"entity": entity, "value": "text"}
        annotated_example = Message("text", data={"entities": [entity_annotation]})
        training_data.training_examples = [annotated_example]

    if lookup_tables:
        # The argument is one table; the training data holds a list of them.
        training_data.lookup_tables = [lookup_tables]

    assert (
        pattern_utils.extract_patterns(training_data, use_only_entities=True)
        == expected_patterns
    )
def test_train_tokenizer(text, expected_tokens, expected_indices):
    """Train a ``WhitespaceTokenizer`` on one message and verify its tokens.

    The message uses ``text`` as its text, response and intent. After
    training, the RESPONSE and TEXT tokens must match the expected texts and
    offsets, and the INTENT attribute must yield exactly one token whose text
    is the whole ``text``.

    Args:
        text: Raw input string to tokenize.
        expected_tokens: Token texts expected for ``text``.
        expected_indices: One indexable entry per expected token; element 0 is
            compared with the token's ``start`` and element 1 with its ``end``.
    """
    tokenizer = WhitespaceTokenizer()

    example = Message(text)
    example.set(RESPONSE, text)
    example.set(INTENT, text)

    data = TrainingData()
    data.training_examples = [example]

    tokenizer.train(data)

    # Read the example back through the training data, as the tokenizer saw it.
    trained_example = data.training_examples[0]
    for attribute in (RESPONSE, TEXT):
        tokens = trained_example.get(TOKENS_NAMES[attribute])

        assert [token.text for token in tokens] == expected_tokens
        assert [token.start for token in tokens] == [
            span[0] for span in expected_indices
        ]
        assert [token.end for token in tokens] == [
            span[1] for span in expected_indices
        ]

    # Check the intent attribute: it is expected to come back as a single
    # token holding the full text.
    intent_tokens = data.training_examples[0].get(TOKENS_NAMES[INTENT])

    assert [token.text for token in intent_tokens] == [text]