def test_spacy(spacy_nlp):
    from rasa_nlu.tokenizers.spacy_tokenizer import SpacyTokenizer
    tk = SpacyTokenizer()

    assert [t.text for t in tk.tokenize(spacy_nlp("Forecast for lunch"))] == \
        ['Forecast', 'for', 'lunch']
    assert [t.offset for t in tk.tokenize(spacy_nlp("Forecast for lunch"))] == \
        [0, 9, 13]

    assert [t.text for t in tk.tokenize(spacy_nlp("hey ńöñàśçií how're you?"))] == \
        ['hey', 'ńöñàśçií', 'how', '\'re', 'you', '?']
    assert [t.offset for t in tk.tokenize(spacy_nlp("hey ńöñàśçií how're you?"))] == \
        [0, 4, 13, 16, 20, 23]

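# The test above relies on a `spacy_nlp` pytest fixture that yields a loaded
# spaCy pipeline. A minimal sketch of such a fixture follows; the model name
# ("en_core_web_sm") and session scope are assumptions, not necessarily what
# the project's own conftest uses.
import pytest
import spacy


@pytest.fixture(scope="session")
def spacy_nlp():
    # Load a small English pipeline once per test session (assumed model name).
    return spacy.load("en_core_web_sm")
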
def tokenize_sentence(sentence, expected_result, language):
    from rasa_nlu.tokenizers.spacy_tokenizer import SpacyTokenizer
    tk = SpacyTokenizer(language)
    assert tk.tokenize(sentence) == expected_result

def tokenize_sentence(sentence, expected_result, spacy_nlp_en):
    # spacy_nlp_en: a loaded spaCy English pipeline, e.g. provided by a
    # pytest fixture, used to construct the tokenizer.
    from rasa_nlu.tokenizers.spacy_tokenizer import SpacyTokenizer
    tk = SpacyTokenizer(spacy_nlp_en)
    assert tk.tokenize(sentence) == expected_result

def tokenize_sentence(sentence, expected_result, spacy_nlp):
    # spacy_nlp: a loaded spaCy pipeline, e.g. provided by a pytest fixture,
    # passed to tokenize() alongside the raw sentence.
    from rasa_nlu.tokenizers.spacy_tokenizer import SpacyTokenizer
    tk = SpacyTokenizer()
    assert tk.tokenize(sentence, spacy_nlp) == expected_result