Example #1
0
def test_mitie_add_cls_token():
    """Check MitieTokenizer output when ``use_cls_token`` is enabled.

    With ``{"use_cls_token": True}`` the tokenizer is expected to return
    the three word tokens of "Forecast for lunch" followed by a trailing
    ``CLS_TOKEN`` entry whose offset is 19.
    """
    from rasa.nlu.tokenizers.mitie_tokenizer import MitieTokenizer

    tokenizer = MitieTokenizer({"use_cls_token": True})
    sentence = "Forecast for lunch"

    # Token texts: the words in order, then the appended CLS token.
    token_texts = [token.text for token in tokenizer.tokenize(sentence)]
    assert token_texts == ["Forecast", "for", "lunch", CLS_TOKEN]

    # Token offsets are checked on a fresh tokenize() call.
    token_offsets = [token.offset for token in tokenizer.tokenize(sentence)]
    assert token_offsets == [0, 9, 13, 19]
Example #2
0
def test_mitie():
    """Check token texts and offsets produced by a default MitieTokenizer.

    Two inputs are covered: a plain ASCII sentence, and a sentence with
    accented characters, a contraction and trailing punctuation.
    """
    from rasa.nlu.tokenizers.mitie_tokenizer import MitieTokenizer
    tokenizer = MitieTokenizer()

    # Each case: (input text, expected token texts, expected offsets).
    cases = [
        (
            "Forecast for lunch",
            ["Forecast", "for", "lunch"],
            [0, 9, 13],
        ),
        (
            "hey ńöñàśçií how're you?",
            ["hey", "ńöñàśçií", "how", "'re", "you", "?"],
            [0, 4, 13, 16, 20, 23],
        ),
    ]

    for sentence, want_texts, want_offsets in cases:
        assert [tok.text for tok in tokenizer.tokenize(sentence)] == want_texts
        assert [tok.offset for tok in tokenizer.tokenize(sentence)] == want_offsets
Example #3
0
def test_mitie(text, expected_tokens, expected_indices):
    """Compare MitieTokenizer output for ``text`` against expected values.

    Args:
        text: Raw input wrapped in a ``Message`` before tokenizing.
        expected_tokens: Expected ``.text`` of each token, in order.
        expected_indices: Per-token sequences whose element 0 is the
            expected ``.start`` and element 1 the expected ``.end``.
    """
    tokenizer = MitieTokenizer()
    message = Message(text)

    produced = tokenizer.tokenize(message, attribute=TEXT)

    actual_texts = [token.text for token in produced]
    assert actual_texts == expected_tokens

    actual_starts = [token.start for token in produced]
    assert actual_starts == [span[0] for span in expected_indices]

    actual_ends = [token.end for token in produced]
    assert actual_ends == [span[1] for span in expected_indices]
Example #4
0
def test_mitie():
    """Check token texts and offsets produced by a default MitieTokenizer.

    Covers a plain sentence and one mixing accented characters, a
    contraction ("how're") and a trailing question mark.
    """
    from rasa.nlu.tokenizers.mitie_tokenizer import MitieTokenizer

    tokenizer = MitieTokenizer()

    def texts_of(sentence):
        # Text of every token from a fresh tokenize() call.
        return [token.text for token in tokenizer.tokenize(sentence)]

    def offsets_of(sentence):
        # Offset of every token from a fresh tokenize() call.
        return [token.offset for token in tokenizer.tokenize(sentence)]

    plain = "Forecast for lunch"
    assert texts_of(plain) == ["Forecast", "for", "lunch"]
    assert offsets_of(plain) == [0, 9, 13]

    accented = "hey ńöñàśçií how're you?"
    assert texts_of(accented) == ["hey", "ńöñàśçií", "how", "'re", "you", "?"]
    assert offsets_of(accented) == [0, 4, 13, 16, 20, 23]