# Example 1
def test_convert_featurizer_process(monkeypatch: MonkeyPatch):
    """Processing a tokenized message should attach the expected dense features."""
    # Bypass remote URL validation so the test needs no network round-trip.
    monkeypatch.setattr(
        ConveRTFeaturizer,
        "_get_validated_model_url",
        lambda x: RESTRICTED_ACCESS_URL,
    )
    featurizer = ConveRTFeaturizer(
        {"name": "ConveRTFeaturizer", "model_url": RESTRICTED_ACCESS_URL}
    )

    message = Message.build(text="Hey how are you today ?")
    WhitespaceTokenizer().train(TrainingData([message]))
    tokens = featurizer.tokenize(message, attribute=TEXT)

    featurizer.process(message, tf_hub_module=featurizer.module)

    expected = np.array(
        [2.2636216, -0.26475656, -1.1358104, -0.49751878, -1.3946456])
    expected_cls = np.array(
        [1.0251294, -0.04053932, -0.7018805, -0.82054937, -0.75054353])

    seq_vecs, sent_vecs = message.get_dense_features(TEXT, [])

    sequence_features = seq_vecs.features
    sentence_features = sent_vecs.features

    # One sequence vector per token; spot-check leading dims of both feature sets.
    assert len(tokens) == len(sequence_features)
    assert np.allclose(sequence_features[0][:5], expected, atol=1e-5)
    assert np.allclose(sentence_features[-1][:5], expected_cls, atol=1e-5)
# Example 2
def test_convert_featurizer_train():
    """Training sets dense features for TEXT and RESPONSE but leaves INTENT unset."""
    featurizer = ConveRTFeaturizer.create({}, RasaNLUModelConfig())

    text = "Hey how are you today ?"
    message = Message(text)
    message.set(RESPONSE, text)

    # Tokenize once and attach the same token list (with __CLS__) to both attributes.
    tokens = Tokenizer.add_cls_token(
        ConveRTTokenizer().tokenize(message, attribute=TEXT), attribute=TEXT
    )
    for attribute in (TEXT, RESPONSE):
        message.set(TOKENS_NAMES[attribute], tokens)

    featurizer.train(TrainingData([message]), RasaNLUModelConfig())

    expected = np.array(
        [2.2636216, -0.26475656, -1.1358104, -0.49751878, -1.3946456])
    expected_cls = np.array(
        [1.0251294, -0.04053932, -0.7018805, -0.82054937, -0.75054353])

    # TEXT and RESPONSE get identical per-token vectors; check both the same way.
    for attribute in (TEXT, RESPONSE):
        vecs = message.get(DENSE_FEATURE_NAMES[attribute])
        assert len(tokens) == len(vecs)
        assert np.allclose(vecs[0][:5], expected, atol=1e-5)
        assert np.allclose(vecs[-1][:5], expected_cls, atol=1e-5)

    # INTENT was never tokenized, so no dense features should exist for it.
    assert message.get(DENSE_FEATURE_NAMES[INTENT]) is None
# Example 3
def test_convert_featurizer_process():
    """Processing attaches one 1024-dim ConveRT vector per token (incl. __CLS__)."""
    featurizer = ConveRTFeaturizer.create({}, RasaNLUModelConfig())

    msg = Message("Hey how are you today ?")
    show_message(msg)

    cls_tokens = Tokenizer.add_cls_token(
        ConveRTTokenizer().tokenize(msg, attribute=TEXT), attribute=TEXT
    )
    msg.set(TOKENS_NAMES[TEXT], cls_tokens)
    assert show_message(msg, False) == {
        "tokens": ["hey", "how", "are", "you", "today", "__CLS__"],
        "text": "Hey how are you today ?"
    }

    featurizer.process(msg)
    show_message(msg)

    expected = np.array(
        [2.2636216, -0.26475656, -1.1358104, -0.49751878, -1.3946456])
    expected_cls = np.array(
        [1.0251294, -0.04053932, -0.7018805, -0.82054937, -0.75054353])

    vecs = msg.get(DENSE_FEATURE_NAMES[TEXT])
    # Five word tokens plus the appended __CLS__ token -> six 1024-dim vectors.
    assert len(cls_tokens) == len(vecs) == 6
    assert len(vecs[0]) == 1024
    assert np.allclose(vecs[0][:5], expected, atol=1e-5)
    assert np.allclose(vecs[-1][:5], expected_cls, atol=1e-5)
# Example 4
def test_convert_featurizer_tokens_to_text(component_builder, sentence, expected_text):
    """_tokens_to_text should reconstruct the original text from ConveRT tokens."""
    tokenizer = component_builder.create_component_from_class(ConveRTTokenizer)
    token_batch = [tokenizer.tokenize(Message(sentence), attribute=TEXT)]

    assert ConveRTFeaturizer._tokens_to_text(token_batch)[0] == expected_text
# Example 5
def test_raise_invalid_urls(model_url: Optional[Text], exception_phrase: Text):
    """Constructing the featurizer with a bad model_url raises a RasaException
    whose message contains the expected phrase."""
    config = {"name": "ConveRTFeaturizer", "model_url": model_url}

    with pytest.raises(RasaException) as excinfo:
        ConveRTFeaturizer(config)

    assert exception_phrase in str(excinfo.value)
# Example 6
def test_convert_featurizer_tokens_to_text(
    create_or_load_convert_featurizer: Callable[[Dict[Text, Any]],
                                                ConveRTFeaturizer],
    sentence: Text,
    expected_text: Text,
    monkeypatch: MonkeyPatch,
    whitespace_tokenizer: WhitespaceTokenizer,
):
    """Tokens produced by the featurizer can be stitched back into the sentence."""
    # Disable model-URL validation so no network access is required.
    monkeypatch.setattr(ConveRTFeaturizer, "_validate_model_url", lambda _: None)

    featurizer = create_or_load_convert_featurizer(
        {FEATURIZER_CLASS_ALIAS: "alias", "model_url": RESTRICTED_ACCESS_URL}
    )

    message = Message.build(text=sentence)
    whitespace_tokenizer.process_training_data(TrainingData([message]))
    tokens = featurizer.tokenize(message, attribute=TEXT)

    assert ConveRTFeaturizer._tokens_to_text([tokens])[0] == expected_text
# Example 7
def test_convert_featurizer_tokens_to_text(sentence, expected_text):
    """Round-trip: tokenizing then _tokens_to_text yields the expected text."""
    message = Message(sentence)
    tokens = ConveRTTokenizer().tokenize(message, attribute=TEXT_ATTRIBUTE)

    assert ConveRTFeaturizer._tokens_to_text([tokens])[0] == expected_text
# Example 8
def test_convert_featurizer_tokens_to_text(sentence: Text, expected_text: Text,
                                           monkeypatch: MonkeyPatch):
    """Featurizer-produced tokens should join back into the expected text."""
    # Skip remote URL validation to keep the test offline.
    monkeypatch.setattr(
        ConveRTFeaturizer,
        "_get_validated_model_url",
        lambda x: RESTRICTED_ACCESS_URL,
    )
    featurizer = ConveRTFeaturizer(
        {"name": "ConveRTFeaturizer", "model_url": RESTRICTED_ACCESS_URL}
    )

    message = Message.build(text=sentence)
    WhitespaceTokenizer().train(TrainingData([message]))
    tokens = featurizer.tokenize(message, attribute=TEXT)

    assert ConveRTFeaturizer._tokens_to_text([tokens])[0] == expected_text
# Example 9
def test_raise_wrong_model_directory(tmp_path: Path):
    """A model_url pointing at a directory without model files raises a
    RasaException mentioning the directory contents."""
    config = {"name": "ConveRTFeaturizer", "model_url": str(tmp_path)}

    with pytest.raises(RasaException) as excinfo:
        ConveRTFeaturizer(config)

    assert "Re-check the files inside the directory" in str(excinfo.value)
# Example 10
def test_raise_invalid_path():
    """A model_url that is neither a URL nor an existing directory raises."""
    config = {"name": "ConveRTFeaturizer", "model_url": "saved_model.pb"}

    with pytest.raises(RasaException) as excinfo:
        ConveRTFeaturizer(config)

    assert "neither a valid remote URL nor a local directory" in str(
        excinfo.value)
# Example 11
def test_raise_wrong_model_file(tmp_path: Path):
    """A model_url pointing at an invalid (empty) saved_model.pb file raises
    a RasaException describing the invalid file."""
    # Create an empty dummy model file. Path.touch replaces the previous
    # bare open()/close() pair, avoiding an unmanaged file handle.
    temp_file = tmp_path / "saved_model.pb"
    temp_file.touch()
    component_config = {"name": "ConveRTFeaturizer", "model_url": str(temp_file)}

    with pytest.raises(RasaException) as excinfo:
        _ = ConveRTFeaturizer(component_config)

    assert "set to the path of a file which is invalid" in str(excinfo.value)
# Example 12
def test_convert_featurizer_number_of_sub_tokens(
        text: Text, expected_number_of_sub_tokens: List[int],
        monkeypatch: MonkeyPatch):
    """Each token should carry the number of ConveRT sub-tokens it splits into."""
    # Avoid hitting the network for model-URL validation.
    monkeypatch.setattr(
        ConveRTFeaturizer,
        "_get_validated_model_url",
        lambda x: RESTRICTED_ACCESS_URL,
    )
    featurizer = ConveRTFeaturizer(
        {"name": "ConveRTFeaturizer", "model_url": RESTRICTED_ACCESS_URL}
    )

    message = Message.build(text=text)
    WhitespaceTokenizer().train(TrainingData([message]))

    tokens = featurizer.tokenize(message, attribute=TEXT)

    actual = [token.get(NUMBER_OF_SUB_TOKENS) for token in tokens]
    assert actual == expected_number_of_sub_tokens
# Example 13
def test_convert_featurizer_tokens_to_text(
    sentence: Text, expected_text: Text, monkeypatch: MonkeyPatch
):
    """ConveRTTokenizer tokens should join back into the original sentence."""
    # Patch the tokenizer's URL validation so the test stays offline.
    monkeypatch.setattr(
        ConveRTTokenizer, "_get_validated_model_url", lambda x: RESTRICTED_ACCESS_URL
    )
    tokenizer = ConveRTTokenizer(
        {"name": "ConveRTTokenizer", "model_url": RESTRICTED_ACCESS_URL}
    )
    tokens = tokenizer.tokenize(Message(data={TEXT: sentence}), attribute=TEXT)

    assert ConveRTFeaturizer._tokens_to_text([tokens])[0] == expected_text
# Example 14
def test_convert_featurizer_token_edge_cases(
    text: Text,
    expected_tokens: List[Text],
    expected_indices: List[Tuple[int]],
    monkeypatch: MonkeyPatch,
):
    """Tokenization edge cases: token strings and (start, end) character offsets."""
    # Skip remote model-URL validation.
    monkeypatch.setattr(
        ConveRTFeaturizer,
        "_get_validated_model_url",
        lambda x: RESTRICTED_ACCESS_URL,
    )
    featurizer = ConveRTFeaturizer(
        {"name": "ConveRTFeaturizer", "model_url": RESTRICTED_ACCESS_URL}
    )

    message = Message.build(text=text)
    WhitespaceTokenizer().train(TrainingData([message]))
    tokens = featurizer.tokenize(message, attribute=TEXT)

    assert [token.text for token in tokens] == expected_tokens
    assert [token.start for token in tokens] == [idx[0] for idx in expected_indices]
    assert [token.end for token in tokens] == [idx[1] for idx in expected_indices]
# Example 15
from rasa.nlu.featurizers.sparse_featurizer.count_vectors_featurizer import CountVectorsFeaturizer

from rasa.nlu.constants import SPACY_DOCS, TEXT, DENSE_FEATURE_NAMES, TOKENS_NAMES, SPARSE_FEATURE_NAMES

# Script comparing the dense-feature shapes produced by three featurizers
# (SpaCy, MITIE, ConveRT) on the same input sentence.
# NOTE(review): logging_setup, spacy_nlp, and mitie_feature_extractor are
# assumed to be defined earlier in the file/session — confirm.
logger = logging_setup()

# --- SpaCy: attach a spaCy doc to the message, derive dense features from it.
featurizer = SpacyFeaturizer.create({}, RasaNLUModelConfig())

test_input = "Okay, pick up this yellow banana for me."
message = Message(test_input)

message.set(SPACY_DOCS[TEXT], spacy_nlp(test_input))
featurizer._set_spacy_features(message)
vecs = message.get(DENSE_FEATURE_NAMES[TEXT])
logger.info("SpaCy: {}".format(vecs.shape))

# --- MITIE: tokenize with MitieTokenizer, then featurize the tokens directly
# (features_for_tokens returns vectors instead of mutating the message).
message = Message(test_input)
featurizer = MitieFeaturizer.create({}, RasaNLUModelConfig())
MitieTokenizer().process(message)
tokens = message.get(TOKENS_NAMES[TEXT])
vecs = featurizer.features_for_tokens(tokens, mitie_feature_extractor)
logger.info("Mitie: {}".format(vecs.shape))

# --- ConveRT: re-tokenize the same message (appending __CLS__), overwrite its
# token list, and let process() attach the dense features.
featurizer = ConveRTFeaturizer.create({}, RasaNLUModelConfig())
tokens = ConveRTTokenizer().tokenize(message, attribute=TEXT)
tokens = Tokenizer.add_cls_token(tokens, attribute=TEXT)
message.set(TOKENS_NAMES[TEXT], tokens)
featurizer.process(message)
vecs = message.get(DENSE_FEATURE_NAMES[TEXT])
logger.info("ConveRT: {}".format(vecs.shape))