# Code example #1
def test_cvf_independent_train_vocabulary_expand(
    additional_size: Optional[int],
    text: Text,
    real_vocabulary_size: int,
    total_vocabulary_size: int,
):
    """Vocabulary buffers are allocated per attribute (no shared vocab).

    After training on a single message, each dense attribute (text,
    response, action text) should own a vocabulary of
    ``total_vocabulary_size`` slots, with the first
    ``real_vocabulary_size`` slots filled by real tokens and the rest
    reserved as empty buffer for fine-tuning.
    """
    dense_attributes = [TEXT, RESPONSE, ACTION_TEXT]

    tokenizer = WhitespaceTokenizer()
    featurizer = CountVectorsFeaturizer(
        {
            "additional_vocabulary_size": {
                attribute: additional_size for attribute in dense_attributes
            }
        },
        finetune_mode=False,
    )

    message = Message(
        data={
            TEXT: text,
            INTENT: "intent_1",
            RESPONSE: text,
            ACTION_TEXT: text,
            ACTION_NAME: "action_1",
        }
    )
    training_data = TrainingData([message])

    tokenizer.train(training_data)
    featurizer.train(training_data)

    for attribute in dense_attributes:
        vocabulary = featurizer.vectorizers[attribute].vocabulary_
        # Total slot count includes the extra buffer slots ...
        assert len(vocabulary) == total_vocabulary_size
        # ... but only the tokens actually seen in training are occupied.
        assert (
            featurizer._get_starting_empty_index(vocabulary)
            == real_vocabulary_size
        )

    # Sparse label attributes get no buffer: exactly one token was seen.
    for attribute in [INTENT, ACTION_NAME]:
        vocabulary = featurizer.vectorizers[attribute].vocabulary_
        assert len(vocabulary) == 1
# Code example #2
def test_cvf_shared_train_vocabulary_expand(
    additional_size: Optional[int],
    text: Text,
    real_vocabulary_size: int,
    total_vocabulary_size: int,
):
    """Vocabulary buffer is allocated once when attributes share a vocab.

    With ``use_shared_vocab`` enabled, text, response and action text all
    feed a single vocabulary; after training it should hold
    ``total_vocabulary_size`` slots with the first
    ``real_vocabulary_size`` occupied by real tokens.
    """
    tokenizer = WhitespaceTokenizer()
    # Use the attribute constants (TEXT/RESPONSE/ACTION_TEXT) rather than
    # raw string literals, matching the independent-vocabulary test and
    # staying correct even if the constants' values ever change.
    featurizer = CountVectorsFeaturizer(
        {
            "additional_vocabulary_size": {
                TEXT: additional_size,
                RESPONSE: additional_size,
                ACTION_TEXT: additional_size,
            },
            "use_shared_vocab": True,
        },
        finetune_mode=False,
    )

    train_message = Message(
        data={
            TEXT: text,
            INTENT: "intent_1",
            RESPONSE: text,
            ACTION_TEXT: text,
            ACTION_NAME: "action_1",
        }
    )
    data = TrainingData([train_message])

    tokenizer.train(data)
    featurizer.train(data)

    # With a shared vocab, the TEXT vectorizer holds the single vocabulary.
    shared_vocabulary = featurizer.vectorizers[TEXT].vocabulary_
    assert len(shared_vocabulary) == total_vocabulary_size
    assert (
        featurizer._get_starting_empty_index(shared_vocabulary)
        == real_vocabulary_size
    )