Beispiel #1
0
def test_single_state_featurizer_without_interpreter_state_with_action_listen():
    """Encode a state whose previous action is `action_listen`, without trained NLU.

    Only a `RegexInterpreter` is passed in, so neither the user text nor the
    action text is expected in the result, while intent, action name, active loop
    and slots are each expected as a one-hot row.
    """
    featurizer = SingleStateFeaturizer()
    vocabularies = {
        INTENT: {"a": 0, "b": 1},
        ACTION_NAME: {"c": 0, "d": 1, "action_listen": 2},
        SLOTS: {"e_0": 0, "f_0": 1, "g_0": 2},
        ACTIVE_LOOP: {"h": 0, "i": 1, "j": 2, "k": 3},
    }
    for attribute, vocabulary in vocabularies.items():
        featurizer._default_feature_states[attribute] = vocabulary

    state = {
        "user": {"intent": "a", "text": "blah blah blah"},
        "prev_action": {"action_name": "action_listen", "action_text": "boom"},
        "active_loop": {"name": "k"},
        "slots": {"e": (1.0,)},
    }
    result = featurizer.encode_state(state, interpreter=RegexInterpreter())

    # the *_text attributes are missing because no NLU model was trained
    assert list(result.keys()) == [INTENT, ACTION_NAME, ACTIVE_LOOP, SLOTS]

    # each attribute must match its one-hot row exactly (no differing entries)
    expected_rows = {
        INTENT: [[1, 0]],
        ACTION_NAME: [[0, 0, 1]],
        ACTIVE_LOOP: [[0, 0, 0, 1]],
        SLOTS: [[1, 0, 0]],
    }
    for attribute, row in expected_rows.items():
        expected = scipy.sparse.coo_matrix(row)
        assert (result[attribute][0].features != expected).nnz == 0
def test_encode_state__with_lookup__creates_features_for_intent_and_action_name(
    with_action_listen: bool,
):
    """Check that intent and action name features are created when needed.

    The lookup table contains messages for the intent and the action name but no
    features for either of them. With `action_listen` as previous action both
    attributes are expected in the result; otherwise the user sub-state is not
    featurized and only the action name is expected.

    `with_action_listen` is presumably supplied by a parametrization that is not
    visible here.
    """
    featurizer = SingleStateFeaturizer()
    featurizer._default_feature_states[INTENT] = {"a": 0, "b": 1}
    featurizer._default_feature_states[ACTION_NAME] = {
        "c": 0,
        "d": 1,
        ACTION_LISTEN_NAME: 2,
    }

    # state with an intent that is not part of the intent vocabulary above
    if with_action_listen:
        previous_action_name = ACTION_LISTEN_NAME
    else:
        previous_action_name = "c"
    intent_name = "e"
    state = {
        USER: {INTENT: intent_name},
        PREVIOUS_ACTION: {ACTION_NAME: previous_action_name},
    }

    # lookup table holding all relevant messages **but no Features**
    lookup = MessageContainerForCoreFeaturization()
    for attribute, value in (
        (INTENT, intent_name),
        (ACTION_NAME, previous_action_name),
    ):
        lookup.add(Message(data={attribute: value}))

    encoded = featurizer.encode_state(state, precomputations=lookup)

    if not with_action_listen:
        assert set(encoded.keys()) == {ACTION_NAME}
        return

    assert set(encoded.keys()) == {INTENT, ACTION_NAME}
    all_zero = scipy.sparse.coo_matrix([[0, 0]])
    assert (encoded[INTENT][0].features != all_zero).nnz == 0
Beispiel #3
0
def test_single_state_featurizer_without_interpreter_state_no_intent_no_action_name(
):
    """Encode a state that carries neither an intent nor an action name.

    With only a `RegexInterpreter` available and no intent / action_name in the
    input, the result is expected to contain the active loop and slots only.
    """
    featurizer = SingleStateFeaturizer()
    featurizer._default_feature_states[INTENT] = {"a": 0, "b": 1}
    featurizer._default_feature_states[ACTION_NAME] = {
        "c": 0, "d": 1, "action_listen": 2
    }
    featurizer._default_feature_states[SLOTS] = {"e_0": 0, "f_0": 1, "g_0": 2}
    featurizer._default_feature_states[ACTIVE_LOOP] = {
        "h": 0, "i": 1, "j": 2, "k": 3
    }

    state = {
        "user": {"text": "blah blah blah"},
        "prev_action": {"action_text": "boom"},
        "active_loop": {"name": "k"},
        "slots": {"e": (1.0,)},
    }
    result = featurizer.encode_state(state, interpreter=RegexInterpreter())

    # no intent / action_name features may appear in the result
    assert list(result.keys()) == [ACTIVE_LOOP, SLOTS]

    expected_active_loop = scipy.sparse.coo_matrix([[0, 0, 0, 1]])
    assert (result[ACTIVE_LOOP][0].features != expected_active_loop).nnz == 0

    expected_slots = scipy.sparse.coo_matrix([[1, 0, 0]])
    assert (result[SLOTS][0].features != expected_slots).nnz == 0
Beispiel #4
0
def test_single_state_featurizer_with_interpreter_state_with_no_action_name(
    unpacked_trained_moodbot_path: Text,
):
    """Encode a state without an action name using a loaded interpreter.

    Verifies that no action name features are added when the state has no
    action name, and that the user input is ignored because the previous action
    is not `action_listen`.
    """
    from rasa.core.agent import Agent

    trained_interpreter = Agent.load(unpacked_trained_moodbot_path).interpreter

    featurizer = SingleStateFeaturizer()
    vocabularies = {
        INTENT: {"a": 0, "b": 1},
        ENTITIES: {"c": 0},
        ACTION_NAME: {"e": 0, "d": 1, "action_listen": 2},
        SLOTS: {"e_0": 0, "f_0": 1, "g_0": 2},
        ACTIVE_LOOP: {"h": 0, "i": 1, "j": 2, "k": 3},
    }
    for attribute, vocabulary in vocabularies.items():
        featurizer._default_feature_states[attribute] = vocabulary

    state = {
        "user": {"text": "a ball", "intent": "b", "entities": ["c"]},
        "prev_action": {"action_text": "throw a ball"},
        "active_loop": {"name": "k"},
        "slots": {"e": (1.0,)},
    }
    result = featurizer.encode_state(state, interpreter=trained_interpreter)

    assert list(result.keys()) == [ACTION_TEXT, ACTIVE_LOOP, SLOTS]

    action_text_width = result[ACTION_TEXT][0].features.shape[-1]
    assert action_text_width == 300

    expected_slots = scipy.sparse.coo_matrix([[1, 0, 0]])
    assert (result[SLOTS][0].features != expected_slots).nnz == 0

    expected_active_loop = scipy.sparse.coo_matrix([[0, 0, 0, 1]])
    assert (result[ACTIVE_LOOP][0].features != expected_active_loop).nnz == 0
Beispiel #5
0
def test_single_state_featurizer_without_interpreter_state_not_with_action_listen():
    """Encode a state whose previous action is not `action_listen`.

    Only a `RegexInterpreter` is passed in. Because the previous action is "d",
    the user sub-state (intent, text) is expected to be left out of the result.
    """
    featurizer = SingleStateFeaturizer()
    vocabularies = {
        INTENT: {"a": 0, "b": 1},
        ACTION_NAME: {"c": 0, "d": 1, "action_listen": 2},
        SLOTS: {"e_0": 0, "f_0": 1, "g_0": 2},
        ACTIVE_LOOP: {"h": 0, "i": 1, "j": 2, "k": 3},
    }
    for attribute, vocabulary in vocabularies.items():
        featurizer._default_feature_states[attribute] = vocabulary

    state = {
        "user": {"intent": "a", "text": "blah blah blah"},
        "prev_action": {"action_name": "d", "action_text": "boom"},
        "active_loop": {"name": "i"},
        "slots": {"g": (1.0,)},
    }
    result = featurizer.encode_state(state, interpreter=RegexInterpreter())

    # the user sub-state is dropped since the previous action isn't action_listen
    assert list(result.keys()) == [ACTION_NAME, ACTIVE_LOOP, SLOTS]

    expected_rows = {
        ACTION_NAME: [[0, 1, 0]],
        ACTIVE_LOOP: [[0, 1, 0, 0]],
        SLOTS: [[0, 0, 1]],
    }
    for attribute, row in expected_rows.items():
        expected = scipy.sparse.coo_matrix(row)
        assert (result[attribute][0].features != expected).nnz == 0
def test_single_state_featurizer_with_interpreter_state_with_action_listen(
    unpacked_trained_spacybot_path: Text,
):
    """Encode a full state after `action_listen` using a loaded interpreter.

    All attributes (user text, intent, entities, action name, action text,
    slots and active loop) are expected in the result; the text attributes are
    checked for a last feature dimension of 300.
    """
    trained_interpreter = Agent.load(unpacked_trained_spacybot_path).interpreter

    city_to = f"city{ENTITY_LABEL_SEPARATOR}to"
    city_from = f"city{ENTITY_LABEL_SEPARATOR}from"

    featurizer = SingleStateFeaturizer()
    featurizer._default_feature_states[INTENT] = {"greet": 0, "inform": 1}
    featurizer._default_feature_states[ENTITIES] = {
        "city": 0,
        "name": 1,
        city_to: 2,
        city_from: 3,
    }
    featurizer._default_feature_states[ACTION_NAME] = {
        "utter_ask_where_to": 0,
        "utter_greet": 1,
        "action_listen": 2,
    }
    # the `_0` suffix of a slot name denotes the feature dimension
    featurizer._default_feature_states[SLOTS] = {
        "slot_1_0": 0,
        "slot_2_0": 1,
        "slot_3_0": 2,
    }
    featurizer._default_feature_states[ACTIVE_LOOP] = {
        "active_loop_1": 0,
        "active_loop_2": 1,
        "active_loop_3": 2,
        "active_loop_4": 3,
    }

    user_sub_state = {
        "text": "I am flying from London to Paris",
        "intent": "inform",
        "entities": ["city", f"city{ENTITY_LABEL_SEPARATOR}to"],
    }
    previous_action = {
        "action_name": "action_listen",
        "action_text": "throw a ball",
    }
    state = {
        "user": user_sub_state,
        "prev_action": previous_action,
        "active_loop": {"name": "active_loop_4"},
        "slots": {"slot_1": (1.0,)},
    }
    result = featurizer.encode_state(state, interpreter=trained_interpreter)

    # every attribute must be present; *_text ones come from a dense featurizer
    all_attributes = [
        TEXT,
        ENTITIES,
        ACTION_NAME,
        SLOTS,
        ACTIVE_LOOP,
        INTENT,
        ACTION_TEXT,
    ]
    assert sorted(result.keys()) == sorted(all_attributes)

    for text_attribute in (TEXT, ACTION_TEXT):
        assert result[text_attribute][0].features.shape[-1] == 300

    expected_intent = scipy.sparse.coo_matrix([[0, 1]])
    assert (result[INTENT][0].features != expected_intent).nnz == 0

    expected_action_name = scipy.sparse.coo_matrix([[0, 0, 1]])
    assert (result[ACTION_NAME][0].features != expected_action_name).nnz == 0

    assert result[ENTITIES][0].features.shape[-1] == 4

    expected_slots = scipy.sparse.coo_matrix([[1, 0, 0]])
    assert (result[SLOTS][0].features != expected_slots).nnz == 0

    expected_active_loop = scipy.sparse.coo_matrix([[0, 0, 0, 1]])
    assert (result[ACTIVE_LOOP][0].features != expected_active_loop).nnz == 0
def test_encode_state__without_lookup(action_name: Text):
    """Check that `encode_state` works when no lookup table is passed.

    The expected attributes depend on `action_name`:
    - `ACTION_LISTEN_NAME`: intent and action name are encoded
    - any other name: the action name is encoded, the user sub-state is not
    - `None`: the action name is left out of the state and neither the action
      name nor the user sub-state is encoded

    Active loop and slots are expected in every case. `action_name` is
    presumably supplied by a parametrization that is not visible here.
    """
    featurizer = SingleStateFeaturizer()
    vocabularies = {
        INTENT: {"a": 0, "b": 1},
        ACTION_NAME: {
            "c": 0,
            "d": 1,
            "NOT_action_listen": 2,
            ACTION_LISTEN_NAME: 3,
        },
        SLOTS: {"e_0": 0, "f_0": 1, "g_0": 2},
        ACTIVE_LOOP: {"h": 0, "i": 1, "j": 2, "k": 3},
    }
    for attribute, vocabulary in vocabularies.items():
        featurizer._default_feature_states[attribute] = vocabulary

    # only include the action name in the sub-state if one was given
    previous_action = {ACTION_TEXT: "boom"}
    if action_name is not None:
        previous_action[ACTION_NAME] = action_name
    state = {
        USER: {INTENT: "a", TEXT: "blah blah blah"},
        PREVIOUS_ACTION: previous_action,
        ACTIVE_LOOP: {"name": "i"},
        SLOTS: {"g": (1.0,)},
    }

    result = featurizer.encode_state(state, precomputations=None)

    # which attributes show up depends on the given action name
    expected_attributes = {ACTIVE_LOOP, SLOTS}
    if action_name == ACTION_LISTEN_NAME:
        expected_attributes.add(INTENT)
    if action_name is not None:
        expected_attributes.add(ACTION_NAME)
    assert set(result.keys()) == expected_attributes

    # one-hot position of the action name follows the vocabulary above
    if action_name is not None:
        is_other_action = action_name == "NOT_action_listen"
        one_hot = [0, 0, 1, 0] if is_other_action else [0, 0, 0, 1]
        assert sparse_equals_dense(
            result[ACTION_NAME][0].features, np.array([one_hot])
        )

    # the intent is only featurized when the previous action is action_listen
    if action_name == ACTION_LISTEN_NAME:
        intent_features = result[INTENT][0].features
        assert sparse_equals_dense(intent_features, np.array([[1, 0]]))

    # these two encodings do not depend on the action name
    active_loop_features = result[ACTIVE_LOOP][0].features
    assert sparse_equals_dense(active_loop_features, np.array([[0, 1, 0, 0]]))
    slot_features = result[SLOTS][0].features
    assert sparse_equals_dense(slot_features, np.array([[0, 0, 1]]))
Beispiel #8
0
def test_single_state_featurizer_correctly_encodes_non_existing_value():
    """An intent missing from the vocabulary is encoded as an all-zero row."""
    featurizer = SingleStateFeaturizer()
    featurizer._default_feature_states[INTENT] = {"a": 0, "b": 1}
    featurizer._default_feature_states[ACTION_NAME] = {"c": 0, "d": 1}

    # intent "e" is not among the known intents "a" and "b"
    state = {
        "user": {"intent": "e"},
        "prev_action": {"action_name": "action_listen"},
    }
    result = featurizer.encode_state(state, interpreter=RegexInterpreter())

    assert list(result.keys()) == [INTENT, ACTION_NAME]
    all_zero = scipy.sparse.coo_matrix([[0, 0]])
    assert (result[INTENT][0].features != all_zero).nnz == 0
Beispiel #9
0
def test_single_state_featurizer_uses_dtype_float():
    """The encoded action name features are stored with dtype `np.float32`."""
    featurizer = SingleStateFeaturizer()
    vocabularies = {
        INTENT: {"a": 0, "b": 1},
        ACTION_NAME: {"e": 0, "d": 1},
        ENTITIES: {"c": 0},
    }
    for attribute, vocabulary in vocabularies.items():
        featurizer._default_feature_states[attribute] = vocabulary

    state = {
        "user": {"intent": "a", "entities": ["c"]},
        "prev_action": {"action_name": "d"},
    }
    result = featurizer.encode_state(state, interpreter=RegexInterpreter())

    action_name_features = result[ACTION_NAME][0].features
    assert action_name_features.dtype == np.float32
Beispiel #10
0
def test_single_state_featurizer_with_interpreter_state_with_action_listen(
    unpacked_trained_moodbot_path: Text,
):
    """Encode a full state after `action_listen` using a loaded interpreter.

    All attributes are expected in the result; the text attributes are checked
    for a last feature dimension of 300 and the entities for a dimension of 1.
    """
    from rasa.core.agent import Agent

    trained_interpreter = Agent.load(unpacked_trained_moodbot_path).interpreter

    featurizer = SingleStateFeaturizer()
    vocabularies = {
        INTENT: {"a": 0, "b": 1},
        ENTITIES: {"c": 0},
        ACTION_NAME: {"e": 0, "d": 1, "action_listen": 2},
        SLOTS: {"e_0": 0, "f_0": 1, "g_0": 2},
        ACTIVE_LOOP: {"h": 0, "i": 1, "j": 2, "k": 3},
    }
    for attribute, vocabulary in vocabularies.items():
        featurizer._default_feature_states[attribute] = vocabulary

    state = {
        "user": {"text": "a ball", "intent": "b", "entities": ["c"]},
        "prev_action": {
            "action_name": "action_listen",
            "action_text": "throw a ball",
        },
        "active_loop": {"name": "k"},
        "slots": {"e": (1.0,)},
    }
    result = featurizer.encode_state(state, interpreter=trained_interpreter)

    # every attribute must be present; *_text ones come from a dense featurizer
    all_attributes = [
        TEXT,
        ENTITIES,
        ACTION_NAME,
        SLOTS,
        ACTIVE_LOOP,
        INTENT,
        ACTION_TEXT,
    ]
    assert sorted(result.keys()) == sorted(all_attributes)

    for text_attribute in (TEXT, ACTION_TEXT):
        assert result[text_attribute][0].features.shape[-1] == 300

    expected_intent = scipy.sparse.coo_matrix([[0, 1]])
    assert (result[INTENT][0].features != expected_intent).nnz == 0

    expected_action_name = scipy.sparse.coo_matrix([[0, 0, 1]])
    assert (result[ACTION_NAME][0].features != expected_action_name).nnz == 0

    assert result[ENTITIES][0].features.shape[-1] == 1

    expected_slots = scipy.sparse.coo_matrix([[1, 0, 0]])
    assert (result[SLOTS][0].features != expected_slots).nnz == 0

    expected_active_loop = scipy.sparse.coo_matrix([[0, 0, 0, 1]])
    assert (result[ACTIVE_LOOP][0].features != expected_active_loop).nnz == 0
Beispiel #11
0
def test_single_state_featurizer_with_entity_roles_and_groups(
    unpacked_trained_moodbot_path: Text,
):
    """Encode entities whose names carry an `ENTITY_LABEL_SEPARATOR` suffix.

    The state lists the entities "c" and "d<separator>e"; the encoded entity
    row is expected to be `[1, 0, 1]` for the vocabulary defined below.
    """
    from rasa.core.agent import Agent

    trained_interpreter = Agent.load(unpacked_trained_moodbot_path).interpreter

    featurizer = SingleStateFeaturizer()
    vocabularies = {
        INTENT: {"a": 0, "b": 1},
        ENTITIES: {"c": 0, "d": 1, f"d{ENTITY_LABEL_SEPARATOR}e": 2},
        ACTION_NAME: {"e": 0, "d": 1, "action_listen": 2},
        SLOTS: {"e_0": 0, "f_0": 1, "g_0": 2},
        ACTIVE_LOOP: {"h": 0, "i": 1, "j": 2, "k": 3},
    }
    for attribute, vocabulary in vocabularies.items():
        featurizer._default_feature_states[attribute] = vocabulary

    state = {
        "user": {
            "text": "a ball",
            "intent": "b",
            "entities": ["c", f"d{ENTITY_LABEL_SEPARATOR}e"],
        },
        "prev_action": {
            "action_name": "action_listen",
            "action_text": "throw a ball",
        },
        "active_loop": {"name": "k"},
        "slots": {"e": (1.0,)},
    }
    result = featurizer.encode_state(state, interpreter=trained_interpreter)

    # every attribute must be present; *_text ones come from a dense featurizer
    all_attributes = [
        TEXT,
        ENTITIES,
        ACTION_NAME,
        SLOTS,
        ACTIVE_LOOP,
        INTENT,
        ACTION_TEXT,
    ]
    assert sorted(result.keys()) == sorted(all_attributes)

    entity_row = result[ENTITIES][0].features.toarray()
    assert np.all(entity_row == [1, 0, 1])
def test_encode_state__with_lookup__looksup_or_creates_features(action_name: Text):
    """Check that features are taken from the lookup table or created from scratch.

    Depending on the given action name:
    - `ACTION_LISTEN_NAME`: the user sub-state and the action name are encoded
    - some "other" action: the user sub-state is not encoded but the action
      name is
    - `None`: the action name is left out of the previous-action sub-state, so
      neither the action name nor the user sub-state is encoded

    `action_name` is presumably supplied by a parametrization that is not
    visible here.
    """
    featurizer = SingleStateFeaturizer()
    vocabularies = {
        INTENT: {"greet": 0, "inform": 1},
        ENTITIES: {
            "city": 0,
            "name": 1,
            f"city{ENTITY_LABEL_SEPARATOR}to": 2,
            f"city{ENTITY_LABEL_SEPARATOR}from": 3,
        },
        ACTION_NAME: {
            "NOT_action_listen": 0,
            "utter_greet": 1,
            ACTION_LISTEN_NAME: 2,
        },
        # the `_0` suffix of a slot name denotes the feature dimension
        SLOTS: {"slot_1_0": 0, "slot_2_0": 1, "slot_3_0": 2},
        ACTIVE_LOOP: {
            "active_loop_1": 0,
            "active_loop_2": 1,
            "active_loop_3": 2,
            "active_loop_4": 3,
        },
    }
    for attribute, vocabulary in vocabularies.items():
        featurizer._default_feature_states[attribute] = vocabulary

    # --- build the state ---
    text = "I am flying from London to Paris"
    tokens = []
    for match in re.finditer(r"\S+", text):
        tokens.append(Token(text=match.group(), start=match.start()))
    entity_name_list = ["city", f"city{ENTITY_LABEL_SEPARATOR}to"]
    action_text = "throw a ball"
    intent = "inform"

    # the action name (if any) goes first so the key order of the sub-state is
    # the same as when it is written as a single literal
    previous_action = {}
    if action_name is not None:
        previous_action[ACTION_NAME] = action_name
    previous_action[ACTION_TEXT] = action_text

    state = {
        USER: {TEXT: text, INTENT: intent, ENTITIES: entity_name_list},
        PREVIOUS_ACTION: previous_action,
        ACTIVE_LOOP: {"name": "active_loop_4"},
        SLOTS: {"slot_1": (1.0,)},
    }

    # --- build the lookup table ---
    # It holds all relevant messages plus dummy features for the attributes
    # that can be featurized densely. The `ENTITIES` don't have to be added to
    # the message that includes `TEXT` because `encode_state` won't featurize
    # the entities via the lookup table (only `encode_entities` does that).
    units = 300
    # (fill value, feature type, sparse?) - the sparse sequence feature is last
    text_feature_specs = [
        (11, SENTENCE, True),
        (12, SEQUENCE, False),
        (13, SEQUENCE, True),
    ]
    text_features = [
        dummy_features(
            fill_value=fill_value,
            units=units,
            attribute=TEXT,
            type=feature_type,
            is_sparse=sparse,
        )
        for fill_value, feature_type, sparse in text_feature_specs
    ]
    text_message = Message(
        data={TEXT: text, TOKENS_NAMES[TEXT]: tokens}, features=text_features
    )
    intent_message = Message(data={INTENT: intent})
    action_text_message = Message(
        data={ACTION_TEXT: action_text},
        features=[
            dummy_features(
                fill_value=1,
                units=units,
                attribute=ACTION_TEXT,
                type=SEQUENCE,
                is_sparse=True,
            )
        ],
    )

    lookup = MessageContainerForCoreFeaturization()
    lookup.add_all([text_message, intent_message, action_text_message])
    if action_name is not None:
        lookup.add(Message(data={ACTION_NAME: action_name}))

    # --- encode ---
    result = featurizer.encode_state(state, precomputations=lookup)

    # --- verify which attributes were encoded ---
    expected_attributes = {SLOTS, ACTIVE_LOOP, ACTION_TEXT}
    if action_name is not None:  # i.e. the state contains an action name
        expected_attributes.add(ACTION_NAME)
    if action_name == ACTION_LISTEN_NAME:
        expected_attributes.update([TEXT, ENTITIES, INTENT])
    assert set(result.keys()) == expected_attributes

    # --- verify the encodings ---
    # Sparse sequence features come first (and `.features` denotes the matrix,
    # not a `Features` object).
    if action_name == ACTION_LISTEN_NAME:
        first_text_features = result[TEXT][0]
        assert first_text_features.features.shape[-1] == units
        assert first_text_features.is_sparse()
        assert result[ENTITIES][0].features.shape[-1] == 4
        assert sparse_equals_dense(result[INTENT][0].features, np.array([[0, 1]]))

    first_action_text_features = result[ACTION_TEXT][0]
    assert first_action_text_features.features.shape[-1] == units
    assert first_action_text_features.is_sparse()

    if action_name is None:
        assert ACTION_NAME not in result
    else:
        # anything other than "NOT_action_listen" is treated as action_listen
        is_other_action = action_name == "NOT_action_listen"
        one_hot = [1, 0, 0] if is_other_action else [0, 0, 1]
        assert sparse_equals_dense(
            result[ACTION_NAME][0].features, np.array([one_hot])
        )

    assert sparse_equals_dense(result[SLOTS][0].features, np.array([[1, 0, 0]]))
    assert sparse_equals_dense(
        result[ACTIVE_LOOP][0].features, np.array([[0, 0, 0, 1]])
    )