Ejemplo n.º 1
0
def test_container_feature_lookup():
    """Checks which attributes appear in the output of `collect_features`.

    The container is queried with the same sub-state several times, once without
    and then with an explicit `attributes` list.
    """
    arbitrary_attribute = "other"

    # Two text-only messages and two messages keyed by intent plus an extra,
    # arbitrary attribute; each message carries exactly one dummy feature.
    first_text = Message(data={TEXT: "A"}, features=[_dummy_features(1, TEXT)])
    first_intent = Message(
        data={INTENT: "B", arbitrary_attribute: "C"},
        features=[_dummy_features(2, arbitrary_attribute)],
    )
    second_text = Message(data={TEXT: "A2"}, features=[_dummy_features(3, TEXT)])
    second_intent = Message(
        data={INTENT: "B2", arbitrary_attribute: "C2"},
        features=[_dummy_features(4, arbitrary_attribute)],
    )

    table = MessageContainerForCoreFeaturization()
    table.add_all([first_text, first_intent, second_text, second_intent])

    sub_state = {TEXT: "A", INTENT: "B", arbitrary_attribute: "C"}

    # Without an explicit attribute list, the result only has entries for the
    # attributes that actually come with features (`None` marks "no entry").
    collected = table.collect_features(sub_state=sub_state)
    expectations = (
        (TEXT, 1),
        (INTENT, None),
        (arbitrary_attribute, 2),
    )
    for attribute, expected_value in expectations:
        if expected_value is None:
            assert attribute not in collected
            continue
        assert attribute in collected
        assert len(collected[attribute]) == 1
        assert expected_value == collected[attribute][0].features[0]

    # Explicitly requesting `INTENT` yields a key for it, even though there are
    # no features to return.
    requested_attributes = list(sub_state.keys())
    collected = table.collect_features(
        sub_state=sub_state, attributes=requested_attributes
    )
    assert INTENT in collected
    assert len(collected[INTENT]) == 0

    # Only the requested attributes are returned...
    collected = table.collect_features(sub_state, attributes=[arbitrary_attribute])
    assert TEXT not in collected
    assert INTENT not in collected
    assert len(collected[arbitrary_attribute]) == 1

    # ... and a requested attribute without any features maps to an empty result.
    unknown_attribute = "another"
    collected = table.collect_features(sub_state, attributes=[unknown_attribute])
    assert len(collected[unknown_attribute]) == 0
Ejemplo n.º 2
0
def test_container_message_lookup():
    """Checks that messages added to the container can be looked up by user text."""
    # Each message has a single key attribute; only the first one has features.
    featurized_message = Message(
        data={TEXT: "A"}, features=[_dummy_features(1, TEXT)]
    )
    unfeaturized_messages = [
        Message(data={attribute: "B"})
        for attribute in (TEXT, INTENT, ACTION_TEXT, ACTION_NAME)
    ]

    container = MessageContainerForCoreFeaturization()
    container.add_all([featurized_message, *unfeaturized_messages])

    # The text "A" resolves to the message that carries one feature.
    found = container.lookup_message(user_text="A")
    assert found
    assert len(found.data) == 1
    assert len(found.features) == 1

    # The text "B" resolves to a message with a single data entry.
    found = container.lookup_message(user_text="B")
    assert found
    assert len(found.data) == 1
Ejemplo n.º 3
0
def test_container_all_messages():
    """Checks that `all_messages` reports three messages after three were added."""
    table = MessageContainerForCoreFeaturization()
    table.add_all(
        [
            Message(data={INTENT: "1"}),
            Message(data={INTENT: "2", "other": 3}),
            Message(data={TEXT: "3"}),
        ]
    )
    stored_messages = table.all_messages()
    assert len(stored_messages) == 3
Ejemplo n.º 4
0
def test_container_keys():
    """Checks the values `keys` returns for the `INTENT` and `TEXT` attributes."""
    table = MessageContainerForCoreFeaturization()
    table.add_all(
        [
            Message(data={INTENT: "1"}),
            Message(data={INTENT: "2"}),
            Message(data={TEXT: "3", "other": 3}),
        ]
    )

    # Compared as sets, i.e. the order in which keys are returned is irrelevant.
    expected_keys_per_attribute = ((INTENT, {"1", "2"}), (TEXT, {"3"}))
    for attribute, expected_keys in expected_keys_per_attribute:
        assert set(table.keys(attribute)) == expected_keys
def test_encode_state__with_lookup__looksup_or_creates_features(action_name: Text) -> None:
    """Tests that features from table are combined or created from scratch.

    If the given action name is ...
    - ACTION_LISTEN_NAME, then the user substate and the action name are encoded
    - some "other" action, then the user substate is not encoded but the action
      name is
    - `None`, then the action name is removed from the previous-action substate
      and, as a result, there should be no encoding for the action name and no
      encoding for the user substate

    Args:
        action_name: name of the previous action, or `None`.
            NOTE(review): annotated as `Text` although `None` is handled
            explicitly below; the values are presumably supplied by a
            parametrization that is not visible here - confirm.
    """
    # Set the featurizer's vocabularies (feature name -> index) by hand.
    f = SingleStateFeaturizer()
    f._default_feature_states[INTENT] = {"greet": 0, "inform": 1}
    f._default_feature_states[ENTITIES] = {
        "city": 0,
        "name": 1,
        f"city{ENTITY_LABEL_SEPARATOR}to": 2,
        f"city{ENTITY_LABEL_SEPARATOR}from": 3,
    }
    f._default_feature_states[ACTION_NAME] = {
        "NOT_action_listen": 0,
        "utter_greet": 1,
        ACTION_LISTEN_NAME: 2,
    }
    # `_0` in slots represents the feature dimension
    f._default_feature_states[SLOTS] = {"slot_1_0": 0, "slot_2_0": 1, "slot_3_0": 2}
    f._default_feature_states[ACTIVE_LOOP] = {
        "active_loop_1": 0,
        "active_loop_2": 1,
        "active_loop_3": 2,
        "active_loop_4": 3,
    }

    # create state
    text = "I am flying from London to Paris"
    # one token per whitespace-separated chunk of the text
    tokens = [
        Token(text=match.group(), start=match.start())
        for match in re.finditer(r"\S+", text)
    ]
    entity_name_list = ["city", f"city{ENTITY_LABEL_SEPARATOR}to"]
    action_text = "throw a ball"
    intent = "inform"
    state = {
        USER: {TEXT: text, INTENT: intent, ENTITIES: entity_name_list,},
        PREVIOUS_ACTION: {ACTION_NAME: action_name, ACTION_TEXT: action_text,},
        ACTIVE_LOOP: {"name": "active_loop_4"},
        SLOTS: {"slot_1": (1.0,)},
    }
    # `None` means "no action name": drop the key instead of keeping a `None` value.
    if action_name is None:
        del state[PREVIOUS_ACTION][ACTION_NAME]

    # Build lookup table with all relevant information - and dummy features for all
    # dense featurizable attributes.
    # Note that we don't need to add the `ENTITIES` to the message including `TEXT`
    # here because `encode_state` won't featurize the entities using the lookup table
    # (only `encode_entities` does that).
    units = 300
    precomputations = MessageContainerForCoreFeaturization()
    precomputations.add_all(
        [
            Message(
                data={TEXT: text, TOKENS_NAMES[TEXT]: tokens},
                features=[
                    dummy_features(
                        fill_value=11,
                        units=units,
                        attribute=TEXT,
                        type=SENTENCE,
                        is_sparse=True,
                    ),
                    dummy_features(
                        fill_value=12,
                        units=units,
                        attribute=TEXT,
                        type=SEQUENCE,
                        is_sparse=False,
                    ),
                    # Note: sparse sequence feature is last here
                    dummy_features(
                        fill_value=13,
                        units=units,
                        attribute=TEXT,
                        type=SEQUENCE,
                        is_sparse=True,
                    ),
                ],
            ),
            # no features are attached to the intent message
            Message(data={INTENT: intent}),
            Message(
                data={ACTION_TEXT: action_text},
                features=[
                    dummy_features(
                        fill_value=1,
                        units=units,
                        attribute=ACTION_TEXT,
                        type=SEQUENCE,
                        is_sparse=True,
                    )
                ],
            ),
        ]
    )
    # The action name is only added to the lookup table if it is part of the state.
    if action_name is not None:
        precomputations.add(Message(data={ACTION_NAME: action_name}))

    # encode the state
    encoded = f.encode_state(state, precomputations=precomputations,)

    # check that exactly the expected attributes have been encoded
    expected_attributes = [SLOTS, ACTIVE_LOOP, ACTION_TEXT]
    if action_name is not None:  # i.e. we did not remove it from the state
        expected_attributes += [ACTION_NAME]
    # the user substate is only expected to be encoded after `ACTION_LISTEN_NAME`
    if action_name == ACTION_LISTEN_NAME:
        expected_attributes += [TEXT, ENTITIES, INTENT]
    assert set(encoded.keys()) == set(expected_attributes)

    # Remember, sparse sequence features come first (and `.features` denotes the matrix
    # not a `Features` object)
    if action_name == ACTION_LISTEN_NAME:
        assert encoded[TEXT][0].features.shape[-1] == units
        assert encoded[TEXT][0].is_sparse()
        # four entity tags were registered for `ENTITIES` above
        assert encoded[ENTITIES][0].features.shape[-1] == 4
        # "inform" has index 1 in the intent vocabulary
        assert sparse_equals_dense(encoded[INTENT][0].features, np.array([[0, 1]]))
    assert encoded[ACTION_TEXT][0].features.shape[-1] == units
    assert encoded[ACTION_TEXT][0].is_sparse()
    if action_name is not None:
        # one-hot over the action name vocabulary registered above
        if action_name == "NOT_action_listen":
            action_name_encoding = [1, 0, 0]
        else:  # action_listen
            # NOTE: any other name is treated as `ACTION_LISTEN_NAME` (index 2)
            action_name_encoding = [0, 0, 1]
        assert sparse_equals_dense(
            encoded[ACTION_NAME][0].features, np.array([action_name_encoding])
        )
    else:
        assert ACTION_NAME not in encoded
    # "slot_1_0" has index 0 and the slot value in the state is 1.0
    assert sparse_equals_dense(encoded[SLOTS][0].features, np.array([[1, 0, 0]]))
    # "active_loop_4" has index 3 in the active loop vocabulary
    assert sparse_equals_dense(
        encoded[ACTIVE_LOOP][0].features, np.array([[0, 0, 0, 1]])
    )