Python MessageContainerForCoreFeaturization.addの例、rasa.core.featurizers.precomputation.MessageContainerForCoreFeaturization.add Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_precomputation.py プロジェクト: zoovu/rasa

def test_container_add_does_not_fail_if_message_feature_content_differs():
    """Re-adding known sub-states with another feature keeps the size fixed.

    The container is first filled with one message per sub-state. Every
    sub-state is then added a second time, wrapped in a message that carries a
    separately created feature, and the number of entries must not change.
    """
    placeholder = "this-could-be-anything"
    # One sub-state per key attribute.
    unique_sub_states = [
        {INTENT: "greet"},
        {TEXT: "text", ENTITIES: placeholder},
        {ACTION_TEXT: "action_text"},
        {ACTION_NAME: "action_name"},
    ]
    # NOTE(review): both features are created with identical arguments; whether
    # their content actually differs depends on `_dummy_features` -- confirm.
    first_feature = _dummy_features(id=1, attribute="arbitrary")
    second_feature = _dummy_features(id=1, attribute="arbitrary")

    container = MessageContainerForCoreFeaturization()
    for data in unique_sub_states:
        message = Message(data=data, features=[first_feature])
        container.add(message)
    expected_size = len(container)

    # Add every sub-state again, this time with the other feature, and check
    # the size after each insertion.
    for data in unique_sub_states:
        message = Message(data=data, features=[second_feature])
        container.add(message)
        assert len(container) == expected_size

コード例 #2

0

ファイルを表示

ファイル: test_single_state_featurizers.py プロジェクト: praneethgb/rasa

def test_encode_state__with_lookup__creates_features_for_intent_and_action_name(
    with_action_listen: bool,
):
    """Check that intent and action name features are created when required.

    The lookup table handed to the featurizer contains messages for the intent
    and the action name, but neither message carries any features. If the
    previous action is not `action_listen`, the user sub-state is not expected
    to be featurized, so only the action name should be encoded.
    """
    featurizer = SingleStateFeaturizer()
    featurizer._default_feature_states[INTENT] = {"a": 0, "b": 1}
    featurizer._default_feature_states[ACTION_NAME] = {
        "c": 0,
        "d": 1,
        ACTION_LISTEN_NAME: 2,
    }

    # Build the state; note that the intent "e" is not one of the default
    # intent feature states configured above.
    if with_action_listen:
        previous_action_name = ACTION_LISTEN_NAME
    else:
        previous_action_name = "c"
    state = {
        USER: {INTENT: "e"},
        PREVIOUS_ACTION: {ACTION_NAME: previous_action_name},
    }

    # The lookup table knows all relevant entries **but holds no Features**.
    lookup_table = MessageContainerForCoreFeaturization()
    intent_message = Message(data={INTENT: state[USER][INTENT]})
    lookup_table.add(intent_message)
    action_name_message = Message(
        data={ACTION_NAME: state[PREVIOUS_ACTION][ACTION_NAME]}
    )
    lookup_table.add(action_name_message)

    encoded = f_encoded = featurizer.encode_state(state, precomputations=lookup_table)

    if not with_action_listen:
        assert set(f_encoded.keys()) == {ACTION_NAME}
        return

    assert set(encoded.keys()) == {INTENT, ACTION_NAME}
    # No element of the intent features may differ from an all-zero row.
    all_zeros = scipy.sparse.coo_matrix([[0, 0]])
    assert (encoded[INTENT][0].features != all_zeros).nnz == 0

コード例 #3

0

ファイルを表示

def test_encode_entities__with_entity_roles_and_groups():
    """Check the entity tag encoding for entities that carry roles/groups.

    A tokenized message with two extracted entities is stored in a lookup
    table, a featurizer is prepared for a matching domain, and the entity tag
    ids returned by `encode_entities` are compared against the expected ids.
    """
    # create fake message that has been tokenized and entities have been extracted
    text = "I am flying from London to Paris"
    tokens = [
        Token(text=match.group(), start=match.start())
        for match in re.finditer(r"\S+", text)
    ]
    entity_tags = ["city", f"city{ENTITY_LABEL_SEPARATOR}to"]
    entities = [
        {
            ENTITY_ATTRIBUTE_TYPE: entity_tags[0],
            ENTITY_ATTRIBUTE_VALUE: "London",
            ENTITY_ATTRIBUTE_START: 17,
            ENTITY_ATTRIBUTE_END: 23,
        },
        {
            ENTITY_ATTRIBUTE_TYPE: entity_tags[1],
            ENTITY_ATTRIBUTE_VALUE: "Paris",
            ENTITY_ATTRIBUTE_START: 27,
            ENTITY_ATTRIBUTE_END: 32,
        },
    ]
    message = Message({TEXT: text, TOKENS_NAMES[TEXT]: tokens, ENTITIES: entities})

    # create a lookup table that has seen this message
    precomputations = MessageContainerForCoreFeaturization()
    precomputations.add(message)

    # instantiate matching domain and single state featurizer
    domain = Domain(
        intents=[],
        entities=entity_tags,
        slots=[],
        responses={},
        forms={},
        action_names=[],
    )
    f = SingleStateFeaturizer()
    f.prepare_for_training(domain)

    # encode!
    encoded = f.encode_entities(
        entity_data={TEXT: text, ENTITIES: entities},
        precomputations=precomputations,
    )

    # check
    assert len(f.entity_tag_specs) == 1
    tags_to_ids = f.entity_tag_specs[0].tags_to_ids
    # Verify the mapping instead of overwriting it: an assignment here would
    # make the check vacuous and mutate the featurizer's tag specification.
    for idx, entity_tag in enumerate(entity_tags):
        assert tags_to_ids[entity_tag] == idx + 1  # hence, city -> 1, city#to -> 2
    assert sorted(encoded.keys()) == [ENTITY_TAGS]
    # One tag id per token; only "London" and "Paris" carry an entity tag.
    assert np.all(
        encoded[ENTITY_TAGS][0].features == [[0], [0], [0], [0], [1], [0], [2]]
    )

コード例 #4

0

ファイルを表示

ファイル: test_precomputation.py プロジェクト: ChenHuaYou/rasa

def test_container_add_message_copies():
    """Adding the very same messages repeatedly only raises the collision count.

    Each unique message is added once plus `num_copies` more times. The
    container must end up with one entry per unique sub-state and report every
    repeated insertion as an ignored collision.
    """
    placeholder = "this-could-be-anything"
    unique_sub_states = [
        {INTENT: "greet"},
        {TEXT: "text", ENTITIES: placeholder},
        {TEXT: "other-text"},
        {ACTION_TEXT: "action_text"},
        {ACTION_NAME: "action_name"},
    ]
    unique_messages = [Message(data) for data in unique_sub_states]

    # Insert the full list of unique messages once, then `num_copies` more times.
    num_copies = 3
    container = MessageContainerForCoreFeaturization()
    for _ in range(1 + num_copies):
        for message in unique_messages:
            container.add(message)

    # There must be exactly as many entries as unique keys ...
    num_unique = len(unique_sub_states)
    assert len(container) == num_unique
    assert set(container.all_messages()) == set(unique_messages)
    # ... and every repeated insertion counts as an ignored collision.
    assert container.num_collisions_ignored == num_unique * num_copies

コード例 #5

0

ファイルを表示

ファイル: test_precomputation.py プロジェクト: ChenHuaYou/rasa

def test_container_fingerprint_differ_for_containers_with_different_insertion_order():
    """Containers holding the same messages in another order differ in fingerprint.

    The containers are used for training data and the order might affect the
    training of e.g. featurizers, hence the fingerprints are expected to differ.
    """
    ascending = MessageContainerForCoreFeaturization()
    for intent in ("1", "2"):
        ascending.add(Message(data={INTENT: intent}))

    descending = MessageContainerForCoreFeaturization()
    for intent in ("2", "1"):
        descending.add(Message(data={INTENT: intent}))

    assert descending.fingerprint() != ascending.fingerprint()

コード例 #6

0

ファイルを表示

ファイル: test_precomputation.py プロジェクト: ChenHuaYou/rasa

def test_container_add_fails_if_messages_are_different_but_have_same_key():
    """Adding a message that conflicts with a stored one raises a `ValueError`.

    After storing one message per sub-state, three variants per sub-state are
    added that reuse its data but differ otherwise: one with an extra
    attribute, one with an additional feature and one without features. Each
    of these insertions is expected to fail.
    """
    placeholder = "this-could-be-anything"
    unique_sub_states = [
        {INTENT: "greet"},
        {TEXT: "text", ENTITIES: placeholder},
        {ACTION_TEXT: "action_text"},
        {ACTION_NAME: "action_name"},
    ]
    stored_feature = _dummy_features(id=1, attribute="arbitrary")
    additional_feature = _dummy_features(id=1, attribute="arbitrary")

    # Adding the unique messages works fine of course, ...
    container = MessageContainerForCoreFeaturization()
    for data in unique_sub_states:
        container.add(Message(data=data, features=[stored_feature]))

    # ... but adding anything with the same key and different content doesn't.
    new_key = "some-new-key"
    expected_error_message = "Expected added message to be consistent"
    for data in unique_sub_states:
        data_with_extra_attribute = {**data, new_key: "some-value-for-the-new-key"}
        conflicting_message_kwargs = [
            # with extra attribute
            {"data": data_with_extra_attribute},
            # with new feature
            {"data": data, "features": [stored_feature, additional_feature]},
            # without features
            {"data": data},
        ]
        for message_kwargs in conflicting_message_kwargs:
            with pytest.raises(ValueError, match=expected_error_message):
                container.add(Message(**message_kwargs))

コード例 #7

0

ファイルを表示

ファイル: test_precomputation.py プロジェクト: zoovu/rasa

def test_container_fingerprints_differ_for_different_containers():
    """Containers that hold different messages must have different fingerprints."""
    containers = []
    for intent in ("1", "2"):
        container = MessageContainerForCoreFeaturization()
        container.add(Message(data={INTENT: intent}))
        containers.append(container)

    first, second = containers
    assert second.fingerprint() != first.fingerprint()

コード例 #8

0

ファイルを表示

ファイル: test_single_state_featurizers.py プロジェクト: praneethgb/rasa

def test_encode_entities__with_bilou_entity_roles_and_groups():
    """Check the BILOU entity tag encoding for entities with roles/groups.

    A featurizer is prepared once for a domain with two entity tags. Two
    messages are then encoded, each via a fresh lookup table that contains
    only that message, and the resulting tag ids are compared per token.
    """
    # Note that there are 2 entity tags here.
    entity_tags = ["city", f"city{ENTITY_LABEL_SEPARATOR}to"]
    domain = Domain(
        intents=[],
        entities=entity_tags,
        slots=[],
        responses={},
        forms={},
        action_names=[],
    )
    featurizer = SingleStateFeaturizer()
    featurizer.prepare_for_training(domain, bilou_tagging=True)

    def encode_with_fresh_lookup(text, entities):
        """Tokenize `text`, store the message in a new lookup table and encode."""
        tokens = [
            Token(text=match.group(), start=match.start())
            for match in re.finditer(r"\S+", text)
        ]
        message = Message(
            {TEXT: text, TOKENS_NAMES[TEXT]: tokens, ENTITIES: entities}
        )
        lookup_table = MessageContainerForCoreFeaturization()
        lookup_table.add(message)
        return featurizer.encode_entities(
            {TEXT: text, ENTITIES: entities},
            precomputations=lookup_table,
            bilou_tagging=True,
        )

    # (1) example with both entities
    both_entities = [
        {
            ENTITY_ATTRIBUTE_TYPE: entity_tags[0],
            ENTITY_ATTRIBUTE_VALUE: "London",
            ENTITY_ATTRIBUTE_START: 17,
            ENTITY_ATTRIBUTE_END: 23,
        },
        {
            ENTITY_ATTRIBUTE_TYPE: entity_tags[1],
            ENTITY_ATTRIBUTE_VALUE: "Paris",
            ENTITY_ATTRIBUTE_START: 27,
            ENTITY_ATTRIBUTE_END: 32,
        },
    ]
    encoded = encode_with_fresh_lookup(
        "I am flying from London to Paris", both_entities
    )
    assert sorted(encoded.keys()) == sorted([ENTITY_TAGS])
    # presumably 4 and 8 are the single-token (U-) tags of the two entity
    # tags -- TODO confirm against the BILOU tag id layout
    expected_tag_ids = [[0], [0], [0], [0], [4], [0], [8]]
    assert np.all(encoded[ENTITY_TAGS][0].features == expected_tag_ids)

    # (2) example with only the "city" entity, which spans two tokens
    city_only = [
        {
            ENTITY_ATTRIBUTE_TYPE: "city",
            ENTITY_ATTRIBUTE_VALUE: "Saint Petersburg",
            ENTITY_ATTRIBUTE_START: 15,
            ENTITY_ATTRIBUTE_END: 31,
        },
    ]
    encoded = encode_with_fresh_lookup("I am flying to Saint Petersburg", city_only)
    assert sorted(encoded.keys()) == sorted([ENTITY_TAGS])
    expected_tag_ids = [[0], [0], [0], [0], [1], [3]]
    assert np.all(encoded[ENTITY_TAGS][0].features == expected_tag_ids)

コード例 #9

0

ファイルを表示

ファイル: test_single_state_featurizers.py プロジェクト: praneethgb/rasa

def test_encode_state__with_lookup__looksup_or_creates_features(action_name: Text):
    """Check that features are taken from the lookup table or created from scratch.

    Depending on the given action name:
    - ACTION_LISTEN_NAME: the user sub-state and the action name are encoded
    - some "other" action: the user sub-state is not encoded but the action
      name is
    - `None`: the action name is left out of the previous action sub-state, so
      neither the action name nor the user sub-state is expected to be encoded
    """
    featurizer = SingleStateFeaturizer()
    featurizer._default_feature_states[INTENT] = {"greet": 0, "inform": 1}
    featurizer._default_feature_states[ENTITIES] = {
        "city": 0,
        "name": 1,
        f"city{ENTITY_LABEL_SEPARATOR}to": 2,
        f"city{ENTITY_LABEL_SEPARATOR}from": 3,
    }
    featurizer._default_feature_states[ACTION_NAME] = {
        "NOT_action_listen": 0,
        "utter_greet": 1,
        ACTION_LISTEN_NAME: 2,
    }
    # `_0` in slots represent feature dimension
    featurizer._default_feature_states[SLOTS] = {
        "slot_1_0": 0,
        "slot_2_0": 1,
        "slot_3_0": 2,
    }
    featurizer._default_feature_states[ACTIVE_LOOP] = {
        "active_loop_1": 0,
        "active_loop_2": 1,
        "active_loop_3": 2,
        "active_loop_4": 3,
    }

    # Assemble the state that is going to be encoded.
    text = "I am flying from London to Paris"
    tokens = [
        Token(text=match.group(), start=match.start())
        for match in re.finditer(r"\S+", text)
    ]
    entity_name_list = ["city", f"city{ENTITY_LABEL_SEPARATOR}to"]
    action_text = "throw a ball"
    intent = "inform"
    if action_name is None:
        # No action name at all in the previous action sub-state.
        previous_action = {ACTION_TEXT: action_text}
    else:
        previous_action = {ACTION_NAME: action_name, ACTION_TEXT: action_text}
    state = {
        USER: {TEXT: text, INTENT: intent, ENTITIES: entity_name_list},
        PREVIOUS_ACTION: previous_action,
        ACTIVE_LOOP: {"name": "active_loop_4"},
        SLOTS: {"slot_1": (1.0,)},
    }

    # Build lookup table with all relevant information - and dummy features for all
    # dense featurizable attributes.
    # Note that we don't need to add the `ENTITIES` to the message including `TEXT`
    # here because `encode_state` won't featurize the entities using the lookup table
    # (only `encode_entities` does that).
    units = 300
    text_message = Message(
        data={TEXT: text, TOKENS_NAMES[TEXT]: tokens},
        features=[
            dummy_features(
                fill_value=11,
                units=units,
                attribute=TEXT,
                type=SENTENCE,
                is_sparse=True,
            ),
            dummy_features(
                fill_value=12,
                units=units,
                attribute=TEXT,
                type=SEQUENCE,
                is_sparse=False,
            ),
            # Note: sparse sequence feature is last here
            dummy_features(
                fill_value=13,
                units=units,
                attribute=TEXT,
                type=SEQUENCE,
                is_sparse=True,
            ),
        ],
    )
    intent_message = Message(data={INTENT: intent})
    action_text_message = Message(
        data={ACTION_TEXT: action_text},
        features=[
            dummy_features(
                fill_value=1,
                units=units,
                attribute=ACTION_TEXT,
                type=SEQUENCE,
                is_sparse=True,
            )
        ],
    )
    lookup_table = MessageContainerForCoreFeaturization()
    lookup_table.add_all([text_message, intent_message, action_text_message])
    if action_name is not None:
        lookup_table.add(Message(data={ACTION_NAME: action_name}))

    # encode the state
    encoded = featurizer.encode_state(state, precomputations=lookup_table)

    # Check that exactly the expected attributes have been encoded.
    expected_attributes = {SLOTS, ACTIVE_LOOP, ACTION_TEXT}
    if action_name is not None:  # i.e. the action name is part of the state
        expected_attributes.add(ACTION_NAME)
    if action_name == ACTION_LISTEN_NAME:
        expected_attributes.update([TEXT, ENTITIES, INTENT])
    assert set(encoded.keys()) == expected_attributes

    # Remember, sparse sequence features come first (and `.features` denotes the matrix
    # not a `Features` object)
    if action_name == ACTION_LISTEN_NAME:
        text_features = encoded[TEXT][0]
        assert text_features.features.shape[-1] == units
        assert text_features.is_sparse()
        assert encoded[ENTITIES][0].features.shape[-1] == 4
        assert sparse_equals_dense(encoded[INTENT][0].features, np.array([[0, 1]]))

    action_text_features = encoded[ACTION_TEXT][0]
    assert action_text_features.features.shape[-1] == units
    assert action_text_features.is_sparse()

    if action_name is None:
        assert ACTION_NAME not in encoded
    else:
        # Any action name other than "NOT_action_listen" is treated as
        # action_listen here.
        if action_name == "NOT_action_listen":
            action_name_encoding = [1, 0, 0]
        else:
            action_name_encoding = [0, 0, 1]
        assert sparse_equals_dense(
            encoded[ACTION_NAME][0].features, np.array([action_name_encoding])
        )

    assert sparse_equals_dense(encoded[SLOTS][0].features, np.array([[1, 0, 0]]))
    assert sparse_equals_dense(
        encoded[ACTIVE_LOOP][0].features, np.array([[0, 0, 0, 1]])
    )