Example #1
0
def test_build_tag_id_dict():
    """Check the tag -> id mapping built from two BILOU-tagged messages."""
    # (text, BILOU tags) pairs, one tag per whitespace-separated word.
    # Only the "location" and "organisation" entities occur in the tags.
    samples = (
        (
            "Germany is part of the European Union",
            ["U-location", "O", "O", "O", "O", "B-organisation", "L-organisation"],
        ),
        (
            "Berlin is the capital of Germany",
            ["U-location", "O", "O", "O", "O", "U-location"],
        ),
    )

    messages = []
    for text, tags in samples:
        message = Message(text)
        message.set(BILOU_ENTITIES, tags)
        messages.append(message)

    # "O" is expected at id 0; each entity then gets consecutive ids for its
    # B-, I-, U- and L- variants, including variants that never appear in the
    # tags above (e.g. "I-location").
    expected = {
        "O": 0,
        "B-location": 1,
        "I-location": 2,
        "U-location": 3,
        "L-location": 4,
        "B-organisation": 5,
        "I-organisation": 6,
        "U-organisation": 7,
        "L-organisation": 8,
    }

    actual = bilou_utils.build_tag_id_dict(TrainingData(messages))

    assert actual == expected
Example #2
0
    def _tag_id_index_mapping(self,
                              training_data: TrainingData) -> Dict[Text, int]:
        """Build the mapping from entity tag to integer index.

        When the ``BILOU_FLAG`` entry of the component config is truthy, the
        mapping is delegated to ``bilou_utils.build_tag_id_dict``. Otherwise
        the distinct ``"entity"`` values found in the entity examples are
        sorted and numbered starting at 1, and ``NO_ENTITY_TAG`` is mapped
        to 0.

        Args:
            training_data: the training data whose entity examples are scanned.

        Returns:
            A dictionary mapping each tag to its index.
        """
        use_bilou = self.component_config[BILOU_FLAG]
        if use_bilou:
            return bilou_utils.build_tag_id_dict(training_data)

        # Gather every distinct entity name annotated in the entity examples.
        entity_names = set()
        for example in training_data.entity_examples:
            for annotation in example.get(ENTITIES):
                entity_names.add(annotation["entity"])
        # An annotation whose entity is None does not get an index of its own.
        entity_names.discard(None)

        # Sorted tags are numbered from 1 upwards; 0 stays free for the
        # non-entity tag assigned below.
        ordered_names = sorted(entity_names)
        tag_to_index = dict(zip(ordered_names, range(1, len(ordered_names) + 1)))

        # NO_ENTITY_TAG corresponds to non-entity which should correspond to
        # 0 index; needed for correct prediction for padding
        tag_to_index[NO_ENTITY_TAG] = 0

        return tag_to_index