Beispiel #1
0
    def __init__(self, dataset_file_path: str,
                 event_type_vocabulary: Vocabulary):
        """
        Build the ACE event dataset as <sentence, event type> pair instances.

        Each item yielded by the underlying ACEDataset is expanded into one
        instance per token of the event type vocabulary. When the item's
        "event_types" is not None, each instance also gets a "label" of 1 if
        the paired event type is among them and 0 otherwise; an empty
        "event_types" is treated as containing only the vocabulary's unk
        token. When "event_types" is None no "label" is set.
        :param dataset_file_path: path of the dataset file, passed to ACEDataset
        :param event_type_vocabulary: vocabulary whose tokens are paired with
            every sentence
        """
        super().__init__()
        self._ace_dataset = ACEDataset(dataset_file_path=dataset_file_path)
        self._instances: List[Instance] = []

        for raw_item in self._ace_dataset:
            gold_types = raw_item["event_types"]

            if gold_types is None:
                # Actual prediction: there are no gold event types to label with.
                positive_types = None
            else:
                # Training / validation: a sentence without any event type is
                # the positive sample of pair<sentence, unk>.
                positive_types = set(gold_types) or {event_type_vocabulary.unk}

            # Walk over every label to form pair<sentence, event type> samples.
            for position in range(event_type_vocabulary.size):
                candidate = event_type_vocabulary.token(position)

                pair = Instance()
                pair["sentence"] = raw_item["sentence"]
                pair["entity_tag"] = raw_item["entity_tag"]
                pair["event_type"] = candidate
                pair["metadata"] = raw_item["metadata"]

                if positive_types is not None:
                    pair["label"] = 1 if candidate in positive_types else 0

                self._instances.append(pair)
    def __init__(self, event_type_vocabulary: Vocabulary):
        """
        Create one F1 metric per event type, plus one overall metric.

        A LabelF1Metric restricted to label 1 is registered for every token of
        the vocabulary except its unk token, and one more is registered under
        the class's overall key.
        :param event_type_vocabulary: event type vocabulary
        """
        super().__init__()

        def make_metric():
            # Every entry gets its own metric instance with a fresh label list.
            return LabelF1Metric(labels=[1], label_vocabulary=None)

        candidates = (event_type_vocabulary.token(position)
                      for position in range(event_type_vocabulary.size))

        # The unk token is skipped, so it never gets a metric of its own.
        self._event_type_f1: Dict[str, LabelF1Metric] = {
            name: make_metric()
            for name in candidates
            if name != event_type_vocabulary.unk
        }
        self._event_type_f1[EventF1MetricAdapter.__OVERALL] = make_metric()

        self._event_type_vocabulary = event_type_vocabulary