Esempio n. 1
0
    def load(
        self,
        document: Document,
        cases: List[str],
        exophors: List[str],
        coreference: bool,
        bridging: bool,
        relations: List[str],
        kc: bool,
        pas_targets: List[str],
        tokenizer: BertTokenizer,
    ) -> None:
        """Populate this example's per-morpheme fields from ``document``.

        For every morpheme of every base phrase, one entry is appended to
        each of ``self.words``, ``self.dtids``, ``self.ddeps``,
        ``self.arguments_set``, ``self.arg_candidates_set`` and
        ``self.ment_candidates_set`` (these lists must already exist on the
        instance). ``self.doc_id`` is set to ``document.doc_id``.

        Args:
            document: Source document; it is iterated sentence by sentence and
                queried for its predicates and their arguments.
            cases: Case names whose arguments are collected for PAS targets.
            exophors: Exophor strings to accept. For '不特定:人', '不特定:物'
                and '不特定:状況' the variants suffixed with a digit 1-9 are
                accepted as well and mapped back to the unsuffixed form.
            coreference: If true, collect mentions under the '=' key for
                coreference targets.
            bridging: If true, collect 'ノ' arguments for bridging targets.
            relations: Keys that every per-morpheme argument dict starts with
                (each mapped to an empty list).
            kc: If true, only the last sentence is annotated unless the last
                '-'-separated field of ``doc_id`` is '00'.
                NOTE(review): presumably marks split (Kyoto Corpus style)
                documents -- confirm with the caller.
            pas_targets: 'pred' enables verbal targets, 'noun' enables nominal
                targets when deciding whether a phrase is a PAS target.
            tokenizer: Not used by this method; kept for interface
                compatibility.
        """
        self.doc_id = document.doc_id
        process_all = (kc is False) or (document.doc_id.split('-')[-1] == '00')
        last_sent = document.sentences[-1] if len(document) > 0 else None

        # Map every accepted exophor spelling to its canonical form.
        relax_exophors = {}
        for exophor in exophors:
            relax_exophors[exophor] = exophor
            if exophor in ('不特定:人', '不特定:物', '不特定:状況'):
                for n in '123456789':
                    relax_exophors[exophor + n] = exophor

        dmid2arguments: Dict[int, Dict[str, List[BaseArgument]]] = {
            pred.dmid: document.get_arguments(pred)
            for pred in document.get_predicates()
        }

        def args_by_dmid(case):
            # Project the per-predicate argument table onto a single case.
            # A fresh dict is built on every call, exactly as the inline
            # comprehensions did before.
            return {
                dmid: pred_args[case]
                for dmid, pred_args in dmid2arguments.items()
            }

        head_dmids = []
        for sentence in document:
            process: bool = process_all or (sentence is last_sent)
            # All phrases of the current sentence are registered up front, so
            # argument candidates may follow the target inside its sentence.
            head_dmids.extend(bp.dmid for bp in sentence.bps)
            for bp in sentence.bps:
                for mrph in bp.mrph_list():
                    self.words.append(mrph.midasi)
                    self.dtids.append(bp.dtid)
                    self.ddeps.append(
                        bp.parent.dtid if bp.parent is not None else -1)
                    arguments = OrderedDict((rel, []) for rel in relations)
                    # Two distinct lists: the previous chained assignment
                    # stored one shared list object in both result sets.
                    arg_candidates = []
                    ment_candidates = []
                    # Only the morpheme whose document-wide id equals the
                    # phrase's dmid carries annotations, and only in
                    # sentences selected for processing.
                    if document.mrph2dmid[mrph] == bp.dmid and process:
                        if is_pas_target(bp,
                                         verbal=('pred' in pas_targets),
                                         nominal=('noun' in pas_targets)):
                            arg_candidates = [
                                x for x in head_dmids if x != bp.dmid
                            ]
                            for case in cases:
                                arguments[case] = self._get_args(
                                    bp.dmid, args_by_dmid(case),
                                    relax_exophors, arg_candidates)

                        if bridging and is_bridging_target(bp):
                            arg_candidates = [
                                x for x in head_dmids if x != bp.dmid
                            ]
                            arguments['ノ'] = self._get_args(
                                bp.dmid, args_by_dmid('ノ'), relax_exophors,
                                arg_candidates)

                        if coreference and is_coreference_target(bp):
                            ment_candidates = [
                                x for x in head_dmids if x < bp.dmid
                            ]  # do not solve cataphora
                            arguments['='] = self._get_mentions(
                                bp, document, relax_exophors, ment_candidates)

                    self.arguments_set.append(arguments)
                    self.arg_candidates_set.append(arg_candidates)
                    self.ment_candidates_set.append(ment_candidates)
Esempio n. 2
0
def coverage(doc: Document) -> RetValue:
    ret = RetValue()
    for predicate in doc.get_predicates():
        ex = Example(doc, predicate)
        arguments = doc.get_arguments(predicate)
        is_pred_gold = any(arguments[case] for case in PRED_CASES)