예제 #1
0
def _fipb_pred_sense(field: str):
    """Return the value of the single ``PBSENSE=`` item in a ``|``-separated field.

    Returns None when the field has no such item. More than one ``PBSENSE=``
    item in the same field is treated as malformed input.
    """
    sense = None
    for item in field.split("|"):
        if item.startswith("PBSENSE="):
            assert sense is None, f"Multiple PBSENSE items in field: {field!r}"
            sense = item.split("=")[-1]
    return sense


def _fipb_role(label: str):
    """Map a ``PBArg_*`` / ``PBArgM_*`` label to ``ARG*`` / ``ARGM-*``, else None."""
    core_prefix, mod_prefix = "PBArg_", "PBArgM_"
    if label.startswith(core_prefix):
        return f"ARG{label[len(core_prefix):]}"
    if label.startswith(mod_prefix):
        # Strip the prefix by length (as the core branch does) rather than
        # unpacking split("_"), which raises when the suffix has an underscore.
        return f"ARGM-{label[len(mod_prefix):].upper()}"
    return None


def main(input_file: str, output_file: str):
    """Convert a 'conllufipb' file into event/argument annotated instances.

    Every sentence read from ``input_file`` gets one event per token whose
    ``sent.info[9]`` field carries a ``PBSENSE=`` item, and one argument per
    ``<head>:<label>`` item in ``sent.info[8]`` whose label starts with
    ``PBArg_`` or ``PBArgM_``. All instances are then written to
    ``output_file`` and the collected counts are logged.

    Args:
        input_file: Path handed to the reader.
        output_file: Path handed to the writer.

    Raises:
        AssertionError: If the two info columns do not match the sentence
            length, or a token carries more than one ``PBSENSE=`` item.
        KeyError: If an argument points at a token that has no event.
        ValueError: If the head part of an argument item is not an integer.
    """
    reader_conf = ReaderGetterConf().direct_update(input_format='conllufipb')
    reader_conf.validate()
    # --
    cc = Counter()
    arg_cc = Counter()
    all_insts = list(reader_conf.get_reader(input_path=input_file))
    for sent in all_insts:
        fields_args, fields_preds = sent.info[8], sent.info[9]
        assert len(fields_args) == len(fields_preds) and len(fields_args) == len(sent)
        # first collect preds
        all_preds = {}  # widx -> event
        for widx, field in enumerate(fields_preds):
            pred_name = _fipb_pred_sense(field)
            if pred_name is not None:
                all_preds[widx] = sent.make_event(widx, 1, type=pred_name)
        # then collect args
        for widx, field in enumerate(fields_args):
            for item in field.split("|"):
                if ":" not in item:
                    continue
                head, label = item.split(":", 1)
                head_idx = int(head)
                role = _fipb_role(label)
                if role is None:
                    continue
                # all_preds is keyed by 0-based position, so the head index read
                # from the file is shifted down by one (presumably a 1-based id).
                evt = all_preds[head_idx - 1]
                ef = sent.make_entity_filler(widx, 1, type="UNK")
                evt.add_arg(ef, role)
                arg_cc[role] += 1
        # --
        cc["sent"] += 1
        cc["frames"] += len(sent.events)
        cc["args"] += sum(len(z.args) for z in sent.events)
        # --
    # --
    with WriterGetterConf().get_writer(output_path=output_file) as writer:
        writer.write_insts(all_insts)
    # --
    zlog(f"Read fipb from {input_file} to {output_file}: {cc}")
    zlog(f"Role counts = {arg_cc}")
예제 #2
0
def main(input_format, *input_files: str):
    """Load every input file with one reader configuration and run the stats.

    Instances from all ``input_files`` are pooled into a single list.
    ``do_stat`` runs only when ``input_format`` is ``"conllu"``;
    ``do_stat_srl`` always runs.
    """
    conf = ReaderGetterConf().direct_update(input_format=input_format)
    conf.validate()

    # Pool the instances of all files, logging how many each one contributed.
    pooled = []
    for path in input_files:
        loaded = list(conf.get_reader(input_path=path))
        zlog(f"Read from {path}: {len(loaded)} instances.")
        pooled += loaded

    if input_format == "conllu":
        do_stat(pooled)
    do_stat_srl(pooled)
예제 #3
0
def main(input_format, *input_files: str):
    """Log per-file sentence, event and argument counts.

    Each path in ``input_files`` is read with a reader configured for
    ``input_format``. For every file, the number of sentences, events and
    event arguments is counted and logged through ``zlog``.
    """
    reader_conf = ReaderGetterConf().direct_update(input_format=input_format)
    reader_conf.validate()
    # --
    # NOTE(review): nothing is appended to all_insts in the lines visible here;
    # confirm whether it is used further down or is a leftover.
    all_insts = []
    for ff in input_files:
        one_insts = list(reader_conf.get_reader(input_path=ff))
        # Fresh counter for each file, so the logged numbers are per file.
        cc = Counter()
        for sent in yield_sents(one_insts):
            cc['sent'] += 1
            for evt in sent.events:
                cc['evt'] += 1
                cc['arg'] += len(evt.args)
        # Each count is divided by 1000 and printed with one decimal, in the
        # order sentences & events & arguments.
        zlog(
            f"Read from {ff}: {cc['sent']/1000:.1f}k&{cc['evt']/1000:.1f}k&{cc['arg']/1000:.1f}k"
        )