コード例 #1
0
ファイル: evaluate_ucca_snacs_2.py プロジェクト: jakpra/ucca
def extract_mtl_snacs(_glob):
    """Collect one supersense prediction per passage file matching a glob.

    For every file matched by ``_glob`` the passage is loaded with
    ``convert.xml2passage`` and its layer-0 terminals are scanned:

    * the first terminal whose ``extra`` has a truthy ``'identified_for_pss'``
      entry is remembered as the target;
    * for each terminal whose text is ``'*ss*'``, the chain of first incoming
      edges is followed upwards until an edge tag starting with ``'p.'`` is
      found; that tag is recorded as a prediction.

    At most one prediction per file is allowed (enforced by an assertion).
    When there is one, it is stored under key ``'ss'`` in the target's
    ``extra`` dict (mutated in place), and that dict is registered in the
    result under the first integer parsed from the file name.

    :param _glob: glob pattern selecting the passage XML files; the part of
        each file name after the last ``'_'`` and before the extension must
        be a ``'-'``-separated list of integers
    :return: dict mapping the first such integer to the target's ``extra`` dict
    """
    predictions = {}
    for xml_path in glob.glob(_glob):
        # e.g. "..._5-6.xml" -> "5-6" -> (5, 6); only the first number is used
        # as key, but all of them are parsed (and must be valid integers).
        number_part = xml_path.split('_')[-1].rsplit('.', maxsplit=1)[0]
        token_ids = tuple(map(int, number_part.split('-')))

        parsed = convert.xml2passage(xml_path)
        assert parsed is not None

        anchor = None
        found_tags = []
        for _, terminal in parsed.layer(layer0.LAYER_ID).pairs:
            if terminal is None:
                continue
            if terminal.extra.get('identified_for_pss') and anchor is None:
                anchor = terminal
            if terminal.text != '*ss*':
                continue
            # Climb through the first incoming edge of each node until a
            # 'p.'-prefixed tag shows up or the chain runs out of parents.
            current = terminal
            while current.incoming:
                first_edge = current.incoming[0]
                if first_edge.tag.startswith('p.'):
                    found_tags.append(first_edge.tag)
                    break
                current = first_edge.parent

        assert len(found_tags) <= 1
        if found_tags:
            info = anchor.extra
            info['ss'] = found_tags[0]
            predictions[token_ids[0]] = info

    return predictions
コード例 #2
0
ファイル: corpus.py プロジェクト: LucasMoncuit/ucca-parser
 def read_passages(path):
     """Load every file found directly inside ``path`` as a passage.

     Entries are visited in sorted name order. Sub-directories are printed to
     stdout and skipped instead of being handed to ``xml2passage``
     (presumably it parses a single XML file -- verify against its
     definition).

     :param path: directory whose entries are to be loaded
     :return: list of the values returned by ``xml2passage``, one per
         non-directory entry, in sorted name order
     """
     passages = []
     for file in sorted(os.listdir(path)):
         file_path = os.path.join(path, file)
         if os.path.isdir(file_path):
             # Report the directory, then move on: without the `continue`
             # the directory path itself was passed to the parser.
             print(file_path)
             continue
         passages.append(xml2passage(file_path))
     return passages
コード例 #3
0
ファイル: evaluate_ucca_snacs_2.py プロジェクト: jakpra/ucca
# Integer tallies, both starting at zero.
gold_pred = mutual_mwe = 0

# One independent, initially empty result list per evaluation variant.
integrated_results, vanilla_results, snacs_results, refined_results = [], [], [], []

# Opening in 'w' mode and writing nothing creates/truncates both TSV files,
# so each run starts with them empty.
for _edges_tsv in ('edges_refined.tsv', 'edges_snacs.tsv'):
    with open(_edges_tsv, 'w') as f:
        pass

for iSent, filename in enumerate(sorted(glob.glob(f'{path}/*.xml'))):
    name = filename.replace('\\', '/').rsplit('/', maxsplit=1)[-1].rsplit('.', maxsplit=1)[0]
    passage = convert.xml2passage(filename)
    ref = convert.xml2passage(f'{ref_path}/{name}.xml')
#    ref = convert.xml2passage(f'{ref_snacs_path}/{name}.xml')


    if mode == 'mtl':
        # 025516_0002_5-6
        # 025516001
 #       print(name, file=sys.stderr)
        mtl_preds = extract_mtl_snacs(f'{snacs_path}/{name[:-3]}_{int(name[-3:])+1:04d}_*.xml')
        #mtl_golds = extract_mtl_snacs(f'{ref_snacs_path}/{name[:-3]}_{int(name[-3:])+1:04d}_*.snacs')

    elif mode == 'concat' and not integrated:
#        print(passage)
        passage = remove_preterminals(passage)
        # print(passage)