def _first_pss_edge_tag(terminal):
    """Climb from *terminal* via each node's first incoming edge.

    Returns the tag of the first edge met whose tag starts with ``'p.'``,
    or ``None`` when a node without incoming edges is reached first.
    """
    current = terminal
    while current.incoming:
        first_edge = current.incoming[0]
        if first_edge.tag.startswith('p.'):
            return first_edge.tag
        current = first_edge.parent
    return None


def extract_mtl_snacs(_glob):
    """Collect supersense tags from the passage files matching *_glob*.

    For every matching file, the token numbers are parsed from the last
    ``_``-separated part of the file name (extension stripped, numbers
    separated by ``-``).  The passage is loaded and its layer-0 terminals
    are scanned: the first terminal whose ``extra`` has a truthy
    ``'identified_for_pss'`` entry is remembered, and for every terminal
    with text ``'*ss*'`` the nearest ``'p.'``-prefixed edge tag above it
    is recorded.

    Returns:
        dict mapping the first token number of each file that yielded a
        tag to the remembered terminal's ``extra`` dict, which is updated
        in place with the tag under the key ``'ss'``.

    Raises:
        AssertionError: if a passage fails to load (``None``) or more than
            one tag is found in a single file.
        AttributeError: if a tag is found but no terminal was flagged
            ``'identified_for_pss'``.
    """
    predictions = {}
    for xml_path in glob.glob(_glob):
        # File-name suffix such as "5-6" -> (5, 6).
        suffix = xml_path.split('_')[-1].rsplit('.', maxsplit=1)[0]
        token_numbers = tuple(int(piece) for piece in suffix.split('-'))

        passage = convert.xml2passage(xml_path)
        assert passage is not None

        flagged = None
        found_tags = []
        for _position, terminal in passage.layer(layer0.LAYER_ID).pairs:
            if terminal is None:
                continue
            # Only the first flagged terminal is kept as the target.
            if terminal.extra.get('identified_for_pss') and flagged is None:
                flagged = terminal
            if terminal.text != '*ss*':
                continue
            tag = _first_pss_edge_tag(terminal)
            if tag is not None:
                found_tags.append(tag)

        assert len(found_tags) <= 1
        if not found_tags:
            continue
        info = flagged.extra
        info['ss'] = found_tags[0]
        predictions[token_numbers[0]] = info
    return predictions
def read_passages(path):
    """Load every passage file found directly inside *path*.

    Entries are visited in sorted name order.  Sub-directories cannot be
    parsed as passage XML, so their path is printed and they are skipped
    instead of being handed to ``xml2passage``.

    Args:
        path: directory containing the passage files.

    Returns:
        list of the objects returned by ``xml2passage``, one per
        non-directory entry, in sorted name order.
    """
    passages = []
    for entry in sorted(os.listdir(path)):
        entry_path = os.path.join(path, entry)
        if os.path.isdir(entry_path):
            # Report the directory, then skip it: it is not a passage file.
            print(entry_path)
            continue
        passages.append(xml2passage(entry_path))
    return passages
gold_pred = 0 mutual_mwe = 0 integrated_results = [] vanilla_results = [] snacs_results = [] refined_results = [] with open('edges_refined.tsv', 'w') as f: pass with open('edges_snacs.tsv', 'w') as f: pass for iSent, filename in enumerate(sorted(glob.glob(f'{path}/*.xml'))): name = filename.replace('\\', '/').rsplit('/', maxsplit=1)[-1].rsplit('.', maxsplit=1)[0] passage = convert.xml2passage(filename) ref = convert.xml2passage(f'{ref_path}/{name}.xml') # ref = convert.xml2passage(f'{ref_snacs_path}/{name}.xml') if mode == 'mtl': # 025516_0002_5-6 # 025516001 # print(name, file=sys.stderr) mtl_preds = extract_mtl_snacs(f'{snacs_path}/{name[:-3]}_{int(name[-3:])+1:04d}_*.xml') #mtl_golds = extract_mtl_snacs(f'{ref_snacs_path}/{name[:-3]}_{int(name[-3:])+1:04d}_*.snacs') elif mode == 'concat' and not integrated: # print(passage) passage = remove_preterminals(passage) # print(passage)