def new_amr(triples, concepts, roots=None):
    """Construct an Amr via Amr.from_triples().

    The triples are first passed through ensure_quant(); `concepts` is handed
    to Amr.from_triples() unchanged.

    Args:
        triples: the triples to build the AMR from.
        concepts: forwarded as the second argument of Amr.from_triples().
        roots: forwarded as the `roots` keyword of Amr.from_triples().
            Previously this argument was accepted but silently replaced by
            None; it is now honoured. The default (None) behaves as before.

    Returns:
        Whatever Amr.from_triples() returns.
    """
    # only display AMR cycle warnings in verbose mode
    warn = sys.stderr if config.verbose else None
    return Amr.from_triples(ensure_quant(triples), concepts, roots=roots, warn=warn)
def main(files):
    """Run the conversion pipeline on every sentence file and print the AMRs.

    For each path in `files` (processed in WSJ document/sentence order) the
    dependency parse is loaded with loadDepParse(), an empty Amr and Alignment
    are created, and the pipeline stage modules are executed one after the
    other, each receiving and returning the parse, AMR, alignments and
    "completed" bookkeeping.  The resulting AMR is printed to stdout;
    diagnostics and a progress line go to stderr.

    If the final AMR is not connected, a dummy 'top' node with concept 'and'
    is inserted above all roots and the '-DUMMY' triples are dropped.

    Behaviour is controlled by the module-level `config` object
    (showSentence, verbose, errorTolerant, alignments, showRemainingDeps).

    Args:
        files: a sized collection of sentence file paths (strings).  The
            sentence id is the file's base name with '.json' removed.

    Raises:
        Exception: any exception raised while processing a sentence is
            re-raised when config.errorTolerant is false.  When it is true,
            the traceback is printed to stderr, '(x1 / amr-empty)' is printed
            in place of the AMR, and processing continues.
    """
    # pipeline steps
    import nes, timex, vprop, nprop, verbalize, conjunctions, copulas, adjsAndAdverbs, auxes, misc, coref, top, beautify

    stages = [nes, timex, vprop, nprop, verbalize, conjunctions, copulas,
              adjsAndAdverbs, auxes, misc, coref, top, beautify]

    nSents = len(files)
    nSuccess = nConnected = 0
    iSent = 0

    def wsj_sort(path):
        """Sort key: (document number, sentence number) parsed from the path."""
        m = re.search(r'wsj_(\d{4})\.(\d+)', path)
        if not m:
            # Always return a tuple: mixing an int key with tuple keys makes
            # sorted() raise TypeError on Python 3.  (-1, -1) still places
            # non-matching paths before every real (docnum, sentnum) key.
            return (-1, -1)
        docnum, sentnum = m.groups()
        return (int(docnum), int(sentnum))

    for f in sorted(files, key=wsj_sort):
        # Computed outside the try block so the error handler below can
        # always report the id of the sentence that actually failed.
        sentenceId = os.path.basename(f).replace('.json','')
        try:
            if config.showSentence:
                print(sentenceId)

            # load dependency parse from sentence file
            tokens, ww, wTags, depParse = loadDepParse(f)

            # initialize input to first pipeline step
            # Has the token been accounted for yet in the semantics?
            token_accounted_for = [False]*len(depParse)
            # Has the dependency edge been accounted for yet in the semantics?
            # Keyed by (governor index, position of the dependent in depParse).
            edge_accounted_for = {(dep['gov_idx'], idx): False
                                  for idx, deps in enumerate(depParse) if deps
                                  for dep in deps}
            completed = token_accounted_for, edge_accounted_for

            amr = Amr()
            alignments = Alignment()

            # serially execute pipeline steps

            # the sentence
            if config.showSentence:
                print(' '.join(filter(None,ww)))
                print()
                sys.stdout.flush()

            hasModuleException = False
            for m in stages:
                if config.verbose:
                    print('\n\nSTAGE: ', m.__name__, '...', file=sys.stderr)

                try:
                    depParse, amr, alignments, completed = m.main(sentenceId, f, tokens, ww, wTags, depParse, amr, alignments, completed)
                except Exception:
                    hasModuleException = True
                    if not config.errorTolerant:
                        raise
                    # error-tolerant mode: report and carry on with the
                    # state left by the previous stage
                    print('EXCEPTION IN', m.__name__, 'MODULE\n', file=sys.stderr)
                    print(sentenceId, file=sys.stderr)
                    traceback.print_exception(*sys.exc_info())

                if config.verbose:
                    # dump the intermediate state after this stage
                    print(repr(amr), file=sys.stderr)
                    print('Completed:',[depParse[i][0]['dep'] for i,v in enumerate(completed[0]) if v and depParse[i]], file=sys.stderr)
                    print(alignments, [deps[0]['dep'] for deps in depParse if deps and not completed[0][deps[0]['dep_idx']]],
                          file=sys.stderr)
                    print(amr, file=sys.stderr)

            if config.verbose:
                print(' '.join(tokens), file=sys.stderr)

            if amr.is_connected(warn=None):
                nConnected += 1
            else:
                # insert dummy top node, called 'and' for now. remove :-DUMMY triples for (former) orphans.
                amr = new_amr_from_old(amr, new_triples=[('top','opX',v) for v in amr.roots], new_concepts={'top': 'and'},
                                       avoid_triples=[(x,r,(y,)) for x,r,(y,) in amr.triples(instances=False) if r=='-DUMMY'])

            print(amr)
            #amr.render()
            #print('Amr.from_triples(',amr.triples(instances=False),',',amr.node_to_concepts,')')
            print()
            if config.alignments:
                print(alignments)
                print()

            if config.verbose or config.showRemainingDeps:
                print('\n\nRemaining edges:', file=sys.stderr)
                for deps in depParse:
                    if deps is None:
                        continue
                    for dep in deps:
                        if dep['gov_idx'] is not None and not completed[1][(dep['gov_idx'],dep['dep_idx'])]:
                            print((dep['gov']+'-'+str(dep['gov_idx']),dep['rel'],dep['dep']+'-'+str(dep['dep_idx'])), file=sys.stderr)

            if not hasModuleException:
                nSuccess += 1

        except Exception:
            if not config.errorTolerant:
                raise
            # placeholder AMR so the output still has one entry per sentence
            print('(x1 / amr-empty)\n')
            print(sentenceId, file=sys.stderr)
            traceback.print_exception(*sys.exc_info())
            # NOTE(review): zero-length sleep kept from the original; its purpose is not evident here
            time.sleep(0)

        iSent += 1
        print('{}/{}, {} succeeded without exceptions ({} connected)'.format(iSent, nSents, nSuccess, nConnected), file=sys.stderr)