コード例 #1
0
ファイル: pipeline.py プロジェクト: christianbuck/nlu
def new_amr(triples, concepts, roots=None):
    """Construct an Amr from triples and a concept mapping.

    The triples are passed through ensure_quant() and then handed to
    Amr.from_triples() together with ``concepts``.

    Args:
        triples: triples describing the graph; normalized via ensure_quant().
        concepts: concept mapping passed straight to Amr.from_triples().
        roots: optional roots, forwarded to Amr.from_triples().

    Returns:
        Whatever Amr.from_triples() returns.
    """
    # only display AMR cycle warnings in verbose mode
    warn = sys.stderr if config.verbose else None
    # Forward the caller's ``roots``; the value used to be hard-coded to None,
    # which silently discarded the argument.
    return Amr.from_triples(ensure_quant(triples), concepts, roots=roots, warn=warn)
コード例 #2
0
ファイル: pipeline.py プロジェクト: christianbuck/nlu
def main(files):
    """Run every pipeline stage on each sentence file and print the resulting AMR.

    Files are processed in the order given by the nested ``wsj_sort`` key.
    For each file the dependency parse is loaded, an empty Amr and Alignment
    are created, and each stage module's ``main`` is called in sequence,
    threading (depParse, amr, alignments, completed) through the stages.
    The final AMR is printed to stdout (followed by the alignments when
    ``config.alignments`` is set); diagnostics and the running progress line
    go to stderr.

    Error handling depends on ``config.errorTolerant``: when it is false any
    exception propagates; when it is true, a failing stage is reported on
    stderr and the remaining stages still run, and a failure outside the
    stages prints the placeholder ``(x1 / amr-empty)`` for that sentence.

    Args:
        files: a sized collection of paths to sentence files.
    """
    # pipeline steps
    import nes, timex, vprop, nprop, verbalize, conjunctions, copulas, adjsAndAdverbs, auxes, misc, coref, top, beautify

    nSents = len(files)
    nSuccess = nConnected = 0
    iSent = 0

    def wsj_sort(path):
        """Sort key: (document number, sentence number) parsed from a wsj_NNNN.N path.

        Paths that do not match get (-1, -1) so that they sort before every
        matching path. The key is always a tuple: mixing a bare int with
        tuples makes sorted() raise TypeError on Python 3.
        """
        m = re.search(r'wsj_(\d{4})\.(\d+)', path)
        if not m:
            return (-1, -1)
        docnum, sentnum = m.groups()
        return (int(docnum), int(sentnum))

    for f in sorted(files, key=wsj_sort):

        try:
            sentenceId = os.path.basename(f).replace('.json','')

            if config.showSentence:
                print(sentenceId)

            # load dependency parse from sentence file
            tokens, ww, wTags, depParse = loadDepParse(f)

            # initialize input to first pipeline step

            # Has the token been accounted for yet in the semantics?
            token_accounted_for = [False]*len(depParse)

            # Has the dependency edge been accounted for yet in the semantics?
            # Keyed by (governor index, index of the entry in depParse).
            edge_accounted_for = {(dep['gov_idx'],m): False for m in range(len(depParse)) if depParse[m] for dep in depParse[m]}

            completed = token_accounted_for, edge_accounted_for

            amr = Amr()
            alignments = Alignment()

            # serially execute pipeline steps

            # the sentence
            if config.showSentence:
                print(' '.join(filter(None,ww)))
                print()
                sys.stdout.flush()

            hasModuleException = False
            for m in [nes, timex, vprop, nprop, verbalize, conjunctions, copulas, adjsAndAdverbs, auxes, misc, coref, top, beautify]:
                if config.verbose:
                    print('\n\nSTAGE: ', m.__name__, '...', file=sys.stderr)

                try:
                    depParse, amr, alignments, completed = m.main(sentenceId, f, tokens, ww, wTags, depParse, amr, alignments, completed)
                except Exception:
                    hasModuleException = True
                    if not config.errorTolerant:
                        raise
                    # error-tolerant mode: report the failure and carry on
                    # with the next stage using the state from before it
                    print('EXCEPTION IN', m.__name__, 'MODULE\n', file=sys.stderr)
                    print(sentenceId, file=sys.stderr)
                    traceback.print_exception(*sys.exc_info())

                if config.verbose:
                    print(repr(amr), file=sys.stderr)
                    print('Completed:',[depParse[i][0]['dep'] for i,v in enumerate(completed[0]) if v and depParse[i]], file=sys.stderr)
                    print(alignments, [deps[0]['dep'] for deps in depParse if deps and not completed[0][deps[0]['dep_idx']]], file=sys.stderr)
                    print(amr, file=sys.stderr)

            if config.verbose:
                print(' '.join(tokens), file=sys.stderr)

            if amr.is_connected(warn=None):
                nConnected += 1
            else:
                # insert dummy top node, called 'and' for now. remove :-DUMMY triples for (former) orphans.
                amr = new_amr_from_old(amr, new_triples=[('top','opX',v) for v in amr.roots], new_concepts={'top': 'and'}, avoid_triples=[(x,r,(y,)) for x,r,(y,) in amr.triples(instances=False) if r=='-DUMMY'])

            print(amr)
            #amr.render()
            #print('Amr.from_triples(',amr.triples(instances=False),',',amr.node_to_concepts,')')
            print()
            if config.alignments:
                print(alignments)
                print()

            if config.verbose or config.showRemainingDeps:
                print('\n\nRemaining edges:', file=sys.stderr)
                for deps in depParse:
                    if deps is None: continue
                    for dep in deps:
                        if dep['gov_idx'] is not None and not completed[1][(dep['gov_idx'],dep['dep_idx'])]:
                            print((dep['gov']+'-'+str(dep['gov_idx']),dep['rel'],dep['dep']+'-'+str(dep['dep_idx'])), file=sys.stderr)

            # a sentence counts as a success only if no stage raised
            if not hasModuleException:
                nSuccess += 1


        except Exception:
            if not config.errorTolerant:
                raise
            # emit a placeholder AMR on stdout for the failed sentence
            print('(x1 / amr-empty)\n')
            print(sentenceId, file=sys.stderr)
            traceback.print_exception(*sys.exc_info())
            time.sleep(0)

        iSent += 1
        print('{}/{}, {} succeeded without exceptions ({} connected)'.format(iSent, nSents, nSuccess, nConnected), file=sys.stderr)