Example #1
0
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Attach noun-noun, possessive, and collapsed-preposition dependencies
    to the AMR: 'nn' becomes :mod-NN, 'poss' becomes :poss, and 'prep_X'
    becomes a :prep-X relation.

    Returns the updated (depParse, amr, alignment, completed) pipeline state.
    """
    amr = inAMR
    for deps in depParse:
        if deps is None:
            continue
        for dep in deps:
            depIdx, rel, govIdx = dep["dep_idx"], dep["rel"], dep["gov_idx"]
            if completed[1][(govIdx, depIdx)]:
                continue
            if not (rel in ('nn', 'poss') or rel.startswith('prep_')):
                continue

            headVar = amrget(amr, alignment, govIdx, depParse, wTags, completed)
            modVar = amrget(amr, alignment, depIdx, depParse, wTags, completed)  # modifier variable

            if rel == 'nn':     # attach as :mod-NN
                amrRel = 'mod-NN'
            elif rel == 'poss':
                amrRel = 'poss'
            else:               # attach with :prep-X relation
                assert rel.startswith('prep_')
                amrRel = rel.replace('_', '-')

            amr = new_amr_from_old(amr, new_triples=[(str(headVar), amrRel, str(modVar))])

            completed[1][(govIdx, depIdx)] = True

    '''
    # simplify adverbs to adjectives based on lexicon
    for v in amr.node_to_concepts.keys():
        amr.node_to_concepts[v] = simplify_adv(amr.node_to_concepts[v])
    '''

    return depParse, amr, alignment, completed
Example #2
0
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Handle copular and appositive constructions.

    Pass 1 records the heads of 'cop' dependencies (copular predicates) and
    marks those edges complete. Pass 2 attaches the subject of each copular
    predicate with a :domain relation, and appositives with a :-COREF relation
    (a later module merges -COREF-linked variables).

    Returns the updated (depParse, amr, alignment, completed) pipeline state.
    """
    amr = inAMR
    cop_preds = set()
    for deps in depParse:
        if deps is None: continue
        for dep in deps:
            i, r, h = dep["dep_idx"], dep["rel"], dep["gov_idx"]
            if completed[1][(h,i)]: continue
            if r=='cop':
                cop_preds.add(h)
                completed[1][(h,i)] = True
    for deps in depParse:
        if deps is None: continue
        for dep in deps:
            i, r, h = dep["dep_idx"], dep["rel"], dep["gov_idx"]
            if (h in cop_preds and r.endswith('subj')) or r=='appos':
                x = amrget(amr, alignment, h, depParse, wTags, completed)
                y = amrget(amr, alignment, i, depParse, wTags, completed)  # asserting non-completion here might be bad
                
                if r=='appos':
                    completed[1][(h,i)] = True
                    # prefer the non-hallucinated variable as the head of the pair
                    if '-FALLBACK' in amr.get_concept(str(x)) and '-FALLBACK' not in amr.get_concept(str(y)):
                        x, y = y, x
                
                if x!=y:
                    newtriple = (str(x), '-COREF' if r=='appos' else 'domain', str(y))
                    # BUGFIX: only add the triple when x!=y. Previously the
                    # new_amr_from_old call ran unconditionally, so when x==y it
                    # either raised NameError (newtriple never assigned yet) or
                    # silently re-added a stale triple from an earlier iteration.
                    amr = new_amr_from_old(amr, new_triples=[newtriple])

    return depParse, amr, alignment, completed
Example #3
0
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Attach modifier dependents (amod, advmod, dep, num, number, det).

    Bare articles ('the', 'a', 'an') are skipped entirely; numeric relations
    attach as :quant; modifier concepts flagged with -AGE attach as :age (and
    the flag is stripped); everything else attaches as :mod. Afterward, adverb
    concepts are simplified to adjectives via the lexicon.

    Returns the updated (depParse, amr, alignment, completed) pipeline state.
    """
    amr = inAMR
    for deps in depParse:
        if deps is None: continue
        for itm in deps:
            if completed[1][(itm['gov_idx'],itm['dep_idx'])]: continue
            i = itm['dep_idx']
            if itm['rel'] in ['amod', 'advmod', 'dep', 'num', 'number', 'det']:
                h = itm['gov_idx'] # i's head
                
                if itm['rel']=='det' and itm['dep'].lower() in ['the', 'a', 'an']:
                    # skip articles: mark token and edge complete, add no triple
                    completed[0][i] = True
                    completed[1][(h,i)] = True
                    continue
                
                x = alignment[:h] # index of variable associated with i's head, if any
                if not (x or x==0): # need a new variable
                    assert not completed[0][h], (depParse[h],amr)
                    x = new_concept_from_token(amr, alignment, h, depParse, wTags)
                    completed[0][h] = True
                y = alignment[:i] # modifier variable
                if not (y or y==0): # new variable
                    y = new_concept_from_token( amr, alignment, i, depParse, wTags)
                    completed[0][i] = True
                if itm['rel'] in ['num', 'number']:   # attach as :quant
                    newtriple = (str(x), 'quant', str(y))   # TODO: for plain values, don't create a variable
                elif 'AGE' in amr.get_concept(str(y)).split('-'):
                    # modifier concept was flagged as an age expression:
                    # attach with :age and strip the -AGE decoration
                    newtriple = (str(x), 'age', str(y))
                    amr.node_to_concepts[str(y)] = amr.node_to_concepts[str(y)].replace('-AGE','')
                else:   # attach with :mod relation
                    newtriple = (str(x), 'mod', str(y))
                
                
                amr = new_amr_from_old(amr, new_triples=[newtriple])
                
                completed[1][(h,i)] = True

    # simplify adverbs to adjectives based on lexicon
    for v in amr.node_to_concepts.keys():
        amr.node_to_concepts[v] = simplify_adv(amr.node_to_concepts[v])

    return depParse, amr, alignment, completed
Example #4
0
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Link coreferent mentions: for each coreference cluster, choose one
    representative variable and connect the others to it with :-COREF triples.
    Pairs already related via a 'domain' relation (copula constructions) are
    not merged.

    Returns the updated (depParse, amr, alignment, completed) pipeline state.
    """
    amr = inAMR
    
    coref = loadCoref(jsonFile, ww)
    
    #print(coref)
    
    for cluster in coref.values():
        clusterX = None # choose one member of the cluster to decorate with coreferent equivalents, marked :-COREF
        # sort so the most contentful mention comes first; longer spans win ties
        for i,j,w in sorted(cluster, key=lambda mention: (alignment[:mention[1]] is None or '-FALLBACK_PRON' not in amr.node_to_concepts[str(alignment[:mention[1]])],
                                                          alignment[:mention[1]] is None or '-FALLBACK' not in amr.node_to_concepts[str(alignment[:mention[1]])],
                                                          mention[1]-mention[0]), reverse=True):
            # preferences: pronouns (-FALLBACK_PRON) < hallucinated concepts (-FALLBACK) < content words from the sentence
            assert ' '.join(filter(None, ww[i:j+1]))==w,(w,i,j, ww[i:j+1])
            trips = amr.triples(instances=False)
            h = choose_head(range(i,j+1), depParse)
            x = alignment[:h] # index of variable associated with the head, if any
            if not (x or x==0): # need a new variable
                print('TODO: coreferring mention not yet in AMR')
                assert False,(i,j,w,h,x,amr)
            if clusterX is None:
                clusterX = x    # first (most contentful) mention becomes the representative
            elif x==clusterX:
                assert False,('coreferent has same head',i,j,w,h,x,clusterX)
            else:
                isCopula = False
                # note that previous modules have inserted some :-COREF links for equivalent nodes
                # block the merge if any -COREF-equivalent of the representative is in a
                # 'domain' relation with any -COREF-equivalent of this mention
                for x2 in [str(clusterX)]+symmetric_neighbors(str(clusterX), '-COREF', amr):
                    for x3 in [str(x)]+symmetric_neighbors(str(x), '-COREF', amr):
                        if x3 in symmetric_neighbors(x2, 'domain', amr):
                            isCopula = True
                            if config.verbose: print('blocked coreference link (probably a copula cxn) between variables:',x,clusterX, file=sys.stderr)
                            break
                    if isCopula: break
                # copula construction - don't merge as coreferent
                if isCopula:
                    continue

                newtriple = (str(clusterX), '-COREF', str(x))
                
                amr = new_amr_from_old(amr, new_triples=[newtriple])

    return depParse, amr, alignment, completed
Example #5
0
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Attach 'conj' dependents to their conjunction's concept using numbered
    :op1, :op2, ... relations, in the order the dependencies are encountered.

    Returns the updated (depParse, amr, alignment, completed) pipeline state.
    """
    amr = inAMR
    nConjOps = {}   # conjunction concept variable -> number of :opX relations assigned so far
    for deps in depParse:
        if deps is None:
            continue
        for dep in deps:
            gov, mod = dep["gov_idx"], dep['dep_idx']
            if completed[1][(gov, mod)]:
                continue
            if dep["rel"] != 'conj':
                continue

            conjVar = amrget(amr, alignment, gov, depParse, wTags, completed)
            conjunctVar = amrget(amr, alignment, mod, depParse, wTags, completed)

            opNum = nConjOps.get(conjVar, 0) + 1
            nConjOps[conjVar] = opNum
            amr = new_amr_from_old(
                amr, new_triples=[(str(conjVar), 'op' + str(opNum), str(conjunctVar))])

            completed[1][(gov, mod)] = True

    return depParse, amr, alignment, completed
Example #6
0
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Handle auxiliary verbs ('aux'/'auxpass' dependencies).

    BE/HAVE auxiliaries (anything not POS-tagged 'MD') are simply marked
    complete. Modal auxiliaries are mapped through the MODALS lexicon to a
    modality predicate concept, and the governed verb is attached under the
    relation given by ACTION_ARG for that predicate.

    Returns the updated (depParse, amr, alignment, completed) pipeline state.
    """
    amr = inAMR
    for deps in depParse:
        if deps is None: continue
        for itm in deps:
            if completed[1][(itm['gov_idx'],itm['dep_idx'])]: continue
            i = itm['dep_idx']
            if itm['rel'] in ['aux', 'auxpass']:
                if wTags[i]["PartOfSpeech"]!='MD':
                    # BE or HAVE auxiliary--ignore (contributes no AMR content)
                    completed[0][i] = True
                    completed[1][(itm['gov_idx'],i)] = True
                    continue
                
                #print(itm, file=sys.stderr)
                
                mw = itm["dep"]     # the modal word itself
                mpred = MODALS[mw]  # its modality predicate from the lexicon
                
                
                x = alignment[:i] # variable associated with the modal token, if any
                if not (x or x==0): # need a new variable
                    assert not completed[0][i]
                    x = new_concept_from_token(amr, alignment, i, depParse, wTags, concept=pipeline.token2concept(mpred))
                    completed[0][i] = True
                    
                h = itm["gov_idx"] # i's head
                y = alignment[:h] # variable for the governed verb
                if not (y or y==0): # new variable
                    y = new_concept_from_token(amr, alignment, h, depParse, wTags)
                    completed[0][h] = True
                
                # attach the governed verb under the modal's argument relation
                newtriple = (str(x), ACTION_ARG[mpred], str(y))

                amr = new_amr_from_old(amr, new_triples=[newtriple])

                completed[1][(itm['gov_idx'],i)] = True

    return depParse, amr, alignment, completed
Example #7
0
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Convert TIMEX3 time expressions into AMR subgraphs.

    For each expression: build a main concept at the head token, optionally
    nested under a wrapper concept as :op1, then attach weekday, dayperiod,
    and the remaining normalized date-entity fields. All tokens and internal
    dependency edges of the expression are marked complete.

    Returns the updated (depParse, amr, alignment, completed) pipeline state.
    """
    amr = inAMR
    new_triples = set()
    nNewTrip = 0    # size of new_triples after the previous expression (for the sanity check below)

    time_expressions = pipeline.loadTimex(jsonFile)
    for tid, start, end, raw_timex in time_expressions:
        t = Timex3Entity(ElementTree.fromstring(raw_timex))
        h = choose_head(range(start,end+1), depParse)

        # main concept, aligned to the expression's head token
        mc = new_concept_from_token(amr, alignment, h, depParse, wTags, concept=pipeline.token2concept(t.main_concept))

        if t.wrapper != None:
            # the wrapper concept takes over the token alignment; the main
            # concept hangs off it as :op1
            alignment.unlink(mc, h)
            wc = new_concept_from_token(amr, alignment, h, depParse, wTags, concept=pipeline.token2concept(t.wrapper)+'-'+t.type)
            new_triples.add((str(wc), 'op1', str(mc)))
        else:
            amr.node_to_concepts[str(mc)] += '-'+t.type  # decorate with the timex type

        if 'weekday' in t.date_entity:
            wd = int(t.date_entity['weekday'])
            wd_name = weekdays[wd] # e.g. 'friday'
            x = new_concept(pipeline.token2concept(wd_name), amr)
            new_triples.add((str(mc), 'weekday', str(x)))
        if 'dayperiod' in t.date_entity:
            dp = t.date_entity['dayperiod']
            dp_name = dayperiods[dp]    # e.g. 'afternoon'
            x = new_concept(pipeline.token2concept(dp_name), amr)
            new_triples.add((str(mc), 'dayperiod', str(x)))

        #print('####', t.date_entity)
        # remaining normalized fields attach directly under the main concept
        for k, v in t.date_entity.iteritems():
            if k in ['weekday','dayperiod']: continue   # handled above
            if isinstance(v,basestring):
                v = pipeline.token2concept(str(v))
                x = new_concept(v, amr)
                x = str(x)
            else:   # leave literal numeric values alone
                #print(amr.triples(instances=False))
                x = v
            new_triples.add((str(mc), k, x))

        for i in range(start, end+1): # for now mark everything as completed
            completed[0][i] = True
        for i,j in completed[1]:
            # also complete dependency edges internal to the expression's span
            if i >= start and i <= end and j >= start and j <= end:
                completed[1][(i,j)] = True
                
        try:
            # sanity check: a date-entity/temporal-quantity should have yielded triples
            assert t.main_concept and (t.main_concept not in ['date-entity','temporal-quantity'] or len(new_triples)>nNewTrip)
        except AssertionError:
            if config.verbose or config.warn: print('Warning: Unhandled time expression', file=sys.stderr)
        nNewTrip = len(new_triples)

    #print(list(new_triples))
    
    amr = new_amr_from_old(amr, new_triples=list(new_triples))
    
    
    # TODO: mark all internal dependencies as completed?
    return depParse, amr, alignment, completed
Example #8
0
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Incorporate NomBank nominal predicates and their arguments.

    Phase 1 introduces a separate predicate concept for each nominal roleset
    (linked to the plain noun concept via :-PRED and tracked in 'predheads'
    rather than the token alignment). Phase 2 attaches each ARG* span's head
    to the predicate, mapping PropBank-style labels through common_arg.

    Returns the updated (depParse, amr, alignment, completed) pipeline state.
    """
    amr = inAMR
    triples = set() # to add to the AMR
    
    props = pipeline.loadNProp(jsonFile)
    
    predheads = {}  # map head index to nominal predicate variable (not reflected in the alignment)
    
    # add all predicates first, so the roleset properly goes into the AMR
    for prop in props:
        baseform, roleset = prop["baseform"], prop["frame"]
        
        if not config.fullNombank and not verbalize.nompred2verbpred(roleset):
            continue    # TODO: maybe add just the pred stem & non-core args that map to AMR role names?
        
        preds = {tuple(arg) for arg in prop["args"] if arg[0]=='rel'}
        assert len(preds)==1
        pred = next(iter(preds))
        assert pred[2]==pred[3] # multiword predicates?
        ph = pred[2]    # predicate head
        #px = alignment[:ph]    # instead of aligning noun predicate to noun in the sentence, introduce the noun predicate separately (so the plain noun concept can be its argument)
        px = predheads.get(ph)
        predconcept = pipeline.token2concept(roleset.replace('.','-n-'))
        if not (px or px==0):
            px = new_concept(predconcept, amr)  # no alignment here - instead use 'predheads'
            #print('###','newconcept',px,'/',predconcept)
            px0 = alignment[:ph]
            if not (px0 or px0==0):
                px0 = new_concept_from_token(amr, alignment, ph, depParse, wTags)
            triples.add((str(px0), '-PRED', str(px)))
            #if len(prop["args"])==1 or (prop["args"][0][0] in ['Support','rel'] and prop["args"][1][0] in ['Support','rel']):
            #    triples.add((str(px), '-DUMMY', ''))
            predheads[ph] = px
        else:   # predicate already a concept in the AMR (e.g. inserted by the 'nouns' module)
            amr.node_to_concepts[str(px)] = predconcept # change the name of the concept
        
        completed[0][ph] = True
        
    # now handle arguments
    for prop in props:
        baseform, roleset = prop["baseform"], prop["frame"]
        
        pred = [arg for arg in prop["args"] if arg[0]=='rel'][0]
        ph = pred[2]    # predicate head
        #px = alignment[:ph]
        if ph not in predheads:
            continue    # predicate was filtered out in phase 1
        
        px = predheads[ph]
        
        for rel,treenode,i,j,yieldS in prop["args"]:
            if i is None or j is None: continue # TODO: special PropBank cases that need further work
            if rel in ['rel', 'Support']: continue
            assert rel[:3]=='ARG'
            h = choose_head(range(i,j+1), depParse)
            if h is None: continue # TODO: improve coverage of complex spans
            
            # handle general proposition arguments
            if str(alignment[:h]) in amr.node_to_concepts:
                rel, amr.node_to_concepts[str(alignment[:h])] = common_arg(rel, amr.get_concept(str(alignment[:h])))
            else:
                drels = [dep["rel"] for dep in depParse[h]]
                rel = common_arg(rel, drels=drels)
            
            if isinstance(rel,tuple):
                # common_arg returned (relation, literal value); attach the literal directly
                rel, val = rel
                assert isinstance(val,Atom)
                triples.add((str(px), rel, val))
            else:
                # NOTE(review): sibling modules pass `completed` as a final argument
                # to amrget; confirm that omitting it here is intended.
                x = amrget(amr, alignment, h, depParse, wTags)
                
                triples.add((str(px), rel, str(x)))
            #print('###',px,rel,x)
            
            completed[0][h] = True

            # if SRL argument link corresponds to a dependency edge, mark that edge as complete
            if (ph,h) in completed[1]:
                completed[1][(ph,h)] = True
                #print('completed ',(ph,h))
            if (h,ph) in completed[1]:  # also for reverse direction
                completed[1][(h,ph)] = True
                #print('completed ',(ph,h))
    
    #print(triples)
    amr = new_amr_from_old(amr, new_triples=list(triples))

    return depParse, amr, alignment, completed
Example #9
0
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Final cleanup pass over the assembled AMR.

    Steps: normalize :mod-NN/:MOD to :mod; merge variables linked by :-COREF
    (choosing a representative and rewriting all triples); strip decoration
    suffixes (-FALLBACK etc.); collapse CARDINAL wrapper concepts; rename
    variables to human-friendly letters; and mark orphan variables with :-DUMMY.
    A triple-to-token alignment is maintained through the rewrites.
    """
    amr = inAMR
    
    
    # clean up role names: :mod-nn and :MOD => :mod
    repltriples = [(x, r, (y,)) for x,r,(y,) in amr.triples(instances=False) if r in ['mod-NN','MOD']]
    newtriples = [(x, 'mod', (y,)) for x,r,(y,) in repltriples]
    amr = new_amr_from_old(amr, new_triples=newtriples, avoid_triples=repltriples)
    
    
    
    
    # for each triple of the form <x :-COREF y>, delete the triple and replace 
    # all occurrences of y with x
    
    
    
    triples = amr.triples(instances=False)
    
    # Use -COREF flags to establish a mapping from current to new variables 
    
    coref_triples = [trip for trip in triples if trip[1]=='-COREF']
    replacements = {}
    for coref_trip in coref_triples:
        x, _, (y,) = coref_trip
        # TODO: strengthen the choice of main concepts for the cluster
        '''
        assert amr.get_concept(x).replace('-ROOT','')==amr.get_concept(y).replace('-ROOT','') \
            or (alignment[int(y):] is not None and wTags[alignment[int(y):]]["PartOfSpeech"] in ['PRP','PRP$']) \
            or amr.get_concept(y).endswith('-FALLBACK'), (y,ww[alignment[int(y):]],x,ww[alignment[int(x):]])
        '''
        if x in replacements and replacements[x]==y: # avoid 2-node cycle
            continue
        replacements[y] = x

    # Avoid a chain of replacements, e.g. a -> b and b -> c
    # Assume there are no cycles, otherwise this will loop infinitely
    while set(replacements.keys()) & set(replacements.values()):
        for k in replacements.keys():
            if replacements[k] in replacements:
                assert replacements[k]!=k,('Self-coreferent?',k,'in',sentenceId,replacements)
                replacements[k] = replacements[replacements[k]]
                break
    
    # MERGE the coreferent nodes
    
    all_triples = []
    trip2tokAlignment = Alignment('many2one') # source side indexes 'all_triples'
    
    newtriples = []
    oldtriples = coref_triples
    for a, r, (b,) in triples:
        if r=='-COREF': continue
        trip = (a,r,(b,))
        
        # rewrite both endpoints through the replacement map
        change = False
        if a in replacements:
            a = replacements[a]
            change = True
        if b in replacements:
            b = replacements[b]
            change = True
        if change:
            newtriples.append((a,r,b))
            oldtriples.append(trip)
            
        # record the token alignment for this triple's object, if it is a variable
        if isinstance(b,basestring) and b in amr.node_to_concepts and alignment[int(b):] is not None:
            trip2tokAlignment.link(len(all_triples), alignment[int(b):])
        all_triples.append((a,r,b))
        
        
    amr = new_amr_from_old(amr, new_triples=newtriples, avoid_triples=oldtriples, avoid_concepts=replacements)
    
    
    # delete various decorations
    for k,v in amr.node_to_concepts.items():
        amr.node_to_concepts[k] = v.replace('-FALLBACK_PRON','').replace('-FALLBACK','').replace('-DATE_RELATIVE','').replace('-DATE','').replace('-TIME','')
    
    if config.verbose:
        print('Triple-to-token alignment:',{trip:ww[trip2tokAlignment[t:]]+'-'+str(trip2tokAlignment[t:]) for t,trip in enumerate(all_triples) if trip2tokAlignment[t:] is not None},
              file=sys.stderr)
    
    
    
    
    
    # delete CARDINAL concepts (cf. the nes module) unless the concept has no parent
    # e.g. in wsj_0077.14, "154.2 million shares" is converted from (s / shares :quant (c / CARDINAL :quant 154200000)) to (s / shares :quant 154200000)
    cardinals = {v for v,c in amr.node_to_concepts.items() if c=='CARDINAL'}
    for v in cardinals:
        old2newvars = {}
        triples = [(x,r,y) for x,r,(y,) in amr.triples(instances=False) if x==v or y==v]
        try:
            assert 1<=len(triples)<=2,(triples,amr)
        except AssertionError:  # something complicated; just punt
            continue
        if len(triples)<2: continue
        t1, t2 = triples
        if t1[2]!=v:
            t1, t2 = t2, t1
        assert t1[2]==t2[0]==v
        old2newvars[v] = t2[2]
        del amr.node_to_concepts[v]
        
        # splice the CARDINAL node out: parent now points directly at the value
        newtrip = (t1[0],t1[1],t2[2])
        assert newtrip[0]!=newtrip[2]
        # replace t1 and t2 with newtrip
        amr = new_amr_from_old(amr, new_triples=[newtrip], avoid_triples=[t1,t2])
        if config.verbose: print('merge CARDINAL:',[t1,t2],'->',newtrip, file=sys.stderr)
        
        t = all_triples.index(t1)
        #assert trip2tokAlignment[t:] is not None
        all_triples[t] = newtrip
        #assert trip2tokAlignment[all_triples.index(t2):] is None
        
        #amr = new_amr([(old2newvars.get(x,x), r, (old2newvars.get(y,y),)) for x,r,(y,) in amr.triples(instances=False) if x!=v], amr.node_to_concepts)
    
    # choose user-friendly variable names
    # assumes current variable names are all integer strings
    old2newvars = {}
    newconcepts = {}
    for v,c in amr.node_to_concepts.items():
        v2 = c[0].lower() if c[0].isalpha() else v
        if v2 in newconcepts:    # append numerical suffix if necessary to disambiguate
            assert v2.isalpha()
            v2 += str(sum(1 for k in newconcepts.keys() if k[0]==v2))
        newconcepts[v2] = c
        old2newvars[v] = v2
    all_triples2 = []
    trip2tokAlignment2 = Alignment('many2one')
    # carry the triple-to-token alignment over to the renamed triples
    for x,r,(y,) in amr.triples(instances=False):
        t = all_triples.index((x,r,y))
        if trip2tokAlignment[t:] is not None:
            trip2tokAlignment2.link(len(all_triples2), trip2tokAlignment[t:])
        all_triples2.append((old2newvars.get(x,x), r, (old2newvars.get(y,y),)))
    
    finalAlignment = {trip:ww[trip2tokAlignment2[t:]]+'-'+str(trip2tokAlignment2[t:]) for t,trip in enumerate(all_triples2) if trip2tokAlignment2[t:] is not None}
    if config.verbose:
        print('Final triple-to-token alignment:',finalAlignment,
              file=sys.stderr)
    
    amr = new_amr(all_triples2, newconcepts)
    
    
    # detect orphans (variables with no triples)
    orphans = {v: True for v in newconcepts}
    for x,r,(y,) in amr.triples(instances=False):
        if r=='-DUMMY': continue
        orphans[x] = False
        if y in orphans:
            orphans[y] = False
    orphans = [v for v in orphans if orphans[v]]
    if config.verbose: print(len(orphans),'orphans',orphans, file=sys.stderr)
    
    # ensure a node has a :-DUMMY annotation iff it is an orphan
    amr = new_amr([(x,r,(y,)) for x,r,(y,) in amr.triples(instances=False) if r!='-DUMMY']+[(o,'-DUMMY','') for o in orphans], newconcepts)
    
    
    # NOTE: py2-only tuple-parameter syntax below
    def swap_callback((x,r,(y,)),(x2,r2,(y2,))):
        #TODO: fix alignments
        pass
    # NOTE(review): unlike the sibling modules, no return statement is visible
    # here -- this function appears to continue beyond the current excerpt.
Example #10
0
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Incorporate PropBank verbal predicates and their arguments.

    Phase 1 creates a concept for each verbal roleset (aligned to the predicate
    head token); argument-less predicates get a placeholder :-DUMMY triple so
    they still appear in the AMR. Phase 2 attaches each ARG* span's head to its
    predicate, mapping labels through common_arg; ARGM-MOD modals (other than
    'will') are deferred to a later module.

    Returns the updated (depParse, amr, alignment, completed) pipeline state.
    """
    amr = inAMR
    triples = set() # to add to the AMR
    
    props = pipeline.loadVProp(jsonFile)
    
    # add all predicates first, so the roleset properly goes into the AMR
    for prop in props:
        baseform, roleset = prop["baseform"], prop["frame"]
        
        preds = {tuple(arg[:5]) for arg in prop["args"] if arg[0]=='rel'}
        assert len(preds)==1
        pred = next(iter(preds))
        assert pred[2]==pred[3] # multiword predicates?
        ph = pred[2]    # predicate head
        if ph is None: continue  # TODO: improve coverage of complex spans
        
        px = alignment[:ph]
        if not (px or px==0):
            px = new_concept_from_token(amr, alignment, ph, depParse, wTags, concept=pipeline.token2concept(roleset.replace('.','-')))
            if len(prop["args"])==1 or prop["args"][1][0].startswith('LINK'):
                # predicate has no real arguments: keep it visible via :-DUMMY
                triples.add((str(px), '-DUMMY', ''))
        completed[0][ph] = True
        
    # now handle arguments
    for prop in props:
        baseform, roleset = prop["baseform"], prop["frame"]
        
        pred = [arg for arg in prop["args"] if arg[0]=='rel'][0]
        ph = pred[2]    # predicate head
        if ph is None: continue # TODO: improve coverage of complex spans
        px = alignment[:ph]
        
        for rel,treenode,i,j,yieldS,_ in prop["args"]:
            if i is None or j is None: continue # TODO: special PropBank cases that need further work
            if rel in ['rel', 'LINK-PCR', 'LINK-SLC']: continue
            assert rel[:3]=='ARG'
            if i==j:
                #assert depParse[i], (tokens[i],rel,treenode,yieldS)
                if depParse[i] is None: continue    # TODO: is this appropriate? e.g. in wsj_0003.0
            #print(roleset,rel,i,j,yieldS)
            h = choose_head(range(i,j+1), depParse)
            if h is None: continue  # TODO: temporary?
            x = alignment[:h] # index of variable associated with i's head, if any
            
            # handle general proposition arguments
            if str(alignment[:h]) in amr.node_to_concepts:
                rel, amr.node_to_concepts[str(alignment[:h])] = common_arg(rel, amr.get_concept(str(alignment[:h])))
            else:
                drels = [dep["rel"] for dep in depParse[h]]
                rel = common_arg(rel, drels=drels)
            
            # verb-specific argument types
            if rel=='ARGM-MOD':
                if yieldS=='will':
                    pass    # skip this auxiliary
                else:
                    continue # handle modal in a later module
            elif isinstance(rel,tuple):
                # common_arg returned (relation, literal value); attach the literal directly
                rel, val = rel
                assert isinstance(val,Atom)
                triples.add((str(px), rel, val))
            else:
                if not (x or x==0): # need a new variable
                    x = new_concept_from_token(amr, alignment, h, depParse, wTags)
                triples.add((str(px), rel, str(x)))
            
            completed[0][h] = True

            # if SRL argument link corresponds to a dependency edge, mark that edge as complete
            if (ph,h) in completed[1]:
                completed[1][(ph,h)] = True
                #print('completed ',(ph,h))
            if (h,ph) in completed[1]:  # also for reverse direction
                completed[1][(h,ph)] = True
                #print('completed ',(ph,h))
    
    #print(triples)
    amr = new_amr_from_old(amr, new_triples=list(triples))

    return depParse, amr, alignment, completed
Example #11
0
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Incorporate BBN named-entity annotations.

    TIMEX entities are skipped (handled by the timex module). NUMEX entities
    (MONEY/CARDINAL/PERCENT) use the Stanford normalized value, with optional
    comparison wrappers like 'more-than'. *_DESC entities keep their head word
    as the concept. Other NEs get a class concept (flagged -FALLBACK) plus a
    'name' node with :opX name-part strings.

    Returns the updated (depParse, amr, alignment, completed) pipeline state.
    """
    amr = inAMR
    triples = set() # to add to the AMR
    
    entities = pipeline.loadBBN(jsonFile)
    for i,j,name,coarse,fine,raw in entities:
        
        if raw.startswith('<TIMEX'): continue  # use the timex module (sutime output) instead
        
        h = choose_head(range(i,j+1), depParse, 
                        fallback=lambda frontier: max(frontier) if len(frontier)==2 and ww[min(frontier)]=='than' else False)
                        # ^ dirty hack: in 'more than 3 times' (wsj_0003.12), [more than 3] is a value expression 
                        # but 'than' and '3' both attach to 'times' in the dependency parse.
        #print((i,j),name,h,depParse[h+1]['dep'], file=sys.stderr)
        
        x = alignment[:h] # index of variable associated with i's head, if any
        
        if raw.startswith('<NUMEX'):
            if coarse in ['MONEY','CARDINAL','PERCENT']:
                # get normalized value from Stanford tools
                v = wTags[h]["NormalizedNamedEntityTag"]
                
                # leading comparison operator (<, >, <=, >=, ~) becomes a wrapper concept
                wrapper = None
                if v[0] in '<>~':
                    if len(v)==1:
                        print('Warning: Unexpected NormalizedNamedEntityTag:',v,'for',raw, file=sys.stderr)
                    else:
                        if v[1]=='=':
                            reln = v[:2]
                            v = v[2:]
                        else:
                            reln = v[0]
                            v = v[1:]
                        concept = {'<': 'less-than', '>': 'more-than', '<=': 'no-more-than', '>=': 'at-least', '~': 'about'}[reln]
                        wrapper = new_concept_from_token(amr, alignment, h, depParse, wTags, concept=concept)
                    
                if coarse=='MONEY':
                    # split off the currency symbol; v keeps the numeric part
                    m = re.match(r'^([\$¥£])(\d+\.\d+(E-?\d+)?)$', v)
                    if not m:
                        assert False,v
                    u = m.group(1)
                    v = m.group(2)
                elif coarse=='PERCENT':
                    m = re.match(r'^%(\d+\.\d+(E-?\d+)?)$', v)
                    if not m:
                        assert False,v
                    v = m.group(1)
                
                # prefer an int when the value is whole, else keep the float
                try:
                    v = float(v)
                    if str(v).endswith('.0'):
                        v = int(v)
                except ValueError:
                    pass
                
                if (wrapper is None or coarse=='MONEY') and not (x or x==0): # need a new variable
                    kind = {'MONEY': 'monetary-quantity', 'PERCENT': 'percentage-entity'}.get(coarse, coarse.upper())
                    if wrapper is None: # if there is a wrapper concept (e.g. 'more-than'), it is aligned, so don't provide an alignment for x
                        x = new_concept_from_token(amr, alignment, h, depParse, wTags, concept=kind)
                    else:
                        x = new_concept(kind, amr)
                
                if (x or x==0):
                    triples.add((str(x), 'value' if coarse=='PERCENT' else 'quant', v))
                    if wrapper is not None:
                        triples.add((str(wrapper), 'op1', str(x)))
                elif wrapper is not None:
                        triples.add((str(wrapper), 'op1', v))   # e.g. more-than :op1 41
                
                
                if coarse=='MONEY':
                    y = new_concept({'$': 'dollar', '¥': 'yen', '£': 'pound'}[u.encode('utf-8')], amr)
                    triples.add((str(x), 'unit', str(y)))
            elif coarse=='ORDINAL':
                pass    # skip--no special treatment in AMR guidelines, though the normalized value could be used
            else:
                assert False,(i,j,raw)
        elif coarse.endswith('_DESC'):
            # make the phrase head word the AMR head concept
            # (could be a multiword term, like Trade Representative)
            if not (x or x==0): # need a new variable
                x = new_concept_from_token(amr, alignment, h, depParse, wTags)
                triples.add((str(x), '-DUMMY', '')) # ensure the concept participates in some triple so it is printed
        else:
            if coarse.lower()=='person' and i>0 and ww[i-1] and ww[i-1].lower() in ['mr','mr.','mister','master','sir','mrs','mrs.','miss']:
                # Extend the NE to include formal titles that do not get concepts
                name = ww[i-1]+' '+name
                i -= 1

            if not (x or x==0): # need a new variable
                ne_class = fine.lower().replace('other','') or coarse.lower()
                concept, amr_name = amrify(ne_class, name)
                x = new_concept_from_token(amr, alignment, h, depParse, wTags, 
                                concept=pipeline.token2concept(concept)+'-FALLBACK')
                # -FALLBACK indicates extra information not in the sentence (NE class)
                n = new_concept('name', amr)
                triples.add((str(x), 'name', str(n)))
                for iw,w in enumerate(amr_name.split()):
                    triples.add((str(n), 'op'+str(iw+1), '"'+w+'"'))
                    
        
        for k in range(i,j+1):
            assert not completed[0][k]
            completed[0][k] = True
            #print('completed token',k)
            if k!=h:
                for link in parent_edges(depParse[k]):
                    completed[1][link] = True  # we don't need to attach non-head parts of names anywhere else
    
    amr = new_amr_from_old(amr, new_triples=list(triples))

    return depParse, amr, alignment, completed