Beispiel #1
0
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Rewrite the noun-predicate (``:-PRED``) edges of an AMR.

    Every triple ``(x, '-PRED', (y,))`` is handled as follows. The concept of
    ``y`` must have the form ``lemma-n-NN`` or ``lemma-n-XX``; it is turned
    into ``lemma.NN`` and passed to ``nompred2verbpred``:

    * a verbal predicate is returned and ``y`` already has an edge pointing
      back at ``x``: ``x`` is relabelled ``thing-FALLBACK`` and ``y`` gets the
      verbal predicate (``vlemma-NN``);
    * a verbal predicate is returned and there is no such back edge: both
      nodes get the verbal predicate and ``(x, '-COREF', (y,))`` is added;
    * nothing is returned: ``x`` takes over the concept of ``y`` and
      ``(x, '-COREF', (y,))`` is added.

    In every case the ``-PRED`` edge itself is removed and the AMR is rebuilt
    with ``new_amr`` before the next edge is processed.

    Only ``inAMR`` is used among the parameters besides the pass-through
    values; its ``node_to_concepts`` mapping is modified in place.

    Returns the tuple ``(depParse, amr, alignment, completed)`` where
    ``depParse``, ``alignment`` and ``completed`` are passed through untouched.
    If the AMR has no ``-PRED`` edges, ``amr`` is ``inAMR`` itself.
    """
    amr = inAMR
    triples = amr.triples(instances=False)

    # Snapshot the (x :-PRED y) edges up front: 'triples' is edited in the loop.
    pred_edges = [edge for edge in triples if edge[1] == '-PRED']
    for x, r, (y,) in pred_edges:
        noun_concept = amr.get_concept(y)
        nlemma, nsense = re.match(r'^(.+)-n-(\d\d|XX)$', noun_concept).groups()
        # verbal counterpart of the nominal predicate "lemma.sense", if any
        vpred = nompred2verbpred(nlemma + '.' + nsense)

        if not vpred:
            # No verb known: for now keep the nominal predicate, copying it
            # from y to x and marking the two nodes as coreferent.
            triples.append((x, '-COREF', (y,)))
            amr.node_to_concepts[x] = amr.node_to_concepts[y]
        else:
            vlemma, vsense = re.match(r'^verb-(.+)\.(\d\d|XX)$', vpred).groups()
            vpred = vlemma + '-' + vsense

            # Roles on edges that lead from y back to x
            # (a self-reference, i.e. roughly an incorporated argument).
            back_roles = set()
            for src, role, (tgt,) in triples:
                if tgt == x and src == y:
                    back_roles.add(role)

            if not back_roles:
                # Probably an eventive noun, so the predicate need not be a
                # separate concept: label both nodes with the verbal predicate
                # and mark them as coreferent.
                triples.append((x, '-COREF', (y,)))
                amr.node_to_concepts[x] = vpred
                amr.node_to_concepts[y] = vpred
            else:
                assert len(back_roles) == 1, back_roles
                r2 = next(iter(back_roles))
                # Target shape: (x / thing-FALLBACK :ARG#-of (y / vlemma-NN)).
                # Only the concepts (and possibly the relation) change; the
                # alignments are left alone.
                # NOTE(review): r2 was collected from an existing edge from y
                # to x, so if triples are plain tuples the second membership
                # test looks like it always succeeds and nothing is ever
                # appended here -- confirm.
                inverse_edge = (x, r2 + '-of', (y,))
                if not (inverse_edge in triples or (y, r2, (x,)) in triples):
                    triples.append(inverse_edge)
                amr.node_to_concepts[x] = 'thing-FALLBACK'
                amr.node_to_concepts[y] = vpred

        # The -PRED edge has served its purpose; drop it and rebuild the graph.
        triples.remove((x, r, (y,)))
        amr = new_amr(triples, amr.node_to_concepts)

    return depParse, amr, alignment, completed
Beispiel #2
0
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Tidy up an AMR: normalise roles, merge coreferent nodes, rename variables.

    Steps performed by the visible code, in order:

    1. rename the roles ``mod-NN`` and ``MOD`` to ``mod``;
    2. for every ``(x, '-COREF', (y,))`` triple, drop the triple and replace
       ``y`` by ``x`` everywhere (chains ``a -> b -> c`` are collapsed first);
    3. strip the ``-FALLBACK_PRON``, ``-FALLBACK``, ``-DATE_RELATIVE``,
       ``-DATE`` and ``-TIME`` markers from all concept names;
    4. splice out each ``CARDINAL`` node that is touched by exactly two
       triples, connecting its parent directly to its child;
    5. rename variables after the first letter of their concept;
    6. attach a ``-DUMMY`` triple to exactly those variables that occur in no
       other triple.

    Along the way a triple-to-token alignment is maintained and, when
    ``config.verbose`` is set, progress is printed to ``sys.stderr``.

    Of the parameters, only ``sentenceId`` (in an assertion message), ``ww``,
    ``inAMR`` and ``alignment`` are read in the visible code.

    NOTE(review): nothing after the nested ``swap_callback`` definition is
    visible here, so as shown the function returns ``None`` and never uses
    ``finalAlignment`` or the callback -- confirm against the full source.
    """
    amr = inAMR
    
    
    # clean up role names: :mod-NN and :MOD => :mod
    repltriples = [(x, r, (y,)) for x,r,(y,) in amr.triples(instances=False) if r in ['mod-NN','MOD']]
    newtriples = [(x, 'mod', (y,)) for x,r,(y,) in repltriples]
    amr = new_amr_from_old(amr, new_triples=newtriples, avoid_triples=repltriples)
    
    
    
    
    # for each triple of the form <x :-COREF y>, delete the triple and replace 
    # all occurrences of y with x
    
    
    
    triples = amr.triples(instances=False)
    
    # Use -COREF flags to establish a mapping from current to new variables 
    # (replacements[y] == x means "y is to be replaced by x")
    
    coref_triples = [trip for trip in triples if trip[1]=='-COREF']
    replacements = {}
    for coref_trip in coref_triples:
        x, _, (y,) = coref_trip
        # TODO: strengthen the choice of main concepts for the cluster
        # (the string literal below is a disabled sanity check, kept as a no-op expression)
        '''
        assert amr.get_concept(x).replace('-ROOT','')==amr.get_concept(y).replace('-ROOT','') \
            or (alignment[int(y):] is not None and wTags[alignment[int(y):]]["PartOfSpeech"] in ['PRP','PRP$']) \
            or amr.get_concept(y).endswith('-FALLBACK'), (y,ww[alignment[int(y):]],x,ww[alignment[int(x):]])
        '''
        if x in replacements and replacements[x]==y: # avoid 2-node cycle
            continue
        replacements[y] = x

    # Avoid a chain of replacements, e.g. a -> b and b -> c
    # Assume there are no cycles, otherwise this will loop infinitely
    # Each pass shortens one chain link (k -> v -> w becomes k -> w) and then
    # restarts, until no replacement target is itself scheduled for replacement.
    while set(replacements.keys()) & set(replacements.values()):
        for k in replacements.keys():
            if replacements[k] in replacements:
                assert replacements[k]!=k,('Self-coreferent?',k,'in',sentenceId,replacements)
                replacements[k] = replacements[replacements[k]]
                break
    
    # MERGE the coreferent nodes
    
    # all_triples holds flat (a, r, b) triples after variable replacement;
    # its list positions are the keys of trip2tokAlignment.
    all_triples = []
    trip2tokAlignment = Alignment('many2one') # source side indexes 'all_triples'
    
    newtriples = []
    # NOTE: this is an alias, not a copy -- appending to oldtriples below also
    # grows coref_triples.
    oldtriples = coref_triples
    for a, r, (b,) in triples:
        if r=='-COREF': continue
        trip = (a,r,(b,))   # the triple as it looked before any replacement
        
        change = False
        if a in replacements:
            a = replacements[a]
            change = True
        if b in replacements:
            b = replacements[b]
            change = True
        if change:
            # NOTE(review): the new triple is stored with a bare b, whereas the
            # role clean-up above hands (x, 'mod', (y,)) to new_amr_from_old --
            # confirm that both shapes are accepted.
            newtriples.append((a,r,b))
            oldtriples.append(trip)
            
        # Record a token alignment for this triple when its (possibly replaced)
        # target b is a variable with a concept; presumably alignment[int(b):]
        # looks up what b is aligned to -- TODO confirm Alignment's slice semantics.
        if isinstance(b,basestring) and b in amr.node_to_concepts and alignment[int(b):] is not None:
            trip2tokAlignment.link(len(all_triples), alignment[int(b):])
        all_triples.append((a,r,b))
        
        
    amr = new_amr_from_old(amr, new_triples=newtriples, avoid_triples=oldtriples, avoid_concepts=replacements)
    
    
    # delete various decorations
    # (longer markers are removed before their prefixes: -FALLBACK_PRON before
    # -FALLBACK, -DATE_RELATIVE before -DATE)
    for k,v in amr.node_to_concepts.items():
        amr.node_to_concepts[k] = v.replace('-FALLBACK_PRON','').replace('-FALLBACK','').replace('-DATE_RELATIVE','').replace('-DATE','').replace('-TIME','')
    
    if config.verbose:
        print('Triple-to-token alignment:',{trip:ww[trip2tokAlignment[t:]]+'-'+str(trip2tokAlignment[t:]) for t,trip in enumerate(all_triples) if trip2tokAlignment[t:] is not None},
              file=sys.stderr)
    
    
    
    
    
    # delete CARDINAL concepts (cf. the nes module) unless the concept has no parent
    # e.g. in wsj_0077.14, "154.2 million shares" is converted from (s / shares :quant (c / CARDINAL :quant 154200000)) to (s / shares :quant 154200000)
    cardinals = {v for v,c in amr.node_to_concepts.items() if c=='CARDINAL'}
    for v in cardinals:
        # old2newvars is only consumed by the commented-out rebuild at the end
        # of this loop body.
        old2newvars = {}
        # flat (x, r, y) triples that touch the CARDINAL variable v
        triples = [(x,r,y) for x,r,(y,) in amr.triples(instances=False) if x==v or y==v]
        try:
            assert 1<=len(triples)<=2,(triples,amr)
        except AssertionError:  # something complicated; just punt
            continue
        # a single triple means v has only one neighbour: leave it alone
        if len(triples)<2: continue
        t1, t2 = triples
        # order them so that t1 points into v and t2 leaves v
        if t1[2]!=v:
            t1, t2 = t2, t1
        assert t1[2]==t2[0]==v
        old2newvars[v] = t2[2]
        del amr.node_to_concepts[v]
        
        newtrip = (t1[0],t1[1],t2[2])
        assert newtrip[0]!=newtrip[2]
        # replace t1 and t2 with newtrip
        amr = new_amr_from_old(amr, new_triples=[newtrip], avoid_triples=[t1,t2])
        if config.verbose: print('merge CARDINAL:',[t1,t2],'->',newtrip, file=sys.stderr)
        
        # keep all_triples in step: newtrip takes over t1's slot (and with it
        # t1's alignment entry); t2's slot is left in place.
        t = all_triples.index(t1)
        #assert trip2tokAlignment[t:] is not None
        all_triples[t] = newtrip
        #assert trip2tokAlignment[all_triples.index(t2):] is None
        
        #amr = new_amr([(old2newvars.get(x,x), r, (old2newvars.get(y,y),)) for x,r,(y,) in amr.triples(instances=False) if x!=v], amr.node_to_concepts)
    
    # choose user-friendly variable names
    # assumes current variable names are all integer strings
    old2newvars = {}
    newconcepts = {}
    for v,c in amr.node_to_concepts.items():
        # first letter of the concept, lowercased; keep the old name if the
        # concept does not start with a letter
        v2 = c[0].lower() if c[0].isalpha() else v
        if v2 in newconcepts:    # append numerical suffix if necessary to disambiguate
            assert v2.isalpha()
            # suffix = number of names already assigned that start with this letter
            v2 += str(sum(1 for k in newconcepts.keys() if k[0]==v2))
        newconcepts[v2] = c
        old2newvars[v] = v2
    # Re-express the triples (and their alignments) in terms of the new names.
    all_triples2 = []
    trip2tokAlignment2 = Alignment('many2one')
    for x,r,(y,) in amr.triples(instances=False):
        # find the triple's slot in all_triples to carry its alignment over;
        # list.index raises ValueError if the triple is not present
        t = all_triples.index((x,r,y))
        if trip2tokAlignment[t:] is not None:
            trip2tokAlignment2.link(len(all_triples2), trip2tokAlignment[t:])
        all_triples2.append((old2newvars.get(x,x), r, (old2newvars.get(y,y),)))
    
    # maps each aligned triple to "<entry of ww>-<aligned index>"
    finalAlignment = {trip:ww[trip2tokAlignment2[t:]]+'-'+str(trip2tokAlignment2[t:]) for t,trip in enumerate(all_triples2) if trip2tokAlignment2[t:] is not None}
    if config.verbose:
        print('Final triple-to-token alignment:',finalAlignment,
              file=sys.stderr)
    
    amr = new_amr(all_triples2, newconcepts)
    
    
    # detect orphans (variables with no triples)
    # start by assuming every variable is an orphan, then clear the flag for
    # anything that appears in a non-DUMMY triple
    orphans = {v: True for v in newconcepts}
    for x,r,(y,) in amr.triples(instances=False):
        if r=='-DUMMY': continue
        orphans[x] = False
        if y in orphans:
            orphans[y] = False
    orphans = [v for v in orphans if orphans[v]]
    if config.verbose: print(len(orphans),'orphans',orphans, file=sys.stderr)
    
    # ensure a node has a :-DUMMY annotation iff it is an orphan
    # NOTE(review): the added triples use '' as third element rather than a
    # 1-tuple like the others -- confirm new_amr expects this.
    amr = new_amr([(x,r,(y,)) for x,r,(y,) in amr.triples(instances=False) if r!='-DUMMY']+[(o,'-DUMMY','') for o in orphans], newconcepts)
    
    
    # Placeholder callback taking two triples; it does nothing yet and is not
    # referenced anywhere in the visible code. (Tuple parameters in the
    # signature are Python 2-only syntax.)
    def swap_callback((x,r,(y,)),(x2,r2,(y2,))):
        #TODO: fix alignments
        pass