def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Attach noun-noun, possessive, and prepositional dependents in the AMR.

    For each not-yet-completed dependency edge whose relation is 'nn', 'poss',
    or a collapsed 'prep_*' relation, fetch or create AMR variables for the
    governor and dependent (via amrget) and link them with a :mod-NN, :poss,
    or :prep-X triple. Returns the updated pipeline state tuple
    (depParse, amr, alignment, completed), like the other modules.
    """
    amr = inAMR
    for deps in depParse:
        if deps is None:
            continue  # token with no dependency information
        for dep in deps:
            i, r, h = dep["dep_idx"], dep["rel"], dep["gov_idx"]
            if completed[1][(h,i)]:
                continue  # edge already handled by an earlier module
            if r in ['nn','poss'] or r.startswith('prep_'):
                x = amrget(amr, alignment, h, depParse, wTags, completed)  # head variable
                y = amrget(amr, alignment, i, depParse, wTags, completed)  # modifier variable
                if r=='nn':
                    # attach as :mod-NN (normalized to :mod by the cleanup module)
                    newtriple = (str(x), 'mod-NN', str(y))
                elif r=='poss':
                    newtriple = (str(x), 'poss', str(y))
                else:
                    # attach with :prep-X relation (e.g. prep_of -> :prep-of)
                    assert r.startswith('prep_')
                    newtriple = (str(x), r.replace('_','-'), str(y))
                amr = new_amr_from_old(amr, new_triples=[newtriple])
                completed[1][(h,i)] = True
    '''
    # simplify adverbs to adjectives based on lexicon
    for v in amr.node_to_concepts.keys():
        amr.node_to_concepts[v] = simplify_adv(amr.node_to_concepts[v])
    '''
    return depParse, amr, alignment, completed
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Handle copula and apposition constructions.

    First pass: record governors of 'cop' edges in cop_preds and mark those
    edges complete. Second pass: subjects of copular predicates are attached
    with a :domain triple; 'appos' dependents get a :-COREF triple (merged
    later by the coreference cleanup module).
    """
    amr = inAMR
    cop_preds = set()  # heads that govern a 'cop' dependency (copular predicates)
    for deps in depParse:
        if deps is None: continue
        for dep in deps:
            i, r, h = dep["dep_idx"], dep["rel"], dep["gov_idx"]
            if completed[1][(h,i)]: continue
            if r=='cop':
                cop_preds.add(h)
                completed[1][(h,i)] = True
    # second pass deliberately does not check edge completion (see note below)
    for deps in depParse:
        if deps is None: continue
        for dep in deps:
            i, r, h = dep["dep_idx"], dep["rel"], dep["gov_idx"]
            if (h in cop_preds and r.endswith('subj')) or r=='appos':
                x = amrget(amr, alignment, h, depParse, wTags, completed)
                y = amrget(amr, alignment, i, depParse, wTags, completed)
                # asserting non-completion here might be bad
                if r=='appos':
                    completed[1][(h,i)] = True
                # prefer the non-hallucinated (-FALLBACK) concept as the head of the pair
                if '-FALLBACK' in amr.get_concept(str(x)) and '-FALLBACK' not in amr.get_concept(str(y)):
                    x, y = y, x
                if x!=y:
                    newtriple = (str(x), '-COREF' if r=='appos' else 'domain', str(y))
                    amr = new_amr_from_old(amr, new_triples=[newtriple])
    return depParse, amr, alignment, completed
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Attach adjectival/adverbial/numeric/determiner dependents as modifiers.

    Handles 'amod', 'advmod', 'dep', 'num', 'number', and 'det' edges:
    articles (the/a/an) are skipped entirely; numbers attach as :quant;
    concepts carrying an -AGE decoration attach as :age (with the decoration
    stripped); everything else attaches as :mod. Finally, adverb concepts are
    simplified to adjectives via simplify_adv.
    """
    amr = inAMR
    for deps in depParse:
        if deps is None: continue
        for itm in deps:
            if completed[1][(itm['gov_idx'],itm['dep_idx'])]: continue
            i = itm['dep_idx']
            if itm['rel'] in ['amod', 'advmod', 'dep', 'num', 'number', 'det']:
                h = itm['gov_idx']  # i's head
                if itm['rel']=='det' and itm['dep'].lower() in ['the', 'a', 'an']:
                    # skip articles: they contribute nothing to the AMR
                    completed[0][i] = True
                    completed[1][(h,i)] = True
                    continue
                x = alignment[:h]  # index of variable associated with i's head, if any
                if not (x or x==0):  # need a new variable
                    assert not completed[0][h], (depParse[h],amr)
                    x = new_concept_from_token(amr, alignment, h, depParse, wTags)
                    completed[0][h] = True
                y = alignment[:i]  # modifier variable
                if not (y or y==0):  # new variable
                    y = new_concept_from_token(amr, alignment, i, depParse, wTags)
                    completed[0][i] = True
                if itm['rel'] in ['num', 'number']:
                    # attach as :quant
                    newtriple = (str(x), 'quant', str(y))
                    # TODO: for plain values, don't create a variable
                elif 'AGE' in amr.get_concept(str(y)).split('-'):
                    newtriple = (str(x), 'age', str(y))
                    amr.node_to_concepts[str(y)] = amr.node_to_concepts[str(y)].replace('-AGE','')
                else:
                    # attach with :mod relation
                    newtriple = (str(x), 'mod', str(y))
                amr = new_amr_from_old(amr, new_triples=[newtriple])
                completed[1][(h,i)] = True
    # simplify adverbs to adjectives based on lexicon
    for v in amr.node_to_concepts.keys():
        amr.node_to_concepts[v] = simplify_adv(amr.node_to_concepts[v])
    return depParse, amr, alignment, completed
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Link coreferent mentions using coreference clusters loaded from jsonFile.

    For each cluster, the best-ranked mention's variable becomes the
    representative (clusterX); every other mention is linked to it with a
    :-COREF triple (actual node merging happens in the cleanup module).
    Mention pairs already related by a :domain triple are treated as copula
    constructions and are not linked.
    """
    amr = inAMR
    coref = loadCoref(jsonFile, ww)
    #print(coref)
    for cluster in coref.values():
        clusterX = None  # choose one member of the cluster to decorate with coreferent equivalents, marked :-COREF
        # sort preferences: pronouns (-FALLBACK_PRON) < hallucinated concepts (-FALLBACK)
        # < content words from the sentence; longer mentions preferred via reverse=True
        for i,j,w in sorted(cluster, key=lambda mention: (alignment[:mention[1]] is None or '-FALLBACK_PRON' not in amr.node_to_concepts[str(alignment[:mention[1]])],
                                                          alignment[:mention[1]] is None or '-FALLBACK' not in amr.node_to_concepts[str(alignment[:mention[1]])],
                                                          mention[1]-mention[0]), reverse=True):
            # sanity check: the mention string matches the covered tokens
            assert ' '.join(filter(None, ww[i:j+1]))==w,(w,i,j, ww[i:j+1])
            trips = amr.triples(instances=False)
            h = choose_head(range(i,j+1), depParse)
            x = alignment[:h]  # index of variable associated with the head, if any
            if not (x or x==0):  # need a new variable
                print('TODO: coreferring mention not yet in AMR')
                assert False,(i,j,w,h,x,amr)
            if clusterX is None:
                clusterX = x
            elif x==clusterX:
                assert False,('coreferent has same head',i,j,w,h,x,clusterX)
            else:
                isCopula = False
                # note that previous modules have inserted some :-COREF links for equivalent nodes,
                # so check the whole -COREF neighborhood of both variables for a :domain link
                for x2 in [str(clusterX)]+symmetric_neighbors(str(clusterX), '-COREF', amr):
                    for x3 in [str(x)]+symmetric_neighbors(str(x), '-COREF', amr):
                        if x3 in symmetric_neighbors(x2, 'domain', amr):
                            isCopula = True
                            if config.verbose:
                                print('blocked coreference link (probably a copula cxn) between variables:',x,clusterX, file=sys.stderr)
                            break
                    if isCopula: break
                if isCopula: continue  # copula construction - don't merge as coreferent
                newtriple = (str(clusterX), '-COREF', str(x))
                amr = new_amr_from_old(amr, new_triples=[newtriple])
    return depParse, amr, alignment, completed
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Attach 'conj' dependents to their conjunction head with :op1, :op2, ... triples.

    Each uncompleted 'conj' dependency edge yields one :opN triple, where N
    counts how many conjuncts have already been attached to that head's
    variable. Returns the updated (depParse, amr, alignment, completed) tuple.
    """
    amr = inAMR
    op_counts = {}  # conjunction concept variable -> number of :opX relations so far
    for token_deps in depParse:
        if token_deps is None:
            continue  # token with no dependency information
        for edge in token_deps:
            gov, dep_idx, relname = edge["gov_idx"], edge["dep_idx"], edge["rel"]
            if completed[1][(gov, dep_idx)]:
                continue  # edge already handled
            if relname != 'conj':
                continue
            head_var = amrget(amr, alignment, gov, depParse, wTags, completed)
            conjunct_var = amrget(amr, alignment, dep_idx, depParse, wTags, completed)
            n = op_counts.get(head_var, 0) + 1
            op_counts[head_var] = n
            triple = (str(head_var), 'op' + str(n), str(conjunct_var))
            amr = new_amr_from_old(amr, new_triples=[triple])
            completed[1][(gov, dep_idx)] = True
    return depParse, amr, alignment, completed
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Convert modal auxiliaries into predicate concepts.

    Auxiliaries whose POS tag is not 'MD' (BE/HAVE) are simply marked
    complete. Modal words are mapped via MODALS to a predicate concept, which
    takes the governing verb's variable as the argument role given by
    ACTION_ARG.
    """
    amr = inAMR
    for deps in depParse:
        if deps is None: continue
        for itm in deps:
            if completed[1][(itm['gov_idx'],itm['dep_idx'])]: continue
            i = itm['dep_idx']
            if itm['rel'] in ['aux', 'auxpass']:
                if wTags[i]["PartOfSpeech"]!='MD':
                    # BE or HAVE auxiliary--ignore
                    completed[0][i] = True
                    completed[1][(itm['gov_idx'],i)] = True
                    continue
                #print(itm, file=sys.stderr)
                mw = itm["dep"]       # the modal word itself
                mpred = MODALS[mw]    # its predicate concept
                x = alignment[:i]  # index of variable associated with the modal, if any
                if not (x or x==0):  # need a new variable
                    assert not completed[0][i]
                    x = new_concept_from_token(amr, alignment, i, depParse, wTags, concept=pipeline.token2concept(mpred))
                    completed[0][i] = True
                h = itm["gov_idx"]  # i's head
                y = alignment[:h]   # governed-verb variable
                if not (y or y==0):  # new variable
                    y = new_concept_from_token(amr, alignment, h, depParse, wTags)
                    completed[0][h] = True
                newtriple = (str(x), ACTION_ARG[mpred], str(y))
                amr = new_amr_from_old(amr, new_triples=[newtriple])
                completed[1][(itm['gov_idx'],i)] = True
    return depParse, amr, alignment, completed
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Build time concepts from TIMEX3 expressions (SUTime output in jsonFile).

    For each time expression, a main concept is created at the expression's
    head token; an optional wrapper concept (e.g. a relational modifier) takes
    it as :op1. Attributes of the parsed date entity become triples on the
    main concept (weekday/dayperiod are mapped through lookup tables first).
    All covered tokens and token-internal dependency edges are marked
    complete. NOTE: Python 2 code (dict.iteritems, basestring).
    """
    amr = inAMR
    new_triples = set()
    nNewTrip = 0  # size of new_triples before the current expression (for the sanity check below)
    time_expressions = pipeline.loadTimex(jsonFile)
    for tid, start, end, raw_timex in time_expressions:
        t = Timex3Entity(ElementTree.fromstring(raw_timex))
        h = choose_head(range(start,end+1), depParse)
        mc = new_concept_from_token(amr, alignment, h, depParse, wTags, concept=pipeline.token2concept(t.main_concept))
        if t.wrapper != None:
            # wrapper concept takes over the token alignment; main concept hangs off it via :op1
            alignment.unlink(mc, h)
            wc = new_concept_from_token(amr, alignment, h, depParse, wTags, concept=pipeline.token2concept(t.wrapper)+'-'+t.type)
            new_triples.add((str(wc), 'op1', str(mc)))
        else:
            amr.node_to_concepts[str(mc)] += '-'+t.type
        if 'weekday' in t.date_entity:
            wd = int(t.date_entity['weekday'])
            wd_name = weekdays[wd]  # e.g. 'friday'
            x = new_concept(pipeline.token2concept(wd_name), amr)
            new_triples.add((str(mc), 'weekday', str(x)))
        if 'dayperiod' in t.date_entity:
            dp = t.date_entity['dayperiod']
            dp_name = dayperiods[dp]  # e.g. 'afternoon'
            x = new_concept(pipeline.token2concept(dp_name), amr)
            new_triples.add((str(mc), 'dayperiod', str(x)))
        #print('####', t.date_entity)
        for k, v in t.date_entity.iteritems():
            if k in ['weekday','dayperiod']: continue  # handled above
            if isinstance(v,basestring):
                v = pipeline.token2concept(str(v))
                x = new_concept(v, amr)
                x = str(x)
            else:  # leave literal numeric values alone
                #print(amr.triples(instances=False))
                x = v
            new_triples.add((str(mc), k, x))
        for i in range(start, end+1):  # for now mark everything as completed
            completed[0][i] = True
        for i,j in completed[1]:
            # mark dependency edges internal to the time expression as complete
            if i >= start and i <= end and j >= start and j <= end:
                completed[1][(i,j)] = True
        try:
            # date-entity/temporal-quantity concepts should have produced at least one new triple
            assert t.main_concept and (t.main_concept not in ['date-entity','temporal-quantity'] or len(new_triples)>nNewTrip)
        except AssertionError:
            if config.verbose or config.warn:
                print('Warning: Unhandled time expression', file=sys.stderr)
        nNewTrip = len(new_triples)
    #print(list(new_triples))
    amr = new_amr_from_old(amr, new_triples=list(new_triples))
    # TODO: mark all internal dependencies as completed?
    return depParse, amr, alignment, completed
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Add NomBank nominal predicates and their arguments to the AMR.

    Two passes over the loaded propositions: (1) create a separate predicate
    concept for each nominal predicate head (recorded in predheads, not in the
    token alignment, and linked to the plain noun concept via :-PRED);
    (2) attach the ARG* spans of each predicate, mapping roles through
    common_arg and marking matching dependency edges complete.
    """
    amr = inAMR
    triples = set()  # to add to the AMR
    props = pipeline.loadNProp(jsonFile)
    predheads = {}  # map head index to nominal predicate variable (not reflected in the alignment)
    # add all predicates first, so the roleset properly goes into the AMR
    for prop in props:
        baseform, roleset = prop["baseform"], prop["frame"]
        if not config.fullNombank and not verbalize.nompred2verbpred(roleset):
            continue  # TODO: maybe add just the pred stem & non-core args that map to AMR role names?
        preds = {tuple(arg) for arg in prop["args"] if arg[0]=='rel'}
        assert len(preds)==1
        pred = next(iter(preds))
        assert pred[2]==pred[3]  # multiword predicates?
        ph = pred[2]  # predicate head
        #px = alignment[:ph]
        # instead of aligning noun predicate to noun in the sentence, introduce the noun
        # predicate separately (so the plain noun concept can be its argument)
        px = predheads.get(ph)
        predconcept = pipeline.token2concept(roleset.replace('.','-n-'))
        if not (px or px==0):
            px = new_concept(predconcept, amr)  # no alignment here - instead use 'predheads'
            #print('###','newconcept',px,'/',predconcept)
            px0 = alignment[:ph]  # the plain noun concept's variable, if any
            if not (px0 or px0==0):
                px0 = new_concept_from_token(amr, alignment, ph, depParse, wTags)
            triples.add((str(px0), '-PRED', str(px)))
            #if len(prop["args"])==1 or (prop["args"][0][0] in ['Support','rel'] and prop["args"][1][0] in ['Support','rel']):
            #    triples.add((str(px), '-DUMMY', ''))
            predheads[ph] = px
        else:  # predicate already a concept in the AMR (e.g. inserted by the 'nouns' module)
            amr.node_to_concepts[str(px)] = predconcept  # change the name of the concept
        completed[0][ph] = True
    # now handle arguments
    for prop in props:
        baseform, roleset = prop["baseform"], prop["frame"]
        pred = [arg for arg in prop["args"] if arg[0]=='rel'][0]
        ph = pred[2]  # predicate head
        #px = alignment[:ph]
        if ph not in predheads: continue  # predicate was filtered out in the first pass
        px = predheads[ph]
        for rel,treenode,i,j,yieldS in prop["args"]:
            if i is None or j is None: continue  # TODO: special PropBank cases that need further work
            if rel in ['rel', 'Support']: continue
            assert rel[:3]=='ARG'
            h = choose_head(range(i,j+1), depParse)
            if h is None: continue  # TODO: improve coverage of complex spans
            # handle general proposition arguments
            if str(alignment[:h]) in amr.node_to_concepts:
                rel, amr.node_to_concepts[str(alignment[:h])] = common_arg(rel, amr.get_concept(str(alignment[:h])))
            else:
                drels = [dep["rel"] for dep in depParse[h]]
                rel = common_arg(rel, drels=drels)
            if isinstance(rel,tuple):
                # common_arg returned (role, literal value) - attach the literal directly
                rel, val = rel
                assert isinstance(val,Atom)
                triples.add((str(px), rel, val))
            else:
                x = amrget(amr, alignment, h, depParse, wTags)
                triples.add((str(px), rel, str(x)))
                #print('###',px,rel,x)
            completed[0][h] = True
            # if SRL argument link corresponds to a dependency edge, mark that edge as complete
            if (ph,h) in completed[1]:
                completed[1][(ph,h)] = True
                #print('completed ',(ph,h))
            if (h,ph) in completed[1]:  # also for reverse direction
                completed[1][(h,ph)] = True
                #print('completed ',(ph,h))
    #print(triples)
    amr = new_amr_from_old(amr, new_triples=list(triples))
    return depParse, amr, alignment, completed
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Final cleanup pass: normalize roles, merge coreferent nodes, prune
    CARDINAL wrappers, rename variables, and flag orphan nodes.

    Steps, in order:
      1. Rename :mod-NN and :MOD roles to :mod.
      2. Collapse :-COREF triples: build a replacement map from coreferent
         variable to representative, flatten replacement chains, and rewrite
         all triples accordingly (tracking a triple-to-token alignment).
      3. Strip decoration suffixes (-FALLBACK etc.) from concept names.
      4. Merge each two-triple CARDINAL node into a single triple.
      5. Rename integer-string variables to friendly initial-letter names.
      6. Ensure exactly the orphan variables (no non--DUMMY triples) carry a
         :-DUMMY annotation.
    """
    amr = inAMR
    # clean up role names: :mod-nn and :MOD => :mod
    repltriples = [(x, r, (y,)) for x,r,(y,) in amr.triples(instances=False) if r in ['mod-NN','MOD']]
    newtriples = [(x, 'mod', (y,)) for x,r,(y,) in repltriples]
    amr = new_amr_from_old(amr, new_triples=newtriples, avoid_triples=repltriples)
    # for each triple of the form <x :-COREF y>, delete the triple and replace
    # all occurrences of y with x
    triples = amr.triples(instances=False)
    # Use -COREF flags to establish a mapping from current to new variables
    coref_triples = [trip for trip in triples if trip[1]=='-COREF']
    replacements = {}  # variable to be removed -> representative variable
    for coref_trip in coref_triples:
        x, _, (y,) = coref_trip
        # TODO: strengthen the choice of main concepts for the cluster
        '''
        assert amr.get_concept(x).replace('-ROOT','')==amr.get_concept(y).replace('-ROOT','') \
            or (alignment[int(y):] is not None and wTags[alignment[int(y):]]["PartOfSpeech"] in ['PRP','PRP$']) \
            or amr.get_concept(y).endswith('-FALLBACK'), (y,ww[alignment[int(y):]],x,ww[alignment[int(x):]])
        '''
        if x in replacements and replacements[x]==y:  # avoid 2-node cycle
            continue
        replacements[y] = x
    # Avoid a chain of replacements, e.g. a -> b and b -> c
    # Assume there are no cycles, otherwise this will loop infinitely
    while set(replacements.keys()) & set(replacements.values()):
        for k in replacements.keys():
            if replacements[k] in replacements:
                assert replacements[k]!=k,('Self-coreferent?',k,'in',sentenceId,replacements)
                replacements[k] = replacements[replacements[k]]
                break
    # MERGE the coreferent nodes
    all_triples = []
    trip2tokAlignment = Alignment('many2one')  # source side indexes 'all_triples'
    newtriples = []
    oldtriples = coref_triples
    for a, r, (b,) in triples:
        if r=='-COREF': continue  # dropped entirely; encoded in 'replacements'
        trip = (a,r,(b,))
        change = False
        if a in replacements:
            a = replacements[a]
            change = True
        if b in replacements:
            b = replacements[b]
            change = True
        if change:
            newtriples.append((a,r,b))
            oldtriples.append(trip)
        # remember which token the (rewritten) triple's object variable aligns to
        if isinstance(b,basestring) and b in amr.node_to_concepts and alignment[int(b):] is not None:
            trip2tokAlignment.link(len(all_triples), alignment[int(b):])
        all_triples.append((a,r,b))
    amr = new_amr_from_old(amr, new_triples=newtriples, avoid_triples=oldtriples, avoid_concepts=replacements)
    # delete various decorations
    for k,v in amr.node_to_concepts.items():
        amr.node_to_concepts[k] = v.replace('-FALLBACK_PRON','').replace('-FALLBACK','').replace('-DATE_RELATIVE','').replace('-DATE','').replace('-TIME','')
    if config.verbose:
        print('Triple-to-token alignment:',{trip:ww[trip2tokAlignment[t:]]+'-'+str(trip2tokAlignment[t:]) for t,trip in enumerate(all_triples) if trip2tokAlignment[t:] is not None}, file=sys.stderr)
    # delete CARDINAL concepts (cf. the nes module) unless the concept has no parent
    # e.g. in wsj_0077.14, "154.2 million shares" is converted from
    # (s / shares :quant (c / CARDINAL :quant 154200000)) to (s / shares :quant 154200000)
    cardinals = {v for v,c in amr.node_to_concepts.items() if c=='CARDINAL'}
    for v in cardinals:
        old2newvars = {}
        triples = [(x,r,y) for x,r,(y,) in amr.triples(instances=False) if x==v or y==v]
        try:
            assert 1<=len(triples)<=2,(triples,amr)
        except AssertionError:
            # something complicated; just punt
            continue
        if len(triples)<2:
            continue  # no parent triple; leave the CARDINAL node alone
        t1, t2 = triples
        if t1[2]!=v:  # ensure t1 is the parent triple (object == v) and t2 the child (subject == v)
            t1, t2 = t2, t1
        assert t1[2]==t2[0]==v
        old2newvars[v] = t2[2]
        del amr.node_to_concepts[v]
        newtrip = (t1[0],t1[1],t2[2])
        assert newtrip[0]!=newtrip[2]
        # replace t1 and t2 with newtrip
        amr = new_amr_from_old(amr, new_triples=[newtrip], avoid_triples=[t1,t2])
        if config.verbose:
            print('merge CARDINAL:',[t1,t2],'->',newtrip, file=sys.stderr)
        t = all_triples.index(t1)
        #assert trip2tokAlignment[t:] is not None
        all_triples[t] = newtrip
        #assert trip2tokAlignment[all_triples.index(t2):] is None
        #amr = new_amr([(old2newvars.get(x,x), r, (old2newvars.get(y,y),)) for x,r,(y,) in amr.triples(instances=False) if x!=v], amr.node_to_concepts)
    # choose user-friendly variable names
    # assumes current variable names are all integer strings
    old2newvars = {}
    newconcepts = {}
    for v,c in amr.node_to_concepts.items():
        v2 = c[0].lower() if c[0].isalpha() else v  # first letter of the concept, if alphabetic
        if v2 in newconcepts:
            # append numerical suffix if necessary to disambiguate
            assert v2.isalpha()
            v2 += str(sum(1 for k in newconcepts.keys() if k[0]==v2))
        newconcepts[v2] = c
        old2newvars[v] = v2
    all_triples2 = []
    trip2tokAlignment2 = Alignment('many2one')  # carries the token alignment over to renamed triples
    for x,r,(y,) in amr.triples(instances=False):
        t = all_triples.index((x,r,y))
        if trip2tokAlignment[t:] is not None:
            trip2tokAlignment2.link(len(all_triples2), trip2tokAlignment[t:])
        all_triples2.append((old2newvars.get(x,x), r, (old2newvars.get(y,y),)))
    finalAlignment = {trip:ww[trip2tokAlignment2[t:]]+'-'+str(trip2tokAlignment2[t:]) for t,trip in enumerate(all_triples2) if trip2tokAlignment2[t:] is not None}
    if config.verbose:
        print('Final triple-to-token alignment:',finalAlignment, file=sys.stderr)
    amr = new_amr(all_triples2, newconcepts)
    # detect orphans (variables with no triples)
    orphans = {v: True for v in newconcepts}
    for x,r,(y,) in amr.triples(instances=False):
        if r=='-DUMMY': continue
        orphans[x] = False
        if y in orphans:
            orphans[y] = False
    orphans = [v for v in orphans if orphans[v]]
    if config.verbose:
        print(len(orphans),'orphans',orphans, file=sys.stderr)
    # ensure a node has a :-DUMMY annotation iff it is an orphan
    amr = new_amr([(x,r,(y,)) for x,r,(y,) in amr.triples(instances=False) if r!='-DUMMY']+[(o,'-DUMMY','') for o in orphans], newconcepts)

# NOTE: Python-2-only tuple parameter unpacking (removed by PEP 3113 in Python 3)
def swap_callback((x,r,(y,)),(x2,r2,(y2,))):
    #TODO: fix alignments
    pass
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Add PropBank verbal predicates and their arguments to the AMR.

    Two passes over the loaded propositions: (1) create a concept for each
    predicate head from its roleset (adding :-DUMMY when the predicate has no
    real arguments); (2) attach ARG* spans, mapping roles through common_arg,
    with special handling for ARGM-MOD (only 'will' is kept; other modals are
    deferred to the modals module).
    """
    amr = inAMR
    triples = set()  # to add to the AMR
    props = pipeline.loadVProp(jsonFile)
    # add all predicates first, so the roleset properly goes into the AMR
    for prop in props:
        baseform, roleset = prop["baseform"], prop["frame"]
        preds = {tuple(arg[:5]) for arg in prop["args"] if arg[0]=='rel'}
        assert len(preds)==1
        pred = next(iter(preds))
        assert pred[2]==pred[3]  # multiword predicates?
        ph = pred[2]  # predicate head
        if ph is None: continue  # TODO: improve coverage of complex spans
        px = alignment[:ph]
        if not (px or px==0):  # need a new variable
            px = new_concept_from_token(amr, alignment, ph, depParse, wTags, concept=pipeline.token2concept(roleset.replace('.','-')))
        if len(prop["args"])==1 or prop["args"][1][0].startswith('LINK'):
            # no real arguments: add a placeholder triple so the concept is printed
            triples.add((str(px), '-DUMMY', ''))
        completed[0][ph] = True
    # now handle arguments
    for prop in props:
        baseform, roleset = prop["baseform"], prop["frame"]
        pred = [arg for arg in prop["args"] if arg[0]=='rel'][0]
        ph = pred[2]  # predicate head
        if ph is None: continue  # TODO: improve coverage of complex spans
        px = alignment[:ph]
        for rel,treenode,i,j,yieldS,_ in prop["args"]:
            if i is None or j is None: continue  # TODO: special PropBank cases that need further work
            if rel in ['rel', 'LINK-PCR', 'LINK-SLC']: continue
            assert rel[:3]=='ARG'
            if i==j:
                #assert depParse[i], (tokens[i],rel,treenode,yieldS)
                if depParse[i] is None: continue  # TODO: is this appropriate? e.g. in wsj_0003.0
            #print(roleset,rel,i,j,yieldS)
            h = choose_head(range(i,j+1), depParse)
            if h is None: continue  # TODO: temporary?
            x = alignment[:h]  # index of variable associated with the argument head, if any
            # handle general proposition arguments
            if str(alignment[:h]) in amr.node_to_concepts:
                rel, amr.node_to_concepts[str(alignment[:h])] = common_arg(rel, amr.get_concept(str(alignment[:h])))
            else:
                drels = [dep["rel"] for dep in depParse[h]]
                rel = common_arg(rel, drels=drels)
            # verb-specific argument types
            if rel=='ARGM-MOD':
                if yieldS=='will':
                    pass  # skip this auxiliary
                else:
                    continue  # handle modal in a later module
            elif isinstance(rel,tuple):
                # common_arg returned (role, literal value) - attach the literal directly
                rel, val = rel
                assert isinstance(val,Atom)
                triples.add((str(px), rel, val))
            else:
                if not (x or x==0):  # need a new variable
                    x = new_concept_from_token(amr, alignment, h, depParse, wTags)
                triples.add((str(px), rel, str(x)))
            completed[0][h] = True
            # if SRL argument link corresponds to a dependency edge, mark that edge as complete
            if (ph,h) in completed[1]:
                completed[1][(ph,h)] = True
                #print('completed ',(ph,h))
            if (h,ph) in completed[1]:  # also for reverse direction
                completed[1][(h,ph)] = True
                #print('completed ',(ph,h))
    #print(triples)
    amr = new_amr_from_old(amr, new_triples=list(triples))
    return depParse, amr, alignment, completed
def main(sentenceId, jsonFile, tokens, ww, wTags, depParse, inAMR, alignment, completed):
    """Add named-entity concepts from BBN-style entity annotations.

    NUMEX entities (MONEY/CARDINAL/PERCENT) use the Stanford normalized value,
    possibly with a comparison wrapper concept (less-than, more-than, ...);
    *_DESC entities just get a head concept; other NEs get a typed concept
    marked -FALLBACK plus a :name node with quoted :opN parts. TIMEX entities
    are skipped (handled by the timex module). All covered tokens and their
    non-head parent edges are marked complete.
    """
    amr = inAMR
    triples = set()  # to add to the AMR
    entities = pipeline.loadBBN(jsonFile)
    for i,j,name,coarse,fine,raw in entities:
        if raw.startswith('<TIMEX'): continue  # use the timex module (sutime output) instead
        h = choose_head(range(i,j+1), depParse,
                        fallback=lambda frontier: max(frontier) if len(frontier)==2 and ww[min(frontier)]=='than' else False)
        # ^ dirty hack: in 'more than 3 times' (wsj_0003.12), [more than 3] is a value expression
        # but 'than' and '3' both attach to 'times' in the dependency parse.
        #print((i,j),name,h,depParse[h+1]['dep'], file=sys.stderr)
        x = alignment[:h]  # index of variable associated with i's head, if any
        if raw.startswith('<NUMEX'):
            if coarse in ['MONEY','CARDINAL','PERCENT']:
                # get normalized value from Stanford tools
                v = wTags[h]["NormalizedNamedEntityTag"]
                wrapper = None
                if v[0] in '<>~':
                    # leading comparison operator, e.g. '>=' or '~'
                    if len(v)==1:
                        print('Warning: Unexpected NormalizedNamedEntityTag:',v,'for',raw, file=sys.stderr)
                    else:
                        if v[1]=='=':
                            reln = v[:2]
                            v = v[2:]
                        else:
                            reln = v[0]
                            v = v[1:]
                        concept = {'<': 'less-than', '>': 'more-than', '<=': 'no-more-than', '>=': 'at-least', '~': 'about'}[reln]
                        wrapper = new_concept_from_token(amr, alignment, h, depParse, wTags, concept=concept)
                if coarse=='MONEY':
                    m = re.match(r'^([\$¥£])(\d+\.\d+(E-?\d+)?)$', v)
                    if not m:
                        assert False,v
                    u = m.group(1)  # currency symbol
                    v = m.group(2)  # numeric amount
                elif coarse=='PERCENT':
                    m = re.match(r'^%(\d+\.\d+(E-?\d+)?)$', v)
                    if not m:
                        assert False,v
                    v = m.group(1)
                try:
                    # convert to a numeric literal; integral floats become ints
                    v = float(v)
                    if str(v).endswith('.0'):
                        v = int(v)
                except ValueError:
                    pass
                if (wrapper is None or coarse=='MONEY') and not (x or x==0):  # need a new variable
                    kind = {'MONEY': 'monetary-quantity', 'PERCENT': 'percentage-entity'}.get(coarse, coarse.upper())
                    if wrapper is None:
                        x = new_concept_from_token(amr, alignment, h, depParse, wTags, concept=kind)
                    else:
                        # if there is a wrapper concept (e.g. 'more-than'), it is aligned,
                        # so don't provide an alignment for x
                        x = new_concept(kind, amr)
                if (x or x==0):
                    triples.add((str(x), 'value' if coarse=='PERCENT' else 'quant', v))
                    if wrapper is not None:
                        triples.add((str(wrapper), 'op1', str(x)))
                elif wrapper is not None:
                    triples.add((str(wrapper), 'op1', v))  # e.g. more-than :op1 41
                if coarse=='MONEY':
                    y = new_concept({'$': 'dollar', '¥': 'yen', '£': 'pound'}[u.encode('utf-8')], amr)
                    triples.add((str(x), 'unit', str(y)))
            elif coarse=='ORDINAL':
                pass  # skip--no special treatment in AMR guidelines, though the normalized value could be used
            else:
                assert False,(i,j,raw)
        elif coarse.endswith('_DESC'):
            # make the phrase head word the AMR head concept
            # (could be a multiword term, like Trade Representative)
            if not (x or x==0):  # need a new variable
                x = new_concept_from_token(amr, alignment, h, depParse, wTags)
                # ensure the concept participates in some triple so it is printed
                triples.add((str(x), '-DUMMY', ''))
        else:
            if coarse.lower()=='person' and i>0 and ww[i-1] and ww[i-1].lower() in ['mr','mr.','mister','master','sir','mrs','mrs.','miss']:
                # Extend the NE to include formal titles that do not get concepts
                name = ww[i-1]+' '+name
                i -= 1
            if not (x or x==0):  # need a new variable
                ne_class = fine.lower().replace('other','') or coarse.lower()
                concept, amr_name = amrify(ne_class, name)
                # -FALLBACK indicates extra information not in the sentence (NE class)
                x = new_concept_from_token(amr, alignment, h, depParse, wTags,
                                           concept=pipeline.token2concept(concept)+'-FALLBACK')
                n = new_concept('name', amr)
                triples.add((str(x), 'name', str(n)))
                for iw,w in enumerate(amr_name.split()):
                    triples.add((str(n), 'op'+str(iw+1), '"'+w+'"'))
        for k in range(i,j+1):
            assert not completed[0][k]
            completed[0][k] = True
            #print('completed token',k)
            if k!=h:
                # we don't need to attach non-head parts of names anywhere else
                for link in parent_edges(depParse[k]):
                    completed[1][link] = True
    amr = new_amr_from_old(amr, new_triples=list(triples))
    return depParse, amr, alignment, completed