def do_passives(self):
    """Swap the subject/object labels around passive-voice nodes.

    For every node whose ``"Passive Voice"`` feature is truthy:

    * each edge to a neighbour reached through ``subject_dependencies`` is
      removed and re-added with the label ``"obj"``;
    * each edge to a ``"prep_by"`` neighbour is removed and re-added with
      the label ``"subj"``.
    """

    def relabel(edge, new_label):
        # Relabelling is done by deleting the edge and adding it back.
        self.del_edge(edge)
        self.add_edge(edge, new_label)

    passive_nodes = find_nodes(
        self, lambda node: node.features.get("Passive Voice", False))
    for passive in passive_nodes:
        # Neighbours are read once, before any edge of this node is touched.
        around = passive.neighbors()
        for subject in multi_get(around, subject_dependencies):
            relabel((passive, subject), "obj")
        for by_phrase in multi_get(around, ["prep_by"]):
            relabel((passive, by_phrase), "subj")
def do_acomp(self):
    """Resolve ``acomp`` edges that leave a predicate node.

    Only predicates with exactly one neighbour reached through
    ``subject_dependencies`` are handled; every other edge is left as is.

    * Modal case - the predicate's first word, or its ``"Lemma"`` feature,
      is in ``self.modalVerbs``: the acomp node is flagged as predicate,
      gets the subject under ``domain_label``,
      ``duplicate_all_incidents(source=predNode, target=acompNode)`` is
      called, and a ``SOURCE_LABEL`` edge is added from the acomp node to
      the old predicate. The old predicate is deleted when it has no
      neighbours left and is a single word found in ``contractions``;
      otherwise ``"acomp_as_modal"`` is recorded in ``self.types``.
    * Any other predicate: ``"acomp_as_mwe"`` is recorded and the two nodes
      are combined with ``merge_nodes``.
    """

    def is_predicate_acomp(edge):
        # Explicit unpacking replaces the Python 2-only ``lambda ((u, v))``
        # tuple parameter, which is a syntax error on Python 3 (PEP 3113).
        u, v = edge
        return self.edge_label((u, v)) == "acomp" and u.isPredicate

    edges = find_edges(self, is_predicate_acomp)
    for predNode, acompNode in edges:
        neighbors = predNode.neighbors()
        subjs = multi_get(neighbors, subject_dependencies)
        if len(subjs) != 1:
            # Zero or several subjects: nothing is rewritten for this edge.
            continue

        is_modal = (predNode.text[0].word in self.modalVerbs) or (
            predNode.features.get("Lemma", "") in self.modalVerbs)
        if not is_modal:
            self.types.add("acomp_as_mwe")
            merge_nodes(gr=self, node1=predNode, node2=acompNode)
            continue

        subj = subjs[0]
        self.del_edge((predNode, acompNode))
        self.add_edge((acompNode, subj), label=domain_label)
        acompNode.isPredicate = True
        self.del_edge((predNode, subj))
        duplicate_all_incidents(gr=self, source=predNode, target=acompNode)
        self.add_edge((acompNode, predNode), label=SOURCE_LABEL)

        is_bare_contraction = (
            (len(self.neighbors(predNode)) == 0)
            and (len(predNode.text) == 1)
            and (predNode.text[0].word in contractions))
        if is_bare_contraction:
            self.del_node(predNode)
        else:
            self.types.add("acomp_as_modal")
def do_acomp(self):
    """Resolve ``acomp`` edges whose source node is a predicate.

    An edge is processed only when its predicate has exactly one neighbour
    reached through ``subject_dependencies``.

    * If the predicate's first word or its ``"Lemma"`` feature is in
      ``self.modalVerbs``, the acomp node takes over: it is marked as a
      predicate, linked to the subject with ``domain_label``, passed as
      ``target`` to ``duplicate_all_incidents`` (with the old predicate as
      ``source``) and linked to the old predicate with ``SOURCE_LABEL``.
      The old predicate is then deleted if it has no neighbours and is a
      single word from ``contractions``; if it is kept,
      ``"acomp_as_modal"`` is added to ``self.types``.
    * Otherwise ``"acomp_as_mwe"`` is added to ``self.types`` and the pair
      is handed to ``merge_nodes``.
    """
    # Index into the edge instead of using a tuple parameter: the original
    # ``lambda ((u, v)): ...`` only parses on Python 2 (see PEP 3113).
    edges = find_edges(
        self,
        lambda edge: self.edge_label((edge[0], edge[1])) == "acomp"
        and edge[0].isPredicate)

    for predNode, acompNode in edges:
        subjs = multi_get(predNode.neighbors(), subject_dependencies)
        if len(subjs) != 1:
            # Ambiguous or missing subject - leave this edge untouched.
            continue

        if (predNode.text[0].word in self.modalVerbs) or (
                predNode.features.get("Lemma", "") in self.modalVerbs):
            subj = subjs[0]
            self.del_edge((predNode, acompNode))
            self.add_edge((acompNode, subj), label=domain_label)
            acompNode.isPredicate = True
            self.del_edge((predNode, subj))
            duplicate_all_incidents(gr=self,
                                    source=predNode,
                                    target=acompNode)
            self.add_edge((acompNode, predNode), label=SOURCE_LABEL)
            if ((len(self.neighbors(predNode)) == 0)
                    and (len(predNode.text) == 1)
                    and (predNode.text[0].word in contractions)):
                self.del_node(predNode)
            else:
                self.types.add("acomp_as_modal")
        else:
            self.types.add("acomp_as_mwe")
            merge_nodes(gr=self, node1=predNode, node2=acompNode)
def _fix(self):
    """Run one pass of structural repairs over the graph.

    Repairs, in order:

    1. ``mark`` edges: the first target that has no neighbours and consists
       of the single word ``"that"`` is deleted, and the method returns
       ``True`` immediately.
    2. ``rcmod`` edges ``(u, v)`` without a reverse edge: ``(v, u)`` is added
       with ``ARG_LABEL`` for the first such edge, and the method returns
       ``True`` immediately.
    3. ``prep`` edges ``(u, v)`` where ``v`` has exactly one neighbour and a
       ``"pobj"`` entry: unless ``u`` already has an edge to that pobj, the
       preposition token is appended to ``u.surface_form``, a
       ``"prep_<word>"`` edge ``(u, pobj)`` is added and ``v`` is deleted.
    4. ``pobj`` edges ``(u, v)`` whose ``u.incidents()`` has no ``"prep"``
       entry, visited in order of ``u.minIndex()``: if ``u`` has neighbours
       under a ``prepc_*`` label that themselves have no neighbours, the
       one with the smallest ``minIndex()`` is deleted and ``(u, v)`` is
       relabelled with the label that neighbour was attached by.

    Returns:
        ``True`` if repair 1 or 2 fired, ``False`` otherwise. (The flattened
        original fell off the end and returned ``None`` here; ``False`` is
        equally falsy and makes the return type consistent.)

    NOTE(review): the early ``return True`` suggests the caller re-invokes
    this method until it reports no change - confirm against the caller.
    """
    # The predicates index into the edge rather than using tuple parameters
    # (``lambda (u, v): ...``), which only parse on Python 2 (PEP 3113).

    # remove mark->that
    mark_edges = find_edges(
        self, lambda edge: self.edge_label(edge) == "mark")
    for (u, v) in mark_edges:
        if ((len(self.neighbors(v)) == 0) and (len(v.text) == 1)
                and (v.text[0].word == "that")):
            self.del_node(v)
            return True

    # rcmod with no relation to father
    rcmod_edges = find_edges(
        self,
        lambda edge: (self.edge_label(edge) == "rcmod")
        and (not self.has_edge((edge[1], edge[0]))))
    for u, v in rcmod_edges:
        self.add_edge((v, u), label=ARG_LABEL)
        return True

    # prep collapse
    prep_edges = find_edges(
        self,
        lambda edge: (self.edge_label(edge) == "prep")
        and (len(self.neighbors(edge[1])) == 1)
        and ("pobj" in edge[1].neighbors()))
    for (u, v) in prep_edges:
        pobj = v.neighbors()["pobj"][0]
        if not self.has_edge((u, pobj)):
            w = v.text[0]
            u.surface_form += [w]
            self.add_edge((u, pobj), label="prep_" + w.word)
            # NOTE(review): nesting rebuilt from whitespace-flattened
            # source. The node is removed only when its word was moved onto
            # ``u`` - confirm this was not meant to run unconditionally.
            self.del_node(v)

    # fix dependency collapse bugs
    pobj_edges = find_edges(
        self,
        lambda edge: (self.edge_label(edge) == "pobj")
        and ("prep" not in edge[0].incidents()))
    for (u, v) in sorted(pobj_edges, key=lambda edge: edge[0].minIndex()):
        neighbors = u.neighbors()
        prepc_labels = [rel for rel in neighbors if rel.startswith("prepc_")]
        candidates = [
            n for n in multi_get(neighbors, prepc_labels)
            if len(self.neighbors(n)) == 0
        ]
        if not candidates:
            continue
        # min() keeps the first of equal keys, like a stable sort followed
        # by taking element 0.
        curToDel = min(candidates, key=lambda n: n.minIndex())
        rel = self.edge_label((u, curToDel))
        self.del_edge((u, v))
        self.add_edge((u, v), label=rel)
        self.del_node(curToDel)

    return False
def calcTopNodes(self):
    """Mark top nodes, then spread the mark until nothing changes.

    First, every node that does not appear in
    ``multi_get(accessibility_wo_self(self), self.nodes())`` gets
    ``makeTopNode()``. Then, repeatedly, for each node with a truthy
    ``"top"`` feature, ``makeTopNode()`` is called on the nodes listed under
    ``SOURCE_LABEL`` in its ``incidents()`` and, when ``isConj()`` holds, on
    all of its neighbours. The loop repeats while any of those calls
    returned a truthy value.

    NOTE(review): this relies on ``makeTopNode()`` returning a truthy value
    only when it actually changed the node - confirm in the node class.
    """
    all_accessible = multi_get(accessibility_wo_self(self), self.nodes())
    for topNode in [n for n in self.nodes() if n not in all_accessible]:
        topNode.makeTopNode()

    change = True
    while change:
        change = False
        # Snapshot the current top nodes; nodes marked during this sweep are
        # picked up by the next iteration of the while loop.
        for topNode in [n for n in self.nodes() if n.features.get("top", [])]:
            for sourceNode in topNode.incidents().get(SOURCE_LABEL, []):
                # Accumulate instead of overwriting: the original assigned
                # each call's result to ``change``, so a later falsy result
                # erased an earlier truthy one and could end the
                # propagation early.
                if sourceNode.makeTopNode():
                    change = True
            if topNode.isConj():
                for n in self.neighbors(topNode):
                    if n.makeTopNode():
                        change = True
def _fix(self):
    """Apply one round of graph clean-ups and report whether to run again.

    The round consists of four steps:

    1. Delete the target of a ``mark`` edge when it has no neighbours and is
       the single word ``"that"``; return ``True`` right after the first
       deletion.
    2. For the first ``rcmod`` edge ``(u, v)`` that has no reverse edge, add
       ``(v, u)`` labelled ``ARG_LABEL`` and return ``True``.
    3. Collapse each ``prep`` edge ``(u, v)`` whose ``v`` has exactly one
       neighbour and a ``"pobj"`` entry: if ``u`` has no edge to that pobj
       yet, append the preposition token to ``u.surface_form``, add a
       ``"prep_<word>"`` edge from ``u`` to the pobj and delete ``v``.
    4. For each ``pobj`` edge ``(u, v)`` whose ``u.incidents()`` lacks
       ``"prep"`` (ordered by ``u.minIndex()``): among ``u``'s ``prepc_*``
       neighbours without neighbours of their own, take the one with the
       lowest ``minIndex()``, relabel ``(u, v)`` with its label and delete
       it.

    Returns:
        ``True`` when step 1 or 2 changed the graph, otherwise ``False``.
        The flattened original returned ``None`` implicitly on this path;
        ``False`` keeps the truthiness and matches the explicit return used
        by the other copy of this method in the file.
    """

    # Named predicates with explicit unpacking replace the original
    # ``lambda (u, v): ...`` tuple parameters, which were removed from the
    # language in Python 3 (PEP 3113).
    def is_mark(edge):
        u, v = edge
        return self.edge_label((u, v)) == "mark"

    def is_rcmod_without_back_edge(edge):
        u, v = edge
        return (self.edge_label((u, v)) == "rcmod") and (
            not self.has_edge((v, u)))

    def is_collapsible_prep(edge):
        u, v = edge
        return ((self.edge_label((u, v)) == "prep")
                and (len(self.neighbors(v)) == 1)
                and ("pobj" in v.neighbors()))

    def is_pobj_without_prep(edge):
        u, v = edge
        return (self.edge_label((u, v)) == "pobj") and (
            "prep" not in u.incidents())

    def source_min_index(edge):
        return edge[0].minIndex()

    # remove mark->that
    for (u, v) in find_edges(self, is_mark):
        if ((len(self.neighbors(v)) == 0) and (len(v.text) == 1)
                and (v.text[0].word == "that")):
            self.del_node(v)
            return True

    # rcmod with no relation to father
    for u, v in find_edges(self, is_rcmod_without_back_edge):
        self.add_edge((v, u), label=ARG_LABEL)
        return True

    # prep collapse
    for (u, v) in find_edges(self, is_collapsible_prep):
        pobj = v.neighbors()["pobj"][0]
        if self.has_edge((u, pobj)):
            continue
        w = v.text[0]
        u.surface_form += [w]
        self.add_edge((u, pobj), label="prep_" + w.word)
        # NOTE(review): indentation was lost in the source; deleting ``v``
        # only when the edge was added is a reconstruction - confirm.
        self.del_node(v)

    # fix dependency collapse bugs
    for (u, v) in sorted(find_edges(self, is_pobj_without_prep),
                         key=source_min_index):
        neighbors = u.neighbors()
        prepc_labels = [rel for rel in neighbors if rel.startswith("prepc_")]
        candidates = [
            n for n in multi_get(neighbors, prepc_labels)
            if len(self.neighbors(n)) == 0
        ]
        if candidates:
            # First-minimum semantics of min() match the original
            # stable-sort-then-index-0.
            curToDel = min(candidates, key=lambda n: n.minIndex())
            rel = self.edge_label((u, curToDel))
            self.del_edge((u, v))
            self.add_edge((u, v), label=rel)
            self.del_node(curToDel)

    return False
# do_prop(self): rewrites "amod", copular and "appos" constructions in the
# graph using self.createPropRel and getCopular. The inline comments in the
# body mark three passes:
#
#   1. "prenominal of definite": iterates edges labelled "amod", tests
#      domain.pos() against determined_labels, and uses
#      createPropRel(domain, mod), mod.features["top"] = True and
#      del_edge((domain, mod)).
#   2. "find copular": iterates predicate nodes whose text is a single word
#      found in copular_verbs. Subjects come from subject_dependencies,
#      complements from clausal_complements (falling back to "dep"). Per
#      subject it either calls createPropRel(subj, obj) or records "SameAs"
#      in self.types and builds a node with getCopular that is linked via
#      FIRST_ENTITY_LABEL / SECOND_ENTITY_LABEL; edges are then copied onto
#      `head` with duplicateEdge and the copular node is deleted
#      ('erase "be" node'). "complicated BE" and "debug" may also be added
#      to self.types.
#   3. "find appositions": for each "appos" edge (subj, obj), every
#      (curFather, subj) edge is re-added and (curFather, obj) is added with
#      the same label; (subj, obj) is appended to curFather.features["dups"]
#      and "apposIndex" is incremented. Then either createPropRel(subj, obj)
#      with obj.features["top"] = True, or a "SameAs" node from getCopular;
#      the (subj, obj) edge is deleted.
#
# NOTE(review): this definition has lost its line breaks and indentation
# (it runs across two physical lines), so everything after the first "#" is
# a comment to a parser. The nesting of several statements - notably
# del_node(curNode), the head.features.update / duplicateEdge tail and the
# final del_edge((subj, obj)) - cannot be determined from this text; restore
# it from version control rather than by guesswork.
# NOTE(review): `lambda (u, v): ...` is Python 2-only tuple-parameter syntax
# (removed by PEP 3113); it needs converting before this runs on Python 3.
def do_prop(self): # prenominal of definite edges = find_edges(self, lambda (u, v): self.edge_label( (u, v)) == "amod") for domain, mod in edges: if domain.pos( ) in determined_labels: # the np by itself is definite self.createPropRel(domain=domain, mod=mod) mod.features["top"] = True self.del_edge((domain, mod)) # copular on adjective or indefinite # and sameAs otherwise # find copular nodes = find_nodes( self, lambda n: len(n.text) == 1 and n.text[0].word in copular_verbs and n.isPredicate) for curNode in nodes: curNeighbours = curNode.neighbors() subjs = multi_get(curNeighbours, subject_dependencies) objs = multi_get(curNeighbours, clausal_complements) if not objs: objs = multi_get(curNeighbours, ["dep"]) others = [ n for n in self.neighbors(curNode) if n not in subjs + objs ] if (len(objs) > 0) and ( len(subjs) > 0): #and (not others) and (len(objs) == 1): others += objs[1:] if others: self.types.add("complicated BE") obj = objs[0] if len(objs) > 1: self.types.add("debug") for subj in subjs: if 'Lemma' in curNode.features: del (curNode.features['Lemma']) if (subj in self.neighbors(obj)): obj.features.update(curNode.features) else: if (not isDefinite(obj)) or (obj in curNeighbours.get( "acomp", [])): self.createPropRel(domain=subj, mod=obj) head = obj obj.surface_form += curNode.surface_form else: self.types.add("SameAs") self.del_edge((curNode, subj)) if self.has_edge((curNode, obj)): self.del_edge((curNode, obj)) # self.del_edges([(curNode, subj), (curNode, obj)]) copularNode = getCopular(self, curNode.text[0].index, features=curNode.features) copularNode.surface_form = curNode.surface_form self.add_edge((copularNode, subj), label=FIRST_ENTITY_LABEL) self.add_edge((copularNode, obj), label=SECOND_ENTITY_LABEL) head = copularNode head.features.update(curNode.features) for curFather in self.incidents(curNode): if not self.has_edge((curFather, head)): duplicateEdge(graph=self, orig=(curFather, curNode), new=(curFather, head)) for curOther in others: if not 
self.has_edge((obj, curOther)): duplicateEdge(graph=self, orig=(curNode, curOther), new=(head, curOther)) # erase "be" node self.del_node(curNode) # find appositions for subj, obj in find_edges( self, lambda edge: self.edge_label(edge) == "appos"): # duplicate relations for curFather in self.incidents(subj): curIndex = curFather.features.get("apposIndex", 0) + 1 # curLabel = "{0},{1}".format(curIndex,self.edge_label((curFather,subj))) curLabel = self.edge_label((curFather, subj)) self.del_edge((curFather, subj)) self.add_edge((curFather, subj), curLabel) self.add_edge((curFather, obj), curLabel) ls = curFather.features.get("dups", []) ls.append((subj, obj)) curFather.features["dups"] = ls curFather.features["apposIndex"] = curIndex if (not isDefinite(subj) and not isDefinite(obj)) or (obj in subj.neighbors().get( "acomp", [])): self.createPropRel(domain=subj, mod=obj) obj.features["top"] = True else: # add new node # TODO: subj here is a problem - should point to the comma or something self.types.add("SameAs") copularNode = getCopular(self, subj.text[0].index, features={}) copularNode.surface_form = [] self.add_edge((copularNode, subj), label=FIRST_ENTITY_LABEL) self.add_edge((copularNode, obj), label=SECOND_ENTITY_LABEL) self.del_edge((subj, obj))
def extract_entities(self):
    """Return the non-predicate nodes that have an argument-type incident.

    A node is kept when ``isPredicate`` is falsy and
    ``multi_get(node.incidents(), arguments_dependencies)`` is truthy.
    The result is whatever ``find_nodes`` returns for that filter.
    """

    def is_entity(node):
        if node.isPredicate:
            return False
        # The lookup result itself is handed back, as in the original
        # ``and`` expression; find_nodes sees the same value.
        return multi_get(node.incidents(), arguments_dependencies)

    return find_nodes(graph=self, filterFunc=is_entity)
# do_prop(self): second copy of the method in this file; it differs from the
# other copy only in spacing. It rewrites "amod", copular and "appos"
# constructions using self.createPropRel and getCopular, in three passes
# that the inline comments label as:
#
#   1. "prenominal of definite" - over edges labelled "amod": checks
#      domain.pos() against determined_labels and uses
#      createPropRel(domain, mod), mod.features["top"] = True and
#      del_edge((domain, mod)).
#   2. "find copular" - over predicate nodes whose text is one word from
#      copular_verbs: collects subjects (subject_dependencies) and
#      complements (clausal_complements, else "dep"); per subject either
#      createPropRel(subj, obj), or a getCopular node tied to subj/obj with
#      FIRST_ENTITY_LABEL / SECOND_ENTITY_LABEL plus "SameAs" in self.types;
#      duplicateEdge copies edges onto `head`; the copular node is deleted.
#   3. "find appositions" - over "appos" edges (subj, obj): each
#      (curFather, subj) edge is re-added and mirrored onto obj with the
#      same label, curFather.features["dups"] / ["apposIndex"] are updated,
#      then createPropRel or a "SameAs" getCopular node is used and the
#      (subj, obj) edge is deleted.
#
# NOTE(review): line breaks and indentation are missing (the definition
# spans two physical lines), so block nesting - e.g. whether
# del_node(curNode) and the final del_edge((subj, obj)) are conditional -
# is not recoverable from this text. Restore from version control.
# NOTE(review): `lambda (u, v): ...` is Python 2-only (see PEP 3113).
def do_prop(self): # prenominal of definite edges = find_edges(self, lambda (u, v):self.edge_label((u, v)) == "amod") for domain, mod in edges: if domain.pos() in determined_labels: # the np by itself is definite self.createPropRel(domain=domain, mod=mod) mod.features["top"] = True self.del_edge((domain, mod)) # copular on adjective or indefinite # and sameAs otherwise # find copular nodes = find_nodes(self, lambda n: len(n.text) == 1 and n.text[0].word in copular_verbs and n.isPredicate) for curNode in nodes: curNeighbours = curNode.neighbors() subjs = multi_get(curNeighbours, subject_dependencies) objs = multi_get(curNeighbours, clausal_complements) if not objs: objs = multi_get(curNeighbours,["dep"]) others = [n for n in self.neighbors(curNode) if n not in subjs + objs] if (len(objs)>0)and (len(subjs)>0): #and (not others) and (len(objs) == 1): others+=objs[1:] if others: self.types.add("complicated BE") obj = objs[0] if len(objs)>1: self.types.add("debug") for subj in subjs: if 'Lemma' in curNode.features: del(curNode.features['Lemma']) if (subj in self.neighbors(obj)): obj.features.update(curNode.features) else: if (not isDefinite(obj)) or (obj in curNeighbours.get("acomp", [])): self.createPropRel(domain=subj, mod=obj) head = obj obj.surface_form += curNode.surface_form else: self.types.add("SameAs") self.del_edge((curNode, subj)) if self.has_edge((curNode,obj)): self.del_edge((curNode, obj)) # self.del_edges([(curNode, subj), (curNode, obj)]) copularNode = getCopular(self, curNode.text[0].index, features=curNode.features) copularNode.surface_form = curNode.surface_form self.add_edge((copularNode, subj), label=FIRST_ENTITY_LABEL) self.add_edge((copularNode, obj), label=SECOND_ENTITY_LABEL) head = copularNode head.features.update(curNode.features) for curFather in self.incidents(curNode): if not self.has_edge((curFather, head)): duplicateEdge(graph=self, orig=(curFather, curNode), new=(curFather, head)) for curOther in others: if not self.has_edge((obj, 
curOther)): duplicateEdge(graph=self, orig=(curNode, curOther), new=(head, curOther)) # erase "be" node self.del_node(curNode) # find appositions for subj, obj in find_edges(self, lambda edge:self.edge_label(edge) == "appos"): # duplicate relations for curFather in self.incidents(subj): curIndex = curFather.features.get("apposIndex", 0) + 1 # curLabel = "{0},{1}".format(curIndex,self.edge_label((curFather,subj))) curLabel = self.edge_label((curFather, subj)) self.del_edge((curFather, subj)) self.add_edge((curFather, subj), curLabel) self.add_edge((curFather, obj), curLabel) ls = curFather.features.get("dups", []) ls.append((subj, obj)) curFather.features["dups"] = ls curFather.features["apposIndex"] = curIndex if (not isDefinite(subj) and not isDefinite(obj)) or (obj in subj.neighbors().get("acomp", [])): self.createPropRel(domain=subj, mod=obj) obj.features["top"] = True else: # add new node # TODO: subj here is a problem - should point to the comma or something self.types.add("SameAs") copularNode = getCopular(self, subj.text[0].index, features={}) copularNode.surface_form = [] self.add_edge((copularNode, subj), label=FIRST_ENTITY_LABEL) self.add_edge((copularNode, obj), label=SECOND_ENTITY_LABEL) self.del_edge((subj, obj))
def extract_entities(self):
    """Collect the graph's entity nodes.

    Delegates to ``find_nodes`` with a filter that accepts a node when it is
    not a predicate and ``multi_get`` finds something for
    ``arguments_dependencies`` among the node's ``incidents()``.
    """

    def qualifies(node):
        # Same short-circuit value as the original expression: False for a
        # predicate, otherwise the multi_get result.
        not_predicate = not node.isPredicate
        return not_predicate and multi_get(node.incidents(),
                                           arguments_dependencies)

    entities = find_nodes(graph=self, filterFunc=qualifies)
    return entities
def _fix(self):
    """Perform one clean-up pass over the graph.

    Steps, in the order they run:

    1. ``mark`` edges - delete the first target that has no neighbours and
       is the single word ``"that"``, then return ``True``.
    2. ``rcmod`` edges with no reverse edge - add the reverse edge with
       ``ARG_LABEL`` for the first one found, then return ``True``.
    3. ``prep`` edges ``(u, v)`` where ``v`` has exactly one neighbour and a
       ``"pobj"`` entry - unless ``u`` already reaches the pobj, move the
       preposition token onto ``u.surface_form``, add a ``"prep_<word>"``
       edge to the pobj and delete ``v``.
    4. ``pobj`` edges whose source has no ``"prep"`` in ``incidents()``, in
       ``minIndex()`` order of the source - relabel the edge with the label
       of the lowest-index childless ``prepc_*`` neighbour and delete that
       neighbour.
    5. ``agent`` edges - re-add each one with the label ``"prep_by"``.

    Returns:
        ``True`` if step 1 or 2 fired, ``False`` once steps 3-5 have run.

    NOTE(review): the source had lost its indentation; in step 3 the
    deletion of ``v`` is placed inside the "edge not present yet" branch -
    confirm against version control.
    """

    def label_of(pair):
        return self.edge_label((pair[0], pair[1]))

    def is_dangling_rcmod(pair):
        return (label_of(pair) == "rcmod") and (
            not self.has_edge((pair[1], pair[0])))

    def is_collapsible_prep(pair):
        prep_node = pair[1]
        return ((label_of(pair) == "prep")
                and (len(self.neighbors(prep_node)) == 1)
                and ("pobj" in prep_node.neighbors()))

    def is_pobj_without_prep(pair):
        return (label_of(pair) == "pobj") and (
            "prep" not in pair[0].incidents())

    # remove mark->that
    for _, marker in find_edges(self, lambda pair: label_of(pair) == "mark"):
        childless = len(self.neighbors(marker)) == 0
        if childless and (len(marker.text) == 1) and (
                marker.text[0].word == "that"):
            self.del_node(marker)
            return True

    # rcmod with no relation to father
    for father, clause in find_edges(self, is_dangling_rcmod):
        self.add_edge((clause, father), label=ARG_LABEL)
        return True

    # prep collapse
    prep_edges = find_edges(self, is_collapsible_prep)
    if prep_edges:
        for governor, prep_node in prep_edges:
            pobj = prep_node.neighbors()["pobj"][0]
            if self.has_edge((governor, pobj)):
                continue
            prep_token = prep_node.text[0]
            governor.surface_form += [prep_token]
            self.add_edge((governor, pobj), label="prep_" + prep_token.word)
            self.del_node(prep_node)

    # fix dependency collapse bugs
    ordered = sorted(find_edges(self, is_pobj_without_prep),
                     key=lambda pair: pair[0].minIndex())
    for governor, pobj in ordered:
        outgoing = governor.neighbors()
        prepc_labels = [rel for rel in outgoing if rel.startswith("prepc_")]
        candidates = sorted(
            [n for n in multi_get(outgoing, prepc_labels)
             if len(self.neighbors(n)) == 0],
            key=lambda n: n.minIndex())
        if not candidates:
            continue
        stale = candidates[0]
        relation = self.edge_label((governor, stale))
        self.del_edge((governor, pobj))
        self.add_edge((governor, pobj), label=relation)
        self.del_node(stale)

    # change agent edges with "prep_by"
    agent_edges = find_edges(
        self, lambda edge: (self.edge_label(edge) == "agent"))
    for agent_edge in agent_edges:
        self.del_edge(agent_edge)
        self.add_edge(agent_edge, label="prep_by")

    # (A disabled step that added inverse xcomp edges used to follow here.)
    return False