Exemple #1
0
 def do_passives(self):
     """Swap subject and agent roles around nodes marked as passive.

     For every node whose "Passive Voice" feature is truthy, each neighbor
     reached through one of ``subject_dependencies`` has its edge removed
     and re-added with "obj"; afterwards each neighbor reached through
     "prep_by" has its edge removed and re-added with "subj".
     """
     def is_passive(node):
         return node.features.get("Passive Voice", False)

     # (relations to look up, value passed to add_edge), in processing order
     relabel_plan = (
         (subject_dependencies, "obj"),
         (["prep_by"], "subj"),
     )

     for passive_node in find_nodes(self, is_passive):
         # Taken once per node, before any of its edges are rewritten.
         by_relation = passive_node.neighbors()
         for relations, new_label in relabel_plan:
             for child in multi_get(by_relation, relations):
                 edge = (passive_node, child)
                 self.del_edge(edge)
                 self.add_edge(edge, new_label)
Exemple #2
0
    def do_acomp(self):
        """Restructure ``acomp`` edges that leave a predicate node.

        Only predicates with exactly one neighbor found through
        ``subject_dependencies`` are touched; all others are left as they are.

        * Modal case -- the predicate's first word, or its "Lemma" feature, is
          in ``self.modalVerbs``: the complement becomes the predicate. It is
          linked to the subject with ``domain_label``, the predicate's edges
          to the complement and to the subject are removed,
          ``duplicate_all_incidents`` is called from the old predicate to the
          complement, and a ``SOURCE_LABEL`` edge is added from the complement
          back to the old predicate. If the old predicate then has no
          neighbors and is a single word found in ``contractions`` it is
          deleted; otherwise "acomp_as_modal" is recorded in ``self.types``.
        * Otherwise "acomp_as_mwe" is recorded and the two nodes are merged
          with ``merge_nodes``.
        """
        def is_predicate_acomp(edge):
            # Tuple parameters (``lambda ((u, v)): ...``) are a SyntaxError on
            # Python 3, so the edge is unpacked in the body instead.
            u, v = edge
            return self.edge_label((u, v)) == "acomp" and u.isPredicate

        for predNode, acompNode in find_edges(self, is_predicate_acomp):
            subjs = multi_get(predNode.neighbors(), subject_dependencies)
            if len(subjs) != 1:
                # No unique subject: leave this construction untouched.
                continue

            is_modal = (predNode.text[0].word in self.modalVerbs) or (
                predNode.features.get("Lemma", "") in self.modalVerbs)
            if not is_modal:
                self.types.add("acomp_as_mwe")
                merge_nodes(gr=self, node1=predNode, node2=acompNode)
                continue

            subj = subjs[0]
            self.del_edge((predNode, acompNode))
            self.add_edge((acompNode, subj), label=domain_label)
            acompNode.isPredicate = True
            self.del_edge((predNode, subj))
            duplicate_all_incidents(gr=self,
                                    source=predNode,
                                    target=acompNode)
            self.add_edge((acompNode, predNode), label=SOURCE_LABEL)

            is_bare_contraction = (
                (len(self.neighbors(predNode)) == 0)
                and (len(predNode.text) == 1)
                and (predNode.text[0].word in contractions))
            if is_bare_contraction:
                self.del_node(predNode)
            else:
                self.types.add("acomp_as_modal")
    def do_acomp(self):
        """Restructure ``acomp`` edges that leave a predicate node.

        Only predicates with exactly one neighbor found through
        ``subject_dependencies`` are touched; all others are left as they are.

        * Modal case -- the predicate's first word, or its "Lemma" feature, is
          in ``self.modalVerbs``: the complement becomes the predicate. It is
          linked to the subject with ``domain_label``, the predicate's edges
          to the complement and to the subject are removed,
          ``duplicate_all_incidents`` is called from the old predicate to the
          complement, and a ``SOURCE_LABEL`` edge is added from the complement
          back to the old predicate. If the old predicate then has no
          neighbors and is a single word found in ``contractions`` it is
          deleted; otherwise "acomp_as_modal" is recorded in ``self.types``.
        * Otherwise "acomp_as_mwe" is recorded and the two nodes are merged
          with ``merge_nodes``.
        """
        def is_predicate_acomp(edge):
            # Tuple parameters (``lambda((u, v)): ...``) are a SyntaxError on
            # Python 3, so the edge is unpacked in the body instead.
            u, v = edge
            return self.edge_label((u, v)) == "acomp" and u.isPredicate

        for predNode, acompNode in find_edges(self, is_predicate_acomp):
            subjs = multi_get(predNode.neighbors(), subject_dependencies)
            if len(subjs) != 1:
                # No unique subject: leave this construction untouched.
                continue

            is_modal = (predNode.text[0].word in self.modalVerbs) or (
                predNode.features.get("Lemma", "") in self.modalVerbs)
            if not is_modal:
                self.types.add("acomp_as_mwe")
                merge_nodes(gr=self, node1=predNode, node2=acompNode)
                continue

            subj = subjs[0]
            self.del_edge((predNode, acompNode))
            self.add_edge((acompNode, subj), label=domain_label)
            acompNode.isPredicate = True
            self.del_edge((predNode, subj))
            duplicate_all_incidents(gr=self, source=predNode, target=acompNode)
            self.add_edge((acompNode, predNode), label=SOURCE_LABEL)

            is_bare_contraction = (
                (len(self.neighbors(predNode)) == 0)
                and (len(predNode.text) == 1)
                and (predNode.text[0].word in contractions))
            if is_bare_contraction:
                self.del_node(predNode)
            else:
                self.types.add("acomp_as_modal")
Exemple #4
0
    def _fix(self):
        """Run one round of structural clean-ups on the dependency graph.

        Passes, in order:

        1. ``mark`` edges: the first target that has no neighbors and consists
           of the single word "that" is deleted, and the method returns True.
        2. ``rcmod`` edges with no reverse edge: an ``ARG_LABEL`` edge is added
           in the reverse direction for the first one, and the method returns
           True.
        3. ``prep`` edges whose target has exactly one neighbor and a "pobj"
           entry in its neighbors: unless the head is already linked to that
           object, the preposition word is appended to the head's
           ``surface_form``, the head is linked to the object with a
           ``prep_<word>`` edge and the preposition node is deleted.
        4. ``pobj`` edges whose source has no "prep" entry in ``incidents()``:
           the edge takes over the label of the source's earliest (by
           ``minIndex``) neighbor-less ``prepc_*`` neighbor, which is then
           deleted.

        Returns:
            True when pass 1 or 2 changed the graph, False otherwise. The
            early returns presumably let the caller re-run the method until
            it reports no change -- confirm against the caller.
        """
        # Tuple parameters (``lambda (u, v): ...``) are a SyntaxError on
        # Python 3, so every predicate below unpacks the edge in its body.

        def is_mark(edge):
            u, v = edge
            return self.edge_label((u, v)) == "mark"

        def is_one_way_rcmod(edge):
            u, v = edge
            return (self.edge_label((u, v)) == "rcmod") and (
                not self.has_edge((v, u)))

        def is_collapsible_prep(edge):
            u, v = edge
            return ((self.edge_label((u, v)) == "prep")
                    and (len(self.neighbors(v)) == 1)
                    and ("pobj" in v.neighbors()))

        def is_detached_pobj(edge):
            u, v = edge
            return (self.edge_label((u, v)) == "pobj") and (
                "prep" not in u.incidents())

        # remove mark->that
        for u, v in find_edges(self, is_mark):
            if (len(self.neighbors(v)) == 0) and (len(v.text) == 1) and (
                    v.text[0].word == "that"):
                self.del_node(v)
                return True

        # rcmod with no relation to father
        for u, v in find_edges(self, is_one_way_rcmod):
            self.add_edge((v, u), label=ARG_LABEL)
            return True

        # prep collapse
        for u, v in find_edges(self, is_collapsible_prep):
            pobj = v.neighbors()["pobj"][0]
            if not self.has_edge((u, pobj)):
                w = v.text[0]
                u.surface_form += [w]
                self.add_edge((u, pobj), label="prep_" + w.word)
                self.del_node(v)

        # fix dependency collapse bugs
        detached = sorted(find_edges(self, is_detached_pobj),
                          key=lambda edge: edge[0].minIndex())
        for u, v in detached:
            neighbors = u.neighbors()
            prepc_rels = [rel for rel in neighbors if rel.startswith("prepc_")]
            candidates = [
                n for n in multi_get(neighbors, prepc_rels)
                if len(self.neighbors(n)) == 0
            ]
            candidates.sort(key=lambda n: n.minIndex())
            if candidates:
                curToDel = candidates[0]
                rel = self.edge_label((u, curToDel))
                self.del_edge((u, v))
                self.add_edge((u, v), label=rel)
                self.del_node(curToDel)

        # Explicit falsy result instead of an implicit None.
        return False
Exemple #5
0
 def calcTopNodes(self):
     """Mark top nodes and propagate the mark until nothing changes.

     First, every node that does not appear in
     ``multi_get(accessibility_wo_self(self), self.nodes())`` is made a top
     node (presumably: nodes not reachable from any other node -- confirm
     against ``accessibility_wo_self``).

     Then, repeatedly, for every node whose "top" feature is truthy:
     each node listed under ``SOURCE_LABEL`` in its ``incidents()`` is made
     a top node, and if the node ``isConj()`` all of its neighbors are made
     top nodes as well. The loop ends after a pass in which no
     ``makeTopNode()`` call returned a truthy value.

     NOTE(review): this relies on ``makeTopNode()`` returning a truthy value
     only when it actually changed the node -- confirm against its
     definition.
     """
     all_accessible = multi_get(accessibility_wo_self(self), self.nodes())
     for topNode in [n for n in self.nodes() if n not in all_accessible]:
         topNode.makeTopNode()

     change = True
     while change:
         change = False
         for topNode in [n for n in self.nodes() if n.features.get("top", [])]:
             for sourceNode in topNode.incidents().get(SOURCE_LABEL, []):
                 # Only ever raise the flag: assigning the result directly
                 # would let a later unchanged node erase an earlier change
                 # and end the propagation too soon.
                 if sourceNode.makeTopNode():
                     change = True
             if topNode.isConj():
                 for n in self.neighbors(topNode):
                     if n.makeTopNode():
                         change = True
 def _fix(self):
     """Run one round of structural clean-ups on the dependency graph.

     Passes, in order:

     1. ``mark`` edges: the first target that has no neighbors and consists
        of the single word "that" is deleted, and the method returns True.
     2. ``rcmod`` edges with no reverse edge: an ``ARG_LABEL`` edge is added
        in the reverse direction for the first one, and the method returns
        True.
     3. ``prep`` edges whose target has exactly one neighbor and a "pobj"
        entry in its neighbors: unless the head is already linked to that
        object, the preposition word is appended to the head's
        ``surface_form``, the head is linked to the object with a
        ``prep_<word>`` edge and the preposition node is deleted.
     4. ``pobj`` edges whose source has no "prep" entry in ``incidents()``:
        the edge takes over the label of the source's earliest (by
        ``minIndex``) neighbor-less ``prepc_*`` neighbor, which is then
        deleted.

     Returns:
         True when pass 1 or 2 changed the graph, False otherwise. The
         early returns presumably let the caller re-run the method until
         it reports no change -- confirm against the caller.
     """
     # Tuple parameters (``lambda (u, v): ...``) are a SyntaxError on
     # Python 3, so every predicate below unpacks the edge in its body.

     def is_mark(edge):
         u, v = edge
         return self.edge_label((u, v)) == "mark"

     def is_one_way_rcmod(edge):
         u, v = edge
         return (self.edge_label((u, v)) == "rcmod") and (
             not self.has_edge((v, u)))

     def is_collapsible_prep(edge):
         u, v = edge
         return ((self.edge_label((u, v)) == "prep")
                 and (len(self.neighbors(v)) == 1)
                 and ("pobj" in v.neighbors()))

     def is_detached_pobj(edge):
         u, v = edge
         return (self.edge_label((u, v)) == "pobj") and (
             "prep" not in u.incidents())

     # remove mark->that
     for u, v in find_edges(self, is_mark):
         if (len(self.neighbors(v)) == 0) and (len(v.text) == 1) and (
                 v.text[0].word == "that"):
             self.del_node(v)
             return True

     # rcmod with no relation to father
     for u, v in find_edges(self, is_one_way_rcmod):
         self.add_edge((v, u), label=ARG_LABEL)
         return True

     # prep collapse
     for u, v in find_edges(self, is_collapsible_prep):
         pobj = v.neighbors()["pobj"][0]
         if not self.has_edge((u, pobj)):
             w = v.text[0]
             u.surface_form += [w]
             self.add_edge((u, pobj), label="prep_" + w.word)
             self.del_node(v)

     # fix dependency collapse bugs
     detached = sorted(find_edges(self, is_detached_pobj),
                       key=lambda edge: edge[0].minIndex())
     for u, v in detached:
         neighbors = u.neighbors()
         prepc_rels = [rel for rel in neighbors if rel.startswith("prepc_")]
         candidates = [
             n for n in multi_get(neighbors, prepc_rels)
             if len(self.neighbors(n)) == 0
         ]
         candidates.sort(key=lambda n: n.minIndex())
         if candidates:
             curToDel = candidates[0]
             rel = self.edge_label((u, curToDel))
             self.del_edge((u, v))
             self.add_edge((u, v), label=rel)
             self.del_node(curToDel)

     # Explicit falsy result instead of an implicit None.
     return False
Exemple #7
0
    def do_prop(self):
        """Rewrite adjectival modifiers, copular clauses and appositions.

        Three passes run in order:

        1. ``amod`` edges whose head has a POS in ``determined_labels`` are
           turned into a property relation (``createPropRel``); the modifier
           gets the "top" feature and the ``amod`` edge is deleted.
        2. Single-word predicate nodes whose word is in ``copular_verbs`` and
           that have at least one subject and one complement are dissolved.
           Per subject: if the subject is already a neighbor of the
           complement, the complement just absorbs the copula's features; if
           the complement is not definite or is an "acomp" neighbor, a
           property relation is created; otherwise a new copular node
           (``getCopular``) links subject and complement with
           ``FIRST_ENTITY_LABEL`` / ``SECOND_ENTITY_LABEL`` and "SameAs" is
           recorded. The copula's incoming edges and remaining outgoing edges
           are duplicated onto the new head, and the copula node is deleted.
        3. ``appos`` edges: every incoming edge of the first member is
           duplicated onto the second member (bookkeeping is stored in the
           father's "dups" and "apposIndex" features); then either a property
           relation or a "SameAs" copular node is created, and the ``appos``
           edge is deleted.

        Construction tags ("complicated BE", "debug", "SameAs") are added to
        ``self.types`` along the way.
        """
        def is_amod(edge):
            # Tuple parameters (``lambda (u, v): ...``) are a SyntaxError on
            # Python 3, so the edge is unpacked in the body instead.
            u, v = edge
            return self.edge_label((u, v)) == "amod"

        # prenominal of definite
        edges = find_edges(self, is_amod)
        for domain, mod in edges:
            if domain.pos(
            ) in determined_labels:  # the np by itself is definite
                self.createPropRel(domain=domain, mod=mod)
                mod.features["top"] = True
                self.del_edge((domain, mod))

        # copular on adjective or indefinite
        # and sameAs otherwise

        # find copular
        nodes = find_nodes(
            self, lambda n: len(n.text) == 1 and n.text[0].word in
            copular_verbs and n.isPredicate)
        for curNode in nodes:
            curNeighbours = curNode.neighbors()
            subjs = multi_get(curNeighbours, subject_dependencies)
            objs = multi_get(curNeighbours, clausal_complements)
            # no clausal complement: fall back to neighbors labelled "dep"
            if not objs: objs = multi_get(curNeighbours, ["dep"])
            others = [
                n for n in self.neighbors(curNode) if n not in subjs + objs
            ]
            if (len(objs) > 0) and (len(subjs) > 0):
                # only the first complement is used as the object; the rest
                # are treated like any other neighbor
                others += objs[1:]
                if others:
                    self.types.add("complicated BE")
                obj = objs[0]
                if len(objs) > 1:
                    self.types.add("debug")
                for subj in subjs:
                    if 'Lemma' in curNode.features:
                        del (curNode.features['Lemma'])
                    if (subj in self.neighbors(obj)):
                        obj.features.update(curNode.features)
                    else:
                        if (not isDefinite(obj)) or (obj in curNeighbours.get(
                                "acomp", [])):
                            self.createPropRel(domain=subj, mod=obj)
                            head = obj
                            obj.surface_form += curNode.surface_form

                        else:
                            self.types.add("SameAs")
                            self.del_edge((curNode, subj))
                            if self.has_edge((curNode, obj)):
                                self.del_edge((curNode, obj))

                            copularNode = getCopular(self,
                                                     curNode.text[0].index,
                                                     features=curNode.features)
                            copularNode.surface_form = curNode.surface_form
                            self.add_edge((copularNode, subj),
                                          label=FIRST_ENTITY_LABEL)
                            self.add_edge((copularNode, obj),
                                          label=SECOND_ENTITY_LABEL)
                            head = copularNode

                        head.features.update(curNode.features)

                        # re-attach whatever pointed at the copula to the head
                        for curFather in self.incidents(curNode):
                            if not self.has_edge((curFather, head)):
                                duplicateEdge(graph=self,
                                              orig=(curFather, curNode),
                                              new=(curFather, head))

                        # move the copula's remaining neighbors to the head
                        for curOther in others:
                            if not self.has_edge((obj, curOther)):
                                duplicateEdge(graph=self,
                                              orig=(curNode, curOther),
                                              new=(head, curOther))
                # erase "be" node
                self.del_node(curNode)

        # find appositions
        for subj, obj in find_edges(
                self, lambda edge: self.edge_label(edge) == "appos"):
            # duplicate relations
            for curFather in self.incidents(subj):
                curIndex = curFather.features.get("apposIndex", 0) + 1
                curLabel = self.edge_label((curFather, subj))
                self.del_edge((curFather, subj))
                self.add_edge((curFather, subj), curLabel)
                self.add_edge((curFather, obj), curLabel)
                ls = curFather.features.get("dups", [])
                ls.append((subj, obj))
                curFather.features["dups"] = ls

                curFather.features["apposIndex"] = curIndex
            if (not isDefinite(subj)
                    and not isDefinite(obj)) or (obj in subj.neighbors().get(
                        "acomp", [])):
                self.createPropRel(domain=subj, mod=obj)
                obj.features["top"] = True
            else:
                # add new node
                # TODO: subj here is a problem - should point to the comma or something
                self.types.add("SameAs")
                copularNode = getCopular(self, subj.text[0].index, features={})
                copularNode.surface_form = []
                self.add_edge((copularNode, subj), label=FIRST_ENTITY_LABEL)
                self.add_edge((copularNode, obj), label=SECOND_ENTITY_LABEL)

            self.del_edge((subj, obj))
Exemple #8
0
 def extract_entities(self):
     """Return the entity nodes of the graph.

     A node is selected by ``find_nodes`` when it is not a predicate and
     ``multi_get`` over its ``incidents()`` with ``arguments_dependencies``
     gives a truthy result.
     """
     def is_entity(node):
         if node.isPredicate:
             return False
         return multi_get(node.incidents(), arguments_dependencies)

     return find_nodes(graph=self, filterFunc=is_entity)
    def do_prop(self):
        """Rewrite adjectival modifiers, copular clauses and appositions.

        Three passes run in order:

        1. ``amod`` edges whose head has a POS in ``determined_labels`` are
           turned into a property relation (``createPropRel``); the modifier
           gets the "top" feature and the ``amod`` edge is deleted.
        2. Single-word predicate nodes whose word is in ``copular_verbs`` and
           that have at least one subject and one complement are dissolved.
           Per subject: if the subject is already a neighbor of the
           complement, the complement just absorbs the copula's features; if
           the complement is not definite or is an "acomp" neighbor, a
           property relation is created; otherwise a new copular node
           (``getCopular``) links subject and complement with
           ``FIRST_ENTITY_LABEL`` / ``SECOND_ENTITY_LABEL`` and "SameAs" is
           recorded. The copula's incoming edges and remaining outgoing edges
           are duplicated onto the new head, and the copula node is deleted.
        3. ``appos`` edges: every incoming edge of the first member is
           duplicated onto the second member (bookkeeping is stored in the
           father's "dups" and "apposIndex" features); then either a property
           relation or a "SameAs" copular node is created, and the ``appos``
           edge is deleted.

        Construction tags ("complicated BE", "debug", "SameAs") are added to
        ``self.types`` along the way.
        """
        def is_amod(edge):
            # Tuple parameters (``lambda (u, v): ...``) are a SyntaxError on
            # Python 3, so the edge is unpacked in the body instead.
            u, v = edge
            return self.edge_label((u, v)) == "amod"

        # prenominal of definite
        edges = find_edges(self, is_amod)
        for domain, mod in edges:
            if domain.pos() in determined_labels:  # the np by itself is definite
                self.createPropRel(domain=domain, mod=mod)
                mod.features["top"] = True
                self.del_edge((domain, mod))

        # copular on adjective or indefinite
        # and sameAs otherwise

        # find copular
        nodes = find_nodes(self, lambda n: len(n.text) == 1 and n.text[0].word in copular_verbs and n.isPredicate)
        for curNode in nodes:
            curNeighbours = curNode.neighbors()
            subjs = multi_get(curNeighbours, subject_dependencies)
            objs = multi_get(curNeighbours, clausal_complements)
            # no clausal complement: fall back to neighbors labelled "dep"
            if not objs: objs = multi_get(curNeighbours,["dep"])
            others = [n for n in self.neighbors(curNode) if n not in subjs + objs]
            if (len(objs)>0)and (len(subjs)>0):
                # only the first complement is used as the object; the rest
                # are treated like any other neighbor
                others+=objs[1:]
                if others:
                    self.types.add("complicated BE")
                obj = objs[0]
                if len(objs)>1:
                    self.types.add("debug")
                for subj in subjs:
                    if 'Lemma' in curNode.features: del(curNode.features['Lemma'])
                    if (subj in self.neighbors(obj)):
                        obj.features.update(curNode.features)
                    else:
                        if (not isDefinite(obj)) or (obj in curNeighbours.get("acomp", [])):
                            self.createPropRel(domain=subj, mod=obj)
                            head = obj
                            obj.surface_form += curNode.surface_form

                        else:
                            self.types.add("SameAs")
                            self.del_edge((curNode, subj))
                            if self.has_edge((curNode,obj)):
                                self.del_edge((curNode, obj))
                            copularNode = getCopular(self, curNode.text[0].index, features=curNode.features)
                            copularNode.surface_form = curNode.surface_form
                            self.add_edge((copularNode, subj),
                                          label=FIRST_ENTITY_LABEL)
                            self.add_edge((copularNode, obj),
                                          label=SECOND_ENTITY_LABEL)
                            head = copularNode

                        head.features.update(curNode.features)

                        # re-attach whatever pointed at the copula to the head
                        for curFather in self.incidents(curNode):
                            if not self.has_edge((curFather, head)):
                                duplicateEdge(graph=self, orig=(curFather, curNode), new=(curFather, head))

                        # move the copula's remaining neighbors to the head
                        for curOther in others:
                                if not self.has_edge((obj, curOther)):
                                    duplicateEdge(graph=self, orig=(curNode, curOther), new=(head, curOther))
                # erase "be" node
                self.del_node(curNode)

        # find appositions
        for subj, obj in find_edges(self, lambda edge:self.edge_label(edge) == "appos"):
            # duplicate relations
            for curFather in self.incidents(subj):
                curIndex = curFather.features.get("apposIndex", 0) + 1
                curLabel = self.edge_label((curFather, subj))
                self.del_edge((curFather, subj))
                self.add_edge((curFather, subj), curLabel)
                self.add_edge((curFather, obj), curLabel)
                ls = curFather.features.get("dups", [])
                ls.append((subj, obj))
                curFather.features["dups"] = ls

                curFather.features["apposIndex"] = curIndex
            if (not isDefinite(subj) and not isDefinite(obj)) or (obj in subj.neighbors().get("acomp", [])):
                self.createPropRel(domain=subj, mod=obj)
                obj.features["top"] = True
            else:
                # add new node
                # TODO: subj here is a problem - should point to the comma or something
                self.types.add("SameAs")
                copularNode = getCopular(self, subj.text[0].index, features={})
                copularNode.surface_form = []
                self.add_edge((copularNode, subj),
                              label=FIRST_ENTITY_LABEL)
                self.add_edge((copularNode, obj),
                              label=SECOND_ENTITY_LABEL)

            self.del_edge((subj, obj))
 def extract_entities(self):
     """Return the entity nodes of the graph.

     A node is selected by ``find_nodes`` when it is not a predicate and
     ``multi_get`` over its ``incidents()`` with ``arguments_dependencies``
     gives a truthy result.
     """
     def is_entity(node):
         if node.isPredicate:
             return False
         return multi_get(node.incidents(), arguments_dependencies)

     return find_nodes(graph=self, filterFunc=is_entity)
Exemple #11
0
    def _fix(self):
        # remove mark->that
        edges = find_edges(
            self, lambda u_v1: self.edge_label((u_v1[0], u_v1[1])) == "mark")
        for (u, v) in edges:
            if (len(self.neighbors(v)) == 0) and (len(
                    v.text) == 1) and (v.text[0].word == "that"):
                self.del_node(v)
                return True

        # rcmod with no relation to father
        edges = find_edges(
            self, lambda u_v2: (self.edge_label(
                (u_v2[0], u_v2[1])) == "rcmod") and (not self.has_edge(
                    (u_v2[1], u_v2[0]))))
        for u, v in edges:
            self.add_edge((v, u), label=ARG_LABEL)
            return True

        # prep collapse
        edges = find_edges(
            self, lambda u_v3: (self.edge_label(
                (u_v3[0], u_v3[1])) == "prep") and
            (len(self.neighbors(u_v3[1])) == 1) and
            ("pobj" in u_v3[1].neighbors()))
        if edges:
            for (u, v) in edges:
                pobj = v.neighbors()["pobj"][0]
                if not (self.has_edge((u, pobj))):
                    w = v.text[0]
                    u.surface_form += [w]
                    self.add_edge((u, pobj), label="prep_" + w.word)
                    self.del_node(v)

        # fix dependency collapse bugs
        edges = find_edges(
            self, lambda u_v4: (self.edge_label(
                (u_v4[0], u_v4[1])) == "pobj") and
            ("prep" not in u_v4[0].incidents()))
        for (u, v) in sorted(edges, key=lambda u_v5: u_v5[0].minIndex()):
            neighbors = u.neighbors()
            candidates = [
                n for n in multi_get(
                    neighbors,
                    [rel for rel in neighbors if rel.startswith("prepc_")])
                if len(self.neighbors(n)) == 0
            ]
            candidates.sort(key=lambda n: n.minIndex())
            if len(candidates) > 0:
                curToDel = candidates[0]
                rel = self.edge_label((u, curToDel))
                self.del_edge((u, v))
                self.add_edge((u, v), label=rel)
                self.del_node(curToDel)

        # change agent edges with "prep_by"
        edges = find_edges(self, lambda edge:
                           (self.edge_label(edge) == "agent"))
        for edge in edges:
            self.del_edge(edge)
            self.add_edge(edge, label="prep_by")

#         #add xcomp inverse node
#         edges  = find_edges(self, lambda (u,v):self.edge_label((u,v)) == "xcomp" and u.isPredicate and v.isPredicate)
#         for (u,v) in edges:
#             if not self.has_edge((v, u)):
#                 self.add_edge((v,u), label=SOURCE_LABEL)
#                 self.types.add("infinitives")
#                 return True
#             if not multi_get(v.neighbors(),subject_dependencies):
#                 rcmodParentIncidents = u.incidents().get("rcmod",[])
#                 if len(rcmodParentIncidents)==1:
#                     subj = rcmodParentIncidents[0]
#                     if not self.has_edge((v,subj)):
#                         self.add_edge((v,subj),label=ARG_LABEL)

        return False