コード例 #1
0
ファイル: fix_graph.py プロジェクト: arueckle/props
 def fixPossesive(self):
     """
     fix phrasing in possessives, such as "its -> it" "her -> she" "his -> he", etc.
     """
     possNodes = find_nodes(self.gr, isPossessive)
     for possNode in possNodes:
         possessor = deref(graph=self.gr, node = possNode, rel = POSSESSOR_LABEL)[0]
         fixPossessor(possessor)
コード例 #2
0
 def fixPossesive(self):
     """
     fix phrasing in possessives, such as "its -> it" "her -> she" "his -> he", etc.
     """
     possNodes = find_nodes(self.gr, isPossessive)
     for possNode in possNodes:
         possessor = deref(graph=self.gr,
                           node=possNode,
                           rel=POSSESSOR_LABEL)[0]
         fixPossessor(possessor)
コード例 #3
0
ファイル: graph_wrapper.py プロジェクト: lulitaCopycat/props
 def do_passives(self):
     nodes = find_nodes(self,lambda n:n.features.get("Passive Voice",False))
     for n in nodes:
         curNeighbours = n.neighbors()
         for subjNeigbour in multi_get(curNeighbours, subject_dependencies):
             edge = (n,subjNeigbour)
             self.del_edge(edge)
             self.add_edge(edge,"obj")
         
         for prepByNeigbour in multi_get(curNeighbours,["prep_by"]):
             edge = (n,prepByNeigbour)
             self.del_edge(edge)
             self.add_edge(edge,"subj")
コード例 #4
0
    def do_prop(self):
        # prenominal of definite
        edges = find_edges(self, lambda (u, v): self.edge_label(
            (u, v)) == "amod")
        for domain, mod in edges:
            if domain.pos(
            ) in determined_labels:  # the np by itself is definite
                self.createPropRel(domain=domain, mod=mod)
                mod.features["top"] = True
                self.del_edge((domain, mod))

        # copular on adjective or indefinite
        # and sameAs otherwise

        # find copular
        nodes = find_nodes(
            self, lambda n: len(n.text) == 1 and n.text[0].word in
            copular_verbs and n.isPredicate)
        for curNode in nodes:
            curNeighbours = curNode.neighbors()
            subjs = multi_get(curNeighbours, subject_dependencies)
            objs = multi_get(curNeighbours, clausal_complements)
            if not objs: objs = multi_get(curNeighbours, ["dep"])
            others = [
                n for n in self.neighbors(curNode) if n not in subjs + objs
            ]
            if (len(objs) > 0) and (
                    len(subjs) > 0):  #and (not others) and (len(objs) == 1):
                others += objs[1:]
                if others:
                    self.types.add("complicated BE")
                obj = objs[0]
                if len(objs) > 1:
                    self.types.add("debug")
                for subj in subjs:
                    if 'Lemma' in curNode.features:
                        del (curNode.features['Lemma'])
                    if (subj in self.neighbors(obj)):
                        obj.features.update(curNode.features)
                    else:
                        if (not isDefinite(obj)) or (obj in curNeighbours.get(
                                "acomp", [])):
                            self.createPropRel(domain=subj, mod=obj)
                            head = obj
                            obj.surface_form += curNode.surface_form

                        else:
                            self.types.add("SameAs")
                            self.del_edge((curNode, subj))
                            if self.has_edge((curNode, obj)):
                                self.del_edge((curNode, obj))


#                             self.del_edges([(curNode, subj), (curNode, obj)])
                            copularNode = getCopular(self,
                                                     curNode.text[0].index,
                                                     features=curNode.features)
                            copularNode.surface_form = curNode.surface_form
                            self.add_edge((copularNode, subj),
                                          label=FIRST_ENTITY_LABEL)
                            self.add_edge((copularNode, obj),
                                          label=SECOND_ENTITY_LABEL)
                            head = copularNode

                        head.features.update(curNode.features)

                        for curFather in self.incidents(curNode):
                            if not self.has_edge((curFather, head)):
                                duplicateEdge(graph=self,
                                              orig=(curFather, curNode),
                                              new=(curFather, head))

                        for curOther in others:
                            if not self.has_edge((obj, curOther)):
                                duplicateEdge(graph=self,
                                              orig=(curNode, curOther),
                                              new=(head, curOther))
                # erase "be" node
                self.del_node(curNode)

        # find appositions
        for subj, obj in find_edges(
                self, lambda edge: self.edge_label(edge) == "appos"):
            # duplicate relations
            for curFather in self.incidents(subj):
                curIndex = curFather.features.get("apposIndex", 0) + 1
                #                 curLabel = "{0},{1}".format(curIndex,self.edge_label((curFather,subj)))
                curLabel = self.edge_label((curFather, subj))
                self.del_edge((curFather, subj))
                self.add_edge((curFather, subj), curLabel)
                self.add_edge((curFather, obj), curLabel)
                ls = curFather.features.get("dups", [])
                ls.append((subj, obj))
                curFather.features["dups"] = ls

                curFather.features["apposIndex"] = curIndex
            if (not isDefinite(subj)
                    and not isDefinite(obj)) or (obj in subj.neighbors().get(
                        "acomp", [])):
                self.createPropRel(domain=subj, mod=obj)
                obj.features["top"] = True
            else:
                # add new node
                # TODO: subj here is a problem - should point to the comma or something
                self.types.add("SameAs")
                copularNode = getCopular(self, subj.text[0].index, features={})
                copularNode.surface_form = []
                self.add_edge((copularNode, subj), label=FIRST_ENTITY_LABEL)
                self.add_edge((copularNode, obj), label=SECOND_ENTITY_LABEL)

            self.del_edge((subj, obj))
コード例 #5
0
 def extract_entities(self):
     ret = find_nodes(graph=self,
                      filterFunc=lambda node:
                      (not node.isPredicate) and multi_get(
                          node.incidents(), arguments_dependencies))
     return ret
コード例 #6
0
ファイル: fix_graph.py プロジェクト: arueckle/props
        def inner():
            change = False
            # 1,2
            nodes = find_nodes(self.gr, isCondition)
            nodes.extend(find_nodes(self.gr, isPreposition))
            for curNode in nodes:
                sisterNodes = sister_nodes(graph=self.gr, node=curNode)
                for sisterNode in sisterNodes:
                    if isProp(sisterNode) and is_following(graph=self.gr,
                                                           node1=sisterNode,
                                                           node2=curNode):
                        reattch(graph=self.gr, 
                                node=curNode, 
                                new_father=sisterNode)
                        return True
                        break
            # 3
            nodes = find_nodes(self.gr, isAdverb)
            for curNode in nodes:
                sisterNodes = sister_nodes(graph=self.gr, node=curNode)
                for sisterNode in sisterNodes:
                    if isProp(sisterNode) and is_following(graph=self.gr,
                                                           node1=curNode,
                                                           node2=sisterNode):
                        reattch(graph=self.gr, 
                                node=curNode, 
                                new_father=sisterNode)
                        return True
                        break
                    
            #4
            nodes = find_nodes(self.gr,
                               lambda n:isCondition(n) and n.text[0].word == "{0}-{1}".format(COND,'that'))
            for curNode in nodes:
                curFathers = self.gr.incidents(curNode)
                curChildren = self.gr.neighbors(curNode)
                for curFather in curFathers:
                    for curChild in curChildren:
                        self.gr.add_edge(edge = (curFather,curChild),
                                         label = "that")
                self.gr.del_node(curNode)
                change = True
            
            #5
            filterFunc = lambda n:isConjunction(n) and len(self.gr.incidents(n)) == 1 and isConjunction(self.gr.incidents(n)[0]) and (n.conjType  == self.gr.incidents(n)[0].conjType) #TODO: efficiency - multiple calls to incidents and a lot of deref
            nodes = find_nodes(self.gr,filterFunc)
                               
            for curNode in nodes:
                curFather = self.gr.incidents(curNode)[0]
                for curChild in self.gr.neighbors(curNode):
                    self.gr.add_edge((curFather,curChild))
                self.gr.del_node(curNode)
                change = True 
                
            #6
            nodes = find_nodes(self.gr,
                               lambda n:len(n.text)==1 and n.text[0].word == "able")
            for curNode in nodes:
                curFathers = self.gr.incidents(curNode)
                if len(curFathers)==1:
                    curChildren = self.gr.neighbors(curNode)
                    if len(curChildren) ==1:
                        child = curChildren[0]
                        if child.isPredicate and (self.gr.edge_label((curNode,child))=="xcomp"):
                            father = curFathers[0]
                            self.gr.add_edge(edge=(father,child),
                                             label=self.gr.edge_label((father,curNode)))
                            child.features["Modal"]={"Value":['able']} #TODO: is this maybe overrun previous modals?
                            self.gr.del_node(curNode)
                            change=True
            #7
            edges = find_edges(self.gr,
                               lambda (u,v):isTime(u)and isTime(v) and len(self.gr.neighbors(u))==1)
            for curFather,curSon in edges:
                for curNode in self.gr.neighbors(curSon):
                    self.gr.add_edge(edge=(curFather,curNode),
                                     label = self.gr.edge_label((curSon,curNode)))
                self.gr.del_node(curSon)
                return True
            
            #8
            edges = find_edges(self.gr,
                               lambda (u,v):(isTime(v) or isLocation(v)) and isPreposition(u) and u.is_time_prep())
            
            for prepNode,timeNode in edges:
                if (len(self.gr.neighbors(prepNode))==1):
                    # time node is only son - attach time to all of prep incidents
                    for curFather in self.gr.incidents(prepNode):
                        self.gr.add_edge(edge=(curFather,timeNode),
                                         label = self.gr.edge_label((curFather,prepNode)))
                    self.gr.del_node(prepNode)
                    change=True
                    
            #9
            conjNodes = find_nodes(self.gr, lambda n: isConjunction(n) and n.conjType.lower() == "and")
            for conjNode in conjNodes:
                curParents = []
                curChildren = self.gr.neighbors(conjNode)
                for curChild in curChildren:
                    curParents.extend([parent for parent in self.gr.incidents(curChild) if parent != conjNode])
                
                if len(curParents)==1:
                    parent = curParents[0]
                    if isProp(parent):
                        # found a prop->conj construction 
                        # connect all prop to parent of conj and remove the conj node
                        for child in curChildren:
                            if not (parent,child) in self.gr.edges():
                                self.gr.add_edge(edge = (parent,child))
                        self.gr.del_node(conjNode)
                        change = True
                        
                    
            
            #10
            change = change or self.fixRanges()
            
            #11
            edges = find_edges(self.gr,
                               lambda (u,v):self.gr.edge_label((u,v))=="loc" and len(self.gr.neighbors(u))>1)
            
            for topNode,loc in edges:
                for curNeigbor in self.gr.neighbors(topNode):
                    if curNeigbor != loc:
                        duplicateEdge(graph=self.gr, orig=(topNode,curNeigbor), new=(loc,curNeigbor))
                for curFather in self.gr.incidents(topNode):
                    duplicateEdge(graph=self.gr, orig=(curFather,topNode), new=(curFather,loc))
                self.gr.del_node(topNode)
                self.types.remove(APPENDIX_LOCATION)
                change=True
                    
            
            #12
            edges = find_edges(graph=self.gr, 
                               filterFunc = lambda (u,v): isProp(u) and isLocation(v))
            
            for _,locNode in edges:
                for curFather in self.gr.incidents(locNode):
                    for curNeighbour in self.gr.neighbors(locNode):
                        duplicateEdge(graph=self.gr, orig=(locNode,curNeighbour), new=(curFather,curNeighbour))
                self.gr.del_node(locNode)
                self.types.remove(APPENDIX_LOCATION)
                change=True
                
            #13
            edges = find_edges(graph=self.gr, 
                               filterFunc = lambda (u,v): isProp(u) and v.isPredicate and (len(self.gr.neighbors(v)) ==0) and (len(self.gr.incidents(u)) ==1) and (len(self.gr.neighbors(u)) ==1))
            
            for propNode,predNode in edges:
                change = True
                curFather = self.gr.incidents(propNode)[0]
                if not isApposition(curFather):
                    jointNode = node.join(node1=curFather, 
                                          node2=predNode, 
                                          gr=self.gr)
                    curFather.text = jointNode.text
                    self.gr.del_nodes([propNode,predNode])
                else:
                    self.gr.del_node(propNode)
                    self.gr.add_edge((predNode,curFather))
                    for curIncident in self.gr.incidents(curFather):
                        duplicateEdge(graph=self.gr, 
                                      orig=(curIncident,curFather), 
                                      new=(curIncident,predNode))
                        self.gr.del_edge((curIncident,curFather))
                        
                        
            #14
            propNodes = find_nodes(self.gr, lambda n:isProp(n) and len(self.gr.incidents(n))==1)
            for propNode in propNodes:
                curFather = self.gr.incidents(propNode)[0]
                if ((len(curFather.str)==1) and (not isCopular(curFather)) and (curFather.str[0].word == "be" or curFather.str[0].word in contractions)) or ((isProp(curFather) or isRcmodProp(curFather)) and len(self.gr.neighbors(curFather))==1):
                    if len(self.gr.incidents(curFather))==1:                    
                        curAncestor = self.gr.incidents(curFather)[0]
                        duplicateEdge(graph=self.gr,
                                      orig=(curAncestor,curFather), 
                                      new=(curAncestor,propNode))
                        self.gr.del_node(curFather)
                        # this node no longer describes the "be" relation
                        propNode.parent_relation = ''
                        return True
            
            #15
            edges = find_edges(graph=self.gr, 
                               filterFunc = lambda (u,v): isProp(v) and (v.parent_relation == "acomp") and len(self.gr.neighbors(v))==1 and u.isPredicate)
            
            for pred, prop in edges:
                acompNode = self.gr.neighbors(prop)[0]
                duplicateEdge(graph=self.gr, orig=(pred,prop), new=(pred,acompNode),
                              newLabel = "modifier")
                self.gr.del_node(prop) # TODO: could there be others connected to it?
                newPred = node.join(pred,acompNode,self.gr)
                newPred.isPredicate =True
                self.gr.add_node(newPred)
                for neigbour in self.gr.neighbors(pred):
                    duplicateEdge(graph=self.gr, orig=(pred,neigbour), new=(newPred,neigbour))
                
                for curFather in self.gr.incidents(pred):
                    duplicateEdge(graph=self.gr, orig=(curFather,pred), new=(curFather,newPred))
                
                if len(self.gr.neighbors(acompNode))==0:
                    self.gr.del_node(acompNode)
                    
                self.gr.del_node(pred)
#                 newPred.features["debug"] =True #TODO: remove this
                self.types.add("ACOMP")
                return True
            
            #16
            edges = find_edges(graph=self.gr,
                               filterFunc = lambda (u,v): (isProp(v) or isRcmodProp(v)) and (u in self.gr.neighbors(v)))
            
            for _,v in edges:
                if (len(self.gr.neighbors(v))==1):
                    self.gr.del_node(v)
                    return True
            
            #17
            edges = find_edges(graph=self.gr,
                               filterFunc = lambda (u,v): self.gr.edge_label((u,v))==SOURCE_LABEL and (len(self.gr.neighbors(v))==0))
            for _,v in edges:
                curStr = " ".join([w.word for w in v.text])
                if curStr in contractions:
                    self.gr.del_node(v)
                    return True
                        
            #18 - verbal complements
            edges = find_edges(graph=self.gr,
                               filterFunc = lambda (u,v): self.gr.edge_label((u,v))=='ccomp' and u.isPredicate)
            for u,v in edges:
                self.gr.del_edge((u,v))
                self.gr.add_edge(edge=(u,v),
                                 label = 'dobj')
                v.features["debug"] =True
                self.types.add("DEBUG")
                return True
                    
            return change
コード例 #7
0
ファイル: fix_graph.py プロジェクト: arueckle/props
 def fixApposition(self):
     """ 
     remove apposition nodes, and change to our format
     """
     def inner(curNode,children,relation):
         curNode.dups = [duplicate_node(graph=self.gr,node=curNode,connectToNeighbours=True) for _ in children]
         for childIndex,child in enumerate(children):
             self.gr.add_edge(edge = (curNode.dups[childIndex],child),
                              label = relation)
         parents = self.gr.incidents(curNode)
         for parent in parents:
             if not hasattr(parent,'isDuplicated'): #TODO: efficiency
                 if hasattr(parent,'dups'):        #TODO: efficiency
                     for curParentDupInd,curParentDup in enumerate(parent.dups): # cycle detected - we already visited this parent, don't enter recursion
                         self.gr.add_edge(edge  = (curParentDup,curNode.dups[curParentDupInd]),
                                          label = self.gr.edge_label((parent,curNode)))
                 else:
                     inner(curNode = parent,
                           children = curNode.dups,
                           relation = self.gr.edge_label((parent,curNode)))
         
         self.gr.del_node(curNode)
         
     # find apposition nodes in topological ordering 
     apposNodes = sort_nodes_topologically(self.gr,find_nodes(self.gr, isApposition))
     
     for apposNode in apposNodes:
         # for each apposition node
         entities = [n for n in self.gr.neighbors(apposNode) if self.gr.edge_label((apposNode,n)) in [FIRST_ENTITY_LABEL,SECOND_ENTITY_LABEL]] 
         # move rcmod construction to the apposition node
         for entInd,ent in enumerate(entities):
             isEntRCmod = isRCmod(graph=self.gr,node=ent) 
             if isEntRCmod:
             #if hasattr(ent,'rcmod'): #TODO: efficiency
                 secondEntity = entities[entInd-1]
                 propNode,relClause = isEntRCmod
                 dupProp = duplicate_node(graph=self.gr, node=propNode, connectToNeighbours=False)
                 dupRelClause = duplicate_node(graph=self.gr, node=relClause, connectToNeighbours=False)
                 self.gr.add_edge((secondEntity,dupProp))
                 self.gr.add_edge((dupProp,dupRelClause))
                 self.gr.add_edge(edge=(dupRelClause,secondEntity),
                                  label=self.gr.edge_label((relClause,ent)))
                 
                 for curNeigbour in self.gr.neighbors(relClause):
                     if curNeigbour != ent:
                         topNode = duplicate_component(graph=self.gr, node=curNeigbour)
                         self.gr.add_edge(edge=(dupRelClause,topNode),
                                          label=self.gr.edge_label((relClause,curNeigbour)))
         
         # get his non-entities children
         # and connect to entities 
         children = [c for c in self.gr.neighbors(apposNode) if c not in entities]
         for child in children:
             for ent in entities:
                 self.gr.add_edge(edge=(ent,child),
                                  label = self.gr.edge_label((apposNode,child)))
             # remove connection to appos node
             self.gr.del_edge(edge=(apposNode,child))    
         
         
         # connect entities of apposition to the copular node    
         # and copy all propagation from appos node to its entities
         copNode = CopularNode.init(index = apposNode.text[0].index,     # create a copular node to replace it
                               features = apposNode.features,
                               valid=True)
         self.gr.add_node(copNode) # add it to graph
         
         for ent in entities:
             self.gr.add_edge(edge = (copNode,ent),
                              label = self.gr.edge_label((apposNode,ent)))
             for curNode in apposNode.propagateTo:
                 addSymmetricPropogation(ent,curNode)
             
         
         # deal with parent of apposition
         for parent in self.gr.incidents(apposNode):
             inner(curNode = parent,
                   children = entities,
                   relation = self.gr.edge_label((parent,apposNode)))
             
         # finally - remove the apposition                
         self.gr.del_node(apposNode)
コード例 #8
0
    def do_prop(self):
        # prenominal of definite
        edges = find_edges(self, lambda (u, v):self.edge_label((u, v)) == "amod")
        for domain, mod in edges:
            if domain.pos() in determined_labels:  # the np by itself is definite
                self.createPropRel(domain=domain, mod=mod)
                mod.features["top"] = True
                self.del_edge((domain, mod))
            
        # copular on adjective or indefinite
        # and sameAs otherwise
        
        # find copular
        nodes = find_nodes(self, lambda n: len(n.text) == 1 and n.text[0].word in copular_verbs and n.isPredicate)
        for curNode in nodes:
            curNeighbours = curNode.neighbors()
            subjs = multi_get(curNeighbours, subject_dependencies)
            objs = multi_get(curNeighbours, clausal_complements)
            if not objs: objs = multi_get(curNeighbours,["dep"])
            others = [n for n in self.neighbors(curNode) if n not in subjs + objs]
            if (len(objs)>0)and (len(subjs)>0): #and (not others) and (len(objs) == 1): 
                others+=objs[1:]
                if others:
                    self.types.add("complicated BE")
                obj = objs[0]
                if len(objs)>1:
                    self.types.add("debug")
                for subj in subjs:
                    if 'Lemma' in curNode.features: del(curNode.features['Lemma'])
                    if (subj in self.neighbors(obj)):
                        obj.features.update(curNode.features)
                    else:
                        if (not isDefinite(obj)) or (obj in curNeighbours.get("acomp", [])):
                            self.createPropRel(domain=subj, mod=obj)
                            head = obj
                            obj.surface_form += curNode.surface_form
                            
                        else:
                            self.types.add("SameAs")
                            self.del_edge((curNode, subj))
                            if self.has_edge((curNode,obj)):
                                self.del_edge((curNode, obj))
#                             self.del_edges([(curNode, subj), (curNode, obj)])
                            copularNode = getCopular(self, curNode.text[0].index, features=curNode.features)
                            copularNode.surface_form = curNode.surface_form
                            self.add_edge((copularNode, subj),
                                          label=FIRST_ENTITY_LABEL)
                            self.add_edge((copularNode, obj),
                                          label=SECOND_ENTITY_LABEL)
                            head = copularNode
                            
                        
                        head.features.update(curNode.features)
                        
                        for curFather in self.incidents(curNode):
                            if not self.has_edge((curFather, head)):
                                duplicateEdge(graph=self, orig=(curFather, curNode), new=(curFather, head))
                                
                        for curOther in others:
                                if not self.has_edge((obj, curOther)):
                                    duplicateEdge(graph=self, orig=(curNode, curOther), new=(head, curOther))
                # erase "be" node
                self.del_node(curNode)
                    
                    
        # find appositions
        for subj, obj in find_edges(self, lambda edge:self.edge_label(edge) == "appos"):
            # duplicate relations
            for curFather in self.incidents(subj):
                curIndex = curFather.features.get("apposIndex", 0) + 1
#                 curLabel = "{0},{1}".format(curIndex,self.edge_label((curFather,subj)))
                curLabel = self.edge_label((curFather, subj))
                self.del_edge((curFather, subj))
                self.add_edge((curFather, subj), curLabel)
                self.add_edge((curFather, obj), curLabel)
                ls = curFather.features.get("dups", [])
                ls.append((subj, obj))
                curFather.features["dups"] = ls
                
                curFather.features["apposIndex"] = curIndex
            if (not isDefinite(subj) and not isDefinite(obj)) or (obj in subj.neighbors().get("acomp", [])):
                self.createPropRel(domain=subj, mod=obj)
                obj.features["top"] = True
            else:
                # add new node
                # TODO: subj here is a problem - should point to the comma or something
                self.types.add("SameAs")
                copularNode = getCopular(self, subj.text[0].index, features={})
                copularNode.surface_form = []
                self.add_edge((copularNode, subj),
                              label=FIRST_ENTITY_LABEL)
                self.add_edge((copularNode, obj),
                              label=SECOND_ENTITY_LABEL)
                
                    
            self.del_edge((subj, obj))
コード例 #9
0
 def extract_entities(self):
     ret = find_nodes(graph=self, filterFunc=lambda node: (not node.isPredicate) and multi_get(node.incidents(), arguments_dependencies))
     return ret
コード例 #10
0
        def inner():
            change = False
            # 1,2
            nodes = find_nodes(self.gr, isCondition)
            nodes.extend(find_nodes(self.gr, isPreposition))
            for curNode in nodes:
                sisterNodes = sister_nodes(graph=self.gr, node=curNode)
                for sisterNode in sisterNodes:
                    if isProp(sisterNode) and is_following(
                            graph=self.gr, node1=sisterNode, node2=curNode):
                        reattch(graph=self.gr,
                                node=curNode,
                                new_father=sisterNode)
                        return True
                        break
            # 3
            nodes = find_nodes(self.gr, isAdverb)
            for curNode in nodes:
                sisterNodes = sister_nodes(graph=self.gr, node=curNode)
                for sisterNode in sisterNodes:
                    if isProp(sisterNode) and is_following(
                            graph=self.gr, node1=curNode, node2=sisterNode):
                        reattch(graph=self.gr,
                                node=curNode,
                                new_father=sisterNode)
                        return True
                        break

            #4
            nodes = find_nodes(
                self.gr, lambda n: isCondition(n) and n.text[0].word ==
                "{0}-{1}".format(COND, 'that'))
            for curNode in nodes:
                curFathers = self.gr.incidents(curNode)
                curChildren = self.gr.neighbors(curNode)
                for curFather in curFathers:
                    for curChild in curChildren:
                        self.gr.add_edge(edge=(curFather, curChild),
                                         label="that")
                self.gr.del_node(curNode)
                change = True

            #5
            filterFunc = lambda n: isConjunction(n) and len(
                self.gr.incidents(n)
            ) == 1 and isConjunction(self.gr.incidents(n)[0]) and (
                n.conjType == self.gr.incidents(n)[0].conjType
            )  #TODO: efficiency - multiple calls to incidents and a lot of deref
            nodes = find_nodes(self.gr, filterFunc)

            for curNode in nodes:
                curFather = self.gr.incidents(curNode)[0]
                for curChild in self.gr.neighbors(curNode):
                    self.gr.add_edge((curFather, curChild))
                self.gr.del_node(curNode)
                change = True

            #6
            nodes = find_nodes(
                self.gr,
                lambda n: len(n.text) == 1 and n.text[0].word == "able")
            for curNode in nodes:
                curFathers = self.gr.incidents(curNode)
                if len(curFathers) == 1:
                    curChildren = self.gr.neighbors(curNode)
                    if len(curChildren) == 1:
                        child = curChildren[0]
                        if child.isPredicate and (self.gr.edge_label(
                            (curNode, child)) == "xcomp"):
                            father = curFathers[0]
                            self.gr.add_edge(edge=(father, child),
                                             label=self.gr.edge_label(
                                                 (father, curNode)))
                            child.features["Modal"] = {
                                "Value": ['able']
                            }  #TODO: is this maybe overrun previous modals?
                            self.gr.del_node(curNode)
                            change = True
            #7
            edges = find_edges(
                self.gr, lambda (u, v): isTime(u) and isTime(v) and len(
                    self.gr.neighbors(u)) == 1)
            for curFather, curSon in edges:
                for curNode in self.gr.neighbors(curSon):
                    self.gr.add_edge(edge=(curFather, curNode),
                                     label=self.gr.edge_label(
                                         (curSon, curNode)))
                self.gr.del_node(curSon)
                return True

            #8
            edges = find_edges(
                self.gr, lambda (u, v): (isTime(v) or isLocation(v)) and
                isPreposition(u) and u.is_time_prep())

            for prepNode, timeNode in edges:
                if (len(self.gr.neighbors(prepNode)) == 1):
                    # time node is only son - attach time to all of prep incidents
                    for curFather in self.gr.incidents(prepNode):
                        self.gr.add_edge(edge=(curFather, timeNode),
                                         label=self.gr.edge_label(
                                             (curFather, prepNode)))
                    self.gr.del_node(prepNode)
                    change = True

            #9
            conjNodes = find_nodes(
                self.gr,
                lambda n: isConjunction(n) and n.conjType.lower() == "and")
            for conjNode in conjNodes:
                curParents = []
                curChildren = self.gr.neighbors(conjNode)
                for curChild in curChildren:
                    curParents.extend([
                        parent for parent in self.gr.incidents(curChild)
                        if parent != conjNode
                    ])

                if len(curParents) == 1:
                    parent = curParents[0]
                    if isProp(parent):
                        # found a prop->conj construction
                        # connect all prop to parent of conj and remove the conj node
                        for child in curChildren:
                            if not (parent, child) in self.gr.edges():
                                self.gr.add_edge(edge=(parent, child))
                        self.gr.del_node(conjNode)
                        change = True

            #10
            change = change or self.fixRanges()

            #11
            edges = find_edges(
                self.gr, lambda (u, v): self.gr.edge_label(
                    (u, v)) == "loc" and len(self.gr.neighbors(u)) > 1)

            for topNode, loc in edges:
                for curNeigbor in self.gr.neighbors(topNode):
                    if curNeigbor != loc:
                        duplicateEdge(graph=self.gr,
                                      orig=(topNode, curNeigbor),
                                      new=(loc, curNeigbor))
                for curFather in self.gr.incidents(topNode):
                    duplicateEdge(graph=self.gr,
                                  orig=(curFather, topNode),
                                  new=(curFather, loc))
                self.gr.del_node(topNode)
                self.types.remove(APPENDIX_LOCATION)
                change = True

            #12
            edges = find_edges(graph=self.gr,
                               filterFunc=lambda
                               (u, v): isProp(u) and isLocation(v))

            for _, locNode in edges:
                for curFather in self.gr.incidents(locNode):
                    for curNeighbour in self.gr.neighbors(locNode):
                        duplicateEdge(graph=self.gr,
                                      orig=(locNode, curNeighbour),
                                      new=(curFather, curNeighbour))
                self.gr.del_node(locNode)
                self.types.remove(APPENDIX_LOCATION)
                change = True

            #13
            edges = find_edges(graph=self.gr,
                               filterFunc=lambda
                               (u, v): isProp(u) and v.isPredicate and
                               (len(self.gr.neighbors(v)) == 0) and
                               (len(self.gr.incidents(u)) == 1) and
                               (len(self.gr.neighbors(u)) == 1))

            for propNode, predNode in edges:
                change = True
                curFather = self.gr.incidents(propNode)[0]
                if not isApposition(curFather):
                    jointNode = node.join(node1=curFather,
                                          node2=predNode,
                                          gr=self.gr)
                    curFather.text = jointNode.text
                    self.gr.del_nodes([propNode, predNode])
                else:
                    self.gr.del_node(propNode)
                    self.gr.add_edge((predNode, curFather))
                    for curIncident in self.gr.incidents(curFather):
                        duplicateEdge(graph=self.gr,
                                      orig=(curIncident, curFather),
                                      new=(curIncident, predNode))
                        self.gr.del_edge((curIncident, curFather))

            #14
            propNodes = find_nodes(
                self.gr,
                lambda n: isProp(n) and len(self.gr.incidents(n)) == 1)
            for propNode in propNodes:
                curFather = self.gr.incidents(propNode)[0]
                if ((len(curFather.str) == 1) and
                    (not isCopular(curFather)) and
                    (curFather.str[0].word == "be"
                     or curFather.str[0].word in contractions)) or (
                         (isProp(curFather) or isRcmodProp(curFather))
                         and len(self.gr.neighbors(curFather)) == 1):
                    if len(self.gr.incidents(curFather)) == 1:
                        curAncestor = self.gr.incidents(curFather)[0]
                        duplicateEdge(graph=self.gr,
                                      orig=(curAncestor, curFather),
                                      new=(curAncestor, propNode))
                        self.gr.del_node(curFather)
                        # this node no longer describes the "be" relation
                        propNode.parent_relation = ''
                        return True

            #15
            edges = find_edges(
                graph=self.gr,
                filterFunc=lambda
                (u, v): isProp(v) and (v.parent_relation == "acomp") and len(
                    self.gr.neighbors(v)) == 1 and u.isPredicate)

            for pred, prop in edges:
                acompNode = self.gr.neighbors(prop)[0]
                duplicateEdge(graph=self.gr,
                              orig=(pred, prop),
                              new=(pred, acompNode),
                              newLabel="modifier")
                self.gr.del_node(
                    prop)  # TODO: could there be others connected to it?
                newPred = node.join(pred, acompNode, self.gr)
                newPred.isPredicate = True
                self.gr.add_node(newPred)
                for neigbour in self.gr.neighbors(pred):
                    duplicateEdge(graph=self.gr,
                                  orig=(pred, neigbour),
                                  new=(newPred, neigbour))

                for curFather in self.gr.incidents(pred):
                    duplicateEdge(graph=self.gr,
                                  orig=(curFather, pred),
                                  new=(curFather, newPred))

                if len(self.gr.neighbors(acompNode)) == 0:
                    self.gr.del_node(acompNode)

                self.gr.del_node(pred)
                #                 newPred.features["debug"] =True #TODO: remove this
                self.types.add("ACOMP")
                return True

            #16
            edges = find_edges(graph=self.gr,
                               filterFunc=lambda (u, v):
                               (isProp(v) or isRcmodProp(v)) and
                               (u in self.gr.neighbors(v)))

            for _, v in edges:
                if (len(self.gr.neighbors(v)) == 1):
                    self.gr.del_node(v)
                    return True

            #17
            edges = find_edges(graph=self.gr,
                               filterFunc=lambda (u, v): self.gr.edge_label(
                                   (u, v)) == SOURCE_LABEL and
                               (len(self.gr.neighbors(v)) == 0))
            for _, v in edges:
                curStr = " ".join([w.word for w in v.text])
                if curStr in contractions:
                    self.gr.del_node(v)
                    return True

            #18 - verbal complements
            edges = find_edges(graph=self.gr,
                               filterFunc=lambda (u, v): self.gr.edge_label(
                                   (u, v)) == 'ccomp' and u.isPredicate)
            for u, v in edges:
                self.gr.del_edge((u, v))
                self.gr.add_edge(edge=(u, v), label='dobj')
                v.features["debug"] = True
                self.types.add("DEBUG")
                return True

            return change
コード例 #11
0
    def fixApposition(self):
        """ 
        remove apposition nodes, and change to our format
        """
        def inner(curNode, children, relation):
            curNode.dups = [
                duplicate_node(graph=self.gr,
                               node=curNode,
                               connectToNeighbours=True) for _ in children
            ]
            for childIndex, child in enumerate(children):
                self.gr.add_edge(edge=(curNode.dups[childIndex], child),
                                 label=relation)
            parents = self.gr.incidents(curNode)
            for parent in parents:
                if not hasattr(parent, 'isDuplicated'):  #TODO: efficiency
                    if hasattr(parent, 'dups'):  #TODO: efficiency
                        for curParentDupInd, curParentDup in enumerate(
                                parent.dups
                        ):  # cycle detected - we already visited this parent, don't enter recursion
                            self.gr.add_edge(
                                edge=(curParentDup,
                                      curNode.dups[curParentDupInd]),
                                label=self.gr.edge_label((parent, curNode)))
                    else:
                        inner(curNode=parent,
                              children=curNode.dups,
                              relation=self.gr.edge_label((parent, curNode)))

            self.gr.del_node(curNode)

        # find apposition nodes in topological ordering
        apposNodes = sort_nodes_topologically(
            self.gr, find_nodes(self.gr, isApposition))

        for apposNode in apposNodes:
            # for each apposition node
            entities = [
                n for n in self.gr.neighbors(apposNode) if self.gr.edge_label((
                    apposNode,
                    n)) in [FIRST_ENTITY_LABEL, SECOND_ENTITY_LABEL]
            ]
            # move rcmod construction to the apposition node
            for entInd, ent in enumerate(entities):
                isEntRCmod = isRCmod(graph=self.gr, node=ent)
                if isEntRCmod:
                    #if hasattr(ent,'rcmod'): #TODO: efficiency
                    secondEntity = entities[entInd - 1]
                    propNode, relClause = isEntRCmod
                    dupProp = duplicate_node(graph=self.gr,
                                             node=propNode,
                                             connectToNeighbours=False)
                    dupRelClause = duplicate_node(graph=self.gr,
                                                  node=relClause,
                                                  connectToNeighbours=False)
                    self.gr.add_edge((secondEntity, dupProp))
                    self.gr.add_edge((dupProp, dupRelClause))
                    self.gr.add_edge(edge=(dupRelClause, secondEntity),
                                     label=self.gr.edge_label(
                                         (relClause, ent)))

                    for curNeigbour in self.gr.neighbors(relClause):
                        if curNeigbour != ent:
                            topNode = duplicate_component(graph=self.gr,
                                                          node=curNeigbour)
                            self.gr.add_edge(edge=(dupRelClause, topNode),
                                             label=self.gr.edge_label(
                                                 (relClause, curNeigbour)))

            # get his non-entities children
            # and connect to entities
            children = [
                c for c in self.gr.neighbors(apposNode) if c not in entities
            ]
            for child in children:
                for ent in entities:
                    self.gr.add_edge(edge=(ent, child),
                                     label=self.gr.edge_label(
                                         (apposNode, child)))
                # remove connection to appos node
                self.gr.del_edge(edge=(apposNode, child))

            # connect entities of apposition to the copular node
            # and copy all propagation from appos node to its entities
            copNode = CopularNode.init(
                index=apposNode.text[0].
                index,  # create a copular node to replace it
                features=apposNode.features,
                valid=True)
            self.gr.add_node(copNode)  # add it to graph

            for ent in entities:
                self.gr.add_edge(edge=(copNode, ent),
                                 label=self.gr.edge_label((apposNode, ent)))
                for curNode in apposNode.propagateTo:
                    addSymmetricPropogation(ent, curNode)

            # deal with parent of apposition
            for parent in self.gr.incidents(apposNode):
                inner(curNode=parent,
                      children=entities,
                      relation=self.gr.edge_label((parent, apposNode)))

            # finally - remove the apposition
            self.gr.del_node(apposNode)