Ejemplo n.º 1
0
def collapse_graph(gr):
    """
    Collapse preposition and conjunction nodes of ``gr`` into labelled edges.

    Prepositions: for every "prep" edge (u, v) whose target ``v`` has exactly
    one neighbor, an edge from ``u`` to that neighbor is added, carrying
    ``"prep_" + <lower-cased first word of v>``, and ``v`` is deleted.

    Conjunctions: for every "conj" edge (u, v) whose source ``u`` has exactly
    one "cc" neighbor, and that "cc" node has no neighbors of its own, the
    edge is re-created carrying ``"conj_" + <lower-cased first word of the cc
    node>``; the collected "cc" nodes are deleted at the end.

    The graph is modified in place and is also returned.
    """
    def is_collapsible_prep(edge):
        u, v = edge
        # Compare the neighbor *count* to 1; comparing the neighbor list
        # itself to 1 can never be true.
        return gr.edge_label((u, v)) == "prep" and len(gr.neighbors(v)) == 1

    def is_marked_conj(edge):
        u, v = edge
        return (gr.edge_label((u, v)) == "conj"
                and len(u.neighbors().get("cc", [])) == 1)

    # prepositions
    prep_edges = find_edges(graph=gr, filterFunc=is_collapsible_prep)
    for u, v in prep_edges:
        pobj = gr.neighbors(v)[0]
        # NOTE(review): the new label is passed positionally; confirm that
        # add_edge's second parameter really is the label.
        gr.add_edge((u, pobj), "prep_" + v.text[0].word.lower())
        gr.del_node(v)

    # conjunctions
    conj_edges = find_edges(graph=gr, filterFunc=is_marked_conj)
    toDel = []
    for u, v in conj_edges:
        cc = u.neighbors()['cc'][0]
        if len(gr.neighbors(cc)) == 0:
            gr.del_edge((u, v))
            gr.add_edge((u, v), "conj_" + cc.text[0].word.lower())
            toDel.append(cc)
    # the same cc node may have been collected for several conj edges
    for n in set(toDel):
        gr.del_node(n)
    return gr
Ejemplo n.º 2
0
 def _do_conditionals(self):
     """
     Rewrite the first conditional construction found in the graph.

     Candidate edges are "mark" edges whose target word (lower-cased) is one
     of "if", "while", "because", "although", "as", "once". The mark's
     father must be linked through an "advcl" relation, either to a
     neighbor or to an incident node; that "advcl" edge and the "mark" edge
     are deleted. Unless "rcmod" appears in the incidents of the node
     returned by ``self.head``, that node's incoming edges are duplicated
     onto the mark node and removed. Finally ``conditional_specific`` is
     invoked and the mark node is flagged as a predicate.

     Returns True right after the first rewrite; otherwise falls through
     and implicitly returns None.
     """
     subordinators = ["if", "while", "because", "although", "as", "once"]

     def is_conditional_mark(pair):
         if self.edge_label((pair[0], pair[1])) != "mark":
             return False
         return pair[1].text[0].word.lower() in subordinators

     for (markFather, markNode) in find_edges(self, is_conditional_mark):
         outgoing = markFather.neighbors()
         incoming = markFather.incidents()
         if "advcl" in outgoing:
             # the advcl relation leaves the mark's father
             advclNode = outgoing["advcl"][0]
             obsoleteEdge = (markFather, advclNode)
             head = markFather
         elif "advcl" in incoming:
             # the advcl relation enters the mark's father
             advclNode = incoming["advcl"][0]
             obsoleteEdge = (advclNode, markFather)
             head = advclNode
         else:
             continue
         if not advclNode:
             continue
         if "advcl" in advclNode.incidents():
             # the advcl node is itself the target of an advcl edge - skip
             continue

         head = self.head(head)
         self.del_edge(obsoleteEdge)
         self.del_edge((markFather, markNode))
         if "rcmod" not in head.incidents():
             for parent in self.incidents(head):
                 duplicateEdge(graph=self,
                               orig=(parent, head),
                               new=(parent, markNode))
                 self.del_edge((parent, head))
         self.conditional_specific(markNode, markFather, advclNode)
         markNode.isPredicate = True
         return True
Ejemplo n.º 3
0
    def do_questions(self):
        """
        Identify questions and introduce appropriate structure
        This currently follows the syntactic format of geo query wh-questions.
        Such as: "How large is Texas?"
        Where we have a WH question word ("How") dependent of a modifier of some property ("Large")

        For every edge whose target reports ``is_wh_question()``, the edge is
        removed, the wh node is connected (label QUESTION_INQUIRY) to the
        node returned by ``find_top_of_component`` for the former source, and
        the wh node is marked as a predicate.
        """
        # Find relevant edges. The edge is indexed instead of unpacked in the
        # parameter list, because tuple parameters are not valid Python 3.
        edges = find_edges(self, lambda edge: edge[1].is_wh_question())

        # Handle each separately
        for (modifier, wh_question) in edges:
            self.types.add("Questions")
            # 1. Remove dep edge
            self.del_edge((modifier, wh_question))

            # 2. Posit the wh question as head of the embedded clause
            self.add_edge(edge=(wh_question,
                                find_top_of_component(self, modifier)),
                          label=QUESTION_INQUIRY)

            # 3. Mark that the Wh-question node is a predicate
            wh_question.isPredicate = True
Ejemplo n.º 4
0
    def do_acomp(self):
        """
        Restructure "acomp" edges that leave predicate nodes.

        Only predicates with exactly one subject (looked up with
        ``multi_get`` over ``subject_dependencies``) are handled:

        * If the predicate's first word or its "Lemma" feature is in
          ``self.modalVerbs``, the acomp node becomes the predicate: it gets
          the subject (label ``domain_label``), the predicate's incidents
          are passed to ``duplicate_all_incidents``, and it is linked back
          to the old predicate with ``SOURCE_LABEL``. The old predicate is
          deleted when it is a childless single-word contraction; otherwise
          the type "acomp_as_modal" is recorded.
        * Otherwise the type "acomp_as_mwe" is recorded and the two nodes
          are merged.
        """
        # The edge is indexed rather than unpacked in the lambda signature:
        # tuple parameters are a syntax error on Python 3.
        edges = find_edges(
            self,
            lambda edge: (self.edge_label((edge[0], edge[1])) == "acomp"
                          and edge[0].isPredicate))
        for predNode, acompNode in edges:
            neighbors = predNode.neighbors()
            subjs = multi_get(neighbors, subject_dependencies)
            if len(subjs) != 1:
                # zero or several subjects: leave this edge untouched
                continue

            isModal = ((predNode.text[0].word in self.modalVerbs) or
                       (predNode.features.get("Lemma", "") in self.modalVerbs))
            if not isModal:
                self.types.add("acomp_as_mwe")
                merge_nodes(gr=self, node1=predNode, node2=acompNode)
                continue

            subj = subjs[0]
            self.del_edge((predNode, acompNode))
            self.add_edge((acompNode, subj), label=domain_label)
            acompNode.isPredicate = True
            self.del_edge((predNode, subj))
            duplicate_all_incidents(gr=self, source=predNode, target=acompNode)
            self.add_edge((acompNode, predNode), label=SOURCE_LABEL)
            if ((len(self.neighbors(predNode)) == 0) and
                    (len(predNode.text) == 1) and
                    (predNode.text[0].word in contractions)):
                self.del_node(predNode)
            else:
                self.types.add("acomp_as_modal")
Ejemplo n.º 5
0
 def _do_conditionals(self):
     """
     Rewrite the first conditional construction found in the graph.

     A candidate is a "mark" edge whose target word (lower-cased) is one of
     "if", "while", "because", "although", "as", "once", and whose source
     takes part in an "advcl" relation (as source or as target). For the
     first candidate whose advcl node is not itself the target of an
     "advcl" edge, the "advcl" and "mark" edges are deleted, the incoming
     edges of the node returned by ``self.head`` are moved to the mark node
     (unless "rcmod" is among that node's incidents),
     ``conditional_specific`` is invoked and the mark node becomes a
     predicate.

     Returns True right after the first rewrite; otherwise falls through
     and implicitly returns None.
     """
     # find conditionals constructions
     conditionalWords = ["if", "while", "because", "although", "as", "once"]
     # Tuple parameters in a lambda are a syntax error on Python 3, so the
     # edge is indexed instead.
     edges = find_edges(
         self,
         lambda edge: (self.edge_label((edge[0], edge[1])) == "mark") and
         (edge[1].text[0].word.lower() in conditionalWords))
     for (markFather, markNode) in edges:
         neighbors = markFather.neighbors()
         incidents = markFather.incidents()
         if "advcl" in neighbors:
             advclNode = neighbors["advcl"][0]
             toDel = (markFather, advclNode)
             head = markFather
         elif "advcl" in incidents:
             advclNode = incidents["advcl"][0]
             toDel = (advclNode, markFather)
             head = advclNode
         else:
             # no advcl relation around this mark
             continue
         if (not advclNode) or ("advcl" in advclNode.incidents()):
             continue
         head = self.head(head)
         self.del_edge(toDel)
         self.del_edge((markFather, markNode))
         if "rcmod" not in head.incidents():
             for father in self.incidents(head):
                 duplicateEdge(graph=self,
                               orig=(father, head),
                               new=(father, markNode))
                 self.del_edge((father, head))
         self.conditional_specific(markNode, markFather, advclNode)
         markNode.isPredicate = True
         return True
Ejemplo n.º 6
0
 def remove_aux(self):
     """
     Fold the targets of ignorable edges into their source nodes.

     For every edge whose label is in ``ignore_labels``, and whose target
     uid is still present in ``self.nodesMap``, the target's original text
     is appended to the source's and the target node is deleted.
     """
     def is_ignored(edge):
         return self.edge_label(edge) in ignore_labels

     for keeper, dropped in find_edges(self, is_ignored):
         if dropped.uid not in self.nodesMap:
             # uid is not registered in the node map - nothing to fold
             continue
         keeper.original_text.extend(dropped.original_text)
         self.del_node(dropped)
Ejemplo n.º 7
0
    def do_acomp(self):
        """
        Restructure "acomp" edges that leave predicate nodes.

        Predicates without exactly one subject are left alone. When the
        predicate's first word or "Lemma" feature is listed in
        ``self.modalVerbs`` the acomp node takes over as predicate;
        otherwise predicate and acomp node are merged and "acomp_as_mwe" is
        recorded.
        """
        def is_pred_acomp(pair):
            return (self.edge_label((pair[0], pair[1])) == "acomp"
                    and pair[0].isPredicate)

        for predNode, acompNode in find_edges(self, is_pred_acomp):
            subjs = multi_get(predNode.neighbors(), subject_dependencies)
            if len(subjs) != 1:
                continue

            modal = (predNode.text[0].word in self.modalVerbs) or (
                predNode.features.get("Lemma", "") in self.modalVerbs)
            if not modal:
                self.types.add("acomp_as_mwe")
                merge_nodes(gr=self, node1=predNode, node2=acompNode)
                continue

            subj = subjs[0]
            # hand the subject over to the acomp node, which becomes the
            # predicate
            self.del_edge((predNode, acompNode))
            self.add_edge((acompNode, subj), label=domain_label)
            acompNode.isPredicate = True
            self.del_edge((predNode, subj))
            duplicate_all_incidents(gr=self,
                                    source=predNode,
                                    target=acompNode)
            self.add_edge((acompNode, predNode), label=SOURCE_LABEL)

            # the old predicate survives unless it is a childless,
            # single-word contraction
            if (len(self.neighbors(predNode)) != 0
                    or len(predNode.text) != 1
                    or predNode.text[0].word not in contractions):
                self.types.add("acomp_as_modal")
            else:
                self.del_node(predNode)
Ejemplo n.º 8
0
 def fixExistensials(self):
     """
     Build the existential structure around EXPL_LABEL edges.

     For each edge labelled EXPL_LABEL whose source has exactly one node
     returned by ``deref`` over ``subject_dependencies``: the edge's target
     is deleted, every other neighbor of the source is re-attached under
     that subject node with its former label, the source's first word is
     set to EXISTENSIAL and its features are reset to an empty dict.
     """
     def is_expl(edge):
         return self.gr.edge_label(edge) == EXPL_LABEL

     for (topNode, expl) in find_edges(graph=self.gr, filterFunc=is_expl):
         subjNodes = deref(graph=self.gr,
                           node=topNode,
                           rel=subject_dependencies)
         if len(subjNodes) == 1:
             self.types.add(APPENDIX_EXISTENSIALS)
             self.gr.del_node(expl)
             subjNode = subjNodes[0]
             # snapshot the neighbors first: edges are deleted in the loop
             others = [
                 n for n in self.gr.neighbors(topNode) if n != subjNode
             ]
             for other in others:
                 self.gr.add_edge(edge=(subjNode, other),
                                  label=self.gr.edge_label((topNode, other)))
                 self.gr.del_edge((topNode, other))
             topNode.text[0].word = EXISTENSIAL
             topNode.features = {}
Ejemplo n.º 9
0
 def do_vmod_relclause(self):
     """
     Process "rcmod" and "vmod" edges.

     Every "rcmod" target gets the feature ``"top" = True``; every "vmod"
     edge records the type "vmod". In both cases, when the source's
     ``pos()`` is in ``determined_labels`` the edge is deleted, the type
     "definite rcmod" / "definite vmod" is recorded, and a reverse edge
     labelled ARG_LABEL is added unless one already exists.
     """
     # Edges are indexed inside the lambdas because tuple parameters are
     # not valid Python 3 syntax.
     edges = find_edges(
         self, lambda edge: self.edge_label((edge[0], edge[1])) == "rcmod")
     for (u, v) in edges:
         v.features["top"] = True
         if u.pos() in determined_labels:
             self.del_edge((u, v))
             self.types.add("definite rcmod")
             if not self.has_edge((v, u)):
                 self.add_edge((v, u), label=ARG_LABEL)

     # the vmod edges are looked up only after the rcmod pass has run
     edges = find_edges(
         self, lambda edge: self.edge_label((edge[0], edge[1])) == "vmod")
     for (u, v) in edges:
         self.types.add("vmod")
         if u.pos() in determined_labels:
             self.del_edge((u, v))
             self.types.add("definite vmod")
             if not self.has_edge((v, u)):
                 self.add_edge((v, u), label=ARG_LABEL)
Ejemplo n.º 10
0
 def do_poss(self):
     """
     Introduce an explicit possessive node for every "poss" edge.

     For each "poss" edge (possessed, possessor) the type "Possessives" is
     recorded, a node is obtained from ``getPossesive`` using the
     possessor's ``minIndex()``, and that node is connected to the possessor
     (POSSESSOR_LABEL) and to the possessed node (POSSESSED_LABEL).
     """
     # Index the edge instead of unpacking it in the lambda signature;
     # tuple parameters are a syntax error on Python 3.
     edges = find_edges(
         self, lambda edge: self.edge_label((edge[0], edge[1])) == "poss")
     for (possessed, possessor) in edges:
         self.types.add("Possessives")
         possessiveNode = getPossesive(
             self, possessor.minIndex())  # TODO: refine index
         self.add_edge(edge=(possessiveNode, possessor),
                       label=POSSESSOR_LABEL)
         self.add_edge(edge=(possessiveNode, possessed),
                       label=POSSESSED_LABEL)
Ejemplo n.º 11
0
 def do_existensials(self):
     """
     Fold childless "expl" nodes into their source node.

     For every "expl" edge (u, v) whose target has no neighbors: the type
     "existensials" is recorded, ``u`` gets a deep copy of its own text
     whose first word is replaced by EXISTENSIAL, its lemma is removed, the
     surface form of ``v`` is appended to that of ``u``, ``u`` is flagged
     ``"implicit"`` and ``v`` is deleted.
     """
     # Index the edge instead of unpacking it in the lambda signature;
     # tuple parameters are a syntax error on Python 3.
     edges = find_edges(
         self,
         lambda edge: (self.edge_label((edge[0], edge[1])) == "expl"
                       and len(self.neighbors(edge[1])) == 0))
     for (u, v) in edges:
         self.types.add("existensials")
         # copy before editing so the previous text object is not mutated
         u.text = deepcopy(u.text)
         u.text[0].word = EXISTENSIAL
         u.removeLemma()
         u.surface_form += v.surface_form
         u.features["implicit"] = True
         self.del_node(v)
Ejemplo n.º 12
0
 def do_poss(self):
     """
     Introduce an explicit possessive node for every "poss" edge.

     The node comes from ``getPossesive`` (called with the possessor's
     ``minIndex()``) and is linked to the possessor with POSSESSOR_LABEL and
     to the possessed node with POSSESSED_LABEL; the type "Possessives" is
     recorded for each edge handled.
     """
     def is_poss(pair):
         return self.edge_label((pair[0], pair[1])) == "poss"

     for owned, owner in find_edges(self, is_poss):
         self.types.add("Possessives")
         # TODO: refine index
         linkNode = getPossesive(self, owner.minIndex())
         self.add_edge(edge=(linkNode, owner), label=POSSESSOR_LABEL)
         self.add_edge(edge=(linkNode, owned), label=POSSESSED_LABEL)
Ejemplo n.º 13
0
def collapse_graph(gr):
    """
    Collapse preposition and conjunction nodes of ``gr`` into labelled edges.

    * "prep" edge (u, v) where ``v`` has exactly one neighbor: an edge from
      ``u`` to that neighbor is added with ``"prep_" + <lower-cased first
      word of v>`` and ``v`` is deleted.
    * "conj" edge (u, v) where ``u`` has exactly one "cc" neighbor and that
      node has no neighbors: the edge is re-created with ``"conj_" +
      <lower-cased first word of the cc node>``; the cc nodes are deleted
      once all edges have been handled.

    Modifies ``gr`` in place and returns it.
    """
    # prepositions
    # The filter checks the *number* of neighbors (a list never equals 1) and
    # indexes the edge, since tuple parameters are not valid Python 3.
    prep_edges = find_edges(
        graph=gr,
        filterFunc=lambda edge: (gr.edge_label((edge[0], edge[1])) == "prep"
                                 and len(gr.neighbors(edge[1])) == 1))
    for u, v in prep_edges:
        pobj = gr.neighbors(v)[0]
        # NOTE(review): the new label is passed positionally; confirm that
        # add_edge's second parameter really is the label.
        gr.add_edge((u, pobj), "prep_" + v.text[0].word.lower())
        gr.del_node(v)

    # conjunctions
    conj_edges = find_edges(
        graph=gr,
        filterFunc=lambda edge: (
            gr.edge_label((edge[0], edge[1])) == "conj"
            and len(edge[0].neighbors().get("cc", [])) == 1))
    toDel = []
    for u, v in conj_edges:
        cc = u.neighbors()['cc'][0]
        if len(gr.neighbors(cc)) == 0:
            gr.del_edge((u, v))
            gr.add_edge((u, v), "conj_" + cc.text[0].word.lower())
            toDel.append(cc)
    # de-duplicate: one cc node can be collected for several conj edges
    for n in set(toDel):
        gr.del_node(n)
    return gr
Ejemplo n.º 14
0
    def do_vmod_relclause(self):
        """
        Process "rcmod" and "vmod" edges.

        "rcmod" targets are flagged with ``features["top"] = True``; "vmod"
        edges record the type "vmod". For either relation, an edge whose
        source ``pos()`` is in ``determined_labels`` is deleted, a
        "definite ..." type is recorded and a reverse ARG_LABEL edge is
        added when missing.
        """
        def labelled(name):
            # filter accepting edges that carry exactly this label
            return lambda pair: (self.edge_label((pair[0], pair[1])) == name)

        def flip_if_determined(src, dst, typeName):
            if u.pos() not in determined_labels:
                return
            self.del_edge((src, dst))
            self.types.add(typeName)
            if not self.has_edge((dst, src)):
                self.add_edge((dst, src), label=ARG_LABEL)

        for (u, v) in find_edges(self, labelled("rcmod")):
            v.features["top"] = True
            flip_if_determined(u, v, "definite rcmod")

        # looked up only now, after the rcmod pass has modified the graph
        for (u, v) in find_edges(self, labelled("vmod")):
            self.types.add("vmod")
            flip_if_determined(u, v, "definite vmod")
Ejemplo n.º 15
0
 def do_existensials(self):
     """
     Fold childless "expl" nodes into their source node.

     The source receives a deep copy of its text with the first word set to
     EXISTENSIAL, loses its lemma, absorbs the expl node's surface form and
     is flagged ``"implicit"``; the expl node is deleted and the type
     "existensials" is recorded.
     """
     def is_bare_expl(pair):
         if self.edge_label((pair[0], pair[1])) != "expl":
             return False
         return len(self.neighbors(pair[1])) == 0

     for (host, explNode) in find_edges(self, is_bare_expl):
         self.types.add("existensials")
         # work on a copy of the text rather than on the existing object
         host.text = deepcopy(host.text)
         host.text[0].word = EXISTENSIAL
         host.removeLemma()
         host.surface_form += explNode.surface_form
         host.features["implicit"] = True
         self.del_node(explNode)
Ejemplo n.º 16
0
 def _fix(self):
     """
     Apply graph clean-ups, in this order:

     1. "mark" edges: delete the first target that is a childless
        single-word node reading "that", and return True.
     2. "rcmod" edges (u, v) lacking the reverse edge (v, u): add (v, u)
        labelled ARG_LABEL for the first one found, and return True.
     3. "prep" edges (u, v) where ``v`` has a single neighbor reached via
        "pobj": unless ``u`` is already connected to that pobj node, the
        preposition word is appended to ``u``'s surface form, an edge
        ``"prep_" + word`` from ``u`` to the pobj node is added and ``v``
        is deleted.
     4. "pobj" edges (u, v) where ``u`` has no incoming "prep" relation,
        visited by ascending ``u.minIndex()``: if ``u`` has childless
        neighbors under a relation starting with "prepc_", the one with
        the smallest ``minIndex()`` is deleted and its relation label
        replaces the label of (u, v).

     Returns True after step 1 or 2 made a change; otherwise falls through
     and implicitly returns None.
     """
     # NOTE: edges are indexed inside the lambdas because tuple parameters
     # are a syntax error on Python 3.

     # remove mark->that
     edges = find_edges(
         self, lambda edge: self.edge_label((edge[0], edge[1])) == "mark")
     for (u, v) in edges:
         if ((len(self.neighbors(v)) == 0) and (len(v.text) == 1)
                 and (v.text[0].word == "that")):
             self.del_node(v)
             return True

     # rcmod with no relation to father
     edges = find_edges(
         self,
         lambda edge: (self.edge_label((edge[0], edge[1])) == "rcmod") and
         (not self.has_edge((edge[1], edge[0]))))
     for u, v in edges:
         self.add_edge((v, u), label=ARG_LABEL)
         return True

     # prep collapse
     edges = find_edges(
         self,
         lambda edge: (self.edge_label((edge[0], edge[1])) == "prep") and
         (len(self.neighbors(edge[1])) == 1) and
         ("pobj" in edge[1].neighbors()))
     if edges:
         for (u, v) in edges:
             pobj = v.neighbors()["pobj"][0]
             if not (self.has_edge((u, pobj))):
                 w = v.text[0]
                 u.surface_form += [w]
                 self.add_edge((u, pobj), label="prep_" + w.word)
                 self.del_node(v)

     # fix dependency collapse bugs
     edges = find_edges(
         self,
         lambda edge: (self.edge_label((edge[0], edge[1])) == "pobj") and
         ("prep" not in edge[0].incidents()))
     for (u, v) in sorted(edges, key=lambda edge: edge[0].minIndex()):
         neighbors = u.neighbors()
         prepcRels = [rel for rel in neighbors if rel.startswith("prepc_")]
         candidates = [n for n in multi_get(neighbors, prepcRels)
                       if len(self.neighbors(n)) == 0]
         candidates.sort(key=lambda n: n.minIndex())
         if len(candidates) > 0:
             curToDel = candidates[0]
             rel = self.edge_label((u, curToDel))
             self.del_edge((u, v))
             self.add_edge((u, v), label=rel)
             self.del_node(curToDel)
Ejemplo n.º 17
0
 def _merge(self):
     """
     Merge the two ends of the first mergeable edge.

     An edge is mergeable when its label is in ``join_labels``, or when it
     is labelled "conj_and" and the first item of the source's "conjType"
     feature is '&'. If the source carries a "conjType" feature, the
     conjunction word is appended to the source's text first - reusing the
     matching word from its surface form when there is one, otherwise a
     new ``Word`` placed right after the source's ``maxIndex()``.

     Returns True when a merge was performed, False when no edge matched.
     """
     def is_mergeable(edge):
         # Takes the edge as one argument: tuple parameters are a syntax
         # error on Python 3.
         u, v = edge
         label = self.edge_label((u, v))
         return (label in join_labels) or (
             label == "conj_and"
             and u.features.get("conjType", [""])[0] == '&')

     edges = find_edges(self, is_mergeable)
     for u, v in edges:
         conjType = u.features.get("conjType", False)
         if conjType:
             conjType = conjType[0]  # only the words
             matching = [w for w in u.surface_form if w.word == conjType]
             if matching:
                 w = matching[0]
             else:
                 w = Word(index=u.maxIndex() + 1, word=conjType)
             u.text.append(w)
         merge_nodes(self, u, v)
         # only one merge per call
         return True
     return False
Ejemplo n.º 18
0
    def do_conj(self):
        """
        Rebuild every "conj_<marker>" relation as a coordination headed by a
        newly created marker node.

        For each node ``conj1`` that is the source of an edge whose label
        starts with "conj_", and for each such relation ``rel`` among its
        neighbors:

        * a predicate node holding the marker word (the part of ``rel``
          after "conj_") is created;
        * if ``conj1`` has incoming edges and all of them are aux edges, its
          incidents are handed to ``duplicate_all_incidents`` with the marker
          as target; otherwise every incoming edge of ``conj1`` is
          duplicated onto each other conjunct and onto the marker, and - if
          ``conj1`` is a predicate - its neighbors whose span starts before
          ``conj1.minIndex() + 1`` are duplicated onto the other conjuncts
          (SOURCE_LABEL edges and non-aux edges only);
        * the marker is connected to ``conj1`` and to every other conjunct
          with label ``rel``, the direct conj1 -> conjunct edges are
          removed, and ``conj1``'s surface form and text are adjusted.

        After the relations of a node are processed, the last ``rel`` seen
        is recorded in ``self.types``.
        """
        # Index the edge instead of unpacking it in the lambda signature;
        # tuple parameters are a syntax error on Python 3.
        edges = find_edges(
            self,
            lambda edge: self.edge_label(
                (edge[0], edge[1])).startswith("conj_"))
        nodes = set([u for (u, _) in edges])
        for conj1 in nodes:
            curStartIndex = conj1.minIndex() + 1
            curNeighbours = conj1.neighbors()
            # modifier: has fathers, and every incoming edge is an aux edge
            isModifier = (not bool([
                father for father in self.incidents(conj1)
                if not self.is_aux_edge((father.uid, conj1.uid))
            ])) and bool(self.incidents(conj1))
            for rel in [r for r in curNeighbours if r.startswith("conj_")]:
                marker = rel.split("conj_")[1]

                # TODO: how to find marker's index
                markerNode = newNode.Node(
                    text=[Word(curStartIndex + 1, marker)],
                    isPredicate=True,
                    features={"conj": True},
                    gr=self)

                # decide how to connect it to the rest of the graph, based
                # on its type
                if isModifier:
                    duplicate_all_incidents(gr=self,
                                            source=conj1,
                                            target=markerNode)
                else:
                    for father in self.incidents(conj1):
                        for conj2 in curNeighbours[rel]:
                            duplicateEdge(graph=self,
                                          orig=(father, conj1),
                                          new=(father, conj2))
                        duplicateEdge(graph=self,
                                      orig=(father, conj1),
                                      new=(father, markerNode))

                    if conj1.isPredicate:
                        for neighbor in self.neighbors(conj1):
                            if get_min_max_span(self,
                                                neighbor)[0] < curStartIndex:
                                for conj2 in curNeighbours[rel]:
                                    if (self.edge_label(
                                            (conj1, neighbor)) == SOURCE_LABEL
                                        ) or (not self.is_aux_edge(
                                            (conj1.uid, neighbor.uid))):
                                        duplicateEdge(graph=self,
                                                      orig=(conj1, neighbor),
                                                      new=(conj2, neighbor))

                # create the coordination construction, headed by the marker
                self.add_edge(edge=(markerNode, conj1), label=rel)
                for conj2 in curNeighbours[rel]:
                    self.del_edge((conj1, conj2))
                    self.add_edge(edge=(markerNode, conj2), label=rel)
                    if conj1.isPredicate:
                        conj2.isPredicate = conj1.isPredicate
                    conj1.surface_form = [
                        w for w in conj1.surface_form
                        if (w not in conj2.surface_form) and
                        (w not in conj1.text)
                    ]
                    for w in conj1.text:
                        if w not in conj1.surface_form:
                            conj1.surface_form.append(w)
                    if conj1.features.get("conjType", False):
                        conj1.text = [
                            w for w in conj1.text
                            if w.index not in conj1.features["conjType"][1]
                        ]

            # NOTE(review): ``rel`` is the value left over from the loop
            # above, so only the last relation of this node is recorded -
            # confirm that this is intended.
            self.types.add(rel)
Ejemplo n.º 19
0
 def fixProps(self):
     """
     Fix cases of conjunction of properties in indefinite nominals

     Selects the edges (u, v) where ``u`` is not definite, ``v`` is a prop
     or an rcmod prop, and ``v`` is not prenominal. Ordered by the first
     item of ``get_min_max_span`` for ``v``, each edge is re-created with
     its label extended by ";<position>", positions starting at 1.
     """
     # Edges are indexed inside the lambdas because tuple parameters are
     # not valid Python 3 syntax.
     edges = find_edges(
         graph=self.gr,
         filterFunc=lambda edge: (not isDefinite(edge[0])) and
         (isProp(edge[1]) or isRcmodProp(edge[1])) and
         (not edge[1].is_prenominal()))

     ordered = sorted(
         edges, key=lambda edge: get_min_max_span(self.gr, edge[1])[0])
     for counter, (u, v) in enumerate(ordered):
         curLabel = self.gr.edge_label((u, v))
         self.gr.del_edge((u, v))
         self.gr.add_edge(edge=(u, v),
                          label=";".join([curLabel, str(counter + 1)]))
         
         
     
     
         
Ejemplo n.º 20
0
 def fixExistensials(self):
     """
     Generate the existential structure.

     Handles every edge labelled EXPL_LABEL whose source yields exactly one
     node from ``deref`` over ``subject_dependencies``: the edge's target is
     deleted, the source's remaining neighbors are moved under that subject
     node (labels preserved), and the source becomes an EXISTENSIAL node
     with empty features.
     """
     def carries_expl_label(edge):
         return self.gr.edge_label(edge) == EXPL_LABEL

     explEdges = find_edges(graph=self.gr, filterFunc=carries_expl_label)
     for (topNode, expl) in explEdges:
         subjNodes = deref(graph=self.gr,
                           node=topNode,
                           rel=subject_dependencies)
         if len(subjNodes) != 1:
             # zero or several subjects: not handled
             continue

         self.types.add(APPENDIX_EXISTENSIALS)
         self.gr.del_node(expl)
         subjNode = subjNodes[0]

         # collect first, because the loop removes edges of topNode
         toMove = [
             n for n in self.gr.neighbors(topNode) if n != subjNode
         ]
         for moved in toMove:
             oldEdge = (topNode, moved)
             self.gr.add_edge(edge=(subjNode, moved),
                              label=self.gr.edge_label(oldEdge))
             self.gr.del_edge(oldEdge)

         topNode.text[0].word = EXISTENSIAL
         topNode.features = {}
Ejemplo n.º 21
0
 def _merge(self):
     """
     Merge the two ends of the first mergeable edge.

     Mergeable edges are those labelled with one of ``join_labels``, or
     labelled "conj_and" while the first item of the source's "conjType"
     feature is '&'. When the source has a "conjType" feature, the
     conjunction word is appended to its text before merging.

     Returns True after one merge, False if no edge qualified.
     """
     def should_merge(pair):
         if self.edge_label((pair[0], pair[1])) in join_labels:
             return True
         if self.edge_label((pair[0], pair[1])) != "conj_and":
             return False
         return pair[0].features.get("conjType", [""])[0] == '&'

     for u, v in find_edges(self, should_merge):
         conjType = u.features.get("conjType", False)
         if conjType:
             conjWord = conjType[0]  # only the words
             known = [w for w in u.surface_form if w.word == conjWord]
             # reuse the surface word when present, else create a new one
             u.text.append(known[0] if known else Word(
                 index=u.maxIndex() + 1, word=conjWord))
         merge_nodes(self, u, v)
         return True
     return False
Ejemplo n.º 22
0
    def fixProps(self):
        """
        Fix cases of conjunction of properties in indefinite nominals

        Edges (u, v) are selected when ``u`` is not definite, ``v`` is a
        prop or an rcmod prop, and ``v`` is not prenominal. They are visited
        in ascending order of the first item of ``get_min_max_span`` for
        ``v``; each one is re-created with ";<position>" appended to its
        label, positions counting from 1.
        """
        def is_indefinite_prop(edge):
            # Takes the edge as one argument: tuple parameters are a syntax
            # error on Python 3.
            u, v = edge
            return ((not isDefinite(u)) and (isProp(v) or isRcmodProp(v))
                    and (not v.is_prenominal()))

        def span_start(edge):
            return get_min_max_span(self.gr, edge[1])[0]

        edges = find_edges(graph=self.gr, filterFunc=is_indefinite_prop)

        for counter, (u, v) in enumerate(sorted(edges, key=span_start)):
            curLabel = self.gr.edge_label((u, v))
            self.gr.del_edge((u, v))
            self.gr.add_edge(edge=(u, v),
                             label=";".join([curLabel,
                                             str(counter + 1)]))
Ejemplo n.º 23
0
    def do_questions(self):
        """
        Detect wh-questions and restructure the graph around them.

        The pattern follows geo-query style questions such as
        "How large is Texas?", where the WH word ("How") depends on a
        modifier of some property ("Large"). For every edge whose target
        reports ``is_wh_question()``: the type "Questions" is recorded, the
        edge is removed, the WH node is linked (QUESTION_INQUIRY) to the
        node returned by ``find_top_of_component`` for the modifier, and the
        WH node is flagged as a predicate.
        """
        def points_at_wh_word(pair):
            return pair[1].is_wh_question()

        for (modifier, wh_question) in find_edges(self, points_at_wh_word):
            self.types.add("Questions")

            # drop the dependency edge into the WH word
            self.del_edge((modifier, wh_question))

            # the WH word now heads the embedded clause
            clauseTop = find_top_of_component(self, modifier)
            self.add_edge(edge=(wh_question, clauseTop),
                          label=QUESTION_INQUIRY)

            # and it acts as a predicate
            wh_question.isPredicate = True
Ejemplo n.º 24
0
        def inner():
            change = False
            # 1,2
            nodes = find_nodes(self.gr, isCondition)
            nodes.extend(find_nodes(self.gr, isPreposition))
            for curNode in nodes:
                sisterNodes = sister_nodes(graph=self.gr, node=curNode)
                for sisterNode in sisterNodes:
                    if isProp(sisterNode) and is_following(
                            graph=self.gr, node1=sisterNode, node2=curNode):
                        reattch(graph=self.gr,
                                node=curNode,
                                new_father=sisterNode)
                        return True
                        break
            # 3
            nodes = find_nodes(self.gr, isAdverb)
            for curNode in nodes:
                sisterNodes = sister_nodes(graph=self.gr, node=curNode)
                for sisterNode in sisterNodes:
                    if isProp(sisterNode) and is_following(
                            graph=self.gr, node1=curNode, node2=sisterNode):
                        reattch(graph=self.gr,
                                node=curNode,
                                new_father=sisterNode)
                        return True
                        break

            #4
            nodes = find_nodes(
                self.gr, lambda n: isCondition(n) and n.text[0].word ==
                "{0}-{1}".format(COND, 'that'))
            for curNode in nodes:
                curFathers = self.gr.incidents(curNode)
                curChildren = self.gr.neighbors(curNode)
                for curFather in curFathers:
                    for curChild in curChildren:
                        self.gr.add_edge(edge=(curFather, curChild),
                                         label="that")
                self.gr.del_node(curNode)
                change = True

            #5
            filterFunc = lambda n: isConjunction(n) and len(
                self.gr.incidents(n)
            ) == 1 and isConjunction(self.gr.incidents(n)[0]) and (
                n.conjType == self.gr.incidents(n)[0].conjType
            )  #TODO: efficiency - multiple calls to incidents and a lot of deref
            nodes = find_nodes(self.gr, filterFunc)

            for curNode in nodes:
                curFather = self.gr.incidents(curNode)[0]
                for curChild in self.gr.neighbors(curNode):
                    self.gr.add_edge((curFather, curChild))
                self.gr.del_node(curNode)
                change = True

            #6
            nodes = find_nodes(
                self.gr,
                lambda n: len(n.text) == 1 and n.text[0].word == "able")
            for curNode in nodes:
                curFathers = self.gr.incidents(curNode)
                if len(curFathers) == 1:
                    curChildren = self.gr.neighbors(curNode)
                    if len(curChildren) == 1:
                        child = curChildren[0]
                        if child.isPredicate and (self.gr.edge_label(
                            (curNode, child)) == "xcomp"):
                            father = curFathers[0]
                            self.gr.add_edge(edge=(father, child),
                                             label=self.gr.edge_label(
                                                 (father, curNode)))
                            child.features["Modal"] = {
                                "Value": ['able']
                            }  #TODO: is this maybe overrun previous modals?
                            self.gr.del_node(curNode)
                            change = True
            #7
            edges = find_edges(
                self.gr, lambda (u, v): isTime(u) and isTime(v) and len(
                    self.gr.neighbors(u)) == 1)
            for curFather, curSon in edges:
                for curNode in self.gr.neighbors(curSon):
                    self.gr.add_edge(edge=(curFather, curNode),
                                     label=self.gr.edge_label(
                                         (curSon, curNode)))
                self.gr.del_node(curSon)
                return True

            #8
            edges = find_edges(
                self.gr, lambda (u, v): (isTime(v) or isLocation(v)) and
                isPreposition(u) and u.is_time_prep())

            for prepNode, timeNode in edges:
                if (len(self.gr.neighbors(prepNode)) == 1):
                    # time node is only son - attach time to all of prep incidents
                    for curFather in self.gr.incidents(prepNode):
                        self.gr.add_edge(edge=(curFather, timeNode),
                                         label=self.gr.edge_label(
                                             (curFather, prepNode)))
                    self.gr.del_node(prepNode)
                    change = True

            #9
            conjNodes = find_nodes(
                self.gr,
                lambda n: isConjunction(n) and n.conjType.lower() == "and")
            for conjNode in conjNodes:
                curParents = []
                curChildren = self.gr.neighbors(conjNode)
                for curChild in curChildren:
                    curParents.extend([
                        parent for parent in self.gr.incidents(curChild)
                        if parent != conjNode
                    ])

                if len(curParents) == 1:
                    parent = curParents[0]
                    if isProp(parent):
                        # found a prop->conj construction
                        # connect all prop to parent of conj and remove the conj node
                        for child in curChildren:
                            if not (parent, child) in self.gr.edges():
                                self.gr.add_edge(edge=(parent, child))
                        self.gr.del_node(conjNode)
                        change = True

            #10
            change = change or self.fixRanges()

            #11
            edges = find_edges(
                self.gr, lambda (u, v): self.gr.edge_label(
                    (u, v)) == "loc" and len(self.gr.neighbors(u)) > 1)

            for topNode, loc in edges:
                for curNeigbor in self.gr.neighbors(topNode):
                    if curNeigbor != loc:
                        duplicateEdge(graph=self.gr,
                                      orig=(topNode, curNeigbor),
                                      new=(loc, curNeigbor))
                for curFather in self.gr.incidents(topNode):
                    duplicateEdge(graph=self.gr,
                                  orig=(curFather, topNode),
                                  new=(curFather, loc))
                self.gr.del_node(topNode)
                self.types.remove(APPENDIX_LOCATION)
                change = True

            #12
            edges = find_edges(graph=self.gr,
                               filterFunc=lambda
                               (u, v): isProp(u) and isLocation(v))

            for _, locNode in edges:
                for curFather in self.gr.incidents(locNode):
                    for curNeighbour in self.gr.neighbors(locNode):
                        duplicateEdge(graph=self.gr,
                                      orig=(locNode, curNeighbour),
                                      new=(curFather, curNeighbour))
                self.gr.del_node(locNode)
                self.types.remove(APPENDIX_LOCATION)
                change = True

            #13
            edges = find_edges(graph=self.gr,
                               filterFunc=lambda
                               (u, v): isProp(u) and v.isPredicate and
                               (len(self.gr.neighbors(v)) == 0) and
                               (len(self.gr.incidents(u)) == 1) and
                               (len(self.gr.neighbors(u)) == 1))

            for propNode, predNode in edges:
                change = True
                curFather = self.gr.incidents(propNode)[0]
                if not isApposition(curFather):
                    jointNode = node.join(node1=curFather,
                                          node2=predNode,
                                          gr=self.gr)
                    curFather.text = jointNode.text
                    self.gr.del_nodes([propNode, predNode])
                else:
                    self.gr.del_node(propNode)
                    self.gr.add_edge((predNode, curFather))
                    for curIncident in self.gr.incidents(curFather):
                        duplicateEdge(graph=self.gr,
                                      orig=(curIncident, curFather),
                                      new=(curIncident, predNode))
                        self.gr.del_edge((curIncident, curFather))

            #14
            propNodes = find_nodes(
                self.gr,
                lambda n: isProp(n) and len(self.gr.incidents(n)) == 1)
            for propNode in propNodes:
                curFather = self.gr.incidents(propNode)[0]
                if ((len(curFather.str) == 1) and
                    (not isCopular(curFather)) and
                    (curFather.str[0].word == "be"
                     or curFather.str[0].word in contractions)) or (
                         (isProp(curFather) or isRcmodProp(curFather))
                         and len(self.gr.neighbors(curFather)) == 1):
                    if len(self.gr.incidents(curFather)) == 1:
                        curAncestor = self.gr.incidents(curFather)[0]
                        duplicateEdge(graph=self.gr,
                                      orig=(curAncestor, curFather),
                                      new=(curAncestor, propNode))
                        self.gr.del_node(curFather)
                        # this node no longer describes the "be" relation
                        propNode.parent_relation = ''
                        return True

            #15
            edges = find_edges(
                graph=self.gr,
                filterFunc=lambda
                (u, v): isProp(v) and (v.parent_relation == "acomp") and len(
                    self.gr.neighbors(v)) == 1 and u.isPredicate)

            for pred, prop in edges:
                acompNode = self.gr.neighbors(prop)[0]
                duplicateEdge(graph=self.gr,
                              orig=(pred, prop),
                              new=(pred, acompNode),
                              newLabel="modifier")
                self.gr.del_node(
                    prop)  # TODO: could there be others connected to it?
                newPred = node.join(pred, acompNode, self.gr)
                newPred.isPredicate = True
                self.gr.add_node(newPred)
                for neigbour in self.gr.neighbors(pred):
                    duplicateEdge(graph=self.gr,
                                  orig=(pred, neigbour),
                                  new=(newPred, neigbour))

                for curFather in self.gr.incidents(pred):
                    duplicateEdge(graph=self.gr,
                                  orig=(curFather, pred),
                                  new=(curFather, newPred))

                if len(self.gr.neighbors(acompNode)) == 0:
                    self.gr.del_node(acompNode)

                self.gr.del_node(pred)
                #                 newPred.features["debug"] =True #TODO: remove this
                self.types.add("ACOMP")
                return True

            #16
            edges = find_edges(graph=self.gr,
                               filterFunc=lambda (u, v):
                               (isProp(v) or isRcmodProp(v)) and
                               (u in self.gr.neighbors(v)))

            for _, v in edges:
                if (len(self.gr.neighbors(v)) == 1):
                    self.gr.del_node(v)
                    return True

            #17
            edges = find_edges(graph=self.gr,
                               filterFunc=lambda (u, v): self.gr.edge_label(
                                   (u, v)) == SOURCE_LABEL and
                               (len(self.gr.neighbors(v)) == 0))
            for _, v in edges:
                curStr = " ".join([w.word for w in v.text])
                if curStr in contractions:
                    self.gr.del_node(v)
                    return True

            #18 - verbal complements
            edges = find_edges(graph=self.gr,
                               filterFunc=lambda (u, v): self.gr.edge_label(
                                   (u, v)) == 'ccomp' and u.isPredicate)
            for u, v in edges:
                self.gr.del_edge((u, v))
                self.gr.add_edge(edge=(u, v), label='dobj')
                v.features["debug"] = True
                self.types.add("DEBUG")
                return True

            return change
Ejemplo n.º 25
0
            candidates = [
                n for n in multi_get(
                    neighbors,
                    [rel for rel in neighbors if rel.startswith("prepc_")])
                if len(self.neighbors(n)) == 0
            ]
            candidates.sort(key=lambda n: n.minIndex())
            if len(candidates) > 0:
                curToDel = candidates[0]
                rel = self.edge_label((u, curToDel))
                self.del_edge((u, v))
                self.add_edge((u, v), label=rel)
                self.del_node(curToDel)

        # change agent edges with "prep_by"
        edges = find_edges(self, lambda edge:
                           (self.edge_label(edge) == "agent"))
        for edge in edges:
            self.del_edge(edge)
            self.add_edge(edge, label="prep_by")

#         #add xcomp inverse node
#         edges  = find_edges(self, lambda (u,v):self.edge_label((u,v)) == "xcomp" and u.isPredicate and v.isPredicate)
#         for (u,v) in edges:
#             if not self.has_edge((v, u)):
#                 self.add_edge((v,u), label=SOURCE_LABEL)
#                 self.types.add("infinitives")
#                 return True
#             if not multi_get(v.neighbors(),subject_dependencies):
#                 rcmodParentIncidents = u.incidents().get("rcmod",[])
#                 if len(rcmodParentIncidents)==1:
#                     subj = rcmodParentIncidents[0]
Ejemplo n.º 26
0
        def inner():
            """Run one pass of the numbered simplification rules over ``self.gr``.

            The rules are tried in the order they appear below. Some of them
            return True as soon as they have modified the graph; the others
            record the modification in ``change`` and let the pass continue
            with the following rules.

            Returns True when one of the early-returning rules fired;
            otherwise returns ``change``, which is True if any accumulating
            rule fired, or else whatever ``self.fixRanges()`` returned.
            """
            change = False
            # 1,2: a condition or preposition node that has a prop sister for
            # which is_following(node1=sister, node2=node) holds is re-attached
            # under that sister.
            nodes = find_nodes(self.gr, isCondition)
            nodes.extend(find_nodes(self.gr, isPreposition))
            for curNode in nodes:
                sisterNodes = sister_nodes(graph=self.gr, node=curNode)
                for sisterNode in sisterNodes:
                    if isProp(sisterNode) and is_following(graph=self.gr,
                                                           node1=sisterNode,
                                                           node2=curNode):
                        reattch(graph=self.gr, 
                                node=curNode, 
                                new_father=sisterNode)
                        return True
                        break  # unreachable: follows the return above
            # 3: same re-attachment for adverb nodes; note that the
            # is_following arguments are swapped relative to rule 1,2.
            nodes = find_nodes(self.gr, isAdverb)
            for curNode in nodes:
                sisterNodes = sister_nodes(graph=self.gr, node=curNode)
                for sisterNode in sisterNodes:
                    if isProp(sisterNode) and is_following(graph=self.gr,
                                                           node1=curNode,
                                                           node2=sisterNode):
                        reattch(graph=self.gr, 
                                node=curNode, 
                                new_father=sisterNode)
                        return True
                        break  # unreachable: follows the return above
                    
            # 4: a condition node whose first word is "<COND>-that" is removed;
            # every father is linked to every child with the label "that".
            nodes = find_nodes(self.gr,
                               lambda n:isCondition(n) and n.text[0].word == "{0}-{1}".format(COND,'that'))
            for curNode in nodes:
                curFathers = self.gr.incidents(curNode)
                curChildren = self.gr.neighbors(curNode)
                for curFather in curFathers:
                    for curChild in curChildren:
                        self.gr.add_edge(edge = (curFather,curChild),
                                         label = "that")
                self.gr.del_node(curNode)
                change = True
            
            # 5: a conjunction node whose only incident is a conjunction with
            # the same conjType is removed; its children are attached directly
            # to that father (edges are added without a label argument).
            filterFunc = lambda n:isConjunction(n) and len(self.gr.incidents(n)) == 1 and isConjunction(self.gr.incidents(n)[0]) and (n.conjType  == self.gr.incidents(n)[0].conjType) #TODO: efficiency - multiple calls to incidents and a lot of deref
            nodes = find_nodes(self.gr,filterFunc)
                               
            for curNode in nodes:
                curFather = self.gr.incidents(curNode)[0]
                for curChild in self.gr.neighbors(curNode):
                    self.gr.add_edge((curFather,curChild))
                self.gr.del_node(curNode)
                change = True 
                
            # 6: a single-word "able" node with exactly one father and exactly
            # one child, where the child is a predicate reached through an
            # "xcomp" edge, is removed. The father is linked to the child with
            # the label of the father->"able" edge and the child gets a
            # "Modal" feature.
            nodes = find_nodes(self.gr,
                               lambda n:len(n.text)==1 and n.text[0].word == "able")
            for curNode in nodes:
                curFathers = self.gr.incidents(curNode)
                if len(curFathers)==1:
                    curChildren = self.gr.neighbors(curNode)
                    if len(curChildren) ==1:
                        child = curChildren[0]
                        if child.isPredicate and (self.gr.edge_label((curNode,child))=="xcomp"):
                            father = curFathers[0]
                            self.gr.add_edge(edge=(father,child),
                                             label=self.gr.edge_label((father,curNode)))
                            child.features["Modal"]={"Value":['able']} #TODO: is this maybe overrun previous modals?
                            self.gr.del_node(curNode)
                            change=True
            # 7: for a time -> time edge whose source has exactly one
            # neighbor, the son's outgoing edges are copied (with their labels)
            # onto the father and the son is deleted. Only the first matching
            # edge is handled before returning.
            edges = find_edges(self.gr,
                               lambda (u,v):isTime(u)and isTime(v) and len(self.gr.neighbors(u))==1)
            for curFather,curSon in edges:
                for curNode in self.gr.neighbors(curSon):
                    self.gr.add_edge(edge=(curFather,curNode),
                                     label = self.gr.edge_label((curSon,curNode)))
                self.gr.del_node(curSon)
                return True
            
            # 8: edges from a preposition for which is_time_prep() holds to a
            # time or location node.
            edges = find_edges(self.gr,
                               lambda (u,v):(isTime(v) or isLocation(v)) and isPreposition(u) and u.is_time_prep())
            
            for prepNode,timeNode in edges:
                if (len(self.gr.neighbors(prepNode))==1):
                    # time node is only son - attach time to all of prep incidents
                    for curFather in self.gr.incidents(prepNode):
                        self.gr.add_edge(edge=(curFather,timeNode),
                                         label = self.gr.edge_label((curFather,prepNode)))
                    self.gr.del_node(prepNode)
                    change=True
                    
            # 9: "and" conjunction nodes. curParents collects, over all of the
            # conjunction's children, every incident other than the
            # conjunction itself.
            conjNodes = find_nodes(self.gr, lambda n: isConjunction(n) and n.conjType.lower() == "and")
            for conjNode in conjNodes:
                curParents = []
                curChildren = self.gr.neighbors(conjNode)
                for curChild in curChildren:
                    curParents.extend([parent for parent in self.gr.incidents(curChild) if parent != conjNode])
                
                if len(curParents)==1:
                    parent = curParents[0]
                    if isProp(parent):
                        # found a prop->conj construction 
                        # connect all prop to parent of conj and remove the conj node
                        for child in curChildren:
                            if not (parent,child) in self.gr.edges():
                                self.gr.add_edge(edge = (parent,child))
                        self.gr.del_node(conjNode)
                        change = True
                        
                    
            
            # 10: `or` short-circuits, so fixRanges() is only invoked when no
            # earlier rule in this pass has already set `change`.
            change = change or self.fixRanges()
            
            # 11: for a "loc" edge whose source has more than one neighbor,
            # the source's other outgoing edges and all of its incoming edges
            # are duplicated onto the loc node, and the source is deleted.
            edges = find_edges(self.gr,
                               lambda (u,v):self.gr.edge_label((u,v))=="loc" and len(self.gr.neighbors(u))>1)
            
            for topNode,loc in edges:
                for curNeigbor in self.gr.neighbors(topNode):
                    if curNeigbor != loc:
                        duplicateEdge(graph=self.gr, orig=(topNode,curNeigbor), new=(loc,curNeigbor))
                for curFather in self.gr.incidents(topNode):
                    duplicateEdge(graph=self.gr, orig=(curFather,topNode), new=(curFather,loc))
                self.gr.del_node(topNode)
                # NOTE(review): executed once per matching edge; if self.types
                # is a set, a second remove() would raise KeyError - confirm.
                self.types.remove(APPENDIX_LOCATION)
                change=True
                    
            
            # 12: for a prop -> location edge, the location node's outgoing
            # edges are duplicated onto each of its fathers and the location
            # node is deleted.
            edges = find_edges(graph=self.gr, 
                               filterFunc = lambda (u,v): isProp(u) and isLocation(v))
            
            for _,locNode in edges:
                for curFather in self.gr.incidents(locNode):
                    for curNeighbour in self.gr.neighbors(locNode):
                        duplicateEdge(graph=self.gr, orig=(locNode,curNeighbour), new=(curFather,curNeighbour))
                self.gr.del_node(locNode)
                # NOTE(review): same repeated remove() as in rule 11 - confirm.
                self.types.remove(APPENDIX_LOCATION)
                change=True
                
            # 13: prop -> predicate edges where the predicate has no
            # neighbors and the prop has exactly one incident and one neighbor.
            edges = find_edges(graph=self.gr, 
                               filterFunc = lambda (u,v): isProp(u) and v.isPredicate and (len(self.gr.neighbors(v)) ==0) and (len(self.gr.incidents(u)) ==1) and (len(self.gr.neighbors(u)) ==1))
            
            for propNode,predNode in edges:
                change = True
                curFather = self.gr.incidents(propNode)[0]
                if not isApposition(curFather):
                    # the father takes over the text of the joined
                    # father+predicate node; prop and predicate are dropped
                    jointNode = node.join(node1=curFather, 
                                          node2=predNode, 
                                          gr=self.gr)
                    curFather.text = jointNode.text
                    self.gr.del_nodes([propNode,predNode])
                else:
                    # apposition father: drop the prop, put the predicate
                    # above the father and move the father's incoming edges
                    # onto the predicate
                    self.gr.del_node(propNode)
                    self.gr.add_edge((predNode,curFather))
                    # NOTE(review): predNode was just made an incident of
                    # curFather, so it may itself be visited by this loop -
                    # confirm that is intended.
                    for curIncident in self.gr.incidents(curFather):
                        duplicateEdge(graph=self.gr, 
                                      orig=(curIncident,curFather), 
                                      new=(curIncident,predNode))
                        self.gr.del_edge((curIncident,curFather))
                        
                        
            # 14: prop nodes with a single incident. The father is skipped
            # over (ancestor linked straight to the prop, father deleted) when
            # it is either a non-copular single-word "be"/contraction node, or
            # a prop / rcmod-prop with exactly one neighbor - and in both cases
            # has exactly one incident itself.
            propNodes = find_nodes(self.gr, lambda n:isProp(n) and len(self.gr.incidents(n))==1)
            for propNode in propNodes:
                curFather = self.gr.incidents(propNode)[0]
                if ((len(curFather.str)==1) and (not isCopular(curFather)) and (curFather.str[0].word == "be" or curFather.str[0].word in contractions)) or ((isProp(curFather) or isRcmodProp(curFather)) and len(self.gr.neighbors(curFather))==1):
                    if len(self.gr.incidents(curFather))==1:                    
                        curAncestor = self.gr.incidents(curFather)[0]
                        duplicateEdge(graph=self.gr,
                                      orig=(curAncestor,curFather), 
                                      new=(curAncestor,propNode))
                        self.gr.del_node(curFather)
                        # this node no longer describes the "be" relation
                        propNode.parent_relation = ''
                        return True
            
            # 15: predicate -> prop edges where the prop's parent_relation is
            # "acomp" and the prop has exactly one neighbor. The predicate and
            # that neighbor are joined into a new predicate node which
            # inherits the old predicate's outgoing and incoming edges; only
            # the first matching edge is handled before returning.
            edges = find_edges(graph=self.gr, 
                               filterFunc = lambda (u,v): isProp(v) and (v.parent_relation == "acomp") and len(self.gr.neighbors(v))==1 and u.isPredicate)
            
            for pred, prop in edges:
                acompNode = self.gr.neighbors(prop)[0]
                duplicateEdge(graph=self.gr, orig=(pred,prop), new=(pred,acompNode),
                              newLabel = "modifier")
                self.gr.del_node(prop) # TODO: could there be others connected to it?
                newPred = node.join(pred,acompNode,self.gr)
                newPred.isPredicate =True
                self.gr.add_node(newPred)
                for neigbour in self.gr.neighbors(pred):
                    duplicateEdge(graph=self.gr, orig=(pred,neigbour), new=(newPred,neigbour))
                
                for curFather in self.gr.incidents(pred):
                    duplicateEdge(graph=self.gr, orig=(curFather,pred), new=(curFather,newPred))
                
                # the acomp node is dropped only if it has no neighbors
                if len(self.gr.neighbors(acompNode))==0:
                    self.gr.del_node(acompNode)
                    
                self.gr.del_node(pred)
#                 newPred.features["debug"] =True #TODO: remove this
                self.types.add("ACOMP")
                return True
            
            # 16: edges (u, v) where v is a prop or rcmod-prop and u is also
            # among v's neighbors; v is deleted when it has exactly one
            # neighbor.
            edges = find_edges(graph=self.gr,
                               filterFunc = lambda (u,v): (isProp(v) or isRcmodProp(v)) and (u in self.gr.neighbors(v)))
            
            for _,v in edges:
                if (len(self.gr.neighbors(v))==1):
                    self.gr.del_node(v)
                    return True
            
            # 17: SOURCE_LABEL edges into a node without neighbors; the node
            # is deleted when its space-joined words form an entry of
            # `contractions`.
            edges = find_edges(graph=self.gr,
                               filterFunc = lambda (u,v): self.gr.edge_label((u,v))==SOURCE_LABEL and (len(self.gr.neighbors(v))==0))
            for _,v in edges:
                curStr = " ".join([w.word for w in v.text])
                if curStr in contractions:
                    self.gr.del_node(v)
                    return True
                        
            #18 - verbal complements: the first 'ccomp' edge leaving a
            # predicate is relabelled 'dobj' and its target is flagged with a
            # "debug" feature.
            edges = find_edges(graph=self.gr,
                               filterFunc = lambda (u,v): self.gr.edge_label((u,v))=='ccomp' and u.isPredicate)
            for u,v in edges:
                self.gr.del_edge((u,v))
                self.gr.add_edge(edge=(u,v),
                                 label = 'dobj')
                v.features["debug"] =True
                self.types.add("DEBUG")
                return True
                    
            return change
Ejemplo n.º 27
0
    def _fix(self):
        """Apply one round of dependency-graph repairs.

        Returns True immediately after either of the first two repairs
        changes the graph (a dangling "that" marker is removed, or a missing
        back edge is added for an "rcmod" edge). Otherwise the remaining
        rewrites (preposition collapsing, "pobj" relabelling, "agent"
        relabelling) are all applied and False is returned.
        """

        def label_of(edge):
            # Look the label up through a freshly built (source, target) pair.
            return self.edge_label((edge[0], edge[1]))

        # remove mark->that: a childless, single-word "that" marker
        for _, marker in find_edges(self, lambda e: label_of(e) == "mark"):
            childless = len(self.neighbors(marker)) == 0
            if (childless and len(marker.text) == 1
                    and marker.text[0].word == "that"):
                self.del_node(marker)
                return True

        # rcmod with no relation to father: add the reverse edge
        def rcmod_without_back_edge(e):
            if label_of(e) != "rcmod":
                return False
            return not self.has_edge((e[1], e[0]))

        for father, clause in find_edges(self, rcmod_without_back_edge):
            self.add_edge((clause, father), label=ARG_LABEL)
            return True

        # prep collapse: head -prep-> P -pobj-> X becomes head -prep_<P>-> X
        def collapsible_prep(e):
            prep = e[1]
            return (label_of(e) == "prep"
                    and len(self.neighbors(prep)) == 1
                    and "pobj" in prep.neighbors())

        for head, prep in find_edges(self, collapsible_prep):
            target = prep.neighbors()["pobj"][0]
            if self.has_edge((head, target)):
                continue
            prep_word = prep.text[0]
            head.surface_form += [prep_word]
            self.add_edge((head, target), label="prep_" + prep_word.word)
            self.del_node(prep)

        # fix dependency collapse bugs: a "pobj" edge whose source has no
        # incoming "prep" takes over the label of the source's earliest
        # childless "prepc_*" neighbor, which is then deleted
        def orphan_pobj(e):
            return label_of(e) == "pobj" and "prep" not in e[0].incidents()

        pobj_edges = sorted(find_edges(self, orphan_pobj),
                            key=lambda e: e[0].minIndex())
        for source, obj in pobj_edges:
            by_label = source.neighbors()
            prepc_labels = [lbl for lbl in by_label
                            if lbl.startswith("prepc_")]
            childless = [cand for cand in multi_get(by_label, prepc_labels)
                         if len(self.neighbors(cand)) == 0]
            if not childless:
                continue
            # min() yields the first candidate with the smallest minIndex(),
            # i.e. the head of the list after a stable sort on that key
            victim = min(childless, key=lambda cand: cand.minIndex())
            new_label = self.edge_label((source, victim))
            self.del_edge((source, obj))
            self.add_edge((source, obj), label=new_label)
            self.del_node(victim)

        # change agent edges with "prep_by"
        for agent_edge in find_edges(
                self, lambda edge: self.edge_label(edge) == "agent"):
            self.del_edge(agent_edge)
            self.add_edge(agent_edge, label="prep_by")

        # NOTE: a disabled pass that added inverse "xcomp" edges used to
        # follow here; it was commented out and is not executed.

        return False
Ejemplo n.º 28
0
                    
        # fix dependency collapse bugs
        edges = find_edges(self, lambda (u, v):(self.edge_label((u, v)) == "pobj") and ("prep" not in u.incidents()))
        for (u, v) in sorted(edges,key=lambda((u,v)): u.minIndex()):
            neighbors = u.neighbors()
            candidates = [n for n in multi_get(neighbors, [rel for rel in neighbors if rel.startswith("prepc_")]) if len(self.neighbors(n)) == 0]
            candidates.sort(key=lambda n:n.minIndex())
            if len(candidates) > 0:
                curToDel = candidates[0]
                rel = self.edge_label((u, curToDel))
                self.del_edge((u, v))
                self.add_edge((u, v), label=rel)
                self.del_node(curToDel)
        
        # change agent edges with "prep_by"
        edges = find_edges(self, lambda edge:(self.edge_label(edge) == "agent"))
        for edge in edges:
            self.del_edge(edge)
            self.add_edge(edge,label="prep_by")
            
#         #add xcomp inverse node
#         edges  = find_edges(self, lambda (u,v):self.edge_label((u,v)) == "xcomp" and u.isPredicate and v.isPredicate)
#         for (u,v) in edges:
#             if not self.has_edge((v, u)):
#                 self.add_edge((v,u), label=SOURCE_LABEL)
#                 self.types.add("infinitives")
#                 return True
#             if not multi_get(v.neighbors(),subject_dependencies):
#                 rcmodParentIncidents = u.incidents().get("rcmod",[]) 
#                 if len(rcmodParentIncidents)==1:
#                     subj = rcmodParentIncidents[0]
Ejemplo n.º 29
0
 def remove_aux(self):
     """Fold the targets of ignorable edges into their source nodes.

     For each edge whose label is in ``ignore_labels``, and whose target is
     still registered in ``self.nodesMap``, the target's ``original_text``
     is appended to the source's and the target node is deleted.
     """
     def has_ignored_label(edge):
         return self.edge_label(edge) in ignore_labels

     for source, target in find_edges(self, has_ignored_label):
         # the target may already have been removed via an earlier edge
         if target.uid not in self.nodesMap:
             continue
         source.original_text.extend(target.original_text)
         self.del_node(target)
Ejemplo n.º 30
0
    def do_prop(self):
        """Rewrite adjectival, copular and appositive constructions.

        Three passes are run in order over the graph (``self``):

        1. "amod" edges whose source has a part of speech listed in
           ``determined_labels`` are replaced by a relation built with
           ``createPropRel``.
        2. Single-word copular predicate nodes that have both a subject and
           an object are removed; subject and object are linked either via
           ``createPropRel`` or through a new node from ``getCopular``.
        3. "appos" edges are replaced the same way, after the incoming
           edges of the first member have been copied onto the second.

        Construction names are recorded in ``self.types`` along the way.
        Nothing is returned.
        """
        # prenominal of definite
        edges = find_edges(self, lambda (u, v): self.edge_label(
            (u, v)) == "amod")
        for domain, mod in edges:
            if domain.pos(
            ) in determined_labels:  # the np by itself is definite
                self.createPropRel(domain=domain, mod=mod)
                mod.features["top"] = True
                self.del_edge((domain, mod))

        # copular on adjective or indefinite
        # and sameAs otherwise

        # find copular
        nodes = find_nodes(
            self, lambda n: len(n.text) == 1 and n.text[0].word in
            copular_verbs and n.isPredicate)
        for curNode in nodes:
            curNeighbours = curNode.neighbors()
            subjs = multi_get(curNeighbours, subject_dependencies)
            objs = multi_get(curNeighbours, clausal_complements)
            # fall back to generic "dep" children when no complement is found
            if not objs: objs = multi_get(curNeighbours, ["dep"])
            # every remaining child that is neither a subject nor an object
            others = [
                n for n in self.neighbors(curNode) if n not in subjs + objs
            ]
            if (len(objs) > 0) and (
                    len(subjs) > 0):  #and (not others) and (len(objs) == 1):
                # only the first object is used as the object; the rest are
                # handled like the other children
                others += objs[1:]
                if others:
                    self.types.add("complicated BE")
                obj = objs[0]
                if len(objs) > 1:
                    self.types.add("debug")
                for subj in subjs:
                    if 'Lemma' in curNode.features:
                        del (curNode.features['Lemma'])
                    if (subj in self.neighbors(obj)):
                        # object already points at the subject: only copy the
                        # copula's features onto the object
                        obj.features.update(curNode.features)
                    else:
                        if (not isDefinite(obj)) or (obj in curNeighbours.get(
                                "acomp", [])):
                            # indefinite or "acomp" object: the object
                            # becomes the head of the new relation
                            self.createPropRel(domain=subj, mod=obj)
                            head = obj
                            obj.surface_form += curNode.surface_form

                        else:
                            # definite object: link subject and object
                            # through a new copular node
                            self.types.add("SameAs")
                            self.del_edge((curNode, subj))
                            if self.has_edge((curNode, obj)):
                                self.del_edge((curNode, obj))


#                             self.del_edges([(curNode, subj), (curNode, obj)])
                            copularNode = getCopular(self,
                                                     curNode.text[0].index,
                                                     features=curNode.features)
                            copularNode.surface_form = curNode.surface_form
                            self.add_edge((copularNode, subj),
                                          label=FIRST_ENTITY_LABEL)
                            self.add_edge((copularNode, obj),
                                          label=SECOND_ENTITY_LABEL)
                            head = copularNode

                        head.features.update(curNode.features)

                        # fathers of the copula now point at the new head
                        for curFather in self.incidents(curNode):
                            if not self.has_edge((curFather, head)):
                                duplicateEdge(graph=self,
                                              orig=(curFather, curNode),
                                              new=(curFather, head))

                        # NOTE(review): the guard tests for an edge from
                        # `obj`, but the new edge starts at `head`, which is
                        # the copular node in the "SameAs" branch - confirm.
                        for curOther in others:
                            if not self.has_edge((obj, curOther)):
                                duplicateEdge(graph=self,
                                              orig=(curNode, curOther),
                                              new=(head, curOther))
                # erase "be" node
                self.del_node(curNode)

        # find appositions
        for subj, obj in find_edges(
                self, lambda edge: self.edge_label(edge) == "appos"):
            # duplicate relations
            for curFather in self.incidents(subj):
                # per-father counter of the appositions processed so far
                curIndex = curFather.features.get("apposIndex", 0) + 1
                #                 curLabel = "{0},{1}".format(curIndex,self.edge_label((curFather,subj)))
                curLabel = self.edge_label((curFather, subj))
                # the father->subj edge is deleted and re-added with the same
                # label, then mirrored onto obj
                self.del_edge((curFather, subj))
                self.add_edge((curFather, subj), curLabel)
                self.add_edge((curFather, obj), curLabel)
                # remember the duplicated pair on the father
                ls = curFather.features.get("dups", [])
                ls.append((subj, obj))
                curFather.features["dups"] = ls

                curFather.features["apposIndex"] = curIndex
            if (not isDefinite(subj)
                    and not isDefinite(obj)) or (obj in subj.neighbors().get(
                        "acomp", [])):
                self.createPropRel(domain=subj, mod=obj)
                obj.features["top"] = True
            else:
                # add new node
                # TODO: subj here is a problem - should point to the comma or something
                self.types.add("SameAs")
                copularNode = getCopular(self, subj.text[0].index, features={})
                copularNode.surface_form = []
                self.add_edge((copularNode, subj), label=FIRST_ENTITY_LABEL)
                self.add_edge((copularNode, obj), label=SECOND_ENTITY_LABEL)

            self.del_edge((subj, obj))
Ejemplo n.º 31
0
    def do_conj(self):
        """Rewrite collapsed ``conj_*`` edges as marker-headed coordinations.

        For every node that is the source of an edge whose label starts with
        ``"conj_"``, and for each such label among that node's neighbors, a
        new predicate node carrying the label's suffix (the conjunction
        word) is created. The marker is connected to the rest of the graph,
        becomes the head of the first conjunct and of all its co-conjuncts,
        and the direct conjunct-to-conjunct edges are deleted.

        Every processed label is recorded in ``self.types``. Nothing is
        returned.
        """
        edges = find_edges(self, lambda u_v10: self.edge_label(
            (u_v10[0], u_v10[1])).startswith(
                "conj_"))  # and (not u.isPredicate) and (not v.isPredicate))
        nodes = {u for (u, _) in edges}
        for conj1 in nodes:
            curStartIndex = conj1.minIndex() + 1
            curNeighbours = conj1.neighbors()
            # conj1 counts as a modifier when it has incoming edges and all
            # of them are aux edges
            isModifier = (not bool([
                father for father in self.incidents(conj1)
                if not self.is_aux_edge((father.uid, conj1.uid))
            ])) and bool(self.incidents(conj1))
            for rel in [
                    rel for rel in curNeighbours if rel.startswith("conj_")
            ]:
                marker = rel.split("conj_")[1]

                markerNode = newNode.Node(
                    text=[Word(curStartIndex + 1,
                               marker)],  #TODO: how to find marker's index
                    isPredicate=True,
                    features={"conj": True},
                    gr=self)

                #decide how to connect it to the rest of the graph, based on its type
                if isModifier:
                    duplicate_all_incidents(gr=self,
                                            source=conj1,
                                            target=markerNode)
                else:
                    # every father of conj1 also governs each co-conjunct
                    # and the marker
                    for father in self.incidents(conj1):
                        for conj2 in curNeighbours[rel]:
                            duplicateEdge(graph=self,
                                          orig=((father, conj1)),
                                          new=((father, conj2)))
                        duplicateEdge(graph=self,
                                      orig=((father, conj1)),
                                      new=((father, markerNode)))

                    if conj1.isPredicate:
                        # children of conj1 whose span starts before
                        # curStartIndex are shared with the co-conjuncts,
                        # provided the edge is a SOURCE_LABEL edge or is not
                        # an aux edge
                        for neighbor in self.neighbors(conj1):
                            if get_min_max_span(self,
                                                neighbor)[0] < curStartIndex:
                                for conj2 in curNeighbours[rel]:
                                    if (self.edge_label(
                                        (conj1, neighbor)) == SOURCE_LABEL
                                        ) or (not self.is_aux_edge(
                                            (conj1.uid, neighbor.uid))):
                                        duplicateEdge(graph=self,
                                                      orig=(conj1, neighbor),
                                                      new=(conj2, neighbor))

                # create the coordination construction, headed by the marker
                self.add_edge(edge=(markerNode, conj1), label=rel)
                for conj2 in curNeighbours[rel]:
                    self.del_edge((conj1, conj2))
                    self.add_edge(edge=(markerNode, conj2), label=rel)
                    if conj1.isPredicate:
                        conj2.isPredicate = conj1.isPredicate
                    # drop from conj1's surface form the words that belong to
                    # conj2 or to conj1's own text, then append conj1's text
                    # words again at the end
                    conj1.surface_form = [
                        w for w in conj1.surface_form
                        if (w not in conj2.surface_form) and (
                            w not in conj1.text)
                    ]
                    for w in conj1.text:
                        if w not in conj1.surface_form:
                            conj1.surface_form.append(w)
                    if conj1.features.get("conjType", False):
                        # remove the words whose indices are listed in the
                        # second element of the "conjType" feature
                        conj1.text = [
                            w for w in conj1.text
                            if w.index not in conj1.features["conjType"][1]
                        ]

                # Record the label while `rel` is still the loop variable.
                # Previously this ran after the loop, so only the last label
                # of each node was recorded and `rel` could be stale or
                # unbound if the loop body never executed.
                self.types.add(rel)
Ejemplo n.º 32
0
    def do_prop(self):
        """Rewrite adjectival, copular and appositive constructions in place.

        Three passes are made over the graph, in this order:

        1. Every "amod" edge whose domain node has a POS tag listed in
           ``determined_labels`` is replaced by a property relation
           (``createPropRel``) and the modifier is flagged ``"top"``.
        2. Every predicate node consisting of a single word from
           ``copular_verbs`` that has at least one subject and one object is
           removed.  For each subject the object either becomes a property of
           the subject (indefinite object, or an "acomp" object) or both are
           attached to a fresh copular node ("SameAs").  Edges that pointed at
           or left the verb are duplicated onto the new head.
        3. Every "appos" edge is removed.  The relations of the fathers of the
           first member are duplicated onto the second one, and the pair is
           linked either by a property relation or by a fresh copular node.

        The method mutates the graph and adds the markers "complicated BE",
        "debug" and "SameAs" to ``self.types`` when the corresponding cases
        occur.  It has no explicit return value.
        """
        # prenominal of definite
        # The filter takes the edge as one argument, exactly like the "appos"
        # filter below; the tuple-parameter form ``lambda (u, v): ...`` is a
        # syntax error on Python 3.
        edges = find_edges(self, lambda edge: self.edge_label(edge) == "amod")
        for domain, mod in edges:
            if domain.pos() in determined_labels:  # the np by itself is definite
                self.createPropRel(domain=domain, mod=mod)
                mod.features["top"] = True
                self.del_edge((domain, mod))

        # copular on adjective or indefinite
        # and sameAs otherwise

        # find copular
        nodes = find_nodes(
            self,
            lambda n: (len(n.text) == 1
                       and n.text[0].word in copular_verbs
                       and n.isPredicate))
        for curNode in nodes:
            curNeighbours = curNode.neighbors()
            subjs = multi_get(curNeighbours, subject_dependencies)
            objs = multi_get(curNeighbours, clausal_complements)
            if not objs:
                # no clausal complement: fall back to generic "dep" children
                objs = multi_get(curNeighbours, ["dep"])
            coreArgs = subjs + objs
            others = [n for n in self.neighbors(curNode) if n not in coreArgs]

            # a copula is only rewritten when it has both a subject and an
            # object; otherwise the node is left untouched
            if not (len(objs) > 0 and len(subjs) > 0):
                continue

            # only the first object is treated as the complement; the
            # remaining ones are handled like any other child of the verb
            others += objs[1:]
            if others:
                self.types.add("complicated BE")
            obj = objs[0]
            if len(objs) > 1:
                self.types.add("debug")

            for subj in subjs:
                if 'Lemma' in curNode.features:
                    del curNode.features['Lemma']

                if subj in self.neighbors(obj):
                    # the object already points at the subject: just carry
                    # the verb's features over to the object
                    obj.features.update(curNode.features)
                    continue

                if (not isDefinite(obj)) or (
                        obj in curNeighbours.get("acomp", [])):
                    self.createPropRel(domain=subj, mod=obj)
                    head = obj
                    obj.surface_form += curNode.surface_form
                else:
                    self.types.add("SameAs")
                    self.del_edge((curNode, subj))
                    if self.has_edge((curNode, obj)):
                        self.del_edge((curNode, obj))
                    copularNode = getCopular(self,
                                             curNode.text[0].index,
                                             features=curNode.features)
                    copularNode.surface_form = curNode.surface_form
                    self.add_edge((copularNode, subj),
                                  label=FIRST_ENTITY_LABEL)
                    self.add_edge((copularNode, obj),
                                  label=SECOND_ENTITY_LABEL)
                    head = copularNode

                head.features.update(curNode.features)

                # whoever pointed at the verb now also points at the new head
                for curFather in self.incidents(curNode):
                    if not self.has_edge((curFather, head)):
                        duplicateEdge(graph=self,
                                      orig=(curFather, curNode),
                                      new=(curFather, head))

                # move the verb's remaining children under the new head
                # NOTE(review): the existence check is made against ``obj``
                # even when ``head`` is the copular node - confirm intended.
                for curOther in others:
                    if not self.has_edge((obj, curOther)):
                        duplicateEdge(graph=self,
                                      orig=(curNode, curOther),
                                      new=(head, curOther))

            # erase "be" node
            self.del_node(curNode)

        # find appositions
        for subj, obj in find_edges(
                self, lambda edge: self.edge_label(edge) == "appos"):
            # duplicate relations
            for curFather in self.incidents(subj):
                curIndex = curFather.features.get("apposIndex", 0) + 1
                curLabel = self.edge_label((curFather, subj))
                # the father -> subj edge is deleted and re-created with the
                # same label, then mirrored onto the second member
                self.del_edge((curFather, subj))
                self.add_edge((curFather, subj), curLabel)
                self.add_edge((curFather, obj), curLabel)

                # remember on the father which pair of nodes was duplicated
                ls = curFather.features.get("dups", [])
                ls.append((subj, obj))
                curFather.features["dups"] = ls

                curFather.features["apposIndex"] = curIndex

            if (not isDefinite(subj) and not isDefinite(obj)) or (
                    obj in subj.neighbors().get("acomp", [])):
                self.createPropRel(domain=subj, mod=obj)
                obj.features["top"] = True
            else:
                # add new node
                # TODO: subj here is a problem - should point to the comma or something
                self.types.add("SameAs")
                copularNode = getCopular(self, subj.text[0].index, features={})
                copularNode.surface_form = []
                self.add_edge((copularNode, subj),
                              label=FIRST_ENTITY_LABEL)
                self.add_edge((copularNode, obj),
                              label=SECOND_ENTITY_LABEL)

            self.del_edge((subj, obj))