コード例 #1
0
ファイル: tree_readers.py プロジェクト: mfeblowitz/props
def create_dep_graphs_from_stream(stream, HOME_DIR):
    """
    Build one dependency graph per blank-line-separated group of lines.

    Every non-empty line is matched against the module-level ``pat`` regex,
    which must yield five groups: relation, head word, head id, dependent
    word and dependent id. The full id string keys the node map, while only
    the part of the id before the first "'" is used as the numeric word index.

    @type  stream: iterable of strings
    @param stream: dependency lines; blank lines separate sentences

    @type  HOME_DIR: string
    @param HOME_DIR: passed unchanged to every GraphWrapper that is created

    @rtype: list [(GraphWrapper, dict)]
    @return: for each sentence, its graph and the id -> Node map used to build it
    """
    graphs = []
    in_sentence = False
    curGraph = GraphWrapper("", HOME_DIR)
    nodesMap = {}

    for line in stream:
        line = line.strip()

        if not line:
            # a blank line closes the running sentence; repeated blank lines are ignored
            if in_sentence:
                graphs.append((curGraph, nodesMap))
                curGraph = GraphWrapper("", HOME_DIR)
                nodesMap = {}
                in_sentence = False
            continue

        in_sentence = True
        # NOTE: a line that does not match ``pat`` raises AttributeError here (match is None)
        rel, head, head_id, dep, dep_id = pat.match(line).groups()

        # create the head node first, then the dependent, each only once per id
        for node_id, word in ((head_id, head), (dep_id, dep)):
            if node_id not in nodesMap:
                nodesMap[node_id] = Node(
                    text=[Word(index=int(node_id.split("'")[0]), word=word)],
                    isPredicate=False,
                    features={},
                    gr=curGraph,
                    orderText=True)

        edge = (nodesMap[head_id], nodesMap[dep_id])
        # the same head/dependent pair may be listed twice (marked "stanford bug"
        # in the original code); the later relation replaces the earlier one
        if curGraph.has_edge(edge):
            curGraph.del_edge(edge)
        curGraph.add_edge(edge=edge, label=rel)

    # a stream that does not end with a blank line still carries a last sentence
    if in_sentence:
        graphs.append((curGraph, nodesMap))

    return graphs
コード例 #2
0
ファイル: convert.py プロジェクト: gabrielStanovsky/props
def tree_to_graph(tree):
    """
    Convert a dependency tree into a GraphWrapper.

    A graph node is created for every tree node with a truthy id whose
    parent relation is not "erased"; afterwards every tree node with a truthy
    id is linked to its parent (when the parent id is truthy as well), the
    edge being labelled with the node's parent relation.

    @type  tree: DepTree
    @param tree: the tree to convert; it is indexed with 0 to get the original
                 sentence and iterated through values()

    @rtype: GraphWrapper
    @return: the graph built from tree
    """
    # the home directory is read from the environment and suffixed with a backslash
    home_dir = os.environ.get("PROPEXTRACTION_HOME_DIR") + "\\"
    graph = GraphWrapper(tree[0].original_sentence, home_dir)

    # first pass: one graph node per kept tree node
    graph_nodes = {}
    for node in tree.values():
        if not node.id or node.parent_relation == "erased":
            continue
        graph_nodes[node.id] = treeNode_to_graphNode(node, graph)

    # second pass: connect every node to its parent
    for node in tree.values():
        if not node.id:
            continue
        parent_id = node.parent.id
        if parent_id:
            graph.add_edge(edge=(graph_nodes[parent_id], graph_nodes[node.id]),
                           label=node.parent_relation)

    return graph
コード例 #3
0
def tree_to_graph(tree):
    """
    Build the graph representation of a dependency tree.

    Nodes: every tree node that has a truthy id and is not marked "erased".
    Edges: parent -> child for every tree node with a truthy id whose parent
    also has a truthy id, labelled with the child's parent relation.

    @type  tree: DepTree
    @param tree: source tree; tree[0] supplies the original sentence and
                 tree.values() supplies the nodes

    @rtype: GraphWrapper
    @return: the populated graph
    """
    # environment-provided home directory, with a trailing backslash appended
    homeDir = os.environ.get("PROPEXTRACTION_HOME_DIR") + "\\"
    result = GraphWrapper(tree[0].original_sentence, homeDir)

    # map tree ids to freshly created graph nodes, skipping erased nodes
    idToNode = {
        treeNode.id: treeNode_to_graphNode(treeNode, result)
        for treeNode in tree.values()
        if treeNode.id and treeNode.parent_relation != "erased"
    }

    # link every identified node to its identified parent
    for treeNode in tree.values():
        if treeNode.id:
            parentId = treeNode.parent.id
            if parentId:
                endpoints = (idToNode[parentId], idToNode[treeNode.id])
                result.add_edge(edge=endpoints, label=treeNode.parent_relation)

    return result
コード例 #4
0
ファイル: parse_graph.py プロジェクト: arueckle/props
class ParseGraph:
    """
    class to bunch together all function of conversion from DepTree to digraph
    Mainly in order to store the graph as a member which all these functions can edit.   
    """
    def __init__(self,t,locationAnnotator):
        """
        Create the graph of a syntactic tree and fill it by parsing the tree.

        @type  t: DepTree
        @param t: syntactic tree to be converted; a falsy id marks the ROOT
                  element, in which case its first child is converted

        @type  locationAnnotator: object offering is_location(string)
        @param locationAnnotator: stored and used by parseBottom to recognise locations

        @type gr: GraphWrapper
        @var  gr: the graph representing t, built over t's original sentence

        @var  tree: the subtree which is actually parsed
        @var  types: appendix of the construction types met while parsing
        """
        # a falsy id means this is the ROOT element - the real tree is its first child
        self.tree = t if t.id else t.children[0]
        self.gr = GraphWrapper(t.get_original_sentence())
        self.locationAnnotator = locationAnnotator

        # maintain an appendix for easier browsing
        self.types = appendix_types()

        # populate self.gr
        self.parse(self.tree)
        
    def parse(self,t):
        """
        Get the graph representation from a syntactic representation
        Returns through the graph parameter.
        
        @type  t: DepTree
        @param tree: syntactic tree to be converted
        
        @rtype: Node
        @return: the node in the graph corresponding to the top node in t
        """
        
        #order matters!
        if t.is_conditional_predicate():
            self.types.add(APPENDIX_COND)
            return self.parseConditional(outcome = t._CONDITIONAL_PREDICATE_FEATURE_Outcome()["Value"],
                                         condList = t.condPred)

        
        if t._VERBAL_PREDICATE_SUBTREE_Adv():
            advChildren = t.adverb_children
            advSubj = t.adverb_subj
            return self.parseAdverb(subj=advSubj, 
                             advChildren=advChildren)
        
        if t.is_conjunction_predicate():
            self.types.add(APPENDIX_CONJUNCTION)
            return self.parseConjunction(baseElm = t.baseElm,
                                         conjResult = t.conjResult)
        
        if t.is_appositional_predicate():
            self.types.add(APPENDIX_APPOS)
            firstEntity = t._APPOSITIONAL_PREDICATE_FEATURE_Left_Side()["Value"]
            secondEntity = t._APPOSITIONAL_PREDICATE_FEATURE_Right_Side()["Value"]
            return self.parseApposition(index = t.id,
                                        first_entity=firstEntity,
                                        second_entity=secondEntity)
        
        if t.is_relative_clause():
            self.types.add(APPENDIX_RCMOD)
            return self.parseRcmod(np = t._RELCLAUSE_PREDICATE_FEATURE_Rest()['Value'], 
                                   modList = t.rcmodPred)
        
        if t.is_prepositional_predicate():
            self.types.add(APPENDIX_PREP)
            return self.parsePreposition(psubj=t._PREPOSITIONAL_PREDICATE_FEATURE_psubj()["Value"],
                                          prepChildList=t.prepChildList)
                    
        if t.is_copular_predicate():
            self.types.add(APPENDIX_COP)
            firstEntity = t._COPULAR_PREDICATE_FEATURE_Copular_Predicate()["Value"]
            secondEntity = t._COPULAR_PREDICATE_FEATURE_Copular_Object()["Value"]
            return self.parseCopular(index = t.id,
                                     first_entity=firstEntity,
                                     second_entity=secondEntity,
                                     features = syntactic_item.get_verbal_features(t))
        
        if t.is_possesive_predicate():
            self.types.add(APPENDIX_POSS)
            possessor = t._POSSESSIVE_PREDICATE_FEATURE_Possessor()["Value"]
            possessed = t._POSSESSIVE_PREDICATE_FEATURE_Possessed()["Value"]
            possessive = t._POSSESSIVE_PREDICATE_FEATURE_Possessive()["Value"]
            return self.parsePossessive(possessor = possessor, 
                                        possessed = possessed,
                                        possessive = possessive)
        
            
        if t.is_adjectival_predicate():
            self.types.add(APPENDIX_ADJ)
            return self.parseProp(subject = t._ADJECTIVAL_PREDICATE_FEATURE_Subject()["Value"],
                                  copulaIndex = NO_INDEX,
                                  adjectiveChildList = t.adjectivalChildList,
                                  propAsHead=False)
            
        if t.is_clausal_complement():
            self.types.add(APPENDIX_COMPLEMENT)
            return self.parseComplement(compSubj = t.compSubj,
                                        compChildren = t.compChildList)
        
        if t.unhandled_advcl():
            # put each unhandled advcl as a disconnected subgraph
            for c in t.advcl:
                self.parse(c)
            return self.parse(t)
        
        if t.is_verbal_predicate():
            self.types.add(APPENDIX_VERB)
            head_ret = t._VERBAL_PREDICATE_SUBTREE_Head()
            return self.parseVerbal(indexes = head_ret["Span"],
                             verbs = head_ret["Value"].split(" "),
                             arguments = t.collect_arguments(),
                             tree = t)
        
            
        
        else:
            # fall back - pack all the tree in a single node
            if len(t.children)==1:
                if (t.children[0].parent_relation == "nn") and (t.word.endswith(",")) and (t.children[0].word.endswith(",")):
                    #conjunction in disguise
                    child = t.children[0]
                    t.children = []
                    ret =  self.parseConjunction(cc = [(t.id,"and")], 
                                                conjElements = [t,child])
                    t.children = [child]
                    return ret
            
            nodes = t._get_subtree(filter_labels_ban)
            text = [Word(index=index,
                         word=nodes[index]) for index in sorted(nodes.keys())] 
            topNode = self.parseBottom(text = sorted(text,key=lambda x:x.index),
                        features = syntactic_item.get_verbal_features(t))

            return topNode
    

    def parseBottom(self,text,features):
        """
        Parse a node for which all other construction tests have failed,
        no tree structure is assumed over the input text.

        The text is first handed to timexWrapper, whose result is read as
        (time expressions, remaining text). Time expressions become a time
        subgraph (see parseTime). Without time expressions, text which the
        location annotator recognises becomes a location node pointing at the
        text node through a "loc" edge, and that location node is returned.

        @type  text: list[Word]
        @param text: words to appear at node, ordered by index

        @type  features: dict{string:string}
        @param features: features of the node

        @rtype  Node
        @return the node which was inserted into the graph
        """
        time_res = timexWrapper(text)
        if time_res[0]:
            self.types.add(APPENDIX_TIME)
            time_node = self.parseTime(time_res[0])
        else:
            time_node = False
            s = " ".join([w.word for w in text])
            if self.locationAnnotator.is_location(s):
                locNode = LocationNode.init(features={})
                self.gr.add_node(locNode)
                bottomNode = Node(isPredicate=False,
                                  text = text,
                                  features = features,
                                  valid=True)
                self.gr.add_node(bottomNode)
                self.gr.add_edge((locNode,bottomNode),
                                 label="loc")
                self.types.add(APPENDIX_LOCATION)
                return locNode

        left_text = time_res[1]
        if not left_text:
            if time_node:
                # the time expression covered the whole text
                return time_node
            #TODO: probably not good, but happens
            topNode = Node(isPredicate=False,
                           text = [],
                           features = features,
                           valid=True)
            self.gr.add_node(topNode)
            return topNode

        topNode = Node(isPredicate=False,
                       text = left_text,
                       features = features,
                       valid=True)
        if not topNode.str and time_node:
            # the remaining words produced an empty node: keep only the time
            # node and let it carry the features
            time_node.features.update(topNode.features)
            return time_node

        # without a time node an empty topNode is still inserted, instead of
        # failing on the missing time node
        self.gr.add_node(topNode)
        if time_node:
            self.gr.add_edge((topNode,time_node))
        return topNode
         
    
    def parseTime(self,time_res):
        """
        Insert a "time" node into the graph, with one child node per time expression.

        @type  time_res: list[TimeExpression]
        @param time_res: expressions to add; the text of each one becomes the
                         text of its node and its value is stored under the
                         "Time Value" feature

        @rtype  Node
        @return the time node under which all the expressions were attached
        """
        timeTop = TimeNode.init(features={})
        self.gr.add_node(timeTop)

        for expr in time_res:
            exprNode = Node(isPredicate = False,
                            text = expr.text,
                            features = {"Time Value":expr.value},
                            valid = True)
            self.gr.add_node(exprNode)
            # unlabeled edge from the time node to the expression
            self.gr.add_edge((timeTop,exprNode))

        return timeTop
    
    def parseComplement(self,compSubj,compChildren):
        """
        add a complement subgraph to the graph
        
        @type  compSubj: DepTree
        @param compSubj: the subject of all following complements
        
        @type  compChildren: list [depTree]  
        @param compChildren: all subclauses
        """         
        
        topNode = self.parse(compSubj)
        
        for child in compChildren:
            curNode = self.parse(child)
            self.gr.add_edge(edge=(topNode,curNode),
                             label=child.parent_relation)
        return topNode
        
    
    def parseConjunction(self,baseElm,conjResult):
        """
        add a conjunction subgraph to the graph
        
        @type  cc: list [(int,string)]
        @param cc: the connecting element
        
        @type  conjElements: list [DepTree]  
        @param conjElements: subtrees to be joined in conjunction
        """
        
        
        retNode = self.parse(baseElm)
        
        for cc,conjElements in conjResult:
        
            if not conjElements:
                # discourse marker
                discourseNode = Node(isPredicate = False,
                                text = [Word(ind,word) for ind,word in cc],
                                features = {},
                                valid=True)
                self.gr.add_node(discourseNode)
            
                self.gr.add_edge(edge =(retNode,discourseNode),
                                 label= DISCOURSE_LABEL)
            else:
                # generate top conjunction node
                conjNode = ConjunctionNode.init(text = [Word(ind,word) for ind,word in cc],
                                      features = {})
                self.gr.add_node(conjNode)
                #connect cc to base element
                self.gr.add_edge((conjNode,retNode))
                
                #generate node for each element and connect to topNode
                for elm in conjElements:
                    curNode = self.parse(elm)
                    self.gr.add_edge(edge = (conjNode,curNode))
            
        return retNode
            
        
    
    def parseRcmod(self,np,modList):
        """
        add a relative clause subgraph to the graph
        
        @type  np: DepTree
        @param np: the entity being modified by the relative clause
        
        @type  modlist: a list of DepTrees,  
        @param modList: trees modifying np
        """
        
        topNode = self.parse(np)
        
        for temp_t in modList:
            # add nodes
            rcmodNode = self.parse(temp_t._RELCLAUSE_PREDICATE_FEATURE_Relclause()["Value"])
            propNode = RCMODPropNode.init(features={},
                                     valid=True)
            self.gr.add_node(propNode)
            
            #add edges
            self.gr.add_edge(edge=(topNode,propNode))
            self.gr.add_edge(edge=(propNode,rcmodNode))
            if rcmodNode.isPredicate:
                # this will create a cycle, label is a hurestic to guess the connection between relative clause and top node
                self.gr.add_edge(edge=(rcmodNode,topNode), label=temp_t.rcmodRel)
            
            # record that this construction came from rcmod
            topNode.rcmod = [propNode,rcmodNode] 
        
        return topNode
        
    
    def parseConditional(self,outcome,condList):
        """
        add a conditional subgraph to the graph
        
        @type  outcome: DepTree
        @param outcome: the outcome of all following conditions
        
        @type  condList: a list of DepTrees,  
        @param condList: all conditionals regarding outcome
        """
        
        outcomeNode = self.parse(outcome)
        
        for temp_t in condList:
            mark = temp_t._CONDITIONAL_PREDICATE_FEATURE_Mark()
            markValue = mark["Value"]
            markIndex = mark["Span"][0]
            conditionNode = self.parse(temp_t._CONDITIONAL_PREDICATE_FEATURE_Condition()["Value"]) 
            
            #create nodes
            markNode = CondNode.init(index = markIndex,
                                condType = markValue,
                                features = {},
                                valid=True)
            self.gr.add_node(markNode)
            
            markValue = markValue.lower()

            # add edges according to the type of conditional
            if markValue in condition_outcome_markers:
                self.gr.add_edge(edge = (markNode,outcomeNode),
                                 label = OUTCOME_LABEL)
                
                self.gr.add_edge(edge = (markNode,conditionNode),
                                 label = CONDITION_LABEL)
            
            elif markValue in reason_outcome_markers:
                self.gr.add_edge(edge = (markNode,outcomeNode),
                                 label = OUTCOME_LABEL)
                
                self.gr.add_edge(edge = (markNode,conditionNode),
                                 label = REASON_LABEL)
            
            elif markValue in comp_markers:
                self.gr.add_edge(edge = (conditionNode,outcomeNode),
                                 label = COMP_LABEL)
            
            else:
                #add edges
                self.gr.add_edge((outcomeNode,markNode))
                self.gr.add_edge((markNode,conditionNode))
    
        #return top node
        return outcomeNode
    
    def parsePreposition(self,psubj,prepChildList):
        """
        add a preposition subgraph to the graph
        
        @type  psubj: DepTree
        @param psubj: the subject of all following prepositions
        
        @type  prepChildList: a list of DepTrees,  
        @param prepChildList: all prepositions regarding nsubj
        """
        
        #create top nodes:
        
        topNode = self.parse(psubj)
        
        for temp_t in prepChildList:
            #generate bottom node and connect to prep
            pobj = temp_t._PREPOSITIONAL_PREDICATE_FEATURE_pobj()["Value"]
            if not pobj: # e.g., #460
                continue
            
            bottomNode = self.parse(pobj)
            
            
            #generate prep node and connect to top node
            prepNode = PrepNode.init(index=temp_t.prepInd,
                                prepType=temp_t.prepType,
                                features={},
                                valid = True)
#             self.gr.add_node(prepNode)
            
            #self.gr.add_edge(edge = (prepNode,bottomNode))
            self.gr.add_edge(edge = (topNode,bottomNode),
                             label = " ".join([w.word for w in prepNode.str]))
            
            
        
            
            
            
            
            
        return topNode
        
    def parseVerbal(self,indexes,verbs,arguments,tree):
        """
        add a verbal subgraph to the graph

        @type  indexes: list [int]
        @param indexes: the index(es) of the verb in the sentence

        @type  verbs: list [string]
        @param verbs: the string(s) representing the verb

        @type  arguments: list
        @param arguments: list of DepTrees of arguments

        @type tree: DepTree
        @param tree: tree object from which to extract various features

        @rtype: Node
        @return: the predicate node created for the verb
        """
        # features of the verbal head; the lemma is dropped when it merely
        # repeats the first verb word
        verbFeatures = syntactic_item.get_verbal_features(tree)
        if verbFeatures['Lemma'] == verbs[0]:
            del(verbFeatures['Lemma'])

        # every remaining feature name is recorded in the appendix
        for featureName in verbFeatures:
            self.types.add(featureName)

        verbWords = [Word(index=index,
                          word=verb) for index,verb in zip(indexes,verbs)]
        verbNode = graph_representation.node.Node(isPredicate=True,
                                                  text = verbWords,
                                                  features=verbFeatures,
                                                  valid=True)
        self.gr.add_node(verbNode)

        # arguments hang from the verb, labelled by their relation to it
        for argTree in arguments:
            argNode = self.parse(argTree)
            self.gr.add_edge((verbNode,argNode), argTree.parent_relation)

        # time expressions: verb -> time node -> parsed time subtree
        timeSubtree,_unused = tree._VERBAL_PREDICATE_SUBTREE_Time()
        if not timeSubtree:
            return verbNode

        timeNode = graph_representation.node.TimeNode.init(features = {})
        self.gr.add_node(timeNode)
        timeSubGraph = self.parse(timeSubtree)
        self.gr.add_edge((verbNode,timeNode))
        self.gr.add_edge((timeNode,timeSubGraph))

        return verbNode
        
    def parseAdverb(self,subj,advChildren):
        topNode = self.parse(subj) 
        
        for advChild,mwe in advChildren:
#             advTopNode = advNode.init(features = {})
#             self.gr.add_node(advTopNode)
#             self.gr.add_edge(edge = (topNode,advTopNode))
            if mwe:
                # in case this is a complex adverb ("as long as")
                curAdvNode = Node(isPredicate = False,
                                  text = [Word(ind,word) for ind,word in mwe],
                                  features = {},
                                  valid = True)
                self.gr.add_node(curAdvNode)
                curChildNode = self.parse(advChild)
                self.gr.add_edge(edge=(topNode,curAdvNode),
                                 label = ADV_LABEL)
                self.gr.add_edge(edge = (curAdvNode,curChildNode),
                                 label = advChild.parent_relation)
                
                
                
            else:
                curChildNode = self.parse(advChild)
                self.gr.add_edge(edge = (topNode,curChildNode),
                                 label = ADV_LABEL)

        return topNode 

        
    
    def parseCopular(self,index,first_entity,second_entity,features):
        """
        add a copular subgraph to the graph

        When the second entity is an adjectival modifier, or is not definite,
        the construction is handed to parseProp instead; in that case
        second_entity is modified in place (adjectivalChild, relative_adj and
        parent_relation are set).

        @type  index: int
        @param index: the index of the copula in the sentence

        @type  first_entity: DepTree
        @param first_entity: the syntax tree of the first entity

        @type  second_entity: DepTree
        @param second_entity: the syntax tree of the second entity

        @type  features: dict
        @param features: features given to the copular node (or passed on to parseProp)

        @rtype: Node
        @return: the top node of the copula subgraph
        """
        reduceToProp = second_entity.parent_relation in adjectival_mod_dependencies
        if not reduceToProp:
            reduceToProp = not second_entity.is_definite()

        if reduceToProp:
            # reduce to prop construction when the second element in the copula is an adjective
            # e.g., Rabbit is white -> white rabbit
            # or when the second element is indefinite
            second_entity.adjectivalChild = [second_entity]
            second_entity.relative_adj = False #TODO: calculate this
            second_entity.parent_relation = "copular" #TODO: this might be dangerous :\
            return self.parseProp(subject = first_entity,
                                  copulaIndex = index,
                                  adjectiveChildList = [second_entity],
                                  features=features,
                                  propAsHead = True)

        # proper copula: a copular node on top of both entities
        copulaNode = CopularNode.init(index=index,
                                      features=features,
                                      valid=True)
        self.gr.add_node(copulaNode)

        firstNode = self.parse(first_entity)
        secondNode = self.parse(second_entity)

        # propagate properties between the two nodes
        graph_representation.node.addSymmetricPropogation(firstNode,
                                                          secondNode)

        # labeled edges from the copula to each entity
        self.gr.add_edge(edge=(copulaNode,firstNode),
                         label=FIRST_ENTITY_LABEL)
        self.gr.add_edge(edge=(copulaNode,secondNode),
                         label=SECOND_ENTITY_LABEL)

        return copulaNode
    

    def parseApposition(self,index,first_entity,second_entity):
        """
        add an apposition subgraph to the graph

        When the second entity is an adjectival modifier, or is not definite,
        the construction is handed to parseProp instead; in that case
        second_entity is modified in place (adjectivalChild, relative_adj and
        parent_relation are set).

        @type  index: int
        @param index: the index of the apposition in the sentence

        @type  first_entity: DepTree
        @param first_entity: the syntax tree of the first entity

        @type  second_entity: DepTree
        @param second_entity: the syntax tree of the second entity

        @rtype: Node
        @return: the top node of the apposition subgraph
        """
        #copied from copular, interesting to see if this happens
        reduceToProp = second_entity.parent_relation in adjectival_mod_dependencies
        if not reduceToProp:
            reduceToProp = not second_entity.is_definite()

        if reduceToProp:
            # reduce to prop construction when the second element is an adjective
            # e.g., Rabbit is white -> white rabbit
            second_entity.adjectivalChild = [second_entity]
            second_entity.relative_adj = False #TODO - calculate this
            second_entity.parent_relation = "appos" #TODO: this might be dangerous :\
            return self.parseProp(subject = first_entity,
                                  copulaIndex = NO_INDEX,
                                  adjectiveChildList = [second_entity],
                                  propAsHead = True)

        # proper apposition: an apposition node on top of both entities
        apposNode = AppositionNode.init(index=index,
                                        features={})
        self.gr.add_node(apposNode)

        firstNode = self.parse(first_entity)
        secondNode = self.parse(second_entity)

        # propagate properties between the two nodes
        graph_representation.node.addSymmetricPropogation(firstNode,
                                                          secondNode)

        # labeled edges from the apposition to each entity
        self.gr.add_edge(edge=(apposNode,firstNode),
                         label=FIRST_ENTITY_LABEL)
        self.gr.add_edge(edge=(apposNode,secondNode),
                         label=SECOND_ENTITY_LABEL)

        return apposNode

    
    
    def parsePossessive(self,possessor,possessed,possessive):
        """
        add a possessive subgraph to the graph

        When the possessor turns out to be a time or a location node, no
        possessive node is created: the possessed node is linked to the
        possessor and returned.

        @type  possessor: DepTree
        @param possessor: the syntax tree of the possessor

        @type  possessed: DepTree
        @param possessed: the syntax tree of the possessed

        @type  possessive: DepTree
        @param possessive: the syntax tree of the possessive - e.g - 's;
                           may be falsy when there is no possessive marker

        @rtype: Node
        @return: the top node of the possessive subgraph
        """
        # the possessive marker, when present, supplies the index of the possessive node
        index = possessive.id if possessive else graph_representation.word.NO_INDEX

        ownerNode = self.parse(possessor)
        ownedNode = self.parse(possessed)

        if isTime(ownerNode) or isLocation(ownerNode):
            #possessive construction to indicate time
            self.gr.add_edge((ownedNode,ownerNode))
            return ownedNode

        #otherwise - proper possessive:
        hasNode = PossessiveNode.init(index=index,
                                      features={},
                                      valid=True)
        self.gr.add_node(hasNode)
        self.gr.add_edge(edge=(hasNode,ownerNode),
                         label=POSSESSOR_LABEL)
        self.gr.add_edge(edge=(hasNode,ownedNode),
                         label=POSSESSED_LABEL)

        # nodes that make up the top node
        members = [ownerNode,ownedNode]
        if possessive: # in some cases there's no possessive marker (e.g., "their woman")
            markerWord = Word(possessive.id,
                              possessive.get_original_sentence(root=False))
            members.append(graph_representation.node.Node(isPredicate=False,
                                                          text = [markerWord],
                                                          features = {},
                                                          valid=True))

        # create possessive top node, add to graph, and return it
        topNode = graph_utils.generate_possessive_top_node(graph=self.gr, nodeLs=members)
        self.gr.add_node(topNode)

        #mark that features and neighbours should propagate from the top node to the possessed
        # John's results were low -> features should propagate between (John's results) and (results)
        graph_representation.node.addSymmetricPropogation(topNode, ownedNode)

        return topNode
        
    def parseProp(self,subject,copulaIndex,adjectiveChildList,propAsHead,features={}):
        """
        add a prop subgraph to the graph
        
        @type  adjective: DepTree
        @param adjective: the syntax tree of the adjective
        
        @type  subject: DepTree
        @param subject: the syntax tree of the subject
        
        @rtype: Node
        @return: the top node of the copula subgraph
        """
        
        # parse top node
        subjectNode = self.parse(subject)
        topNode = subjectNode
        #parse each property and connect to top node
        for temp_t in adjectiveChildList:
            adjective = temp_t._ADJECTIVAL_PREDICATE_FEATURE_Adjective()["Value"]
            adjectiveNode = self.parse(adjective)
            if "Lemma" in features:
                del(features["Lemma"])
            adjectiveNode.features.update(features)
            
            # generate the top node and add to the graph
            propNode = PropNode.init(features={"relative":temp_t.relative_adj},
                                     index = copulaIndex,
                                     valid=True,
                                     parent_relation = adjective.parent_relation)
            self.gr.add_node(propNode)
            
            if propAsHead:
                topNode = propNode
            
            #add labeled edges
            self.gr.add_edge(edge=(subjectNode,propNode),
                             label="")
            self.gr.add_edge(edge=(propNode,adjectiveNode),
                             label="")
            
            
        return topNode
コード例 #5
0
class ParseGraph:
    """
    class to bunch together all function of conversion from DepTree to digraph
    Mainly in order to store the graph as a member which all these functions can edit.   
    """
    def __init__(self,t,locationAnnotator):
        """
        initialize a graph class, followed by converting a tree

        @type  t: Tree
        @param t: syntactic tree to be converted; a tree without an id is
                  treated as the ROOT element and its first child is converted

        @type  locationAnnotator: object offering is_location(string)
        @param locationAnnotator: stored on the instance; parseBottom queries it
                                  to decide whether a text span is a location

        @type gr: GraphWrapper
        @var  gr: the graph representing t
        """
        # a falsy id means this is the ROOT element
        self.tree = t if t.id else t.children[0]
        self.gr = GraphWrapper(t.get_original_sentence())
        self.locationAnnotator = locationAnnotator
        # maintain an appendix for easier browsing
        self.types = appendix_types()
        # the conversion itself - fills self.gr as a side effect
        self.parse(self.tree)
        
    def parse(self,t):
        """
        Get the graph representation from a syntactic representation
        Returns through the graph parameter.
        
        @type  t: DepTree
        @param tree: syntactic tree to be converted
        
        @rtype: Node
        @return: the node in the graph corresponding to the top node in t
        """
        
        #order matters!
        if t.is_conditional_predicate():
            self.types.add(APPENDIX_COND)
            return self.parseConditional(outcome = t._CONDITIONAL_PREDICATE_FEATURE_Outcome()["Value"],
                                         condList = t.condPred)

        
        if t._VERBAL_PREDICATE_SUBTREE_Adv():
            advChildren = t.adverb_children
            advSubj = t.adverb_subj
            return self.parseAdverb(subj=advSubj, 
                             advChildren=advChildren)
        
        if t.is_conjunction_predicate():
            self.types.add(APPENDIX_CONJUNCTION)
            return self.parseConjunction(baseElm = t.baseElm,
                                         conjResult = t.conjResult)
        
        if t.is_appositional_predicate():
            self.types.add(APPENDIX_APPOS)
            firstEntity = t._APPOSITIONAL_PREDICATE_FEATURE_Left_Side()["Value"]
            secondEntity = t._APPOSITIONAL_PREDICATE_FEATURE_Right_Side()["Value"]
            return self.parseApposition(index = t.id,
                                        first_entity=firstEntity,
                                        second_entity=secondEntity)
        
        if t.is_relative_clause():
            self.types.add(APPENDIX_RCMOD)
            return self.parseRcmod(np = t._RELCLAUSE_PREDICATE_FEATURE_Rest()['Value'], 
                                   modList = t.rcmodPred)
        
        if t.is_prepositional_predicate():
            self.types.add(APPENDIX_PREP)
            return self.parsePreposition(psubj=t._PREPOSITIONAL_PREDICATE_FEATURE_psubj()["Value"],
                                          prepChildList=t.prepChildList)
                    
        if t.is_copular_predicate():
            self.types.add(APPENDIX_COP)
            firstEntity = t._COPULAR_PREDICATE_FEATURE_Copular_Predicate()["Value"]
            secondEntity = t._COPULAR_PREDICATE_FEATURE_Copular_Object()["Value"]
            return self.parseCopular(index = t.id,
                                     first_entity=firstEntity,
                                     second_entity=secondEntity,
                                     features = syntactic_item.get_verbal_features(t))
        
        if t.is_possesive_predicate():
            self.types.add(APPENDIX_POSS)
            possessor = t._POSSESSIVE_PREDICATE_FEATURE_Possessor()["Value"]
            possessed = t._POSSESSIVE_PREDICATE_FEATURE_Possessed()["Value"]
            possessive = t._POSSESSIVE_PREDICATE_FEATURE_Possessive()["Value"]
            return self.parsePossessive(possessor = possessor, 
                                        possessed = possessed,
                                        possessive = possessive)
        
            
        if t.is_adjectival_predicate():
            self.types.add(APPENDIX_ADJ)
            return self.parseProp(subject = t._ADJECTIVAL_PREDICATE_FEATURE_Subject()["Value"],
                                  copulaIndex = NO_INDEX,
                                  adjectiveChildList = t.adjectivalChildList,
                                  propAsHead=False)
            
        if t.is_clausal_complement():
            self.types.add(APPENDIX_COMPLEMENT)
            return self.parseComplement(compSubj = t.compSubj,
                                        compChildren = t.compChildList)
        
        if t.unhandled_advcl():
            # put each unhandled advcl as a disconnected subgraph
            for c in t.advcl:
                self.parse(c)
            return self.parse(t)
        
        if t.is_verbal_predicate():
            self.types.add(APPENDIX_VERB)
            head_ret = t._VERBAL_PREDICATE_SUBTREE_Head()
            return self.parseVerbal(indexes = head_ret["Span"],
                             verbs = head_ret["Value"].split(" "),
                             arguments = t.collect_arguments(),
                             tree = t)
        
            
        
        else:
            # fall back - pack all the tree in a single node
            if len(t.children)==1:
                if (t.children[0].parent_relation == "nn") and (t.word.endswith(",")) and (t.children[0].word.endswith(",")):
                    #conjunction in disguise
                    child = t.children[0]
                    t.children = []
                    ret =  self.parseConjunction(cc = [(t.id,"and")], 
                                                conjElements = [t,child])
                    t.children = [child]
                    return ret
            
            nodes = t._get_subtree(filter_labels_ban)
            text = [Word(index=index,
                         word=nodes[index]) for index in sorted(nodes.keys())] 
            topNode = self.parseBottom(text = sorted(text,key=lambda x:x.index),
                        features = syntactic_item.get_verbal_features(t))

            return topNode
    

    def parseBottom(self,text,features):
        """
        Parse a node for which all other construction test has failed,
        no tree structure is assumed over the input text.

        The text is first handed to timexWrapper. Its first result item is
        passed on to parseTime when it is not empty; its second item is used as
        the text left for an ordinary node. When there are no time expressions,
        the whole text is checked against the location annotator and, on a hit,
        a location node pointing at the text node is returned instead.

        @type  text: list[Word]
        @param text: words to appear at node, ordered by index

        @type  features: dict{string:string}
        @param features: features of the node

        @rtype  Node
        @return the node which was inserted into the graph
        """
        time_res = timexWrapper(text)
        time_node = None
        if time_res[0]:
            self.types.add(APPENDIX_TIME)
            time_node = self.parseTime(time_res[0])
        else:
            s = " ".join([w.word for w in text])
            if self.locationAnnotator.is_location(s):
                locNode = LocationNode.init(features={})
                self.gr.add_node(locNode)
                bottomNode = Node(isPredicate=False,
                                  text = text,
                                  features = features,
                                  valid=True)
                self.gr.add_node(bottomNode)
                self.gr.add_edge((locNode,bottomNode),
                                 label="loc")
                self.types.add(APPENDIX_LOCATION)
                return locNode

        left_text = time_res[1]
        if left_text:
            topNode = Node(isPredicate=False,
                           text = left_text,
                           features = features,
                           valid=True)
            if topNode.str:
                self.gr.add_node(topNode)
                if time_node:
                    self.gr.add_edge((topNode,time_node))
            elif time_node is not None:
                # the leftover node carries no text - fold its features into
                # the time node and use that one as the top node
                time_node.features.update(topNode.features)
                topNode = time_node
            else:
                # No text survived and there is no time node to fall back on.
                # Keep the empty node rather than dereferencing a missing time
                # node, as the no-text branch below also does.
                self.gr.add_node(topNode)

        else:
            if not time_node:
                #TODO: probably not good, but happens
                topNode = Node(isPredicate=False,
                           text = [],
                           features = features,
                           valid=True)
                self.gr.add_node(topNode)
            else:
                topNode = time_node

        return topNode
         
    
    def parseTime(self,time_res):
        """
        Insert a "time" node into the graph and hang one plain node per time
        expression underneath it.

        @type  time_res: list[TimeExpression]
        @param time_res: time expressions found by the automated tool; the text of
                         each becomes a node, its value is stored as the
                         "Time Value" feature

        @rtype  Node
        @return the top node (time node)
        """
        timeRoot = TimeNode.init(features={})
        self.gr.add_node(timeRoot)

        for expr in time_res:
            exprNode = Node(isPredicate = False,
                            text = expr.text,
                            features = {"Time Value":expr.value},
                            valid = True)
            self.gr.add_node(exprNode)
            # unlabeled edge: time node -> expression
            self.gr.add_edge((timeRoot,exprNode))

        return timeRoot
    
    def parseComplement(self,compSubj,compChildren):
        """
        add a complement subgraph to the graph
        
        @type  compSubj: DepTree
        @param compSubj: the subject of all following complements
        
        @type  compChildren: list [depTree]  
        @param compChildren: all subclauses
        """         
        
        topNode = self.parse(compSubj)
        
        for child in compChildren:
            curNode = self.parse(child)
            self.gr.add_edge(edge=(topNode,curNode),
                             label=child.parent_relation)
        return topNode
        
    
    def parseConjunction(self,baseElm,conjResult):
        """
        add a conjunction subgraph to the graph
        
        @type  cc: list [(int,string)]
        @param cc: the connecting element
        
        @type  conjElements: list [DepTree]  
        @param conjElements: subtrees to be joined in conjunction
        """
        
        
        retNode = self.parse(baseElm)
        
        for cc,conjElements in conjResult:
        
            if not conjElements:
                # discourse marker
                discourseNode = Node(isPredicate = False,
                                text = [Word(ind,word) for ind,word in cc],
                                features = {},
                                valid=True)
                self.gr.add_node(discourseNode)
            
                self.gr.add_edge(edge =(retNode,discourseNode),
                                 label= DISCOURSE_LABEL)
            else:
                # generate top conjunction node
                conjNode = ConjunctionNode.init(text = [Word(ind,word) for ind,word in cc],
                                      features = {})
                self.gr.add_node(conjNode)
                #connect cc to base element
                self.gr.add_edge((conjNode,retNode))
                
                #generate node for each element and connect to topNode
                for elm in conjElements:
                    curNode = self.parse(elm)
                    self.gr.add_edge(edge = (conjNode,curNode))
            
        return retNode
            
        
    
    def parseRcmod(self,np,modList):
        """
        add a relative clause subgraph to the graph
        
        @type  np: DepTree
        @param np: the entity being modified by the relative clause
        
        @type  modlist: a list of DepTrees,  
        @param modList: trees modifying np
        """
        
        topNode = self.parse(np)
        
        for temp_t in modList:
            # add nodes
            rcmodNode = self.parse(temp_t._RELCLAUSE_PREDICATE_FEATURE_Relclause()["Value"])
            propNode = RCMODPropNode.init(features={},
                                     valid=True)
            self.gr.add_node(propNode)
            
            #add edges
            self.gr.add_edge(edge=(topNode,propNode))
            self.gr.add_edge(edge=(propNode,rcmodNode))
            if rcmodNode.isPredicate:
                # this will create a cycle, label is a hurestic to guess the connection between relative clause and top node
                self.gr.add_edge(edge=(rcmodNode,topNode), label=temp_t.rcmodRel)
            
            # record that this construction came from rcmod
            topNode.rcmod = [propNode,rcmodNode] 
        
        return topNode
        
    
    def parseConditional(self,outcome,condList):
        """
        add a conditional subgraph to the graph
        
        @type  outcome: DepTree
        @param outcome: the outcome of all following conditions
        
        @type  condList: a list of DepTrees,  
        @param condList: all conditionals regarding outcome
        """
        
        outcomeNode = self.parse(outcome)
        
        for temp_t in condList:
            mark = temp_t._CONDITIONAL_PREDICATE_FEATURE_Mark()
            markValue = mark["Value"]
            markIndex = mark["Span"][0]
            conditionNode = self.parse(temp_t._CONDITIONAL_PREDICATE_FEATURE_Condition()["Value"]) 
            
            #create nodes
            markNode = CondNode.init(index = markIndex,
                                condType = markValue,
                                features = {},
                                valid=True)
            self.gr.add_node(markNode)
            
            markValue = markValue.lower()

            # add edges according to the type of conditional
            if markValue in condition_outcome_markers:
                self.gr.add_edge(edge = (markNode,outcomeNode),
                                 label = OUTCOME_LABEL)
                
                self.gr.add_edge(edge = (markNode,conditionNode),
                                 label = CONDITION_LABEL)
            
            elif markValue in reason_outcome_markers:
                self.gr.add_edge(edge = (markNode,outcomeNode),
                                 label = OUTCOME_LABEL)
                
                self.gr.add_edge(edge = (markNode,conditionNode),
                                 label = REASON_LABEL)
            
            elif markValue in comp_markers:
                self.gr.add_edge(edge = (conditionNode,outcomeNode),
                                 label = COMP_LABEL)
            
            else:
                #add edges
                self.gr.add_edge((outcomeNode,markNode))
                self.gr.add_edge((markNode,conditionNode))
    
        #return top node
        return outcomeNode
    
    def parsePreposition(self,psubj,prepChildList):
        """
        add a preposition subgraph to the graph
        
        @type  psubj: DepTree
        @param psubj: the subject of all following prepositions
        
        @type  prepChildList: a list of DepTrees,  
        @param prepChildList: all prepositions regarding nsubj
        """
        
        #create top nodes:
        
        topNode = self.parse(psubj)
        
        for temp_t in prepChildList:
            #generate bottom node and connect to prep
            pobj = temp_t._PREPOSITIONAL_PREDICATE_FEATURE_pobj()["Value"]
            if not pobj: # e.g., #460
                continue
            
            bottomNode = self.parse(pobj)
            
            
            #generate prep node and connect to top node
            prepNode = PrepNode.init(index=temp_t.prepInd,
                                prepType=temp_t.prepType,
                                features={},
                                valid = True)
#             self.gr.add_node(prepNode)
            
            #self.gr.add_edge(edge = (prepNode,bottomNode))
            self.gr.add_edge(edge = (topNode,bottomNode),
                             label = " ".join([w.word for w in prepNode.str]))
            
            
        
            
            
            
            
            
        return topNode
        
    def parseVerbal(self,indexes,verbs,arguments,tree):
        """
        Build the subgraph of a verbal predicate: a predicate node for the
        verb, an edge to every argument, and - if the tree has a time subtree -
        a time node between the verb and the parsed time subtree.

        @type  indexes: list [int]
        @param indexes: the index(es) of the verb in the sentence

        @type  verbs: list [string]
        @param verbs: the string(s) representing the verb

        @type  arguments: list
        @param arguments: list of DepTrees of arguments; each edge is labeled
                          with the argument's parent relation

        @type tree: DepTree
        @param tree: tree object from which to extract various features

        @rtype: Node
        @return: the node of the verb
        """
        # features of the head; a lemma identical to the first verb word adds nothing
        feats = syntactic_item.get_verbal_features(tree)
        if feats['Lemma'] == verbs[0]:
            del(feats['Lemma'])

        # every remaining feature name is recorded in the appendix
        for featName in feats:
            self.types.add(featName)

        verbText = [Word(index=ind,
                         word=verb) for ind,verb in zip(indexes,verbs)]
        verbNode = graph_representation.node.Node(isPredicate=True,
                                                  text = verbText,
                                                  features=feats,
                                                  valid=True)
        self.gr.add_node(verbNode)

        # arguments
        for arg_t in arguments:
            argNode = self.parse(arg_t)
            self.gr.add_edge((verbNode,argNode), arg_t.parent_relation)

        # time expressions
        timeSubtree,_unused = tree._VERBAL_PREDICATE_SUBTREE_Time()
        if not timeSubtree:
            return verbNode

        timeNode = graph_representation.node.TimeNode.init(features = {})
        self.gr.add_node(timeNode)
        timeSubGraph = self.parse(timeSubtree)
        self.gr.add_edge((verbNode,timeNode))
        self.gr.add_edge((timeNode,timeSubGraph))

        return verbNode
        
    def parseAdverb(self,subj,advChildren):
        topNode = self.parse(subj) 
        
        for advChild,mwe in advChildren:
#             advTopNode = advNode.init(features = {})
#             self.gr.add_node(advTopNode)
#             self.gr.add_edge(edge = (topNode,advTopNode))
            if mwe:
                # in case this is a complex adverb ("as long as")
                curAdvNode = Node(isPredicate = False,
                                  text = [Word(ind,word) for ind,word in mwe],
                                  features = {},
                                  valid = True)
                self.gr.add_node(curAdvNode)
                curChildNode = self.parse(advChild)
                self.gr.add_edge(edge=(topNode,curAdvNode),
                                 label = ADV_LABEL)
                self.gr.add_edge(edge = (curAdvNode,curChildNode),
                                 label = advChild.parent_relation)
                
                
                
            else:
                curChildNode = self.parse(advChild)
                self.gr.add_edge(edge = (topNode,curChildNode),
                                 label = ADV_LABEL)

        return topNode 

        
    
    def parseCopular(self,index,first_entity,second_entity,features):
        """
        Build the subgraph of a copular construction.

        When the second entity is adjectival or not definite, the construction
        is reduced to a prop construction (see parseProp) and second_entity is
        modified in place for that purpose. Otherwise a copular node pointing
        at both entities is generated.

        @type  index: int
        @param index: the index of the copula in the sentence

        @type  first_entity: DepTree
        @param first_entity: the syntax tree of the first entity

        @type  second_entity: DepTree
        @param second_entity: the syntax tree of the second entity

        @type  features: dict
        @param features: features for the copular node, or - in the prop case -
                         handed on to parseProp

        @rtype: Node
        @return: the top node of the copula subgraph
        """
        isAdjectival = second_entity.parent_relation in adjectival_mod_dependencies
        if isAdjectival or (not second_entity.is_definite()):
            # reduce to prop construction when the second element in the copula
            # is an adjective (e.g., Rabbit is white -> white rabbit),
            # or when the second element is indefinite
            second_entity.adjectivalChild = [second_entity]
            second_entity.relative_adj = False #TODO: calculate this
            second_entity.parent_relation = "copular" #TODO: this might be dangerous :\
            return self.parseProp(subject = first_entity,
                                  copulaIndex = index,
                                  adjectiveChildList = [second_entity],
                                  features=features,
                                  propAsHead = True)

        # proper copula: the copular node goes into the graph first
        copulaNode = CopularNode.init(index=index,
                                      features=features,
                                      valid=True)
        self.gr.add_node(copulaNode)

        # subgraphs of both entities
        firstNode = self.parse(first_entity)
        secondNode = self.parse(second_entity)

        # properties propagate between the two entities
        graph_representation.node.addSymmetricPropogation(firstNode,
                                                          secondNode)

        # labeled edges from the copula to each entity
        self.gr.add_edge(edge=(copulaNode,firstNode),
                         label=FIRST_ENTITY_LABEL)
        self.gr.add_edge(edge=(copulaNode,secondNode),
                         label=SECOND_ENTITY_LABEL)

        return copulaNode
    

    def parseApposition(self,index,first_entity,second_entity):
        """
        Build the subgraph of an apposition.

        As for copulas, an adjectival or not definite second entity reduces the
        construction to a prop construction (see parseProp), modifying
        second_entity in place. Otherwise an apposition node pointing at both
        entities is generated.

        @type  index: int
        @param index: the index of the apposition in the sentence

        @type  first_entity: DepTree
        @param first_entity: the syntax tree of the first entity

        @type  second_entity: DepTree
        @param second_entity: the syntax tree of the second entity

        @rtype: Node
        @return: the top node of the apposition subgraph
        """
        #copied from copular, interesting to see if this happens
        isAdjectival = second_entity.parent_relation in adjectival_mod_dependencies
        if isAdjectival or (not second_entity.is_definite()):
            # reduce to prop construction when the second element is an adjective
            # e.g., Rabbit is white -> white rabbit
            second_entity.adjectivalChild = [second_entity]
            second_entity.relative_adj = False #TODO - calculate this
            second_entity.parent_relation = "appos" #TODO: this might be dangerous :\
            return self.parseProp(subject = first_entity,
                                  copulaIndex = NO_INDEX,
                                  adjectiveChildList = [second_entity],
                                  propAsHead = True)

        # proper apposition: the apposition node goes into the graph first
        apposNode = AppositionNode.init(index=index,
                                        features={})
        self.gr.add_node(apposNode)

        # subgraphs of both entities
        firstNode = self.parse(first_entity)
        secondNode = self.parse(second_entity)

        # properties propagate between the two entities
        graph_representation.node.addSymmetricPropogation(firstNode,
                                                          secondNode)

        # labeled edges from the apposition to each entity
        self.gr.add_edge(edge=(apposNode,firstNode),
                         label=FIRST_ENTITY_LABEL)
        self.gr.add_edge(edge=(apposNode,secondNode),
                         label=SECOND_ENTITY_LABEL)

        return apposNode

    
    
    def parsePossessive(self,possessor,possessed,possessive):
        """
        Build the subgraph of a possessive construction.

        When the possessor node is recognised by isTime or isLocation, the
        possessed node is simply linked to it and returned. Otherwise a
        possessive node pointing at both is generated, together with a top node
        standing for the whole construction.

        @type  possessor: DepTree
        @param possessor: the syntax tree of the possessor

        @type  possessed: DepTree
        @param possessed: the syntax tree of the possessed

        @type  possessive: DepTree
        @param possessive: the syntax tree of the possessive - e.g - 's;
                           may be missing (e.g., "their woman")

        @rtype: Node
        @return: the top node of the possessive subgraph
        """
        # index of the possessive marker, if there is one
        index = possessive.id if possessive else graph_representation.word.NO_INDEX

        possessorNode = self.parse(possessor)
        possessedNode = self.parse(possessed)

        if isTime(possessorNode) or isLocation(possessorNode):
            #possessive construction to indicate time
            self.gr.add_edge((possessedNode,possessorNode))
            return possessedNode

        #otherwise - proper possessive:
        hasNode = PossessiveNode.init(index=index,
                                      features={},
                                      valid=True)
        self.gr.add_node(hasNode)
        self.gr.add_edge(edge=(hasNode,possessorNode),
                         label=POSSESSOR_LABEL)
        self.gr.add_edge(edge=(hasNode,possessedNode),
                         label=POSSESSED_LABEL)

        # nodes making up the top node: possessor, possessed and - when there
        # is a possessive marker - a node for the marker itself
        members = [possessorNode,possessedNode]
        if possessive:
            markerText = [Word(possessive.id,
                               possessive.get_original_sentence(root=False))]
            members.append(graph_representation.node.Node(isPredicate=False,
                                                          text = markerText,
                                                          features = {},
                                                          valid=True))

        topNode = graph_utils.generate_possessive_top_node(graph=self.gr, nodeLs=members)
        self.gr.add_node(topNode)

        #mark that features and neighbours should propagate from the top node to the possessed
        # John's results were low -> features should propagate between (John's results) and (results)
        graph_representation.node.addSymmetricPropogation(topNode, possessedNode)

        return topNode
        
    def parseProp(self,subject,copulaIndex,adjectiveChildList,propAsHead,features={}):
        """
        add a prop subgraph to the graph
        
        @type  adjective: DepTree
        @param adjective: the syntax tree of the adjective
        
        @type  subject: DepTree
        @param subject: the syntax tree of the subject
        
        @rtype: Node
        @return: the top node of the copula subgraph
        """
        
        # parse top node
        subjectNode = self.parse(subject)
        topNode = subjectNode
        #parse each property and connect to top node
        for temp_t in adjectiveChildList:
            adjective = temp_t._ADJECTIVAL_PREDICATE_FEATURE_Adjective()["Value"]
            adjectiveNode = self.parse(adjective)
            if "Lemma" in features:
                del(features["Lemma"])
            adjectiveNode.features.update(features)
            
            # generate the top node and add to the graph
            propNode = PropNode.init(features={"relative":temp_t.relative_adj},
                                     index = copulaIndex,
                                     valid=True,
                                     parent_relation = adjective.parent_relation)
            self.gr.add_node(propNode)
            
            if propAsHead:
                topNode = propNode
            
            #add labeled edges
            self.gr.add_edge(edge=(subjectNode,propNode),
                             label="")
            self.gr.add_edge(edge=(propNode,adjectiveNode),
                             label="")
            
            
        return topNode