Esempio n. 1
0
    def fixRanges(self):
        """
        Rewrite a "from <start> to <end>" chain as a single range node.

        findChain is asked for four consecutive nodes: a "from" preposition
        with one neighbor, a time/location node with two neighbors, a "to"
        preposition with one neighbor, and a time/location node with one
        neighbor. When such a chain exists, a Time/Location node carrying the
        feature {"Range": True} is added with one child whose text is
        "<start text> to <end text>"; for every node returned by
        incidents(fromNode) the edge into the "from" node is duplicated onto
        the range node; finally delete_component is called on the "from" node
        and APPENDIX_RANGE is recorded in self.types.

        @rtype: bool
        @return: True if a chain was found and rewritten, False otherwise
        """

        def outDegree(n):
            return len(self.gr.neighbors(n))

        def isPrepOfType(n, prepType):
            return (isPreposition(n) and n.prepType == prepType
                    and outDegree(n) == 1)

        def isRangeEndpoint(n, degree):
            return (isTime(n) or isLocation(n)) and outDegree(n) == degree

        chainPattern = [
            lambda n: isPrepOfType(n, "from"),
            lambda n: isRangeEndpoint(n, 2),
            lambda n: isPrepOfType(n, "to"),
            lambda n: isRangeEndpoint(n, 1),
        ]
        chain = findChain(self.gr, chainPattern)
        if not chain:
            return False

        fromNode, start, toNode, end = chain

        # the actual start value is the neighbor of `start` that is not the
        # "to" preposition
        nonPrepNeighbors = [
            n for n in self.gr.neighbors(start) if not isPreposition(n)
        ]
        startNode = nonPrepNeighbors[0]
        endNode = self.gr.neighbors(end)[0]

        # the range head has the same kind (time / location) as the start
        if isTime(start):
            rangeNode = TimeNode.init(features={"Range": True})
        elif isLocation(start):
            rangeNode = LocationNode.init(features={"Range": True})
        self.gr.add_node(rangeNode)

        # single child spelling out "<start> to <end>"; the inserted "to"
        # reuses the index of the original "to" node's first word
        sonText = (startNode.text +
                   [Word(index=toNode.text[0].index, word="to")] +
                   endNode.text)
        if isTime(start):
            sonFeatures = {
                'Time Value':
                "-".join([
                    startNode.features['Time Value'],
                    endNode.features['Time Value']
                ])
            }
        elif isLocation(start):
            sonFeatures = {}
        sonNode = Node(isPredicate=False,
                       text=sonText,
                       features=sonFeatures,
                       valid=True)
        self.gr.add_node(sonNode)
        self.gr.add_edge((rangeNode, sonNode))

        # re-route everything that pointed at the "from" node
        for parent in self.gr.incidents(fromNode):
            duplicateEdge(graph=self.gr,
                          orig=(parent, fromNode),
                          new=(parent, rangeNode))

        delete_component(graph=self.gr, node=fromNode)
        self.types.add(APPENDIX_RANGE)
        return True
Esempio n. 2
0
 def init(cls,index,features,valid):
     """
     build a copular head node: a predicate whose only word is COPULA

     Any "Lemma" entry is removed from `features` in place before the node
     is created.

     @param index: word index given to the COPULA word
     @param features: feature dictionary stored on the node
     @param valid: forwarded unchanged to the constructor
     @return: the new cls instance
     """
     features.pop("Lemma", None)
     headText = [Word(index, COPULA)]
     return cls(isPredicate=True,
                text=headText,
                features=features,
                valid=valid)
Esempio n. 3
0
 def __str__(self):
     """
     render the node as HTML table markup

     The first row holds the displayed words, the following rows one printed
     feature each. As a side effect the displayed words are stored in
     self.str.

     @rtype: string
     @return: the table markup
     """
     # word indexes covered by the "Span" of a printed feature; these words
     # are left out of the word row
     hiddenIndexes = []
     for feat, _ in PRINT_FEATURES:
         if feat not in self.features:
             continue
         value = self.features[feat]
         if isinstance(value, dict) and "Span" in value:
             hiddenIndexes.extend(value["Span"])

     if 'Lemma' in self.features and len(self.text) == 1:
         # a single-word node is displayed through its lemma
         self.str = [Word(index=self.text[0].index,
                          word=self.features['Lemma'])]
     else:
         words = self.text
         if self.orderText:
             words = sorted(self.text, key=lambda word: word.index)
         # self.str stores the words as displayed in the node
         self.str = [w for w in words if w.index not in hiddenIndexes]

     self.str = strip_punctuations(self.str)

     parts = ['<TABLE BORDER="0" CELLSPACING="0"><TR><TD>',
              "  ".join([str(x) for x in self.str]),
              "</TD></TR>"]

     for feat, printFunc in PRINT_FEATURES:
         if feat not in self.features:
             continue
         if self.isPredicate and feat == "Definite":
             # "Definite" is not printed on predicate nodes
             continue
         shown = cgi.escape(str(printFunc(self.features[feat])))
         parts.append("<TR><TD>")
         parts.append('<FONT POINT-SIZE="10">{0}</FONT>'.format(shown))
         parts.append("</TD></TR>")

     parts.append("</TABLE>")
     return "".join(parts)
Esempio n. 4
0
    def parseAdverb(self,subj,advChildren):
        """
        add an adverbial subgraph to the graph

        @param subj: element modified by the adverbs; handed to self.parse
        @param advChildren: iterable of (advChild, mwe) pairs. advChild is
                            handed to self.parse; mwe is either empty or a
                            list of (index, word) pairs forming a multi-word
                            adverb

        @return: the node produced by parsing subj
        """
        topNode = self.parse(subj)

        for advChild, mwe in advChildren:
            if not mwe:
                # plain adverb - hang the parsed child directly off the top
                self.gr.add_edge(edge=(topNode, self.parse(advChild)),
                                 label=ADV_LABEL)
                continue

            # complex adverb ("as long as"): the multi-word expression gets
            # its own node between the top node and the parsed child
            mweNode = Node(isPredicate=False,
                           text=[Word(ind, word) for ind, word in mwe],
                           features={},
                           valid=True)
            self.gr.add_node(mweNode)
            childNode = self.parse(advChild)
            self.gr.add_edge(edge=(topNode, mweNode),
                             label=ADV_LABEL)
            self.gr.add_edge(edge=(mweNode, childNode),
                             label=advChild.parent_relation)

        return topNode
Esempio n. 5
0
 def init(cls,features):
     """
     initialize an adverb head node

     The node is a valid predicate whose only word is ADVERB at NO_INDEX.

     @param features: feature dictionary stored on the node
     @return: the new cls instance
     """
     headText = [Word(NO_INDEX, ADVERB)]
     return cls(isPredicate=True,
                text=headText,
                features=features,
                valid=True)
Esempio n. 6
0
 def init(cls,index,prepType,features,valid):
     """
     initialize a preposition head node

     The node's word is "<PREP>-<prepType>" with prepType lower-cased; the
     lower-cased type is also stored on the node as `prepType`.

     @param index: word index given to the head word
     @param prepType: the preposition, any case
     @param features: feature dictionary stored on the node
     @param valid: forwarded unchanged to the constructor
     @return: the new cls instance
     """
     loweredType = prepType.lower()
     headWord = Word(index, "{0}-{1}".format(PREP, loweredType))
     node = cls(isPredicate=True,
                text=[headWord],
                features=features,
                valid=valid)
     node.prepType = loweredType
     return node
Esempio n. 7
0
 def init(cls,index,condType,features,valid):
     """
     initialize a conditional head node

     The node's word is "<COND>-<condType>" with condType lower-cased. The
     lower-cased type is stored on the node as `condType` and the node's
     `nodeShape` is set to RECT_NODE_SHAPE.

     @param index: word index given to the head word
     @param condType: the condition marker, any case
     @param features: feature dictionary stored on the node
     @param valid: forwarded unchanged to the constructor
     @return: the new cls instance
     """
     loweredType = condType.lower()
     headWord = Word(index, "{0}-{1}".format(COND, loweredType))
     node = cls(isPredicate=True,
                text=[headWord],
                features=features,
                valid=valid)
     node.condType = loweredType
     node.nodeShape = RECT_NODE_SHAPE
     return node
Esempio n. 8
0
def getPossesive(gr, index):
    """
    Create an implicit possessive predicate node.

    The node's only word is POSSESSIVE at `index`; its features contain
    "implicit": True and its original_text is an empty list.

    @param gr: graph handed to the Node constructor
    @param index: word index given to the POSSESSIVE word
    @return: the new Node
    """
    marker = Word(index=index, word=POSSESSIVE)
    node = Node(text=[marker],
                isPredicate=True,
                features={},
                gr=gr,
                orderText=True)
    # flagged as implicit, with no original text of its own
    node.features["implicit"] = True
    node.original_text = []
    return node
Esempio n. 9
0
 def init(cls,features,valid,index,parent_relation):
     """
     initialize a property head node

     Any "Lemma" entry is removed from `features` in place. The node is a
     predicate whose only word is PROP at `index`; `parent_relation` is
     stored on the node.

     @param features: feature dictionary stored on the node
     @param valid: forwarded unchanged to the constructor
     @param index: word index given to the PROP word
     @param parent_relation: value stored as the node's parent_relation
     @return: the new cls instance
     """
     if "Lemma" in features:
         features.pop("Lemma")
     headText = [Word(index, PROP)]
     node = cls(isPredicate=True,
                text=headText,
                features=features,
                valid=valid)
     node.parent_relation = parent_relation
     return node
Esempio n. 10
0
def missing_children(treeNode, graphNode):
    """
    Collect the children of treeNode that graphNode does not account for.

    A child counts as missing when its parent_relation is not a key of
    graphNode.neighbors(), or its id differs from the index of the first word
    of the first neighbor under that relation, or its relation is listed in
    ignore_labels.

    @return: list of Word(index=child.id, word=child.word), one per missing
             child, in the order of treeNode.children
    """
    neighbors = graphNode.neighbors()

    def is_missing(child):
        rel = child.parent_relation
        if rel not in neighbors:
            return True
        if child.id != neighbors[rel][0].text[0].index:
            return True
        return rel in ignore_labels

    return [
        Word(index=child.id, word=child.word)
        for child in treeNode.children
        if is_missing(child)
    ]
Esempio n. 11
0
def getCopular(gr, index, features):
    """
    Create an implicit copular predicate node.

    Any "Lemma" entry is removed from `features` in place. The node's only
    word is COPULA at `index`; its features get "implicit": True and its
    original_text is an empty list.

    @param gr: graph handed to the Node constructor
    @param index: word index given to the COPULA word
    @param features: feature dictionary stored on the node
    @return: the new Node
    """
    features.pop("Lemma", None)
    marker = Word(index=index, word=COPULA)
    node = Node(text=[marker],
                isPredicate=True,
                features=features,
                gr=gr,
                orderText=True)
    node.features["implicit"] = True
    node.original_text = []
    return node
Esempio n. 12
0
def _get_or_create_dep_node(nodesMap, graph, node_id, word):
    """Return the node registered under node_id, creating it on first use."""
    if node_id not in nodesMap:
        nodesMap[node_id] = Node(
            # ids may carry a "'" suffix; only the part before it is numeric
            text=[Word(index=int(node_id.split("'")[0]), word=word)],
            isPredicate=False,
            features={},
            gr=graph,
            orderText=True)
    return nodesMap[node_id]


def create_dep_graphs_from_stream(stream, HOME_DIR):
    """
    Build one dependency graph per blank-line-separated group of lines.

    Every non-empty line is matched against the module-level pattern `pat`,
    which must yield (rel, head, head_id, dep, dep_id). Head and dependent
    nodes are created on first sight, keyed by their id string, and connected
    by an edge labelled rel. A blank line closes the current graph; runs of
    blank lines do not produce empty graphs. A graph still open when the
    stream ends is returned as well, so a missing trailing blank line no
    longer drops the last sentence.

    @param stream: iterable of text lines
    @param HOME_DIR: forwarded to every GraphWrapper that is created
    @return: list of (graph, nodesMap) pairs, nodesMap mapping id string to
             Node
    @raise AttributeError: when a non-empty line does not match `pat`
    """
    graphs = []
    curGraph = GraphWrapper("", HOME_DIR)
    nodesMap = {}
    pending = False  # True once the current graph has received a line

    for line in stream:
        line = line.strip()
        if not line:
            if pending:
                graphs.append((curGraph, nodesMap))
                curGraph = GraphWrapper("", HOME_DIR)
                nodesMap = {}
                pending = False
            continue

        pending = True
        m = pat.match(line)
        rel, head, head_id, dep, dep_id = m.groups()
        headNode = _get_or_create_dep_node(nodesMap, curGraph, head_id, head)
        depNode = _get_or_create_dep_node(nodesMap, curGraph, dep_id, dep)
        if curGraph.has_edge((headNode, depNode)):  # stanford bug
            # the same head/dependent pair was listed twice; the later
            # relation replaces the earlier one
            curGraph.del_edge((headNode, depNode))
        curGraph.add_edge(edge=(headNode, depNode), label=rel)

    if pending:
        # stream ended without a closing blank line
        graphs.append((curGraph, nodesMap))
    return graphs
Esempio n. 13
0
    def init(cls, text, features):
        """
        initialize a conjunction head node

        The node's text is the CONJUNCTION word (at NO_INDEX) followed by the
        given words. The words joined by spaces in index order are stored on
        the node as `conjType`.

        @param text: list of Words making up the connecting element
        @param features: feature dictionary stored on the node
        @return: the new cls instance
        """
        inSentenceOrder = sorted(text, key=lambda word: word.index)
        conjType = " ".join([x.word for x in inSentenceOrder])

        headText = [Word(NO_INDEX, CONJUNCTION)] + text
        node = cls(isPredicate=True,
                   text=headText,
                   features=features,
                   valid=True)
        node.conjType = conjType
        # called for its side effects only; the returned string is discarded
        node.__str__()
        return node
Esempio n. 14
0
 def parseConjunction(self,baseElm,conjResult):
     """
     add a conjunction subgraph to the graph

     @param baseElm: the base element; handed to self.parse

     @type  conjResult: iterable of (cc, conjElements) pairs
     @param conjResult: cc is a list of (int, string) pairs - the connecting
                        element; conjElements are the elements joined by it
                        (each handed to self.parse). A pair with empty
                        conjElements is attached as a discourse marker.

     @return: the node produced by parsing baseElm
     """
     retNode = self.parse(baseElm)

     for cc, conjElements in conjResult:
         ccWords = [Word(ind, word) for ind, word in cc]

         if conjElements:
             # generate top conjunction node and connect it to the base
             conjNode = ConjunctionNode.init(text=ccWords,
                                             features={})
             self.gr.add_node(conjNode)
             self.gr.add_edge((conjNode, retNode))

             # one node per joined element, each hanging off the conjunction
             for elm in conjElements:
                 self.gr.add_edge(edge=(conjNode, self.parse(elm)))
             continue

         # nothing is joined - the connecting words are a discourse marker
         discourseNode = Node(isPredicate=False,
                              text=ccWords,
                              features={},
                              valid=True)
         self.gr.add_node(discourseNode)
         self.gr.add_edge(edge=(retNode, discourseNode),
                          label=DISCOURSE_LABEL)

     return retNode
Esempio n. 15
0
    def parseVerbal(self, indexes, verbs, arguments, tree):
        """
        add a verbal subgraph to the graph

        @type  indexes: list [int]
        @param indexes: the index(es) of the verb in the sentence

        @type  verbs: list [string]
        @param verbs: the string(s) representing the verb

        @type  arguments: list
        @param arguments: list of DepTrees of arguments

        @type tree: DepTree
        @param tree: tree object from which to extract various features

        @rtype: Node
        @return: the verbal head node
        """

        # features of the head; the lemma is dropped when it merely repeats
        # the first verb word
        feats = syntactic_item.get_verbal_features(tree)
        if feats['Lemma'] == verbs[0]:
            feats.pop('Lemma')

        # every remaining feature name is recorded as a type
        for featName in feats:
            self.types.add(featName)

        headWords = [
            Word(index=index, word=verb)
            for index, verb in zip(indexes, verbs)
        ]
        verbNode = graph_representation.node.Node(isPredicate=True,
                                                  text=headWords,
                                                  features=feats,
                                                  valid=True)
        self.gr.add_node(verbNode)

        # each argument hangs off the verb under its own relation
        for arg_t in arguments:
            argNode = self.parse(arg_t)
            self.gr.add_edge((verbNode, argNode), arg_t.parent_relation)

        # time expressions go through an intermediate time node
        (timeSubtree, _) = tree._VERBAL_PREDICATE_SUBTREE_Time()
        if not timeSubtree:
            return verbNode

        timeNode = graph_representation.node.TimeNode.init(features={})
        self.gr.add_node(timeNode)
        timeSubGraph = self.parse(timeSubtree)
        self.gr.add_edge((verbNode, timeNode))
        self.gr.add_edge((timeNode, timeSubGraph))
        return verbNode
Esempio n. 16
0
def treeNode_to_graphNode(treeNode, gr):
    """
    Create the graph node that corresponds to a single tree node.

    The node holds one Word (treeNode.id, treeNode.word), is a predicate iff
    treeNode.is_verbal_predicate(), and carries the verbal features of the
    tree node plus a "pos" feature. original_text is a copy of the node text.

    @type treeNode DepTree
    @param gr: graph handed to the Node constructor
    @return: the new Node
    """
    feats = get_verbal_features(treeNode)
    headWord = Word(index=treeNode.id, word=treeNode.word)
    isPred = treeNode.is_verbal_predicate()
    node = newNode.Node(text=[headWord],
                        isPredicate=isPred,
                        features=feats,
                        gr=gr)
    node.features["pos"] = treeNode.pos
    node.original_text = copy(node.text)
    return node
Esempio n. 17
0
 def _merge(self):
     """
     merge the two ends of one mergeable edge, if there is one

     An edge (u, v) is mergeable when its label is in join_labels, or when
     its label is "conj_and" and the first item of u's "conjType" feature is
     '&'. Before merging, if u has a "conjType" feature, the conjunction word
     is appended to u.text - taken from u.surface_form when a word with that
     text exists there, otherwise created at index u.maxIndex()+1.

     Only the first edge returned by find_edges is handled per call.

     @rtype: bool
     @return: True if a pair of nodes was merged, False if no edge qualified
     """
     # a nested function instead of a tuple-parameter lambda: the latter is
     # a syntax error on Python 3 (PEP 3113)
     def is_mergeable(edge):
         u, v = edge
         label = self.edge_label((u, v))
         return (label in join_labels) or (
             label == "conj_and"
             and u.features.get("conjType", [""])[0] == '&')

     edges = find_edges(self, is_mergeable)
     for u, v in edges:
         conjType = u.features.get("conjType", False)
         if conjType:
             conjType = conjType[0]  # only the words
             matching = [w for w in u.surface_form if w.word == conjType]
             if matching:
                 w = matching[0]
             else:
                 w = Word(index=u.maxIndex() + 1, word=conjType)
             u.text.append(w)
         merge_nodes(self, u, v)
         # stop after a single merge and report it to the caller
         return True
     return False
Esempio n. 18
0
    def do_conj(self):
        """
        Turn every "conj_*" edge into a coordination headed by a marker node.

        For each node conj1 that is the source of a "conj_*" edge, and for
        each such relation, a predicate marker node is created whose word is
        the part of the label after "conj_". The marker receives an edge to
        conj1 and to every conjunct conj2 (labelled with the relation), and
        the direct conj1 -> conj2 edge is removed. How the marker is attached
        to the rest of the graph depends on whether conj1 only has incoming
        auxiliary edges (see isModifier below).
        """
        # a nested function instead of a tuple-parameter lambda: the latter
        # is a syntax error on Python 3 (PEP 3113)
        def is_conj_edge(edge):
            u, v = edge
            return self.edge_label((u, v)).startswith("conj_")

        edges = find_edges(self, is_conj_edge)
        nodes = set([u for (u,_) in edges])
        for conj1 in nodes:
            curStartIndex = conj1.minIndex()+1
            curNeighbours = conj1.neighbors()
            # conj1 has incoming edges and all of them are auxiliary edges
            isModifier = (not bool([father for father in self.incidents(conj1) if not self.is_aux_edge((father.uid, conj1.uid))])) and bool(self.incidents(conj1)) 
            for rel in [rel for rel in curNeighbours if rel.startswith("conj_")]:
                marker = rel.split("conj_")[1]
                
                markerNode = newNode.Node(text=[Word(curStartIndex+1,marker)], #TODO: how to find marker's index
                                          isPredicate=True,
                                          features={"conj":True},
                                          gr=self)

                #decide how to connect it to the rest of the graph, based on its type
                if isModifier:
                    duplicate_all_incidents(gr=self, source=conj1, target=markerNode)
                else:
                    # every father of conj1 also points at each conjunct and
                    # at the marker
                    for father in self.incidents(conj1):
                        for conj2 in curNeighbours[rel]:
                            duplicateEdge(graph=self, orig=((father,conj1)), new=((father,conj2)))
                        duplicateEdge(graph=self, orig=((father,conj1)), new=((father,markerNode)))
                        
                    if conj1.isPredicate:
                        # neighbors whose span starts before curStartIndex
                        # are shared with the other conjuncts
                        for neighbor in self.neighbors(conj1):
                            if get_min_max_span(self, neighbor)[0] < curStartIndex:
                                for conj2 in curNeighbours[rel]:
                                    if (self.edge_label((conj1,neighbor)) == SOURCE_LABEL) or (not self.is_aux_edge((conj1.uid, neighbor.uid))):
                                        duplicateEdge(graph=self, orig=(conj1,neighbor), new=(conj2,neighbor))
                                    
                # create the coordination construction, headed by the marker
                self.add_edge(edge=(markerNode,conj1),label=rel)
                for conj2 in curNeighbours[rel]:
                    self.del_edge((conj1,conj2))
                    self.add_edge(edge=(markerNode,conj2),label=rel)
                    if conj1.isPredicate:
                        conj2.isPredicate = conj1.isPredicate
                    # drop conj2's words from conj1's surface form, then make
                    # sure conj1's own words are (re)appended at the end
                    conj1.surface_form = [w for w in conj1.surface_form if (w not in conj2.surface_form) and (w not in conj1.text) ]
                    for w in conj1.text:
                        if w not in conj1.surface_form:
                            conj1.surface_form.append(w)
                    if conj1.features.get("conjType",False):
                        conj1.text = [w for w in conj1.text if w.index not in conj1.features["conjType"][1]]
                    
            # NOTE(review): this runs once per conj1 and records only the
            # last relation of the loop above - confirm it was not meant to
            # sit inside that loop.
            self.types.add(rel)              
Esempio n. 19
0
def _attach_pending_edges(graph, parentsList):
    """Add every collected ((parent uid, child uid), label) edge to graph."""
    for edge, rel in parentsList:
        digraph.add_edge(graph, edge=edge, label=rel)


def load_prop_from_file(filename, HOME_DIR):
    """
    Load graphs from a text file.

    The file is a sequence of records. The first line of a record is the
    sentence and is handed to GraphWrapper; each following non-empty line
    describes one node as six tab-separated fields:
    uid, words ("index,word" items joined by ";"), pos, isPredicate (0/1),
    isAsserted (0/1) and parents ("relation,parent uid" items joined by ";",
    possibly empty). An empty line ends the record; only then are the edges
    added, since a parent may be listed after its child. A record still open
    at end of file is finished the same way, so a missing trailing empty line
    no longer drops the last graph.

    @param filename: path of the file to read
    @param HOME_DIR: forwarded to every GraphWrapper that is created
    @return: list of GraphWrapper objects, in file order
    """
    ret = []
    curGraph = None  # None while waiting for the next sentence line
    parentsList = []

    # `with` closes the file even when a malformed line raises
    with open(filename) as fin:
        for line in fin:
            line = line.strip("\n")

            if curGraph is None:
                curGraph = GraphWrapper(line, HOME_DIR)
                parentsList = []
                continue

            if not line:
                _attach_pending_edges(curGraph, parentsList)
                ret.append(curGraph)
                curGraph = None
                continue

            # `pos` is read to validate the field count but is not used
            uid, words, pos, isPredicate, isAsserted, parents = line.split(
                "\t")
            uid = int(uid)
            isAsserted = bool(int(isAsserted))
            text = [
                Word(int(index), word) for index, word in
                [ent.split(",") for ent in words.split(";")]
            ]
            feats = {"top": isAsserted} if isAsserted else {}
            if parents:
                parentsList.extend([
                    ((int(index), uid), rel) for rel, index in
                    [ent.split(",") for ent in parents.split(";")]
                ])

            # the result is not kept here; the graph is passed to the
            # constructor - presumably the node registers itself there
            newNode.Node(text,
                         bool(int(isPredicate)),
                         feats,
                         curGraph,
                         uid=uid)

    if curGraph is not None:
        # file ended without the closing empty line
        _attach_pending_edges(curGraph, parentsList)
        ret.append(curGraph)

    return ret
Esempio n. 20
0
 def get_text(self,gr):
     """
     return the node text as a single word: the condition type, placed at
     the index of the node's first word

     @param gr: not used by this implementation
     @return: list holding one Word
     """
     firstWord = self.text[0]
     return [Word(index=firstWord.index, word=self.condType)]
Esempio n. 21
0
 def parsePossessive(self,possessor,possessed,possessive):
     """
     add a possessive subgraph to the graph

     @type  possessor: DepTree
     @param possessor: the syntax tree of the possessor

     @type  possessed: DepTree
     @param possessed: the syntax tree of the possessed

     @type  possessive: DepTree
     @param possessive: the syntax tree of the possessive - e.g - 's;
                        may be empty when there is no possessive marker

     @rtype: Node
     @return: the top node of the possessive subgraph; when the possessor
              is a time or location node, the possessed node itself
     """

     # without a marker the possessive node gets no sentence index
     index = possessive.id if possessive else graph_representation.word.NO_INDEX

     # generate nodes
     possessorNode = self.parse(possessor)
     possessedNode = self.parse(possessed)

     if isTime(possessorNode) or isLocation(possessorNode):
         # possessive construction to indicate time/location: attach the
         # possessor directly under the possessed
         self.gr.add_edge((possessedNode, possessorNode))
         return possessedNode

     # otherwise - proper possessive, headed by a possessive node
     hasNode = PossessiveNode.init(index=index,
                                   features={},
                                   valid=True)
     self.gr.add_node(hasNode)
     self.gr.add_edge(edge=(hasNode, possessorNode),
                      label=POSSESSOR_LABEL)
     self.gr.add_edge(edge=(hasNode, possessedNode),
                      label=POSSESSED_LABEL)

     # nodes that make up the top node
     members = [possessorNode, possessedNode]
     if possessive:
         # in some cases there's no possessive marker (e.g., "their woman")
         markerWord = Word(possessive.id,
                           possessive.get_original_sentence(root=False))
         members.append(graph_representation.node.Node(isPredicate=False,
                                                       text=[markerWord],
                                                       features={},
                                                       valid=True))

     # create possessive top node, add to graph, and return it
     topNode = graph_utils.generate_possessive_top_node(graph=self.gr,
                                                        nodeLs=members)
     self.gr.add_node(topNode)

     # mark that features and neighbours should propagate from the top node to the possessed
     # John's results were low -> features should propagate between (John's results) and (results)
     graph_representation.node.addSymmetricPropogation(topNode, possessedNode)

     return topNode
Esempio n. 22
0
    def parse(self,t):
        """
        Get the graph representation from a syntactic representation
        Returns through the graph parameter.

        The tree is tested against a fixed sequence of predicate kinds; the
        first test that succeeds decides which parseXxx method builds the
        subgraph. Most branches also record the matching APPENDIX_* constant
        in self.types. Trees that match nothing are packed into a single
        node by parseBottom.
        
        @type  t: DepTree
        @param t: syntactic tree to be converted
        
        @rtype: Node
        @return: the node in the graph corresponding to the top node in t
        """
        
        #order matters!
        if t.is_conditional_predicate():
            self.types.add(APPENDIX_COND)
            return self.parseConditional(outcome = t._CONDITIONAL_PREDICATE_FEATURE_Outcome()["Value"],
                                         condList = t.condPred)

        
        # adverbs: the only branch before the fall back that records no type
        if t._VERBAL_PREDICATE_SUBTREE_Adv():
            advChildren = t.adverb_children
            advSubj = t.adverb_subj
            return self.parseAdverb(subj=advSubj, 
                             advChildren=advChildren)
        
        if t.is_conjunction_predicate():
            self.types.add(APPENDIX_CONJUNCTION)
            return self.parseConjunction(baseElm = t.baseElm,
                                         conjResult = t.conjResult)
        
        if t.is_appositional_predicate():
            self.types.add(APPENDIX_APPOS)
            firstEntity = t._APPOSITIONAL_PREDICATE_FEATURE_Left_Side()["Value"]
            secondEntity = t._APPOSITIONAL_PREDICATE_FEATURE_Right_Side()["Value"]
            return self.parseApposition(index = t.id,
                                        first_entity=firstEntity,
                                        second_entity=secondEntity)
        
        if t.is_relative_clause():
            self.types.add(APPENDIX_RCMOD)
            return self.parseRcmod(np = t._RELCLAUSE_PREDICATE_FEATURE_Rest()['Value'], 
                                   modList = t.rcmodPred)
        
        if t.is_prepositional_predicate():
            self.types.add(APPENDIX_PREP)
            return self.parsePreposition(psubj=t._PREPOSITIONAL_PREDICATE_FEATURE_psubj()["Value"],
                                          prepChildList=t.prepChildList)
                    
        if t.is_copular_predicate():
            self.types.add(APPENDIX_COP)
            firstEntity = t._COPULAR_PREDICATE_FEATURE_Copular_Predicate()["Value"]
            secondEntity = t._COPULAR_PREDICATE_FEATURE_Copular_Object()["Value"]
            return self.parseCopular(index = t.id,
                                     first_entity=firstEntity,
                                     second_entity=secondEntity,
                                     features = syntactic_item.get_verbal_features(t))
        
        if t.is_possesive_predicate():
            self.types.add(APPENDIX_POSS)
            possessor = t._POSSESSIVE_PREDICATE_FEATURE_Possessor()["Value"]
            possessed = t._POSSESSIVE_PREDICATE_FEATURE_Possessed()["Value"]
            possessive = t._POSSESSIVE_PREDICATE_FEATURE_Possessive()["Value"]
            return self.parsePossessive(possessor = possessor, 
                                        possessed = possessed,
                                        possessive = possessive)
        
            
        if t.is_adjectival_predicate():
            self.types.add(APPENDIX_ADJ)
            return self.parseProp(subject = t._ADJECTIVAL_PREDICATE_FEATURE_Subject()["Value"],
                                  copulaIndex = NO_INDEX,
                                  adjectiveChildList = t.adjectivalChildList,
                                  propAsHead=False)
            
        if t.is_clausal_complement():
            self.types.add(APPENDIX_COMPLEMENT)
            return self.parseComplement(compSubj = t.compSubj,
                                        compChildren = t.compChildList)
        
        if t.unhandled_advcl():
            # put each unhandled advcl as a disconnected subgraph
            for c in t.advcl:
                self.parse(c)
            # NOTE(review): re-enters parse() on the same tree; presumably
            # unhandled_advcl() no longer holds on the second pass - confirm,
            # otherwise this recurses without end.
            return self.parse(t)
        
        if t.is_verbal_predicate():
            self.types.add(APPENDIX_VERB)
            head_ret = t._VERBAL_PREDICATE_SUBTREE_Head()
            # the head value is split on spaces into one verb string per word
            return self.parseVerbal(indexes = head_ret["Span"],
                             verbs = head_ret["Value"].split(" "),
                             arguments = t.collect_arguments(),
                             tree = t)
        
            
        
        else:
            # fall back - pack all the tree in a single node
            if len(t.children)==1:
                if (t.children[0].parent_relation == "nn") and (t.word.endswith(",")) and (t.children[0].word.endswith(",")):
                    #conjunction in disguise
                    child = t.children[0]
                    # detach the child while parsing, restore it afterwards
                    t.children = []
                    # NOTE(review): this passes cc/conjElements keywords,
                    # while the conjunction branch above calls
                    # parseConjunction with baseElm/conjResult - confirm the
                    # method accepts both forms.
                    ret =  self.parseConjunction(cc = [(t.id,"and")], 
                                                conjElements = [t,child])
                    t.children = [child]
                    return ret
            
            # one Word per entry of the subtree mapping, in index order
            nodes = t._get_subtree(filter_labels_ban)
            text = [Word(index=index,
                         word=nodes[index]) for index in sorted(nodes.keys())] 
            topNode = self.parseBottom(text = sorted(text,key=lambda x:x.index),
                        features = syntactic_item.get_verbal_features(t))

            return topNode
Esempio n. 23
0
 def init(cls,index,features,valid):
     """
     initialize a possessive head node: a predicate whose only word is
     POSSESSIVE at the given index

     @param index: word index given to the POSSESSIVE word
     @param features: feature dictionary stored on the node
     @param valid: forwarded unchanged to the constructor
     @return: the new cls instance
     """
     headText = [Word(index, POSSESSIVE)]
     return cls(isPredicate=True,
                text=headText,
                features=features,
                valid=valid)
Esempio n. 24
0
 def init(cls,features,valid):
     """
     initialize a relative-clause property head node: a predicate whose only
     word is RCMOD_PROP at NO_INDEX

     @param features: feature dictionary stored on the node
     @param valid: forwarded unchanged to the constructor
     @return: the new cls instance
     """
     headText = [Word(NO_INDEX, RCMOD_PROP)]
     return cls(isPredicate=True,
                text=headText,
                features=features,
                valid=valid)
Esempio n. 25
0
 def init(cls,features):
     """
     initialize a time head node

     The node is a valid non-predicate whose only word is TIME at NO_INDEX,
     drawn with RECT_NODE_SHAPE.

     @param features: feature dictionary stored on the node
     @return: the new cls instance
     """
     ret = cls(isPredicate=False,
               text=[Word(NO_INDEX,TIME)],
               features=features,
               valid=True)
     # this assignment used to sit after the return statement and never ran;
     # it is applied to the new node, as the conditional node's init does
     ret.nodeShape = RECT_NODE_SHAPE
     return ret
Esempio n. 26
0
 def init(cls,features):
     """
     initialize a location head node: a valid predicate whose only word is
     LOCATION at NO_INDEX

     @param features: feature dictionary stored on the node
     @return: the new cls instance
     """
     headText = [Word(NO_INDEX, LOCATION)]
     return cls(isPredicate=True,
                text=headText,
                features=features,
                valid=True)
Esempio n. 27
0
 def get_text(self):
     """
     collect the words of this subtree: this node's own word first, followed
     by the words of each child's subtree in the order of self.children

     @return: list of Words
     """
     words = [Word(index=self.id, word=self.word)]
     for child in self.children:
         words.extend(child.get_text())
     return words
Esempio n. 28
0
 def init(cls,index,features):
     """
     initialize an apposition head node: a non-predicate whose only word is
     APPOSITION at the given index, created with valid=False

     @param index: word index given to the APPOSITION word
     @param features: feature dictionary stored on the node
     @return: the new cls instance
     """
     headText = [Word(index, APPOSITION)]
     return cls(isPredicate=False,
                text=headText,
                features=features,
                valid=False)