def fixRanges(self):
    """Collapse a "from X to Y" chain in the graph into a single Range node.

    Looks for the node chain: prep-from -> time/location -> prep-to ->
    time/location, replaces it with a Range-marked Time/Location node whose
    single child spans "X to Y", and re-attaches the parents of the original
    "from" node to the new range node.

    Returns True if a range was found and rewritten, False otherwise.
    """
    # The four predicates describe consecutive links of the chain, in order.
    rangeFuncList = [
        lambda n: (isPreposition(n) and n.prepType == "from" and len(self.gr.neighbors(n)) == 1),
        lambda n: ((isTime(n) or isLocation(n)) and len(self.gr.neighbors(n)) == 2),
        lambda n: (isPreposition(n) and n.prepType == "to" and len(self.gr.neighbors(n)) == 1),
        lambda n: ((isTime(n) or isLocation(n)) and len(self.gr.neighbors(n)) == 1)
    ]
    ls = findChain(self.gr, rangeFuncList)
    if not ls:
        return False
    [fromNode, start, toNode, end] = ls
    # 'start' has two neighbors: the "to" preposition and the actual start entity.
    startNode = [n for n in self.gr.neighbors(start) if not isPreposition(n)][0]
    endNode = self.gr.neighbors(end)[0]
    # The chain guarantees 'start' is either a time or a location, so exactly
    # one of these branches binds rangeNode.
    if isTime(start):
        rangeNode = TimeNode.init(features={"Range": True})
    elif isLocation(start):
        rangeNode = LocationNode.init(features={"Range": True})
    self.gr.add_node(rangeNode)
    if isTime(start):
        # Time ranges also carry a combined 'Time Value' feature ("start-end").
        sonNode = Node(isPredicate=False,
                       text=startNode.text + [Word(index=toNode.text[0].index, word="to")] + endNode.text,
                       features={'Time Value': "-".join([startNode.features['Time Value'],
                                                         endNode.features['Time Value']])},
                       valid=True)
    elif isLocation(start):
        sonNode = Node(isPredicate=False,
                       text=startNode.text + [Word(index=toNode.text[0].index, word="to")] + endNode.text,
                       features={},
                       valid=True)
    self.gr.add_node(sonNode)
    self.gr.add_edge((rangeNode, sonNode))
    # Re-route every parent of the old "from" node to the new range node.
    for curFather in self.gr.incidents(fromNode):
        duplicateEdge(graph=self.gr,
                      orig=(curFather, fromNode),
                      new=(curFather, rangeNode))
    # Remove the whole original from-...-to-... component.
    delete_component(graph=self.gr, node=fromNode)
    self.types.add(APPENDIX_RANGE)
    return True
def init(cls, index, features, valid):
    """Create a copula predicate node at the given word index.

    Note: mutates the caller's ``features`` dict by stripping "Lemma",
    which is meaningless for an inserted copula.
    """
    features.pop("Lemma", None)
    copula_word = Word(index, COPULA)
    return cls(isPredicate=True, text=[copula_word], features=features, valid=valid)
def __str__(self):
    """Render the node as a graphviz HTML-like table label.

    Side effect: caches the displayed words in ``self.str`` (after span
    filtering and punctuation stripping), which other code relies on.
    """
    ret = '<TABLE BORDER="0" CELLSPACING="0"><TR><TD>'
    # Collect word indices covered by feature spans; those words are shown
    # in the feature rows below, not in the main text row.
    filtered_spans = []
    for feat, _ in PRINT_FEATURES:
        if (feat in self.features) and (isinstance(self.features[feat], dict)) and ("Span" in self.features[feat]):
            filtered_spans.extend(self.features[feat]["Span"])
    if 'Lemma' in self.features and len(self.text) == 1:
        # Single-word node with a lemma: display the lemma instead of the surface form.
        self.str = [Word(index=self.text[0].index, word=self.features['Lemma'])]
    else:
        ls = self.text
        if self.orderText:
            ls = sorted(self.text, key=lambda word: word.index)
        # self.str stores the words as displayed in the node
        self.str = [w for w in ls if w.index not in filtered_spans]
    self.str = strip_punctuations(self.str)
    ret += " ".join([str(x) for x in self.str])
    ret += "</TD></TR>"
    # One table row per printable feature.
    for feat, printFunc in PRINT_FEATURES:
        if feat in self.features:
            if self.isPredicate and feat == "Definite":
                # Definiteness is only meaningful for non-predicates.
                continue
            ret += "<TR><TD>"
            # cgi.escape: Python 2-era HTML escaping (html.escape in Python 3).
            ret += '<FONT POINT-SIZE="10">{0}</FONT>'.format(cgi.escape(str(printFunc(self.features[feat]))))
            ret += "</TD></TR>"
    ret += "</TABLE>"
    return ret
def parseAdverb(self, subj, advChildren):
    """Attach adverbial modifiers to the subgraph parsed from ``subj``.

    @param subj: the modified element (a DepTree)
    @param advChildren: iterable of (advChild, mwe) pairs; ``mwe`` is a
        list of (index, word) pairs when the adverb is a multi-word
        expression, falsy otherwise.
    @return: the top node parsed from ``subj``.
    """
    topNode = self.parse(subj)
    for advChild, mwe in advChildren:
        # advTopNode = advNode.init(features = {})
        # self.gr.add_node(advTopNode)
        # self.gr.add_edge(edge = (topNode,advTopNode))
        if mwe:
            # in case this is a complex adverb ("as long as"):
            # insert an intermediate node holding the full expression.
            curAdvNode = Node(isPredicate=False,
                              text=[Word(ind, word) for ind, word in mwe],
                              features={},
                              valid=True)
            self.gr.add_node(curAdvNode)
            curChildNode = self.parse(advChild)
            self.gr.add_edge(edge=(topNode, curAdvNode), label=ADV_LABEL)
            self.gr.add_edge(edge=(curAdvNode, curChildNode), label=advChild.parent_relation)
        else:
            # Simple adverb: connect the parsed child directly.
            curChildNode = self.parse(advChild)
            self.gr.add_edge(edge=(topNode, curChildNode), label=ADV_LABEL)
    return topNode
def init(cls,features): """ initialize an adverb head node """ return cls(isPredicate=True, text=[Word(NO_INDEX,ADVERB)], features=features, valid=True)
def init(cls, index, prepType, features, valid):
    """Create a preposition predicate node labeled "prep-<type>".

    The lowercased preposition type is also stored on the node as
    ``prepType`` for later pattern matching.
    """
    normalized = prepType.lower()
    label = "{0}-{1}".format(PREP, normalized)
    node = cls(isPredicate=True, text=[Word(index, label)], features=features, valid=valid)
    node.prepType = normalized
    return node
def init(cls, index, condType, features, valid):
    """Create a conditional predicate node labeled "cond-<type>".

    Stores the lowercased condition type on the node and renders it
    with a rectangular shape.
    """
    normalized = condType.lower()
    label = "{0}-{1}".format(COND, normalized)
    node = cls(isPredicate=True, text=[Word(index, label)], features=features, valid=valid)
    node.condType = normalized
    node.nodeShape = RECT_NODE_SHAPE
    return node
def getPossesive(gr, index):
    """Build an implicit possessive predicate node at the given index.

    The node is marked with the "implicit" feature and given an empty
    original text, since it does not correspond to a surface word.
    """
    node = Node(text=[Word(index=index, word=POSSESSIVE)],
                isPredicate=True,
                features={},
                gr=gr,
                orderText=True)
    node.features["implicit"] = True
    node.original_text = []
    return node
def init(cls, features, valid, index, parent_relation):
    """Create a prop predicate node and record its relation to its parent.

    Note: mutates the caller's ``features`` dict by stripping "Lemma".
    """
    features.pop("Lemma", None)
    node = cls(isPredicate=True, text=[Word(index, PROP)], features=features, valid=valid)
    node.parent_relation = parent_relation
    return node
def missing_children(treeNode, graphNode):
    """Collect words for tree children that the graph node does not represent.

    A child is "missing" when its relation label has no graph neighbor,
    when the neighbor's first word index differs from the child's id, or
    when the relation is in ``ignore_labels``.
    """
    neighbors = graphNode.neighbors()
    missing = []
    for child in treeNode.children:
        rel = child.parent_relation
        if (rel in ignore_labels
                or rel not in neighbors
                or child.id != neighbors[rel][0].text[0].index):
            missing.append(Word(index=child.id, word=child.word))
    return missing
def getCopular(gr, index, features):
    """Build an implicit copula predicate node at the given index.

    Strips "Lemma" from the caller's ``features`` dict (mutating it),
    marks the node implicit, and clears its original text since it does
    not correspond to a surface word.
    """
    features.pop("Lemma", None)
    node = Node(text=[Word(index=index, word=COPULA)],
                isPredicate=True,
                features=features,
                gr=gr,
                orderText=True)
    node.features["implicit"] = True
    node.original_text = []
    return node
def create_dep_graphs_from_stream(stream, HOME_DIR):
    """Parse dependency triples from a line stream into (graph, nodesMap) pairs.

    Graphs are separated by blank lines. Each non-blank line is matched
    against the module-level ``pat`` regex yielding
    (rel, head, head_id, dep, dep_id).

    Fixes over the original:
      * a trailing graph is no longer dropped when the stream does not
        end with a blank line;
      * lines that do not match ``pat`` are skipped instead of crashing
        on ``None.groups()``.
    """
    graphs = []
    init = True          # True while no content has been seen for the current graph
    curGraph = GraphWrapper("", HOME_DIR)
    nodesMap = {}        # maps the raw id string (may contain "'") to its Node
    for line in stream:
        line = line.strip()
        if line:
            init = False
            m = pat.match(line)
            if m is None:
                # Malformed line: skip rather than crash.
                continue
            rel, head, head_id, dep, dep_id = m.groups()
            # ids are kept as strings: copy nodes carry a trailing "'",
            # so only the part before it is the numeric word index.
            if head_id not in nodesMap:
                nodesMap[head_id] = Node(
                    text=[Word(index=int(head_id.split("'")[0]), word=head)],
                    isPredicate=False,
                    features={},
                    gr=curGraph,
                    orderText=True)
            if dep_id not in nodesMap:
                nodesMap[dep_id] = Node(
                    text=[Word(index=int(dep_id.split("'")[0]), word=dep)],
                    isPredicate=False,
                    features={},
                    gr=curGraph,
                    orderText=True)
            headNode = nodesMap[head_id]
            depNode = nodesMap[dep_id]
            if curGraph.has_edge((headNode, depNode)):  # stanford bug: duplicate edges
                curGraph.del_edge((headNode, depNode))
            curGraph.add_edge(edge=(headNode, depNode), label=rel)
        if (not line) and (not init):
            # Blank separator after content: close off the current graph.
            init = True
            graphs.append((curGraph, nodesMap))
            curGraph = GraphWrapper("", HOME_DIR)
            nodesMap = {}
    if not init:
        # Stream ended without a trailing blank line: flush the last graph.
        graphs.append((curGraph, nodesMap))
    return graphs
def init(cls, text, features):
    """Initialize a conjunction head node.

    ``conjType`` records the coordinating words in sentence order; the
    node's text is the synthetic CONJUNCTION marker followed by those
    words. ``__str__`` is invoked once to populate the cached ``str``
    attribute.
    """
    ordered = sorted(text, key=lambda word: word.index)
    joined_type = " ".join(w.word for w in ordered)
    node = cls(isPredicate=True,
               text=[Word(NO_INDEX, CONJUNCTION)] + text,
               features=features,
               valid=True)
    node.conjType = joined_type
    node.__str__()  # side effect: caches node.str
    return node
def parseConjunction(self, baseElm, conjResult):
    """ add a conjunction subgraph to the graph
    @type cc: list [(int,string)]
    @param cc: the connecting element
    @type conjElements: list [DepTree]
    @param conjElements: subtrees to be joined in conjunction
    """
    retNode = self.parse(baseElm)
    for cc, conjElements in conjResult:
        if not conjElements:
            # discourse marker: no conjoined elements, just attach the
            # connective words under a DISCOURSE edge.
            discourseNode = Node(isPredicate=False,
                                 text=[Word(ind, word) for ind, word in cc],
                                 features={},
                                 valid=True)
            self.gr.add_node(discourseNode)
            self.gr.add_edge(edge=(retNode, discourseNode),
                             label=DISCOURSE_LABEL)
        else:
            # generate top conjunction node
            conjNode = ConjunctionNode.init(text=[Word(ind, word) for ind, word in cc],
                                            features={})
            self.gr.add_node(conjNode)
            # connect cc to base element
            self.gr.add_edge((conjNode, retNode))
            # generate node for each element and connect to topNode
            for elm in conjElements:
                curNode = self.parse(elm)
                self.gr.add_edge(edge=(conjNode, curNode))
    return retNode
def parseVerbal(self, indexes, verbs, arguments, tree): """ add a verbal subgraph to the graph @type indexes: list [int] @param indexes: the index(es) of the verb in the sentence @type verbs: list [string] @param verbs: the string(s) representing the verb @type tree: DepTree @param tree: tree object from which to extract various features @type arguments: list @param arguments: list of DepTrees of arguments """ # create verbal head node # start by extracting features feats = syntactic_item.get_verbal_features(tree) if feats['Lemma'] == verbs[0]: del (feats['Lemma']) for k in feats: self.types.add(k) verbNode = graph_representation.node.Node( isPredicate=True, text=[ Word(index=index, word=verb) for index, verb in zip(indexes, verbs) ], features=feats, valid=True) self.gr.add_node(verbNode) # handle arguments for arg_t in arguments: curNode = self.parse(arg_t) #curNode.features = syntactic_item.get_verbal_features(arg_t) self.gr.add_edge((verbNode, curNode), arg_t.parent_relation) # handle time expressions (timeSubtree, _) = tree._VERBAL_PREDICATE_SUBTREE_Time() if timeSubtree: timeNode = graph_representation.node.TimeNode.init(features={}) self.gr.add_node(timeNode) timeSubGraph = self.parse(timeSubtree) self.gr.add_edge((verbNode, timeNode)) self.gr.add_edge((timeNode, timeSubGraph)) return verbNode
def treeNode_to_graphNode(treeNode, gr):
    """Convert a single dependency-tree node into a graph Node.

    @type treeNode: DepTree
    The resulting node carries the tree node's verbal features plus its
    POS tag, and remembers its original text for later restoration.
    """
    features = get_verbal_features(treeNode)
    word = Word(index=treeNode.id, word=treeNode.word)
    graph_node = newNode.Node(text=[word],
                              isPredicate=treeNode.is_verbal_predicate(),
                              features=features,
                              gr=gr)
    graph_node.features["pos"] = treeNode.pos
    graph_node.original_text = copy(graph_node.text)
    return graph_node
def _merge(self):
    """Merge one pair of nodes connected by a join label (or by conj_and
    with an '&' conjunction type).

    Merges at most a single edge per call and returns True if a merge was
    performed; callers re-invoke until it returns False.
    (Python 2 tuple-parameter lambda below — this file targets Python 2.)
    """
    edges = find_edges(self, lambda (u,v):(self.edge_label((u,v)) in join_labels) or (self.edge_label((u,v))=="conj_and" and u.features.get("conjType",[""])[0]=='&'))
    for u, v in edges:
        conjType = u.features.get("conjType",False)
        if conjType:
            conjType = conjType[0] #only the words
            # Reuse the surface-form word for the conjunction if present,
            # otherwise synthesize one just past the node's last index.
            matching = [w for w in u.surface_form if w.word == conjType]
            if matching:
                w = matching[0]
            else:
                w = Word(index = u.maxIndex()+1,word=conjType)
            u.text.append(w)
        merge_nodes(self, u, v)
        # Stop after the first merge: the edge list is now stale.
        return True
    return False
def do_conj(self):
    """Rewrite conj_* edges into coordination constructions headed by an
    explicit marker node ("and", "or", ...).

    For every node with outgoing conj_* edges: create a marker node,
    re-route parents (or duplicate incident edges for modifiers), share
    pre-conjunction neighbors among the conjuncts, then re-head the
    coordination under the marker. Also reconciles surface_form/text
    bookkeeping on the first conjunct.
    (Python 2 tuple-parameter lambda below — this file targets Python 2.)
    """
    edges = find_edges(self, lambda((u, v)):self.edge_label((u, v)).startswith("conj_"))# and (not u.isPredicate) and (not v.isPredicate))
    nodes = set([u for (u,_) in edges])
    for conj1 in nodes:
        curStartIndex = conj1.minIndex()+1
        curNeighbours = conj1.neighbors()
        # Modifier = has parents, but only via auxiliary edges.
        isModifier = (not bool([father for father in self.incidents(conj1) if not self.is_aux_edge((father.uid, conj1.uid))])) and bool(self.incidents(conj1))
        for rel in [rel for rel in curNeighbours if rel.startswith("conj_")]:
            marker = rel.split("conj_")[1]
            markerNode = newNode.Node(text=[Word(curStartIndex+1,marker)], #TODO: how to find marker's index
                                      isPredicate=True,
                                      features={"conj":True},
                                      gr=self)
            #decide how to connect it to the rest of the graph, based on its type
            if isModifier:
                duplicate_all_incidents(gr=self, source=conj1, target=markerNode)
            else:
                # Give every parent of conj1 an edge to each conjunct and to the marker.
                for father in self.incidents(conj1):
                    for conj2 in curNeighbours[rel]:
                        duplicateEdge(graph=self, orig=((father,conj1)), new=((father,conj2)))
                    duplicateEdge(graph=self, orig=((father,conj1)), new=((father,markerNode)))
            if conj1.isPredicate:
                # Share conj1's pre-conjunction arguments with the other conjuncts.
                for neighbor in self.neighbors(conj1):
                    if get_min_max_span(self, neighbor)[0] < curStartIndex:
                        for conj2 in curNeighbours[rel]:
                            if (self.edge_label((conj1,neighbor)) == SOURCE_LABEL) or (not self.is_aux_edge((conj1.uid, neighbor.uid))):
                                duplicateEdge(graph=self, orig=(conj1,neighbor), new=(conj2,neighbor))
            # create the coordination construction, headed by the marker
            self.add_edge(edge=(markerNode,conj1),label=rel)
            for conj2 in curNeighbours[rel]:
                self.del_edge((conj1,conj2))
                self.add_edge(edge=(markerNode,conj2),label=rel)
                if conj1.isPredicate:
                    conj2.isPredicate = conj1.isPredicate
                # Remove words now owned by conj2 from conj1's surface form.
                conj1.surface_form = [w for w in conj1.surface_form if (w not in conj2.surface_form) and (w not in conj1.text)]
            # Make sure every word of conj1's text appears in its surface form.
            for w in conj1.text:
                if w not in conj1.surface_form:
                    conj1.surface_form.append(w)
            if conj1.features.get("conjType",False):
                # conjType[1] holds the indices of the conjunction words — drop them from the text.
                conj1.text = [w for w in conj1.text if w.index not in conj1.features["conjType"][1]]
            self.types.add(rel)
def load_prop_from_file(filename, HOME_DIR):
    """Load serialized graphs from a prop file.

    File format: a sentence line starts each record, followed by one
    tab-separated node line per node
    (uid, words, pos, isPredicate, isAsserted, parents), terminated by a
    blank line. ``words`` and ``parents`` are ";"-separated ","-pairs.

    Fixes over the original:
      * the file handle is managed with ``with`` so it is closed even if
        parsing raises;
      * a trailing record is no longer dropped when the file does not end
        with a blank line.

    @return: list of GraphWrapper objects, one per sentence.
    """
    ret = []
    with open(filename) as fin:
        flag = True        # True when the next line starts a new record
        curGraph = None
        parentsList = []   # accumulated ((parent_uid, child_uid), rel) edges
        for line in fin:
            line = line.strip("\n")
            if flag:
                curSentence = line
                flag = False
                curGraph = GraphWrapper(curSentence, HOME_DIR)
                parentsList = []
            elif line:
                uid, words, pos, isPredicate, isAsserted, parents = line.split("\t")
                uid = int(uid)
                isAsserted = bool(int(isAsserted))
                text = [
                    Word(int(index), word)
                    for index, word in [ent.split(",") for ent in words.split(";")]
                ]
                feats = {"top": isAsserted} if isAsserted else {}
                if parents:
                    parentsList.extend([
                        ((int(index), uid), rel)
                        for rel, index in [ent.split(",") for ent in parents.split(";")]
                    ])
                # Node registers itself with curGraph on construction.
                curNode = newNode.Node(text,
                                       bool(int(isPredicate)),
                                       feats,
                                       curGraph,
                                       uid=uid)
            else:
                # Blank line: connect accumulated edges and close the record.
                for edge, rel in parentsList:
                    digraph.add_edge(curGraph, edge=edge, label=rel)
                ret.append(curGraph)
                flag = True
        if not flag:
            # File ended without a trailing blank line: flush the last record.
            for edge, rel in parentsList:
                digraph.add_edge(curGraph, edge=edge, label=rel)
            ret.append(curGraph)
    return ret
def get_text(self, gr):
    """Return the node's display text: its condition type at the first
    word's index. (``gr`` is unused here but kept for interface parity.)
    """
    first_index = self.text[0].index
    return [Word(index=first_index, word=self.condType)]
def parsePossessive(self, possessor, possessed, possessive):
    """ add a possessive subgraph to the graph
    @type index: int
    @param index: the index of the possessive in the sentence
    @type possessor: DepTree
    @param possessor: the syntax tree of the possessor
    @type possessed: DepTree
    @param possessed: the syntax tree of the possessed
    @type possessive: DepTree
    @param possessive: the syntax tree of the possessive - e.g - 's
    @rtype: Node
    @return: the top node of the possessive subgraph
    """
    # No explicit possessive marker (e.g. "their woman"): use a synthetic index.
    if not possessive:
        index = graph_representation.word.NO_INDEX
    else:
        index = possessive.id
    # generate nodes
    possessorNode = self.parse(possessor)
    possessedNode = self.parse(possessed)
    if isTime(possessorNode) or isLocation(possessorNode):
        # possessive construction to indicate time
        self.gr.add_edge((possessedNode, possessorNode))
        return possessedNode
    # otherwise - proper possessive:
    hasNode = PossessiveNode.init(index=index, features={}, valid=True)
    self.gr.add_node(hasNode)
    # add edges to graph
    self.gr.add_edge(edge=(hasNode, possessorNode), label=POSSESSOR_LABEL)
    self.gr.add_edge(edge=(hasNode, possessedNode), label=POSSESSED_LABEL)
    # create top node
    # get list of all relevant nodes
    nodeLs = [possessorNode, possessedNode]
    if possessive:
        # in some cases there's no possessive marker (e.g., "their woman")
        possessiveNode = graph_representation.node.Node(isPredicate=False,
                                                        text=[Word(possessive.id, possessive.get_original_sentence(root=False))],
                                                        features={},
                                                        valid=True)
        nodeLs.append(possessiveNode)
    # create possessive top node, add to graph, and return it
    topNode = graph_utils.generate_possessive_top_node(graph=self.gr, nodeLs=nodeLs)
    self.gr.add_node(topNode)
    # mark that features and neighbours should propagate from the top node to the possessed
    # John's results were low -> features should propogate between (John's results) and (results)
    graph_representation.node.addSymmetricPropogation(topNode, possessedNode)
    return topNode
def parse(self,t): """ Get the graph representation from a syntactic representation Returns through the graph parameter. @type t: DepTree @param tree: syntactic tree to be converted @rtype: Node @return: the node in the graph corresponding to the top node in t """ #order matters! if t.is_conditional_predicate(): self.types.add(APPENDIX_COND) return self.parseConditional(outcome = t._CONDITIONAL_PREDICATE_FEATURE_Outcome()["Value"], condList = t.condPred) if t._VERBAL_PREDICATE_SUBTREE_Adv(): advChildren = t.adverb_children advSubj = t.adverb_subj return self.parseAdverb(subj=advSubj, advChildren=advChildren) if t.is_conjunction_predicate(): self.types.add(APPENDIX_CONJUNCTION) return self.parseConjunction(baseElm = t.baseElm, conjResult = t.conjResult) if t.is_appositional_predicate(): self.types.add(APPENDIX_APPOS) firstEntity = t._APPOSITIONAL_PREDICATE_FEATURE_Left_Side()["Value"] secondEntity = t._APPOSITIONAL_PREDICATE_FEATURE_Right_Side()["Value"] return self.parseApposition(index = t.id, first_entity=firstEntity, second_entity=secondEntity) if t.is_relative_clause(): self.types.add(APPENDIX_RCMOD) return self.parseRcmod(np = t._RELCLAUSE_PREDICATE_FEATURE_Rest()['Value'], modList = t.rcmodPred) if t.is_prepositional_predicate(): self.types.add(APPENDIX_PREP) return self.parsePreposition(psubj=t._PREPOSITIONAL_PREDICATE_FEATURE_psubj()["Value"], prepChildList=t.prepChildList) if t.is_copular_predicate(): self.types.add(APPENDIX_COP) firstEntity = t._COPULAR_PREDICATE_FEATURE_Copular_Predicate()["Value"] secondEntity = t._COPULAR_PREDICATE_FEATURE_Copular_Object()["Value"] return self.parseCopular(index = t.id, first_entity=firstEntity, second_entity=secondEntity, features = syntactic_item.get_verbal_features(t)) if t.is_possesive_predicate(): self.types.add(APPENDIX_POSS) possessor = t._POSSESSIVE_PREDICATE_FEATURE_Possessor()["Value"] possessed = t._POSSESSIVE_PREDICATE_FEATURE_Possessed()["Value"] possessive = 
t._POSSESSIVE_PREDICATE_FEATURE_Possessive()["Value"] return self.parsePossessive(possessor = possessor, possessed = possessed, possessive = possessive) if t.is_adjectival_predicate(): self.types.add(APPENDIX_ADJ) return self.parseProp(subject = t._ADJECTIVAL_PREDICATE_FEATURE_Subject()["Value"], copulaIndex = NO_INDEX, adjectiveChildList = t.adjectivalChildList, propAsHead=False) if t.is_clausal_complement(): self.types.add(APPENDIX_COMPLEMENT) return self.parseComplement(compSubj = t.compSubj, compChildren = t.compChildList) if t.unhandled_advcl(): # put each unhandled advcl as a disconnected subgraph for c in t.advcl: self.parse(c) return self.parse(t) if t.is_verbal_predicate(): self.types.add(APPENDIX_VERB) head_ret = t._VERBAL_PREDICATE_SUBTREE_Head() return self.parseVerbal(indexes = head_ret["Span"], verbs = head_ret["Value"].split(" "), arguments = t.collect_arguments(), tree = t) else: # fall back - pack all the tree in a single node if len(t.children)==1: if (t.children[0].parent_relation == "nn") and (t.word.endswith(",")) and (t.children[0].word.endswith(",")): #conjunction in disguise child = t.children[0] t.children = [] ret = self.parseConjunction(cc = [(t.id,"and")], conjElements = [t,child]) t.children = [child] return ret nodes = t._get_subtree(filter_labels_ban) text = [Word(index=index, word=nodes[index]) for index in sorted(nodes.keys())] topNode = self.parseBottom(text = sorted(text,key=lambda x:x.index), features = syntactic_item.get_verbal_features(t)) return topNode
def init(cls, index, features, valid):
    """Create a possessive predicate node at the given word index."""
    possessive_word = Word(index, POSSESSIVE)
    return cls(isPredicate=True, text=[possessive_word], features=features, valid=valid)
def init(cls, features, valid):
    """Create a relative-clause (rcmod) prop node (synthetic, no word index)."""
    rcmod_word = Word(NO_INDEX, RCMOD_PROP)
    return cls(isPredicate=True, text=[rcmod_word], features=features, valid=valid)
def init(cls, features):
    """Create a synthetic (non-predicate) time node.

    Bug fix: the original assigned ``cls.nodeShape = RECT_NODE_SHAPE``
    *after* the return statement, so it never executed. The shape is now
    set on the instance before returning, mirroring ConditionNode.init.
    """
    ret = cls(isPredicate=False,
              text=[Word(NO_INDEX, TIME)],
              features=features,
              valid=True)
    ret.nodeShape = RECT_NODE_SHAPE
    return ret
def init(cls, features):
    """Create a synthetic location head node (predicate, no word index)."""
    location_word = Word(NO_INDEX, LOCATION)
    return cls(isPredicate=True, text=[location_word], features=features, valid=True)
def get_text(self):
    """Collect this subtree's words in pre-order (node first, then each
    child subtree in order)."""
    words = [Word(index=self.id, word=self.word)]
    for child in self.children:
        words.extend(child.get_text())
    return words
def init(cls, index, features):
    """Create an apposition predicate node (constructed invalid by default)."""
    apposition_word = Word(index, APPOSITION)
    return cls(isPredicate=True, text=[apposition_word], features=features, valid=False)