def build_tree(text):
    """ Parse the bracketed content of a *.dis file into a tree

    The text is tokenized on whitespace after padding every parenthesis
    with spaces. Tokens are pushed on a stack until a ')' is met; the
    items back to the matching '(' are then folded into a single stack
    entry, chosen by the first item (the label) of the group.

    :type text: string
    :param text: RST tree read from a *.dis file

    :return: the last entry left on the stack once all tokens are consumed
    :raises ValueError: if a bracketed group has fewer than two items or
        starts with an unknown label
    """
    cleaned = text.strip().replace('//TT_ERR', '').replace('\n', '')
    padded = cleaned.replace('(', ' ( ').replace(')', ' ) ')
    queue = RstTree.process_text(padded.split())
    stack = []
    while queue:
        token = queue.pop(0)
        if token != ')':
            # Anything but a closing bracket simply waits on the stack
            stack.append(token)
            continue
        # Closing bracket: unwind the stack back to the matching '('
        content = []
        while stack:
            item = stack.pop()
            if item == '(':
                break
            content.append(item)
        # Items were popped last-first; restore their original order
        content.reverse()
        if len(content) < 2:
            raise ValueError("content = {}".format(content))
        # The first item of the group decides how the rest is folded
        label = content.pop(0)
        if label in ('Root', 'Nucleus', 'Satellite'):
            node = SpanNode(prop=label)
            node.create_node(content)
            stack.append(node)
        elif label == 'span':
            first_edu = int(content.pop(0))
            last_edu = int(content.pop(0))
            stack.append(('span', first_edu, last_edu))
        elif label == 'leaf':
            edu = int(content.pop(0))
            RstTree.check_content(label, content)
            # A leaf is stored as a span that starts and ends on one EDU
            stack.append(('leaf', edu, edu))
        elif label == 'rel2par':
            relation = content.pop(0)
            RstTree.check_content(label, content)
            stack.append(('relation', relation))
        elif label == 'text':
            stack.append(('text', RstTree.create_text(content)))
        else:
            raise ValueError(
                "Unrecognized parsing label: {} \n\twith content = {}\n\tstack={}\n\tqueue={}"
                .format(label, content, stack, queue))
    return stack[-1]
def init(self, doc):
    """ Fill the Queue with one leaf node per EDU of the document

    EDUs are read from ``doc.edu_dict`` under the keys 1..N, where N is
    the size of the dict, and appended to ``self.Queue`` in that order.

    :type doc: Doc instance
    :param doc: document whose ``edu_dict`` provides the EDU texts
    """
    edu_count = len(doc.edu_dict)
    for edu_idx in range(1, edu_count + 1):
        leaf = SpanNode(prop=None)
        leaf.text = doc.edu_dict[edu_idx]
        # A leaf covers exactly one EDU, which is also its nucleus
        leaf.edu_span = (edu_idx, edu_idx)
        leaf.nuc_span = (edu_idx, edu_idx)
        leaf.nuc_edu = edu_idx
        self.Queue.append(leaf)
def binarize_tree(tree):
    """ Convert a general RST tree to a binary RST tree

    Nodes are processed from a work queue. A node with two children gets
    them as ``lnode``/``rnode``. A node with more than two children keeps
    its first child as ``lnode`` and receives a new SpanNode holding the
    remaining children as ``rnode`` (right-branching). When all children
    of such a node share the same ``prop``, the node is flagged as
    ``visited`` and the flag is copied to the new right child. Every
    processed node ends with an empty ``nodelist``.

    :type tree: instance of SpanNode
    :param tree: a general RST tree

    :return: the same ``tree`` object, modified in place
    """
    queue = [tree]
    while queue:
        node = queue.pop(0)
        queue += node.nodelist
        # Construct binary tree
        if len(node.nodelist) == 2:
            node.lnode = node.nodelist[0]
            node.rnode = node.nodelist[1]
            # Parent node
            node.lnode.pnode = node
            node.rnode.pnode = node
        elif len(node.nodelist) > 2:
            # Flag nodes whose children all carry the same prop
            if len({child.prop for child in node.nodelist}) == 1:
                node.visited = True
            # Remove one node from the nodelist
            node.lnode = node.nodelist.pop(0)
            newnode = SpanNode(node.nodelist[0].prop)
            newnode.nodelist += node.nodelist
            # Right-branching
            node.rnode = newnode
            # Parent node
            node.lnode.pnode = node
            node.rnode.pnode = node
            if node.visited:
                newnode.visited = True
            # Process the new node next, so branching continues
            # until its nodelist size is 2
            queue.insert(0, newnode)
        # Clear nodelist for the current node
        node.nodelist = []
    return tree
def init(self, doc):
    """ Fill the Queue with one leaf node per EDU of the document

    EDUs are read from ``doc.edu_dict`` under the keys 1..N, where N is
    the size of the dict, and appended to ``self.Queue`` in that order.

    :type doc: Doc instance
    :param doc: document whose ``edu_dict`` provides the EDU texts

    :raises ValueError: if ``doc`` is not a Doc instance
    """
    if not isinstance(doc, Doc):
        raise ValueError("doc should be an instance of Doc")
    edu_count = len(doc.edu_dict)
    for edu_idx in range(1, edu_count + 1):
        leaf = SpanNode(prop=None)
        leaf.text = doc.edu_dict[edu_idx]
        # A leaf covers exactly one EDU, which is also its nucleus
        leaf.edu_span = (edu_idx, edu_idx)
        leaf.nuc_span = (edu_idx, edu_idx)
        leaf.nuc_edu = edu_idx
        self.Queue.append(leaf)
def flat_tree(tree):
    """ Right-branch only the n-ary nodes whose children differ in prop

    A node with two children gets them as ``lnode``/``rnode``. A node
    with more than two children is split (first child on the left, a new
    SpanNode holding the rest on the right) only when its children do not
    all share the same ``prop``; its ``nodelist`` is then reset to the
    two resulting children. N-ary nodes whose children all share one
    ``prop`` are left untouched.

    :type tree: instance of SpanNode
    :param tree: a general RST tree

    :return: the same ``tree`` object, modified in place
    """
    worklist = [tree]
    while worklist:
        current = worklist.pop(0)
        worklist.extend(current.nodelist)
        arity = len(current.nodelist)
        if arity == 2:
            current.lnode, current.rnode = current.nodelist
            # Parent links
            current.lnode.pnode = current
            current.rnode.pnode = current
        elif arity > 2:
            props = {child.prop for child in current.nodelist}
            if len(props) != 1:
                head = current.nodelist.pop(0)
                # The new right child takes the prop of the next sibling
                tail = SpanNode(current.nodelist[0].prop)
                tail.nodelist += current.nodelist
                current.lnode, current.rnode = head, tail
                head.pnode = tail.pnode = current
                # Handle the new node next so it is split further if needed
                worklist.insert(0, tail)
                # The current node now has exactly these two children
                current.nodelist = [head, tail]
    return tree
def binarize_tree(tree):
    """ Turn a general RST tree into a binary one by right-branching

    A node with two children gets them as ``lnode``/``rnode``. A node
    with more than two children keeps its first child on the left and
    receives a new SpanNode holding the remaining children on the right.
    Every processed node ends with an empty ``nodelist``.

    :type tree: instance of SpanNode
    :param tree: a general RST tree

    :return: the same ``tree`` object, modified in place
    """
    pending = [tree]
    while pending:
        current = pending.pop(0)
        pending.extend(current.nodelist)
        arity = len(current.nodelist)
        if arity >= 2:
            if arity == 2:
                left, right = current.nodelist
            else:
                # Detach the first child; the rest moves under a new node
                left = current.nodelist.pop(0)
                right = SpanNode(current.nodelist[0].prop)
                right.nodelist += current.nodelist
                # Put the new node at the head of the queue so that it
                # keeps being split until it has two children
                pending.insert(0, right)
            current.lnode, current.rnode = left, right
            # Parent links
            left.pnode = right.pnode = current
        # The children now live in lnode/rnode only
        current.nodelist = []
    return tree
def operate(self, action_tuple):
    """ Apply one parsing action to the Stack/Queue

    'Shift' moves the first node of the Queue onto the Stack. 'Reduce'
    pops the two topmost Stack nodes, joins them under a new SpanNode
    and pushes that node back.

    :type action_tuple: tuple
    :param action_tuple: (action, form) pair; action is 'Shift' or
        'Reduce', form is 'NN', 'NS' or 'SN' and is only read for
        'Reduce'

    :raises ActionError: if the Queue is empty on 'Shift' or the Stack
        holds fewer than two nodes on 'Reduce'
    :raises ValueError: if the action or the form is not recognized
    """
    action, form = action_tuple
    if action == 'Shift':
        if not self.Queue:
            raise ActionError("Shift action error")
        self.Stack.append(self.Queue.pop(0))
    elif action == 'Reduce':
        if len(self.Stack) < 2:
            raise ActionError("Reduce action error")
        # Reject a bad form before touching the Stack, so that a failed
        # action leaves the parser state exactly as it was
        if form not in ('NN', 'NS', 'SN'):
            raise ValueError("Unrecognized form: {}".format(form))
        rnode = self.Stack.pop()
        lnode = self.Stack.pop()
        # Create a new node
        # Assign a value to prop, only when it is someone's
        # children node
        node = SpanNode(prop=None)
        # Children node
        node.lnode, node.rnode = lnode, rnode
        # Parent node of children nodes
        lnode.pnode, rnode.pnode = node, node
        # Node text: concatenate two word lists
        node.text = lnode.text + rnode.text
        # EDU span
        node.edu_span = (lnode.edu_span[0], rnode.edu_span[1])
        # Nuc EDU
        node.form = form
        if form == 'NN':
            node.nuc_edu = lnode.nuc_edu
            lnode.prop = "Nucleus"
            rnode.prop = "Nucleus"
        elif form == 'NS':
            node.nuc_edu = lnode.nuc_edu
            lnode.prop = "Nucleus"
            rnode.prop = "Satellite"
        else:  # 'SN'
            node.nuc_edu = rnode.nuc_edu
            lnode.prop = "Satellite"
            rnode.prop = "Nucleus"
        self.Stack.append(node)
    else:
        raise ValueError("Unrecognized parsing action: {}".format(action))
def operate(self, action_tuple):
    """ Apply one parsing action to the Stack/Queue

    'Shift' moves the first node of the Queue onto the Stack. 'Reduce'
    pops the two topmost Stack nodes, joins them under a new binary
    SpanNode and pushes that node back. 'R~' also pops two nodes, but
    when the topmost one already has form 'N~' its children are merged
    with the other node into one flat node instead of being nested.

    :type action_tuple: tuple
    :param action_tuple: (action, form) pair; action is 'Shift',
        'Reduce' or 'R~', form is 'NN', 'N~', 'NS' or 'SN' and is only
        read for 'Reduce'

    :raises ActionError: if the Queue is empty on 'Shift' or the Stack
        holds fewer than two nodes on 'Reduce'/'R~'
    :raises ValueError: if the action or the form is not recognized
    """
    action, form = action_tuple
    if action == 'Shift':
        if not self.Queue:
            raise ActionError("Shift action error")
        self.Stack.append(self.Queue.pop(0))
    elif action == 'Reduce':
        if len(self.Stack) < 2:
            raise ActionError("Reduce action error")
        # Reject a bad form before touching the Stack, so that a failed
        # action leaves the parser state exactly as it was
        if form not in ('NN', 'N~', 'NS', 'SN'):
            raise ValueError("Unrecognized form: {}".format(form))
        rnode = self.Stack.pop()
        lnode = self.Stack.pop()
        # Create a new node
        # Assign a value to prop, only when it is someone's
        # children node
        node = SpanNode(prop=None)
        # Children node
        node.lnode, node.rnode = lnode, rnode
        # dependency: element-wise mean of the two children
        node.dependency = np.average(
            [lnode.dependency, rnode.dependency], axis=0)
        node.nodelist = [lnode, rnode]
        # Parent node of children nodes
        lnode.pnode, rnode.pnode = node, node
        # Node text: concatenate two word lists
        node.text = lnode.text + rnode.text
        # EDU span
        node.edu_span = (lnode.edu_span[0], rnode.edu_span[1])
        # Nuc span / Nuc EDU
        node.form = form
        if form in ('NN', 'N~'):
            node.nuc_span = (lnode.edu_span[0], rnode.edu_span[1])
            node.nuc_edu = lnode.nuc_edu
            lnode.prop = "Nucleus"
            rnode.prop = "Nucleus"
        elif form == 'NS':
            node.nuc_span = lnode.edu_span
            node.nuc_edu = lnode.nuc_edu
            lnode.prop = "Nucleus"
            rnode.prop = "Satellite"
        else:  # 'SN'
            node.nuc_span = rnode.edu_span
            node.nuc_edu = rnode.nuc_edu
            lnode.prop = "Satellite"
            rnode.prop = "Nucleus"
        self.Stack.append(node)
    elif action == 'R~':
        if len(self.Stack) < 2:
            raise ActionError("Reduce action error")
        enode = self.Stack.pop()
        snode = self.Stack.pop()
        node = SpanNode(prop=None)
        if enode.form == 'N~':
            # Flatten: adopt the children of the 'N~' node directly
            node.nodelist.append(snode)
            node.nodelist.extend(enode.nodelist)
        else:
            node.nodelist = [snode, enode]
        # Children node: first and last child of the flat node
        node.lnode, node.rnode = node.nodelist[0], node.nodelist[-1]
        lnode, rnode = node.lnode, node.rnode
        # dependency: element-wise mean over all children
        node.dependency = np.average(
            [child.dependency for child in node.nodelist], axis=0)
        # Parent node of children nodes: re-parent every child, since
        # children adopted from a flattened 'N~' node would otherwise
        # keep pointing at that discarded node
        for child in node.nodelist:
            child.pnode = node
        # Node text: concatenate the word lists of all children
        node.text = []
        for child in node.nodelist:
            node.text += child.text
        # EDU span
        node.edu_span = (lnode.edu_span[0], rnode.edu_span[1])
        node.form = 'N~'
        node.nuc_span = (lnode.edu_span[0], rnode.edu_span[1])
        node.nuc_edu = lnode.nuc_edu
        lnode.prop = "Nucleus"
        rnode.prop = "Nucleus"
        self.Stack.append(node)
    else:
        raise ValueError("Unrecognized parsing action: {}".format(action))