Beispiel #1
0
    def build_tree(text):
        """ Build tree from *.dis file

        The text is split into tokens around parentheses and whitespace,
        handed to RstTree.process_text, and then reduced with a stack:
        every ')' closes the group opened by the nearest '(' still on the
        stack, and that group is replaced by a SpanNode or a small tuple.

        :type text: string
        :param text: RST tree read from a *.dis file

        :return: the item on top of the stack once every token has been
            consumed (presumably the 'Root' SpanNode for a well-formed
            file -- confirm against callers)
        :raises ValueError: if a parenthesized group holds fewer than two
            items, or starts with an unrecognized label
        """
        tokens = text.strip().replace('//TT_ERR', '').replace(
            '\n', '').replace('(', ' ( ').replace(')', ' ) ').split()
        queue = RstTree.process_text(tokens)
        stack = []
        # Walk the tokens by position instead of queue.pop(0): popping the
        # head of a list is O(n), which made the whole parse quadratic.
        for position, token in enumerate(queue):
            if token != ')':
                # Anything that does not close a group is simply pushed
                stack.append(token)
                continue
            # Closing bracket: collect everything back to the matching '('
            content = []
            while stack:
                cont = stack.pop()
                if cont == '(':
                    break
                content.append(cont)
            content.reverse()  # Restore the original left-to-right order
            # A group needs a label plus at least one argument
            if len(content) < 2:
                raise ValueError("content = {}".format(content))
            label = content.pop(0)
            if label in ('Root', 'Nucleus', 'Satellite'):
                # Structural node: the remaining content (tuples / child
                # nodes built earlier) is handed to SpanNode.create_node
                node = SpanNode(prop=label)
                node.create_node(content)
                stack.append(node)
            elif label == 'span':
                beginindex = int(content.pop(0))
                endindex = int(content.pop(0))
                stack.append(('span', beginindex, endindex))
            elif label == 'leaf':
                eduindex = int(content.pop(0))
                RstTree.check_content(label, content)
                # A leaf is stored as a span covering a single EDU
                stack.append(('leaf', eduindex, eduindex))
            elif label == 'rel2par':
                relation = content.pop(0)
                RstTree.check_content(label, content)
                stack.append(('relation', relation))
            elif label == 'text':
                txt = RstTree.create_text(content)
                stack.append(('text', txt))
            else:
                # Report the tokens that have not been consumed yet, which
                # is what the queue held at this point before the rewrite
                raise ValueError(
                    "Unrecognized parsing label: {} \n\twith content = {}\n\tstack={}\n\tqueue={}"
                    .format(label, content, stack, queue[position + 1:]))
        return stack[-1]
Beispiel #2
0
    def init(self, doc):
        """ Fill the Queue with one leaf node per EDU of the document

        EDUs are read from doc.edu_dict using the keys 1..len(edu_dict),
        in increasing order.

        :type doc: Doc instance
        :param doc: document whose edu_dict supplies the EDU texts
        """
        edu_total = len(doc.edu_dict)
        edu_id = 1
        while edu_id <= edu_total:
            leaf = SpanNode(prop=None)
            leaf.text = doc.edu_dict[edu_id]
            # A leaf spans exactly one EDU and is its own nucleus
            leaf.edu_span = (edu_id, edu_id)
            leaf.nuc_span = (edu_id, edu_id)
            leaf.nuc_edu = edu_id
            self.Queue.append(leaf)
            edu_id += 1
Beispiel #3
0
    def binarize_tree(tree):
        """ Convert a general RST tree to a binary RST tree

        Nodes are processed from a work queue. A node with exactly two
        children gets them as lnode/rnode. A node with more than two
        children keeps its first child as lnode and receives a freshly
        created SpanNode holding the remaining children as rnode
        (right-branching); that new node is processed next. Every
        processed node ends up with an empty nodelist.

        A node with more than two children that all share the same prop
        is flagged with visited = True, and the flag is copied to the
        intermediate node created for it.

        :type tree: instance of SpanNode
        :param tree: a general RST tree

        :return: the same tree object, modified in place
        """
        queue = [tree]
        while queue:
            node = queue.pop(0)
            queue += node.nodelist
            # Construct binary tree
            if len(node.nodelist) == 2:
                node.lnode = node.nodelist[0]
                node.rnode = node.nodelist[1]
                # Parent node
                node.lnode.pnode = node
                node.rnode.pnode = node

            elif len(node.nodelist) > 2:
                # All children carrying the same prop: mark this node
                if len({child.prop for child in node.nodelist}) == 1:
                    node.visited = True

                # Remove one node from the nodelist
                node.lnode = node.nodelist.pop(0)
                # The new node takes the prop of the first remaining child
                newnode = SpanNode(node.nodelist[0].prop)
                newnode.nodelist += node.nodelist
                # Right-branching
                node.rnode = newnode
                # Parent node
                node.lnode.pnode = node
                node.rnode.pnode = node
                if node.visited:
                    newnode.visited = True

                # Process the new node first, so branching continues
                # until its nodelist is down to two children
                queue.insert(0, newnode)
            # Clear nodelist for the current node
            node.nodelist = []
        return tree
Beispiel #4
0
    def init(self, doc):
        """ Fill the Queue with one leaf node per EDU of the document

        EDUs are read from doc.edu_dict using the keys 1..len(edu_dict),
        in increasing order.

        :type doc: Doc instance
        :param doc: document whose edu_dict supplies the EDU texts
        :raises ValueError: if doc is not a Doc instance
        """
        if not isinstance(doc, Doc):
            raise ValueError("doc should be an instance of Doc")
        edus = doc.edu_dict
        for edu_id in range(1, len(edus) + 1):
            leaf = SpanNode(prop=None)
            leaf.text = edus[edu_id]
            # A leaf spans exactly one EDU and is its own nucleus
            leaf.edu_span = (edu_id, edu_id)
            leaf.nuc_span = (edu_id, edu_id)
            leaf.nuc_edu = edu_id
            self.Queue.append(leaf)
Beispiel #5
0
    def flat_tree(tree):
        """ Binarize only the nodes whose children have differing props

        A node with exactly two children gets them as lnode/rnode. A node
        with more than two children is split right-branching only when its
        children do not all share the same prop; otherwise it is left
        untouched and keeps its full nodelist.

        :type tree: instance of SpanNode
        :param tree: a general RST tree

        :return: the same tree object, modified in place
        """
        pending = [tree]
        while pending:
            current = pending.pop(0)
            pending.extend(current.nodelist)
            child_count = len(current.nodelist)

            if child_count < 2:
                continue

            if child_count == 2:
                first, second = current.nodelist[0], current.nodelist[1]
                current.lnode = first
                current.rnode = second
                # Parent links
                current.lnode.pnode = current
                current.rnode.pnode = current
                continue

            # More than two children: keep the node flat when they all
            # share one prop
            distinct_props = set([child.prop for child in current.nodelist])
            if len(distinct_props) == 1:
                continue

            # Split off the first child; the rest moves to a new node
            current.lnode = current.nodelist.pop(0)
            remainder = SpanNode(current.nodelist[0].prop)
            remainder.nodelist += current.nodelist
            # Right-branching
            current.rnode = remainder
            # Parent links
            current.lnode.pnode = current
            current.rnode.pnode = current

            # Handle the new node next
            pending.insert(0, remainder)
            # The current node now has exactly these two children
            current.nodelist = [current.lnode, current.rnode]
        return tree
Beispiel #6
0
    def binarize_tree(tree):
        """ Turn a general RST tree into a binary RST tree, in place

        A node with two children gets them as lnode/rnode. A node with
        more than two children keeps its first child as lnode and gets a
        new SpanNode holding the remaining children as rnode. Every
        processed node ends up with an empty nodelist.

        :type tree: instance of SpanNode
        :param tree: a general RST tree

        :return: the same tree object
        """
        pending = [tree]
        while pending:
            current = pending.pop(0)
            pending.extend(current.nodelist)
            child_count = len(current.nodelist)

            if child_count > 2:
                # Split off the first child; the rest moves to a new node
                # that takes the prop of the first remaining child
                current.lnode = current.nodelist.pop(0)
                remainder = SpanNode(current.nodelist[0].prop)
                remainder.nodelist += current.nodelist
                # Right-branching
                current.rnode = remainder
                current.lnode.pnode = current
                current.rnode.pnode = current
                # Put the new node at the head of the queue so that
                # splitting continues until only two children remain
                pending.insert(0, remainder)
            elif child_count == 2:
                first, second = current.nodelist[0], current.nodelist[1]
                current.lnode = first
                current.rnode = second
                current.lnode.pnode = current
                current.rnode.pnode = current

            # The children now live in lnode/rnode only
            current.nodelist = []
        return tree
Beispiel #7
0
 def operate(self, action_tuple):
     """ Apply one parsing action to the Stack/Queue

     'Shift' moves the first Queue node onto the Stack. 'Reduce' pops
     the two topmost Stack nodes, joins them under a new SpanNode and
     pushes that node; the form ('NN', 'NS' or 'SN') decides which
     child is Nucleus and which is Satellite.

     :param action_tuple: pair (action, form)
     :raises ActionError: on Shift with an empty Queue, or Reduce with
         fewer than two nodes on the Stack
     :raises ValueError: on an unrecognized action or form
     """
     action, form = action_tuple

     if action == 'Shift':
         if not self.Queue:
             raise ActionError("Shift action error")
         self.Stack.append(self.Queue.pop(0))
         return

     if action != 'Reduce':
         raise ValueError("Unrecognized parsing action: {}".format(action))

     if len(self.Stack) < 2:
         raise ActionError("Reduce action error")
     right = self.Stack.pop()
     left = self.Stack.pop()

     # The parent's own prop is assigned only once it becomes
     # somebody's child
     parent = SpanNode(prop=None)
     parent.lnode, parent.rnode = left, right
     parent.lnode.pnode, parent.rnode.pnode = parent, parent
     # Text: concatenation of the two word lists
     parent.text = left.text + right.text
     # The EDU span runs from the left child's start to the right
     # child's end
     parent.edu_span = (left.edu_span[0], right.edu_span[1])
     parent.form = form

     # Choose which child supplies the nucleus EDU and the role of
     # each child
     if form == 'NN':
         head, left_role, right_role = left, "Nucleus", "Nucleus"
     elif form == 'NS':
         head, left_role, right_role = left, "Nucleus", "Satellite"
     elif form == 'SN':
         head, left_role, right_role = right, "Satellite", "Nucleus"
     else:
         raise ValueError("Unrecognized form: {}".format(form))

     parent.nuc_edu = head.nuc_edu
     parent.lnode.prop = left_role
     parent.rnode.prop = right_role
     self.Stack.append(parent)
Beispiel #8
0
    def operate(self, action_tuple):
        """ According to parsing label to modify the status of
            the Stack/Queue

        'Shift' moves the first Queue node onto the Stack.

        'Reduce' pops the two topmost Stack nodes and joins them under a
        new binary SpanNode; the form ('NN', 'N~', 'NS' or 'SN') decides
        the nuclearity of the children.

        'R~' pops the two topmost Stack nodes and builds a multinuclear
        node of form 'N~'. If the topmost node is itself of form 'N~',
        its children are adopted directly, so the result stays flat
        instead of nesting.

        :param action_tuple: pair (action, form); form is only used by
            'Reduce'
        :raises ActionError: on Shift with an empty Queue, or Reduce/R~
            with fewer than two nodes on the Stack
        :raises ValueError: on an unrecognized action or form
        """
        action, form = action_tuple
        if action == 'Shift':
            if len(self.Queue) == 0:
                raise ActionError("Shift action error")
            node = self.Queue.pop(0)
            self.Stack.append(node)
        elif action == 'Reduce':
            if len(self.Stack) < 2:
                raise ActionError("Reduce action error")
            rnode = self.Stack.pop()
            lnode = self.Stack.pop()

            # Create a new node
            # Assign a value to prop, only when it is someone's
            # children node
            node = SpanNode(prop=None)

            # Children node
            node.lnode, node.rnode = lnode, rnode
            node.nodelist = [lnode, rnode]

            # dependency: element-wise mean of the children's values
            node.dependency = np.average(
                [lnode.dependency, rnode.dependency], axis=0)

            # Parent node of children nodes
            lnode.pnode, rnode.pnode = node, node

            # Node text: concatenate two word lists
            node.text = lnode.text + rnode.text
            # EDU span
            node.edu_span = (lnode.edu_span[0], rnode.edu_span[1])

            # Nuc span / Nuc EDU
            node.form = form
            if form in ('NN', 'N~'):
                # Both children are nuclei: the nucleus span is the
                # whole span
                node.nuc_span = (lnode.edu_span[0], rnode.edu_span[1])
                node.nuc_edu = lnode.nuc_edu
                lnode.prop = "Nucleus"
                rnode.prop = "Nucleus"
            elif form == 'NS':
                node.nuc_span = lnode.edu_span
                node.nuc_edu = lnode.nuc_edu
                lnode.prop = "Nucleus"
                rnode.prop = "Satellite"
            elif form == 'SN':
                node.nuc_span = rnode.edu_span
                node.nuc_edu = rnode.nuc_edu
                lnode.prop = "Satellite"
                rnode.prop = "Nucleus"
            else:
                raise ValueError("Unrecognized form: {}".format(form))
            self.Stack.append(node)
        elif action == 'R~':
            if len(self.Stack) < 2:
                raise ActionError("Reduce action error")
            enode = self.Stack.pop()
            snode = self.Stack.pop()
            node = SpanNode(prop=None)

            if enode.form == 'N~':
                # Absorb the children of the multinuclear node so the
                # result stays flat; enode itself is dropped
                node.nodelist.append(snode)
                node.nodelist.extend(enode.nodelist)
            else:
                node.nodelist = [snode, enode]

            # Children node: outermost members of the flat child list
            node.lnode, node.rnode = node.nodelist[0], node.nodelist[-1]
            lnode, rnode = node.lnode, node.rnode

            # dependency
            node.dependency = np.average([n.dependency for n in node.nodelist],
                                         axis=0)

            # Re-parent every child, not just the first and last one.
            # Children adopted from enode would otherwise keep pointing
            # at enode, which is no longer part of the tree.
            for child in node.nodelist:
                child.pnode = node
                child.prop = "Nucleus"

            # Node text: concatenate the word lists of all children
            node.text = []
            for nl in node.nodelist:
                node.text += nl.text
            # EDU span
            node.edu_span = (lnode.edu_span[0], rnode.edu_span[1])
            node.form = 'N~'
            node.nuc_span = (lnode.edu_span[0], rnode.edu_span[1])
            node.nuc_edu = lnode.nuc_edu
            self.Stack.append(node)
        else:
            raise ValueError("Unrecognized parsing action: {}".format(action))