Ejemplo n.º 1
0
    def paragraph_with_marker(self, text, next_text=''):
        """Add a node for a paragraph that has an (a) or a. etc. marker.

        Creates an appendix ``Node`` labelled with the paragraph's marker,
        decides which entry of ``p_levels`` the marker belongs to (stored on
        the node as ``p_level``), updates ``self.depth`` and adds the node to
        ``self.m_stack`` at that depth.

        A marker such as 'i' or 'v' can appear in more than one entry of
        ``p_levels``. It is disambiguated by looking forward at the marker
        of ``next_text``, then backwards at nodes already on the stack, and
        finally by falling back to the lowest matching index.

        :param text: paragraph text; ``initial_marker(text)`` must yield a
            two-item result whose first item is the marker.
        :param next_text: text of the following paragraph, used only for the
            look-ahead heuristic. It may have no marker.
        """
        marker, _ = initial_marker(text)
        n = Node(text, node_type=Node.APPENDIX, label=[marker])

        #   Parse the look-ahead text only once; falsy means "no marker"
        next_match = initial_marker(next_text)
        if next_match:
            next_marker, _ = next_match
        else:
            next_marker = None

        #   Indexes into p_levels which contain each marker
        this_p_levels = {idx for idx, lvl in enumerate(p_levels)
                         if marker in lvl}
        next_p_levels = {idx for idx, lvl in enumerate(p_levels)
                         if next_marker in lvl}
        #   Non-empty stack levels and the p_levels already used in them
        previous_levels = [level for level in self.m_stack.m_stack if level]
        previous_p_levels = set()
        for stack_level in previous_levels:
            previous_p_levels.update(sn.p_level for _, sn in stack_level
                                     if hasattr(sn, 'p_level'))

        #   Ambiguity, e.g. 'i', 'v'. Disambiguate by looking forward
        if len(this_p_levels) > 1 and len(next_p_levels) == 1:
            next_p_level = next_p_levels.pop()
            #   e.g. an 'i' followed by a 'ii'
            if next_p_level in this_p_levels:
                this_p_idx = p_levels[next_p_level].index(marker)
                next_p_idx = p_levels[next_p_level].index(next_marker)
                if this_p_idx < next_p_idx:     # Heuristic
                    n.p_level = next_p_level
            #   e.g. (a)(1)(i) followed by an 'A'
            #   Note: this deliberately may override the choice made above
            new_level = this_p_levels - previous_p_levels
            if next_p_level not in previous_p_levels and new_level:
                n.p_level = new_level.pop()

        #   Ambiguity. Disambiguate by looking backwards
        if len(this_p_levels) > 1 and not hasattr(n, 'p_level'):
            for stack_level in previous_levels:
                for _, stack_node in stack_level:
                    if getattr(stack_node, 'p_level', None) in this_p_levels:
                        #   Later levels replace earlier ones
                        n.p_level = stack_node.p_level

        #   Simple case (no ambiguity) and cases not seen above. Compare
        #   against None explicitly so a chosen level of 0 counts as "set"
        if getattr(n, 'p_level', None) is None:
            #   NOTE(review): min() raises ValueError if the marker is not
            #   found in any entry of p_levels -- confirm callers rule that
            #   out
            n.p_level = min(this_p_levels)  # rule of thumb: favor lower case

        #   Check if we've seen this type of marker before
        found_in_prev = False
        for stack_level in previous_levels:
            #   previous_levels holds only non-empty levels, so [-1] is safe
            if in_same_p_level(n, stack_level):
                found_in_prev = True
                self.depth = stack_level[-1][0]
        if not found_in_prev:   # New type of marker
            self.depth += 1
        self.m_stack.add(self.depth, n)