예제 #1
0
def determine_level(c, current_level, next_marker=None):
    """Return the depth of the paragraph introduced by marker ``c``.

    ``p_level_of(c)`` supplies the candidate p_level indexes for the marker;
    the value returned is always one of those indexes plus one (p_level
    indexing to depth). When there are several candidates and ``next_marker``
    is given, the marker that follows is used to choose between them. If it
    does not settle the question, a warning is logged and the first candidate
    is used. ``current_level`` is accepted but not consulted in this body."""
    candidates = p_level_of(c)

    #   Only one reading, or nothing to disambiguate with: take the first
    if len(candidates) <= 1 or not next_marker:
        return candidates[0] + 1

    next_candidates = p_level_of(next_marker)

    #   Pair each candidate level with the marker's position inside it
    candidates = [(lvl, p_levels[lvl].index(c)) for lvl in candidates]
    next_candidates = [(lvl, p_levels[lvl].index(next_marker))
                       for lvl in next_candidates]

    #   Accept the first candidate that the following marker is
    #   consistent with
    for level, position in candidates:
        for next_level, next_position in next_candidates:
            # E.g. i followed by A or i followed by 1
            opens_sublevel = next_position == 0 and next_level == level + 1
            # E.g. i followed by ii
            continues_level = next_level == level and next_position > position
            # E.g. i followed by 3
            resumes_shallower = next_level < level and next_position > 0
            if opens_sublevel or continues_level or resumes_shallower:
                return level + 1

    logging.warning("Ambiguous marker (%s) not followed by something "
                    "disambiguating (%s)", c, next_marker)
    return candidates[0][0] + 1
예제 #2
0
def _deeper_level(first, second):
    """Report whether ``second`` can sit at a deeper level than ``first``.

    True as soon as some level returned by ``p_level_of(first)`` is strictly
    less than some level returned by ``p_level_of(second)``; False when no
    such pair exists."""
    return any(
        first_level < second_level
        for first_level in p_level_of(first)
        for second_level in p_level_of(second)
    )
예제 #3
0
def _deeper_level(first, second):
    """Check if any level of ``second`` is greater than any level of
    ``first``, using the levels reported by ``p_level_of`` for each marker."""
    return any(
        any(shallow < deep for deep in p_level_of(second))
        for shallow in p_level_of(first)
    )
예제 #4
0
def get_markers(text):
    """ Collect the paragraph markers found in text, followed by the
    collapsed markers that pass a level check against the last paragraph
    marker. Returns a single list."""
    found = tree_utils.get_paragraph_markers(text)
    collapsed = tree_utils.get_collapsed_markers(text)

    if found and collapsed:
        #   A collapsed marker is kept only if at least one of its levels is
        #   greater than at least one level of the last regular marker
        anchor_levels = p_level_of(found[-1])
        with_levels = ((marker, p_level_of(marker)) for marker in collapsed)
        collapsed = [
            marker for marker, marker_levels in with_levels
            if any(c > f for f in anchor_levels for c in marker_levels)
        ]

    return list(found) + list(collapsed)
예제 #5
0
def get_markers(text):
    """ Return the paragraph markers of text plus those collapsed markers
    that have a level greater than a level of the last paragraph marker."""
    paragraph_markers = tree_utils.get_paragraph_markers(text)
    collapsed = tree_utils.get_collapsed_markers(text)

    #   Filtering only applies when both kinds of marker are present
    if paragraph_markers and collapsed:
        reference_levels = p_level_of(paragraph_markers[-1])

        def sits_below(candidate):
            """Does the candidate have a level above a reference level?"""
            candidate_levels = p_level_of(candidate)
            return any(c > f for f in reference_levels
                       for c in candidate_levels)

        collapsed = [marker for marker in collapsed if sits_below(marker)]

    combined = list(paragraph_markers)
    return combined + list(collapsed)