def determine_level(c, current_level, next_marker=None):
    """Work out the depth of the paragraph labelled with marker ``c``.

    Regulation paragraphs are hierarchical, and ``p_level_of`` may report
    more than one candidate level for a marker. When that happens and
    ``next_marker`` is supplied, the marker that follows is used to choose
    between the candidates. Candidate levels index into ``p_levels``; the
    value returned here is a depth, i.e. that index plus one.

    ``current_level`` is accepted but not consulted by this function.
    """
    candidates = p_level_of(c)

    # Either there is a single candidate or there is no following marker to
    # disambiguate with: use the first candidate as-is.
    if len(candidates) <= 1 or not next_marker:
        return candidates[0] + 1

    successors = p_level_of(next_marker)

    # Pair every level with the marker's position inside that level
    indexed_candidates = [(lvl, p_levels[lvl].index(c))
                          for lvl in candidates]
    indexed_successors = [(lvl, p_levels[lvl].index(next_marker))
                          for lvl in successors]

    # The first (candidate, successor) pairing that forms a plausible
    # sequence decides the level.
    for cand_level, cand_pos in indexed_candidates:
        for succ_level, succ_pos in indexed_successors:
            # E.g. i followed by A or i followed by 1
            opens_child = succ_pos == 0 and succ_level == cand_level + 1
            # E.g. i followed by ii
            continues_sibling = (succ_level == cand_level
                                 and succ_pos > cand_pos)
            # E.g. i followed by 3
            resumes_ancestor = succ_level < cand_level and succ_pos > 0

            if opens_child or continues_sibling or resumes_ancestor:
                return cand_level + 1

    # Nothing matched; report it and fall back to the first candidate.
    logging.warning("Ambiguous marker (%s) not followed by something "
                    + "disambiguating (%s)", c, next_marker)
    return indexed_candidates[0][0] + 1
def _deeper_level(first, second):
    """Report whether ``second`` can sit at a deeper level than ``first``.

    Returns True as soon as any level reported by ``p_level_of`` for
    ``first`` is strictly smaller than any level reported for ``second``;
    otherwise returns False.
    """
    return any(shallow < deep
               for shallow in p_level_of(first)
               for deep in p_level_of(second))
def get_markers(text):
    """Collect the paragraph markers found in ``text``.

    The result is the markers from ``tree_utils.get_paragraph_markers``
    followed by those from ``tree_utils.get_collapsed_markers``. When both
    are non-empty, a collapsed marker is kept only if at least one of its
    possible levels is greater than one of the possible levels of the last
    regular marker.
    """
    leading = tree_utils.get_paragraph_markers(text)
    collapsed = tree_utils.get_collapsed_markers(text)

    # Only sanity-check the collapsed markers when there is a regular marker
    # to compare them against.
    if leading and collapsed:
        anchor_levels = p_level_of(leading[-1])

        def sits_below_anchor(marker):
            """Can ``marker`` be at a greater level than the anchor?"""
            marker_levels = p_level_of(marker)
            return any(candidate > anchor
                       for anchor in anchor_levels
                       for candidate in marker_levels)

        collapsed = [marker for marker in collapsed
                     if sits_below_anchor(marker)]

    return list(leading) + list(collapsed)