예제 #1
0
class RegtextParagraphProcessor(paragraph_processor.ParagraphProcessor):
    """Paragraph processor wired up with a fixed matcher list and a set of
    optional constraint rules.

    NOTE(review): how ``MATCHERS`` and the two constraint hooks are consumed
    is decided by ``paragraph_processor.ParagraphProcessor``, which is not
    visible from here -- confirm there before changing anything below.
    """

    # The sequence is kept exactly as originally written; presumably the
    # matchers are consulted in list order, so position may matter -- TODO
    # confirm against the base class.
    MATCHERS = [
        paragraph_processor.StarsMatcher(),
        paragraph_processor.TableMatcher(),
        paragraph_processor.FencedMatcher(),
        flatsubtree_processor.FlatsubtreeMatcher(
            tags=['EXTRACT'],
            node_type=Node.EXTRACT,
        ),
        import_category.ImportCategoryMatcher(),
        flatsubtree_processor.FlatsubtreeMatcher(tags=['EXAMPLE']),
        paragraph_processor.HeaderMatcher(),
        paragraph_processor.GraphicsMatcher(),
        ParagraphMatcher(),
        note_processor.NoteMatcher(),
        paragraph_processor.IgnoreTagMatcher(
            'SECTNO',
            'SUBJECT',
            'CITA',
            'SECAUTH',
            'APPRO',
            'PRTPAGE',
            'EAR',
            'RESERVED',
        ),
    ]

    def additional_constraints(self):
        """Return this processor's own rules followed by the rules from
        :meth:`relaxed_constraints`.

        The own rules are ``depth_type_inverses``, ``limit_sequence_gap(3)``
        and ``stars_occupy_space`` from ``optional_rules``.
        """
        own_rules = [
            optional_rules.depth_type_inverses,
            optional_rules.limit_sequence_gap(3),
            optional_rules.stars_occupy_space,
        ]
        # The relaxed rules are always appended after the own rules.
        return own_rules + self.relaxed_constraints()

    def relaxed_constraints(self):
        """Return a two-element list: ``star_new_level`` and a
        ``limit_paragraph_types`` rule built from the marker types listed
        below.
        """
        new_level_rule = optional_rules.star_new_level
        type_limit_rule = optional_rules.limit_paragraph_types(
            mtypes.lower,
            mtypes.upper,
            mtypes.ints,
            mtypes.roman,
            mtypes.em_ints,
            mtypes.em_roman,
            mtypes.stars,
            mtypes.markerless,
        )
        return [new_level_rule, type_limit_rule]
예제 #2
0
class FlatParagraphProcessor(paragraph_processor.ParagraphProcessor):
    """Paragraph processor that makes no attempt to derive paragraph
    markers.

    Only the matcher list is customised here; everything else comes from
    ``paragraph_processor.ParagraphProcessor``.
    """

    # The sequence is kept exactly as originally written; presumably the
    # matchers are consulted in list order -- TODO confirm against the base
    # class.
    MATCHERS = [
        paragraph_processor.StarsMatcher(),
        paragraph_processor.TableMatcher(),
        simple_hierarchy_processor.SimpleHierarchyMatcher(
            ['NOTE', 'NOTES'],
            Node.NOTE,
        ),
        paragraph_processor.HeaderMatcher(),
        paragraph_processor.SimpleTagMatcher('P', 'FP'),
        us_code.USCodeMatcher(),
        paragraph_processor.GraphicsMatcher(),
        paragraph_processor.IgnoreTagMatcher('PRTPAGE'),
    ]