# Markers that are only used as the top of the appendix hierarchy: a run of
# digits ("a1") or a run of the letters I, V, X, L, C, D, M ("aI").
a1 = Word(string.digits).setResultsName("a1")
aI = Word("IVXLCDM").setResultsName("aI")

# Upper-case marker with no surrounding punctuation.
# Catches the A in 12A but not in 12Awesome.
markerless_upper = (
    Word(string.ascii_uppercase).setResultsName("markerless_upper") +
    ~FollowedBy(Word(string.ascii_lowercase))
)

# Markers built by the `parenthesize` helper (defined elsewhere), one per
# character class; the second argument is the name given to the result.
paren_upper = parenthesize(string.ascii_uppercase, "paren_upper")
paren_lower = parenthesize(string.ascii_lowercase, "paren_lower")
paren_digit = parenthesize(string.digits, "paren_digit")

# Same three character classes, built by the `decimalize` helper instead.
period_upper = decimalize(string.ascii_uppercase, "period_upper")
period_lower = decimalize(string.ascii_lowercase, "period_lower")
period_digit = decimalize(string.digits, "period_digit")

# Header-like lines. Each one starts from a *copy* of an atomic grammar with
# leaveWhitespace() applied, so the shared atomic definition is not modified.
# Section marker, then a part/section reference, then the rest of the line.
section = (
    atomic.section_marker.copy().leaveWhitespace() +
    unified.part_section +
    SkipTo(LineEnd())
)

# Bare section, then a depth-1 paragraph, then the rest of the line.
par = (
    atomic.section.copy().leaveWhitespace() +
    unified.depth1_p +
    SkipTo(LineEnd())
)

# Paragraph marker, then a section and a depth-1 paragraph.
marker_par = (
    atomic.paragraph_marker.copy().leaveWhitespace() +
    atomic.section +
    unified.depth1_p
)

# Appendix marker, then an appendix reference, then the rest of the line.
appendix = (
    atomic.appendix_marker.copy().leaveWhitespace() +
    atomic.appendix +
    SkipTo(LineEnd())
)

# Any of the header forms above, anchored at the start of a line. The
# alternatives are tried in the order listed.
headers = utils.QuickSearchable(
    LineStart() + (section | marker_par | par | appendix)
)
def _compare_search(self, grammar, text):
    """Check that QuickSearchable finds the same matches as the raw grammar.

    Scans `text` with `grammar` and with `utils.QuickSearchable(grammar)`,
    then asserts that the stringified results of both scans are equal.
    """
    # Build the wrapper before scanning, mirroring the original ordering.
    searchable = utils.QuickSearchable(grammar)
    plain_matches = list(map(str, grammar.scanString(text)))
    quick_matches = list(map(str, searchable.scanString(text)))
    self.assertEqual(plain_matches, quick_matches)
class Delayed:
    """Token produced when the word "delayed" is matched."""


# "effective date" -> an EffectiveDate token.
effective_date = (
    utils.Marker("effective") + utils.Marker("date")
).setParseAction(lambda: EffectiveDate())

# "<digits> FR <digits>" -> Notice(first number, second number).
# Indexing m[0]/m[1] relies on the 'FR' marker contributing no token of its
# own (presumably utils.Marker suppresses its match -- confirm in utils).
notice_citation = (
    Word(string.digits) + utils.Marker('FR') + Word(string.digits)
).setParseAction(lambda m: Notice(int(m[0]), int(m[1])))

# "delayed" -> a Delayed token.
delayed = utils.Marker("delayed").setParseAction(lambda: Delayed())


def int2Month(m):
    """Return a grammar token that matches the name of month number `m`.

    The month name is obtained from ``date.strftime('%B')``; when the token
    matches, its parse action yields the integer `m` in place of the text.

    NOTE: ``%B`` produces the current locale's full month name, so the names
    matched depend on the process locale.
    """
    month_name = date(2000, m, 1).strftime('%B')
    return utils.Marker(month_name).setParseAction(lambda: m)


# Alternation over all twelve month names. The range must start at 1:
# starting at 2 left January out, so January dates could never be parsed.
months = reduce(
    lambda left, right: left | right,
    (int2Month(i) for i in range(1, 13)))

# "<month name> <day>[,] <year>" -> datetime.date(year, month, day).
# m[0] is already an int (see int2Month); the optional comma is suppressed,
# so m[1] is the day and m[2] the year.
date_parser = (
    months + Word(string.digits) + Suppress(Optional(",")) +
    Word(string.digits)
).setParseAction(lambda m: date(int(m[2]), m[0], int(m[1])))

# Any of the tokens above, tried in the order listed.
tokenizer = utils.QuickSearchable(
    effective_date | notice_citation | delayed | date_parser)