def make_multiple(head, tail=None, wrap_tail=False):
    """Build a grammar for a citation made of a leading term plus one or
    more conjoined follow-up terms, e.g. section 11(a), (b)(4), and (5).

    :param head: parser element for the first term. It is passed through
        ``keep_pos`` and given the results name "head".
    :param tail: parser element for each follow-up term; when ``None`` the
        ``head`` element (as originally passed in) is reused.
    :param wrap_tail: when truthy, each follow-up (conjunctive phrase
        included) may be surrounded by an optional, suppressed "(" and ")".
    :returns: ``QuickSearchable`` wrapping the head followed by at least
        one follow-up term.
    """
    tail_source = head if tail is None else tail
    first_term = keep_pos(head).setResultsName("head")

    # The bare follow-up text is named "match" so that it can be addressed
    # independently of the conjunctive phrase that precedes it.
    later_term = keep_pos(tail_source).setResultsName("match")
    # listAllMatches=True keeps every repetition under "tail" rather than
    # only the most recent one.
    joined_term = (atomic.conj_phrases + later_term).setResultsName(
        "tail", listAllMatches=True)

    if wrap_tail:
        joined_term = (
            Optional(Suppress('(')) + joined_term + Optional(Suppress(')')))

    return QuickSearchable(first_term + OneOrMore(joined_term))
def make_multiple(head, tail=None, wrap_tail=False):
    """Shorthand for the recurring "string of terms" citation pattern,
    e.g. section 11(a), (b)(4), and (5).

    :param head: parser element matching the first term; wrapped with
        ``keep_pos`` and stored under the results name "head".
    :param tail: parser element matching every later term. Defaults to the
        un-wrapped ``head`` element when ``None``.
    :param wrap_tail: if truthy, allow an optional "(" before and ")" after
        each later term (both suppressed from the results).
    :returns: ``QuickSearchable`` over ``head`` followed by one or more
        later terms.
    """
    if tail is None:
        tail = head

    head_expr = keep_pos(head).setResultsName("head")
    # "match" isolates the term's own text from the conjunctive phrase that
    # is bundled with it under "tail".
    match_expr = keep_pos(tail).setResultsName("match")
    tail_expr = (atomic.conj_phrases + match_expr).setResultsName(
        "tail", listAllMatches=True)

    if not wrap_tail:
        return QuickSearchable(head_expr + OneOrMore(tail_expr))

    wrapped_expr = (
        Optional(Suppress('(')) + tail_expr + Optional(Suppress(')')))
    return QuickSearchable(head_expr + OneOrMore(wrapped_expr))
# vim: set encoding=utf-8 from pyparsing import ( LineStart, Literal, OneOrMore, Optional, Regex, SkipTo, srange, Suppress, Word, ZeroOrMore) from regparser.grammar import atomic, unified from regparser.grammar.utils import ( DocLiteral, keep_pos, Marker, QuickSearchable) smart_quotes = QuickSearchable( Suppress(DocLiteral(u'“', "left-smart-quote")) + keep_pos(SkipTo(DocLiteral( u'”', "right-smart-quote"))).setResultsName("term") ) e_tag = ( Suppress(Regex(r"<E[^>]*>")) + keep_pos(OneOrMore(Word(srange("[a-zA-Z-]")))).setResultsName("term") + Suppress(Literal("</E>")) ) xml_term_parser = QuickSearchable( LineStart() + Optional(Suppress(unified.any_depth_p)) + e_tag.setResultsName("head") + ZeroOrMore( (atomic.conj_phrases + e_tag).setResultsName( "tail", listAllMatches=True)) + Suppress(ZeroOrMore(Regex(r",[a-zA-Z ]+,"))) + Suppress(ZeroOrMore(
# vim: set encoding=utf-8 """Some common combinations""" from pyparsing import (FollowedBy, LineEnd, Literal, OneOrMore, Optional, Suppress, SkipTo, ZeroOrMore) from regparser.grammar import atomic from regparser.grammar.utils import keep_pos, Marker, QuickSearchable period_section = Suppress(".") + atomic.section part_section = atomic.part + period_section marker_part_section = ( keep_pos(atomic.section_marker).setResultsName("marker") + part_section) depth6_p = atomic.em_roman_p | atomic.plaintext_level6_p depth5_p = ((atomic.em_digit_p | atomic.plaintext_level5_p) + Optional(depth6_p)) depth4_p = atomic.upper_p + Optional(depth5_p) depth3_p = atomic.roman_p + Optional(depth4_p) depth2_p = atomic.digit_p + Optional(depth3_p) depth1_p = atomic.lower_p + ~FollowedBy(atomic.upper_p) + Optional(depth2_p) any_depth_p = QuickSearchable(depth1_p | depth2_p | depth3_p | depth4_p | depth5_p | depth6_p) depth3_c = atomic.upper_c + Optional(atomic.em_digit_c) depth2_c = atomic.roman_c + Optional(depth3_c) depth1_c = atomic.digit_c + Optional(depth2_c) any_a = atomic.upper_a | atomic.digit_a section_comment = atomic.section + depth1_c section_paragraph = QuickSearchable(atomic.section + depth1_p)
# vim: set encoding=utf-8 from pyparsing import (LineStart, Literal, OneOrMore, Optional, Regex, SkipTo, srange, Suppress, Word, ZeroOrMore) from regparser.grammar import atomic, unified from regparser.grammar.utils import (DocLiteral, keep_pos, Marker, QuickSearchable) smart_quotes = QuickSearchable( Suppress(DocLiteral(u'“', "left-smart-quote")) + keep_pos( SkipTo(DocLiteral(u'”', "right-smart-quote"))).setResultsName("term")) e_tag = ( Suppress(Regex(r"<E[^>]*>")) + keep_pos(OneOrMore(Word(srange("[a-zA-Z-]")))).setResultsName("term") + Suppress(Literal("</E>"))) xml_term_parser = QuickSearchable( LineStart() + Optional(Suppress(unified.any_depth_p)) + e_tag.setResultsName("head") + ZeroOrMore((atomic.conj_phrases + e_tag).setResultsName("tail", listAllMatches=True)) + Suppress(ZeroOrMore(Regex(r",[a-zA-Z ]+,"))) + Suppress(ZeroOrMore((Marker("this") | Marker("the")) + Marker("term"))) + ((Marker("mean") | Marker("means")) | (Marker("refers") + ZeroOrMore(Marker("only")) + Marker("to")) | ( (Marker("has") | Marker("have")) + Marker("the") + Marker("same") + Marker("meaning") + Marker("as")))) key_term_parser = QuickSearchable( LineStart() + Optional(Suppress(unified.any_depth_p)) +
# vim: set encoding=utf-8 """Some common combinations""" from pyparsing import ( FollowedBy, LineEnd, Literal, OneOrMore, Optional, Suppress, SkipTo, ZeroOrMore) from regparser.grammar import atomic from regparser.grammar.utils import keep_pos, Marker, QuickSearchable period_section = Suppress(".") + atomic.section part_section = atomic.part + period_section marker_part_section = ( keep_pos(atomic.section_marker).setResultsName("marker") + part_section) depth6_p = atomic.em_roman_p | atomic.plaintext_level6_p depth5_p = ( (atomic.em_digit_p | atomic.plaintext_level5_p) + Optional(depth6_p)) depth4_p = atomic.upper_p + Optional(depth5_p) depth3_p = atomic.roman_p + Optional(depth4_p) depth2_p = atomic.digit_p + Optional(depth3_p) depth1_p = atomic.lower_p + ~FollowedBy(atomic.upper_p) + Optional(depth2_p) any_depth_p = QuickSearchable( depth1_p | depth2_p | depth3_p | depth4_p | depth5_p | depth6_p) depth3_c = atomic.upper_c + Optional(atomic.em_digit_c) depth2_c = atomic.roman_c + Optional(depth3_c) depth1_c = atomic.digit_c + Optional(depth2_c) any_a = atomic.upper_a | atomic.digit_a