Esempio n. 1
0
def make_multiple(head, tail=None, wrap_tail=False):
    """Build a grammar for a citation followed by a run of further terms,
    e.g. section 11(a), (b)(4), and (5).

    head -- parser element for the leading term; stored under the results
        name "head".
    tail -- parser element for each following term; when omitted, `head`
        is reused. Each following term is stored under "match", and the
        conjunctive phrase plus that term under "tail" (every occurrence
        is kept, via listAllMatches).
    wrap_tail -- when true, each tail may be preceded by a suppressed "("
        and followed by a suppressed ")", both optional.

    Returns a QuickSearchable wrapping the head followed by one or more
    tails."""
    tail_source = head if tail is None else tail
    head_expr = keep_pos(head).setResultsName("head")
    # The matched term gets its own name so it can be read apart from the
    # conjunctive phrase that precedes it
    match_expr = keep_pos(tail_source).setResultsName("match")
    tail_expr = (atomic.conj_phrases + match_expr).setResultsName(
        "tail", listAllMatches=True)
    if wrap_tail:
        tail_expr = (
            Optional(Suppress('(')) + tail_expr + Optional(Suppress(')')))
    return QuickSearchable(head_expr + OneOrMore(tail_expr))
def make_multiple(head, tail=None, wrap_tail=False):
    """Shorthand for the recurring "first term, then more terms" citation
    pattern, e.g. section 11(a), (b)(4), and (5).

    The first term is parsed with `head` and named "head". Every later
    term is parsed with `tail` (falling back to `head` when `tail` is
    None), named "match", and grouped with its leading conjunctive phrase
    under "tail"; all such groups are retained. With `wrap_tail` set, an
    optional, suppressed pair of parentheses is allowed around each group.
    The combined expression is returned wrapped in QuickSearchable."""
    if tail is None:
        tail = head
    named_head = keep_pos(head).setResultsName("head")
    # Name the bare term separately from the conjunctive phrase in front
    # of it
    named_match = keep_pos(tail).setResultsName("match")
    conj_then_match = atomic.conj_phrases + named_match
    repeated = conj_then_match.setResultsName("tail", listAllMatches=True)
    if not wrap_tail:
        return QuickSearchable(named_head + OneOrMore(repeated))
    wrapped = Optional(Suppress('(')) + repeated + Optional(Suppress(')'))
    return QuickSearchable(named_head + OneOrMore(wrapped))
Esempio n. 3
0
# vim: set encoding=utf-8
from pyparsing import (
    LineStart, Literal, OneOrMore, Optional, Regex, SkipTo, srange, Suppress,
    Word, ZeroOrMore)

from regparser.grammar import atomic, unified
from regparser.grammar.utils import (
    DocLiteral, keep_pos, Marker, QuickSearchable)


# Grammar for a term introduced by a left smart quote: the opening quote is
# suppressed and the text skipped over up to the right smart quote is stored
# under the results name "term".
# NOTE(review): DocLiteral, keep_pos and QuickSearchable are project helpers
# from regparser.grammar.utils; presumably keep_pos records the position of
# the match and DocLiteral's second argument is a display name -- confirm.
smart_quotes = QuickSearchable(
    Suppress(DocLiteral(u'“', "left-smart-quote")) +
    keep_pos(SkipTo(DocLiteral(
        u'”', "right-smart-quote"))).setResultsName("term")
)

# Grammar for an <E ...>...</E> XML element. The opening tag (with any
# attributes) and the literal closing "</E>" are suppressed; the one or more
# words between them (ASCII letters and hyphens only) are stored under the
# results name "term".
# NOTE(review): the opening-tag regex matches any tag whose name merely
# starts with "E" (e.g. "<EXTRACT>") -- confirm that is intended.
e_tag = (
    Suppress(Regex(r"<E[^>]*>")) +
    keep_pos(OneOrMore(Word(srange("[a-zA-Z-]")))).setResultsName("term") +
    Suppress(Literal("</E>"))
)

xml_term_parser = QuickSearchable(
    LineStart() +
    Optional(Suppress(unified.any_depth_p)) +
    e_tag.setResultsName("head") +
    ZeroOrMore(
        (atomic.conj_phrases + e_tag).setResultsName(
            "tail", listAllMatches=True)) +
    Suppress(ZeroOrMore(Regex(r",[a-zA-Z ]+,"))) +
    Suppress(ZeroOrMore(
Esempio n. 4
0
# vim: set encoding=utf-8
"""Some common combinations"""
from pyparsing import (FollowedBy, LineEnd, Literal, OneOrMore, Optional,
                       Suppress, SkipTo, ZeroOrMore)

from regparser.grammar import atomic
from regparser.grammar.utils import keep_pos, Marker, QuickSearchable

# A "." (dropped from the results) followed by a section.
period_section = Suppress(".") + atomic.section
# A part, then ".", then a section.
part_section = atomic.part + period_section
# The same, preceded by a section marker stored under the results name
# "marker".
# NOTE(review): keep_pos is a project helper; presumably it records where
# the marker matched -- confirm in regparser.grammar.utils.
marker_part_section = (
    keep_pos(atomic.section_marker).setResultsName("marker") + part_section)

# Chains of "_p" markers. Each depthN_p matches one marker at level N,
# optionally followed by the chain for level N+1, so the definitions must be
# written from the deepest level (6) back to the shallowest (1).
# NOTE(review): the "_p" suffix presumably means paragraph markers --
# confirm against regparser.grammar.atomic.
depth6_p = atomic.em_roman_p | atomic.plaintext_level6_p
depth5_p = ((atomic.em_digit_p | atomic.plaintext_level5_p) +
            Optional(depth6_p))
depth4_p = atomic.upper_p + Optional(depth5_p)
depth3_p = atomic.roman_p + Optional(depth4_p)
depth2_p = atomic.digit_p + Optional(depth3_p)
# Negative lookahead: a lower_p marker does not start a level-1 chain when
# an upper_p marker comes directly after it.
depth1_p = atomic.lower_p + ~FollowedBy(atomic.upper_p) + Optional(depth2_p)
# A chain starting at any level; alternatives are tried in the order listed.
any_depth_p = QuickSearchable(depth1_p | depth2_p | depth3_p | depth4_p
                              | depth5_p | depth6_p)

# Chains of "_c" markers, built deepest-first: each level is one marker
# optionally followed by the next deeper chain.
# NOTE(review): the "_c" and "_a" suffixes presumably refer to comment and
# appendix markers -- confirm against regparser.grammar.atomic.
depth3_c = atomic.upper_c + Optional(atomic.em_digit_c)
depth2_c = atomic.roman_c + Optional(depth3_c)
depth1_c = atomic.digit_c + Optional(depth2_c)
# Either kind of "_a" marker (upper-case or digit).
any_a = atomic.upper_a | atomic.digit_a

# A section immediately followed by a "_c" chain starting at level 1.
section_comment = atomic.section + depth1_c

# A section immediately followed by a "_p" chain starting at level 1.
section_paragraph = QuickSearchable(atomic.section + depth1_p)
Esempio n. 5
0
# vim: set encoding=utf-8
from pyparsing import (LineStart, Literal, OneOrMore, Optional, Regex, SkipTo,
                       srange, Suppress, Word, ZeroOrMore)

from regparser.grammar import atomic, unified
from regparser.grammar.utils import (DocLiteral, keep_pos, Marker,
                                     QuickSearchable)

# Grammar for a smart-quoted term: the left smart quote is suppressed, and
# the text skipped over before the right smart quote is stored under the
# results name "term".
# NOTE(review): DocLiteral, keep_pos and QuickSearchable come from
# regparser.grammar.utils; keep_pos presumably records the match position
# -- confirm there.
smart_quotes = QuickSearchable(
    Suppress(DocLiteral(u'“', "left-smart-quote")) + keep_pos(
        SkipTo(DocLiteral(u'”', "right-smart-quote"))).setResultsName("term"))

# Grammar for an <E ...>...</E> XML element. Both tags are suppressed; the
# one or more words between them (ASCII letters and hyphens only) are stored
# under the results name "term".
# NOTE(review): r"<E[^>]*>" also matches any opening tag whose name begins
# with "E" (e.g. "<EXTRACT>") -- confirm that is intended.
e_tag = (
    Suppress(Regex(r"<E[^>]*>")) +
    keep_pos(OneOrMore(Word(srange("[a-zA-Z-]")))).setResultsName("term") +
    Suppress(Literal("</E>")))

# Grammar for a line that opens with one or more <E>-tagged terms followed
# by a defining phrase. In order, from the start of a line:
#   * an optional paragraph-marker chain (suppressed);
#   * the first e_tag, stored under "head";
#   * zero or more "conjunctive phrase + e_tag" groups, each stored under
#     "tail" (all occurrences kept);
#   * zero or more comma-delimited runs of letters/spaces (suppressed);
#   * zero or more "this term" / "the term" phrases (suppressed);
#   * one of: "mean" / "means", "refers [only ...] to", or
#     "has" / "have" + "the same meaning as".
# NOTE(review): Marker is a project helper; presumably it matches the given
# word -- confirm in regparser.grammar.utils.
xml_term_parser = QuickSearchable(
    LineStart() + Optional(Suppress(unified.any_depth_p)) +
    e_tag.setResultsName("head") +
    ZeroOrMore((atomic.conj_phrases +
                e_tag).setResultsName("tail", listAllMatches=True)) +
    Suppress(ZeroOrMore(Regex(r",[a-zA-Z ]+,"))) +
    Suppress(ZeroOrMore((Marker("this") | Marker("the")) + Marker("term"))) +
    ((Marker("mean") | Marker("means"))
     | (Marker("refers") + ZeroOrMore(Marker("only")) + Marker("to")) | (
         (Marker("has") | Marker("have")) + Marker("the") + Marker("same") +
         Marker("meaning") + Marker("as"))))

key_term_parser = QuickSearchable(
    LineStart() + Optional(Suppress(unified.any_depth_p)) +
# vim: set encoding=utf-8
"""Some common combinations"""
from pyparsing import (
    FollowedBy, LineEnd, Literal, OneOrMore, Optional, Suppress, SkipTo,
    ZeroOrMore)

from regparser.grammar import atomic
from regparser.grammar.utils import keep_pos, Marker, QuickSearchable

# "." then a section; the period is suppressed from the parse results.
period_section = Suppress(".") + atomic.section
# A part followed by the period-and-section pair above.
part_section = atomic.part + period_section
# A section marker, stored under the results name "marker", followed by
# part_section.
# NOTE(review): keep_pos comes from regparser.grammar.utils; presumably it
# records the position of the match -- confirm there.
marker_part_section = (
    keep_pos(atomic.section_marker).setResultsName("marker") +
    part_section)

# "_p" marker chains: depthN_p is one level-N marker optionally followed by
# the level-(N+1) chain. Because each definition references the next deeper
# one, they are declared from level 6 up to level 1.
# NOTE(review): "_p" presumably denotes paragraph markers -- confirm against
# regparser.grammar.atomic.
depth6_p = atomic.em_roman_p | atomic.plaintext_level6_p
depth5_p = (
    (atomic.em_digit_p | atomic.plaintext_level5_p) +
    Optional(depth6_p))
depth4_p = atomic.upper_p + Optional(depth5_p)
depth3_p = atomic.roman_p + Optional(depth4_p)
depth2_p = atomic.digit_p + Optional(depth3_p)
# The ~FollowedBy(...) is a negative lookahead: a lower_p marker directly
# followed by an upper_p marker is not accepted as a level-1 start.
depth1_p = atomic.lower_p + ~FollowedBy(atomic.upper_p) + Optional(depth2_p)
# Matches a chain beginning at any level, trying level 1 first.
any_depth_p = QuickSearchable(
    depth1_p | depth2_p | depth3_p | depth4_p | depth5_p | depth6_p)

# "_c" marker chains, declared deepest-first; each level is one marker
# optionally followed by the next deeper chain.
# NOTE(review): "_c" / "_a" presumably stand for comment / appendix markers
# -- confirm against regparser.grammar.atomic.
depth3_c = atomic.upper_c + Optional(atomic.em_digit_c)
depth2_c = atomic.roman_c + Optional(depth3_c)
depth1_c = atomic.digit_c + Optional(depth2_c)
# Either an upper-case or a digit "_a" marker.
any_a = atomic.upper_a | atomic.digit_a