Code example #1
0
def generate_verb(word_list, verb, active):
    """Build a grammar that matches any of the trigger words as a tokens.Verb.

    Triggers match case-insensitively; for passive verbs every trigger is
    additionally required to follow a passive marker (e.g. "is", "are").
    """
    literals = [CaselessLiteral(word) for word in word_list]
    if not active:
        literals = [passive_marker + literal for literal in literals]
    # Left-fold the alternatives into a single MatchFirst-style expression.
    any_trigger = reduce(lambda left, right: left | right, literals)
    bounded = WordBoundaries(any_trigger)
    return bounded.setParseAction(
        lambda match: tokens.Verb(verb, active, bool(match.and_prefix)))
Code example #2
0
def generate_verb(word_list, verb, active):
    """Short hand for making tokens.Verb from a list of trigger words"""
    def as_trigger(word):
        # One case-insensitive alternative; passive verbs also require a
        # preceding passive marker ("is", "are", ...).
        literal = CaselessLiteral(word)
        return literal if active else passive_marker + literal

    combined = reduce(lambda acc, nxt: acc | nxt,
                      [as_trigger(word) for word in word_list])
    return WordBoundaries(combined).setParseAction(
        lambda toks: tokens.Verb(verb, active, bool(toks.and_prefix)))
Code example #3
0
# Section symbol/word is matched but dropped from the parse results
# (Suppress): only the identifier that follows it matters to callers.
section_marker = Suppress(Regex(u"§|Section|section"))
sections_marker = Suppress(Regex(u"§§|Sections|sections"))

# Most of these markers could be SuffixMarkers (which arise due to errors in
# the regulation text). We'll wait until we see explicit examples before
# converting them though, to limit false matches
paragraph_marker = Marker("paragraph")
paragraphs_marker = SuffixMarker("paragraphs")

part_marker = Marker("part")
parts_marker = Marker("parts")

subpart_marker = Marker("subpart")

# All the ways the text refers to the official commentary, optionally
# followed by a connective ("comment to", "official interpretations of").
# The "I" in "supplement I" is dropped from the results.
comment_marker = ((Marker("comment") | Marker("commentary") |
                   (Marker("official") + Marker("interpretations")) |
                   (Marker("supplement") + Suppress(WordBoundaries("I")))) +
                  Optional(Marker("of") | Marker("to")))
comments_marker = Marker("comments")

appendix_marker = Marker("appendix")
appendices_marker = Marker("appendices")

# Connectors between citations in a list ("(a), (b), and (c)"). The range
# words/symbols ("through", "-", "–") are suppressed from the token stream
# but flag the match by storing True under the "through" results name.
# NOTE(review): this relies on the results name surviving Suppress —
# confirm against pyparsing's behavior for the installed version.
conj_phrases = (
    (Suppress(",") + Optional(Marker("and") | Marker("or"))) | Marker("and")
    | Marker("or") | (Marker("except") + Marker("for"))
    | Suppress(Marker("through") | "-"
               | u"–").setParseAction(lambda: True).setResultsName("through"))

# CFR title number, e.g. the "12" in "12 CFR 1026.5".
title = Word(string.digits).setResultsName("cfr_title")
Code example #4
0
# Most of these markers could be SuffixMarkers (which arise due to errors in
# the regulation text). We'll wait until we see explicit examples before
# converting them though, to limit false matches
paragraph_marker = Marker("paragraph")
paragraphs_marker = SuffixMarker("paragraphs")

part_marker = Marker("part")
parts_marker = Marker("parts")

subpart_marker = Marker("subpart")

# All the ways the text refers to the official commentary, optionally
# followed by a connective ("comment to", "official interpretations of").
# The "I" in "supplement I" is dropped from the results.
comment_marker = ((Marker("comment")
                   | Marker("commentary")
                   | (Marker("official") + Marker("interpretations"))
                   | (Marker("supplement") + Suppress(WordBoundaries("I")))) +
                  Optional(Marker("of") | Marker("to")))
comments_marker = Marker("comments")

appendix_marker = Marker("appendix")
appendices_marker = Marker("appendices")

# Connectors between citations in a list ("(a), (b), and (c)"). A hyphen is
# matched and discarded; the word "through" is kept under the "through"
# results name so callers can detect and expand ranges.
conj_phrases = ((Suppress(",") + Optional(Marker("and") | Marker("or")))
                | Marker("and")
                | Marker("or")
                | (Marker("except") + Marker("for"))
                | Suppress("-")
                | WordBoundaries(
                    CaselessLiteral("through")).setResultsName("through"))

# CFR title number, e.g. the "12" in "12 CFR 1026.5".
title = Word(string.digits).setResultsName("cfr_title")
Code example #5
0
import logging

import attr
from pyparsing import (CaselessLiteral, FollowedBy, LineEnd, Literal,
                       OneOrMore, Optional, QuotedString, Suppress, Word,
                       ZeroOrMore)
from six.moves import reduce

from regparser.grammar import atomic, tokens, unified
from regparser.grammar.utils import Marker, QuickSearchable, WordBoundaries
from regparser.tree.paragraph import hash_for_paragraph, p_levels
from regparser.tree.reg_text import subjgrp_label

logger = logging.getLogger(__name__)


# Matches "introductory text" or "subject heading"; the latter is normalized
# to the literal string "text" by its parse action so both forms yield the
# same token value.
intro_text_marker = (
    (Marker("introductory") + WordBoundaries(CaselessLiteral("text"))) |
    (Marker("subject") + Marker("heading")).setParseAction(lambda _: "text")
)

# Prepositions connecting a reference to its parent ("paragraph (a) of ...").
of_connective = (Marker("of") | Marker("for") | Marker("to"))

# Words signalling passive voice ahead of a verb ("is amended"). The "and"
# branch additionally records a True-valued "and_prefix" result, which
# generate_verb reads to mark verbs introduced by "and".
passive_marker = (
    Marker("is") | Marker("are") | Marker("was") | Marker("were") |
    Marker("and").setResultsName("and_prefix").setParseAction(
        lambda _: True))


# A bare "and" between tokens is converted to an explicit AndToken.
and_token = Marker("and").setParseAction(lambda _: tokens.AndToken())


# Verbs