def generate_verb(word_list, verb, active):
    """Build a grammar matching any trigger word and emitting ``tokens.Verb``.

    :param word_list: iterable of literal trigger words, matched
        case-insensitively
    :param verb: canonical verb name stored on the resulting token
    :param active: whether the verb is in the active voice; passive
        triggers additionally require a preceding passive marker
    :return: pyparsing grammar whose parse action yields a ``tokens.Verb``
    """
    literals = [CaselessLiteral(word) for word in word_list]
    if not active:
        # Passive-voice verbs only count when preceded by "is"/"are"/etc.
        literals = [passive_marker + lit for lit in literals]
    # Fold the alternatives together with pyparsing's "|" (MatchFirst)
    combined = reduce(lambda left, right: left | right, literals)
    bounded = WordBoundaries(combined)
    # An "and"-prefixed passive match sets and_prefix, which we record on
    # the token so later phases can tell list continuations apart
    return bounded.setParseAction(
        lambda m: tokens.Verb(verb, active, bool(m.and_prefix)))
# Section sigils (singular vs. plural); Suppress drops the marker itself
# from the parse results, keeping only what follows it
section_marker = Suppress(Regex(u"§|Section|section"))
sections_marker = Suppress(Regex(u"§§|Sections|sections"))

# Most of these markers could be SuffixMarkers (which arise due to errors in
# the regulation text). We'll wait until we see explicit examples before
# converting them though, to limit false matches
paragraph_marker = Marker("paragraph")
paragraphs_marker = SuffixMarker("paragraphs")
part_marker = Marker("part")
parts_marker = Marker("parts")
subpart_marker = Marker("subpart")
# The commentary is referred to several ways: "comment", "commentary",
# "official interpretations", or "supplement I" (the trailing "I" is
# matched on a word boundary and suppressed), optionally followed by
# an "of"/"to" connective
comment_marker = ((Marker("comment")
                   | Marker("commentary")
                   | (Marker("official") + Marker("interpretations"))
                   | (Marker("supplement") + Suppress(WordBoundaries("I"))))
                  + Optional(Marker("of") | Marker("to")))
comments_marker = Marker("comments")
appendix_marker = Marker("appendix")
appendices_marker = Marker("appendices")
# Connectives between citations in a list. "through"/"-"/"–" denote a
# range: the parse action stores True under the "through" results name
conj_phrases = (
    (Suppress(",") + Optional(Marker("and") | Marker("or")))
    | Marker("and")
    | Marker("or")
    | (Marker("except") + Marker("for"))
    | Suppress(Marker("through") | "-" | u"–").setParseAction(
        lambda: True).setResultsName("through"))
# CFR title number, e.g. the "12" in "12 CFR 1026"
title = Word(string.digits).setResultsName("cfr_title")
# Most of these markers could be SuffixMarkers (which arise due to errors in # the regulation text). We'll wait until we see explicit examples before # converting them though, to limit false matches paragraph_marker = Marker("paragraph") paragraphs_marker = SuffixMarker("paragraphs") part_marker = Marker("part") parts_marker = Marker("parts") subpart_marker = Marker("subpart") comment_marker = ((Marker("comment") | Marker("commentary") | (Marker("official") + Marker("interpretations")) | (Marker("supplement") + Suppress(WordBoundaries("I")))) + Optional(Marker("of") | Marker("to"))) comments_marker = Marker("comments") appendix_marker = Marker("appendix") appendices_marker = Marker("appendices") conj_phrases = ((Suppress(",") + Optional(Marker("and") | Marker("or"))) | Marker("and") | Marker("or") | (Marker("except") + Marker("for")) | Suppress("-") | WordBoundaries( CaselessLiteral("through")).setResultsName("through")) title = Word(string.digits).setResultsName("cfr_title")
# Fix: "logging" is used below (logging.getLogger) and "string" is used
# by this module (string.digits) but neither had a visible import.
import logging
import string

import attr
from pyparsing import (CaselessLiteral, FollowedBy, LineEnd, Literal,
                       OneOrMore, Optional, QuotedString, Suppress, Word,
                       ZeroOrMore)
from six.moves import reduce

from regparser.grammar import atomic, tokens, unified
from regparser.grammar.utils import Marker, QuickSearchable, WordBoundaries
from regparser.tree.paragraph import hash_for_paragraph, p_levels
from regparser.tree.reg_text import subjgrp_label

logger = logging.getLogger(__name__)

# "introductory text" and "subject heading" are synonyms; the latter's
# parse action normalizes it to the literal "text"
intro_text_marker = (
    (Marker("introductory") + WordBoundaries(CaselessLiteral("text")))
    | (Marker("subject") + Marker("heading")).setParseAction(lambda _: "text")
)

of_connective = (Marker("of") | Marker("for") | Marker("to"))

# Words signaling the passive voice before a verb. "and" also qualifies,
# but sets the and_prefix results name (to True) so consumers can
# distinguish an "and"-continued verb from a plain passive one
passive_marker = (
    Marker("is") | Marker("are") | Marker("was") | Marker("were") |
    Marker("and").setResultsName("and_prefix").setParseAction(
        lambda _: True))

and_token = Marker("and").setParseAction(lambda _: tokens.AndToken())

# Verbs