def getToken(self):
    tableCell = Regex(r"(?P<text>(.|(\\\n))*?)\|\|")
    tableCell.setParseAction(self.__convertTableCell)
    tableRow = LineStart() + Literal("||") + OneOrMore(
        tableCell).leaveWhitespace() + Optional(LineEnd())
    tableRow.setParseAction(self.__convertTableRow)
    table = LineStart() + Regex(
        r"\|\| *(?P<params>.+)?") + LineEnd() + OneOrMore(tableRow)
    table = table.setParseAction(self.__convertTable)("table")
    return table
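# --- Usage sketch (not from the original source): the same table grammar,
# --- standalone, with the instance callbacks replaced by simple lambdas and
# --- invented sample markup.
from pyparsing import LineEnd, LineStart, Literal, OneOrMore, Optional, Regex

demoCell = Regex(r"(?P<text>(.|(\\\n))*?)\|\|").setParseAction(
    lambda t: t["text"].strip())
demoRow = (LineStart() + Literal("||").suppress() +
           OneOrMore(demoCell).leaveWhitespace() +
           Optional(LineEnd().suppress()))
demoTable = (LineStart() + Regex(r"\|\| *(?P<params>.+)?") +
             LineEnd().suppress() + OneOrMore(demoRow))

print(demoTable.parseString("|| border=1\n||a||b||\n||c||d||\n"))
# -> ['|| border=1', 'a', 'b', 'c', 'd']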
def get_dialect_patterns(dialect_name: str) -> dict:
    """Return dialect patterns (used in SQL parsing), given a dialect name.

    If the dialect name is not recorded, return an empty dictionary.
    """
    # Note: if DIALECT_PATTERNS were a global variable, pyparsing would slow
    # down remarkably.
    DIALECT_PATTERNS = {
        'mssql': {
            'quoted_strings': [
                # depends on how QUOTED_IDENTIFIER is set
                QuotedString("'", escQuote="''", multiline=True),
                QuotedString('"', escQuote='""', multiline=True)
            ],
            'one_line_comment': Combine('--' + restOfLine),
            'multiline_comment': Regex(r'/\*.+?\*/', flags=DOTALL),
            # GO must be on its own line
            'batch_separator': LineStart().leaveWhitespace() + (
                (CaselessKeyword('GO') + Word(nums)) | CaselessKeyword('GO')
            ),
            'script_variable_pattern': '$({})'
        },
        'postgresql': {
            # https://www.postgresql.org/docs/current/sql-syntax-lexical.html
            'quoted_strings': [
                QuotedString("'", escQuote="''", multiline=True),
                QuotedString('$$', multiline=True)  # TODO: dollar quote with tag
            ],
            'one_line_comment': Combine('--' + restOfLine),
            'multiline_comment': Regex(r'/\*.+?\*/', flags=DOTALL),
            'batch_separator': Literal(';'),
            'script_variable_pattern': ':{}'
        },
        'sqlite': {
            # https://sqlite.org/lang.html
            'quoted_strings': [QuotedString("'", escQuote="''", multiline=True)],
            'one_line_comment': Combine('--' + restOfLine),
            'multiline_comment': Regex(r'/\*.+?\*/', flags=DOTALL),
            'batch_separator': Literal(';')
        }
    }
    return DIALECT_PATTERNS.get(dialect_name, {})
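# --- Illustrative usage (not from the original module): strip comments from
# --- a SQLite script with the patterns returned above.
patterns = get_dialect_patterns('sqlite')
script = "SELECT 1; -- trailing comment\n/* block\ncomment */\nSELECT 2;"
for comment in (patterns['one_line_comment'], patterns['multiline_comment']):
    script = comment.suppress().transformString(script)
print(script)  # both comments are gone; the statements remain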
def _compile_grammar(self):
    # type: () -> ParserElement
    """
    Takes the individual grammars from each registered directive and compiles
    them into a full test fixture grammar whose callback methods are the
    bound methods on this class instance.

    :return: The full PyParsing grammar for test fixture files.
    """
    grammars = [
        (LineEnd().suppress()).setParseAction(
            functools.partial(self._finalize_test_case)
        )
    ]

    # directives
    for directive_class in get_all_directives():
        grammars.append(
            LineStart()
            + directive_class.get_full_grammar().setParseAction(
                functools.partial(self._ingest_directive, directive_class)
            )
            + LineEnd()
        )

    return StringStart() + OneOrMore(MatchFirst(grammars)) + StringEnd()
def create_parser(self):
    LBRACKET = Suppress("[")
    RBRACKET = Suppress("]")
    EQ = Suppress("=")
    SLASH = Suppress("/")
    KEY = Word(alphanums + "-")
    VALUE = Word(alphanums + "-/.:_+") | QuotedString('"')
    FIND = LBRACKET + Group(
        Literal("find") + Literal("default-name") + EQ + VALUE) + RBRACKET
    KVP = Group(KEY + EQ + VALUE)
    BEGIN = LineStart() + SLASH + restOfLine.setParseAction(self.on_begin)
    ADD_OP = LineStart() + Literal("add") + ZeroOrMore(KVP).setParseAction(
        self.on_add)
    SET_OP = (LineStart() + Literal("set") +
              (Optional(FIND | KEY + ~FollowedBy(EQ) | QuotedString('"')) +
               ZeroOrMore(KVP)).setParseAction(self.on_set))
    CONFIG = ZeroOrMore(BEGIN | ADD_OP | SET_OP)
    return CONFIG
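# --- Spot check (sample lines invented): the key=value sub-grammar, rebuilt
# --- outside the class.
from pyparsing import Group, QuotedString, Suppress, Word, alphanums

KEY = Word(alphanums + "-")
VALUE = Word(alphanums + "-/.:_+") | QuotedString('"')
KVP = Group(KEY + Suppress("=") + VALUE)

print(KVP.parseString('comment="uplink port"'))  # [['comment', 'uplink port']]
print(KVP.parseString('mtu=1500'))               # [['mtu', '1500']]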
def mwgtofasta(wiki, evt):
    from pyparsing import Word, Literal, printables, LineStart, SkipTo, Combine, nums

    raw_string = wiki.getActiveEditor().GetSelectedText()
    start, end = wiki.getActiveEditor().GetSelection()

    fastaheader = Combine(
        Literal(">").suppress() + Word(nums).setResultsName("number") +
        Literal("_").suppress())
    try:
        # next() replaces the Python 2 .next() generator method
        data, dataStart, dataEnd = next(fastaheader.scanString(raw_string))
    except StopIteration:
        number = 1
        dataStart = end - start
    else:
        number = int(data.number) + 1

    wiki.getActiveEditor().SetSelectionByCharPos(start, start + dataStart)

    name = Word(printables).setResultsName("name")
    seq_start = Literal("5'").suppress()
    seq_stop = Literal("3'").suppress()
    sequence = Combine(seq_start + SkipTo(seq_stop)).setResultsName("seq")
    mwg_primer = LineStart() + name + SkipTo(LineStart()) + sequence

    result = mwg_primer.scanString(raw_string)
    seqlist = [data for data, dataStart, dataEnd in result]
    number += len(seqlist)

    fasta_string = ""
    for data in seqlist:
        number -= 1
        s = data.seq.strip("-").replace("\n", "").replace(" ", "")
        fasta_string += ">{number}_{name} ({length}-mer)\n{seq}\n\n".format(
            number=number, name=data.name, length=len(s), seq=s)

    wiki.getActiveEditor().ReplaceSelection(fasta_string)
    wiki.getActiveEditor().SetSelectionByCharPos(start,
                                                 start + len(fasta_string))
def parsed_title(text, appendix_letter):
    digit_str_parser = (Marker(appendix_letter) +
                        Suppress('-') +
                        grammar.a1.copy().leaveWhitespace() +
                        Optional(grammar.markerless_upper) +
                        Optional(grammar.paren_upper | grammar.paren_lower) +
                        Optional(grammar.paren_digit))
    part_roman_parser = Marker("part") + grammar.aI
    parser = QuickSearchable(LineStart() + (digit_str_parser |
                                            part_roman_parser))
    for match, _, _ in parser.scanString(text):
        return match
def expr(self) -> ParserElement:
    NL = LineEnd()
    LIST_BREAK = NL + Optional(White(" \t")) + NL | StringEnd()
    IGNORE = (BlockQuote(**self.init_kwargs).expr
              | Panel(**self.init_kwargs).expr
              | Color(**self.init_kwargs).expr)
    ROW = LineStart() + Combine(
        Optional(self.nested_token, default="")
        + ListIndent(self.indent_state, self.tokens)
        + SkipTo(NL + Char(self.nested_token + self.tokens) | LIST_BREAK,
                 ignore=IGNORE)
        + Optional(NL),
    )
    return OneOrMore(ROW, stopOn=LIST_BREAK).setParseAction(self.action)
def build_parser():
    """
    Build a pyparsing parser for our custom topology description language.

    :return: A pyparsing parser.
    :rtype: pyparsing.MatchFirst
    """
    ParserElement.setDefaultWhitespaceChars(' \t')
    nl = Suppress(LineEnd())
    inumber = Word(nums).setParseAction(lambda l, s, t: int(t[0]))
    # Group the exponent marker with its digits; the original
    # "'E' | 'e' + ..." bound the alternation to the bare 'E' only.
    fnumber = Combine(
        Optional('-') + Word(nums) + '.' + Word(nums) +
        Optional((Literal('E') | Literal('e')) + Optional('-') + Word(nums))
    ).setParseAction(lambda toks: float(toks[0]))
    boolean = (CaselessLiteral('true') | CaselessLiteral('false')
               ).setParseAction(lambda l, s, t: t[0].casefold() == 'true')
    comment = Literal('#') + restOfLine + nl
    text = QuotedString('"')
    identifier = Word(alphas, alphanums + '_')
    empty_line = LineStart() + LineEnd()
    item_list = ((text | fnumber | inumber | boolean) +
                 Optional(Suppress(',')) + Optional(nl))
    custom_list = (Suppress('(') + Optional(nl) + Group(OneOrMore(item_list)) +
                   Optional(nl) + Suppress(')')
                   ).setParseAction(lambda tok: tok.asList())
    attribute = Group(
        identifier('key') + Suppress(Literal('=')) +
        (custom_list | text | fnumber | inumber | boolean |
         identifier)('value') + Optional(nl))
    attributes = (Suppress(Literal('[')) + Optional(nl) +
                  OneOrMore(attribute) + Suppress(Literal(']')))
    node = identifier('node')
    port = Group(node + Suppress(Literal(':')) +
                 (identifier | inumber)('port'))
    link = Group(
        port('endpoint_a') + Suppress(Literal('--')) + port('endpoint_b'))
    environment_spec = (attributes + nl).setResultsName(
        'env_spec', listAllMatches=True)
    nodes_spec = (Group(
        Optional(attributes)('attributes') +
        Group(OneOrMore(node))('nodes')) + nl).setResultsName(
            'node_spec', listAllMatches=True)
    ports_spec = (Group(
        Optional(attributes)('attributes') +
        Group(OneOrMore(port))('ports')) + nl).setResultsName(
            'port_spec', listAllMatches=True)
    link_spec = (Group(Optional(attributes)('attributes') + link('links')) +
                 nl).setResultsName('link_spec', listAllMatches=True)
    statements = OneOrMore(comment | link_spec | ports_spec | nodes_spec |
                           environment_spec | empty_line)
    return statements
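# --- Usage sketch: the topology and the attribute name below are invented
# --- for illustration, not taken from the original project.
parser = build_parser()
topology = """
# two hosts wired back to back
[image="host:latest"] hs1 hs2
hs1:1 -- hs2:1
"""
parsed = parser.parseString(topology)
print(parsed.node_spec)
print(parsed.link_spec)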
def _define_grammar(self):
    '''define the grammar to be used, and add actions'''
    self._define_actions()
    eol = LineEnd().suppress()
    white = Optional(White()).suppress()
    begin = Keyword('begin').suppress()
    end = Keyword('end').suppress()
    comment = (Literal('#') + restOfLine).suppress()
    data_value = Combine(OneOrMore(CharsNotIn('#\n\r')))
    data_line = (LineStart() + white + Optional(data_value) +
                 Optional(comment) + eol)
    block_name = Word(alphas, alphanums + '_')
    begin_block = (LineStart() + begin + block_name +
                   Optional(comment) + eol)
    end_block = LineStart() + end + block_name + Optional(comment) + eol
    junk = ZeroOrMore(LineStart() + white + NotAny(begin) +
                      restOfLine + eol).suppress()
    data = Group(ZeroOrMore(NotAny(end) + data_line))
    block_def = begin_block + data + end_block
    block_defs = junk + OneOrMore(block_def + junk)
    self._grammar = block_defs
    begin_block.addParseAction(self._begin_block_action)
    end_block.addParseAction(self._end_block_action)
    data_value.addParseAction(self._data_value_action)
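# --- Standalone sketch of the same begin/end block shape, with the instance
# --- actions omitted (sample input invented).
from pyparsing import (CharsNotIn, Combine, Group, Keyword, LineEnd,
                       LineStart, NotAny, OneOrMore, Word, ZeroOrMore,
                       alphanums, alphas)

eol = LineEnd().suppress()
begin = Keyword('begin').suppress()
end = Keyword('end').suppress()
data_line = LineStart() + Combine(OneOrMore(CharsNotIn('#\n\r'))) + eol
block_name = Word(alphas, alphanums + '_')
block = (LineStart() + begin + block_name + eol +
         Group(ZeroOrMore(NotAny(end) + data_line)) +
         LineStart() + end + block_name + eol)

print(block.parseString("begin greetings\nhello\nworld\nend greetings\n"))
# -> ['greetings', ['hello', 'world'], 'greetings']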
def parse_config_file(filepath):
    """
    Parse the NetScaler input configuration file.
    :param filepath: path of the NetScaler input configuration
    :return: parsed result as a list of token lists
    """
    EOL = LineEnd().suppress()
    comment = Suppress("#") + Suppress(restOfLine) + EOL
    SOL = LineStart().suppress()
    blank_line = SOL + EOL
    result = []
    hyphen = Literal("-")
    not_hyphen_sign = ''.join(c for c in printables if c != '-')
    text = Word(not_hyphen_sign, printables)
    key = Word('-', printables).setParseAction(
        lambda t: t[0].replace('-', '', 1))
    val = originalTextFor(Optional(ZeroOrMore(text), default=None))
    option = Group(key + val)
    multi_word_names = quotedString
    q_obj = originalTextFor(Keyword('q{') + SkipTo(Keyword("}")))
    command = Group(
        OneOrMore(q_obj | multi_word_names | text) + ZeroOrMore(option))
    command.ignore(comment | blank_line)
    with open(filepath) as infile:
        line_no = 1
        print("Parsing Input Configuration...")
        lines = infile.readlines()
        total_lines = len(lines)
        for line in lines:
            try:
                tmp = command.parseString(line)
                tokens = tmp.asList()
                if tokens:
                    tokens[0].append(['line_no', str(line_no)])
                    result += tokens
                line_no += 1
            except Exception:
                line_no += 1
                LOG.error("Parsing error: " + line)
            msg = "Parsing started..."
            if line_no <= total_lines:
                ns_util.print_progress_bar(line_no, total_lines, msg,
                                           prefix='Progress', suffix='')
    return result
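# --- Spot check (sample line invented): the command/option grammar from
# --- above, extracted standalone.
from pyparsing import (Group, OneOrMore, Optional, Word, ZeroOrMore,
                       originalTextFor, printables)

not_hyphen_sign = ''.join(c for c in printables if c != '-')
text = Word(not_hyphen_sign, printables)
key = Word('-', printables).setParseAction(lambda t: t[0].replace('-', '', 1))
val = originalTextFor(Optional(ZeroOrMore(text), default=None))
option = Group(key + val)
command = Group(OneOrMore(text) + ZeroOrMore(option))

print(command.parseString("add lb vserver web_vs HTTP -persistenceType COOKIE"))
# -> [['add', 'lb', 'vserver', 'web_vs', 'HTTP', ['persistenceType', 'COOKIE']]]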
def _parse_gitconfig(config: str) -> list:
    # Header, e.g. [core] or [remote "origin"]
    header = Word(alphas) + Optional(QuotedString('"'))
    full_header = (Suppress(LineStart()) +
                   nestedExpr(opener="[", closer="]", content=header) +
                   Suppress(LineEnd()))

    # Keys
    key = (Word(alphas) + Suppress(Literal("=")) + Suppress(Optional(" ")) +
           restOfLine())

    # Full pattern
    full_pattern = full_header + ZeroOrMore(key)

    return [match for match in full_pattern.scanString(config)]
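# --- Quick sanity check (sample config invented); each match is a
# --- (tokens, start, end) tuple covering one section and its keys.
sample = '[user]\nname = Alice\nemail = alice@example.com\n'
for tokens, start, end in _parse_gitconfig(sample):
    print(tokens)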
def __init__(self):
    ints = Word(nums)
    word = Word(alphas)
    EOL = LineEnd().suppress()
    SOL = LineStart().leaveWhitespace()
    blankline = SOL + LineEnd()

    # ip address of device
    ipAddress = Optional(
        delimitedList(ints, ".", combine=True) + Suppress(":"))
    # Received message
    rec_msg = Suppress(OneOrMore(word)) + Suppress(Literal("'"))
    # priority
    priority = Suppress("<") + ints + Suppress(">")
    # timestamp (string.ascii_uppercase/ascii_lowercase replace the
    # Python 2 string.uppercase/string.lowercase)
    month = Word(string.ascii_uppercase, string.ascii_lowercase, exact=3)
    day = ints
    hour = Combine(ints + ":" + ints + ":" + ints)
    timestamp = month + day + hour
    # hostname
    hostname = Word(alphas + nums + "_" + "-" + ".")
    # appname
    appname = Word(alphas + "/" + "-" + "_" + ".") + Optional(
        Suppress("[") + ints + Suppress("]")) + Suppress(":")
    # message
    message = Regex(".*")

    # pattern build
    self.__pattern = ipAddress + priority + timestamp + \
        hostname + appname + message + StringEnd() | EOL
    self.__pattern_without_appname = ipAddress + priority + \
        timestamp + hostname + message + StringEnd() | EOL
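# --- Spot check of the timestamp sub-grammar, rebuilt standalone (log
# --- fragment invented).
import string
from pyparsing import Combine, Word, nums

ints = Word(nums)
month = Word(string.ascii_uppercase, string.ascii_lowercase, exact=3)
hour = Combine(ints + ":" + ints + ":" + ints)
timestamp = month + ints + hour

print(timestamp.parseString("Oct 11 22:14:15"))  # ['Oct', '11', '22:14:15']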
def binary_condition_p():
    lhs = tagdim_field_p().setResultsName('lhs')
    # operator characters only; the brackets in the original Word('[!=<>~]')
    # were regex character-class syntax, not part of the operator set
    op = Word('!=<>~').setResultsName('op')
    rhs = rhs_p().setResultsName('rhs')
    line = (LineStart() + lhs + op + rhs + LineEnd())

    def cond(s, l, toks):
        if 'value' in toks:
            return GenericBinaryCondition(toks['lhs'], toks['op'],
                                          toks['value'])
        elif 'valueset' in toks:
            return GenericSetCondition(toks['lhs'], toks['op'],
                                       toks['valueset'][:])
        else:
            raise ValueError("Could not parse {}".format(toks))

    line.setParseAction(cond)
    return line
def expr(self) -> ParserElement:
    NL = LineEnd().suppress()
    SEP = (Literal("||") | Literal("|")).suppress()
    ROW_BREAK = NL + SEP | NL + NL | StringEnd()
    IGNORE = (Link(**self.init_kwargs).expr
              | MailTo(**self.init_kwargs).expr
              | Image(**self.init_kwargs).expr
              | Mention(**self.init_kwargs).expr)
    ROW = SEP + ZeroOrMore(
        SkipTo(SEP | ROW_BREAK, ignore=IGNORE) + Optional(SEP),
        stopOn=ROW_BREAK | NL + ~SEP,
    )
    EMPTY_LINE = Combine("\n" + White(" \t", min=0) + "\n")
    return (((StringStart() + Optional("\n")) ^
             Optional(EMPTY_LINE, default="\n")) +
            OneOrMore(LineStart() + Group(ROW) + NL).setParseAction(
                self.action) +
            (StringEnd() | Optional(LineEnd(), default="\n")))
def uniqueLabels(names):
    i = 1
    labels = []
    from rastercalcengine import rasterName
    from pyparsing import LineStart, LineEnd
    validRaster = LineStart() + rasterName() + LineEnd()
    for name in names:
        name1 = "[" + name + "]"
        try:
            validRaster.parseString(str(name1))
        except Exception:
            name1 = "layer"
        name2 = "[" + name
        while name1 in labels:
            name1 = name2 + "_%s]" % i
            i = i + 1
        labels.append(name1)
    return labels
def create_grammar(self):
    self.beg = SkipTo(LineStart() + Literal('/*') * (0, 1) + Literal('stage'),
                      ignore=Literal('stages'))
    self.block = Forward()
    self.parallel = Suppress('parallel') + self.nested(self.block)
    self.parallel.setParseAction(lambda t: t[0])
    self.environment = Suppress('environment') + self.nested()
    self.stage_content = (
        self.nested((self.parallel | self.environment.suppress()), 'parallel')
        | self.nested().suppress()
    )
    self.stage = Group(
        Suppress('stage' + '(') +
        quotedString('stage_name').setParseAction(removeQuotes) +
        Suppress(')') + self.stage_content)(self.STAGE_KEY + '*')
    self.commented_stage = Group(
        Suppress('/*') + self.stage + Suppress('*/'))(
            self.COMMENTED_STAGE_KEY + '*')
    self.any_stage = self.stage | self.commented_stage
    self.block << Group(self.parallel | self.any_stage)('block*')
def _define_grammar(self):
    g = {}
    label = (Literal('Contents') | Literal('Caption title') |
             Literal('Sub-caption') | Literal('Half-title') |
             Literal('Footline') | Literal('Comments') |
             Literal('Modificatons') | Literal('Errors') |
             Literal('DMF') | Literal('ADF'))
    copies_label = LineStart() + Literal('Copies')
    # chr/range replace the Python 2 unichr/xrange
    all_chars = ''.join(
        chr(c) for c in range(65536)
        if unicodedata.category(chr(c)).startswith('L'))
    section_separator = LineEnd() + FollowedBy(label | copies_label |
                                               StringEnd())
    section = SkipTo(section_separator)
    library = Combine(Word(all_chars) + Literal('-') + Word(all_chars))
    copy_separator = (LineEnd() + FollowedBy(library) |
                      LineEnd() + StringEnd() | StringEnd())
    copy = library + SkipTo(copy_separator) + Suppress(copy_separator)
    g['comments'] = Suppress('Comments') + SkipTo(section_separator)
    g['code'] = StringStart() + SkipTo(LineEnd()) + Suppress(LineEnd())
    g['title'] = Suppress(g['code']) + Suppress(LineEnd()) + section
    g['copies'] = Suppress(copies_label) + OneOrMore(Group(copy))
    return g
def line(contents):
    return LineStart() + ZeroOrMore(Group(contents)) + LineEnd().suppress()
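# --- Hypothetical usage: every whitespace-separated match on the line
# --- becomes its own group.
from pyparsing import Word, alphas

print(line(Word(alphas)).parseString("alpha beta"))  # [['alpha'], ['beta']]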
def get_symptom(self):
    loglevel = LineStart() + Word(nums)
    analyze_expression = Combine(
        Regex(".*Exception:") + SkipTo(Suppress(loglevel), include=True))
    return analyze_expression
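# --- Usage sketch: `handler` stands in for an instance of the defining class,
# --- and the log text is invented. The expression grabs the exception message
# --- plus the trace lines up to the next line that starts with a log level.
log = ("10 some message\n"
       "java.lang.NullPointerException: boom\n"
       "    at Foo.bar(Foo.java:1)\n"
       "20 next message\n")
for tokens, start, end in handler.get_symptom().scanString(log):
    print(tokens[0])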
    pre = pre.visit(id, setTheory, id, const(True))
    rule = Rule(toks["name"], lexp, rexp, pre if pre else None)
    if lhs["var"] != rhs["var"]:
        print("ERROR: " + lhs["var"] + " == " + rhs["var"])
    lctrs = lctrs + [rule]
    return [rule]


def mkLit(s):
    return Literal(s)


def mkMod(toks):
    return reduce(lambda s, t: s + "_" + t, toks, "")


# define grammar
name = (LineStart() + Literal("Name:")).suppress()
pre = Literal("Pre:").suppress()
num = Combine(Optional(Literal("-")) + Word(nums))
assign = Literal("=").suppress()
constant = Combine(Literal("C") + Optional(Word(nums)))
skip = Literal("skip")
to = Literal("to").suppress()
percent = Literal("%").suppress()
ident = Combine(percent + Word(alphanums))
funName = Word(srange("[a-zA-Z]"), alphanums)
lpar = Literal("(").suppress()
rpar = Literal(")").suppress()
comma = Literal(",").suppress()

# theory expression
unops = ["~", "-"]
lBvUnop = Or(mkLit(s) for s in unops)
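# --- Spot checks of two leaf grammars (inputs invented).
print(num.parseString("-42"))      # ['-42']
print(ident.parseString("%tmp3"))  # ['tmp3']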
scopedIdent.setParseAction(lambda t: "_".join(t))

print("(replace namespace-scoped names with C-compatible names)")
print(scopedIdent.transformString(testData))


# or a crude pre-processor (use parse actions to replace matching text)
def substituteMacro(s, l, t):
    if t[0] in macros:
        return macros[t[0]]


ident.setParseAction(substituteMacro)
ident.ignore(macroDef)
print("(simulate #define pre-processor)")
print(ident.transformString(testData))

#################
print("Example of a stripper")
print("----------------------")

from pyparsing import dblQuotedString, LineStart

# remove all string macro definitions (after extracting to a string
# resource table?)
stringMacroDef = (Literal("#define") + ident + "=" + dblQuotedString +
                  LineStart())
stringMacroDef.setParseAction(replaceWith(""))
print(stringMacroDef.transformString(testData))
#regex = u"Number(.+?)Name(.+?)Email(.+?)Course(.+?)Enrolled SAUM(Yes|No)" #regex = u"Número(.+?)Nome(.+?)Email(.+?)Curso(.+?)Inscrito SAUM(Sim|Não)" match = re.findall(regex, text) with open("alunos.txt", "w") as f: for m in match: f.write(u"{} {}\n".format(m[0], m[1]).encode("utf8")) from pyparsing import Word, Literal, printables, LineStart, SkipTo, Combine name = Word(printables).setResultsName("name") seq_start = Literal("5'").suppress() seq_stop = Literal("3'").suppress() sequence = Combine(seq_start + SkipTo(seq_stop)).setResultsName("seq") mwg_primer = LineStart() + name + SkipTo(LineStart()) + sequence result = mwg_primer.scanString(raw_string) seqlist = [data for data, dataStart, dataEnd in result] number += len(seqlist) fasta_string = '' for data in seqlist: number -= 1 s = data.seq.strip("-").replace("\n", "").replace(" ", "") fasta_string += ">{number}_{name} ({length}-mer)\n{seq}\n\n".format( number=number, name=data.name, length=len(s), seq=s)
from pyparsing import (
    alphas,
    alphanums,
    LineEnd,
    LineStart,
    nums,
    Word,
    ZeroOrMore,
)

region_identifier = Word(alphanums + '_')
region_start = (LineStart() + '@region $ ' +
                region_identifier.setResultsName('region_identifier') +
                '{' + LineEnd())
region_end = LineStart() + '}' + LineEnd()
region_param_valid = Word(alphanums + '_.')
region_param = (region_param_valid + ': ' + region_param_valid + ';' +
                LineEnd())

mod_identifier = Word(alphas.upper() + alphas.lower() + '_')
mod_param_name = Word(' ' + alphas.upper() + alphas.lower() + '_')
mods_start_line = '[' + LineEnd()
mod_param = mod_param_name + ':' + Word(nums) + ";"
mod_param_additional_line = Word(nums) + ':' + Word(nums) + ';'
mod_start = '@' + mod_identifier.setResultsName('mod_identifier') + '{'
mod_end = '}' + LineEnd()
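# --- For instance (sample line invented):
tokens = region_start.parseString("@region $ my_region {\n")
print(tokens.region_identifier)  # my_region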
import pyparsing
from collections import namedtuple
from pyparsing import (ParserElement, OneOrMore, ZeroOrMore, Word, nums,
                       alphas, alphanums, delimitedList, Group, Combine,
                       SkipTo, Optional, Literal, restOfLine, LineStart,
                       LineEnd, StringEnd)

__all__ = ['load_ledger']

CurrencyAmount = namedtuple('CurrencyAmount', 'currency amount')

ws = ' \t'
ParserElement.setDefaultWhitespaceChars(ws)

EOL = LineEnd().suppress()
SOL = LineStart().leaveWhitespace()
blankline = SOL + LineEnd()
noIndentation = SOL + ~Word(ws).leaveWhitespace().suppress()
indentation = SOL + Word(ws).leaveWhitespace().suppress()

date = Combine(
    Word(nums, exact=4) + '-' + Word(nums, exact=2) + '-' +
    Word(nums, exact=2))
description = SkipTo(';' | EOL)
accountName = SkipTo(Literal(' ') | Literal(';') | Literal('\n'))
currency = Word(alphas + '£$')
number = Word(nums + '-.,')
amount = currency('currency') + number('value')
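# --- Quick checks of the leaf grammars (sample values invented).
print(date.parseString("2024-01-31"))         # ['2024-01-31']
print(amount.parseString("$12.50").asDict())  # {'currency': '$', 'value': '12.50'}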
with open("../data/output.md", 'w') as output_doc: print(len(input_text.split("\n"))) for line in input_text.split("\n"): # print("Word " + str(i) + " --- " + line) line_p = line.replace("\xa0", " ") # line_p = unicodedata.normalize("NFC", line) word_text.append(line_p) word_bold = (Literal("**").suppress() + Concat( OneOrMore( Word(alphas) ^ Cleanup( Literal("(").suppress() + Word(alphas) + Literal(")").suppress()))) + Literal("**").suppress()) word_def = ( LineStart() + Optional(Word(nums + " /")).suppress() + Concat(SkipTo(Word("►¶"))).setResultsName("definition") + OneOrMore( Literal("►").suppress() + NotAny(Literal("►")).suppress() + Concat(SkipTo(oneOf(genders) ^ Word("|¶►") ^ LineEnd())).setResultsName("words") + Concat( Optional(OneOrMore( oneOf(genders) + Optional(Literal(" ")).suppress()), default="na").setResultsName("gender")) + Optional( ( SkipTo(Literal("¶")).suppress() + Literal("¶").suppress() + Concat(SkipTo(Literal("►") ^ LineEnd())) # SkipTo(Word("►¶")).suppress() ).setResultsName("sources"), default="na")) + Optional(
def make_parser():
    ParserElement.setDefaultWhitespaceChars(' \t')
    EOL = OneOrMore(LineEnd()).suppress().setName("end of line")
    Spaces = OneOrMore(" ").suppress()

    # NOTE: These are not all 'printable' Unicode characters.
    # If needed, expand the alphas_extra variable.
    alphas_extra = ''.join(chr(x) for x in range(0x100, 0x350))
    chars = printables + alphas8bit + alphas_extra
    Token = Word(chars)

    InlineComment = '#' - SkipTo(EOL)
    WholelineComment = LineStart() + '#' - restOfLine - EOL

    Argument = Token('arg').setName('argument')
    Variable = Token('var').setName('variable')

    KindObject = Keyword('kind')('object')
    KindVerb = Keyword('is')('verb')
    Kind = Named(Keyword('url') | Keyword('raw') | Keyword('text'))('arg')

    MatchObject = Named(Keyword('arg'))('object')
    data = Named(Keyword('data'))('object')
    MatchVerb = Named(
        Keyword('is') | Keyword('istype') | Keyword('matches') |
        Keyword('rewrite'))('verb').setName('verb')
    Pattern = Named(Group(
        OneOrMore(Spaces + Argument + EOL)))('arg').leaveWhitespace()

    ActionObject = Keyword('plumb')('object')
    ActionVerb = Named(
        Keyword('run') | Keyword('notify') | Keyword('download'))('verb')
    Action = Named(originalTextFor(OneOrMore(Argument)))('arg')

    ArgMatchClause = Group(MatchObject - MatchVerb - Variable - Pattern)
    DataMatchClause = Group(data - MatchVerb - Pattern)

    # Transform every 'data match' rule to an equivalent 'arg match' rule
    def data_to_arg(toks):
        assert (len(toks) == 1)
        toks[0][0] = 'arg'
        toks[0].insert(2, '{data}')
        return toks

    DataMatchClause.setParseAction(data_to_arg)

    KindClause = Group(KindObject - KindVerb - Kind) - EOL
    MatchClause = (DataMatchClause | ArgMatchClause)
    ActionClause = Group(ActionObject - ActionVerb - Action) - EOL

    MatchBlock = Group(ZeroOrMore(MatchClause('match-clause')))
    ActionBlock = Group(OneOrMore(ActionClause('action-clause')))

    # TODO: allow the excluded chars if they are escaped.
    RuleName = Word(chars, excludeChars='{ } [ ]')('rule-name')
    RuleHeading = Suppress('[') - RuleName - Suppress(']') - EOL
    Rule = Group(RuleHeading - KindClause('kind-clause') -
                 MatchBlock('match-block') - ActionBlock('action-block'))
    RulesFile = OneOrMore(Rule)
    RulesFile.ignore(WholelineComment)
    RulesFile.ignore(InlineComment)

    for v in [MatchObject, ActionObject]:
        v.setName('object')
    for v in [MatchVerb, ActionVerb]:
        v.setName('verb')
    Kind.setName('kind')
    data.setName('object')
    Pattern.setName('pattern')
    Action.setName('action or url')
    KindClause.setName('kind clause')
    MatchClause.setName('match clause')
    ActionClause.setName('action clause')
    MatchBlock.setName('match block')
    ActionBlock.setName('action block')
    Rule.setName('rule')
    RuleName.setName('rule name')
    RulesFile.setName('rules')

    return RulesFile
# Only used as the top of the appendix hierarchy
a1 = Word(string.digits).setResultsName("a1")
aI = Word("IVXLCDM").setResultsName("aI")

# Catches the A in 12A but not in 12Awesome
markerless_upper = Word(string.ascii_uppercase).setResultsName(
    'markerless_upper') + ~FollowedBy(Word(string.ascii_lowercase))

paren_upper = parenthesize(string.ascii_uppercase, "paren_upper")
paren_lower = parenthesize(string.ascii_lowercase, "paren_lower")
paren_digit = parenthesize(string.digits, "paren_digit")

period_upper = decimalize(string.ascii_uppercase, "period_upper")
period_lower = decimalize(string.ascii_lowercase, "period_lower")
period_digit = decimalize(string.digits, "period_digit")

section = (atomic.section_marker.copy().leaveWhitespace() +
           unified.part_section + SkipTo(LineEnd()))
par = (atomic.section.copy().leaveWhitespace() + unified.depth1_p +
       SkipTo(LineEnd()))
marker_par = (atomic.paragraph_marker.copy().leaveWhitespace() +
              atomic.section + unified.depth1_p)
appendix = (atomic.appendix_marker.copy().leaveWhitespace() +
            atomic.appendix + SkipTo(LineEnd()))
headers = utils.QuickSearchable(
    LineStart() + (section | marker_par | par | appendix))
from pyparsing import LineEnd, LineStart, SkipTo, Regex

from regparser.grammar import atomic, unified

section = (atomic.section_marker.copy().leaveWhitespace() +
           unified.part_section + SkipTo(LineEnd()))

par = (atomic.section.copy().leaveWhitespace() + unified.depth1_p +
       SkipTo(LineEnd()))

marker_par = (atomic.paragraph_marker.copy().leaveWhitespace() +
              atomic.section + unified.depth1_p)

# This matches an appendix name in an appendix header. Here we'll match
# something with a dash in the appendix name (i.e. AA-1) but we'll
# remove the dash. The effect of this is that, for label purposes only,
# the appendix becomes known as 'AA1', and therefore we don't have weird
# label collisions with a node labeled '1' underneath the appendix.
appendix = (
    atomic.appendix_marker.copy().leaveWhitespace() +
    Regex(r"[A-Z]+-?[0-9]*\b").setParseAction(
        lambda r: r[0].replace('-', '')).setResultsName("appendix") +
    SkipTo(LineEnd()))

parser = LineStart() + (section | marker_par | par | appendix)
# PROP = Suppress(INFO) + STUFF + Suppress(SkipTo(StringEnd()))
HEADER = Regex(
    r'(?:State (?P<state>\d+) )?file (?P<file>[^\s]+) function (?P<function>[^\s]+) line (?P<line>\d+) (?:thread (?P<thread>\d+))?'
)  # noqa: E501
HEADER_OLD = Regex(
    r'(?:State (?P<state>\d+) )?file (?P<file>[^\s]+) line (?P<line>\d+) function (?P<function>[^\s]+) (?:thread (?P<thread>\d+))?'
)  # noqa: E501
SEP = Keyword("----------------------------------------------------")
ASGN = Regex(r'(?P<lhs>[^\s=]+)=(?P<rhs>.+)')

TRACE = OneOrMore(Group(Group(HEADER) + SEP.suppress() + Group(ASGN))).ignore(
    OneOrMore(SKIP))  # noqa: E501
TRACE_OLD = OneOrMore(Group(Group(HEADER_OLD) + SEP.suppress() +
                            Group(ASGN))).ignore(OneOrMore(SKIP))  # noqa: E501
PROP = (Suppress(SkipTo(LineEnd())) + Suppress(SkipTo(LineStart())) + STUFF +
        Suppress(SkipTo(StringEnd())))  # noqa: E501


def pprint_agent(info, tid):
    return f"{info.spawn[int(tid)]} {tid}"


def translateCPROVER54(cex, info):
    yield from translateCPROVER(cex, info, parser=TRACE_OLD)


def translateCPROVERNEW(cex, info):
    yield from translateCPROVER(cex, info, parser=TRACE)


def translateCPROVER(cex, info, parser=TRACE):
class TypeDocGrammar:
    r"""
    Grammar to parse boto3 docs syntax.

    EOL ::= ["\r"] "\n"
    SOL ::= LINE_START
    line ::= [^EOL]+ EOL
    word ::= alphanums + "_"
    indented_block ::= INDENT (line_indented | any_line)
    line_indented ::= any_line indented_block
    type_definition ::= ":type" [^:]+ ":" [^EOL]+
    rtype_definition ::= ":rtype:" [^EOL]+
    returns_definition ::= (":returns:" | ":return:") [^EOL]+
    param_definition ::= ":param" [^:]+ ":" [^EOL]+ EOL [indented_block]
    response_structure ::= "**Response Structure**" line [indented_block]
    typed_dict_key_line ::= "-" "**" word "**" "*(" word ")" "--*" [^EOL]+ EOL
    type_line ::= "-" "*(" word ")" "--*" [^EOL]+ EOL
    any_line ::= typed_dict_key_line | type_line | line
    """

    indent_stack = [1]
    SOL = LineStart().suppress()
    EOL = LineEnd().suppress()
    word = Word(alphanums + "_")
    line = SkipTo(LineEnd()) + EOL
    line_indented = Forward()
    any_line = Forward()
    indented_block = indentedBlock(
        line_indented | any_line, indentStack=indent_stack
    ).setResultsName("indented")
    line_indented <<= any_line + indented_block
    type_definition = (
        SOL
        + Literal(":type")
        + SkipTo(":").setResultsName("name")
        + Literal(":")
        + SkipTo(EOL).setResultsName("type_name")
    )
    rtype_definition = (
        SOL + Literal(":rtype:") + SkipTo(EOL).setResultsName("type_name")
    )
    returns_definition = (
        SOL
        + (Literal(":returns:") | Literal(":return:"))
        + SkipTo(EOL).setResultsName("description")
    )
    param_definition = (
        SOL
        + Literal(":param")
        + SkipTo(":").setResultsName("name")
        + Literal(":")
        + SkipTo(EOL).setResultsName("description")
        + EOL
        + Optional(indented_block)
    )
    response_structure = Literal("**Response Structure**") + line_indented
    typed_dict_key_line = (
        Literal("-")
        + White(ws=" \t")
        + Literal("**")
        + word.setResultsName("name")
        + Literal("**")
        + White(ws=" \t")
        + Literal("*(")
        + word.setResultsName("type_name")
        + Literal(")")
        + White(ws=" \t")
        + Literal("--*")
        + SkipTo(EOL).setResultsName("description")
        + EOL
    )
    type_line = (
        Literal("-")
        + White(ws=" \t")
        + Literal("*(")
        + word.setResultsName("type_name")
        + Literal(")")
        + White(ws=" \t")
        + Literal("--*")
        + SkipTo(EOL).setResultsName("description")
        + EOL
    )
    any_line <<= (typed_dict_key_line | type_line | line).setResultsName("line")

    @classmethod
    def fail_action(
        cls, _input_string: str, _chr_index: int, _source: str,
        error: BaseException
    ) -> None:
        """
        Check for input end.
        """
        if "found end of text" not in str(error):
            raise error

    @classmethod
    def reset(cls) -> None:
        """
        Reset call stack and packrat.
        """
        cls.disable_packrat()
        cls.indented_block.setFailAction(cls.fail_action)
        cls.indent_stack.clear()
        cls.indent_stack.append(1)

    @staticmethod
    def enable_packrat() -> None:
        """
        Enable packrat boost.
        """
        ParserElement.enablePackrat(cache_size_limit=128)

    @staticmethod
    def disable_packrat() -> None:
        """
        Disable packrat boost.
        """
        ParserElement.enablePackrat(cache_size_limit=None)
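# --- Usage sketch (docstring fragment invented; note that SkipTo keeps the
# --- leading space in the captured description).
docstring = ":returns: A list of names.\n"
match = TypeDocGrammar.returns_definition.parseString(docstring)
print(match.description)  # ' A list of names.'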