def define_identifier(self):
    """
    Build and return the pyparsing expression for an identifier.

    An individual identifier is a run of word characters and/or characters
    of the grammar's "identifier_spacing" token which does not start with
    a digit.  A full identifier is zero or more individual identifiers,
    each followed by the grammar's "namespace_separator" token, and then
    one last individual identifier.  The leading pieces are reported under
    "namespace_parts" and the last piece under "identifier".

    """
    # Collect every code point below 0x10000 that Python regards as a digit.
    digit_chars = []
    for code_point in xrange(0x10000):
        char = unichr(code_point)
        if char.isdigit():
            digit_chars.append(char)
    leading_digit = Regex("[%s]" % "".join(digit_chars), re.UNICODE)

    # The spacing token is escaped because it is spliced into a character
    # class.
    spacing = re.escape(self._grammar.get_token("identifier_spacing"))
    word_run = Regex(r"[\w%s]+" % spacing, re.UNICODE)

    # The negative lookahead rejects identifiers that begin with a digit.
    single = Combine(~leading_digit + word_run)
    single.setName("individual_identifier")

    separator = Suppress(self._grammar.get_token("namespace_separator"))
    namespace_parts = Group(ZeroOrMore(single + separator))
    namespace_parts.setName("namespace")

    full = Combine(
        namespace_parts.setResultsName("namespace_parts")
        + single.setResultsName("identifier")
    )
    full.setName("full_identifier")
    return full
def nexus_iter(infile):
    """
    Iterate over the trees of the first "begin trees;" block of a NEXUS file.

    Lines of ``infile`` before "begin trees;" are discarded and reading
    stops at the first "end;" or "endblock;" line.  Inside the block a
    "translate" line starts a translation table and every line whose first
    word is "tree" is parsed; all other lines are ignored.

    @param infile: iterable of text lines (e.g. an open file)
    @return: generator of nexus.Newick objects, each built from the parse
             results of one tree line and the translation table read so far
    """
    import pyparsing
    pyparsing.ParserElement.enablePackrat()
    from pyparsing import (CaselessKeyword, Optional, QuotedString, Regex,
                           Suppress, Word)

    # Optional "[&...]" comment; it may follow the tree name and the "=".
    comment = Optional(Suppress("[&") + Regex(r'[^]]+') + Suppress("]"))
    name = Word(string.letters + string.digits + "_.") | QuotedString("'")
    # Everything up to and including the terminating semicolon.
    newick = Regex(r'[^;]+;')
    tree = (CaselessKeyword("tree").suppress() +
            Optional("*").suppress() +
            name.setResultsName("tree_name") +
            comment.setResultsName("tree_comment") +
            Suppress("=") +
            comment.setResultsName("root_comment") +
            newick.setResultsName("newick"))

    def not_begin(s):
        return s.strip().lower() != "begin trees;"

    def not_end(s):
        return s.strip().lower() not in ("end;", "endblock;")

    def parse_ttable(f):
        """Read "key value" pairs up to the ';' that closes the table."""
        ttable = {}
        for raw in f:
            s = raw.strip()
            if not s:
                continue
            if s == ";":
                break
            # Note the terminator *before* stripping it, so that the last
            # value is stored without its trailing ';'.
            is_last = s.endswith(";")
            if s[-1] in ",;":
                s = s[:-1]
            k, v = s.split()
            ttable[k] = v
            if is_last:
                break
        return ttable

    # read lines between "begin trees;" and "end;"
    f = itertools.takewhile(not_end, itertools.dropwhile(not_begin, infile))
    try:
        s = next(f).strip().lower()
    except StopIteration:
        # No trees block at all: nothing to yield.
        return
    if s != "begin trees;":
        sys.stderr.write("Expecting 'begin trees;', got %s\n" % s)
        return

    ttable = {}
    for line in f:
        s = line.strip()
        if not s:
            continue
        if s.lower() == "translate":
            # parse_ttable consumes the table lines from the same iterator.
            ttable = parse_ttable(f)
            print("ttable: %s" % len(ttable))
        elif s.split()[0].lower() == 'tree':
            match = tree.parseString(s)
            yield nexus.Newick(match, ttable)
def parseDate(self, dateString):
    """
    Split a timestamp string into its textual components.

    The expected layout is four digits (year), then two digits each for
    month, day and hours, then a colon, then two digits for the minutes.

    Returns a dict with the keys "year", "month", "day", "hours" and
    "minutes"; the values are the matched digit strings.  A string that
    does not fit the layout makes pyparsing's parseString raise.
    """
    four_digits = Regex(r"\d{4}")
    two_digits = Regex(r"\d{2}")

    dateGrammar = (
        four_digits("year")
        + two_digits("month")
        + two_digits("day")
        + two_digits("hours")
        + Suppress(":")
        + two_digits("minutes")
    )

    parsed = dateGrammar.parseString(dateString)
    fields = ("year", "month", "day", "hours", "minutes")
    return dict((field, parsed[field]) for field in fields)
def _make_grammar(self):
    """
    Build the pyparsing grammar for a search query.

    A query is a sequence of arguments optionally separated by whitespace.
    An argument is an "or"/"and" keyword, an excluded term (prefixed with
    "-" or "^") or an included term.  A term is a quoted string, a
    "name:value" command or a bare word; backslash escapes are decoded
    inside bare words.

    Returns the top-level expression.
    """
    from pyparsing import (QuotedString, ZeroOrMore, Combine, Literal,
                           Optional, OneOrMore, Regex, CaselessKeyword)

    # Escape sequences with a dedicated replacement; any other escaped
    # character simply stands for itself.
    replacements = {
        '\\\\': "\\",
        '\\\'': "'",
        '\\"': '"',
        '\\f': "\f",
        '\\n': "\n",
        '\\r': "\r",
        '\\t': "\t",
        '\\ ': " ",
    }

    def escape_handler(s, loc, toks):
        sequence = toks[0]
        if sequence in replacements:
            return replacements[sequence]
        return sequence[1:]

    escape = Combine(Regex(r'\\.')).setParseAction(escape_handler)
    word = Combine(OneOrMore(escape | Regex(r'[^\s\\]+')))
    whitespace = Regex(r'\s+').suppress()

    double_quoted = QuotedString('"', escChar='\\')
    single_quoted = QuotedString("'", escChar='\\')
    quotedstring = Combine(OneOrMore(double_quoted | single_quoted))

    command = Regex(r'[^\s:]+') + Literal(":").suppress() + (quotedstring | word)
    command.setParseAction(CommandExpr)

    include = quotedstring | command | word
    include.setParseAction(IncludeExpr)

    # The excluded term is a separate alternation, so the include action
    # does not fire for it.
    exclude_marker = (Literal("-") | Literal("^")).suppress()
    exclude = exclude_marker + (quotedstring | command | word)
    exclude.setParseAction(ExcludeExpr)

    or_keyword = CaselessKeyword("or")
    or_keyword.setParseAction(OrKeywordExpr)
    and_keyword = CaselessKeyword("and")
    and_keyword.setParseAction(AndKeywordExpr)
    keyword = or_keyword | and_keyword

    argument = (keyword | exclude | include)
    return ZeroOrMore(Optional(whitespace) + argument)
def __init__(self, EvaluateVariableChild=None, EvaluateNumberChild=None):
    """
    Build the boolean-expression grammar and store it in self.boolExpression.

    EvaluateVariableChild -- parse action for variables; EvaluateVariable
                             is used when none (or a falsy value) is given
    EvaluateNumberChild   -- parse action for numbers; EvaluateNumber is
                             used when none (or a falsy value) is given
    """
    if not EvaluateVariableChild:
        EvaluateVariableChild = EvaluateVariable
    if not EvaluateNumberChild:
        EvaluateNumberChild = EvaluateNumber

    # Numbers: optional minus sign, digits, optional fraction and exponent.
    floatNumber = Regex(r'[-]?\d+(\.\d*)?([eE][-+]?\d+)?')
    floatNumber.setParseAction(EvaluateNumberChild)

    # Variables: letters, digits and underscores.
    variable = Word(alphanums + "_")
    variable.setParseAction(EvaluateVariableChild)

    operand = floatNumber ^ variable

    # Comparisons (binary, left associative) and "!" negation (unary,
    # right associative).
    comparisonOp = oneOf("< <= > >= == !=")
    notOp = oneOf("!")
    comparison_levels = [
        (comparisonOp, 2, opAssoc.LEFT, EvaluateComparison),
        (notOp, 1, opAssoc.RIGHT, EvaluateNot),
    ]
    comparisonExpression = operatorPrecedence(operand, comparison_levels)

    # Comparisons joined with "&" / "|".
    # NOTE(review): this Forward is never given a definition in this
    # method, so only the comparisonExpression alternative can match.
    boolExpression = Forward()
    boolPossible = boolExpression | comparisonExpression
    boolOp = oneOf("& |")
    self.boolExpression = operatorPrecedence(
        boolPossible,
        [(boolOp, 2, opAssoc.RIGHT, EvaluateOrAnd)],
    )
def parse_treesblock(infile):
    """
    Yield one Newick object per tree statement read from ``infile``.

    ``infile`` is any iterable of text lines.  Blank lines are skipped.
    A "translate" line starts a translation table, which is read up to its
    closing ';' and passed along with every tree that follows it.  Every
    other non-blank line must be a tree statement; pyparsing raises
    ParseException for a line that is not.
    """
    from pyparsing import (CaselessKeyword, Optional, QuotedString, Regex,
                           Suppress, Word, alphanums)

    # Optional "[&...]" comment; it may follow the tree name and the "=".
    comment = Optional(Suppress("[&") + Regex(r'[^]]+') + Suppress("]"))
    name = Word(alphanums + "_") | QuotedString("'")
    # Everything up to and including the terminating semicolon.
    newick = Regex(r'[^;]+;')
    tree = (CaselessKeyword("tree").suppress() +
            Optional("*").suppress() +
            name.setResultsName("tree_name") +
            comment.setResultsName("tree_comment") +
            Suppress("=") +
            comment.setResultsName("root_comment") +
            newick.setResultsName("newick"))

    def parse_ttable(f):
        """Read "key value" pairs up to the ';' that closes the table."""
        ttable = {}
        for raw in f:
            s = raw.strip()
            if not s:
                continue
            if s == ";":
                break
            # Remember the terminator before stripping it; testing for it
            # afterwards could never succeed and the table would run on
            # into the tree statements.
            is_last = s.endswith(";")
            if s[-1] in ",;":
                s = s[:-1]
            k, v = s.split()
            ttable[k] = v
            if is_last:
                break
        return ttable

    # One shared iterator, so that parse_ttable continues where the main
    # loop stopped.
    lines = iter(infile)
    ttable = {}
    for line in lines:
        s = line.strip()
        if not s:
            continue
        if s.lower() == "translate":
            ttable = parse_ttable(lines)
        else:
            match = tree.parseString(s)
            yield Newick(match, ttable)
def getkw_bnf(self):
    """
    Assemble and return the pyparsing grammar for the input format.

    The input is a sequence of items: "name = value" keywords,
    "name = [v1, v2, ...]" vectors, "$name ... $end" data blocks and
    "name { ... }" / "name<tag> { ... }" sections which nest further items.
    The conv_*, store_*, add_sect and pop_sect methods of this object are
    attached as parse actions.  Python-style comments are ignored.
    """
    def punct(symbol):
        # A literal that must be present but is dropped from the results.
        return Literal(symbol).suppress()

    sect_begin = punct("{")
    sect_end = punct("}")
    array_begin = punct("[")
    array_end = punct("]")
    tag_begin = punct("<")
    tag_end = punct(">")
    eql = punct("=")
    dmark = punct('$')
    end_data = punct('$end')

    # Scalar values: integers, reals, booleans, quoted strings, bare words.
    prtable = alphanums + r'!$%&*+-./<>?@^_|~'
    ival = Regex(r'[-]?\d+')
    ival.setParseAction(self.conv_ival)
    dval = Regex(r'-?\d+\.\d*([eE]?[+-]?\d+)?')
    dval.setParseAction(self.conv_dval)
    lval = Regex('([Yy]es|[Nn]o|[Tt]rue|[Ff]alse|[Oo]n|[Oo]ff)')
    lval.setParseAction(self.conv_lval)
    # NOTE(review): setParseAction modifies the imported quotedString
    # object itself rather than a copy.
    unquoted = quotedString.setParseAction(removeQuotes)

    kstr = unquoted ^ dval ^ ival ^ lval ^ Word(prtable)
    name = Word(alphas + "_", alphanums + "_")
    vec_item = (dval ^ ival ^ lval ^ Word(prtable) ^
                Literal("\n").suppress() ^ unquoted)
    vec = array_begin + delimitedList(vec_item) + array_end

    # Section headers.
    sect = name + sect_begin
    sect.setParseAction(self.add_sect)
    tag_sect = name + Group(tag_begin + name + tag_end) + sect_begin
    tag_sect.setParseAction(self.add_sect)
    sect_end.setParseAction(self.pop_sect)

    # Items.
    keyword = name + eql + kstr
    keyword.setParseAction(self.store_key)
    vector = name + eql + vec
    vector.setParseAction(self.store_vector)
    data = Combine(dmark + name) + SkipTo(end_data) + end_data
    data.setParseAction(self.store_data)

    section = Forward()
    sect_def = (sect | tag_sect)
    item = section | data | vector | keyword
    section << sect_def + ZeroOrMore(item) + sect_end

    bnf = ZeroOrMore(item) + StringEnd().setFailAction(parse_error)
    bnf.ignore(pythonStyleComment)
    return bnf
def make_sexp_parser():
    """
    Return a parsing function for nested, round-parenthesised lists of
    real numbers, as customary in lisps.

    A number must contain a decimal point and may carry a sign and an
    exponent.  The returned function takes a string and gives back the
    first parsed item: a float, or a group of nested items.
    """
    open_paren = Suppress("(")
    close_paren = Suppress(")")

    def to_float(tokens):
        return float(tokens[0])

    number = Regex(r"[+-]?\d+\.\d*([eE][+-]?\d+)?")
    number.setParseAction(to_float)

    # The grammar is recursive: an expression is a number or a list of
    # expressions.
    sexp = Forward()
    nested = Group(open_paren + ZeroOrMore(sexp) + close_paren)
    sexp << (number | nested)

    def parse(s):
        return sexp.parseString(s)[0]

    return parse
def ListParser():
    """
    Build a parser for list columns whose items are pairs of numbers,
    written like ``[(1, 2.5), (3e2, 4)]``.

    Returns a function that maps such a string to a Python list of
    2-tuples of floats and raises ValueError when the string cannot be
    parsed.
    """
    def to_float(toks):
        return float(toks[0])

    number = Regex(r'[-+]?[0-9]+(?:\.[0-9]*)?(?:e[-+]?[0-9]+)?', IGNORECASE)
    number.setParseAction(to_float)

    pair = Suppress('(') + number + Suppress(',') + number + Suppress(')')
    pair.setParseAction(tuple)

    pairs = Suppress('[') + delimitedList(pair) + Suppress(']')
    pairs.setParseAction(list)

    def parse(s):
        try:
            parsed = pairs.parseString(s)
        except ParseBaseException as e:
            raise ValueError(e)
        return parsed.asList()

    return parse
def translate(self, text, filename):
    """
    Translate template source ``text`` into Python module source.

    The template is split into directives (<%@ ... %>), declarations
    (<%! ... %>), expressions (<%= ... %>), scriptlets (<% ... %>) and
    plain text; each piece is handed to the matching compile_* method,
    which is expected to fill the line buffers reset here.  The result is
    the declaration, block and body lines joined with newlines, prefixed
    with self.super.module_source when self.super has been set.

    ``filename`` is accepted but not used by this method.
    """
    # Reset the per-translation state.
    self.source = text
    self.super = None
    self.inheritance = 0
    self.indent = 1
    self.declaration_lines = ['inheritance = 0']
    self.block_lines = []
    self.body_lines = ['def body():']
    # Output goes to the body until something redirects it.
    self.target_lines = self.body_lines

    template_close = Literal('%>')
    white = White()
    identifier = alphanums + '_'

    attribute = Word(identifier) + Literal('=') + QuotedString('"') + Optional(white)
    directive = ("<%@" + Optional(white) + Word(identifier) + white
                 + ZeroOrMore(attribute) + template_close)
    directive.setParseAction(self.compile_directive)

    declaration = "<%!" + SkipTo(template_close) + template_close
    declaration.setParseAction(self.compile_declaration)

    expression = "<%=" + SkipTo(template_close) + template_close
    expression.setParseAction(self.compile_expression)

    scriptlet = '<%' + SkipTo(template_close) + template_close
    scriptlet.setParseAction(self.compile_scriptlet)

    # Anything up to the next "<%" is plain text.
    plain_text = Regex(r'((?!<%).|\s)+', re.MULTILINE)
    plain_text.setParseAction(self.compile_plain_text)

    template_text = directive | declaration | expression | scriptlet
    body = template_text | plain_text
    lit = OneOrMore(body)

    # Whitespace is significant in templates, so it must not be skipped.
    lit.leaveWhitespace()
    lit.parseString(self.source)

    sections = (self.declaration_lines + ['\n'] + self.block_lines
                + ['\n'] + self.body_lines)
    translated = '\n' + '\n'.join(sections)
    if self.super:
        return self.super.module_source + translated
    return translated
# ===============> Standard libraries and third-party <======================== from plams import (Atom, Molecule) from pyparsing import (CaselessKeyword, Combine, Literal, nums, Optional, ParseException, Regex, SkipTo, Suppress, Word) import numpy as np # Literals point = Literal('.') e = CaselessKeyword('E') minusOrplus = Literal('+') | Literal('-') # Parsing Floats natural = Word(nums) integer = Combine(Optional(minusOrplus) + natural) floatNumber = Regex(r'(\-)?\d+(\.)(\d*)?([eE][\-\+]\d+)?') floatNumberDot = Regex(r'(\-)?(\d+)?(\.)(\d*)?([eE][\-\+]\d+)?') # Parse Utilities anyChar = Regex('.') skipAnyChar = Suppress(anyChar) skipSupress = lambda z: Suppress(SkipTo(z)) skipLine = Suppress(skipSupress('\n')) # Generic Functions def parse_file(p, file_name): """ Wrapper over the parseFile method
def __init__(self, preferences_dir=None):
    '''
    Creates a new ConfigShell.

    Builds the command line grammar, then sets up logging, preferences
    and (when a preferences directory is given) the command history file.
    Preferences missing after loading are filled in from default_prefs.

    @param preferences_dir: Directory to load/save preferences from/to.
    When None, preferences are not bound to a file and the command
    history is not saved.
    @type preferences_dir: str
    '''
    self._current_node = None
    self._root_node = None
    self._exit = False
    # Always defined, so that the code below (and other methods) can refer
    # to them even when no preferences directory is in use.
    self._prefs_file = None
    self._cmd_history = None

    # Grammar of the command line
    command = locatedExpr(Word(alphanums + '_'))('command')
    var = Word(alphanums + r'_\+/.<>()~@:-%]')
    value = var
    keyword = Word(alphanums + r'_\-')
    kparam = locatedExpr(keyword + Suppress('=')
                         + Optional(value, default=''))('kparams*')
    pparam = locatedExpr(var)('pparams*')
    parameter = kparam | pparam
    parameters = OneOrMore(parameter)
    bookmark = Regex('@([A-Za-z0-9:_.]|-)+')
    pathstd = Regex('([A-Za-z0-9:_.]|-)*' + '/' + '([A-Za-z0-9:_./]|-)*') \
            | '..' | '.'
    path = locatedExpr(bookmark | pathstd | '*')('path')
    parser = Optional(path) + Optional(command) + Optional(parameters)
    self._parser = parser

    if tty:
        readline.set_completer_delims('\t\n ~!#$^&()[{]}\|;\'",?')

    self.log = log.Log()

    if preferences_dir is not None:
        preferences_dir = os.path.expanduser(preferences_dir)
        if not os.path.exists(preferences_dir):
            os.makedirs(preferences_dir)
        self._prefs_file = preferences_dir + '/prefs.bin'
        self.prefs = prefs.Prefs(self._prefs_file)
        self._cmd_history = preferences_dir + '/history.txt'
        self._save_history = True
        if not os.path.isfile(self._cmd_history):
            try:
                open(self._cmd_history, 'w').close()
            except (IOError, OSError):
                # Best effort: the shell still works without a history.
                self.log.warning("Cannot create history file %s, "
                                 % self._cmd_history
                                 + "command history will not be saved.")
                self._save_history = False

        if os.path.isfile(self._cmd_history) and tty:
            try:
                readline.read_history_file(self._cmd_history)
            except IOError:
                self.log.warning("Cannot read command history file %s."
                                 % self._cmd_history)

        if self.prefs['logfile'] is None:
            self.prefs['logfile'] = preferences_dir + '/' + 'log.txt'

        self.prefs.autosave = True

    else:
        self.prefs = prefs.Prefs()
        self._save_history = False

    try:
        self.prefs.load()
    except IOError:
        self.log.warning("Could not load preferences file %s."
                         % self._prefs_file)

    for pref, value in self.default_prefs.iteritems():
        if pref not in self.prefs:
            self.prefs[pref] = value

    self.con = console.Console()
# Every reserved word; used below to keep keywords out of identifiers.
keywords = (
    SELECT | FROM | WHERE | AS | NULL | NOT | AND | OR | DISTINCT | ALL
    | INSERT | INTO | VALUES | DELETE | UPDATE | SET | CREATE | INDEX
    | USING | BTREE | HASH | ON | INTEGER | FLOAT | DATETIME | DATE
    | VARCHAR | CHAR | TABLE | DATABASE | DROP | ORDER | BY | ASC | DESC
)

# Punctuation; none of it is kept in the parse results.
LPAR, RPAR = (Suppress(char) for char in '()')
dot = Literal(".").suppress()
comma = Literal(",").suppress()
semi_colon = Literal(";").suppress()

# Names of variables, tables and columns: a letter followed by letters,
# digits or underscores, and not a keyword.
identifier = ~keywords + Word(alphas, alphanums + '_')

# Literal values.  A float is tried before an integer so that the integer
# part of a float is not consumed on its own.
integer_literal = Regex(r"([+-]?[1-9][0-9]*|0)").setResultsName(
    'integer_literal')
float_literal = Regex(r"([+-]?[1-9][0-9]*|0)\.[0-9]+").setResultsName(
    'float_literal')
numeric_literal = float_literal | integer_literal
string_literal = QuotedString("'").setResultsName('string_literal')
literal_value = (numeric_literal | string_literal | NULL)

# SQL type names all report themselves under the same results name.
INTEGER = INTEGER.setResultsName('type_name')
FLOAT = FLOAT.setResultsName('type_name')
DATETIME = DATETIME.setResultsName('type_name')
DATE = DATE.setResultsName('type_name')
VARCHAR = VARCHAR.setResultsName('type_name')
CHAR = CHAR.setResultsName('type_name')
from pyparsing import Regex, Literal, ZeroOrMore, SkipTo, Group, \ ParseException, StringEnd import multiprocessing import collections import subprocess import argparse import json import sys import os # GCC *.map file grammar for parsing code size per file. hex_word = Regex(r"0x[a-f0-9]+").setParseAction(lambda x: int(x[0], 16)) address = hex_word ^ Literal("[!provide]") size = hex_word meta = SkipTo(address ^ StringEnd()).setParseAction(lambda x: x[0].strip()) line_a = Group(address + size) line_b = Group(address + size + meta) + ZeroOrMore(Group(address + meta)) line_c = Group(address + meta) grammar = SkipTo(address ^ StringEnd()) + (line_a ^ line_b ^ line_c ^ StringEnd()) def parse_arguments(): """ Parse command line arguments. """
"SELECTOR_GROUP", "SELECTOR", "MIXIN", "INCLUDE", "MIXIN_PARAM", "EXTEND", "FONT_FACE", "OPTION", "FUNCTION_DEFINITION", "FUNCTION_RETURN", "IF", "ELSE", "IF_BODY", "FOR", "FOR_BODY", "CHARSET", "MEDIA", "WARN", "SEP_VAL_STRING", "POINT") # Base css word and literals COMMA, COLON, SEMICOLON = [Suppress(c) for c in ",:;"] OPT_SEMICOLON = Optional(SEMICOLON) LACC, RACC, LPAREN, RPAREN = [Suppress(c) for c in "{}()"] LLACC, LRACC, LBRACK, RBRACK = [Literal(c) for c in "{}[]"] # Comment CSS_COMMENT = cStyleComment + Optional(lineEnd) SCSS_COMMENT = dblSlashComment IDENT = Regex(r"-?[a-zA-Z_][-a-zA-Z0-9_]*") COLOR_VALUE = Regex(r"#[a-zA-Z0-9]{3,6}") VARIABLE = Regex(r"-?\$[-a-zA-Z_][-a-zA-Z0-9_]*") NUMBER_VALUE = Regex(r"-?\d+(?:\.\d*)?|\.\d+") + Optional( Regex(r"(em|ex|px|cm|mm|in|pt|pc|deg|s|%)(?![-\w])")) PATH = Regex(r"[-\w\d_\.]*\/{1,2}[-\w\d_\.\/]*") | Regex( r"((https?|ftp|file):((//)|(\\\\))+[\w\d:#@%/;$()~_?\+-=\\\.&]*)") POINT_PART = (NUMBER_VALUE | Regex(r"(top|bottom|left|right)")) POINT = POINT_PART + POINT_PART # Values EXPRESSION = Forward() INTERPOLATION_VAR = Suppress("#") + LACC + EXPRESSION + RACC SIMPLE_VALUE = NUMBER_VALUE | PATH | IDENT | COLOR_VALUE | quotedString DIV_STRING = SIMPLE_VALUE + OneOrMore(Literal("/") + SIMPLE_VALUE)
query_expr = Forward()

# Prefix modifiers of a clause.
required_modifier = Literal('+')('required')
prohibit_modifier = Literal('-')('prohibit')

# A word is any run of printable characters without the query syntax
# characters.  Its parse action removes backslash escapes; chr(127) is a
# temporary stand-in that lets an escaped backslash survive the removal.
special_characters = '=><(){}[]^"~*?:\\/'
valid_word = Word(printables, excludeChars=special_characters).setName('word')
valid_word.setParseAction(
    lambda t: t[0].replace('\\\\', chr(127)).replace('\\', '').replace(chr(127), '\\')
)

clause = Forward()
field_name = valid_word()('fieldname')
single_term = valid_word()('singleterm')
phrase = QuotedString('"', unquoteResults=True)('phrase')

# Wildcards are rewritten into regular expression syntax.
wildcard = Regex(r'[a-z0-9]*[\?\*][a-z0-9]*')('wildcard')
wildcard.setParseAction(
    lambda t: t[0].replace('?', '.?').replace('*', '.*')
)
regex = QuotedString('/', unquoteResults=True)('regex')

# Ranges: a square bracket marks an inclusive bound and a curly brace an
# exclusive one; "*" stands for an open bound.
_all = Literal('*')
lower_range = Group((LBRACK('inclusive') | LBRACE('exclusive')) + (valid_word | _all)('lowerbound'))
# The closing brace used to be tagged 'esclusive', so an exclusive upper
# bound was never reported under the name used for the lower bound.
upper_range = Group((valid_word | _all)('upperbound') + (RBRACK('inclusive') | RBRACE('exclusive')))
_range = (lower_range + to_ + upper_range)('range')

GT = Literal('>')
GTE = Literal('>=')
LT = Literal('<')
LTE = Literal('<=')
# vim: set encoding=utf-8
"""Atomic components; probably shouldn't use these directly"""
import string

from pyparsing import CaselessLiteral, Optional, Regex, Suppress, Word

from regparser.grammar.utils import Marker, SuffixMarker, WordBoundaries


def _parenthesized(marker):
    """Wrap `marker` in literal parentheses that are dropped from results."""
    return Suppress("(") + marker + Suppress(")")


def _emphasized(marker):
    """Wrap `marker` in "(<E ...>" and "</E>)", both dropped from results."""
    return Suppress(Regex(r"\(<E[^>]*>")) + marker + Suppress("</E>)")


# Paragraph markers, one expression per level, each reported under its own
# results name.
lower_p = _parenthesized(
    Regex(r"[ivx]{1}|[a-hj-uwyz]{1,2}").setResultsName("p1"))
digit_p = _parenthesized(Word(string.digits).setResultsName("p2"))
roman_p = _parenthesized(Word("ivxlcdm").setResultsName("p3"))
upper_p = _parenthesized(Word(string.ascii_uppercase).setResultsName("p4"))

# Markers wrapped in <E> tags; both variants report as "p5".
em_digit_p = _emphasized(Word(string.digits).setResultsName("p5"))
em_roman_p = _emphasized(Word("ivxlcdm").setResultsName("p5"))

# Allow a plaintext version of italic paragraph markers
plaintext_level5_p = _parenthesized(
    Word(string.digits).setResultsName("plaintext_p5"))
plaintext_level6_p = _parenthesized(
    Word("ivxlcdm").setResultsName("plaintext_p6"))
hyphen_minus, newline) ''' Rec. ITU-T X.680 (08/2015) 12.7 Empty lexical item Page 18 ''' empty = Empty().suppress() ''' Rec. ITU-T X.680 (08/2015) 12.8 Numbers Page 18 ''' # number = Word(digits.replace('0', ''), digits, min=2) | \ # Word(digits, max=1) number = Regex(r'\d+') ''' Rec. ITU-T X.680 (08/2015) 12.9 Real numbers Page 18 ''' # This is a poor reflection of its definition but for time being will be enough # realnumber = Word(digits, '.' + digits+ 'eE-') realnumber = Regex(r'\d+\.\d+') ''' Rec. ITU-T X.680 (08/2015) 12.10 Binary strings Page 18 ''' # This is a poor reflection of its definition but for time being will be enough # bstring = Word("'", "01B'")
raise ValueError("Missing required parameter") if maximum and len(val) > maximum: raise ValueError("Too many parameters") return [func(x) for x in val] return parse_values class RawQuotedString(QuotedString): def __init__(self, quoteChar, escChar="\\"): # noqa: N803 super().__init__(quoteChar, escChar=escChar, convertWhitespaceEscapes=False) # unlike the QuotedString this replaces only escaped quotes and not all chars self.escCharReplacePattern = ( re.escape(escChar) + "(" + re.escape(quoteChar) + ")" ) SYNTAXCHARS = {",", ":", '"', "'", "\\"} FlagName = Regex(r"""[^,:"'\\]+""") RegexString = "r" + RawQuotedString('"') FlagParam = Optional( RegexString | FlagName | RawQuotedString("'") | RawQuotedString('"') ) Flag = FlagName + ZeroOrMore(":" + FlagParam) FlagsParser = Optional(Flag) + ZeroOrMore("," + Optional(Flag))
from pyparsing import (CharsNotIn, Optional, Suppress, Word, Regex, ParseException, alphas, nums) from brian2.utils.caching import cached VARIABLE = Word(f"{alphas}_", f"{alphas + nums}_").setResultsName('variable') OP = Regex(r'(\+|\-|\*|/|//|%|\*\*|>>|<<|&|\^|\|)?=').setResultsName( 'operation') EXPR = CharsNotIn('#').setResultsName('expression') COMMENT = CharsNotIn('#').setResultsName('comment') STATEMENT = VARIABLE + OP + EXPR + Optional(Suppress('#') + COMMENT) @cached def parse_statement(code): """ parse_statement(code) Parses a single line of code into "var op expr". Parameters ---------- code : str A string containing a single statement of the form ``var op expr # comment``, where the ``# comment`` part is optional. Returns ------- var, op, expr, comment : str, str, str, str The four parts of the statement.
# Punctuation; all of it is dropped from the parse results.
LBRACKET = L("[").suppress()
RBRACKET = L("]").suppress()
LPAREN = L("(").suppress()
RPAREN = L(")").suppress()
COMMA = L(",").suppress()
SEMICOLON = L(";").suppress()
AT = L("@").suppress()

# An identifier starts and ends with an alphanumeric; runs of "-", "_" and
# "." may only appear in between.
PUNCTUATION = Word("-_.")
IDENTIFIER_END = ALPHANUM | (ZeroOrMore(PUNCTUATION) + ALPHANUM)
IDENTIFIER = Combine(ALPHANUM + ZeroOrMore(IDENTIFIER_END))

NAME = IDENTIFIER("name")
EXTRA = IDENTIFIER

# "@ <url>": the URL runs up to the next space or semicolon.
URI = Regex(r"[^ ;]+")("url")
URL = AT + URI

# "[extra1, extra2, ...]"
EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA)
EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET)("extras")

# One or more comma separated version clauses, optionally parenthesised.
# The clauses are joined back into a single string stored as "_raw_spec".
VERSION_PEP440 = Regex(REGEX, re.VERBOSE | re.IGNORECASE)
VERSION_LEGACY = Regex(LEGACY_REGEX, re.VERBOSE | re.IGNORECASE)
VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY
VERSION_MANY = Combine(
    VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE),
    joinString=",",
    adjacent=False,
)("_raw_spec")

_VERSION_SPEC = Optional((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY)
# A missing specifier is reported as the empty string.
_VERSION_SPEC.setParseAction(lambda s, loc, toks: toks._raw_spec or "")
def __init__(self):
    """
    Set up the evaluation state and build the expression grammar.

    expop   :: '^'
    multop  :: '*' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+
    atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
    factor  :: atom [ expop factor ]*
    term    :: factor [ multop factor ]*
    expr    :: term [ addop term ]*

    On top of that sit comparisons, a "? :" ternary and an optional
    leading "name =" assignment.  The finished grammar is stored in
    self.bnf.
    """
    # Evaluation state.
    self.kwargs = {}
    self.expr_stack = []
    self.assignment_stack = []
    self.expression_string = None
    self.results = None
    self.container = None

    # Operator and function tables.
    self.opn = {
        "+": self.add,
        "-": self.subtract,
        "*": self.multiply,
        "/": self.divide,
        "^": self.pow,
    }
    self.fn = {"exp": self.exp, "clamp": self.clamp}
    self.conditionals = ["==", "!=", ">", ">=", "<", "<="]

    # Keywords only match whole words, so "e" and "pi" do not swallow the
    # start of function names such as "exp".
    euler = CaselessKeyword("E")
    pi = CaselessKeyword("PI")
    fnumber = Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?")
    ident = Word(alphas, alphanums + "_$")

    plus, minus, mult, div = map(Literal, "+-*/")
    lpar, rpar = map(Suppress, "()")
    qm, colon = map(Literal, "?:")
    addop = plus | minus
    multop = mult | div
    expop = Literal("^")
    comparison_op = oneOf(" ".join(self.conditionals))

    # "name =" but not "name ==".
    equals = Literal("=")
    assignment_op = ident + equals + ~FollowedBy(equals)

    expr = Forward()
    expr_list = delimitedList(Group(expr))

    def tag_with_arity(toks):
        # Replace the function identifier with a (name, number of
        # arguments) tuple.
        toks.insert(0, (toks.pop(0), len(toks[0])))

    fn_call = (ident + lpar - Group(expr_list) + rpar).setParseAction(
        tag_with_arity)

    operand = (fn_call | pi | euler | fnumber | ident).setParseAction(
        self.push_first)
    atom = (addop[...] + (operand | Group(lpar + expr + rpar))).setParseAction(
        self.push_unary_minus)

    # Exponentiation is "atom [ ^ factor ]..." rather than
    # "atom [ ^ atom ]...", which makes it right-to-left:
    # 2^3^2 = 2^(3^2), not (2^3)^2.
    factor = Forward()
    factor <<= atom + (expop + factor).setParseAction(self.push_first)[...]
    term = factor + (multop + factor).setParseAction(self.push_first)[...]
    expr <<= term + (addop + term).setParseAction(self.push_first)[...]

    comparison = expr + (comparison_op + expr).setParseAction(
        self.push_first)[...]
    ternary = comparison + (qm + expr + colon + expr).setParseAction(
        self.push_first)[...]

    self.bnf = Optional(assignment_op).setParseAction(self.push_last) + ternary
# protobuf_parser.py
#
# simple parser for parsing protobuf .proto files
#
# Copyright 2010, Paul McGuire
#
from pyparsing import (Word, alphas, alphanums, Regex, Suppress, Forward,
                       Group, oneOf, ZeroOrMore, Optional, delimitedList,
                       Keyword, restOfLine, quotedString, Dict)

ident = Word(alphas + "_", alphanums + "_").setName("identifier")
integer = Regex(r"[+-]?\d+")

LBRACE, RBRACE, LBRACK, RBRACK, LPAR, RPAR, EQ, SEMI = map(
    Suppress, "{}[]()=;")

kwds = """message required optional repeated enum extensions extends extend
          to package service rpc returns true false option import"""
# Define one module-level Keyword per reserved word, named after the word
# in upper case with a trailing underscore (MESSAGE_, TRUE_, ...).
for kw in kwds.split():
    globals()[kw.upper() + "_"] = Keyword(kw)

messageBody = Forward()

# "-" instead of "+": once the keyword has matched, no other alternative
# is tried for the rest of the definition.
messageDefn = MESSAGE_ - ident("messageId") + LBRACE + messageBody(
    "body") + RBRACE

# A built-in scalar type or the name of a user-defined one.
typespec = oneOf("""double float int32 int64 uint32 uint64 sint32 sint64
                    fixed32 fixed64 sfixed32 sfixed64 bool string bytes"""
                 ) | ident

rvalue = integer | TRUE_ | FALSE_ | ident
def header_parser():
    """
    Build a pyparsing grammar that extracts module headers from Verilog
    source.

    For each "module ... endmodule" it captures the module name
    ("module_name"), the optional "#( parameter ... )" list ("params") and
    the optional port list ("ports"); the module body is skipped.  C++
    style comments are ignored.

    Returns the expression for a whole file.
    """
    identifier = Regex(r"[a-zA-Z_][a-zA-Z0-9_\$]*")
    comment = cppStyleComment.suppress()

    # Optional "[...]" range; always yields a group, possibly empty.
    size = Group(Optional(Suppress('[') + SkipTo(']') + Suppress(']')))

    # Parameters.  A default value runs up to the next ", parameter" or to
    # the ") (" that opens the port list.
    # NOTE: this isn't completely right, good enough for parsing valid
    # Verilog
    end_param = (Literal(',') + 'parameter') | (Literal(')') + '(')
    ptype = Optional(oneOf('integer real realtime time'))
    param = Group(
        'parameter' + ptype + size + identifier
        + Suppress('=') + SkipTo(end_param)
    )
    list_of_params = Group(Suppress('#(') + delimitedList(param) + Suppress(')'))

    # Ports.
    direction = Optional(oneOf('input output inout'))
    net_type = Optional(oneOf('wire reg'))
    port = Group(direction + net_type + size + identifier)
    list_of_ports = Group(Suppress('(') + delimitedList(port) + Suppress(')'))

    # Module.
    module = Group(
        Suppress('module')
        + identifier('module_name')
        + Optional(list_of_params('params'))
        + Optional(list_of_ports('ports'))
        + Suppress(';')
        + SkipTo('endmodule')
        + Suppress('endmodule')
    )

    # Whole file: skip to the first module, read the modules, skip the rest.
    leading = SkipTo('module', ignore=comment).suppress()
    modules = OneOrMore(module).ignore(comment)
    trailing = SkipTo(StringEnd()).suppress()
    return leading + modules + trailing
def parse(expression, equation=False, subs=None, main=None, returnVars=False):
    """
    Parse an arithmetic expression (or equation) string.

    Supported atoms are integers, decimals, quoted strings, "{n}"
    references, variables (starting with a lower-case letter),
    "name.name" properties, the constants "E" and "Pi", parenthesised
    sub-expressions and the functions len(...) and sum(...).  Atoms are
    combined with "*", "/", "+" and "-", applied left to right within
    each precedence level.

    expression -- the text to parse; anything that is not a str is
                  returned unchanged
    equation   -- when true, parse "lhs = rhs" and return Eq(lhs, rhs)
    subs       -- optional mapping of variable names to replacement names
    main       -- optional object used to resolve "name.name" properties
                  through main.fromDotRef and hypergraph.treeCompute
    returnVars -- when true, return the set of variable names encountered
                  instead of the parsed result
    """
    if not isinstance(expression, str):
        return expression
    if subs is None:
        subs = {}

    varSet = set()
    lparen = Literal("(").suppress()
    rparen = Literal(")").suppress()
    equal = Literal("=").suppress()
    spec = {"E": exp(1), "Pi": pi}

    def getSymbol(s):
        # Record the name as written, then apply the substitution.
        varSet.add(s)
        if s in subs:
            s = subs[s]
        return symbols(s)

    def getFunction(s):
        if s[0] == "len":
            return SetLength(s[1])
        elif s[0] == "sum":
            return SetSummation(s[1])
        else:
            Error('Unknown slng function ' + s[0])

    integer = Word(nums).setParseAction(lambda t: [int(t[0])])
    # One or more digits on both sides of the point; the former pattern
    # accepted a single fractional digit, so "3.14" was read as 3.1.
    decimal = Regex(r"[0-9]+\.[0-9]+").setParseAction(lambda t: [float(t[0])])
    special = Regex("[A-Z][a-zA-Z]*").setParseAction(lambda t: [spec[t[0]]])
    var = Regex("[a-z][a-zA-Z]*").setParseAction(lambda t: [getSymbol(t[0])])
    lowerName = Regex("[a-z][a-zA-Z]*").setParseAction(lambda t: [t[0]])
    prop = Regex(r"[a-z][a-zA-Z]*\.[a-z][a-zA-Z]*").setParseAction(
        lambda t: [getSymbol(t[0])])
    ref = Regex(r"\{[0-9]+\}").setParseAction(lambda t: [getSymbol(t[0])])
    quoted = Regex('"[-0-9a-zA-Z: ]*"').setParseAction(lambda t: [t[0][1:-1]])

    opn = {
        "+": (lambda a, b: a + b),
        "-": (lambda a, b: a - b),
        "*": (lambda a, b: a * b),
        "/": (lambda a, b: a / b),
        "^": (lambda a, b: a ** b),
    }

    def opClean(t):
        # Fold "operand (operator operand)*" from left to right.
        if len(t) == 1:
            return t
        tokens = list(t)
        acc = tokens[0]
        for i in range(1, len(tokens) - 1, 2):
            acc = opn[tokens[i]](acc, tokens[i + 1])
        return [acc]

    if main is not None:
        def treeCompute(p):
            try:
                node = main.fromDotRef(p)
                comp = hypergraph.treeCompute(node)
                res = solve(comp, symbols(p))
                return res[0]
            except Exception as e:
                logging.exception(e)
                Error("Error with tree Compute: ")
        # With a main object, properties are computed instead of being
        # turned into plain symbols.
        prop = prop.setParseAction(lambda t: [treeCompute(t[0])])

    expr = Forward()
    paren = (lparen + expr + rparen).setParseAction(lambda s, l, t: t)
    function = (lowerName + lparen + (prop | var) + rparen).setParseAction(
        lambda t: getFunction(t))
    # Order matters: functions before plain names, decimals before
    # integers, properties before variables.
    atom = (function | quoted | paren | decimal | integer | ref | prop
            | special | var)
    multExpr = (atom + ZeroOrMore(Word("*/") + atom)).setParseAction(
        lambda s, l, t: opClean(t))
    expr << (multExpr + ZeroOrMore(Word("+-") + multExpr)).setParseAction(
        lambda s, l, t: opClean(t))
    equality = (expr + equal + expr).setParseAction(
        lambda s, l, t: Eq(t[0], t[1]))

    if equation:
        res = equality.parseString(expression)[0]
    else:
        res = expr.parseString(expression)[0]

    if returnVars:
        return varSet
    else:
        return res
if len(url_protocol.searchString(unquoted)) > 0: result = [url_to_resource(unquoted)] else: result = [unquoted] return result # Numbers are converted to ints if possible. cql_number = Combine( Optional('-') + ('0' | Word(nonzero_nums, nums)) + Optional('.' + Word(nums)) + Optional(Word('eE', exact=1) + Word(nums + '+-', nums))).setParseAction(convert_number) # Dates are parsed as double-quoted ISO8601 strings and converted to datetime # objects. cql_date = Combine(dbl_quote.suppress() + Regex(ISO8601_REGEX) + dbl_quote.suppress()).setParseAction(convert_date) # All double-quoted strings that are not dates are returned with their quotes # removed. cql_string = (dblQuotedString | sglQuotedString).setParseAction(convert_string) # URLs are detected as strings starting with the http(s) protocol. url_protocol = Combine(Literal('http') + Optional('s')) # Number range. # FIXME: char ranges are not supported yet cql_number_range = Group(cql_number + '-' + cql_number).setParseAction(convert_range) cql_values = Group( delimitedList(
FollowedBy,empty __all__ = ['tapOutputParser', 'TAPTest', 'TAPSummary'] # newlines are significant whitespace, so set default skippable # whitespace to just spaces and tabs ParserElement.setDefaultWhitespaceChars(" \t") NL = LineEnd().suppress() integer = Word(nums) plan = '1..' + integer("ubound") OK,NOT_OK = map(Literal,['ok','not ok']) testStatus = (OK | NOT_OK) description = Regex("[^#\n]+") description.setParseAction(lambda t:t[0].lstrip('- ')) TODO,SKIP = map(CaselessLiteral,'TODO SKIP'.split()) directive = Group(Suppress('#') + (TODO + restOfLine | FollowedBy(SKIP) + restOfLine.copy().setParseAction(lambda t:['SKIP',t[0]]) )) commentLine = Suppress("#") + empty + restOfLine testLine = Group( Optional(OneOrMore(commentLine + NL))("comments") + testStatus("passed") + Optional(integer)("testNumber") + Optional(description)("description") + Optional(directive)("directive")
import operator from typing import Dict from pyparsing import Literal, Word, ZeroOrMore, Forward, alphas, Regex, Suppress, oneOf, Optional, Group __all__ = 'berekenen' # Based on https://github.com/pyparsing/pyparsing/blob/master/examples/fourFn.py # Stripped down to a minimum # Implementation can be considered quick and dirty point = Literal(".") fnumber = Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?") ident = Word(alphas + "_") plus = Literal("+") minus = Literal("-") mult = Literal("*") div = Literal("/") lpar, rpar = map(Suppress, "()") addop = plus | minus multop = mult | div unary_op = { 'ABS': abs, } def veilig_delen(x, y): if abs(float(y)) < 1e-6:
def eval(self, archive, context, app, exp_context):
    """Evaluate every child evaluator and return the results as a list.

    Each entry of ``self._evals`` is called with the same four arguments
    this method receives; the return values are collected in order.
    """
    return [
        evaluate(archive, context, app, exp_context)
        for evaluate in self._evals
    ]


# --- Terminals of the expression grammar ---------------------------------

# Unsigned integer and unsigned "digits.digits" real number.
integer = Word(nums)
real = Combine(Word(nums) + '.' + Word(nums))

# Named constants; WordEnd() stops e.g. 'None' matching the start of a
# longer word.
constant = (Literal('True') |
            Literal('False') |
            Literal('None') |
            Literal('yes') |
            Literal('no')) + WordEnd()

# A reference containing '#', with an optional dotted prefix before it.
model_reference = Regex(r'([\w\.]*#[\w\.]+)')
# Dotted variable path; note the pattern also accepts leading digits.
variable = Regex(r'([a-zA-Z0-9\._]+)')
# Single- or double-quoted string with backslash escapes.
string = QuotedString('"', escChar="\\") | QuotedString('\'', escChar="\\")

# Alternatives are tried in this order (first match wins).
operand = model_reference | real | integer | constant | string | variable

# --- Operators -------------------------------------------------------------
plusop = oneOf('+ -')
multop = oneOf('* / // %')
groupop = Literal(',')

expr = Forward()

# A "name:" modifier prefix.
modifier = Combine(Word(alphas + nums) + ':')

# Parse actions that turn matched tokens into evaluator objects.
integer.setParseAction(EvalInteger)
real.setParseAction(EvalReal)
alphas, alphanums, delimitedList, originalTextFor, ParseBaseException, \
    Literal, quotedString, Keyword, empty, Suppress, Combine, NotAny, Regex


def eachMostOnce(*args, or_=operator.ior, and_=operator.add):
    """Combine *args* so that each may appear at most once, in any order.

    For every size ``i`` from ``len(args)`` down to 1, every permutation of
    ``i`` arguments is chained with ``and_``; all of those chains are then
    joined with ``or_``.  Longer permutations are generated first, so they
    come first in the resulting alternation.

    With no positional arguments the outer ``reduce`` receives an empty
    sequence and no initial value, which raises ``TypeError``.
    """
    return reduce(or_, (reduce(or_, map(lambda x: reduce(and_, x), permutations(args, i))) for i in range(len(args), 0, -1)))


# --- Basic lexical elements ------------------------------------------------

# Identifier: a letter followed by letters, digits or underscores.
NAME = Word(alphas, alphanums + '_')
INTEGER = Word(nums).setName('integer')
# Integer with an optional "_kind" suffix, where kind is an integer or a name.
INTEGER_K = Combine(INTEGER + Optional('_' + (INTEGER | NAME)))
EOL = p.LineEnd()
# A '!' comment running to the end of the line.
FortranComment = Regex(r'!.*$')
# The parse action re-emits the comment text with one leading space.
FortranComment.setParseAction(lambda s,loc,toks: [' '+toks[0]])
# End of line, optionally preceded by a comment.
EOLL = Optional(FortranComment) + EOL

# --- Real literals -----------------------------------------------------------
precision = Combine('.' + INTEGER)
exponent = Combine(oneOf('d e D E') + Optional(oneOf('+ -')) + INTEGER)
# An integer part followed by a fraction, an exponent, or both.
REAL = Combine(INTEGER + ((precision + exponent) | precision | exponent))
STRING = quotedString

# --- Operators and atoms (Forward declarations are filled in later) ----------
comp_op = Forward()
# A user-defined ".name." operator; the NotAny guard rejects positions where
# comp_op or one of the listed built-in operator/constant tokens matches.
user_op = NotAny(comp_op | oneOf('.not. .and. .or. .eqv. .neqv. ** // % .true. .false.')) \
    + Combine('.' + NAME + '.')
atom = Forward()
calllist = Forward()
# Array section "(lower:upper[:stride])"; both bounds are optional.
array_sub = '(' + Optional(atom)+':'+Optional(atom) + Optional(':'+atom) + ')'
# Derived-type component access "%name".
type_sub = '%' + NAME
def _getoptblank(pa, boundarychars):
    """Build an optional parser for a run of blank characters.

    The run consists of whitespace characters that are not listed in
    *boundarychars*.  The element does not skip leading whitespace itself
    (``leaveWhitespace``), and *pa* is installed as its parse action.
    """
    # "[^\S...]" is a double negative: whitespace, minus the boundary chars.
    blank_pattern = r"[^\S%s]+" % re.escape(boundarychars)
    blank = Regex(blank_pattern)
    blank = blank.leaveWhitespace()
    blank = blank.setParseAction(pa)
    return Optional(blank)
class ActivityParser13(object):
    """Grammar and parser for the activity.

    The grammar is assembled once, at class-definition time, from pyparsing
    elements.  ``activity_grammar`` is the entry rule; it accepts
    ``activity = [ ... ];`` where each list item is a plain string, a
    multiple-choice question, a free-text question or a multiple-choice
    group.
    """

    # --- Scalars -----------------------------------------------------------
    variable = Word(alphas)
    integer = Word(nums).setParseAction(make_int)
    string = (
        QuotedString('\'', escChar='\\', multiline=True) ^
        QuotedString('"', escChar='\\', multiline=True))
    boolean = (
        Literal('true').setParseAction(make_bool(True)) ^
        Literal('false').setParseAction(make_bool(False)))

    # Either a /.../i literal or regex("..."), the latter wrapped in a Term.
    regex = (
        Regex('/(.*)/i') ^
        Combine(
            sep('regex(') +
            QuotedString('"', escChar='\\') +
            sep(')')
        ).setParseAction(lambda x: verify.Term(verify.REGEX, x[0]))
    )

    # --- Multiple choice ---------------------------------------------------
    # One choice: [string, boolean, string]
    choice_decl = Group(
        sep('[') +
        string + sep(',') +
        boolean + sep(',') +
        string +
        sep(']')
    )

    choices_decl = Group(
        sep('[') +
        Optional(list_of(choice_decl)) +
        sep(']')
    ).setParseAction(make_list)

    multiple_choice_decl = (
        key('questionType') + sep(':') + key('multiple choice') +
        Optional(sep(','))
    )

    multiple_choice = (
        sep('{') +
        multiple_choice_decl +
        Each(
            Optional(
                key('questionHTML') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('choices') + sep(':') +
                choices_decl + Optional(sep(',')))
        ) +
        sep('}')
    ).setParseAction(make_dict)

    # --- Free text ---------------------------------------------------------
    free_text_decl = (
        key('questionType') + sep(':') + key('freetext') +
        Optional(sep(','))
    )

    free_text = (
        sep('{') +
        free_text_decl +
        Each(
            Optional(
                key('questionHTML') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('correctAnswerRegex') + sep(':') +
                regex + Optional(sep(','))) +
            Optional(
                key('correctAnswerOutput') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('incorrectAnswerOutput') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('showAnswerPrompt') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('showAnswerOutput') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('outputHeight') + sep(':') +
                string + Optional(sep(',')))
        ) +
        sep('}')
    ).setParseAction(make_dict)

    # --- Multiple choice group ---------------------------------------------
    question_list_decl = (
        sep('{') +
        Each(
            Optional(
                key('questionHTML') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('choices') + sep(':') +
                sep('[') +
                Group(list_of(string)).setParseAction(make_list) +
                sep(']') +
                Optional(sep(','))) +
            Optional(
                key('correctIndex') + sep(':') +
                (integer ^ (
                    sep('[') +
                    Group(list_of(integer)).setParseAction(make_list) +
                    sep(']'))) +
                Optional(sep(','))) +
            Optional(
                key('multiSelect') + sep(':') +
                boolean + Optional(sep(','))),
        ) +
        sep('}')).setParseAction(make_dict)

    questions_list_decl = Group(
        sep('[') +
        Optional(list_of(question_list_decl)) +
        sep(']')
    ).setParseAction(make_list)

    multiple_choice_group_decl = (
        key('questionType') + sep(':') + key('multiple choice group') +
        Optional(sep(','))
    )

    multiple_choice_group = (
        sep('{') +
        multiple_choice_group_decl +
        Each(
            Optional(
                key('questionGroupHTML') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('allCorrectMinCount') + sep(':') +
                integer + Optional(sep(','))) +
            Optional(
                key('allCorrectOutput') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('someIncorrectOutput') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('questionsList') + sep(':') +
                questions_list_decl + Optional(sep(',')))
        ) +
        sep('}')
    ).setParseAction(make_dict)

    # --- Entry rule --------------------------------------------------------
    activity_grammar = (
        sep('activity') +
        sep('=') +
        sep('[') +
        Optional(list_of(
            string ^ multiple_choice ^ free_text ^ multiple_choice_group)) +
        sep(']') +
        Optional(sep(';')))

    @classmethod
    def parse_string(cls, content):
        """Parse *content* with ``activity_grammar`` and return the results."""
        return cls.activity_grammar.parseString(content)

    @classmethod
    def parse_string_in_scope(cls, content, scope, root_name):
        """Parses activity text following grammar.

        Returns a new dict holding every item of *scope*, an empty
        ``'__builtins__'`` entry, and the parsed activity (as a list) under
        *root_name*.  Later entries win on key collision.

        Raises:
            Exception: if *root_name* is not ``'activity'``.
        """
        if 'activity' != root_name:
            # Format the message; passing root_name as a second argument
            # left the '%s' placeholder unexpanded.
            raise Exception('Unsupported schema: %s' % root_name)
        # Build the merged dict incrementally instead of concatenating
        # items() results, which only works when items() returns lists.
        namespace = dict(scope.items())
        namespace['__builtins__'] = {}
        namespace[root_name] = cls.parse_string(content).asList()
        return namespace
def parse(cls, content, basedir=None, resolve=True, unresolved_value=DEFAULT_SUBSTITUTION):
    """parse a HOCON content

    :param content: HOCON content to parse
    :type content: basestring
    :param basedir: directory that relative ``include`` file names are joined to;
        when None the file name is used as given
    :type basedir: basestring
    :param resolve: if true, resolve substitutions
    :type resolve: boolean
    :param unresolved_value: value assigned to unresolved substitutions.
        If overridden with a default value, every unresolved substitution is replaced by that value.
        If it is set to pyhocon.STR_SUBSTITUTION then the value is replaced by its
        substitution expression (e.g., ${x})
    :type unresolved_value: one of the substitution sentinels, or the replacement value itself
    :return: a ConfigTree or a list
    """

    unescape_pattern = re.compile(r'\\.')

    def replace_escape_sequence(match):
        # Unknown escape sequences are left untouched.
        value = match.group(0)
        return cls.REPLACEMENTS.get(value, value)

    def norm_string(value):
        return unescape_pattern.sub(replace_escape_sequence, value)

    def unescape_string(tokens):
        return ConfigUnquotedString(norm_string(tokens[0]))

    def parse_multi_string(tokens):
        # remove the first and last 3 "
        return tokens[0][3:-3]

    def convert_number(tokens):
        n = tokens[0]
        try:
            return int(n, 10)
        except ValueError:
            return float(n)

    def safe_convert_number(tokens):
        # Like convert_number, but falls back to the raw text (used for keys
        # such as "1.2.3" that only look numeric).
        n = tokens[0]
        try:
            return int(n, 10)
        except ValueError:
            try:
                return float(n)
            except ValueError:
                return n

    def convert_period(tokens):
        period_value = int(tokens.value)
        period_identifier = tokens.unit

        period_unit = next((single_unit for single_unit, values
                            in cls.get_supported_period_type_map().items()
                            if period_identifier in values))

        return period(period_value, period_unit)

    # ${path} or ${?path} for optional substitution
    SUBSTITUTION_PATTERN = r"\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}(?P<ws>[ \t]*)"

    def create_substitution(instring, loc, token):
        # remove the ${ and }
        match = re.match(SUBSTITUTION_PATTERN, token[0])
        variable = match.group('variable')
        ws = match.group('ws')
        optional = match.group('optional') == '?'
        substitution = ConfigSubstitution(variable, optional, ws, instring, loc)
        return substitution

    # "text" followed by optional trailing blanks (kept separately as ws)
    STRING_PATTERN = '"(?P<value>(?:[^"\\\\]|\\\\.)*)"(?P<ws>[ \t]*)'

    def create_quoted_string(instring, loc, token):
        # remove the surrounding quotes and unescape the content
        match = re.match(STRING_PATTERN, token[0])
        value = norm_string(match.group('value'))
        ws = match.group('ws')
        return ConfigQuotedString(value, ws, instring, loc)

    def include_config(instring, loc, token):
        url = None
        file_name = None
        required = False

        if token[0] == 'required':
            required = True
            final_tokens = token[1:]
        else:
            final_tokens = token

        if len(final_tokens) == 1:  # include "test"
            value = final_tokens[0].value if isinstance(
                final_tokens[0], ConfigQuotedString) else final_tokens[0]
            if value.startswith(("http://", "https://", "file://")):
                url = value
            else:
                file_name = value
        elif len(final_tokens) == 2:  # include url("test") or file("test")
            # Inspect final_tokens, not token: with a leading 'required'
            # the indices of the two lists differ by one.
            value = final_tokens[1].value if isinstance(
                final_tokens[1], ConfigQuotedString) else final_tokens[1]
            if final_tokens[0] == 'url':
                url = value
            else:
                file_name = value

        if url is not None:
            logger.debug('Loading config from url %s', url)
            obj = ConfigFactory.parse_URL(
                url,
                resolve=False,
                required=required,
                unresolved_value=NO_SUBSTITUTION)
        elif file_name is not None:
            path = file_name if basedir is None else os.path.join(basedir, file_name)
            logger.debug('Loading config from file %s', path)
            obj = ConfigFactory.parse_file(
                path,
                resolve=False,
                required=required,
                unresolved_value=NO_SUBSTITUTION)
        else:
            raise ConfigException(
                'No file or URL specified at: {loc}: {instring}'.format(
                    loc=loc, instring=instring))

        return ConfigInclude(obj if isinstance(obj, list) else obj.items())

    @contextlib.contextmanager
    def set_default_white_spaces():
        # Newlines are significant in HOCON, so only blanks and tabs may be
        # skipped.  The previous global setting is restored even when
        # building the grammar or parsing raises.
        default = ParserElement.DEFAULT_WHITE_CHARS
        ParserElement.setDefaultWhitespaceChars(' \t')
        try:
            yield
        finally:
            ParserElement.setDefaultWhitespaceChars(default)

    with set_default_white_spaces():
        assign_expr = Forward()
        true_expr = Keyword("true", caseless=True).setParseAction(
            replaceWith(True))
        false_expr = Keyword("false", caseless=True).setParseAction(
            replaceWith(False))
        null_expr = Keyword("null", caseless=True).setParseAction(
            replaceWith(NoneValue()))
        # key = QuotedString('"', escChar='\\', unquoteResults=False) | Word(alphanums + alphas8bit + '._- /')
        key = (QuotedString('"', escChar='\\', unquoteResults=False) |
               Word("0123456789.").setParseAction(safe_convert_number) |
               Word(alphanums + alphas8bit + '._- /'))

        eol = Word('\n\r').suppress()
        eol_comma = Word('\n\r,').suppress()
        comment = (Literal('#') | Literal('//')) - SkipTo(eol | StringEnd())
        comment_eol = Suppress(Optional(eol_comma) + comment)
        comment_no_comma_eol = (comment | eol).suppress()
        number_expr = Regex(
            r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE][+\-]?\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
            re.DOTALL).setParseAction(convert_number)

        period_types = itertools.chain.from_iterable(
            cls.get_supported_period_type_map().values())
        period_expr = Regex(r'(?P<value>\d+)\s*(?P<unit>' +
                            '|'.join(period_types) +
                            ')$').setParseAction(convert_period)

        # multi line string using """
        # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
        multiline_string = Regex(
            '""".*?"*"""',
            re.DOTALL | re.UNICODE).setParseAction(parse_multi_string)
        # single quoted line string
        quoted_string = Regex(
            r'"(?:[^"\\\n]|\\.)*"[ \t]*',
            re.UNICODE).setParseAction(create_quoted_string)
        # unquoted string that takes the rest of the line until an optional comment
        # we support .properties multiline support which is like this:
        # line1 \
        # line2 \
        # so a backslash precedes the \n
        unquoted_string = Regex(
            r'(?:[^^`+?!@*&"\[\{\s\]\}#,=\$\\]|\\.)+[ \t]*',
            re.UNICODE).setParseAction(unescape_string)
        substitution_expr = Regex(r'[ \t]*\$\{[^\}]+\}[ \t]*'
                                  ).setParseAction(create_substitution)
        string_expr = multiline_string | quoted_string | unquoted_string

        value_expr = period_expr | number_expr | true_expr | false_expr | null_expr | string_expr

        include_content = (quoted_string | (
            (Keyword('url') | Keyword('file')) -
            Literal('(').suppress() - quoted_string - Literal(')').suppress()))
        include_expr = (Keyword("include", caseless=True).suppress() +
                        (include_content |
                         (Keyword("required") - Literal('(').suppress() -
                          include_content - Literal(')').suppress()))
                        ).setParseAction(include_config)

        root_dict_expr = Forward()
        dict_expr = Forward()
        list_expr = Forward()
        multi_value_expr = ZeroOrMore(comment_eol | include_expr |
                                      substitution_expr | dict_expr |
                                      list_expr | value_expr |
                                      (Literal('\\') - eol).suppress())
        # for a dictionary : or = is optional
        # last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is dictionary concatenation
        inside_dict_expr = ConfigTreeParser(
            ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma))
        inside_root_dict_expr = ConfigTreeParser(
            ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma),
            root=True)
        dict_expr << Suppress('{') - inside_dict_expr - Suppress('}')
        root_dict_expr << Suppress('{') - inside_root_dict_expr - Suppress(
            '}')
        list_entry = ConcatenatedValueParser(multi_value_expr)
        list_expr << Suppress('[') - ListParser(list_entry - ZeroOrMore(
            eol_comma - list_entry)) - Suppress(']')

        # special case when we have a value assignment where the string can potentially be the remainder of the line
        assign_expr << Group(key - ZeroOrMore(comment_no_comma_eol) - (
            dict_expr | (Literal('=') | Literal(':') | Literal('+=')) -
            ZeroOrMore(comment_no_comma_eol) -
            ConcatenatedValueParser(multi_value_expr)))

        # the file can be { ... } where {} can be omitted or []
        config_expr = ZeroOrMore(comment_eol | eol) + (
            list_expr | root_dict_expr |
            inside_root_dict_expr) + ZeroOrMore(comment_eol | eol_comma)
        config = config_expr.parseString(content, parseAll=True)[0]

        if resolve:
            allow_unresolved = (unresolved_value is not DEFAULT_SUBSTITUTION and
                                unresolved_value is not MANDATORY_SUBSTITUTION)
            has_unresolved = cls.resolve_substitutions(
                config, allow_unresolved)
            if has_unresolved and unresolved_value is MANDATORY_SUBSTITUTION:
                raise ConfigSubstitutionException(
                    'resolve cannot be set to True and unresolved_value to MANDATORY_SUBSTITUTION'
                )

        if unresolved_value is not NO_SUBSTITUTION and unresolved_value is not DEFAULT_SUBSTITUTION:
            cls.unresolve_substitutions_to_value(config, unresolved_value)
    return config
class AssessmentParser13(object):
    """Grammar and parser for the assessment.

    The grammar is assembled once, at class-definition time, from pyparsing
    elements.  ``assessment_grammar`` is the entry rule; it accepts
    ``assessment = { ... };`` and converts the body with ``make_dict``.
    """

    # --- Scalars -----------------------------------------------------------
    string = (
        QuotedString('\'', escChar='\\', multiline=True) ^
        QuotedString('"', escChar='\\', multiline=True))

    boolean = (
        Literal('true').setParseAction(make_bool(True)) ^
        Literal('false').setParseAction(make_bool(False)))

    # NOTE: shadows the builtin inside the class body; kept because it is a
    # public class attribute.
    float = Combine(
        Word(nums) + Optional(Literal('.') + Word(nums))
    ).setParseAction(make_float)

    integer = Word(nums).setParseAction(make_int)

    # A choice is a plain string or correct("..."), the latter wrapped in a Term.
    choice_decl = (
        string ^
        Combine(
            sep('correct(') + string + sep(')')
        ).setParseAction(lambda x: verify.Term(verify.CORRECT, x[0]))
    )

    # Either a /.../i literal or regex("..."), the latter wrapped in a Term.
    regex = (
        Regex('/(.*)/i') ^
        Combine(
            sep('regex(') +
            QuotedString('"', escChar='\\') +
            sep(')')
        ).setParseAction(lambda x: verify.Term(verify.REGEX, x[0]))
    )

    # --- One question ------------------------------------------------------
    question_decl = (
        sep('{') +
        Each(
            Optional(
                key('questionHTML') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('lesson') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('correctAnswerString') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('correctAnswerRegex') + sep(':') +
                regex + Optional(sep(','))) +
            Optional(
                key('correctAnswerNumeric') + sep(':') +
                float + Optional(sep(','))) +
            Optional(
                key('choiceScores') + sep(':') +
                sep('[') +
                Group(list_of(float)).setParseAction(make_list) +
                sep(']') +
                Optional(sep(','))) +
            Optional(
                key('weight') + sep(':') + integer + Optional(sep(','))) +
            Optional(
                key('multiLine') + sep(':') +
                boolean + Optional(sep(','))) +
            Optional(
                key('choices') + sep(':') +
                sep('[') +
                Group(list_of(choice_decl)).setParseAction(make_list) +
                sep(']') +
                Optional(sep(',')))
        ) +
        sep('}')).setParseAction(make_dict)

    # --- Entry rule --------------------------------------------------------
    assessment_grammar = (
        sep('assessment') +
        sep('=') +
        sep('{') +
        Each(
            Optional(
                key('assessmentName') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('preamble') + sep(':') +
                string + Optional(sep(','))) +
            Optional(
                key('checkAnswers') + sep(':') +
                boolean + Optional(sep(','))) +
            Optional(
                key('questionsList') + sep(':') +
                sep('[') +
                Group(list_of(question_decl)).setParseAction(make_list) +
                sep(']') +
                Optional(sep(',')))
        ) +
        sep('}') +
        Optional(sep(';'))).setParseAction(make_dict)

    @classmethod
    def parse_string(cls, content):
        """Parse *content* with ``assessment_grammar`` and return the results."""
        return cls.assessment_grammar.parseString(content)

    @classmethod
    def parse_string_in_scope(cls, content, scope, root_name):
        """Parses assessment text following grammar.

        Returns a new dict holding every item of *scope*, an empty
        ``'__builtins__'`` entry, and the parsed assessment under
        *root_name*.  Later entries win on key collision.

        Raises:
            Exception: if *root_name* is not ``'assessment'``.
        """
        if 'assessment' != root_name:
            # Format the message; passing root_name as a second argument
            # left the '%s' placeholder unexpanded.
            raise Exception('Unsupported schema: %s' % root_name)

        # we need to extract the results as a dictionary; so we remove the
        # outer array holding it
        ast = cls.parse_string(content).asList()
        if len(ast) == 1:
            ast = ast[0]

        # Build the merged dict incrementally instead of concatenating
        # items() results, which only works when items() returns lists.
        namespace = dict(scope.items())
        namespace['__builtins__'] = {}
        namespace[root_name] = ast
        return namespace
Word, Regex, Group, oneOf, Forward, CaselessKeyword, Suppress, delimitedList, operatorPrecedence, opAssoc, ParseException, ) # Variables variable = Regex(r"(?P<table>[ai|di|sv]{2})\.(?P<tag>[\w\d]+)\.(?P<attr>\w+)") def var_parse_action(text, index, context): return context[0] variable.setParseAction(var_parse_action) # Numbers numeric_literal = Regex(r"\-?\d+(\.\d+)?") def number_prase_action(text, index, data): number = data[0] if "." in number:
class ChoiceTree:
    '''
    Parses strings that describe a set of combinations and enumerates them.

    e.g.
        "abc[de|fg]" → [ "abcde", "abcfg" ]
        "I [eat|like] [|hot]dogs" → [ "I eat dogs", "I like dogs", "I eat hotdogs", "I like hotdogs" ]

    Escape symbol is '~'
    e.g.
        "abc~[def~]" → [ "abc[def]" ]
    An escaped escape '~~' is not turned into a literal '~'; call
    .replace('~~', '~') yourself after parsing if that is wanted.

    Conceptually this is the noncommutative semiring of (unordered) lists
    of strings, where in python notation:
        list1+list2 == [*list1, *list2]
            is the concatenation of lists, and
        list1*list2 == [a+b for a in list1 for b in list2]
            is the concatenation of each pair of strings.
    (The neutral element is the list holding the empty string; the zero
    element is the empty list.)

    Addition is written "|", the product is implicit (a*b == ab) and []
    act as parentheses, so that
        "abc" == ["abc"] and "a|b|c" == ["a", "b", "c"]

    ChoiceTree parses such expressions and, by distributivity
    ( [a|b]c == ab|ac ), expands them into a sum of products.
    '''

    class Text:
        '''Leaf node: a literal piece of text with exactly one expansion.'''

        def __init__(self, text):
            # '' is passed directly for the empty alternative; anything else
            # is a parse result whose pieces are glued together.
            if text == '':
                self.text = text
            else:
                self.text = ''.join(text.asList())
            self.count = 1
            self.reset()

        def __str__(self):
            return self.text

        __repr__ = __str__

        def next(self):
            # A leaf is exhausted as soon as it has been produced once.
            self.done = True
            return self.text

        def random(self):
            return self.text

        def reset(self):
            self.done = False

        def current(self):
            return self.text

    class Choice:
        '''Alternation node: exactly one of its children is used.'''

        def __init__(self, vals):
            self.vals = vals.asList()
            total = 0
            for child in self.vals:
                total += child.count
            self.count = total
            self.reset()

        def __str__(self):
            inner = '|'.join([str(child) for child in self.vals])
            return '[{}]'.format(inner)

        __repr__ = __str__

        def next(self):
            active = self.vals[self.i]
            produced = active.next()
            # Move on to the following alternative once this one is spent.
            if active.done:
                self.i += 1
                if self.i == len(self.vals):
                    self.done = True
            return produced

        def random(self):
            # Weighted based on the number of different possible branches each child has.
            weights = [child.count / self.count for child in self.vals]
            picked = np.random.choice(self.vals, p=weights)
            return picked.random()

        def reset(self):
            self.i = 0
            self.done = False
            for child in self.vals:
                child.reset()

        def current(self):
            return self.vals[self.i].current()

    class Group:
        '''Sequence node: the concatenation of all of its children.'''

        def __init__(self, vals):
            self.vals = vals.asList()
            product = 1
            for child in self.vals:
                product = product * child.count
            self.count = product
            self.reset()

        def __str__(self):
            return ''.join([str(child) for child in self.vals])

        __repr__ = __str__

        def next(self):
            # Works like an odometer whose first child is the fastest digit:
            # advance a child; if it wrapped around, reset it and carry into
            # the next one.  Children beyond the carry keep their current value.
            final = len(self.vals) - 1
            pieces = []
            pos = 0
            while True:
                node = self.vals[pos]
                pieces.append(node.next())
                if not node.done:
                    break
                if pos == final:
                    self.done = True
                    break
                node.reset()
                pos += 1
            for node in self.vals[pos + 1:]:
                pieces.append(node.current())
            return ''.join(pieces)

        def random(self):
            return ''.join(child.random() for child in self.vals)

        def reset(self):
            self.done = False
            for child in self.vals:
                child.reset()

        def current(self):
            return ''.join([child.current() for child in self.vals])

    # --- Grammar -------------------------------------------------------------
    escapedSymbol = Char('~').suppress() + Char('[|]')
    escapedEsc = Literal('~~')
    soleEsc = Char('~')
    lbr = Literal('[').suppress()
    rbr = Literal(']').suppress()
    div = Literal('|').suppress()
    # any sequence of characters not containing '[', ']', '|' or '~'
    _text = Regex(r'[^\[\|\]~]+')

    text = pGroup(
        OneOrMore(escapedSymbol | escapedEsc | soleEsc | _text)
    ).setParseAction(lambda t: ChoiceTree.Text(t[0]))
    group = Forward()
    choice = pGroup(
        lbr + group + ZeroOrMore(div + group) + rbr
    ).setParseAction(lambda t: ChoiceTree.Choice(t[0]))
    empty = Empty().setParseAction(lambda t: ChoiceTree.Text(''))
    group <<= pGroup(
        OneOrMore(text | choice) | empty
    ).setParseAction(lambda t: ChoiceTree.Group(t[0])).leaveWhitespace()

    def __init__(self, text, parse_flags=False, add_brackets=False, leave_escapes=False):
        '''
        Parse *text* into a tree.

        With parse_flags, a leading '[?]' is stripped and marks the tree as
        random.  With add_brackets, the text is wrapped in '[' and ']'
        before parsing.  leave_escapes is accepted but not used here.
        '''
        self.flag_random = False
        if parse_flags and text[:3] == '[?]':
            text = text[3:]
            self.flag_random = True

        if add_brackets:
            text = '[' + text + ']'

        self.root: ChoiceTree.Group = ChoiceTree.group.parseString(text)[0]
        self.count = self.root.count

    def __iter__(self):
        # A tree flagged random yields a single random expansion; otherwise
        # every expansion is produced in turn and the tree is reset once the
        # iteration has run to completion.
        if not self.flag_random:
            while not self.root.done:
                yield self.root.next()
            self.root.reset()
        else:
            yield self.random()

    def random(self):
        return self.root.random()
joint = imusimModel.getJoint(bone.name) if joint.hasParent: parentRot = joint.parent.rotationKeyFrames.latestValue parentRotOffset = bonedata.parent.rotationOffset rotation = parentRot * parentRotOffset * rotation else: rotation = convertCGtoNED(rotation) joint.rotationKeyFrames.add(t, rotation) t += framePeriod return imusimModel # Define parser tokens comments = ZeroOrMore(Suppress(Literal('#') + SkipTo(LineEnd()))) intValue = Word(nums).setParseAction( lambda s,l,t: int(t[0]) ) floatValue = Regex(r'-?\d+(\.\d*)?(e-?\d*)?').setParseAction(lambda s,l,t: float(t[0])) floatVector = Group(floatValue + floatValue + floatValue) limit = Group( Suppress(Literal("(")) + floatValue + floatValue + Suppress(Literal(")"))) limits = Group(OneOrMore(limit)) channel = Word("TRtr","XYZxyz") channels = Group(OneOrMore(channel)) rotationOrder = Word("XYZ", exact=3) begin = Suppress(Keyword("begin")) end = Suppress(Keyword("end")) bonename = Combine(~end + Word(alphanums+"_-")).setWhitespaceChars(' ') version = Keyword(":version") + Literal("1.10")
def formula_grammar(table):
    """
    Construct a parser for molecular formulas.

    :Parameters:

        *table* : PeriodicTable
            Elements and their associated fields are looked up in this
            periodic table (via ``table.symbol``).

    :Returns:
        *parser* : pyparsing.ParserElement.
            The ``parser.parseString()`` method returns a list of
            pairs (*count,fragment*), where fragment is an *isotope*,
            an *element* or a list of pairs (*count,fragment*).
    """
    # Recursive
    composite = Forward()
    mixture = Forward()

    # whitespace and separators
    space = Optional(White().suppress())
    separator = space + Literal('+').suppress() + space

    # Lookup the element in the element table
    symbol = Regex("[A-Z][a-z]*")
    symbol = symbol.setParseAction(lambda s, l, t: table.symbol(t[0]))

    # Translate isotope
    openiso = Literal('[').suppress()
    closeiso = Literal(']').suppress()
    isotope = Optional(~White() + openiso + Regex("[1-9][0-9]*") + closeiso,
                       default='0')
    isotope = isotope.setParseAction(lambda s, l, t: int(t[0]) if t[0] else 0)

    # Translate ion
    openion = Literal('{').suppress()
    closeion = Literal('}').suppress()
    ion = Optional(~White() + openion + Regex("([1-9][0-9]*)?[+-]") + closeion,
                   default='0+')
    # "2+" -> int("+2"); a bare sign means a charge of magnitude 1.
    ion = ion.setParseAction(
        lambda s, l, t: int(t[0][-1] + (t[0][:-1] if len(t[0]) > 1 else '1')))

    # Translate counts
    fract = Regex("(0|[1-9][0-9]*|)([.][0-9]*)")
    fract = fract.setParseAction(lambda s, l, t: float(t[0]) if t[0] else 1)
    whole = Regex("[1-9][0-9]*")
    whole = whole.setParseAction(lambda s, l, t: int(t[0]) if t[0] else 1)
    count = Optional(~White() + (fract | whole), default=1)

    # Convert symbol,isotope,ion,count to (count,isotope)
    element = symbol + isotope + ion + count

    def convert_element(string, location, tokens):
        sym, iso, charge, number = tokens[0:4]
        if iso != 0:
            sym = sym[iso]
        if charge != 0:
            sym = sym.ion[charge]
        return (number, sym)
    element = element.setParseAction(convert_element)

    # Convert "count elements" to a pair
    implicit_group = count + OneOrMore(element)

    def convert_implicit(string, location, tokens):
        number = tokens[0]
        fragment = tokens[1:]
        return fragment if number == 1 else (number, fragment)
    implicit_group = implicit_group.setParseAction(convert_implicit)

    # Convert "(composite) count" to a pair
    opengrp = space + Literal('(').suppress() + space
    closegrp = space + Literal(')').suppress() + space
    explicit_group = opengrp + composite + closegrp + count

    def convert_explicit(string, location, tokens):
        number = tokens[-1]
        fragment = tokens[:-1]
        return fragment if number == 1 else (number, fragment)
    explicit_group = explicit_group.setParseAction(convert_explicit)

    # Build composite from a set of groups
    group = implicit_group | explicit_group
    implicit_separator = separator | space
    composite << group + ZeroOrMore(implicit_separator + group)

    # Optional "@value" density; a trailing 'n' marks it as natural density,
    # 'i' (the default) as plain density.
    density = Literal('@').suppress() + count + Optional(Regex("[ni]"), default='i')
    compound = composite + Optional(density, default=None)

    def convert_compound(string, location, tokens):
        if tokens[-1] is None:
            return Formula(structure=_immutable(tokens[:-1]))
        elif tokens[-1] == 'n':
            return Formula(structure=_immutable(tokens[:-2]),
                           natural_density=tokens[-2])
        else:
            return Formula(structure=_immutable(tokens[:-2]),
                           density=tokens[-2])
    compound = compound.setParseAction(convert_compound)

    partsep = space + Literal('//').suppress() + space
    percent = Literal('%').suppress()

    # --- Mixture by weight percent: "10%wt A // 20% B // C" -------------------
    weight_percent = Regex("%(w((eigh)?t)?|m(ass)?)").suppress() + space
    by_weight = (count + weight_percent + mixture
                 + ZeroOrMore(partsep + count + (weight_percent | percent) + mixture)
                 + partsep + mixture)

    def convert_by_weight(string, location, tokens):
        # tokens alternate percentage, part, ..., ending with the base part,
        # which receives whatever remains of 100%.
        piece = tokens[1:-1:2] + [tokens[-1]]
        fract = [float(v) for v in tokens[:-1:2]]
        fract.append(100 - sum(fract))
        if len(piece) != len(fract):
            raise ValueError("Missing base component of mixture")
        if fract[-1] < 0:
            raise ValueError("Formula percentages must sum to less than 100%")
        return _mix_by_weight_pairs(zip(piece, fract))
    mixture_by_weight = by_weight.setParseAction(convert_by_weight)

    # --- Mixture by volume percent: "10%vol A // B" ---------------------------
    volume_percent = Regex("%v(ol(ume)?)?").suppress() + space
    by_volume = (count + volume_percent + mixture
                 + ZeroOrMore(partsep + count + (volume_percent | percent) + mixture)
                 + partsep + mixture)

    def convert_by_volume(string, location, tokens):
        piece = tokens[1:-1:2] + [tokens[-1]]
        fract = [float(v) for v in tokens[:-1:2]]
        fract.append(100 - sum(fract))
        if len(piece) != len(fract):
            raise ValueError("Missing base component of mixture " + string)
        if fract[-1] < 0:
            raise ValueError("Formula percentages must sum to less than 100%")
        return _mix_by_volume_pairs(zip(piece, fract))
    mixture_by_volume = by_volume.setParseAction(convert_by_volume)

    # --- Mixtures given as absolute quantities ---------------------------------
    def _absolute_mixture_converter(units, attr, mix_pairs):
        """Build the parse action for a mixture of absolute amounts.

        *units* maps a unit suffix to its scale factor, *attr* names the
        attribute holding the absolute total on a Formula, and *mix_pairs*
        combines (part, percentage) pairs into the resulting Formula.
        """
        def convert(string, location, tokens):
            if len(tokens) < 2:
                return tokens
            piece = []
            amount = []
            for first, second in zip(tokens[0::2], tokens[1::2]):
                if isinstance(first, Formula):
                    # A parenthesised sub-mixture followed by a multiplier.
                    piece.append(first)
                    amount.append(getattr(first, attr) * float(second))
                else:
                    # A (value, unit) group followed by the material.
                    piece.append(second)
                    amount.append(float(first[0]) * units[first[1]])
            total = sum(amount)
            share = [(a / total) * 100 for a in amount]
            result = mix_pairs(zip(piece, share))
            setattr(result, attr, total)
            return result
        return convert

    # Layers by thickness, mixed by volume.
    mixture_by_layer = Forward()
    layer_thick = Group(count + Regex("(nm|um|mm)") + space)
    layer_part = (layer_thick + mixture) | (opengrp + mixture_by_layer + closegrp + count)
    mixture_by_layer << layer_part + ZeroOrMore(partsep + layer_part)
    mixture_by_layer = mixture_by_layer.setParseAction(
        _absolute_mixture_converter(
            {'nm': 1e-9, 'um': 1e-6, 'mm': 1e-3},
            'absthick', _mix_by_volume_pairs))

    # Parts by absolute mass, mixed by weight.
    mixture_by_absmass = Forward()
    absmass_mass = Group(count + Regex("(ng|ug|mg|g|kg)") + space)
    absmass_part = (absmass_mass + mixture) | (opengrp + mixture_by_absmass + closegrp + count)
    mixture_by_absmass << absmass_part + ZeroOrMore(partsep + absmass_part)
    mixture_by_absmass = mixture_by_absmass.setParseAction(
        _absolute_mixture_converter(
            {'ng': 1e-9, 'ug': 1e-6, 'mg': 1e-3, 'g': 1e+0, 'kg': 1e+3},
            'absmass', _mix_by_weight_pairs))

    # Parts by absolute volume, mixed by volume.
    mixture_by_absvolume = Forward()
    absvolume_vol = Group(count + Regex("(nl|ul|ml|l)") + space)
    absvolume_part = (absvolume_vol + mixture) | (opengrp + mixture_by_absvolume + closegrp + count)
    mixture_by_absvolume << absvolume_part + ZeroOrMore(partsep + absvolume_part)
    mixture_by_absvolume = mixture_by_absvolume.setParseAction(
        _absolute_mixture_converter(
            {'nl': 1e-9, 'ul': 1e-6, 'ml': 1e-3, 'l': 1e+0},
            'absvolume', _mix_by_volume_pairs))

    # A mixture component is a compound or a parenthesised percent mixture.
    mixture << (compound | (opengrp + (mixture_by_weight | mixture_by_volume) + closegrp))
    formula = (compound | mixture_by_weight | mixture_by_volume
               | mixture_by_layer | mixture_by_absmass | mixture_by_absvolume)
    grammar = Optional(formula, default=Formula()) + StringEnd()

    grammar.setName('Chemical Formula')
    return grammar
# [167] PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] PN_CHARS_re = '\\-0-9\u00B7\u0300-\u036F\u203F-\u2040' + PN_CHARS_U_re # PN_CHARS = Regex(u'[%s]'%PN_CHARS_re, flags=re.U) # [168] PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS|'.')* PN_CHARS)? PN_PREFIX = Regex(r'[%s](?:[%s\.]*[%s])?' % (PN_CHARS_BASE_re, PN_CHARS_re, PN_CHARS_re), flags=re.U) # [140] PNAME_NS ::= PN_PREFIX? ':' PNAME_NS = Optional( Param('prefix', PN_PREFIX)) + Suppress(':').leaveWhitespace() # [173] PN_LOCAL_ESC ::= '\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%' ) PN_LOCAL_ESC = Regex('\\\\[_~\\.\\-!$&"\'()*+,;=/?#@%]') PN_LOCAL_ESC.setParseAction(lambda x: x[0][1:]) # [172] HEX ::= [0-9] | [A-F] | [a-f] # HEX = Regex('[0-9A-Fa-f]') # not needed # [171] PERCENT ::= '%' HEX HEX PERCENT = Regex('%[0-9a-fA-F]{2}') PERCENT.setParseAction(lambda x: chr(int(x[0][1:], 16))) # [170] PLX ::= PERCENT | PN_LOCAL_ESC PLX = PERCENT | PN_LOCAL_ESC # [169] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX ) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX) )? PN_LOCAL = Combine((Regex('[%s0-9:]' % PN_CHARS_U_re, flags=re.U) | PLX) + ZeroOrMore((Regex( '[%s\\.:]' % PN_CHARS_re, flags=re.U) | PLX) + Optional(Regex('[%s:]' % PN_CHARS_re, flags=re.U) | PLX)))
# val = val.replace(".", "\\.") elif val.startswith('`') and val.endswith('`'): val = "'" + val[1:-1].replace("``","`") + "'" elif val.startswith("+"): val = val[1:] un = ast.literal_eval(val) return un def to_string(instring, tokensStart, retTokens): val = retTokens[0] val = "'"+val[1:-1].replace("''", "\\'")+"'" return {"literal": ast.literal_eval(val)} # NUMBERS realNum = Regex(r"[+-]?(\d+\.\d*|\.\d+)([eE][+-]?\d+)?").addParseAction(unquote) intNum = Regex(r"[+-]?\d+([eE]\+?\d+)?").addParseAction(unquote) # STRINGS, NUMBERS, VARIABLES sqlString = Regex(r"\'(\'\'|\\.|[^'])*\'").addParseAction(to_string) identString = Regex(r'\"(\"\"|\\.|[^"])*\"').addParseAction(unquote) mysqlidentString = Regex(r'\`(\`\`|\\.|[^`])*\`').addParseAction(unquote) ident = Combine(~RESERVED + (delimitedList(Literal("*") | Word(alphas + "_", alphanums + "_$") | identString | mysqlidentString, delim=".", combine=True))).setName("identifier") # EXPRESSIONS expr = Forward() # CASE case = ( CASE + Group(ZeroOrMore((WHEN + expr("when") + THEN + expr("then")).addParseAction(to_when_call)))("case") +
def getToken(self):
    """Return the parser element for a line join.

    The element matches a backslash immediately followed by a newline,
    replaces the match with an empty string, and stores the result under
    the results name ``linejoin``.
    """
    # Calling the element with a name returns a named copy, so the parse
    # action has to be attached before the name is applied.
    linejoin = Regex(r"\\\n").setParseAction(lambda s, l, t: u"")
    return linejoin("linejoin")
"""Write the strategy encoded by the subtree rooted at 'root' in modified Newick format. V(H, L) represents the tree with root node V, high subtree H, and low subtree L. A node name followed by * indicates that the gusher is being opened solely for information and the Goldie will never be found there.""" if root.high and root.low: return f'{root}({write_tree(root.high)}, {write_tree(root.low)})' elif root.high: return f'{root}({write_tree(root.high)},)' elif root.low: return f'{root}(,{write_tree(root.low)})' else: return f'{root}' # Strategy tree grammar node = Regex(rf'\w+[{NEVER_FIND_FLAG}]?') LPAREN, COMMA, RPAREN = map(Suppress, '(,)') tree = Forward() subtree = Group(Optional(tree)) subtrees = LPAREN - subtree.setResultsName( 'high') - COMMA - subtree.setResultsName('low') - RPAREN tree << node.setResultsName('root') - Optional(subtrees) def read_tree(tree_str, gusher_map, start=BASKET_LABEL): """Read the strategy encoded in tree_str and build the corresponding decision tree. V(H, L) represents the tree with root node V, high subtree H, and low subtree L. A node name followed by * indicates that the gusher is being opened solely for information and the Goldie will never be found there.""" def build_tree( tokens
import re

from pyparsing import (
    Word, Keyword, NotAny, alphanums, nums, alphas, OneOrMore,
    srange, ZeroOrMore, Regex
)

from whispy_lispy import ast

# Integer: a run of digits that is NOT followed by a dot, so the integer
# part of a float is never mistaken for an int.
int_literal = Word(nums) + NotAny('.')
int_literal.setParseAction(ast.Int.from_parsed_result)

# Float: digits, exactly one dot, digits. ``Word('.')`` on its own matches
# a run of dots, which let input such as ``1..2`` through as a float;
# ``exact=1`` restricts the separator to a single dot while producing the
# same three tokens for valid input.
float_literal = Word(nums) + Word('.', exact=1) + Word(nums)
float_literal.setParseAction(ast.Float.from_parsed_result)

# Boolean: the keywords #t and #f.
bool_literal = Keyword('#t') | Keyword('#f')
bool_literal.setParseAction(ast.Bool.from_parsed_result)

# String: double-quoted, non-greedy, ending at the first quote that is not
# preceded by a backslash; DOTALL lets a string span several lines.
string_literal = Regex(r'\".*?(?<!\\)\"', re.DOTALL)
# NOTE(review): the other literals use ``from_parsed_result`` while this
# one uses ``from_parse_result`` - confirm the attribute name against
# whispy_lispy.ast.String.
string_literal.setParseAction(ast.String.from_parse_result)

# Float is tried before int because both start with a run of digits.
grammar = OneOrMore(float_literal | int_literal | bool_literal | string_literal)
def dsl_parser(datestr: str, year: int) -> date:
    """
    Parse dsl str for a given year.

    The expression is rewritten in passes: special names and yearless
    dates become ISO date strings, then ordinal/weekday expressions
    (``before``/``after``/``between``) are evaluated, then the logical
    operators ``OR`` and ``AND`` are reduced, and the first remaining ISO
    date is returned.

    >>> from officiumdivinum.DSL.dsl_parser import dsl_parser
    >>> dsl_parser("Easter", 2020)
    datetime.date(2020, 4, 12)

    >>> dsl_parser("1 Jan", 2020)
    datetime.date(2020, 1, 1)

    >>> dsl_parser("Sun between 2 Jan 4 Jan OR 2 Jan", 2016)
    datetime.date(2016, 1, 3)

    >>> dsl_parser("Sun between 2 Jan 4 Jan OR 2 Jan", 2017)
    datetime.date(2017, 1, 2)

    >>> dsl_parser("22nd Sun after Pentecost", 2021)
    datetime.date(2021, 10, 24)

    Parameters
    ----------
    datestr : str
        Expression to be parsed.
    year : int
        Year in which to evaluate expression.

    Returns
    -------
    date
        A date in the year in question.

    Raises
    ------
    DSLError
        If a reduction pass does not terminate within the iteration
        limit, or if no date is left once the expression is reduced.
    """

    def _reduce(text, parser, keywords):
        """Rewrite ``text`` with ``parser`` until none of ``keywords`` remain."""
        count = 0
        while any(word in text for word in keywords):
            text = parser.transformString(text)
            # Guard against expressions the parser cannot reduce.
            if count > 10:
                raise DSLError(f"Recursion limit reached, got as far as {text}")
            count += 1
        return text

    # First we convert all possible date representations into isodate
    # strings (yyyy-mm-dd).

    # convert specials
    special = oneOf(specials.keys())
    special.setParseAction(lambda t: str(specials[t[0]](year)) + " ")
    datestr = special[...].transformString(datestr)

    # convert yearless date expressions into dates
    yearless = Word(nums) + oneOf(months)
    yearless.setParseAction(
        lambda t: str(date(year, months.index(t[1]) + 1, int(t[0]))) + " "
    )
    datestr = yearless[...].transformString(datestr)

    # All dates are now isodates.
    isodate = Regex(r"[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]")

    # handle [ordinals] weekdays + timedeltas
    timedelta_expr = Group(
        Optional(oneOf(ordinals))("ordinal")
        + oneOf(days)("day")
        + oneOf(["before", "after"])("delta")
        + isodate("date")
    )
    timedelta_expr.setParseAction(_parse_timedelta)

    # handle betweens
    between_expr = Group(
        Optional(oneOf(ordinals))("ordinal")
        + oneOf(days)("day")
        + "between"
        + isodate("date1")
        + isodate("date2")
    )
    between_expr.setParseAction(_parse_between)

    date_exprs = between_expr[...] + timedelta_expr[...]
    datestr = _reduce(datestr, date_exprs, ("after", "before", "between"))

    # At this point we only have calendar dates, components of date
    # expressions ('before', 'after', 'on or before' or 'on or after';
    # ordinal weekdays and 'between' expressions) and operators ('AND'
    # 'OR', 'NOT'). Since operators operate on the *logical status*
    # of operands, and this logical status is False if the operand
    # doesn't evaluate to a date, and otherwise a calendar date, we
    # deal with them last.

    # Then we build parsers for individual objects
    # fail here if we match anything except isodates, 'or' or 'and'
    # illegal = ~(isodate | oneOf(["OR", "AND"]))[1...]

    # At this point the datestr is composed entirely of evaluated date
    # expressions split by logical operators. We reduce these by looping
    # over them. The operator parsers do not change between iterations,
    # so each is built once.
    or_expr = Group((isodate("lhs") ^ "False") + "OR" + (isodate("rhs") ^ "False"))
    or_expr.setParseAction(_parse_or)
    datestr = _reduce(datestr, or_expr[...], ("OR",))

    and_expr = Group((isodate("lhs") ^ "False") + "AND" + (isodate("rhs") ^ "False"))
    and_expr.setParseAction(_parse_and)
    datestr = _reduce(datestr, and_expr[...], ("AND",))

    # convert dates to datetime.date() objects
    isodate.setParseAction(lambda s, l, t: date.fromisoformat(t[0]))
    parsed = isodate[...].parseString(datestr)
    try:
        return parsed[0]
    except IndexError as err:
        raise DSLError("Unable to parse") from err
return self.name == other.name def __ne__(self, other): return self.name != other.name # Character literals LCURLY,RCURLY,LPAREN,RPAREN,QUOTE,COMMA,AT,EQUALS,HASH = map(Suppress,'{}()",@=#') def bracketed(expr): """ Return matcher for `expr` between curly brackets or parentheses """ return (LPAREN + expr + RPAREN) | (LCURLY + expr + RCURLY) # Define parser components for strings (the hard bit) chars_no_curly = Regex(r"[^{}]+") chars_no_curly.leaveWhitespace() chars_no_quotecurly = Regex(r'[^"{}]+') chars_no_quotecurly.leaveWhitespace() # Curly string is some stuff without curlies, or nested curly sequences curly_string = Forward() curly_item = Group(curly_string) | chars_no_curly curly_string << LCURLY + ZeroOrMore(curly_item) + RCURLY # quoted string is either just stuff within quotes, or stuff within quotes, within # which there is nested curliness quoted_item = Group(curly_string) | chars_no_quotecurly quoted_string = QUOTE + ZeroOrMore(quoted_item) + QUOTE # Numbers can just be numbers. Only integers though. number = Regex('[0-9]+')
def _any_keyword(words):
    """Return an alternation of CKeyword matchers, tried in the given order."""
    return reduce(
        lambda left, right: left | right,
        [CKeyword(word) for word in words],
    )


# Unary operators: the symbols plus the NOT keyword.
unary_op = oneOf('- + ~', caseless=True)
unary_op |= CKeyword('NOT')

# TODO this does not encode precedence
# Binary operators: the symbolic operators plus the keyword operators.
binary_op = oneOf("|| * / % + - << >> & | < <= > >= = == != <>", caseless=True)
binary_op |= _any_keyword('IS,IS NOT,IN,LIKE,GLOB,MATCH,REGEXP,AND,OR'.split(','))

# these direct from the SQLite docs
KEYWORDS = 'ABORT ACTION ADD AFTER ALL ALTER ANALYZE AND AS ASC ATTACH AUTOINCREMENT BEFORE BEGIN BETWEEN BY CASCADE CASE CAST CHECK COLLATE COLUMN COMMIT CONFLICT CONSTRAINT CREATE CROSS CURRENT_DATE CURRENT_TIME CURRENT_TIMESTAMP DATABASE DEFAULT DEFERRABLE DEFERRED DELETE DESC DETACH DISTINCT DROP EACH ELSE END ESCAPE EXCEPT EXCLUSIVE EXISTS EXPLAIN FAIL FOR FOREIGN FROM FULL GLOB GROUP HAVING IF IGNORE IMMEDIATE IN INDEX INDEXED INITIALLY INNER INSERT INSTEAD INTERSECT INTO IS ISNULL JOIN KEY LEFT LIKE LIMIT MATCH NATURAL NO NOT NOTNULL NULL OF OFFSET ON OR ORDER OUTER PLAN PRAGMA PRIMARY QUERY RAISE REFERENCES REGEXP REINDEX RELEASE RENAME REPLACE RESTRICT RIGHT ROLLBACK ROW SAVEPOINT SELECT SET TABLE TEMP TEMPORARY THEN TO TRANSACTION TRIGGER UNION UNIQUE UPDATE USING VACUUM VALUES VIEW VIRTUAL WHEN WHERE'

# TODO probably not right charset & does not account for escaping identifiers
# https://www.sqlite.org/lang_keywords.html
# An identifier is a plain name that is not one of the reserved keywords.
_reserved_word = _any_keyword(KEYWORDS.split(' '))
identifier = NotAny(_reserved_word) + Regex('[a-zA-Z_][a-zA-Z0-9_]*')

# for the purposes of attaching parse actions to these
# objects they need to all be separate. table_in_column
# is to distinguish between tables as found in the grammar
# and those specifically found (optionally) in a column spec
# (which gets triggered whether there's actually a table part
# or not.)
table_name = identifier.copy()
table_in_column = table_name.copy()
database_name = identifier.copy()
column_name = identifier.copy()

# [database.][table.]column
_database_prefix = Optional(database_name + '.')
_table_prefix = Optional(table_in_column + '.')
column = _database_prefix + _table_prefix + column_name

integer_num = Regex('[0-9]+')
ParserElement, ) ParserElement.enablePackrat() COLON, LBRACK, RBRACK, LBRACE, RBRACE, TILDE, CARAT = map(Literal, ":[]{}~^") LPAR, RPAR = map(Suppress, "()") and_ = CaselessKeyword("AND") or_ = CaselessKeyword("OR") not_ = CaselessKeyword("NOT") to_ = CaselessKeyword("TO") keyword = and_ | or_ | not_ expression = Forward() valid_word = Regex(r'([a-zA-Z0-9*_+.-]|\\[!(){}\[\]^"~*?\\:])+').setName("word") valid_word.setParseAction(lambda t: t[0].replace("\\\\", chr(127)).replace("\\", "").replace(chr(127), "\\")) string = QuotedString('"') required_modifier = Literal("+")("required") prohibit_modifier = Literal("-")("prohibit") integer = Regex(r"\d+").setParseAction(lambda t: int(t[0])) proximity_modifier = Group(TILDE + integer("proximity")) number = Regex(r"\d+(\.\d+)?").setParseAction(lambda t: float(t[0])) fuzzy_modifier = TILDE + Optional(number, default=0.5)("fuzzy") term = Forward() field_name = valid_word.copy().setName("fieldname") incl_range_search = Group(LBRACK + term("lower") + to_ + term("upper") + RBRACK) excl_range_search = Group(LBRACE + term("lower") + to_ + term("upper") + RBRACE)
":<=": "lte", ":>": "gt", ":>=": "gte", } # Parsing grammar AND = CaselessKeyword("AND") OR = Optional(CaselessKeyword("OR")) NOT = CaselessKeyword("NOT") # Search operator OPERATOR = oneOf(OPERATOR_MAP.keys()) # Field name, explicitely exlude URL like patters FIELD = Regex(r"""(?!http|ftp|https|mailto)[a-zA-Z_]+""") # Match token WORD = Regex(r"""[^ \(\)]([^ '"]*[^ '"\)])?""") DATE = Word("0123456789:.-T") # Date range RANGE = "[" + DATE + "to" + DATE + "]" # Match value REGEX_STRING = "r" + RawQuotedString('"') STRING = REGEX_STRING | RawQuotedString("'") | RawQuotedString('"') | WORD # Single term, either field specific or not TERM = (FIELD + OPERATOR + (RANGE | STRING)) | STRING
def define_dot_parser(self):
    """Define dot grammar

    Based on the grammar http://www.graphviz.org/doc/info/lang.html

    Builds the pyparsing grammar for a whole graph, attaches this
    object's ``_proc_*`` / ``_main_graph_stmt`` methods as parse actions,
    and returns the top-level parser element.
    """
    # punctuation
    colon = Literal(":")
    lbrace = Suppress("{")
    rbrace = Suppress("}")
    lbrack = Suppress("[")
    rbrack = Suppress("]")
    lparen = Literal("(")
    rparen = Literal(")")
    equals = Suppress("=")
    comma = Literal(",")
    semi = Suppress(";")
    at = Literal("@")
    minus = Literal("-")
    pluss = Suppress("+")

    # keywords
    strict_ = CaselessLiteral("strict")
    graph_ = CaselessLiteral("graph")
    digraph_ = CaselessLiteral("digraph")
    subgraph_ = CaselessLiteral("subgraph")
    node_ = CaselessLiteral("node")
    edge_ = CaselessLiteral("edge")

    # Every punctuation character except '_' plus all whitespace; an
    # unquoted alphabetic ID may contain none of these.
    punctuation_ = "".join(
        [c for c in string.punctuation if c not in '_']) + string.whitespace

    # token definitions
    identifier = Word(alphanums + "_").setName("identifier")

    #double_quoted_string = QuotedString('"', multiline=True,escChar='\\',
    #    unquoteResults=True)
    # dblQuotedString
    double_quoted_string = Regex(r'\"(?:\\\"|\\\\|[^"])*\"', re.MULTILINE)
    double_quoted_string.setParseAction(removeQuotes)
    # Quoted strings may be concatenated with '+'; the pieces are combined
    # into one token even when separated by whitespace.
    quoted_string = Combine(
        double_quoted_string +
        Optional(OneOrMore(pluss + double_quoted_string)), adjacent=False)
    alphastring_ = OneOrMore(CharsNotIn(punctuation_))

    def parse_html(s, loc, toks):
        return '<<%s>>' % ''.join(toks[0])

    opener = '<'
    closer = '>'
    try:
        html_text = pyparsing.nestedExpr(
            opener, closer,
            ((CharsNotIn(opener + closer).setParseAction(lambda t: t[0])))
        ).setParseAction(parse_html)
    except Exception:
        # Best-effort fallback for pyparsing versions without nestedExpr.
        # Catching Exception rather than using a bare except keeps
        # KeyboardInterrupt and SystemExit from being swallowed here.
        log.debug('nestedExpr not available.')
        log.warning('Old version of pyparsing detected. Version 1.4.8 or '
                    'later is recommended. Parsing of html labels may not '
                    'work properly.')
        html_text = Combine(Literal("<<") + OneOrMore(CharsNotIn(",]")))

    ID = (alphastring_ | html_text |
          quoted_string |  #.setParseAction(strip_quotes) |
          identifier).setName("ID")

    float_number = Combine(Optional(minus) +
                           OneOrMore(Word(nums + "."))).setName("float_number")

    righthand_id = (float_number | ID).setName("righthand_id")

    port_angle = (at + ID).setName("port_angle")

    port_location = ((OneOrMore(Group(colon + ID)) |
                      Group(colon + lparen + ID + comma + ID + rparen))).setName("port_location")

    port = Combine((Group(port_location + Optional(port_angle)) |
                    Group(port_angle + Optional(port_location)))).setName("port")

    node_id = (ID + Optional(port))
    a_list = OneOrMore(ID + Optional(equals + righthand_id) +
                       Optional(comma.suppress())).setName("a_list")

    attr_list = OneOrMore(lbrack + Optional(a_list) +
                          rbrack).setName("attr_list").setResultsName('attrlist')

    attr_stmt = ((graph_ | node_ | edge_) + attr_list).setName("attr_stmt")

    edgeop = (Literal("--") | Literal("->")).setName("edgeop")

    # stmt_list and edge_point are recursive, so they are declared as
    # Forward placeholders and filled in further down.
    stmt_list = Forward()
    graph_stmt = (lbrace + Optional(stmt_list) +
                  rbrace + Optional(semi)).setName("graph_stmt")

    edge_point = Forward()

    edgeRHS = OneOrMore(edgeop + edge_point)
    edge_stmt = edge_point + edgeRHS + Optional(attr_list)

    subgraph = (Optional(subgraph_, '') + Optional(ID, '') +
                Group(graph_stmt)).setName("subgraph").setResultsName('ssubgraph')

    edge_point << (subgraph | graph_stmt | node_id)

    node_stmt = (node_id + Optional(attr_list) +
                 Optional(semi)).setName("node_stmt")

    assignment = (ID + equals + righthand_id).setName("assignment")
    stmt = (assignment | edge_stmt | attr_stmt | subgraph |
            graph_stmt | node_stmt).setName("stmt")
    stmt_list << OneOrMore(stmt + Optional(semi))

    graphparser = (
        (Optional(strict_, 'notstrict') + ((graph_ | digraph_)) +
         Optional(ID, '') + lbrace + Group(Optional(stmt_list)) +
         rbrace).setResultsName("graph"))

    singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)

    # actions
    graphparser.ignore(singleLineComment)
    graphparser.ignore(cStyleComment)
    node_id.setParseAction(self._proc_node_id)
    assignment.setParseAction(self._proc_attr_assignment)
    a_list.setParseAction(self._proc_attr_list)
    edge_stmt.setParseAction(self._proc_edge_stmt)
    node_stmt.setParseAction(self._proc_node_stmt)
    attr_stmt.setParseAction(self._proc_default_attr_stmt)
    attr_list.setParseAction(self._proc_attr_list_combine)
    subgraph.setParseAction(self._proc_subgraph_stmt)
    #graph_stmt.setParseAction(self._proc_graph_stmt)
    graphparser.setParseAction(self._main_graph_stmt)
    return graphparser
# Punctuation that structures a requirement string but is dropped from
# the parse results.
LBRACKET, RBRACKET, LPAREN, RPAREN, COMMA, SEMICOLON, AT = (
    L(symbol).suppress() for symbol in "[](),;@"
)

# An identifier starts and ends with an alphanumeric; runs of '-', '_'
# and '.' may only appear between alphanumerics.
PUNCTUATION = Word("-_.")
IDENTIFIER_END = ALPHANUM | (ZeroOrMore(PUNCTUATION) + ALPHANUM)
IDENTIFIER = Combine(ALPHANUM + ZeroOrMore(IDENTIFIER_END))

NAME = IDENTIFIER.setResultsName("name")
EXTRA = IDENTIFIER

# "@ <url>": everything up to the next space is taken as the URL.
URI = Regex(r'[^ ]+').setResultsName("url")
URL = (AT + URI)

# "[extra1, extra2, ...]"
EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA)
EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET).setResultsName("extras")

# A single version clause, matched with either specifier pattern; both
# patterns are compiled verbose and case-insensitive.
_SPECIFIER_FLAGS = re.VERBOSE | re.IGNORECASE
VERSION_PEP440 = Regex(Specifier._regex_str, _SPECIFIER_FLAGS)
VERSION_LEGACY = Regex(LegacySpecifier._regex_str, _SPECIFIER_FLAGS)
VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY

# Comma-separated clauses, re-joined with "," into a single raw string.
VERSION_MANY = Combine(
    VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE),
    joinString=",",
    adjacent=False,
).setResultsName("_raw_spec")


def _raw_spec_or_empty(s, l, t):
    """Return the raw specifier string, or '' when none was given."""
    return t._raw_spec or ''


# The specifier list is optional and may be wrapped in parentheses.
_VERSION_SPEC = Optional(((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY))
_VERSION_SPEC.setParseAction(_raw_spec_or_empty)
return [(t[0][0].port, t[0][1].port)] def normalize_ip(t): # returns a normalized ip return t.ip + "/" + (str(t.mask.mask) if t.mask else "32") port = Group(Word(nums).setParseAction(to_int)('port')) port_range = Group((port + Word("-").suppress() + port)('range')) normalized_port_range = (port ^ port_range).setParseAction(to_port_range) ports = delimitedList(normalized_port_range)('ports') # IP addresses, name of another group, or sg-* security_group = Regex("sg-[\w\d]+") group_name = Regex("[\w\d\-]+") mask = Word("/") + Word(nums).setParseAction(to_int)('mask') ip= (Combine(Word(nums) + ('.' + Word(nums))*3)('ip') + Optional(mask)('mask')).setParseAction(normalize_ip) parser = Optional(protocol)('protocol') + \ Optional(port_) + \ ports + \ (ip.setResultsName('ip_and_mask') ^ security_group.setResultsName('security_group') ^ group_name('group_name')) class Rule(object): def __init__(self, protocol, from_port, to_port, address=None, group=None, group_name=None): """constructs a new rule
from pyparsing import (
    Literal,
    CaselessKeyword,
    Forward,
    Regex,
    QuotedString,
    Suppress,
    Optional,
    Group,
    FollowedBy,
    operatorPrecedence,
    opAssoc,
    ParseException,
    ParserElement,
)

ParserElement.enablePackrat()

# Punctuation kept in the results ...
COLON, LBRACK, RBRACK, LBRACE, RBRACE, TILDE, CARAT = (
    Literal(symbol) for symbol in ":[]{}~^"
)
# ... and punctuation that is dropped from them.
LPAR, RPAR = (Suppress(symbol) for symbol in "()")

and_ = CaselessKeyword("AND")
or_ = CaselessKeyword("OR")
not_ = CaselessKeyword("NOT")
to_ = CaselessKeyword("TO")
keyword = and_ | or_ | not_

expression = Forward()


def _unescape_word(t):
    """Strip escaping backslashes from a word, keeping escaped backslashes."""
    # chr(127) stands in for an escaped backslash while the escaping
    # backslashes are removed, and is then turned back into one backslash.
    placeholder = chr(127)
    word = t[0].replace('\\\\', placeholder)
    word = word.replace('\\', '')
    return word.replace(placeholder, '\\')


# A word is a run of plain characters and/or backslash-escaped specials.
valid_word = Regex(r'([a-zA-Z0-9*_+.-]|\\[!(){}\[\]^"~*?\\:])+').setName("word")
valid_word.setParseAction(_unescape_word)

string = QuotedString('"')

required_modifier = Literal("+").setResultsName("required")
prohibit_modifier = Literal("-").setResultsName("prohibit")

integer = Regex(r"\d+").setParseAction(lambda t: int(t[0]))
proximity_modifier = Group(TILDE + integer("proximity"))

number = Regex(r'\d+(\.\d+)?').setParseAction(lambda t: float(t[0]))
# A bare '~' yields the default value 0.5.
fuzzy_modifier = TILDE + Optional(number, default=0.5)("fuzzy")

term = Forward()
field_name = valid_word.copy().setName("fieldname")
class DdlParse(DdlParseBase):
    """DDL parser

    Parses ``CREATE TABLE`` statements with a pyparsing grammar held in
    class attributes and fills a ``DdlParseTable`` with the table name,
    optional schema, columns and table-level constraints.
    """

    # Punctuation, suppressed from the parse results.
    _LPAR, _RPAR, _COMMA, _SEMICOLON, _DOT, _DOUBLEQUOTE, _BACKQUOTE, _SPACE = map(
        Suppress, "(),;.\"` ")

    # Keywords. Each comma-separated entry becomes one CaselessKeyword.
    _CREATE, _TABLE, _TEMP, _CONSTRAINT, _NOT_NULL, _PRIMARY_KEY, _UNIQUE, _UNIQUE_KEY, _FOREIGN_KEY, _REFERENCES, _KEY, _CHAR_SEMANTICS, _BYTE_SEMANTICS = \
        map(CaselessKeyword, "CREATE, TABLE, TEMP, CONSTRAINT, NOT NULL, PRIMARY KEY, UNIQUE, UNIQUE KEY, FOREIGN KEY, REFERENCES, KEY, CHAR, BYTE".replace(", ", ",").split(","))
    _TYPE_UNSIGNED, _TYPE_ZEROFILL = \
        map(CaselessKeyword, "UNSIGNED, ZEROFILL".replace(", ", ",").split(","))
    _COL_ATTR_DISTKEY, _COL_ATTR_SORTKEY, _COL_ATTR_CHARACTER_SET = \
        map(CaselessKeyword, "DISTKEY, SORTKEY, CHARACTER SET".replace(", ", ",").split(","))
    _FK_MATCH = \
        CaselessKeyword("MATCH") + Word(alphanums + "_")
    _FK_ON, _FK_ON_OPT_RESTRICT, _FK_ON_OPT_CASCADE, _FK_ON_OPT_SET_NULL, _FK_ON_OPT_NO_ACTION = \
        map(CaselessKeyword, "ON, RESTRICT, CASCADE, SET NULL, NO ACTION".replace(", ", ",").split(","))
    _FK_ON_DELETE = \
        _FK_ON + CaselessKeyword("DELETE") + (_FK_ON_OPT_RESTRICT | _FK_ON_OPT_CASCADE | _FK_ON_OPT_SET_NULL | _FK_ON_OPT_NO_ACTION)
    _FK_ON_UPDATE = \
        _FK_ON + CaselessKeyword("UPDATE") + (_FK_ON_OPT_RESTRICT | _FK_ON_OPT_CASCADE | _FK_ON_OPT_SET_NULL | _FK_ON_OPT_NO_ACTION)
    _SUPPRESS_QUOTE = _BACKQUOTE | _DOUBLEQUOTE

    # "--" followed by the rest of the line.
    _COMMENT = Suppress("--" + Regex(r".+"))

    _CREATE_TABLE_STATEMENT = Suppress(_CREATE) + Optional(_TEMP)("temp") + Suppress(_TABLE) + Optional(Suppress(CaselessKeyword("IF NOT EXISTS"))) \
        + Optional(_SUPPRESS_QUOTE) + Optional(Word(alphanums + "_")("schema") + Optional(_SUPPRESS_QUOTE) + _DOT + Optional(_SUPPRESS_QUOTE)) + Word(alphanums + "_<>")("table") + Optional(_SUPPRESS_QUOTE) \
        + _LPAR \
        + delimitedList(
            OneOrMore(
                _COMMENT
                |
                # Ignore Index
                Suppress(_KEY + Word(alphanums + "_'`() "))
                |
                # Table-level constraint.
                Group(
                    Optional(Suppress(_CONSTRAINT) + Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_")("name") + Optional(_SUPPRESS_QUOTE))
                    + (
                        (
                            (_PRIMARY_KEY ^ _UNIQUE ^ _UNIQUE_KEY ^ _NOT_NULL)("type")
                            + Optional(_SUPPRESS_QUOTE) + Optional(Word(alphanums + "_"))("name") + Optional(_SUPPRESS_QUOTE)
                            + _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_") + Optional(_SUPPRESS_QUOTE)))("constraint_columns") + _RPAR
                        )
                        |
                        (
                            (_FOREIGN_KEY)("type")
                            + _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_") + Optional(_SUPPRESS_QUOTE)))("constraint_columns") + _RPAR
                            + Optional(
                                Suppress(_REFERENCES)
                                + Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_")("references_table") + Optional(_SUPPRESS_QUOTE)
                                + _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_") + Optional(_SUPPRESS_QUOTE)))("references_columns") + _RPAR
                                + Optional(_FK_MATCH)("references_fk_match")  # MySQL
                                + Optional(_FK_ON_DELETE)("references_fk_on_delete")  # MySQL
                                + Optional(_FK_ON_UPDATE)("references_fk_on_update")  # MySQL
                            )
                        )
                    )
                )("constraint")
                |
                # Column definition: name, type, optional array brackets,
                # optional column constraints.
                Group(
                    ((_SUPPRESS_QUOTE + Word(alphanums + " _")("name") + _SUPPRESS_QUOTE) ^ (Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_")("name") + Optional(_SUPPRESS_QUOTE)))
                    + Group(
                        Group(
                            Word(alphanums + "_")
                            + Optional(CaselessKeyword("WITHOUT TIME ZONE") ^ CaselessKeyword("WITH TIME ZONE") ^ CaselessKeyword("PRECISION") ^ CaselessKeyword("VARYING"))
                        )("type_name")
                        + Optional(_LPAR + Regex(r"[\d\*]+\s*,*\s*\d*")("length") + Optional(_CHAR_SEMANTICS | _BYTE_SEMANTICS)("semantics") + _RPAR)
                        + Optional(_TYPE_UNSIGNED)("unsigned")
                        + Optional(_TYPE_ZEROFILL)("zerofill")
                    )("type")
                    + Optional(Word(r"\[\]"))("array_brackets")
                    + Optional(
                        # Do not read a trailing "--" comment as a constraint.
                        Regex(r"(?!--)", re.IGNORECASE)
                        + Group(
                            # NOTE(review): as written this pattern requires
                            # the NOT prefix and makes the final "L"
                            # optional; "(?:NOT\s+)?NULL" may have been
                            # intended - confirm before changing.
                            Optional(Regex(r"\b(?:NOT\s+)NULL?\b", re.IGNORECASE))("null")
                            & Optional(Regex(r"\bAUTO_INCREMENT\b", re.IGNORECASE))("auto_increment")
                            & Optional(Regex(r"\b(UNIQUE|PRIMARY)(?:\s+KEY)?\b", re.IGNORECASE))("key")
                            & Optional(Regex(
                                r"\bDEFAULT\b\s+(?:((?:[A-Za-z0-9_\.\'\" -\{\}]|[^\x01-\x7E])*\:\:(?:character varying)?[A-Za-z0-9\[\]]+)|(?:\')((?:\\\'|[^\']|,)+)(?:\')|(?:\")((?:\\\"|[^\"]|,)+)(?:\")|([^,\s]+))",
                                re.IGNORECASE))("default")
                            & Optional(Regex(r"\bCOMMENT\b\s+(\'(\\\'|[^\']|,)+\'|\"(\\\"|[^\"]|,)+\"|[^,\s]+)", re.IGNORECASE))("comment")
                            & Optional(Regex(r"\bENCODE\s+[A-Za-z0-9]+\b", re.IGNORECASE))("encode")  # Redshift
                            & Optional(_COL_ATTR_DISTKEY)("distkey")  # Redshift
                            & Optional(_COL_ATTR_SORTKEY)("sortkey")  # Redshift
                            & Optional(Suppress(_COL_ATTR_CHARACTER_SET) + Word(alphanums + "_")("character_set"))  # MySQL
                        )("constraint")
                    )
                )("column")
                |
                _COMMENT
            )
        )("columns")

    _DDL_PARSE_EXPR = Forward()
    _DDL_PARSE_EXPR << OneOrMore(_COMMENT | _CREATE_TABLE_STATEMENT)

    def __init__(self, ddl=None, source_database=None):
        """
        :param ddl: DDL script; may also be supplied later via ``parse``
        :param source_database: source database option, passed to the
            base class and to the table object
        """
        super().__init__(source_database)
        self._ddl = ddl
        self._table = DdlParseTable(source_database)

    @property
    def source_database(self):
        """
        Source database option

        :param source_database: enum DdlParse.DATABASE
        """
        return super().source_database

    @source_database.setter
    def source_database(self, source_database):
        # Look the inherited property up relative to DdlParse itself.
        # super(self.__class__, self.__class__) resolves relative to the
        # runtime class, so on a subclass instance it finds this very
        # setter again and recurses without end.
        super(DdlParse, DdlParse).source_database.__set__(self, source_database)
        # Keep the table object in step with the parser.
        self._table.source_database = source_database

    @property
    def ddl(self):
        """DDL script"""
        return self._ddl

    @ddl.setter
    def ddl(self, ddl):
        self._ddl = ddl

    def parse(self, ddl=None, source_database=None):
        """
        Parse DDL script.

        :param ddl: DDL script; when omitted the previously stored
            script is used
        :param source_database: source database option; when omitted the
            current setting is kept
        :return: DdlParseTable, Parsed table define info.
        :raises ValueError: if no DDL script has been specified
        """
        if ddl is not None:
            self._ddl = ddl

        if source_database is not None:
            self.source_database = source_database

        if self._ddl is None:
            raise ValueError("DDL is not specified")

        ret = self._DDL_PARSE_EXPR.parseString(self._ddl)
        # print(ret.dump())

        if "schema" in ret:
            self._table.schema = ret["schema"]

        self._table.name = ret["table"]
        self._table.is_temp = "temp" in ret

        for ret_col in ret["columns"]:

            if ret_col.getName() == "column":
                # add column
                self._table.columns.append(
                    column_name=ret_col["name"],
                    data_type_array=ret_col["type"],
                    array_brackets=ret_col['array_brackets'] if "array_brackets" in ret_col else None,
                    constraint=ret_col['constraint'] if "constraint" in ret_col else None)

            elif ret_col.getName() == "constraint":
                # set column constraint
                for col_name in ret_col["constraint_columns"]:
                    col = self._table.columns[col_name]

                    if ret_col["type"] == "PRIMARY KEY":
                        col.not_null = True
                        col.primary_key = True
                    elif ret_col["type"] in ["UNIQUE", "UNIQUE KEY"]:
                        col.unique = True
                    elif ret_col["type"] == "NOT NULL":
                        col.not_null = True

        return self._table
# Character literals, all dropped from the parse results.
LPAREN, RPAREN, QUOTE, COMMA, AT, EQUALS, HASH = map(Suppress, '()",@=#')


def bracketed(expr):
    """ Return matcher for `expr` wrapped in parentheses or in curly brackets """
    in_parens = LPAREN + expr + RPAREN
    in_curlies = LCURLY + expr + RCURLY
    return in_parens | in_curlies


# Define parser components for strings (the hard bit)
# Whitespace inside a string is part of its content, so these matchers
# must not skip it.
chars_no_curly = Regex(r"[^{}]+").leaveWhitespace()
chars_no_quotecurly = Regex(r'[^"{}]+').leaveWhitespace()

# Curly string is some stuff without curlies, or nested curly sequences
curly_string = Forward()
curly_item = Group(curly_string) | chars_no_curly
curly_string << (LCURLY + ZeroOrMore(curly_item) + RCURLY)

# quoted string is either just stuff within quotes, or stuff within quotes,
# within which there is nested curliness
quoted_item = Group(curly_string) | chars_no_quotecurly
quoted_string = QUOTE + ZeroOrMore(quoted_item) + QUOTE

# Numbers can just be numbers. Only integers though.
number = Regex('[0-9]+')
elif val.startswith('`') and val.endswith('`'): val = "'" + val[1:-1].replace("``", "`") + "'" elif val.startswith("+"): val = val[1:] un = ast.literal_eval(val) return un def to_string(instring, tokensStart, retTokens): val = retTokens[0] val = "'"+val[1:-1].replace("''", "\\'")+"'" return {"literal": ast.literal_eval(val)} # NUMBERS realNum = Regex( r"[+-]?(\d+\.\d*|\.\d+)([eE][+-]?\d+)?").addParseAction(unquote) intNum = Regex(r"[+-]?\d+([eE]\+?\d+)?").addParseAction(unquote) # STRINGS, NUMBERS, VARIABLES sqlString = Regex(r"\'(\'\'|\\.|[^'])*\'").addParseAction(to_string) identString = Regex(r'\"(\"\"|\\.|[^"])*\"').addParseAction(unquote) mysqlidentString = Regex(r'\`(\`\`|\\.|[^`])*\`').addParseAction(unquote) ident = Combine(~RESERVED + (delimitedList(Literal("*") | Word(alphas + "_", alphanums + "_$") | identString | mysqlidentString, delim=".", combine=True))).setName("identifier") # EXPRESSIONS expr = Forward() # CASE case = ( CASE +
# [165] PN_CHARS_U ::= PN_CHARS_BASE | '_' PN_CHARS_U_re = "_" + PN_CHARS_BASE_re # [167] PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] PN_CHARS_re = u"\\-0-9\u00B7\u0300-\u036F\u203F-\u2040" + PN_CHARS_U_re # PN_CHARS = Regex(u'[%s]'%PN_CHARS_re, flags=re.U) # [168] PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS|'.')* PN_CHARS)? PN_PREFIX = Regex(ur"[%s](?:[%s\.]*[%s])?" % (PN_CHARS_BASE_re, PN_CHARS_re, PN_CHARS_re), flags=re.U) # [140] PNAME_NS ::= PN_PREFIX? ':' PNAME_NS = Optional(Param("prefix", PN_PREFIX)) + Suppress(":").leaveWhitespace() # [173] PN_LOCAL_ESC ::= '\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%' ) PN_LOCAL_ESC = Regex("\\\\[_~\\.\\-!$&\"'()*+,;=/?#@%]") PN_LOCAL_ESC.setParseAction(lambda x: x[0][1:]) # [172] HEX ::= [0-9] | [A-F] | [a-f] # HEX = Regex('[0-9A-Fa-f]') # not needed # [171] PERCENT ::= '%' HEX HEX PERCENT = Regex("%[0-9a-fA-F]{2}") PERCENT.setParseAction(lambda x: unichr(int(x[0][1:], 16))) # [170] PLX ::= PERCENT | PN_LOCAL_ESC PLX = PERCENT | PN_LOCAL_ESC # [169] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX ) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX) )? PN_LOCAL = Combine( (Regex(u"[%s0-9:]" % PN_CHARS_U_re, flags=re.U) | PLX)
def templateparser(monitor):
    """Return a Parser for templates, reporting to ``monitor``.

    The grammar is the factory's template grammar with an alternative
    that matches the empty string; both use ``Text.pa`` as parse action.
    """
    factory = GFactory(scalarpa = Text.pa, boundarychars = '', monitor = monitor)
    template = factory.create(factory.templatepa)
    # '^$' lets an entirely empty input parse as well.
    empty = Regex('^$').setParseAction(Text.pa)
    return Parser(template | empty)
return PackageDirective(t[0]) def import_directive_fn(s,l,t): return ImportDirective(t[0]) def field_fn(s,l,t): return Field(*t) def service_definition_fn(s,l,t): return ServiceDefintion(t[0]) def top_level_statement_fn(s,l,t): return TopLevelStatement(t[0]) def parser_fn(s,l,t): return Parser(t[0]) identifier = Word(alphas+"_",alphanums+"_").setName("identifier") identifier.setParseAction(identifier_fn) integer = Regex(r"[+-]?\d+") integer.setParseAction(integer_fn) LBRACE = Suppress('{') RBRACE = Suppress('}') LBRACK = Suppress('[') RBRACK = Suppress(']') LPAR = Suppress('(') RPAR = Suppress(')') EQ = Suppress('='); SEMI = Suppress(';') SYNTAX = Keyword('syntax') IMPORT = Keyword('import') PACKAGE = Keyword('package') MESSAGE = Keyword('message')
def gettext(pa):
    """Return a matcher for a run of ordinary text, with ``pa`` as parse action.

    Ordinary text is one or more characters that are neither ``$``, nor
    whitespace, nor one of the ``boundarychars``. Leading whitespace is
    not skipped before matching.
    """
    excluded = re.escape(boundarychars)
    text = Regex(r"[^$\s%s]+" % excluded)
    text = text.leaveWhitespace()
    return text.setParseAction(pa)