def define_identifier(self):
    """
    Return the syntax definition for an identifier.

    An individual identifier is a run of word characters (``\\w``) and the
    grammar's ``identifier_spacing`` token that does not start with a
    Unicode digit.  The full identifier is zero or more individual
    identifiers, each followed by the grammar's ``namespace_separator``
    token, and then one final individual identifier.

    The namespace parts are exposed under the results name
    ``namespace_parts`` and the final part under ``identifier``.

    """
    # --- Defining the individual identifiers:
    # Getting all the Unicode numbers (BMP only) in a single string.
    # ``unichr``/``xrange`` only exist on Python 2; fall back to their
    # Python 3 equivalents instead of failing with a NameError.
    try:
        unicode_numbers = "".join(
            [unichr(n) for n in xrange(0x10000) if unichr(n).isdigit()])
    except NameError:
        unicode_numbers = "".join(
            [chr(n) for n in range(0x10000) if chr(n).isdigit()])
    unicode_number_expr = Regex("[%s]" % unicode_numbers, re.UNICODE)
    space_char = re.escape(self._grammar.get_token("identifier_spacing"))
    # Raw string: "\w" in a plain literal is an invalid escape sequence.
    identifier0 = Regex(r"[\w%s]+" % space_char, re.UNICODE)
    # Identifiers cannot start with a number:
    identifier0 = Combine(~unicode_number_expr + identifier0)
    identifier0.setName("individual_identifier")

    # --- Defining the namespaces:
    namespace_sep = Suppress(
        self._grammar.get_token("namespace_separator"))
    namespace = Group(ZeroOrMore(identifier0 + namespace_sep))
    namespace.setName("namespace")

    # --- The full identifier, which could have a namespace:
    identifier = Combine(
        namespace.setResultsName("namespace_parts") +
        identifier0.setResultsName("identifier"))
    identifier.setName("full_identifier")

    return identifier
def define_identifier(self):
    """
    Return the syntax definition for an identifier.

    The returned expression matches an optional chain of namespace parts
    (each one followed by the grammar's ``namespace_separator`` token) and a
    final individual identifier.  Individual identifiers consist of word
    characters plus the grammar's ``identifier_spacing`` token and may not
    begin with a Unicode digit.

    Results names: ``namespace_parts`` for the grouped namespace parts and
    ``identifier`` for the final part.

    """
    # --- Defining the individual identifiers:
    # Collect every Unicode digit of the BMP in a single string.  Python 2
    # spells the helpers unichr()/xrange(); Python 3 only has chr()/range().
    try:
        code_point_to_char, code_points = unichr, xrange(0x10000)
    except NameError:
        code_point_to_char, code_points = chr, range(0x10000)
    unicode_numbers = "".join(
        char for char in map(code_point_to_char, code_points)
        if char.isdigit())
    unicode_number_expr = Regex("[%s]" % unicode_numbers, re.UNICODE)

    space_char = re.escape(self._grammar.get_token("identifier_spacing"))
    # Raw literal so that "\w" reaches the regex engine without relying on
    # Python tolerating an invalid string escape.
    identifier0 = Regex(r"[\w%s]+" % space_char, re.UNICODE)
    # Identifiers cannot start with a number:
    identifier0 = Combine(~unicode_number_expr + identifier0)
    identifier0.setName("individual_identifier")

    # --- Defining the namespaces:
    namespace_sep = Suppress(self._grammar.get_token("namespace_separator"))
    namespace = Group(ZeroOrMore(identifier0 + namespace_sep))
    namespace.setName("namespace")

    # --- The full identifier, which could have a namespace:
    identifier = Combine(namespace.setResultsName("namespace_parts") +
                         identifier0.setResultsName("identifier"))
    identifier.setName("full_identifier")

    return identifier
def make_pyparsing_regex(regex_str: str, caseless: bool = False, name: str = None) -> Regex:
    """Build a pyparsing ``Regex`` element for *regex_str*.

    ``re.IGNORECASE`` is passed as the flags when *caseless* is true,
    otherwise no flags are set.  When *name* is truthy it is applied to the
    element with ``setName``; a ``None`` or empty name leaves it unnamed.
    """
    regex_flags = 0
    if caseless:
        regex_flags = re.IGNORECASE

    element = Regex(regex_str, flags=regex_flags)
    if not name:
        return element

    element.setName(name)
    return element
def define_identifier(self):
    """
    Return the syntax definition for an identifier.

    An individual identifier is a run of word characters (``\\w``) and the
    grammar's ``identifier_spacing`` token.  The full identifier is zero or
    more individual identifiers, each followed by the grammar's
    ``namespace_separator`` token, and then one final individual
    identifier (results names ``namespace_parts`` and ``identifier``).

    The returned expression additionally requires that the identifier is
    not followed by one of the operators ``^ + - * / ! %``.

    """
    # --- Defining the individual identifiers:
    # Getting all the Unicode numbers in a single string (Python 2 names
    # first, Python 3 equivalents as the fallback):
    try:
        unicode_numbers = "".join(
            [unichr(n) for n in xrange(0x10000) if unichr(n).isdigit()])
    except NameError:
        unicode_numbers = "".join(
            [chr(n) for n in range(0x10000) if chr(n).isdigit()])
    # NOTE(review): this expression is built but never used below, so
    # identifiers are currently allowed to start with a digit.  Either
    # restore ``~unicode_number_expr +`` inside the Combine or drop this
    # computation -- confirm which behaviour is intended.
    unicode_number_expr = Regex("[%s]" % unicode_numbers, re.UNICODE)
    space_char = re.escape(self._grammar.get_token("identifier_spacing"))
    # Raw string: "\w" in a plain literal is an invalid escape sequence.
    identifier0 = Regex(r"[\w%s]+" % space_char, re.UNICODE)
    identifier0 = Combine(identifier0)
    identifier0.setName("individual_identifier")

    # --- Defining the namespaces:
    namespace_sep = Suppress(
        self._grammar.get_token("namespace_separator"))
    namespace = Group(ZeroOrMore(identifier0 + namespace_sep))
    namespace.setName("namespace")

    # --- The full identifier, which could have a namespace:
    identifier = Combine(
        namespace.setResultsName("namespace_parts") +
        identifier0.setResultsName("identifier"))
    identifier.setName("full_identifier")

    # --- An identifier must not be followed by an operator:
    expop = Literal('^')
    multop = oneOf('* /')
    factop = Literal('!')
    modop = Literal('%')
    signop = oneOf('+ -')
    opers = expop | signop | multop | factop | modop

    identifier = identifier + NotAny(opers)

    return identifier
case = ( CASE + Group(ZeroOrMore((WHEN + expr("when") + THEN + expr("then")).addParseAction(to_when_call)))("case") + Optional(ELSE + expr("else")) + END ).addParseAction(to_case_call) selectStmt = Forward() compound = ( (Keyword("not", caseless=True)("op").setDebugActions(*debug) + expr("params")).addParseAction(to_json_call) | (Keyword("distinct", caseless=True)("op").setDebugActions(*debug) + expr("params")).addParseAction(to_json_call) | Keyword("null", caseless=True).setName("null").setDebugActions(*debug) | case | (Literal("(").setDebugActions(*debug).suppress() + selectStmt + Literal(")").suppress()) | (Literal("(").setDebugActions(*debug).suppress() + Group(delimitedList(expr)) + Literal(")").suppress()) | realNum.setName("float").setDebugActions(*debug) | intNum.setName("int").setDebugActions(*debug) | (Literal("-")("op").setDebugActions(*debug) + expr("params")).addParseAction(to_json_call) | sqlString.setName("string").setDebugActions(*debug) | ( Word(alphas)("op").setName("function name").setDebugActions(*debug) + Literal("(").setName("func_param").setDebugActions(*debug) + Optional(selectStmt | Group(delimitedList(expr)))("params") + ")" ).addParseAction(to_json_call).setDebugActions(*debug) | ident.copy().setName("variable").setDebugActions(*debug) ) expr << Group(infixNotation( compound, [ (
# Any run of characters drawn from the alphanumeric/dash/underscore charset.
domain_alphanum_dash_underscore = Word(domain_charset_alphanums_dash_underscore)
domain_alphanum_dash_underscore.setName('<alphanum-hyphen-underscore>')

# Maximum length of TLD is 63.
# Currently, 25 is the most seen (source: http://data.iana.org/TLD/tlds-alpha-by-domain.txt)
# NOTE(review): the Word form below accepts 2 to 24 letters while the regex
# form requires 3 to 24 -- confirm which lower bound is intended.
tld_label = Word(domain_charset_alphas, min=2, max=24)
tld_label_regex = '[A-Za-z]{3,24}'
tld_label.setName('<tdl-label>')

# Second-level label: alphanumeric first and last character with up to 61
# alphanumerics or hyphens in between (so at least two characters).
domain_label_regex = r'[a-zA-Z0-9]{1,1}([a-zA-Z0-9\-]{0,61}){0,1}[a-zA-Z0-9]{1,1}'
domain_label = Regex(domain_label_regex)
# RFC1123 permitted labels starting with a digit
#     Word(domain_charset_alphanums, exact=1)
#     + Word(domain_charset_alphanums_dash, min=1, max=61)
#     + Word(domain_charset_alphanums, exact=1)
domain_label.setName('<level2-domain-label>')

# NOGO: Do not consider merging subdomain_* with domain_generic_*
# For subdomains, we can use underscore, practically anywhere within its domain label
# Domain registrars mostly do not allow name registration having any underscore
# End-user may however deploy underscore anywhere outside of 2nd and top level domain name
# (Raw string: "\-" in a plain literal is an invalid escape sequence; the
# pattern text itself is unchanged.)
subdomain_label_regex = r'[A-Za-z0-9_]{1,1}(([A-Za-z0-9_\-]{0,61}){0,1}[A-Za-z0-9_]{1,1}){0,1}'
# We do not do IDN/PunyCode syntax enforcement here, that is outside the scope of this parser
subdomain_label = Regex(subdomain_label_regex)
subdomain_label.setName('<subdomain_label>')

# Generic Domain label, used for ANY level of its domain name
domain_generic_label = Word(domain_charset_alphanums_dash_underscore, min=1, max=63)
domain_generic_label.setName('<domain_generic_label>')
# NOTE(review): setResultsName() returns a copy and the result is discarded
# here, so this call has no effect on domain_generic_label.  Left as-is
# because assigning it would add a 'domain_name' key to existing parse
# results -- confirm whether that is wanted.
domain_generic_label.setResultsName('domain_name')
case = ( CASE + Group(ZeroOrMore((WHEN + expr("when") + THEN + expr("then")).addParseAction(to_when_call)))("case") + Optional(ELSE + expr("else")) + END ).addParseAction(to_case_call) selectStmt = Forward() compound = ( (Keyword("not", caseless=True)("op").setDebugActions(*debug) + expr("params")).addParseAction(to_json_call) | (Keyword("distinct", caseless=True)("op").setDebugActions(*debug) + expr("params")).addParseAction(to_json_call) | Keyword("null", caseless=True).setName("null").setDebugActions(*debug) | case | (Literal("(").setDebugActions(*debug).suppress() + selectStmt + Literal(")").suppress()) | (Literal("(").setDebugActions(*debug).suppress() + Group(delimitedList(expr)) + Literal(")").suppress()) | realNum.setName("float").setDebugActions(*debug) | intNum.setName("int").setDebugActions(*debug) | (Literal("-")("op").setDebugActions(*debug) + expr("params")).addParseAction(to_json_call) | sqlString.setName("string").setDebugActions(*debug) | ( Word(alphas)("op").setName("function name").setDebugActions(*debug) + Literal("(").setName("func_param").setDebugActions(*debug) + Optional(selectStmt | Group(delimitedList(expr)))("params") + ")" ).addParseAction(to_json_call).setDebugActions(*debug) | ident.copy().setName("variable").setDebugActions(*debug) ) expr << Group(infixNotation( compound, [ (
(realNum | intNum)("count").setDebugActions(*debug) + _or([Keyword(d, caseless=True)("duration") for d in durations ])).addParseAction(to_interval_call).setDebugActions(*debug) compound = ( Keyword("null", caseless=True).setName("null").setDebugActions(*debug) | (Keyword("not", caseless=True)("op").setDebugActions(*debug) + expr("params")).addParseAction(to_json_call) | (Keyword("distinct", caseless=True)("op").setDebugActions(*debug) + expr("params")).addParseAction(to_json_call) | (Keyword("date", caseless=True).setDebugActions(*debug) + sqlString("params")).addParseAction(to_date_call) | interval | case | (Literal("(").suppress() + ordered_sql + Literal(")").suppress()) | (Literal("(").suppress() + Group(delimitedList(expr)) + Literal(")").suppress()) | realNum.setName("float").setDebugActions(*debug) | intNum.setName("int").setDebugActions(*debug) | (Literal("~")("op").setDebugActions(*debug) + expr("params")).addParseAction(to_json_call) | (Literal("-")("op").setDebugActions(*debug) + expr("params")).addParseAction(to_json_call) | sqlString.setName("string").setDebugActions(*debug) | call_function | ident.copy().setName("variable").setDebugActions(*debug)) expr << Group( infixNotation( compound, [(o, 3 if isinstance(o, tuple) else 2, opAssoc.LEFT, to_json_operator) for o in KNOWN_OPS] + [ (COLLATE_NOCASE, 1, opAssoc.LEFT, to_json_operator) ]).setName("expression").setDebugActions(*debug))
def __init__(self):
    """Initialise the evaluation state and build the expression grammar.

    Sets up:

    * ``self.stack`` -- list that the parse actions append tokens to.
    * ``self.dice_roles`` -- list collecting one ``DiceRolls`` record per
      dice expression that gets rolled.
    * ``self.binary_ops`` / ``self.constants`` / ``self.functions`` --
      lookup tables mapping operator symbols, constant names and function
      names to their Python implementations.
    * ``self.expr`` -- the top-level pyparsing expression.
    """
    self.stack = []
    self.dice_roles = []
    self.binary_ops = {
        '+': operator.add,
        '-': operator.sub,
        '*': operator.mul,
        '/': operator.truediv,
        '^': operator.pow,
        '>': operator.gt,
        '>=': operator.ge,
        '<': operator.lt,
        '<=': operator.le,
        '!=': operator.ne,
        '==': operator.eq,
        'or': operator.or_,
        'and': operator.and_
    }
    self.constants = {'PI': math.pi, 'E': math.e}

    # Tolerance around zero for sgn().  The previous code compared against
    # math.e (~2.718), which made sgn() return 0 for every |a| <= e.
    epsilon = 1e-12
    self.functions = {
        'sum': lambda *a: sum(a),
        'sin': math.sin,
        'cos': math.cos,
        'tan': math.tan,
        'exp': math.exp,
        'hypot': math.hypot,
        'abs': abs,
        'trunc': int,
        'round': round,
        'sgn': lambda a: -1 if a < -epsilon else 1 if a > epsilon else 0,
        'multiply': lambda a, b: a * b,
        'all': lambda *a: all(a),
        'any': lambda *a: any(a)
    }

    # lang vars
    e = CaselessKeyword("E")
    pi = CaselessKeyword("PI")
    number = Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?")
    number.setName('Number')
    ident = Word(alphas, alphanums + "_$")
    ident.setName('Ident')
    # Optional single-digit count, 'd' or 'D', then the number of sides.
    dice = Regex(r'\d?[dD]\d+')
    dice.setName('Dice')

    plus, minus, lt, le, gt, ge, eq, ne, or_, and_ = map(
        Literal, ['+', '-', '<', '<=', '>', '>=', '==', '!=', 'or', 'and'])
    # '|' builds a MatchFirst, which takes the first alternative that
    # matches.  '<=' and '>=' must therefore be tried before '<' and '>',
    # otherwise the one-character literal always wins and the
    # two-character operators can never be parsed.
    bi_op = plus | minus | le | lt | ge | gt | eq | ne | or_ | and_
    bi_op.setName('LowBinaryOp')
    mult = Literal('*')
    div = Literal('/')
    multop = mult | div
    multop.setName('MediumBinaryOp')
    expop = Literal('^')
    expop.setName('HighBinaryOp')
    lpar = Suppress('(')
    rpar = Suppress(')')

    factor = Forward()
    expr = Forward()
    expr_list = delimitedList(Group(expr))
    expr_list.setName('ExpressionList')

    def dice_role(s: str) -> int:
        """Roll the dice described by *s*, record the rolls, return their sum."""
        rolls = DiceRolls(roll=s, results=[])
        s = s.lower()
        if s.startswith('d'):
            # No explicit count ("d6") means a single die.
            count = 1
            limit = s[1:]
        else:
            count, limit = s.split('d')
        count = int(count)
        limit = int(limit)
        for _ in range(count):
            rolls.results.append(random.randint(1, limit))
        self.dice_roles.append(rolls)
        return rolls.sum

    def insert_fn_arg_count_tuple(t: Tuple) -> None:
        """Replace the leading function name with a ``(name, argc)`` tuple."""
        fn = t.pop(0)
        argc = len(t[0])
        t.insert(0, (fn, argc))

    def push(tokens) -> None:
        """Append the first matched token to the stack."""
        self.stack.append(tokens[0])

    def push_unary_minus(tokens) -> None:
        # NOTE(review): push() appends tokens[0], so passing the string
        # 'unary -' puts the single character 'u' on the stack rather than
        # the whole marker.  Behaviour kept as-is because the code that
        # consumes the stack is not visible here -- confirm what it expects.
        if '-' in tokens:
            push('unary -')

    def push_dice(t: ParseResults) -> None:
        # Push a callable instead of a value: the roll only happens when the
        # partial is invoked.
        self.stack.append(functools.partial(dice_role, t[0]))

    dice.setParseAction(push_dice)
    # '-' after lpar is pyparsing's error-stop: once '(' has matched, a
    # failure in the argument list is reported instead of backtracking.
    fn_call = ((ident + lpar - Group(expr_list) + rpar)
               .setParseAction(insert_fn_arg_count_tuple))
    atom = dice | (bi_op[...] + (
        ((fn_call | pi | e | number | ident).setParseAction(push))
        | Group(lpar + expr + rpar)).setParseAction(push_unary_minus))

    # The exponent operand is `factor` itself (not `atom`), so the right-hand
    # side of '^' can contain further '^' operators.
    factor <<= atom + (expop + factor).setParseAction(push)[...]
    term = factor + (multop + factor).setParseAction(push)[...]
    expr <<= term + (bi_op + term).setParseAction(push)[...]
    self.expr = expr

    expr.setName('Expression')
    factor.setName('Factor')
    atom.setName('Atom')
    term.setName('Term')