def _make():
    """Build the pyparsing grammar for filter expressions.

    The grammar is assembled from four groups of atoms, tried in this order:

    * every class in ``filter_unary``: ``~<code>`` on its own;
    * every class in ``filter_rex``: ``~<code>`` followed by an expression
      (a bare word or a single/double quoted string);
    * every class in ``filter_int``: ``~<code>`` followed by digits;
    * a bare expression, which is handed to ``FUrl.make``.

    Atoms can be combined with ``!`` (unary, right associative), ``&`` and
    ``|`` (binary, left associative).

    Returns:
        A pyparsing element matching one or more expressions.  When more
        than one expression is matched the tokens are wrapped in ``FAnd``;
        a single expression is returned unchanged.
    """
    # Order is important - multi-char expressions need to come before narrow
    # ones.
    parts = []
    for klass in filter_unary:
        f = pp.Literal("~%s" % klass.code) + pp.WordEnd()
        f.setParseAction(klass.make)
        parts.append(f)

    # Printable ASCII minus the characters that have a meaning in the grammar.
    simplerex = "".join(c for c in pp.printables if c not in "()~'\"")

    # Alphabets accepted for unquoted words, tried in exactly this order
    # after the ASCII word and before the quoted-string forms.
    unicode_sets = pp.pyparsing_unicode
    scripts = (
        unicode_sets.Cyrillic,
        unicode_sets.Greek,
        unicode_sets.Chinese,
        unicode_sets.Arabic,
        unicode_sets.Devanagari,
        unicode_sets.Hebrew,
        unicode_sets.Japanese,
        unicode_sets.Korean,
        unicode_sets.Latin1,
        unicode_sets.LatinA,
        unicode_sets.LatinB,
    )
    alternatives = [pp.Word(simplerex)]
    alternatives.extend(pp.Word(script.alphas) for script in scripts)
    alternatives.append(pp.QuotedString("\"", escChar='\\'))
    alternatives.append(pp.QuotedString("'", escChar='\\'))
    rex = pp.MatchFirst(alternatives)

    for klass in filter_rex:
        f = pp.Literal("~%s" % klass.code) + pp.WordEnd() + rex.copy()
        f.setParseAction(klass.make)
        parts.append(f)

    for klass in filter_int:
        f = pp.Literal("~%s" % klass.code) + pp.WordEnd() + pp.Word(pp.nums)
        f.setParseAction(klass.make)
        parts.append(f)

    # A naked rex is a URL rex:
    f = rex.copy()
    f.setParseAction(FUrl.make)
    parts.append(f)

    atom = pp.MatchFirst(parts)
    # infixNotation is the supported name; the operatorPrecedence alias is
    # deprecated and no longer available in pyparsing 3.
    expr = pp.infixNotation(
        atom,
        [
            (pp.Literal("!").suppress(), 1, pp.opAssoc.RIGHT,
             lambda x: FNot(*x)),
            (pp.Literal("&").suppress(), 2, pp.opAssoc.LEFT,
             lambda x: FAnd(*x)),
            (pp.Literal("|").suppress(), 2, pp.opAssoc.LEFT,
             lambda x: FOr(*x)),
        ])
    expr = pp.OneOrMore(expr)
    return expr.setParseAction(lambda x: FAnd(x) if len(x) != 1 else x)
def _make():
    """Assemble and return the pyparsing grammar for filter expressions.

    Atoms are collected in priority order: the ``~<code>`` forms for the
    classes in ``filter_unary``, ``filter_rex`` (followed by an expression)
    and ``filter_int`` (followed by digits), and finally a bare expression
    that is passed to ``FUrl.make``.  The atoms are combined with the
    ``!``, ``&`` and ``|`` operators.

    Returns:
        A pyparsing element matching one or more expressions; several
        expressions in a row are wrapped in ``FAnd``, a single one is
        returned as is.
    """
    # Order is important - multi-char expressions need to come before narrow
    # ones.
    alternatives = []

    for unary_cls in filter_unary:
        element = pp.Literal(f"~{unary_cls.code}") + pp.WordEnd()
        element.setParseAction(unary_cls.make)
        alternatives.append(element)

    # This is a bit of a hack to simulate Word(pyparsing_unicode.printables),
    # which has a horrible performance with
    # len(pyparsing.pyparsing_unicode.printables) == 1114060
    excluded_chars = "()~'\"" + pp.ParserElement.DEFAULT_WHITE_CHARS
    bare_word = pp.CharsNotIn(excluded_chars)
    bare_word.skipWhitespace = True
    double_quoted = pp.QuotedString('"', escChar='\\')
    single_quoted = pp.QuotedString("'", escChar='\\')
    regex = bare_word | double_quoted | single_quoted

    for rex_cls in filter_rex:
        element = pp.Literal(f"~{rex_cls.code}") + pp.WordEnd() + regex.copy()
        element.setParseAction(rex_cls.make)
        alternatives.append(element)

    for int_cls in filter_int:
        element = (
            pp.Literal(f"~{int_cls.code}") + pp.WordEnd() + pp.Word(pp.nums)
        )
        element.setParseAction(int_cls.make)
        alternatives.append(element)

    # A naked rex is a URL rex:
    url_element = regex.copy()
    url_element.setParseAction(FUrl.make)
    alternatives.append(url_element)

    atom = pp.MatchFirst(alternatives)
    operator_table = [
        (pp.Literal("!").suppress(), 1, pp.opAssoc.RIGHT, lambda x: FNot(*x)),
        (pp.Literal("&").suppress(), 2, pp.opAssoc.LEFT, lambda x: FAnd(*x)),
        (pp.Literal("|").suppress(), 2, pp.opAssoc.LEFT, lambda x: FOr(*x)),
    ]
    single_expr = pp.infixNotation(atom, operator_table)
    sequence = pp.OneOrMore(single_expr)
    return sequence.setParseAction(lambda x: FAnd(x) if len(x) != 1 else x)
def get_expression_parser():
    """Build and return the pyparsing grammar for one expression line.

    A line is a (possibly empty) group of options followed by one of three
    expression forms and the external ``s_end`` element:

    * object, ``_IF`` keyword, one or more tests: tagged ``TEST``;
    * a lone object: tagged ``VALUE``;
    * ``_IF`` keyword, one or more tests: tagged ``LIST_TEST``.

    A test is ``item operator item`` where the operator is ``EQUAL`` or
    ``NOT_EQUAL``; further tests are chained with ``AND`` / ``OR``.  Items
    are tried as integer, then real, then any printable word.  Tagging is
    delegated to ``_tag_with``, which is defined outside this function.

    Returns:
        The pyparsing element for a complete line.
    """
    # Numeric building blocks.
    minus = pp.Optional(pp.Literal('-'))
    digits = pp.Word(pp.nums)
    dot = pp.Literal('.')

    # Keywords and operators (all constants come from the enclosing module).
    kw_ignore_errors = pp.CaselessLiteral(IGNORE_ERRORS)
    kw_all_envs = pp.CaselessLiteral(ALL_ENVS)
    op_eq = pp.Literal(EQUAL)
    op_neq = pp.Literal(NOT_EQUAL)
    kw_and = pp.CaselessLiteral(AND)
    kw_or = pp.CaselessLiteral(OR)

    option = kw_ignore_errors | kw_all_envs
    option.setParseAction(_tag_with(_OPTION))
    options = pp.Group(pp.ZeroOrMore(option))

    operator_test = op_eq | op_neq
    operator_test.setParseAction(_tag_with(TEST))

    operator_logical = kw_and | kw_or
    operator_logical.setParseAction(_tag_with(_LOGICAL))

    begin_if = pp.CaselessLiteral(_IF)
    begin_if.setParseAction(_tag_with(_IF))

    # Operands: any printable word, or a number converted to int / float.
    obj = pp.Word(pp.printables)
    obj.setParseAction(_tag_with(_OBJ))

    integer = pp.Combine(minus + digits + pp.WordEnd())
    integer.setParseAction(_tag_with(_OBJ, int))

    real = pp.Combine(
        minus + ((digits + dot + digits) | (dot + digits) | (digits + dot)))
    real.setParseAction(_tag_with(_OBJ, float))

    expritem = integer | real | obj

    single_test = expritem + operator_test + expritem
    additional_test = operator_logical + single_test

    expr_var = pp.Group(obj + s_end)
    expr_var.setParseAction(_tag_with(VALUE))

    expr_test = pp.Group(
        obj + begin_if + single_test + pp.ZeroOrMore(additional_test) + s_end)
    expr_test.setParseAction(_tag_with(TEST))

    expr_list_test = pp.Group(
        begin_if + single_test + pp.ZeroOrMore(additional_test) + s_end)
    expr_list_test.setParseAction(_tag_with(LIST_TEST))

    expr = expr_test | expr_var | expr_list_test
    return options + expr + s_end
def _make():
    """Build the pyparsing grammar for filter expressions.

    Atoms are tried in this order:

    * every class in ``filt_unary``: ``~<code>`` on its own;
    * every class in ``filt_rex``: ``~<code>`` followed by an expression
      (a bare printable word or a single/double quoted string);
    * every class in ``filt_int``: ``~<code>`` followed by digits;
    * a bare expression, which is handed to ``FUrl.make``.

    Atoms can be combined with ``!`` (unary, right associative), ``&`` and
    ``|`` (binary, left associative).

    Returns:
        A pyparsing element matching one or more expressions.  When more
        than one expression is matched the tokens are wrapped in ``FAnd``;
        a single expression is returned unchanged.
    """
    # Order is important - multi-char expressions need to come before narrow
    # ones.
    parts = []
    for klass in filt_unary:
        f = pp.Literal("~%s" % klass.code) + pp.WordEnd()
        f.setParseAction(klass.make)
        parts.append(f)

    # Printable ASCII minus the characters that have a meaning in the grammar.
    simplerex = "".join(c for c in pp.printables if c not in "()~'\"")
    rex = (
        pp.Word(simplerex)
        | pp.QuotedString("\"", escChar='\\')
        | pp.QuotedString("'", escChar='\\')
    )

    for klass in filt_rex:
        f = pp.Literal("~%s" % klass.code) + pp.WordEnd() + rex.copy()
        f.setParseAction(klass.make)
        parts.append(f)

    for klass in filt_int:
        f = pp.Literal("~%s" % klass.code) + pp.WordEnd() + pp.Word(pp.nums)
        f.setParseAction(klass.make)
        parts.append(f)

    # A naked rex is a URL rex:
    f = rex.copy()
    f.setParseAction(FUrl.make)
    parts.append(f)

    atom = pp.MatchFirst(parts)
    # infixNotation is the supported name; the operatorPrecedence alias is
    # deprecated and no longer available in pyparsing 3.
    expr = pp.infixNotation(
        atom,
        [
            (pp.Literal("!").suppress(), 1, pp.opAssoc.RIGHT,
             lambda x: FNot(*x)),
            (pp.Literal("&").suppress(), 2, pp.opAssoc.LEFT,
             lambda x: FAnd(*x)),
            (pp.Literal("|").suppress(), 2, pp.opAssoc.LEFT,
             lambda x: FOr(*x)),
        ])
    expr = pp.OneOrMore(expr)
    return expr.setParseAction(lambda x: FAnd(x) if len(x) != 1 else x)
# Grammar fragments for names, parameters, types and function definitions.
# NOTE(review): S, K, W, SW, E, PARENS, ANGLED, DOCSTR_BLOCK, Tag, Group,
# Optional, NAME, ASTRING, NEWLINE and dotted_name are defined outside this
# excerpt; the comments below assume they build pyparsing elements - confirm.

# Optional "<string> as" prefix reused by the name forms below.
rename = ASTRING + S('as')
crename = rename - dotted_name
cname = Group(Optional(rename) + NAME)('name')
tname = Group(Optional(rename) + dotted_name)('name')
# `type` is a Forward placeholder: pdef and named_type refer to it before its
# grammar is attached with `<<=` further down, which lets types nest.
type = pp.Forward().setName('type')  # pylint: disable=redefined-builtin
# One parameter: name ':' type, optionally followed by '=' and K('default').
pdef = Group(
    Group(NAME)('name') + S(':') - type + Optional(S('=') + K('default')))
# Zero or more further ", parameter" entries.
plist = pp.ZeroOrMore(S(',') + pdef)
decorators = Group(pp.ZeroOrMore(S('@') - NAME + NEWLINE))('decorators')
parameters = Group(PARENS(Optional(pdef + plist)))('params')
# Method parameters: a leading 'self' or 'cls' followed by the parameter list.
mparameters = PARENS((W('self') | W('cls'))('self') + Group(plist)('params'))
postproc = Optional(
    DOCSTR_BLOCK(S('return') - pp.WordEnd() + NAME - S('(...)'), 'postproc'))
types = Group(E('') + type) | PARENS(pp.delimitedList(pdef))
returns = S('->') - (SW('None') | Group(types)('returns'))
callable_type = Group(
    Optional(rename) + Optional(S('lambda')) + parameters + returns)
# tname | (NAME+composed_type) does not work :(
named_type = Group(tname + Optional(ANGLED(pp.delimitedList(Group(type)))))
# A type is either a callable (results name 'callable') or a named type
# (results name 'named'); `^` is pyparsing's longest-match alternation.
type <<= callable_type('callable') ^ named_type('named')
# Since all XXXdef's only used inside BLOCK that does Group inside, skip Group.
G = lambda X: X  # identity stand-in for Group
_func = Tag('func')
funcdef = G(_func + decorators + S('def') - cname + parameters +
            Optional(returns - postproc))
def SuffixMarker(txt):  # noqa - we treat this like a pyparsing class
    """Return a suppressed, case-insensitive matcher for a trailing marker.

    The element matches ``txt`` regardless of case, requires the match to
    end at an alphanumeric word boundary, and contributes no tokens to the
    parse results.
    """
    marker = pp.CaselessLiteral(txt)
    boundary = pp.WordEnd(pp.alphanums)
    return pp.Suppress(marker + boundary)
def WordBoundaries(grammar):  # noqa - we treat this like a pyparsing class
    """Wrap ``grammar`` so it only matches between word boundaries.

    The returned element requires an alphanumeric word start before
    ``grammar`` and an alphanumeric word end after it.
    """
    leading = pp.WordStart(pp.alphanums)
    bounded = leading + grammar
    return bounded + pp.WordEnd(pp.alphanums)
def SuffixMarker(txt):
    """Return a suppressed, case-insensitive matcher for a trailing marker.

    The element matches ``txt`` regardless of case, requires the match to
    end at an alphanumeric word boundary, and contributes no tokens to the
    parse results.
    """
    marker = pyparsing.CaselessLiteral(txt)
    boundary = pyparsing.WordEnd(pyparsing.alphanums)
    return pyparsing.Suppress(marker + boundary)
def WordBoundaries(grammar):
    """Wrap ``grammar`` so it only matches between word boundaries.

    The returned element requires an alphanumeric word start before
    ``grammar`` and an alphanumeric word end after it.
    """
    leading = pyparsing.WordStart(pyparsing.alphanums)
    bounded = leading + grammar
    return bounded + pyparsing.WordEnd(pyparsing.alphanums)
def transform_human(text, variables=None):
    """Transform user input with given context.

    The text is rewritten in five passes: ``$name`` variables are replaced
    by their value from ``variables``, double-quoted string literals by
    their bytes in hex, implicit opcode names get an ``OP_`` prefix, and
    implicit / explicit hex values are normalized to an even-length
    ``0x``-prefixed form.

    Args:
        text (str): User input.
        variables (dict): Variables for purposes of substitution.

    Returns:
        A 2-tuple of: (A human-readable script that Script can parse,
            A list of contextual information for tooltips, etc.)
        Each entry of the list is ``(start, end, value, match_type)``.
    """
    import binascii

    if variables is None:
        variables = {} # No mutable default value.

    # these are parseActions for pyparsing.
    def str_literal_to_hex(s, loc, toks):
        """Replace each string literal by its bytes as '0x'-prefixed hex."""
        for i, t in enumerate(toks):
            # binascii works on Python 2 and 3; the 'hex' str codec is
            # Python 2 only.
            raw = t if isinstance(t, bytes) else t.encode('utf-8')
            toks[i] = '0x' + binascii.hexlify(raw).decode('ascii')
        return toks

    def var_name_to_value(s, loc, toks):
        """Substitute '$name' with its value; unknown/falsy values stay."""
        for i, t in enumerate(toks):
            val = variables.get(t[1:])
            if val:
                toks[i] = val
        return toks

    def implicit_opcode_to_explicit(s, loc, toks):
        """Add "OP_" prefix to an opcode."""
        for i, t in enumerate(toks):
            toks[i] = '_'.join(['OP', t])
        return toks

    def hex_to_formatted_hex(s, loc, toks):
        """Add "0x" prefix and ensure even length."""
        for i, t in enumerate(toks):
            new_tok = t
            # Add '0x' prefix
            if not t.startswith('0x'):
                if t.startswith('x'):
                    new_tok = ''.join(['0', t])
                else:
                    new_tok = ''.join(['0x', t])
            # Even-length string
            if len(new_tok) % 2 != 0:
                new_tok = ''.join([new_tok[0:2], '0', new_tok[2:]])
            toks[i] = new_tok
        return toks
    # ^ parseActions for pyparsing end here.

    str_literal = QuotedString('"')
    str_literal.setParseAction(str_literal_to_hex)
    var_name = Combine(Word('$') + Word(pyparsing.alphas))
    var_name.setParseAction(var_name_to_value)

    # Here we populate the list of contextual tips.

    # Explicit opcode names
    op_names = [str(i) for i in OPCODE_NAMES.keys()]
    op_names_explicit = ' '.join(op_names)

    def is_small_int(op):
        """True if op is one of OP_1, OP_2, ...OP_16"""
        try:
            int(op[3:])
        except ValueError:
            return False
        return True

    op_names_implicit = ' '.join(
        [i[3:] for i in op_names if not is_small_int(i)])

    # Hex, implicit (e.g. 'a') and explicit (e.g. '0x0a')
    # The prefix must be a Literal: Word('0x') is a character set and would
    # also consume the leading zeros of e.g. '0x00', making the match fail.
    explicit_hex = Combine(
        pyparsing.Literal('0x') + Word(pyparsing.hexnums)
        + pyparsing.WordEnd())
    implicit_hex = Combine(pyparsing.WordStart()
                           + OneOrMore(Word(pyparsing.hexnums))
                           + pyparsing.WordEnd())
    explicit_hex.setParseAction(hex_to_formatted_hex)
    implicit_hex.setParseAction(hex_to_formatted_hex)

    # Opcodes, implicit (e.g. 'ADD') and explicit (e.g. 'OP_ADD')
    explicit_op = pyparsing.oneOf(op_names_explicit)
    implicit_op = Combine(pyparsing.WordStart()
                          + pyparsing.oneOf(op_names_implicit))
    implicit_op.setParseAction(implicit_opcode_to_explicit)

    contexts = pyparsing.Optional(
        var_name('Variable') |
        str_literal('String literal') |
        explicit_op('Opcode') |
        implicit_op('Opcode') |
        explicit_hex('Hex') |
        implicit_hex('Hex'))
    context_tips = []
    for tokens, start, end in contexts.scanString(text):
        d = tokens.asDict()
        if not d:
            continue
        # dict views are not indexable on Python 3; take the first item.
        match_type, value = next(iter(d.items()))
        context_tips.append((start, end, value, match_type))

    # Now we do the actual transformation.
    s = text
    s = var_name.transformString(s)
    s = str_literal.transformString(s)
    s = implicit_op.transformString(s)
    s = implicit_hex.transformString(s)
    s = explicit_hex.transformString(s)
    return s, context_tips
rescue_identifier.setResultsName("subject") + rest_of_line.setResultsName("new_cmdr")) GRAB_PATTERN = suppress_first_word + rescue_identifier.setResultsName( "subject") IRC_NICK_PATTERN = (suppress_first_word + rescue_identifier.setResultsName("subject") + irc_name.setResultsName("new_nick")) JUST_RESCUE_PATTERN = suppress_first_word + rescue_identifier.setResultsName( "subject") SUB_CMD_PATTERN = (suppress_first_word + rescue_identifier.setResultsName("subject") + (pyparsing.Word(pyparsing.nums, pyparsing.nums, min=1) + pyparsing.WordEnd()).setParseAction(lambda token: int( token.quote_id[0])).setResultsName("quote_id") + rest_of_line.setResultsName("remainder")) SYS_PATTERN = (suppress_first_word + rescue_identifier.setResultsName("subject") + rest_of_line.setResultsName("remainder")) TITLE_PATTERN = SYS_PATTERN UNASSIGN_PATTERN = (suppress_first_word + rescue_identifier.setResultsName("subject") + pyparsing.OneOrMore(irc_name).setResultsName("rats")) INJECT_PATTERN = ( suppress_first_word + rescue_identifier.setResultsName("subject")
from uuid import UUID import pyparsing from pyparsing import Word, Literal, hexnums irc_name = ( pyparsing.Word( initChars=pyparsing.alphas, bodyChars=pyparsing.alphanums + "[]{}|:-_<>\\/", ) ) + pyparsing.WordEnd() """ Matches a valid IRC nickname. Token MUST start with a letter but MAY contain numerics and some special chars """ api_id = pyparsing.Optional(pyparsing.Suppress("@")) + ( Word(hexnums, exact=8) + Literal("-") + Word(hexnums, exact=4) + Literal("-") + Word(hexnums, exact=4) + Literal("-") + Word(hexnums, exact=4) + Literal("-") + Word(hexnums, exact=12) + pyparsing.WordEnd() ).setParseAction(lambda tokens: UUID("".join(tokens.asList()))) """ matches a well formed UUID4""" case_number = (
def transform_human(text, variables=None):
    """Transform user input with given context.

    The text is split into words (shell-style, keeping quotes).  Words that
    are double-quoted string literals are left untouched; in every other
    word ``$name`` variables are substituted, implicit opcode names get an
    ``OP_`` prefix, and decimal numbers and explicit hex values are passed
    through ``format_hex_string``.

    Args:
        text (str): User input.
        variables (dict): Variables for purposes of substitution.

    Returns:
        A 2-tuple of: (A human-readable script that Script can parse,
            A list of contextual information for tooltips, etc.)
        Each entry of the list is ``(start, end, value, match_type)``.
    """
    if variables is None:
        variables = {} # No mutable default value.

    # these are parseActions for pyparsing.
    def var_name_to_value(s, loc, toks):
        """Substitute '$name' with its value; unknown/falsy values stay."""
        for i, t in enumerate(toks):
            val = variables.get(t[1:])
            if val:
                toks[i] = val
        return toks

    def implicit_opcode_to_explicit(s, loc, toks):
        """Add "OP_" prefix to an opcode."""
        for i, t in enumerate(toks):
            toks[i] = '_'.join(['OP', t])
        return toks

    def hex_to_formatted_hex(s, loc, toks):
        """Add "0x" prefix and ensure even length."""
        for i, t in enumerate(toks):
            toks[i] = format_hex_string(t)
        return toks

    def decimal_to_formatted_hex(s, loc, toks=None):
        """Convert decimal to hex."""
        if toks is None:
            return None
        for i, t in enumerate(toks):
            # format(..., '#x') never appends the 'L' suffix that hex()
            # adds to long integers on Python 2.
            toks[i] = format_hex_string(format(int(t), '#x'))
        return toks
    # ^ parseActions for pyparsing end here.

    str_literal = QuotedString('"')
    var_name = Combine(Word('$') + Word(pyparsing.alphas))
    var_name.setParseAction(var_name_to_value)

    # Here we populate the list of contextual tips.

    # Explicit opcode names
    op_names = [str(i) for i in opcodes.opcodes_by_name.keys()]
    op_names_explicit = ' '.join(op_names)

    def is_small_int(op):
        """True if op is one of OP_1, OP_2, ...OP_16"""
        try:
            int(op[3:])
        except ValueError:
            return False
        return True

    op_names_implicit = ' '.join(
        [i[3:] for i in op_names if not is_small_int(i)])

    # Hex, implicit (e.g. 'a') and explicit (e.g. '0x0a')
    # The prefix must be a Literal: Word('0x') is a character set and would
    # also consume the leading zeros of e.g. '0x00', making the match fail.
    explicit_hex = Combine(
        pyparsing.Literal('0x') + Word(pyparsing.hexnums)
        + pyparsing.WordEnd())
    decimal_number = Combine(pyparsing.WordStart()
                             + OneOrMore(Word(pyparsing.nums))
                             + pyparsing.WordEnd())
    explicit_hex.setParseAction(hex_to_formatted_hex)
    decimal_number.setParseAction(decimal_to_formatted_hex)

    # Opcodes, implicit (e.g. 'ADD') and explicit (e.g. 'OP_ADD')
    explicit_op = pyparsing.oneOf(op_names_explicit)
    implicit_op = Combine(pyparsing.WordStart()
                          + pyparsing.oneOf(op_names_implicit))
    implicit_op.setParseAction(implicit_opcode_to_explicit)

    contexts = pyparsing.Optional(
        var_name('Variable') |
        str_literal('String literal') |
        explicit_op('Opcode') |
        implicit_op('Opcode') |
        explicit_hex('Hex') |
        decimal_number('Decimal'))
    context_tips = []
    for tokens, start, end in contexts.scanString(text):
        d = tokens.asDict()
        if not d:
            continue
        # dict views are not indexable on Python 3; take the first item.
        match_type, value = next(iter(d.items()))
        context_tips.append((start, end, value, match_type))

    # Now we do the actual transformation.
    strings = []
    try:
        words = shlex.split(text, posix=False)
    except Exception:
        # Best effort: fall back to plain whitespace splitting when shlex
        # cannot tokenize the text (e.g. an unbalanced quote).
        words = text.split()
    for s in words:
        # Do not transform strings if they are string literals.
        is_literal = bool(pyparsing.Optional(str_literal).parseString(s))
        if not is_literal:
            s = var_name.transformString(s)
            s = implicit_op.transformString(s)
            s = decimal_number.transformString(s)
            s = explicit_hex.transformString(s)
        strings.append(s)
    return ' '.join(strings), context_tips
import pyparsing as pp from pyparsing import pyparsing_common as ppc WHITE_CHARS = pp.ParserElement.DEFAULT_WHITE_CHARS uword = pp.Suppress(pp.SkipTo(pp.WordStart())) + pp.CharsNotIn(WHITE_CHARS) common_parsers = { int: pp.Combine(pp.Optional('-') + pp.Word(pp.nums)).setParseAction( ppc.convertToInteger) + pp.Suppress(pp.WordEnd()), str: (pp.QuotedString("'") | pp.QuotedString('"') | uword) + pp.Suppress(pp.WordEnd()), bool: pp.Empty().setParseAction(lambda x: True) } def default_parser(default): return pp.Empty().setParseAction(default) rest_of_line = pp.restOfLine.copy() rest_of_string = pp.SkipTo(pp.StringEnd()) rest_parser = rest_of_string("_rest").setName("_rest").addParseAction( lambda x: {"_rest": x[0]}) def update_dict(dict_list): ret = dict() for i in dict_list:
import pyparsing as pp

from pydbml.definitions.generic import name
from pydbml.definitions.common import _, _c, end, note, note_object
from pydbml.definitions.column import table_column
from pydbml.definitions.index import indexes
from pydbml.classes import Table

# Newline is left out of the default whitespace set, so elements created
# after this call do not skip over line breaks implicitly.
pp.ParserElement.setDefaultWhitespaceChars(' \t\r')

# "as <name>": the keyword must stand alone as a word and is suppressed.
alias = pp.WordStart() + pp.Literal('as').suppress() - pp.WordEnd() - name

# A colour is '#' followed by exactly 3 or 6 hex digits with no whitespace
# in between (`^` picks the longest alternative).
hex_char = pp.Word(pp.srange('[0-9a-fA-F]'), exact=1)
hex_color = ("#" - (hex_char * 3 ^ hex_char * 6)).leaveWhitespace()
header_color = (pp.CaselessLiteral('headercolor:').suppress() + _
                - pp.Combine(hex_color)('header_color'))
# One setting is either a note or a header colour; settings are comma
# separated inside square brackets.
table_setting = _ + (note('note') | header_color) + _
table_settings = '[' + table_setting + (',' + table_setting)[...] + ']'


def parse_table_settings(s, l, t):
    '''
    Collect the named table settings found in ``t`` into a plain dict.

    Example input: [headercolor: #cccccc, note: 'note']

    ``s`` and ``l`` are not used.  The returned dict contains the keys
    'note' and/or 'header_color' only when the corresponding named result
    is present in ``t``.
    NOTE(review): the (s, l, t) signature suggests this is attached as a
    pyparsing parse action for ``table_settings`` - confirm at the call site.
    '''
    result = {}
    if 'note' in t:
        result['note'] = t['note']
    if 'header_color' in t:
        result['header_color'] = t['header_color']
    return result
# Grammar fragments for names, parameters, types and function definitions.
# NOTE(review): S, K, W, SW, E, PARENS, ANGLED, DOCSTR_BLOCK, Tag, Group,
# Optional, NAME, ASTRING, NEWLINE and dotted_name are defined outside this
# excerpt; the comments below assume they build pyparsing elements - confirm.

# Optional "<string> as" prefix reused by the name forms below.
rename = ASTRING + S('as')
crename = rename - dotted_name
cname = Group(Optional(rename) + NAME)('name')
tname = Group(Optional(rename) + dotted_name)('name')
# `type` is a Forward placeholder: pdef and named_type refer to it before its
# grammar is attached with `<<=` further down, which lets types nest.
type = pp.Forward().setName('type')  # pylint: disable=redefined-builtin
# One parameter: name ':' type, optionally followed by '=' and K('default').
pdef = Group(Group(NAME)('name') + S(':') - type +
             Optional(S('=') + K('default')))
# Zero or more further ", parameter" entries.
plist = pp.ZeroOrMore(S(',') + pdef)
decorators = Group(pp.ZeroOrMore(S('@') - NAME + NEWLINE))('decorators')
parameters = Group(PARENS(Optional(pdef + plist)))('params')
# Method parameters: a leading 'self' or 'cls' followed by the parameter list.
mparameters = PARENS((W('self') | W('cls'))('self') + Group(plist)('params'))
postproc = Optional(
    DOCSTR_BLOCK(S('return') - pp.WordEnd() + NAME - S('(...)'), 'postproc'))
types = Group(E('') + type) | PARENS(pp.delimitedList(pdef))
returns = S('->') - (SW('None') | Group(types)('returns'))
callable_type = Group(Optional(rename) + Optional(S('lambda')) +
                      parameters + returns)
# tname | (NAME+composed_type) does not work :(
named_type = Group(tname + Optional(ANGLED(pp.delimitedList(Group(type)))))
# A type is either a callable (results name 'callable') or a named type
# (results name 'named'); `^` is pyparsing's longest-match alternation.
type <<= callable_type('callable') ^ named_type('named')
# Since all XXXdef's only used inside BLOCK that does Group inside, skip Group.
G = lambda X: X  # identity stand-in for Group
_func = Tag('func')
funcdef = G(_func + decorators + S('def') - cname + parameters +
            Optional(returns - postproc))