for r in result: fields.append(_list_to_fields(r)) return fields def _result_to_list(result): struct_result = [] for member in result: if isinstance(member, basestring): struct_result.append(member) else: struct_result.append(_result_to_list([member[2]])) return struct_result _enclosed = pp.Forward() _nested_curlies = pp.nestedExpr('{', '}', content=_enclosed) _enclosed << (pp.Word(pp.alphas + '?_' + pp.nums) | '=' | _nested_curlies) def parse_struct(encoding): comps = _enclosed.parseString(encoding).asList()[0] struct_name = comps[0] struct_members = comps[2:] fields = _list_to_fields(_result_to_list(struct_members)) struct_class = _struct_class_from_fields(fields) struct_class.__name__ = (struct_name + '_Structure').replace('?', '_') return struct_class _cached_parse_types_results = {}
u"≤") multiple_operators = (u"and", u"or", u"∧", u"∨") operator = pp.Regex(u"|".join(binary_operator)) null = pp.Regex("None|none|null").setParseAction(pp.replaceWith(None)) boolean = "False|True|false|true" boolean = pp.Regex(boolean).setParseAction(lambda t: t[0].lower() == "true") hex_string = lambda n: pp.Word(pp.hexnums, exact=n) uuid_string = pp.Combine(hex_string(8) + (pp.Optional("-") + hex_string(4)) * 3 + pp.Optional("-") + hex_string(12)) number = r"[+-]?\d+(:?\.\d*)?(:?[eE][+-]?\d+)?" number = pp.Regex(number).setParseAction(lambda t: float(t[0])) identifier = pp.Word(pp.alphas, pp.alphanums + "_") quoted_string = pp.QuotedString('"') | pp.QuotedString("'") comparison_term = pp.Forward() in_list = pp.Group(pp.Suppress('[') + pp.Optional(pp.delimitedList(comparison_term)) + pp.Suppress(']'))("list") comparison_term << (null | boolean | uuid_string | identifier | number | quoted_string | in_list) condition = pp.Group(comparison_term + operator + comparison_term) expr = pp.infixNotation(condition, [ ("not", 1, pp.opAssoc.RIGHT, ), ("and", 2, pp.opAssoc.LEFT, ), ("∧", 2, pp.opAssoc.LEFT, ), ("or", 2, pp.opAssoc.LEFT, ), ("∨", 2, pp.opAssoc.LEFT, ), ])
def banana_grammar(emitter=emit.PrintEmitter()):
    """
    Generate a banana parser that can be then used to
    parse a banana content. It build an AST on which
    operation can then be applied.
    :return: Return a banana parser
    :rtype: BananaScopeParser
    """
    # NOTE(review): the default emitter is evaluated once at import time and
    # shared across all calls that omit the argument — confirm PrintEmitter
    # is stateless before relying on the default.

    # Should debug
    debug_grammar = False

    # Actions -- each builds one AST node from the matched tokens.
    # All actions receive the standard pyparsing (s, l, t) triple:
    # source string, match location, token list.
    def action_str_lit(s, l, t):
        return ast.StringLit(ast.make_span(s, l, t), t[0])

    def action_num_lit(s, l, t):
        return ast.Number(ast.make_span(s, l, t), t[0])

    def action_ident(s, l, t):
        return ast.Ident(ast.make_span(s, l, t), t[0])

    def action_expr(s, l, t):
        # infixNotation should always hand us exactly one (possibly grouped)
        # token; anything else means the grammar itself is broken.
        if len(t) != 1:
            raise exception.BananaGrammarBug(
                'Bug found in the grammar for expression,'
                ' Please report this bug.')
        if isinstance(t[0], ast.Expr):
            return t[0]
        return ast.Expr(ast.make_span(s, l, t), t[0])

    def action_dot_path(s, l, t):
        # First token is the name of the variable
        # The rest is the property path
        if isinstance(t[0], ast.StringLit) and len(t[1:]) == 0:
            return t[0]
        return ast.DotPath(ast.make_span(s, l, t), t[0], t[1:])

    def action_json_obj(s, l, t):
        return ast.JsonObj(ast.make_span(s, l, t), t)

    def action_parse_ctor_arg(s, l, t):
        # Two tokens -> named argument "name = value"; one token -> positional.
        if len(t) > 1:
            return ast.ComponentCtorArg(ast.make_span(s, l, t), t[1], t[0])
        else:
            return ast.ComponentCtorArg(ast.make_span(s, l, t), t[0])

    def action_parse_comp_ctor(s, l, tokens):
        comp = ast.Component(ast.make_span(s, l, tokens))
        for tok in tokens:
            if isinstance(tok, ast.Ident):
                comp.set_ctor(tok)
            elif isinstance(tok, ast.ComponentCtorArg):
                comp.add_arg(tok)
            else:
                raise exception.BananaGrammarBug(
                    'Bug found in the grammar, Please report this bug')
        return comp

    def action_assignment(s, l, t):
        return ast.Assignment(ast.make_span(s, l, t), t[0], t[1])

    def action_create_connections(s, l, t):
        # Chain every "a -> b -> c" element onto the first connection.
        ast_conn = ast.into_connection(t[0])
        ast_conn.span = ast.make_span(s, l, t)
        for i in range(1, len(t)):
            next_conn = ast.into_connection(t[i])
            ast_conn.connect_to(next_conn, emitter)
        return ast_conn

    def action_merge_connections(s, l, t):
        ast_conn = ast.Connection(ast.make_span(s, l, t))
        ast_conn.merge_all(t, emitter)
        return ast_conn

    def action_root_ast(s, l, tokens):
        # Fold every top-level statement into the BananaFile root node.
        root = ast.BananaFile(emitter)
        for tok in tokens:
            if isinstance(tok, ast.Assignment):
                if isinstance(tok.rhs, ast.Component):
                    root.add_component_ctor(tok.lhs, tok.rhs)
                else:
                    root.add_assignment(tok.lhs, tok.rhs)
            elif isinstance(tok, ast.Connection):
                root.add_connections(tok)
            else:
                raise exception.BananaGrammarBug(
                    'Bug found in the grammar, Please report this bug.')
        return root

    # TODO(Joan): Remove once it is no longer needed
    # NOTE(review): on Python 3 this prints the map object, not its
    # contents — harmless for debug output, but confirm intent.
    def print_stmt(s, l, t):
        print("\nPRINT AST")
        print(l, map(lambda x: str(x), t))
        print("END PRINT AST\n")

    def action_unimplemented(s, l, t):
        raise exception.BananaGrammarBug("unimplemented code reached")

    # Tokens
    equals = p.Literal("=").suppress().setName('"="').setDebug(debug_grammar)
    arrow = p.Literal("->").suppress().setName('"->"').setDebug(debug_grammar)
    lbra = p.Literal("[").suppress().setName('"["').setDebug(debug_grammar)
    rbra = p.Literal("]").suppress().setName('"]"').setDebug(debug_grammar)
    colon = p.Literal(":").suppress().setName('":"')
    comma = p.Literal(",").suppress().setName(",")
    less = p.Literal("<").suppress().setName('"<"')
    greater = p.Literal(">").suppress().setName('">"')
    lbrace = p.Literal("{").suppress().setName('"{"').setDebug(debug_grammar)
    rbrace = p.Literal("}").suppress().setName('"}"').setDebug(debug_grammar)
    lpar = p.Literal("(").suppress().setName('"("')
    rpar = p.Literal(")").suppress().setName('")"')

    # Keywords
    ing = p.Literal("ing").suppress()
    imp = p.Literal("import").suppress()
    fro = p.Literal("from").suppress()

    # String Literal, Numbers, Identifiers
    string_lit = p.quotedString()\
        .setParseAction(action_str_lit)\
        .setName(const.STRING_LIT)
    number_lit = p.Regex(r'\d+(\.\d*)?([eE]\d+)?')\
        .setParseAction(action_num_lit)\
        .setName(const.NUMBER)
    ident = p.Word(p.alphas + "_", p.alphanums + "_")\
        .setParseAction(action_ident)\
        .setName(const.IDENT)

    # Path for properties
    dot_prop = ident | string_lit
    dot_path = p.delimitedList(dot_prop, ".")\
        .setParseAction(action_dot_path)\
        .setName(const.DOT_PATH)\
        .setDebug(debug_grammar)

    # Expressions
    # Here to simplify the logic, we can match directly
    # against ident and string_lit to avoid having to deal
    # only with dot_path. It also allow to remove the confusion
    # where '"a"' could be interpreted as a dot_path and would thus
    # be the same as 'a'. With the following, the first we
    # always be type-checked as a String whereas the latter will
    # be as the type of the variable.
    expr = p.infixNotation(number_lit | dot_path, [
        (p.oneOf('* /'), 2, p.opAssoc.LEFT),
        (p.oneOf('+ -'), 2, p.opAssoc.LEFT),
    ], lpar=lpar, rpar=rpar)
    expr.setParseAction(action_expr)\
        .setName(const.EXPR)\
        .setDebug(debug_grammar)

    # Json-like object (value are much more)
    json_obj = p.Forward()
    json_value = p.Forward()
    json_array = p.Group(lbra + p.Optional(p.delimitedList(json_value)) + rbra)
    json_array.setDebug(debug_grammar)
    json_array.setName(const.JSON_ARRAY)
    json_value <<= expr | json_obj | json_array
    json_value.setDebug(debug_grammar)\
        .setName(const.JSON_VALUE)
    # '-' after the colon makes the error non-recoverable: once a key and
    # colon matched, a value MUST follow.
    json_members = p.delimitedList(p.Group(dot_path + colon - json_value)) +\
        p.Optional(comma)
    json_members.setDebug(debug_grammar)\
        .setName(const.JSON_MEMBERS)
    json_obj <<= p.Dict(lbrace + p.Optional(json_members) - rbrace)
    json_obj.setParseAction(action_json_obj)\
        .setName(const.JSON_OBJ)\
        .setDebug(debug_grammar)

    # Component constructor
    arg = (ident + equals - (expr | json_obj)) | expr | json_obj
    arg.setParseAction(action_parse_ctor_arg)
    params = p.delimitedList(arg)
    comp_ctor = ident + lpar - p.Optional(params) + rpar
    comp_ctor.setParseAction(action_parse_comp_ctor)\
        .setName(const.COMP_CTOR)\
        .setDebug(debug_grammar)

    # Assignments
    assignment = dot_path + equals - (comp_ctor | expr | json_obj)
    assignment.setParseAction(action_assignment)

    # Connections
    connection = p.Forward()
    array_of_connection = p.Group(
        lbra + p.Optional(p.delimitedList(connection)) + rbra)
    array_of_connection.setParseAction(action_merge_connections)
    last_expr = ident | array_of_connection
    this_expr = p.Forward()
    # FollowedBy is a zero-width lookahead: only commit to the arrow form
    # when at least one '->' is really present.
    match_expr = p.FollowedBy(last_expr + arrow - last_expr) + \
        (last_expr + p.OneOrMore(arrow - last_expr))
    this_expr <<= match_expr | last_expr
    connection <<= this_expr
    match_expr.setDebug(debug_grammar)\
        .setName(const.CONNECTION) \
        .setParseAction(action_create_connections)

    # Definitions
    definition = ing - less - string_lit - greater - ident - lbrace - rbrace
    definition.setDebug(debug_grammar)\
        .setName(const.DEFINITION)\
        .setParseAction(action_unimplemented)

    # Import directive
    module_def = (imp - ident) | fro - ident - imp - ident
    module_def.setDebug(debug_grammar)\
        .setName(const.MOD_IMPORT)\
        .setParseAction(action_unimplemented)

    # Comments
    comments = "#" + p.restOfLine

    statement = assignment | \
        match_expr | \
        definition | \
        module_def
    statement.setName(const.STATEMENT)
    statement.setDebug(debug_grammar)
    statement.setParseAction(print_stmt)

    # Grammar
    grammar = p.OneOrMore(statement).ignore(comments)
    grammar.setParseAction(action_root_ast)

    return BananaScopeParser(grammar)
"""utilitary functions to manipulate boolean expressions.""" from sys import stderr import pyparsing as pp logExp = pp.Forward() boolCst = pp.oneOf("True False") boolNot = pp.oneOf("! NOT") boolAnd = pp.oneOf("&& & AND") boolOr = pp.oneOf("|| | OR") boolXor = pp.oneOf("^ XOR") varName = (~boolAnd + ~boolOr + ~boolXor + ~boolNot + ~boolCst + ~pp.Literal('Node') + pp.Word(pp.alphas, pp.alphanums + '_')) varName.setParseAction(lambda token: token[0]) lparen = '(' rparen = ')' logTerm = (pp.Optional(boolNot) + (boolCst | varName | (lparen + logExp + rparen))) logAnd = logTerm + pp.ZeroOrMore(boolAnd + logTerm) logOr = logAnd + pp.ZeroOrMore(boolOr + logAnd) logExp << pp.Combine( logOr + pp.ZeroOrMore(boolXor + logOr), adjacent=False, joinString=' ') def _check_logic_syntax(string): """Return True iff string is a syntaxically correct boolean expression.""" return logExp.matches(string) def _check_logic_defined(name_list, logic_list): """Check if the list of logic is consistant.
class sparc_syntax:
    """Pyparsing-based assembler syntax for SPARC.

    Class attributes build the grammar once at class-creation time; the
    parse actions translate matched tokens into ``env`` expressions and
    finally into an ``instruction`` via ``asmhelper``.
    """

    divide = False
    noprefix = False

    comment = pp.Regex(r"\#.*")
    # Symbols become external references (32-bit) in the environment.
    symbol = pp.Regex(r"[A-Za-z_.$][A-Za-z0-9_.$]*").setParseAction(
        lambda r: env.ext(r[0], size=32))
    # Mnemonic at start of line, optionally with the ",a" annul suffix.
    mnemo = pp.LineStart() + symbol + pp.Optional(pp.Literal(",a"))
    mnemo.setParseAction(lambda r: r[0].ref.lower() + "".join(r[1:]))

    # Numeric literals in the usual bases.
    integer = pp.Regex(r"[1-9][0-9]*").setParseAction(lambda r: int(r[0], 10))
    hexa = pp.Regex(r"0[xX][0-9a-fA-F]+").setParseAction(
        lambda r: int(r[0], 16))
    octa = pp.Regex(r"0[0-7]*").setParseAction(lambda r: int(r[0], 8))
    bina = pp.Regex(r"0[bB][01]+").setParseAction(lambda r: int(r[0], 2))
    # The match includes the leading quote (e.g. "'a"), so take the
    # character *after* it. BUGFIX: the original called ord(r[0]) on the
    # whole 2-char match, which raises TypeError.
    char = pp.Regex(r"('.)|('\\\\)").setParseAction(lambda r: ord(r[0][1]))
    # BUGFIX: 'bina' must be tried before 'octa' — MatchFirst would let
    # octa consume the leading "0" of "0b101" and mis-parse the rest.
    number = integer | hexa | bina | octa | char
    number.setParseAction(lambda r: env.cst(r[0], 32))

    term = symbol | number

    exp = pp.Forward()

    op_one = pp.oneOf("- ~")
    op_sig = pp.oneOf("+ -")
    op_mul = pp.oneOf("* /")
    op_cmp = pp.oneOf("== != <= >= < > <>")
    op_bit = pp.oneOf("^ && || & |")

    operators = [
        (op_one, 1, pp.opAssoc.RIGHT),
        (op_sig, 2, pp.opAssoc.LEFT),
        (op_mul, 2, pp.opAssoc.LEFT),
        (op_cmp, 2, pp.opAssoc.LEFT),
        (op_bit, 2, pp.opAssoc.LEFT),
    ]

    # "%xyz" register reference; %hi/%lo are handled separately below.
    reg = pp.Suppress("%") + pp.NotAny(pp.oneOf("hi lo")) + symbol
    hilo = pp.oneOf("%hi %lo") + pp.Suppress("(") + exp + pp.Suppress(")")
    exp << pp.operatorPrecedence(term | reg | hilo, operators)

    adr = pp.Suppress("[") + exp + pp.Suppress("]")
    mem = adr  # +pp.Optional(symbol|imm)
    mem.setParseAction(lambda r: env.mem(r[0]))

    opd = exp | mem | reg
    opds = pp.Group(pp.delimitedList(opd))

    instr = mnemo + pp.Optional(opds) + pp.Optional(comment)

    def action_reg(toks):
        """Map a parsed register name to the env register object."""
        rname = toks[0]
        if rname.ref.startswith("asr"):
            return env.reg(rname.ref)
        return env.__dict__[rname.ref]

    def action_hilo(toks):
        """%hi(x) -> high 22 bits; %lo(x) -> low 10 bits, zero-extended."""
        v = toks[1]
        return env.hi(v) if toks[0] == "%hi" else env.lo(v).zeroextend(32)

    def action_exp(toks):
        """Recursively fold operatorPrecedence token lists into env.oper."""
        tok = toks[0]
        if isinstance(tok, env.exp):
            return tok
        if len(tok) == 2:
            # Unary: [op, operand]
            op = tok[0]
            r = tok[1]
            if isinstance(r, list):
                r = action_exp(r)
            return env.oper(op, r)
        elif len(tok) == 3:
            # Binary: [lhs, op, rhs]
            op = tok[1]
            l = tok[0]
            r = tok[2]
            if isinstance(l, list):
                l = action_exp(l)
            if isinstance(r, list):
                r = action_exp(r)
            return env.oper(op, l, r)
        else:
            return tok

    def action_instr(toks):
        """Wrap mnemonic + operands into an instruction for asmhelper."""
        i = instruction(b"")
        i.mnemonic = toks[0]
        if len(toks) > 1:
            i.operands = toks[1][0:]
        return asmhelper(i)

    # actions:
    reg.setParseAction(action_reg)
    hilo.setParseAction(action_hilo)
    exp.setParseAction(action_exp)
    instr.setParseAction(action_instr)
'Mn','Mt','Md','Hg','Mo','Nd','Ne','Np','Ni','Nb','N', 'No','Os','O','Pd','P','Pt','Pu','Po','K','Pr','Pm', 'Pa','Ra','Rn','Re','Rh','Rg','Rb','Ru','Rf','Sm', 'Sc','Sg','Se','Si','Ag','Na','Sr','S','Ta','Tc', 'Te','Tb','Tl','Th','Tm','Sn','Ti','W','Uub','Uuh', 'Uuo','Uup','Uuq','Uus','Uut','Uuu','U','V','Xe','Yb', 'Y','Zn','Zr',]) hcount = pp.Regex('H[0-9]+') ringclosure = pp.Optional( pp.Literal('%') + pp.oneOf(['1 2 3 4 5 6 7 8 9'])) + pp.oneOf(['0 1 2 3 4 5 6 7 8 9']) charge = (pp.Literal('-') + pp.Optional( pp.oneOf(['-02-9']) ^ pp.Literal('1') + pp.Optional(pp.oneOf(['0-5'])) )) ^ pp.Literal('+') + pp.Optional( pp.oneOf(['+02-9']) ^ pp.Literal('1') + pp.Optional(pp.oneOf('[0-5]')) ) chiralclass = pp.Optional(pp.Literal('@') + pp.Optional( pp.Literal('@')) ^ ( pp.Literal('TH') ^ pp.Literal('AL') ) + pp.oneOf('[1-2]') ^ pp.Literal('SP') + pp.oneOf('[1-3]') ^ pp.Literal('TB') + ( pp.Literal('1') + pp.Optional(pp.oneOf('[0-9]')) ^ pp.Literal('2') + pp.Optional(pp.Literal('0')) ^ pp.oneOf('[3-9]') ) ^ pp.Literal('OH') + ( ( pp.Literal('1') ^ pp.Literal('2') ) + pp.Optional(pp.oneOf('[0-9]')) ^ pp.Literal('3') + pp.Optional(pp.Literal('0')) ^ pp.oneOf('[4-9]')) ) atomspec = pp.Literal('[') + pp.Optional(isotope) + ( pp.Literal('se') ^ pp.Literal('as') ^ aromaticsymbol ^ elementsymbol ^ pp.Literal('*') ) + pp.Optional(chiralclass)+ pp.Optional(hcount)+pp.Optional(charge)+ pp.Optional(atomclass) + pp.Literal(']') atom = organicsymbol ^ aromaticsymbol ^ pp.Literal('*') ^ atomspec chain = pp.OneOrMore(pp.Optional(bond) + ( atom ^ ringclosure )) ## This looks f****d up smiles = pp.Forward() branch = pp.Forward() smiles << atom + pp.ZeroOrMore(chain ^ branch) branch << (pp.Literal('(') + (bond ^ pp.OneOrMore(smiles)) + pp.Literal(')')) def IsValidSMILES(text): """ A simple SMILES validator """ is_valid = False results = smiles.parseString(text) if results: is_valid = True return(is_valid)
rule_head.setName('Head field') rule_results = \ pp.Group(pp.Literal('results_name:') + _rule_config_string.setResultsName('value')) rule_results.setName('Results name field') rule_results.setParseAction(lambda results: results[0]) rule_results.addParseAction(lambda results: results.value) rule_terminal = \ _rule_identifier.setResultsName('rule_type') + \ _rule_config_string.setResultsName('rule_name') + \ pp.Optional(rule_options).setResultsName('rule_options') _rule_rules_tree_terminal = pp.OneOrMore(pp.Group(rule_terminal)) _rule_rules_tree_recursive = pp.Forward() _rule_rules_tree_recursive << \ _rule_config_string.setResultsName('list_type') + \ pp.Literal('[').suppress() + \ pp.ZeroOrMore( pp.Group(_rule_rules_tree_recursive) | pp.Group(rule_terminal)).setResultsName('rules') + \ pp.Literal(']').suppress() _rule_rules_tree_recursive = \ pp.ZeroOrMore( pp.Group(_rule_rules_tree_recursive)) + \ (pp.Group(_rule_rules_tree_recursive) | pp.ZeroOrMore(pp.Group(rule_terminal))) + \ pp.ZeroOrMore(pp.Group(_rule_rules_tree_recursive)) rule_rules_tree = _rule_rules_tree_recursive
def __init__(self):
    """
    Constructs a parser for all named queries using PyParsing.

    The grammar is assembled bottom-up; '^' combines alternatives with
    longest-match (pp.Or) semantics, which matters for overlapping
    keywords like "<" vs "<=". The finished parser is stored in
    self.full_query.
    """
    extractor = pp.Keyword("random") ^ pp.Keyword("first") ^ pp.Keyword(
        "last")

    # Valid selectors - except "avg", because not all attributes can be combined with it
    selector_no_avg = pp.Keyword("most_used") ^ pp.Keyword(
        "least_used") ^ pp.Keyword("all")

    # All attributes that cannot be combined with "avg"
    attributes_no_avg = pp.Keyword("ipaddress") ^ pp.Keyword("macaddress") ^ pp.Keyword("portnumber") ^\
        pp.Keyword("protocolname") ^ pp.Keyword("winsize") ^ pp.Keyword("ipclass")

    # All attributes that can be combined with "avg"
    attributes_avg = pp.Keyword("ttlvalue") ^ pp.Keyword("mssvalue") ^\
        pp.Keyword("pktssent") ^ pp.Keyword("pktsreceived") ^ pp.Keyword("mss") ^\
        pp.Keyword("kbytesreceived") ^ pp.Keyword("kbytessent")

    # Collection of all attributes for simpler specification
    attributes_all = attributes_no_avg ^ attributes_avg

    # Simple selector + attribute query, only allowing "avg" with compatible attributes
    simple_selector_query = (selector_no_avg + pp.Suppress("(") + attributes_all + pp.Suppress(")")) ^\
        (pp.Keyword("avg") + pp.Suppress("(") + attributes_avg + pp.Suppress(")"))

    # Selectors for parameterized queries - they are replaced in the result to avoid ambiguity
    param_selectors = pp.Keyword("ipaddress").setParseAction(pp.replaceWith("ipaddress_param")) ^\
        pp.Keyword("macaddress").setParseAction(pp.replaceWith("macaddress_param"))

    # All operators allowed in parameterized queries
    operators = pp.Literal("<=") ^ pp.Literal("<") ^ pp.Literal("=") ^\
        pp.Literal(">=") ^ pp.Literal(">") ^ pp.CaselessLiteral("in")

    # Placeholder for nesting in parameterized queries
    # (defined at the end, once named_query exists)
    expr = pp.Forward()

    # Simple values for comparisons inside a parameterized query can be alphanumeric plus dot and colon
    simple_value = pp.Word(pp.alphanums + ".:")

    # Values in parameterized queries can either be simple values, or a list of them.
    # If it's a list, we insert a "list"-token to be able to distinguish it
    parameterized_value = simple_value ^\
        (pp.Suppress("[") + pp.Group(pp.Empty().addParseAction(pp.replaceWith('list')) +
                                     pp.delimitedList(simple_value)) + pp.Suppress("]"))

    # One "attribute-operator-value" triplet for parameterized queries
    comparison = pp.Group(attributes_all + operators + (parameterized_value ^ expr))

    # A full parameterized query, consisting of a parameterized selector and a comma-separated list of comparisons
    parameterized_query = param_selectors + pp.Suppress("(") + pp.Group(
        pp.delimitedList(comparison)) + pp.Suppress(")")

    # Combination of simple and parameterized queries
    all_selector_queries = (simple_selector_query ^ parameterized_query)

    # All queries can be combined with an extractor
    extractor_selector_query = extractor + pp.Suppress(
        "(") + all_selector_queries + pp.Suppress(")")

    # Queries can be used with an extractor or without
    named_query = (extractor_selector_query ^ all_selector_queries)

    # The placeholder can be replaced with any query
    expr << pp.Group(named_query)

    # Make sure all queries end with a semicolon, and we're done
    self.full_query = named_query + pp.Suppress(";")
def __init__(self):
    """Build the pyparsing grammar for BibTeX files.

    Defines self.entry, self.explicit_comment, self.implicit_comment,
    self.string_def, self.preamble_decl and combines them into
    self.main_expression.
    """
    # Bibtex keywords
    string_def_start = pp.CaselessKeyword("@string")
    preamble_start = pp.CaselessKeyword("@preamble")
    comment_line_start = pp.CaselessKeyword('@comment')

    # String names
    string_name = pp.Word(pp.alphanums + '_-:')('StringName')
    self.set_string_name_parse_action(lambda s, l, t: None)
    string_name.addParseAction(self._string_name_parse_action)

    # Values inside bibtex fields
    # Values can be integer or string expressions. The latter may use
    # quoted or braced values.

    # Integer values
    integer = pp.Word(pp.nums)('Integer')

    # Braced values: braced values can contain nested (but balanced) braces
    braced_value_content = pp.CharsNotIn('{}')
    braced_value = pp.Forward()  # Recursive definition for nested braces
    braced_value <<= pp.originalTextFor(
        '{' + pp.ZeroOrMore(braced_value | braced_value_content) + '}'
    )('BracedValue')
    braced_value.setParseAction(remove_braces)
    # TODO add ignore for "\}" and "\{" ?
    # TODO @ are not parsed by bibtex in braces

    # Quoted values: may contain braced content with balanced braces
    brace_in_quoted = pp.nestedExpr('{', '}', ignoreExpr=None)
    text_in_quoted = pp.CharsNotIn('"{}')
    # (quotes should be escaped by braces in quoted value)
    quoted_value = pp.originalTextFor(
        '"' + pp.ZeroOrMore(text_in_quoted | brace_in_quoted) + '"'
    )('QuotedValue')
    quoted_value.addParseAction(pp.removeQuotes)

    # String expressions: values concatenated with '#'
    string_expr = pp.delimitedList(
        (quoted_value | braced_value | string_name), delim='#'
    )('StringExpression')
    self.set_string_expression_parse_action(lambda s, l, t: None)
    string_expr.addParseAction(self._string_expr_parse_action)

    value = (integer | string_expr)('Value')

    # Entries
    # @EntryType { ...
    entry_type = (pp.Suppress('@') + pp.Word(pp.alphas))('EntryType')
    entry_type.setParseAction(first_token)

    # Entry key: any character up to a ',' without leading and trailing
    # spaces.
    key = pp.SkipTo(',')('Key')  # Exclude @',\#}{~%
    key.setParseAction(lambda s, l, t: first_token(s, l, t).strip())

    # Field name: word of letters, digits, dashes and underscores
    field_name = pp.Word(pp.alphanums + '_-().')('FieldName')
    field_name.setParseAction(first_token)

    # Field: field_name = value
    field = pp.Group(field_name + pp.Suppress('=') + value)('Field')
    field.setParseAction(field_to_pair)

    # List of fields: comma separated fields, optional trailing comma.
    # reversed() makes the FIRST occurrence of a duplicate field win
    # when the pairs are folded into a dict.
    field_list = (pp.delimitedList(field) + pp.Suppress(pp.Optional(','))
                  )('Fields')
    field_list.setParseAction(
        lambda s, l, t: {k: v for (k, v) in reversed(t.get('Fields'))})

    # Entry: type, key, and fields
    self.entry = (entry_type +
                  in_braces_or_pars(key + pp.Suppress(',') + field_list)
                  )('Entry')

    # Other stuff: comments, string definitions, and preamble declarations

    # Explicit comments: @comment + everything up to next valid declaration
    # starting on new line.
    not_an_implicit_comment = (pp.LineEnd() + pp.Literal('@')
                               ) | pp.stringEnd()
    self.explicit_comment = (
        pp.Suppress(comment_line_start) +
        pp.originalTextFor(pp.SkipTo(not_an_implicit_comment),
                           asString=True))('ExplicitComment')
    self.explicit_comment.addParseAction(remove_trailing_newlines)
    self.explicit_comment.addParseAction(remove_braces)
    # Previous implementation included comment until next '}'.
    # This is however not inline with bibtex behavior that is to only
    # ignore until EOL. Brace stripping is arbitrary here but avoids
    # duplication on bibtex write.

    # Empty implicit_comments lead to infinite loop of zeroOrMore
    def mustNotBeEmpty(t):
        if not t[0]:
            raise pp.ParseException("Match must not be empty.")

    # Implicit comments: not anything else
    self.implicit_comment = pp.originalTextFor(
        pp.SkipTo(not_an_implicit_comment).setParseAction(mustNotBeEmpty),
        asString=True)('ImplicitComment')
    self.implicit_comment.addParseAction(remove_trailing_newlines)

    # String definition
    self.string_def = (pp.Suppress(string_def_start) + in_braces_or_pars(
        string_name +
        pp.Suppress('=') +
        string_expr('StringValue')
    ))('StringDefinition')

    # Preamble declaration
    self.preamble_decl = (pp.Suppress(preamble_start) +
                          in_braces_or_pars(value))('PreambleDeclaration')

    # Main bibtex expression
    self.main_expression = pp.ZeroOrMore(
        self.string_def |
        self.preamble_decl |
        self.explicit_comment |
        self.entry |
        self.implicit_comment)
# # implementation of Lucene grammar, as decribed # at http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/docs/queryparsersyntax.html # import pyparsing as pp from pyparsing import pyparsing_common as ppc pp.ParserElement.enablePackrat() COLON, LBRACK, RBRACK, LBRACE, RBRACE, TILDE, CARAT = map( pp.Literal, ":[]{}~^") LPAR, RPAR = map(pp.Suppress, "()") and_, or_, not_, to_ = map(pp.CaselessKeyword, "AND OR NOT TO".split()) keyword = and_ | or_ | not_ | to_ expression = pp.Forward() valid_word = pp.Regex( r'([a-zA-Z0-9*_+.-]|\\\\|\\([+\-!(){}\[\]^"~*?:]|\|\||&&))+').setName( "word") valid_word.setParseAction(lambda t: t[0].replace('\\\\', chr(127)).replace( '\\', '').replace(chr(127), '\\')) string = pp.QuotedString('"') required_modifier = pp.Literal("+")("required") prohibit_modifier = pp.Literal("-")("prohibit") integer = ppc.integer() proximity_modifier = pp.Group(TILDE + integer("proximity")) number = ppc.fnumber() fuzzy_modifier = TILDE + pp.Optional(number, default=0.5)("fuzzy")
#terminals: p_bottop = pp.oneOf('⊥ T') p_symbol = pp.Word(pp.alphas) p_extern = pp.Suppress('@') + p_symbol p_cst = pp.Suppress('0x') + pp.Combine( pp.Optional('-') + pp.Regex('[0-9a-f]+')) p_int = pp.Word(pp.nums).setParseAction(lambda r: int(r[0])) p_slc = '[' + p_int.setResultsName('start') + ':' + p_int.setResultsName( 'stop') + ']' p_op1 = pp.oneOf('~ -') p_op2 = pp.oneOf('+ - / // * & | ^ << >> < > == <= >= != ? :') p_term = p_bottop | p_symbol | p_extern | p_cst #nested expressions: p_expr = pp.Forward() p_csl = pp.Suppress('|') + p_slc + pp.Suppress('->') p_comp = pp.Group( pp.Suppress('{') + pp.ZeroOrMore(p_expr) + pp.Suppress('| }')) p_mem = 'M' + p_int + pp.Optional(p_symbol) operators = [ (p_op1, 1, pp.opAssoc.RIGHT), (p_mem, 1, pp.opAssoc.RIGHT), (p_slc, 1, pp.opAssoc.LEFT), (p_op2, 2, pp.opAssoc.LEFT), (p_csl, 1, pp.opAssoc.RIGHT), ] p_expr << pp.operatorPrecedence(p_term | p_comp, operators)
def __init__(self, extra_literals=None):
    """
    Build the arithmetic expression parser.

    expop   :: '^'
    multop  :: '*' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+
    atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
    factor  :: atom [ expop factor ]*
    term    :: factor [ multop factor ]*
    expr    :: term [ addop term ]*

    :param extra_literals: optional iterable of extra names accepted
        (case-insensitively) as atoms; default none.
    """
    # BUGFIX: the original signature used a mutable default ([]), which is
    # shared across every instance; normalize a None sentinel instead.
    if extra_literals is None:
        extra_literals = []

    point = pp.Literal(".")
    e = pp.CaselessLiteral("E")
    fnumber = pp.Combine(pp.Word("+-" + pp.nums, pp.nums) +
                         pp.Optional(point + pp.Optional(pp.Word(pp.nums))) +
                         pp.Optional(e + pp.Word("+-" + pp.nums, pp.nums)))
    ident = pp.Word(pp.alphas, pp.alphas + pp.nums + "_$")
    plus = pp.Literal("+")
    minus = pp.Literal("-")
    mult = pp.Literal("*")
    div = pp.Literal("/")
    lpar = pp.Literal("(").suppress()
    rpar = pp.Literal(")").suppress()
    addop = plus | minus
    multop = mult | div
    expop = pp.Literal("^")
    pi = pp.CaselessLiteral("PI")

    self.extra_literals = extra_literals
    # Fold the extra literals into a single alternation; NoMatch() is the
    # identity element (matches nothing) when the list is empty.
    pp_extra_literals = functools.reduce(
        operator.or_,
        [pp.CaselessLiteral(lit) for lit in extra_literals],
        pp.NoMatch())

    expr = pp.Forward()
    atom = ((pp.Optional(pp.oneOf("- +")) +
             (pi | e | pp_extra_literals | fnumber | ident + lpar + expr + rpar)
             .setParseAction(self.pushFirst))
            | pp.Optional(pp.oneOf("- +")) + pp.Group(lpar + expr + rpar)
            ).setParseAction(self.pushUMinus)

    # by defining exponentiation as "atom [ ^ factor ]..." instead of
    # "atom [ ^ atom ]...", we get right-to-left exponents, instead of
    # left-to-right; that is, 2^3^2 = 2^(3^2), not (2^3)^2.
    factor = pp.Forward()
    factor << atom + \
        pp.ZeroOrMore((expop + factor).setParseAction(self.pushFirst))
    term = factor + \
        pp.ZeroOrMore((multop + factor).setParseAction(self.pushFirst))
    expr << term + \
        pp.ZeroOrMore((addop + term).setParseAction(self.pushFirst))
    self.bnf = expr

    # map operator symbols to corresponding arithmetic operations
    epsilon = 1e-12
    self.opn = {"+": operator.add,
                "-": operator.sub,
                "*": operator.mul,
                "/": operator.truediv,
                "^": operator.pow}
    self.fn = {"sin": math.sin,
               "cos": math.cos,
               "tan": math.tan,
               "abs": abs,
               "trunc": lambda a: int(a),
               "round": round,
               # BUGFIX: the original used pp.cmp(a, 0); neither pyparsing
               # nor Python 3 provides cmp(), so "sgn" always raised.
               # Sign with a dead zone of +/- epsilon around zero.
               "sgn": lambda a: 0 if abs(a) <= epsilon
                      else (1 if a > 0 else -1)}
def __mk_grammar(self):
    """
    Build the grammar description; called from the Parser class
    constructor.
    """
    # LiteralNode and IdentNode
    num = ppc.integer() | ppc.real()
    str_ = pp.QuotedString('"', escChar='\\', unquoteResults=True,
                           convertWhitespaceEscapes=False)
    literal = (num | str_).setName('Literal')
    ident = ppc.identifier.setName('Ident')

    # Keywords
    VAR_KW, FUNC_KW, RETURN_KW = pp.Keyword('var'), pp.Keyword('function'), pp.Keyword('return')
    IF_KW, ELSE_KW = pp.Keyword('if'), pp.Keyword('else')
    FOR_KW, DO_KW, WHILE_KW = pp.Keyword('for'), pp.Keyword('do'), pp.Keyword('while')

    # Brackets, comma and semicolon (all suppressed from the parse results).
    L_PAR, R_PAR = pp.Literal('(').suppress(), pp.Literal(')').suppress()
    L_BRACKET, R_BRACKET = pp.Literal('{').suppress(), pp.Literal('}').suppress()
    SEMICOLON, COMMA = pp.Literal(';').suppress(), pp.Literal(',').suppress()

    # Operators
    ASSIGN = pp.Literal('=')
    ADD, SUB, MUL, DIV, MOD, EXP = pp.Literal('+'), pp.Literal('-'), pp.Literal('*'), pp.Literal('/'), \
        pp.Literal('%'), pp.Literal('**')
    LOG_AND, LOG_OR, LOG_NOT = pp.Literal('&&'), pp.Literal('||'), pp.Literal('!')
    GT, LT, GE, LE = pp.Literal('>'), pp.Literal('<'), pp.Literal('>='), pp.Literal('<=')
    NEQ, EQ = pp.Literal('!='), pp.Literal('==')
    INCR, DECR = pp.Literal('++'), pp.Literal('--')

    # Forward declarations for multiplicative / additive operations and
    # Expression; they are defined further down.
    mul_op = pp.Forward()
    add_op = pp.Forward()
    expr = pp.Forward()

    # Function call
    call = (ident + L_PAR + pp.Optional(expr + pp.ZeroOrMore(COMMA + expr)) + R_PAR).setName('Call')

    # Unary operations: increment, decrement.
    incr_op = (ident + INCR).setName('UnaryExpr')
    decr_op = (ident + DECR).setName('UnaryExpr')

    group = (literal | call | ident | L_PAR + expr + R_PAR)

    # Binary expressions, from tightest to loosest binding.
    # Note EXP ('**') is listed before MUL ('*') so it wins the match.
    mul_op << pp.Group(group + pp.ZeroOrMore((EXP | MUL | DIV | MOD) + group)).setName('BinExpr')
    add_op << pp.Group(mul_op + pp.ZeroOrMore((ADD | SUB) + mul_op)).setName('BinExpr')
    compare = pp.Group(add_op + pp.ZeroOrMore((GE | LE | GT | LT) + add_op)).setName('BinExpr')
    compare_eq = pp.Group(compare + pp.ZeroOrMore((EQ | NEQ) + compare)).setName('BinExpr')
    log_and_op = pp.Group(compare_eq + pp.ZeroOrMore(LOG_AND + compare_eq)).setName('BinExpr')
    log_or_op = pp.Group(log_and_op + pp.ZeroOrMore(LOG_OR + log_and_op)).setName('BinExpr')
    expr << log_or_op

    # Assignment and variable declaration.
    assign = (ident + ASSIGN + expr).setName('BinExpr')
    simple_assign = (ident + ASSIGN.suppress() + expr)
    var_item = simple_assign | ident
    simple_var = (VAR_KW.suppress() + var_item).setName('Declarator')
    mult_var_item = (COMMA + var_item).setName('Declarator')
    mult_var = (simple_var + pp.ZeroOrMore(mult_var_item)).setName('VarDeclaration')

    stmt = pp.Forward()
    simple_stmt = assign | call | incr_op | decr_op

    # for-loop header parts.
    for_statement_list = pp.Optional(simple_stmt + pp.ZeroOrMore(COMMA + simple_stmt)).setName('BlockStatement')
    for_statement = mult_var | for_statement_list
    for_test = expr | pp.Group(pp.empty)
    for_block = stmt | pp.Group(SEMICOLON).setName('BlockStatement')

    # for, while, do-while loops and the if conditional.
    if_ = (IF_KW.suppress() + L_PAR + expr + R_PAR + stmt +
           pp.Optional(ELSE_KW.suppress() + stmt)).setName('If')
    for_ = (FOR_KW.suppress() + L_PAR + for_statement + SEMICOLON + for_test +
            SEMICOLON + for_statement + R_PAR + for_block).setName('For')
    while_ = (WHILE_KW.suppress() + L_PAR + expr + R_PAR + stmt).setName('While')
    do_while = (DO_KW.suppress() + stmt + WHILE_KW.suppress() + L_PAR + expr + R_PAR).setName('DoWhile')

    # Code block (with and without { }), function arguments, function
    # declaration and the return statement.
    block = pp.ZeroOrMore(stmt + pp.ZeroOrMore(SEMICOLON)).setName('BlockStatement')
    br_block = L_BRACKET + block + R_BRACKET
    args = ((expr + pp.ZeroOrMore(COMMA + expr)) | pp.Group(pp.empty)).setName("Args")
    func_decl = (FUNC_KW.suppress() + ident + L_PAR + args + R_PAR + br_block)\
        .setName('FuncDeclaration')
    return_ = (RETURN_KW.suppress() + expr).setName('Return')

    stmt << (
        if_ |
        for_ |
        while_ |
        do_while |
        br_block |
        mult_var + SEMICOLON |
        simple_stmt + SEMICOLON |
        func_decl |
        return_
    )

    # locals().copy().items() returns a dict of every variable in the
    # current scope; all of its items are iterated in the for loop.
    # NOTE: the local variable NAMES are significant here — they are passed
    # to __set_parse_action — so locals must not be renamed.
    for var_name, value in locals().copy().items():
        # check that the current item is an instance of ParserElement
        if isinstance(value, pp.ParserElement):
            # call the __set_parse_action method
            self.__set_parse_action(var_name, value)

    return block.ignore(pp.cStyleComment).ignore(pp.dblSlashComment) + pp.stringEnd
def __init__(self):
    """
    Create a new StdInputParser object.

    Builds pyparsing grammars for gate-string expressions and for
    "dict lines" (a reference label followed by a gate string), storing
    the finished parsers on ``self.string_parser`` and
    ``self.dictline_parser``.  The parse actions push markers onto
    ``self.exprStack`` in postfix order for later evaluation.

    NOTE(review): ``self.exprStack`` is not initialized here — it is
    presumably created/reset elsewhere in the class before parsing;
    confirm against the surrounding code.
    """

    # --- parse actions: each pushes one marker onto the expression stack ---
    def push_first(strg, loc, toks):
        # Push the first matched token (gate name, integer, label, ...).
        self.exprStack.append(toks[0])

    def push_mult(strg, loc, toks):
        # Push an explicit multiplication marker between adjacent factors.
        self.exprStack.append('*')

    def push_slice(strg, loc, toks):
        # Push a marker for an "[i:j]" slice applied to a string reference.
        self.exprStack.append('SLICE')

    #def push_count( strg, loc, toks ):
    #    self.exprStack.append( toks[0] )
    #    self.exprStack.append( 'COUNT' )

    # --- character classes ---
    #caps = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    # NOTE(review): string.lowercase is Python-2-only (Python 3 uses
    # string.ascii_lowercase); `lowers` is only used in commented-out code.
    lowers = _string.lowercase  #caps.lower()
    digits = _pp.nums  #"0123456789"  #same as "nums"

    #point = _pp.Literal( "." )
    #e = _pp.CaselessLiteral( "E" )
    #real = _pp.Combine( _pp.Word( "+-"+_pp.nums, _pp.nums ) +
    #                    _pp.Optional( point + _pp.Optional( _pp.Word( _pp.nums ) ) ) +
    #                    _pp.Optional( e + _pp.Word( "+-"+_pp.nums, _pp.nums ) ) ).setParseAction(push_first)
    # Floating-point literal as a single regex (faster than the Combine above).
    real = _pp.Regex( r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?')  #faster than above

    # --- basic tokens ---
    nop = _pp.Literal("{}").setParseAction(push_first)  # empty gate string
    expop = _pp.Literal("^")                            # exponentiation operator
    lpar = _pp.Literal("(").suppress()
    rpar = _pp.Literal(")").suppress()
    lbrk = _pp.Literal("[").suppress()
    rbrk = _pp.Literal("]").suppress()
    integer = _pp.Word(digits).setParseAction(push_first)
    reflbl = _pp.Word(_pp.alphas + _pp.nums + "_").setParseAction(push_first)

    #gate = _pp.Word( "G", lowers + digits + "_" ).setParseAction(push_first)
    # Gate label: "G" followed by lowercase letters, digits, underscores
    # (regex form is faster than the Word form above).
    gate = _pp.Regex(r'G[a-z0-9_]+').setParseAction( push_first)  #faster than above

    # String reference: S[<label>], optionally sliced as S[<label>][i:j].
    strref = (_pp.Literal("S") + "[" + reflbl + "]").setParseAction(push_first)
    slcref = (strref + _pp.Optional(
        ("[" + integer + ":" + integer + "]").setParseAction(push_slice)))

    #bSimple = False  #experimenting with possible parser speedups
    #if bSimple:
    #    string = _pp.Forward()
    #    gateSeq = _pp.OneOrMore( gate )
    #    expable = (nop | gateSeq | lpar + gateSeq + rpar)
    #    expdstr = expable + _pp.Optional( (expop + integer).setParseAction(push_first) )
    #    string << expdstr + _pp.ZeroOrMore( (_pp.Optional("*") + expdstr).setParseAction(push_mult))
    #else:

    # Gate-string grammar: products of (possibly exponentiated) gates,
    # string references, parenthesized sub-strings, or the empty string.
    string = _pp.Forward()
    expable = (gate | slcref | lpar + string + rpar | nop)
    expdstr = expable + _pp.ZeroOrMore(
        (expop + integer).setParseAction(push_first))
    string << expdstr + _pp.ZeroOrMore(
        (_pp.Optional("*") + expdstr).setParseAction(push_mult))

    #count = real.copy().setParseAction(push_count)
    #dataline = string + _pp.OneOrMore( count )

    # A dict line is a reference label followed by a gate string.
    dictline = reflbl + string

    self.string_parser = string
    #self.dataline_parser = dataline  #OLD: when data lines had their own parser
    self.dictline_parser = dictline
# # nested_markup.py # # Example markup parser to recursively transform nested markup directives. # # Copyright 2019, Paul McGuire # import pyparsing as pp wiki_markup = pp.Forward() # a method that will construct and return a parse action that will # do the proper wrapping in opening and closing HTML, and recursively call # wiki_markup.transformString on the markup body text def convert_markup_to_html(opening, closing): def conversionParseAction(s, l, t): return opening + wiki_markup.transformString(t[1][1:-1]) + closing return conversionParseAction # use a nestedExpr with originalTextFor to parse nested braces, but return the # parsed text as a single string containing the outermost nested braces instead # of a nested list of parsed tokens markup_body = pp.originalTextFor(pp.nestedExpr("{", "}")) italicized = ("ital" + markup_body).setParseAction( convert_markup_to_html("<I>", "</I>")) bolded = ("bold" + markup_body).setParseAction( convert_markup_to_html("<B>", "</B>"))
raise ImportError( 'six 1.3 or later is required; you have %s' % ( six.__version__)) try: import pyparsing except ImportError: raise ImportError("matplotlib requires pyparsing") else: if not compare_versions(pyparsing.__version__, '1.5.6'): raise ImportError( "matplotlib requires pyparsing >= 1.5.6") # pyparsing 2.0.0 bug, but it may be patched in distributions try: f = pyparsing.Forward() f <<= pyparsing.Literal('a') bad_pyparsing = f is None except TypeError: bad_pyparsing = True # pyparsing 1.5.6 does not have <<= on the Forward class, but # pyparsing 2.0.0 and later will spew deprecation warnings if # using << instead. Additionally, the <<= in pyparsing 1.5.7 is # broken, since it doesn't return self. In order to support # pyparsing 1.5.6 and above with a common code base, this small # monkey patch is applied. if bad_pyparsing: def _forward_ilshift(self, other): self.__lshift__(other) return self
def createParser(self):
    """Return a parser for the RestrictedXpathQuery grammar.

    Builds the full pyparsing grammar for a restricted XPath-like query
    language (location path, optional predicates, order-by, limit and
    offset) and returns the bound ``parseString`` method of the top-level
    ``query`` element.  Several sub-expressions invoke ``self.eval*``
    parse actions defined elsewhere on this class.
    """
    # xml standard tokens (see: http://www.w3.org/TR/REC-xml)
    xmlNameStartChar = pp.alphas + ":" + "_" + \
        pp.srange("[\u00C0-\u00D6]") + \
        pp.srange("[\u00D8-\u00F6]") + \
        pp.srange("[\u00F8-\u02FF]") + \
        pp.srange("[\u0370-\u037D]") + \
        pp.srange("[\u037F-\u1FFF]") + \
        pp.srange("[\u200C-\u200D]") + \
        pp.srange("[\u2070-\u218F]") + \
        pp.srange("[\u2C00-\u2FEF]") + \
        pp.srange("[\u3001-\uD7FF]") + \
        pp.srange("[\uF900-\uFDCF]") + \
        pp.srange("[\uFDF0-\uFFFD]") + \
        pp.srange("[\u10000-\uEFFFF]")
    # NOTE(review): unichr() is Python-2-only; this module presumably
    # targets Python 2 — confirm before porting.
    xmlNameChar = xmlNameStartChar + "-" + "." + pp.nums + \
        unichr(0xB7) + pp.srange("[\u0300-\u036F]") + \
        pp.srange("[\u203F-\u2040]")

    # custom tokens
    wildcard = pp.Literal(self.WILDCARD)    # node wildcard operator
    sep = pp.Literal(self.SEP)              # path separator
    selfNd = pp.Literal('.').suppress()     # current node
    parentNd = pp.Literal(self.PARENT)      # parent of current node
    lpar = pp.Literal('(').suppress()       # left parenthesis literal
    rpar = pp.Literal(')').suppress()       # right parenthesis literal
    pstart = pp.Literal('[').suppress()     # beginning of predicates
    pend = pp.Literal(']').suppress()       # end of predicates
    ncPrefix = pp.Word(xmlNameStartChar, xmlNameChar) + ':'  # namespace prefix
    # node name, may contain a namespace prefix and may start with '@' for
    # attribute nodes
    ndName = pp.Combine(pp.Optional('@') + pp.Optional(ncPrefix) + \
                        pp.Word(xmlNameStartChar, xmlNameChar))
    node = wildcard | parentNd | selfNd | ndName  # node
    literalValue = pp.Literal('"').suppress() + \
                   pp.CharsNotIn('"') + \
                   pp.Literal('"').suppress() \
                   | \
                   pp.Literal("'").suppress() + \
                   pp.CharsNotIn("'") + \
                   pp.Literal("'").suppress()
    # literal value delimited by either "" or ''
    numericValue = pp.Combine(pp.Optional('-') + \
                              pp.Word(pp.nums) + \
                              pp.Optional('.' + pp.Word(pp.nums)))  # Numbers

    # keywords
    orderBy = pp.CaselessKeyword('order by')
    asc = pp.CaselessKeyword('asc')
    desc = pp.CaselessKeyword('desc')
    limit = pp.CaselessKeyword('limit')
    offset = pp.CaselessKeyword('offset')

    # operators
    # '==' is normalized to '=' via replaceWith so downstream evaluation
    # only ever sees a single equality spelling.
    eqOp = pp.Literal('==').setParseAction(pp.replaceWith("=")) | \
           pp.Literal('=')
    ltOp = pp.Literal('<')
    gtOp = pp.Literal('>')
    leOp = pp.Literal('<=')
    geOp = pp.Literal('>=')
    ineqOp = pp.Literal('!=')
    orOp = pp.CaselessKeyword('or')
    andOp = pp.CaselessKeyword('and')
    # Two-character operators are tried before their one-character
    # prefixes ('<=' before '<') so matching is unambiguous.
    relOp = eqOp | ineqOp | leOp | geOp | ltOp | gtOp
    logOp = orOp | andOp

    # functions
    notFunc = pp.CaselessKeyword('not')

    # location step: /package/resourcetype/step/step...
    package_id = (pp.Word(pp.alphanums + "-_") | wildcard).\
        setResultsName('package_id').\
        setParseAction(self.evalPackage_id).suppress()
    resourcetype_id = (pp.Word(pp.alphanums + "-_") | wildcard).\
        setResultsName('resourcetype_id').\
        setParseAction(self.evalResourcetype_id).suppress()
    locationStep = (sep.suppress() + (ndName | wildcard)).\
        setResultsName('locationStep', True)
    location = (sep.suppress() + package_id + \
                sep.suppress() + resourcetype_id + \
                pp.ZeroOrMore(locationStep)).\
        setParseAction(self.evalLocationSteps)

    # predicate expression (recursive; supports not(), parentheses and
    # chained and/or clauses)
    pexpr = pp.Forward().setParseAction(self.remove_list)
    pathExpr = (pp.Optional(sep) + node + \
                pp.ZeroOrMore(sep.suppress() + node)).\
        setParseAction(self.evalPath)
    valueExpr = literalValue | numericValue
    relExpr = pathExpr + pp.Optional(relOp + (valueExpr | pathExpr))
    parExpr = pp.Group(lpar + pexpr + rpar)
    notExpr = pp.Group(notFunc + parExpr)
    pexpr << (notExpr | pp.Group(relExpr) | parExpr) + \
        pp.Optional(logOp + (pp.Group(pexpr) | parExpr))

    # order by clause; direction defaults to 'asc' when omitted
    obItem = (pathExpr + pp.Optional(asc | desc, 'asc')).\
        setResultsName('order_by', listAllMatches=True)
    orderByExpr = orderBy + pp.delimitedList(obItem, ',')

    # limit and offset; "limit N,M" also sets the offset
    limitExpr = limit + pp.Word(pp.nums).setResultsName('limit') + \
        pp.Optional(',' + \
                    pp.Word(pp.nums).setResultsName('offset'))
    offsetExpr = offset + pp.Word(pp.nums).setResultsName('offset')

    # query
    predicates = (pstart + pexpr + pend).setResultsName('predicates')
    query = pp.StringStart() + \
        location + \
        pp.Optional(predicates) + \
        pp.Optional(orderByExpr) + \
        pp.Optional(limitExpr) + \
        pp.Optional(offsetExpr) + \
        pp.StringEnd()
    return query.parseString
def make_grammar():
    """Build and return the filter-expression grammar.

    The grammar recognizes binary comparisons (``table.column <op>
    value``), where a value is a quoted string, ISO-8601 datetime,
    integer, list, or /regex/, and logical combinators written in
    function style: ``and(...)``, ``or(...)``, ``not(...)``.  Each
    sub-expression is converted to the corresponding AST node class
    (ColumnNode, IntNode, ... defined elsewhere) via parse actions.

    :return: the top-level ``base_expr`` pyparsing element
    """
    name_expr = pp.Word(pp.alphas + "_").setName("Name")
    table_expr = pp.Word(pp.alphas + "_").setName("Table")
    # Column references are always "table.name"; '-' makes the match
    # non-backtracking after the table part.
    column_expr = (table_expr - pp.Suppress(".") - name_expr).setName("Column").setParseAction(ColumnNode)
    number_expr = pp.Word(
        pp.nums).setName("number expression").setParseAction(IntNode)
    string_expr = pp.QuotedString(
        quoteChar='"', endQuoteChar='"', escChar='\\',
        unquoteResults=True).setParseAction(StringNode)
    iso_date_time_expr = pp.pyparsing_common.iso8601_datetime.copy(
    ).setParseAction(DateTimeNode)

    # Lists may nest, so the element rule must be forward-declared.
    list_expr = pp.Forward()
    value = (string_expr | iso_date_time_expr | number_expr | list_expr)
    list_expr <<= pp.nestedExpr(opener="[", closer="]",
                                content=pp.delimitedList(value))
    list_expr.setName("list expression, starting with [ ending with ]")
    list_expr.setParseAction(ListNode)

    infix_op = pp.oneOf("< > = ~").setName('infix_op').setParseAction(
        InfixOpNode)
    regexp_expr = pp.QuotedString(
        quoteChar='/', endQuoteChar='/', escChar='\\',
        unquoteResults=True).setParseAction(RegexpNode)
    binary_expression = (column_expr - infix_op -
                         (value | regexp_expr)).setName(
        "binary expression").setParseAction(BinaryNode)

    logical_expression = pp.Forward()
    l_paren = pp.Suppress(pp.Literal("("))
    r_paren = pp.Suppress(pp.Literal(")"))
    # A trailing comma before ')' is tolerated in all combinators.
    trailing_comma = pp.Optional(pp.Suppress(pp.Literal(",")))
    base_expr = pp.Forward()

    def _logical_expr(name, expr_val, parse_action, single_argument=False):
        # Build "name( expr [, expr ...] [,] )"; not() takes one argument.
        keyword = pp.Suppress(pp.CaselessKeyword(name))
        entries = expr_val.copy() if single_argument else pp.delimitedList(
            expr_val.copy())
        return keyword - l_paren - entries.setParseAction(
            parse_action) - trailing_comma - r_paren

    and_expression = _logical_expr("and", base_expr, AndNode)
    or_expression = _logical_expr("or", base_expr, OrNode)
    not_expression = _logical_expr("not", base_expr, NotNode,
                                   single_argument=True)
    log_expression = and_expression | or_expression | not_expression
    logical_expression <<= log_expression.setName('logical expression')
    # A complete expression is either a logical combinator or a bare
    # binary comparison.
    base_expr <<= logical_expression | binary_expression
    return base_expr
# Case items whose content is irrelevant to output generation are parsed
# but suppressed (desc / version100es are defined earlier in this file).
ignoredCaseItem = (desc | version100es).suppress()

caseItem = pp.Group(values | expectation | code | reqGlsl100) | ignoredCaseItem
caseBody = pp.ZeroOrMore(caseItem)
blockEnd = pp.Keyword("end").suppress();
caseHeader = pp.Keyword("case") + wordWithUnderscores
# A case is: "case <name>" ... items ... "end".
case = pp.Group(caseHeader + caseBody + blockEnd)

# Groups can be nested to any depth (or can be absent), and may contain any number of cases.
# The names in the group header are ignored.
groupHeader = (pp.Keyword("group") + wordWithUnderscores + pp.QuotedString('"')).suppress()
group = pp.Forward()
group <<= pp.OneOrMore(case | (groupHeader + group + blockEnd))

# The full grammar is just the group specification, plus the fact that
# '#' introduces a line comment.
grammar = group
group.ignore('#' + pp.restOfLine)

# Parse the whole test-case listing from standard input.
testCases = grammar.parse_string(sys.stdin.read(), parse_all=True)

# Write output files in subdirectories next to this script.
testDirectory = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
passDirectory = testDirectory + "/pass"
failDirectory = testDirectory + "/fail"
os.makedirs(passDirectory, exist_ok=True)
os.makedirs(failDirectory, exist_ok=True)

# Tracks files already written, to detect/avoid duplicates.
written = {}
def make_parser():
    """Build and return the grammar for a small C#-like language.

    Recognizes class declarations containing fields, methods, and the
    statements if/else, for, while, do-while, assignment, increment and
    decrement, calls (including Console.WriteLine) and return.  After the
    grammar is assembled, ``set_parse_action_magic`` walks ``locals()``
    and attaches a parse action to every rule, mapping each rule name to
    an AST node class named "<RuleName>Node" (resolved via eval; rules
    with no matching class are left untouched).

    :return: the top-level ``start`` pyparsing element
    """
    # --- terminals (ALL-CAPS names are skipped by the parse-action magic) ---
    MULT = pp.oneOf('* /')
    ADD = pp.oneOf('+ -')
    COND = pp.oneOf('== != > < >= <=')
    LOGIC = pp.oneOf('&& ||')
    #ASSIGN = pp.oneOf('+= -=')
    RIGHTP, LEFTP, LEFTBR, RIGHTBR, LEFTSQ, RIGHTSQ = pp.Literal(')').suppress(), pp.Literal('(').suppress(),\
        pp.Literal('{').suppress(), pp.Literal('}').suppress(),\
        pp.Literal('['), pp.Literal(']')
    SIGN = pp.oneOf('= += -= *= /=')
    DOUBLEOP = pp.oneOf(' ++ --')
    ENDSTR = pp.Literal(';').suppress()
    COMMA = pp.Literal(',').suppress()
    DOT = pp.Literal('.').suppress()
    BOOLEAN = pp.Keyword('true')  #| pp.Keyword('false')
    CLASS = pp.Keyword('class')
    PRINT = pp.Keyword('Console.WriteLine')
    NEW = pp.Keyword('new')
    ACCESS = pp.oneOf('public private')  #pp.Keyword('public')
    STATIC = pp.Keyword('static')
    IF, ELSE = pp.Keyword('if'), pp.Keyword('else')
    FOR, WHILE, DO = pp.Keyword('for'), pp.Keyword('while'), pp.Keyword('do')
    RETURN = pp.Keyword('return')

    # --- forward declarations for mutually recursive rules ---
    add = pp.Forward()
    block = pp.Forward()
    ident = pp.Forward()
    var = pp.Forward()
    condelse = pp.Forward()
    class_1 = pp.Forward()

    numb = ppc.number
    str_ = pp.QuotedString('"')
    literal = numb | str_
    bool = BOOLEAN
    ret = RETURN.suppress() + add + ENDSTR
    # Comma-separated constructor/call/initializer arguments.
    cons = pp.ZeroOrMore((var | add) + pp.Optional(COMMA))
    call = pp.Optional(NEW) + (PRINT | ident) + LEFTP + pp.ZeroOrMore(
        (var | add) + pp.Optional(COMMA)) + RIGHTP + pp.Optional(ENDSTR)
    group = bool | call | ident | literal | LEFTP + add + RIGHTP
    # Identifier with an optional [index] suffix (array access).
    ident << ppc.identifier + pp.Optional(LEFTSQ + pp.Optional(group) + RIGHTSQ)
    # Arithmetic precedence: mult binds tighter than add.
    mult = group + pp.ZeroOrMore(MULT + group)
    add << mult + pp.ZeroOrMore(ADD + mult)
    cond = add + COND + add
    cond_list = LEFTP + cond + pp.ZeroOrMore(LOGIC + cond) + RIGHTP
    array = NEW.suppress() + ident + pp.Optional(
        LEFTSQ + cons + RIGHTSQ) + pp.Optional(
        (LEFTBR + cons + RIGHTBR)) | (LEFTBR + cons + RIGHTBR)
    # increment = DOUBLEOP + ident | ident + DOUBLEOP + pp.Optional(ENDSTR)
    # Prefix/postfix ++/-- or an assignment of a call, array or expression.
    sign = (DOUBLEOP + ident | ident + DOUBLEOP |
            ident + SIGN + (call | array | add)) + pp.Optional(ENDSTR)
    # pp.Optional(SIGN) + ident + SIGN + pp.Optional((array | call | add)) + pp.Optional(ENDSTR)
    var << pp.Optional(ACCESS) + pp.Optional(STATIC) + ident + (
        sign | ident) + pp.Optional(ENDSTR)

    # --- statements ---
    condif = IF.suppress() + cond_list + pp.Optional(block) + pp.Optional(
        condelse)
    condelse << ELSE.suppress() + (condif | pp.Optional(block))
    for_circle = FOR.suppress() + LEFTP + pp.Optional(
        var | sign) + cond + ENDSTR + pp.Optional(var | sign) + RIGHTP + block
    while_circle = WHILE.suppress() + LEFTP + cond + RIGHTP + pp.Optional(
        block)
    do_circle = DO.suppress() + block + WHILE.suppress(
    ) + while_circle + ENDSTR
    func = (var | pp.Optional(ACCESS) + ident) + LEFTP + cons + RIGHTP + block
    expression = ret | call | sign | var | condif | for_circle | while_circle | do_circle | func  # |inner
    block << LEFTBR + pp.ZeroOrMore(expression) + RIGHTBR
    class_1 << pp.Optional(ACCESS) + CLASS.suppress(
    ) + ident + LEFTBR + pp.ZeroOrMore(func | var | class_1) + RIGHTBR
    expr_lines = pp.ZeroOrMore(class_1)
    program = expr_lines.ignore(pp.cStyleComment).ignore(
        pp.dblSlashComment) + pp.stringEnd
    start = program

    def set_parse_action_magic(rule_name: str, parser: pp.ParserElement) -> None:
        # Attach an AST-building parse action based on the rule's name.
        # ALL-CAPS names are terminals and get no action.
        if rule_name == rule_name.upper():
            return
        if rule_name in ('mult', 'add', 'cond', 'cond_list'):
            def bin_op_parse_action(s, loc, tocs):
                # Fold "a op b op c" left-associatively into BOperNode.
                node = tocs[0]
                for i in range(1, len(tocs) - 1, 2):
                    node = BOperNode(BinOperation(tocs[i]), node, tocs[i + 1])
                return node
            parser.setParseAction(bin_op_parse_action)
        else:
            # snake_case rule name -> CamelCase AST class name + 'Node';
            # NOTE(review): eval() here only resolves a class name built
            # from local identifiers, not external input.
            cls = ''.join(x.capitalize() or '_' for x in rule_name.split('_')) + 'Node'
            with suppress(NameError):
                cls = eval(cls)
                parser.setParseAction(lambda s, loc, tocs: cls(*tocs))

    # Wire up parse actions for every grammar rule defined above.
    for var_name, value in locals().copy().items():
        if isinstance(value, pp.ParserElement):
            set_parse_action_magic(var_name, value)

    return start
value = pmdbpy.AttributeValue() if (toks.number != ''): value.dtype = pmdbpy.DataType.Integer value.nameValue = toks.number value.isColumn = False elif (toks.string != ''): value.dtype = pmdbpy.DataType.String value.nameValue = toks.string value.isColumn = False elif (toks.column != ''): value.nameValue = toks.column value.isColumn = True return value subquery = pp.Forward().setParseAction(genSelectClause) colValue = (number('number') | sqString('string') | identifier('column')) comparison = pp.Group( colValue('lhs').setParseAction(genValue) + '=' + colValue('rhs').setParseAction(genValue)) where = (kwWhere + comparison('comparison')) tableReference = identifier('table').addParseAction(genTable) | subquery( 'subquery') tableReferenceList = pp.delimitedList(tableReference) sqlSelect = ( kwSelect + selectList('selectList') + kwFrom + tableReferenceList('tableReferenceList').setParseAction(genTableList) + pp.Optional(where('where')))
GLOBAL_VARIABLE.setParseAction(lambda s, l, t: GlobalVariableType(t[0])) # Defines the keywords reserved to the special function calls. SPECIAL_FUNCTION_NAMES = (pp.Keyword('assert') | pp.Keyword('retract') | pp.Keyword('bind')) # Defines the name of a function excluding the keyboards reserved to the special function calls. FUNCTION_NAME = ~SPECIAL_FUNCTION_NAMES + pp.Word( pp.printables.translate(None, '()' + pp.nums), pp.printables.translate(None, '()')) # Defines a constant. CONSTANT = BOOLEAN | SYMBOL | STRING | FLOAT | INTEGER # Initializes the calling of a function. FUNCTION_CALL = pp.Forward() # Defines the name of a single variable. SINGLEFIELD_VARIABLE = pp.Combine(pp.Literal('?') + VARIABLE_SYMBOL) # Defines the name of a variable. VARIABLE = SINGLEFIELD_VARIABLE | GLOBAL_VARIABLE # Defines an expression. EXPRESSION = CONSTANT | VARIABLE | FUNCTION_CALL # Defines the calling of a function. FUNCTION_CALL << OB + FUNCTION_NAME + pp.ZeroOrMore(EXPRESSION) + CB # Defines a field of an ordered pattern. FACT_FIELD = GLOBAL_VARIABLE | CONSTANT | FUNCTION_CALL
def smile_parser(str_smile):
    """Tokenize a SMILES string and map each token to a numeric index.

    Parses ``str_smile`` with a pyparsing grammar approximating the
    SMILES specification, then looks up every resulting token in
    ``atomicIndex``.  If the concatenated tokens do not round-trip to
    the input (whitespace removed), a diagnostic line is printed.

    :param str_smile: SMILES string, e.g. ``'FC(c1ccc(cc1)Cl)(F)F'``
    :return: list of integer indices, one per parsed token
    :raises KeyError: if a parsed token is not present in ``atomicIndex``
    """
    # str='FC(c1ccc(cc1)Cl)(F)F'
    # Token -> index table: elements 1-118, aromatic atoms, bond/branch
    # punctuation, and ring-closure digits.
    atomicIndex = {
        'H': 1, 'He': 2, 'Li': 3, 'Be': 4, 'B': 5, 'C': 6, 'N': 7, 'O': 8,
        'F': 9, 'Ne': 10, 'Na': 11, 'Mg': 12, 'Al': 13, 'Si': 14, 'P': 15,
        'S': 16, 'Cl': 17, 'Ar': 18, 'K': 19, 'Ca': 20, 'Sc': 21, 'Ti': 22,
        'V': 23, 'Cr': 24, 'Mn': 25, 'Fe': 26, 'Co': 27, 'Ni': 28, 'Cu': 29,
        'Zn': 30, 'Ga': 31, 'Ge': 32, 'As': 33, 'Se': 34, 'Br': 35, 'Kr': 36,
        'Rb': 37, 'Sr': 38, 'Y': 39, 'Zr': 40, 'Nb': 41, 'Mo': 42, 'Tc': 43,
        'Ru': 44, 'Rh': 45, 'Pd': 46, 'Ag': 47, 'Cd': 48, 'In': 49, 'Sn': 50,
        'Sb': 51, 'Te': 52, 'I': 53, 'Xe': 54, 'Cs': 55, 'Ba': 56, 'La': 57,
        'Ce': 58, 'Pr': 59, 'Nd': 60, 'Pm': 61, 'Sm': 62, 'Eu': 63, 'Gd': 64,
        'Tb': 65, 'Ty': 66, 'Ho': 67, 'Er': 68, 'Tm': 69, 'Yb': 70, 'Lu': 71,
        'Hf': 72, 'Ta': 73, 'W': 74, 'Re': 75, 'Os': 76, 'Ir': 77, 'Pt': 78,
        'Au': 79, 'Hg': 80, 'Tl': 81, 'Pb': 82, 'Bi': 83, 'Po': 84, 'At': 85,
        'Rn': 86, 'Fr': 87, 'Ra': 88, 'Ac': 89, 'Th': 90, 'Pa': 91, 'U': 92,
        'Np': 93, 'Pu': 94, 'Am': 95, 'Cm': 96, 'Bk': 97, 'Cf': 98, 'Es': 99,
        'Fm': 100, 'Md': 101, 'No': 102, 'Lr': 103, 'Rf': 104, 'Db': 105,
        'Sg': 106, 'Bh': 107, 'Hs': 108, 'Mt': 109, 'Ds': 110, 'Rg': 111,
        'Cn': 112, 'Nh': 113, 'Fl': 114, 'Mc': 115, 'Lv': 116, 'Ts': 117,
        'Og': 118, 'c': 119, 'o': 120, 'n': 121, 's': 122, '=': 123,
        '#': 124, '@': 125, '(': 126, ')': 127, '[': 128, ']': 129,
        '+': 130, '-': 139, '0': 140, '1': 141, '2': 142, '3': 143,
        '4': 144, '5': 145, '6': 146, '7': 147, '8': 148, '9': 149,
        '10': 150, '11': 151, '12': 152, '13': 153, '14': 154, '15': 155,
        '16': 156, '17': 157, '18': 158, '19': 159, '20': 160
    }

    # Grammar definition
    isotope = pp.Regex('[1-9][0-9]?[0-9]?')
    atomclass = pp.Regex(':[0-9]+')
    bond = pp.oneOf(['-', '=', '#', '$', ':', '/', '\\', '.'])
    organicsymbol = pp.oneOf(
        ['B', 'Br', 'C', 'Cl', 'N', 'O', 'P', 'S', 'F', 'I'])
    aromaticsymbol = pp.oneOf(['b', 'c', 'n', 'o', 'p', 's'])
    elementsymbol = pp.oneOf([
        'Al', 'Am', 'Sb', 'Ar', 'At', 'Ba', 'Bk', 'Be', 'Bi', 'Bh', 'B',
        'Br', 'Cd', 'Ca', 'Cf', 'C', 'Ce', 'Cs', 'Cl', 'Cr', 'Co', 'Cu',
        'Cm', 'Ds', 'Db', 'Dy', 'Es', 'Er', 'Eu', 'Fm', 'F', 'Fr', 'Gd',
        'Ga', 'Ge', 'Au', 'Hf', 'Hs', 'He', 'Ho', 'H', 'In', 'I', 'Ir',
        'Fe', 'Kr', 'La', 'Lr', 'Pb', 'Li', 'Lu', 'Mg', 'Mn', 'Mt', 'Md',
        'Hg', 'Mo', 'Nd', 'Ne', 'Np', 'Ni', 'Nb', 'N', 'No', 'Os', 'O',
        'Pd', 'P', 'Pt', 'Pu', 'Po', 'K', 'Pr', 'Pm', 'Pa', 'Ra', 'Rn',
        'Re', 'Rh', 'Rg', 'Rb', 'Ru', 'Rf', 'Sm', 'Sc', 'Sg', 'Se', 'Si',
        'Ag', 'Na', 'Sr', 'S', 'Ta', 'Tc', 'Te', 'Tb', 'Tl', 'Th', 'Tm',
        'Sn', 'Ti', 'W', 'Uub', 'Uuh', 'Uuo', 'Uup', 'Uuq', 'Uus', 'Uut',
        'Uuu', 'U', 'V', 'Xe', 'Yb', 'Y', 'Zn', 'Zr'
    ])
    integer = pp.Word("0123456789")
    # NOTE(review): pp.oneOf(['1 2 3 4 5 6 7 8 9']) is a one-element list,
    # so it matches the literal spaced string rather than any single digit
    # (unlike pp.oneOf('1 2 3 4 5 6 7 8 9')); likewise the '[0-5]'-style
    # strings below are NOT interpreted as regex character classes.
    # These look like bugs inherited from the SMILES BNF — verify intent.
    ringclosure = pp.Optional(
        pp.Literal('%') + pp.oneOf(['1 2 3 4 5 6 7 8 9'])) + pp.oneOf(
        ['1 2 3 4 5 6 7 8 9'])
    charge = (pp.Literal('-') + pp.Optional(
        pp.oneOf(['-02-9']) ^ pp.Literal('1') + pp.Optional(pp.oneOf(['0-5']))
    )) ^ pp.Literal('+') + pp.Optional(
        pp.oneOf(['+02-9']) ^ pp.Literal('1') + pp.Optional(pp.oneOf('[0-5]')))
    chiralclass = pp.Optional(
        pp.Literal('@') + pp.Optional(pp.Literal('@')) ^
        (pp.Literal('TH') ^ pp.Literal('AL')) + pp.oneOf('[1-2]') ^
        pp.Literal('SP') + pp.oneOf('[1-3]') ^
        pp.Literal('TB') + (pp.Literal('1') + pp.Optional(pp.oneOf('[0-9]')) ^
                            pp.Literal('2') + pp.Optional(pp.Literal('0')) ^
                            pp.oneOf('[3-9]')) ^
        pp.Literal('OH') + ((pp.Literal('1') ^ pp.Literal('2')) +
                            pp.Optional(pp.oneOf('[0-9]')) ^
                            pp.Literal('3') + pp.Optional(pp.Literal('0')) ^
                            pp.oneOf('[4-9]')))
    # Bracket atom: "[" isotope? symbol chirality? hcount? charge? class? "]"
    atomspec = pp.Literal('[') + pp.OneOrMore(
        pp.Optional(isotope) +
        (pp.Literal('se') ^ pp.Literal('as') ^ aromaticsymbol ^
         elementsymbol ^ pp.Literal('*')) + pp.Optional(chiralclass) +
        pp.Optional(integer) + pp.Optional(charge) +
        pp.Optional(atomclass)) + pp.Literal(']')
    atom = (organicsymbol + pp.Optional(integer)) ^ (
        aromaticsymbol + pp.Optional(integer)) ^ pp.Literal('*') ^ (
        atomspec + pp.Optional(integer))
    chain = pp.OneOrMore(pp.Optional(bond) + (atom ^ ringclosure))
    smiles = pp.Forward()
    branch = pp.Forward()
    smiles << atom + pp.ZeroOrMore(chain ^ branch)
    branch << (pp.Literal('(') +
               (pp.OneOrMore(bond + pp.OneOrMore(smiles)) ^
                pp.OneOrMore(smiles)) + pp.Literal(')'))

    formulaData = smiles.parseString(str_smile)
    #print('specie:' + str_smile.replace(" ", ""))
    # Sanity check: the joined tokens must reproduce the input exactly;
    # otherwise report what was lost in parsing.
    if len(str_smile.replace(" ", "")) != len(''.join(
            str(e) for e in formulaData)):
        print('wrong:' + str_smile.replace(" ", "") + ' => ' +
              ''.join(str(e) for e in formulaData) + ' &act_len:' +
              str(len(str_smile.replace(" ", ""))) + ' &trans_len:' +
              str(len(''.join(str(e) for e in formulaData))))
    formulaData_Index = [atomicIndex[c] for c in formulaData]
    return formulaData_Index
def bracketedList(l, r, sep, expr, what):
    """Parse bracketed list.

    Empty list is possible, as is a trailing separator.
    The '-' operators make parsing non-backtracking once the opening
    bracket has matched, improving error reporting.
    """
    return (sym(l) - listMembers(sep, expr, what) - sym(r)).setParseAction(head)


# Reserved words that may not be used as identifiers.
keywords = [
    'and', 'or', 'not', 'if', 'then', 'else', 'include', 'inherit', 'null',
    'true', 'false'
]

# Forward-declared so constant/compound rules can reference expressions.
expression = p.Forward()

comment = '#' + p.restOfLine

identifier = p.Regex(r'[a-zA-Z_][a-zA-Z0-9_:-]*')

# Constants
integer = p.Word(p.nums).setParseAction(do(head, int, Constant))
floating = p.Regex(r'\d*\.\d+').setParseAction(do(head, float, Constant))
dq_string = p.QuotedString('"', escChar='\\',
                           multiline=True).setParseAction(do(head, Constant))
sq_string = p.QuotedString("'", escChar='\\',
                           multiline=True).setParseAction(do(head, Constant))
boolean = (p.Keyword('true') | p.Keyword('false')).setParseAction(
    do(head, mkBool, Constant))
null = p.Keyword('null').setParseAction(Null)
def parse_file(file_name):
    """Parse an IDL-like C++ header subset and return the parse results.

    Recognizes (possibly templated) class/struct definitions with
    attributed members and default values, ``enum class`` declarations
    with an explicit underlying type, and arbitrarily nested namespaces.
    Line (``//``) and block (``/* */``) comments are ignored.

    :param file_name: path of the file to parse
    :return: pyparsing ParseResults for the whole file
    :raises pyparsing.ParseException: on a syntax error
    """
    # --- lexical elements ---
    number = pp.Word(pp.nums)
    identifier = pp.Word(pp.alphas + "_", pp.alphanums + "_")
    lbrace = pp.Literal('{').suppress()
    rbrace = pp.Literal('}').suppress()
    cls = pp.Keyword('class')
    colon = pp.Literal(":")
    semi = pp.Literal(";").suppress()
    langle = pp.Literal("<")
    rangle = pp.Literal(">")
    equals = pp.Literal("=")
    comma = pp.Literal(",")
    lparen = pp.Literal("(")
    rparen = pp.Literal(")")
    lbrack = pp.Literal("[")
    rbrack = pp.Literal("]")
    mins = pp.Literal("-")
    struct = pp.Keyword('struct')
    template = pp.Keyword('template')
    final = pp.Keyword('final')("final")
    stub = pp.Keyword('stub')("stub")

    # Possibly namespace-qualified name, e.g. "std::string".
    with_colon = pp.Word(pp.alphanums + "_" + ":")
    btype = with_colon

    # A type is either a plain name or a template instantiation, which may
    # itself contain types — hence the forward declaration.
    # (Renamed from `type` to avoid shadowing the builtin.)
    type_expr = pp.Forward()
    tmpl = pp.Group(
        btype("template_name") + langle.suppress() +
        pp.Group(pp.delimitedList(type_expr)) + rangle.suppress())
    type_expr << (tmpl | btype)

    # --- enums: "enum class Name : underlying { A = 1, B, ... };" ---
    enum_lit = pp.Keyword('enum')
    enum_class = pp.Group(enum_lit + cls)
    ns = pp.Keyword("namespace")
    enum_init = equals.suppress() + pp.Optional(mins) + number
    enum_value = pp.Group(identifier + pp.Optional(enum_init))
    enum_values = pp.Group(lbrace + pp.delimitedList(enum_value) +
                           pp.Optional(comma) + rbrace)

    content = pp.Forward()

    # Member name, optionally followed by "()" (zero-argument accessor).
    member_name = pp.Combine(
        pp.Group(identifier + pp.Optional(lparen + rparen)))
    # "[[...]]" attribute; the payload is captured verbatim via SkipTo.
    attrib = pp.Group(lbrack.suppress() + lbrack.suppress() +
                      pp.SkipTo(']') + rbrack.suppress() + rbrack.suppress())
    opt_attribute = pp.Optional(attrib)("attribute")

    namespace = pp.Group(
        ns("type") + identifier("name") + lbrace +
        pp.Group(pp.OneOrMore(content))("content") + rbrace)
    enum = pp.Group(
        enum_class("type") + identifier("name") + colon.suppress() +
        identifier("underline_type") + enum_values("enum_values") +
        pp.Optional(semi).suppress())

    # "= <anything up to ';'>" — default value captured verbatim.
    default_value = equals.suppress() + pp.SkipTo(';')
    class_member = pp.Group(
        type_expr("type") + member_name("name") + opt_attribute +
        pp.Optional(default_value)("default") + semi.suppress())("member")

    # --- templates and classes ---
    template_param = pp.Group(identifier("type") + identifier("name"))
    template_def = pp.Group(template + langle +
                            pp.Group(pp.delimitedList(template_param))
                            ("params") + rangle)
    class_content = pp.Forward()
    class_def = pp.Group(
        pp.Optional(template_def)("template") + (cls | struct)("type") +
        with_colon("name") + pp.Optional(final) + pp.Optional(stub) +
        opt_attribute + lbrace +
        pp.Group(pp.ZeroOrMore(class_content))("members") + rbrace +
        pp.Optional(semi))

    content << (enum | class_def | namespace)
    class_content << (enum | class_def | class_member)

    # Give the main rules readable names for error messages/debugging.
    for varname in "enum class_def class_member content namespace template_def".split(
    ):
        locals()[varname].setName(varname)

    rt = pp.OneOrMore(content)
    singleLineComment = "//" + pp.restOfLine
    rt.ignore(singleLineComment)
    rt.ignore(pp.cStyleComment)
    return rt.parseFile(file_name, parseAll=True)
pp.alphas) + pp.Word(pp.alphanums + "_").setResultsName("prop") value = (pp.QuotedString("'") | pp.QuotedString('"') | pp.Word(pp.printables, excludeChars=",")).setResultsName("value") types_ = pp.oneOf("re eq ne gt ge lt le").setResultsName("types") flags = pp.oneOf("C I").setResultsName("flags") comma = pp.Literal(',') quote = (pp.Literal("'") | pp.Literal('"')).setResultsName("quote") type_exp = pp.Group( pp.Literal("type") + pp.Literal("=") + quote + types_ + quote).setResultsName("type_exp") flag_exp = pp.Group( pp.Literal("flag") + pp.Literal("=") + quote + flags + quote).setResultsName("flag_exp") semi_expression = pp.Forward() semi_expression << pp.Group( pp.Literal("(") + prop + comma + value + pp.Optional(comma + type_exp) + pp.Optional(comma + flag_exp) + pp.Literal(")")).setParseAction( ParseFilter.parse_filter_obj).setResultsName("semi_expression") expr = pp.Forward() expr << pp.operatorPrecedence( semi_expression, [("not", 1, pp.opAssoc.RIGHT, ParseFilter.not_operator), ("and", 2, pp.opAssoc.LEFT, ParseFilter.and_operator), ("or", 2, pp.opAssoc.LEFT, ParseFilter.or_operator)]) def generate_infilter(class_id, filter_str, is_meta_class_id): """ Create FilterFilter object
# Membership test: attribute in/not-in a literal list of numbers or strings.
membership_comparison_p = attribute_identifier_p + membership_comparison_operator_p + (
    number_list_literal_p | string_list_literal_p)
# tokens[0::2] are the operands, tokens[1] is the operator keyword.
membership_comparison_p.setParseAction(
    lambda tokens: BinaryOperator(tokens[1], tokens[0::2]))

comparison_p = value_comparison_p | membership_comparison_p

# Comparisons combined with "and"/"or"; both are left-associative and
# fold their operand list into a single BinaryOperator node.
logical_expression_p = pp.infixNotation(comparison_p, [
    (pp.Literal("and"), 2, pp.opAssoc.LEFT,
     lambda tokens: BinaryOperator(tokens[0][1], tokens[0][0::2])),
    (pp.Literal("or"), 2, pp.opAssoc.LEFT,
     lambda tokens: BinaryOperator(tokens[0][1], tokens[0][0::2])),
])

# Function calls may nest (a call can be an argument), hence the Forward.
function_call_p = pp.Forward()
function_argument_p = function_call_p | attribute_identifier_p | string_literal_p | float_literal_p | integer_literal_p
function_call_p << pp.Word(
    pp.alphas, pp.alphanums) + pp.Suppress("(") + pp.Optional(
    pp.delimitedList(function_argument_p, delim=",")) + pp.Suppress(")")
function_call_p.setParseAction(FunctionCall)

attribute_expression_p = logical_expression_p | function_call_p | attribute_identifier_p | slice_literal_p

# Define the arrays section of a hyperchunk.
arrays_expression_p = slices_literal_p

# Define the attributes section of a hyperchunk.
attributes_expression_p = pp.delimitedList(attribute_expression_p, delim="|")
STR_EXPRAFF = pyparsing.Suppress("ExprAff") STR_COMMA = pyparsing.Suppress(",") LPARENTHESIS = pyparsing.Suppress("(") RPARENTHESIS = pyparsing.Suppress(")") string_quote = pyparsing.QuotedString(quoteChar="'", escChar='\\', escQuote='\\') string_dquote = pyparsing.QuotedString(quoteChar='"', escChar='\\', escQuote='\\') string = string_quote | string_dquote expr = pyparsing.Forward() expr_int = pyparsing.Group(STR_EXPRINT + LPARENTHESIS + str_int + STR_COMMA + str_int + RPARENTHESIS) expr_id = pyparsing.Group(STR_EXPRID + LPARENTHESIS + string + STR_COMMA + str_int + RPARENTHESIS) expr_slice = pyparsing.Group(STR_EXPRSLICE + LPARENTHESIS + expr + STR_COMMA + str_int + STR_COMMA + str_int + RPARENTHESIS) expr_mem = pyparsing.Group(STR_EXPRMEM + LPARENTHESIS + expr + STR_COMMA + str_int + RPARENTHESIS) expr_cond = pyparsing.Group(STR_EXPRCOND + LPARENTHESIS + expr + STR_COMMA + expr + STR_COMMA + expr + RPARENTHESIS) expr_compose = pyparsing.Group(STR_EXPRCOMPOSE + LPARENTHESIS + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS) expr_op = pyparsing.Group(STR_EXPROP + LPARENTHESIS + string + STR_COMMA +
def _build_tgrep_parser(set_parse_actions = True):
    '''
    Builds a pyparsing-based parser object for tokenizing and
    interpreting tgrep search strings.

    :param set_parse_actions: when True (the default), attach the
        module's ``_tgrep_*_action`` callbacks so parsing yields
        predicate functions; when False, the parser only tokenizes.
    :return: the top-level ``tgrep_exprs`` parser element, with ``#``
        line comments ignored.
    '''
    # Relation operator: optional negation followed by an operator made of
    # the characters [$%,.<>] and their modifiers.
    tgrep_op = (pyparsing.Optional('!') +
                pyparsing.Regex('[$%,.<>][%,.<>0-9-\':]*'))
    # Quoted node names and /regex/ node patterns; quotes are kept
    # (unquoteResults=False) for the downstream actions.
    tgrep_qstring = pyparsing.QuotedString(quoteChar='"', escChar='\\',
                                           unquoteResults=False)
    tgrep_node_regex = pyparsing.QuotedString(quoteChar='/', escChar='\\',
                                              unquoteResults=False)
    # Case-insensitive variants, prefixed i@ as in tgrep2.
    tgrep_qstring_icase = pyparsing.Regex(
        'i@\\"(?:[^"\\n\\r\\\\]|(?:\\\\.))*\\"')
    tgrep_node_regex_icase = pyparsing.Regex(
        'i@\\/(?:[^/\\n\\r\\\\]|(?:\\\\.))*\\/')
    # Bare node literal: any run of characters not special to the syntax.
    tgrep_node_literal = pyparsing.Regex('[^][ \r\t\n;:.,&|<>()$!@%\'^=]+')
    # Mutually recursive rules, declared Forward and resolved below.
    tgrep_expr = pyparsing.Forward()
    tgrep_relations = pyparsing.Forward()
    tgrep_parens = pyparsing.Literal('(') + tgrep_expr + ')'
    # NLTK tree-position addressing: N(i,j,...)
    tgrep_nltk_tree_pos = (
        pyparsing.Literal('N(') +
        pyparsing.Optional(pyparsing.Word(pyparsing.nums) + ',' +
                           pyparsing.Optional(pyparsing.delimitedList(
                               pyparsing.Word(pyparsing.nums), delim=',') +
                               pyparsing.Optional(','))) + ')')
    # Node labels: =label defines/uses a binding.
    tgrep_node_label = pyparsing.Regex('[A-Za-z0-9]+')
    tgrep_node_label_use = pyparsing.Combine('=' + tgrep_node_label)
    # see _tgrep_segmented_pattern_action
    tgrep_node_label_use_pred = tgrep_node_label_use.copy()
    # Macro names; setWhitespaceChars('') makes the name adjoin the '@'.
    macro_name = pyparsing.Regex('[^];:.,&|<>()[$!@%\'^=\r\t\n ]+')
    macro_name.setWhitespaceChars('')
    macro_use = pyparsing.Combine('@' + macro_name)
    # A node expression -- alternative order matters ("|" is MatchFirst):
    # label uses and macros before literals, so '=x' and '@m' are not
    # swallowed by tgrep_node_literal.
    tgrep_node_expr = (tgrep_node_label_use_pred |
                       macro_use |
                       tgrep_nltk_tree_pos |
                       tgrep_qstring_icase |
                       tgrep_node_regex_icase |
                       tgrep_qstring |
                       tgrep_node_regex |
                       '*' |
                       tgrep_node_literal)
    # A node expression optionally followed by '=label' (no whitespace
    # permitted around the '=').
    tgrep_node_expr2 = ((tgrep_node_expr +
                         pyparsing.Literal('=').setWhitespaceChars('') +
                         tgrep_node_label.copy().setWhitespaceChars('')) |
                        tgrep_node_expr)
    # A node: parenthesized expression, or an (optionally quoted) node
    # expression with "|"-separated alternatives.
    tgrep_node = (tgrep_parens |
                  (pyparsing.Optional("'") +
                   tgrep_node_expr2 +
                   pyparsing.ZeroOrMore("|" + tgrep_node_expr)))
    # [relations] groups relations; '!' negates the whole group.
    tgrep_brackets = pyparsing.Optional('!') + '[' + tgrep_relations + ']'
    tgrep_relation = tgrep_brackets | (tgrep_op + tgrep_node)
    # Conjunction: relations joined by optional '&'; disjunction: '|'.
    tgrep_rel_conjunction = pyparsing.Forward()
    tgrep_rel_conjunction << (tgrep_relation +
                              pyparsing.ZeroOrMore(pyparsing.Optional('&') +
                                                   tgrep_rel_conjunction))
    tgrep_relations << tgrep_rel_conjunction + pyparsing.ZeroOrMore(
        "|" + tgrep_relations)
    # A full expression: a node plus its (optional) relations.
    tgrep_expr << tgrep_node + pyparsing.Optional(tgrep_relations)
    # Segmented patterns: '=label relations' segments chained with ':'.
    tgrep_expr_labeled = tgrep_node_label_use + pyparsing.Optional(tgrep_relations)
    tgrep_expr2 = tgrep_expr + pyparsing.ZeroOrMore(':' + tgrep_expr_labeled)
    # Macro definition: @ name expression
    macro_defn = (pyparsing.Literal('@') +
                  pyparsing.White().suppress() +
                  macro_name +
                  tgrep_expr2)
    # Top level: optional macro definitions, then one or more expressions
    # separated by ';', with trailing semicolons suppressed.
    tgrep_exprs = (pyparsing.Optional(macro_defn +
                                      pyparsing.ZeroOrMore(';' + macro_defn) +
                                      ';') + tgrep_expr2 +
                   pyparsing.ZeroOrMore(';' + (macro_defn | tgrep_expr2)) +
                   pyparsing.ZeroOrMore(';').suppress())
    if set_parse_actions:
        tgrep_node_label_use.setParseAction(_tgrep_node_label_use_action)
        tgrep_node_label_use_pred.setParseAction(_tgrep_node_label_pred_use_action)
        macro_use.setParseAction(_tgrep_macro_use_action)
        tgrep_node.setParseAction(_tgrep_node_action)
        tgrep_node_expr2.setParseAction(_tgrep_bind_node_label_action)
        tgrep_parens.setParseAction(_tgrep_parens_action)
        tgrep_nltk_tree_pos.setParseAction(_tgrep_nltk_tree_pos_action)
        tgrep_relation.setParseAction(_tgrep_relation_action)
        tgrep_rel_conjunction.setParseAction(_tgrep_conjunction_action)
        tgrep_relations.setParseAction(_tgrep_rel_disjunction_action)
        macro_defn.setParseAction(_macro_defn_action)
        # the whole expression is also the conjunction of two
        # predicates: the first node predicate, and the remaining
        # relation predicates
        tgrep_expr.setParseAction(_tgrep_conjunction_action)
        tgrep_expr_labeled.setParseAction(_tgrep_segmented_pattern_action)
        tgrep_expr2.setParseAction(functools.partial(_tgrep_conjunction_action,
                                                     join_char = ':'))
        tgrep_exprs.setParseAction(_tgrep_exprs_action)
    # '#' starts a comment that runs to end of line.
    return tgrep_exprs.ignore('#' + pyparsing.restOfLine)
""" Define logic of parsing queries string for filtering. Provide function to parse and convert a query filter """ import pyparsing as pp COMPLEX_EXPR = pp.Forward() OPERATOR = pp.Regex("ge|le|ne|gt|lt|eq").setName("operator") LOGICAL = (pp.Keyword("and") | pp.Keyword("or")).setName("logical") DATES = pp.Word(pp.nums + "-" + ":") NAME = pp.Word(pp.alphas, pp.alphanums + "_" + " ") QUOTED = pp.Suppress("'") + NAME + pp.Suppress("'") ^ \ pp.Suppress('"') + NAME + pp.Suppress('"') ^ \ pp.Suppress('"') + DATES + pp.Suppress('"') ^ \ pp.Suppress("'") + DATES + pp.Suppress("'") FIELD = pp.Word(pp.alphas, pp.alphanums + "_") VALUES = pp.Regex(r"[+-]?\d+(:?\.\d*)?(:?[eE][+-]?\d+)?") | \ pp.Word(pp.alphas, pp.alphanums + "_") | QUOTED | DATES CONDITION = (FIELD + OPERATOR + VALUES) CLAUSE = pp.Group(CONDITION ^ (pp.Suppress("(") + COMPLEX_EXPR + pp.Suppress(")"))) EXPR = pp.operatorPrecedence(CLAUSE, [ ( "and", 2, pp.opAssoc.RIGHT, ), ( "or", 2,