def mathematical_expression() -> Token:
    """Build the pyparsing grammar for a mathematical expression.

    Returns an infixNotation parser over terms (numeric literals, attribute
    references, qualified identifiers, concatenations) with three precedence
    tiers: ``**`` binds tightest, then ``*``/``/``, then ``+``/``-`` (all
    left-associative as written here).
    """
    binary_adding_operator = Literal("+") | Literal("-")
    multiplying_operator = Literal("*") | Literal("/")
    highest_precedence_operator = Literal("**")
    # The parse actions on the parentheses return ``l`` (the match location),
    # so parse_array_aggregate receives the source positions of "(" and ")".
    array_aggregate = (Literal("(").setParseAction(lambda s, l, t: l)
                       + numeric_literal()
                       # ``* (0, )`` means "zero or more" repetitions of
                       # ", <numeric_literal>"; ``-`` makes the literal after
                       # a comma mandatory (error instead of backtrack).
                       + (comma() - numeric_literal()) * (0, )
                       + Literal(")").setParseAction(lambda s, l, t: l))
    array_aggregate.setParseAction(parse_array_aggregate)
    string = QuotedString('"')
    string.setParseAction(parse_string)
    concatenation = (infixNotation(
        array_aggregate | string,
        [(Suppress(Keyword("&")), 2, opAssoc.LEFT, parse_concatenation)],
    )).setName("Concatenation")
    term = numeric_literal() | attribute_reference() | qualified_identifier(
    ) | concatenation
    term.setParseAction(parse_term)
    return (infixNotation(
        term,
        [
            (highest_precedence_operator, 2, opAssoc.LEFT,
             parse_mathematical_expression),
            (multiplying_operator, 2, opAssoc.LEFT,
             parse_mathematical_expression),
            (binary_adding_operator, 2, opAssoc.LEFT,
             parse_mathematical_expression),
        ],
    )).setName("MathematicalExpression")
def transform_human(text, main_window):
    """Transform user input into something Script can read.

    Args:
        text (str): User input.
        main_window: Main window; needed for tool integration (variable
            values are looked up via its dock handler).

    Returns:
        str: The transformed text.
    """
    # these are parseActions for pyparsing.
    def str_literal_to_hex(s, loc, toks):
        # Replace each double-quoted string with its 0x-prefixed hex form.
        for i, t in enumerate(toks):
            # FIX: str.encode('hex') was Python-2-only; bytes.hex() is the
            # Python 3 equivalent.
            toks[i] = ''.join(['0x', t.encode('utf-8').hex()])
        return toks

    def var_name_to_value(s, loc, toks):
        # Substitute "$name" with its stored value, if one exists.
        for i, t in enumerate(toks):
            val = main_window.dock_handler.variables.get_key(t.strip('$'))
            if val:
                toks[i] = val
        return toks
    # ^ parseActions for pyparsing end here.

    str_literal = QuotedString('"')
    str_literal.setParseAction(str_literal_to_hex)
    var_name = pyparsing.Combine(Word('$') + Word(pyparsing.alphas))
    var_name.setParseAction(var_name_to_value)

    # Substitute variables first so their values can then be hex-encoded
    # if they happen to be quoted strings.
    s = text
    s = var_name.transformString(s)
    s = str_literal.transformString(s)
    return s
def _create_parser() -> ParserElement:
    """Build the boolean filter-expression parser.

    Filter terms (optionally prefixed with tag:/ingr:/unit:) are combined
    with not/and/xor/or operators (word or symbol spellings); adjacency with
    no operator is treated as an implicit AND.
    """
    # operators in the format later used by infixNotation
    operator_list = [
        # ``None`` operator = implicit AND between adjacent terms.
        (None, 2, opAssoc.LEFT,
         BooleanAndOperation._create_from_implicit_tokens),
        (CaselessKeyword('not') | "~" | "!", 1, opAssoc.RIGHT,
         BooleanNotOperation._create_from_tokens),
        (CaselessKeyword('and') | "&", 2, opAssoc.LEFT,
         BooleanAndOperation._create_from_tokens),
        (CaselessKeyword('xor') | "^", 2, opAssoc.LEFT,
         BooleanXorOperation._create_from_tokens),
        (CaselessKeyword('or') | "|", 2, opAssoc.LEFT,
         BooleanOrOperation._create_from_tokens),
    ]
    # terms (atoms) that will be combined with the boolean operators
    term_list = [
        (CaselessKeyword('tag'), TagFilterTerm._create_from_tokens),
        (CaselessKeyword('ingr'), IngredientFilterTerm._create_from_tokens),
        (CaselessKeyword('unit'), UnitFilterTerm._create_from_tokens),
        (None, AnyFilterTerm._create_from_tokens),
    ]
    # extract keywords that cannot be used as bare (unquoted) filter strings
    operator_expressions = [om[0] for om in operator_list if om[0] is not None]
    term_expressions = [tm[0] for tm in term_list if tm[0] is not None]
    reserved_expressions = operator_expressions + term_expressions

    # quoted string indicates exact match
    quoted_filter_string = (QuotedString('"', escChar='\\')
                            | QuotedString("'", escChar='\\')).setResultsName('string')
    quoted_filter_string.setName("quoted_filter_string")
    quoted_filter_string.setParseAction(ExactFilterString._create_from_tokens)

    # not quoted string is inexact match, can't contain whitespace or be an operator
    unquoted_filter_string = ~MatchFirst(reserved_expressions) + Regex(
        r'[^\s\(\)]+', flags=re.U).setResultsName('string')
    unquoted_filter_string.setName("unquoted_filter_string")
    unquoted_filter_string.setParseAction(FuzzyFilterString._create_from_tokens)

    # regular expressions aren't parsed in the grammar but delegated to
    # python re.compile in the parser action
    regex_filter_string = QuotedString('/', escChar='\\')
    regex_filter_string.setName("regex_filter_string")
    regex_filter_string.setParseAction(RegexFilterString._create_from_tokens)

    # unquoted_filter_string must be last, so that initial quotes are handled correctly
    filter_string = regex_filter_string | quoted_filter_string | unquoted_filter_string
    filter_string.setParseAction(lambda toks: toks[0])

    # Build one term per prefix (tag:/ingr:/unit:) plus the bare fallback.
    filter_terms = []
    for prefix_expression, term_action in term_list:
        if prefix_expression is not None:
            filter_term = Combine(prefix_expression + ':'
                                  + filter_string.setResultsName("filter_string"))
            filter_term.setName("filter_term_"+str(prefix_expression.match))
        else:
            filter_term = filter_string.setResultsName("filter_string")
            filter_term.setName("filter_term_None")
        filter_term.addParseAction(term_action)
        filter_terms.append(filter_term)
    filter_term = MatchFirst(filter_terms)

    filter_expr = infixNotation(filter_term, operator_list)
    return filter_expr
def _define_grammar():
    """
    Creates and returns a copy of the selector grammar.

    Wrapped in a function to avoid polluting the module namespace.

    Supports ==, !=, in/not in over set literals, has(<label>), all(),
    ``!`` negation, parentheses, and &&/|| combination; each production
    builds its AST node via a parse action.
    """
    expr = Forward()

    label_name = Word(LABEL_CHARS)
    label_name.setParseAction(LabelNode)

    string_literal = QuotedString('"') | QuotedString("'")
    string_literal.setParseAction(LiteralNode)

    # Note: the strings inside a set literal deliberately have no
    # LiteralNode action; SetLiteralNode receives the raw strings.
    set_literal = (Suppress("{")
                   + delimitedList(QuotedString('"') | QuotedString("'"), ",")
                   + Suppress("}"))
    set_literal.setParseAction(SetLiteralNode)

    eq_comparison = label_name + Suppress("==") + string_literal
    eq_comparison.setParseAction(LabelToLiteralEqualityNode)

    not_eq_comparison = label_name + Suppress("!=") + string_literal
    not_eq_comparison.setParseAction(InequalityNode)

    in_comparison = label_name + Suppress(Keyword("in")) + set_literal
    in_comparison.setParseAction(LabelInSetLiteralNode)

    not_in = Suppress(Keyword("not") + Keyword("in"))
    not_in_comparison = label_name + not_in + set_literal
    not_in_comparison.setParseAction(NotInNode)

    has_check = (Suppress("has(")
                 + Word(LABEL_CHARS)
                 + Suppress(")"))
    has_check.setParseAction(HasNode)

    # For completeness, we allow an all() to occur in an expression like
    # "! all()".  Note: we special-case the trivial selectors "" and
    # "all()" below for efficiency.
    all_op = (Suppress("all()"))
    all_op.setParseAction(AllNode)

    comparison = (eq_comparison |
                  not_eq_comparison |
                  in_comparison |
                  not_in_comparison |
                  has_check |
                  all_op)

    paren_expr = (Suppress("(") + expr + Suppress(")"))

    # Any number of leading "!" negations; simplify_negation_node folds them.
    value = ZeroOrMore("!") + (comparison | paren_expr)
    value.setParseAction(simplify_negation_node)

    and_expr = value + ZeroOrMore(Suppress("&&") + value)
    and_expr.setParseAction(simplify_and_node)

    or_expr = and_expr + ZeroOrMore(Suppress("||") + and_expr)
    or_expr.setParseAction(simplify_or_node)

    expr << or_expr
    grammar = expr + StringEnd()

    return grammar
def __init__(self):
    """Build the expression parser: literals, identifiers/dereferences,
    list/set containers, and a full operator-precedence table whose parse
    actions replace tokens with evaluatable objects."""
    # speed up infixNotation considerably at the price of some cache memory
    ParserElement.enablePackrat()

    boolean = Keyword('True') | Keyword('False')
    none = Keyword('None')
    integer = Word(nums)
    real = Combine(Word(nums) + "." + Word(nums))
    string = (QuotedString('"', escChar='\\')
              | QuotedString("'", escChar='\\'))
    regex = QuotedString('/', escChar='\\')
    identifier = Word(alphas, alphanums + '_')
    # Dotted attribute access (a.b.c), left-associative.
    dereference = infixNotation(identifier, [
        (Literal('.'), 2, opAssoc.LEFT, EvalArith),
    ])
    result = (Keyword('bad') | Keyword('fail') | Keyword('good')
              | Keyword('ignore') | Keyword('unknown'))
    # ``real`` must precede ``integer`` so "1.5" is not split at the dot.
    rval = boolean | none | real | integer | string | regex | result | dereference
    rvallist = Group(
        Suppress('[') + Optional(delimitedList(rval)) + Suppress(']'))
    rvalset = Group(
        Suppress('{') + Optional(delimitedList(rval)) + Suppress('}'))
    operand = rval | rvallist | rvalset

    # parse actions replace the parsed tokens with an instantiated object
    # which we can later call into for evaluation of its content
    boolean.setParseAction(EvalBoolean)
    none.setParseAction(EvalNone)
    integer.setParseAction(EvalInteger)
    real.setParseAction(EvalReal)
    string.setParseAction(EvalString)
    regex.setParseAction(EvalRegex)
    identifier.setParseAction(EvalIdentifier)
    result.setParseAction(EvalResult)
    rvallist.setParseAction(EvalList)
    rvalset.setParseAction(EvalSet)

    # "is" / "is not" (joined into one token via Combine) ...
    identity_test = Keyword('is') + ~Keyword('not') | Combine(
        Keyword('is') + Keyword('not'), adjacent=False, joinString=' ')
    # ... and "in" / "not in".
    membership_test = Keyword('in') | Combine(
        Keyword('not') + Keyword('in'), adjacent=False, joinString=' ')
    comparison_op = oneOf('< <= > >= != == isdisjoint')
    comparison = identity_test | membership_test | comparison_op

    # Precedence table, tightest binding first.
    # NOTE(review): '**' is registered LEFT-associative here, while Python's
    # own ** is right-associative — confirm this is intentional.
    self.parser = infixNotation(operand, [
        (Literal('**'), 2, opAssoc.LEFT, EvalPower),
        (oneOf('+ - ~'), 1, opAssoc.RIGHT, EvalModifier),
        (oneOf('* / // %'), 2, opAssoc.LEFT, EvalArith),
        (oneOf('+ -'), 2, opAssoc.LEFT, EvalArith),
        (oneOf('<< >>'), 2, opAssoc.LEFT, EvalArith),
        (Literal('&'), 2, opAssoc.LEFT, EvalArith),
        (Literal('^'), 2, opAssoc.LEFT, EvalArith),
        (Literal('|'), 2, opAssoc.LEFT, EvalArith),
        (comparison, 2, opAssoc.LEFT, EvalLogic),
        (Keyword('not'), 1, opAssoc.RIGHT, EvalModifier),
        (Keyword('and'), 2, opAssoc.LEFT, EvalLogic),
        (Keyword('or'), 2, opAssoc.LEFT, EvalLogic),
        (Keyword('->'), 2, opAssoc.LEFT, EvalArith),
    ])
def create_type_query_syntax(self):
    """Grammar for a CREATE TYPE statement.

    Shape: CREATE TYPE <new-type> <parent-type> <quoted description>.
    Each element installs a parse action that records its value on self.
    """
    keyword = CaselessLiteral("CREATE TYPE")
    keyword.setParseAction(self.create_new_type_query_obj)

    type_name = Word(alphas)
    type_name.setParseAction(self.set_type)

    parent_name = Word(alphas)
    parent_name.setParseAction(self.set_parent_type)

    # The description may use either single or double quotes, possibly
    # spanning multiple lines.
    description_string = Or([QuotedString("'", multiline=True),
                             QuotedString('"', multiline=True)])
    description_string.setParseAction(self.set_description)

    return keyword + type_name + parent_name + description_string
def _define_grammar():
    """
    Creates and returns a copy of the selector grammar.

    Wrapped in a function to avoid polluting the module namespace.
    """
    def with_action(element, action):
        # Attach a parse action and hand the element back, so each
        # production can be declared in a single expression below.
        element.setParseAction(action)
        return element

    expr = Forward()

    label_name = with_action(Word(LABEL_CHARS), LabelNode)
    string_literal = with_action(QuotedString('"') | QuotedString("'"),
                                 LiteralNode)
    # The strings inside a set literal are fresh elements with no
    # LiteralNode action: SetLiteralNode gets the raw strings.
    set_literal = with_action(
        Suppress("{")
        + delimitedList(QuotedString('"') | QuotedString("'"), ",")
        + Suppress("}"),
        SetLiteralNode)

    eq_comparison = with_action(
        label_name + Suppress("==") + string_literal,
        LabelToLiteralEqualityNode)
    not_eq_comparison = with_action(
        label_name + Suppress("!=") + string_literal,
        InequalityNode)
    in_comparison = with_action(
        label_name + Suppress(Keyword("in")) + set_literal,
        LabelInSetLiteralNode)
    not_in_comparison = with_action(
        label_name + Suppress(Keyword("not") + Keyword("in")) + set_literal,
        NotInNode)
    has_check = with_action(
        Suppress("has(") + Word(LABEL_CHARS) + Suppress(")"),
        HasNode)

    comparison = (eq_comparison
                  | not_eq_comparison
                  | in_comparison
                  | not_in_comparison
                  | has_check)

    value = comparison | (Suppress("(") + expr + Suppress(")"))

    and_expr = with_action(value + ZeroOrMore(Suppress("&&") + value),
                           simplify_and_node)
    or_expr = with_action(and_expr + ZeroOrMore(Suppress("||") + and_expr),
                          simplify_or_node)

    expr << or_expr
    return expr + StringEnd()
def _getPattern(self):
    """Build the formula grammar: arithmetic inside comparisons inside
    AND/OR logic, anchored to the end of the input string."""
    arith_expr = Forward()
    comp_expr = Forward()
    logic_expr = Forward()
    LPAR, RPAR, SEMI = map(Suppress, "();")
    identifier = Word(alphas+"_", alphanums+"_")
    multop = oneOf('* /')
    plusop = oneOf('+ -')
    # NOTE(review): '^' is registered LEFT-associative below; exponentiation
    # is conventionally right-associative — confirm intended.
    expop = Literal( "^" )
    compop = oneOf('> < >= <= != ==')
    andop = Literal("AND")
    orop = Literal("OR")
    current_value = Literal( "." )  # refers to the current cell value
    assign = Literal( "=" )
    # notop = Literal('NOT')
    function = oneOf(' '.join(self.FUNCTIONS))
    function_call = Group(function.setResultsName('fn') + LPAR
                          + Optional(delimitedList(arith_expr)) + RPAR)
    aggregate_column = QuotedString(quoteChar='{', endQuoteChar='}')
    single_column = QuotedString(quoteChar='[', endQuoteChar=']')
    integer = Regex(r"-?\d+")
    real = Regex(r"-?\d+\.\d*")

    # quotedString enables strings without quotes to pass
    # Alternation order matters: real before integer so "1.5" is not split.
    operand = \
        function_call.setParseAction(self.__evalFunction) | \
        aggregate_column.setParseAction(self.__evalAggregateColumn) | \
        single_column.setParseAction(self.__evalSingleColumn) | \
        ((real | integer).setParseAction(self.__evalConstant)) | \
        quotedString.setParseAction(self.__evalString).addParseAction(removeQuotes) | \
        current_value.setParseAction(self.__evalCurrentValue) | \
        identifier.setParseAction(self.__evalString)

    arith_expr << operatorPrecedence(operand, [
        (expop, 2, opAssoc.LEFT, self.__expOp),
        (multop, 2, opAssoc.LEFT, self.__multOp),
        (plusop, 2, opAssoc.LEFT, self.__addOp),
    ])

    # comp_expr = Group(arith_expr + compop + arith_expr)
    comp_expr << operatorPrecedence(arith_expr, [
        (compop, 2, opAssoc.LEFT, self.__evalComparisonOp),
    ])

    logic_expr << operatorPrecedence(comp_expr, [
        (andop, 2, opAssoc.LEFT, self.__evalLogicOp),
        (orop, 2, opAssoc.LEFT, self.__evalLogicOp)
    ])

    pattern = logic_expr + StringEnd()
    return pattern
def _getPattern(self):
    """Assemble the formula grammar in three tiers — arithmetic, then
    comparison, then AND/OR logic — anchored to the end of input."""
    arith = Forward()
    comparison = Forward()
    logic = Forward()
    lpar, rpar, semi = map(Suppress, "();")

    name = Word(alphas + "_", alphanums + "_")

    # Operator vocabularies, grouped by precedence tier.
    mul_div = oneOf('* /')
    add_sub = oneOf('+ -')
    power = Literal("^")
    relational = oneOf('> < >= <= != ==')
    logical_and = Literal("AND")
    logical_or = Literal("OR")
    dot = Literal(".")      # the current cell value
    equals = Literal("=")   # kept from the original grammar (not referenced)
    # notop = Literal('NOT')

    fn_name = oneOf(' '.join(self.FUNCTIONS))
    call = Group(fn_name.setResultsName('fn') + lpar
                 + Optional(delimitedList(arith)) + rpar)

    braced_column = QuotedString(quoteChar='{', endQuoteChar='}')
    bracketed_column = QuotedString(quoteChar='[', endQuoteChar=']')
    int_const = Regex(r"-?\d+")
    real_const = Regex(r"-?\d+\.\d*")

    # quotedString enables strings without quotes to pass.
    # Order matters: real before int so "1.5" is not split at the dot.
    operand = (
        call.setParseAction(self.__evalFunction)
        | braced_column.setParseAction(self.__evalAggregateColumn)
        | bracketed_column.setParseAction(self.__evalSingleColumn)
        | (real_const | int_const).setParseAction(self.__evalConstant)
        | quotedString.setParseAction(self.__evalString).addParseAction(removeQuotes)
        | dot.setParseAction(self.__evalCurrentValue)
        | name.setParseAction(self.__evalString)
    )

    arith << operatorPrecedence(operand, [
        (power, 2, opAssoc.LEFT, self.__expOp),
        (mul_div, 2, opAssoc.LEFT, self.__multOp),
        (add_sub, 2, opAssoc.LEFT, self.__addOp),
    ])
    comparison << operatorPrecedence(arith, [
        (relational, 2, opAssoc.LEFT, self.__evalComparisonOp),
    ])
    logic << operatorPrecedence(comparison, [
        (logical_and, 2, opAssoc.LEFT, self.__evalLogicOp),
        (logical_or, 2, opAssoc.LEFT, self.__evalLogicOp),
    ])

    return logic + StringEnd()
def transform_human(text):
    """Transform user input into something Script can read.

    Double-quoted string literals are rewritten as 0x-prefixed hex.

    Args:
        text (str): User input.

    Returns:
        str: The transformed text.
    """
    # these are parseActions for pyparsing.
    def str_literal_to_hex(s, loc, toks):
        for i, t in enumerate(toks):
            # FIX: str.encode('hex') was Python-2-only; bytes.hex() is the
            # Python 3 equivalent.
            toks[i] = ''.join(['0x', t.encode('utf-8').hex()])
        return toks
    # ^ parseActions for pyparsing end here.

    str_literal = QuotedString('"')
    str_literal.setParseAction(str_literal_to_hex)

    transformer = ZeroOrMore(str_literal)
    return transformer.transformString(text)
def transform_human(text):
    """Transform user input into something Script can read.

    Double-quoted string literals are rewritten as 0x-prefixed hex.

    Args:
        text (str): User input.

    Returns:
        str: The transformed text.
    """
    # these are parseActions for pyparsing.
    def str_literal_to_hex(s, loc, toks):
        for i, t in enumerate(toks):
            # FIX: str.encode('hex') was Python-2-only; bytes.hex() is the
            # Python 3 equivalent.
            toks[i] = ''.join(['0x', t.encode('utf-8').hex()])
        return toks
    # ^ parseActions for pyparsing end here.

    str_literal = QuotedString('"')
    str_literal.setParseAction(str_literal_to_hex)

    transformer = ZeroOrMore(str_literal)
    return transformer.transformString(text)
def ParseExpression(cls, source):
    """Parse one s-expression from ``source`` and wrap it in ``cls``.

    Grammar: atoms (integer, boolean #t/#f, symbol, quoted string),
    parenthesized lists, and #(...) vectors, each converted to the
    corresponding SchemeExpression node by its parse action.
    """
    # atoms
    boolean = Keyword('#f') | Keyword('#t')
    boolean.setParseAction(lambda s, l, t: SchemeExpression.make_boolean(
        t[0] == '#t').expression)
    symbol = Word(alphanums + '-_')
    symbol.setParseAction(
        lambda s, l, t: SchemeExpression.make_symbol(t[0]).expression)
    integer = Word(nums)
    integer.setParseAction(
        lambda s, l, t: SchemeExpression.make_integer(t[0]).expression)
    string = QuotedString('"', multiline=True)
    string.setParseAction(
        lambda s, l, t: SchemeExpression.make_string(t[0]).expression)
    element = integer | boolean | symbol | string

    # lists
    lexpr = Forward()
    vexpr = Forward()
    lparen = Literal('(').suppress()
    rparen = Literal(')').suppress()
    hashsym = Literal('#').suppress()

    # vectors
    lexpr << Group(lparen + ZeroOrMore(element ^ lexpr ^ vexpr) + rparen)
    lexpr.setParseAction(lambda s, l, t: SchemeExpression.make_list(t[0]))
    vexpr << Group(hashsym + lparen + ZeroOrMore(element ^ lexpr ^ vexpr)
                   + rparen)
    vexpr.setParseAction(
        lambda s, l, t: SchemeExpression.make_vector(t[0]))

    # final...
    sexpr = element | vexpr | lexpr
    # this seems to be necessary to fix a problem with pyparsing
    sexpr.keepTabs = True
    result = sexpr.parseString(source)[0]
    return cls(SchemeExpression._flatten(result))
def _build_grammar(self):
    """Build the tag-expression grammar.

    An expression is one of: a fully parenthesized binary arithmetic
    expression, an assignment ``tag = expr``, a tag reference, a numeric
    literal, a single-quoted string, or ``?`` (print all tags).
    """
    expr = Forward()

    float_lit = Combine(Word(nums) + '.' + Word(nums))
    float_lit.setName('float')
    float_lit.setParseAction(lambda x: \
        self.to_literal(float(x[0])))

    int_lit = Word(nums)
    int_lit.setName('int')
    int_lit.setParseAction(lambda x: \
        self.to_literal(int(x[0])))

    # float must come first so "1.5" is not split at the dot.
    num = (float_lit | int_lit)
    num.setParseAction(lambda x: x[0])

    tag_name = Word(alphas + "_", alphanums + "_")
    tag_name.setName('tag_name')
    tag_name.setParseAction(lambda t: tag_reference.TagReference(t[0]))

    quoted_string = QuotedString("'")
    quoted_string.setParseAction(lambda s: self.to_literal(s[0]))

    oper = oneOf('+ * / -')
    oper.setParseAction(lambda o: o[0])

    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()

    # Binary arithmetic must be fully parenthesized: (lhs op rhs).
    arith = Group(lpar + expr + oper + expr + rpar)
    arith.setParseAction(lambda t: \
        self.to_arith(t[0][0], t[0][1], t[0][2]))

    assign = tag_name + '=' + expr
    assign.setName('assign')
    assign.setParseAction(lambda x: self.to_assign(x[0],x[2]))

    print_tags = Literal('?')
    print_tags.setParseAction(lambda x: self.to_print_tags())

    # Order matters: assign before bare tag_name, float handled inside num.
    expr <<(arith|assign|tag_name|num|quoted_string|print_tags)
    expr.setParseAction(lambda x: x[0])
    return expr
def build_jimple_parser(self):
    """Build the full pyparsing grammar for Jimple (Soot's intermediate
    representation of Java bytecode) and return the top-level class parser.

    The grammar is built bottom-up: operators/punctuation, keywords,
    identifiers, expressions, statements, then method/class declarations.
    Each production installs a parse action (self.*_parse_action) that
    builds the corresponding IR object.
    """
    # Literals
    op_add = Literal("+")
    op_sub = Literal("-")
    op_mul = Literal("*")
    op_div = Literal("/")
    op_xor = Literal("^")
    op_lt = Literal("<")
    op_gt = Literal(">")
    op_eq = Literal("==")
    op_neq = Literal("!=")
    op_lte = Literal("<=")
    op_gte = Literal(">=")
    op_sls = Literal("<<")
    op_srs = Literal(">>")
    op_urs = Literal(">>>")
    op_mod = Literal("%")
    op_rem = Literal("rem")
    op_bwa = Literal("&")
    op_bwo = Literal("|")
    op_cmp = Literal("cmp")
    op_cmpg = Literal("cmpg")
    op_cmpl = Literal("cmpl")
    lit_lcb = Literal("{").suppress()
    lit_rcb = Literal("}").suppress()
    lit_lp = Literal("(").suppress()
    lit_rp = Literal(")").suppress()
    lit_dot = Literal(".").suppress()
    lit_asgn = Literal("=").suppress()
    lit_ident = Literal(":=").suppress()
    lit_strm = Literal(";").suppress()
    lit_cln = Literal(":").suppress()
    lit_lsb = Literal("[").suppress()
    lit_rsb = Literal("]").suppress()

    # ``^`` (Or) picks the longest match, so "<" vs "<=" etc. resolve safely.
    binop = op_add ^ op_sub ^ op_mul ^ op_div ^ op_xor \
        ^ op_bwa ^ op_mod ^ op_rem ^ op_urs ^ op_lte \
        ^ op_gte ^ op_sls ^ op_srs ^ op_lt ^ op_gt \
        ^ op_eq ^ op_neq ^ op_bwo ^ op_cmp ^ op_cmpg \
        ^ op_cmpl

    cond_op = op_gte ^ op_lte ^ op_lt ^ op_gt ^ op_eq ^ op_neq

    # Keywords
    kw_specialinvoke = Keyword("specialinvoke")
    kw_interfaceinvoke = Keyword("interfaceinvoke")
    kw_virtualinvoke = Keyword("virtualinvoke")
    kw_staticinvoke = Keyword("staticinvoke")
    kw_instanceof = Keyword("instanceof")
    kw_new = Keyword("new")
    kw_newarray = Keyword("newarray")
    kw_newmultiarray = Keyword("newmultiarray")
    kw_length = Keyword("lengthof")
    kw_neg = Keyword("neg")
    kw_goto = Keyword("goto")
    kw_if = Keyword("if")
    kw_this = Keyword("@this")
    kw_caughtexception = Keyword("@caughtexception")
    kw_lookupswitch = Keyword("lookupswitch")
    kw_case = Keyword("case")
    kw_default = Keyword("default")
    kw_return = Keyword("return")
    kw_entermonitor = Keyword("entermonitor")
    kw_exitmonitor = Keyword("exitmonitor")
    kw_throw = Keyword("throw")
    kw_throws = Keyword("throws")
    kw_catch = Keyword("catch")
    kw_transient = Keyword("transient")
    kw_from = Keyword("from")
    kw_to = Keyword("to")
    kw_with = Keyword("with")
    kw_breakpoint = Keyword("breakpoint")
    kw_nop = Keyword("nop")
    kw_public = Keyword("public")
    kw_protected = Keyword("protected")
    kw_private = Keyword("private")
    kw_volatile = Keyword("volatile")
    kw_static = Keyword("static")
    kw_annotation = Keyword("annotation")
    kw_final = Keyword("final")
    kw_class = Keyword("class")
    kw_enum = Keyword("enum")
    kw_interface = Keyword("interface")
    kw_abstract = Keyword("abstract")
    kw_extends = Keyword("extends")
    kw_implements = Keyword("implements")
    kw_null = Keyword("null")

    modifier = \
        kw_public | kw_protected | kw_private \
        | kw_static | kw_abstract | kw_final \
        | kw_volatile | kw_enum | kw_transient \
        | kw_annotation

    #Identifiers
    # Locals look like "$r0" / "r0": optional '$', letters, then digits.
    id_local = Combine(Optional(Literal("$")) + Word(alphas) + Word(nums))
    id_java = Word(alphas + "'$_", alphanums + "'$_")
    id_class_comp = Word(alphas + "_", alphanums + "$_")
    # Dotted type name with optional trailing array brackets.
    id_type = Combine(id_class_comp
                      + ZeroOrMore(Combine(Literal(".") + (id_class_comp)))
                      + Optional(Word("[]")))
    # NOTE(review): Word("<clinit>") matches any run of the characters
    # {<,c,l,i,n,t,>} — not the literal string "<clinit>". It happens to
    # also cover "<init>" (its chars are a subset), which may be why this
    # works in practice; confirm whether Literal was intended.
    id_method_name = id_java | Word("<clinit>") | Word("<init>")
    id_label = Combine(Literal("label") + Word(nums))
    id_parameter = Combine(Literal("@parameter") + Word(nums))

    # Field
    field_specifier = \
        Suppress(Literal("<")) \
        + id_type + lit_cln + id_type + id_java \
        + Suppress(Literal(">"))
    field_specifier.setParseAction(self.field_specifier_parse_action)

    # Method
    method_param_list = delimitedList(id_type, delim=",")
    id_method = \
        Suppress(Literal("<")) \
        + id_type + lit_cln + id_type + id_method_name \
        + lit_lp + Group(Optional(method_param_list)) + lit_rp \
        + Suppress(Literal(">"))

    number_suffix = Optional(Literal("F") | Literal("L"))

    # Numeric constant
    expr_number = \
        Combine(
            Word("+-" + nums, nums)
            + Optional(Literal(".") + Optional(Word(nums)))
            + Optional(Literal("E") + Optional(Word("+-")) + Word(nums))
            + number_suffix) \
        | Combine(Literal("#Infinity") + number_suffix) \
        | Combine(Literal("#-Infinity") + number_suffix) \
        | Combine(Literal("#NaN") + number_suffix)
    expr_number.setParseAction(self.expr_numeric_const_parse_action)

    expr_str = QuotedString(quoteChar='"', escChar="\\")
    expr_str.setParseAction(self.expr_str_const_parse_action)

    # Null constant
    expr_null = kw_null
    expr_null.setParseAction(self.expr_null_parse_action)

    # Group all constants
    expr_constant = \
        expr_str \
        ^ expr_number \
        ^ expr_null

    # A 'class' expression (class + classname)
    expr_class = kw_class + QuotedString(quoteChar='"')
    expr_class.setParseAction(self.expr_class_parse_action)

    # A local variable expression
    expr_local = id_local
    expr_local.setParseAction(self.expr_local_parse_action)

    # Group together all "immediate" values
    expr_imm = expr_local ^ expr_constant ^ expr_class
    expr_imm.setParseAction(self.expr_imm_parse_action)

    # Conditional expression
    expr_cond = expr_imm + cond_op + expr_imm
    expr_cond.setParseAction(self.expr_cond_parse_action)

    # Array index
    array_idx = lit_lsb + expr_imm + lit_rsb
    empty_array_idx = lit_lsb + lit_rsb

    expr_binop = expr_imm + binop + expr_imm
    expr_binop.setParseAction(self.expr_binop_parse_action)

    expr_cast = lit_lp + id_type + lit_rp + expr_imm
    expr_cast.setParseAction(self.expr_cast_parse_action)

    expr_instanceof = expr_imm + kw_instanceof + id_type
    expr_instanceof.setParseAction(self.expr_instanceof_parse_action)

    expr_new = Suppress(kw_new) + id_type
    expr_new.setParseAction(self.expr_new_parse_action)

    expr_newarray = kw_newarray + lit_lp + id_type + lit_rp + array_idx
    expr_newarray.setParseAction(self.expr_newarray_parse_action)

    expr_newmultiarray = kw_newmultiarray + lit_lp + id_type + lit_rp \
        + OneOrMore(array_idx | empty_array_idx)
    expr_newmultiarray.setParseAction(self.expr_newmultiarray_parse_action)

    expr_lengthof = kw_length + expr_imm
    expr_lengthof.setParseAction(self.expr_lengthof_parse_action)

    expr_neg = kw_neg + expr_imm
    expr_neg.setParseAction(self.expr_neg_parse_action)

    # Invoke Expressions
    method_arg_list = delimitedList(expr_imm, delim=",")
    expr_invoke = \
        kw_specialinvoke \
        + id_local + lit_dot + id_method \
        + lit_lp + Group(Optional(method_arg_list)) + lit_rp \
        | kw_interfaceinvoke \
        + id_local + lit_dot + id_method \
        + lit_lp + Group(Optional(method_arg_list)) + lit_rp \
        | kw_virtualinvoke \
        + id_local + lit_dot + id_method \
        + lit_lp + Group(Optional(method_arg_list)) + lit_rp \
        | kw_staticinvoke + id_method \
        + lit_lp + Group(Optional(method_arg_list)) + lit_rp
    expr_invoke.setParseAction(self.expr_invoke_parse_action)

    expr = \
        expr_binop \
        ^ expr_cast \
        ^ expr_instanceof \
        ^ expr_invoke \
        ^ expr_new \
        ^ expr_newarray \
        ^ expr_newmultiarray \
        ^ expr_lengthof \
        ^ expr_neg
    expr.setParseAction(self.expr_parse_action)

    # Concrete Reference Expression
    expr_field_ref = Group(Optional(id_local + lit_dot)) + field_specifier
    expr_field_ref.setParseAction(self.expr_field_ref_parse_action)

    expr_array_ref = id_local + array_idx
    expr_array_ref.setParseAction(self.expr_array_ref_parse_action)

    # L and R values
    expr_lvalue = \
        id_local \
        ^ expr_field_ref \
        ^ expr_array_ref
    expr_lvalue.setParseAction(self.expr_lvalue_parse_action)

    expr_rvalue = \
        expr \
        ^ expr_field_ref \
        ^ expr_array_ref \
        ^ expr_imm

    # Declaration
    stmt_decl = \
        id_type \
        + Group(delimitedList(id_local, delim=",")) \
        + lit_strm
    stmt_decl.setParseAction(self.stmt_decl_parse_action)

    # Statements
    stmt_assign = \
        expr_lvalue + lit_asgn + expr_rvalue + lit_strm
    '''
    id_local + lit_asgn + expr_rvalue + lit_strm \
    ^ field_specifier + lit_asgn + expr_imm + lit_strm \
    ^ id_local + lit_dot + field_specifier + lit_asgn + expr_imm + lit_strm \
    ^ id_local + lit_lsb + expr_imm + lit_rsb + lit_asgn + expr_imm + lit_strm
    '''
    stmt_assign.setParseAction(self.stmt_assign_parse_action)

    # Identity statements: @this, @parameterN, @caughtexception.
    stmt_identity = \
        id_local + lit_ident + kw_this + lit_cln + id_type + lit_strm \
        ^ id_local + lit_ident + id_parameter + lit_cln + id_type + lit_strm \
        ^ id_local + lit_ident + kw_caughtexception + lit_strm
    stmt_identity.setParseAction(self.stmt_identity_parse_action)

    stmt_goto = kw_goto + id_label + lit_strm
    stmt_goto.setParseAction(self.stmt_goto_parse_action)

    stmt_if = Suppress(kw_if) + expr_cond + Suppress(kw_goto) + id_label \
        + lit_strm
    stmt_if.setParseAction(self.stmt_if_parse_action)

    stmt_invoke = expr_invoke + lit_strm
    stmt_invoke.setParseAction(self.stmt_invoke_parse_action)

    # lookupswitch with case/default arms.
    switch_case = kw_case + expr_number + lit_cln + kw_goto + id_label \
        + lit_strm
    switch_default = kw_default + lit_cln + kw_goto + id_label + lit_strm
    switch_body = ZeroOrMore(switch_case) + Optional(switch_default)
    stmt_switch = kw_lookupswitch + lit_lp + expr_imm + lit_rp + lit_lcb \
        + switch_body + lit_rcb + lit_strm
    stmt_switch.setParseAction(self.stmt_switch_parse_action)

    stmt_enter_monitor = kw_entermonitor + expr_imm + lit_strm
    stmt_enter_monitor.setParseAction(self.stmt_enter_monitor_parse_action)

    stmt_exit_monitor = kw_exitmonitor + expr_imm + lit_strm
    stmt_exit_monitor.setParseAction(self.stmt_exit_monitor_parse_action)

    stmt_return = Suppress(kw_return) + expr_imm + lit_strm \
        | Suppress(kw_return) + lit_strm
    stmt_return.setParseAction(self.stmt_return_parse_action)

    stmt_throw = kw_throw + expr_imm + lit_strm
    stmt_throw.setParseAction(self.stmt_throw_parse_action)

    stmt_catch = kw_catch + id_type \
        + kw_from + id_label \
        + kw_to + id_label \
        + kw_with + id_label + lit_strm
    stmt_catch.setParseAction(self.stmt_catch_parse_action)

    stmt_breakpoint = kw_breakpoint + lit_strm
    stmt_breakpoint.setParseAction(self.stmt_breakpoint_parse_action)

    stmt_nop = kw_nop + lit_strm
    stmt_nop.setParseAction(self.stmt_nop_parse_action)

    jimple_stmt = \
        stmt_decl \
        ^ stmt_assign \
        ^ stmt_identity \
        ^ stmt_goto \
        ^ stmt_if \
        ^ stmt_invoke \
        ^ stmt_switch \
        ^ stmt_enter_monitor \
        ^ stmt_exit_monitor \
        ^ stmt_return \
        ^ stmt_throw \
        ^ stmt_catch \
        ^ stmt_breakpoint \
        ^ stmt_nop
    jimple_stmt.setParseAction(self.stmt_parse_action)

    throws_clause = kw_throws + delimitedList(id_type, delim=",")

    method_sig = \
        Group(ZeroOrMore(modifier)) \
        + id_type + id_method_name \
        + lit_lp + Group(Optional(method_param_list)) + lit_rp \
        + Group(Optional(throws_clause))
    method_decl = method_sig + lit_strm

    field_decl = ZeroOrMore(modifier) + id_type + id_java + lit_strm
    field_decl.setParseAction(self.field_decl_parse_action)

    class_decl = \
        Group(ZeroOrMore(modifier)) + Suppress(kw_class) + id_type \
        + Optional(kw_extends + delimitedList(id_type, delim=",")) \
        + Optional(kw_implements + delimitedList(id_type, delim=","))

    interface_decl = \
        Group(ZeroOrMore(modifier)) + Suppress(kw_interface) + id_type \
        + Optional(kw_extends + delimitedList(id_type, delim=",")) \
        + Optional(kw_implements + delimitedList(id_type, delim=","))

    # A method body is a sequence of statements and "labelN:" markers.
    jimple_method_item = \
        jimple_stmt \
        | Combine(id_label + lit_cln).setParseAction(self.label_parse_action)
    jimple_method_body = ZeroOrMore(jimple_method_item)
    jimple_method = \
        Group(method_sig) + lit_lcb \
        + Group(jimple_method_body) \
        + lit_rcb
    jimple_method.setParseAction(self.method_defn_parse_action)

    jimple_class_item = field_decl | method_decl | jimple_method
    jimple_class_body = ZeroOrMore(jimple_class_item)
    jimple_class = Group(class_decl | interface_decl) + lit_lcb \
        + Group(jimple_class_body) + lit_rcb
    jimple_class.setParseAction(self.class_defn_parse_action)

    return jimple_class
class Int(int): pass ################ Parsing: parse, tokenize, and read_from_tokens lisp_integer = Word(nums) lisp_integer.setParseAction(lambda s, l, t: Int(t[0])) lisp_float = Combine(Word(nums) + '.' + Word(nums)) lisp_float.setParseAction(lambda s, l, t: float(t[0])) lisp_number = lisp_integer | lisp_float lisp_string = QuotedString(quoteChar='"', escChar='\\', multiline=True) lisp_string.setParseAction(lambda s, l, t: String(t[0])) special = "_-+*/^><=:'" #lisp_symbol = Word(alphas + nums + '_-' + '?!') # any order #lisp_symbol = Combine(Char(alphas) + Word(alphas + nums + '?' + '!')) # starts with alphas lisp_symbol = Combine( Char(alphas + special) + Optional(Word(alphas + nums + special)) + Optional(Char('?!'))) # Ruby style lisp_symbol.setParseAction(lambda s, l, t: Symbol(t[0])) lisp_atom = lisp_symbol | lisp_string | lisp_number lisp_list = nestedExpr(opener='(', closer=')', content=lisp_atom,
def transform_human(text, variables=None):
    """Transform user input with given context.

    Args:
        text (str): User input.
        variables (dict): Variables for purposes of substitution.

    Returns:
        A 2-tuple of: (A human-readable script that Script can parse,
            A list of contextual information for tooltips, etc.)
    """
    if variables is None:
        variables = {}  # No mutable default value.

    # these are parseActions for pyparsing.
    def str_literal_to_hex(s, loc, toks):
        for i, t in enumerate(toks):
            # FIX: str.encode('hex') was Python-2-only; bytes.hex() is the
            # Python 3 equivalent.
            toks[i] = ''.join(['0x', t.encode('utf-8').hex()])
        return toks

    def var_name_to_value(s, loc, toks):
        for i, t in enumerate(toks):
            val = variables.get(t[1:])
            if val:
                toks[i] = val
        return toks

    def implicit_opcode_to_explicit(s, loc, toks):
        """Add "OP_" prefix to an opcode."""
        for i, t in enumerate(toks):
            toks[i] = '_'.join(['OP', t])
        return toks

    def hex_to_formatted_hex(s, loc, toks):
        """Add "0x" prefix and ensure even length."""
        for i, t in enumerate(toks):
            new_tok = t
            # Add '0x' prefix
            if not t.startswith('0x'):
                if t.startswith('x'):
                    new_tok = ''.join(['0', t])
                else:
                    new_tok = ''.join(['0x', t])
            # Even-length string
            if len(new_tok) % 2 != 0:
                new_tok = ''.join([new_tok[0:2], '0', new_tok[2:]])
            toks[i] = new_tok
        return toks
    # ^ parseActions for pyparsing end here.

    str_literal = QuotedString('"')
    str_literal.setParseAction(str_literal_to_hex)
    var_name = Combine(Word('$') + Word(pyparsing.alphas))
    var_name.setParseAction(var_name_to_value)

    # Here we populate the list of contextual tips.

    # Explicit opcode names
    op_names = [str(i) for i in OPCODE_NAMES.keys()]
    op_names_explicit = ' '.join(op_names)

    def is_small_int(op):
        """True if op is one of OP_1, OP_2, ...OP_16"""
        try:
            int(op[3:])
            return True
        except ValueError:
            return False

    op_names_implicit = ' '.join(
        [i[3:] for i in op_names if not is_small_int(i)])

    # Hex, implicit (e.g. 'a') and explicit (e.g. '0x0a')
    explicit_hex = Combine(
        Word('0x') + Word(pyparsing.hexnums) + pyparsing.WordEnd())
    implicit_hex = Combine(pyparsing.WordStart() +
                           OneOrMore(Word(pyparsing.hexnums)) +
                           pyparsing.WordEnd())
    explicit_hex.setParseAction(hex_to_formatted_hex)
    implicit_hex.setParseAction(hex_to_formatted_hex)

    # Opcodes, implicit (e.g. 'ADD') and explicit (e.g. 'OP_ADD')
    explicit_op = pyparsing.oneOf(op_names_explicit)
    implicit_op = Combine(pyparsing.WordStart() +
                          pyparsing.oneOf(op_names_implicit))
    implicit_op.setParseAction(implicit_opcode_to_explicit)

    contexts = pyparsing.Optional(
        var_name('Variable') | str_literal('String literal') |
        explicit_op('Opcode') | implicit_op('Opcode') |
        explicit_hex('Hex') | implicit_hex('Hex'))
    matches = [(i[0].asDict(), i[1], i[2])
               for i in contexts.scanString(text)]

    context_tips = []
    for i in matches:
        d = i[0]
        if not d:
            continue
        # FIX: dict.items() is a view in Python 3 and cannot be indexed;
        # take the first (and only) item via an iterator instead.
        match_type, value = next(iter(d.items()))
        start = i[1]
        end = i[2]
        context_tips.append((start, end, value, match_type))

    # Now we do the actual transformation.
    s = text
    s = var_name.transformString(s)
    s = str_literal.transformString(s)
    s = implicit_op.transformString(s)
    s = implicit_hex.transformString(s)
    s = explicit_hex.transformString(s)
    return s, context_tips
def transform_human(text, variables=None):
    """Transform user input with given context.

    Args:
        text (str): User input.
        variables (dict): Variables for purposes of substitution.

    Returns:
        A 2-tuple of: (A human-readable script that Script can parse,
        A list of contextual information for tooltips, etc.)

    NOTE(review): this appears to be a duplicate of another
    ``transform_human`` definition in this file; if both live in the same
    module, the later definition shadows the earlier one — confirm intent.
    """
    if variables is None:
        variables = {}  # No mutable default value.

    # these are parseActions for pyparsing.
    def str_literal_to_hex(s, loc, toks):
        # Replace a quoted string literal with the hex encoding of its bytes.
        # NOTE(review): str.encode('hex') is Python-2-only; under Python 3
        # this raises (the 'hex' text codec was removed).
        for i, t in enumerate(toks):
            toks[i] = ''.join(['0x', t.encode('hex')])
        return toks

    def var_name_to_value(s, loc, toks):
        # Substitute "$name" with its value when known; unknown (or
        # empty/falsy-valued) names are left untouched.
        for i, t in enumerate(toks):
            val = variables.get(t[1:])
            if val:
                toks[i] = val
        return toks

    def implicit_opcode_to_explicit(s, loc, toks):
        """Add "OP_" prefix to an opcode."""
        for i, t in enumerate(toks):
            toks[i] = '_'.join(['OP', t])
        return toks

    def hex_to_formatted_hex(s, loc, toks):
        """Add "0x" prefix and ensure even length."""
        for i, t in enumerate(toks):
            new_tok = t
            # Add '0x' prefix
            if not t.startswith('0x'):
                if t.startswith('x'):
                    new_tok = ''.join(['0', t])
                else:
                    new_tok = ''.join(['0x', t])
            # Even-length string: pad a '0' right after the '0x' prefix.
            if len(new_tok) % 2 != 0:
                new_tok = ''.join([new_tok[0:2], '0', new_tok[2:]])
            toks[i] = new_tok
        return toks
    # ^ parseActions for pyparsing end here.

    str_literal = QuotedString('"')
    str_literal.setParseAction(str_literal_to_hex)
    # NOTE(review): Word('$') matches a *run* of '$' characters (Word takes
    # a character set); Literal('$') is likely what was intended.
    var_name = Combine(Word('$') + Word(pyparsing.alphas))
    var_name.setParseAction(var_name_to_value)

    # Here we populate the list of contextual tips.

    # Explicit opcode names
    op_names = [str(i) for i in OPCODE_NAMES.keys()]
    op_names_explicit = ' '.join(op_names)

    def is_small_int(op):
        """True if op is one of OP_1, OP_2, ...OP_16"""
        try:
            i = int(op[3:])
            return True
        except ValueError:
            return False

    op_names_implicit = ' '.join(
        [i[3:] for i in op_names if not is_small_int(i)])

    # Hex, implicit (e.g. 'a') and explicit (e.g. '0x0a')
    # NOTE(review): Word('0x') matches any run of the characters '0'/'x',
    # not the exact prefix '0x' — Literal('0x') is likely intended.
    explicit_hex = Combine(Word('0x') + Word(pyparsing.hexnums) +
                           pyparsing.WordEnd())
    implicit_hex = Combine(pyparsing.WordStart() +
                           OneOrMore(Word(pyparsing.hexnums)) +
                           pyparsing.WordEnd())
    explicit_hex.setParseAction(hex_to_formatted_hex)
    implicit_hex.setParseAction(hex_to_formatted_hex)

    # Opcodes, implicit (e.g. 'ADD') and explicit (e.g. 'OP_ADD')
    explicit_op = pyparsing.oneOf(op_names_explicit)
    implicit_op = Combine(pyparsing.WordStart() +
                          pyparsing.oneOf(op_names_implicit))
    implicit_op.setParseAction(implicit_opcode_to_explicit)

    # Each scanString hit is (ParseResults, start, end); the results name
    # ('Variable', 'Opcode', ...) doubles as the tooltip category.
    contexts = pyparsing.Optional(var_name('Variable') |
                                  str_literal('String literal') |
                                  explicit_op('Opcode') |
                                  implicit_op('Opcode') |
                                  explicit_hex('Hex') |
                                  implicit_hex('Hex'))
    matches = [(i[0].asDict(), i[1], i[2]) for i in contexts.scanString(text)]

    context_tips = []
    for i in matches:
        d = i[0]
        if len(d.items()) == 0:
            continue
        # NOTE(review): d.items()[0] only works on Python 2 — Py3 dict
        # views are not indexable (use next(iter(d.items()))).
        match_type, value = d.items()[0]
        start = i[1]
        end = i[2]
        context_tips.append((start, end, value, match_type))

    # Now we do the actual transformation; ordering matters (variables and
    # string literals are expanded before opcode/hex normalization).
    s = text
    s = var_name.transformString(s)
    s = str_literal.transformString(s)
    s = implicit_op.transformString(s)
    s = implicit_hex.transformString(s)
    s = explicit_hex.transformString(s)
    return s, context_tips
class SearchRestrictionParser(object):
    """
    Defines the grammar for simple search restriction expressions.
    The parsers of the different terms of these restriction expressions
    are provided by this class.
    """

    def __init__(self):
        """ Constructor. """
        # Grammar fragments, built once in __initSearchRestrictionParser().
        self.__literalExpression = None
        self.__keywordExpression = None
        self.__propertyNameExpression = None
        self.__comparisonExpression = None
        self.__conditionExpression = None
        self.__conjunctionExpression = None
        self.__restrictionExpression = None
        self.__dateExpression = None
        self.__numberExpression = None
        # Lazily computed token lists (see the properties below).
        self.__conjunctionTokens = None
        self.__comparisonTokens = None
        self.__andKeyword = None
        self.__orKeyword = None
        self.__notKeyword = None
        self.__quotedStringCharacters = ["\"", "'"]
        self.__initSearchRestrictionParser()

    def __initSearchRestrictionParser(self):
        """ Initializes and returns a parser for the search restrictions. """
        # German umlauts / sharp s are allowed in property names.
        unicodeUmlaut = unicodedata.lookup("LATIN CAPITAL LETTER A WITH DIAERESIS") + \
                        unicodedata.lookup("LATIN SMALL LETTER A WITH DIAERESIS") + \
                        unicodedata.lookup("LATIN CAPITAL LETTER O WITH DIAERESIS") + \
                        unicodedata.lookup("LATIN SMALL LETTER O WITH DIAERESIS") + \
                        unicodedata.lookup("LATIN CAPITAL LETTER U WITH DIAERESIS") + \
                        unicodedata.lookup("LATIN SMALL LETTER U WITH DIAERESIS") + \
                        unicodedata.lookup("LATIN SMALL LETTER SHARP S")

        # define property name
        firstPropertyNameCharacter = alphas + unicodeUmlaut + "_"
        propertyCharacter = firstPropertyNameCharacter + nums + ".-"
        self.__propertyNameExpression = Word(firstPropertyNameCharacter,
                                             propertyCharacter)

        # define literal: dates are "DD.MM.YYYY HH:MM:SS"
        day = Regex("(0[1-9]|[12][0-9]|3[01])")
        month = Regex("(0[1-9]|1[012])")
        year = Regex("((?:19|20)\d\d)")
        hour = Regex("([01][0-9]|2[0-3])")
        minute = Regex("([0-5][0-9])")
        second = minute
        self.__dateExpression = Combine(day + "." + month + "." + year +
                                        White() + hour + ":" + minute + ":" +
                                        second)
        # Signed int/float with optional exponent.
        self.__numberExpression = Regex(
            "[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?")
        # A literal is a string quoted by any registered quote character.
        self.__literalExpression = QuotedString(
            self.__quotedStringCharacters[0])
        for quotedStringCharacter in self.__quotedStringCharacters[1:]:
            self.__literalExpression |= QuotedString(quotedStringCharacter)
        self.__literalExpression.setParseAction(self.__handleLiteral)

        # define keywords; *_OPERATOR constants are module-level.
        notKeyword = Keyword(NOT_OPERATOR, caseless=True)
        andKeyword = Keyword(AND_OPERATOR, caseless=True)
        orKeyword = Keyword(OR_OPERATOR, caseless=True)
        gteKeyword = Keyword(GTE_OPERATOR)
        lteKeyword = Keyword(LTE_OPERATOR)
        equalKeyword = Keyword(EQUAL_OPERATOR)
        gtKeyword = Keyword(GT_OPERATOR)
        ltKeyword = Keyword(LT_OPERATOR)
        likeKeyword = Keyword(LIKE_OPERATOR, caseless=True)
        # Order matters: >= and <= must be tried before > and <.
        comparisonKeyword = gteKeyword | lteKeyword | equalKeyword | gtKeyword | ltKeyword | likeKeyword
        existsKeyword = Keyword(EXISTS_OPERATOR, caseless=True)
        contentContainsKeyword = Keyword(CONTENT_CONTAINS_OPERATOR,
                                         caseless=True)
        isCollectionKeyword = Keyword(IS_COLLECTION_OPERATOR, caseless=True)
        self.__keywordExpression = notKeyword | andKeyword | orKeyword | comparisonKeyword | existsKeyword | \
                                   contentContainsKeyword | isCollectionKeyword | "(" | ")"

        # definition of condition terms
        comparisonCondition = Group(self.__propertyNameExpression +
                                    comparisonKeyword +
                                    self.__literalExpression)
        existsCondition = Group(existsKeyword +
                                self.__propertyNameExpression)
        contentContainsCondition = Group(contentContainsKeyword +
                                         self.__literalExpression)
        isCollectionCondition = isCollectionKeyword
        self.__conditionExpression = comparisonCondition | existsCondition | contentContainsCondition | isCollectionCondition
        self.__conditionExpression.setParseAction(self.__handleConditionTerm)

        # definition of restriction expressions (operators to combine the
        # condition terms); StringEnd forces the whole input to be consumed.
        self.__restrictionExpression = operatorPrecedence(
            self.__conditionExpression,
            [(notKeyword, 1, opAssoc.RIGHT),
             (andKeyword, 2, opAssoc.LEFT),
             (orKeyword, 2, opAssoc.LEFT)]) + StringEnd()

        # definition of comparison expression
        self.__comparisonExpression = comparisonKeyword
        self.__andKeyword = andKeyword
        self.__orKeyword = orKeyword
        self.__notKeyword = notKeyword
        # definition of conjunction expression
        self.__conjunctionExpression = andKeyword | orKeyword

    def registerPropertyParseAction(self, parseAction):
        """ Appends a parsing action when matching a property expression.

        NOTE(review): setParseAction *replaces* existing actions; if
        appending is intended, pyparsing's addParseAction would be needed.
        """
        self.__propertyNameExpression.setParseAction(parseAction)

    def registerLiteralParseAction(self, parseAction):
        """ Appends a parsing action when matching a literal.

        NOTE(review): this replaces the internal __handleLiteral action
        (setParseAction overwrites); confirm that is the intent.
        """
        self.__literalExpression.setParseAction(parseAction)

    def registerConjunctionParseAction(self, parseAction):
        """ Appends a parsing action when matching a conjunction keyword. """
        self.__andKeyword.setParseAction(parseAction)
        self.__orKeyword.setParseAction(parseAction)
        self.__notKeyword.setParseAction(parseAction)

    def registerComparisonParseAction(self, parseAction):
        """ Appends a parsing action when matching a comparison keyword. """
        self.__comparisonExpression.setParseAction(parseAction)

    def __handleLiteral(self, _, __, tokenList):
        """ Evaluates the content of the quoted string.

        Returns a time.struct_time when the literal is a full date/time,
        a number when it parses as one, and (implicitly) None otherwise —
        in which case pyparsing keeps the original token.
        """
        unquotedString = tokenList[0]
        result = list()
        for item in self.__dateExpression.scanString(unquotedString):
            result.append(item)
        if len(result) == 1:
            # item is (ParseResults, start, end); [0][0] is the match text.
            return time.strptime(str(result[0][0][0]), "%d.%m.%Y %H:%M:%S")
        else:
            for item in self.__numberExpression.scanString(unquotedString):
                result.append(item)
            if len(result) == 1:
                # NOTE(review): eval() on parsed input — the regex restricts
                # it to numeric forms, but ast.literal_eval / int / float
                # would be the safer idiom.
                return eval(str(result[0][0][0]))

    def parseString(self, inputString):
        """
        Parses the string and returns the result.

        @param inputString: String to parse.
        @type inputString: C{unicode}

        @raise ParseException: Signals an error parsing the given string.
        """
        return self.__restrictionExpression.parseString(inputString)

    @staticmethod
    def __handleConditionTerm(_, __, tokens):
        """
        Extracts operator, literal, property name from the parsed string
        and returns them as a (propertyName, operator, literal) tuple.
        """
        operator = propertyName = literal = None
        # Conditions are Group-ed, so the term tokens sit in tokens[0].
        tokenList = list(list(tokens)[0])
        if len(tokenList) == 3:
            # property <comparison> literal
            operator = tokenList[1]
            propertyName = tokenList[0]
            literal = tokenList[2]
        elif len(tokenList) == 2:
            operator = tokenList[0]
            if operator == EXISTS_OPERATOR:
                propertyName = tokenList[1]
            else:
                literal = tokenList[1]
        else:
            # isCollection term: a bare keyword, no operands.
            operator = tokens[0]
        return (propertyName, operator, literal)

    def matchKeyword(self, inputString):
        """
        Returns all matches of keywords. Keywords in literals are ignored.

        @param inputString: String to parse.
        @type inputString: C{unicode}

        @return: List of matched expression tuples that consist of
            matched expression, start index, end index.
        @rtype: C{list} of C{tuple} of C{unicode}, C{int}, C{int}
        """
        return self._matchWrapper(inputString, self.__keywordExpression)

    def matchPropertyName(self, inputString):
        """
        Returns all matches of property names. Keywords and property names
        in literals are ignored.

        @param inputString: String to parse.
        @type inputString: C{unicode}

        @return: List of matched expression tuples that consist of
            matched expression, start index, end index.
        @rtype: C{list} of C{tuple} of C{unicode}, C{int}, C{int}
        """
        return self._matchWrapper(inputString, self.__propertyNameExpression)

    def matchLiteral(self, inputString):
        """
        Returns all matches of literals.

        @param inputString: String to parse.
        @type inputString: C{unicode}

        @return: List of matched expression tuples that consist of
            matched expression, start index, end index.
        @rtype: C{list} of C{tuple} of (C{unicode} or C{time.struct_time}
            or C{int} or C{float}, C{int}, C{int})
        """
        return self._matchWrapper(inputString, self.__literalExpression)

    def matchComparison(self, inputString):
        """
        Returns all matches of comparison operators.

        @param inputString: String to parse.
        @type inputString: C{unicode}

        @return: List of matched expression tuples that consist of
            matched expression, start index, end index.
        @rtype: C{list} of C{tuple} of (C{unicode} or C{time.struct_time}
            or C{int} or C{float}, C{int}, C{int})
        """
        return self._matchWrapper(inputString, self.__comparisonExpression)

    def matchConjunction(self, inputString):
        """
        Returns all matches of conjunction operators.

        @param inputString: String to parse.
        @type inputString: C{unicode}

        @return: List of matched expression tuples that consist of
            matched expression, start index, end index.
        @rtype: C{list} of C{tuple} of (C{unicode} or C{time.struct_time}
            or C{int} or C{float}, C{int}, C{int})
        """
        return self._matchWrapper(inputString, self.__conjunctionExpression)

    def matchConditionTerm(self, inputString):
        """
        Returns all matches of condition terms.
        Condition terms in literals are ignored.

        @param inputString: String to parse.
        @type inputString: C{unicode}

        @return: List of matched expression tuples that consist of
            matched expression, start index, end index.
        @rtype: C{list} of C{tuple} of C{unicode}, C{int}, C{int}
        """
        return self._matchWrapper(inputString, self.__conditionExpression)

    @property
    def comparisonTokens(self):
        """ Returns a list of strings representing the comparison operators. """
        # Computed lazily and cached on first access.
        if self.__comparisonTokens is None:
            self.__comparisonTokens = self._walkKeywordTree(
                self.__comparisonExpression)
        return self.__comparisonTokens

    @property
    def conjunctionTokens(self):
        """ Returns a list of strings representing the conjunction keywords. """
        # Computed lazily and cached on first access.
        if self.__conjunctionTokens is None:
            self.__conjunctionTokens = self._walkKeywordTree(
                self.__conjunctionExpression)
        return self.__conjunctionTokens

    @property
    def quotedStringCharacters(self):
        """ Returns a list of strings representing the quoted string characters. """
        return self.__quotedStringCharacters

    def _walkKeywordTree(self, rootNode):
        """
        Walks through a MatchFirst object and returns possible matches as
        a string list.

        Recurses down exprs[0] (the left-nested alternatives) until it
        reaches a leaf Keyword (which has .match but no .exprs), collecting
        the right-hand match of every level on the way back up.
        """
        nextRoot = None
        try:
            nextRoot = rootNode.exprs[0]
        except AttributeError:
            # Leaf node: a plain Keyword.
            return [rootNode.match]
        else:
            result = self._walkKeywordTree(nextRoot)
            result.append(rootNode.exprs[1].match)
            return result

    @staticmethod
    def _matchWrapper(inputString, expression):
        """ Calls scanString with given input, parse expression and returns the result. """
        result = list()
        # NOTE(review): the loop variable deliberately(?) shadows the
        # `expression` parameter after the first iteration.
        for expression, startIndex, endIndex in expression.scanString(
                inputString):
            expressionString = expression[0]
            result.append((expressionString, startIndex, endIndex))
        return result
"platform.version": "platform_version", "platform.machine": "platform_machine", "platform.python_implementation": "platform_python_implementation", "python_implementation": "platform_python_implementation", } VARIABLE.setParseAction(lambda s, l, t: Variable(ALIASES.get(t[0], t[0]))) VERSION_CMP = ( L("===") | L("==") | L(">=") | L("<=") | L("!=") | L("~=") | L(">") | L("<") ) MARKER_OP = VERSION_CMP | L("not in") | L("in") MARKER_OP.setParseAction(lambda s, l, t: Op(t[0])) MARKER_VALUE = QuotedString("'") | QuotedString('"') MARKER_VALUE.setParseAction(lambda s, l, t: Value(t[0])) BOOLOP = L("and") | L("or") MARKER_VAR = VARIABLE | MARKER_VALUE MARKER_ITEM = Group(MARKER_VAR + MARKER_OP + MARKER_VAR) MARKER_ITEM.setParseAction(lambda s, l, t: tuple(t[0])) LPAREN = L("(").suppress() RPAREN = L(")").suppress() MARKER_EXPR = Forward() MARKER_ATOM = MARKER_ITEM | Group(LPAREN + MARKER_EXPR + RPAREN) MARKER_EXPR << MARKER_ATOM + ZeroOrMore(BOOLOP + MARKER_EXPR)
def RawOutputSpecParser() -> ParserElement:
    '''Syntax of the OUTPUT statement (and nothing else).

    Builds (inside a custom default-whitespace context) the grammar for
    OUTPUT { name = expr; ... } blocks and returns the top-level
    ParserElement.  Many fragments (integer, amp, py_identifier, lpar,
    comma, ... and the `o` / `asp` AST modules) are defined elsewhere
    in this file.
    '''
    with PyParsingDefaultWhitespaceChars(DEFAULT_WHITESPACE_CHARS):
        # Keywords of the OUTPUT sub-language; all suppressed from results.
        OUTPUT = CaselessKeyword('OUTPUT').suppress()
        QUERY = CaselessKeyword('query').suppress()
        INDEX = CaselessKeyword('index').suppress()
        KEY = CaselessKeyword('key').suppress()
        CONTENT = CaselessKeyword('content').suppress()
        SET = CaselessKeyword('set').suppress()
        SEQUENCE = CaselessKeyword('sequence').suppress()
        DICTIONARY = CaselessKeyword('dictionary').suppress()
        NOT = CaselessKeyword('not').suppress()

        constant = integer | QuotedString('"', escChar='\\')
        constant.setParseAction(
            lambda t: o.Constant(t[0])
        )  # not strictly necessary to wrap this, but it simplifies working with the syntax tree

        # ASP variables: uppercase-initial names, or the anonymous '_'.
        asp_variable_name = Word(alphas_uppercase, alphanums + '_')
        asp_variable_anonymous = Keyword('_')
        asp_variable = asp_variable_anonymous | asp_variable_name
        # Copy so the expression-position variant can carry its own action.
        asp_variable_expr = asp_variable_name.copy()
        # asp_variable_name.setParseAction(lambda t: asp.Variable(t[0]))
        asp_variable_anonymous.setParseAction(
            lambda t: asp.AnonymousVariable())
        asp_variable_expr.setParseAction(lambda t: o.Variable(t[0]))

        # TODO:
        # Instead of explicitly marking references with '&', we might just define a convention as follows:
        # * Output names start with lowercase characters
        # * ASP variables start with uppercase characters (as they do in actual ASP code)
        reference = amp + py_identifier
        reference.setParseAction(lambda t: o.Reference(t[0])
                                 )  # to distinguish from literal string values

        # Note: must be able to distinguish between unquoted and quoted constants
        asp_constant_symbol = Word(alphas_lowercase, alphanums + '_')
        asp_quoted_string = QuotedString('"', escChar='\\')
        asp_quoted_string.setParseAction(lambda t: asp.QuotedConstant(t[0]))
        # All matched terms accumulate under the 'terms' results name.
        term = (asp_constant_symbol | asp_quoted_string | asp_variable |
                positive_integer).setResultsName('terms',
                                                 listAllMatches=True)
        terms = Optional(term + ZeroOrMore(comma + term))
        # p or p(t1, ..., tn)
        classical_atom = predicate_name('predicate') + Optional(lpar + terms +
                                                                rpar)

        # Builtin atoms: infix (X = Y) or prefix (=(X, Y)) form.
        builtin_op_binary = (Literal('=') | '==' | '!=' | '<>' | '<' | '<=' |
                             '>' | '>=' | '#succ').setResultsName('predicate')
        builtin_atom_binary = term + builtin_op_binary + term
        builtin_atom_binary_prefix = builtin_op_binary + lpar + term + comma + term + rpar
        builtin_atom = builtin_atom_binary | builtin_atom_binary_prefix
        #
        body_atom = classical_atom | builtin_atom
        # Positive vs. negated ("not p(...)") literals in a query body.
        pos_body_atom = body_atom.copy()
        neg_body_atom = NOT + body_atom
        pos_body_atom.setParseAction(
            lambda t: asp.Literal(t.predicate, tuple(t.terms), False))
        neg_body_atom.setParseAction(
            lambda t: asp.Literal(t.predicate, tuple(t.terms), True))
        body_literal = neg_body_atom | pos_body_atom
        #
        asp_query = Group(body_literal + ZeroOrMore(comma + body_literal))
        asp_query.setParseAction(lambda t: asp.Query(tuple(t[0])))

        expr = Forward()
        # TODO: Instead of semicolon, we could use (semicolon | FollowedBy(rbrace)) to make the last semicolon optional (but how would that work with asp_query...)
        query_clause = QUERY + colon + asp_query('query') + semicolon
        content_clause = CONTENT + colon + expr('content') + semicolon
        index_clause = INDEX + colon + asp_variable_expr('index') + semicolon
        key_clause = KEY + colon + expr('key') + semicolon
        #
        # set { p/n [-> Constructor] } shorthand form.
        simple_set_spec = SET + lbrace + predicate_name(
            'predicate') + slash + positive_integer('arity') + Optional(
                rightarrow + py_qualified_identifier('constructor')) + rbrace
        # '&' composition: clauses may appear in any order.
        set_spec = SET + lbrace + (query_clause & content_clause) + rbrace
        # TODO: add clause like "at_missing_index: skip;", "at_missing_index: 0;", "at_missing_index: None;"
        sequence_spec = SEQUENCE + lbrace + (query_clause & content_clause
                                             & index_clause) + rbrace
        dictionary_spec = DICTIONARY + lbrace + (query_clause & content_clause
                                                 & key_clause) + rbrace
        expr_collection = set_spec | simple_set_spec | sequence_spec | dictionary_spec
        #
        simple_set_spec.setParseAction(lambda t: o.ExprSimpleSet(
            t.predicate, t.arity, t.get('constructor')))
        set_spec.setParseAction(lambda t: o.ExprSet(t.query, t.content))
        sequence_spec.setParseAction(
            lambda t: o.ExprSequence(t.query, t.content, t.index))
        dictionary_spec.setParseAction(
            lambda t: o.ExprDictionary(t.query, t.content, t.key))

        # Object construction: [Qualified.Name](arg, ..., [trailing comma])
        expr_obj_args = Group(
            Optional(expr + ZeroOrMore(comma + expr) + Optional(comma)))
        expr_obj = Optional(
            py_qualified_identifier,
            default=None)('constructor') + lpar + expr_obj_args('args') + rpar
        #
        expr_obj.setParseAction(lambda t: o.ExprObject(t.constructor, t.args))

        # Note: "|" always takes the first match, that's why we have to parse variable names after obj (otherwise "variable name" might consume the identifier of expr_obj)
        expr << (constant | expr_collection | expr_obj | reference
                 | asp_variable_expr)

        named_output_spec = py_identifier('name') + equals + expr(
            'expr') + semicolon
        output_statement = OUTPUT + lbrace + ZeroOrMore(
            named_output_spec) + rbrace
        #
        named_output_spec.setParseAction(lambda t: (t.name, t.expr))
        output_statement.setParseAction(lambda t: o.OutputSpec(t))
        return output_statement
# Grammar for translating MySQL DDL statements to SQLite.  Several tokens
# (nnToken, nullToken, createToken, tableToken, ifneToken, databaseToken,
# dcsToken, collateToken) and the parse actions convert_datatypes /
# rebuild_createtable are defined elsewhere in this file.
useToken = Keyword("USE")
defaultToken = Keyword("DEFAULT")
unsignedToken = Keyword("UNSIGNED")
autoincrementToken = Keyword("AUTO_INCREMENT")
# MySQL AUTO_INCREMENT is rewritten to SQLite's PRIMARY KEY AUTOINCREMENT.
autoincrementToken.setParseAction(lambda toks: ["PRIMARY KEY AUTOINCREMENT"])
keyToken = Keyword("KEY")
primaryToken = Keyword("PRIMARY")
uniqueToken = Keyword("UNIQUE")
insertToken = Keyword("INSERT")
intoToken = Keyword("INTO")
valuesToken = Keyword("VALUES")

# Identifiers may be bare, double-quoted, or MySQL-backtick-quoted;
# they are normalized to SQLite double-quoted form.
ident = Word(alphas, alphanums + "_$") ^ QuotedString('"') ^ QuotedString("`")
ident.setParseAction(lambda toks: ['"%s"' % toks[0]])
# String literals keep their single quotes after unquoting/requoting.
string = QuotedString("'", multiline=True)
string.setParseAction(lambda toks: ["'%s'" % toks[0]])
# Dotted names like db.table are kept as a single combined token.
columnName = delimitedList(ident, ".", combine=True)
tableName = delimitedList(ident, ".", combine=True)
# Type name, optional size/enum-values parens, then column attributes.
dataType = Word(alphas) + Combine(
    Optional(Literal("(") + (Word(nums) ^ delimitedList(string, combine=True))
             + Literal(")"))) + ZeroOrMore(
    nnToken ^ autoincrementToken ^ (defaultToken + (string ^ nullToken))
    ^ unsignedToken.suppress())
dataType.setParseAction(convert_datatypes)
columnDescription = Group(ident + dataType)
# Index/key definitions are parsed only to be suppressed below (SQLite
# handles them differently).
keyDescription = Optional(primaryToken ^ uniqueToken) + keyToken + Optional(
    ident) + Literal("(") + delimitedList(ident + Optional(
        Literal("(") + Word(nums) + Literal(")"))) + Literal(")")

# CREATE TABLE with columns; a trailing MySQL "AUTO_INCREMENT = n" table
# option is dropped.
createTableStmt = Group(
    createToken + tableToken + ifneToken + ident + Literal("(")
) + delimitedList(columnDescription ^ keyDescription.suppress()) + Group(
    Literal(")")) + Optional(autoincrementToken + Literal("=") +
                             Word(nums)).suppress()
createTableStmt.setParseAction(rebuild_createtable)
createDataBaseStmt = Group(createToken + databaseToken + ident + dcsToken +
                           Word(alphanums) + collateToken + ident)
# Bytes can be represented in binary, hex, char, or a number (0-255 or -128-127) # and may include embedded arithmetic # OPCODE 0b00001100 # OPCODE 0x0b # OPCODE 'a' # OPCODE 254-0x0a # OPCODE 'a'&0b00001111 binbyte = Combine(Literal('0b') + Char('01') * 8) binbyte.setName('binbyte') binbyte.setParseAction(lambda t: [int(t[0], 2)]) hexbyte = Combine(Literal('0x') + Char(srange("[0-9a-fA-F]")) * 2) hexbyte.setName('hexbyte') hexbyte.setParseAction(lambda t: [int(t[0], 16)]) chrbyte = QuotedString(quoteChar="'", unquoteResults=True) chrbyte.setName('char') chrbyte.setParseAction(lambda t: [ord(t[0])]) number = Word(nums + '-') number.setName('number') number.setParseAction(lambda t: [int(t[0])]) allbytes = binbyte | hexbyte | chrbyte | number mathtoken = Combine(oneOf('+ - & |') + allbytes) bytemathexpression = Combine(allbytes + OneOrMore(mathtoken)) bytemathexpression.setParseAction(lambda t: [eval(t[0])]) byte = bytemathexpression | allbytes byte.setName('byte') # Words can be represented in binary, hex, label, or number (0-65535 or -32768-32767) # OPCODE 0b0000111100001111 # OPCODE 0x2911 # OPCODE .label # OPCODE .label+4 # OPCODE 2490
# Grammar for a small knowledge-tree scripting language.  The fragments
# attribute, variable, comparison, freeformText, startContext and the
# parse actions (update_attribute, ...) plus the `kt` result object are
# defined elsewhere in this file.
setAttributeValue = attribute + Literal("=") + freeformText | attribute + comparison + variable
setVariableFromVar = variable + Literal("=") + freeformText | variable + comparison + variable
setVariableFromAttr = variable + comparison + attribute
argument = Word(alphanums) + Optional(",")
funcDef = Word(alphanums) + "(" + OneOrMore(argument) + ")"
comment = QuotedString('/*', endQuoteChar='*/')
# NOTE(review): in the second alternative, `setAttributeValue + ";" +
# setVariableFromAttr + ";"` requires BOTH statements in sequence ('+'
# binds tighter than '|'); a '|' between them may have been intended.
line = startContext + ";" | setAttributeValue + ";" + setVariableFromAttr + ";" | setVariableFromVar + ";" | funcDef + ";" | comment

# grammar to be exported
grammar = OneOrMore(line)


def parse_knowledge_tree(s):
    """ return knowledge tree after parsing with the grammar"""
    # Parsing runs the parse actions below, which mutate the module-level
    # knowledge tree `kt` as a side effect; the parse result is discarded.
    grammar.parseString(s)
    return kt


# parse actions
setAttributeValue.setParseAction(update_attribute)
setVariableFromVar.setParseAction(update_variable)
setVariableFromAttr.setParseAction(update_variable_from_attr)
startContext.setParseAction(start_context)
funcDef.setParseAction(set_function)
comment.setParseAction(print_comment)
freeformText.setParseAction(get_freeform)
# Operator tokens and parse-action wiring for the expression grammar.
# The operand expressions (current_scope_operand, variable_operand, ...)
# and the Eval* action classes are defined elsewhere in this file.
signop = oneOf("+ -")
multop = oneOf("* / // % bitand bitor")
filterop = oneOf("|")
plusop = oneOf("+ -")
# WordEnd keeps 'not' from matching the prefix of e.g. 'notation'.
notop = Literal("not") + WordEnd(word_characters)
rangeop = Literal("..")
exclusiverangeop = Literal("...")
# Plain tuple, not a ParserElement: consumed by operatorPrecedence-style
# machinery elsewhere (presumably) — confirm against its user.
ternaryop = ("?", ":")

# Attach evaluator classes to each operand type.
current_scope_operand.setParseAction(EvalCurrentScope)
variable_operand.setParseAction(EvalVariable)
explicit_variable_operand.setParseAction(EvalExplicitVariable)
integer_operand.setParseAction(EvalInteger)
real_operand.setParseAction(EvalReal)
triple_string.setParseAction(EvalTripleString)
string_operand.setParseAction(EvalString)
constant.setParseAction(EvalConstant)
regexp.setParseAction(EvalRegExp)
timespan.setParseAction(EvalTimespan)

# "name:" modifier prefix.
modifier = Regex(r"([a-zA-Z][a-zA-Z0-9_]*)\:")

# List forms: bare comma-separated, bracketed, and the empty list literal.
simple_list_operand = Group(delimitedList(expr))
simple_list_operand.setParseAction(EvalSimpleList)
list_operand = Suppress("[") + delimitedList(expr) + Suppress("]")
list_operand.setParseAction(EvalList)
empty_list_operand = Literal("[]")
empty_list_operand.setParseAction(EvalEmptyList)
import re

import jinja2
import pyparsing
import bleach

from .attachments import THUMB_PATTERN
from pyparsing import QuotedString, ParserElement, LineStart, LineEnd, SkipTo, OneOrMore, restOfLine
from .util import mime2thumb_ext

# Keep newlines significant: only spaces/tabs are skippable whitespace.
ParserElement.setDefaultWhitespaceChars(' \t')
EOL = LineEnd()
SOL = LineStart()

# Inline markup: each element accepts either the markdown-ish form or the
# BBCode form and is rewritten to the corresponding HTML tag.
strong = QuotedString("**") | QuotedString(quoteChar="[b]", endQuoteChar="[/b]")
strong.setParseAction(lambda x: "<strong>%s</strong>" % x[0])
# escChar so "\*" can appear inside *italic* text.
italic = QuotedString("*", escChar='\\') | QuotedString(quoteChar="[i]", endQuoteChar="[/i]")
italic.setParseAction(lambda x: "<i>%s</i>" % x[0])
underline = QuotedString("__") | QuotedString(quoteChar="[u]", endQuoteChar="[/u]")
underline.setParseAction(lambda x: "<u>%s</u>" % x[0])
strike = QuotedString(quoteChar="[s]", endQuoteChar="[/s]")
strike.setParseAction(lambda x: "<s>%s</s>" % x[0])
sup = QuotedString(quoteChar="[sup]", endQuoteChar="[/sup]")
sup.setParseAction(lambda x: "<sup>%s</sup>" % x[0])
sub = QuotedString(quoteChar="[sub]", endQuoteChar="[/sub]")
sub.setParseAction(lambda x: "<sub>%s</sub>" % x[0])
# PEP 508 environment-marker grammar.  L is an alias for pyparsing.Literal
# and VARIABLE / Value are defined elsewhere in this file.
# Ordering matters in alternations: '===' must precede '==', which must
# precede '=' -style prefixes, or the longer operator would never match.
VERSION_CMP = (
    L("===") | L("==") | L(">=") | L("<=") | L("!=") | L("~=") | L(">") | L("<")
)

# NOTE(review): a sibling copy of this grammar in this file attaches an
# Op(...) parse action to MARKER_OP; here it is absent — confirm whether
# that is intentional.
MARKER_OP = VERSION_CMP | L("not in") | L("in")

MARKER_VALUE = QuotedString("'") | QuotedString('"')
MARKER_VALUE.setParseAction(lambda s, l, t: Value(t[0]))

BOOLOP = L("and") | L("or")

MARKER_VAR = VARIABLE | MARKER_VALUE

# A single comparison: <var> <op> <var>, flattened to a plain tuple.
MARKER_ITEM = Group(MARKER_VAR + MARKER_OP + MARKER_VAR)
MARKER_ITEM.setParseAction(lambda s, l, t: tuple(t[0]))

LPAREN = L("(").suppress()
RPAREN = L(")").suppress()

# Right-recursive boolean expression with optional parenthesized groups.
MARKER_EXPR = Forward()
MARKER_ATOM = MARKER_ITEM | Group(LPAREN + MARKER_EXPR + RPAREN)
MARKER_EXPR << MARKER_ATOM + ZeroOrMore(BOOLOP + MARKER_EXPR)
def make_grammar_2():
    """
    Construct the BBDB grammar.  See grammar.ebnf for the specification.

    Returns a pyparsing element that parses a whole BBDB file into a list
    of record dicts.
    """

    # Helper functions for the brace types.
    LP, RP, LB, RB = map(Suppress, "()[]")
    Paren = lambda arg: LP + Group(arg) + RP
    Bracket = lambda arg: LB + Group(arg) + RB

    # Helper functions for constructing return types.
    def make_list(t):
        # ParseResults -> plain Python list.
        return t.asList()

    def make_dict(t):
        # Grouped [key, value] pairs -> dict; `or []` maps a nil match
        # (which parses to None) to an empty dict.
        return {k: v for k, v in t[0] or []}

    def make_address_entry(t):
        # One bracketed address entry -> (tag, field-dict); nil fields
        # become empty strings/lists.
        return t[0].tag, {"location": list(t[0].location or []),
                          "city": t[0].city or "",
                          "state": t[0].state or "",
                          "zipcode": t[0].zipcode or "",
                          "country": t[0].country or ""}

    def make_record(t):
        # One bracketed record -> dict keyed by the results names below.
        return {"firstname": t[0].firstname,
                "lastname": t[0].lastname,
                "aka": t[0].aka or [],
                "company": t[0].company or "",
                "phone": t[0].phone or {},
                "address": t[0].address or {},
                "net": t[0].net or [],
                "fields": t[0].fields or {}}

    def make_string(t):
        # Strip surrounding quotes (unquoteResults=False keeps them) and
        # un-escape embedded quote characters.
        return t[0][1:-1].replace(r'\"', '"')

    # Define the low-level entities.
    string = QuotedString(quoteChar='"', escChar='\\', unquoteResults=False)
    string.setParseAction(make_string)

    # Lisp nil parses to Python None.
    nil = Keyword("nil")
    nil.setParseAction(lambda t: [None])

    atom = Word(alphanums + '-')
    dot = Suppress(Keyword("."))

    integer = Word(nums)
    integer.setParseAction(lambda t: int(t[0]))

    # Phone.
    # US numbers are a sequence of integers; non-US a free-form string.
    phone_usa = Group(OneOrMore(integer))
    phone_nonusa = string
    phone_entry = Bracket(string("tag") + Or([phone_usa, phone_nonusa]))
    phone = Or([Paren(OneOrMore(phone_entry)), nil])("phone")
    phone.setParseAction(make_dict)

    # Address.
    location = Paren(OneOrMore(string))("location")
    location.setParseAction(make_list)

    address_entry = Bracket(string("tag") + location + string("city") +
                            string("state") + string("zipcode") +
                            string("country"))
    address_entry.setParseAction(make_address_entry)

    address = Or([Paren(OneOrMore(address_entry)), nil])("address")
    address.setParseAction(make_dict)

    # Field.
    field = Paren(atom + dot + string)
    fields = Or([Paren(OneOrMore(field)), nil])("fields")
    fields.setParseAction(make_dict)

    # Other parts of an entry.
    name = string("firstname") + Or([string("lastname"), nil])
    company = Or([string, nil])("company")

    aka = Or([Paren(OneOrMore(string)), nil])("aka")
    aka.setParseAction(make_list)

    net = Or([Paren(OneOrMore(string)), nil])("net")
    net.setParseAction(make_list)

    # The trailing cache slot is always nil and is discarded.
    cache = nil("cache")

    # A single record.
    record = Bracket(name + aka + company + phone + address + net +
                     fields + cache)
    record.setParseAction(make_record)

    # All the records.
    bbdb = ZeroOrMore(record)
    bbdb.setParseAction(make_list)

    # Define comment syntax: ';' to end of line, ignored anywhere.
    comment = Regex(r";.*")
    bbdb.ignore(comment)

    return bbdb
# Operator tokens and parse-action wiring for the expression grammar
# (variant with dict literals).  The operand expressions and the Eval*
# action classes are defined elsewhere in this file.
filterop = oneOf('|')
plusop = oneOf('+ -')
# WordEnd keeps 'not' from matching the prefix of a longer identifier.
notop = Literal('not') + WordEnd(word_characters)
rangeop = Literal('..')
exclusiverangeop = Literal('...')
# Plain tuple, not a ParserElement — consumed by the precedence machinery
# elsewhere (presumably); confirm against its user.
ternaryop = ('?', ':')

# Attach evaluator classes to each operand type.
current_scope_operand.setParseAction(EvalCurrentScope)
variable_operand.setParseAction(EvalVariable)
explicit_variable_operand.setParseAction(EvalExplicitVariable)
integer_operand.setParseAction(EvalInteger)
real_operand.setParseAction(EvalReal)
string_operand.setParseAction(EvalString)
constant.setParseAction(EvalConstant)
regexp.setParseAction(EvalRegExp)
timespan.setParseAction(EvalTimespan)

# "name:" modifier prefix.
modifier = Regex(r'([a-zA-Z][a-zA-Z0-9_]*)\:')

# List forms: bare comma-separated, bracketed, and the empty list literal.
simple_list_operand = Group(delimitedList(expr))
simple_list_operand.setParseAction(EvalSimpleList)
list_operand = (Suppress('[') + delimitedList(expr) + Suppress(']'))
list_operand.setParseAction(EvalList)
empty_list_operand = Literal('[]')
empty_list_operand.setParseAction(EvalEmptyList)

# Dict literal: { key-expr : value-expr, ... }.
dict_item = Group(expr + Suppress(Literal(':')) + expr)
dict_operand = Group(Suppress('{') + delimitedList(dict_item) + Suppress('}'))
variable = Regex(r'([a-zA-Z0-9\._]+)') string = QuotedString('"', escChar="\\") | QuotedString('\'', escChar="\\") operand = model_reference | real | integer | constant | string | variable plusop = oneOf('+ -') multop = oneOf('* / // %') groupop = Literal(',') expr = Forward() modifier = Combine(Word(alphas + nums) + ':') integer.setParseAction(EvalInteger) real.setParseAction(EvalReal) string.setParseAction(EvalString) constant.setParseAction(EvalConstant) variable.setParseAction(EvalVariable) model_reference.setParseAction(EvalModelReference) comparisonop = (oneOf("< <= > >= != == ~= ^= $=") | (Literal('not in') + WordEnd()) | (oneOf("in lt lte gt gte matches contains icontains like") + WordEnd())) logicopOR = Literal('or') + WordEnd() logicopAND = Literal('and') + WordEnd() expr << operatorPrecedence(operand, [ (modifier, 1, opAssoc.RIGHT, EvalModifierOp), (multop, 2, opAssoc.LEFT, EvalMultOp),
def parse(string=None, filename=None, token=None, lang=None):
    """
    Parse a feature from a string or a file, or raise a syntax error.

    This function includes the complete parser grammar.

    :param string: feature text to parse (takes precedence over filename)
    :param filename: path of a UTF-8 encoded feature file to parse
    :param token: optional name of a grammar rule defined in this function
        (e.g. 'TABLE', 'STATEMENT') to parse with instead of the whole
        FEATURE rule
    :param lang: language definition; guessed from the input when not given
    :returns: the pyparsing token list produced by the selected rule
    :raises RuntimeError: if neither string nor filename is given
    :raises LettuceSyntaxError: on a syntax error in the input
    """

    if not lang:
        lang = guess_language(string, filename)

    #
    # End of Line
    #
    EOL = Suppress(lineEnd)
    UTFWORD = Word(unicodePrintables)

    #
    # @tag
    #
    TAG = Suppress('@') + UTFWORD

    #
    # A table
    #
    # A table is made up of rows of cells, e.g.
    #
    # | column 1 | column 2 |
    #
    # Table cells need to be able to handle escaped tokens such as \| and \n
    #
    def handle_esc_char(tokens):
        """Map an escaped table-cell sequence to its literal character."""
        # NB: do not shadow the outer `token` parameter here
        esc = tokens[0]

        if esc == r'\|':
            return u'|'
        elif esc == r'\n':
            return u'\n'
        elif esc == r'\\':
            return u'\\'

        raise NotImplementedError(u"Unknown token: %s" % esc)

    ESC_CHAR = Word(initChars=r'\\', bodyChars=unicodePrintables, exact=2)
    ESC_CHAR.setParseAction(handle_esc_char)

    #
    # A cell can contain anything except a cell marker, new line or the
    # beginning of a cell marker, we then handle escape characters separately
    # and recombine the cell afterwards
    #
    CELL = OneOrMore(CharsNotIn('|\n\\') + Optional(ESC_CHAR))
    CELL.setParseAction(lambda tokens: u''.join(tokens))

    TABLE_ROW = Suppress('|') + OneOrMore(CELL + Suppress('|')) + EOL
    TABLE_ROW.setParseAction(lambda tokens: [v.strip() for v in tokens])
    TABLE = Group(OneOrMore(Group(TABLE_ROW)))

    #
    # Multiline string
    #
    def clean_multiline_string(s, loc, tokens):
        """
        Clean a multiline string

        The indent level of a multiline string is the indent level of the
        triple-". We have to derive this by walking backwards from the
        location of the quoted string token to the newline before it.

        We also want to remove the leading and trailing newline if they
        exist.

        FIXME: assumes UNIX newlines
        """

        def remove_indent(multiline, indent):
            """
            Generate the lines removing the indent
            """
            for line in multiline.splitlines():
                if line and not line[:indent].isspace():
                    warn("%s: %s: under-indented multiline string "
                         "truncated: '%s'" %
                         (lineno(loc, s), col(loc, s), line),
                         LettuceSyntaxWarning)

                # for those who are surprised by this, slicing a string
                # shorter than indent will yield empty string, not IndexError
                yield line[indent:]

        # determine the indentation offset
        indent = loc - s.rfind('\n', 0, loc) - 1

        multiline = '\n'.join(remove_indent(tokens[0], indent))

        # remove leading and trailing newlines; startswith/endswith are safe
        # on an empty string, where indexing [0]/[-1] would raise IndexError
        if multiline.startswith('\n'):
            multiline = multiline[1:]
        if multiline.endswith('\n'):
            multiline = multiline[:-1]

        return multiline

    MULTILINE = QuotedString('"""', multiline=True)
    MULTILINE.setParseAction(clean_multiline_string)

    # A Step
    #
    # Steps begin with a keyword such as Given, When, Then or And They can
    # contain an optional inline comment, although it's possible to
    # encapsulate it in a string. Finally they can contain a table or a
    # multiline 'Python' string.
    #
    # <variables> are not parsed as part of the grammar as it's not easy to
    # distinguish between a variable and XML. Instead scenarios will replace
    # instances in the steps based on the outline keys.
    #
    STATEMENT_SENTENCE = Group(
        lang.STATEMENT +  # Given, When, Then, And
        OneOrMore(UTFWORD.setWhitespaceChars(' \t') |
                  quotedString.setWhitespaceChars(' \t')) +
        EOL
    )

    STATEMENT = Group(
        STATEMENT_SENTENCE('sentence') +
        Optional(TABLE('table') | MULTILINE('multiline'))
    )
    STATEMENT.setParseAction(Step)

    STATEMENTS = Group(ZeroOrMore(STATEMENT))

    #
    # Background:
    #
    BACKGROUND_DEFN = \
        lang.BACKGROUND('keyword') + Suppress(':') + EOL
    BACKGROUND_DEFN.setParseAction(Background)

    BACKGROUND = Group(
        BACKGROUND_DEFN('node') +
        STATEMENTS('statements')
    )
    BACKGROUND.setParseAction(Background.add_statements)

    #
    # Scenario: description
    #
    SCENARIO_DEFN = Group(
        Group(ZeroOrMore(TAG))('tags') +
        lang.SCENARIO('keyword') + Suppress(':') +
        restOfLine('name') +
        EOL
    )
    SCENARIO_DEFN.setParseAction(Scenario)

    SCENARIO = Group(
        SCENARIO_DEFN('node') +
        STATEMENTS('statements') +
        Group(ZeroOrMore(
            Suppress(lang.EXAMPLES + ':') + EOL + TABLE
        ))('outlines')
    )
    SCENARIO.setParseAction(Scenario.add_statements)

    #
    # Feature: description
    #
    FEATURE_DEFN = Group(
        Group(ZeroOrMore(TAG))('tags') +
        lang.FEATURE('keyword') + Suppress(':') +
        restOfLine('name') +
        EOL
    )
    FEATURE_DEFN.setParseAction(Feature)

    #
    # A description composed of zero or more lines, before the
    # Background/Scenario block
    #
    DESCRIPTION_LINE = Group(
        ~BACKGROUND_DEFN + ~SCENARIO_DEFN +
        OneOrMore(UTFWORD).setWhitespaceChars(' \t') +
        EOL
    )
    DESCRIPTION = Group(ZeroOrMore(DESCRIPTION_LINE | EOL))
    DESCRIPTION.setParseAction(Description)

    #
    # Complete feature file definition
    #
    FEATURE = Group(
        FEATURE_DEFN('node') +
        DESCRIPTION('description') +
        Optional(BACKGROUND('background')) +
        Group(OneOrMore(SCENARIO))('scenarios') +
        stringEnd)
    FEATURE.ignore(pythonStyleComment)
    FEATURE.setParseAction(Feature.add_blocks)

    #
    # Choose the rule to parse with: the whole FEATURE by default, or a
    # named rule looked up among this function's locals (raises KeyError
    # for an unknown rule name)
    #
    if not token:
        token = FEATURE
    else:
        token = locals()[token]

    #
    # Try parsing the input
    #
    try:
        if string:
            tokens = token.parseString(string)
        elif filename:
            # pass encoding by keyword: correct for both the builtin open()
            # and codecs.open(); positionally, the builtin open()'s third
            # argument is `buffering`, not the encoding
            with open(filename, 'r', encoding='utf-8') as fp:
                tokens = token.parseFile(fp)
        else:
            raise RuntimeError("Must pass string or filename")

        return tokens
    except ParseException as e:
        if e.parserElement == stringEnd:
            msg = "Expected EOF (max one feature per file)"
        else:
            msg = e.msg

        raise LettuceSyntaxError(
            filename,
            u"{lineno}:{col} Syntax Error: {msg}\n{line}\n{space}^".format(
                msg=msg,
                lineno=e.lineno,
                col=e.col,
                line=e.line,
                space=' ' * (e.col - 1)))
    except LettuceSyntaxError as e:
        # reraise the exception with the filename
        raise LettuceSyntaxError(filename, e.string)