def __convert(self, string, dic):
    """Expand ``%name%`` placeholders in *string* using *dic*.

    Every ``%...%`` span is replaced by ``dic[name]``; an empty span
    (``%%``) is replaced by a literal ``%``.  A falsy *string* is returned
    unchanged.  A name missing from *dic* raises from the ``dic[name]``
    lookup inside the parse action.
    """
    if not string:
        return string

    def substitute(tokens):
        name = tokens[0]
        if name == "":
            return "%"
        return dic[name]

    placeholder = QuotedString("%")
    placeholder.setParseAction(substitute)
    return placeholder.transformString(string)
def transform_human(text, main_window):
    """Transform user input into something Script can read.

    Two rewriting passes are applied, in this order:

    1. ``$name`` tokens are replaced by the value returned from
       ``main_window.dock_handler.variables.get_key(name)`` when that value
       is truthy; otherwise the token is left as written.
    2. Double-quoted string literals are replaced by ``0x`` followed by the
       hex encoding of their contents.

    The main window is needed for the variable lookup (tool integration).
    """

    # --- pyparsing parse actions -------------------------------------
    def var_name_to_value(s, loc, toks):
        for index, token in enumerate(toks):
            value = main_window.dock_handler.variables.get_key(
                token.strip('$'))
            if value:
                toks[index] = value
        return toks

    def str_literal_to_hex(s, loc, toks):
        for index, token in enumerate(toks):
            toks[index] = '0x' + token.encode('hex')
        return toks
    # --- end of parse actions ----------------------------------------

    variable = pyparsing.Combine(Word('$') + Word(pyparsing.alphas))
    variable.setParseAction(var_name_to_value)

    literal = QuotedString('"')
    literal.setParseAction(str_literal_to_hex)

    # Variables are substituted first so that their values can themselves
    # contain quoted literals that the second pass converts.
    substituted = variable.transformString(text)
    return literal.transformString(substituted)
def populate(self):
    """Fill in the check's attributes from ``self.tokens``.

    Two tokens (``keyword code``): the check is pure external code.  The
    keyword must be ``assert``, ``throws`` or ``returns`` and selects the
    ``addReturns`` / ``throws`` flags; anything else raises
    ``SemanticException``.

    Three tokens (``keyword comparator value``): ``addReturns`` is true only
    for ``returns`` and ``throws`` is its negation.  The value is treated as
    external code when it parses as a ``<% ... %>`` span, otherwise it is
    stored verbatim.

    Any other token count raises ``SemanticException``.
    """
    tokens = self.tokens
    count = len(tokens)

    if count == 2:
        self.containsExternalCode = True
        self.isJustExternalCode = True
        # (keyword, addReturns, throws)
        modes = (
            ('assert', False, False),
            ('throws', False, True),
            ('returns', True, False),
        )
        for keyword, add_returns, throws in modes:
            if tokens[0] == keyword:
                self.addReturns = add_returns
                self.throws = throws
                break
        else:
            raise SemanticException('Invalid Partition Check Item')
        self.code = tokens[1]
        return

    if count != 3:
        raise SemanticException("Check is invalid")

    self.addReturns = tokens[0] == 'returns'
    self.throws = not self.addReturns
    raw_value = tokens[2]
    self.comparator = tokens[1]
    external = QuotedString("<%", "\\", None, False,
                            unquoteResults=True, endQuoteChar="%>")
    self.isJustExternalCode = False
    try:
        self.code = external.parseString(raw_value)[0]
    except ParseException:
        # Not wrapped in <% ... %>: keep the raw token as the code.
        self.containsExternalCode = False
        self.code = raw_value
    else:
        self.containsExternalCode = True
def create_type_query_syntax(self):
    """Return the grammar for ``CREATE TYPE <type> <parent> <description>``.

    Each piece reports to this object through a parse action: the keyword
    calls ``create_new_type_query_obj``, the two alphabetic words call
    ``set_type`` and ``set_parent_type``, and the single- or double-quoted
    (possibly multi-line) description calls ``set_description``.
    """
    keyword = CaselessLiteral("CREATE TYPE")
    keyword.setParseAction(self.create_new_type_query_obj)

    type_name = Word(alphas)
    type_name.setParseAction(self.set_type)

    parent_name = Word(alphas)
    parent_name.setParseAction(self.set_parent_type)

    single_quoted = QuotedString("'", multiline=True)
    double_quoted = QuotedString('"', multiline=True)
    description = single_quoted ^ double_quoted
    description.setParseAction(self.set_description)

    return keyword + type_name + parent_name + description
def _define_grammar():
    """
    Build and return a fresh copy of the selector grammar.

    Kept inside a function so the intermediate parser elements do not
    pollute the module namespace.  The grammar recognises label comparisons
    (``==``, ``!=``, ``in``, ``not in``, ``has(...)``) combined with ``&&``
    and ``||`` and grouped with parentheses, and must consume the whole
    input.
    """
    expr = Forward()

    # Operands
    label_name = Word(LABEL_CHARS).setParseAction(LabelNode)
    string_literal = (QuotedString('"') | QuotedString("'"))
    string_literal.setParseAction(LiteralNode)
    set_members = delimitedList(QuotedString('"') | QuotedString("'"), ",")
    set_literal = Suppress("{") + set_members + Suppress("}")
    set_literal.setParseAction(SetLiteralNode)

    # Comparisons
    eq_comparison = (label_name + Suppress("==") + string_literal)
    eq_comparison.setParseAction(LabelToLiteralEqualityNode)

    not_eq_comparison = (label_name + Suppress("!=") + string_literal)
    not_eq_comparison.setParseAction(InequalityNode)

    in_comparison = (label_name + Suppress(Keyword("in")) + set_literal)
    in_comparison.setParseAction(LabelInSetLiteralNode)

    not_in = Suppress(Keyword("not") + Keyword("in"))
    not_in_comparison = (label_name + not_in + set_literal)
    not_in_comparison.setParseAction(NotInNode)

    has_check = Suppress("has(") + Word(LABEL_CHARS) + Suppress(")")
    has_check.setParseAction(HasNode)

    comparison = (eq_comparison
                  | not_eq_comparison
                  | in_comparison
                  | not_in_comparison
                  | has_check)

    # Boolean structure: && binds tighter than ||.
    paren_expr = Suppress("(") + expr + Suppress(")")
    value = comparison | paren_expr

    and_expr = value + ZeroOrMore(Suppress("&&") + value)
    and_expr.setParseAction(simplify_and_node)

    or_expr = and_expr + ZeroOrMore(Suppress("||") + and_expr)
    or_expr.setParseAction(simplify_or_node)

    expr << or_expr

    return expr + StringEnd()
def foldersByAnnotation(folderlist, type):
    """Return the first field of every folder line whose annotation matches.

    Each entry of *folderlist* is parsed as two double-quoted strings
    followed by a parenthesised group of one or more double-quoted strings.
    A folder is selected when *type* is contained in the second string of
    that group; the first quoted string of the line is collected.
    """
    entry = (QuotedString('"') + QuotedString('"') + Suppress("(")
             + Group(OneOrMore(QuotedString('"'))) + Suppress(")"))
    parsed_entries = (entry.parseString(folder) for folder in folderlist)
    return [fields[0] for fields in parsed_entries if type in fields[2][1]]
def _getPattern(self):
    """Build the pyparsing grammar used to evaluate expressions.

    Operands are, in matching order: a call to one of ``self.FUNCTIONS``,
    an aggregate column ``{name}``, a single column ``[name]``, a real or
    integer constant, a quoted string, the current value ``.`` and a bare
    identifier.  Each operand kind is routed to the matching ``__eval*``
    method through a parse action.

    Operators, from tightest to loosest binding: ``^``, ``* /``, ``+ -``,
    the comparisons ``> < >= <= != ==``, then ``AND`` and ``OR``.

    Returns:
        A parser element that matches one complete logic expression
        followed by the end of the input.
    """
    arith_expr = Forward()
    comp_expr = Forward()
    logic_expr = Forward()

    LPAR, RPAR = map(Suppress, "()")
    identifier = Word(alphas + "_", alphanums + "_")
    multop = oneOf('* /')
    plusop = oneOf('+ -')
    expop = Literal("^")
    compop = oneOf('> < >= <= != ==')
    andop = Literal("AND")
    orop = Literal("OR")
    current_value = Literal(".")

    function = oneOf(' '.join(self.FUNCTIONS))
    function_call = Group(
        function.setResultsName('fn')
        + LPAR + Optional(delimitedList(arith_expr)) + RPAR)
    aggregate_column = QuotedString(quoteChar='{', endQuoteChar='}')
    single_column = QuotedString(quoteChar='[', endQuoteChar=']')
    integer = Regex(r"-?\d+")
    real = Regex(r"-?\d+\.\d*")

    # ``quotedString`` is a module-level object shared by every user of
    # pyparsing; attach the parse actions to a private copy so building
    # this grammar does not change how it behaves elsewhere.
    quoted_text = quotedString.copy()

    # ``real`` is tried before ``integer`` so "1.5" is not split at the dot.
    # Bare identifiers are evaluated as strings, so unquoted words pass too.
    operand = (
        function_call.setParseAction(self.__evalFunction)
        | aggregate_column.setParseAction(self.__evalAggregateColumn)
        | single_column.setParseAction(self.__evalSingleColumn)
        | (real | integer).setParseAction(self.__evalConstant)
        | quoted_text.setParseAction(self.__evalString)
                     .addParseAction(removeQuotes)
        | current_value.setParseAction(self.__evalCurrentValue)
        | identifier.setParseAction(self.__evalString)
    )

    arith_expr << operatorPrecedence(operand, [
        (expop, 2, opAssoc.LEFT, self.__expOp),
        (multop, 2, opAssoc.LEFT, self.__multOp),
        (plusop, 2, opAssoc.LEFT, self.__addOp),
    ])

    comp_expr << operatorPrecedence(arith_expr, [
        (compop, 2, opAssoc.LEFT, self.__evalComparisonOp),
    ])

    logic_expr << operatorPrecedence(comp_expr, [
        (andop, 2, opAssoc.LEFT, self.__evalLogicOp),
        (orop, 2, opAssoc.LEFT, self.__evalLogicOp),
    ])

    pattern = logic_expr + StringEnd()
    return pattern
def transform_human(text):
    """Transform user input into something Script can read.

    Every double-quoted string literal in *text* is replaced by ``0x``
    followed by the hex encoding of its contents; everything else is passed
    through unchanged.
    """

    # pyparsing parse action: rewrite each matched literal in place.
    def str_literal_to_hex(s, loc, toks):
        for index, literal in enumerate(toks):
            toks[index] = '0x' + literal.encode('hex')
        return toks

    quoted = QuotedString('"')
    quoted.setParseAction(str_literal_to_hex)
    return ZeroOrMore(quoted).transformString(text)
def calcValue(self, testCaseValues=None):
    """Compute ``self.value`` from ``self.code`` for one test case.

    When the check contains external code, ``%name%`` placeholders in
    ``self.code`` are replaced by ``str(testCaseValues[name])`` (``%%``
    becomes a literal ``%``) and the resulting text is stored.

    Otherwise ``self.code`` is compiled and evaluated as a Python
    expression with *testCaseValues* as its globals, and the string form of
    the result is stored.  Any failure while compiling or evaluating raises
    ``SyntaxSemanticException``.
    """
    if self.containsExternalCode:
        def substitute(tokens):
            name = tokens[0]
            if name == "":
                return "%"
            return str(testCaseValues[name])

        placeholder = QuotedString("%")
        placeholder.setParseAction(substitute)
        self.value = placeholder.transformString(self.code)
        return

    # NOTE(review): eval() runs arbitrary Python; the check code must come
    # from a trusted source.
    try:
        expression = compile(self.code.strip(), '<output-check>', 'eval')
        result = eval(expression, testCaseValues)
    except Exception:
        raise SyntaxSemanticException("Invalid Syntax in %s" % (self.code))
    self.value = str(result)
def _build_grammar(self):
    """Build the expression grammar and return its root element.

    An expression is one of: a fully parenthesised binary arithmetic
    operation ``(expr op expr)`` with ``+ * / -``, an assignment
    ``tag = expr``, a tag name, a float or integer literal, a
    single-quoted string, or ``?`` (print tags).  Parse actions turn each
    match into the object produced by the corresponding ``self.to_*``
    method or a ``TagReference``.
    """
    expr = Forward()

    # Numeric literals: floats are tried before ints.
    float_lit = Combine(Word(nums) + '.' + Word(nums)).setName('float')
    float_lit.setParseAction(lambda toks: self.to_literal(float(toks[0])))

    int_lit = Word(nums).setName('int')
    int_lit.setParseAction(lambda toks: self.to_literal(int(toks[0])))

    num = float_lit | int_lit
    num.setParseAction(lambda toks: toks[0])

    tag_name = Word(alphas + "_", alphanums + "_").setName('tag_name')
    tag_name.setParseAction(
        lambda toks: tag_reference.TagReference(toks[0]))

    quoted_string = QuotedString("'")
    quoted_string.setParseAction(lambda toks: self.to_literal(toks[0]))

    oper = oneOf('+ * / -')
    oper.setParseAction(lambda toks: toks[0])

    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()

    # The Group wraps (left, operator, right) into a single token.
    arith = Group(lpar + expr + oper + expr + rpar)
    arith.setParseAction(
        lambda toks: self.to_arith(toks[0][0], toks[0][1], toks[0][2]))

    assign = (tag_name + '=' + expr).setName('assign')
    assign.setParseAction(lambda toks: self.to_assign(toks[0], toks[2]))

    print_tags = Literal('?')
    print_tags.setParseAction(lambda toks: self.to_print_tags())

    expr << (arith | assign | tag_name | num | quoted_string | print_tags)
    expr.setParseAction(lambda toks: toks[0])
    return expr
def _router_grammer():
    """Build the URL route grammar.

    A route is one or more of: a ``{...}`` extraction, a static run of
    printable characters (excluding ``?``, ``{``, ``}`` and ``/``), a ``/``
    separator, or a ``*`` wildcard.  Extractions, static parts and
    separators are tagged by parse actions returning ``(kind, tokens)``.
    """
    def tagged(kind):
        # Parse action factory: pair the matched tokens with their kind.
        return lambda t: (kind, t)

    static_chars = ''.join(c for c in printables if c not in '?{}/')

    wildcard = Literal('*')
    path_separator = Literal('/').setParseAction(tagged('sep'))
    extract = QuotedString(quoteChar='{', endQuoteChar='}')
    extract.setParseAction(tagged('extract'))
    static = Word(static_chars).setParseAction(tagged('static'))

    return OneOrMore(extract | static | path_separator | wildcard)
class NginxParser(object):
    """Parse nginx configuration text with a pyparsing grammar.

    The grammar is built once, at class-definition time, and shared by all
    instances.  A script is one or more assignments (``key value... ;``)
    or blocks (``key <anything> { ... }``); ``#`` comments are ignored.
    """

    # Punctuation: matched but dropped from the results.
    left_bracket = Literal("{").suppress()
    right_bracket = Literal("}").suppress()
    semicolon = Literal(";").suppress()

    key = Word(alphanums + "_/")
    # A value is a double- or single-quoted string (backslash escapes) or a
    # bare word.
    value = (
        QuotedString(quoteChar='"', escChar='\\')
        | QuotedString(quoteChar="'", escChar='\\')
        | Word(alphanums + "*_/.-+$:=")
    )
    assignment = key + Group(ZeroOrMore(value)) + semicolon

    # Blocks nest, so both pieces are declared up front and filled in below.
    block = Forward()
    subblock = Forward()
    subblock << ZeroOrMore(Group(assignment) | block)
    block << Group(
        key + SkipTo('{') + left_bracket + Group(subblock) + right_bracket
    )

    script = OneOrMore(Group(assignment) | block).ignore(pythonStyleComment)

    def __init__(self, source):
        """Remember the configuration text to parse."""
        self.source = source

    def parse(self):
        """Parse ``self.source`` and return the pyparsing result tree."""
        tree = self.script.parseString(self.source)
        return tree

    def as_list(self):
        """Parse ``self.source`` and return the tree as nested lists."""
        tree = self.parse()
        return tree.asList()
def build_parser():
    """
    Build a pyparsing parser for our custom topology description language.

    Note that this sets pyparsing's default whitespace characters to space
    and tab, so newlines are significant for every element created
    afterwards.

    :return: A pyparsing parser matching one or more statements (comments,
     link / port / node / environment specifications or empty lines).
    :rtype: pyparsing.OneOrMore
    """
    ParserElement.setDefaultWhitespaceChars(' \t')

    nl = Suppress(LineEnd())
    inumber = Word(nums).setParseAction(lambda _s, _loc, toks: int(toks[0]))
    # The exponent marker is grouped explicitly.  Written as
    # ``'E' | 'e' + Optional('-') + Word(nums)``, ``+`` binds tighter than
    # ``|``, so a bare "E" matched on its own and "1.5E10" was captured as
    # "1.5E", which float() rejects.
    exponent = (Literal('E') | 'e') + Optional('-') + Word(nums)
    fnumber = Combine(
        Optional('-') + Word(nums) + '.' + Word(nums) + Optional(exponent)
    ).setParseAction(lambda toks: float(toks[0]))
    boolean = (
        CaselessLiteral('true') | CaselessLiteral('false')
    ).setParseAction(lambda _s, _loc, toks: toks[0].casefold() == 'true')
    comment = Literal('#') + restOfLine + nl
    text = QuotedString('"')
    identifier = Word(alphas, alphanums + '_')
    empty_line = LineStart() + LineEnd()

    # Lists: "(" item [,] item ... ")", possibly spread over several lines.
    item_list = (
        (text | fnumber | inumber | boolean)
        + Optional(Suppress(','))
        + Optional(nl)
    )
    custom_list = (
        Suppress('(') + Optional(nl)
        + Group(OneOrMore(item_list))
        + Optional(nl) + Suppress(')')
    ).setParseAction(lambda tok: tok.asList())

    # Attributes: "[" key=value ... "]"; floats are tried before ints.
    attribute = Group(
        identifier('key')
        + Suppress(Literal('='))
        + (custom_list | text | fnumber | inumber | boolean
           | identifier)('value')
        + Optional(nl)
    )
    attributes = (
        Suppress(Literal('[')) + Optional(nl)
        + OneOrMore(attribute)
        + Suppress(Literal(']'))
    )

    node = identifier('node')
    port = Group(
        node + Suppress(Literal(':')) + (identifier | inumber)('port')
    )
    link = Group(
        port('endpoint_a') + Suppress(Literal('--')) + port('endpoint_b')
    )

    environment_spec = (attributes + nl).setResultsName(
        'env_spec', listAllMatches=True)
    nodes_spec = (
        Group(
            Optional(attributes)('attributes')
            + Group(OneOrMore(node))('nodes')
        ) + nl
    ).setResultsName('node_spec', listAllMatches=True)
    ports_spec = (
        Group(
            Optional(attributes)('attributes')
            + Group(OneOrMore(port))('ports')
        ) + nl
    ).setResultsName('port_spec', listAllMatches=True)
    link_spec = (
        Group(Optional(attributes)('attributes') + link('links')) + nl
    ).setResultsName('link_spec', listAllMatches=True)

    # Order matters: links are tried before ports, and ports before nodes.
    statements = OneOrMore(
        comment
        | link_spec
        | ports_spec
        | nodes_spec
        | environment_spec
        | empty_line
    )
    return statements
def _create_grammar(self):
    """
    Build the pyparsing grammar for the where-clause query language.

    Based on http://pyparsing.wikispaces.com/file/view/simpleSQL.py

    A query is a series of statements joined by AND / OR (case
    insensitive); parentheses group statements.  A statement is one of:

    * ``<identifier> <operator> <value>`` where the operator is one of
      ``=> =< = < > >= <= : != !:`` and the value is a double-quoted
      string or a bare run of value characters,
    * a quoted free-text string,
    * a bare identifier, optionally prefixed with ``!``.

    Each statement kind is converted by the matching ``self._*_to_q``
    parse action.  See self.as_q.

    Example
    something < 10 AND other >= 2015-01-01 AND (foo < 1 OR bar > 1)
    """
    quoted_value = QuotedString('"', escChar='\\')
    quoted_value.setParseAction(lambda token: StringValue(token[0]))

    and_ = Keyword('and', caseless=True)
    or_ = Keyword('or', caseless=True)

    binary_op = oneOf(
        '=> =< = < > >= <= : != !:', caseless=True
    ).setResultsName('operator')

    # Query tokens
    identifier = Word(alphas, alphanums + '_$-.').setName('identifier')
    raw_value_chars = alphanums + '_$-+/$%*;?@[]\\^`{}|~.'
    raw_value = Word(raw_value_chars, raw_value_chars).setName('raw_value')
    value_string = quoted_value | raw_value

    # Where expression (recursive, hence the Forward)
    where_expression = Forward()

    binary_statement = (identifier + binary_op + value_string)
    binary_statement.setParseAction(self._binary_op_to_q)

    unary_statement = (identifier | (Char('!') + identifier))
    unary_statement.setParseAction(self._unary_op_to_q)

    free_text_statement = quotedString.copy()
    free_text_statement.setParseAction(self._freetext_to_q)

    # Binary statements are tried first so "a = 1" is not read as a bare
    # identifier.
    statement = binary_statement | free_text_statement | unary_statement

    where_condition = Group(statement | ('(' + where_expression + ')'))
    where_expression << where_condition + ZeroOrMore(
        (and_ | or_) + where_expression)

    # Full grammar
    query_statement = Forward()
    query_statement << Group(where_expression).setResultsName("where")
    return query_statement
def parse_payload(self, payload):
    """Extract every ``name(arg, ...)`` call found in *payload*.

    Arguments are identifiers, numbers or quoted strings (quotes kept),
    optionally combined with ``+`` / ``-``.

    Returns a list with one dict per call found, in order of appearance:
    ``{"action": <name>, "arguments": <argument list>}``.
    """
    expr = Forward()
    open_paren, close_paren = Suppress("("), Suppress(")")

    identifier = Word(alphas + "_", alphanums + "_")
    call = (identifier.setResultsName("name")
            + open_paren
            + Group(Optional(delimitedList(expr)))
            + close_paren)

    integer = Regex(r"-?\d+")
    real = Regex(r"-?\d+\.\d*")
    double_quoted = QuotedString(
        quoteChar='"', escChar='\\', unquoteResults=False)
    single_quoted = QuotedString(
        quoteChar="'", escChar='\\', unquoteResults=False)
    operand = identifier | real | integer | double_quoted | single_quoted

    additive = oneOf('+ -')
    expr << infixNotation(operand, [(additive, 2, opAssoc.LEFT)])

    def describe(tokens):
        arguments = tokens[1]
        if type(arguments) != str:
            arguments = arguments.asList()
        return {"action": tokens[0], "arguments": arguments}

    return [describe(tokens)
            for tokens, _start, _end in call.scanString(payload)]
def parse_stats(fin):
    """Parse a stats file and yield ``(action, name, data)`` per record.

    A record is a ``new <word> "<name>"`` line followed by one or more
    ``data "<key>" "<value>"`` lines.  For each record, *action* is the
    word after ``new``, *name* is the quoted name, and *data* is a dict
    built from the key/value pairs via ``chunks(..., 2)``.

    *fin* is passed to pyparsing's ``parseFile`` (a file name or an open
    file object).
    """
    # ``OneOrMore`` was previously misspelled in this import, which made
    # the function fail on first use.
    from pyparsing import (Group, Keyword, LineEnd, OneOrMore, QuotedString,
                           Word, alphas)

    EOL = LineEnd().suppress()

    # Keyword() matches the literal word.  Word('new') would match any run
    # of the characters n/e/w (e.g. "wen"), and likewise for 'data'.
    new_stmt = (
        Keyword('new').suppress() + Word(alphas) + QuotedString('"') + EOL
    ).setName('new statement')
    data_stmt = (
        Keyword('data').suppress() + QuotedString('"') + QuotedString('"')
        + EOL
    ).setName('data statement')

    records = OneOrMore(Group(new_stmt + Group(OneOrMore(data_stmt))))

    for action, name, data in records.parseFile(fin):
        yield (action, name, dict(chunks(data, 2)))
def parse_search_query(query):
    """Parse a search query into term and comparison nodes.

    The query is one or more of:

    * a comparison ``name:term`` (the name may not contain ``:``),
    * an exact phrase in double quotes (backslash escapes allowed),
    * a plain word: any run of non-whitespace characters.

    Returns the pyparsing result of parsing *query*.
    """
    # Every character in the Basic Multilingual Plane that is not
    # whitespace.
    candidates = (unichr(code) for code in xrange(65536))
    unicode_printables = u''.join(
        ch for ch in candidates if not ch.isspace())

    word = TextNode.group(Word(unicode_printables))
    exact = ExactNode.group(
        QuotedString('"', unquoteResults=True, escChar='\\'))
    term = exact | word

    comparison_name = Word(unicode_printables, excludeChars=':')
    comparison = ComparisonNode.group(
        comparison_name + Literal(':') + term)

    # Comparisons are tried first so "a:b" is not consumed as a plain word.
    content = OneOrMore(comparison | term)
    return content.parseString(query)
def d3dmdf2dict(mdf_file):
    """Read *mdf_file* and return its ``key = value`` entries as a dict.

    Keys are lower-cased.  Values are either ``#...#`` quoted strings
    (kept with their ``#`` delimiters and concatenated) or numbers.
    Numeric values are returned as a list of floats; an entry without a
    value maps to ``''``.  The ``commnt`` entry, if present, is removed.

    The resulting dict is also printed, as before.
    """
    parameter = Word(alphas)
    floatNumber = Regex(r'-?\d+(\.\d*)?([eE][\+-]\d+)?').setParseAction(
        lambda s, l, t: [float(t[0])])
    d3dline = parameter + Suppress('=') + Optional(
        OneOrMore(QuotedString("#", unquoteResults=False))
        | OneOrMore(floatNumber))

    # Read through a context manager so the file handle is closed.
    with open(mdf_file) as handle:
        contents = handle.read()

    d = {}
    for param in d3dline.searchString(contents):
        values = list(param[1:])
        if any(isinstance(value, float) for value in values):
            # ''.join() raises TypeError on floats; keep numeric entries as
            # a list of numbers instead.
            d[param[0].lower()] = values
        else:
            d[param[0].lower()] = ''.join(values)

    # Remove comments; tolerate files that have none.
    d.pop('commnt', None)
    print(d)
    return d
def __init__(self):
    """Build the expression parser and store it in ``self.parser``.

    Operands are booleans, ``None``, numbers, quoted strings, ``/regex/``
    literals, result keywords, (dotted) identifiers, and ``[...]`` /
    ``{...}`` collections of those.  Operators follow the precedence table
    at the bottom, tightest binding first.
    """
    # Speed up infixNotation considerably at the price of some cache
    # memory.
    ParserElement.enablePackrat()

    # --- atoms ---------------------------------------------------------
    boolean = Keyword('True') | Keyword('False')
    none = Keyword('None')
    integer = Word(nums)
    real = Combine(Word(nums) + "." + Word(nums))
    string = (QuotedString('"', escChar='\\')
              | QuotedString("'", escChar='\\'))
    regex = QuotedString('/', escChar='\\')
    identifier = Word(alphas, alphanums + '_')
    dereference = infixNotation(identifier, [
        (Literal('.'), 2, opAssoc.LEFT, EvalArith),
    ])
    result = (Keyword('bad') | Keyword('fail') | Keyword('good')
              | Keyword('ignore') | Keyword('unknown'))

    # ``real`` comes before ``integer`` so "1.5" is not split at the dot.
    rval = (boolean | none | real | integer | string | regex | result
            | dereference)
    rvallist = Group(
        Suppress('[') + Optional(delimitedList(rval)) + Suppress(']'))
    rvalset = Group(
        Suppress('{') + Optional(delimitedList(rval)) + Suppress('}'))
    operand = rval | rvallist | rvalset

    # Parse actions replace the parsed tokens with an instantiated object
    # which we can later call into for evaluation of its content.
    evaluators = (
        (boolean, EvalBoolean),
        (none, EvalNone),
        (integer, EvalInteger),
        (real, EvalReal),
        (string, EvalString),
        (regex, EvalRegex),
        (identifier, EvalIdentifier),
        (result, EvalResult),
        (rvallist, EvalList),
        (rvalset, EvalSet),
    )
    for element, evaluator in evaluators:
        element.setParseAction(evaluator)

    # --- comparison operators -------------------------------------------
    identity_test = Keyword('is') + ~Keyword('not') | Combine(
        Keyword('is') + Keyword('not'), adjacent=False, joinString=' ')
    membership_test = Keyword('in') | Combine(
        Keyword('not') + Keyword('in'), adjacent=False, joinString=' ')
    comparison_op = oneOf('< <= > >= != == isdisjoint')
    comparison = identity_test | membership_test | comparison_op

    # --- operator precedence, tightest binding first ---------------------
    precedence = [
        (Literal('**'), 2, opAssoc.LEFT, EvalPower),
        (oneOf('+ - ~'), 1, opAssoc.RIGHT, EvalModifier),
        (oneOf('* / // %'), 2, opAssoc.LEFT, EvalArith),
        (oneOf('+ -'), 2, opAssoc.LEFT, EvalArith),
        (oneOf('<< >>'), 2, opAssoc.LEFT, EvalArith),
        (Literal('&'), 2, opAssoc.LEFT, EvalArith),
        (Literal('^'), 2, opAssoc.LEFT, EvalArith),
        (Literal('|'), 2, opAssoc.LEFT, EvalArith),
        (comparison, 2, opAssoc.LEFT, EvalLogic),
        (Keyword('not'), 1, opAssoc.RIGHT, EvalModifier),
        (Keyword('and'), 2, opAssoc.LEFT, EvalLogic),
        (Keyword('or'), 2, opAssoc.LEFT, EvalLogic),
        (Keyword('->'), 2, opAssoc.LEFT, EvalArith),
    ]
    self.parser = infixNotation(operand, precedence)
def reInit(self):
    """Re-read the wrapped function's docstring into ``doc`` and ``info``.

    Since this class gets pickled in ActionWindow, the class is never
    'destroyed', so a changed docstring would not be reflected back to
    puddletag; calling this re-reads it.

    Does nothing when the function has no docstring.  Otherwise
    ``self.doc`` becomes the docstring split into lines and ``self.info``
    the comma-separated fields of its first line (double-quoted fields have
    their quotes removed).
    """
    docstring = self.function.__doc__
    if not docstring:
        return

    self.doc = docstring.split("\n")

    # A field is either a double-quoted string or a run of characters that
    # excludes the comma delimiter.
    unquoted_chars = alphanums + ' !"#$%&\'()*+-./:;<=>?@[\\]^_`{|}~'
    field = QuotedString('"') | Combine(Word(unquoted_chars))
    fields = delimitedList(field)
    self.info = list(fields.parseString(self.doc[0]))
def __init__(self, comm_file_path):
    """Parse the file at *comm_file_path* into ``self.beam_data_model``.

    The file is a sequence of paragraphs of the form
    ``[left_side =] operator_name(keyword=expression, ...)[;]``.  An
    expression is a single value (identifier, number or single-quoted
    string), a parenthesised list of values, a ``_F(keyword=...)``
    construct, or a parenthesised expression.  Parse actions build the
    ``Value`` / ``Values`` / ``Keyword_argument`` / ``Keyword_arguments`` /
    ``Expression`` / ``Paragraph`` / ``File`` objects.
    """
    expression_spaced = Forward()
    expression = Forward()

    # Closing bracket, optionally preceded by a trailing comma.
    close_bracket = Optional(',') + ')'
    # Opening bracket, optionally surrounded by a single space.
    open_bracket = Optional(' ') + '(' + Optional(' ')

    value = (Or([
        pyparsing_common.identifier.copy().setResultsName('id'),
        pyparsing_common.number.copy().setResultsName('number'),
        QuotedString("'").setResultsName('string')
    ])).setParseAction(Value).setResultsName('value')

    values = (ZeroOrMore(
        value.setResultsName('valueList', listAllMatches=True)
        + Optional(',')
    )).setParseAction(Values)

    keyword = pyparsing_common.identifier.copy()
    keyword_argument = (
        keyword.setResultsName('keyword')
        + '='
        + expression_spaced.setResultsName('expression')
    ).setParseAction(Keyword_argument)

    keyword_arguments = (
        keyword_argument.setResultsName(
            'keyword_argument', listAllMatches=True)
        + ZeroOrMore(
            ',' + keyword_argument.setResultsName(
                'keyword_argument', listAllMatches=True))
    ).setParseAction(Keyword_arguments)

    expression << (Or([
        value,
        (open_bracket + values.setResultsName('values') + close_bracket),
        '_F' + open_bracket
        + keyword_arguments.setResultsName('keyword_arguments')
        + close_bracket,
        open_bracket + expression.setResultsName('expression')
        + close_bracket
    ])).setParseAction(Expression)

    expression_spaced << (Or([
        expression,
        open_bracket + expression_spaced + close_bracket
    ]))

    left_side = pyparsing_common.identifier.setResultsName('left_side')
    operator_name = pyparsing_common.identifier.setResultsName(
        'operator_name')

    paragraph = (
        Optional(left_side + "=")
        + operator_name
        + open_bracket
        + Optional(keyword_arguments.setResultsName('keyword_arguments'))
        + close_bracket
        + Optional(';')
    ).setParseAction(Paragraph)

    document = OneOrMore(paragraph).setResultsName(
        'paragraphs').setParseAction(File)

    self.beam_data_model = document.parseFile(comm_file_path)
def parse(self, config):
    """Walk *config* line by line, dispatch to handlers, then yield facts.

    A ``context`` stack of section words is maintained from the shape of
    each line (leading spaces, ``set``, ``exit``, ``!``).  For every line
    that is not skipped, the context words plus the line's own words are
    looked up in the nested ``self.handlers`` mapping (``"*"`` acts as a
    wildcard key); the first callable reached is invoked with the tokenised
    words.  After all lines are processed, everything produced by
    ``self.iter_facts()`` is yielded.

    NOTE(review): the source this was recovered from had lost its
    indentation.  The nesting below is the most plausible reading; in
    particular, whether the ``context += ...`` after ``context.pop(-1)``
    belongs inside the ``if context[-1] not in line`` block should be
    confirmed against the original file.
    """
    # Tokeniser for the matched command path: bare words or "quoted" text.
    VALUE = OneOrMore(Word(alphanums + "-/.:_+[],") | QuotedString('"'))
    context = []
    inactive_level = 1
    for line in config.splitlines():
        line = line.rstrip()
        # Negated commands are handled like their positive form.
        if "no " in line:
            line = line.replace("no ", "")
        if not line:
            continue
        elif line.startswith(" " * inactive_level):
            # Indented at least ``inactive_level`` spaces: descend.
            inactive_level = len(context) + 1
            context += [line.split()[0].strip()]
        elif "set" in line:
            inactive_level = len(context) + 1
        elif "exit" in line:
            # Leave the current section.
            context.pop(-1)
            inactive_level = len(context) - 1
        elif "!" in line and context:
            # "!" while inside a section resets the whole context.
            context = []
            inactive_level = inactive_level - 1
            # continue
            # if inactive_level is not None and inactive_level >= len(context):
            # inactive_level = None
        elif "!" in line:
            continue
        elif context and len(context) >= inactive_level:
            # Replace the innermost context word when the line does not
            # mention it.
            if context[-1] not in line:
                context.pop(-1)
                context += [line.split()[0].strip()]
        else:
            context += [line.split()[0].strip()]
        # Command path: context words followed by the words of this line.
        cp = " ".join(context).split() + line.split()
        h = self.handlers
        # print cp
        for p in cp:
            if p in h:
                h = h[p]
            elif "*" in h:
                h = h["*"]
            else:
                break
            if callable(h):
                h(self, VALUE.parseString(" ".join(cp)))
                break
            elif h is None:
                break
    # Yield facts
    for f in self.iter_facts():
        yield f
def get_grammar():
    """Return the grammar for ``<type> <name> = <value>`` declarations.

    A value is a hex literal (``0x`` plus at least two hex digits), a run
    of digits, or a triple-double-quoted (possibly multi-line) string.  A
    struct uses ``{ ... }`` as its value and contains further
    declarations; its contents are grouped in the results.
    """
    equals = Literal('=')
    open_brace = Suppress(Literal('{'))
    close_brace = Suppress(Literal('}'))

    type_name = Word(alphas + '_')
    var_name = Word(alphas + nums + '._')
    scalar = (
        Regex(r'0x[0-9a-fA-F]{2,}')
        | Word(nums)
        | QuotedString('"""', multiline=True)
    )

    declarations = Forward()
    atomic = type_name + var_name + equals + scalar
    struct = (type_name + var_name + equals
              + Group(open_brace + declarations + close_brace))
    declarations << ZeroOrMore(atomic | struct)
    return declarations
def _parse_data(data: str) -> List[_PackageData]:
    """Parse a Lisp-style package archive listing into ``_PackageData``.

    The input is ``(<number> <package-entry>...)`` where each entry is
    ``(name . [version deps "summary" kind keyvals])``.  The dependency
    list and the kind word are dropped; the version becomes a dotted
    string; ``keyvals`` becomes a dict with the recognised ``url``,
    ``keywords``, ``commit``, ``maintainer``, ``author`` and ``authors``
    entries (an empty dict when it is ``nil``).

    The whole of *data* must match, otherwise pyparsing raises a
    ``ParseException``.
    """
    lpar, rpar, lbrk, rbrk, dot = map(Suppress, '()[].')
    nil = Suppress('nil')

    pkgname = Word(printables)
    decimal = Regex(r'0|-?[1-9]\d*').setParseAction(lambda t: int(t[0]))
    qstring = QuotedString(quoteChar='"', escChar='\\')

    version = (lpar + OneOrMore(decimal) + rpar).setParseAction(
        lambda s, l, t: ['.'.join(map(str, t))])

    dependency_entry = lpar + pkgname + version + rpar
    dependency_list = ((lpar + OneOrMore(dependency_entry) + rpar) | nil)

    people_list = OneOrMore(qstring | dot | nil)

    keyval_url = (
        lpar + (Suppress(':url') | Suppress(':homepage')) + dot + qstring
        + rpar
    ).setParseAction(lambda s, l, t: [('url', t[0])])
    keyval_keywords = (
        lpar + Suppress(':keywords') + ZeroOrMore(qstring) + rpar
    ).setParseAction(lambda s, l, t: [('keywords', [str(k) for k in t])])
    keyval_commit = (
        lpar + Suppress(':commit') + dot + qstring + rpar
    ).setParseAction(lambda s, l, t: [('commit', t[0])])
    keyval_maintainer = (
        lpar + Suppress(':maintainer') + people_list + rpar
    ).setParseAction(lambda s, l, t: [('maintainer', [str(m) for m in t])])
    keyval_author = (
        lpar + Suppress(':author') + people_list + rpar
    ).setParseAction(lambda s, l, t: [('author', [str(a) for a in t])])
    keyval_authors = (
        lpar + Suppress(':authors') + OneOrMore(lpar + people_list + rpar)
        + rpar
    ).setParseAction(lambda s, l, t: [('authors', [str(a) for a in t])])

    # ``:authors`` must be tried before ``:author`` (shared prefix).
    keyval_item = (keyval_url | keyval_keywords | keyval_commit
                   | keyval_maintainer | keyval_authors | keyval_author)

    # The empty-dict action goes on a *copy* of ``nil``.  Setting it on
    # ``nil`` itself would also change the ``nil`` used inside
    # ``people_list``, making every ``nil`` there emit a ``{}`` token
    # instead of being suppressed.
    nil_keyvals = nil.copy().setParseAction(lambda s, l, t: [{}])
    keyvals = (lpar + ZeroOrMore(keyval_item) + rpar).setParseAction(
        lambda s, l, t: [{k: v for k, v in t}]) | nil_keyvals

    package_entry = (
        lpar + pkgname + dot + lbrk + version + Suppress(dependency_list)
        + qstring + Suppress(Word(alphas)) + keyvals + rbrk + rpar
    ).setParseAction(lambda s, l, t: [_PackageData(*t)])

    root = lpar + Suppress(decimal) + ZeroOrMore(package_entry) + rpar

    return root.parseString(data, parseAll=True)  # type: ignore
def grammar():
    """Return the grammar for a SQL dump.

    The dump is one or more of, tried in this order: a ``--`` comment, a
    ``CREATE TABLE`` statement, an ``ALTER TABLE ONLY ... ADD CONSTRAINT
    ... FOREIGN KEY`` statement, or any other ``;``-terminated statement.
    Parse actions build ``Table``, ``FKey``, ``Field`` and
    ``OtherStatement`` objects.
    """
    # Balanced parentheses, nested to any depth.
    parenthesis = Forward()
    parenthesis <<= "(" + ZeroOrMore(CharsNotIn("()") | parenthesis) + ")"
    parenthesis.setParseAction(join_string_act)

    quoted_string = "'" + OneOrMore(CharsNotIn("'")) + "'"
    quoted_string.setParseAction(join_string_act)

    quoted_default_value = (
        "DEFAULT" + quoted_string + OneOrMore(CharsNotIn(", \n\t"))
    )
    quoted_default_value.setParseAction(quoted_default_value_act)

    # --- CREATE TABLE ----------------------------------------------------
    field_def = OneOrMore(
        quoted_default_value
        | Word(alphanums + "_\"'`:-/[].")
        | parenthesis
    )
    field_def.setParseAction(Field)

    tablename_def = (Word(alphas + "`_.") | QuotedString("\""))

    field_list_def = field_def + ZeroOrMore(Suppress(",") + field_def)

    create_table_def = (
        Literal("CREATE") + "TABLE"
        + tablename_def.setResultsName("tableName")
        + "(" + field_list_def.setResultsName("fields") + ")" + ";"
    )
    create_table_def.setParseAction(Table)

    # --- ALTER TABLE ... ADD CONSTRAINT ... FOREIGN KEY -------------------
    add_fkey_def = (
        Literal("ALTER") + "TABLE" + "ONLY"
        + tablename_def.setResultsName("tableName")
        + "ADD" + "CONSTRAINT" + Word(alphanums + "_")
        + "FOREIGN" + "KEY"
        + "(" + delimitedList(Word(alphanums + "_")).setResultsName(
            "keyName") + ")"
        + "REFERENCES" + Word(alphanums + "._").setResultsName("fkTable")
        + "(" + delimitedList(Word(alphanums + "_")).setResultsName(
            "fkCol") + ")"
        + Optional(Literal("DEFERRABLE"))
        + Optional(
            Literal("ON") + "DELETE"
            + (Literal("CASCADE") | Literal("RESTRICT")))
        + ";"
    )
    add_fkey_def.setParseAction(FKey)

    # --- everything else ---------------------------------------------------
    other_statement_def = OneOrMore(CharsNotIn(";")) + ";"
    other_statement_def.setParseAction(OtherStatement)

    comment_def = "--" + ZeroOrMore(CharsNotIn("\n"))
    comment_def.setParseAction(OtherStatement)

    return OneOrMore(
        comment_def | create_table_def | add_fkey_def | other_statement_def
    )
def query_parser() -> ParserElement:
    """Build and return the search-query grammar.

    A query is a sequence of elements, each optionally negated (``!``,
    ``-`` or ``not``) and optionally joined by ``or`` / ``and``.  An
    element is a quoted string, a bracketed sub-expression, a field
    (``@name value`` or ``name: value``), a word with exceptions
    (``word except|ignore ...``) or a plain word.  Every piece carries a
    results name so callers can walk the parse tree by name.
    """
    # Word characters: printables minus brackets, colon and double quote.
    valid_chars = "".join(ch for ch in printables if ch not in '():"')

    expr = Forward().setName("expression")

    quotes = (QuotedString('"', "\\")
              .setName("quoted string")
              .setResultsName("quotes"))

    brackets = (Group(Literal("(").suppress() + expr
                      + Literal(")").suppress())
                .setName("bracketed expression")
                .setResultsName("brackets"))

    words = Word(valid_chars).setName("word").setResultsName("word")

    # --- "word except a or b" ------------------------------------------
    exception_elem = (Group(quotes | words)
                      .setName("exception element")
                      .setResultsName("exception_element",
                                      listAllMatches=True))

    exception_list = (
        Literal("(")
        + exception_elem
        + ZeroOrMore(pyparsing.Optional(CaselessKeyword("or"))
                     + exception_elem)
        + Literal(")")
    )
    exception = (Group(exception_elem | exception_list)
                 .setName("exception")
                 .setResultsName("exception"))

    exception_connector = (
        (CaselessKeyword("except") | CaselessKeyword("ignore"))
        .setName("Except")
        .setResultsName("except"))

    # setName() is applied to ``words`` itself before the named copy is
    # taken; this ordering is kept as in the original.
    exception_word = (words.setName("exception word")
                      .setResultsName("exception_word"))

    word_with_exception = (
        Group(exception_word + exception_connector + exception)
        .setName("word with exception")
        .setResultsName("word_with_exception"))

    word_with_exception_brackets = (
        Literal("(") + word_with_exception + Literal(")"))

    # --- fields ------------------------------------------------------------
    field_name = (
        Group((Literal("@").suppress() + Word(valid_chars))
              | (Word(valid_chars) + Literal(":").suppress()))
        .setName("field name")
        .setResultsName("field_name"))

    field_value = (
        Group(quotes | word_with_exception_brackets | word_with_exception
              | words)
        .setName("field value")
        .setResultsName("field_value"))

    field = (Group(field_name + field_value)
             .setName("field")
             .setResultsName("field"))

    # --- elements and connectors ---------------------------------------
    negator = (
        Group(pyparsing.Optional(
            Literal("!") | Literal("-") | CaselessKeyword("not")))
        .setName("negator")
        .setResultsName("negator"))

    element = (
        Group(quotes | brackets | field | word_with_exception | words)
        .setName("element")
        .setResultsName("element"))

    full_element = (
        Group(negator + element)
        .setName("full element")
        .setResultsName("full_element", listAllMatches=True))

    connector = (
        Group(pyparsing.Optional(
            CaselessKeyword("or") | CaselessKeyword("and")))
        .setName("connector")
        .setResultsName("connector", listAllMatches=True))

    expr <<= full_element + ZeroOrMore(connector + full_element)

    return expr
def compiled_sql(self):
    """Parse ``self.pre_query`` and return the compiled token list.

    The query is a set of atomic conditions of the form
    ``table[>>>table...].column <op> <value>`` combined with ``&``
    (rewritten to ``INTERSECT``) and ``|`` (rewritten to ``UNION``) and
    grouped with parentheses, which are kept in the output.  Each atomic
    condition is replaced by the result of ``self._make_atomic``.

    The space-joined result is printed, then the pyparsing result is
    returned.  A query that does not match raises pyparsing's
    ``ParseException``.
    """
    where_op = oneOf('> >= < <= = IN !=') | Literal("NOT IN")

    number = Word(nums)
    # String values are re-quoted with double quotes whatever was used.
    string = (QuotedString(quoteChar="'") | QuotedString(
        quoteChar='"')).setParseAction(lambda x: '"' + x[0] + '"')
    where_val = number | string

    lpar = Literal('(')
    rpar = Literal(')')

    tables = Group(
        Word(alphas) + ZeroOrMore(Suppress(Literal('>>>')) + Word(alphas)))
    tbl_col = tables + Suppress(Literal('.')) + Word(alphas)

    where_clause = tbl_col + where_op + where_val
    where_clause.setParseAction(lambda x: self._make_atomic(
        tables=x[0], col_name=x[1], op=x[2], value=x[3]))

    expr = Forward()
    atom = where_clause | (lpar + expr + rpar)
    op = Literal("&").setParseAction(
        replaceWith("INTERSECT")) | Literal("|").setParseAction(
        replaceWith("UNION"))

    # This used to read ``expr << (...) | (Suppress("(") + ...)``.  Because
    # ``<<`` binds tighter than ``|``, only the first alternative was ever
    # assigned and the second was silently discarded.  The discarded
    # alternative is removed so the grammar says what it does; parenthesised
    # input is already handled by ``atom``.
    expr << (atom + ZeroOrMore(op + expr))

    results = expr.parseString(self.pre_query)
    print(' '.join(results))
    return results
def _parse_query(self):
    """
    Defines the Resource DSL grammar and parses ``self._query`` with it.

    A query is one or more conditions joined by AND / OR.  A condition
    compares a resource field or ``resource_metadata.<key>`` (lower-cased
    in the output) with double-quoted values using ``= != eq ne``,
    ``[NOT] LIKE`` or ``[NOT] IN (...)``; operators and keywords are
    upper-cased in the output.

    Returns:
        list: The list of tokens parsed

    Raises:
        ParseException: This exception is raised if any parsing error occurs
    """
    field_names = ' '.join((
        'resource_site', 'resource_class', 'resource_subclass',
        'resource_type', 'resource_id', 'resource_endpoint',
    ))
    resource_fields = oneOf(field_names, caseless=True)
    resource_metadata = (CaselessLiteral('resource_metadata')
                         + Literal('.') + Word(alphanums + '_'))

    # Keywords are normalised to upper case in the result.
    and_ = CaselessKeyword('AND').setParseAction(upcaseTokens)
    or_ = CaselessKeyword('OR').setParseAction(upcaseTokens)
    not_ = CaselessKeyword('NOT').setParseAction(upcaseTokens)
    in_ = CaselessKeyword('IN').setParseAction(upcaseTokens)
    like_ = CaselessKeyword('LIKE').setParseAction(upcaseTokens)

    operators = oneOf("= != eq ne", caseless=True)
    operators.setParseAction(upcaseTokens)

    query_expression = Forward()

    query_l_val = (resource_fields | resource_metadata)
    query_l_val.setParseAction(downcaseTokens)
    query_r_val = QuotedString(quoteChar='"', escChar='\\')

    comparison = query_l_val + operators + query_r_val
    like_match = query_l_val + Optional(not_) + like_ + query_r_val
    in_list = (query_l_val + Optional(not_) + in_
               + '(' + delimitedList(query_r_val) + ')')
    query_condition = Group(comparison | like_match | in_list)

    # "-" is pyparsing's error-stop "and": once a condition has matched,
    # a malformed continuation is reported instead of backtracking.
    query_expression << query_condition - ZeroOrMore(
        (and_ | or_) - query_condition)

    try:
        tokens = query_expression.parseString(self._query, parseAll=True)
    except ParseException:
        raise

    return tokens
def create_names_db():
    """Load person names from ``persondata_en.ttl`` into the database.

    The file is scanned for lines containing ``/name``.  For each one, the
    first double-quoted value is taken as the name; the immediately
    following lines may supply ``/surname`` and then ``/givenName`` in the
    same way.  A ``Name`` row is added to the session for every name found
    and the session is committed once the whole file has been read.
    """
    # Skip to the first quote, take the quoted value, ignore up to the ".".
    quoted_value = (Suppress(SkipTo('"')) + QuotedString('"')
                    + Suppress(SkipTo(".")))

    def first_value(text):
        return quoted_value.parseString(text)[0]

    with open("persondata_en.ttl") as pd:
        for line in pd:
            if "/name" not in line:
                continue
            name = first_value(line)
            surname, givenname = None, None

            # next(pd, "") instead of pd.__next__(): a "/name" entry on the
            # last line of the file used to raise StopIteration here.
            line = next(pd, "")
            if "/surname" in line:
                surname = first_value(line)
                line = next(pd, "")
            if "/givenName" in line:
                givenname = first_value(line)

            session.add(
                Name(name=name, surname=surname, givenname=givenname))
    session.commit()
def _getPattern(self):
    """Build the pyparsing grammar used to evaluate expressions.

    Operands, in matching order: a call to one of ``self.FUNCTIONS``, an
    aggregate column ``{name}``, a single column ``[name]``, a real or
    integer constant, a quoted string, the current value ``.`` and a bare
    identifier.  Parse actions route each operand kind to the matching
    ``__eval*`` method.

    Operators, tightest binding first: ``^``, ``* /``, ``+ -``, the
    comparisons ``> < >= <= != ==``, then ``AND`` and ``OR``.

    Returns:
        A parser element matching one complete logic expression followed
        by the end of the input.
    """
    arith_expr = Forward()
    comp_expr = Forward()
    logic_expr = Forward()

    LPAR, RPAR = map(Suppress, "()")

    identifier = Word(alphas + "_", alphanums + "_")
    multop = oneOf('* /')
    plusop = oneOf('+ -')
    expop = Literal("^")
    compop = oneOf('> < >= <= != ==')
    andop = Literal("AND")
    orop = Literal("OR")
    current_value = Literal(".")

    function = oneOf(' '.join(self.FUNCTIONS))
    function_call = Group(
        function.setResultsName('fn') + LPAR +
        Optional(delimitedList(arith_expr)) + RPAR)

    aggregate_column = QuotedString(quoteChar='{', endQuoteChar='}')
    single_column = QuotedString(quoteChar='[', endQuoteChar=']')

    integer = Regex(r"-?\d+")
    real = Regex(r"-?\d+\.\d*")

    # Attach the string actions to a private copy: ``quotedString`` is a
    # module-level pyparsing object shared with every other grammar, and
    # setParseAction() on it would change it for all of them.
    string_literal = quotedString.copy()
    string_literal.setParseAction(self.__evalString)
    string_literal.addParseAction(removeQuotes)

    # ``real`` is tried before ``integer`` so "1.5" is not split at the dot.
    # Bare identifiers are evaluated as strings, so unquoted words pass too.
    operand = \
        function_call.setParseAction(self.__evalFunction) | \
        aggregate_column.setParseAction(self.__evalAggregateColumn) | \
        single_column.setParseAction(self.__evalSingleColumn) | \
        ((real | integer).setParseAction(self.__evalConstant)) | \
        string_literal | \
        current_value.setParseAction(self.__evalCurrentValue) | \
        identifier.setParseAction(self.__evalString)

    arith_expr << operatorPrecedence(operand, [
        (expop, 2, opAssoc.LEFT, self.__expOp),
        (multop, 2, opAssoc.LEFT, self.__multOp),
        (plusop, 2, opAssoc.LEFT, self.__addOp),
    ])

    comp_expr << operatorPrecedence(arith_expr, [
        (compop, 2, opAssoc.LEFT, self.__evalComparisonOp),
    ])

    logic_expr << operatorPrecedence(comp_expr, [
        (andop, 2, opAssoc.LEFT, self.__evalLogicOp),
        (orop, 2, opAssoc.LEFT, self.__evalLogicOp),
    ])

    pattern = logic_expr + StringEnd()
    return pattern
def _parse_gitconfig(config: str) -> Dict[str, Any]:
    """Scan *config* for ``[section "sub"]`` headers and their ``key = value`` lines.

    Returns the full list of ``scanString`` results, i.e. one
    ``(tokens, start, end)`` tuple per matched section.

    NOTE(review): the annotation says ``Dict[str, Any]`` but the value
    returned is a list -- confirm which one callers expect.
    """
    # Section header: a bare word, optionally followed by a quoted subsection.
    section_name = Word(alphas) + Optional(QuotedString('"'))
    section_header = (
        Suppress(LineStart())
        + nestedExpr(opener="[", closer="]", content=section_name)
        + Suppress(LineEnd())
    )

    # Entry: key, "=", then whatever remains on the line as the value.
    entry = (
        Word(alphas)
        + Suppress(Literal("="))
        + Suppress(Optional(" "))
        + restOfLine()
    )

    section = section_header + ZeroOrMore(entry)
    return list(section.scanString(config))
def parse(self, config):
    """Walk an indentation-structured config and dispatch lines to handlers.

    Each non-blank, non-``!`` line is placed into a context stack according to
    its indentation (as measured by ``self.rx_indent``). The words of the
    joined context are then used to descend ``self.handlers``; when a callable
    is reached it is invoked with ``self`` and the tokenized context.
    Afterwards every fact from ``self.iter_facts()`` is yielded.
    """
    VALUE = OneOrMore(Word(alphanums + "-/.:_+") | QuotedString("'"))
    context = []  # stripped lines from the outermost block down to the current one
    indent = []   # indentation width of each entry in ``context``
    for raw_line in config.splitlines():
        stripped = raw_line.strip()
        if not stripped or stripped.startswith("!"):
            continue  # Comment line
        level = len(self.rx_indent.search(raw_line).group(1))
        if not indent:
            indent = [level]
            context = [stripped]
        elif indent[-1] == level:
            # Same level: replace the innermost context entry
            context[-1] = stripped
        elif indent[-1] < level:
            # Down: open a nested level
            context.append(stripped)
            indent.append(level)
        else:
            # Up: unwind every level at least as deep as this line
            while indent and indent[-1] >= level:
                indent.pop()
                context.pop()
            context.append(stripped)
            indent.append(level)
        words = " ".join(context).split()
        node = self.handlers
        for word in words:
            if word in node:
                node = node[word]
            elif "*" in node:
                node = node["*"]
            else:
                break
            if callable(node):
                node(self, VALUE.parseString(" ".join(words)))
                break
            elif node is None:
                break
    # Yield facts
    for fact in self.iter_facts():
        yield fact
def parse(self, config):
    """Walk a brace-structured config and dispatch statements to handlers.

    ``name {`` lines push onto a context stack and ``}`` pops it. Statements
    ending in ``;`` are combined with the current context, and the resulting
    words are used to descend ``self.handlers``; a callable handler is invoked
    with ``self`` and the tokenized context words. Statements inside an
    ``inactive:`` block are skipped. Afterwards every fact from
    ``self.iter_facts()`` is yielded.
    """
    VALUE = OneOrMore(Word(alphanums + "-/.:_+[]") | QuotedString('"'))
    context = []
    inactive_level = None  # depth at which an "inactive:" block was opened
    for raw_line in config.splitlines():
        # @todo: Skip inactive blocks
        # Drop anything after "##", then surrounding whitespace.
        line = raw_line.partition("##")[0].strip()
        if line == "{master}" or line.startswith("/*") and not line.startswith("inactive:"):
            continue
        if line.startswith("inactive:"):
            if line.endswith("{"):
                inactive_level = len(context)
                context.append(line[9:-1].strip())
        elif line.endswith("{"):
            context.append(line[:-1].strip())
        elif line == "}":
            context.pop()
            if inactive_level is not None and inactive_level >= len(context):
                inactive_level = None
        elif line.endswith(";"):
            if inactive_level is not None:
                continue
            words = " ".join(context).split() + line[:-1].split()
            node = self.handlers
            for word in words:
                if word in node:
                    node = node[word]
                elif "*" in node:
                    node = node["*"]
                else:
                    break
                if callable(node):
                    node(self, VALUE.parseString(" ".join(words)))
                    break
                elif node is None:
                    break
    # Yield facts
    for fact in self.iter_facts():
        yield fact
def parse_treesblock(infile):
    """Yield a ``Newick`` object for every tree statement read from *infile*.

    *infile* is any iterable of lines. A line reading ``translate`` (in any
    case) starts a translation table, which is read up to its terminating
    ``;`` and passed along with every tree parsed after it. Every other line
    is parsed as a tree statement.

    Raises:
        pyparsing.ParseException: if a non-``translate`` line is not a valid
            tree statement.
    """
    from pyparsing import Optional, Word, Regex, CaselessKeyword, Suppress
    from pyparsing import QuotedString

    comment = Optional(Suppress("[&") + Regex(r'[^]]+') + Suppress("]"))
    name = Word(alphanums + "_") | QuotedString("'")
    newick = Regex(r'[^;]+;')
    tree = (CaselessKeyword("tree").suppress() +
            Optional("*").suppress() +
            name.setResultsName("tree_name") +
            comment.setResultsName("tree_comment") +
            Suppress("=") +
            comment.setResultsName("root_comment") +
            newick.setResultsName("newick"))

    def parse_ttable(f):
        """Read ``key value`` pairs from *f* until the table's closing ';'."""
        ttable = {}
        for raw in f:
            s = raw.strip()
            if not s:
                continue  # tolerate blank lines inside the table
            if s == ";":
                break
            # Remember the terminator before stripping it; testing for ';'
            # after the strip can never succeed.
            is_last = s.endswith(";")
            if s[-1] in ",;":
                s = s[:-1]
            k, v = s.split()
            ttable[k] = v
            if is_last:
                break
        return ttable

    # One shared iterator so the table reader consumes lines from the same
    # stream as the main loop; next()-style iteration works on Python 2 and 3.
    lines = iter(infile)
    ttable = {}
    for raw in lines:
        s = raw.strip()
        if s.lower() == "translate":
            ttable = parse_ttable(lines)
        else:
            match = tree.parseString(s)
            yield Newick(match, ttable)
def translate(self, text, filename):
    """Translate template source *text* into Python module source.

    The translator state on ``self`` is reset, the template is parsed with a
    grammar whose parse actions (``self.compile_*``) fill the line buffers,
    and the buffers are joined into the returned source. When the template
    set ``self.super``, the parent's ``module_source`` is prepended.
    """
    # Reset per-translation state.
    self.source = text
    self.super = None
    self.inheritance = 0
    self.declaration_lines = ['inheritance = 0']
    self.block_lines = []
    self.body_lines = ['def body():']
    self.target_lines = self.body_lines
    self.indent = 1

    # Grammar pieces.
    close_tag = Literal('%>')
    blank = White()
    attribute = (Word(alphanums + '_') + Literal('=') + QuotedString('"')
                 + Optional(blank))
    directive = ("<%@" + Optional(blank) + Word(alphanums + '_') + blank
                 + ZeroOrMore(attribute) + close_tag)
    declaration = "<%!" + SkipTo(close_tag) + close_tag
    expression = "<%=" + SkipTo(close_tag) + close_tag
    scriptlet = '<%' + SkipTo(close_tag) + close_tag
    # Order matters: the bare '<%' scriptlet opener must be tried last.
    template_text = directive | declaration | expression | scriptlet
    plain_text = Regex(r'((?!<%).|\s)+', re.MULTILINE)
    document = OneOrMore(template_text | plain_text)

    # Parse actions are attached before leaveWhitespace() is applied.
    directive.setParseAction(self.compile_directive)
    declaration.setParseAction(self.compile_declaration)
    expression.setParseAction(self.compile_expression)
    scriptlet.setParseAction(self.compile_scriptlet)
    plain_text.setParseAction(self.compile_plain_text)

    document.leaveWhitespace()
    document.parseString(self.source)

    sections = (self.declaration_lines + ['\n'] + self.block_lines + ['\n']
                + self.body_lines)
    translated = '\n' + '\n'.join(sections)
    if self.super:
        return self.super.module_source + translated
    return translated
def __build_grammar():
    """Assemble and return the grammar of the SELECT ... INSTANCES query language.

    A query is ``SELECT [navigation, ... FROM] [pre-filters] INSTANCES
    [WHERE predicate AND ...] [WITH COMPUTATIONS]``. A predicate value is
    either a single-quoted string (``''`` escapes a quote) or a parenthesised
    sub-query, which is why the grammar is recursive.
    """
    query = Forward()

    # Keywords (case-insensitive).
    select_kw = CaselessLiteral("SELECT")
    from_kw = CaselessLiteral("FROM")
    where_kw = CaselessLiteral("WHERE")
    and_kw = CaselessLiteral("AND")
    instances_kw = CaselessLiteral("INSTANCES")

    quoted = QuotedString("'", escQuote="''")

    first_char = Word(alphas + "_", exact=1)
    other_chars = Optional(Word(nums + alphas + "_"))
    identifier = Combine(first_char + other_chars)("identifier")
    navigation = Group(
        identifier + ZeroOrMore(Suppress(".") + identifier)
    )("navigation")

    subquery = Suppress('(') + query('subquery') + Suppress(')')
    filter_predicate = Group(
        navigation + Suppress("=") + (quoted('value') | subquery)
    )('predicate')
    where_clause = Group(
        Suppress(where_kw)
        + filter_predicate
        + ZeroOrMore(Suppress(and_kw) + filter_predicate)
    )('where')

    # Pre filters
    impl = Optional(Suppress(CaselessLiteral("implementation"))) + quoted('impl')
    cic = Suppress(CaselessLiteral("offer")) + quoted('cic')
    lc = Suppress(CaselessLiteral("lc")) + quoted('lc')
    envt = Suppress(CaselessLiteral("environment")) + quoted('envt')
    pre_filter = (Optional(envt) + Optional(lc) + Optional(cic)
                  + Optional(impl) + FollowedBy(instances_kw))

    # Dict query (only select some elements and navigate)
    selector = Group(
        navigation
        + ZeroOrMore(Suppress(',') + navigation)
        + FollowedBy(from_kw)
    )('selector')

    # The complete statement
    with_computations = Optional(CaselessLiteral('WITH COMPUTATIONS')('compute'))
    select = Group(
        Suppress(select_kw)
        + Optional(selector + Suppress(from_kw))
        + pre_filter
        + Suppress(instances_kw)
        + Optional(where_clause)
        + with_computations
    )('select')

    query << select
    return query
def _create_filter_parser():
    """Return the grammar for triple filters combined with AND / OR.

    A triple is written ``<subj pred obj>``. Subject and predicate are each
    ``*``, a ``?variable`` or a URI-like term; the object may additionally be
    a double-quoted literal (backslash escapes). Triples and parenthesised
    sub-expressions are joined by ``AND`` or by ``OR``.
    """
    conj = Keyword('AND')
    disj = Keyword('OR')

    variable = Literal('?') + Word(alphanums + '_').leaveWhitespace()
    uri_term = NotAny(Literal('"')) + Word(printables, excludeChars='>*')
    uri_part = Keyword('*') ^ uri_term ^ variable
    literal_term = QuotedString(quoteChar='"', escChar='\\')

    obj_part = (Group(uri_part).setResultsName('obj')
                ^ Group(literal_term).setResultsName('objlit'))
    triple = Group(
        Literal('<').suppress()
        + uri_part.setResultsName('subj')
        + uri_part.setResultsName('pred')
        + obj_part
        + Literal('>').suppress()
    )

    expr = Forward()
    nested = Literal('(').suppress() + expr + Literal(')').suppress()
    atom = (triple.setResultsName('triple') | nested)

    and_group = Group(atom + ZeroOrMore(conj.suppress() + atom))
    or_group = Group(atom + ZeroOrMore(disj.suppress() + atom))
    expr << (and_group.setResultsName('and') ^ or_group.setResultsName('or'))
    return expr
def _build_parser():
    """Return the grammar for one stanza of the configuration language.

    A stanza is a block (``name [(value)] { stanza* }``), an assignment
    (``name = value[, value ...];``) or a level statement
    (``LEVEL <int|X> FOR <decimal>;``). Parse actions convert matches into
    ``Identifier``, ``Block``, ``Assignment`` and ``LevelStatement`` objects
    and into Python ``int`` / ``float`` / ``tuple`` values.
    """
    def to_identifier(t):
        return Identifier(t[0])

    def to_int(t):
        return int(t[0])

    def to_float(t):
        return float(t[0])

    def to_block(t):
        return Block(*t)

    def to_assignment(t):
        return Assignment(*t)

    def to_level_statement(t):
        return LevelStatement(*t)

    identifier = Regex(r"[A-Za-z]+(_[A-Za-z]+)*").setParseAction(to_identifier)
    string = QuotedString('"', unquoteResults=True, multiline=True)  # FIXME: escaping?
    integer = Regex(r"-?\d+").setParseAction(to_int)
    decimal = Regex(r"-?\d+\.\d*").setParseAction(to_float)
    # decimal is tried before integer so "1.5" is not split at the dot
    value = string | decimal | integer | identifier
    array = (value + OneOrMore(Suppress(",") + value)).setParseAction(tuple)
    levelValue = integer | Regex(r"[X]")

    stanza = Forward()

    block_arg = Optional(Suppress("(") + value + Suppress(")"), default=None)
    block_body = Group(Suppress("{") + ZeroOrMore(stanza) + Suppress("}"))
    block = (identifier + block_arg + block_body).setParseAction(to_block)

    assignment = (
        identifier + Suppress("=") + (array | value) + Suppress(";")
    ).setParseAction(to_assignment)

    levelStatement = (
        Suppress("LEVEL") + levelValue + Suppress("FOR") + decimal + Suppress(";")
    ).setParseAction(to_level_statement)

    stanza << (block | assignment | levelStatement)
    return stanza
def initGrammar(self):
    """Build the wiki-markup grammar and store it in ``self.N_S``.

    The grammar is a sequence of elements: HTML comments, plain text runs,
    ``<ref>`` tags, ``'''``-quoted text, ``[[links]]``, ``=headers=``,
    ``{{templates}}`` and ``{labels}``. Links and templates are post-processed
    by ``self.genLink`` and ``self.genTemplate``.
    """
    equals_run = Word("=")
    comment = htmlComment()
    name = CharsNotIn("{}|[]")
    # Plain text extends up to the next markup opener/closer.
    plain_text = SkipTo(
        oneOf(["{{", "|", "[[", "]]", "}}", "'''", "<ref"]))
    elements = Forward()

    bold = QuotedString("'''").setParseAction(
        lambda s, l, t: {'APOSTROFS': t})

    link_content = name + Optional(
        "|" + delimitedList(CharsNotIn("[]"), delim="|"))
    link = nestedExpr(
        opener="[[", closer="]]", content=link_content
    ).setParseAction(self.genLink)

    header = Group(equals_run + SkipTo("=") + equals_run).setParseAction(
        lambda s, l, t: {'HEADER': t})

    template = Forward()
    key = CharsNotIn("{}|=")
    key_values = "|" + delimitedList(
        Group(Optional(key) + Optional("=" + elements)), delim="|")

    label_content = (template
                     | ("{{" + OneOrMore("!") + "}}")
                     | CharsNotIn("{}|"))
    label = nestedExpr(opener="{", closer="}", content=label_content)

    template << nestedExpr(
        opener="{{", closer="}}", content=name + Optional(key_values)
    ).setParseAction(self.genTemplate)

    ref_open, ref_close = makeHTMLTags("ref")
    named_ref = ref_open + SkipTo(ref_close) + ref_close
    named_ref.setParseAction(lambda s, l, t: {'REF': t})

    # Alternatives are tried in this order.
    element = (comment | plain_text | named_ref | bold | link | header
               | template | label)
    elements << ZeroOrMore(element)
    self.N_S = elements
# define grammar startContext = "start_context" + Word(alphanums) variable = Word(alphanums) attribute = Word(alphanums)+ "::"+ Word(alphanums) freeformText = "freeform"+"("+ OneOrMore(Word(alphanums))+")" comparison = Literal("=") | Literal("<") | Literal(">") setAttributeValue = attribute + Literal("=") + freeformText | attribute + comparison + variable setVariableFromVar = variable + Literal("=") + freeformText | variable + comparison + variable setVariableFromAttr = variable + comparison + attribute argument = Word(alphanums) + Optional(",") funcDef = Word(alphanums) + "(" + OneOrMore(argument) + ")" comment = QuotedString('/*', endQuoteChar='*/') line = startContext + ";" | setAttributeValue + ";" + setVariableFromAttr + ";" | setVariableFromVar + ";" | funcDef + ";" | comment # grammar to be exported grammar = OneOrMore(line) def parse_knowledge_tree(s): """ return knowledge tree after parsing with the grammar""" grammar.parseString(s) return kt #parse actions setAttributeValue.setParseAction(update_attribute)
def __init__(self):
    """Build the load-test script grammar and store it in ``self.grammar``.

    A script is one or more ``create session with weight N as 'name':``
    sections (each a list of request / pause / var / using actions) followed
    by a ``create load:`` section made of ``spawn users every ...`` lines.
    ``#`` comments running to the end of the line are ignored.
    """
    kw = Keyword

    def named(expr, name):
        # Shorthand for attaching a results name.
        return expr.setResultsName(name)

    # --- terminals ---
    intNum = Word(nums)
    floatNum = Combine(intNum + Optional("." + intNum))
    string = QuotedString("'") | QuotedString('"')
    regex = QuotedString("/")
    ident = Word(alphas, alphanums + "_")
    time_period = kw("minutes") | kw("seconds")
    ordering = kw("unique") | kw("random")

    # --- var <name> is a <type> ---
    string_type = (
        named(kw("random"), "ordering")
        + named(kw("string"), "data_type")
        + kw("of") + kw("length")
        + named(intNum, "length")
    )
    numeric_type = (
        named(ordering, "ordering")
        + named(kw("number"), "data_type")
        + kw("from") + named(floatNum, "min")
        + kw("to") + named(floatNum, "max")
    )
    var_type = string_type | numeric_type
    var = Group(
        named(kw("var"), "type")
        + named(ident, "name")
        + kw("is") + kw("a")
        + var_type
    )

    # --- using <vars> from <file> randomly|sequentially ---
    ident_list = delimitedList(ident)
    using_ordering = kw("randomly") | kw("sequentially")
    using = Group(
        named(kw("using"), "type")
        + named(ident_list, "vars")
        + kw("from") + named(string, "filename")
        + named(using_ordering, "ordering")
    )

    # --- pause between A and B seconds ---
    pause = Group(
        named(kw("pause"), "type")
        + kw("between") + named(intNum, "lower_time")
        + kw("and") + named(intNum, "upper_time")
        + kw("seconds")
    )

    # --- HTTP request ---
    get, post, put, delete = (
        named(kw(verb), "method") for verb in ("get", "post", "put", "delete")
    )
    method = named(get | post | put | delete, "type")
    url = named(string, "url")
    data = kw("with") + kw("data") + named(string, "data")
    match = Group(kw("ensure") + kw("match") + named(regex, "regex"))
    match_list = named(Group(OneOrMore(match)), "matches")
    request = Group(
        method
        + named(Optional(kw("all")), "all")
        + url
        + Optional(data)
        + Optional(match_list)
    ).setName("request")

    # --- sessions ---
    action = request | pause | var | using
    action_list = named(Group(OneOrMore(action)), "actions")
    session = Group(
        kw("create") + kw("session") + kw("with") + kw("weight")
        + named(intNum, "weight")
        + kw("as") + named(string, "name")
        + ":" + action_list
    )
    session_list = named(OneOrMore(session), "sessions")

    # --- load profile ---
    user_cap = Optional(
        kw("up") + kw("to") + named(intNum, "max_users") + kw("users")
    )
    spawn = Group(
        kw("spawn") + kw("users") + kw("every")
        + named(intNum, "user_time")
        + named(time_period, "user_time_units")
        + kw("for")
        + named(intNum, "max_duration")
        + named(time_period, "max_duration_units")
        + user_cap
    )
    spawn_list = named(OneOrMore(spawn), "spawns")
    load = named(Group(kw("create") + kw("load") + ":" + spawn_list), "load")

    comment = "#" + restOfLine

    script = session_list + load
    script.ignore(comment)

    self.grammar = script
# from the RFCs ABNF description
# Basic character classes.
nilvalue = Word("-")
digit = Regex("[0-9]{1}")
nonzero_digit = Regex("[1-9]{1}")
printusascii = printables
sp = White(" ", exact=1)
octet = Regex("[\x00-\xFF]")
utf_8_string = Regex("[\x00-\xFF]*")
BOM = "\xef\xbb\xbf"
bom = Regex(BOM)

# Message body: either BOM-prefixed or any octet string.
msg_utf8 = bom + utf_8_string
msg_any = utf_8_string
msg = Combine(Or([msg_utf8, msg_any])).setResultsName("MSG")

# Structured data: "-" or one or more [id name="value" ...] elements.
sd_name = CharsNotIn('= ]"', 1, 32)
param_name = sd_name.setResultsName("SD_PARAM_NAME")
param_value = QuotedString(quoteChar='"', escChar="\\", multiline=True)
param_value = param_value.setResultsName("SD_PARAM_VALUE")
sd_id = sd_name.setResultsName("SD_ID")
sd_param = Group(param_name + Regex("=") + param_value)
sd_params = Group(ZeroOrMore(Group(sp + sd_param.setResultsName("SD_PARAM"))))
sd_element = Group("[" + sd_id + sd_params.setResultsName("SD_PARAMS") + "]")
sd_element = sd_element.setResultsName("SD_ELEMENT")
sd_elements = Group(OneOrMore(sd_element))
structured_data = Or([nilvalue, sd_elements.setResultsName("SD_ELEMENTS")])
structured_data = structured_data.setResultsName("STRUCTURED_DATA")

# Time-of-day components.
time_hour = Regex("0[0-9]|1[0-9]|2[0-3]")
time_minute = Regex("[0-5][0-9]")
time_second = time_minute
# Raw strings: "\." and "\+" in a normal string literal are invalid escape
# sequences (a warning today, slated to become an error); the regex is the same.
time_secfrac = Regex(r"\.[0-9]{1,6}")
time_numoffset = Or([Regex(r"\+"), Regex("-")]) + time_hour + ":" + time_minute
time_offset = Or([Regex("Z"), time_numoffset])
def _define_grammar():
    """
    Build and return a fresh copy of the selector grammar.

    Kept inside a function so the intermediate parser elements do not end up
    in the module namespace. The result matches one full selector expression
    followed by the end of the string.
    """
    selector = Forward()

    # Operands.
    label = Word(LABEL_CHARS).setParseAction(LabelNode)
    literal = (QuotedString('"') | QuotedString("'")).setParseAction(
        LiteralNode)
    literal_set = (
        Suppress("{")
        + delimitedList(QuotedString('"') | QuotedString("'"), ",")
        + Suppress("}")
    ).setParseAction(SetLiteralNode)

    # Comparisons.
    equal = (label + Suppress("==") + literal).setParseAction(
        LabelToLiteralEqualityNode)
    not_equal = (label + Suppress("!=") + literal).setParseAction(
        InequalityNode)
    member = (label + Suppress(Keyword("in")) + literal_set).setParseAction(
        LabelInSetLiteralNode)
    not_in_kw = Suppress(Keyword("not") + Keyword("in"))
    not_member = (label + not_in_kw + literal_set).setParseAction(NotInNode)
    has = (
        Suppress("has(") + Word(LABEL_CHARS) + Suppress(")")
    ).setParseAction(HasNode)

    # For completeness, all() may appear inside an expression such as
    # "! all()".  Note: the trivial selectors "" and "all()" are
    # special-cased elsewhere for efficiency.
    match_all = (Suppress("all()")).setParseAction(AllNode)

    comparison = (equal | not_equal | member | not_member | has | match_all)

    # Boolean structure: negation binds tightest, then &&, then ||.
    parenthesised = (Suppress("(") + selector + Suppress(")"))
    negatable = (ZeroOrMore("!") + (comparison | parenthesised)).setParseAction(
        simplify_negation_node)
    conjunction = (
        negatable + ZeroOrMore(Suppress("&&") + negatable)
    ).setParseAction(simplify_and_node)
    disjunction = (
        conjunction + ZeroOrMore(Suppress("||") + conjunction)
    ).setParseAction(simplify_or_node)

    selector << disjunction

    return selector + StringEnd()
# Wrap each matched variable name in a Variable node.
VARIABLE.setParseAction(lambda s, l, t: Variable(t[0]))

# Alternatives are tried left to right, so longer operators that share a
# prefix ("===" before "==", ">=" before ">") are listed first.
VERSION_CMP = (
    L("===") | L("==") | L(">=") | L("<=") | L("!=") | L("~=") | L(">") | L("<")
)

# "not in" is listed before "in".
MARKER_OP = VERSION_CMP | L("not in") | L("in")

# A value is a single- or double-quoted string, wrapped in a Value node.
MARKER_VALUE = QuotedString("'") | QuotedString('"')
MARKER_VALUE.setParseAction(lambda s, l, t: Value(t[0]))

BOOLOP = L("and") | L("or")

MARKER_VAR = VARIABLE | MARKER_VALUE

# One comparison: <var-or-value> <op> <var-or-value>, delivered as a tuple.
MARKER_ITEM = Group(MARKER_VAR + MARKER_OP + MARKER_VAR)
MARKER_ITEM.setParseAction(lambda s, l, t: tuple(t[0]))

LPAREN = L("(").suppress()
RPAREN = L(")").suppress()

# Recursive expression: items or parenthesised groups joined by and/or.
MARKER_EXPR = Forward()
MARKER_ATOM = MARKER_ITEM | Group(LPAREN + MARKER_EXPR + RPAREN)
MARKER_EXPR << MARKER_ATOM + ZeroOrMore(BOOLOP + MARKER_EXPR)
# Keyword tokens. Other tokens used below (createToken, tableToken, ifneToken,
# nnToken, nullToken, databaseToken, collateToken) are defined outside this
# excerpt.
dcsToken = Keyword("DEFAULT") + Keyword("CHARACTER") + Keyword("SET")
useToken = Keyword("USE")
defaultToken = Keyword("DEFAULT")
unsignedToken = Keyword("UNSIGNED")
autoincrementToken = Keyword("AUTO_INCREMENT")
# AUTO_INCREMENT is rewritten to "PRIMARY KEY AUTOINCREMENT" in the output.
autoincrementToken.setParseAction(lambda toks: ["PRIMARY KEY AUTOINCREMENT"])
keyToken = Keyword("KEY")
primaryToken = Keyword("PRIMARY")
uniqueToken = Keyword("UNIQUE")
insertToken = Keyword("INSERT")
intoToken = Keyword("INTO")
valuesToken = Keyword("VALUES")

# Identifiers may be bare, double-quoted or backtick-quoted; all are emitted
# double-quoted.
ident = Word(alphas, alphanums + "_$" ) ^ QuotedString('"') ^ QuotedString("`")
ident.setParseAction(lambda toks: ['"%s"' % toks[0]])

# Single-quoted string literal; the quotes stripped by QuotedString are put back.
string = QuotedString("'",multiline=True)
string.setParseAction(lambda toks: ["'%s'" % toks[0]])

# Dotted names, joined back into one token.
columnName = delimitedList( ident, ".",combine=True)
tableName = delimitedList( ident, ".",combine=True)

# Type name, optional "(size)" or "('a','b',...)" argument, then any number of
# column modifiers. UNSIGNED is matched but dropped from the output.
dataType = Word(alphas) + Combine(Optional(Literal("(") + (Word(nums) ^ delimitedList(string,combine=True)) + Literal(")"))) + ZeroOrMore(nnToken ^ autoincrementToken ^ (defaultToken + (string ^ nullToken)) ^ unsignedToken.suppress() )
dataType.setParseAction(convert_datatypes)

columnDescription = Group(ident + dataType)
# [PRIMARY|UNIQUE] KEY [name] (col[(len)], ...)
keyDescription = Optional(primaryToken ^ uniqueToken) + keyToken + Optional(ident) + Literal("(") + delimitedList(ident + Optional(Literal("(") + Word(nums) + Literal(")"))) + Literal(")")

# CREATE TABLE: key descriptions and a trailing AUTO_INCREMENT=n are parsed
# but suppressed; the statement is reassembled by rebuild_createtable.
createTableStmt = Group(createToken + tableToken + ifneToken + ident + Literal("(")) + delimitedList(columnDescription ^ keyDescription.suppress()) + Group(Literal(")")) + Optional(autoincrementToken + Literal("=") + Word(nums)).suppress()
createTableStmt.setParseAction(rebuild_createtable)

createDataBaseStmt = Group(createToken + databaseToken + ident + dcsToken + Word(alphanums)+ collateToken + ident)
def __init__(self, get_obfuscated):
    """BNF grammar for source statements.

    Builds the token definitions and four statement-level grammars
    (``statement``, ``attribs``, ``conseq_idents``,
    ``conseq_idents_no_obfuscate``) and attaches this object's parse-action
    methods to them.

    Parameters
    ----------
    get_obfuscated : function
        Function to return the obfuscated name for an identifier.
    """
    self.get_obfuscated = get_obfuscated

    # The "#:" marker; a comment is a Python-style comment that does not
    # start with that marker.
    self.directive = oneOf("#:")
    self.comment = ~self.directive + pythonStyleComment

    # Runs of one or two punctuation characters.
    self.separator = Word("~!@$%^&*()+`-={}|[]:;<>?,/.", max=2)

    # String tokens keep their surrounding quotes (unquoteResults=False).
    self.string = \
        QuotedString(quoteChar='"', escChar='\\', multiline=False, unquoteResults=False) |\
        QuotedString(quoteChar="'", escChar='\\', multiline=False, unquoteResults=False)
    self.doc_string = \
        QuotedString(quoteChar='"""', escChar='\\', multiline=True, unquoteResults=False) |\
        QuotedString(quoteChar="'''", escChar='\\', multiline=True, unquoteResults=False)
    # Triple-quoted form is tried first.
    self.string_or_doc = self.doc_string | self.string
    # A lone triple-quote delimiter.
    self.triple_quote = Literal("'''") | Literal('"""')

    # Numbers: optional sign, digits, optional fraction, optional exponent.
    self.e = Literal('E') | Literal('e')
    self.point = Literal('.')
    self.plusorminus = Literal('+') | Literal('-')
    self.number = Word(nums)
    self.integer = Combine(Optional(self.plusorminus) + self.number)
    self.fnumber = Combine(
        self.integer +
        Optional(self.point + Optional(self.number)) +
        Optional(self.e + self.integer))

    # NOTE(review): the literal is a single space in this view of the file;
    # given the name it may originally have been wider -- confirm.
    self.tab = Literal(' ')

    self.ident = Word(alphas+'_', alphanums+'_')
    self.conseq_idents_numbs = OneOrMore(self.ident | self.fnumber)
    # Dotted access: ident followed by one or more ".ident".
    self.attrib = self.ident + OneOrMore('.'+self.ident)

    # Full statement: every token kind is kept; a trailing comment is dropped.
    self.statement = (
        ZeroOrMore(
            (self.directive |
             self.tab |
             self.conseq_idents_numbs |
             self.separator |
             self.string_or_doc |
             self.triple_quote)
        ) + Optional(self.comment).suppress()
    )

    # Same scan, but only dotted attribute chains survive in the results.
    self.attribs = (
        ZeroOrMore(
            (self.directive.suppress() |
             self.tab.suppress() |
             self.attrib |
             self.ident.suppress() |
             self.separator.suppress() |
             self.fnumber.suppress() |
             self.string_or_doc.suppress() |
             self.triple_quote.suppress())
        ) + Optional(self.comment).suppress()
    )

    # Only identifiers survive. This variant matches self.string, not
    # self.string_or_doc, and has no triple_quote alternative.
    self.conseq_idents = (
        ZeroOrMore(
            (self.directive.suppress() |
             self.tab.suppress() |
             self.ident |
             self.separator.suppress() |
             self.fnumber.suppress() |
             self.string.suppress())
        ) + Optional(self.comment).suppress()
    )

    # Only identifiers survive; doc strings and triple quotes are recognized.
    self.conseq_idents_no_obfuscate = (
        ZeroOrMore(
            (self.directive.suppress() |
             self.tab.suppress() |
             self.ident |
             self.separator.suppress() |
             self.fnumber.suppress() |
             self.string_or_doc.suppress() |
             self.triple_quote.suppress())
        ) + Optional(self.comment).suppress()
    )

    # Wire the grammars to the parse-action methods of this object.
    self.attribs.setParseAction(self.add_attribs_reserveds)
    self.conseq_idents.setParseAction(self.add_conseq_idents)
    self.conseq_idents_no_obfuscate.setParseAction(
        self.add_conseq_idents_no_obfuscate)
    self.conseq_idents_numbs.setParseAction(
        self.transform_conseq_ident_numbs)
    self.directive.setParseAction(self.transform_directive)
ZeroOrMore(Suppress("-") + _label_part) + Suppress("]") ).setParseAction(tokenize_override_ps) # Looks like: [subject-group(Some text Goes Here)] subject_group = ( context_certainty + Suppress("[subject-group") + QuotedString(quoteChar='(', endQuoteChar=')').setResultsName("subgroup") + Suppress("]") ).setParseAction(lambda m: tokens.Context( [None, 'Subjgrp:' + subjgrp_label(m.subgroup, [])], bool(m.certain))) # Phrases like '“Nonimmigrant visa”' become 'p12345678' _double_quote_label = QuotedString( quoteChar=u'“', endQuoteChar=u'”' ).setParseAction(lambda m: "p{}".format(hash_for_paragraph(m[0]))) # Phrases like "definition for the term “Nonimmigrant visa”" become a # paragraph token with the appropriate paragraph label set definition = ( Marker("definition") + (Marker("of") | Marker("for")) + Optional(Marker("the") + Marker("term")) + _double_quote_label.copy().setResultsName("paragraph") ).setParseAction(lambda m: tokens.Paragraph(paragraphs=[m.paragraph])) # grammar which captures all of these possibilities token_patterns = QuickSearchable( put_active | put_passive | post_active | post_passive | delete_active | delete_passive | move_active | move_passive | designate_active | reserve_active |
from pyparsing import Literal, OneOrMore, QuotedString, SkipTo, stringEnd

"""Givent a string, this parser skips to the starting charcters '<%', returning everything it skips over as a token. Everything that is between '<%=' and '%>' are returned as a token labeled assignment. This is repeated until we run out of '<%' at which point we return everything up to the end of a string as a token."""
# Opening marker of any template tag.
startExp = Literal('<%')
# '<%= ... %>' block; may span lines. The result is named 'assignment'.
assignExp = QuotedString('<%=', endQuoteChar='%>', multiline=True)
assignExp = assignExp.setResultsName('assignment')
# One or more (literal text, assignment) pairs, then the remaining text.
ERBParser = OneOrMore(SkipTo(startExp) + assignExp) + SkipTo(stringEnd)
# Whitespace is significant in template text, so do not skip it.
ERBParser.leaveWhitespace()


class ERBTemplate(object):
    """Template loaded from a file and tokenized with ``ERBParser``."""

    def loadFromFile(self, fileName):
        """Parse *fileName* and store the tokens and a position-to-name map.

        Sets ``self.tokens`` and ``self.typeByPos``.
        """
        tokens = ERBParser.parseFile(fileName)
        # Create a dictionary where the key is the position the token
        # was found and and the value is the token's name as set by setResultsName
        # (this reads ParseResults' private name-mangled ``__tokdict`` attribute)
        typeByPos = dict( [ (v[1],k) for (k,vlist) in tokens._ParseResults__tokdict.items() for v in vlist ] )
        self.typeByPos = typeByPos
        self.tokens = tokens

    def render(self, context):
        # NOTE(review): only these two statements are visible in this view;
        # the method appears to be truncated here.
        tokens = self.tokens
        tokenCount = len(tokens)
import re
import jinja2
import pyparsing
import bleach
from .attachments import THUMB_PATTERN
from pyparsing import QuotedString, ParserElement, LineStart, LineEnd, SkipTo, OneOrMore, restOfLine
from .util import mime2thumb_ext

# Only spaces and tabs are skipped as whitespace, so newlines stay significant
# to every parser element created after this call.
ParserElement.setDefaultWhitespaceChars(' \t')

EOL = LineEnd()
SOL = LineStart()

# Inline markup. Each element accepts an asterisk/underscore form and/or a
# [tag]...[/tag] form and is rewritten to the matching HTML tag. The matched
# text is interpolated as-is (no escaping happens in these parse actions).

# **text** or [b]text[/b]
strong = QuotedString("**") | QuotedString(quoteChar="[b]", endQuoteChar="[/b]")
strong.setParseAction(lambda x: "<strong>%s</strong>" % x[0])

# *text* (backslash escapes) or [i]text[/i]
italic = QuotedString("*", escChar='\\') | QuotedString(quoteChar="[i]", endQuoteChar="[/i]")
italic.setParseAction(lambda x: "<i>%s</i>" % x[0])

# __text__ or [u]text[/u]
underline = QuotedString("__") | QuotedString(quoteChar="[u]", endQuoteChar="[/u]")
underline.setParseAction(lambda x: "<u>%s</u>" % x[0])

# [s]text[/s]
strike = QuotedString(quoteChar="[s]", endQuoteChar="[/s]")
strike.setParseAction(lambda x: "<s>%s</s>" % x[0])

# [sup]text[/sup]
sup = QuotedString(quoteChar="[sup]", endQuoteChar="[/sup]")
sup.setParseAction(lambda x: "<sup>%s</sup>" % x[0])

# [sub]text[/sub]
sub = QuotedString(quoteChar="[sub]", endQuoteChar="[/sub]")
sub.setParseAction(lambda x: "<sub>%s</sub>" % x[0])
class ObfuscateBNF(object):
    """pyparsing grammars and parse actions for scanning source statements.

    The constructor builds four statement-level grammars that scan the same
    token kinds but keep different subsets of them in their results; the
    methods below are attached to those grammars as parse actions.
    """

    # Python 2 style metaclass declaration.
    __metaclass__ = abc.ABCMeta

    def __init__(self, get_obfuscated):
        """BNF grammar for source statements.

        Parameters
        ----------
        get_obfuscated : function
            Function to return the obfuscated name for an identifier.
        """
        self.get_obfuscated = get_obfuscated

        # The "#:" marker; a comment is a Python-style comment that does not
        # start with that marker.
        self.directive = oneOf("#:")
        self.comment = ~self.directive + pythonStyleComment

        # Runs of one or two punctuation characters.
        self.separator = Word("~!@$%^&*()+`-={}|[]:;<>?,/.", max=2)

        # String tokens keep their surrounding quotes (unquoteResults=False).
        self.string = \
            QuotedString(quoteChar='"', escChar='\\', multiline=False, unquoteResults=False) |\
            QuotedString(quoteChar="'", escChar='\\', multiline=False, unquoteResults=False)
        self.doc_string = \
            QuotedString(quoteChar='"""', escChar='\\', multiline=True, unquoteResults=False) |\
            QuotedString(quoteChar="'''", escChar='\\', multiline=True, unquoteResults=False)
        # Triple-quoted form is tried first.
        self.string_or_doc = self.doc_string | self.string
        # A lone triple-quote delimiter.
        self.triple_quote = Literal("'''") | Literal('"""')

        # Numbers: optional sign, digits, optional fraction, optional exponent.
        self.e = Literal('E') | Literal('e')
        self.point = Literal('.')
        self.plusorminus = Literal('+') | Literal('-')
        self.number = Word(nums)
        self.integer = Combine(Optional(self.plusorminus) + self.number)
        self.fnumber = Combine(
            self.integer +
            Optional(self.point + Optional(self.number)) +
            Optional(self.e + self.integer))

        # NOTE(review): the literal is a single space in this view of the
        # file; given the name it may originally have been wider -- confirm.
        self.tab = Literal(' ')

        self.ident = Word(alphas+'_', alphanums+'_')
        self.conseq_idents_numbs = OneOrMore(self.ident | self.fnumber)
        # Dotted access: ident followed by one or more ".ident".
        self.attrib = self.ident + OneOrMore('.'+self.ident)

        # Full statement: every token kind is kept; a trailing comment is
        # dropped.
        self.statement = (
            ZeroOrMore(
                (self.directive |
                 self.tab |
                 self.conseq_idents_numbs |
                 self.separator |
                 self.string_or_doc |
                 self.triple_quote)
            ) + Optional(self.comment).suppress()
        )

        # Same scan, but only dotted attribute chains survive in the results.
        self.attribs = (
            ZeroOrMore(
                (self.directive.suppress() |
                 self.tab.suppress() |
                 self.attrib |
                 self.ident.suppress() |
                 self.separator.suppress() |
                 self.fnumber.suppress() |
                 self.string_or_doc.suppress() |
                 self.triple_quote.suppress())
            ) + Optional(self.comment).suppress()
        )

        # Only identifiers survive. This variant matches self.string, not
        # self.string_or_doc, and has no triple_quote alternative.
        self.conseq_idents = (
            ZeroOrMore(
                (self.directive.suppress() |
                 self.tab.suppress() |
                 self.ident |
                 self.separator.suppress() |
                 self.fnumber.suppress() |
                 self.string.suppress())
            ) + Optional(self.comment).suppress()
        )

        # Only identifiers survive; doc strings and triple quotes are
        # recognized.
        self.conseq_idents_no_obfuscate = (
            ZeroOrMore(
                (self.directive.suppress() |
                 self.tab.suppress() |
                 self.ident |
                 self.separator.suppress() |
                 self.fnumber.suppress() |
                 self.string_or_doc.suppress() |
                 self.triple_quote.suppress())
            ) + Optional(self.comment).suppress()
        )

        # Wire the grammars to the parse-action methods below.
        self.attribs.setParseAction(self.add_attribs_reserveds)
        self.conseq_idents.setParseAction(self.add_conseq_idents)
        self.conseq_idents_no_obfuscate.setParseAction(
            self.add_conseq_idents_no_obfuscate)
        self.conseq_idents_numbs.setParseAction(
            self.transform_conseq_ident_numbs)
        self.directive.setParseAction(self.transform_directive)

    ###############
    # Parse actions
    ###############

    def add_conseq_idents(self, conseq_idents_list):
        """Add names to obfuscate to identifiers table.

        Nothing is added when the tokens include ``import`` or ``except``.

        Parameters
        ----------
        conseq_idents_list : list
        """
        # The [:] slice yields the token sequence that the membership tests
        # are run against.
        if 'import' not in conseq_idents_list[:] and \
                'except' not in conseq_idents_list[:]:
            add_identifiers(set(conseq_idents_list))

    def add_conseq_idents_no_obfuscate(
            self, conseq_idents_no_obfuscate_list):
        """Add names that are not obfuscated to identifiers table.

        Nothing is added when the tokens include ``import`` or ``except``.

        Parameters
        ----------
        conseq_idents_no_obfuscate_list : list
        """
        # If an except error was not added to reserved list, don't obfuscate it
        if 'import' not in conseq_idents_no_obfuscate_list[:] and \
                'except' not in conseq_idents_no_obfuscate_list[:]:
            add_identifiers(set(conseq_idents_no_obfuscate_list),
                            do_obfuscate=False)

    def add_attribs_reserveds(self, attribs_list):
        """Add attributes of reserved names to reserved list.

        Take a list of attributes strings from a source statement, break
        it into lists of objects with their attributes, and add attributes
        that follow a reserved name to the reserved list.

        Example
        ------
        If r is reserved, then
            a.r.c + d.r.e
        would add c and e to reserveds.

        Parameters
        ----------
        attribs_list : list
        """
        if attribs_list:
            # Create an ordered list of attribute parents
            # Ex. a.b.c => [a, b]
            _attrib_list = [attribs_list[0]]
            # True when the previous token was a name, False when it was a dot.
            is_last_token_an_attrib = True
            for token in attribs_list[1:]:
                if is_last_token_an_attrib and token != '.':
                    # End of attrib list reached. Process list.
                    add_attribs_reserveds_list(_attrib_list)
                    # Start new attrib list
                    _attrib_list = [token]
                    is_last_token_an_attrib = True
                elif is_last_token_an_attrib and token == '.':
                    is_last_token_an_attrib = False
                elif not is_last_token_an_attrib and token == '.':
                    continue  # Multiple dots, continue attrib list
                elif not is_last_token_an_attrib and token != '.':
                    _attrib_list.append(token)
                    is_last_token_an_attrib = True
            else:
                # for/else: the loop has no break, so this always runs once
                # the tokens are exhausted.
                # Process last list
                if _attrib_list:
                    add_attribs_reserveds_list(_attrib_list)

    def transform_conseq_ident_numbs(self, conseq_ident_list):
        """Allow for non-name tokens in a statement.

        Names start with an alpha or underscore. Obfuscate these name
        tokens and simply copy unchanged other tokens.

        Parameters
        ----------
        conseq_ident_list : list

        Returns
        ------
        statement : str
            The tokens joined by single spaces.
        """
        return ' '.join([
            self.get_obfuscated(ident) if
            (ident[0].isalpha() or ident[0] == '_') else ident
            for ident in conseq_ident_list
        ])

    def transform_directive(self, directive_list):
        """Create a directive statement: the first token plus one space."""
        return ''.join([directive_list[0], ' '])
def transform_human(text, variables=None):
    """Transform user input with given context.

    The text is rewritten in five passes, in this order: ``$name``
    variables are substituted, double-quoted strings become hex, implicit
    opcode names get an ``OP_`` prefix, and implicit then explicit hex
    values are normalised to an even-length ``0x...`` form.

    Args:
        text (str): User input.
        variables (dict): Variables for purposes of substitution, keyed
            by name without the leading ``$``.

    Returns:
        A 2-tuple of: (A human-readable script that Script can parse,
            A list of contextual information for tooltips, etc.)
        Each entry of the list is a tuple
        ``(start, end, value, match_type)`` where ``start``/``end`` are
        offsets into ``text`` and ``match_type`` is one of 'Variable',
        'String literal', 'Opcode' or 'Hex'.
    """
    # Local import so the block does not depend on the file's import list.
    import binascii

    if variables is None:
        variables = {}  # No mutable default value.

    # these are parseActions for pyparsing.
    def str_literal_to_hex(s, loc, toks):
        """Replace each string literal with '0x' plus the hex of its bytes."""
        for i, t in enumerate(toks):
            # The 'hex' codec only exists on Python 2 and breaks on
            # non-ASCII unicode there; hexlify works on both versions.
            raw = t if isinstance(t, bytes) else t.encode('utf-8')
            hexed = binascii.hexlify(raw)
            if not isinstance(hexed, str):
                hexed = hexed.decode('ascii')
            toks[i] = ''.join(['0x', hexed])
        return toks

    def var_name_to_value(s, loc, toks):
        """Substitute known variables; unknown or falsy ones are kept."""
        for i, t in enumerate(toks):
            val = variables.get(t[1:])  # t[1:] drops the leading '$'
            if val:
                toks[i] = val
        return toks

    def implicit_opcode_to_explicit(s, loc, toks):
        """Add "OP_" prefix to an opcode."""
        for i, t in enumerate(toks):
            toks[i] = '_'.join(['OP', t])
        return toks

    def hex_to_formatted_hex(s, loc, toks):
        """Add "0x" prefix and ensure even length."""
        for i, t in enumerate(toks):
            new_tok = t
            # Add '0x' prefix
            if not t.startswith('0x'):
                if t.startswith('x'):
                    new_tok = ''.join(['0', t])
                else:
                    new_tok = ''.join(['0x', t])
            # Even-length string: pad with a zero right after the prefix.
            if len(new_tok) % 2 != 0:
                new_tok = ''.join([new_tok[0:2], '0', new_tok[2:]])
            toks[i] = new_tok
        return toks
    # ^ parseActions for pyparsing end here.

    str_literal = QuotedString('"')
    str_literal.setParseAction(str_literal_to_hex)
    var_name = Combine(Word('$') + Word(pyparsing.alphas))
    var_name.setParseAction(var_name_to_value)

    # Here we populate the list of contextual tips.

    # Explicit opcode names
    op_names = [str(i) for i in OPCODE_NAMES.keys()]
    op_names_explicit = ' '.join(op_names)

    def is_small_int(op):
        """True if op is one of OP_1, OP_2, ...OP_16"""
        try:
            int(op[3:])
            return True
        except ValueError:
            return False

    # Small-int opcodes are left out: without the prefix they would be
    # indistinguishable from plain numbers.
    op_names_implicit = ' '.join(
        [i[3:] for i in op_names if not is_small_int(i)])

    # Hex, implicit (e.g. 'a') and explicit (e.g. '0x0a')
    # Word('0x') accepts any run of the characters '0' and 'x', which is
    # how a bare 'x..' prefix can reach hex_to_formatted_hex.
    explicit_hex = Combine(
        Word('0x') + Word(pyparsing.hexnums) + pyparsing.WordEnd())
    implicit_hex = Combine(
        pyparsing.WordStart() + OneOrMore(Word(pyparsing.hexnums)) +
        pyparsing.WordEnd())
    explicit_hex.setParseAction(hex_to_formatted_hex)
    implicit_hex.setParseAction(hex_to_formatted_hex)

    # Opcodes, implicit (e.g. 'ADD') and explicit (e.g. 'OP_ADD')
    explicit_op = pyparsing.oneOf(op_names_explicit)
    implicit_op = Combine(
        pyparsing.WordStart() + pyparsing.oneOf(op_names_implicit))
    implicit_op.setParseAction(implicit_opcode_to_explicit)

    contexts = pyparsing.Optional(var_name('Variable') |
                                  str_literal('String literal') |
                                  explicit_op('Opcode') |
                                  implicit_op('Opcode') |
                                  explicit_hex('Hex') |
                                  implicit_hex('Hex'))
    matches = [(i[0].asDict(), i[1], i[2])
               for i in contexts.scanString(text)]
    context_tips = []
    for d, start, end in matches:
        # The Optional wrapper also yields empty matches; skip those.
        if not d:
            continue
        # dict views are not indexable on Python 3, so take the first
        # item through an iterator instead of d.items()[0].
        match_type, value = next(iter(d.items()))
        context_tips.append((start, end, value, match_type))

    # Now we do the actual transformation.
    s = text
    s = var_name.transformString(s)
    s = str_literal.transformString(s)
    s = implicit_op.transformString(s)
    s = implicit_hex.transformString(s)
    s = explicit_hex.transformString(s)
    return s, context_tips
return val  # tail of a definition that begins before this chunk


# Elements of the expression grammar.

# Numeric literals: a run of digits, or digits on both sides of a dot.
integer = Word(nums)
real = Combine(Word(nums) + '.' + Word(nums))

# Keyword constants; WordEnd() requires the keyword to end at a word
# boundary.
constant = (Literal('True') |
            Literal('False') |
            Literal('None') |
            Literal('yes') |
            Literal('no')) + WordEnd()

# Optional word/dot prefix, a '#', then one or more word/dot characters.
model_reference = Regex(r'([\w\.]*#[\w\.]+)')

# One or more letters, digits, dots or underscores.
variable = Regex(r'([a-zA-Z0-9\._]+)')

# Double- or single-quoted string with backslash as the escape character.
string = QuotedString('"', escChar="\\") | QuotedString('\'', escChar="\\")

# Alternatives are tried left to right: `real` precedes `integer` so the
# integer part of a real is not matched alone, and `variable` comes last
# because its pattern would also match numbers and the constants.
operand = model_reference | real | integer | constant | string | variable

# Operators.
plusop = oneOf('+ -')
multop = oneOf('* / // %')
groupop = Literal(',')

# Forward declaration; the definition of `expr` is not visible here.
expr = Forward()

# Letters and digits immediately followed by a colon.
modifier = Combine(Word(alphas + nums) + ':')

# Attach the Eval* callables (defined elsewhere) as parse actions.
integer.setParseAction(EvalInteger)
real.setParseAction(EvalReal)
string.setParseAction(EvalString)
constant.setParseAction(EvalConstant)
variable.setParseAction(EvalVariable)
def parse(string=None, filename=None, token=None, lang=None):
    """
    Parse a feature from a string or a file, or raise a syntax error.

    This function includes the parser grammar.

    :param string: text to parse; used when truthy.
    :param filename: path of the file to parse when ``string`` is falsy;
        also reported in any syntax error.
    :param token: name of a grammar element defined in this function
        (e.g. ``'STATEMENT'``) to use as the start rule; the complete
        ``FEATURE`` grammar is used when falsy.
    :param lang: object providing the keyword elements ``STATEMENT``,
        ``BACKGROUND``, ``SCENARIO``, ``EXAMPLES`` and ``FEATURE``;
        obtained from ``guess_language(string, filename)`` when falsy.
    :returns: the pyparsing results of the chosen start rule.
    :raises LettuceSyntaxError: when the input does not match the grammar.
    :raises RuntimeError: when neither ``string`` nor ``filename`` is given.

    NOTE: ``token`` is resolved through ``locals()``, so the names of the
    grammar elements below are part of this function's interface.
    """

    if not lang:
        lang = guess_language(string, filename)

    #
    # End of Line
    #
    EOL = Suppress(lineEnd)
    UTFWORD = Word(unicodePrintables)

    #
    # @tag
    #
    TAG = Suppress('@') + UTFWORD

    #
    # A table
    #
    # A table is made up of rows of cells, e.g.
    #
    #   | column 1 | column 2 |
    #
    # Table cells need to be able to handle escaped tokens such as \| and \n
    #
    def handle_esc_char(tokens):
        """Translate a two-character escape sequence into its character."""
        token = tokens[0]

        if token == r'\|':
            return u'|'
        elif token == r'\n':
            return u'\n'
        elif token == r'\\':
            return u'\\'

        raise NotImplementedError(u"Unknown token: %s" % token)

    ESC_CHAR = Word(initChars=r'\\', bodyChars=unicodePrintables, exact=2)
    ESC_CHAR.setParseAction(handle_esc_char)

    #
    # A cell can contain anything except a cell marker, new line or the
    # beginning of a cell marker, we then handle escape characters separately
    # and recombine the cell afterwards
    #
    CELL = OneOrMore(CharsNotIn('|\n\\') + Optional(ESC_CHAR))
    CELL.setParseAction(lambda tokens: u''.join(tokens))

    TABLE_ROW = Suppress('|') + OneOrMore(CELL + Suppress('|')) + EOL
    TABLE_ROW.setParseAction(lambda tokens: [v.strip() for v in tokens])

    TABLE = Group(OneOrMore(Group(TABLE_ROW)))

    #
    # Multiline string
    #
    def clean_multiline_string(s, loc, tokens):
        """
        Clean a multiline string

        The indent level of a multiline string is the indent level of the
        triple-". We have to derive this by walking backwards from the
        location of the quoted string token to the newline before it.

        We also want to remove the leading and trailing newline if they exist.

        FIXME: assumes UNIX newlines
        """

        def remove_indent(multiline, indent):
            """
            Generate the lines removing the indent
            """

            for line in multiline.splitlines():
                if line and not line[:indent].isspace():
                    warn("%s: %s: under-indented multiline string "
                         "truncated: '%s'" %
                         (lineno(loc, s), col(loc, s), line),
                         LettuceSyntaxWarning)

                # for those who are surprised by this, slicing a string
                # shorter than indent will yield empty string, not IndexError
                yield line[indent:]

        # determine the indentation offset
        indent = loc - s.rfind('\n', 0, loc) - 1

        multiline = '\n'.join(remove_indent(tokens[0], indent))

        # remove leading and trailing newlines
        # startswith/endswith are used rather than indexing so that an
        # empty multiline string does not raise IndexError.
        if multiline.startswith('\n'):
            multiline = multiline[1:]

        if multiline.endswith('\n'):
            multiline = multiline[:-1]

        return multiline

    MULTILINE = QuotedString('"""', multiline=True)
    MULTILINE.setParseAction(clean_multiline_string)

    # A Step
    #
    # Steps begin with a keyword such as Given, When, Then or And They can
    # contain an optional inline comment, although it's possible to encapsulate
    # it in a string. Finally they can contain a table or a multiline 'Python'
    # string.
    #
    # <variables> are not parsed as part of the grammar as it's not easy to
    # distinguish between a variable and XML. Instead scenarios will replace
    # instances in the steps based on the outline keys.
    #
    STATEMENT_SENTENCE = Group(
        lang.STATEMENT +  # Given, When, Then, And
        OneOrMore(UTFWORD.setWhitespaceChars(' \t') |
                  quotedString.setWhitespaceChars(' \t')) +
        EOL
    )

    STATEMENT = Group(
        STATEMENT_SENTENCE('sentence') +
        Optional(TABLE('table') | MULTILINE('multiline'))
    )
    STATEMENT.setParseAction(Step)

    STATEMENTS = Group(ZeroOrMore(STATEMENT))

    #
    # Background:
    #
    BACKGROUND_DEFN = \
        lang.BACKGROUND('keyword') + Suppress(':') + EOL
    BACKGROUND_DEFN.setParseAction(Background)

    BACKGROUND = Group(
        BACKGROUND_DEFN('node') +
        STATEMENTS('statements')
    )
    BACKGROUND.setParseAction(Background.add_statements)

    #
    # Scenario: description
    #
    SCENARIO_DEFN = Group(
        Group(ZeroOrMore(TAG))('tags') +
        lang.SCENARIO('keyword') + Suppress(':') +
        restOfLine('name') +
        EOL
    )
    SCENARIO_DEFN.setParseAction(Scenario)

    SCENARIO = Group(
        SCENARIO_DEFN('node') +
        STATEMENTS('statements') +
        Group(ZeroOrMore(
            Suppress(lang.EXAMPLES + ':') + EOL + TABLE
        ))('outlines')
    )
    SCENARIO.setParseAction(Scenario.add_statements)

    #
    # Feature: description
    #
    FEATURE_DEFN = Group(
        Group(ZeroOrMore(TAG))('tags') +
        lang.FEATURE('keyword') + Suppress(':') +
        restOfLine('name') +
        EOL
    )
    FEATURE_DEFN.setParseAction(Feature)

    #
    # A description composed of zero or more lines, before the
    # Background/Scenario block
    #
    DESCRIPTION_LINE = Group(
        ~BACKGROUND_DEFN + ~SCENARIO_DEFN +
        OneOrMore(UTFWORD).setWhitespaceChars(' \t') +
        EOL
    )
    DESCRIPTION = Group(ZeroOrMore(DESCRIPTION_LINE | EOL))
    DESCRIPTION.setParseAction(Description)

    #
    # Complete feature file definition
    #
    FEATURE = Group(
        FEATURE_DEFN('node') +
        DESCRIPTION('description') +
        Optional(BACKGROUND('background')) +
        Group(OneOrMore(SCENARIO))('scenarios') +
        stringEnd)
    FEATURE.ignore(pythonStyleComment)
    FEATURE.setParseAction(Feature.add_blocks)

    #
    # Try parsing the string
    #

    if not token:
        token = FEATURE
    else:
        # Look the start rule up by the name of one of the elements above.
        token = locals()[token]

    try:
        if string:
            tokens = token.parseString(string)
        elif filename:
            # NOTE(review): this three-argument call matches codecs.open,
            # not the builtin open -- confirm which one the module imports.
            with open(filename, 'r', 'utf-8') as fp:
                tokens = token.parseFile(fp)
        else:
            raise RuntimeError("Must pass string or filename")

        return tokens
    except ParseException as e:
        # Failing on stringEnd means content was left after a full feature.
        if e.parserElement == stringEnd:
            msg = "Expected EOF (max one feature per file)"
        else:
            msg = e.msg

        raise LettuceSyntaxError(
            filename,
            u"{lineno}:{col} Syntax Error: {msg}\n{line}\n{space}^".format(
                msg=msg,
                lineno=e.lineno,
                col=e.col,
                line=e.line,
                space=' ' * (e.col - 1)))
    except LettuceSyntaxError as e:
        # reraise the exception with the filename
        raise LettuceSyntaxError(filename, e.string)
# define the parser integer = Word(nums) real = Combine(Word(nums) + "." + Word(nums)) constant = oneOf('True False None yes no') + WordEnd(word_characters) variable = Regex(r'([a-zA-Z0-9\._]+)') explicit_variable = '$' + Regex(r'([a-zA-Z0-9\._]+)') current_scope = Literal('$$') string = (QuotedString("'''", escChar=None, unquoteResults=True) | QuotedString('"""', escChar=None, unquoteResults=True) | QuotedString('"', escChar="\\", unquoteResults=True) | QuotedString('\'', escChar="\\", unquoteResults=True)) regexp = QuotedString('/', escChar=None) timespan = Combine(Word(nums) + oneOf('ms s m h d')) current_scope_operand = current_scope variable_operand = variable explicit_variable_operand = explicit_variable integer_operand = integer real_operand = real number_operand = real | integer string_operand = string groupop = Literal(',') signop = oneOf('+ -') multop = oneOf('* / // %') filterop = oneOf('|') plusop = oneOf('+ -')