def parse_macro_arguments(argument_string, return_kwargs=False):
    """Split a macro argument string into its individual arguments.

    Four kinds of argument are recognised: double-quoted strings,
    ``key=value`` keyword arguments (the value may be a bare word or a
    double-quoted string), nested macros written as ``((...))`` and plain
    whitespace-delimited words.

    :param argument_string: the raw argument text of the macro.
    :param return_kwargs: when true, keyword arguments are separated from
        the positional ones and returned as a dict.
    :return: ``None`` for an empty/missing argument string; otherwise the
        list of parsed arguments (keyword arguments appear as nested
        ``[key, value]`` lists), or a ``(args, kwargs)`` tuple when
        ``return_kwargs`` is true.
    """
    if not argument_string:
        return None

    import re
    from pyparsing import Group, Or, QuotedString, Regex, Suppress, ZeroOrMore

    # General argument string parser
    argstring_def = ZeroOrMore(Or([
        QuotedString('"'),  # long arguments
        Group(Regex(r'[\w]+', flags=re.UNICODE) +  # keyword arguments
              Suppress('=').leaveWhitespace() +
              Or([Regex(r'[\w]+'), QuotedString('"')])),
        Regex(r'\(\(.*\)\)', flags=re.UNICODE),  # nested macros
        Regex(r'[\S]+', flags=re.UNICODE),  # basic arguments
    ]))
    args = argstring_def.parseString(argument_string).asList()

    if not return_kwargs:
        return args

    # Keyword arguments are the nested lists produced by Group(). Build
    # fresh containers instead of removing items from `args` while looping
    # over it: in-place removal shifts the remaining items and makes the
    # loop skip whatever follows each keyword argument.
    positional = []
    kwargs = {}
    for arg in args:
        if isinstance(arg, list):
            kwargs[str(arg[0])] = arg[1]
        else:
            positional.append(arg)
    return positional, kwargs
def _logicals_(atomic):
    """
    Build a parser for logical expressions over `atomic`.

    Grammar, listed from tightest to loosest binding:

        parser := atomic | '~' parser | parser '&' parser
                | parser '|' parser | parser '->' parser
                | parser '<->' parser

    Parenthesised sub-expressions are accepted wherever an atomic is.
    The resulting AST is built from the .ast package's classes.
    """
    expr = Forward()

    # An operand is either an atomic or a parenthesised full expression.
    operand = (atomic | Suppress("(") + expr + Suppress(")"))

    negation = "~" + operand
    negation.setParseAction(lambda toks: Not(toks[1]))
    unary = negation | operand

    conjunction = unary + ZeroOrMore("&" + unary)
    conjunction.setParseAction(lambda toks: _left_(And, toks))

    disjunction = conjunction + ZeroOrMore("|" + conjunction)
    disjunction.setParseAction(lambda toks: _left_(Or, toks))

    implication = ZeroOrMore(disjunction + "->") + disjunction
    implication.setParseAction(lambda toks: _right_(Implies, toks))

    equivalence = implication + ZeroOrMore("<->" + implication)
    equivalence.setParseAction(lambda toks: _left_(Iff, toks))

    expr <<= equivalence
    return expr
def _logical_parser(expression):
    """
    Build a parser for logical combinations of `expression`.

    Grammar, listed from tightest to loosest binding:

        <logical> ::= expression | '~' <logical>
                    | <logical> '&' <logical> | <logical> '|' <logical>
                    | <logical> '->' <logical> | <logical> '<->' <logical>

    .. note:: ASTs are built with the classes of :mod:`pytlq.ast`.

    .. credit:: Adapted from Simon Busard's parser parsing logical
                expressions on atomics.
    """
    logical = Forward()

    negated = Literal('~') + expression
    negated.setParseAction(lambda toks: Not(toks[1]))
    unary = (negated | expression)

    conjunction = unary + ZeroOrMore(Literal('&') + unary)
    conjunction.setParseAction(lambda toks: _left(And, toks))

    disjunction = conjunction + ZeroOrMore(Literal('|') + conjunction)
    disjunction.setParseAction(lambda toks: _left(Or, toks))

    implication = ZeroOrMore(disjunction + Literal('->')) + disjunction
    implication.setParseAction(lambda toks: _right(Imply, toks))

    equivalence = implication + ZeroOrMore(Literal('<->') + implication)
    equivalence.setParseAction(lambda toks: _left(Iff, toks))

    logical <<= equivalence
    return logical
def defineParsers():
    """Build the parsers for the line-oriented function-call language.

    Side effects: enables pyparsing's packrat cache and changes the
    default whitespace characters to tab and space for elements created
    afterwards.

    :return: ``(program, func_call)`` -- the parser for a whole program
        and the parser for a single function call (returned as well to
        make testing easier).
    """
    # Enable a fast parsing mode with caching.
    ParserElement.enablePackrat()
    # End of line terminates statements, so it is not regular whitespace.
    ParserElement.setDefaultWhitespaceChars('\t ')

    # Forward declaration because this is a recursive rule.
    func_call = Forward()

    # The "terminal" rules
    symbol = Word(alphas + '_-', alphanums + '_-').setParseAction(action_symbol)
    # Work on a copy: `quotedString` is a module-level object shared by
    # every user of pyparsing, so attaching the action to it directly
    # would change it for all other grammars in the process.
    q_symbol = quotedString.copy().setParseAction(action_q_symbol)
    bracket_term = Literal("(").suppress() - func_call \
        + Literal(")").suppress()
    word = symbol | q_symbol | bracket_term

    # The function call
    # Parse: "foo | bar | baz" or "foo"
    pipeline = (word + ZeroOrMore("|" - word)).setParseAction(action_pipeline)
    # Parse "foo|bar op1 op2 op3"
    func_call << (pipeline - ZeroOrMore(word)).setParseAction(action_func_call)

    # High level structure of program
    line = LineEnd() | func_call - LineEnd()   # empty line or function call
    program = ZeroOrMore(line) + StringEnd()   # multiple lines are a program

    # Define the comments
    program.ignore('%' + restOfLine)
    # No tab expansion
    program.parseWithTabs()

    return program, func_call
def _extract_keyword_values_termination(self, line):
    """
    Split an input line of the form ``KEYWORD(v1, v2, ...) rest`` into
    its keyword, its values and whatever follows the closing parenthesis.

    :arg line: input line
    :return: ``(keyword, values, termination)``; all three are ``None``
             when the line does not match. The termination is the
             remaining words joined by single spaces.
    """
    keyword_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ-="
    kw_token = Word(keyword_chars, max=8)("keyword")

    single_value = Word(alphanums + ".-+")
    value_list = Group(Optional(delimitedList(single_value)))("vals")

    trailer = ZeroOrMore(Word(printables))("termination")
    trailer.setParseAction(lambda toks: " ".join(toks))

    grammar = (kw_token + Suppress("(") + value_list + Suppress(")")
               + trailer + stringEnd)

    try:
        parsed = grammar.parseString(line)
    except ParseException:
        return None, None, None
    return parsed.keyword, parsed.vals, parsed.termination
def parser(cls, width, height):
    """Build the grammar for a BZW file.

    Only a subset of BZW's objects is supported for now: boxes and
    bases. ``#`` comments running to the end of the line are ignored.
    The parse result is ``cls(width, height, tokens)``.
    """
    line_comment = '#' + SkipTo(LineEnd())
    world = ZeroOrMore(Box.parser() | Base.parser())
    world.ignore(line_comment)
    world.setParseAction(lambda toks: cls(width, height, toks))
    return world
def _parse_items(self, source):
    """Parse ``key: value`` lines from `source`.

    A value may continue on following lines that start with a space.
    Lines starting with ``#`` are ignored. Newlines are significant, so
    the pyparsing default whitespace is switched to space, tab and
    carriage return before the grammar is built.

    :return: the parse results, one group (``key`` / ``value``) per item.
    """
    ParserElement.setDefaultWhitespaceChars(' \t\r')

    eol = LineEnd().suppress()
    hash_comment = Literal('#') + Optional(restOfLine) + eol
    text_run = CharsNotIn("\n")

    # First line of the value followed by any number of continuation lines.
    folded_value = Combine(text_run + ZeroOrMore(eol + Literal(' ') + text_run))

    entry = Group(
        Word(alphanums + '-')('key')
        + Literal(':').suppress()
        + Optional(folded_value)("value")
        + eol
    )

    items = ZeroOrMore(entry)
    items.ignore(hash_comment)
    return items.parseString(source, True)
def parser():
    """Build the stylesheet grammar: zero or more (possibly nested) rules.

    A rule is a selector followed by a ``{ ... }`` block containing
    nested rules and/or ``;``-terminated declarations. C-style comments
    are ignored. Each rule is passed through ``make_action(Rule)``.
    """
    nested_rule = Forward()
    declarations = OneOrMore(CharsNotIn('{};') + ';')
    selector = CharsNotIn('{};')

    block = Group('{' + ZeroOrMore(nested_rule | declarations) + '}')
    nested_rule <<= selector + block
    nested_rule.setParseAction(make_action(Rule))

    sheet = ZeroOrMore(nested_rule)
    sheet.ignore(cStyleComment)
    return sheet
def transform_human(text):
    """Transform user input into something Script can read.

    Every double-quoted string literal in `text` is replaced by ``0x``
    followed by the hexadecimal encoding of its contents; everything
    else is left untouched.

    :param text: the user-supplied script text.
    :return: the transformed text.
    """
    import binascii

    # these are parseActions for pyparsing.
    def str_literal_to_hex(s, loc, toks):
        for i, t in enumerate(toks):
            # The 'hex' string codec only exists on Python 2; hexlify
            # gives the same digits on both Python 2 and Python 3.
            raw = t if isinstance(t, bytes) else t.encode('utf-8')
            digits = binascii.hexlify(raw)
            if not isinstance(digits, str):
                digits = digits.decode('ascii')
            toks[i] = ''.join(['0x', digits])
        return toks
    # ^ parseActions for pyparsing end here.

    str_literal = QuotedString('"')
    str_literal.setParseAction(str_literal_to_hex)
    transformer = ZeroOrMore(str_literal)
    return transformer.transformString(text)
def build_parser():
    """Build a parser for INI-style text.

    The input is a sequence of ``[section]`` headers, each followed by
    one or more ``key = value`` properties. Values run to the end of the
    line and are stripped of surrounding whitespace. Python-style ``#``
    comments are ignored.

    :return: the pyparsing expression; sections are available under the
        ``sections`` results name.
    """
    key = Word(alphanums).setResultsName('key')
    # Copy before attaching the action: `restOfLine` is a module-level
    # object shared by every pyparsing user, and setParseAction mutates
    # the element it is called on.
    value = restOfLine.copy().setParseAction(
        lambda string, location, tokens: tokens[0].strip()
    ).setResultsName('value')
    property_ = Group(key + Suppress(Literal('=')) + value)
    properties = Group(OneOrMore(property_)).setResultsName('properties')
    section_name = (Suppress('[') + OneOrMore(CharsNotIn(']')) +
                    Suppress(']')).setResultsName('section')
    section = Group(section_name + properties)
    ini_file = ZeroOrMore(section).setResultsName('sections')
    ini_file.ignore(pythonStyleComment)
    return ini_file
def create_grammar():
    """Build the grammar for class and relation declarations.

    Reads the module-level ``arrows`` and ``stereotypes`` mappings (both
    must be non-empty); their keys become the accepted arrow keywords
    and stereotype names.

    :return: a pyparsing expression matching any number of class
        definitions and relations; C-style and ``//`` comments are
        ignored.
    """
    assert len(arrows) > 0
    assert len(stereotypes) > 0

    linechars = ''.join(c for c in printables if c not in '}\n') + ' \t'
    norbracket = ''.join(c for c in printables if c != ']') + ' \t'
    norparen = ''.join(c for c in printables if c != ')') + ' \t'

    line = Word(linechars)
    cls_body = Group(ZeroOrMore(line))
    classkeyword = Keyword('class').setResultsName('type')

    # list(...) so the names can be indexed on Python 3 as well, where
    # dict.keys() returns a view.
    st_names = list(stereotypes)
    st = Literal(st_names[0])
    for s in st_names[1:]:
        st = st | Literal(s)
    stereotype = Group(Optional(Literal('<<').suppress() + st +
                                Literal('>>').suppress()))

    identifier_list = Word(alphas) + ZeroOrMore(Literal(',').suppress() +
                                                Word(alphas))
    baseclasses = Group(Optional(Literal(':').suppress() + identifier_list))

    cls = Group(stereotype + classkeyword + Word(alphas) + baseclasses +
                Literal('{').suppress() + cls_body + Literal('}').suppress())

    # Longest arrows first so that a short arrow cannot shadow a longer
    # one sharing its prefix. sorted(..., reverse=True) is stable, which
    # matches the ordering of the former cmp-based sort.
    arrow_names = sorted(arrows, key=len, reverse=True)
    arrow = Keyword(arrow_names[0])
    for ar in arrow_names[1:]:
        arrow = arrow | Keyword(ar)

    relation_caption = Literal('(').suppress() + Word(norparen) + \
        Literal(')').suppress()
    quantifier = Literal('[').suppress() + Word(norbracket) + \
        Literal(']').suppress()
    relation = Group(
        Word(alphas) + Group(Optional(quantifier)) +
        arrow.setResultsName('type') +
        Word(alphas) + Group(Optional(quantifier)) +
        Group(Optional(relation_caption))
    )

    grammar = ZeroOrMore(cls | relation)
    grammar.ignore(cStyleComment)
    grammar.ignore("//" + restOfLine)
    return grammar
def getkw_bnf(self):
    """Build the grammar for the keyword/section input format.

    The input consists of ``name = value`` keywords, ``name = [ ... ]``
    vectors, ``$name ... $end`` data blocks and ``name { ... }`` or
    ``name<tag> { ... }`` sections, which may nest. Python-style ``#``
    comments are ignored. Matches are reported through the ``conv_*``,
    ``store_*``, ``add_sect`` and ``pop_sect`` methods of this object.

    :return: the pyparsing expression for a complete input.
    """
    sect_begin = Literal("{").suppress()
    sect_end = Literal("}").suppress()
    array_begin = Literal("[").suppress()
    array_end = Literal("]").suppress()
    tag_begin = Literal("<").suppress()
    tag_end = Literal(">").suppress()
    eql = Literal("=").suppress()
    dmark = Literal('$').suppress()
    end_data = Literal('$end').suppress()
    prtable = alphanums + r'!$%&*+-./<>?@^_|~'

    # Raw strings: '\d' and '\.' are not valid string escapes.
    ival = Regex(r'[-]?\d+')
    dval = Regex(r'-?\d+\.\d*([eE]?[+-]?\d+)?')
    lval = Regex(r'([Yy]es|[Nn]o|[Tt]rue|[Ff]alse|[Oo]n|[Oo]ff)')

    # Helper definitions
    # Copy before attaching the action: `quotedString` is a module-level
    # object shared by every pyparsing user.
    qstr = quotedString.copy().setParseAction(removeQuotes)
    kstr = qstr ^ dval ^ ival ^ lval ^ Word(prtable)
    name = Word(alphas + "_", alphanums + "_")
    vec = array_begin + delimitedList(
        dval ^ ival ^ lval ^ Word(prtable) ^
        Literal("\n").suppress() ^
        qstr) + array_end
    sect = name + sect_begin
    tag_sect = name + Group(tag_begin + name + tag_end) + sect_begin

    # Grammar
    keyword = name + eql + kstr
    vector = name + eql + vec
    data = Combine(dmark + name) + SkipTo(end_data) + end_data
    section = Forward()
    sect_def = (sect | tag_sect)
    entry = section | data | vector | keyword
    section << sect_def + ZeroOrMore(entry) + sect_end

    # Parsing actions
    ival.setParseAction(self.conv_ival)
    dval.setParseAction(self.conv_dval)
    lval.setParseAction(self.conv_lval)
    keyword.setParseAction(self.store_key)
    vector.setParseAction(self.store_vector)
    data.setParseAction(self.store_data)
    sect.setParseAction(self.add_sect)
    tag_sect.setParseAction(self.add_sect)
    sect_end.setParseAction(self.pop_sect)

    bnf = ZeroOrMore(entry) + StringEnd().setFailAction(parse_error)
    bnf.ignore(pythonStyleComment)
    return bnf
def gramma():
    """Build the grammar for a PEPA model.

    A model is any number of rate definitions, followed by any number of
    process definitions, followed by the system equation. ``//``
    comments are ignored. Semantic processing is delegated to the
    module-level ``_create_*``, ``_check_var`` and ``assign_var``
    parse actions.
    """
    # --- Tokens ---------------------------------------------------------
    dot = Literal('.')
    prefix_operator = Literal('.')
    choice_operator = Literal('+')
    parallel_operator = Literal("||") | Literal("<>")
    rate_name = Word(alphas.lower(), alphanums + "_")
    open_paren = Literal('(').suppress()
    close_paren = Literal(')').suppress()
    open_bracket = Literal('[').suppress()
    close_bracket = Literal(']').suppress()
    equals = Literal('=')
    semicolon = Literal(';').suppress()
    comma = Literal(',').suppress()
    digits = Word(nums)
    whole = digits
    float_number = Combine(whole + Optional(dot + Optional(digits)))
    passive_rate = Word('infty') | Word('T')
    internal_rate = Word('tau')
    hash_mark = Literal('#').suppress()
    percent_mark = Literal('%').suppress()

    # --- Rates, activities and process names ----------------------------
    rate = (rate_name | float_number | internal_rate
            | passive_rate).setParseAction(_check_var)
    rate_literal = float_number | internal_rate | passive_rate
    sync_set = (Word('<').suppress() + rate_name
                + ZeroOrMore(comma + rate_name) + Word('>').suppress())
    coop_operator = (parallel_operator | sync_set).setParseAction(
        _create_sync_set)
    activity = (rate_name + comma + rate).setParseAction(_create_activity)
    proc_name = (Word(alphas.upper(), alphanums + "_")
                 + Optional(open_bracket + rate_literal + close_bracket)
                 ).setParseAction(_create_procdef)

    # --- Rate definitions -----------------------------------------------
    rate_definition = (Optional(percent_mark) + rate_name + equals
                       + rate_literal).setParseAction(assign_var) + semicolon

    # --- Process expressions (mutually recursive) -----------------------
    prefix_expr = Forward()
    choice_expr = Forward()
    coop_expr = Forward()
    process = (activity | proc_name
               | open_paren + coop_expr + close_paren
               ).setParseAction(_create_process)
    prefix_expr << (process + ZeroOrMore(prefix_operator + prefix_expr)
                    ).setParseAction(_create_prefix)
    choice_expr << (prefix_expr + ZeroOrMore(choice_operator + choice_expr)
                    ).setParseAction(_create_choice)
    coop_expr << (choice_expr + ZeroOrMore(coop_operator + coop_expr)
                  ).setParseAction(_create_coop)

    # --- Definitions and the whole model --------------------------------
    process_definition = (Optional(hash_mark) + proc_name + equals
                          + coop_expr + semicolon
                          ).setParseAction(_create_definition)
    system_equation = Optional(hash_mark) + coop_expr
    model = (ZeroOrMore(rate_definition) + ZeroOrMore(process_definition)
             + system_equation.setParseAction(_create_system_equation))

    line_comment = '//' + restOfLine
    model.ignore(line_comment)
    return model
def read(self, file_or_filename):
    """ Parses a PSAT data file and returns a case object

        file_or_filename: File object or path to PSAT data file
        return: Case object

        The case is named after the file: its base name without the
        extension, for both paths and file objects.
    """
    self.file_or_filename = file_or_filename

    logger.info("Parsing PSAT case file [%s]." % file_or_filename)

    t0 = time.time()

    self.case = Case()

    # Name the case. A file object's ``name`` may carry a directory
    # part, so the base name is taken in both branches to keep the case
    # name independent of how the file was supplied.
    if isinstance(file_or_filename, basestring):
        source_path = file_or_filename
    else:
        source_path = file_or_filename.name
    name, _ = splitext(basename(source_path))

    self.case.name = name

    bus_array = self._get_bus_array_construct()
    line_array = self._get_line_array_construct()
    # TODO: Lines.con - Alternative line data format
    slack_array = self._get_slack_array_construct()
    pv_array = self._get_pv_array_construct()
    pq_array = self._get_pq_array_construct()
    demand_array = self._get_demand_array_construct()
    supply_array = self._get_supply_array_construct()
    # TODO: Varname.bus (Bus names)

    # Pyparsing case: the arrays in fixed order, each optionally
    # preceded by MATLAB comments.
    case = \
        ZeroOrMore(matlab_comment) + bus_array + \
        ZeroOrMore(matlab_comment) + line_array + \
        ZeroOrMore(matlab_comment) + slack_array + \
        ZeroOrMore(matlab_comment) + pv_array + \
        ZeroOrMore(matlab_comment) + pq_array + \
        ZeroOrMore(matlab_comment) + demand_array + \
        ZeroOrMore(matlab_comment) + supply_array

    case.parseFile(file_or_filename)

    elapsed = time.time() - t0
    logger.info("PSAT case file parsed in %.3fs." % elapsed)

    return self.case
def parse_cp2k_warnings(file_name, package_warnings):
    """
    Collect every warning found in an output file.

    Each warning is the text from a ``*** WARNING`` marker up to the
    next blank line. The result maps every such message to whatever
    ``assign_warning(package_warnings, message)`` returns for it, or is
    ``None`` when the file contains no warnings.
    """
    warning_block = Suppress(SkipTo("*** WARNING")) + SkipTo('\n\n')
    found = ZeroOrMore(warning_block).parseFile(file_name).asList()

    # Match the messages against the warnings provided by the user.
    matched = {msg: assign_warning(package_warnings, msg) for msg in found}
    return matched if matched else None
def __init__(self, path, text, state=None):
    """Store the source being parsed and build the SFZ grammar.

    :param path: path of the file; its directory becomes ``base_path``.
    :param text: contents to parse.
    :param state: optional state object, stored as-is.

    The grammar (``self.sfz_file``) is a sequence of ``<section>``
    headers, ``opcode=value`` pairs and ``#include "file"`` directives;
    ``//`` comments are ignored.
    """
    self.path = path
    self.base_path = os.path.dirname(path)
    self.text = text
    self.state = state

    # opcode=value; the value extends lazily up to the next opcode,
    # comment, section header or end of line.
    opcode_name = Word(alphanums + '_')
    opcode_value = Regex(r'.*?(?=\s*(([a-zA-Z0-9_]+=)|//|<[a-z]|$))', re.MULTILINE)
    opcode = locatedExpr(opcode_name) + Literal('=').suppress() + opcode_value
    opcode.setParseAction(self.handle_opcode)

    # <section>
    section = Literal('<').suppress() + Word(alphas) + Literal('>').suppress()
    section.setParseAction(self.handle_section)

    # #include "file"
    include = Literal('#include').suppress() + locatedExpr(QuotedString('"'))
    include.setParseAction(self.handle_include)

    statement = (section ^ opcode ^ include)
    self.sfz_file = ZeroOrMore(statement) + stringEnd

    line_comment = Literal('//') + restOfLine
    self.sfz_file.ignore(line_comment)
def __init__(self):
    """Build the top-level grammar and wire up the parse actions.

    The grammar (``self._toplevel``) accepts any number of
    ``key = value`` lines and ``[group.name]`` headers; ``#`` comments
    are ignored. Each value type is converted by the matching
    ``_parse_<type>`` method of this object.
    """
    key_name = Word(re.sub(r"[\[\]=\"]", "", printables))
    group_part = Word(re.sub(r"[\[\]\.]", "", printables))

    signed_int = Optional("-") + ("0" | Word(nums))
    value_types = dict(
        string=QuotedString("\"", escChar="\\"),
        integer=Combine(signed_int),
        float=Combine(signed_int + "." + Word(nums)),
        datetime=Regex(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z"),
        boolean=Keyword("true") | Keyword("false"),
        array=Forward(),
    )

    # An array holds a comma-separated list of values of a single type
    # (possibly nested arrays).
    homogeneous_list = Or(delimitedList(t) for t in value_types.values())
    value_types["array"] << Group(
        Suppress("[") + Optional(homogeneous_list) + Suppress("]"))

    any_value = Or(t for t in value_types.values())
    keyvalue = key_name + Suppress("=") + any_value + Suppress(LineEnd())

    keygroup_namespace = group_part + ZeroOrMore(Suppress(".") + group_part)
    keygroup = "[" + keygroup_namespace + "]" + LineEnd()

    self._toplevel = ZeroOrMore(keyvalue | keygroup)
    self._toplevel.ignore(pythonStyleComment)

    for type_name, element in value_types.items():
        element.setParseAction(getattr(self, "_parse_" + type_name))
    keyvalue.setParseAction(self._parse_keyvalue)
    keygroup_namespace.setParseAction(self._parse_keygroup_namespace)
def parse_template(template_text):
    """Split a template into raw text blocks and ``${...}`` macros.

    A macro has the form ``${a.b.c(name:value, ...)[local_id]}``, where
    the parenthesised parameter list is optional. Whitespace is
    preserved. The result is the parse tree converted with ``asDict()``.
    """
    ident = Word(alphas, alphanums + '_')

    # name:value pairs inside the optional parameter list
    parameter = Group(ident('name') + Suppress(':') + CharsNotIn(',)')('value'))
    parameters = Group(
        Suppress('(') + delimitedList(parameter, delim=',') + Suppress(')'))

    dotted_id = originalTextFor(ident + '.' + ident + '.' + ident)
    measurement = Group(
        dotted_id('benchmark')
        + Optional(parameters('params'))
        + Suppress('[') + ident('local_id') + Suppress(']'))

    macro = Group(Suppress('${') + measurement('measurement') + Suppress('}'))
    plain = originalTextFor(CharsNotIn('$'))

    template = ZeroOrMore(Group(plain('text') | macro('macro')))('template')
    template.leaveWhitespace()
    return template.parseString(template_text).asDict()
def fromString(inputText, verbose=False):
    """Parse the text of an IDSL file.

    Block comments (``/* ... */``) are stripped first and C++-style
    comments are ignored while parsing. The text must consist of
    optional ``import "path";`` statements followed by one
    ``module name { ... };`` containing struct, enum, exception,
    dictionary, sequence and interface definitions.

    :param inputText: the IDSL source text.
    :param verbose: when true, print the verbosity flag.
    :return: the result of ``IDSLParsing.module`` applied to the parse tree.
    """
    if verbose:
        # Single-string form: prints the same text on Python 2 and 3.
        print('Verbose: %s' % (verbose,))
    text = nestedExpr("/*", "*/").suppress().transformString(inputText)

    semicolon = Suppress(Word(";"))
    quote = Suppress(Word("\""))
    op = Suppress(Word("{"))
    cl = Suppress(Word("}"))
    opp = Suppress(Word("("))
    clp = Suppress(Word(")"))
    lt = Suppress(Word("<"))
    gt = Suppress(Word(">"))
    identifier = Word(alphas + "_", alphanums + "_")
    typeIdentifier = Word(alphas + "_", alphanums + "_:")

    # Reserved words are matched with Keyword. Word("import") would
    # accept any run of the letters i, m, p, o, r, t (e.g. "trim"), not
    # just the word itself.

    # Imports
    idslImport = Suppress(Keyword("import")) + quote + \
        CharsNotIn("\";").setResultsName('path') + quote + semicolon
    idslImports = ZeroOrMore(idslImport)

    # Type definitions
    dictionaryDef = Keyword("dictionary") + lt + CharsNotIn("<>;") + gt + \
        identifier.setResultsName('name') + semicolon
    sequenceDef = Keyword("sequence") + lt + CharsNotIn("<>;") + gt + \
        identifier.setResultsName('name') + semicolon
    enumDef = Keyword("enum") + identifier.setResultsName('name') + \
        op + CharsNotIn("{}") + cl + semicolon
    structDef = Keyword("struct") + identifier.setResultsName('name') + \
        op + CharsNotIn("{}") + cl + semicolon
    exceptionDef = Keyword("exception") + identifier.setResultsName('name') + \
        op + CharsNotIn("{}") + cl + semicolon

    # Interface methods
    raiseDef = Suppress(Keyword("throws")) + typeIdentifier + \
        ZeroOrMore(Literal(',') + typeIdentifier)
    decoratorDef = Literal('idempotent') | Literal('out')
    retValDef = typeIdentifier.setResultsName('ret')

    firstParam = Group(Optional(decoratorDef.setResultsName('decorator')) +
                       typeIdentifier.setResultsName('type') +
                       identifier.setResultsName('name'))
    nextParam = Suppress(Word(',')) + firstParam
    params = firstParam + ZeroOrMore(nextParam)

    remoteMethodDef = Group(Optional(decoratorDef) + retValDef +
                            typeIdentifier.setResultsName('name') + opp +
                            Optional(params).setResultsName('params') + clp +
                            Optional(raiseDef) + semicolon)
    interfaceDef = Keyword("interface") + \
        typeIdentifier.setResultsName('name') + op + \
        Group(ZeroOrMore(remoteMethodDef)) + cl + semicolon

    # Module
    moduleContent = Group(structDef | enumDef | exceptionDef |
                          dictionaryDef | sequenceDef | interfaceDef)
    module = Suppress(Keyword("module")) + \
        identifier.setResultsName("name") + op + \
        ZeroOrMore(moduleContent).setResultsName("contents") + cl + semicolon

    IDSL = idslImports.setResultsName("imports") + \
        module.setResultsName("module")
    IDSL.ignore(cppStyleComment)
    tree = IDSL.parseString(text)
    return IDSLParsing.module(tree)
def getkw_bnf(self):
    """Build the grammar for the keyword/section input format.

    The input consists of ``name = value`` keywords, ``name = [ ... ]``
    vectors, ``$name ... $end`` data blocks and sections written as
    ``name { ... }``, ``name(key) { ... }`` or ``name([vec]) { ... }``,
    which may nest. Python-style ``#`` comments are ignored. Matches are
    reported through the ``store_*``, ``add_sect``, ``add_vecsect`` and
    ``pop_sect`` methods of this object.

    :return: the pyparsing expression for a complete input.
    """
    sect_begin = Literal("{").suppress()
    sect_end = Literal("}").suppress()
    array_begin = Literal("[").suppress()
    array_end = Literal("]").suppress()
    arg_begin = Literal("(").suppress()
    arg_end = Literal(")").suppress()
    eql = Literal("=").suppress()
    dmark = Literal('$').suppress()
    end_data = Literal('$end').suppress()
    prtable = alphanums + r'!$%&*+-./<>?@^_|~'

    # Helper definitions
    # Copy before attaching the action: `quotedString` is a module-level
    # object shared by every pyparsing user.
    qstr = quotedString.copy().setParseAction(removeQuotes)
    kstr = Word(prtable) ^ qstr
    name = Word(alphas + "_", alphanums + "_")
    vec = array_begin + delimitedList(
        Word(prtable) ^
        Literal("\n").suppress() ^
        qstr) + array_end
    sect = name + sect_begin
    key_sect = name + Group(arg_begin + kstr + arg_end) + sect_begin
    vec_sect = name + Group(arg_begin + vec + arg_end) + sect_begin

    # Grammar
    keyword = name + eql + kstr
    vector = name + eql + vec
    data = Combine(dmark + name) + SkipTo(end_data) + end_data
    section = Forward()
    sect_def = (sect | key_sect | vec_sect)
    entry = section | data | vector | keyword
    section << sect_def + ZeroOrMore(entry) + sect_end

    # Parsing actions
    keyword.setParseAction(self.store_key)
    vector.setParseAction(self.store_vector)
    data.setParseAction(self.store_data)
    sect.setParseAction(self.add_sect)
    key_sect.setParseAction(self.add_sect)
    vec_sect.setParseAction(self.add_vecsect)
    sect_end.setParseAction(self.pop_sect)

    bnf = ZeroOrMore(entry) + StringEnd().setFailAction(parse_error)
    bnf.ignore(pythonStyleComment)
    return bnf
def _grammar(self):
    """Build the grammar for a parameter file.

    The file is a sequence of ``key value;`` entries and
    ``name { ... }`` dictionaries, which may nest. C- and C++-style
    comments are ignored. Results are wrapped in ``Dict`` so entries
    can be looked up by key.
    """
    word = Word(alphanums + ".")
    terminator = Literal(";").suppress()
    open_brace = Literal("{").suppress()
    close_brace = Literal("}").suppress()

    # key value;  -- the value is everything up to the semicolon
    entry = Dict(Group(word + SkipTo(terminator) + terminator))

    # name { entries and nested dictionaries }
    dictionary = Forward()
    body = open_brace + ZeroOrMore(dictionary | entry) + close_brace
    dictionary << Dict(Group(word + body))

    parameter_file = ZeroOrMore(dictionary | entry)
    parameter_file.ignore(cStyleComment)
    parameter_file.ignore(cppStyleComment)
    return parameter_file
def cleanAnnotation(filepath):
    """Strip obsolete or superfluous annotations from a file, in place.

    The file at `filepath` is read, run through a series of suppressing
    transformations and written back.
    """
    with open(filepath, 'r') as mo_file:
        source = mo_file.read()

    def blanks():
        # leading spaces/tabs before an annotation
        return ZeroOrMore(White(' \t'))

    def trailing():
        # spaces/tabs followed by a line end, after an annotation
        return ZeroOrMore(White(' \t') + lineEnd)

    # remove 'Window(),' and 'Coordsys()' annotations:
    window_ref = blanks() + (Keyword('Window')|Keyword('Coordsys')) + nestedExpr() + ',' + trailing()
    out = Suppress(window_ref).transformString(source)

    # special care needs to be taken if the annotation is the last one
    window_last_ref = Optional(',') + blanks() + (Keyword('Window')|Keyword('Coordsys')) + nestedExpr() + trailing()
    out = Suppress(window_last_ref).transformString(out)

    # remove empty '[__Dymola_]experimentSetupOutput(),' annotation:
    exp_ref = Optional(',') + blanks() + Optional('__Dymola_') + (Keyword('experimentSetupOutput')|Keyword('experiment')|Keyword('DymolaStoredErrors')|Keyword('Diagram')|Keyword('Icon')) + ~nestedExpr() + ~CharsNotIn(',)')
    out = Suppress(exp_ref).transformString(out)

    # Remove Icon and Diagram annotations that do not contain any graphics
    empty_ref = blanks() + (Keyword('Icon')|Keyword('Diagram')) + nestedExpr()('args') + ',' + trailing()
    empty_ref.setParseAction(skipNonEmptyGraphics)
    out = Suppress(empty_ref).transformString(out)

    # special care for the last annotation again
    empty_last_ref = Optional(',') + blanks() + (Keyword('Icon')|Keyword('Diagram')) + nestedExpr()('args') + trailing()
    empty_last_ref.setParseAction(skipNonEmptyGraphics)
    out = Suppress(empty_last_ref).transformString(out)

    # in case we end up with empty annotations remove them too
    annotation_ref = blanks() + Keyword('annotation') + nestedExpr('(',');',content=' ') + trailing()
    out = Suppress(annotation_ref).transformString(out)

    with open(filepath,'w') as mo_file:
        mo_file.write(out)
def find_procedures_headers(self):
    """Find the ``CREATE [OR REPLACE] FUNCTION`` headers in ``self._sql``.

    SQL ``--`` comments and C-style comments are ignored; text between
    headers is skipped.

    :return: the parse results; each header contributes a dict with the
        function ``name`` and its ``input`` parameters (each parameter
        is a single space-joined string).
    :raises Exception: any parsing error is re-raised after printing
        ``self._fpath``.
    """
    CREATE = CaselessKeyword("CREATE")
    OR = CaselessKeyword("OR")
    REPLACE = CaselessKeyword("REPLACE")
    FUNCTION = CaselessKeyword("FUNCTION")
    IN = CaselessKeyword("IN")
    OUT = CaselessKeyword("OUT")
    INOUT = CaselessKeyword("INOUT")
    VARIADIC = CaselessKeyword("VARIADIC")
    NAME = (Word(alphas, alphanums + "_."))("name")
    ALIAS = Word(alphas, alphanums + "_")
    TYPE = (
        Word(alphas, alphanums + "[]_. ", ) +
        Suppress(Optional(Literal("(") + Word(nums) + Literal(")")))
    )
    PRM = (
        (Optional(IN | OUT | INOUT | VARIADIC | (OUT + VARIADIC)) +
         Optional(ALIAS) + TYPE) | TYPE
    ).setParseAction(lambda res: " ".join([w.strip() for w in res]))
    COMMENT = "--" + restOfLine
    COMMA = Suppress(",")
    PARAMS = ZeroOrMore(
        PRM +
        Optional(COMMA)
    )("input")
    PARAMS.ignore(COMMENT)
    HEADER = (
        CREATE + Optional(OR) + Optional(REPLACE) + FUNCTION + NAME +
        Suppress("(") + PARAMS + Suppress(")")
    ).setParseAction(lambda res: {"name": res.name, "input": res.input})
    parse_header = OneOrMore(HEADER | Suppress(SkipTo(HEADER)))
    parse_header.ignore(COMMENT)
    parse_header.ignore(cStyleComment)
    try:
        headers = parse_header.parseString(self._sql)
    except Exception:
        # Show which file failed, then let the error propagate. A bare
        # `raise` keeps the original traceback.
        print(self._fpath)
        raise
    return headers
def __init__(self, max_=60):
    """Build the parser for one field of a schedule specification.

    A field is a comma-separated list of groups; each group is ``*`` or
    a value/range (``a`` or ``a-b``), optionally followed by ``/step``.
    Values may be numeric or alphabetic words.

    :param max_: stored as ``self.max_`` for use by the parse actions.
    """
    self.max_ = max_

    # --- grammar structure ----------------------------------------------
    digit_chars = "0123456789"
    wildcard = Literal('*')
    value = Word(digit_chars) | Word(alphas)
    step = value  # a step is written like any other value
    span = value + Optional(Literal('-') + value)
    spec = wildcard | span
    group = Group(spec) + Optional(Literal('/') + step)
    more_groups = ZeroOrMore(Literal(',') + group)
    field = group + more_groups + StringEnd()

    # --- parse actions --------------------------------------------------
    wildcard.setParseAction(self._expand_star)
    value.setParseAction(self._expand_number)
    span.setParseAction(self._expand_range)
    group.setParseAction(self._filter_steps)
    more_groups.setParseAction(self._ignore_comma)
    field.setParseAction(self._join_to_set)

    self.parser = field
def fromString(inputText, verbose=False):
    """Parse the text of a CDSL component description.

    Block comments (``/* ... */``) are stripped first and C++-style
    comments are ignored while parsing. The text must consist of
    optional ``import "path";`` statements followed by one
    ``component name { ... };`` whose body holds the communications,
    language, gui and options entries in any order.

    :param inputText: the CDSL source text.
    :param verbose: when true, print the verbosity flag.
    :return: the result of ``CDSLParsing.component`` applied to the
        parse tree.
    """
    if verbose:
        # Single-string form: prints the same text on Python 2 and 3.
        print('Verbose: %s' % (verbose,))
    text = nestedExpr("/*", "*/").suppress().transformString(inputText)

    semicolon = Suppress(Word(";"))
    quote = Suppress(Word("\""))
    op = Suppress(Word("{"))
    cl = Suppress(Word("}"))
    opp = Suppress(Word("("))
    clp = Suppress(Word(")"))

    identifier = Word(alphas + "_", alphanums + "_")

    commIdentifier = Group(
        identifier.setResultsName('identifier') +
        Optional(opp +
                 (CaselessLiteral("ice") | CaselessLiteral("ros")).setResultsName("type") +
                 clp))

    # Imports
    idslImport = Suppress(CaselessLiteral("import")) + quote + \
        CharsNotIn("\";").setResultsName('path') + quote + semicolon
    idslImports = ZeroOrMore(idslImport)

    # Communications
    implementsList = Group(CaselessLiteral('implements') + identifier +
                           ZeroOrMore(Suppress(Word(',')) + identifier) + semicolon)
    requiresList = Group(CaselessLiteral('requires') + identifier +
                         ZeroOrMore(Suppress(Word(',')) + identifier) + semicolon)
    subscribesList = Group(CaselessLiteral('subscribesTo') + commIdentifier +
                           ZeroOrMore(Suppress(Word(',')) + commIdentifier) + semicolon)
    publishesList = Group(CaselessLiteral('publishes') + identifier +
                          ZeroOrMore(Suppress(Word(',')) + identifier) + semicolon)
    communicationList = implementsList | requiresList | subscribesList | publishesList
    communications = Group(Suppress(CaselessLiteral("communications")) + op +
                           ZeroOrMore(communicationList) + cl + semicolon)

    # Language
    language = Suppress(CaselessLiteral("language")) + \
        (CaselessLiteral("cpp") | CaselessLiteral("python")) + semicolon

    # GUI
    gui = Group(Optional(Suppress(CaselessLiteral("gui")) + CaselessLiteral("Qt") +
                         opp + identifier + clp + semicolon))

    # additional options
    options = Group(Optional(Suppress(CaselessLiteral("options")) + identifier +
                             ZeroOrMore(Suppress(Word(',')) + identifier) + semicolon))

    # `&` lets the four entries appear in any order.
    componentContents = communications.setResultsName('communications') & \
        language.setResultsName('language') & \
        gui.setResultsName('gui') & \
        options.setResultsName('options')
    component = Suppress(CaselessLiteral("component")) + \
        identifier.setResultsName("name") + op + \
        componentContents.setResultsName("properties") + cl + semicolon

    CDSL = idslImports.setResultsName("imports") + \
        component.setResultsName("component")
    CDSL.ignore(cppStyleComment)
    tree = CDSL.parseString(text)
    return CDSLParsing.component(tree)
def guess_language(string=None, filename=None):
    """
    Attempt to guess the language.

    The leading ``#`` comment lines are scanned for a
    ``# language: <code>`` phrase. When one is found the matching
    language is returned; when none is found, or the text cannot be
    parsed, English is returned.

    Raises RuntimeError when neither `string` nor `filename` is given.
    """
    language_tag = (Suppress(Keyword('language')) + Suppress(':') +
                    Word(unicodePrintables)('language'))
    header = ZeroOrMore(
        Suppress('#') + (language_tag | Suppress(restOfLine))
    )

    try:
        if string:
            tokens = header.parseString(string)
        elif filename:
            with open(filename, 'r', 'utf-8') as fp:
                tokens = header.parseFile(fp)
        else:
            raise RuntimeError("Must pass string or filename")

        code = tokens.language
        if code != '':
            return languages.Language(code=code)
    except ParseException:
        # unparseable header: fall through to English
        pass

    return languages.English()
def gth_out():
    """
    Return the grammar for the responses which can come out of the GTH.

    The grammar is built on first use and cached in the module-level
    ``gth_out_grammar``. It was written with reference to gth_out.dtd.

    More information about the GTH API at https://www.corelatus.com/gth/api/
    """
    global gth_out_grammar

    if gth_out_grammar:
        return gth_out_grammar

    # Literals
    tag_open = Suppress("<")
    tag_close = Suppress(">")
    empty_close = Suppress("/>")

    tagattr = Word(alphas) + Suppress("=") + quotedString

    ok = _empty_tag("ok", 0)
    job = _empty_tag("job")
    error = _empty_tag("error") | _tag("error", Word(alphas + " "))

    event_child = (tag_open
                   + Word(alphas + "_0123456789").setResultsName("type")
                   + _attlist() + empty_close)
    event = _tag("event", event_child, 0)

    attributes = ZeroOrMore(_empty_tag("attribute"))
    attributes.setParseAction(_collapse_attributes)

    resource = _empty_tag("resource") ^ _tag("resource", attributes)
    resources = ZeroOrMore(resource) ^ error

    # REVISIT: state grammar below is incomplete
    state = _tag("state", resources, 0)

    gth_out_grammar = ok ^ job ^ event ^ state ^ resource ^ error
    return gth_out_grammar
def initBNF(self):
    """Build ``self.program``, the grammar for a whole program.

    A program is any number of constant declarations, variable
    declarations, insert directives and (optionally labelled) commands;
    ``#`` comments are ignored. Each construct is handed to the
    corresponding ``*_action`` method of this object.
    """
    const_stmt = (CONST + NAME + VALUE).setParseAction(self.const_action)
    var_stmt = (VAR + NAME + VALUE +
                Optional(COMMA + Regex("[^#\n]*"))
                ).setParseAction(self.var_action)
    insert_stmt = (INSERT + dblQuotedString +
                   LineEnd().suppress()).setParseAction(self.insert_action)

    label = IDENTIFIER + COLON

    # A command is an identifier plus the rest of the line (up to a
    # comment); only spaces and tabs are skipped, so it cannot run on
    # past the end of the line.
    command_body = (IDENTIFIER.setWhitespaceChars(" \t") +
                    Regex("[^#\n]*").setWhitespaceChars(" \t") +
                    LineEnd().suppress())
    plain_command = command_body.setParseAction(self.command_action)
    labeled_command = (label + command_body
                       ).setParseAction(self.label_command_action)

    statement = (const_stmt | var_stmt | insert_stmt
                 | labeled_command | plain_command)
    self.program = ZeroOrMore(statement)
    self.program.ignore(pythonStyleComment)
def test_prefixed_line(self):
    """A prefixed line yields its stripped value, surrounded by markers
    for any comments and empty lines around it."""
    parser = (ZeroOrMore(comment | empty_line)
              + prefixed_line("Foo:")
              + ZeroOrMore(comment | empty_line))
    foo = prefixed_line("Foo:")

    single_line_cases = [
        ("Foo: bar\n\n\n",
         ["bar", "<EMPTYLINE>", "<EMPTYLINE>", "<EMPTYLINE>"]),
        ("Foo: bar    \n\n\n",
         ["bar", "<EMPTYLINE>", "<EMPTYLINE>", "<EMPTYLINE>"]),
        ("Foo: bar    baz\n\n\n",
         ["bar    baz", "<EMPTYLINE>", "<EMPTYLINE>", "<EMPTYLINE>"]),
        ("Foo: bar    \n      \n",
         ["bar", "<EMPTYLINE>", "<EMPTYLINE>"]),
        ("Foo: bar    \n#baz\n  #spam\n",
         ["bar", "<COMMENT>", "<COMMENT>", "<EMPTYLINE>"]),
    ]
    for text, expected in single_line_cases:
        self.assertEqual(parser.parseString(text, True).asList(), expected)

    # A second prefixed line chained after the first one.
    bar = parser + prefixed_line("Bar:") + ZeroOrMore(comment | empty_line)
    self.assertEqual(
        bar.parseString("Foo: bar    \n#baz\nBar: baz spam\n  #spam\n", True).asList(),
        ["bar", "<COMMENT>", "baz spam", "<COMMENT>", "<EMPTYLINE>"])
def _create_grammar():
    """Create the DBC grammar.

    Returns a parser element matching one or more DBC entries followed by
    the end of the input.  Most entries are wrapped in ``Group``; ``BS_``
    and event entries are suppressed from the results.
    """

    # --- Basic tokens -----------------------------------------------------
    # Any run of printable characters except ';' and ':'.
    word = Word(printables.replace(';', '').replace(':', ''))
    integer = Group(Optional('-') + Word(nums))
    positive_integer = Word(nums).setName('positive integer')
    number = Word(nums + '.Ee-+')
    colon = Suppress(Literal(':'))
    scolon = Suppress(Literal(';'))
    pipe = Suppress(Literal('|'))
    at = Suppress(Literal('@'))
    sign = Literal('+') | Literal('-')
    lp = Suppress(Literal('('))
    rp = Suppress(Literal(')'))
    lb = Suppress(Literal('['))
    rb = Suppress(Literal(']'))
    comma = Suppress(Literal(','))
    # Only spaces are skipped before a node name (not newlines).
    node = Word(alphas + nums + '_-').setWhitespaceChars(' ')
    frame_id = Word(nums).setName('frame id')

    # --- Sections ---------------------------------------------------------
    # NOTE(review): QuotedString() is called without arguments throughout,
    # so it is presumably a helper defined elsewhere in this file rather
    # than pyparsing's QuotedString -- confirm.
    version = Group(Keyword('VERSION') - QuotedString())
    version.setName(VERSION)

    symbol = Word(alphas + '_') + Suppress(LineEnd())

    symbols = Group(Keyword('NS_') - colon - Group(ZeroOrMore(symbol)))
    symbols.setName('NS_')

    # Matched but dropped from the results.
    discard = Suppress(Keyword('BS_') - colon).setName('BS_')

    nodes = Group(Keyword('BU_') - colon - Group(ZeroOrMore(node)))
    nodes.setName('BU_')

    signal = Group(
        Keyword(SIGNAL) -
        Group(word + Optional(word)) -
        colon -
        Group(positive_integer -
              pipe -
              positive_integer -
              at -
              positive_integer -
              sign) -
        Group(lp - number - comma - number - rp) -
        Group(lb - number - pipe - number - rb) -
        QuotedString() -
        Group(delimitedList(node)))
    signal.setName(SIGNAL)

    # A message is followed by zero or more signal entries.
    message = Group(
        Keyword(MESSAGE) -
        frame_id -
        word -
        colon -
        positive_integer -
        word -
        Group(ZeroOrMore(signal)))
    message.setName(MESSAGE)

    # Event entries are matched but dropped from the results.
    event = Suppress(
        Keyword(EVENT) -
        word -
        colon -
        positive_integer -
        lb -
        number -
        pipe -
        number -
        rb -
        QuotedString() -
        number -
        number -
        word -
        node -
        scolon)
    event.setName(EVENT)

    # A comment targets a signal, message, event or node, or stands alone.
    comment = Group(
        Keyword(COMMENT) -
        ((Keyword(SIGNAL) -
          frame_id -
          word -
          QuotedString() -
          scolon).setName(SIGNAL) |
         (Keyword(MESSAGE) -
          frame_id -
          QuotedString() -
          scolon).setName(MESSAGE) |
         (Keyword(EVENT) -
          word -
          QuotedString() -
          scolon).setName(EVENT) |
         (Keyword(NODES) -
          word -
          QuotedString() -
          scolon).setName(NODES) |
         (QuotedString() -
          scolon).setName('QuotedString')))
    comment.setName(COMMENT)

    # After the type word: nothing, a list of quoted strings (commas
    # optional), or a list of numbers.
    attribute_definition = Group(
        Keyword(ATTRIBUTE_DEFINITION) -
        ((QuotedString()) |
         (Keyword(SIGNAL) |
          Keyword(MESSAGE) |
          Keyword(EVENT) |
          Keyword(NODES)) + QuotedString()) -
        word -
        (scolon |
         (Group(ZeroOrMore(Group(
             (comma | Empty()) + QuotedString()))) +
          scolon) |
         (Group(ZeroOrMore(number)) +
          scolon)))
    attribute_definition.setName(ATTRIBUTE_DEFINITION)

    attribute_definition_default = Group(
        Keyword(ATTRIBUTE_DEFINITION_DEFAULT) -
        QuotedString() -
        (number | QuotedString()) -
        scolon)
    attribute_definition_default.setName(ATTRIBUTE_DEFINITION_DEFAULT)

    # The optional middle group names the object the attribute applies to.
    attribute = Group(
        Keyword(ATTRIBUTE) -
        QuotedString() -
        Group(
            Optional((Keyword(MESSAGE) + frame_id) |
                     (Keyword(SIGNAL) + frame_id + word) |
                     (Keyword(NODES) + word))) -
        (QuotedString() | number) -
        scolon)
    attribute.setName(ATTRIBUTE)

    choice = Group(
        Keyword(CHOICE) -
        Group(Optional(frame_id)) -
        word -
        Group(OneOrMore(Group(integer + QuotedString()))) -
        scolon)
    choice.setName(CHOICE)

    value_table = Group(
        Keyword(VALUE_TABLE) -
        word -
        Group(OneOrMore(Group(integer + QuotedString()))) -
        scolon)
    value_table.setName(VALUE_TABLE)

    signal_type = Group(
        Keyword(SIGNAL_TYPE) -
        frame_id -
        word -
        colon -
        positive_integer -
        scolon)
    signal_type.setName(SIGNAL_TYPE)

    # Each range is written "a-b"; only the first number is kept because
    # the dash and the second number are suppressed.
    signal_multiplexer_values = Group(
        Keyword(SIGNAL_MULTIPLEXER_VALUES) -
        frame_id -
        word -
        word -
        Group(
            delimitedList(positive_integer -
                          Suppress('-') -
                          Suppress(positive_integer))) -
        scolon)
    signal_multiplexer_values.setName(SIGNAL_MULTIPLEXER_VALUES)

    message_add_sender = Group(
        Keyword(MESSAGE_TX_NODE) -
        frame_id -
        colon -
        Group(delimitedList(node)) -
        scolon)
    message_add_sender.setName(MESSAGE_TX_NODE)

    attribute_definition_rel = Group(
        Keyword(ATTRIBUTE_DEFINITION_REL) -
        (QuotedString() |
         (Keyword(NODES_REL) + QuotedString())) -
        word -
        (scolon |
         (Group(ZeroOrMore(Group(
             (comma | Empty()) + QuotedString()))) +
          scolon) |
         (Group(ZeroOrMore(number)) +
          scolon)))
    attribute_definition_rel.setName(ATTRIBUTE_DEFINITION_REL)

    attribute_definition_default_rel = Group(
        Keyword(ATTRIBUTE_DEFINITION_DEFAULT_REL) -
        QuotedString() -
        (number | QuotedString()) -
        scolon)
    attribute_definition_default_rel.setName(ATTRIBUTE_DEFINITION_DEFAULT_REL)

    attribute_rel = Group(
        Keyword(ATTRIBUTE_REL) -
        QuotedString() -
        Keyword(NODES_REL) -
        word -
        Keyword(SIGNAL) -
        frame_id -
        word -
        (positive_integer | QuotedString()) -
        scolon)
    attribute_rel.setName(ATTRIBUTE_REL)

    signal_group = Group(
        Keyword(SIGNAL_GROUP) -
        frame_id -
        word -
        integer -
        colon -
        OneOrMore(word) -
        scolon)
    signal_group.setName(SIGNAL_GROUP)

    # --- Top level --------------------------------------------------------
    # Alternatives are tried in this order (first match wins).
    entry = (message |
             comment |
             attribute |
             choice |
             attribute_definition |
             attribute_definition_default |
             attribute_rel |
             attribute_definition_rel |
             attribute_definition_default_rel |
             signal_group |
             event |
             message_add_sender |
             value_table |
             signal_type |
             signal_multiplexer_values |
             discard |
             nodes |
             symbols |
             version)

    # Convert every matched frame id to int.  Set last, on the shared
    # frame_id object that the productions above reference.
    frame_id.setParseAction(lambda _s, _l, t: int(t[0]))

    return OneOrMore(entry) + StringEnd()
band = Literal( "@" )
args = 1
expr = Forward()
# An atom is either a value-like operand (pushed via pushFirst) or a
# parenthesised sub-expression.  The two alternatives must be joined with
# '|': without it the first alternative is *called* with the second one as
# its argument (i.e. used as a results name) instead of being combined.
atom = ( ( e | floatnumber | integer |
           (ident).setParseAction( assignVar ) + band + integer |
           fn + lpar + expr + ZeroOrMore(colon + expr) + rpar
         ).setParseAction(pushFirst) |
         ( lpar + expr + rpar )
       )

factor = Forward()
factor << (atom | expr)

term = (factor | expr) + ZeroOrMore( multop + expr )
addterm = (term | expr) + ZeroOrMore( addop + expr )
expr << ((lpar + expr + rpar) | addterm)
bnf = expr

# The complete pattern must consume the whole input string.
pattern = bnf + StringEnd()

# map operator symbols to corresponding arithmetic operations
@author: luca

Submitted by Luca DallOlio, September, 2010
(Minor updates by Paul McGuire, June, 2012)
'''
from pyparsing import Word, ZeroOrMore, printables, Suppress, OneOrMore, Group, \
    LineEnd, Optional, White, originalTextFor, hexnums, nums, Combine, Literal, Keyword, \
    cStyleComment, Regex, Forward, MatchFirst, And, oneOf, alphas, alphanums, \
    delimitedList

# http://www.antlr.org/grammar/ANTLR/ANTLRv3.g

# Tokens
EOL = Suppress(LineEnd()) # $
# The original text of everything up to (not including) the end of line.
singleTextString = originalTextFor(ZeroOrMore(~EOL + (White(" \t") | Word(printables)))).leaveWhitespace()
XDIGIT = hexnums
INT = Word(nums)
# Backslash escape: one of the listed characters, 'u' plus exactly four hex
# digits, or any single printable character.
ESC = Literal('\\') + (oneOf(list(r'nrtbf\">' + "'")) | ('u' + Word(hexnums, exact=4)) | Word(printables, exact=1))
# An escape, or a single printable character that is neither a single
# quote nor a backslash.
LITERAL_CHAR = ESC | ~(Literal("'") | Literal('\\')) + Word(printables, exact=1)
CHAR_LITERAL = Suppress("'") + LITERAL_CHAR + Suppress("'")
# The quotes are suppressed and the characters are combined into one token.
STRING_LITERAL = Suppress("'") + Combine(OneOrMore(LITERAL_CHAR)) + Suppress("'")
DOUBLE_QUOTE_STRING_LITERAL = '"' + ZeroOrMore(LITERAL_CHAR) + '"'
DOUBLE_ANGLE_STRING_LITERAL = '<<' + ZeroOrMore(Word(printables, exact=1)) + '>>'
# Token references start with an upper-case letter, rule references with a
# lower-case letter.
TOKEN_REF = Word(alphas.upper(), alphanums + '_')
RULE_REF = Word(alphas.lower(), alphanums + '_')
def __init__(self):
    """
    Build the query grammar and store it in ``self.program``.

    A program is a list of statements.
    Statements can be 'set' or 'select' statements.
    Rest-of-line comments introduced by ``--`` are ignored.
    """
    statement = Forward()
    SELECT, FROM, WHERE, SET, AS = map(CaselessKeyword, "select from where set as".split())

    ident = Word("$" + alphas, alphanums + "_$").setName("identifier")
    columnName = delimitedList(ident, ".", combine=True).setName("column name")
    # Labelled "table name" so error messages do not call a table a column.
    tableName = delimitedList(ident, ".", combine=True).setName("table name")
    tableNameList = Group(delimitedList(tableName))

    LPAR, RPAR, EQ = map(Literal, "()=")
    arrow = Literal("->")

    # A transition element: either name(arg) or a bare name, each possibly
    # followed by line ends.
    t_expr = Group(ident + LPAR + Word("$" + alphas, alphanums + "_$") + RPAR + ZeroOrMore(LineEnd())).setName("t_expr") | \
        Word(alphas, alphanums + "_$") + ZeroOrMore(LineEnd())
    t_expr_chain = t_expr + ZeroOrMore(arrow + t_expr)

    whereExpression = Forward()
    and_, or_, in_ = map(CaselessKeyword, "and or in".split())

    binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
    realNum = ppc.real()
    intNum = ppc.signed_integer()

    columnRval = realNum | intNum | quotedString | columnName  # need to add support for alg expressions
    whereCondition = Group(
        (columnName + binop + (columnRval | Word(printables))) |
        (columnName + in_ + "(" + delimitedList(columnRval) + ")") |
        (columnName + in_ + "(" + statement + ")") |
        ("(" + whereExpression + ")")
    )
    whereExpression << whereCondition + ZeroOrMore((and_ | or_) + whereExpression)

    # Assignment for handoff.
    setExpression = Forward()
    setStatement = Group(
        (ident) |
        (quotedString("json_path") + AS + ident("name")) |
        ("(" + setExpression + ")")
    )
    setExpression << setStatement + ZeroOrMore((and_ | or_) + setExpression)

    optWhite = ZeroOrMore(LineEnd() | White())

    # Define the statement grammar: a select statement or a standalone
    # "set <column> = <value>" statement.  Note that the "select" results
    # name is attached to the trailing SET group only.
    statement <<= (
        Group(
            Group(SELECT + t_expr_chain)("concepts") + optWhite +
            Group(FROM + tableNameList) + optWhite +
            Group(Optional(WHERE + whereExpression("where"), "")) + optWhite +
            Group(Optional(SET + setExpression("set"), ""))("select")
        )
        |
        Group(
            SET + (columnName + EQ + (quotedString | intNum | realNum))
        )("set")
    )("statement")

    # Make a program a series of statements.
    self.program = statement + ZeroOrMore(statement)

    # Make rest-of-line comments.
    comment = "--" + restOfLine
    self.program.ignore(comment)
# A '$'-prefixed name, optionally preceded by '-'.
VARIABLE = Regex(r"-?\$[-a-zA-Z_][-a-zA-Z0-9_]*")
# A number optionally followed by one of the listed unit suffixes.
NUMBER_VALUE = Regex(r"-?\d+(?:\.\d*)?|\.\d+") + Optional(
    Regex(r"(em|ex|px|cm|mm|in|pt|pc|deg|s|%)(?![-\w])"))
# Either text containing one or two slashes, or an http(s)/ftp/file URL.
PATH = Regex(r"[-\w\d_\.]*\/{1,2}[-\w\d_\.\/]*") | Regex(
    r"((https?|ftp|file):((//)|(\\\\))+[\w\d:#@%/;$()~_?\+-=\\\.&]*)")
POINT_PART = (NUMBER_VALUE | Regex(r"(top|bottom|left|right)"))
POINT = POINT_PART + POINT_PART

# Values
# EXPRESSION is declared first because INTERPOLATION_VAR, PARAMS and PARENS
# refer to it before its definition is attached below.
EXPRESSION = Forward()
INTERPOLATION_VAR = Suppress("#") + LACC + EXPRESSION + RACC
SIMPLE_VALUE = NUMBER_VALUE | PATH | IDENT | COLOR_VALUE | quotedString
DIV_STRING = SIMPLE_VALUE + OneOrMore(Literal("/") + SIMPLE_VALUE)
PARAMS = LPAREN + (POINT | EXPRESSION) + ZeroOrMore(COMMA + (POINT | EXPRESSION)) + RPAREN
FUNCTION = Regex(r"-?[a-zA-Z_][-a-zA-Z0-9_]*") + PARAMS
VALUE = FUNCTION | VARIABLE | SIMPLE_VALUE
PARENS = LPAREN + EXPRESSION + RPAREN
# The operator regex requires trailing whitespace after the operator.
MATH_OPERATOR = Regex(r"(\+|-|/|\*|and|or|==|!=|<=|<|>|>=)\s+")
# The result of '<<' is assigned to a throwaway name.
_ = EXPRESSION << ((VALUE | PARENS) + ZeroOrMore(MATH_OPERATOR + (VALUE | PARENS)))

# Declaration
TERM = (DIV_STRING | EXPRESSION | INTERPOLATION_VAR) + Optional(",")
DECLARATION_NAME = Optional("*") + OneOrMore(IDENT | INTERPOLATION_VAR)
# Declarations may nest inside a braced block, hence the Forward.
DECLARATION = Forward()
_ = DECLARATION << (
    DECLARATION_NAME + ":" + ZeroOrMore(TERM) + Optional("!important") +
    Optional(LACC + OneOrMore(DECLARATION | CSS_COMMENT | SCSS_COMMENT) + RACC) +
    OPT_SEMICOLON)
import collections
import subprocess
import argparse
import json
import sys
import os

# GCC *.map file grammar for parsing code size per file.
# A lower-case hex literal, converted to int by its parse action.
hex_word = Regex(r"0x[a-f0-9]+").setParseAction(lambda x: int(x[0], 16))
address = hex_word ^ Literal("[!provide]")
size = hex_word
# Free text up to the next address (or end of input), whitespace-stripped.
meta = SkipTo(address ^ StringEnd()).setParseAction(lambda x: x[0].strip())
# Three accepted shapes: address + size; address + size + text followed by
# any number of address + text groups; address + text.
line_a = Group(address + size)
line_b = Group(address + size + meta) + ZeroOrMore(Group(address + meta))
line_c = Group(address + meta)
# Leading text before the first address is captured by SkipTo; '^' picks
# the longest-matching alternative.
grammar = SkipTo(address ^ StringEnd()) + (line_a ^ line_b ^ line_c ^ StringEnd())


def parse_arguments():
    """ Parse command line arguments. """
    parser = argparse.ArgumentParser()
    # Add options
    parser.add_argument("--riot",
An invalid requirement was found, users should refer to PEP 508.
    """


ALPHANUM = Word(string.ascii_letters + string.digits)

# Punctuation tokens; all are suppressed from the parse results.
LBRACKET = L("[").suppress()
RBRACKET = L("]").suppress()
LPAREN = L("(").suppress()
RPAREN = L(")").suppress()
COMMA = L(",").suppress()
SEMICOLON = L(";").suppress()
AT = L("@").suppress()

# An identifier starts with an alphanumeric run; any '-', '_' or '.'
# characters inside it must be followed by another alphanumeric run.
PUNCTUATION = Word("-_.")
IDENTIFIER_END = ALPHANUM | (ZeroOrMore(PUNCTUATION) + ALPHANUM)
IDENTIFIER = Combine(ALPHANUM + ZeroOrMore(IDENTIFIER_END))

NAME = IDENTIFIER("name")
EXTRA = IDENTIFIER

# A URL is '@' followed by any run of characters other than space and ';'.
URI = Regex(r"[^ ;]+")("url")
URL = AT + URI

# Extras: a bracketed, comma-separated (possibly empty) list.
EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA)
EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET)("extras")

# Both version patterns are compiled verbose and case-insensitive; '^'
# keeps whichever alternative matches the longer text.
VERSION_PEP440 = Regex(REGEX, re.VERBOSE | re.IGNORECASE)
VERSION_LEGACY = Regex(LEGACY_REGEX, re.VERBOSE | re.IGNORECASE)
VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY
alphas, alphas8bit, alphanums, hexnums, nums, printables
from testbin.parser import parseurl

# Every character with code 0..255.
octet = [chr(i) for i in range(0, 256)]
OCTET = oneOf(octet)

# Control characters: codes 0..31 plus 127.
ctl = [chr(i) for i in range(0, 32)]
ctl.append(chr(127))
CTL = oneOf(ctl)

CR = "\r"
LF = "\n"
CRLF = CR + LF
SP = ' '
HTAB = '\t'

# Whitespace is significant in the elements below, so automatic whitespace
# skipping is turned off on each of them.
WSP = Literal(HTAB) ^ Literal(SP)
WSP.leaveWhitespace()
# Optional whitespace and a line break, followed by at least one
# whitespace character.
LWS = Optional(ZeroOrMore(WSP) + CRLF) + OneOrMore(WSP)
LWS.leaveWhitespace()
SWS = Optional(LWS)
SWS.leaveWhitespace()

ICAP_Version = Literal("ICAP/1.0")
Token = Word(alphas)
Extension_Method = Token
# '^' selects the longest match among the three fixed methods and an
# arbitrary alphabetic extension method.
Method = Literal("REQMOD") ^ Literal("RESPMOD") ^ Literal("OPTIONS") ^ \
    Extension_Method

# URI components; host, port, user, password and path come from parseurl.
Scheme = Literal("icap")
Host = parseurl.host
Port = parseurl.port
User_Info = parseurl.user + Optional(Literal(":") + parseurl.password)
Authority = Optional(User_Info + "@") + Host + Optional(":" + Port)
Abs_Path = parseurl.path
def parse_algebra(self):
    """
    Turn the algebraic expression in `self.math_expr` into a parse tree.

    The resulting `pyparsing.ParseResults` is stored in `self.tree`; its
    nested groups mirror parentheses and operator precedence.  Operators
    stay in the tree and numbers are kept as strings (no float conversion).

    The groups and result names make the `repr()` of the result hard to
    read.  For debugging, use something like
      print(OBJ.tree.asXML())
    """
    digits = Word(nums)
    sign = Literal('+') | Literal('-')

    # Mantissa forms: 0.33 or 7 or .34 or 16.
    with_leading_digits = digits + Optional("." + Optional(digits))
    with_leading_point = "." + digits
    # `Combine` forbids the whitespace pyparsing would otherwise allow
    # between the pieces.
    mantissa = Combine(with_leading_digits | with_leading_point)

    exponent = CaselessLiteral("E") + Optional(sign) + digits

    # SI suffixes and percent, e.g. 0.33k
    suffix = MatchFirst(Literal(k) for k in SUFFIXES.keys())

    number = Group(
        Optional(sign) + mantissa + Optional(exponent) + Optional(suffix)
    )("number")

    # Declared up front because functions and parentheses recurse into it.
    expr = Forward()

    # Variable and function names start with a letter or underscore and
    # may contain digits afterwards.
    identifier = Word(alphas + "_", alphanums + "_")

    varname = Group(identifier)("variable")
    varname.setParseAction(self.variable_parse_action)

    function = Group(
        identifier + Suppress("(") + expr + Suppress(")")
    )("function")
    function.setParseAction(self.function_parse_action)

    atom = Group(number | function | varname | "(" + expr + ")")("atom")

    # Tightest-binding operators first, so precedence is preserved.
    power = Group(atom + ZeroOrMore("^" + atom))("power")
    # 5k || 4k
    parallel = Group(power + ZeroOrMore('||' + power))("parallel")
    # 7 * 5 / 4
    times_or_over = Literal('*') | Literal('/')
    product = Group(
        parallel + ZeroOrMore(times_or_over + parallel)
    )("product")
    # -5 + 4 - 3
    total = Group(
        Optional(sign) + product + ZeroOrMore(sign + product)
    )("sum")

    # Close the recursion.
    expr << total  # pylint: disable=pointless-statement

    parsed = (expr + stringEnd).parseString(self.math_expr)
    self.tree = parsed[0]
# # A simple example showing the use of the implied listAllMatches=True for # results names with a trailing '*' character. # # This example performs work similar to itertools.groupby, but without # having to sort the input first. # from pyparsing import Word, ZeroOrMore, nums aExpr = Word("A", nums) bExpr = Word("B", nums) cExpr = Word("C", nums) grammar = ZeroOrMore(aExpr("A*") | bExpr("B*") | cExpr("C*")) results = grammar.parseString("A1 B1 A2 C1 B2 A3") print(results.dump())
join_stmt = (join_op + source.setResultsName('source') + on_op.setResultsName('join_cols')).setParseAction(_build_join) # Example # join = join_stmt.parseString('join jtable1 as jt1') # print(join.source.name, join.source.alias) # (jtable1, jt1) # define the select grammar # select_stmt = Forward() select_stmt << (select_kw + (Keyword('*').setResultsName('columns') | select_column_list.setResultsName('columns')) + from_kw + many_sources.setResultsName('sources') + ZeroOrMore(Group(join_stmt)).setResultsName('joins')) # Examples: # select = select_stmt.parseString( # '''SELECT t1.col AS t1_c, t2.col AS t2_c, t3.col AS t3_c # FROM table1 AS t1 # JOIN table2 AS t2 # JOIN table3 AS t3;''') # print(select.columns[0].name, select.columns[0].alias) # ('t1.col', 't1_c') # print(select.columns[1].name, select.columns[1].alias) # ('t2.col', 't2_c') # print(select.columns[2].name, select.columns[2].alias) # ('t3.col', 't2_c') # print(select.sources[0].name, select.sources[0].alias) # ('table1', 't1')
a_expected_result=af_expected_result, a_assert_flag=False)

# An address token: letters, digits and the characters _ - . / :
addr = Word(alphanums + '_-./:').setResultsName('addr').setName('<addr>')
semicolon, lbrack, rbrack = map(Suppress, ';{}')
exclamation = Char('!')

# A brace-delimited list whose entries are (optionally '!'-negated)
# addresses or further nested lists; declared as a Forward because it
# refers to itself.
aml_nesting = Forward()
aml_nesting << (
    lbrack
    + (
        ZeroOrMore(
            Group(
                (exclamation('not') + aml_nesting)
                | (exclamation('not') + addr + semicolon)
                | (aml_nesting)
                | (addr + semicolon)  # never set a ResultsLabel here, you get duplicate but un-nested 'addr'
            )  # never set a ResultsLabel here, you get no []
        )(None)
    )('aml_nesting')
    + rbrack
    + semicolon
)(None)  # ResultsLabel here didn't force a list, one before here did.

# 'acl' (suppressed), the ACL name, then any number of nested lists
# collected under 'aml_series'.
clause_stmt_acl_standalone = (
    Literal('acl').suppress()
    + Word(alphanums + '_-')('acl_name')
    + (
        ZeroOrMore(
            Group(
                aml_nesting(None)  # peel away testing label here
            )  # ('aml_series3')
        )('aml_series')
    )(None)
)(None)

# Syntax:
# A key secret wrapped in matching quotes.  The single-quoted form uses the
# character set that allows double quotes and vice versa; Combine returns
# the quotes and the secret as one token.
quoted_trusted_key_secret_type = (
    Combine(squote + Word(charset_key_secret_base_dquote_allowed) + squote)
    | Combine(dquote + Word(charset_key_secret_base_squote_allowed) + dquote)
)
quoted_trusted_key_secret_type.setName('<quoted-key-secret>')

# domain name, flags, protocol, algorithm, and the Base64
# representation of the key data.
# Once the flags have matched, the '-' operators make a missing protocol,
# algorithm or secret an immediate error instead of allowing backtracking.
trusted_keys_statements_set = (
    Keyword('trusted-keys').suppress()
    + lbrack
    + Group(
        ungroup(trusted_key_domain_name)('domain')
        + trusted_key_flags_type
        - trusted_key_protocol_type
        - trusted_key_algorithm_type
        - quoted_trusted_key_secret_type
        + semicolon
    )('')
    + rbrack
    + semicolon
)('trusted_keys')

# Zero or more complete statements, under the same results name.
trusted_keys_statements_series = (
    ZeroOrMore(
        trusted_keys_statements_set
    )
)('trusted_keys')
class Grammar:
    """ Lexical grammar of SCS definition (.sii) file """

    class Parse:
        """ Parse actions that convert matched tokens into Python values """

        @staticmethod
        def int(toks):
            """ Parse an ordinary int value """
            toks[0] = int(toks[0])
            return toks

        @staticmethod
        def float(toks):
            """
            Parse an ordinary float, or a hex string prefixed with '&' that
            is decoded as a big-endian ('>f') 4-byte float
            """
            if toks[0].startswith('&'):
                # Drop the '&' marker and reinterpret the hex digits as raw bytes.
                binary = bytes.fromhex(toks[0][1:])
                toks[0] = struct.unpack('>f', binary)[0]
            else:
                toks[0] = float(toks[0])
            return toks

        @staticmethod
        def bool(toks):
            """ Parse bool value: True for 'true', False for anything else """
            toks[0] = (toks[0] == 'true')
            return toks

        @staticmethod
        def reference(toks):
            """ Parse delayed cross reference to an entry """
            toks[0] = DefinitionFile.Reference(toks[0])
            return toks

        @staticmethod
        def tuple(toks):
            """ Parse a tuple"""
            toks[0] = tuple(toks[0])
            return toks

        @staticmethod
        def include(toks):
            """ Include content of another definition file (currently a no-op) """
            pass

    # NOTE: several attributes below (int, float, bool, tuple, property,
    # file) reuse builtin names inside this class body.  The Parse methods
    # above still reach the builtins because function bodies do not see
    # class-level names.

    identifier = Word(alphanums + '_')
    # An entry name, with optional surrounding double quotes removed.
    name = Optional(Suppress('"')) + Word(alphanums + '.' + '_') + Optional(Suppress('"'))

    # Each typed property is "<identifier> : <value>"; its parse action
    # prepends a type tag to the tokens in place.
    intValue = Word(nums + '-', nums).setParseAction(Parse.int)
    int = identifier + Suppress(':') + intValue
    int.setParseAction(lambda toks: toks.insert(0, 'int'))

    binaryFloat = Word('&', hexnums)
    regularFloat = Word(nums + '-', nums + '.' + 'eE' + '-')
    floatValue = (regularFloat ^ binaryFloat).setParseAction(Parse.float)
    float = identifier + Suppress(':') + floatValue
    float.setParseAction(lambda toks: toks.insert(0, 'float'))

    boolValue = (Keyword('true') ^ Keyword('false')).setParseAction(
        Parse.bool)
    bool = identifier + Suppress(':') + boolValue
    bool.setParseAction(lambda toks: toks.insert(0, 'bool'))

    # Text is either a (possibly multi-line) quoted string or a bare identifier.
    textValue = QuotedString('"', multiline=True) ^ identifier
    text = identifier + Suppress(':') + textValue
    text.setParseAction(lambda toks: toks.insert(0, 'text'))

    # A parenthesised, comma-separated list of int/float values.
    tupleValue = Group(
        Suppress('(') + delimitedList(intValue ^ floatValue, delim=',') +
        Suppress(')'))
    tupleValue.setParseAction(Parse.tuple)
    tuple = identifier + Suppress(':') + tupleValue
    tuple.setParseAction(lambda toks: toks.insert(0, 'tuple'))

    referenceValue = Word(alphanums + '.' + '_').setParseAction(
        Parse.reference)
    reference = identifier + Suppress(':') + referenceValue
    reference.setParseAction(lambda toks: toks.insert(0, 'reference'))

    # An array item is "<identifier>[<optional index>] : <any value>"; the
    # bracketed index is suppressed from the results.
    arrayValue = (intValue ^ floatValue ^ boolValue ^ textValue ^ tupleValue
                  ^ referenceValue)
    array = Combine(identifier + Suppress('[' + Optional(intValue) + ']')
                    ) + Suppress(':') + arrayValue
    array.setParseAction(lambda toks: toks.insert(0, 'array'))

    label = Group(identifier + Suppress(':') + name)
    # '^' keeps the longest-matching alternative.
    property = Group(int ^ float ^ bool ^ text ^ tuple ^ reference ^ array)
    include = Suppress(
        Keyword('@include')) + QuotedString('"').setParseAction(
            Parse.include)
    entry = label + Suppress('{') + ZeroOrMore(property ^ include) + Suppress('}')

    # Leading non-alphanumeric characters and the optional 'SiiNunit {'
    # wrapper are matched and dropped, as is an optional closing '}'.
    junk = ZeroOrMore(CharsNotIn(alphanums))
    header = Suppress(junk + Optional(Keyword('SiiNunit') + '{'))
    footer = Suppress(Optional('}'))

    file = header + ZeroOrMore(Group(entry ^ include)) + footer
    # C-style, '//' and '#' comments are all skipped.
    file.ignore(cStyleComment)
    file.ignore(dblSlashComment)
    file.ignore(pythonStyleComment)

    @classmethod
    def tokenize(cls, string: str) -> list:
        """
        Perform lexical analysis and return the list of discovered tokens.

        The whole input must match (parseAll=True).
        """
        return cls.file.parseString(string, parseAll=True).asList()
def create_bnf(term_descs):
    """
    Build and return the parser for a whole equation.

    term_descs .. list of TermParse objects
    (sign, term_name, term_arg_names), where sign can be real or complex
    multiplier.  It is handed to the term parse action together with the
    shared linear combination context.
    """
    # Linear combination context, shared by the '=' and the term actions.
    context = ['+']

    equals_sign = Literal("=").setParseAction(rhs(context))
    zero_rhs = Literal("0").suppress()

    # --- numbers ----------------------------------------------------------
    dot = Literal(".")
    exp_mark = CaselessLiteral("E")
    signed_int = Word("+-" + nums, nums)
    fraction = Optional(dot + Optional(Word(nums)))
    real = Combine(
        Word("+-" + nums, nums)
        + fraction
        + Optional(exp_mark + Word("+-" + nums, nums))
    )
    # An optional trailing 'j' marks an imaginary multiplier.
    number = real + Optional(Literal('j'), default='')

    plus_or_minus = oneOf('+ -')
    number_expr = Forward()
    number_expr << (
        Optional(plus_or_minus) + ZeroOrMore('(') + number
        + ZeroOrMore(plus_or_minus + number_expr)
        + ZeroOrMore(')')
    )

    # --- term arguments ---------------------------------------------------
    ident = Word(alphas, alphanums + "_")

    integral = Combine(
        (Literal('i') + Word(alphanums))
        | Literal('i')
        | Literal('a')
        | Word(nums)
    )("integral")

    history = Optional('[' + signed_int + ']', default='')("history")
    variable = Combine(Word(alphas, alphanums + '._') + history)

    derivative = Combine(
        Literal('d') + variable + Literal('/') + Literal('dt'))

    trace = Combine(
        Literal('tr') + '(' + Optional(ident + Literal(',')) + variable + ')',
        adjacent=False)

    any_variable = derivative | trace | variable
    arg_list = Group(delimitedList(any_variable))

    flag = Literal('a')

    # --- a single term ----------------------------------------------------
    sign_part = Optional(Literal('+') | Literal('-'), default='+')("sign")
    mul_part = Optional(number_expr + Literal('*').suppress(),
                        default=['1.0', ''])("mul")

    # What may follow "name.": integral.region.flag, integral.region, or
    # just a region.
    qualifier = (
        integral + "." + ident("region") + "." + flag("flag")
        | integral + "." + ident("region")
        | ident("region")
    )
    term_desc = Combine(
        ident("name") + Optional("." + qualifier))("term_desc")

    term = (
        (sign_part ^ mul_part)
        + term_desc
        + "(" + Optional(arg_list, default=[''])("args") + ")"
    )
    term.setParseAction(collect_term(term_descs, context))

    # --- the equation -----------------------------------------------------
    rhs_terms = equals_sign + OneOrMore(term)
    rhs_zero = equals_sign + zero_rhs
    equation = (StringStart() + OneOrMore(term)
                + Optional(rhs_terms | rhs_zero) + StringEnd())
    ## term.setDebug()

    return equation
def parse(cls, content, basedir=None, resolve=True, unresolved_value=DEFAULT_SUBSTITUTION):
    """parse a HOCON content

    :param content: HOCON content to parse
    :type content: basestring
    :param basedir: directory that relative ``include`` file paths are joined to;
        when None the path is used as written
    :param resolve: if true, resolve substitutions
    :type resolve: boolean
    :param unresolved_value: assigned value to unresolved substitution.
        If overridden with a default value, it will replace all unresolved values by the default value.
        If it is set to pyhocon.STR_SUBSTITUTION then it will replace the value by its
        substitution expression (e.g., ${x})
    :return: a ConfigTree or a list
    :raises ConfigSubstitutionException: if a substitution stays unresolved while
        unresolved_value is MANDATORY_SUBSTITUTION
    """

    unescape_pattern = re.compile(r'\\.')

    def replace_escape_sequence(match):
        value = match.group(0)
        return cls.REPLACEMENTS.get(value, value)

    def norm_string(value):
        return unescape_pattern.sub(replace_escape_sequence, value)

    def unescape_string(tokens):
        return ConfigUnquotedString(norm_string(tokens[0]))

    def parse_multi_string(tokens):
        # remove the first and last 3 "
        return tokens[0][3:-3]

    def convert_number(tokens):
        n = tokens[0]
        try:
            return int(n, 10)
        except ValueError:
            return float(n)

    def safe_convert_number(tokens):
        # Like convert_number, but leaves the text untouched when it is
        # not numeric at all.
        n = tokens[0]
        try:
            return int(n, 10)
        except ValueError:
            try:
                return float(n)
            except ValueError:
                return n

    def convert_period(tokens):
        period_value = int(tokens.value)
        period_identifier = tokens.unit

        period_unit = next((single_unit for single_unit, values
                            in cls.get_supported_period_type_map().items()
                            if period_identifier in values))

        return period(period_value, period_unit)

    # ${path} or ${?path} for optional substitution
    SUBSTITUTION_PATTERN = r"\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}(?P<ws>[ \t]*)"

    def create_substitution(instring, loc, token):
        # remove the ${ and }
        match = re.match(SUBSTITUTION_PATTERN, token[0])
        variable = match.group('variable')
        ws = match.group('ws')
        optional = match.group('optional') == '?'
        substitution = ConfigSubstitution(variable, optional, ws, instring, loc)
        return substitution

    # "quoted string" followed by optional trailing spaces / tabs
    STRING_PATTERN = '"(?P<value>(?:[^"\\\\]|\\\\.)*)"(?P<ws>[ \t]*)'

    def create_quoted_string(instring, loc, token):
        # remove the surrounding quotes and keep the trailing whitespace
        match = re.match(STRING_PATTERN, token[0])
        value = norm_string(match.group('value'))
        ws = match.group('ws')
        return ConfigQuotedString(value, ws, instring, loc)

    def include_config(instring, loc, token):
        url = None
        file = None
        required = False

        if token[0] == 'required':
            required = True
            final_tokens = token[1:]
        else:
            final_tokens = token

        if len(final_tokens) == 1:  # include "test"
            value = final_tokens[0].value if isinstance(
                final_tokens[0], ConfigQuotedString) else final_tokens[0]
            if value.startswith("http://") or value.startswith(
                    "https://") or value.startswith("file://"):
                url = value
            else:
                file = value
        elif len(final_tokens) == 2:  # include url("test") or file("test")
            # Inspect final_tokens (not the raw token list): with a leading
            # 'required' the raw list is shifted by one, and the quoted
            # string object would otherwise be passed on unconverted.
            value = final_tokens[1].value if isinstance(
                final_tokens[1], ConfigQuotedString) else final_tokens[1]
            if final_tokens[0] == 'url':
                url = value
            else:
                file = value

        if url is not None:
            logger.debug('Loading config from url %s', url)
            obj = ConfigFactory.parse_URL(url,
                                          resolve=False,
                                          required=required,
                                          unresolved_value=NO_SUBSTITUTION)
        elif file is not None:
            path = file if basedir is None else os.path.join(basedir, file)
            logger.debug('Loading config from file %s', path)
            obj = ConfigFactory.parse_file(
                path,
                resolve=False,
                required=required,
                unresolved_value=NO_SUBSTITUTION)
        else:
            raise ConfigException(
                'No file or URL specified at: {loc}: {instring}',
                loc=loc,
                instring=instring)

        return ConfigInclude(obj if isinstance(obj, list) else obj.items())

    @contextlib.contextmanager
    def set_default_white_spaces():
        # Newlines are significant in HOCON, so only spaces and tabs are
        # skipped while the grammar is active.  The previous process-wide
        # default is restored even when parsing raises.
        default = ParserElement.DEFAULT_WHITE_CHARS
        ParserElement.setDefaultWhitespaceChars(' \t')
        try:
            yield
        finally:
            ParserElement.setDefaultWhitespaceChars(default)

    with set_default_white_spaces():
        assign_expr = Forward()
        true_expr = Keyword("true", caseless=True).setParseAction(
            replaceWith(True))
        false_expr = Keyword("false", caseless=True).setParseAction(
            replaceWith(False))
        null_expr = Keyword("null", caseless=True).setParseAction(
            replaceWith(NoneValue()))
        # key = QuotedString('"', escChar='\\', unquoteResults=False) | Word(alphanums + alphas8bit + '._- /')
        key = QuotedString('"', escChar='\\', unquoteResults=False) | \
            Word("0123456789.").setParseAction(safe_convert_number) | Word(alphanums + alphas8bit + '._- /')

        eol = Word('\n\r').suppress()
        eol_comma = Word('\n\r,').suppress()
        comment = (Literal('#') | Literal('//')) - SkipTo(eol | StringEnd())
        comment_eol = Suppress(Optional(eol_comma) + comment)
        comment_no_comma_eol = (comment | eol).suppress()
        number_expr = Regex(
            r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE][+\-]?\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
            re.DOTALL).setParseAction(convert_number)
        period_types = itertools.chain.from_iterable(
            cls.get_supported_period_type_map().values())
        period_expr = Regex(r'(?P<value>\d+)\s*(?P<unit>' +
                            '|'.join(period_types) +
                            ')$').setParseAction(convert_period)

        # multi line string using """
        # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
        multiline_string = Regex(
            '""".*?"*"""',
            re.DOTALL | re.UNICODE).setParseAction(parse_multi_string)
        # single quoted line string
        quoted_string = Regex(
            r'"(?:[^"\\\n]|\\.)*"[ \t]*',
            re.UNICODE).setParseAction(create_quoted_string)
        # unquoted string that takes the rest of the line until an optional comment
        # we support .properties multiline support which is like this:
        # line1  \
        # line2 \
        # so a backslash precedes the \n
        unquoted_string = Regex(
            r'(?:[^^`+?!@*&"\[\{\s\]\}#,=\$\\]|\\.)+[ \t]*',
            re.UNICODE).setParseAction(unescape_string)
        substitution_expr = Regex(r'[ \t]*\$\{[^\}]+\}[ \t]*'
                                  ).setParseAction(create_substitution)
        string_expr = multiline_string | quoted_string | unquoted_string

        value_expr = period_expr | number_expr | true_expr | false_expr | null_expr | string_expr

        include_content = (quoted_string | (
            (Keyword('url') | Keyword('file')) -
            Literal('(').suppress() - quoted_string -
            Literal(')').suppress()))
        include_expr = (Keyword("include", caseless=True).suppress() +
                        (include_content |
                         (Keyword("required") - Literal('(').suppress() -
                          include_content - Literal(')').suppress()))
                        ).setParseAction(include_config)

        root_dict_expr = Forward()
        dict_expr = Forward()
        list_expr = Forward()
        multi_value_expr = ZeroOrMore(comment_eol | include_expr
                                      | substitution_expr | dict_expr
                                      | list_expr | value_expr
                                      | (Literal('\\') - eol).suppress())
        # for a dictionary : or = is optional
        # last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is dictionary concatenation
        inside_dict_expr = ConfigTreeParser(
            ZeroOrMore(comment_eol | include_expr | assign_expr
                       | eol_comma))
        inside_root_dict_expr = ConfigTreeParser(
            ZeroOrMore(comment_eol | include_expr | assign_expr
                       | eol_comma),
            root=True)
        dict_expr << Suppress('{') - inside_dict_expr - Suppress('}')
        root_dict_expr << Suppress('{') - inside_root_dict_expr - Suppress(
            '}')
        list_entry = ConcatenatedValueParser(multi_value_expr)
        list_expr << Suppress('[') - ListParser(list_entry - ZeroOrMore(
            eol_comma - list_entry)) - Suppress(']')

        # special case when we have a value assignment where the string can potentially be the remainder of the line
        assign_expr << Group(key - ZeroOrMore(comment_no_comma_eol) - (
            dict_expr | (Literal('=') | Literal(':') | Literal('+=')) -
            ZeroOrMore(comment_no_comma_eol) -
            ConcatenatedValueParser(multi_value_expr)))

        # the file can be { ... } where {} can be omitted or []
        config_expr = ZeroOrMore(comment_eol | eol) + (
            list_expr | root_dict_expr
            | inside_root_dict_expr) + ZeroOrMore(comment_eol | eol_comma)
        config = config_expr.parseString(content, parseAll=True)[0]

        if resolve:
            allow_unresolved = resolve and unresolved_value is not DEFAULT_SUBSTITUTION and unresolved_value is not MANDATORY_SUBSTITUTION
            has_unresolved = cls.resolve_substitutions(
                config, allow_unresolved)
            if has_unresolved and unresolved_value is MANDATORY_SUBSTITUTION:
                raise ConfigSubstitutionException(
                    'resolve cannot be set to True and unresolved_value to MANDATORY_SUBSTITUTION'
                )

        if unresolved_value is not NO_SUBSTITUTION and unresolved_value is not DEFAULT_SUBSTITUTION:
            cls.unresolve_substitutions_to_value(config, unresolved_value)
    return config
def __init__(self):
    """
    Build the arithmetic-expression grammar and the operator/function tables.

    Please use any of the following symbols:
    expop   :: '^'
    multop  :: '*' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+

    After construction the instance exposes:

    * ``self.bnf`` -- the pyparsing expression for a full arithmetic
      expression;
    * ``self.opn`` -- maps binary operator symbols to callables;
    * ``self.fn``  -- maps function names usable in expressions to callables.

    The grammar's parse actions are ``self.pushFirst`` and
    ``self.pushUMinus``, which are defined elsewhere in the class.
    """
    point = Literal(".")
    e = CaselessLiteral("E")
    fnumber = Combine(
        Word("+-" + nums, nums) +
        Optional(point + Optional(Word(nums))) +
        Optional(e + Word("+-" + nums, nums)))
    ident = Word(alphas, alphas + nums + "_$")
    plus = Literal("+")
    minus = Literal("-")
    mult = Literal("*")
    div = Literal("/")
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    addop = plus | minus
    multop = mult | div
    expop = Literal("^")
    pi = CaselessLiteral("PI")
    expr = Forward()
    atom = ((Optional(oneOf("- +")) +
             (pi | e | fnumber | ident + lpar + expr + rpar
              ).setParseAction(self.pushFirst))
            | Optional(oneOf("- +")) + Group(lpar + expr + rpar)
            ).setParseAction(self.pushUMinus)
    # by defining exponentiation as "atom [ ^ factor ]..." instead of
    # "atom [ ^ atom ]...", we get right-to-left exponents, instead of
    # left-to-right; that is, 2^3^2 = 2^(3^2), not (2^3)^2.
    factor = Forward()
    factor << atom + ZeroOrMore(
        (expop + factor).setParseAction(self.pushFirst))
    term = factor + ZeroOrMore(
        (multop + factor).setParseAction(self.pushFirst))
    expr << term + ZeroOrMore(
        (addop + term).setParseAction(self.pushFirst))
    self.bnf = expr

    # Values whose magnitude does not exceed epsilon are treated as zero
    # by "sgn".
    epsilon = 1e-12

    def sgn(a):
        # The `cmp` builtin was removed in Python 3; (a > 0) - (a < 0)
        # yields the same -1 / +1 and works on Python 2 as well.
        return ((a > 0) - (a < 0)) if abs(a) > epsilon else 0

    # this maps operator symbols to their corresponding arithmetic
    # operations
    self.opn = {
        "+": operator.add,
        "-": operator.sub,
        "*": operator.mul,
        "/": operator.truediv,
        "^": operator.pow
    }
    self.fn = {
        "sin": math.sin,
        "cos": math.cos,
        "tan": math.tan,
        "abs": abs,
        "trunc": lambda a: int(a),
        "round": round,
        "sgn": sgn
    }
# Grammar for the pieces that make up a single node definition.  The
# primitives used here (eq, begin, stop, quotedName, intList, floatList,
# yesno, name, location, color) are defined earlier in the file.
nameLine = (Literal("name").suppress() + eq + quotedName)
# "path" and "control" are recognised but dropped from the results.
path = (Literal("path") + eq + intList).suppress()
control = (Literal("control") + eq + floatList).suppress()
outgoing = (Literal("outgoing") + eq + begin +
            nameLine + path + Optional(control) + stop)
member = Group(Literal("members") + eq + intList)
isEnd = Literal("end") + eq + yesno
isInland = Literal("inland") + eq + yesno
# Each(...) accepts the listed attributes in any order.
node = Group(name.setResultsName("name") + eq + begin +
             Each([location,
                   Optional(color),
                   Optional(isInland),
                   ZeroOrMore(outgoing),
                   OneOrMore(member),
                   Optional(isEnd)]) +
             stop)
nodes = OneOrMore(node)

if __name__ == "__main__":
    # Read the whole file up front; the context manager guarantees the
    # handle is closed even if reading fails.
    with open(r"C:\Program Files (x86)\Steam\steamapps\common\Europa Universalis IV\common\tradenodes\00_tradenodes.txt") as f:
        txt = f.read()
    import tradeviz
    txt = tradeviz.removeComments(txt)
    results = nodes.parseString(txt)
    # Number of "location" occurrences in the text vs. number of parsed
    # nodes.
    nLocations = txt.count("location")
    nFound = len(results)
def _string_to_ast(self, input_string):
    """ Parse a smart search string and return it in an AST like form

        The string is tokenised into atoms (IPv6 prefixes / addresses,
        quoted strings, ``attribute <operator> value`` expressions, tags,
        VRF RTs, boolean operators and plain words); parenthesised groups
        are returned as nested results.

        Reads ``self.match_operators`` and ``self.boolean_operators`` to
        build the operator tokens.

        Returns the pyparsing ``ParseResults`` for `input_string`.
    """
    # simple words
    # we need to use a regex to match on words because the regular
    # Word(alphanums) will only match on American ASCII alphanums and since
    # we try to be Unicode / internationally friendly we need to match much
    # much more. Trying to expand a word class to catch it all seems futile
    # so we match on everything *except* a few things, like our operators
    comp_word = Regex(r"[^*\s=><~!]+")
    word = Regex(r"[^*\s=><~!]+").setResultsName('word')

    # numbers
    comp_number = Word(nums)
    number = Word(nums).setResultsName('number')

    # IPv4 address: one octet followed by three ".octet" groups.  The
    # parentheses matter: `*` binds tighter than `+`, so without them the
    # expression would mean "a dot followed by three undelimited octets".
    ipv4_oct = Regex(r"((2(5[0-5]|[0-4][0-9])|[01]?[0-9][0-9]?))")
    comp_ipv4_address = Combine(ipv4_oct + (('.' + ipv4_oct) * 3))

    # IPv6 address
    ipv6_address = Regex(
        r"((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?"
    ).setResultsName('ipv6_address')
    ipv6_prefix = Combine(
        ipv6_address + Regex(r"/(12[0-8]|1[01][0-9]|[0-9][0-9]?)")
    ).setResultsName('ipv6_prefix')

    # VRF RTs of the form number:number or ipv4-address:number
    vrf_rt = Combine((comp_ipv4_address | comp_number) + Literal(':') +
                     comp_number).setResultsName('vrf_rt')

    # tags
    tags = Combine(Literal('#') + comp_word).setResultsName('tag')

    # operators for matching
    match_op = oneOf(' '.join(
        self.match_operators)).setResultsName('operator')
    boolean_op = oneOf(' '.join(
        self.boolean_operators)).setResultsName('boolean')

    # quoted string
    d_quoted_string = QuotedString('"', unquoteResults=True, escChar='\\')
    s_quoted_string = QuotedString('\'', unquoteResults=True, escChar='\\')
    quoted_string = (s_quoted_string |
                     d_quoted_string).setResultsName('quoted_string')

    # expression to match a certain value for an attribute
    expression = Group(word + match_op +
                       (quoted_string | vrf_rt | word | number)
                       ).setResultsName('expression')

    # we work on atoms, which are single quoted strings, match expressions,
    # tags, VRF RT or simple words.
    # NOTE: Place them in order of most exact match first!
    atom = Group(ipv6_prefix | ipv6_address | quoted_string | expression |
                 tags | vrf_rt | boolean_op | word)

    # parenthesised groups may nest arbitrarily deep
    enclosed = Forward()
    parens = nestedExpr('(', ')', content=enclosed)
    enclosed << (parens | atom).setResultsName('nested')

    content = Forward()
    content << (ZeroOrMore(enclosed))

    res = content.parseString(input_string)
    return res
class ExplicitStateUpdater(StateUpdateMethod):
    '''
    An object that can be used for defining state updaters via a simple
    description (see below). Resulting instances can be passed to the
    ``method`` argument of the `NeuronGroup` constructor. As other state
    updater functions the `ExplicitStateUpdater` objects are callable,
    returning abstract code when called with an `Equations` object.

    A description of an explicit state updater consists of a (multi-line)
    string, containing assignments to variables and a final "x_new = ...",
    stating the integration result for a single timestep. The assignments
    can be used to define an arbitrary number of intermediate results and
    can refer to ``f(x, t)`` (the function being integrated, as a function of
    ``x``, the previous value of the state variable and ``t``, the time) and
    ``dt``, the size of the timestep.

    For example, to define a Runge-Kutta 4 integrator (already provided as
    `rk4`), use::

            k1 = dt*f(x,t)
            k2 = dt*f(x+k1/2,t+dt/2)
            k3 = dt*f(x+k2/2,t+dt/2)
            k4 = dt*f(x+k3,t+dt)
            x_new = x+(k1+2*k2+2*k3+k4)/6

    Note that for stochastic equations, the function `f` only corresponds to
    the non-stochastic part of the equation. The additional function `g`
    corresponds to the stochastic part that has to be multiplied with the
    stochastic variable xi (a standard normal random variable -- if the
    algorithm needs a random variable with a different variance/mean you have
    to multiply/add it accordingly). Equations with more than one
    stochastic variable do not have to be treated differently, the part
    referring to ``g`` is repeated for all stochastic variables automatically.

    Stochastic integrators can also make reference to ``dW`` (a normal
    distributed random number with variance ``dt``) and ``g(x, t)``, the
    stochastic part of an equation. A stochastic state updater could therefore
    use a description like::

        x_new = x + dt*f(x,t) + g(x, t) * dW

    For simplicity, the same syntax is used for state updaters that only support
    additive noise, even though ``g(x, t)`` does not depend on ``x`` or ``t``
    in that case.

    There are some restrictions on the complexity of the expressions (but most
    can be worked around by using intermediate results as in the above
    Runge-Kutta example): Every statement can only contain the functions ``f``
    and ``g`` once; The expressions have to be linear in the functions, e.g.
    you can use ``dt*f(x, t)`` but not ``f(x, t)**2``.

    Parameters
    ----------
    description : str
        A state updater description (see above).
    stochastic : {None, 'additive', 'multiplicative'}
        What kind of stochastic equations this state updater supports: ``None``
        means no support of stochastic equations, ``'additive'`` means only
        equations with additive noise and ``'multiplicative'`` means
        supporting arbitrary stochastic equations.
    custom_check : callable, optional
        If given, it is called as ``custom_check(eqs, variables)`` every time
        the state updater is applied to equations, before any code is
        generated.

    Raises
    ------
    SyntaxError
        If the parsing of the description failed. The exception carries the
        offending line, column and line number reported by the parser.

    Notes
    -----
    Since clocks are updated *after* the state update, the time ``t`` used
    in the state update step is still at its previous value. Enumerating the
    states and discrete times, ``x_new = x + dt*f(x, t)`` is therefore
    understood as :math:`x_{i+1} = x_i + dt f(x_i, t_i)`, yielding the correct
    forward Euler integration. If the integrator has to refer to the time at
    the end of the timestep, simply use ``t + dt`` instead of ``t``.

    See also
    --------
    euler, rk2, rk4, milstein
    '''

    #===========================================================================
    # Parsing definitions
    #===========================================================================
    #: Legal names for temporary variables
    TEMP_VAR = ~Literal('x_new') + Word(
        string.ascii_letters + '_',
        string.ascii_letters + string.digits + '_').setResultsName('identifier')

    #: A single expression
    EXPRESSION = restOfLine.setResultsName('expression')

    #: An assignment statement
    STATEMENT = Group(TEMP_VAR + Suppress('=') +
                      EXPRESSION).setResultsName('statement')

    #: The last line of a state updater description
    OUTPUT = Group(Suppress(Literal('x_new')) + Suppress('=') +
                   EXPRESSION).setResultsName('output')

    #: A complete state updater description
    DESCRIPTION = ZeroOrMore(STATEMENT) + OUTPUT

    def __init__(self, description, stochastic=None, custom_check=None):
        '''
        Parse `description` and store its statements as sympy expressions.

        After construction, ``self.statements`` holds one
        ``(name, expression)`` pair per intermediate statement (names
        prefixed with ``__``), ``self.output`` holds the expression of the
        ``x_new = ...`` line and ``self.symbols`` maps symbol names to the
        sympy symbols used in those expressions.
        '''
        self._description = description
        self.stochastic = stochastic
        self.custom_check = custom_check

        try:
            parsed = ExplicitStateUpdater.DESCRIPTION.parseString(
                description, parseAll=True)
        except ParseException as p_exc:
            # Re-raise as a SyntaxError that carries the position
            # information of the pyparsing exception
            ex = SyntaxError('Parsing failed: ' + str(p_exc.msg))
            ex.text = str(p_exc.line)
            ex.offset = p_exc.column
            ex.lineno = p_exc.lineno
            raise ex

        self.statements = []
        self.symbols = SYMBOLS.copy()
        for element in parsed:
            expression = str_to_sympy(element.expression)
            # Replace all symbols used in state updater expressions by unique
            # names that cannot clash with user-defined variables or functions
            expression = expression.subs(sympy.Function('f'),
                                         self.symbols['__f'])
            expression = expression.subs(sympy.Function('g'),
                                         self.symbols['__g'])
            symbols = list(expression.atoms(sympy.Symbol))
            unique_symbols = []
            for symbol in symbols:
                # 'dt' is the only symbol that keeps its name
                if symbol.name == 'dt':
                    unique_symbols.append(symbol)
                else:
                    unique_symbols.append(_symbol('__' + symbol.name))
            for symbol, unique_symbol in zip(symbols, unique_symbols):
                expression = expression.subs(symbol, unique_symbol)

            # Remember the (renamed) symbols so that they can be looked up
            # by name later on
            self.symbols.update(
                dict(((symbol.name, symbol) for symbol in unique_symbols)))
            if element.getName() == 'statement':
                self.statements.append(('__' + element.identifier, expression))
            elif element.getName() == 'output':
                self.output = expression
            else:
                raise AssertionError('Unknown element name: %s' %
                                     element.getName())

    def __repr__(self):
        # recreate a description string
        # NOTE(review): this is built from ``self.statements``/``self.output``
        # (i.e. the renamed, ``__``-prefixed symbols), not from the original
        # ``self._description`` string.
        description = '\n'.join(
            ['%s = %s' % (var, expr) for var, expr in self.statements])
        if len(description):
            description += '\n'
        description += 'x_new = ' + str(self.output)
        r = "{classname}('''{description}''', stochastic={stochastic})"
        return r.format(classname=self.__class__.__name__,
                        description=description,
                        stochastic=repr(self.stochastic))

    def __str__(self):
        # Class name, followed by the intermediate statements (if any) and
        # the output expression
        s = '%s\n' % self.__class__.__name__

        if len(self.statements) > 0:
            s += 'Intermediate statements:\n'
            s += '\n'.join([(var + ' = ' + sympy_to_str(expr))
                            for var, expr in self.statements])
            s += '\n'

        s += 'Output:\n'
        s += sympy_to_str(self.output)
        return s

    def _latex(self, *args):
        # Render the statements and the output line as a single LaTeX
        # ``equation`` environment; ``*args`` is accepted but not used.
        from sympy import latex, Symbol
        s = [r'\begin{equation}']
        for var, expr in self.statements:
            # NOTE(review): __init__ renames every symbol except ``dt`` with
            # a ``__`` prefix, so a plain ``Symbol('x')`` may not occur in
            # these expressions -- confirm that this substitution has an
            # effect.
            expr = expr.subs(Symbol('x'), Symbol('x_t'))
            s.append(latex(Symbol(var)) + ' = ' + latex(expr) + r'\\')
        expr = self.output.subs(Symbol('x'), 'x_t')
        s.append(r'x_{t+1} = ' + latex(expr))
        s.append(r'\end{equation}')
        return '\n'.join(s)

    def _repr_latex_(self):
        # Delegates to `_latex`
        return self._latex()

    def replace_func(self, x, t, expr, temp_vars, eq_symbols,
                     stochastic_variable=None):
        '''
        Used to replace a single occurrence of ``f(x, t)`` or ``g(x, t)``:
        `expr` is the non-stochastic (in the case of ``f``) or stochastic
        part (``g``) of the expression defining the right-hand-side of the
        differential equation describing `var`. It replaces the variable
        `var` with the value given as `x` and `t` by the value given for
        `t`. Intermediate variables will be replaced with the appropriate
        replacements as well.

        For example, in the `rk2` integrator, the second step involves the
        calculation of ``f(k/2 + x, dt/2 + t)``.  If `var` is ``v`` and
        `expr` is ``-v / tau``, this will result in ``-(_k_v/2 + v)/tau``.

        Note that this deals with only one state variable `var`, given as
        an argument to the surrounding `_generate_RHS` function. The
        replacement itself is applied for every variable in `eq_symbols`.

        Raises a ``ValueError`` if `expr` cannot be converted into a sympy
        expression.
        '''

        try:
            s_expr = str_to_sympy(str(expr))
        except SympifyError as ex:
            raise ValueError('Error parsing the expression "%s": %s' %
                             (expr, str(ex)))

        for var in eq_symbols:
            # Generate specific temporary variables for the state variable,
            # e.g. '_k_v' for the state variable 'v' and the temporary
            # variable 'k'.
            if stochastic_variable is None:
                temp_var_replacements = dict(
                    ((self.symbols[temp_var], _symbol(temp_var + '_' + var))
                     for temp_var in temp_vars))
            else:
                temp_var_replacements = dict(
                    ((self.symbols[temp_var],
                      _symbol(temp_var + '_' + var + '_' +
                              stochastic_variable))
                     for temp_var in temp_vars))
            # In the expression given as 'x', replace 'x' by the variable
            # 'var' and all the temporary variables by their
            # variable-specific counterparts.
            x_replacement = x.subs(self.symbols['__x'], eq_symbols[var])
            x_replacement = x_replacement.subs(temp_var_replacements)

            # Replace the variable `var` in the expression by the new `x`
            # expression
            s_expr = s_expr.subs(eq_symbols[var], x_replacement)

        # If the expression given for t in the state updater description
        # is not just "t" (or rather "__t"), then replace t in the
        # equations by it, and replace "__t" by "t" afterwards.
        if t != self.symbols['__t']:
            s_expr = s_expr.subs(SYMBOLS['t'], t)
            s_expr = s_expr.replace(self.symbols['__t'], SYMBOLS['t'])

        return s_expr

    def _non_stochastic_part(self, eq_symbols, non_stochastic,
                             non_stochastic_expr, stochastic_variable,
                             temp_vars, var):
        '''
        Helper for `_generate_RHS`: apply the non-stochastic part of a state
        updater statement (`non_stochastic_expr`) to the equation for `var`.

        Three cases are distinguished by `stochastic_variable`: ``None`` or
        empty (no stochastic variable), a single name (``str``) or a
        collection of names. They only differ in the names used for the
        intermediate variables. Returns a list with a single sympy
        expression.
        '''
        non_stochastic_results = []
        if stochastic_variable is None or len(stochastic_variable) == 0:
            # Replace the f(x, t) part
            replace_f = lambda x, t: self.replace_func(x, t, non_stochastic,
                                                       temp_vars, eq_symbols)
            non_stochastic_result = non_stochastic_expr.replace(
                self.symbols['__f'], replace_f)
            # Replace x by the respective variable
            non_stochastic_result = non_stochastic_result.subs(
                self.symbols['__x'], eq_symbols[var])
            # Replace intermediate variables
            temp_var_replacements = dict(
                (self.symbols[temp_var], _symbol(temp_var + '_' + var))
                for temp_var in temp_vars)
            non_stochastic_result = non_stochastic_result.subs(
                temp_var_replacements)
            non_stochastic_results.append(non_stochastic_result)
        elif isinstance(stochastic_variable, str):
            # Replace the f(x, t) part
            replace_f = lambda x, t: self.replace_func(x, t, non_stochastic,
                                                       temp_vars, eq_symbols,
                                                       stochastic_variable)
            non_stochastic_result = non_stochastic_expr.replace(
                self.symbols['__f'], replace_f)
            # Replace x by the respective variable
            non_stochastic_result = non_stochastic_result.subs(
                self.symbols['__x'], eq_symbols[var])
            # Replace intermediate variables
            temp_var_replacements = dict(
                (self.symbols[temp_var],
                 _symbol(temp_var + '_' + var + '_' + stochastic_variable))
                for temp_var in temp_vars)

            non_stochastic_result = non_stochastic_result.subs(
                temp_var_replacements)
            non_stochastic_results.append(non_stochastic_result)
        else:
            # Replace the f(x, t) part
            replace_f = lambda x, t: self.replace_func(x, t, non_stochastic,
                                                       temp_vars, eq_symbols)
            non_stochastic_result = non_stochastic_expr.replace(
                self.symbols['__f'], replace_f)
            # Replace x by the respective variable
            non_stochastic_result = non_stochastic_result.subs(
                self.symbols['__x'], eq_symbols[var])
            # Replace intermediate variables: each one is replaced by the
            # sum of its per-stochastic-variable counterparts
            temp_var_replacements = dict(
                (self.symbols[temp_var],
                 reduce(operator.add, [
                     _symbol(temp_var + '_' + var + '_' + xi)
                     for xi in stochastic_variable
                 ])) for temp_var in temp_vars)

            non_stochastic_result = non_stochastic_result.subs(
                temp_var_replacements)
            non_stochastic_results.append(non_stochastic_result)

        return non_stochastic_results

    def _stochastic_part(self, eq_symbols, stochastic, stochastic_expr,
                         stochastic_variable, temp_vars, var):
        '''
        Helper for `_generate_RHS`: apply the stochastic part of a state
        updater statement (`stochastic_expr`) to the equation for `var`.

        `stochastic` is queried with ``.get(name, 0)`` for each stochastic
        variable name. Returns a list of sympy expressions: a single one if
        `stochastic_variable` is a ``str``, otherwise one per element of
        `stochastic_variable`.
        '''
        stochastic_results = []
        if isinstance(stochastic_variable, str):
            # Replace the g(x, t) part
            replace_f = lambda x, t: self.replace_func(
                x, t, stochastic.get(stochastic_variable, 0),
                temp_vars, eq_symbols, stochastic_variable)
            stochastic_result = stochastic_expr.replace(
                self.symbols['__g'], replace_f)
            # Replace x by the respective variable
            stochastic_result = stochastic_result.subs(self.symbols['__x'],
                                                       eq_symbols[var])
            # Replace dW by the respective variable
            stochastic_result = stochastic_result.subs(self.symbols['__dW'],
                                                       stochastic_variable)
            # Replace intermediate variables
            temp_var_replacements = dict(
                (self.symbols[temp_var],
                 _symbol(temp_var + '_' + var + '_' + stochastic_variable))
                for temp_var in temp_vars)

            stochastic_result = stochastic_result.subs(temp_var_replacements)
            stochastic_results.append(stochastic_result)
        else:
            for xi in stochastic_variable:
                # Replace the g(x, t) part
                # (the lambda is used within the same loop iteration)
                replace_f = lambda x, t: self.replace_func(
                    x, t, stochastic.get(xi, 0),
                    temp_vars, eq_symbols, xi)
                stochastic_result = stochastic_expr.replace(
                    self.symbols['__g'], replace_f)
                # Replace x by the respective variable
                stochastic_result = stochastic_result.subs(
                    self.symbols['__x'], eq_symbols[var])

                # Replace dW by the respective variable
                stochastic_result = stochastic_result.subs(
                    self.symbols['__dW'], xi)

                # Replace intermediate variables
                temp_var_replacements = dict(
                    (self.symbols[temp_var],
                     _symbol(temp_var + '_' + var + '_' + xi))
                    for temp_var in temp_vars)

                stochastic_result = stochastic_result.subs(
                    temp_var_replacements)
                stochastic_results.append(stochastic_result)
        return stochastic_results

    def _generate_RHS(self, eqs, var, eq_symbols, temp_vars, expr,
                      non_stochastic_expr, stochastic_expr,
                      stochastic_variable=()):
        '''
        Helper function used in `__call__`. Generates the right hand side of
        an abstract code statement by appropriately replacing f, g and t.
        For example, given a differential equation ``dv/dt = -(v + I) / tau``
        (i.e. `var` is ``v`` and `expr` is ``(-v + I) / tau``) together with
        the `rk2` step ``return x + dt*f(x +  k/2, t + dt/2)``
        (i.e. `non_stochastic_expr` is
        ``x + dt*f(x +  k/2, t + dt/2)`` and `stochastic_expr` is ``None``),
        produces ``v + dt*(-v - _k_v/2 + I + _k_I/2)/tau``.

        Returns the right hand side as a string. The `eqs` argument is not
        used in the body of this function.
        '''

        # Note: in the following we are silently ignoring the case that a
        # state updater does not care about either the non-stochastic or the
        # stochastic part of an equation. We do trust state updaters to
        # correctly specify their own abilities (i.e. they do not claim to
        # support stochastic equations but actually just ignore the stochastic
        # part). We can't really check the issue here, as we are only dealing
        # with one line of the state updater description. It is perfectly valid
        # to write the euler update as:
        #     non_stochastic = dt * f(x, t)
        #     stochastic = dt**.5 * g(x, t) * xi
        #     return x + non_stochastic + stochastic
        #
        # In the above case, we'll deal with lines which do not define either
        # the stochastic or the non-stochastic part.

        non_stochastic, stochastic = expr.split_stochastic()

        if non_stochastic_expr is not None:
            # We do have a non-stochastic part in the state updater description
            non_stochastic_results = self._non_stochastic_part(
                eq_symbols, non_stochastic, non_stochastic_expr,
                stochastic_variable, temp_vars, var)
        else:
            non_stochastic_results = []

        if not (stochastic is None or stochastic_expr is None):
            # We do have a stochastic part in the state
            # updater description
            stochastic_results = self._stochastic_part(eq_symbols,
                                                       stochastic,
                                                       stochastic_expr,
                                                       stochastic_variable,
                                                       temp_vars, var)
        else:
            stochastic_results = []

        RHS = sympy.Number(0)
        # All the parts (one non-stochastic and potentially more than one
        # stochastic part) are combined with addition
        for non_stochastic_result in non_stochastic_results:
            RHS += non_stochastic_result
        for stochastic_result in stochastic_results:
            RHS += stochastic_result

        return sympy_to_str(RHS)

    def __call__(self, eqs, variables=None, method_options=None):
        '''
        Apply a state updater description to model equations.

        Parameters
        ----------
        eqs : `Equations`
            The equations describing the model
        variables: dict-like, optional
            The `Variable` objects for the model. Passed on to
            ``custom_check`` (if set) and to
            ``eqs.get_substituted_expressions``; also consulted (``'dt'``
            entry) when checking for multiplicative noise.
        method_options : dict, optional
            Additional options to the state updater (not used at the moment
            for the explicit state updaters).

        Returns
        -------
        code : str
            The generated abstract code, one statement per line.

        Raises
        ------
        UnsupportedEquationsException
            If the equations are stochastic and this state updater does not
            support stochastic equations, or if the equations use
            multiplicative noise and this state updater does not support it.

        Examples
        --------
        >>> from brian2 import *
        >>> eqs = Equations('dv/dt = -v / tau : volt')
        >>> print(euler(eqs))
        _v = -dt*v/tau + v
        v = _v
        >>> print(rk4(eqs))
        __k_1_v = -dt*v/tau
        __k_2_v = -dt*(__k_1_v/2 + v)/tau
        __k_3_v = -dt*(__k_2_v/2 + v)/tau
        __k_4_v = -dt*(__k_3_v + v)/tau
        _v = __k_1_v/6 + __k_2_v/3 + __k_3_v/3 + __k_4_v/6 + v
        v = _v
        '''
        method_options = extract_method_options(method_options, {})
        # Non-stochastic numerical integrators should work for all equations,
        # except for stochastic equations
        if eqs.is_stochastic and self.stochastic is None:
            raise UnsupportedEquationsException('Cannot integrate '
                                                'stochastic equations with '
                                                'this state updater.')
        if self.custom_check:
            self.custom_check(eqs, variables)
        # The final list of statements
        statements = []

        stochastic_variables = eqs.stochastic_variables

        # The variables for the intermediate results in the state updater
        # description, e.g. the variable k in rk2
        intermediate_vars = [var for var, expr in self.statements]

        # A dictionary mapping all the variables in the equations to their
        # sympy representations
        eq_variables = dict(((var, _symbol(var)) for var in eqs.eq_names))

        # Generate the random numbers for the stochastic variables
        for stochastic_variable in stochastic_variables:
            statements.append(stochastic_variable + ' = ' + 'dt**.5 * randn()')

        substituted_expressions = eqs.get_substituted_expressions(variables)

        # Process the intermediate statements in the stateupdater description
        for intermediate_var, intermediate_expr in self.statements:

            # Split the expression into a non-stochastic and a stochastic part
            non_stochastic_expr, stochastic_expr = split_expression(
                intermediate_expr)

            # Execute the statement by appropriately replacing the functions f
            # and g and the variable x for every equation in the model.
            # We use the model equations where the subexpressions have
            # already been substituted into the model equations.
            for var, expr in substituted_expressions:
                for xi in stochastic_variables:
                    RHS = self._generate_RHS(eqs, var, eq_variables,
                                             intermediate_vars, expr,
                                             non_stochastic_expr,
                                             stochastic_expr, xi)
                    statements.append(intermediate_var + '_' + var + '_' +
                                      xi + ' = ' + RHS)
                if not stochastic_variables:  # no stochastic variables
                    RHS = self._generate_RHS(eqs, var, eq_variables,
                                             intermediate_vars, expr,
                                             non_stochastic_expr,
                                             stochastic_expr)
                    statements.append(intermediate_var + '_' + var + ' = ' +
                                      RHS)

        # Process the "return" line of the stateupdater description
        non_stochastic_expr, stochastic_expr = split_expression(self.output)

        if eqs.is_stochastic and (self.stochastic != 'multiplicative' and
                                  eqs.stochastic_type == 'multiplicative'):
            # The equations are marked as having multiplicative noise and the
            # current state updater does not support such equations. However,
            # it is possible that the equations do not use multiplicative noise
            # at all. They could depend on time via a function that is constant
            # over a single time step (most likely, a TimedArray). In that case
            # we can integrate the equations
            dt_value = variables['dt'].get_value(
            )[0] if 'dt' in variables else None
            for _, expr in substituted_expressions:
                _, stoch = expr.split_stochastic()
                if stoch is None:
                    continue
                # There could be more than one stochastic variable (e.g. xi_1, xi_2)
                for _, stoch_expr in stoch.items():
                    sympy_expr = str_to_sympy(stoch_expr.code)
                    # The equation really has multiplicative noise, if it depends
                    # on time (and not only via a function that is constant
                    # over dt), or if it depends on another variable defined
                    # via differential equations.
                    if (not is_constant_over_dt(sympy_expr, variables,
                                                dt_value) or
                            len(stoch_expr.identifiers & eqs.diff_eq_names)):
                        raise UnsupportedEquationsException(
                            'Cannot integrate '
                            'equations with '
                            'multiplicative noise with '
                            'this state updater.')

        # Assign a value to all the model variables described by differential
        # equations
        for var, expr in substituted_expressions:
            RHS = self._generate_RHS(eqs, var, eq_variables,
                                     intermediate_vars, expr,
                                     non_stochastic_expr, stochastic_expr,
                                     stochastic_variables)
            statements.append('_' + var + ' = ' + RHS)

        # Assign everything to the final variables
        for var, expr in substituted_expressions:
            statements.append(var + ' = ' + '_' + var)

        return '\n'.join(statements)
def parse_natural(input):
    """Parse a string into an element of the abstract representation.

    Grammar accepted by the parser built below:

        <expr> ::= <integer>
                   true
                   false
                   <identifier>
                   ( if <expr> : <expr> , <expr> )
                   let ( <name> = <expr> , ... ) <expr>
                   zero? <expr>
                   square <expr>
                   ( <expr> + <expr> )
                   ( <expr> * <expr> )
                   ( <expr> - <expr> )

    Parameters:
        input: the source text to parse.

    Returns:
        The parse result itself if it is a dict, otherwise
        ``{"result": "expression", "expr": <parsed expression>}``.
    """
    idChars = alphas + "_+*-?!=<>"

    pIDENTIFIER = Word(idChars, idChars + "0123456789")
    pIDENTIFIER.setParseAction(lambda result: EId(result[0]))

    # A name is like an identifier but it does not return an EId...
    pNAME = Word(idChars, idChars + "0123456789")

    pBOOLEAN = Keyword("true") | Keyword("false")
    pBOOLEAN.setParseAction(lambda result: EBoolean(result[0] == "true"))

    pINTEGER = Word("-0123456789", "0123456789")
    pINTEGER.setParseAction(lambda result: EInteger(int(result[0])))

    pEXPR = Forward()

    # Tokens: "(" "if" <cond> ":" <then> "," <else> ")"
    #   index: 0    1     2     3     4     5    6     7
    # The else-branch is token 6 (token 5 is the "," separator).
    pIF = "(" + Keyword("if") + pEXPR + ":" + pEXPR + "," + pEXPR + ")"
    pIF.setParseAction(lambda result: EIf(result[2], result[4], result[6]))

    # Each binding collapses to a single (name, expr) tuple token; any
    # trailing commas are matched but ignored.
    pBINDING = pNAME + "=" + pEXPR + ZeroOrMore(",")
    pBINDING.setParseAction(lambda result: (result[0], result[2]))

    # Tokens: "let" "(" <binding>... ")" <body>
    # so the bindings are everything between index 2 and the last two
    # tokens.
    pLET = Keyword("let") + "(" + OneOrMore(pBINDING) + ")" + pEXPR
    pLET.setParseAction(lambda result: ELet(result[2:-2], result[-1]))

    pZERO = Keyword("zero?") + pEXPR
    pZERO.setParseAction(lambda result: ECall("zero?", [result[1]]))

    pSQUARE = Keyword("square") + pEXPR
    pSQUARE.setParseAction(lambda result: ECall("square", [result[1]]))

    # Infix binary operators: tokens are "(" <left> <op> <right> ")"
    pPLUS = "(" + pEXPR + Keyword("+") + pEXPR + ")"
    pPLUS.setParseAction(lambda result: ECall("+", [result[1], result[3]]))

    pTIMES = "(" + pEXPR + Keyword("*") + pEXPR + ")"
    pTIMES.setParseAction(lambda result: ECall("*", [result[1], result[3]]))

    pMINUS = "(" + pEXPR + Keyword("-") + pEXPR + ")"
    pMINUS.setParseAction(lambda result: ECall("-", [result[1], result[3]]))

    # Keyword-based forms come before the catch-all identifier so that
    # words such as "let" or "true" are not consumed as identifiers.
    pEXPR << (pBOOLEAN | pIF | pLET | pZERO | pSQUARE | pPLUS | pTIMES |
              pMINUS | pINTEGER | pIDENTIFIER)

    result = pEXPR.parseString(input)[0]

    if isinstance(result, dict):
        return result
    return {"result": "expression", "expr": result}
| JOIN | LEFT_JOIN | LEFT_OUTER_JOIN | RIGHT_JOIN | RIGHT_OUTER_JOIN )("op") + Group(table_source)("join") + Optional((ON + expr("on")) | (USING + expr("using"))) ).addParseAction(to_join_call) ungrouped_select_no_with = ( SELECT + delimitedList(selectColumn)("select") + Optional( (Suppress(FROM) + delimitedList(Group(table_source)) + ZeroOrMore(join))("from") + Optional(WHERE + expr("where")) + Optional(DISTRIBUTE_BY + delimitedList(Group(expr))("distributeby")) + Optional(SORT_BY + delimitedList(Group(sortColumn))("sortby")) + Optional(CLUSTER_BY + delimitedList(Group(sortColumn))("clusterby")) + Optional(GROUP_BY + delimitedList(Group(expr))("groupby")) + Optional(HAVING + expr("having")) + Optional(ORDER_BY + delimitedList(Group(sortColumn))("orderby")) + Optional(LIMIT + expr("limit")) + Optional(OFFSET + expr("offset")) ) ).addParseAction(to_ungrouped_select_no_with) select_no_with = ungrouped_select_no_with | ( LB + ungrouped_select_no_with + RB )
class ChoiceTree:
    '''
    Parses a string describing a set of alternatives and produces every
    string it stands for (or a random one of them).
        e.g. "abc[de|fg]" → [ "abcde", "abcfg" ]
        "I [eat|like] [|hot]dogs" → [ "I eat dogs", "I like dogs", "I eat hotdogs", "I like hotdogs" ]

    The escape symbol is '~'
        e.g. "abc~[def~]" → [ "abc[def]" ]
    An escaped escape '~~' is NOT collapsed into a literal '~'; call
    .replace('~~', '~') on the results yourself if that is what you want.

    The underlying model is the noncommutative semiring of (unordered) lists
    of strings, where, in python notation:
        list1+list2 == [*list1, *list2]
    is list concatenation and
        list1*list2 == [a+b for a in list1 for b in list2]
    concatenates every pair of strings. (Its neutral element is the list
    holding the empty string, its zero element is the empty list.)

    Addition is written "|", multiplication is juxtaposition (a*b == ab)
    and [] act as parentheses, so that e.g.
        "abc" == ["abc"] and "a|b|c" == ["a", "b", "c"]

    ChoiceTree parses such an expression and, via distributivity
    ( [a|b]c == ab|ac ), expands it into a sum of products.
    '''

    class Text:
        '''Leaf node: a fixed piece of text with exactly one expansion.'''

        def __init__(self, text):
            if text == '':
                self.text = text
            else:
                # parse results: glue the matched fragments together
                self.text = ''.join(text.asList())
            self.count = 1
            self.reset()

        def __repr__(self):
            return self.text

        __str__ = __repr__

        def next(self):
            self.done = True
            return self.text

        def random(self):
            return self.text

        def reset(self):
            self.done = False

        def current(self):
            return self.text

    class Choice:
        '''Sum node: exactly one of its children is expanded at a time.'''

        def __init__(self, vals):
            self.vals = vals.asList()
            self.count = sum(v.count for v in self.vals)
            self.reset()

        def __repr__(self):
            return '[{}]'.format('|'.join([str(v) for v in self.vals]))

        __str__ = __repr__

        def next(self):
            branch = self.vals[self.i]
            out = branch.next()
            if branch.done:
                # this branch is exhausted, move on to the following one
                self.i += 1
                if self.i == len(self.vals):
                    self.done = True
            return out

        def random(self):
            # Weighted based on the number of different possible branches
            # each child has.
            weights = [v.count / self.count for v in self.vals]
            return np.random.choice(self.vals, p=weights).random()

        def reset(self):
            self.i = 0
            self.done = False
            for child in self.vals:
                child.reset()

        def current(self):
            return self.vals[self.i].current()

    class Group:
        '''Product node: the expansions of its children are concatenated.'''

        def __init__(self, vals):
            self.vals = vals.asList()
            total = 1
            for child in self.vals:
                total = total * child.count
            self.count = total
            self.reset()

        def __repr__(self):
            return ''.join([str(v) for v in self.vals])

        __str__ = __repr__

        def next(self):
            # Works like an odometer: advance the first child and carry
            # over into the following one whenever a child wraps around.
            last = len(self.vals) - 1
            pieces = []
            idx = 0
            while True:
                node = self.vals[idx]
                pieces.append(node.next())
                if not node.done:
                    break
                if idx == last:
                    self.done = True
                    break
                node.reset()
                idx += 1
            # the children that were not advanced contribute their current
            # value
            pieces.extend(v.current() for v in self.vals[idx + 1:])
            return ''.join(pieces)

        def random(self):
            return ''.join(v.random() for v in self.vals)

        def reset(self):
            self.done = False
            for child in self.vals:
                child.reset()

        def current(self):
            return ''.join([c.current() for c in self.vals])

    # --- grammar ---------------------------------------------------------
    escapedSymbol = Char('~').suppress() + Char('[|]')
    escapedEsc = Literal('~~')
    soleEsc = Char('~')
    lbr = Literal('[').suppress()
    rbr = Literal(']').suppress()
    div = Literal('|').suppress()
    # any sequence of characters not containing '[', ']', '|' or '~'
    _text = Regex(r'[^\[\|\]~]+')

    text = pGroup(
        OneOrMore(escapedSymbol | escapedEsc | soleEsc | _text)
    ).setParseAction(lambda t: ChoiceTree.Text(t[0]))
    group = Forward()
    choice = pGroup(
        lbr + group + ZeroOrMore(div + group) + rbr
    ).setParseAction(lambda t: ChoiceTree.Choice(t[0]))
    empty = Empty().setParseAction(lambda t: ChoiceTree.Text(''))
    group <<= pGroup(
        OneOrMore(text | choice) | empty
    ).setParseAction(lambda t: ChoiceTree.Group(t[0])).leaveWhitespace()

    def __init__(self, text, parse_flags=False, add_brackets=False,
                 leave_escapes=False):
        '''
        Parse `text` into a tree of Group/Choice/Text nodes.

        With `parse_flags`, a leading "[?]" is stripped from `text` and
        turns on random mode (iteration then yields a single random
        expansion). With `add_brackets`, `text` is wrapped in "[...]"
        before parsing. `leave_escapes` is accepted but not used here.
        '''
        self.flag_random = False
        if parse_flags and text[:3] == '[?]':
            text = text[3:]
            self.flag_random = True

        if add_brackets:
            text = '[' + text + ']'

        self.root: ChoiceTree.Group = ChoiceTree.group.parseString(text)[0]
        self.count = self.root.count

    def __iter__(self):
        if self.flag_random:
            yield self.random()
            return
        root = self.root
        while not root.done:
            yield root.next()
        # leave the tree ready for another full iteration
        root.reset()

    def random(self):
        return self.root.random()
def CORBA_IDL_BNF():
    """Return the pyparsing grammar for CORBA IDL, building it on first use.

    The grammar is stored in the module-level ``bnf`` variable; when that
    variable is already truthy it is returned unchanged.  The returned parser
    accepts either a single module definition or one or more module items
    (interfaces, exceptions, constants, typedefs, nested modules) and ignores
    ``//`` line comments as well as C-style block comments.
    """
    global bnf
    if bnf:
        return bnf

    # punctuation -- one Literal per character, in the order of the names
    (colon, lbrace, rbrace, lbrack, rbrack, lparen, rparen, equals,
     comma, dot, slash, bslash, star, semi, langle, rangle) = map(
        Literal, ":{}[]()=,./\\*;<>")

    # keywords, looked up by their literal spelling
    kw = {
        word: Keyword(word)
        for word in (
            "any attribute boolean case char const context default double "
            "enum exception FALSE fixed float inout interface in long "
            "module Object octet oneway out raises readonly sequence short "
            "string struct switch TRUE typedef unsigned union void wchar "
            "wstring"
        ).split()
    }

    identifier = Word(alphas, alphanums + "_").setName("identifier")

    exponent = Optional(CaselessLiteral("E") + Word(nums + "+-", nums))
    real = Combine(
        Word(nums + "+-", nums) + dot + Optional(Word(nums)) + exponent)

    hex_integer = Combine(CaselessLiteral("0x") + Word(nums + "abcdefABCDEF"))
    integer = (hex_integer | Word(nums + "+-", nums)).setName("int")

    udTypeName = delimitedList(
        identifier, "::", combine=True).setName("udType")

    # Longest-match alternation ('^') is required for type names, because a
    # user-defined type name may start with a keyword type, like "stringSeq"
    # or "longArray".
    typeName = kw["any"]
    for builtin in ("boolean", "char", "double", "fixed", "float", "long",
                    "octet", "short", "string", "wchar", "wstring"):
        typeName = typeName ^ kw[builtin]
    typeName = (typeName ^ udTypeName).setName("type")

    # sequence<...> may nest, hence the forward declaration
    sequenceDef = Forward().setName("seq")
    sequenceDef << Group(
        kw["sequence"] + langle + (sequenceDef | typeName) + rangle)

    array_suffix = Optional(lbrack + integer + rbrack)
    typeDef = sequenceDef | (typeName + array_suffix)
    typedefDef = Group(
        kw["typedef"] + typeDef + identifier + semi).setName("typedef")

    # modules may contain modules, so the definition is filled in below
    moduleDef = Forward()

    const_value = real | integer | quotedString
    constDef = Group(
        kw["const"] + typeDef + identifier + equals + const_value + semi)

    exceptionItem = Group(typeDef + identifier + semi)
    exceptionDef = (
        kw["exception"] + identifier
        + lbrace + ZeroOrMore(exceptionItem) + rbrace + semi)

    attributeDef = (
        Optional(kw["readonly"]) + kw["attribute"]
        + typeDef + identifier + semi)

    direction = kw["inout"] | kw["in"] | kw["out"]
    paramlist = delimitedList(
        Group(direction + typeName + identifier)).setName("paramlist")

    return_type = kw["void"] ^ typeDef
    raises_clause = Optional(
        kw["raises"] + lparen + Group(delimitedList(typeName)) + rparen)
    operationDef = (
        return_type + identifier
        + lparen + Optional(paramlist) + rparen
        + raises_clause + semi)

    interfaceItem = constDef | exceptionDef | attributeDef | operationDef
    base_list = Optional(colon + delimitedList(typeName))
    interfaceDef = Group(
        kw["interface"] + identifier + base_list
        + lbrace + ZeroOrMore(interfaceItem) + rbrace + semi
    ).setName("opnDef")

    moduleItem = (
        interfaceDef | exceptionDef | constDef | typedefDef | moduleDef)
    moduleDef << (
        kw["module"] + identifier
        + lbrace + ZeroOrMore(moduleItem) + rbrace + semi)

    bnf = moduleDef | OneOrMore(moduleItem)

    singleLineComment = "//" + restOfLine
    bnf.ignore(singleLineComment)
    bnf.ignore(cStyleComment)
    return bnf
def pyparsing_parse(text):
    """Parse block-description *text* and return the populated root block.

    The text is matched against a grammar of bracketed nodes
    (``[color: name ...]``), empty nodes (``[]``) and runs of ``/`` that are
    converted to their length.  The parsed items are handed to
    ``populate_children`` together with a stack whose only initial element
    is ``Block.get_root_block()``; that first stack element is returned.

    Raises ``ValueError`` carrying the offending line number when pyparsing
    reports a ``ParseException`` or ``ParseSyntaxException``.

    >>> import os
    >>> dirname = os.path.join(os.path.dirname(__file__), "data")
    >>> filename = os.path.join(dirname, "error1.blk")
    >>> with open(filename, encoding="utf8") as file:
    ...     pyparsing_parse(file.read())
    Traceback (most recent call last):
    ...
    ValueError: Error {0}: syntax error, line 8
    >>> filename = os.path.join(dirname, "error2.blk")
    >>> with open(filename, encoding="utf8") as file:
    ...     pyparsing_parse(file.read())
    Traceback (most recent call last):
    ...
    ValueError: Error {0}: syntax error, line 1
    >>> filename = os.path.join(dirname, "error3.blk")
    >>> with open(filename, encoding="utf8") as file:
    ...     pyparsing_parse(file.read())
    Traceback (most recent call last):
    ...
    ValueError: Error {0}: syntax error, line 4
    >>> expected = "[white: ]\\n[lightblue: Director]\\n/\\n/\\n[white: ]\\n[lightgreen: Secretary]\\n/\\n/\\n[white: Minion #1]\\n[white: ]\\n[white: Minion #2]"
    >>> filename = os.path.join(dirname, "hierarchy.blk")
    >>> with open(filename, encoding="utf8") as file:
    ...     blocks = pyparsing_parse(file.read())
    >>> str(blocks).strip() == expected
    True
    >>> expected = "[#00CCDE: MessageBox Window\\n[lightgray: Frame\\n[white: ]\\n[white: Message text]\\n/\\n/\\n[goldenrod: OK Button]\\n[white: ]\\n[#ff0505: Cancel Button]\\n/\\n[white: ]\\n]\\n]"
    >>> filename = os.path.join(dirname, "messagebox.blk")
    >>> with open(filename, encoding="utf8") as file:
    ...     blocks = pyparsing_parse(file.read())
    >>> str(blocks).strip() == expected
    True
    """

    # --- parse actions -------------------------------------------------
    def add_block(tokens):
        # A node without an explicit color falls back to "white".
        return Block.Block(tokens.name, tokens.color or "white")

    def count_new_rows(tokens):
        # A run of '/' characters is replaced by how many there were.
        return len(tokens.new_rows)

    def strip_name(tokens):
        return tokens.name.strip()

    def make_empty_block():
        return EmptyBlock

    # --- grammar -------------------------------------------------------
    left_bracket = Suppress("[")
    right_bracket = Suppress("]")

    new_rows = Word("/")("new_rows").setParseAction(count_new_rows)
    name = CharsNotIn("[]/\n")("name").setParseAction(strip_name)

    hex_color = Word("#", hexnums, exact=7)
    named_color = Word(alphas, alphanums)
    color = (hex_color | named_color)("color")

    empty_node = (left_bracket + right_bracket).setParseAction(
        make_empty_block)

    # Nodes nest, so the list-of-nodes rule is declared before it is defined.
    nodes = Forward()
    node_data = Optional(color + Suppress(":")) + Optional(name)
    node_data.setParseAction(add_block)
    # '-' after the opening bracket: a failure past this point is reported
    # as a ParseSyntaxException instead of triggering backtracking.
    node = left_bracket - node_data + nodes + right_bracket
    row = Optional(new_rows) + OneOrMore(node | empty_node)
    nodes << Group(ZeroOrMore(row))

    # --- parse and build the block tree --------------------------------
    stack = [Block.get_root_block()]
    try:
        results = nodes.parseString(text, parseAll=True)
        assert len(results) == 1
        items = results.asList()[0]
        populate_children(items, stack)
    except (ParseException, ParseSyntaxException) as err:
        raise ValueError("Error {{0}}: syntax error, line "
                         "{0}".format(err.lineno))
    return stack[0]
quadraticBezierCurveto = Group(Command("Q") + Arguments(coordinatePairPairSequence)) smoothCurve = Group(Command("S") + Arguments(coordinatePairPairSequence)) #curve = Group(Command("C") + Arguments(coordinatePairTripleSequence)) horizontalLine = Group(Command("H") + Arguments(coordinateSequence)) verticalLine = Group(Command("V") + Arguments(coordinateSequence)) drawToCommand = ( lineTo | moveTo | closePath | ellipticalArc | smoothQuadraticBezierCurveto | quadraticBezierCurveto | smoothCurve | curve | horizontalLine | verticalLine ) #~ number.debug = True moveToDrawToCommands = moveTo + ZeroOrMore(drawToCommand) path = ZeroOrMore(moveToDrawToCommands) path.keepTabs = True def get_points(d): commands = path.parseString(d) points = [] currentset = None for command in commands: if command[0] == 'M' or command[0] == 'm': currentset = [] points.append(currentset) currentset.append(command[1][-1]) elif command[0] == 'L' or command[0] == 'l': currentset.extend(command[1])
def create_bnf( stack ):
    """Build and return the pyparsing grammar for region expressions.

    A region expression is an atom optionally followed by any number of
    ``<set operator> <region expression>`` pairs, anchored at the start and
    end of the input string.  Atoms are the ``all`` keyword, ``r.<name>``
    region references (optionally prefixed by ``copy``), the node/element
    selectors defined below, or a parenthesised region expression.

    ``stack`` is passed to ``to_stack()`` to create the parse action attached
    to atoms and to operator/operand pairs.
    """

    def operator( symbol, name ):
        # Set-operation token whose parse action is replace(name).
        return Literal( symbol ).setParseAction( replace( name ) )

    def selector( expr, name ):
        # Grouped selector whose parse action is replace(name, keep=True).
        return Group( expr ).setParseAction( replace( name, keep = True ) )

    # --- numbers ---------------------------------------------------------
    point = Literal( "." )
    comma = Literal( "," )
    e = CaselessLiteral( "E" )
    inumber = Word( nums )
    mantissa = Word( "+-" + nums, nums ) + Optional( point + Optional( Word( nums ) ) )
    fnumber = Combine( mantissa + Optional( e + Word( "+-" + nums, nums ) ) )

    # --- connective words ------------------------------------------------
    kw_of = Literal( 'of' )
    kw_in = Literal( 'in' )
    kw_by = Literal( 'by' )
    kw_copy = Literal( 'copy' )

    # --- region set operators --------------------------------------------
    sub_n = operator( '-n', 'OA_SubN' )
    sub_e = operator( '-e', 'OA_SubE' )
    add_n = operator( '+n', 'OA_AddN' )
    add_e = operator( '+e', 'OA_AddE' )
    isect_n = operator( '*n', 'OA_IntersectN' )
    isect_e = operator( '*e', 'OA_IntersectE' )
    regop = sub_n | sub_e | add_n | add_e | isect_n | isect_e

    lpar = Literal( "(" ).suppress()
    rpar = Literal( ")" ).suppress()

    kw_all = operator( 'all', 'KW_All' )

    # --- entity words ----------------------------------------------------
    node = Literal( 'node' )
    nodes = Literal( 'nodes' )
    element = Literal( 'element' )
    elements = Literal( 'elements' )
    group = Literal( 'group' )
    surface = Literal( 'surface' )

    # 'variable' and 'any_var' are built but not referenced below.
    variable = Word( 'xyz', max = 1 ) | Literal( 'domain' )
    any_var = Word( alphas + '_', alphanums + '_' ) | fnumber

    ident = Word( alphas, alphanums + "_" )

    function = Group( Word( alphas, alphanums + '_' ) )
    function.setParseAction( join_tokens )

    # --- region references: [copy] r.<name> ------------------------------
    region_name = Combine( Literal( 'r.' ) + Word( alphas, '_' + alphas + nums ) )
    region = Group( Optional( kw_copy, default = 'nocopy' ) + region_name )
    region.setParseAction( replace( 'KW_Region', keep = True ) )

    # --- coordinate relations, e.g. (x < 0.5) & (y >= 1) -----------------
    coor = oneOf( 'x y z' )
    boolop = oneOf( '& |' )
    relop = oneOf( '< > <= >= != ==' )
    operand = coor | fnumber
    bool_term = ( ZeroOrMore( '(' ) + operand + relop + ( coor | fnumber )
                  + ZeroOrMore( ')' ) )

    # The recursion refers to the Forward itself, not to the Group built
    # around it afterwards.
    relation_body = Forward()
    relation_body << ( ZeroOrMore( '(' ) + bool_term
                       + ZeroOrMore( boolop + relation_body )
                       + ZeroOrMore( ')' ) )
    relation = Group( relation_body ).setParseAction( join_tokens )

    # --- node / element selectors ----------------------------------------
    nos = Group( nodes + kw_of + surface ).setParseAction( replace( 'E_NOS' ) )
    nir = selector( nodes + kw_in + relation, 'E_NIR' )
    nbf = selector( nodes + kw_by + function, 'E_NBF' )
    ebf = selector( elements + kw_by + function, 'E_EBF' )
    eog = selector( elements + kw_of + group + Word( nums ), 'E_EOG' )
    nog = selector( nodes + kw_of + group + ( Word( nums ) | ident ), 'E_NOG' )
    onir = Group( node + kw_in + region ).setParseAction(
        replace_with_region( 'E_ONIR', 2 ) )
    ni = selector( node + delimitedList( inumber ), 'E_NI' )
    ei1 = selector( element + delimitedList( inumber ), 'E_EI1' )
    etuple = ( lpar.suppress() + inumber + comma.suppress()
               + inumber + rpar.suppress() )
    ei2 = selector( element + delimitedList( etuple ), 'E_EI2' )

    # --- full expression -------------------------------------------------
    expression = Forward()

    atom1 = ( kw_all | region | ni | onir | nos | nir | nbf
              | ei1 | ei2 | ebf | eog | nog )
    atom1.setParseAction( to_stack( stack ) )
    atom2 = lpar + expression.suppress() + rpar
    atom = atom1 | atom2

    aux = regop + expression
    aux.setParseAction( to_stack( stack ) )

    expression << atom + ZeroOrMore( aux )

    # For debugging, set_debug() can be enabled on `region`, `relation` or
    # the returned expression.
    region_expression = StringStart() + expression + StringEnd()
    return region_expression
for kw in kwds.split(): exec("{0}_ = Keyword('{1}')".format(kw.upper(), kw)) messageBody = Forward() messageDefn = MESSAGE_ - ident("messageId") + LBRACE + messageBody( "body") + RBRACE typespec = oneOf("""double float int32 int64 uint32 uint64 sint32 sint64 fixed32 fixed64 sfixed32 sfixed64 bool string bytes""" ) | ident rvalue = integer | TRUE_ | FALSE_ | ident fieldDirective = LBRACK + Group(ident + EQ + rvalue) + RBRACK fieldDefn = ((REQUIRED_ | OPTIONAL_ | REPEATED_)("fieldQualifier") - typespec("typespec") + ident("ident") + EQ + integer("fieldint") + ZeroOrMore(fieldDirective) + SEMI) # enumDefn ::= 'enum' ident '{' { ident '=' integer ';' }* '}' enumDefn = ENUM_("typespec") - ident('name') + LBRACE + Dict( ZeroOrMore(Group(ident + EQ + integer + SEMI)))('values') + RBRACE # extensionsDefn ::= 'extensions' integer 'to' integer ';' extensionsDefn = EXTENSIONS_ - integer + TO_ + integer + SEMI # messageExtension ::= 'extend' ident '{' messageBody '}' messageExtension = EXTEND_ - ident + LBRACE + messageBody + RBRACE # messageBody ::= { fieldDefn | enumDefn | messageDefn | extensionsDefn | messageExtension }* messageBody << Group( ZeroOrMore( Group(fieldDefn | enumDefn | messageDefn | extensionsDefn
"""Appendices may have parenthetical paragraphs in its section number.""" if match.appendix_digit: lst = list(match) pars = lst[lst.index(match.appendix_digit) + 1:] section = match.appendix_digit if pars: section += '(' + ')('.join(el for el in pars) + ')' return section else: return None appendix_with_section = ( atomic.appendix + '-' + (atomic.appendix_digit + ZeroOrMore(atomic.lower_p | atomic.roman_p | atomic.digit_p | atomic.upper_p) ).setParseAction(appendix_section).setResultsName("appendix_section")) appendix_with_part = (atomic.appendix_marker.copy().setParseAction( keep_pos).setResultsName("marker") + atomic.appendix + Suppress(",") + Marker('part') + atomic.upper_roman_a + Optional(any_a) + Optional(any_a) + Optional(any_a)) marker_appendix = (atomic.appendix_marker.copy().setParseAction( keep_pos).setResultsName("marker") + (appendix_with_section | atomic.appendix)) marker_part = ( atomic.part_marker.copy().setParseAction(keep_pos).setResultsName("marker") + atomic.part)
keysubspace = hdtypes.Subspace(dimensions=[space.key], nosearch=[], regions=list(space.keyregions)) subspaces = [keysubspace] + list(space.subspaces) return hdtypes.Space(space.name, space.dimensions, subspaces) identifier = Word(string.ascii_letters + string.digits + '_') integer = Word(string.digits).setParseAction(lambda t: int(t[0])) hexnum = Combine(Literal("0x") + Word(string.hexdigits)).setParseAction(lambda t: int(t[0][2:], 16)) dimension = identifier.setResultsName("name") + \ Optional(Suppress(Literal("(")) + (Literal("string") | Literal("uint64")) + Suppress(Literal(")")), default="string").setResultsName("type") dimension.setParseAction(parse_dimension) autoregion = Literal("auto") + integer + integer staticregion = Literal("region") + integer + hexnum + integer region = ZeroOrMore(Group(staticregion)) + Optional(Group(autoregion)) region.setParseAction(parse_regions) subspace = Literal("subspace").suppress() + \ Group(delimitedList(identifier)) + \ Optional(Suppress(Literal("nosearch")) + Group(delimitedList(identifier)), default=[]) + \ Group(region) subspace.setParseAction(parse_subspace) space = Literal("space").suppress() + identifier.setResultsName("name") + \ Literal("dimensions").suppress() + Group(delimitedList(dimension)).setResultsName("dimensions") + \ Literal("key").suppress() + identifier.setResultsName("key") + \ Group(region).setResultsName("keyregions") + \ ZeroOrMore(subspace).setResultsName("subspaces") space.setParseAction(parse_space)
# Numeric literals.  `plusorminus`, `point` and `e` are referenced here but
# are not defined within this group of statements.
number = Word(nums)
integer = Combine(Optional(plusorminus) + number)
floatnumber = Combine(
    integer
    + Optional(point + Optional(number))
    + Optional(e + integer)
)

lbracket, rbracket = map(Literal, "[]")

# Placeholder only: `ident` is rebound to its real definition just below.
ident = Forward()

# Array accesses are treated as part of an identifier, so expressions may
# refer to array elements, rows and columns, e.g. a = b[i] + 5.
# Expressions inside the brackets are not presently supported, so
# a = b[i+1] will raise a ParseException.
ident = Combine(
    Word(alphas + '-', alphanums + '_')
    + ZeroOrMore(
        lbracket
        + (Word(alphas + '-', alphanums + '_') | integer)
        + rbracket
    )
)

# Operators
plus, minus, mult, div, outer = map(Literal, "+-*/@")
lpar, rpar = (Literal(paren).suppress() for paren in "()")

addop = plus | minus
multop = mult | div | outer
expop = Literal("^")
assignop = Literal("=")

# Declared up front so that it can be referenced recursively.
expr = Forward()