def bnf(css_style_obj):
    """Build (once) and return the pyparsing grammar for a small CSS subset.

    The grammar is cached in the module-level ``BNF`` global, so the parse
    actions are bound to whichever *css_style_obj* was passed on the FIRST
    call (push_param_name, push_value, push_parameter, push_ident_list,
    push_param_block) — later calls reuse that binding.

    Grammar (BNF):

    * decimal_digit   :: '0' .. '9'
    * sign            :: '-' | '+'
    * integer         :: decimal_digit+
    * float           :: [ sign ] integer '.' [ integer ] [ 'e' | 'E' [ sign ] integer ]
    * lower_case      :: 'a' .. 'z'
    * upper_case      :: 'A' .. 'Z'
    * alpha           :: lower_case | upper_case
    * punctuation     :: '`' | '~' | '!' | '@' | '#' | '$' | '%' | '^' | '&' | '*' |
                         '(' | ')' | '_' | '=' | '+' | ';' | ':' | '\\'' | ',' | '<' |
                         '.' | '>' | '/' | '?' | ' ' | '-'
    * string_delim    :: '"' | '\\''
    * string          :: string_delim [ alpha | decimal_digit | punctuation ]* string_delim
    * identifier      :: '_' | alpha [ alpha | decimal_digit | '_' ]*
    * attr_selector   :: '[' + identifier [ [ '~' | '*' | '^' | '$' | '|' ] '=' string ] ']'
    * class_or_id     :: ( '#' | '.' ) identifier
    * pseudo_class    :: ':' alpha [ alpha | '-' ]* [ '(' integer | identifier ')' ]
    * selector        :: identifier [ class_or_id | attr_selector ] [ pseudo_class ]
                         [ identifier [ pseudo_class ] ]
    * parameter_name  :: alpha [ alpha | decimal_digit | '_' | '-' ]*
    * lower_hex       :: 'a' .. 'f'
    * upper_hex       :: 'A' .. 'F'
    * hex_digit       :: decimal_digit | lower_hex | upper_hex
    * color           :: '#' hex_digit * 6
    * comment         :: '/' '*' .*? '*' '/'
    * url             :: 'url' '(' string ')'
    * pixel_count     :: integer 'px'
    * percentage      :: integer '%'
    * parameter_val   :: url | color | pixel_count | percentage | parameter_name |
                         float | integer
    * parameter       :: parameter_name ':' [ comment* ]* parameter_val
                         [ parameter_val | comment* ]+ ';'
    * parameter_block :: selector [ ',' selector ]* '{' ( parameter | comment* )+ '}'
    """
    global BNF
    if BNF is None:
        # Signed number with optional fraction and exponent.
        # BUGFIX: the original pattern used "(:?" (an optional literal colon
        # inside a capturing group) where the non-capturing group "(?:" was
        # intended, so inputs such as "1:.5" were accepted.
        fnumber = Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?")
        identifier = Word("_"+alphas+nums)

        # single-character tokens used inside attribute selectors
        tilde = Literal("~")
        asterisk = Literal("*")
        caret = Literal("^")
        dsign = Literal("$")
        pipe = Literal("|")
        equal = Literal("=")

        # quoted strings (single- or double-quoted, non-empty)
        squote = Literal("'")
        sqstring = squote + Regex(r"[^']+") + squote
        dquote = Literal('"')
        dqstring = dquote + Regex(r"[^\"]+") + dquote
        string = sqstring | dqstring

        # "#id" / ".class" selectors
        class_or_id = Word("#"+".", "_"+alphas+nums)
        # ":hover", ":nth-child(2)", ...
        pclass = Combine(Word(":", "-"+alphas) + Optional(
            '(' + (Word(nums) | identifier) + ')'))
        # "[attr]", "[attr=val]", "[attr~=val]", ...
        attr_selector = Combine("[" + identifier + Optional(Optional(
            tilde | asterisk | caret | dsign | pipe) + equal + string) + "]")
        # element / class / id / attribute selector combinations
        selector = Combine(Word("_"+alphas, "_"+alphas+nums) + Optional(
            attr_selector | class_or_id) + Optional(pclass)) | Combine(
            class_or_id + Optional(pclass)) | attr_selector

        integer = Word(nums)
        parameter_name = Word(alphas, alphas + nums + "_-")
        param_str = Word(alphas, alphas + nums + "_-")
        # C-style comment; re.S lets it span lines
        comment = Regex(r"[/][*].*?[*][/]", flags=re.S)
        lbrack = Literal("{")
        rbrack = Literal("}")

        # value forms: "12px", "50%", "#a0b1c2", url("...")
        px_suffix = Literal("px")
        pix_count = Combine(Word(nums) + px_suffix)
        percent = Literal("%")
        percentage = Combine(Word(nums) + percent)
        color = Word("#", hexnums, exact=7)
        urlstr = Keyword("url")
        url = urlstr + '(' + string + ')'
        parameter_val = (url | color | pix_count | percentage | param_str |
                         fnumber | integer)

        # "name: value [value ...];" with comments allowed between tokens;
        # each value fires push_value, the whole parameter fires push_parameter
        parameter = (parameter_name.setParseAction(
            css_style_obj.push_param_name) + ':' + ZeroOrMore(
            comment.suppress()) + OneOrMore(
            parameter_val.setParseAction(css_style_obj.push_value) + ZeroOrMore(
                comment.suppress())) +
            ';').setParseAction(css_style_obj.push_parameter)
        # "sel1, sel2 { ... }"
        parameter_block = (delimitedList(selector).setParseAction(
            css_style_obj.push_ident_list) + lbrack + OneOrMore(
            comment.suppress() | parameter) + rbrack).setParseAction(
            css_style_obj.push_param_block)
        BNF = OneOrMore(comment.suppress() | parameter_block)
    return BNF
from brian2.stateupdaters.base import StateUpdateMethod from brian2.units.allunits import second from brian2.units.fundamentalunits import (Quantity, DIMENSIONLESS, DimensionMismatchError, fail_for_dimension_mismatch) from brian2.utils.logger import get_logger from brian2.utils.stringtools import get_identifiers from .group import Group, CodeRunner, get_dtype from .subgroup import Subgroup __all__ = ['NeuronGroup'] logger = get_logger(__name__) IDENTIFIER = Word(string.ascii_letters + '_', string.ascii_letters + string.digits + '_').setResultsName('identifier') def _valid_event_name(event_name): ''' Helper function to check whether a name is a valid name for an event. Parameters ---------- event_name : str The name to check Returns ------- is_valid : bool Whether the given name is valid
class DdlParse(DdlParseBase):
    """DDL parser.

    Parses a ``CREATE TABLE`` DDL script with pyparsing and materializes the
    result into a ``DdlParseTable`` (columns, types, constraints).  The
    grammar is built once as class attributes; :meth:`parse` runs it.
    """

    # Suppressed punctuation tokens.
    _LPAR, _RPAR, _COMMA, _SEMICOLON, _DOT, _DOUBLEQUOTE, _BACKQUOTE, _SPACE = map(
        Suppress, "(),;.\"` ")
    # Case-insensitive SQL keywords.  NOTE: multi-word keywords ("NOT NULL",
    # "PRIMARY KEY", ...) survive because only ", " separators are collapsed
    # before the split — the inner single spaces are kept.
    _CREATE, _TABLE, _TEMP, _CONSTRAINT, _NOT_NULL, _PRIMARY_KEY, _UNIQUE, _UNIQUE_KEY, _FOREIGN_KEY, _REFERENCES, _KEY, _CHAR_SEMANTICS, _BYTE_SEMANTICS = \
        map(CaselessKeyword, "CREATE, TABLE, TEMP, CONSTRAINT, NOT NULL, PRIMARY KEY, UNIQUE, UNIQUE KEY, FOREIGN KEY, REFERENCES, KEY, CHAR, BYTE".replace(", ", ",").split(","))

    # Identifiers may be quoted with backquotes (MySQL) or double quotes.
    _SUPPRESS_QUOTE = _BACKQUOTE | _DOUBLEQUOTE

    # "-- ..." line comment, discarded.
    _COMMENT = Suppress("--" + Regex(r".+"))

    # CREATE [TEMP] TABLE [IF NOT EXISTS] [schema.]table ( <columns/constraints> )
    _CREATE_TABLE_STATEMENT = Suppress(_CREATE) + Optional(_TEMP)("temp") + Suppress(_TABLE) + Optional(Suppress(CaselessKeyword("IF NOT EXISTS"))) \
        + Optional(_SUPPRESS_QUOTE) + Optional(Word(alphanums+"_")("schema") + Optional(_SUPPRESS_QUOTE) + _DOT + Optional(_SUPPRESS_QUOTE)) + Word(alphanums+"_<>")("table") + Optional(_SUPPRESS_QUOTE) \
        + _LPAR \
        + delimitedList(
            OneOrMore(
                _COMMENT
                |
                # Ignore Index
                Suppress(_KEY + Word(alphanums+"_'`() "))
                |
                # Table-level constraint: [CONSTRAINT name] PRIMARY KEY /
                # UNIQUE [KEY] / NOT NULL / FOREIGN KEY (cols) [REFERENCES ...]
                Group(
                    Optional(Suppress(_CONSTRAINT) + Optional(_SUPPRESS_QUOTE) + Word(alphanums+"_")("name") + Optional(_SUPPRESS_QUOTE))
                    + (
                        (
                            (_PRIMARY_KEY ^ _UNIQUE ^ _UNIQUE_KEY ^ _NOT_NULL)("type")
                            + Optional(_SUPPRESS_QUOTE) + Optional(Word(alphanums+"_"))("name") + Optional(_SUPPRESS_QUOTE)
                            + _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums+"_") + Optional(_SUPPRESS_QUOTE)))("constraint_columns") + _RPAR
                        )
                        |
                        (
                            (_FOREIGN_KEY)("type")
                            + _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums+"_") + Optional(_SUPPRESS_QUOTE)))("constraint_columns") + _RPAR
                            + Optional(
                                Suppress(_REFERENCES)
                                + Optional(_SUPPRESS_QUOTE) + Word(alphanums+"_")("references_table") + Optional(_SUPPRESS_QUOTE)
                                + _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums+"_") + Optional(_SUPPRESS_QUOTE)))("references_columns") + _RPAR
                            )
                        )
                    )
                )("constraint")
                |
                # Column definition: name, type (with optional precision and
                # time-zone/varying modifiers), optional [] array brackets,
                # optional DEFAULT or inline constraint text.
                Group(
                    Optional(_SUPPRESS_QUOTE) + Word(alphanums+"_")("name") + Optional(_SUPPRESS_QUOTE)
                    + Group(
                        Word(alphanums+"_")
                        + Optional(CaselessKeyword("WITHOUT TIME ZONE") ^ CaselessKeyword("WITH TIME ZONE") ^ CaselessKeyword("PRECISION") ^ CaselessKeyword("VARYING"))
                        + Optional(_LPAR + Regex(r"\d+\s*,*\s*\d*") + Optional(Suppress(_CHAR_SEMANTICS | _BYTE_SEMANTICS)) + _RPAR)
                    )("type")
                    + Optional(Word(r"\[\]"))("array_brackets")
                    + Optional(Regex(r"DEFAULT\s+[^,]+", re.IGNORECASE) | Word(alphanums+"_': -"))("constraint")
                )("column")
                |
                _COMMENT
            )
        )("columns")

    # Top-level entry point: comments and CREATE TABLE statements.
    _DDL_PARSE_EXPR = Forward()
    _DDL_PARSE_EXPR << OneOrMore(_COMMENT | _CREATE_TABLE_STATEMENT)

    def __init__(self, ddl=None, source_database=None):
        """Store the DDL text (optional) and create an empty result table."""
        super().__init__(source_database)
        self._ddl = ddl
        self._table = DdlParseTable(source_database)

    @property
    def source_database(self):
        """
        Source database option

        :param source_database: enum DdlParse.DATABASE
        """
        return super().source_database

    @source_database.setter
    def source_database(self, source_database):
        # Delegate to the base-class property setter, then keep the result
        # table's dialect in sync.
        super(self.__class__, self.__class__).source_database.__set__(self, source_database)
        self._table.source_database = source_database

    @property
    def ddl(self):
        """DDL script"""
        return self._ddl

    @ddl.setter
    def ddl(self, ddl):
        self._ddl = ddl

    def parse(self, ddl=None, source_database=None):
        """
        Parse DDL script.

        :param ddl: DDL script (overrides the stored one when given)
        :param source_database: enum DdlParse.DATABASE (optional override)
        :return: DdlParseTable, Parsed table define info.
        :raises ValueError: if no DDL script has been supplied at all
        """
        if ddl is not None:
            self._ddl = ddl

        if source_database is not None:
            self.source_database = source_database

        if self._ddl is None:
            raise ValueError("DDL is not specified")

        ret = self._DDL_PARSE_EXPR.parseString(self._ddl)
        # print(ret.dump())

        if "schema" in ret:
            self._table.schema = ret["schema"]
        self._table.name = ret["table"]
        self._table.is_temp = True if "temp" in ret else False

        # Walk the parsed "columns" groups: plain column definitions first,
        # then table-level constraints folded back onto the named columns.
        for ret_col in ret["columns"]:
            if ret_col.getName() == "column":
                # add column
                col = self._table.columns.append(
                    column_name=ret_col["name"],
                    data_type_array=ret_col["type"],
                    array_brackets=ret_col['array_brackets'] if "array_brackets" in ret_col else None)

                if "constraint" in ret_col:
                    col.constraint = ret_col["constraint"]

            elif ret_col.getName() == "constraint":
                # set column constraint
                for col_name in ret_col["constraint_columns"]:
                    col = self._table.columns[col_name]

                    if ret_col["type"] == "PRIMARY KEY":
                        col.not_null = True
                        col.primary_key = True
                    elif ret_col["type"] in ["UNIQUE", "UNIQUE KEY"]:
                        col.unique = True
                    elif ret_col["type"] == "NOT NULL":
                        col.not_null = True

        return self._table
class RawNginxParser(object):
    # pylint: disable=expression-not-assigned
    """A class that parses nginx configuration with pyparsing.

    The grammar is built once as class attributes; :meth:`parse` applies it
    to ``self.source`` and :meth:`as_list` converts the result to plain
    Python lists.
    """

    # constants
    space = Optional(White())
    nonspace = Regex(r"\S+")
    left_bracket = Literal("{").suppress()
    right_bracket = space.leaveWhitespace() + Literal("}").suppress()
    semicolon = Literal(";").suppress()
    key = Word(alphanums + "_/+-.")
    # "$var" style variable reference (no braces/semicolons/whitespace inside)
    dollar_var = Combine(Literal('$') + Regex(r"[^\{\};,\s]+"))
    # parenthesized "if" condition, taken verbatim
    condition = Regex(r"\(.+\)")
    # Matches anything that is not a special character, and ${SHELL_VARS}, AND
    # any chars in single or double quotes
    # All of these COULD be upgraded to something like
    # https://stackoverflow.com/a/16130746
    dquoted = Regex(r'(\".*\")')
    squoted = Regex(r"(\'.*\')")
    nonspecial = Regex(r"[^\{\};,]")
    varsub = Regex(r"(\$\{\w+\})")
    # nonspecial nibbles one character at a time, but the other objects take
    # precedence.  We use ZeroOrMore to allow entries like "break ;" to be
    # parsed as assignments
    value = Combine(ZeroOrMore(dquoted | squoted | varsub | nonspecial))

    location = CharsNotIn("{};," + string.whitespace)
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    # (note "~*" must be tried before "~" so the longer literal wins)
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")

    # rules
    comment = space + Literal('#') + restOfLine()

    assignment = space + key + Optional(space + value, default=None) + semicolon
    location_statement = space + Optional(modifier) + Optional(space + location + space)
    if_statement = space + Literal("if") + space + condition + space
    charset_map_statement = space + Literal("charset_map") + space + value + space + value

    map_statement = space + Literal("map") + space + nonspace + space + dollar_var + space
    # This is NOT an accurate way to parse nginx map entries; it's almost
    # certainly too permissive and may be wrong in other ways, but it should
    # preserve things correctly in most or all cases.
    #
    # - I can neither prove nor disprove that it is correct wrt all escaped
    #   semicolon situations
    # Addresses https://github.com/fatiherikli/nginxparser/issues/19
    map_pattern = Regex(r'".*"') | Regex(r"'.*'") | nonspace
    map_entry = space + map_pattern + space + value + space + semicolon
    map_block = Group(
        Group(map_statement).leaveWhitespace() +
        left_bracket +
        Group(ZeroOrMore(Group(comment | map_entry)) + space).leaveWhitespace() +
        right_bracket)

    block = Forward()

    # key could for instance be "server" or "http", or "location" (in which case
    # location_statement needs to have a non-empty location)

    block_begin = (Group(space + key + location_statement) ^
                   Group(if_statement) ^
                   Group(charset_map_statement)).leaveWhitespace()

    block_innards = Group(ZeroOrMore(Group(comment | assignment) |
                                     block | map_block) + space).leaveWhitespace()

    block << Group(block_begin + left_bracket + block_innards + right_bracket)

    script = OneOrMore(Group(comment | assignment) ^ block ^ map_block) + space + stringEnd
    # keep tabs and all whitespace so the config can be round-tripped
    script.parseWithTabs().leaveWhitespace()

    def __init__(self, source):
        # source: the raw nginx configuration text to be parsed
        self.source = source

    def parse(self):
        """Returns the parsed tree."""
        return self.script.parseString(self.source)

    def as_list(self):
        """Returns the parsed tree as a list."""
        return self.parse().asList()
def grammar():
    """Build and return a pyparsing grammar for a SQL DDL dump.

    Recognizes CREATE TABLE statements, ALTER TABLE ... ADD CONSTRAINT
    FOREIGN KEY statements, "--" comments, and swallows any other
    semicolon-terminated statement.  Parse actions (field_act,
    create_table_act, add_fkey_act, ... — defined elsewhere in this module)
    do the actual model building.
    """
    # Balanced-parenthesis blob, collapsed back to a single string.
    parenthesis = Forward()
    parenthesis <<= "(" + ZeroOrMore(CharsNotIn("()") | parenthesis) + ")"
    parenthesis.setParseAction(join_string_act)

    # 'single quoted' string, collapsed to one token.
    quoted_string = "'" + OneOrMore(CharsNotIn("'")) + "'"
    quoted_string.setParseAction(join_string_act)

    # DEFAULT 'value'::type — keep the quoted literal and its trailing cast.
    quoted_default_value = (CaselessLiteral("DEFAULT") + quoted_string +
                            OneOrMore(CharsNotIn(", \n\t")))
    quoted_default_value.setParseAction(quoted_default_value_act)

    # One column/constraint definition inside CREATE TABLE ( ... ).
    field_def = OneOrMore(quoted_default_value |
                          Word(alphanums + "_\"'`:-/[].") |
                          parenthesis)
    field_def.setParseAction(field_act)

    # Table name: bare/backquoted word or a "double-quoted" identifier.
    tablename_def = ( Word(alphanums + "`_.") | QuotedString("\"") )

    # Comma-separated column list.
    field_list_def = field_def + ZeroOrMore(Suppress(",") + field_def)
    field_list_def.setParseAction(field_list_act)

    create_table_def = (CaselessLiteral("CREATE") +
                        Optional(CaselessLiteral("UNLOGGED")) +
                        CaselessLiteral("TABLE") +
                        tablename_def.setResultsName("tableName") +
                        "(" + field_list_def.setResultsName("fields") + ")" +
                        ";")
    create_table_def.setParseAction(create_table_act)

    # ON UPDATE / ON DELETE referential actions.
    delete_restrict_action = (CaselessLiteral("CASCADE") |
                              CaselessLiteral("RESTRICT") |
                              CaselessLiteral("NO ACTION") |
                              (CaselessLiteral("SET") +
                               (CaselessLiteral("NULL") |
                                CaselessLiteral("DEFAULT"))))
    # One or more comma-separated (possibly schema-qualified) column names.
    fkey_cols = (Word(alphanums + "._") +
                 ZeroOrMore(Suppress(",") + Word(alphanums + "._")))
    add_fkey_def = (CaselessLiteral("ALTER") + CaselessLiteral("TABLE") +
                    CaselessLiteral("ONLY") +
                    tablename_def.setResultsName("tableName") +
                    CaselessLiteral("ADD") + CaselessLiteral("CONSTRAINT") +
                    Word(alphanums + "_") +
                    CaselessLiteral("FOREIGN") + CaselessLiteral("KEY") +
                    "(" + fkey_cols.setResultsName("keyName") + ")" +
                    "REFERENCES" +
                    Word(alphanums + "._").setResultsName("fkTable") +
                    "(" + fkey_cols.setResultsName("fkCol") + ")" +
                    Optional(CaselessLiteral("DEFERRABLE")) +
                    Optional(CaselessLiteral("ON") + "UPDATE" +
                             delete_restrict_action) +
                    Optional(CaselessLiteral("ON") + "DELETE" +
                             delete_restrict_action) +
                    ";")
    add_fkey_def.setParseAction(add_fkey_act)

    # Catch-all: any other statement up to its terminating semicolon.
    other_statement_def = OneOrMore(CharsNotIn(";")) + ";"
    other_statement_def.setParseAction(other_statement_act)

    # "--" line comment; routed through the same pass-through action.
    comment_def = "--" + ZeroOrMore(CharsNotIn("\n"))
    comment_def.setParseAction(other_statement_act)

    return OneOrMore(comment_def |
                     create_table_def |
                     add_fkey_def |
                     other_statement_def)
# protobuf_parser.py
#
# simple parser for parsing protobuf .proto files
#
# Copyright 2010, Paul McGuire
#
# BUGFIX: Keyword was used below but missing from the pyparsing import
# list, so building the keyword expressions raised NameError at import.
from pyparsing import (Word, alphas, alphanums, Regex, Suppress, Forward,
                       Group, oneOf, ZeroOrMore, Optional, delimitedList,
                       restOfLine, quotedString, Dict, Keyword)

# identifier: letter/underscore followed by letters, digits, underscores
ident = Word(alphas + "_", alphanums + "_").setName("identifier")
# optionally signed integer literal
integer = Regex(r"[+-]?\d+")

# suppressed punctuation tokens
LBRACE, RBRACE, LBRACK, RBRACK, LPAR, RPAR, EQ, SEMI = map(
    Suppress, "{}[]()=;")

# Reserved words of the .proto language; each becomes a module-level
# pyparsing Keyword named <UPPER>_ (e.g. MESSAGE_, ENUM_, ...).
kwds = ("message required optional repeated enum extensions extends extend "
        "to package service rpc returns true false option import")
for kw in kwds.split():
    # Explicit globals() assignment instead of the original exec() — same
    # effect at module scope, but readable and lint-friendly.
    globals()[kw.upper() + "_"] = Keyword(kw)

# forward declaration: message bodies can nest message definitions
messageBody = Forward()

# message <name> { <body> }   ("-" makes the match fail hard after "message")
messageDefn = MESSAGE_ - ident("messageId") + LBRACE + messageBody(
    "body") + RBRACE

# built-in scalar types, or a user-defined (message/enum) type name
typespec = oneOf("""double float int32 int64 uint32 uint64 sint32 sint64
                 fixed32 fixed64 sfixed32 sfixed64 bool string bytes"""
                 ) | ident
# constant/right-hand-side value
rvalue = integer | TRUE_ | FALSE_ | ident
def parse_algebra(self):
    """
    Parse an algebraic expression into a tree.

    Store a `pyparsing.ParseResult` in `self.tree` with proper groupings to
    reflect parenthesis and order of operations. Leave all operators in the
    tree and do not parse any strings of numbers into their float versions.

    Reads the expression from ``self.math_expr``; relies on the
    module-level ``SUFFIXES`` mapping (presumably SI suffix -> multiplier —
    TODO confirm) for number suffixes.

    Adding the groups and result names makes the `repr()` of the result
    really gross. For debugging, use something like
      print OBJ.tree.asXML()
    """
    # 0.33 or 7 or .34 or 16.
    number_part = Word(nums)
    inner_number = (number_part +
                    Optional("." + Optional(number_part))) | ("." + number_part)
    # pyparsing allows spaces between tokens--`Combine` prevents that.
    inner_number = Combine(inner_number)

    # SI suffixes and percent.
    number_suffix = MatchFirst(Literal(k) for k in SUFFIXES.keys())

    # 0.33k or 17
    plus_minus = Literal('+') | Literal('-')
    number = Group(
        Optional(plus_minus) +
        inner_number +
        Optional(CaselessLiteral("E") + Optional(plus_minus) + number_part) +
        Optional(number_suffix))
    number = number("number")

    # Predefine recursive variables.
    expr = Forward()

    # Handle variables passed in. They must start with a letter
    # and may contain numbers and underscores afterward.
    inner_varname = Combine(Word(alphas, alphanums + "_") + ZeroOrMore("'"))
    # Alternative variable name in tensor format
    # Tensor name must start with a letter, continue with alphanums
    # Indices may be alphanumeric
    # e.g., U_{ijk}^{123}
    upper_indices = Literal("^{") + Word(alphanums) + Literal("}")
    lower_indices = Literal("_{") + Word(alphanums) + Literal("}")
    tensor_lower = Combine(Word(alphas, alphanums) + lower_indices +
                           ZeroOrMore("'"))
    tensor_mixed = Combine(Word(alphas, alphanums) + Optional(lower_indices) +
                           upper_indices + ZeroOrMore("'"))

    # Test for mixed tensor first, then lower tensor alone, then generic variable name
    varname = Group(tensor_mixed | tensor_lower | inner_varname)("variable")
    varname.setParseAction(self.variable_parse_action)

    # Same thing for functions.
    function = Group(inner_varname + Suppress("(") + expr +
                     Suppress(")"))("function")
    function.setParseAction(self.function_parse_action)

    # An atom is a number, function call, variable, or parenthesized expr.
    atom = number | function | varname | "(" + expr + ")"
    atom = Group(atom)("atom")

    # Do the following in the correct order to preserve order of operation.
    pow_term = atom + ZeroOrMore("^" + atom)
    pow_term = Group(pow_term)("power")

    par_term = pow_term + ZeroOrMore('||' + pow_term)  # 5k || 4k
    par_term = Group(par_term)("parallel")

    prod_term = par_term + ZeroOrMore(
        (Literal('*') | Literal('/')) + par_term)  # 7 * 5 / 4
    prod_term = Group(prod_term)("product")

    sum_term = Optional(plus_minus) + prod_term + ZeroOrMore(
        plus_minus + prod_term)  # -5 + 4 - 3
    sum_term = Group(sum_term)("sum")

    # Finish the recursion.
    expr << sum_term  # pylint: disable=pointless-statement
    # Require the whole input to be consumed; keep only the root group.
    self.tree = (expr + stringEnd).parseString(self.math_expr)[0]
def parse(input):
    # parse a string into an element of the abstract representation
    # NOTE(review): parameter name `input` shadows the builtin; kept because
    # renaming would break callers using the keyword form.

    # Grammar:
    #
    # <expr> ::= <integer>
    #            true
    #            false
    #            <identifier>
    #            ( if <expr> <expr> <expr> )
    #            ( let ( ( <name> <expr> ) ) <expr ) 
    #            ( function ( <name> ... ) <expr> )
    #            ( ref <expr> )
    #            ( <expr> <expr> ... )
    #

    idChars = alphas + "_+*-?!=<>"

    # Identifier expression: wraps the name in an EId AST node.
    pIDENTIFIER = Word(idChars, idChars + "0123456789")
    pIDENTIFIER.setParseAction(lambda result: EId(result[0]))

    # A name is like an identifier but it does not return an EId...
    pNAME = Word(idChars, idChars + "0123456789")

    pNAMES = ZeroOrMore(pNAME)
    pNAMES.setParseAction(lambda result: [result])

    pINTEGER = Word("0123456789")
    pINTEGER.setParseAction(lambda result: EValue(VInteger(int(result[0]))))

    pBOOLEAN = Keyword("true") | Keyword("false")
    pBOOLEAN.setParseAction(
        lambda result: EValue(VBoolean(result[0] == "true")))

    pEXPR = Forward()

    pEXPRS = ZeroOrMore(pEXPR)
    pEXPRS.setParseAction(lambda result: [result])

    # ( if cond then else ) — indices 2..4 skip "(" and the keyword.
    pIF = "(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")"
    pIF.setParseAction(lambda result: EIf(result[2], result[3], result[4]))

    # ( name expr ) binding pair for let.
    pBINDING = "(" + pNAME + pEXPR + ")"
    pBINDING.setParseAction(lambda result: (result[1], result[2]))

    pBINDINGS = OneOrMore(pBINDING)
    pBINDINGS.setParseAction(lambda result: [result])

    pLET = "(" + Keyword("let") + "(" + pBINDINGS + ")" + pEXPR + ")"
    pLET.setParseAction(lambda result: ELet(result[3], result[5]))

    # Application: first sub-expression is the callee, rest are arguments.
    pCALL = "(" + pEXPR + pEXPRS + ")"
    pCALL.setParseAction(lambda result: ECall(result[1], result[2]))

    pFUN = "(" + Keyword("function") + "(" + pNAMES + ")" + pEXPR + ")"
    pFUN.setParseAction(lambda result: EFunction(result[3], result[5]))

    pREF = "(" + Keyword("ref") + pEXPR + ")"
    pREF.setParseAction(lambda result: ERefCell(result[2]))

    pDO = "(" + Keyword("do") + pEXPRS + ")"
    pDO.setParseAction(lambda result: EDo(result[2]))

    pWHILE = "(" + Keyword("while") + pEXPR + pEXPR + ")"
    pWHILE.setParseAction(lambda result: EWhile(result[2], result[3]))

    # pCALL must come last: any "(" form would otherwise match as a call.
    pEXPR << (pINTEGER | pBOOLEAN | pIDENTIFIER | pIF | pLET | pFUN | pREF |
              pDO | pWHILE | pCALL)

    # can't attach a parse action to pEXPR because of recursion, so let's duplicate the parser
    pTOPEXPR = pEXPR.copy()
    pTOPEXPR.setParseAction(lambda result: {
        "result": "expression",
        "expr": result[0]
    })

    pDEFINE = "(" + Keyword("define") + pNAME + pEXPR + ")"
    pDEFINE.setParseAction(lambda result: {
        "result": "value",
        "name": result[2],
        "expr": result[3]
    })

    pDEFUN = "(" + Keyword("defun") + pNAME + "(" + pNAMES + ")" + pEXPR + ")"
    pDEFUN.setParseAction(
        lambda result: {
            "result": "function",
            "name": result[2],
            "params": result[4],
            "body": result[6]
        })

    pABSTRACT = "#abs" + pEXPR
    pABSTRACT.setParseAction(lambda result: {
        "result": "abstract",
        "expr": result[1]
    })

    pQUIT = Keyword("#quit")
    pQUIT.setParseAction(lambda result: {"result": "quit"})

    pTOP = (pDEFUN | pDEFINE | pQUIT | pABSTRACT | pTOPEXPR)

    result = pTOP.parseString(input)[0]
    return result  # the first element of the result is the expression
def parse_imp(input):
    # parse a string into an element of the abstract representation
    # NOTE(review): parameter name `input` shadows the builtin; kept because
    # renaming would break callers using the keyword form.

    # Grammar:
    #
    # <expr> ::= <integer>
    #            true
    #            false
    #            <identifier>
    #            ( if <expr> <expr> <expr> )
    #            ( function ( <name ... ) <expr> )
    #            ( <expr> <expr> ... )
    #
    # <decl> ::= var name = expr ;
    #
    # <stmt> ::= if <expr> <stmt> else <stmt>
    #            while <expr> <stmt>
    #            name <- <expr> ;
    #            print <expr> ;
    #            <expr> ;
    #            <block>
    #
    # <block> ::= { <decl> ... <stmt> ... }
    #
    # <toplevel> ::= <decl>
    #                <stmt>
    #

    idChars = alphas + "_+*-?!=<>"

    pIDENTIFIER = Word(idChars, idChars + "0123456789")
    #### NOTE THE DIFFERENCE
    # In this imperative variant every variable lives in a ref cell, so
    # reading an identifier dereferences it.
    pIDENTIFIER.setParseAction(
        lambda result: EPrimCall(oper_deref, [EId(result[0])]))

    # A name is like an identifier but it does not return an EId...
    pNAME = Word(idChars, idChars + "0123456789")

    pNAMES = ZeroOrMore(pNAME)
    pNAMES.setParseAction(lambda result: [result])

    pINTEGER = Word("0123456789")
    pINTEGER.setParseAction(lambda result: EValue(VInteger(int(result[0]))))

    pBOOLEAN = Keyword("true") | Keyword("false")
    pBOOLEAN.setParseAction(
        lambda result: EValue(VBoolean(result[0] == "true")))

    pEXPR = Forward()

    pEXPRS = ZeroOrMore(pEXPR)
    pEXPRS.setParseAction(lambda result: [result])

    pIF = "(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")"
    pIF.setParseAction(lambda result: EIf(result[2], result[3], result[4]))

    def mkFunBody(params, body):
        # Wrap the body so each parameter is rebound to a fresh ref cell,
        # making function parameters mutable like declared variables.
        bindings = [(p, ERefCell(EId(p))) for p in params]
        return ELet(bindings, body)

    pFUN = "(" + Keyword("function") + "(" + pNAMES + ")" + pEXPR + ")"
    pFUN.setParseAction(
        lambda result: EFunction(result[3], mkFunBody(result[3], result[5])))

    pCALL = "(" + pEXPR + pEXPRS + ")"
    pCALL.setParseAction(lambda result: ECall(result[1], result[2]))

    pEXPR << (pINTEGER | pBOOLEAN | pIDENTIFIER | pIF | pFUN | pCALL)

    pDECL_VAR = "var" + pNAME + "=" + pEXPR + ";"
    pDECL_VAR.setParseAction(lambda result: (result[1], result[3]))

    # hack to get pDECL to match only PDECL_VAR (but still leave room
    # to add to pDECL later)
    pDECL = (pDECL_VAR | NoMatch())

    pDECLS = ZeroOrMore(pDECL)
    pDECLS.setParseAction(lambda result: [result])

    pSTMT = Forward()

    pSTMT_IF_1 = "if" + pEXPR + pSTMT + "else" + pSTMT
    pSTMT_IF_1.setParseAction(
        lambda result: EIf(result[1], result[2], result[4]))

    # "if" without "else" defaults the else-branch to true.
    pSTMT_IF_2 = "if" + pEXPR + pSTMT
    pSTMT_IF_2.setParseAction(
        lambda result: EIf(result[1], result[2], EValue(VBoolean(True))))

    pSTMT_WHILE = "while" + pEXPR + pSTMT
    pSTMT_WHILE.setParseAction(lambda result: EWhile(result[1], result[2]))

    pSTMT_PRINT = "print" + pEXPR + ";"
    pSTMT_PRINT.setParseAction(
        lambda result: EPrimCall(oper_print, [result[1]]))

    # Assignment writes through the ref cell, hence the bare EId here.
    pSTMT_UPDATE = pNAME + "<-" + pEXPR + ";"
    pSTMT_UPDATE.setParseAction(
        lambda result: EPrimCall(oper_update, [EId(result[0]), result[2]]))

    pSTMT_EXPR = pEXPR + ";"
    pSTMT_EXPR.setParseAction(lambda result: result[0])

    pSTMTS = ZeroOrMore(pSTMT)
    pSTMTS.setParseAction(lambda result: [result])

    def mkBlock(decls, stmts):
        # A block is a let over ref cells for the declarations, whose body
        # runs the statements in sequence.
        bindings = [(n, ERefCell(expr)) for (n, expr) in decls]
        return ELet(bindings, EDo(stmts))

    pSTMT_BLOCK = "{" + pDECLS + pSTMTS + "}"
    pSTMT_BLOCK.setParseAction(lambda result: mkBlock(result[1], result[2]))

    # pSTMT_IF_1 must precede pSTMT_IF_2 so "else" branches are claimed.
    pSTMT << (pSTMT_IF_1 | pSTMT_IF_2 | pSTMT_WHILE | pSTMT_PRINT |
              pSTMT_UPDATE | pSTMT_EXPR | pSTMT_BLOCK)

    # can't attach a parse action to pSTMT because of recursion, so let's duplicate the parser
    pTOP_STMT = pSTMT.copy()
    pTOP_STMT.setParseAction(lambda result: {
        "result": "statement",
        "stmt": result[0]
    })

    pTOP_DECL = pDECL.copy()
    pTOP_DECL.setParseAction(lambda result: {
        "result": "declaration",
        "decl": result[0]
    })

    pABSTRACT = "#abs" + pSTMT
    pABSTRACT.setParseAction(lambda result: {
        "result": "abstract",
        "stmt": result[1]
    })

    pQUIT = Keyword("#quit")
    pQUIT.setParseAction(lambda result: {"result": "quit"})

    pTOP = (pQUIT | pABSTRACT | pTOP_DECL | pTOP_STMT)

    result = pTOP.parseString(input)[0]
    return result  # the first element of the result is the expression
def CORBA_IDL_BNF():
    """Build (once) and return a pyparsing grammar for CORBA IDL.

    The grammar is cached in the module-level ``bnf`` global; C-style and
    ``//`` comments are ignored.
    """
    global bnf

    if not bnf:

        # punctuation
        colon = Literal(":")
        lbrace = Literal("{")
        rbrace = Literal("}")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lparen = Literal("(")
        rparen = Literal(")")
        equals = Literal("=")
        comma = Literal(",")
        dot = Literal(".")
        slash = Literal("/")
        bslash = Literal("\\")
        star = Literal("*")
        semi = Literal(";")
        langle = Literal("<")
        rangle = Literal(">")

        # keywords
        any_ = Keyword("any")
        attribute_ = Keyword("attribute")
        boolean_ = Keyword("boolean")
        case_ = Keyword("case")
        char_ = Keyword("char")
        const_ = Keyword("const")
        context_ = Keyword("context")
        default_ = Keyword("default")
        double_ = Keyword("double")
        enum_ = Keyword("enum")
        exception_ = Keyword("exception")
        false_ = Keyword("FALSE")
        fixed_ = Keyword("fixed")
        float_ = Keyword("float")
        inout_ = Keyword("inout")
        interface_ = Keyword("interface")
        in_ = Keyword("in")
        long_ = Keyword("long")
        module_ = Keyword("module")
        object_ = Keyword("Object")
        octet_ = Keyword("octet")
        oneway_ = Keyword("oneway")
        out_ = Keyword("out")
        raises_ = Keyword("raises")
        readonly_ = Keyword("readonly")
        sequence_ = Keyword("sequence")
        short_ = Keyword("short")
        string_ = Keyword("string")
        struct_ = Keyword("struct")
        switch_ = Keyword("switch")
        true_ = Keyword("TRUE")
        typedef_ = Keyword("typedef")
        unsigned_ = Keyword("unsigned")
        union_ = Keyword("union")
        void_ = Keyword("void")
        wchar_ = Keyword("wchar")
        wstring_ = Keyword("wstring")

        identifier = Word(alphas, alphanums + "_").setName("identifier")

        #~ real = Combine( Word(nums+"+-", nums) + dot + Optional( Word(nums) )
        #~                 + Optional( CaselessLiteral("E") + Word(nums+"+-",nums) ) )
        real = Regex(r"[+-]?\d+\.\d*([Ee][+-]?\d+)?").setName("real")
        #~ integer = ( Combine( CaselessLiteral("0x") + Word( nums+"abcdefABCDEF" ) ) |
        #~             Word( nums+"+-", nums ) ).setName("int")
        integer = Regex(r"0x[0-9a-fA-F]+|[+-]?\d+").setName("int")

        # user-defined type name, possibly scoped with "::"
        udTypeName = delimitedList(identifier, "::",
                                   combine=True).setName("udType")
        # have to use longest match for type, in case a user-defined type
        # name starts with a keyword type, like "stringSeq" or "longArray"
        typeName = (any_ ^ boolean_ ^ char_ ^ double_ ^ fixed_ ^ float_ ^
                    long_ ^ octet_ ^ short_ ^ string_ ^ wchar_ ^ wstring_ ^
                    udTypeName).setName("type")
        sequenceDef = Forward().setName("seq")
        sequenceDef << Group(sequence_ + langle + (sequenceDef | typeName) +
                             rangle)
        # a type is a sequence or a (possibly array-bracketed) type name
        typeDef = sequenceDef | (typeName + Optional(lbrack + integer +
                                                     rbrack))
        typedefDef = Group(typedef_ + typeDef + identifier +
                           semi).setName("typedef")

        moduleDef = Forward()
        constDef = Group(const_ + typeDef + identifier + equals +
                         (real | integer | quotedString) +
                         semi)  #| quotedString )
        exceptionItem = Group(typeDef + identifier + semi)
        exceptionDef = (exception_ + identifier + lbrace +
                        ZeroOrMore(exceptionItem) + rbrace + semi)
        attributeDef = Optional(
            readonly_) + attribute_ + typeDef + identifier + semi
        # (inout|in|out) type name, comma-separated
        paramlist = delimitedList(
            Group((inout_ | in_ | out_) + typeName +
                  identifier)).setName("paramlist")
        operationDef = ( ( void_ ^ typeDef ) + identifier + lparen + Optional( paramlist ) + rparen + \
                        Optional( raises_ + lparen + Group( delimitedList( typeName ) ) + rparen ) + semi )
        interfaceItem = (constDef | exceptionDef | attributeDef |
                         operationDef)
        interfaceDef = Group( interface_ + identifier + Optional( colon + delimitedList( typeName ) ) + lbrace + \
                        ZeroOrMore( interfaceItem ) + rbrace + semi ).setName("opnDef")
        moduleItem = (interfaceDef | exceptionDef | constDef | typedefDef |
                      moduleDef)
        moduleDef << module_ + identifier + lbrace + ZeroOrMore(
            moduleItem) + rbrace + semi

        bnf = (moduleDef | OneOrMore(moduleItem))

        # strip both comment styles before parsing
        singleLineComment = "//" + restOfLine
        bnf.ignore(singleLineComment)
        bnf.ignore(cStyleComment)

    return bnf
# getNTPservers.py
#
# Demonstration of the parsing module, implementing a HTML page scanner,
# to extract a list of NTP time servers from the NIST web site.
#
# Copyright 2004, by Paul McGuire
#
# Modernized to Python 3: the original used Python 2 print statements and
# urllib.urlopen, neither of which exists in Python 3.
from pyparsing import Word, Combine, Suppress, CharsNotIn, nums
from urllib.request import urlopen

integer = Word(nums)
# dotted-quad IPv4 address, e.g. "129.6.15.28"
ipAddress = Combine(integer + "." + integer + "." + integer + "." + integer)
tdStart = Suppress("<td>")
tdEnd = Suppress("</td>")
# a row fragment: <td>ip</td><td>location</td>
timeServerPattern = tdStart + ipAddress.setResultsName("ipAddr") + tdEnd + \
    tdStart + CharsNotIn("<").setResultsName("loc") + tdEnd

# get list of time servers
nistTimeServerURL = "http://www.boulder.nist.gov/timefreq/service/time-servers.html"
# urlopen returns bytes in Python 3; decode before scanning, and use a
# context manager so the connection is always closed.
with urlopen(nistTimeServerURL) as serverListPage:
    serverListHTML = serverListPage.read().decode("utf-8", errors="replace")

addrs = {}
for srvr, startloc, endloc in timeServerPattern.scanString(serverListHTML):
    print(srvr.ipAddr, "-", srvr.loc)
    addrs[srvr.ipAddr] = srvr.loc
    # or do this:
    #~ addr,loc = srvr
    #~ print(addr, "-", loc)
from pyparsing import Word, StringEnd, alphas

# Demo: matching with and without an end-of-string anchor.
# Without StringEnd(), pyparsing matches the leading letters and silently
# ignores the trailing "...".
noEnd = Word(alphas)
print(noEnd.parseString('Dorking...'))

# With StringEnd(), the leftover "..." makes parseString raise a
# ParseException — this second print is expected to fail.
withEnd = Word(alphas) + StringEnd()
print(withEnd.parseString('Dorking...'))
class NginxConfigParser(object): """ Nginx config parser based on https://github.com/fatiherikli/nginxparser Parses single file into json structure """ max_size = 20*1024*1024 # 20 mb # line starts/ends line_start = LineStart().suppress() line_end = LineEnd().suppress() # constants left_brace = Literal("{").suppress() left_parentheses = Literal("(").suppress() right_brace = Literal("}").suppress() right_parentheses = Literal(")").suppress() semicolon = Literal(";").suppress() space = White().suppress() singleQuote = Literal("'").suppress() doubleQuote = Literal('"').suppress() # keys if_key = Keyword("if").setParseAction(set_line_number) set_key = Keyword("set").setParseAction(set_line_number) rewrite_key = Keyword("rewrite").setParseAction(set_line_number) perl_set_key = Keyword("perl_set").setParseAction(set_line_number) log_format_key = Keyword("log_format").setParseAction(set_line_number) alias_key = Keyword("alias").setParseAction(set_line_number) return_key = Keyword("return").setParseAction(set_line_number) error_page_key = Keyword("error_page").setParseAction(set_line_number) map_key = Keyword("map").setParseAction(set_line_number) server_name_key = Keyword("server_name").setParseAction(set_line_number) sub_filter_key = Keyword("sub_filter").setParseAction(set_line_number) # lua keys start_with_lua_key = Regex(r'lua_\S+').setParseAction(set_line_number) contains_by_lua_key = Regex(r'\S+_by_lua\S*').setParseAction(set_line_number) key = ( ~map_key & ~alias_key & ~perl_set_key & ~if_key & ~set_key & ~rewrite_key & ~server_name_key & ~sub_filter_key ) + Word(alphanums + '$_:%?"~<>\/-+.,*()[]"' + "'").setParseAction(set_line_number) # values value_one = Regex(r'[^{};]*"[^\";]+"[^{};]*') value_two = Regex(r'[^{};]*\'[^\';]+\'') value_three = Regex(r'[^{};]+((\${[\d|\w]+(?=})})|[^{};])+') value_four = Regex(r'[^{};]+(?!${.+})') value = (value_one | value_two | value_three | value_four).setParseAction(set_line_number) quotedValue = 
Regex(r'"[^;]+"|\'[^;]+\'').setParseAction(set_line_number) rewrite_value = CharsNotIn(";").setParseAction(set_line_number) any_value = CharsNotIn(";").setParseAction(set_line_number) non_space_value = Regex(r'[^\'\";\s]+').setParseAction(set_line_number) if_value = Regex(r'\(.*\)').setParseAction(set_line_number) language_include_value = CharsNotIn("'").setParseAction(set_line_number) strict_value = CharsNotIn("{};").setParseAction(set_line_number) sub_filter_value = (non_space_value | Regex(r"\'(.|\n)+?\'", )).setParseAction(set_line_number) # map values map_value_one = Regex(r'\'([^\']|\s)*\'').setParseAction(set_line_number) map_value_two = Regex(r'"([^"]|\s)*\"').setParseAction(set_line_number) map_value_three = Regex(r'((\\\s|[^{};\s])*)').setParseAction(set_line_number) map_value = (map_value_one | map_value_two | map_value_three) # modifier for location uri [ = | ~ | ~* | ^~ ] modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~") # rules assignment = ( key + Optional(space) + Optional(value) + Optional(space) + Optional(value) + Optional(space) + semicolon ).setParseAction(set_line_number) set = ( set_key + Optional(space) + any_value + Optional(space) + semicolon ).setParseAction(set_line_number) rewrite = ( rewrite_key + Optional(space) + rewrite_value + Optional(space) + semicolon ).setParseAction(set_line_number) perl_set = ( perl_set_key + Optional(space) + key + Optional(space) + singleQuote + language_include_value + singleQuote + Optional(space) + semicolon ).setParseAction(set_line_number) lua_content = ( (start_with_lua_key | contains_by_lua_key) + Optional(space) + singleQuote + language_include_value + singleQuote + Optional(space) + semicolon ).setParseAction(set_line_number) alias = ( alias_key + space + any_value + Optional(space) + semicolon ).setParseAction(set_line_number) return_ = ( (return_key | error_page_key) + space + value + Optional(space) + Optional(any_value) + Optional(space) + semicolon 
).setParseAction(set_line_number) log_format = ( log_format_key + Optional(space) + strict_value + Optional(space) + any_value + Optional(space) + semicolon ).setParseAction(set_line_number) server_name = ( server_name_key + space + any_value + Optional(space) + semicolon ).setParseAction(set_line_number) sub_filter = ( sub_filter_key + space + sub_filter_value + space + sub_filter_value + Optional(space) + semicolon ).setParseAction(set_line_number) # script map_block = Forward() map_block << Group( Group( map_key + space + map_value + space + map_value + Optional(space) ).setParseAction(set_line_number) + left_brace + Group( ZeroOrMore( Group(map_value + Optional(space) + Optional(map_value) + Optional(space) + semicolon) ).setParseAction(set_line_number) ) + right_brace ) block = Forward() block << Group( ( Group( key + Optional(space + modifier) + Optional(space) + Optional(value) + Optional(space) + Optional(value) + Optional(space) ) | Group(if_key + space + if_value + Optional(space)) ).setParseAction(set_line_number) + left_brace + Group( ZeroOrMore( Group(log_format) | Group(lua_content) | Group(perl_set) | Group(set) | Group(rewrite) | Group(alias) | Group(return_) | Group(assignment) | Group(server_name) | Group(sub_filter) | map_block | block ).setParseAction(set_line_number) ).setParseAction(set_line_number) + right_brace ) script = OneOrMore( Group(log_format) | Group(perl_set) | Group(lua_content) | Group(alias) | Group(return_) | Group(assignment) | Group(set) | Group(rewrite) | Group(sub_filter) | map_block | block ).ignore(pythonStyleComment) INCLUDE_RE = re.compile(r'[^#]*include\s+(?P<include_file>.*);') SSL_CERTIFICATE_RE = re.compile(r'[^#]*ssl_certificate\s+(?P<cert_file>.*);') def __init__(self, filename='/etc/nginx/nginx.conf'): global tokens_cache tokens_cache = {} self.filename = filename self.folder = '/'.join(self.filename.split('/')[:-1]) # stores path to folder with main config self.files = {} # to prevent cycle files and line 
indexing self.parsed_cache = {} # to cache multiple includes self.broken_files = set() # to prevent reloading broken files self.index = [] # stores index for all sections (points to file number and line number) self.ssl_certificates = [] self.errors = [] self.tree = {} def parse(self): self.tree = self.__logic_parse(self.__pyparse(self.filename)) # drop cached self.parsed_cache = None @staticmethod def get_file_info(filename): """ Returns file size, mtime and permissions :param filename: str filename :return: int, int, str - size, mtime, permissions """ size, mtime, permissions = 0, 0, '0000' try: size = os.path.getsize(filename) mtime = int(os.path.getmtime(filename)) permissions = oct(os.stat(filename).st_mode & 0777) except Exception, e: exception_name = e.__class__.__name__ message = 'failed to stat %s due to: %s' % (filename, exception_name) context.log.debug(message, exc_info=True) return size, mtime, permissions
return tuple(t[0]) omcRecord = Forward() omcValue = Forward() TRUE = Keyword("true").setParseAction(replaceWith(True)) FALSE = Keyword("false").setParseAction(replaceWith(False)) NONE = (Keyword("NONE") + Suppress("(") + Suppress(")")).setParseAction( replaceWith(None)) SOME = (Suppress(Keyword("SOME")) + Suppress("(") + omcValue + Suppress(")")) omcString = QuotedString(quoteChar='"', escChar='\\', multiline=True).setParseAction(convertString) omcNumber = Combine( Optional('-') + ('0' | Word('123456789', nums)) + Optional('.' + Word(nums)) + Optional(Word('eE', exact=1) + Word(nums + '+-', nums))) ident = Word(alphas + "_", alphanums + "_") | Combine( "'" + Word(alphanums + "!#$%&()*+,-./:;<>=?@[]^{}|~ ") + "'") fqident = Forward() fqident << ((ident + "." + fqident) | ident) omcValues = delimitedList(omcValue) omcTuple = Group(Suppress('(') + Optional(omcValues) + Suppress(')')).setParseAction(convertTuple) omcArray = Group(Suppress('{') + Optional(omcValues) + Suppress('}')).setParseAction(convertTuple) omcValue << (omcString | omcNumber | omcRecord | omcArray | omcTuple | SOME | TRUE | FALSE | NONE | Combine(fqident)) recordMember = delimitedList(Group(ident + Suppress('=') + omcValue))
from .utils import choose_one, error_exit

# matches any character that is special inside a regular expression
# (used to escape literal config text before embedding it in a regex)
REGEX_SPECIAL_CHARS = r'([\.\*\+\?\|\(\)\{\}\[\]])'
# matches an nginx "$variable" reference inside a log_format string
REGEX_LOG_FORMAT_VARIABLE = r'\$([a-zA-Z0-9\_]+)'

# predefined nginx log-format strings (combined/common style)
LOG_FORMAT_COMBINED = '$remote_addr - $remote_user [$time_local] $host $request ' \
                      '"$status" $body_bytes_sent "$http_referer" ' \
                      '"$http_user_agent" "$http_x_forwarded_for" "$request_time"'
LOG_FORMAT_COMMON = '$remote_addr - $remote_user [$time_local] ' \
                    '"$request" $status $body_bytes_sent ' \
                    '"$http_x_forwarded_for"'

# common parser element
# (Literal, Word, printables, quotedString, removeQuotes and subprocess are
# imported above this excerpt)
semicolon = Literal(';').suppress()
# nginx string parameter can contain any character except: { ; " '
parameter = Word(''.join(c for c in printables if c not in set('{;"\'')))
# which can also be quoted
parameter = parameter | quotedString.setParseAction(removeQuotes)


def detect_config_path():
    """
    Get nginx configuration file path based on `nginx -V` output

    :return: detected nginx configuration file path
    """
    # NOTE(review): this excerpt appears truncated -- error_exit() handles the
    # failure path, but the code that actually reads and parses the `nginx -V`
    # stderr output is presumably below; confirm against the full source.
    try:
        proc = subprocess.Popen(['nginx', '-V'], stderr=subprocess.PIPE)
    except OSError:
        error_exit(
            'Access log file or format was not set and nginx config file cannot be detected. ' +
            'Perhaps nginx is not in your PATH?')
from pyparsing import Combine, LineEnd, Literal, Optional, Suppress, Word, alphanums

# characters permitted inside a single colon-delimited URN field
urn_word = Word(f"{alphanums}_$?=%.&,")

# urn:great_expectations:metrics:<run_id>:<suite_name>:<metric_name>[:<metric_kwargs>]
# the "urn:great_expectations:" prefix and the ":" separators are suppressed;
# each field is captured under a named result
ge_metrics_urn = Combine(
    Suppress(Literal("urn:great_expectations:"))
    + Literal("metrics").setResultsName("urn_type")
    + Suppress(":")
    + urn_word.setResultsName("run_id")
    + Suppress(":")
    + urn_word.setResultsName("expectation_suite_name")
    + Suppress(":")
    + urn_word.setResultsName("metric_name")
    + Optional(Suppress(":") + urn_word.setResultsName("metric_kwargs"))
    + Suppress(LineEnd())  # the URN must consume the entire line
)

# urn:great_expectations:validations:<suite_name>:<metric_name>[:<metric_kwargs>]
ge_validations_urn = Combine(
    Suppress(Literal("urn:great_expectations:"))
    + Literal("validations").setResultsName("urn_type")
    + Suppress(":")
    + urn_word.setResultsName("expectation_suite_name")
    + Suppress(":")
    + urn_word.setResultsName("metric_name")
    + Optional(Suppress(":") + urn_word.setResultsName("metric_kwargs"))
    + Suppress(LineEnd())
)

# urn:great_expectations:stores:<store_name>:...
# NOTE(review): this excerpt is cut off here -- the remaining fields and the
# closing parenthesis of ge_stores_urn are not visible in this chunk.
ge_stores_urn = Combine(
    Suppress(Literal("urn:great_expectations:"))
    + Literal("stores").setResultsName("urn_type")
    + Suppress(":")
    + urn_word.setResultsName("store_name")
    + Suppress(":")
def parseImpl( self, instring, loc, doActions=True ):
    # Case-insensitive literal match: upper-case the next matchLen characters
    # and compare against the stored match string.
    # NOTE(review): method of a pyparsing ParserElement subclass -- self.match,
    # self.matchLen, self.errmsg and self.myException are set elsewhere
    # (presumably in __init__); confirm upstream.
    test = instring[ loc:loc+self.matchLen ]
    if test.upper() == self.match:
        return loc+self.matchLen, test
    #~ raise ParseException( instring, loc, self.errmsg )
    # reuse the pre-built exception object, patching location and source in place
    exc = self.myException
    exc.loc = loc
    exc.pstr = instring
    raise exc


def Sequence(token):
    """ A sequence of the token: one or more occurrences, each followed by
    ``maybeComma`` (defined elsewhere in this file; presumably an optional
    comma -- confirm)."""
    return OneOrMore(token+maybeComma)


digit_sequence = Word(nums)

sign = oneOf("+ -")


def convertToFloat(s, loc, toks):
    """Parse action: convert the first matched token to a float.

    Raises ParseException when float() rejects the token text.
    """
    try:
        return float(toks[0])
    except:
        # NOTE(review): bare except, and ParseException(loc, msg) does not
        # match pyparsing's (pstr, loc, msg) constructor order -- confirm
        # whether the reported location/message come out as intended.
        raise ParseException(loc, "invalid float format %s"%toks[0])


exponent = CaselessLiteral("e")+Optional(sign)+Word(nums)

# note that almost all these fields are optional,
# and this can match almost anything. We rely on Python's built-in
# float() function to clear out invalid values - loosely matching like this
# speeds up parsing quite a lot
total_parse_count) return parsed_ace_config if __name__ == '__main__': s = """ serverfarm host SF-ADFS-HTTPS description ** ADFS Bluecoat HTTPS Server Farm ** probe PROBE_TCP:443 rserver EU2XAPW030 443 rserver EU2XAPW031 443 rserver EUHUB02-SG001 443 inservice """ name = Word(printables) num = Word(nums) type_key = Word('type') serverfarm = Keyword('serverfarm') host = Keyword('host') grammer_12_1 = Group(serverfarm + host + name) grammer_12_2 = Group(Keyword('probe') + name) grammer_12_3 = Group(Keyword('inband-health') + Keyword('check') + name) grammer_12_4_1 = Keyword('rserver') + ~Word('host') + name + ZeroOrMore( num) grammer_12_4_2 = Keyword('inservice') + Optional(Keyword('standby')) grammer_12_4_3 = Group(Keyword('probe') + restOfLine) grammer_12_4_4 = Group(Keyword('backup-rserver') + restOfLine) grammer_12_4 = Group(grammer_12_4_1 + ZeroOrMore(grammer_12_4_3) + ZeroOrMore(grammer_12_4_4) +
def _tdb_grammar(): #pylint: disable=R0914
    """
    Convenience function for getting the pyparsing grammar of a TDB file.

    Returns a pyparsing expression that matches any single TDB command
    (ELEMENT, SPECIES, TYPE_DEFINITION, FUNCTION, PHASE, CONSTITUENT,
    PARAMETER, and the various metadata commands).

    Relies on names defined elsewhere in this module: float_number,
    TCCommand, TDB_PARAM_TYPES, chemical_formula, _make_piecewise_ast.
    """
    int_number = Word(nums).setParseAction(lambda t: [int(t[0])])
    # symbol name, e.g., phase name, function name
    symbol_name = Word(alphanums+'_:', min=1)
    # NOTE(review): this line also REBINDS symbol_name to the wider character
    # set, so the assignment directly above is dead -- confirm intentional.
    ref_phase_name = symbol_name = Word(alphanums+'_-:()/', min=1)
    # species name, e.g., CO2, AL, FE3+
    species_name = Word(alphanums+'+-*/_.', min=1) + Optional(Suppress('%'))
    # constituent arrays are colon-delimited
    # each subarray can be comma- or space-delimited
    constituent_array = Group(delimitedList(Group(OneOrMore(Optional(Suppress(',')) + species_name)), ':'))
    param_types = MatchFirst([TCCommand(param_type) for param_type in TDB_PARAM_TYPES])
    # Let sympy do heavy arithmetic / algebra parsing for us
    # a convenience function will handle the piecewise details
    # NOTE(review): when no leading float is present, the ZeroOrMore(',')
    # parse action injects the constant 0.01 -- presumably a default
    # temperature bound; confirm against the TDB format spec.
    func_expr = (float_number | ZeroOrMore(',').setParseAction(lambda t: 0.01)) + OneOrMore(SkipTo(';') \
        + Suppress(';') + ZeroOrMore(Suppress(',')) + Optional(float_number) + \
        Suppress(Word('YNyn', exact=1) | White()))
    # ELEMENT
    cmd_element = TCCommand('ELEMENT') + Word(alphas+'/-', min=1, max=2) + Optional(Suppress(ref_phase_name)) + \
        Optional(Suppress(OneOrMore(float_number))) + LineEnd()
    # SPECIES
    cmd_species = TCCommand('SPECIES') + species_name + chemical_formula + LineEnd()
    # TYPE_DEFINITION
    cmd_typedef = TCCommand('TYPE_DEFINITION') + \
        Suppress(White()) + CharsNotIn(' !', exact=1) + SkipTo(LineEnd())
    # FUNCTION
    cmd_function = TCCommand('FUNCTION') + symbol_name + \
        func_expr.setParseAction(_make_piecewise_ast)
    # ASSESSED_SYSTEMS
    cmd_ass_sys = TCCommand('ASSESSED_SYSTEMS') + SkipTo(LineEnd())
    # DEFINE_SYSTEM_DEFAULT
    cmd_defsysdef = TCCommand('DEFINE_SYSTEM_DEFAULT') + SkipTo(LineEnd())
    # DEFAULT_COMMAND
    cmd_defcmd = TCCommand('DEFAULT_COMMAND') + SkipTo(LineEnd())
    # DATABASE_INFO
    cmd_database_info = TCCommand('DATABASE_INFO') + SkipTo(LineEnd())
    # VERSION_DATE
    cmd_version_date = TCCommand('VERSION_DATE') + SkipTo(LineEnd())
    # REFERENCE_FILE
    cmd_reference_file = TCCommand('REFERENCE_FILE') + SkipTo(LineEnd())
    # ADD_REFERENCES
    cmd_add_ref = TCCommand('ADD_REFERENCES') + SkipTo(LineEnd())
    # LIST_OF_REFERENCES
    cmd_lor = TCCommand('LIST_OF_REFERENCES') + SkipTo(LineEnd())
    # TEMPERATURE_LIMITS
    cmd_templim = TCCommand('TEMPERATURE_LIMITS') + SkipTo(LineEnd())
    # PHASE
    cmd_phase = TCCommand('PHASE') + symbol_name + \
        Suppress(White()) + CharsNotIn(' !', min=1) + Suppress(White()) + \
        Suppress(int_number) + Group(OneOrMore(float_number)) + LineEnd()
    # CONSTITUENT
    cmd_constituent = TCCommand('CONSTITUENT') + symbol_name + \
        Suppress(White()) + Suppress(':') + constituent_array + \
        Suppress(':') + LineEnd()
    # PARAMETER
    cmd_parameter = TCCommand('PARAMETER') + param_types + \
        Suppress('(') + symbol_name + \
        Optional(Suppress('&') + Word(alphas+'/-', min=1, max=2), default=None) + \
        Suppress(',') + constituent_array + \
        Optional(Suppress(';') + int_number, default=0) + \
        Suppress(')') + func_expr.setParseAction(_make_piecewise_ast)
    # Now combine the grammar together
    all_commands = cmd_element | \
        cmd_species | \
        cmd_typedef | \
        cmd_function | \
        cmd_ass_sys | \
        cmd_defsysdef | \
        cmd_defcmd | \
        cmd_database_info | \
        cmd_version_date | \
        cmd_reference_file | \
        cmd_add_ref | \
        cmd_lor | \
        cmd_templim | \
        cmd_phase | \
        cmd_constituent | \
        cmd_parameter
    return all_commands
def create_ace_grammer():
    """
    This function creates grammer for ace configuration parsing.

    Builds one pyparsing expression per Cisco ACE config section (logging,
    access-list, probe, rserver, parameter-map, sticky, class-map,
    policy-map, interface, ip route, snmp-server, serverfarm, ssl-proxy,
    crypto, aaa, action-list) and returns their alternation.

    NOTE(review): Python 2 code (print statement near the end); `Keyword`,
    `Word`, `Group`, `Combine`, `Optional`, `ZeroOrMore`, `restOfLine`,
    `printables`, `nums` and `LOG` are imported/defined outside this function.

    :return grammer for parsing
    """
    # Pyparsing grammer starts here :excitement :-O
    # NOTE(review): `command` below is never used in this function.
    command = Group(
        Keyword('logging') | Keyword('access-list') | Keyword('probe'))

    # Grammer Global
    name = Word(printables)
    ipaddress = Combine(Word(nums) + ('.' + Word(nums)) * 3)
    num = Word(nums)

    # Grammer 1:
    # logging enable
    # logging timestamp
    # logging trap 9
    # logging buffered 9
    # logging host 127.0.0.1 udp/619
    log = Keyword('logging')
    single_key = Keyword('enable') | Keyword('timestamp')
    double_key = (Keyword('trap') | Keyword('buffered')) + num
    triple_key = Keyword('host') + ipaddress + name
    grammer_1 = Group(log + (single_key | double_key | triple_key))

    # Grammer 2:
    # eg : access-list FROM_INSIDE line 11 extended permit ip <ip> 255.255.255.0 any
    access = Keyword('access-list')
    in_out = Keyword('FROM_INSIDE') | Keyword('FROM_OUTSIDE')
    line = Keyword('line')
    extend = Keyword('extended')
    permit = Keyword('permit')
    ip_key = Keyword('ip')
    any_key = Keyword('any')
    ip_any = ipaddress | any_key
    grammer_2 = Group(access + in_out + line + num + extend + permit +
                      ip_key + ip_any + ip_any + ip_any)

    # Grammer 3:
    # eg: probe http prb_HTTP-1234
    #     port 1234
    #     receive 5
    #     interval 10
    #     expect status 200 200
    #     expect regex "(200|302)"
    #     ssl version all
    #     request method get url /test/test:ping
    #     passdetect interval 10
    #     open 3
    probe = Keyword('probe')
    type_key = Keyword('http') | Keyword('icmp') | Keyword('https') | Keyword(
        'tcp')
    grammer_3_1 = Group(probe + type_key + name)
    grammer_3_2 = Group(Keyword('port') + Word(nums))
    grammer_3_3 = Group(Keyword('receive') + Word(nums))
    grammer_3_4 = Group(Keyword('interval') + Word(nums))
    grammer_3_5 = Group(
        (Keyword('expect') + Keyword('status') + Word(nums) + Word(nums)) |
        (Keyword('expect') + Keyword('regex') + Word(printables)))
    # grammer_3_6 = Group(Keyword('passdetect') + Keyword('interval') + num)
    #grammer_3_7 = Group(Keyword('open') + num)
    grammer_3_6 = Group(Keyword('ssl') + Keyword('version') + Keyword('all'))
    grammer_3_7 = Group(
        Keyword('request') + Keyword('method') + Keyword('get') +
        Keyword('url') + Word(printables))
    grammer_3_8 = Group(
        Keyword('request') + Keyword('method') + Word(printables))
    grammer_3_9 = Group(
        Keyword('header') + Keyword('Host') + Keyword('header-value') +
        Word(printables))
    grammer_3 = Group(grammer_3_1 +
                      ZeroOrMore(grammer_3_2 | grammer_3_3 | grammer_3_4 |
                                 grammer_3_5 | grammer_3_6 | grammer_3_7 |
                                 grammer_3_8 | grammer_3_9))

    # grammer 4:
    # rserver host rs_Test123
    #   description TEST_DESC
    #   ip address 127.0.0.1
    #   webhost-redirection https://www.google.com/test/1234/ 301
    #   probe prb_HTTP-1234
    #   inservice
    rserver_key = Keyword('rserver')
    host = Keyword('host')
    rserver_name = Word(printables)
    grammer_4_1 = Group(rserver_key + host + rserver_name)
    grammer_4_2 = Group(Keyword('description') + restOfLine)
    # NOTE(review): Keyword with an embedded space ('ip address') -- verify
    # that pyparsing matches the two-word directive as intended.
    grammer_4_3 = Group(Keyword('ip address') + ipaddress)
    grammer_4_4 = Group(Keyword('probe') + Word(printables))
    grammer_4_5 = Group(Keyword('inservice'))
    grammer_4_6 = Group(
        Keyword('webhost-redirection') + Word(printables) + num)
    grammer_4 = Group(grammer_4_1 +
                      ZeroOrMore(grammer_4_2 | grammer_4_3 | grammer_4_4 |
                                 grammer_4_5 | grammer_4_6))

    # grammer 5
    # parameter-map type <connection|http|ssl> ALLOW_TEST
    #   tcp-options selective-ack allow
    #   tcp-options timestamp allow
    #   tcp-options window-scale allow
    #   persistence-rebalance strict
    #   set timeout inactivity 9999
    #   session-cache timeout 300
    #   queue-delay timeout 1
    #   set header-maxparse-length 65535
    #   set content-maxparse-length 65535
    #   cipher RSA_EXPORT1024_WITH_RC4_56_SHA
    # NOTE(review): several of these use Word('...') rather than
    # Keyword('...') -- Word('type') matches any run of the characters
    # t/y/p/e, not the literal word "type"; confirm this looseness is
    # intentional.
    param_key = Keyword('parameter-map')
    type_key = Word('type')
    connection = Word('connection') | Word('http') | Word('ssl')
    param_name = Word(printables)
    tcp_key = Word('tcp-options')
    tcp_type = Keyword('timestamp') | Keyword('window-scale') | Keyword(
        'selective-ack')
    allow = Word('allow')
    sess_queue = Keyword('session-cache') | Keyword('queue-delay')
    timeout = Keyword('timeout')
    set = Keyword('set')
    length = Keyword('header-maxparse-length') | Keyword(
        'content-maxparse-length')
    grammer_5_1 = Group(param_key + type_key + connection + param_name)
    grammer_5_2 = Group(tcp_key + tcp_type + allow)
    grammer_5_3 = Group(Keyword('persistence-rebalance') + Keyword('strict'))
    grammer_5_4 = Group(
        Keyword('set') + Keyword('timeout') + Keyword('inactivity') +
        Word(nums))
    grammer_5_5 = Group(set + length + num)
    grammer_5_6 = Group(sess_queue + timeout + num)
    grammer_5_7 = Group(Keyword('cipher') + name)
    grammer_5_8 = Keyword('case-insensitive')
    grammer_5_9 = Group(Keyword('parsing') + name)
    grammer_5_10 = Group(Keyword('exceed-mss') + name)
    # NOTE(review): grammer_5_5 is defined above but missing from this
    # alternation (the list jumps 5_4 -> 5_6) -- possible oversight; confirm.
    grammer_5 = Group(grammer_5_1 +
                      ZeroOrMore(grammer_5_2 | grammer_5_3 | grammer_5_4 |
                                 grammer_5_6 | grammer_5_7 | grammer_5_8 |
                                 grammer_5_9 | grammer_5_10))

    # Grammer 6:
    # sticky ip-netmask 255.255.255.255 address source test-adfdas-$5D
    # sticky http-cookie TEST TEST_COOKIE
    #   serverfarm sf_TEST
    #   timeout 1000
    #   replicate sticky
    #   cookie insert browser-expire
    #   8 static cookie-value "ONETXEIS" rserver ESC20_TXEIS_APP_1 443
    sticky = Keyword('sticky')
    ipnetmask = Keyword('ip-netmask')
    http_cookie = Keyword('http-cookie')
    address = Keyword('address')
    source = Keyword('source')
    sticky_name = Word(printables)
    cookie = Keyword('cookie')
    insert = Keyword('insert')
    browser_expire = Keyword('browser-expire')
    static = Keyword('static')
    cookie_val = Keyword('cookie-value')
    grammer_6_1 = Group(sticky + ipnetmask + ipaddress + address + source +
                        sticky_name) | Group(sticky + http_cookie + name +
                                             name)
    grammer_6_2 = Group(Keyword('serverfarm') + Word(printables))
    grammer_6_3 = Group(Keyword('timeout') + Word(nums))
    grammer_6_4 = Group(Keyword('replicate') + sticky)
    grammer_6_5 = Group(cookie + insert + browser_expire)
    grammer_6_6 = Group(num + static + cookie_val + name + rserver_key +
                        name + num)
    grammer_6 = Group(grammer_6_1 +
                      ZeroOrMore(grammer_6_2 | grammer_6_3 | grammer_6_4 |
                                 grammer_6_5 | grammer_6_6))

    # grammer7:
    # class-map type management match-any TEST-PROTOCOLS
    # class-map match-any TEST_TEST_123
    # class-map match-all TEST_TEST_123
    #   2 match protocol icmp source-address 127.0.0.1 255.0.0.0
    #   3 match protocol snmp source-address 127.0.0.1 255.255.255.0
    #   2 match destination-address 127.0.0.1 255.255.255.0
    #   3 match source-address 127.0.0.1 255.255.255.0
    #   2 match virtual-address 127.0.0.1 tcp eq 1234
    #   2 match virtual-address 127.0.0.1 tcp any
    #   2 match http url .*
    description = Keyword('description')
    classmap = Keyword('class-map')
    # NOTE(review): ipaddress is redefined here identically to the global
    # definition above; `type_key_att` below is never used in this function.
    ipaddress = Combine(Word(nums) + ('.' + Word(nums)) * 3)
    classmap_type = Keyword('type')
    mgmt = Keyword('management') | (Keyword('http') + Keyword('loadbalance'))
    type_key_att = classmap_type + mgmt
    match_key = Keyword('match-any') | Keyword('match-all')
    grammer7_1 = Group(classmap + match_key + name)
    # match_key is deliberately rebound: header lines use match-any/match-all,
    # body lines use the bare "match" keyword.
    match_key = Keyword('match')
    proto_key = Keyword('protocol')
    grammer_url = Group(num + match_key + Keyword('http') + Keyword('url') +
                        name)
    proto_type = Keyword('tcp') | Keyword('icmp') | Keyword('snmp') | Keyword(
        'http') | Keyword('https') | Keyword('udp')
    proto = proto_key + proto_type
    source_dest = Keyword('source-address') | Keyword('destination-address')
    virtual_add = Keyword('virtual-address')
    eq_key = Keyword('eq')
    eq_val = (Keyword('ftp-data') | Keyword('https') | Keyword('www') |
              Keyword('http') | Keyword('ftp') | num)
    any_key = Keyword('any')
    range_key = Keyword('range')
    add_att = Optional(proto) + source_dest + ipaddress + ipaddress
    virt_att = virtual_add + ipaddress + \
        proto_type + ((eq_key + eq_val) | any_key | (range_key + num + num))
    grammer7_3 = Group(description + restOfLine)
    grammer7_2 = Group(num + match_key + (add_att | virt_att)) | grammer_url
    grammer_7 = Group(grammer7_1 + Optional(grammer7_3) +
                      ZeroOrMore(grammer7_2))

    # grammer8:
    # policy-map type loadbalance first-match LB_TEST_MAP_1235
    #   class class-default
    #     serverfarm TEST_FARM_2
    #     sticky-serverfarm TEST_FARM_2
    #     connection advanced-options TEST_CONN123
    #     loadbalance vip inservice
    #     loadbalance vip icmp-reply
    #     loadbalance policy LB_TEST_123
    #     inspect ftp
    #     ssl-proxy server ssl_name
    #     nat dynamic 5 vlan 2100
    #     appl-parameter http advanced-options ADV-HTTP
    #     connection advanced-options NETSETTINGS
    #     action test_rewrite
    policy_key = Keyword('policy-map')
    lb_key = Keyword('loadbalance')
    http_kw = Keyword('http')
    match = Keyword('first-match') | Keyword('multi-match')
    # NOTE(review): Optional(http_kw).ignore(http_kw) is unusual -- .ignore()
    # normally takes a comment/whitespace expression; confirm the intent.
    # type_key here is the Word('type') rebound in the grammer-5 section.
    grammer_8_1 = Group(policy_key +
                        Optional(type_key + lb_key +
                                 Optional(http_kw).ignore(http_kw)) +
                        match + name)
    grammer_8_2_1 = Group(Keyword('class') + name)
    grammer_8_2_2 = Group(((Keyword('serverfarm') | Keyword('action') |
                            Keyword('sticky-serverfarm')) + name) |
                          Keyword('drop') |
                          Keyword('insert-http') + restOfLine)
    grammer_8_2_3 = Group(
        Keyword('connection') + Keyword('advanced-option') + name)
    lb_vip = Keyword('vip') + (
        Keyword('inservice') | Keyword('icmp-reply') + ZeroOrMore(
            Keyword('active') + ZeroOrMore(Keyword('primary-inservice'))) |
        Keyword('inservice'))
    lb_policy = Keyword('policy') + name
    grammer_8_2_4 = Group(Keyword('loadbalance') + (lb_vip | lb_policy))
    grammer_8_2_5 = Group(Keyword('inspect') + Keyword('ftp'))
    grammer_8_2_6 = Group(Keyword('ssl-proxy') + Keyword('server') + name)
    grammer_8_2_7 = Group(
        Keyword('nat') + Keyword('dynamic') + num + Keyword('vlan') + num)
    grammer_8_2_8 = Group(
        Keyword('appl-parameter') + Keyword('http') +
        Keyword('advanced-options') + name)
    grammer_8_2_9 = Group(
        Keyword('connection') + Keyword('advanced-options') + name)
    grammer_8_2_10 = Group(Keyword('action') + name)
    grammer_8_3 = Group(Keyword('description') + restOfLine)
    grammer_8_2 = Group(
        grammer_8_2_1 + ZeroOrMore(grammer_8_2_2 | grammer_8_2_3 |
                                   grammer_8_2_4 | grammer_8_2_5 |
                                   grammer_8_2_6 | grammer_8_2_7 |
                                   grammer_8_2_8 | grammer_8_2_9 |
                                   grammer_8_2_10))
    grammer_8 = Group(grammer_8_1 + ZeroOrMore(grammer_8_3) +
                      ZeroOrMore(grammer_8_2))

    # grammer9:
    # interface vlan 1011
    #   ip address 127.0.0.1 255.255.255.0
    #   alias 127.0.0.1 255.255.255.0
    #   peer ip address 127.0.0.1 255.255.255.0
    #   access-group input FROM_TEST
    #   service-policy input TEST_ACCESS
    #   service-policy input vs_TEST
    #   service-policy input TEST_POLICY_8080
    #   no shutdown
    #   nat-pool 1 127.0.0.1 127.0.0.1 netmask 255.255.255.255 pat
    grammer_9_1 = Group(Keyword('interface') + Keyword('vlan') + num)
    grammer_9_2 = Group(ip_key + address + ipaddress + ipaddress)
    grammer_9_3 = Group(Keyword('alias') + ipaddress + ipaddress)
    grammer_9_4 = Group(
        Keyword('peer') + ip_key + address + ipaddress + ipaddress)
    grammer_9_5 = Group(Keyword('access-group') + Keyword('input') + name)
    grammer_9_6 = Group(Keyword('service-policy') + Keyword('input') + name)
    grammer_9_7 = Group(Keyword('no') + Keyword('shutdown'))
    grammer_9_8 = Group(
        Keyword('nat-pool') + num + ipaddress + ipaddress +
        Keyword('netmask') + ipaddress + Keyword('pat'))
    grammer_9 = Group(grammer_9_1 +
                      ZeroOrMore(grammer_9_2 | grammer_9_3 | grammer_9_4 |
                                 grammer_9_5 | grammer_9_6 | grammer_9_7 |
                                 grammer_9_8))

    # grammer 10:
    # ip route 0.0.0.0 0.0.0.0 127.0.0.1
    grammer_10 = Group(ip_key + Keyword('route') + ipaddress + ipaddress)

    # grammer 11:
    # snmp-server host 127.0.0.1 traps version 2c ********
    # snmp-server enable traps slb k7server
    snmp = Keyword('snmp-server')
    host = Keyword('host')
    traps = Keyword('traps')
    slb = Keyword('slb')
    version = Keyword('version')
    enable = Keyword('enable')
    host_att = host + ipaddress + traps + version + name + name
    ord_att = enable + traps + slb + name
    grammer_11 = Group(snmp + (host_att | ord_att))

    # grammer 12
    # serverfarm host TEST_TEST_79
    #   probe probe_TEST_123
    #   inband-health check count
    #   predictor leastconns slowstart 30
    #   rserver RS_TEST123
    #     inservice
    serverfarm = Keyword('serverfarm')
    host = Keyword('host')
    grammer_12_1 = Group(serverfarm + host + name)
    grammer_12_2 = Group(Keyword('probe') + name)
    grammer_12_3 = Group(Keyword('inband-health') + Keyword('check') + name)
    # ~Word('host') is a negative lookahead: reject "rserver host ..." lines
    # here (those belong to grammer_4).
    grammer_12_4_1 = Keyword('rserver') + ~Word('host') + name + ZeroOrMore(
        num)
    grammer_12_4_2 = Keyword('inservice') + Optional(Keyword('standby'))
    grammer_12_4_3 = Group(Keyword('probe') + restOfLine)
    grammer_12_4_4 = Group(Keyword('backup-rserver') + restOfLine)
    grammer_12_4 = Group(grammer_12_4_1 + ZeroOrMore(grammer_12_4_3) +
                         ZeroOrMore(grammer_12_4_4) +
                         ZeroOrMore(grammer_12_4_2))
    grammer_12_5 = Group(
        Keyword('predictor') + Keyword('leastconns') + Keyword('slowstart') +
        num)
    grammer_12_6 = Group(Keyword('description') + restOfLine)
    grammer_12_7 = Group(Keyword('predictor') + restOfLine)
    grammer_12_8 = Group(Keyword('retcode') + restOfLine)
    grammer_12_9 = Group(Keyword('failaction') + restOfLine)
    grammer_12_10 = Keyword('fail-on-all')
    grammer_12 = Group(grammer_12_1 +
                       ZeroOrMore(grammer_12_2 | grammer_12_3 | grammer_12_4 |
                                  grammer_12_5 | grammer_12_6 | grammer_12_7 |
                                  grammer_12_8 | grammer_12_9 |
                                  grammer_12_10))

    # grammer ssl
    # ssl-proxy service SSL_CLIENT
    #   key KEY12.PEM
    #   cert CERT12.PEM
    #   ssl advanced-options PM1
    grammer_ssl = Group(Keyword('ssl-proxy') + Keyword('service') + name)
    grammer_ssl_key = Group(Keyword('key') + name)
    grammer_ssl_cert = Group(Keyword('cert') + name)
    grammer_ssl_chaingroup = Group(Keyword('chaingroup') + name)
    grammer_ssl_opt = Group(
        Keyword('ssl') + Keyword('advanced-options') + name)
    grammer_ssl_comp = Group(grammer_ssl +
                             ZeroOrMore(grammer_ssl_key | grammer_ssl_cert |
                                        grammer_ssl_chaingroup |
                                        grammer_ssl_opt))

    # Grammer crypto:
    # eg: crypto chaingroup ACME-PROD-CA_CHAINGROUP
    #       cert acme-prod-root-ca_24092044.crt
    #       cert acme-prod-issuing-ca_22102028.crt
    #
    #     crypto csr-params llprd-frontend-csr
    #       country DK
    #       state Sealand
    #       organization-name ACME
    #       organization-unit ACME Input Management
    #       common-name tcpwebprod.prod.acmeintern.dk
    grammer_crypto_1 = Group(Keyword('crypto') + Keyword('chaingroup') + name)
    grammer_crypto_2 = Group(Keyword('cert') + name)
    grammer_crypto_3 = Group(grammer_crypto_1 + ZeroOrMore(grammer_crypto_2))
    #grammer for crypto csr-params
    grammer_crypto_4 = Group(Keyword('crypto') + Keyword('csr-params') + name)
    grammer_crypto_5 = Group(Keyword('country') + name)
    grammer_crypto_6 = Group(Keyword('state') + name)
    grammer_crypto_7 = Group(Keyword('organization-name') + restOfLine)
    grammer_crypto_8 = Group(Keyword('organization-unit') + name)
    grammer_crypto_9 = Group(Keyword('common-name') + name)
    grammer_crypto_10 = Group(grammer_crypto_4 +
                              ZeroOrMore(grammer_crypto_5 | grammer_crypto_6 |
                                         grammer_crypto_7 | grammer_crypto_8 |
                                         grammer_crypto_9))

    # aaa authentication login default group TAC_PLUS local
    # aaa accounting default group TAC_PLUS
    # NOTE(review): Keyword('authentication login') embeds a space -- verify
    # it matches the two-word directive as intended.
    grammer_aaa_1 = Keyword('aaa')
    grammer_aaa_2 = Keyword('authentication login') | Keyword('accounting')
    grammer_aaa_3 = Keyword('default')
    grammer_aaa_4 = Keyword('group')
    grammer_aaa_5 = Keyword('local')
    grammer_aaa = Group(grammer_aaa_1 + grammer_aaa_2 + grammer_aaa_3 +
                        grammer_aaa_4 + (name | grammer_aaa_5))

    # action-list type modify http test-ssl-rewrite
    #   ssl url rewrite location ".*"
    #   header rewrite request Host header-value "(.*)" replace "%1\/"
    grammer_al_1 = Keyword('action-list')
    grammer_al_2 = Keyword('type')
    grammer_al_3 = Keyword('modify')
    grammer_al_4 = Keyword('http')
    grammer_al_5 = Keyword('ssl')
    grammer_al_6 = Keyword('url')
    grammer_al_7 = Keyword('rewrite')
    grammer_al_8 = Keyword('location')
    grammer_al_9 = Keyword('header')
    grammer_al_10 = Keyword('request')
    grammer_al_11 = Keyword('Host')
    grammer_al_12 = Keyword('header-value')
    grammer_al_13 = Keyword('replace')
    grammer_al_1_1 = Group(grammer_al_5 + grammer_al_6 + grammer_al_7 +
                           grammer_al_8 + name)
    grammer_al_1_2 = Group(grammer_al_9 + grammer_al_7 + grammer_al_10 +
                           grammer_al_11 + grammer_al_12 + name +
                           grammer_al_13 + name)
    grammer_al = Group(
        Group(grammer_al_1 + grammer_al_2 + grammer_al_3 + grammer_al_4 +
              name) + ZeroOrMore(grammer_al_1_1 | grammer_al_1_2))

    # Overall Grammer: any one top-level ACE config section
    grammer = Group(grammer_1 | grammer_2 | grammer_3 | grammer_4 |
                    grammer_5 | grammer_6 | grammer_7 | grammer_8 |
                    grammer_9 | grammer_10 | grammer_11 | grammer_12 |
                    grammer_ssl_comp | grammer_aaa | grammer_crypto_3 |
                    grammer_crypto_10 | grammer_al)
    print "Grammer created for ace config parser."
    LOG.info("Grammer created for ace config parser.")
    return grammer
def __init__(self):
    """Build the pyparsing grammars for PDDL domain and problem files.

    Stores three grammar objects on the instance:
      * ``self._domain``  — parses a ``(define (domain ...) ...)`` form
      * ``self._problem`` — parses a ``(define (problem ...) ...)`` form
      * ``self._parameters`` — typed-parameter list sub-grammar, kept for reuse

    ``;`` line comments are ignored in both top-level grammars.
    """
    # A PDDL identifier: letter first, then letters/digits/'_'/'-'.
    name = Word(alphas, alphanums + '_' + '-')
    # A variable is '?name'; the '?' itself is dropped from the results.
    variable = Suppress('?') + name
    # (:requirements :strips :typing ...) — one or more requirement flags.
    require_def = Suppress('(') + ':requirements' + \
        OneOrMore(one_of(':strips :typing :negative-preconditions :disjunctive-preconditions :equality :existential-preconditions :universal-preconditions :quantified-preconditions :conditional-effects :fluents :numeric-fluents :adl :durative-actions :duration-inequalities :timed-initial-literals :action-costs')) \
        + Suppress(')')
    # (:types a b - parent c ...) — groups of names with an optional '- type' tag.
    types_def = Suppress('(') + ':types' + \
        OneOrMore(Group(Group(OneOrMore(name)) + \
        Optional(Suppress('-') + name))).setResultsName('types') \
        + Suppress(')')
    # (:constants ...) — same shape as :types.
    constants_def = Suppress('(') + ':constants' + \
        OneOrMore(Group(Group(OneOrMore(name)) + \
        Optional(Suppress('-') + name))).setResultsName('constants') \
        + Suppress(')')
    # (pred ?v1 ?v2 - type ...) — predicate name plus typed variable groups.
    predicate = Suppress('(') + \
        Group(name + Group(ZeroOrMore(Group(Group(OneOrMore(variable)) + \
        Optional(Suppress('-') + name))))) \
        + Suppress(')')
    predicates_def = Suppress('(') + ':predicates' + \
        Group(OneOrMore(predicate)).setResultsName('predicates') \
        + Suppress(')')
    # (:functions ...) reuses the predicate shape for numeric fluents.
    functions_def = Suppress('(') + ':functions' + \
        Group(OneOrMore(predicate)).setResultsName('functions') \
        + Suppress(')')
    # Typed parameter list; shared by :action and :durative-action below.
    parameters = ZeroOrMore(Group(Group(OneOrMore(variable)) \
        + Optional(Suppress('-') + name))).setResultsName('params')
    # (:action name :parameters (...) [:precondition ...] [:effect ...])
    # precondition/effect bodies are captured as raw nested s-expressions.
    action_def = Group(Suppress('(') + ':action' + name.setResultsName('name') \
        + ':parameters' + Suppress('(') + parameters + Suppress(')') \
        + Optional(':precondition' + nestedExpr().setResultsName('pre')) \
        + Optional(':effect' + nestedExpr().setResultsName('eff')) \
        + Suppress(')'))
    # (:durative-action ...) — duration/condition/effect are mandatory here.
    dur_action_def = Group(Suppress('(') + ':durative-action' + name.setResultsName('name') \
        + ':parameters' + Suppress('(') + parameters + Suppress(')') \
        + ':duration' + nestedExpr().setResultsName('duration') \
        + ':condition' + nestedExpr().setResultsName('cond') \
        + ':effect' + nestedExpr().setResultsName('eff') \
        + Suppress(')'))
    # Whole domain file: sections are optional and expected in this order.
    domain = Suppress('(') + 'define' \
        + Suppress('(') + 'domain' + name.setResultsName('name') + Suppress(')') \
        + Optional(require_def) + Optional(types_def) + Optional(constants_def) \
        + Optional(predicates_def) + Optional(functions_def) \
        + Group(ZeroOrMore(action_def | dur_action_def)).setResultsName('actions') + Suppress(')')
    # (:objects a b - type ...) — same grouped-names shape as :types.
    objects = OneOrMore(Group(Group(OneOrMore(name)) \
        + Optional(Suppress('-') + name))).setResultsName('objects')
    # (:metric minimize|maximize <expr>)
    metric = (Keyword('minimize') | Keyword('maximize')).setResultsName('optimization') \
        + (name | nestedExpr()).setResultsName('metric')
    # Whole problem file; :init facts and :goal kept as raw s-expressions.
    problem = Suppress('(') + 'define' \
        + Suppress('(') + 'problem' + name.setResultsName('name') + Suppress(')') \
        + Suppress('(') + ':domain' + name + Suppress(')') + Optional(require_def) \
        + Optional(Suppress('(') + ':objects' + objects + Suppress(')')) \
        + Suppress('(') + ':init' + ZeroOrMore(nestedExpr()).setResultsName('init') + Suppress(')') \
        + Suppress('(') + ':goal' + nestedExpr().setResultsName('goal') + Suppress(')') \
        + Optional(Suppress('(') + ':metric' + metric + Suppress(')')) \
        + Suppress(')')
    # ';' starts a comment that runs to end of line in PDDL.
    domain.ignore(';' + restOfLine)
    problem.ignore(';' + restOfLine)
    self._domain = domain
    self._problem = problem
    self._parameters = parameters
class DdlParse(DdlParseBase):
    """DDL parser.

    Parses a ``CREATE TABLE`` statement with a pyparsing grammar built once
    as class attributes, and fills a ``DdlParseTable`` with the table name,
    schema, columns and column constraints.
    """

    # Single-character punctuation tokens, all suppressed from results.
    _LPAR, _RPAR, _COMMA, _SEMICOLON, _DOT, _DOUBLEQUOTE, _BACKQUOTE, _SPACE = map(
        Suppress, "(),;.\"` ")
    # Case-insensitive SQL keywords. NOTE(review): multi-word entries such as
    # "NOT NULL" become a single CaselessKeyword of the whole phrase.
    _CREATE, _TABLE, _TEMP, _CONSTRAINT, _NOT_NULL, _PRIMARY_KEY, _UNIQUE, _UNIQUE_KEY, _FOREIGN_KEY, _REFERENCES, _KEY, _CHAR_SEMANTICS, _BYTE_SEMANTICS = \
        map(CaselessKeyword, "CREATE, TABLE, TEMP, CONSTRAINT, NOT NULL, PRIMARY KEY, UNIQUE, UNIQUE KEY, FOREIGN KEY, REFERENCES, KEY, CHAR, BYTE".replace(", ", ",").split(","))
    # MySQL numeric type attributes.
    _TYPE_UNSIGNED, _TYPE_ZEROFILL = \
        map(CaselessKeyword, "UNSIGNED, ZEROFILL".replace(", ", ",").split(","))
    # Redshift / MySQL column attributes.
    _COL_ATTR_DISTKEY, _COL_ATTR_SORTKEY, _COL_ATTR_CHARACTER_SET = \
        map(CaselessKeyword, "DISTKEY, SORTKEY, CHARACTER SET".replace(", ", ",").split(","))
    # Foreign-key MATCH clause (MATCH <word>).
    _FK_MATCH = \
        CaselessKeyword("MATCH") + Word(alphanums + "_")
    _FK_ON, _FK_ON_OPT_RESTRICT, _FK_ON_OPT_CASCADE, _FK_ON_OPT_SET_NULL, _FK_ON_OPT_NO_ACTION = \
        map(CaselessKeyword, "ON, RESTRICT, CASCADE, SET NULL, NO ACTION".replace(", ", ",").split(","))
    # ON DELETE / ON UPDATE referential actions.
    _FK_ON_DELETE = \
        _FK_ON + CaselessKeyword("DELETE") + (_FK_ON_OPT_RESTRICT | _FK_ON_OPT_CASCADE | _FK_ON_OPT_SET_NULL | _FK_ON_OPT_NO_ACTION)
    _FK_ON_UPDATE = \
        _FK_ON + CaselessKeyword("UPDATE") + (_FK_ON_OPT_RESTRICT | _FK_ON_OPT_CASCADE | _FK_ON_OPT_SET_NULL | _FK_ON_OPT_NO_ACTION)
    # Identifier quoting: backquote (MySQL) or double quote (standard SQL).
    _SUPPRESS_QUOTE = _BACKQUOTE | _DOUBLEQUOTE
    # '--' line comment, discarded.
    _COMMENT = Suppress("--" + Regex(r".+"))

    # Full CREATE TABLE grammar: header, then a comma-delimited list of
    # column definitions, table constraints, ignored indexes and comments.
    _CREATE_TABLE_STATEMENT = Suppress(_CREATE) + Optional(_TEMP)("temp") + Suppress(_TABLE) + Optional(Suppress(CaselessKeyword("IF NOT EXISTS"))) \
        + Optional(_SUPPRESS_QUOTE) + Optional(Word(alphanums + "_")("schema") + Optional(_SUPPRESS_QUOTE) + _DOT + Optional(_SUPPRESS_QUOTE)) + Word(alphanums + "_<>")("table") + Optional(_SUPPRESS_QUOTE) \
        + _LPAR \
        + delimitedList(
            OneOrMore(
                _COMMENT
                |
                # Ignore Index
                Suppress(_KEY + Word(alphanums + "_'`() "))
                |
                # Table-level constraint: optional CONSTRAINT name, then either
                # a PRIMARY KEY/UNIQUE/NOT NULL column list or a FOREIGN KEY.
                Group(
                    Optional(Suppress(_CONSTRAINT) + Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_")("name") + Optional(_SUPPRESS_QUOTE))
                    + (
                        (
                            (_PRIMARY_KEY ^ _UNIQUE ^ _UNIQUE_KEY ^ _NOT_NULL)("type")
                            + Optional(_SUPPRESS_QUOTE) + Optional(Word(alphanums + "_"))("name") + Optional(_SUPPRESS_QUOTE)
                            + _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_") + Optional(_SUPPRESS_QUOTE)))("constraint_columns") + _RPAR
                        )
                        |
                        (
                            (_FOREIGN_KEY)("type")
                            + _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_") + Optional(_SUPPRESS_QUOTE)))("constraint_columns") + _RPAR
                            + Optional(Suppress(_REFERENCES)
                                + Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_")("references_table") + Optional(_SUPPRESS_QUOTE)
                                + _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_") + Optional(_SUPPRESS_QUOTE)))("references_columns") + _RPAR
                                + Optional(_FK_MATCH)("references_fk_match")  # MySQL
                                + Optional(_FK_ON_DELETE)("references_fk_on_delete")  # MySQL
                                + Optional(_FK_ON_UPDATE)("references_fk_on_update")  # MySQL
                            )
                        )
                    )
                )("constraint")
                |
                # Column definition: (possibly quoted) name, type with optional
                # length/semantics/modifiers, then inline constraints in any
                # order (the '&' operator in pyparsing = Each, order-free).
                Group(
                    ((_SUPPRESS_QUOTE + Word(alphanums + " _")("name") + _SUPPRESS_QUOTE) ^ (Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_")("name") + Optional(_SUPPRESS_QUOTE)))
                    + Group(
                        Group(
                            Word(alphanums + "_")
                            + Optional(CaselessKeyword("WITHOUT TIME ZONE") ^ CaselessKeyword("WITH TIME ZONE") ^ CaselessKeyword("PRECISION") ^ CaselessKeyword("VARYING"))
                        )("type_name")
                        + Optional(_LPAR + Regex(r"[\d\*]+\s*,*\s*\d*")("length") + Optional(_CHAR_SEMANTICS | _BYTE_SEMANTICS)("semantics") + _RPAR)
                        + Optional(_TYPE_UNSIGNED)("unsigned")
                        + Optional(_TYPE_ZEROFILL)("zerofill")
                    )("type")
                    # NOTE(review): Word(r"\[\]") is a raw string, so it also
                    # admits backslash characters — presumably "[]" was meant;
                    # confirm before changing.
                    + Optional(Word(r"\[\]"))("array_brackets")
                    + Optional(
                        # Lookahead: stop before a '--' comment.
                        Regex(r"(?!--)", re.IGNORECASE)
                        + Group(
                            Optional(Regex(r"\b(?:NOT\s+)?NULL?\b", re.IGNORECASE))("null")
                            & Optional(Regex(r"\bAUTO_INCREMENT\b", re.IGNORECASE))("auto_increment")
                            & Optional(Regex(r"\b(UNIQUE|PRIMARY)(?:\s+KEY)?\b", re.IGNORECASE))("key")
                            & Optional(Regex(
                                r"\bDEFAULT\b\s+(?:((?:[A-Za-z0-9_\.\'\" -\{\}]|[^\x01-\x7E])*\:\:(?:character varying)?[A-Za-z0-9\[\]]+)|(?:\')((?:\\\'|[^\']|,)+)(?:\')|(?:\")((?:\\\"|[^\"]|,)+)(?:\")|([^,\s]+))",
                                re.IGNORECASE))("default")
                            & Optional(Regex(r"\bCOMMENT\b\s+(\'(\\\'|[^\']|,)+\'|\"(\\\"|[^\"]|,)+\"|[^,\s]+)", re.IGNORECASE))("comment")
                            & Optional(Regex(r"\bENCODE\s+[A-Za-z0-9]+\b", re.IGNORECASE))("encode")  # Redshift
                            & Optional(_COL_ATTR_DISTKEY)("distkey")  # Redshift
                            & Optional(_COL_ATTR_SORTKEY)("sortkey")  # Redshift
                            & Optional(Suppress(_COL_ATTR_CHARACTER_SET) + Word(alphanums + "_")("character_set"))  # MySQL
                        )("constraint")
                    )
                )("column")
                |
                _COMMENT
            )
        )("columns")

    # Top-level entry point: comments and CREATE TABLE statements.
    _DDL_PARSE_EXPR = Forward()
    _DDL_PARSE_EXPR << OneOrMore(_COMMENT | _CREATE_TABLE_STATEMENT)

    def __init__(self, ddl=None, source_database=None):
        """
        :param ddl: DDL script text (may also be supplied later via
            the ``ddl`` property or ``parse()``).
        :param source_database: enum DdlParse.DATABASE
        """
        super().__init__(source_database)
        self._ddl = ddl
        self._table = DdlParseTable(source_database)

    @property
    def source_database(self):
        """
        Source database option

        :param source_database: enum DdlParse.DATABASE
        """
        return super().source_database

    @source_database.setter
    def source_database(self, source_database):
        # Invoke the base-class property setter explicitly, then keep the
        # table object in sync with the new database flavor.
        super(self.__class__, self.__class__).source_database.__set__(self, source_database)
        self._table.source_database = source_database

    @property
    def ddl(self):
        """DDL script"""
        return self._ddl

    @ddl.setter
    def ddl(self, ddl):
        self._ddl = ddl

    def parse(self, ddl=None, source_database=None):
        """
        Parse DDL script.

        :param ddl: DDL script
        :param source_database: enum DdlParse.DATABASE (optional override)
        :return: DdlParseTable, Parsed table define info.
        :raises ValueError: if no DDL script has been supplied.
        """
        if ddl is not None:
            self._ddl = ddl
        if source_database is not None:
            self.source_database = source_database
        if self._ddl is None:
            raise ValueError("DDL is not specified")
        # NOTE(review): results of multiple CREATE TABLE statements are merged
        # by pyparsing, so effectively a single table is populated per call.
        ret = self._DDL_PARSE_EXPR.parseString(self._ddl)
        # print(ret.dump())
        if "schema" in ret:
            self._table.schema = ret["schema"]
        self._table.name = ret["table"]
        self._table.is_temp = True if "temp" in ret else False
        for ret_col in ret["columns"]:
            if ret_col.getName() == "column":
                # add column
                col = self._table.columns.append(
                    column_name=ret_col["name"],
                    data_type_array=ret_col["type"],
                    array_brackets=ret_col['array_brackets'] if "array_brackets" in ret_col else None,
                    constraint=ret_col['constraint'] if "constraint" in ret_col else None)
            elif ret_col.getName() == "constraint":
                # set column constraint
                for col_name in ret_col["constraint_columns"]:
                    col = self._table.columns[col_name]
                    if ret_col["type"] == "PRIMARY KEY":
                        col.not_null = True
                        col.primary_key = True
                    elif ret_col["type"] in ["UNIQUE", "UNIQUE KEY"]:
                        col.unique = True
                    elif ret_col["type"] == "NOT NULL":
                        col.not_null = True
        return self._table
def __init__(self, query): self._methods = { 'and': self.evaluate_and, 'or': self.evaluate_or, 'not': self.evaluate_not, 'parenthesis': self.evaluate_parenthesis, 'quotes': self.evaluate_quotes, 'word': self.evaluate_word, } self.line = '' self.query = query.lower() if query else '' if self.query: # TODO: Cleanup operator_or = Forward() operator_word = Group(Word(alphanums)).setResultsName('word') operator_quotes_content = Forward() operator_quotes_content << ( (operator_word + operator_quotes_content) | operator_word) operator_quotes = Group( Suppress('"') + operator_quotes_content + Suppress('"')).setResultsName('quotes') | operator_word operator_parenthesis = Group( (Suppress('(') + operator_or + Suppress(")") )).setResultsName('parenthesis') | operator_quotes operator_not = Forward() operator_not << ( Group(Suppress(Keyword('no', caseless=True)) + operator_not).setResultsName('not') | operator_parenthesis) operator_and = Forward() operator_and << ( Group(operator_not + Suppress(Keyword('and', caseless=True)) + operator_and).setResultsName('and') | Group(operator_not + OneOrMore(~oneOf('and or') + operator_and) ).setResultsName('and') | operator_not) operator_or << ( Group(operator_and + Suppress(Keyword('or', caseless=True)) + operator_or).setResultsName('or') | operator_and) self._query_parser = operator_or.parseString(self.query)[0] else: self._query_parser = False time_cmpnt = Word(nums).setParseAction(lambda t: t[0].zfill(2)) date = Combine((time_cmpnt + '-' + time_cmpnt + '-' + time_cmpnt) + ' ' + time_cmpnt + ':' + time_cmpnt) word = Word(printables) self._log_parser = ( date.setResultsName('timestamp') + word.setResultsName('log_level') + word.setResultsName('plugin') + (White(min=16).setParseAction( lambda s, l, t: [t[0].strip()]).setResultsName('task') | (White(min=1).suppress() & word.setResultsName('task'))) + restOfLine.setResultsName('message'))
def _create_grammar():
    """Create the DBC grammar.

    Builds one pyparsing sub-grammar per DBC section keyword and returns
    the combined grammar (one or more entries followed by end of string).
    NOTE: the upper-case names (VERSION, SIGNAL, MESSAGE, COMMENT, ...) are
    module-level constants defined elsewhere in this file.
    """
    # Basic tokens. '-' in pyparsing means And-with-error-stop: once the
    # leading keyword matched, the rest of the entry must parse.
    word = Word(printables.replace(';', '').replace(':', ''))
    integer = Group(Optional('-') + Word(nums))
    positive_integer = Word(nums).setName('positive integer')
    number = Word(nums + '.Ee-+')
    colon = Suppress(Literal(':'))
    scolon = Suppress(Literal(';'))
    pipe = Suppress(Literal('|'))
    at = Suppress(Literal('@'))
    sign = Literal('+') | Literal('-')
    lp = Suppress(Literal('('))
    rp = Suppress(Literal(')'))
    lb = Suppress(Literal('['))
    rb = Suppress(Literal(']'))
    comma = Suppress(Literal(','))
    # Node names may not span newlines (whitespace chars limited to ' ').
    node = Word(alphas + nums + '_-').setWhitespaceChars(' ')
    frame_id = Word(nums).setName('frame id')

    # VERSION "..."
    version = Group(Keyword('VERSION') - QuotedString())
    version.setName(VERSION)

    # NS_ : <symbol list>
    symbol = Word(alphas + '_') + Suppress(LineEnd())
    symbols = Group(Keyword('NS_') - colon - Group(ZeroOrMore(symbol)))
    symbols.setName('NS_')

    # BS_ : (bit timing) — recognized but discarded.
    discard = Suppress(Keyword('BS_') - colon).setName('BS_')

    # BU_ : <node list>
    nodes = Group(Keyword('BU_') - colon - Group(ZeroOrMore(node)))
    nodes.setName('BU_')

    # SG_ name [mux] : start|length@byte_order sign (scale,offset) [min|max] "unit" receivers
    signal = Group(
        Keyword(SIGNAL) - Group(word + Optional(word)) - colon
        - Group(positive_integer - pipe - positive_integer - at
                - positive_integer - sign)
        - Group(lp - number - comma - number - rp)
        - Group(lb - number - pipe - number - rb)
        - QuotedString() - Group(delimitedList(node)))
    signal.setName(SIGNAL)

    # BO_ frame_id name : size sender <signals>
    message = Group(
        Keyword(MESSAGE) - frame_id - word - colon - positive_integer
        - word - Group(ZeroOrMore(signal)))
    message.setName(MESSAGE)

    # EV_ ... — environment variables are parsed but discarded.
    event = Suppress(
        Keyword(EVENT) - word - colon - positive_integer - lb - number
        - pipe - number - rb - QuotedString() - number - number - word
        - node - scolon)
    event.setName(EVENT)

    # CM_ [SG_|BO_|EV_|BU_] ... "text" ;
    comment = Group(
        Keyword(COMMENT) -
        ((Keyword(SIGNAL) - frame_id - word - QuotedString() - scolon
          ).setName(SIGNAL) |
         (Keyword(MESSAGE) - frame_id - QuotedString() - scolon
          ).setName(MESSAGE) |
         (Keyword(EVENT) - word - QuotedString() - scolon
          ).setName(EVENT) |
         (Keyword(NODES) - word - QuotedString() - scolon
          ).setName(NODES) |
         (QuotedString() - scolon).setName('QuotedString')))
    comment.setName(COMMENT)

    # BA_DEF_ [SG_|BO_|EV_|BU_] "name" type [values] ;
    attribute_definition = Group(
        Keyword(ATTRIBUTE_DEFINITION) -
        ((QuotedString()) |
         (Keyword(SIGNAL) | Keyword(MESSAGE) | Keyword(EVENT)
          | Keyword(NODES)) + QuotedString()) -
        word -
        (scolon |
         (Group(ZeroOrMore(Group((comma | Empty()) + QuotedString())))
          + scolon) |
         (Group(ZeroOrMore(number)) + scolon)))
    attribute_definition.setName(ATTRIBUTE_DEFINITION)

    # BA_DEF_DEF_ "name" default ;
    attribute_definition_default = Group(
        Keyword(ATTRIBUTE_DEFINITION_DEFAULT) - QuotedString()
        - (number | QuotedString()) - scolon)
    attribute_definition_default.setName(ATTRIBUTE_DEFINITION_DEFAULT)

    # BA_ "name" [BO_ id | SG_ id sig | BU_ node] value ;
    attribute = Group(
        Keyword(ATTRIBUTE) - QuotedString() - Group(
            Optional((Keyword(MESSAGE) + frame_id) |
                     (Keyword(SIGNAL) + frame_id + word) |
                     (Keyword(NODES) + word))) -
        (QuotedString() | number) - scolon)
    attribute.setName(ATTRIBUTE)

    # VAL_ [frame_id] signal (value "text")+ ;
    choice = Group(
        Keyword(CHOICE) - Group(Optional(frame_id)) - word -
        Group(OneOrMore(Group(integer + QuotedString()))) - scolon)
    choice.setName(CHOICE)

    # VAL_TABLE_ name (value "text")+ ;
    value_table = Group(
        Keyword(VALUE_TABLE) - word
        - Group(OneOrMore(Group(integer + QuotedString()))) - scolon)
    value_table.setName(VALUE_TABLE)

    # SIG_VALTYPE_ frame_id signal : type ;
    signal_type = Group(
        Keyword(SIGNAL_TYPE) - frame_id - word - colon - positive_integer
        - scolon)
    signal_type.setName(SIGNAL_TYPE)

    # SG_MUL_VAL_ frame_id signal mux range[,range...] ;
    signal_multiplexer_values = Group(
        Keyword(SIGNAL_MULTIPLEXER_VALUES) - frame_id - word - word -
        Group(delimitedList(positive_integer - Suppress('-')
                            - Suppress(positive_integer))) - scolon)
    signal_multiplexer_values.setName(SIGNAL_MULTIPLEXER_VALUES)

    # BO_TX_BU_ frame_id : node[,node...] ;
    message_add_sender = Group(
        Keyword(MESSAGE_TX_NODE) - frame_id - colon
        - Group(delimitedList(node)) - scolon)
    message_add_sender.setName(MESSAGE_TX_NODE)

    # BA_DEF_REL_ ... ; — relation attribute definition.
    attribute_definition_rel = Group(
        Keyword(ATTRIBUTE_DEFINITION_REL) -
        (QuotedString() | (Keyword(NODES_REL) + QuotedString())) -
        word -
        (scolon |
         (Group(ZeroOrMore(Group((comma | Empty()) + QuotedString())))
          + scolon) |
         (Group(ZeroOrMore(number)) + scolon)))
    attribute_definition_rel.setName(ATTRIBUTE_DEFINITION_REL)

    # BA_DEF_DEF_REL_ "name" default ;
    attribute_definition_default_rel = Group(
        Keyword(ATTRIBUTE_DEFINITION_DEFAULT_REL) - QuotedString()
        - (number | QuotedString()) - scolon)
    attribute_definition_default_rel.setName(ATTRIBUTE_DEFINITION_DEFAULT_REL)

    # BA_REL_ "name" BU_SG_REL_ node SG_ frame_id signal value ;
    attribute_rel = Group(
        Keyword(ATTRIBUTE_REL) - QuotedString() - Keyword(NODES_REL) - word
        - Keyword(SIGNAL) - frame_id - word
        - (positive_integer | QuotedString()) - scolon)
    attribute_rel.setName(ATTRIBUTE_REL)

    # SIG_GROUP_ frame_id name repetitions : signal+ ;
    signal_group = Group(
        Keyword(SIGNAL_GROUP) - frame_id - word - integer - colon
        - OneOrMore(word) - scolon)
    signal_group.setName(SIGNAL_GROUP)

    # Any one top-level DBC entry.
    entry = (message | comment | attribute | choice | attribute_definition
             | attribute_definition_default | attribute_rel
             | attribute_definition_rel | attribute_definition_default_rel
             | signal_group | event | message_add_sender | value_table
             | signal_type | signal_multiplexer_values | discard | nodes
             | symbols | version)

    # Frame ids are converted to int at parse time.
    frame_id.setParseAction(lambda _s, _l, t: int(t[0]))

    return OneOrMore(entry) + StringEnd()
from pyparsing import (Word, Group, Suppress, Combine, Optional, Forward, Empty, quotedString, oneOf, removeQuotes, delimitedList, nums, alphas, alphanums, Keyword, CaselessLiteral) (lparen, rparen, lbrack, rbrack, lbrace, rbrace, colon, equal_sign) = map(Suppress, '()[]{}:=') word_free_wb = Word(alphas + '@_-/.+*:' + alphanums) word_free = Forward() word_free = word_free_wb + Optional(lbrace + word_free + rbrace + word_free) word_free.setParseAction(lambda toks: ''.join(toks[0])) word_strict = Word(alphas, alphas + alphanums + '_') integer = Combine(Optional(oneOf('+ -')) + Word(nums)).setName('integer') cvt_int = lambda toks: int(toks[0]) integer.setParseAction(cvt_int) boolean_true = Keyword('True', caseless=True) boolean_true.setParseAction(lambda x: True) boolean_false = Keyword('False', caseless=True) boolean_false.setParseAction(lambda x: False) boolean = boolean_true | boolean_false none = Keyword('None', caseless=True) cvt_none = lambda toks: [None] none.setParseAction(cvt_none) e = CaselessLiteral("e")
def express(self, env): return self.query.express(env) def domain_expression_action(*args): print 'd:', args, [type(i) for i in args] def value_list_action(*args): print 'v:', args, [type(i) for i in args] integer_value = Regex(r'[-]?\d+').setParseAction(NumericIntegerAction) float_value = Regex(r'[-]?\d+(\.\d*)?([eE]\d+)?').setParseAction( NumericFloatAction) value_chars = Word(alphas + alphas8bit, alphanums + alphas8bit + '%.-_*;:') string_value = ( value_chars | quotedString.setParseAction(removeQuotes)).setParseAction(StringAction) # value can contain any string once it's quoted value = string_value | integer_value | float_value value_list = (string_value ^ delimitedList(string_value) ^ OneOrMore(string_value)) binop = oneOf('= == != <> < <= > >= not like contains has ilike ' 'icontains ihas is').setName('binop') domain = Word(alphas, alphanums).setName('domain') domain_values = Group(value_list.copy()) domain_expression = (domain + Literal('=') + Literal('*') + stringEnd) \ | (domain + binop + domain_values + stringEnd)
class Parser: FINDINGS_IDENTIFIER = Word(alphas + "-") @classmethod def parse(cls, string): return cls.FULL_EXPR.parseString(string)
alphas, oneOf, nums, Group, OneOrMore, pyparsing_unicode as ppu, ) # usamos las letras en latin1, que incluye las como 'ñ', 'á', 'é', etc. alphas = ppu.Latin1.alphas # Aqui decimos que la gramatica "saludo" DEBE contener # una palabra compuesta de caracteres alfanumericos # (Word(alphas)) mas una ',' mas otra palabra alfanumerica, # mas '!' y esos seian nuestros tokens saludo = Word(alphas) + "," + Word(alphas) + oneOf("! . ?") tokens = saludo.parseString("Hola, Mundo !") # Ahora parseamos una cadena, "Hola, Mundo!", # el metodo parseString, nos devuelve una lista con los tokens # encontrados, en caso de no haber errores... for i, token in enumerate(tokens): print("Token %d -> %s" % (i, token)) # imprimimos cada uno de los tokens Y listooo!!, he aquí a salida # Token 0 -> Hola # Token 1 -> , # Token 2-> Mundo # Token 3 -> ! # ahora cambia el parseador, aceptando saludos con mas que una sola palabra antes que ','
NAMESPACE = 'sqlpp' # PARSER def ddlWord(string): return WordStart(alphanums + "_") + CaselessLiteral(string) + WordEnd(alphanums + "_") # This function should be refactored if we find some database function which needs parameters # Right now it works only for something like NOW() in MySQL default field value def ddlFunctionWord(string): return CaselessLiteral(string) + OneOrMore("(") + ZeroOrMore(" ") + OneOrMore(")") ddlString = Or([QuotedString("'"), QuotedString("\"", escQuote='""'), QuotedString("`")]) negativeSign = Literal('-') ddlNum = Combine(Optional(negativeSign) + Word(nums + ".")) ddlTerm = Word(alphanums + "_$") ddlName = Or([ddlTerm, ddlString]) ddlArguments = "(" + delimitedList(Or([ddlString, ddlTerm, ddlNum])) + ")" ddlNotNull = Group(ddlWord("NOT") + ddlWord("NULL")).setResultsName("notNull") ddlDefaultValue = ddlWord("DEFAULT").setResultsName("hasDefaultValue") ddlAutoValue = Or([ ddlWord("AUTO_INCREMENT"), ddlWord("SMALLSERIAL"), ddlWord("SERIAL"), ddlWord("BIGSERIAL"), ]).setResultsName("hasAutoValue") ddlColumnComment = Group(ddlWord("COMMENT") + ddlString).setResultsName("comment") ddlConstraint = Or([ ddlWord("CONSTRAINT"), ddlWord("PRIMARY"),
from pyparsing import Word, alphas, alphanums, nums, Combine, Optional, Suppress, Regex # define grammar of a greeting greet = Word(alphanums) + "," + Word(alphas) + "!" hello = "Primera123, Segunda!" print (hello, "->", greet.parseString(hello)) #logline = MES DD HH:MM:SS HOSTNAME PROC[PID]: MESSAGE month = Word(alphas, exact=3) ints = Word(nums) day = ints Horas = ints Mins = ints Segs = ints hour = Combine(Horas + ":" + Mins + ":" + Segs) timestamp = month + day + Horas + Suppress(":") + Mins + Suppress(":") + Segs hostname = Word(alphas + nums + "_" + "-" + ".") appname = Word(alphas + "/" + "-" + "_" + ".") + Optional(Suppress("[") + ints + Suppress("]")) + Suppress(":") message = Regex(".*") logline = timestamp + hostname + appname + message mess_prueba = "Oct 26 08:30:01 MX3750006dc0458 systemd[1]: Started Session 653 of user mfe." campos = logline.parseString(mess_prueba) print (mess_prueba, "->", campos) for i in range(len(campos)): print ("Campo[",i,"]=",campos[i]) #print ("Campo[0]=", campos[0]) #print ("Campo[1]=", campos[1]) #print ("Campo[2]=", campos[2])