def __init__(self):
    """
    Build the expression grammar (Backus normal form) for this parser.

    The finished, recursive grammar is stored on ``self.bnf`` and
    ``self.formula`` is initialised to ``None``. Matched elements are
    reported through the ``self.push_first`` and
    ``self.push_unary_operator`` parse actions.
    """
    # No formula has been set yet.
    self.formula = None

    # The grammar refers to itself (function arguments, parenthesised
    # sub-expressions), so it is declared first and filled in last.
    self.bnf = Forward()

    # Parentheses are matched but dropped from the resulting tokens.
    open_paren = Literal(const.LPAR).suppress()
    close_paren = Literal(const.RPAR).suppress()

    # Reserved words: Euler's number, pi, null and the two booleans.
    euler_kw = Keyword(const.EULER)
    pi_kw = Keyword(const.PI)
    null_kw = Keyword(const.NULL)
    true_kw = Keyword(const.TRUE)
    false_kw = Keyword(const.FALSE)

    # Binary operators, one expression per precedence level.
    additive_op = oneOf(const.ADDOP)
    multiplicative_op = oneOf(const.MULTOP)
    power_op = oneOf(const.POWOP)

    # One optional slot per unary operator, concatenated in the order
    # in which const.UNOP lists them.
    optional_signs = (Optional(symbol) for symbol in const.UNOP)
    sign_prefix = reduce(operator.add, optional_signs)

    # Floating point literal (const.NUMBER is expected to allow
    # scientific notation).
    numeric_literal = Regex(const.NUMBER)

    # A variable is a separator-joined run of alphanumeric words; it
    # names a key in the input data dictionary.
    variable_name = delimitedList(
        Word(alphanums),
        delim=const.VARIABLE_NAME_SEPARATOR,
        combine=True,
    )

    # A function call wraps a complete sub-expression.
    function_call = Word(alphanums) + open_paren + self.bnf + close_paren

    # Operand alternatives. The keyword constants are tried before the
    # generic variable pattern, otherwise they would be read as plain
    # variable names.
    operand = (
        function_call
        | pi_kw
        | euler_kw
        | null_kw
        | true_kw
        | false_kw
        | numeric_literal
        | variable_name
    )
    operand.setParseAction(self.push_first)

    # A parenthesised sub-expression; its tokens are suppressed here.
    parenthesised = open_paren + self.bnf.suppress() + close_paren

    atom = (sign_prefix + operand) | parenthesised
    atom.setParseAction(self.push_unary_operator)

    # Exponentiation is written as "atom [ ^ factor ]..." rather than
    # "atom [ ^ atom ]..." so that it associates right-to-left:
    # 2^3^2 is 2^(3^2), not (2^3)^2.
    factor = Forward()
    exponent_tail = (power_op + factor).setParseAction(self.push_first)
    factor << (atom + ZeroOrMore(exponent_tail))

    product_tail = (multiplicative_op + factor).setParseAction(self.push_first)
    term = factor + ZeroOrMore(product_tail)

    sum_tail = (additive_op + term).setParseAction(self.push_first)
    self.bnf << (term + ZeroOrMore(sum_tail))
# [162] WS ::= #x20 | #x9 | #xD | #xA
# No explicit WS terminal is defined here (the original author marked it
# "Not needed?"):
# WS = #x20 | #x9 | #xD | #xA

# [163] ANON ::= '[' WS* ']'
# Every match yields a fresh blank node.
ANON = (Literal('[') + ']').setParseAction(lambda _toks: rdflib.BNode())

# The bare 'a' token stands for rdf:type.
# (A CaseSensitiveKeyword('a') variant was considered but is not used.)
A = Literal('a').setParseAction(lambda _toks: rdflib.RDF.type)

# ------ NON-TERMINALS --------------

# [5] BaseDecl ::= 'BASE' IRIREF
BaseDecl = Comp(
    'Base',
    Keyword('BASE') + Param('iri', IRIREF),
)

# [6] PrefixDecl ::= 'PREFIX' PNAME_NS IRIREF
PrefixDecl = Comp(
    'PrefixDecl',
    Keyword('PREFIX') + PNAME_NS + Param('iri', IRIREF),
)

# [4] Prologue ::= ( BaseDecl | PrefixDecl )*
Prologue = Group(ZeroOrMore(BaseDecl | PrefixDecl))

# [108] Var ::= VAR1 | VAR2
# The first matched token is converted into an rdflib Variable.
Var = (VAR1 | VAR2).setParseAction(
    lambda toks: rdflib.term.Variable(toks[0]))

# [137] PrefixedName ::= PNAME_LN | PNAME_NS
PrefixedName = Comp('pname', PNAME_LN | PNAME_NS)
def parse_imp(input):
    """Parse one top-level input string into its abstract representation.

    :param input: source text of a single top-level form
    :return: the first token produced by the top-level parser, which is a
        dict whose ``"result"`` key is one of ``"quit"``, ``"abstract"``,
        ``"declaration"`` or ``"statement"``; the last three carry the
        parsed payload under ``"stmt"`` or ``"decl"``
    """
    # Grammar:
    #
    # <expr> ::= <integer>
    #            true
    #            false
    #            <identifier>
    #            ( if <expr> <expr> <expr> )
    #            ( function ( <name> ... ) <expr> )
    #            ( <expr> <expr> ... )
    #
    # <decl> ::= var name = expr ;
    #
    # <stmt> ::= if <expr> <stmt> else <stmt>
    #            if <expr> <stmt>
    #            while <expr> <stmt>
    #            name <- <expr> ;
    #            print <expr> ;
    #            <expr> ;
    #            <block>
    #
    # <block> ::= { <decl> ... <stmt> ... }
    #
    # <toplevel> ::= #quit
    #                #abs <stmt>
    #                <decl>
    #                <stmt>
    #

    # Characters allowed at the start of an identifier; digits are allowed
    # from the second character on.
    idChars = alphas + "_+*-?!=<>"

    pIDENTIFIER = Word(idChars, idChars + "0123456789")
    # NOTE THE DIFFERENCE with pNAME below: an identifier used as an
    # expression is wrapped in a dereference primitive call.
    pIDENTIFIER.setParseAction(
        lambda result: EPrimCall(oper_deref, [EId(result[0])]))

    # A name is like an identifier but it does not return an EId: it has no
    # parse action, so the raw matched string is kept.
    pNAME = Word(idChars, idChars + "0123456789")

    # Wrapping the result in a list keeps all names together as ONE token.
    pNAMES = ZeroOrMore(pNAME)
    pNAMES.setParseAction(lambda result: [result])

    pINTEGER = Word("0123456789")
    pINTEGER.setParseAction(lambda result: EValue(VInteger(int(result[0]))))

    pBOOLEAN = Keyword("true") | Keyword("false")
    pBOOLEAN.setParseAction(
        lambda result: EValue(VBoolean(result[0] == "true")))

    # Expressions are recursive, so pEXPR is declared here and defined below.
    pEXPR = Forward()

    # As with pNAMES, all argument expressions are delivered as one token.
    pEXPRS = ZeroOrMore(pEXPR)
    pEXPRS.setParseAction(lambda result: [result])

    # Tokens: "(", "if", cond, then, else, ")" -> indices 2, 3 and 4.
    pIF = "(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")"
    pIF.setParseAction(lambda result: EIf(result[2], result[3], result[4]))

    def mkFunBody(params, body):
        # Rebind every parameter name to a reference cell holding that
        # parameter's value, then evaluate the body in that scope.
        bindings = [(p, ERefCell(EId(p))) for p in params]
        return ELet(bindings, body)

    # Tokens: "(", "function", "(", names, ")", body, ")" -> indices 3 and 5.
    pFUN = "(" + Keyword("function") + "(" + pNAMES + ")" + pEXPR + ")"
    pFUN.setParseAction(
        lambda result: EFunction(result[3], mkFunBody(result[3], result[5])))

    # Tokens: "(", function expression, argument list, ")".
    pCALL = "(" + pEXPR + pEXPRS + ")"
    pCALL.setParseAction(lambda result: ECall(result[1], result[2]))

    # Alternatives are tried in this order; booleans come before
    # identifiers so that "true"/"false" are not read as identifiers.
    pEXPR << (pINTEGER | pBOOLEAN | pIDENTIFIER | pIF | pFUN | pCALL)

    # A declaration yields a (name, expression) pair.
    pDECL_VAR = "var" + pNAME + "=" + pEXPR + ";"
    pDECL_VAR.setParseAction(lambda result: (result[1], result[3]))

    # hack to get pDECL to match only pDECL_VAR (but still leave room
    # to add to pDECL later)
    pDECL = (pDECL_VAR | NoMatch())

    pDECLS = ZeroOrMore(pDECL)
    pDECLS.setParseAction(lambda result: [result])

    # Statements are recursive too (blocks, if, while).
    pSTMT = Forward()

    pSTMT_IF_1 = "if" + pEXPR + pSTMT + "else" + pSTMT
    pSTMT_IF_1.setParseAction(
        lambda result: EIf(result[1], result[2], result[4]))

    # An "if" without "else" uses the boolean value true as its else branch.
    pSTMT_IF_2 = "if" + pEXPR + pSTMT
    pSTMT_IF_2.setParseAction(
        lambda result: EIf(result[1], result[2], EValue(VBoolean(True))))

    pSTMT_WHILE = "while" + pEXPR + pSTMT
    pSTMT_WHILE.setParseAction(lambda result: EWhile(result[1], result[2]))

    pSTMT_PRINT = "print" + pEXPR + ";"
    pSTMT_PRINT.setParseAction(
        lambda result: EPrimCall(oper_print, [result[1]]))

    # The assignment target is passed as a bare EId (not dereferenced).
    pSTMT_UPDATE = pNAME + "<-" + pEXPR + ";"
    pSTMT_UPDATE.setParseAction(
        lambda result: EPrimCall(oper_update, [EId(result[0]), result[2]]))

    pSTMT_EXPR = pEXPR + ";"
    pSTMT_EXPR.setParseAction(lambda result: result[0])

    pSTMTS = ZeroOrMore(pSTMT)
    pSTMTS.setParseAction(lambda result: [result])

    def mkBlock(decls, stmts):
        # Each declared name is bound to a reference cell around its
        # initialiser; the statements are sequenced inside that scope.
        bindings = [(n, ERefCell(expr)) for (n, expr) in decls]
        return ELet(bindings, EDo(stmts))

    # Tokens: "{", declarations, statements, "}".
    pSTMT_BLOCK = "{" + pDECLS + pSTMTS + "}"
    pSTMT_BLOCK.setParseAction(lambda result: mkBlock(result[1], result[2]))

    # The if/else form is tried before the else-less form.
    pSTMT << (pSTMT_IF_1 | pSTMT_IF_2 | pSTMT_WHILE | pSTMT_PRINT
              | pSTMT_UPDATE | pSTMT_EXPR | pSTMT_BLOCK)

    # can't attach a parse action to pSTMT because of recursion, so let's
    # duplicate the parser
    pTOP_STMT = pSTMT.copy()
    pTOP_STMT.setParseAction(lambda result: {
        "result": "statement",
        "stmt": result[0]
    })

    pTOP_DECL = pDECL.copy()
    pTOP_DECL.setParseAction(lambda result: {
        "result": "declaration",
        "decl": result[0]
    })

    pABSTRACT = "#abs" + pSTMT
    pABSTRACT.setParseAction(lambda result: {
        "result": "abstract",
        "stmt": result[1]
    })

    pQUIT = Keyword("#quit")
    pQUIT.setParseAction(lambda result: {"result": "quit"})

    # Top-level alternatives, tried in this order.
    pTOP = (pQUIT | pABSTRACT | pTOP_DECL | pTOP_STMT)

    # The first element of the parse result is the dict built above.
    result = pTOP.parseString(input)[0]
    return result
def bnf(css_style_obj):
    """
    Return the (lazily built, module-cached) pyparsing grammar for CSS.

    The grammar is constructed on the first call and stored in the module
    global ``BNF``; later calls return that same object. Its parse actions
    are the ``push_*`` methods of the ``css_style_obj`` passed on the FIRST
    call, so a different object passed later is not wired in.

    :param css_style_obj: object providing the parse-action callbacks
        ``push_param_name``, ``push_value``, ``push_parameter``,
        ``push_ident_list`` and ``push_param_block``
    :return: the grammar, matching one or more comments / parameter blocks

    Grammar sketch:

    * decimal_digit   :: '0' .. '9'
    * sign            :: '-' | '+'
    * integer         :: decimal_digit+
    * float           :: [ sign ] integer '.' [ integer ] [ 'e' | 'E' [ sign ] integer ]
    * lower_case      :: 'a' .. 'z'
    * upper_case      :: 'A' .. 'Z'
    * alpha           :: lower_case | upper_case
    * punctuation     :: '`' | '~' | '!' | '@' | '#' | '$' | '%' | '^' | '&' | '*' | '(' | ')' |
                         '_' | '=' | '+' | ';' | ':' | '\'' | ',' | '<' | '.' | '>' | '/' | '?' |
                         ' ' | '-'
    * string_delim    :: '"' | '\''
    * string          :: string_delim [ alpha | decimal_digit | punctuation ]* string_delim
    * identifier      :: '_' | alpha [ alpha | decimal_digit | '_' ]*
    * attr_selector   :: '[' + identifier [ [ '~' | '*' | '^' | '$' | '|' ] '=' string ] ']'
    * class_or_id     :: ( '#' | '.' ) identifier
    * pseudo_class    :: ':' alpha [ alpha | '-' ]* [ '(' integer | identifier ')' ]
    * selector        :: identifier [ class_or_id | attr_selector ] [ pseudo_class ]
                         [ identifier [ pseudo_class ] ]
    * parameter_name  :: alpha [ alpha | decimal_digit | '_' | '-' ]*
    * lower_hex       :: 'a' .. 'f'
    * upper_hex       :: 'A' .. 'F'
    * hex_digit       :: decimal_digit | lower_hex | upper_hex
    * color           :: '#' hex_digit * 6
    * comment         :: '/' '*' .*? '*' '/'
    * url             :: 'url' '(' string ')'
    * pixel_count     :: integer 'px'
    * percentage      :: integer '%'
    * parameter_val   :: url | color | pixel_count | percentage | parameter_name | float | integer
    * parameter       :: parameter_name ':' [ comment* ]* parameter_val
                         [ parameter_val | comment* ]+ ';'
    * parameter_block :: selector [ ',' selector ]* '{' ( parameter | comment* )+ '}'
    """
    global BNF
    if BNF is None:
        # Optional fraction and exponent are NON-CAPTURING groups "(?:...)".
        # The previous pattern spelled them "(:?...)", which is a capturing
        # group starting with an optional colon and therefore accepted
        # malformed input such as "1:.5" as a number.
        fnumber = Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?")
        identifier = Word("_"+alphas+nums)

        # Operator prefixes allowed before '=' in an attribute selector.
        tilde = Literal("~")
        asterisk = Literal("*")
        caret = Literal("^")
        dsign = Literal("$")
        pipe = Literal("|")
        equal = Literal("=")

        # Single- and double-quoted strings (non-empty, no embedded quote).
        squote = Literal("'")
        sqstring = squote + Regex(r"[^']+") + squote
        dquote = Literal('"')
        dqstring = dquote + Regex(r"[^\"]+") + dquote
        string = sqstring | dqstring

        # '#name' or '.name'
        class_or_id = Word("#"+".", "_"+alphas+nums)
        # ':name' optionally followed by a parenthesised number/identifier.
        pclass = Combine(Word(":", "-"+alphas) + Optional(
            '(' + (Word(nums) | identifier) + ')'))
        attr_selector = Combine("[" + identifier + Optional(Optional(
            tilde | asterisk | caret | dsign | pipe) + equal + string) + "]")
        # Three selector shapes: element (with optional qualifiers),
        # bare class/id, or bare attribute selector.
        selector = Combine(Word("_"+alphas, "_"+alphas+nums) + Optional(
            attr_selector | class_or_id) + Optional(pclass)) | Combine(
            class_or_id + Optional(pclass)) | attr_selector

        integer = Word(nums)
        parameter_name = Word(alphas, alphas + nums + "_-")
        param_str = Word(alphas, alphas + nums + "_-")
        # re.S lets a /* ... */ comment span several lines.
        comment = Regex(r"[/][*].*?[*][/]", flags=re.S)
        lbrack = Literal("{")
        rbrack = Literal("}")
        px_suffix = Literal("px")
        pix_count = Combine(Word(nums) + px_suffix)
        percent = Literal("%")
        percentage = Combine(Word(nums) + percent)
        # '#' followed by exactly six hex digits (7 characters in total).
        color = Word("#", hexnums, exact=7)
        urlstr = Keyword("url")
        url = urlstr + '(' + string + ')'

        # Order matters: the suffixed forms (px, %) are tried before the
        # plain number patterns.
        parameter_val = (url | color | pix_count | percentage | param_str
                         | fnumber | integer)

        # "name: value [value ...];" with comments allowed between parts.
        parameter = (
            parameter_name.setParseAction(css_style_obj.push_param_name)
            + ':'
            + ZeroOrMore(comment.suppress())
            + OneOrMore(
                parameter_val.setParseAction(css_style_obj.push_value)
                + ZeroOrMore(comment.suppress()))
            + ';'
        ).setParseAction(css_style_obj.push_parameter)

        # "selector[, selector ...] { parameters and comments }"
        parameter_block = (
            delimitedList(selector).setParseAction(
                css_style_obj.push_ident_list)
            + lbrack
            + OneOrMore(comment.suppress() | parameter)
            + rbrack
        ).setParseAction(css_style_obj.push_param_block)

        BNF = OneOrMore(comment.suppress() | parameter_block)
    return BNF
class NginxConfigParser(object):
    """
    Nginx config parser based on https://github.com/fatiherikli/nginxparser

    Parses single file into json structure

    The pyparsing grammar is declared as class attributes; ``script`` is the
    top-level expression. ``parse()`` relies on the private ``__pyparse`` and
    ``__logic_parse`` methods, which are defined outside the portion of the
    class shown here.
    """

    max_size = 20*1024*1024  # 20 mb

    # line starts/ends
    line_start = LineStart().suppress()
    line_end = LineEnd().suppress()

    # constants (all suppressed: matched but not kept in the results)
    left_brace = Literal("{").suppress()
    left_parentheses = Literal("(").suppress()
    right_brace = Literal("}").suppress()
    right_parentheses = Literal(")").suppress()
    semicolon = Literal(";").suppress()
    space = White().suppress()
    singleQuote = Literal("'").suppress()
    doubleQuote = Literal('"').suppress()

    # keys (directives that get a dedicated rule below)
    if_key = Keyword("if").setParseAction(set_line_number)
    set_key = Keyword("set").setParseAction(set_line_number)
    rewrite_key = Keyword("rewrite").setParseAction(set_line_number)
    perl_set_key = Keyword("perl_set").setParseAction(set_line_number)
    log_format_key = Keyword("log_format").setParseAction(set_line_number)
    alias_key = Keyword("alias").setParseAction(set_line_number)
    return_key = Keyword("return").setParseAction(set_line_number)
    error_page_key = Keyword("error_page").setParseAction(set_line_number)
    map_key = Keyword("map").setParseAction(set_line_number)
    server_name_key = Keyword("server_name").setParseAction(set_line_number)
    sub_filter_key = Keyword("sub_filter").setParseAction(set_line_number)

    # lua keys
    start_with_lua_key = Regex(r'lua_\S+').setParseAction(set_line_number)
    contains_by_lua_key = Regex(r'\S+_by_lua\S*').setParseAction(set_line_number)

    # Generic directive name: any word that is NOT one of the specially
    # handled keys above (negative lookaheads), built from a wide character
    # set. The backslash is written as '\\' so the literal contains exactly
    # one backslash without relying on the invalid '\/' escape sequence.
    key = (
        ~map_key & ~alias_key & ~perl_set_key &
        ~if_key & ~set_key & ~rewrite_key & ~server_name_key & ~sub_filter_key
    ) + Word(alphanums + '$_:%?"~<>\\/-+.,*()[]"' + "'").setParseAction(set_line_number)

    # values
    value_one = Regex(r'[^{};]*"[^\";]+"[^{};]*')
    value_two = Regex(r'[^{};]*\'[^\';]+\'')
    value_three = Regex(r'[^{};]+((\${[\d|\w]+(?=})})|[^{};])+')
    value_four = Regex(r'[^{};]+(?!${.+})')
    value = (value_one | value_two | value_three | value_four).setParseAction(set_line_number)
    quotedValue = Regex(r'"[^;]+"|\'[^;]+\'').setParseAction(set_line_number)
    rewrite_value = CharsNotIn(";").setParseAction(set_line_number)
    any_value = CharsNotIn(";").setParseAction(set_line_number)
    non_space_value = Regex(r'[^\'\";\s]+').setParseAction(set_line_number)
    if_value = Regex(r'\(.*\)').setParseAction(set_line_number)
    language_include_value = CharsNotIn("'").setParseAction(set_line_number)
    strict_value = CharsNotIn("{};").setParseAction(set_line_number)
    sub_filter_value = (
        non_space_value | Regex(r"\'(.|\n)+?\'")
    ).setParseAction(set_line_number)

    # map values
    map_value_one = Regex(r'\'([^\']|\s)*\'').setParseAction(set_line_number)
    map_value_two = Regex(r'"([^"]|\s)*\"').setParseAction(set_line_number)
    map_value_three = Regex(r'((\\\s|[^{};\s])*)').setParseAction(set_line_number)
    map_value = (map_value_one | map_value_two | map_value_three)

    # modifier for location uri [ = | ~ | ~* | ^~ ]
    # ("~*" is listed before "~" so the longer token is tried first)
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")

    # rules
    assignment = (
        key + Optional(space) + Optional(value) +
        Optional(space) + Optional(value) + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    # NOTE: this class attribute shadows the builtin ``set`` inside the
    # class body only; methods still see the builtin.
    set = (
        set_key + Optional(space) + any_value + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    rewrite = (
        rewrite_key + Optional(space) + rewrite_value + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    perl_set = (
        perl_set_key + Optional(space) + key + Optional(space) +
        singleQuote + language_include_value + singleQuote + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    lua_content = (
        (start_with_lua_key | contains_by_lua_key) + Optional(space) +
        singleQuote + language_include_value + singleQuote + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    alias = (
        alias_key + space + any_value + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    return_ = (
        (return_key | error_page_key) + space + value + Optional(space) +
        Optional(any_value) + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    log_format = (
        log_format_key + Optional(space) + strict_value + Optional(space) +
        any_value + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    server_name = (
        server_name_key + space + any_value + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    sub_filter = (
        sub_filter_key + space + sub_filter_value + space + sub_filter_value +
        Optional(space) + semicolon
    ).setParseAction(set_line_number)

    # script
    # map block: "map <value> <value> { <value> [<value>]; ... }"
    map_block = Forward()
    map_block << Group(
        Group(
            map_key + space + map_value + space + map_value + Optional(space)
        ).setParseAction(set_line_number) +
        left_brace +
        Group(
            ZeroOrMore(
                Group(map_value + Optional(space) + Optional(map_value) + Optional(space) + semicolon)
            ).setParseAction(set_line_number)
        ) +
        right_brace
    )

    # generic block: a header (directive with optional modifier/values, or
    # an "if (...)" header) followed by a braced body that may contain any
    # rule, a map block or a nested block.
    block = Forward()
    block << Group(
        (
            Group(
                key + Optional(space + modifier) + Optional(space) +
                Optional(value) + Optional(space) +
                Optional(value) + Optional(space)
            ) |
            Group(if_key + space + if_value + Optional(space))
        ).setParseAction(set_line_number) +
        left_brace +
        Group(
            ZeroOrMore(
                Group(log_format) | Group(lua_content) | Group(perl_set) |
                Group(set) | Group(rewrite) | Group(alias) | Group(return_) |
                Group(assignment) | Group(server_name) | Group(sub_filter) |
                map_block | block
            ).setParseAction(set_line_number)
        ).setParseAction(set_line_number) +
        right_brace
    )

    # top-level expression; '#' comments are skipped
    script = OneOrMore(
        Group(log_format) | Group(perl_set) | Group(lua_content) | Group(alias) | Group(return_) |
        Group(assignment) | Group(set) | Group(rewrite) | Group(sub_filter) |
        map_block | block
    ).ignore(pythonStyleComment)

    INCLUDE_RE = re.compile(r'[^#]*include\s+(?P<include_file>.*);')
    SSL_CERTIFICATE_RE = re.compile(r'[^#]*ssl_certificate\s+(?P<cert_file>.*);')

    def __init__(self, filename='/etc/nginx/nginx.conf'):
        """
        :param filename: str path of the main nginx config file
        """
        # Reset the module-level token cache for every new parser.
        global tokens_cache
        tokens_cache = {}

        self.filename = filename
        self.folder = '/'.join(self.filename.split('/')[:-1])  # stores path to folder with main config
        self.files = {}  # to prevent cycle files and line indexing
        self.parsed_cache = {}  # to cache multiple includes
        self.broken_files = set()  # to prevent reloading broken files
        self.index = []  # stores index for all sections (points to file number and line number)
        self.ssl_certificates = []
        self.errors = []
        self.tree = {}

    def parse(self):
        """
        Parses ``self.filename`` and stores the result in ``self.tree``
        """
        self.tree = self.__logic_parse(self.__pyparse(self.filename))

        # drop cached
        self.parsed_cache = None

    @staticmethod
    def get_file_info(filename):
        """
        Returns file size, mtime and permissions

        Never raises: on any failure the error is logged at debug level and
        the values gathered so far are returned, with the defaults
        ``0, 0, '0000'`` for whatever could not be read.

        :param filename: str filename
        :return: int, int, str - size, mtime, permissions
        """
        size, mtime, permissions = 0, 0, '0000'

        try:
            size = os.path.getsize(filename)
            mtime = int(os.path.getmtime(filename))
            mode_bits = os.stat(filename).st_mode & 0o777
            # Keep the Python 2 oct() format ('0644', and '0' for zero) on
            # every interpreter; Python 3's oct() would give '0o644'.
            permissions = '0%o' % mode_bits if mode_bits else '0'
        except Exception as e:
            # Deliberately best-effort: callers get the defaults instead of
            # an exception.
            exception_name = e.__class__.__name__
            message = 'failed to stat %s due to: %s' % (filename, exception_name)
            context.log.debug(message, exc_info=True)

        return size, mtime, permissions
from undebt.pattern.common import COMMA_IND
from undebt.pattern.common import DOT
from undebt.pattern.common import NAME
from undebt.pattern.common import NL
from undebt.pattern.common import NO_BS_NL
from undebt.pattern.common import NUM
from undebt.pattern.common import PARENS
from undebt.pattern.common import SKIP_TO_TEXT
from undebt.pattern.common import START_OF_FILE
from undebt.pattern.common import STRING
from undebt.pattern.util import addspace
from undebt.pattern.util import condense


# Assignment operator, combined into a single token: either a run of
# operator characters followed by "=" (augmented assignment such as "+="),
# or a bare "=" that is not the start of "==" (negative lookahead).
ASSIGN_OP = Combine((Word("~%^&*-+|/") | ~Literal("==")) + Literal("="))

# One or more prefix operators: runs of "~", "-", "+" and/or the keyword
# "not". The tokens are passed through addspace (project helper;
# presumably joins them with spaces - confirm in undebt.pattern.util).
UNARY_OP = addspace(OneOrMore(Word("~-+") | Keyword("not")))

# Binary operator: anything that is not an assignment operator and is a run
# of operator characters, "and", "or", or a sequence of the keywords
# "is" / "not" / "in" (covers e.g. "is not", "not in").
BINARY_OP = ~ASSIGN_OP + (
    Word("!%^&*-+=|/<>")
    | Keyword("and")
    | Keyword("or")
    | addspace(OneOrMore(Keyword("is") | Keyword("not") | Keyword("in")))
)

# Any operator; alternatives are tried in this order.
OP = ASSIGN_OP | UNARY_OP | BINARY_OP

# A single trailer. "+" binds tighter than "|", so this reads as
# (DOT + NAME) | PARENS | BRACKETS.
TRAILER = DOT + NAME | PARENS | BRACKETS
# Zero or more trailers, passed through condense (project helper;
# presumably joins the tokens without spaces - confirm).
TRAILERS = condense(ZeroOrMore(TRAILER))

# The base of an atom, before any trailers are applied.
ATOM_BASE = NAME | NUM | PARENS | BRACKETS | BRACES | STRING
idPrefixes = { 'event': 'e', 'instance': 'ei', 'signal': 's', 'timex3': 't', 'tlink': 'l', 'slink': 'l', 'alink': 'l' } # the top-level command cavatStmt = Forward() # top-level commands showToken = Keyword("show", caseless=True) debugToken = Keyword("debug", caseless=True) corpusToken = Keyword("corpus", caseless=True) helpToken = Keyword("help", caseless=True) checkToken = Keyword("check", caseless=True) browseToken = Keyword("browse", caseless=True) # parameter values onOff = oneOf('on off', caseless=True) tag = oneOf(' '.join(validTags), caseless=True) alphaNums_ = Word(alphanums + "_-") fileName = Word(alphanums + "_-.+%/~") reportType = oneOf("list distribution state", caseless=True) outputFormat = oneOf("screen csv tsv tex", caseless=True) browseFormat = oneOf("screen timeml csv", caseless=True) conditionValue = Group(Word(nums) | QuotedString('"\''))
def parser(cls):
    """Return the pyparsing expression for a ``box ... end`` section.

    The section body consists of the module-level ``obstacle_items``, in any
    order. On a match, the parsed items are turned into keyword arguments
    and used to instantiate ``cls``.
    """
    def build_instance(tokens):
        # Each parsed item is a (name, values...) group; Dict makes them
        # addressable by name so they can be passed as keyword arguments.
        return cls(**dict(tokens))

    header = Keyword('box').suppress()
    grammar = Dict(header + Each(obstacle_items) + end)
    grammar.setParseAction(build_instance)
    return grammar
def __init__(self, n, colors, shapes, max_constant=5):
    """
    Build the pyparsing grammar for grid-world programs.

    :param n: length of side of the grids; positions are ``0 .. n-1``
    :param colors: list of color names
    :param shapes: list of shape names
    :param max_constant: exclusive upper bound for ``repeat`` counts, i.e.
        the accepted constants are ``1 .. max_constant-1``
    """
    # Terminals: literal names plus the corresponding marker query.
    color_terms = Or([Keyword(name) for name in colors])
    color_terms ^= Keyword("getMarkerColor()")
    self.colors = color_terms

    shape_terms = Or([Keyword(name) for name in shapes])
    shape_terms ^= Keyword("getMarkerShape()")
    self.shapes = shape_terms

    self.positions = Or([Keyword(str(index)) for index in range(n)])
    self.constants = Or(
        [Keyword(str(count)) for count in range(1, max_constant)])

    # Actions: the parameterised move first, then the fixed commands in
    # their original priority order.
    actions = "move(" + self.positions + "," + self.positions + ")"
    for command in ("moveUp()", "moveDown()", "moveLeft()", "moveRight()",
                    "moveTop()", "moveBottom()", "moveLeftmost()",
                    "moveRightmost()", "moveToMovableMarker()",
                    "pickMarker()", "putMarker()", "fixMarker()"):
        actions = actions | command
    self.actions = actions

    # Conditions: equality tests first, then the predicates, then an
    # optional single "not" in front of any of them.
    plain_conditions = (Group(self.shapes + "==" + self.shapes)
                        | Group(self.colors + "==" + self.colors))
    for predicate in ("markersPresent()", "movableMarkersPresent()",
                      "existMovableMarkers()", "upperBoundary()",
                      "lowerBoundary()", "leftBoundary()",
                      "rightBoundary()", "true"):
        plain_conditions = plain_conditions | predicate
    negated = Group(Keyword("not") + plain_conditions)
    self.conditions = plain_conditions | negated

    # Statements nest through blocks, so the block is forward-declared.
    block = Forward()

    def guarded_block(keyword, guard):
        # keyword ( guard ) { block }
        return (Keyword(keyword) + "(" + guard + ")"
                + "{" + Group(block) + "}")

    while_stmt = Group(guarded_block("while", self.conditions))
    repeat_stmt = Group(guarded_block("repeat", self.constants))
    if_stmt = Group(guarded_block("if", self.conditions))
    ifelse_stmt = Group(guarded_block("ifelse", self.conditions)
                        + Keyword("else") + "{" + Group(block) + "}")
    action_stmt = Group(self.actions + ";")

    stmt = while_stmt | repeat_stmt | if_stmt | ifelse_stmt | action_stmt
    block << OneOrMore(stmt)

    self.statements = block
    # A program is a single "def run() { ... }" wrapper around a block.
    self.program = (Keyword("def") + Keyword("run()")
                    + "{" + Group(self.statements) + "}")
n = toks[0] try: return int(n) except ValueError: return float(n) integer = Word(nums).setParseAction(numeric) floatnum = Combine( Optional('-') + ('0' | Word('123456789', nums)) + Optional('.' + Word(nums)) + Optional(Word('eE', exact=1) + Word(nums + '+-', nums))) floatnum.setParseAction(numeric) end = Keyword('end').suppress() point2d = floatnum + floatnum # Note: Since we're just doing 2D, we ignore the z term of 3D points. point3d = floatnum + floatnum + floatnum.suppress() # Obstacle position = Group((Keyword('pos') | Keyword('position')) + point3d) size = Group(Keyword('size') + point3d) rotation = Group((Keyword('rot') | Keyword('rotation')) + floatnum) obstacle_items = [position, Optional(size), Optional(rotation)] class Box(object): """A basic obstacle type.""" def __init__(self,
def parser(cls):
    """Return the pyparsing expression for a ``base ... end`` section.

    The section body consists of a ``color <integer>`` item together with
    the module-level ``obstacle_items``, in any order. On a match, the
    parsed items are turned into keyword arguments and used to instantiate
    ``cls``.
    """
    def build_instance(tokens):
        # Dict makes every parsed group addressable by its leading name.
        return cls(**dict(tokens))

    color_item = Group(Keyword('color') + integer)
    body = Each([color_item] + obstacle_items)
    header = Keyword('base').suppress()

    grammar = Dict(header + body + end)
    grammar.setParseAction(build_instance)
    return grammar
def _create_dbc_grammar():
    """Build and return the pyparsing grammar for a DBC file.

    The returned expression matches one or more top-level entries followed
    by the end of the input. ``BS_`` and event entries are matched but
    suppressed; every other entry is returned as a group.

    """

    def quoted():
        # A fresh double-quoted string matcher that may span several lines.
        return QuotedString('"', multiline=True)

    def hidden(char):
        # A literal that must be present but is dropped from the results.
        return Suppress(Literal(char))

    # --- terminals -------------------------------------------------------
    token = Word(printables.replace(';', '').replace(':', ''))
    integer = Group(Optional('-') + Word(nums))
    unsigned = Word(nums)
    numeric = Word(nums + '.Ee-+')
    colon = hidden(':')
    semicolon = hidden(';')
    pipe = hidden('|')
    at_sign = hidden('@')
    plus_or_minus = Literal('+') | Literal('-')
    open_paren = hidden('(')
    close_paren = hidden(')')
    open_bracket = hidden('[')
    close_bracket = hidden(']')
    comma = hidden(',')
    # Only spaces are skipped before a node name (not newlines).
    node_name = Word(alphas + nums + '_-').setWhitespaceChars(' ')

    def value_descriptions():
        # One or more (integer, quoted string) pairs, grouped as a whole.
        return Group(OneOrMore(Group(integer + quoted())))

    # --- header sections -------------------------------------------------
    version = Group(Keyword('VERSION') + quoted())

    symbol = Word(alphas + '_') + Suppress(LineEnd())
    symbols = Group(Keyword('NS_') + colon + Group(ZeroOrMore(symbol)))

    discard = Suppress(Keyword('BS_') + colon)

    nodes = Group(Keyword('BU_') + colon + Group(ZeroOrMore(node_name)))

    # --- signals and messages --------------------------------------------
    signal_names = Group(token + Optional(token))
    bit_spec = Group(unsigned + pipe + unsigned
                     + at_sign + unsigned + plus_or_minus)
    paren_pair = Group(open_paren + numeric + comma + numeric + close_paren)
    bracket_pair = Group(open_bracket + numeric + pipe + numeric
                         + close_bracket)
    node_list = Group(delimitedList(node_name))
    signal = Group(Keyword(SIGNAL)
                   + signal_names
                   + colon
                   + bit_spec
                   + paren_pair
                   + bracket_pair
                   + quoted()
                   + node_list)

    message = Group(Keyword(MESSAGE)
                    + unsigned
                    + token
                    + colon
                    + unsigned
                    + token
                    + Group(ZeroOrMore(signal)))

    event = Suppress(Keyword(EVENT)
                     + token
                     + colon
                     + unsigned
                     + open_bracket
                     + numeric
                     + pipe
                     + numeric
                     + close_bracket
                     + quoted()
                     + numeric
                     + numeric
                     + token
                     + node_name
                     + semicolon)

    # --- comments --------------------------------------------------------
    message_comment = Keyword(MESSAGE) + unsigned + quoted() + semicolon
    signal_comment = (Keyword(SIGNAL) + unsigned + token
                      + quoted() + semicolon)
    node_comment = Keyword(NODES) + token + quoted() + semicolon
    event_comment = Keyword(EVENT) + token + quoted() + semicolon
    comment = Group(Keyword(COMMENT)
                    + (message_comment
                       | signal_comment
                       | node_comment
                       | event_comment))

    # --- attributes ------------------------------------------------------
    object_kind = (Keyword(SIGNAL) | Keyword(MESSAGE)
                   | Keyword(EVENT) | Keyword(NODES))
    # Either a bare quoted name, or an object kind followed by the name.
    definition_name = quoted() | object_kind + quoted()
    quoted_items = Group(ZeroOrMore(Group((comma | Empty()) + quoted())))
    numeric_items = Group(ZeroOrMore(numeric))
    definition_tail = (semicolon
                       | (quoted_items + semicolon)
                       | (numeric_items + semicolon))
    attribute_definition = Group(Keyword(ATTRIBUTE_DEFINITION)
                                 + definition_name
                                 + token
                                 + definition_tail)

    attribute_definition_default = Group(
        Keyword(ATTRIBUTE_DEFINITION_DEFAULT)
        + quoted()
        + (unsigned | quoted())
        + semicolon)

    attribute_target = Group(Optional(
        (Keyword(MESSAGE) + unsigned)
        | (Keyword(SIGNAL) + unsigned + token)
        | (Keyword(NODES) + token)))
    attribute = Group(Keyword(ATTRIBUTE)
                      + quoted()
                      + attribute_target
                      + (quoted() | unsigned)
                      + semicolon)

    # --- value descriptions ----------------------------------------------
    choice = Group(Keyword(CHOICE)
                   + Optional(unsigned)
                   + token
                   + value_descriptions()
                   + semicolon)

    value_table = Group(Keyword(VALUE_TABLE)
                        + token
                        + value_descriptions()
                        + semicolon)

    # Alternatives are tried in this order for every top-level entry.
    entry = (version
             | symbols
             | discard
             | nodes
             | message
             | comment
             | attribute_definition
             | attribute_definition_default
             | attribute
             | choice
             | value_table
             | event)

    return OneOrMore(entry) + StringEnd()
# simpleSQL.py # # simple demo of using the parsing library to do simple-minded SQL parsing # could be extended to include where clauses etc. # # Copyright (c) 2003, Paul McGuire # Modified by Aaron Quinlan, 2012 # from pyparsing import Literal, CaselessLiteral, Word, Upcase, delimitedList, Optional, \ Combine, Group, alphas, nums, alphanums, ParseException, Forward, oneOf, quotedString, \ ZeroOrMore, restOfLine, Keyword # define SQL tokens selectStmt = Forward() selectToken = Keyword("select", caseless=True) fromToken = Keyword("from", caseless=True) # ARQ 2012-Feb-10: allow struct-like column names, e.,g. gt_types.sample1 (add + ".$") ident = Word( alphas, alphanums + "_$" + ".$" ).setName("identifier") columnName = Upcase( delimitedList( ident, ".", combine=True ) ) columnNameList = Group( delimitedList( columnName ) ) tableName = Upcase( delimitedList( ident, ".", combine=True ) ) tableNameList = Group( delimitedList( tableName ) ) whereExpression = Forward() and_ = Keyword("and", caseless=True) or_ = Keyword("or", caseless=True) in_ = Keyword("in", caseless=True) # ARQ 2012-Feb-10: add "like" as an operator like_ = Keyword("like", caseless=True)
def rc_statement():
    """
    Generate a RC statement parser that can be used to parse a RC file

    The returned expression matches exactly one statement; the longest of
    these alternatives wins: a comment, a precompiler line, a ``LANGUAGE``
    definition, a dialog block, a string table or a menu.

    :rtype: pyparsing.ParserElement
    """

    one_line_comment = "//" + restOfLine

    comments = cStyleComment ^ one_line_comment

    # "#word" followed by the remainder of the line (e.g. #include, #define)
    precompiler = Word("#", alphanums) + restOfLine

    language_definition = (
        "LANGUAGE"
        + Word(alphas + "_").setResultsName("language")
        + Optional("," + Word(alphas + "_").setResultsName("sublanguage"))
    )

    # Blocks are delimited either by braces or by BEGIN / END.
    block_start = (Keyword("{") | Keyword("BEGIN")).setName("block_start")
    block_end = (Keyword("}") | Keyword("END")).setName("block_end")

    reserved_words = block_start | block_end

    # An identifier that is not one of the block delimiters.
    name_id = ~reserved_words + Word(alphas, alphanums + "_").setName("name_id")

    numbers = Word(nums)

    # Hexadecimal constants may contain the digits a-f / A-F. Restricting
    # them to decimal digits made "0x1F" match only "0x1", leaving "F" to be
    # misread as the start of the next element.
    hex_digits = nums + "abcdefABCDEF"
    integerconstant = numbers ^ Combine("0x" + Word(hex_digits))

    # A constant, optionally preceded by NOT; the parts are joined with a
    # single space into one token.
    constant = Combine(
        Optional(Keyword("NOT")) + (name_id | integerconstant),
        adjacent=False,
        joinString=" ",
    )

    combined_constants = delimitedList(constant, "|")

    # Adjacent quoted strings belong to the same value.
    concatenated_string = OneOrMore(quotedString)

    # Everything between the block header and the block start. If a CAPTION
    # is found before the block starts, its text is captured as "caption".
    block_options = Optional(
        SkipTo(Keyword("CAPTION"), failOn=block_start)("pre_caption")
        + Keyword("CAPTION")
        + quotedString("caption")
    ) + SkipTo(block_start)("post_caption")

    # A control line: an identifier followed by a comma separated list of
    # values. One-line comments are also accepted between controls.
    undefined_control = (
        Group(
            name_id.setResultsName("id_control")
            + delimitedList(
                concatenated_string
                ^ constant
                ^ numbers
                ^ Group(combined_constants)
            ).setResultsName("values_")
        )
        | one_line_comment
    )

    block = block_start + ZeroOrMore(undefined_control)("controls") + block_end

    # DIALOGEX is tried before DIALOG.
    dialog = (
        name_id("block_id")
        + (Keyword("DIALOGEX") | Keyword("DIALOG"))("block_type")
        + block_options
        + block
    )

    string_table = Keyword("STRINGTABLE")("block_type") + block_options + block

    menu_item = Keyword("MENUITEM")("block_type") + (
        commaSeparatedList("values_") | Keyword("SEPARATOR")
    )

    # Popups can nest, so the block is forward-declared.
    popup_block = Forward()

    popup_block <<= Group(
        Keyword("POPUP")("block_type")
        + Optional(quotedString("caption"))
        + block_start
        + ZeroOrMore(Group(menu_item | popup_block))("elements")
        + block_end
    )("popups*")

    menu = (
        name_id("block_id")
        + Keyword("MENU")("block_type")
        + block_options
        + block_start
        + ZeroOrMore(popup_block)
        + block_end
    )

    return (
        comments
        ^ precompiler
        ^ language_definition
        ^ dialog
        ^ string_table
        ^ menu
    )
EXPONENT_re = r'(?:[eE][+-]?[0-9]+)' INT_re = r'[+-]?[0-9]+' INT = Regex(INT_re).setParseAction( composition([refer_component(int), rdflib.term.Literal])) INTEGER = Regex(r'[0-9]+').setParseAction( composition([refer_component(int), rdflib.term.Literal])) FLOAT_re = (r'[+-]?(?:(?:[0-9]+\.[0-9]*%s?)|' + r'(?:\.[0-9]+%s?)|(?:[0-9]+%s))') % ((EXPONENT_re, ) * 3) FLOAT = Regex(FLOAT_re).setParseAction( composition([refer_component(float), rdflib.term.Literal])) NumericLiteral = (FLOAT | INT) if DEBUG: NumericLiteral.setName('NumericLiteral') # BooleanLiteral: BooleanLiteral = (Keyword('true') | Keyword('false')).setParseAction( refer_component(rdflib.term.Literal, datatype=XSD.boolean)) if DEBUG: BooleanLiteral.setName('BooleanLiteral') # BlankNode: ANON = Regex(r'\[' + WS_re + r'\]').setParseAction( refer_component(rdflib.term.BNode, None, [])) BLANK_NODE_LABEL = (Suppress('_:') + PN_LOCAL).setParseAction( refer_component(rdflib.term.BNode)) BlankNode = (BLANK_NODE_LABEL | ANON) if DEBUG: BlankNode.setName('BlankNode') # GraphTerm: GraphTerm = (IRIref | RDFLiteral | NumericLiteral | BooleanLiteral | BlankNode
def _create_grammar():
    """Create the DBC grammar.

    Builds one pyparsing expression per DBC entry type and returns
    ``OneOrMore(entry) + StringEnd()``, i.e. a parser that must consume the
    whole input as a sequence of entries.

    Most sequences are joined with pyparsing's ``-`` operator rather than
    ``+``; see the pyparsing documentation for its error-stop semantics.
    The keyword/name constants (``SIGNAL``, ``MESSAGE``, ``COMMENT``, ...)
    are defined elsewhere in the module.
    """

    # --- Basic tokens -----------------------------------------------------
    # Any run of printable characters except ';' and ':'.
    word = Word(printables.replace(';', '').replace(':', ''))
    # Optionally negative integer, grouped as ['-', digits] or [digits].
    integer = Group(Optional('-') + Word(nums))
    positive_integer = Word(nums).setName('positive integer')
    # Loose numeric token: any run of digits, '.', 'E', 'e', '-' and '+'.
    number = Word(nums + '.Ee-+')

    # --- Punctuation (suppressed from the results, except ``sign``) ---------
    colon = Suppress(Literal(':'))
    scolon = Suppress(Literal(';'))
    pipe = Suppress(Literal('|'))
    at = Suppress(Literal('@'))
    sign = Literal('+') | Literal('-')
    lp = Suppress(Literal('('))
    rp = Suppress(Literal(')'))
    lb = Suppress(Literal('['))
    rb = Suppress(Literal(']'))
    comma = Suppress(Literal(','))

    # Node names; only the space character is skipped as leading whitespace.
    node = Word(alphas + nums + '_-').setWhitespaceChars(' ')
    frame_id = Word(nums).setName('frame id')

    # --- Header sections --------------------------------------------------
    version = Group(Keyword('VERSION') - QuotedString('"', multiline=True))
    version.setName(VERSION)

    # One symbol per line.
    symbol = Word(alphas + '_') + Suppress(LineEnd())

    symbols = Group(Keyword('NS_') - colon - Group(ZeroOrMore(symbol)))
    symbols.setName('NS_')

    # 'BS_:' is matched but produces no tokens.
    discard = Suppress(Keyword('BS_') - colon).setName('BS_')

    nodes = Group(Keyword('BU_') - colon - Group(ZeroOrMore(node)))
    nodes.setName('BU_')

    # --- Signals and messages ---------------------------------------------
    # SIGNAL word [word] : int|int@int sign (num,num) [num|num] "text" nodes
    signal = Group(
        Keyword(SIGNAL) - Group(word + Optional(word)) - colon -
        Group(positive_integer - pipe - positive_integer - at -
              positive_integer - sign) -
        Group(lp - number - comma - number - rp) -
        Group(lb - number - pipe - number - rb) -
        QuotedString('"', multiline=True) -
        Group(delimitedList(node)))
    signal.setName(SIGNAL)

    # MESSAGE frame_id word : int word, followed by its signals.
    message = Group(
        Keyword(MESSAGE) - frame_id - word - colon - positive_integer -
        word - Group(ZeroOrMore(signal)))
    message.setName(MESSAGE)

    # Events are recognised but fully suppressed from the results.
    event = Suppress(
        Keyword(EVENT) - word - colon - positive_integer - lb - number -
        pipe - number - rb - QuotedString('"', multiline=True) - number -
        number - word - node - scolon)
    event.setName(EVENT)

    # --- Comments -----------------------------------------------------------
    # A comment targets a message, a signal, a node, an event, or nothing
    # (bare quoted string).
    comment = Group(
        Keyword(COMMENT) -
        ((Keyword(MESSAGE) - frame_id -
          QuotedString('"', multiline=True) - scolon).setName(MESSAGE) |
         (Keyword(SIGNAL) - frame_id - word -
          QuotedString('"', multiline=True) - scolon).setName(SIGNAL) |
         (Keyword(NODES) - word -
          QuotedString('"', multiline=True) - scolon).setName(NODES) |
         (Keyword(EVENT) - word -
          QuotedString('"', multiline=True) - scolon).setName(EVENT) |
         (QuotedString('"', multiline=True) - scolon).setName('QuotedString')))
    comment.setName(COMMENT)

    # --- Attributes ---------------------------------------------------------
    # Definition: optional object kind, quoted name, a word, then either
    # nothing, a list of quoted strings (commas optional) or a list of numbers.
    attribute_definition = Group(
        Keyword(ATTRIBUTE_DEFINITION) -
        ((QuotedString('"', multiline=True)) |
         (Keyword(SIGNAL) | Keyword(MESSAGE) | Keyword(EVENT) | Keyword(NODES)) +
         QuotedString('"', multiline=True)) -
        word -
        (scolon |
         (Group(
             ZeroOrMore(
                 Group((comma | Empty()) +
                       QuotedString('"', multiline=True)))) + scolon) |
         (Group(ZeroOrMore(number)) + scolon)))
    attribute_definition.setName(ATTRIBUTE_DEFINITION)

    # Default value: quoted attribute name followed by a number or a string.
    attribute_definition_default = Group(
        Keyword(ATTRIBUTE_DEFINITION_DEFAULT) -
        QuotedString('"', multiline=True) -
        (number | QuotedString('"', multiline=True)) - scolon)
    attribute_definition_default.setName(ATTRIBUTE_DEFINITION_DEFAULT)

    # Attribute value: quoted name, optional target (message / signal /
    # node), then the value as a quoted string or a number.
    attribute = Group(
        Keyword(ATTRIBUTE) - QuotedString('"', multiline=True) - Group(
            Optional((Keyword(MESSAGE) + frame_id) |
                     (Keyword(SIGNAL) + frame_id + word) |
                     (Keyword(NODES) + word))) -
        (QuotedString('"', multiline=True) | number) - scolon)
    attribute.setName(ATTRIBUTE)

    # --- Value descriptions ---------------------------------------------------
    # CHOICE [frame_id] word, then one or more (integer, "text") pairs.
    choice = Group(
        Keyword(CHOICE) - Group(Optional(frame_id)) - word -
        Group(OneOrMore(Group(integer +
                              QuotedString('"', multiline=True)))) - scolon)
    choice.setName(CHOICE)

    # VALUE_TABLE word, then one or more (integer, "text") pairs.
    value_table = Group(
        Keyword(VALUE_TABLE) - word -
        Group(OneOrMore(Group(integer +
                              QuotedString('"', multiline=True)))) - scolon)
    value_table.setName(VALUE_TABLE)

    signal_type = Group(
        Keyword(SIGNAL_TYPE) - frame_id - word - colon - positive_integer -
        scolon)
    signal_type.setName(SIGNAL_TYPE)

    # Ranges are written "a-b"; only the first bound is kept in the results,
    # the '-' and the second bound are suppressed.
    signal_multiplexer_values = Group(
        Keyword(SIGNAL_MULTIPLEXER_VALUES) - frame_id - word - word - Group(
            delimitedList(positive_integer - Suppress('-') -
                          Suppress(positive_integer))) - scolon)
    signal_multiplexer_values.setName(SIGNAL_MULTIPLEXER_VALUES)

    message_add_sender = Group(
        Keyword(MESSAGE_TX_NODE) - frame_id - colon -
        Group(delimitedList(node)) - scolon)
    message_add_sender.setName(MESSAGE_TX_NODE)

    # --- Relation attributes (same shapes as the attribute entries above) -----
    attribute_definition_rel = Group(
        Keyword(ATTRIBUTE_DEFINITION_REL) -
        (QuotedString('"', multiline=True) |
         (Keyword(NODES_REL) + QuotedString('"', multiline=True))) -
        word -
        (scolon |
         (Group(
             ZeroOrMore(
                 Group((comma | Empty()) +
                       QuotedString('"', multiline=True)))) + scolon) |
         (Group(ZeroOrMore(number)) + scolon)))
    attribute_definition_rel.setName(ATTRIBUTE_DEFINITION_REL)

    attribute_definition_default_rel = Group(
        Keyword(ATTRIBUTE_DEFINITION_DEFAULT_REL) -
        QuotedString('"', multiline=True) -
        (number | QuotedString('"', multiline=True)) - scolon)
    attribute_definition_default_rel.setName(ATTRIBUTE_DEFINITION_DEFAULT_REL)

    attribute_rel = Group(
        Keyword(ATTRIBUTE_REL) - QuotedString('"', multiline=True) -
        Keyword(NODES_REL) - word - Keyword(SIGNAL) - frame_id - word -
        positive_integer - scolon)
    attribute_rel.setName(ATTRIBUTE_REL)

    # --- Top level ------------------------------------------------------------
    # Alternatives are tried in this order (first match wins).
    entry = (version | symbols | discard | nodes | message | comment |
             attribute_definition | attribute_definition_default | attribute |
             choice | value_table | signal_type | signal_multiplexer_values |
             message_add_sender | attribute_definition_rel |
             attribute_definition_default_rel | attribute_rel | event)

    # Convert matched frame ids to int. NOTE(review): the action is attached
    # to the shared ``frame_id`` object after it has been used above, so it
    # presumably applies everywhere frame_id appears -- confirm.
    frame_id.setParseAction(lambda _s, _l, t: int(t[0]))

    return OneOrMore(entry) + StringEnd()
Keyword, opAssoc, ParseException, ParserElement, Word, ) log = logging.getLogger(__name__) ParserElement.enablePackrat() # Defines the allowed characters that form a valid token. # Tokens that don't match this format will raise an exception when found. DEFAULT_TOKEN_FORMAT = f"{alphanums}_-@." TRUE = Keyword("True") FALSE = Keyword("False") NOT_OP = CaselessKeyword("not") AND_OP = CaselessKeyword("and") OR_OP = CaselessKeyword("or") class TokenEvaluator: """Interface to evaluate a token and determine its boolean value.""" def evaluate(self, token: str) -> bool: """Returns the boolean representation of the given token according to some custom logic.""" raise NotImplementedError class BoolOperand: """Represents a boolean operand that has a label and a value.
AS_SYM, BY_SYM, DAY_SYM, EXCEPT_SYM, DISTINCT_SYM, DSTREAM_SYM, \ FIRST_SYM, FROM_SYM, GROUP_SYM, IF_SYM, ISTREAM_SYM, HOUR_SYM, \ IDENTIFIED_SYM, MINUTE_SYM, NOT_SYM, NOW_SYM, PREFERENCES_SYM, \ PREVIOUS_SYM, RANGE_SYM, RSTREAM_SYM, SECOND_SYM, \ SELECT_SYM, SEQUENCE_SYM, SLIDE_SYM, SUBSEQUENCE_SYM, \ CONSECUTIVE_SYM, SOME_SYM, TEMPORAL_SYM, THEN_SYM, \ TO_SYM, TOP_SYM, UNBOUNDED_SYM, WHERE_SYM, UNION_SYM, \ CURRENT_SYM, REGISTER_SYM, INTERSECT_SYM, OUTPUT_SYM, \ MINIMUM_SYM, MAXIMUM_SYM, LENGTH_SYM, IS_SYM, BETTER_SYM, \ QUERY_SYM, CHANGES_SYM, STREAM_SYM, TABLE_SYM, INTEGER_SYM, OR_SYM, \ FLOAT_SYM, STRING_SYM, INPUT_SYM, TIMESTAMP_SYM, END_SYM, POSITION_SYM,\ TUPLES_SYM # Grammar keywords ACCORDING_KEYWORD = Keyword(ACCORDING_SYM, caseless=True) ALL_KEYWORD = Keyword(ALL_SYM, caseless=True) AND_KEYWORD = Keyword(AND_SYM, caseless=True) AS_KEYWORD = Keyword(AS_SYM, caseless=True) BETTER_KEYWORD = Keyword(BETTER_SYM, caseless=True) BY_KEYWORD = Keyword(BY_SYM, caseless=True) CHANGES_KEYWORD = Keyword(CHANGES_SYM, caseless=True) CONSECUTIVE_KEYWORD = Keyword(CONSECUTIVE_SYM, caseless=True) CURRENT_KEYWORD = Keyword(CURRENT_SYM, caseless=True) DAY_KEYWORD = Keyword(DAY_SYM, caseless=True) DIFFERENCE_KEYWORD = Keyword(EXCEPT_SYM, caseless=True) DISTINCT_KEYWORD = Keyword(DISTINCT_SYM, caseless=True) DSTREAM_KEYWORD = Keyword(DSTREAM_SYM, caseless=True) END_KEYWORD = Keyword(END_SYM, caseless=True) EXCEPT_KEYWORD = Keyword(EXCEPT_SYM, caseless=True) FIRST_KEYWORD = Keyword(FIRST_SYM, caseless=True)
except ValueError: return token[0][0] if op == 'or': return left | right elif op == 'and': return left & right return token[0] selectStmt = Forward() ident = Word(alphas, alphanums).setName('identifier') columnName = delimitedList(ident) whereExpression = Forward() and_ = Keyword('and', caseless=True) or_ = Keyword('or', caseless=True) in_ = Keyword('in', caseless=True) null = Keyword('NULL', caseless=True) binop = oneOf('= != < > >= <= is isnot like') arithSign = Word('+-', exact=1) realNum = Combine( Optional(arithSign) + (Word(nums) + '.' + Optional(Word(nums)) | ('.' + Word(nums)))) intNum = Combine(Optional(arithSign) + Word(nums)) columnRval = realNum | intNum | quotedString | null whereCondition = Group(
from pyparsing import Suppress, Keyword, Word, Forward, Group, ZeroOrMore, restOfLine # <constant> ::= False | True # <variable> ::= 'p' | 'q' | 'r' # <or> ::= 'or' # <and> ::= 'and' # <not> ::= 'not' # <expression> ::= <term> { <or><term> } # <term> ::= <factor> { <and><factor> } # <factor> ::= <constant> | <not><factor> | (<expression>) l_par, r_par = Suppress('('), Suppress(')') and_op = Keyword('and') or_op = Keyword('or') not_op = Keyword('not') variable = Word('pqr', exact=1) constant = Keyword('True') | Keyword('False') expr = Forward() factor = Forward() factor <<= constant | variable | Group(not_op + factor) | Group(l_par + expr + r_par) term = factor + ZeroOrMore(and_op + factor) expr <<= term + ZeroOrMore(or_op + factor) test_strings = [ 'True', 'not True', 'p', 'q and r', '(q and r)', '(False)',
List = Group(Value + OneOrMore(Suppress(Literal(",")) + Value)).setParseAction(_List) in_op = lambda lhs, rhs: operator.contains(rhs, lhs) if lhs and rhs else False re_op = lambda lhs, rhs: bool(rhs.search(lhs)) if lhs and rhs else False # Operators ComparisonOp = MatchFirst([ Literal("==").setParseAction(lambda toks: operator.eq), Literal("!=").setParseAction(lambda toks: operator.ne), Literal("<=").setParseAction(lambda toks: operator.le), Literal("<").setParseAction(lambda toks: operator.lt), Literal(">=").setParseAction(lambda toks: operator.ge), Literal(">").setParseAction(lambda toks: operator.gt), Keyword("in").setParseAction(lambda toks: in_op), Literal("=~").setParseAction(lambda toks: re_op), ]) class _Not(Token): def compiler(self, *toks): # pylint: disable=no-self-use return toks[0][1] def __call__(self, ctx): return not self.value(ctx) class _And(Token): def compiler(self, *toks):
def script(self):
    """Build and return the pyparsing grammar for an nginx-style config.

    The returned expression (``sub_block``) matches one or more grouped
    items, each being an ``if`` block, a ``location`` block, a hash block,
    a generic block, an ``include``, a directive, a file delimiter, a
    comment, or an unparsed block (tried in that order).

    Whitespace between tokens is matched explicitly through ``space``.
    NOTE(review): this suggests the grammar is used with automatic
    whitespace skipping turned off somewhere outside this method -- confirm.
    """
    # constants
    left_bracket = Suppress("{")
    right_bracket = Suppress("}")
    semicolon = Suppress(";")
    space = White().suppress()
    keyword = Word(alphanums + ".+-_/")
    path = Word(alphanums + ".-_/")
    variable = Word("$_-" + alphanums)
    # Unquoted value: a run of parenthesised groups, ${name} references and
    # any characters other than whitespace, ';', parentheses and braces.
    value_wq = Regex(r'(?:\([^\s;]*\)|\$\{\w+\}|[^\s;(){}])+')
    value_sq = NginxQuotedString(quoteChar="'")
    value_dq = NginxQuotedString(quoteChar='"')
    # Quoted forms are tried before the unquoted one.
    value = (value_dq | value_sq | value_wq)
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    location_modifier = (Keyword("=") | Keyword("~*") | Keyword("~") |
                         Keyword("^~"))
    # modifier for if statement: optional '!' followed by '=', '~*', '~'
    # or a file test ('-f', '-d', '-e', '-x'), combined into one token
    if_modifier = Combine(
        Optional("!") +
        (Keyword("=") | Keyword("~*") | Keyword("~") |
         (Literal("-") +
          (Literal("f") | Literal("d") | Literal("e") | Literal("x")))))
    # Either "<modifier> value" or "variable [<modifier> value]".
    condition = ((if_modifier + Optional(space) + value) |
                 (variable +
                  Optional(space + if_modifier + Optional(space) + value)))

    # rules
    include = (Keyword("include") + space + value + semicolon)("include")

    directive = (keyword + ZeroOrMore(space + value) + semicolon)("directive")

    # Marker line of the form "# configuration file <path>:".
    file_delimiter = (Suppress("# configuration file ") + path +
                      Suppress(":"))("file_delimiter")

    # '#' followed by the rest of the line.
    comment = (Suppress('#') + Regex(r".*"))("comment")

    hash_value = Group(value + ZeroOrMore(space + value) +
                       semicolon)("hash_value")

    # Block kinds are forward-declared because sub_block refers to them and
    # most of them contain sub_block in turn.
    generic_block = Forward()
    if_block = Forward()
    location_block = Forward()
    hash_block = Forward()
    unparsed_block = Forward()

    # Alternatives are tried in this order (first match wins).
    sub_block = OneOrMore(
        Group(if_block | location_block | hash_block | generic_block |
              include | directive | file_delimiter | comment |
              unparsed_block))

    # if ( <condition> ) { ... }
    if_block << (
        Keyword("if") + Suppress("(") + Group(condition) + Suppress(")") +
        Group(left_bracket + Optional(sub_block) + right_bracket))("block")

    # location [modifier] <value> { ... }
    location_block << (Keyword("location") + Group(
        Optional(space + location_modifier) + Optional(space) + value) +
                       Group(left_bracket + Optional(sub_block) +
                             right_bracket))("block")

    # <keyword> <variable>... { <hash_value>... } -- at least one variable
    hash_block << (keyword + Group(OneOrMore(space + variable)) +
                   Group(left_bracket + Optional(OneOrMore(hash_value)) +
                         right_bracket))("block")

    # <keyword> [<variable>...] { ... }
    generic_block << (
        keyword + Group(ZeroOrMore(space + variable)) +
        Group(left_bracket + Optional(sub_block) + right_bracket))("block")

    # Last alternative in sub_block: same header as a generic block, but the
    # braces are matched with nestedExpr.
    unparsed_block << (
        keyword + Group(ZeroOrMore(space + variable)) +
        nestedExpr(opener="{", closer="}"))("unparsed_block")

    return sub_block
def convertString(s, s2):
    """Parse action: return the first token of ``s2`` with every
    backslash-escaped double quote replaced by a plain double quote."""
    first_token = s2[0]
    return first_token.replace("\\\"", '"')


def convertDict(d):
    """Parse action: turn the first token of ``d`` into a ``dict``."""
    pairs = d[0]
    return dict(pairs)


def convertTuple(t):
    """Parse action: turn the first token of ``t`` into a ``tuple``."""
    items = t[0]
    return tuple(items)


# Forward declarations for the recursive parts of the grammar.
omcValue = Forward()
omcRecord = Forward()

# Literals that are replaced by the corresponding Python value.
TRUE = Keyword("true")
TRUE.setParseAction(replaceWith(True))

FALSE = Keyword("false")
FALSE.setParseAction(replaceWith(False))

# "NONE()" becomes None; "SOME(value)" yields just the wrapped value.
NONE = Keyword("NONE") + Suppress("(") + Suppress(")")
NONE.setParseAction(replaceWith(None))

SOME = (
    Suppress(Keyword("SOME"))
    + Suppress("(")
    + omcValue
    + Suppress(")")
)

# Double-quoted string with backslash escapes, possibly spanning lines.
omcString = QuotedString(quoteChar='"', escChar='\\', multiline=True)
omcString.setParseAction(convertString)

# Number: optional '-', integer part without leading zeros, optional
# fraction, optional exponent. Combined into a single string token.
_integer_part = '0' | Word('123456789', nums)
_fraction_part = Optional('.' + Word(nums))
_exponent_part = Optional(Word('eE', exact=1) + Word(nums + '+-', nums))
omcNumber = Combine(
    Optional('-') + _integer_part + _fraction_part + _exponent_part
)

# Identifier: either a plain name or an arbitrary name in single quotes.
_plain_ident = Word(alphas + "_", alphanums + "_")
_quoted_ident = Combine(
    "'" + Word(alphanums + "!#$%&()*+,-./:;<>=?@[]^{}|~ ") + "'"
)
ident = _plain_ident | _quoted_ident

fqident = Forward()
def _make_arabic_parser():
    """Build the query-language grammar and return its ``parseString`` method.

    The grammar recognises plain words, wildcards, ranges, synonyms,
    antonyms, derivations, spell-error terms, tashkil phrases, tuples,
    quoted phrases, parenthesised groups, boosted and fielded units, and the
    operators NOT / AND / OR / ANDNOT in both Arabic and English spellings.
    Each construct is wrapped in a ``Group`` carrying a results name
    ("Word", "Wildcard", "Range", ...), and the whole query is wrapped in a
    group named "Toplevel" that must be followed by the end of the string.

    Whitespace is matched explicitly with ``White()`` in many places.
    NOTE(review): this suggests automatic whitespace skipping is disabled
    elsewhere in the module -- confirm.
    """
    # Two-character sequence that escapes the following character.
    escapechar = "//"

    # wordchars = printables
    # for specialchar in '*?^():"{}[] ' + escapechar:
    #    wordchars = wordchars.replace(specialchar, "")
    # wordtext = Word(wordchars)

    # Characters accepted in a field name (see fieldedUnit below).
    alephba = """ abcdefghijklmnopqrstuvwxyz_ األآإـتنمكطدجحخهعغفقثصضشسيبئءؤرىةوزظذ """

    # Word text is any run of characters that are not query syntax.
    wordtext = CharsNotIn('//*؟^():"{}[]$><%~#،,\' +-|')

    # An escape is the escape sequence followed by exactly one printable or
    # whitespace character; only that character is kept.
    escape = Suppress( escapechar ) \
             + ( Word( printables, exact = 1 ) | White( exact = 1 ) )
    wordtoken = Combine(OneOrMore(wordtext | escape))

    # A plain old word.
    plainWord = Group(wordtoken).setResultsName("Word")

    # A wildcard word containing * or ? (Latin or Arabic question mark).
    wildchars = Word("؟?*")
    # Start with word chars and then have wild chars mixed in
    wildmixed = wordtoken + OneOrMore(wildchars + Optional(wordtoken))
    # Or, start with wildchars, and then either a mixture of word and wild
    # chars, or the next token
    wildstart = wildchars \
        + ( OneOrMore( wordtoken + Optional( wildchars ) ) \
            | FollowedBy( White() \
            | StringEnd() ) )
    wildcard = Group(Combine(wildmixed | wildstart)).setResultsName("Wildcard")

    # A range of terms
    startfence = Literal("[")
    endfence = Literal("]")
    rangeitem = QuotedString('"') | wordtoken
    # The "to" keyword, in its accepted Arabic and English spellings.
    to = Keyword( "الى" ) \
        | Keyword( "إلى" ) \
        | Keyword( "To" ) \
        | Keyword( "to" ) \
        | Keyword( "TO" )

    # Each range form yields two groups (start, end); a missing bound is an
    # empty group.
    openstartrange = Group( Empty() ) \
                     + Suppress( to + White() ) \
                     + Group( rangeitem )

    openendrange = Group( rangeitem ) \
                   + Suppress( White() + to ) \
                   + Group( Empty() )
    normalrange = Group( rangeitem ) \
                  + Suppress( White() + to + White() ) \
                  + Group( rangeitem )
    # NOTE: this local name shadows the builtin ``range`` inside the function.
    range = Group( startfence \
                   + ( normalrange | openstartrange | openendrange ) \
                   + endfence ).setResultsName( "Range" )

    # synonyms
    syn_symbol = Literal("~")
    synonym = Group(syn_symbol + wordtoken).setResultsName("Synonyms")

    # antonyms
    ant_symbol = Literal("#")
    antonym = Group(ant_symbol + wordtoken).setResultsName("Antonyms")

    # derivation level 1,2: one or more '<' / '>' symbols before the word
    derive_symbole = Literal("<") | Literal(">")
    derivation = Group(OneOrMore(derive_symbole) + wordtoken).setResultsName("Derivation")

    # spellerrors
    # spellerrors=Group(QuotedString('\'')).setResultsName("Errors")
    spellerrors_symbole = Literal("%")
    spellerrors = Group(spellerrors_symbole + wordtoken).setResultsName("SpellErrors")

    # shakl:must uplevel to boostable
    # Words and whitespace enclosed in single quotes.
    tashkil_symbol = Literal("'")
    tashkil = Group( tashkil_symbol + \
              ZeroOrMore( wordtoken | White() ) + \
              tashkil_symbol ).setResultsName( "Tashkil" )

    # tuple search (root,pattern,type)
    # Items are separated by an Arabic or Latin comma and may be empty.
    starttuple = Literal("{")
    endtuple = Literal("}")
    bettuple = Literal("،") | Literal(",")
    wordtuple = Group(Optional(wordtoken))
    # NOTE: this local name shadows the builtin ``tuple`` inside the function.
    tuple = Group( starttuple + \
                   wordtuple + \
                   ZeroOrMore( bettuple + wordtuple ) + \
                   endtuple ).setResultsName( "Tuple" )

    # A word-like thing (alternatives are tried in this order)
    generalWord = range | wildcard | plainWord | tuple | antonym | synonym | \
        derivation | tashkil | spellerrors

    # A quoted phrase
    quotedPhrase = Group(QuotedString('"')).setResultsName("Quotes")

    expression = Forward()

    # Parentheses can enclose (group) any expression
    parenthetical = Group( (Suppress("(") + expression + Suppress(")"))).setResultsName("Group")

    # A unit followed by '^' and a number made of digits and dots is boosted.
    boostableUnit = generalWord | quotedPhrase
    boostedUnit = Group( boostableUnit + \
                  Suppress( "^" ) + \
                  Word( "0123456789", ".0123456789" ) ).setResultsName( "Boost" )

    # The user can flag that a parenthetical group, quoted phrase, or word
    # should be searched in a particular field by prepending 'fn:', where fn is
    # the name of the field.
    fieldableUnit = parenthetical | boostedUnit | boostableUnit
    fieldedUnit = Group( ( Word( alephba + "_" ) | Word( alphanums + "_" ) ) + \
                  Suppress( ':' ) + \
                  fieldableUnit ).setResultsName( "Field" )

    # Units of content
    unit = fieldedUnit | fieldableUnit

    # A unit may be "not"-ed.
    operatorNot = Group( Suppress( Keyword( "ليس" ) | Keyword( "NOT" ) ) + \
                  Suppress( White() ) + \
                  unit ).setResultsName( "Not" )
    generalUnit = operatorNot | unit

    # Operator keywords in Arabic and English.
    andToken = Keyword("و") | Keyword("AND")
    orToken = Keyword("أو") | Keyword("او") | Keyword("OR")
    andNotToken = Keyword("وليس") | Keyword("ANDNOT")

    # Each binary operator has a keyword form (surrounded by whitespace) and
    # a symbol form ('+', '|' and '-' respectively); the right-hand side is a
    # full expression.
    operatorAnd = Group( ( generalUnit + \
                  Suppress( White() ) + \
                  Suppress( andToken ) + \
                  Suppress( White() ) + \
                  expression ) | \
                  ( generalUnit + \
                  Suppress( Literal( "+" ) ) + \
                  expression ) ).setResultsName( "And" )

    operatorOr = Group( ( generalUnit + \
                 Suppress( White() ) + \
                 Suppress( orToken ) + \
                 Suppress( White() ) + \
                 expression ) | \
                 ( generalUnit + \
                 Suppress( Literal( "|" ) ) + \
                 expression ) ).setResultsName( "Or" )

    # The left operand of AND-NOT is a plain unit (it cannot itself be
    # "not"-ed).
    operatorAndNot = Group( ( unit + \
                     Suppress( White() ) + \
                     Suppress( andNotToken ) + \
                     Suppress( White() ) + \
                     expression ) | \
                     ( unit + \
                     Suppress( Literal( "-" ) ) + \
                     expression ) ).setResultsName( "AndNot" )

    # An expression is a sequence of operators / units (whitespace between
    # them is dropped), or nothing at all.
    expression << ( OneOrMore( operatorAnd | operatorOr | operatorAndNot | \
                    generalUnit | Suppress( White() ) ) | Empty() )

    toplevel = Group(expression).setResultsName("Toplevel") + StringEnd()

    # Callers receive the bound parse function, not the grammar object.
    return toplevel.parseString
def CORBA_IDL_BNF():
    """Return the pyparsing grammar for CORBA IDL, building it on first use.

    The grammar is cached in the module-level ``bnf`` variable; later calls
    return that same object. ``//`` comments and C-style comments are
    ignored by the grammar.
    """
    global bnf

    if bnf:
        # Already built by an earlier call.
        return bnf

    # ---- punctuation -------------------------------------------------
    colon = Literal(":")
    lbrace = Literal("{")
    rbrace = Literal("}")
    lbrack = Literal("[")
    rbrack = Literal("]")
    lparen = Literal("(")
    rparen = Literal(")")
    equals = Literal("=")
    comma = Literal(",")
    dot = Literal(".")
    slash = Literal("/")
    bslash = Literal("\\")
    star = Literal("*")
    semi = Literal(";")
    langle = Literal("<")
    rangle = Literal(">")

    # ---- keywords ----------------------------------------------------
    any_ = Keyword("any")
    attribute_ = Keyword("attribute")
    boolean_ = Keyword("boolean")
    case_ = Keyword("case")
    char_ = Keyword("char")
    const_ = Keyword("const")
    context_ = Keyword("context")
    default_ = Keyword("default")
    double_ = Keyword("double")
    enum_ = Keyword("enum")
    exception_ = Keyword("exception")
    false_ = Keyword("FALSE")
    fixed_ = Keyword("fixed")
    float_ = Keyword("float")
    inout_ = Keyword("inout")
    interface_ = Keyword("interface")
    in_ = Keyword("in")
    long_ = Keyword("long")
    module_ = Keyword("module")
    object_ = Keyword("Object")
    octet_ = Keyword("octet")
    oneway_ = Keyword("oneway")
    out_ = Keyword("out")
    raises_ = Keyword("raises")
    readonly_ = Keyword("readonly")
    sequence_ = Keyword("sequence")
    short_ = Keyword("short")
    string_ = Keyword("string")
    struct_ = Keyword("struct")
    switch_ = Keyword("switch")
    true_ = Keyword("TRUE")
    typedef_ = Keyword("typedef")
    unsigned_ = Keyword("unsigned")
    union_ = Keyword("union")
    void_ = Keyword("void")
    wchar_ = Keyword("wchar")
    wstring_ = Keyword("wstring")

    # ---- basic tokens ------------------------------------------------
    identifier = Word(alphas, alphanums + "_").setName("identifier")
    real = Regex(r"[+-]?\d+\.\d*([Ee][+-]?\d+)?").setName("real")
    integer = Regex(r"0x[0-9a-fA-F]+|[+-]?\d+").setName("int")

    # User-defined type names may be scoped with "::".
    udTypeName = delimitedList(identifier, "::", combine=True).setName("udType")

    # Longest match ("^") is required here: a user-defined type name may
    # start with a keyword type, like "stringSeq" or "longArray".
    typeName = (
        any_
        ^ boolean_
        ^ char_
        ^ double_
        ^ fixed_
        ^ float_
        ^ long_
        ^ octet_
        ^ short_
        ^ string_
        ^ wchar_
        ^ wstring_
        ^ udTypeName
    ).setName("type")

    # sequence< ... > may nest.
    sequenceDef = Forward().setName("seq")
    sequenceDef << Group(
        sequence_ + langle + (sequenceDef | typeName) + rangle
    )

    # A type is a sequence or a type name with an optional "[n]" suffix.
    typeDef = sequenceDef | (typeName + Optional(lbrack + integer + rbrack))

    # ---- definitions -------------------------------------------------
    typedefDef = Group(typedef_ + typeDef + identifier + semi).setName("typedef")

    moduleDef = Forward()

    constDef = Group(
        const_
        + typeDef
        + identifier
        + equals
        + (real | integer | quotedString)
        + semi
    )

    exceptionItem = Group(typeDef + identifier + semi)
    exceptionDef = (
        exception_
        + identifier
        + lbrace
        + ZeroOrMore(exceptionItem)
        + rbrace
        + semi
    )

    attributeDef = Optional(readonly_) + attribute_ + typeDef + identifier + semi

    # "inout" is listed before "in" in the alternation.
    paramlist = delimitedList(
        Group((inout_ | in_ | out_) + typeName + identifier)
    ).setName("paramlist")

    operationDef = (
        (void_ ^ typeDef)
        + identifier
        + lparen
        + Optional(paramlist)
        + rparen
        + Optional(raises_ + lparen + Group(delimitedList(typeName)) + rparen)
        + semi
    )

    interfaceItem = (constDef | exceptionDef | attributeDef | operationDef)
    interfaceDef = Group(
        interface_
        + identifier
        + Optional(colon + delimitedList(typeName))
        + lbrace
        + ZeroOrMore(interfaceItem)
        + rbrace
        + semi
    ).setName("opnDef")

    # Modules may contain other modules.
    moduleItem = (interfaceDef | exceptionDef | constDef | typedefDef | moduleDef)
    moduleDef << module_ + identifier + lbrace + ZeroOrMore(moduleItem) + rbrace + semi

    # ---- top level ---------------------------------------------------
    bnf = (moduleDef | OneOrMore(moduleItem))

    singleLineComment = "//" + restOfLine
    bnf.ignore(singleLineComment)
    bnf.ignore(cStyleComment)

    return bnf
def fold_chain(t, p, c): i = iter(c[1:]) return reduce(lambda fc, (m, a): function_or_method_call([fc, m, a]), zip(i, i), c[0]) functioncall_chain = (functioncall_simple + OneOrMore(args)).setParseAction(fold_chain) functioncall = functioncall_chain | functioncall_simple # prefixexp ::= var | functioncall | ‘(’ exp ‘)’ prefixexp = (functioncall | var | (Suppress('(') + exp + Suppress(')'))) # exp ::= nil | false | true | Numeral | LiteralString | ‘...’ | functiondef | # prefixexp | tableconstructor | exp binop exp | unop exp exp << (Keyword('nil').setParseAction(from_parse_result(ast.Nil)) | (Keyword('true') | Keyword('false')).setParseAction( from_parse_result(ast.Boolean)) | literal_string | prefixexp | tableconstructor) # explist ::= {exp `,´} exp explist << Group(exp + ZeroOrMore(Suppress(",") + exp)) semicolon = Literal(';').setParseAction(from_parse_result(ast.Semicolon)) # varlist ::= var {`,´ var} varlist = Group(var + ZeroOrMore(Suppress(",") + var)).setParseAction(lambda t, p, c: c) # stat ::= varlist `=´ explist stat = (varlist + Suppress("=") + explist).setParseAction(lambda t, p, (
def parse(input):
    """Parse a string into an element of the abstract representation.

    Expression grammar::

        <expr> ::= <integer>
                   true
                   false
                   <identifier>
                   ( if <expr> <expr> <expr> )
                   ( let ( ( <name> <expr> ) ... ) <expr> )
                   ( function ( <name> ... ) <expr> )
                   ( ref <expr> )
                   ( do <expr> ... )
                   ( while <expr> <expr> )
                   ( <expr> <expr> ... )

    At top level the input may also be ``(define <name> <expr>)``,
    ``(defun <name> (<name> ...) <expr>)``, ``#quit`` or ``#abs <expr>``.

    Returns the first element of the parse result: a dict whose
    ``"result"`` key is one of ``"function"``, ``"value"``, ``"quit"``,
    ``"abstract"`` or ``"expression"``.
    """
    digits = "0123456789"
    id_start = alphas + "_+*-?!=<>"
    id_rest = id_start + digits

    def as_single_token(toks):
        # Wrap all matched tokens so they come back as one list-like token.
        return [toks]

    # ---- atoms -------------------------------------------------------
    identifier = Word(id_start, id_rest).setParseAction(
        lambda toks: EId(toks[0]))

    # A name is like an identifier but it does not return an EId.
    name = Word(id_start, id_rest)
    names = ZeroOrMore(name).setParseAction(as_single_token)

    integer = Word(digits).setParseAction(
        lambda toks: EValue(VInteger(int(toks[0]))))

    boolean = (Keyword("true") | Keyword("false")).setParseAction(
        lambda toks: EValue(VBoolean(toks[0] == "true")))

    # ---- compound expressions ----------------------------------------
    expr = Forward()
    exprs = ZeroOrMore(expr).setParseAction(as_single_token)

    if_form = ("(" + Keyword("if") + expr + expr + expr + ")").setParseAction(
        lambda toks: EIf(toks[2], toks[3], toks[4]))

    binding = ("(" + name + expr + ")").setParseAction(
        lambda toks: (toks[1], toks[2]))
    bindings = OneOrMore(binding).setParseAction(as_single_token)

    let_form = (
        "(" + Keyword("let") + "(" + bindings + ")" + expr + ")"
    ).setParseAction(lambda toks: ELet(toks[3], toks[5]))

    call_form = ("(" + expr + exprs + ")").setParseAction(
        lambda toks: ECall(toks[1], toks[2]))

    fun_form = (
        "(" + Keyword("function") + "(" + names + ")" + expr + ")"
    ).setParseAction(lambda toks: EFunction(toks[3], toks[5]))

    ref_form = ("(" + Keyword("ref") + expr + ")").setParseAction(
        lambda toks: ERefCell(toks[2]))

    do_form = ("(" + Keyword("do") + exprs + ")").setParseAction(
        lambda toks: EDo(toks[2]))

    while_form = ("(" + Keyword("while") + expr + expr + ")").setParseAction(
        lambda toks: EWhile(toks[2], toks[3]))

    # The generic call form comes last so the keyword forms are tried first.
    expr << (integer | boolean | identifier | if_form | let_form | fun_form |
             ref_form | do_form | while_form | call_form)

    # ---- top-level forms ---------------------------------------------
    # A parse action can't be attached to expr itself because of the
    # recursion, so the top-level expression uses a copy of it.
    top_expr = expr.copy().setParseAction(
        lambda toks: {"result": "expression", "expr": toks[0]})

    define_form = ("(" + Keyword("define") + name + expr + ")").setParseAction(
        lambda toks: {"result": "value", "name": toks[2], "expr": toks[3]})

    defun_form = (
        "(" + Keyword("defun") + name + "(" + names + ")" + expr + ")"
    ).setParseAction(
        lambda toks: {
            "result": "function",
            "name": toks[2],
            "params": toks[4],
            "body": toks[6],
        })

    abstract_form = ("#abs" + expr).setParseAction(
        lambda toks: {"result": "abstract", "expr": toks[1]})

    quit_form = Keyword("#quit").setParseAction(
        lambda toks: {"result": "quit"})

    top = (defun_form | define_form | quit_form | abstract_form | top_expr)

    # The first element of the parse result is the dict built above.
    return top.parseString(input)[0]
# simple demo of using the parsing library to do simple-minded SQL parsing # could be extended to include where clauses etc. # # Copyright (c) 2003,2016, Paul McGuire # # Modified by Junhui Hu<*****@*****.**> from pyparsing import Literal, CaselessLiteral, Word, delimitedList, Optional, \ Combine, Group, alphas, nums, alphanums, ParseException, Forward, oneOf, quotedString, \ ZeroOrMore, restOfLine, Keyword, upcaseTokens # define simple search searchStmt = Forward() # define SQL tokens selectStmt = Forward() SELECT = Keyword("select", caseless=True) FROM = Keyword("from", caseless=True) WHERE = Keyword("where", caseless=True) GROUP_BY = Keyword("group by", caseless=True) ORDER_BY = Keyword("order by", caseless=True) HAVING = Keyword("having", caseless=True) LIMIT = Keyword("limit", caseless=True) OFFSET = Keyword("offset", caseless=True) ident = Word(alphas, alphanums + "_$").setName("identifier") # Remove upcaseTokens #columnName = ( delimitedList( ident, ".", combine=True ) ).setName("column name").addParseAction(upcaseTokens) columnName = (delimitedList(ident, ".", combine=True)).setName("column name") columnNameList = Group(delimitedList(columnName)) #tableName = ( delimitedList( ident, ".", combine=True ) ).setName("table name").addParseAction(upcaseTokens) tableName = (delimitedList(ident, ".", combine=True)).setName("table name")
def parse_element(cls, indent_stack):
    """Return the parser for a ``@depends_on('a', 'b', ...)`` rule.

    The keyword and both parentheses are suppressed; the quoted names are
    returned without their quotes under the results name ``depends_on``.
    ``indent_stack`` is not used by this rule.
    """
    decorator = Keyword("@depends_on").suppress()
    opening = Literal('(').suppress()
    # NOTE(review): the parse action is set on the shared ``quotedString``
    # object itself, not on a copy -- confirm this is intended.
    names = delimitedList(quotedString.setParseAction(removeQuotes))
    closing = Literal(')').suppress()

    rule = decorator + opening + names + closing
    return rule.setResultsName("depends_on")
def __init__(self):
    """Build the pyparsing grammar for a simple SQL ``SELECT`` statement.

    The complete statement has the form
    ``select <columns> from <tables> where <conditions>;`` and ``--``
    comments are ignored.

    Attributes set on the instance:

    * ``identExpr``, ``functionExpr`` -- sub-expressions exposed for
      debugging.
    * ``tableAction``, ``whereExpAction`` -- initially empty lists passed
      to ``self.actionWrapper`` to build the parse actions attached to
      table names and to flat where-conditions.
    * ``selectPart``, ``fromPart`` -- the select-list and from-clause
      expressions.
    * ``simpleSQL`` -- the parser for the whole statement.
    """
    # ---- SQL tokens --------------------------------------------------
    selectStmt = Forward()
    selectToken = Keyword("select", caseless=True)
    fromToken = Keyword("from", caseless=True)
    asToken = Keyword("as", caseless=True)
    whereToken = Keyword("where", caseless=True)
    semicolon = Literal(";")

    ident = Word(alphas, alphanums + "_$").setName("identifier")

    # Dotted column names are combined into one token (case is preserved).
    columnName = delimitedList(ident, ".", combine=True)

    # ---- function calls and value expressions ------------------------
    # Defined below, once numeric literals are available.
    columnRvalList = Forward()
    functionExpr = ident + Optional("." + ident) + Literal(
        '(') + columnRvalList + Literal(')')
    identExpr = functionExpr | ident
    self.identExpr = identExpr  # Debug
    self.functionExpr = functionExpr  # Debug
    alias = ident.copy()

    columnRef = columnName
    functionSpec = functionExpr
    # A function call is tried before a plain column reference.
    valueExprPrimary = functionSpec | columnRef
    numPrimary = valueExprPrimary  ## | numericValFunc
    factor = Optional(Literal("+") | Literal("-")) + numPrimary
    muldiv = oneOf("* /")
    term = Forward()
    term << factor + Optional(muldiv + factor)
    numericExpr = Forward()
    addsub = oneOf("+ -")
    numericExpr << term + Optional(addsub + numericExpr)

    # Arithmetic over columns / function calls, used on either side of a
    # where-condition.
    arithop = oneOf("+ - * /")
    columnNumericExpr = Forward()
    cTerm = valueExprPrimary
    columnNumericExpr << cTerm + Optional(arithop + columnNumericExpr)

    valueExpr = numericExpr  ## | stringExpr | dateExpr | intervalExpr
    derivedColumn = valueExpr + Optional(asToken + alias)
    selectSubList = derivedColumn + ZeroOrMore("," + derivedColumn)

    # ---- tables ------------------------------------------------------
    tableName = delimitedList(ident, ".", combine=True)
    # don't upcase table names anymore
    self.tableAction = []
    tableName.addParseAction(self.actionWrapper(self.tableAction))
    # NOTE(review): the value returned by setResultsName is discarded here
    # and for tableAlias below; if it returns a named copy (as in current
    # pyparsing) these two calls have no effect -- confirm intent.
    tableName.setResultsName("table")
    tableAlias = tableName + asToken + ident.setResultsName("aliasName")
    tableAlias.setResultsName("alias")
    genericTableName = tableAlias | tableName
    genericTableName = genericTableName.setResultsName("tablename")
    tableNameList = Group(genericTableName +
                          ZeroOrMore("," + genericTableName))

    # ---- where clause ------------------------------------------------
    whereExpression = Forward()
    and_ = Keyword("and", caseless=True)
    or_ = Keyword("or", caseless=True)
    in_ = Keyword("in", caseless=True)
    between_ = Keyword("between", caseless=True)

    E = CaselessLiteral("E")
    binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
    arithSign = Word("+-", exact=1)

    realNum = Combine(
        Optional(arithSign) +
        (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums))) +
        Optional(E + Optional(arithSign) + Word(nums)))

    intNum = Combine(
        Optional(arithSign) + Word(nums) +
        Optional(E + Optional("+") + Word(nums)))

    # need to add support for alg expressions
    columnRval = realNum | intNum | quotedString | columnNumericExpr  # | numericExpr
    columnRvalList << Group(columnRval + ZeroOrMore("," + columnRval))

    self.whereExpAction = []

    namedRv = columnRval.setResultsName("column")
    whereConditionFlat = Group(
        (functionSpec + binop + columnRval) |
        (namedRv + binop + columnRval) |
        (namedRv + in_ + "(" + columnRval +
         ZeroOrMore("," + namedRv) + ")") |
        (namedRv + in_ + "(" + selectStmt + ")") |
        (namedRv + between_ + namedRv + and_ + namedRv))
    whereConditionFlat.addParseAction(
        self.actionWrapper(self.whereExpAction))
    whereCondition = Group(whereConditionFlat |
                           ("(" + whereExpression + ")"))

    def scAnd(tok):
        """Debug helper (not attached to the grammar): simplify
        ``TRUE AND x`` / ``x AND TRUE`` to ``x``."""
        # Same output as the former print statement, valid on Python 2 and 3.
        print("scAnd %s" % (tok,))
        if "TRUE" == tok[0][0]:
            tok = tok[2]
        elif "TRUE" == tok[2][0]:
            tok = tok[0]
        return tok

    def scOr(tok):
        """Debug helper (not attached to the grammar): simplify an OR
        with a TRUE operand to ``[["TRUE"]]``."""
        print("scOr %s" % (tok,))
        if ("TRUE" == tok[0][0]) or ("TRUE" == tok[2][0]):
            tok = [["TRUE"]]
        return tok

    def scWhere(tok):
        """Parse action: short-circuit TRUE terms in a where-expression.

        A TRUE term followed by AND is dropped together with the AND; a
        TRUE term followed by OR ends the scan, discarding it and
        everything after it. All other tokens are kept in order.
        """
        newtok = []
        i = 0
        while i < len(tok):
            if str(tok[i]) in ["TRUE", str(["TRUE"])
                               ] and (i + 1) < len(tok):
                # Look at the operator that follows the TRUE term.
                following = str(tok[i + 1]).upper()
                if following == "AND":
                    i += 2
                    continue
                elif following == "OR":
                    break
            newtok.append(tok[i])
            i += 1
        return newtok

    def collapseWhere(tok):
        """Parse action: drop the where-clause if its expression is just
        ``["TRUE"]``."""
        if ["TRUE"] == tok.asList()[0][1]:
            tok = []
        return tok

    andExpr = and_ + whereExpression
    orExpr = or_ + whereExpression
    whereExpression << whereCondition + ZeroOrMore(andExpr | orExpr)
    whereExpression.addParseAction(scWhere)

    # ---- statement ---------------------------------------------------
    self.selectPart = selectToken + (
        '*' | selectSubList).setResultsName("columns")
    whereClause = Group(whereToken +
                        whereExpression).setResultsName("where")
    whereClause.addParseAction(collapseWhere)
    self.fromPart = fromToken + tableNameList.setResultsName("tables")

    # define the grammar
    selectStmt << (self.selectPart + fromToken +
                   tableNameList.setResultsName("tables") + whereClause)

    self.simpleSQL = selectStmt + semicolon

    # define Oracle comment format, and ignore them
    oracleSqlComment = "--" + restOfLine
    self.simpleSQL.ignore(oracleSqlComment)