def has_generic_exceptions(csharp_dest: str, exclude: list = None) -> bool:
    """
    Search for generic ``catch (Exception ...)`` handlers in C# code.

    :param csharp_dest: Path to a C# source file or package.
    :param exclude: Paths that contains any string from this list are ignored.
    :returns: True when generic exception handlers are found.
    """
    # Grammar: optional '}' then 'catch ( Exception ... )' with an optional
    # inner cast '(Type)', an optional variable name and an optional
    # parenthesised object.
    catch_kw = CaselessKeyword('catch')
    exception_kw = CaselessKeyword('exception')
    cast_type = Word(alphas)
    var_name = Word(alphas)
    obj_name = Word(alphas)
    grammar = (Optional(Literal('}')) + catch_kw + Literal('(')
               + exception_kw
               + Optional(Literal('(') + cast_type + Literal(')'))
               + Optional(var_name)
               + Optional(Literal('(') + obj_name + Literal(')')))
    try:
        matches = lang.check_grammar(grammar, csharp_dest,
                                     LANGUAGE_SPECS, exclude)
    except FileNotFoundError:
        # Target path is missing: report unknown instead of closed.
        show_unknown('File does not exist',
                     details=dict(code_dest=csharp_dest))
        return False
    if not matches:
        show_close('Code does not use generic exceptions',
                   details=dict(code_dest=csharp_dest))
        return False
    show_open('Code uses generic exceptions',
              details=dict(matched=matches, total_vulns=len(matches)))
    return True
def uses_console_writeline(csharp_dest: str, exclude: list = None) -> bool:
    """
    Check if code uses the ``Console.WriteLine`` method.

    :param csharp_dest: Path to a C# source file or package.
    :param exclude: Paths that contains any string from this list are ignored.
    :returns: True when a ``Console.WriteLine`` call is found.
    """
    method = "Console.WriteLine"
    # Caseless `console` '.' `writeline`.
    console_kw = CaselessKeyword('console')
    writeline_kw = CaselessKeyword('writeline')
    grammar = console_kw + Literal('.') + writeline_kw
    try:
        matches = lang.check_grammar(grammar, csharp_dest,
                                     LANGUAGE_SPECS, exclude)
    except FileNotFoundError:
        show_unknown('File does not exist',
                     details=dict(code_dest=csharp_dest))
        return False
    if not matches:
        show_close('Code does not use {} method'.format(method),
                   details=dict(code_dest=csharp_dest))
        return False
    show_open('Code uses {} method'.format(method),
              details=dict(matched=matches, total_vulns=len(matches)))
    return True
def parse_as_create_datasource(self) -> dict:
    '''
    Parse a 'CREATE DATASOURCE' query.

    Example:
        CREATE DATASOURCE name FROM mysql
        WITH {"user": "******", "password": "******", "host": "127.0.0.1"}

    :returns: dict with keys 'datasource_name', 'database_type' and
        'connection_args' (the WITH block parsed as JSON).
    :raises Exception: if the name, type or connection arguments cannot
        be determined from the statement.
    '''
    result = {
        'datasource_name': None,
        'database_type': None,
        'connection_args': None
    }

    expr = (CaselessKeyword("create").suppress()
            + CaselessKeyword("datasource").suppress()
            + Word(printables).setResultsName('datasource_name')
            + CaselessKeyword("from").suppress()
            + Word(printables).setResultsName('database_type')
            + CaselessKeyword("with").suppress()
            # Keep the raw '{...}' text so it can be handed to json.loads.
            + originalTextFor(nestedExpr('{', '}'))('connection_args'))

    r = expr.parseString(self._sql).asDict()

    datasource_name = r.get('datasource_name')
    if not isinstance(datasource_name, str):
        raise Exception("Cant determine datasource name")
    result['datasource_name'] = datasource_name

    database_type = r.get('database_type')
    if not isinstance(database_type, str):
        raise Exception("Cant determine database type")
    result['database_type'] = database_type

    try:
        result['connection_args'] = json.loads(r.get('connection_args'))
    except Exception:
        raise Exception('Cant parse connection arguments.')

    # BUG FIX: the parsed result was built but never returned, so callers
    # always received None despite the `-> dict` annotation.
    return result
def BNF() -> Any:
    """
    Build and memoize the pyparsing grammar for arithmetic expressions.

    expop   :: '^'
    multop  :: '*' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+
    atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
    factor  :: atom [ expop factor ]*
    term    :: factor [ multop factor ]*
    expr    :: term [ addop term ]*

    :returns: the cached ``expr`` parser (stored in module-global ``bnf``).
    """
    global bnf
    if not bnf:
        # use CaselessKeyword for e and pi, to avoid accidentally matching
        # functions that start with 'e' or 'pi' (such as 'exp'); Keyword
        # and CaselessKeyword only match whole words
        e = CaselessKeyword("E")
        pi = CaselessKeyword("PI")
        # Signed integer/float/scientific-notation literal kept as a string.
        fnumber = Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?")
        ident = Word(alphas, alphanums + "_$")

        plus, minus, mult, div = map(Literal, "+-*/")
        lpar, rpar = map(Suppress, "()")
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")

        expr = Forward()
        expr_list = delimitedList(Group(expr))

        # add parse action that replaces the function identifier with a
        # (name, number of args) tuple
        def insert_fn_argcount_tuple(t: List[Any]):
            fn = t.pop(0)
            num_args = len(t[0])
            t.insert(0, (fn, num_args))

        f = ident + lpar - Group(expr_list) + rpar  # type: ignore
        fn_call = f.setParseAction(insert_fn_argcount_tuple)  # type: ignore
        g = fn_call | pi | e | fnumber | ident  # type: ignore
        assert g is not None
        atom = (
            addop[...]  # type: ignore
            + (
                g.setParseAction(push_first)
                | Group(lpar + expr + rpar)  # type: ignore
            )).setParseAction(push_unary_minus)  # type: ignore

        # by defining exponentiation as "atom [ ^ factor ]..." instead of
        # "atom [ ^ atom ]...", we get right-to-left exponents:
        # 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor <<= atom + (
            expop + factor).setParseAction(push_first)[...]  # type: ignore
        term = factor + (
            multop +  # type: ignore
            factor).setParseAction(push_first)[...]  # type: ignore
        expr <<= term + (
            addop +  # type: ignore
            term).setParseAction(push_first)[...]  # type: ignore
        bnf = expr
    return bnf
def has_generic_exceptions(rpg_dest: str, exclude: list = None) -> bool:
    """
    Search for empty ``on-error`` monitors in RPG code.

    See `REQ. 161 <https://fluidattacks.com/web/rules/161/>`_.

    :param rpg_dest: Path to a RPG source or directory.
    :param exclude: Paths that contains any string from this list are ignored.
    :returns: True when empty monitors are found.
    """
    # Matches the literal sequence `on-error;` with caseless keywords.
    grammar = (CaselessKeyword('on') + Literal('-') +
               CaselessKeyword('error') + Literal(';'))
    try:
        matches = lang.check_grammar(grammar, rpg_dest,
                                     LANGUAGE_SPECS, exclude)
    except FileNotFoundError:
        show_unknown('File does not exist', details=dict(code_dest=rpg_dest))
        return False
    if not matches:
        show_close('Code does not have empty monitors',
                   details=dict(code_dest=rpg_dest))
        return False
    show_open('Code has empty monitors',
              details=dict(matched=matches, total_vulns=len(matches)))
    return True
def has_insecure_randoms(js_dest: str, exclude: list = None) -> bool:
    r"""
    Check if code uses ``Math.Random()``\ .

    See `REQ.224 <https://fluidattacks.com/web/rules/224/>`_.

    :param js_dest: Path to a JavaScript source file or package.
    :param exclude: Paths that contains any string from this list are ignored.
    :returns: True when an insecure random call is found.
    """
    method = 'Math.random()'
    # `math` '.' `random` followed by a (suppressed) parenthesised arg list.
    grammar = (CaselessKeyword('math') + Literal('.') +
               CaselessKeyword('random') + Suppress(nestedExpr()))
    try:
        matches = lang.check_grammar(grammar, js_dest,
                                     LANGUAGE_SPECS, exclude)
    except FileNotFoundError:
        show_unknown('File does not exist', details=dict(code_dest=js_dest))
        return False
    if not matches:
        show_close('Code does not use {} method'.format(method),
                   details=dict(code_dest=js_dest))
        return False
    show_open('Code uses {} method'.format(method),
              details=dict(matched=matches, total_vulns=len(matches)))
    return True
def uses_ecb_encryption_mode(csharp_dest: str, exclude: list = None) -> bool:
    """
    Check if code uses ECB as encryption mode.

    :param csharp_dest: Path to a C# source file or package.
    :param exclude: Paths that contains any string from this list are ignored.
    :returns: True when an assignment to ``CipherMode.ECB`` is found.
    """
    method = "Mode = CipherMode.ECB"
    # `<anything> = CipherMode.ECB` (keywords are caseless).
    assign = Literal('=')
    grammar = (SkipTo(assign) + assign + CaselessKeyword('ciphermode') +
               Literal('.') + CaselessKeyword('ecb'))
    try:
        matches = lang.check_grammar(grammar, csharp_dest,
                                     LANGUAGE_SPECS, exclude)
    except FileNotFoundError:
        show_unknown('File does not exist',
                     details=dict(code_dest=csharp_dest))
        return False
    if not matches:
        show_close('Code does not use {} method'.format(method),
                   details=dict(code_dest=csharp_dest))
        return False
    show_open('Code uses {} method'.format(method),
              details=dict(matched=matches, total_vulns=len(matches)))
    return True
def __init__(self, name, argParser, version="OPENQASM 2.0", qop=False, keyOverride=None):
    """
    Register a new operation token and build its parser.

    :param name: keyword that introduces the operation in source text.
    :param argParser: pyparsing element that parses the operation arguments.
    :param version: version string the operation belongs to (parsed via
        ``parse_version``).
    :param qop: True to register in ``qops``, False to register in ``cops``.
    :param keyOverride: optional parser used instead of the plain keyword;
        its match is rewritten to ``name`` via ``_override_keyword``.
    :raises IOError: if ``name`` is already registered as an operation.
    """
    global cops
    global qops
    global _reservedKeys
    # Reject duplicate registrations across both registries.
    if name in qops or name in cops:
        raise IOError(dupTokenWarning.format("Operation", name))
    self.operation = name
    if keyOverride is not None:
        # Custom leading parser; normalise the matched token back to `name`.
        self.parser = (keyOverride + argParser).addParseAction(
            lambda s, l, t: _override_keyword(t, name))
    else:
        self.parser = CaselessKeyword(name)("keyword") + argParser

    self.version = parse_version(version)
    # Tag every parse result with this operation's version.
    self.parser.addParseAction(
        lambda s, l, t: _set_version(t, self.version))

    _reservedKeys.append(name)
    if qop:
        qops[name] = self
    else:
        cops[name] = self
def uses_console_log(js_dest: str, exclude: list = None) -> bool:
    """
    Search for ``console.log()`` calls in a JavaScript file or directory.

    :param js_dest: Path to a JavaScript source file or directory.
    :param exclude: Paths that contains any string from this list are ignored.
    :returns: True when a console.log call is found.
    """
    method = 'Console.log()'
    # `console` '.' `log` followed by a (suppressed) argument list.
    grammar = (CaselessKeyword('console') + Literal('.') +
               CaselessKeyword('log') + Suppress(nestedExpr()))
    try:
        matches = lang.check_grammar(grammar, js_dest,
                                     LANGUAGE_SPECS, exclude)
    except FileNotFoundError:
        show_unknown('File does not exist', details=dict(code_dest=js_dest))
        return False
    if not matches:
        show_close('Code does not use {} method'.format(method),
                   details=dict(code_dest=js_dest))
        return False
    show_open('Code uses {} method'.format(method),
              details=dict(matched=matches, total_vulns=len(matches)))
    return True
def uses_insecure_hash(java_dest: str, algorithm: str,
                       exclude: list = None) -> bool:
    """
    Check if code uses an insecure hashing algorithm.

    See `REQ.150 <https://fluidattacks.com/web/rules/150/>`_.

    :param java_dest: Path to a Java source file or package.
    :param algorithm: Insecure algorithm.
    :param exclude: Paths that contains any string from this list are ignored.
    :returns: True when ``MessageDigest.getInstance("<algorithm>")`` is found.
    """
    method = 'MessageDigest.getInstance("{}")'.format(algorithm.upper())
    # MessageDigest.getInstance("<algorithm>") — algorithm matched caselessly.
    quoted_alg = Literal('"') + CaselessKeyword(algorithm.lower()) + Literal('"')
    arg_list = Literal('(') + quoted_alg + Literal(')')
    grammar = (CaselessKeyword('messagedigest') + Literal('.') +
               CaselessKeyword('getinstance') + arg_list)
    try:
        matches = lang.check_grammar(grammar, java_dest,
                                     LANGUAGE_SPECS, exclude)
    except FileNotFoundError:
        show_unknown('File does not exist', details=dict(code_dest=java_dest))
        return False
    if not matches:
        show_close('Code does not use {} method'.format(method),
                   details=dict(code_dest=java_dest))
        return False
    show_open('Code uses {} method'.format(method),
              details=dict(matched=matches, total_vulns=len(matches)))
    return True
def get_grammar():
    """
    Build the pyparsing grammar for the mini SQL-like query language.

    Supports SELECT columns, FROM 'directory', WHERE conditions joined
    with AND, ORDER BY with optional ASC/DESC, and LIMIT. Every clause
    is optional; matched parts are exposed via named results
    ('columns', 'directory', 'condition', 'order_by', 'limit').
    """
    ident = alphas + '_'
    column = Word(ident)
    # Numeric literal with an optional caseless suffix, or a quoted string.
    literal = Combine(
        Word(nums) +
        Optional(oneOf(' '.join(LITERAL_SUFFIXES), caseless=True))) | sglQuotedString
    funcall = Forward()
    value = funcall | column | literal
    # Function call: name(arg, arg, ...); defined via Forward since it
    # recursively contains `value`.
    funcall << Group(
        Word(ident) + Suppress('(') + Group(delimitedList(value)) + Suppress(')'))
    bin_op = oneOf(' '.join(OPERATORS), caseless=True)

    columns = (Group(delimitedList(value)) | '*').setResultsName('columns')
    from_clause = (CaselessKeyword('FROM') +
                   QuotedString("'").setResultsName('directory'))
    # A condition is `[NOT] value op value` or a bare `[NOT] value`.
    condition = (Group(Optional(CaselessKeyword('NOT')) + value + bin_op + value) |
                 Group(Optional(CaselessKeyword('NOT')) + value))
    conditions = Group(delimitedList(condition, delim=CaselessKeyword('AND')))
    where_clause = CaselessKeyword('WHERE') + conditions.setResultsName('condition')
    order_by_clause = (CaselessKeyword('ORDER BY') + Group(
        value +
        Optional(CaselessKeyword('ASC') | CaselessKeyword('DESC'))).setResultsName(
            'order_by'))
    limit_clause = CaselessKeyword('LIMIT') + Word(nums).setResultsName('limit')
    select_clause = CaselessKeyword('SELECT') + columns
    return (Optional(select_clause) + Optional(from_clause) +
            Optional(where_clause) + Optional(order_by_clause) +
            Optional(limit_clause))
def _create_parser() -> Forward:
    """Create an instance of a dice roll string parser.

    Atoms are 'd%', numbers and the constants pi/e; the precedence table
    layers unary minus, sqrt, exponentiation, factorial and the dice
    operators over ordinary arithmetic. Table order defines precedence
    (earlier entries bind tighter).
    """
    atom = (CaselessLiteral("d%")
            | pyparsing_common.number
            | CaselessKeyword("pi")
            | CaselessKeyword("e"))

    expression = operatorPrecedence(
        atom,
        [
            (Literal('-'), 1, opAssoc.RIGHT),
            (CaselessLiteral('sqrt'), 1, opAssoc.RIGHT),
            (oneOf('^ **'), 2, opAssoc.RIGHT),
            (Literal('-'), 1, opAssoc.RIGHT),
            (Literal('!'), 1, opAssoc.LEFT),
            (CaselessLiteral('d%'), 1, opAssoc.LEFT),
            (CaselessLiteral('d'), 2, opAssoc.RIGHT),
            # This line causes the recursion debug to go off.
            # Will have to find a way to have an optional left
            # operator in this case.
            (CaselessLiteral('d'), 1, opAssoc.RIGHT),
            (oneOf('* / % //'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
        ])
    return expression
def _create_parser(self):
    """
    Build the infix-notation query parser.

    A query is `FIELD OPERATOR VALUE` comparisons combined with AND/OR;
    parse actions wrap matches in ComparisonExpr / LogicExpr nodes.
    """
    OPERATORS = ComparisonExpr.OPERATORS.keys()
    AND = oneOf(LogicExpr.AND)
    OR = oneOf(LogicExpr.OR)
    FIELD = Word(alphanums + '_')
    OPERATOR = oneOf(OPERATORS)
    # Value: number, 'quoted string', [bracketed list], or one of the
    # keywords true/false/notblank. Quotes/brackets are preserved
    # (unquoteResults=False).
    VALUE = (Word(nums + '-.') |
             QuotedString(quoteChar="'", unquoteResults=False)(alphanums) |
             QuotedString('[', endQuoteChar=']',
                          unquoteResults=False)(alphanums + "'-.") |
             CaselessKeyword('true') |
             CaselessKeyword('false') |
             CaselessKeyword('notblank'))
    COMPARISON = FIELD + OPERATOR + VALUE

    QUERY = infixNotation(COMPARISON, [
        (AND, 2, opAssoc.LEFT,),
        (OR, 2, opAssoc.LEFT,),
    ])

    # Attach semantic constructors after infixNotation copies the
    # elements, so both the bare and combined forms are wrapped.
    COMPARISON.addParseAction(ComparisonExpr)
    AND.addParseAction(LogicExpr)
    OR.addParseAction(LogicExpr)
    return QUERY
def setup(self):
    """
    Build the arithmetic-expression parser used by this instance.

    Grammar: expr := term [+- term]*, term := factor [*/ factor]*,
    factor := atom [^ factor]* (right-associative); atoms are PI, E,
    the variable X, numbers, identifiers and function calls. Parse
    actions push tokens via self._pushFirst / self._pushUMinus; the
    finished parser is stored in ``self._parseExpr``.
    """
    e = CaselessKeyword('E')
    pi = CaselessKeyword('PI')
    var = CaselessKeyword('X')
    # Signed float / scientific-notation literal.
    fnumber = Regex(r'[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?')
    ident = Word(alphas, alphanums + '_$')
    plus, minus, mult, div = map(Literal, '+-*/')
    lpar, rpar = map(Suppress, '()')
    addop = plus | minus
    multop = mult | div
    expop = Literal('^')
    expr = Forward()
    # `(0, None) * minus` means zero-or-more leading minus signs.
    atom = ((0, None) * minus +
            (pi | e | var | fnumber | ident + lpar + expr + rpar |
             ident).setParseAction(self._pushFirst) |
            Group(lpar + expr + rpar)).setParseAction(self._pushUMinus)
    # by defining exponentiation as "atom [ ^ factor ]..." instead of
    # "atom [ ^ atom ]...", we get right-to-left exponents, instead of
    # left-to-right. that is, 2^3^2 = 2^(3^2), not (2^3)^2.
    factor = Forward()
    factor << atom + ZeroOrMore(
        (expop + factor).setParseAction(self._pushFirst))
    term = factor + ZeroOrMore(
        (multop + factor).setParseAction(self._pushFirst))
    expr << term + ZeroOrMore(
        (addop + term).setParseAction(self._pushFirst))
    self._parseExpr = expr
def has_dos_dow_sqlcod(rpg_dest: str, exclude: list = None) -> bool:
    r"""
    Search for DoS for using ``DoW SQLCOD = <ZERO>``\ .

    :param rpg_dest: Path to a RPG source or directory.
    :param exclude: Paths that contains any string from this list are ignored.
    :returns: True when the vulnerable loop condition is found.
    """
    # `dow sqlcod = 0` where zero may be the literal '0' or '*zeros'.
    zero = MatchFirst([Literal('0'), CaselessKeyword('*zeros')])
    grammar = (CaselessKeyword('dow') + CaselessKeyword('sqlcod') +
               Literal('=') + zero)
    try:
        matches = lang.check_grammar(grammar, rpg_dest,
                                     LANGUAGE_SPECS, exclude)
    except FileNotFoundError:
        show_unknown('File does not exist', details=dict(code_dest=rpg_dest))
        return False
    if not matches:
        show_close('Code does not have DoS for using "DoW SQLCOD = 0"',
                   details=dict(code_dest=rpg_dest))
        return False
    show_open('Code has DoS for using "DoW SQLCOD = 0"',
              details=dict(matched=matches, total_vulns=len(matches)))
    return True
def __defineBasicTypes(self):
    """
    Define the primitive value parsers (unknown, float, boolean,
    ratings) and register the ratings parser in ``self.parseTypes``.
    """
    self.KDELIM = Suppress(":")
    # A leading sign counts only when digits follow it.
    sign = Word("+-", max=1) + FollowedBy(Word(nums))
    # Currency-style number: digit groups separated by commas with an
    # optional decimal part.
    crncy = Word(nums) + ZeroOrMore(Suppress(",") + Word(nums)) + \
        Optional(Literal(".") + Word(nums))

    baseUnknownValue = Keyword("?")
    # '?' maps to NaN.
    self.unknown = self.completeType(baseUnknownValue, "UNKNOWN_VAL",
                                     lambda t: np.nan)

    # Optional sign, then plain/currency/scientific number, optional '%'.
    floatNumberBasic = Combine(Optional(sign) +
                               Or([Word(nums), crncy,
                                   Regex(r'[0-9]+(\.\d*)?([eE]\d+)?')])) + \
        Optional(Suppress("%"))
    self.floatNumber = self.completeType(floatNumberBasic, "float",
                                         lambda t: float(t[0]))

    baseBoolValue = Or([CaselessKeyword("false"),
                        CaselessKeyword("true"),
                        CaselessKeyword("yes"),
                        CaselessKeyword("no")])
    # Matched word is translated through WebParser.boolMaps.
    self.boolean = self.completeType(baseBoolValue, "bool",
                                     lambda t: WebParser.boolMaps[t[0]])

    # One keyword per known rating; '--' means missing (NaN).
    ratingKeywords = [CaselessKeyword(k).setParseAction(
        lambda t: Ratings.ratingMaps[t[0].lower()])
        for k in Ratings.ratingMaps.keys()]
    ratingKeywords.append(Keyword("--").setParseAction(lambda t: np.nan))
    self.ratings = self.completeType(Or(ratingKeywords), "ratings")
    self.parseTypes[WebParser.PSTYPE_RATINGS] = self.ratings
def uses_sha1_hash(csharp_dest: str, exclude: list = None) -> bool:
    """
    Check if code uses SHA1 as hashing algorithm.

    See `REQ.150 <https://fluidattacks.com/web/rules/150/>`_.

    :param csharp_dest: Path to a C# source file or package.
    :param exclude: Paths that contains any string from this list are ignored.
    :returns: True when a SHA1 provider instantiation is found.
    """
    method = "new SHA1CryptoServiceProvider(), new SHA1Managed()"
    # `new SHA1CryptoServiceProvider(...)` or `new SHA1Managed(...)`.
    sha1_types = MatchFirst([CaselessKeyword('SHA1CryptoServiceProvider'),
                             CaselessKeyword('SHA1Managed')])
    grammar = CaselessKeyword('new') + sha1_types + nestedExpr()
    try:
        matches = lang.check_grammar(grammar, csharp_dest,
                                     LANGUAGE_SPECS, exclude)
    except FileNotFoundError:
        show_unknown('File does not exist',
                     details=dict(code_dest=csharp_dest))
        return False
    if not matches:
        show_close('Code does not use {} method'.format(method),
                   details=dict(code_dest=csharp_dest))
        return False
    show_open('Code uses {} method'.format(method),
              details=dict(matched=matches, total_vulns=len(matches)))
    return True
def BNF():
    """
    Build and memoize the arithmetic-expression grammar.

    expop   :: '^'
    multop  :: '*' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+
    atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
    factor  :: atom [ expop factor ]*
    term    :: factor [ multop factor ]*
    expr    :: term [ addop term ]*
    """
    global bnf
    if not bnf:
        # use CaselessKeyword for e and pi, to avoid accidentally matching
        # functions that start with 'e' or 'pi' (such as 'exp'); Keyword
        # and CaselessKeyword only match whole words
        e = CaselessKeyword("E")
        pi = CaselessKeyword("PI")
        # fnumber = Combine(Word("+-"+nums, nums) +
        #                   Optional("." + Optional(Word(nums))) +
        #                   Optional(e + Word("+-"+nums, nums)))
        # or use provided pyparsing_common.number, but convert back to str:
        # fnumber = ppc.number().addParseAction(lambda t: str(t[0]))
        fnumber = Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?")
        ident = Word(alphas, alphanums + "_$")

        plus, minus, mult, div = map(Literal, "+-*/")
        lpar, rpar = map(Suppress, "()")
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")

        expr = Forward()
        expr_list = delimitedList(Group(expr))

        # add parse action that replaces the function identifier with a
        # (name, number of args) tuple
        def insert_fn_argcount_tuple(t):
            fn = t.pop(0)
            num_args = len(t[0])
            t.insert(0, (fn, num_args))

        fn_call = (ident + lpar - Group(expr_list) + rpar).setParseAction(
            insert_fn_argcount_tuple
        )
        atom = (
            addop[...]
            + (
                (fn_call | pi | e | fnumber | ident).setParseAction(push_first)
                | Group(lpar + expr + rpar)
            )
        ).setParseAction(push_unary_minus)

        # by defining exponentiation as "atom [ ^ factor ]..." instead of
        # "atom [ ^ atom ]...", we get right-to-left exponents:
        # 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor <<= atom + (expop + factor).setParseAction(push_first)[...]
        term = factor + (multop + factor).setParseAction(push_first)[...]
        expr <<= term + (addop + term).setParseAction(push_first)[...]
        bnf = expr
    return bnf
def __init__(self):
    """
    Build the partition-filter expression parser.

    Conditions over identifiers and literals (IS [NOT] NULL, binary
    comparisons, [NOT] LIKE / IN / BETWEEN) are combined with 2-ary
    AND/OR; each alternative attaches its own AST-node parse action.
    Compiled expressions are memoized in ``self._cache``.
    """
    # build grammar according to Glue.Client.get_partitions(Expression)
    lpar, rpar = map(Suppress, "()")

    # NOTE these are AWS Athena column name best practices
    ident = Forward().set_name("ident")
    ident <<= Word(alphanums + "._").set_parse_action(_Ident) | lpar + ident + rpar

    number = Forward().set_name("number")
    number <<= pyparsing_common.number | lpar + number + rpar

    string = Forward().set_name("string")
    string <<= QuotedString(quote_char="'", esc_quote="''") | lpar + string + rpar

    literal = (number | string).set_name("literal")
    literal_list = delimited_list(literal, min=1).set_name("list")

    bin_op = one_of("<> >= <= > < =").set_name("binary op")

    # Keywords may themselves be wrapped in parentheses.
    and_ = Forward()
    and_ <<= CaselessKeyword("and") | lpar + and_ + rpar
    or_ = Forward()
    or_ <<= CaselessKeyword("or") | lpar + or_ + rpar

    in_, between, like, not_, is_, null = map(
        CaselessKeyword, "in between like not is null".split()
    )
    not_ = Suppress(not_)  # only needed for matching

    cond = (
        (ident + is_ + null).set_parse_action(_IsNull)
        | (ident + is_ + not_ + null).set_parse_action(_IsNotNull)
        | (ident + bin_op + literal).set_parse_action(_BinOp)
        | (ident + like + string).set_parse_action(_Like)
        | (ident + not_ + like + string).set_parse_action(_NotLike)
        | (ident + in_ + lpar + literal_list + rpar).set_parse_action(_In)
        | (ident + not_ + in_ + lpar + literal_list + rpar).set_parse_action(_NotIn)
        | (ident + between + literal + and_ + literal).set_parse_action(_Between)
        | (ident + not_ + between + literal + and_ + literal).set_parse_action(
            _NotBetween
        )
    ).set_name("cond")

    # conditions can be joined using 2-ary AND and/or OR
    expr = infix_notation(
        cond,
        [
            (and_, 2, OpAssoc.LEFT, _BoolAnd),
            (or_, 2, OpAssoc.LEFT, _BoolOr),
        ],
    )
    self._expr = expr.set_name("expr")
    # Cache of compiled expressions keyed by their source text.
    self._cache: Dict[str, _Expr] = {}
def __init__(self):
    """
    Build the ternary-expression parser: ``<expr> if <cond> else <expr>``.

    Extends the base arithmetic parser with a relational condition;
    matches are wrapped in Condition / TernaryExpression nodes and the
    final pattern must consume the whole input string.
    """
    super(TernaryExpressionParser, self).__init__()
    IF = CaselessKeyword('if').suppress()
    ELSE = CaselessKeyword('else').suppress()
    RELATIONAL_OPERATOR = oneOf(">= <= != > < = ==")
    CONDITION = (self.ARITH_EXPR + RELATIONAL_OPERATOR +
                 self.ARITH_EXPR).setParseAction(Condition)
    self.TERNARY_EXPR = (self.ARITH_EXPR.setResultsName('if_expr') +
                         IF + CONDITION.setResultsName('condition') +
                         ELSE + self.ARITH_EXPR.setResultsName('else_expr')
                         ).setParseAction(TernaryExpression)
    # StringEnd forces a full-string match.
    self.pattern = self.TERNARY_EXPR + StringEnd()
def get_parser_atoms(self):
    """Return the dict of terminal parsers (atoms) for the grammar.

    Keys are atom names (int, float, variable, function, operators,
    delimiters, consts); values are pyparsing elements built from this
    instance's configured delimiter strings.
    """
    point = Literal(".")
    e = CaselessLiteral("E")
    return {
        # float number:
        "int": Combine(Word("+-" + nums, nums)),
        "float": Combine(
            Word("+-" + nums, nums) +
            Optional(point + Optional(Word(nums))) +
            Optional(e + Word("+-" + nums, nums))),
        "variable": Word(alphas, alphas + nums + "_"),
        "array_lpar": Literal(self.array_lpar),
        "array_delim": Literal(self.array_delim),
        "array_rpar": Literal(self.array_rpar),
        "function": Word(alphas, alphas + nums + "_$"),
        "func_lpar": Literal(self.func_lpar),
        "func_delim": Literal(self.func_delim),
        "func_rpar": Literal(self.func_rpar),
        "assign": Literal(self.op_assign),
        "equal": Literal("=="),
        "plus": Literal("+"),
        "minus": Literal("-"),
        "mult": Literal("*"),
        "div": Literal("/"),
        "lpar": Literal(self.lpar),
        "rpar": Literal(self.rpar),
        "exp": Literal("^"),
        # Constants are normalised to upper case on match.
        "consts": CaselessKeyword("PI").setParseAction(upcaseTokens) |
        CaselessKeyword("E").setParseAction(upcaseTokens),
    }
def remove_transactional(string):
    """Remove BEGIN or COMMIT keywords from a statement.

    Args:
        string(str): String to be processed

    Returns:
        result(str): String with begin and commit trimmed
    """
    begin_kw = CaselessKeyword('BEGIN')
    commit_kw = CaselessKeyword('COMMIT')
    # WordStart avoids matching inside identifiers.
    grammar = WordStart() + (begin_kw | commit_kw)
    return grammar.suppress().transformString(string)
def BNF():
    """
    Build and memoize the arithmetic-expression grammar.

    expop   :: '^'
    multop  :: '*' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+
    atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
    factor  :: atom [ expop factor ]*
    term    :: factor [ multop factor ]*
    expr    :: term [ addop term ]*
    """
    global bnf
    if not bnf:
        point = Literal(".")
        # use CaselessKeyword for e and pi, to avoid accidentally matching
        # functions that start with 'e' or 'pi' (such as 'exp'); Keyword
        # and CaselessKeyword only match whole words
        e = CaselessKeyword("E")
        pi = CaselessKeyword("PI")
        #~ fnumber = Combine( Word( "+-"+nums, nums ) +
        #~                    Optional( point + Optional( Word( nums ) ) ) +
        #~                    Optional( e + Word( "+-"+nums, nums ) ) )
        # NOTE(review): the groups below use `(:? ...)` — probably meant the
        # non-capturing `(?: ...)`; it still accepts normal numbers, but the
        # stray `:?` also admits a literal colon — confirm intent.
        fnumber = Regex(r"[+-]?\d+(:?\.\d*)?(:?[eE][+-]?\d+)?")
        ident = Word(alphas, alphas + nums + "_$")
        plus = Literal("+")
        minus = Literal("-")
        mult = Literal("*")
        div = Literal("/")
        lpar = Literal("(").suppress()
        rpar = Literal(")").suppress()
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")
        expr = Forward()
        # `(0, None) * minus` means zero-or-more leading minus signs.
        atom = ((0, None) * minus +
                (pi | e | fnumber | ident + lpar + expr + rpar |
                 ident).setParseAction(pushFirst) |
                Group(lpar + expr + rpar)).setParseAction(pushUMinus)
        # by defining exponentiation as "atom [ ^ factor ]..." instead of
        # "atom [ ^ atom ]...", we get right-to-left exponents, instead of
        # left-to-right; that is, 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor << atom + ZeroOrMore((expop + factor).setParseAction(pushFirst))
        term = factor + ZeroOrMore((multop + factor).setParseAction(pushFirst))
        expr << term + ZeroOrMore((addop + term).setParseAction(pushFirst))
        bnf = expr
    return bnf
def get_parser(self):
    """
    Build (once) and return the expression parser; resets the eval stack.

    Identifiers may be plain variables or ``urn:great_expectations:...``
    URNs. Parse actions push tokens via ``self.push_first`` /
    ``self.push_unary_minus`` for later stack-based evaluation; the
    built parser is cached in ``self._parser``.
    """
    self.clear_stack()
    if not self._parser:
        # use CaselessKeyword for e and pi, to avoid accidentally matching
        # functions that start with 'e' or 'pi' (such as 'exp'); Keyword
        # and CaselessKeyword only match whole words
        e = CaselessKeyword("E")
        pi = CaselessKeyword("PI")
        # fnumber = Combine(Word("+-"+nums, nums) +
        #                   Optional("." + Optional(Word(nums))) +
        #                   Optional(e + Word("+-"+nums, nums)))
        # or use provided pyparsing_common.number, but convert back to str:
        # fnumber = ppc.number().addParseAction(lambda t: str(t[0]))
        fnumber = Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?")
        # urn:great_expectations:... identifier form.
        ge_urn = Combine(
            Literal("urn:great_expectations:")
            + Word(alphas, alphanums + "_$:?=%.&")
        )
        variable = Word(alphas, alphanums + "_$")
        ident = ge_urn | variable
        plus, minus, mult, div = map(Literal, "+-*/")
        lpar, rpar = map(Suppress, "()")
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")
        expr = Forward()
        expr_list = delimitedList(Group(expr))
        # add parse action that replaces the function identifier with a
        # (name, number of args) tuple
        fn_call = (ident + lpar - Group(expr_list) + rpar).setParseAction(
            lambda t: t.insert(0, (t.pop(0), len(t[0])))
        )
        atom = (
            addop[...]
            + (
                (fn_call | pi | e | fnumber | ident).setParseAction(self.push_first)
                | Group(lpar + expr + rpar)
            )
        ).setParseAction(self.push_unary_minus)
        # by defining exponentiation as "atom [ ^ factor ]..." instead of
        # "atom [ ^ atom ]...", we get right-to-left exponents:
        # 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor <<= atom + (expop + factor).setParseAction(self.push_first)[...]
        term = factor + (multop + factor).setParseAction(self.push_first)[...]
        expr <<= term + (addop + term).setParseAction(self.push_first)[...]
        self._parser = expr
    return self._parser
def caselessKeywordsList(keywords):
    """Build a pyparsing parser from a list of caseless keywords.

    @param keywords (list) Tuple or list of keyword names (strings).

    @return PyParsing parser object for recognizing the given keywords.
    """
    # Seed with the first keyword, then OR the remaining ones onto it.
    parser = CaselessKeyword(keywords[0])
    for name in keywords[1:]:
        parser = parser | CaselessKeyword(name)
    return parser
def __init__(self):
    """
    Build the ds9 region-file line parser.

    A line is a region expression, a coordinate-system command, or a
    'global' attribute command, optionally followed by a '#' comment;
    matched atoms/comments are pushed via the RegionPusher callbacks.
    """
    RegionPusher.__init__(self)

    self.shape_definition = ds9_shape_defs
    regionShape = define_shape_helper(self.shape_definition)
    regionShape = regionShape.setParseAction(
        lambda s, l, tok: Shape(tok[0], tok[1:]))

    regionExpr = define_expr(
        regionShape,
        negate_func=lambda s, l, tok: tok[-1].set_exclude(),
    )

    # Recognised coordinate systems, plus WCS/WCS<letters> variants below.
    coord_command_keys = [
        'PHYSICAL', 'IMAGE', 'FK4', 'B1950', 'FK5', 'J2000', 'GALACTIC',
        'ECLIPTIC', 'ICRS', 'LINEAR', 'AMPLIFIER', 'DETECTOR'
    ]
    coordCommandLiterals = define_simple_literals(coord_command_keys)
    coordCommandWCS = Combine(
        CaselessLiteral("WCS") + Optional(Word(alphas)))
    coordCommand = (coordCommandLiterals | coordCommandWCS)
    coordCommand.setParseAction(lambda s, l, tok: CoordCommand(tok[-1]))

    regionGlobal = comment_shell_like(CaselessKeyword("global"),
                                      lambda s, l, tok: Global(tok[-1]))

    regionAtom = (regionExpr | coordCommand | regionGlobal)
    regionAtom = regionAtom.setParseAction(self.pushAtom)

    regionComment = comment_shell_like(Literal("#"),
                                       parseAction=self.pushComment)

    line_simple = define_line(atom=regionAtom,
                              separator=Literal(";"),
                              comment=regionComment)

    # A trailing '||' marks the region as continued on the next line.
    line_w_composite = And([regionAtom,
                            CaselessKeyword("||").setParseAction(
                                self.set_continued)
                            ]) \
        + Optional(regionComment)

    line = Or([line_simple, line_w_composite])

    self.parser = Optional(line) + StringEnd()
def has_unitialized_vars(rpg_dest: str, exclude: list = None) -> bool:
    """
    Search for uninitialized variable declarations in RPG code.

    :param rpg_dest: Path to a RPG source or directory.
    :param exclude: Paths that contains any string from this list are ignored.
    :returns: True when D-spec declarations without ``inz`` are found.
    """
    # D-spec declaration (name, optional type/length) with no trailing
    # `inz` initializer.
    decl_kw = Keyword('D')
    var_name = Word(alphas + "_", exact=1) + Word(alphanums + "_")
    var_type = Word(alphas, exact=1)
    var_len = Word(nums) + Word(alphas, exact=1)
    inz_kw = CaselessKeyword('inz')
    grammar = (decl_kw + var_name + Optional(var_type) + Optional(var_len) +
               Optional(Word(nums)) + NotAny(inz_kw))
    try:
        matches = lang.check_grammar(grammar, rpg_dest,
                                     LANGUAGE_SPECS, exclude)
    except FileNotFoundError:
        show_unknown('File does not exist', details=dict(code_dest=rpg_dest))
        return False
    if not matches:
        show_close('Code does not have unitialized variables',
                   details=dict(code_dest=rpg_dest))
        return False
    show_open('Code has unitialized variables',
              details=dict(matched=matches, total_vulns=len(matches)))
    return True
def get_dialect_patterns(dialect_name: str) -> dict: """Return dialect patterns (used in SQL parsing), given dialect name. If dialect name not recorded, return empty dictionary. """ # Notice, that if DIALECT_PATTERS is a global variable, pyparsing slows down remarkably. DIALECT_PATTERNS = { 'mssql': { 'quoted_strings': [ # depends on how QUOTED_IDENTIFIER is set QuotedString("'", escQuote="''", multiline=True), QuotedString('"', escQuote='""', multiline=True) ], 'one_line_comment': Combine('--' + restOfLine), 'multiline_comment': Regex(r'/\*.+?\*/', flags=DOTALL), # GO must be on its own line 'batch_separator': LineStart().leaveWhitespace() + ( ( CaselessKeyword('GO') + Word(nums) ) | CaselessKeyword('GO') ), 'script_variable_pattern': '$({})' }, 'postgresql': { # https://www.postgresql.org/docs/current/sql-syntax-lexical.html 'quoted_strings': [ QuotedString("'", escQuote="''", multiline=True), QuotedString('$$', multiline=True) # TODO: dollar quote with tag ], 'one_line_comment': Combine('--' + restOfLine), 'multiline_comment': Regex(r'/\*.+?\*/', flags=DOTALL), 'batch_separator': Literal(';'), 'script_variable_pattern': ':{}' }, 'sqlite': { # https://sqlite.org/lang.html 'quoted_strings': [QuotedString("'", escQuote="''", multiline=True)], 'one_line_comment': Combine('--' + restOfLine), 'multiline_comment': Regex(r'/\*.+?\*/', flags=DOTALL), 'batch_separator': Literal(';') } } return DIALECT_PATTERNS.get(dialect_name, {})
def __init__(self):
    """
    Build the parser for ds9 shapes embedded in comments
    (text, vector, composite, projection, segment) plus attributes.
    """
    # Set to True by the '||' continuation marker's parse action.
    self.continued = False

    ds9_attr_parser = get_ds9_attr_parser()

    # Argument signatures for each comment-embedded shape.
    ds9_shape_in_comment_defs = dict(
        text=wcs_shape(CoordOdd, CoordEven),
        vector=wcs_shape(CoordOdd, CoordEven, Distance, Angle),
        composite=wcs_shape(CoordOdd, CoordEven, Angle),
        projection=wcs_shape(CoordOdd, CoordEven,
                             CoordOdd, CoordEven, Distance),
        segment=wcs_shape(CoordOdd, CoordEven,
                          repeat=(0, 2)),
    )
    regionShape = define_shape_helper(ds9_shape_in_comment_defs)
    regionShape = regionShape.setParseAction(
        lambda s, l, tok: Shape(tok[0], tok[1:]))

    # Fallback parser: attributes only, no shape.
    self.parser_default = ds9_attr_parser

    cont = CaselessKeyword("||").setParseAction(self.set_continued).suppress()

    # Optional shape (with optional continuation) followed by attributes.
    line = Optional(And([regionShape, Optional(cont)])) \
        + ds9_attr_parser

    self.parser_with_shape = line
def has_not_autocomplete(filename: str) -> bool:
    """
    Check the autocomplete attribute.

    Check if tags ``form`` and ``input`` have the ``autocomplete``
    attribute set to ``off``.

    :param filename: Path to the ``HTML`` source.
    :returns: True if tags ``form`` and ``input`` have attribute
              ``autocomplete`` set as specified, False otherwise.
    """
    attr = {'autocomplete': CaselessKeyword('off')}
    try:
        found_in_input = _has_attributes(filename, 'input', attr)
        found_in_form = _has_attributes(filename, 'form', attr)
    except FileNotFoundError as exc:
        show_unknown('There was an error', details=dict(error=str(exc)))
        return False
    # Open (vulnerable) when neither tag disables autocomplete.
    if not (found_in_input or found_in_form):
        show_open('Attribute in {}'.format(filename),
                  details=dict(atributes=str(attr)))
        return True
    show_close('Attribute in {}'.format(filename),
               details=dict(atributes=str(attr)))
    return False
def _make_grammar(self):
    """Build and return the pyparsing grammar for the search/filter language."""
    from pyparsing import (QuotedString, ZeroOrMore, Combine,
                           Literal, Optional, OneOrMore,
                           Regex, CaselessKeyword)

    # Recognized backslash escapes and their replacements; any other
    # escaped character simply loses its leading backslash.
    replacements = {
        '\\\\': '\\',
        "\\'": "'",
        '\\"': '"',
        '\\f': '\f',
        '\\n': '\n',
        '\\r': '\r',
        '\\t': '\t',
        '\\ ': ' ',
    }

    def unescape(s, loc, toks):
        token = toks[0]
        return replacements.get(token, token[1:])

    escape = Combine(Regex(r'\\.')).setParseAction(unescape)
    word = Combine(OneOrMore(escape | Regex(r'[^\s\\]+')))
    whitespace = Regex(r'\s+').suppress()
    quotedstring = Combine(OneOrMore(QuotedString('"', escChar='\\') |
                                     QuotedString("'", escChar='\\')))
    # name:value -- the colon itself is suppressed.
    command = Regex(r'[^\s:]+') + Literal(":").suppress() + (quotedstring | word)
    include = quotedstring | command | word
    # A leading "-" or "^" negates the term.
    exclude = ((Literal("-") | Literal("^")).suppress() +
               (quotedstring | command | word))
    or_keyword = CaselessKeyword("or")
    and_keyword = CaselessKeyword("and")

    # Attach the AST node constructors to each production.
    command.setParseAction(CommandExpr)
    include.setParseAction(IncludeExpr)
    exclude.setParseAction(ExcludeExpr)
    or_keyword.setParseAction(OrKeywordExpr)
    and_keyword.setParseAction(AndKeywordExpr)

    keyword = or_keyword | and_keyword
    argument = keyword | exclude | include
    return ZeroOrMore(Optional(whitespace) + argument)
def parse(self, key, value):
    """
    Parse the ACL/filters in the [ACL] section.

    They have the following format:

        <who> = <filter>: <requests>

    Where:
        who      is one or more of: @group or a username (user connecting
                 to Sepiida)
        filter   is one or more of: ALL or @group or sameLocation
        requests is one or more of: ALL or request name (not currently
                 checked)
    """
    def expected(description):
        """
        pyparsing hack to give better error messages, "a or b expected"
        rather than "b expected": a NoMatch named after the whole choice.
        """
        nomatch = NoMatch()
        nomatch.setName(description)
        return nomatch

    name_chars = alphanums + '_-'
    group = Word('@', name_chars)
    kwAll = CaselessKeyword('ALL')
    kwSameLocation = CaselessKeyword('sameLocation')
    # make sure the case is always the same after parsing
    kwAll.setParseAction(lambda tokens: 'ALL')
    kwSameLocation.setParseAction(lambda tokens: 'sameLocation')
    # A user/request name is any identifier that is not one of the keywords.
    user = ~(kwAll | kwSameLocation) + Word(name_chars)
    request = ~(kwAll | kwSameLocation) + Word(name_chars)
    request.setParseAction(lambda tokens: tokens[0].lower())

    who = Group(OneOrMore(expected("@group or username") |
                          group |
                          user)).setResultsName('who')
    filter_ = Group(expected("ALL or sameLocation or @group") |
                    kwAll |
                    OneOrMore(group | kwSameLocation)).setResultsName('filter')
    requests = Group(expected("ALL or request name") |
                     kwAll |
                     OneOrMore(request)).setResultsName('requests')

    key_grammar = who + stringEnd
    value_grammar = filter_ + Suppress(':') + requests + stringEnd

    parsed_key = key_grammar.parseString(key)
    parsed_value = value_grammar.parseString(value)

    self._who = parsed_key.who.asList()
    self._filter = parsed_value.filter.asList()
    self._allowedRequests = parsed_value.requests.asList()
""" Parses the projx NetworkX DSL. """ from itertools import cycle, islice from pyparsing import (Word, alphanums, ZeroOrMore, stringEnd, Suppress, Literal, CaselessKeyword, Optional, Forward, quotedString, removeQuotes) # Used throughout as a variable/attr name. var = Word(alphanums, "_" + alphanums) ############### MATCH STATEMENT ###################### match = CaselessKeyword("MATCH") graph = CaselessKeyword("GRAPH") | CaselessKeyword("SUBGRAPH") graph.setParseAction(lambda t: t[0].lower()) ################ Transformations ####################### transformation = ( CaselessKeyword("TRANSFER") | CaselessKeyword("PROJECT") | CaselessKeyword("COMBINE") ) transformation.setParseAction(lambda t: t[0].lower()) ################ NODE AND EDGE PATTERNS ################### # Used for node and edge patterns. seperator = Suppress(Literal(":"))
# NOTE(review): the function enclosing the next few statements begins before
# this chunk; its head is not visible here, so they are preserved untouched.
    prefix = ts[0][:4] + [", ".join(ts[0][4:-2])] + ts[0][-2:]
    for vs in ts[1:]:
        value = [vs[0] + ", ".join(vs[1:-1]) + vs[-1]]
        res += prefix + value + [";\n"]
    return res

# SQL keyword tokens. The AUTO_INCREMENT rewrite below suggests this grammar
# translates MySQL DDL into SQLite syntax -- TODO confirm against callers.
createToken = Keyword("CREATE")
databaseToken = Keyword("DATABASE")
tableToken = Keyword("TABLE")
ifneToken = Keyword("IF") + Keyword("NOT") + Keyword("EXISTS")
nullToken = Keyword("NULL")
nnToken = Keyword("NOT") + nullToken
collateToken = Keyword("COLLATE")
dcsToken = Keyword("DEFAULT") + Keyword("CHARACTER") + Keyword("SET")
useToken = Keyword("USE")
defaultToken = Keyword("DEFAULT")
unsignedToken = Keyword("UNSIGNED")
autoincrementToken = Keyword("AUTO_INCREMENT")
# MySQL's AUTO_INCREMENT is re-emitted as SQLite's PRIMARY KEY AUTOINCREMENT.
autoincrementToken.setParseAction(lambda toks: ["PRIMARY KEY AUTOINCREMENT"])
keyToken = Keyword("KEY")
primaryToken = Keyword("PRIMARY")
uniqueToken = Keyword("UNIQUE")
insertToken = Keyword("INSERT")
intoToken = Keyword("INTO")
valuesToken = Keyword("VALUES")

# Identifiers: a bare word, or quoted with " or ` ; re-emitted double-quoted.
ident = Word(alphas, alphanums + "_$") ^ QuotedString('"') ^ QuotedString("`")
ident.setParseAction(lambda toks: ['"%s"' % toks[0]])
# String literals keep single quotes; may span multiple lines.
string = QuotedString("'", multiline=True)
string.setParseAction(lambda toks: ["'%s'" % toks[0]])
# Possibly dotted name (e.g. table.column), kept as one combined token.
columnName = delimitedList(ident, ".", combine=True)
# NOTE(review): this chunk starts and ends mid-statement; the truncated
# leading and trailing expressions are preserved exactly as found.
    floatnumber | integer | var |
    attr_open + Optional(dictMembers) + attr_close | sets |
    list_open + Group(delimitedList(list_member_val)) + list_close
)

# A "name: value" pair; Dict makes the results addressable by field name.
memberDef = Dict(Group(field_name + colon + field_val))
# dictMembers is a Forward declared earlier; resolve it here.
dictMembers << delimitedList(memberDef)
attributes = attr_open + Optional(dictMembers).setResultsName("attributes") +\
    attr_close

# Keywords, lowercased after the case-insensitive match.
merge = CaselessKeyword("MERGE")
merge.setParseAction(lambda t: t[0].lower())
method = CaselessKeyword("METHOD")
method.setParseAction(lambda t: t[0].lower())
edges = CaselessKeyword("EDGES")
edges.setParseAction(lambda t: t[0].lower())
union = CaselessKeyword("UNION")
as_keyword = CaselessKeyword("AS")
intersection = CaselessKeyword("INTERSECTION")
method_id = (union | intersection)
method_id.setParseAction(lambda t: t[0].lower())

# MERGE [nodes] [METHOD union|intersection] [AS name] ... -- the statement
# continues past the end of this chunk.
merge = merge + list_open +\
    Group(list_of_nodes).setResultsName("nodes") +\
    list_close +\
    Optional(method + method_id.setResultsName("method")) +\
    Optional(as_keyword + node.setResultsName("node_name")) +\