def _grammar():
    """Build and return the pyparsing grammar for dice expressions.

    The grammar understands integer and fractional literals, ``{a..b}`` and
    ``{a, b, c}`` sequences, die rolls such as ``3d6`` followed by optional
    modifiers, function calls, method calls on sequences, variables, and
    the ``^``, unary/binary ``+ -`` and ``* /`` operators.
    """
    from pyparsing import (
        Combine, Forward, Group, Literal, NotAny, OneOrMore, Optional,
        Suppress, White, Word, ZeroOrMore, alphanums, alphas, delimitedList,
        nums, oneOf, opAssoc, operatorPrecedence,
    )

    expression = Forward()
    lpar, rpar, dot, lbrace, rbrace = [Suppress(ch) for ch in "().{}"]
    # Forbids whitespace at the current position.
    no_space = NotAny(White())
    name = Word(alphas + "_", alphanums + "_")

    # --- numeric literals
    integer = Word(nums)
    integer.setParseAction(IntegerNode)
    fractional = Combine(Word("+-" + nums, nums) + "." + Word(nums))
    fractional.setParseAction(FloatNode)
    literal = fractional | integer

    arguments = delimitedList(expression)

    # --- sequences: {start..stop} or {item, item, ...}
    def _range_node(tokens):
        return SequenceNode(start=tokens[0], stop=tokens[1])

    def _explicit_node(tokens):
        return SequenceNode(lst=tokens)

    range_seq = lbrace + expression + Suppress("..") + expression + rbrace
    range_seq.setParseAction(_range_node)
    explicit_seq = lbrace + Optional(arguments) + rbrace
    explicit_seq.setParseAction(_explicit_node)
    sequence = range_seq | explicit_seq

    # --- die rolls: [count]d(sides | sequence) followed by modifiers
    modifier = no_space + Group(oneOf("d k r e x") + Optional(integer))
    dice_count = Optional(integer, default=1)
    roll = dice_count + no_space + Suppress("d") + no_space + (integer | sequence)
    roll += Group(ZeroOrMore(modifier))
    roll.setParseAction(DieRollNode)

    # --- function calls and method chains
    call_args = lpar + Group(Optional(arguments)) + rpar
    function = name + call_args
    function.setParseAction(FunctionNode)
    method_chain = Group(OneOrMore(dot + name + call_args))
    seq_expr = (roll | sequence | function) + method_chain
    seq_expr.setParseAction(SeqMethodNode)

    # Variable names may contain embedded spaces after the first character.
    variable = Word(alphas + "_", alphanums + "_ ")
    variable.setParseAction(VariableNode)

    atom = seq_expr | roll | literal | sequence | function | variable

    power_op = Literal("^")
    sign_op = oneOf("+ -")
    mult_op = oneOf("* /")
    add_op = oneOf("+ -")
    # Highest precedence first.
    operator_table = [
        (power_op, 2, opAssoc.LEFT, BinaryOpNode),
        (sign_op, 1, opAssoc.RIGHT, UnaryOpNode),
        (mult_op, 2, opAssoc.LEFT, BinaryOpNode),
        (add_op, 2, opAssoc.LEFT, BinaryOpNode),
    ]
    # noinspection PyUnresolvedReferences
    expression << operatorPrecedence(atom, operator_table)
    return expression
def has_unitialized_vars(rpg_dest: str, exclude: list = None) -> bool:
    """
    Report whether RPG code declares variables without an ``inz`` keyword.

    :param rpg_dest: Path to a RPG source or directory.
    :param exclude: Paths that contains any string from this list are ignored.
    :return: ``True`` when at least one match is found; ``False`` when
        nothing matches or the path does not exist.
    """
    data_keyword = Keyword('D')
    name_head = Word(alphas + "_", exact=1)
    name_tail = Word(alphanums + "_")
    var_type = Word(alphas, exact=1)
    var_length = Word(nums) + Word(alphas, exact=1)
    inz_keyword = CaselessKeyword('inz')
    var_name = name_head + name_tail

    # A data declaration that is not followed by the ``inz`` keyword.
    unitialized = (data_keyword + var_name + Optional(var_type) +
                   Optional(var_length) + Optional(Word(nums)) +
                   NotAny(inz_keyword))

    try:
        matches = lang.check_grammar(unitialized, rpg_dest,
                                     LANGUAGE_SPECS, exclude)
        if not matches:
            show_close('Code does not have unitialized variables',
                       details=dict(code_dest=rpg_dest))
            return False
    except FileNotFoundError:
        show_unknown('File does not exist', details=dict(code_dest=rpg_dest))
        return False

    show_open('Code has unitialized variables',
              details=dict(matched=matches, total_vulns=len(matches)))
    return True
def __init__(self):
    """Assemble the query grammar and store it in ``self._grammar``.

    Nothing is built (and ``self._grammar`` is left unset) when
    ``ParserElement`` is falsy.
    """
    if not ParserElement:
        return

    with warnings.catch_warnings():
        # In Python 2.6, pyparsing throws warnings on its own code.
        warnings.simplefilter("ignore")

        or_literal = CaselessLiteral("OR")
        or_operator = Suppress(or_literal).setResultsName("OR_OPERATOR")

        # Text between double quotes; whitespace is significant here.
        quote_contents = Group(Word(ALLCHARS.replace("\"", "")))
        quote_contents.leaveWhitespace()
        quoted_word = Group(
            Suppress('"') + quote_contents + Suppress('"'))
        quoted_word = quoted_word.setResultsName("QUOTES")

        # A bare word: not the OR operator and not starting with "-".
        plain_word = Group(
            NotAny(CaselessLiteral("OR")) +
            Word(WORDCHARS.replace("-", ""), WORDCHARS))
        plain_word = plain_word.setResultsName("PLAINWORD")

        any_word = Group(
            NotAny('(') + ~FollowedBy(')') + Word(ALLWORDCHARS))
        any_word = any_word.setResultsName("ANYWORD")

        # [-]name:value, where value is a word or a quoted string.
        key_value = Combine(
            Optional("-") + Word(string.ascii_letters) + Literal(":") +
            (Word(WORDCHARS) | quoted_word))
        key_word = Group(key_value).setResultsName("KEYWORD")

        # "-" immediately followed by a quoted or plain word.
        not_expr = Group(
            Suppress("-") + NotAny(string.whitespace) +
            (quoted_word | plain_word))
        not_expr = not_expr.setResultsName("NOT")

        word = Group(key_word | not_expr | quoted_word | plain_word)
        word = word.setResultsName("WORD")

        grammar = Forward()
        parens = Forward()

        or_operand = Group(word | parens | not_expr | any_word)
        or_operand = or_operand.setResultsName("OR_OPERAND")

        or_chain = Group(or_operand + OneOrMore(or_operator + or_operand))
        or_expr = Group(
            FollowedBy(or_operand + or_operator + or_operand) + or_chain)
        or_expr = or_expr.setResultsName("OR_EXPRESSION")

        one_expr = Group(or_expr | parens | word | any_word)
        one_expr = one_expr.setResultsName("ONE EXPRESSION")

        negation_marker = Group(Optional("-")).setResultsName(
            "NOT_PARENTHESIS")
        parens <<= Group(
            negation_marker +
            Suppress("(") +
            ZeroOrMore(parens | grammar) +
            Suppress(")")).setResultsName("PARENTHESIS")

        grammar <<= ((one_expr + grammar) | one_expr).setResultsName(
            "GRAMMAR")

        self._grammar = grammar
class ConstraintParser():
    """Parser for single-line constraint statements.

    The grammar accepts four statement shapes (keywords are matched
    case-insensitively)::

        num <actor> <number>
        os | interfaces | location | nodetype <actor> <word>
        link <actor> <actor>
        lan <actor> [<actor> ...]

    Actors and values are alphanumeric words that are not themselves one of
    the constraint keywords.
    """

    wsp = White().suppress()
    comma = Literal(",").suppress()

    num_keyword = CaselessKeyword("num")
    os_keyword = CaselessKeyword("os")
    link_keyword = CaselessKeyword("link")
    lan_keyword = CaselessKeyword("lan")
    interfaces_keyword = CaselessKeyword("interfaces")
    location_keyword = CaselessKeyword("location")
    nodetype_keyword = CaselessKeyword("nodetype")

    # Negative lookahead: fails if the next token is any constraint keyword.
    not_akeyword = (NotAny(num_keyword) + NotAny(os_keyword) +
                    NotAny(link_keyword) + NotAny(lan_keyword) +
                    NotAny(interfaces_keyword) + NotAny(location_keyword) +
                    NotAny(nodetype_keyword))

    actor = (not_akeyword + Word(alphanums))
    actors = actor + ZeroOrMore(wsp + actor)
    actor_pair = actor + wsp + actor

    strValue = (not_akeyword + Word(alphanums))
    numValue = (not_akeyword + Word(nums))

    single_entity_num_constraint = num_keyword
    single_entity_constraint = (os_keyword | interfaces_keyword |
                                location_keyword | nodetype_keyword)

    single_entity_num_stmt = (single_entity_num_constraint("constraint") +
                              wsp + actor("target") +
                              wsp + numValue("value"))
    single_entity_str_stmt = (single_entity_constraint("constraint") +
                              wsp + actor("target") +
                              wsp + strValue("value"))
    link_stmt = (link_keyword("constraint") + wsp + actor_pair("target"))
    lan_stmt = (lan_keyword("constraint") + wsp + actors("target"))

    constraint_statement = (single_entity_num_stmt | single_entity_str_stmt |
                            link_stmt | lan_stmt) + LineEnd()

    def parse_stmt(self, statement):
        """Parse one constraint statement.

        Returns a 3-tuple built from the ``constraint``, ``target`` and
        ``value`` results of the grammar: ``str(constraint)``,
        ``list(target)`` and ``list(value)``. Any element that comes out
        as an empty list or empty string is replaced by ``None``.

        If the statement does not match the grammar, returns
        ``(None, None, None)``.
        """
        try:
            parsed = self.constraint_statement.parseString(statement)
        except ParseException:
            # Unparseable input is reported to the caller as "nothing found".
            return (None, None, None)

        fields = (str(parsed.constraint),
                  list(parsed.target),
                  list(parsed.value))
        return tuple(f if f != [] and f != "" else None for f in fields)
def make_one_header_assignment_parser(struct_parsers):
    """Build a parser for one ``name value...`` header assignment.

    The assignment is only matched when the input at that position is not
    recognised by any of the struct parsers. Hash comments are ignored.

    :Parameters:
        - `struct_parsers`: a dictionary of parsers for structs

    @return: a parser that can parse one header assignment

    >>> test_parser = make_one_header_assignment_parser({})
    >>>
    >>> test_string = "a 42 32"
    >>> parsed_assignments = test_parser.parseString(test_string)
    >>> print(parsed_assignments[0]['name'])
    a
    >>> print(parsed_assignments[0]['value'])
     42 32
    """
    any_struct = Or(list(struct_parsers.values()))
    assignment = Group(value_name('name') + restOfLine('value'))
    parser = NotAny(any_struct) + assignment
    parser.ignore(hash_comment)
    return parser
def _define_grammar(self):
    '''build the block grammar, hook up the parse actions, and store the
    result in self._grammar'''
    self._define_actions()

    # low-level pieces
    line_end = LineEnd().suppress()
    opt_space = Optional(White()).suppress()
    begin_kw = Keyword('begin').suppress()
    end_kw = Keyword('end').suppress()
    hash_comment = (Literal('#') + restOfLine).suppress()
    name = Word(alphas, alphanums + '_')

    # a data line: optional payload, optional trailing comment
    payload = Combine(OneOrMore(CharsNotIn('#\n\r')))
    payload.addParseAction(self._data_value_action)
    data_line = (LineStart() + opt_space + Optional(payload) +
                 Optional(hash_comment) + line_end)

    # "begin <name>" and "end <name>" delimiter lines
    block_start = (LineStart() + begin_kw + name +
                   Optional(hash_comment) + line_end)
    block_start.addParseAction(self._begin_block_action)
    block_stop = (LineStart() + end_kw + name +
                  Optional(hash_comment) + line_end)
    block_stop.addParseAction(self._end_block_action)

    # anything outside a block that does not start a new block
    filler_line = LineStart() + opt_space + NotAny(begin_kw) + restOfLine + line_end
    filler = ZeroOrMore(filler_line).suppress()

    body = Group(ZeroOrMore(NotAny(end_kw) + data_line))
    block = block_start + body + block_stop

    self._grammar = filler + OneOrMore(block + filler)
def define_number(self):
    """
    Return the syntax definition for a number in Arabic Numerals.

    Subclasses may override this to support other numeral systems. To
    change only the thousands or decimal separator characters, use
    :attr:`T_THOUSANDS_SEPARATOR` and :attr:`T_DECIMAL_SEPARATOR` instead
    of overriding this method.

    """
    tokens = self._grammar

    # Signs and the decimal separator are normalised to "+", "-" and "."
    # no matter which characters the grammar uses for them.
    plus = Literal(tokens.get_token("positive_sign"))
    plus.setParseAction(lambda t: "+")
    minus = Literal(tokens.get_token("negative_sign"))
    minus.setParseAction(lambda t: "-")
    point = Literal(tokens.get_token("decimal_separator"))
    point.setParseAction(lambda t: ".")
    group_sep = Suppress(tokens.get_token("thousands_separator"))
    plain_digits = Word(nums)

    # Integer part: either separator-grouped digits or a plain digit run.
    sign = plus | minus
    grouped_digits = (Word(nums, max=3) +
                      OneOrMore(group_sep + Word(nums, exact=3)))
    integer_part = grouped_digits | plain_digits
    fraction_part = point + plain_digits

    # A number must not be immediately followed by one of these operators.
    expop = Literal('^')
    signop = oneOf('+ -')
    multop = oneOf('* /')
    plusop = oneOf('+ -')
    factop = Literal('!')
    modop = Literal('%')
    trailing_operator = expop | signop | multop | plusop | factop | modop

    number = Combine(Optional(sign) + integer_part +
                     Optional(fraction_part) + NotAny(trailing_operator))
    number.setParseAction(self.make_number)
    number.setName("number")
    return number
def __init__(self):
    """Build the ``server { ... }`` block grammar as ``self.server_def``.

    A server block contains ``location <word> { ... }`` sub-blocks and
    ``key value... ;`` configuration lines; ``#`` comments are ignored.
    """
    super(ServerParser, self).__init__()

    token = Word(alphanums + '-_./$:')
    server_kw = Literal('server').suppress()
    location_kw = Literal('location')
    open_brace = Literal('{').suppress()
    close_brace = Literal('}').suppress()
    semicolon = Literal(';').suppress()

    # key followed by one or more values, terminated by ';'
    config_line = NotAny(close_brace) + token + Group(OneOrMore(token)) + semicolon

    location_body = Group(OneOrMore(Group(config_line)))
    location_def = location_kw + token + open_brace + location_body + close_brace

    server_item = Group(location_def) | Group(config_line)
    self.server_def = server_kw + open_brace + OneOrMore(server_item) + close_brace

    hash_comment = Literal('#') + Optional(restOfLine)
    self.server_def.ignore(hash_comment)
def define_identifier(self):
    """
    Return the syntax definition for an identifier.

    An identifier is one or more individual identifiers joined by the
    grammar's ``namespace_separator`` token; every part but the last forms
    the namespace. Each individual identifier is a run of word characters
    and the grammar's ``identifier_spacing`` characters. The identifier
    must not be immediately followed by an arithmetic operator
    (``^ + - * / ! %``).

    """
    # --- Defining the individual identifiers:
    space_char = re.escape(self._grammar.get_token("identifier_spacing"))
    # Raw string so that "\w" reaches the regex engine without going
    # through Python's string-escape processing.
    identifier0 = Regex(r"[\w%s]+" % space_char, re.UNICODE)
    # NOTE(review): a leading digit is accepted here. The previous code
    # built a "Unicode digit" expression that was never applied to the
    # grammar; confirm whether identifiers should be barred from starting
    # with a number before reinstating such a check.
    identifier0 = Combine(identifier0)
    identifier0.setName("individual_identifier")

    # --- Defining the namespaces:
    namespace_sep = Suppress(
        self._grammar.get_token("namespace_separator"))
    namespace = Group(ZeroOrMore(identifier0 + namespace_sep))
    namespace.setName("namespace")

    # --- The full identifier, which could have a namespace:
    identifier = Combine(
        namespace.setResultsName("namespace_parts") +
        identifier0.setResultsName("identifier"))
    identifier.setName("full_identifier")

    # --- An identifier may not be directly followed by an operator:
    expop = Literal('^')
    multop = oneOf('* /')
    factop = Literal('!')
    modop = Literal('%')
    signop = oneOf('+ -')
    opers = expop | signop | multop | factop | modop

    identifier = identifier + NotAny(opers)
    return identifier
def _create_filter_parser():
    """Build the parser for triple filter expressions.

    A filter is made of ``<subj pred obj>`` triples, optionally wrapped in
    parentheses, joined by ``AND`` or by ``OR``. Each triple component is
    ``*``, a ``?variable`` or a URI term; the object may instead be a
    double-quoted literal.
    """
    and_keyword = Keyword('AND')
    or_keyword = Keyword('OR')

    # Components of a triple
    variable = Literal('?') + Word(alphanums + '_').leaveWhitespace()
    uri_term = NotAny(Literal('"')) + Word(printables, excludeChars='>*')
    uri_part = Keyword('*') ^ uri_term ^ variable
    literal_term = QuotedString(quoteChar='"', escChar='\\')

    subject = uri_part.setResultsName('subj')
    predicate = uri_part.setResultsName('pred')
    uri_object = Group(uri_part).setResultsName('obj')
    literal_object = Group(literal_term).setResultsName('objlit')

    open_angle = Literal('<').suppress()
    close_angle = Literal('>').suppress()
    triple = Group(
        open_angle + subject + predicate +
        (uri_object ^ literal_object) +
        close_angle)

    # Boolean combinations of triples
    expr = Forward()
    parenthesised = Literal('(').suppress() + expr + Literal(')').suppress()
    atom = (triple.setResultsName('triple') | parenthesised)

    and_group = Group(atom + ZeroOrMore(and_keyword.suppress() + atom))
    or_group = Group(atom + ZeroOrMore(or_keyword.suppress() + atom))

    expr << (and_group.setResultsName('and') ^ or_group.setResultsName('or'))
    return expr
def build(self):
    """Assemble the CQL statement grammar and store it in ``self.parser``.

    Also sets ``self.termop``, the expression matching the neighbourhood
    operators listed in ``self.neighbourhood_symbols``.
    """

    # ------------------------------------------
    #   C. building blocks
    # ------------------------------------------
    termop_pattern = "|".join(self.neighbourhood_symbols)
    self.termop = Regex(termop_pattern, re.IGNORECASE)\
        .setParseAction(upcaseTokens)\
        .setName("termop")

    termword_chars = self.unicode_printables + self.separators + self.wildcards
    termword = Word(termword_chars).setName("term")
    termword_termop = (termword + OneOrMore(self.termop + termword))

    # ------------------------------------------
    #   D. triple
    # ------------------------------------------
    index = Word(alphanums).setName("index")
    binop = oneOf(self.binop_symbols, caseless=True).setName("binop")

    # Note: an earlier attempt to support Solr proximity suffixes on quoted
    # strings, e.g. {!complexphrase}text:"((a* OR b*) AND (c* OR d*))"~6,
    # did not work out and is not part of the grammar.

    # a quoted string
    quoted_term = quotedString.setName("term")
    # just a single termword
    plain_term = termword.setName("term")
    # neighbourhood operators, properly wrapped in parentheses
    wrapped_neighbourhood_term = Combine(
        '(' + Suppress(ZeroOrMore(' ')) +
        termword_termop +
        Suppress(ZeroOrMore(' ')) + ')').setName("term")
    # convenience/gracefulness: neighbourhood operators are also accepted
    # without the surrounding parentheses
    bare_neighbourhood_term = Combine(termword_termop).setName("term")

    term = (quoted_term ^
            plain_term ^
            wrapped_neighbourhood_term ^
            bare_neighbourhood_term)

    # ------------------------------------------
    #   E. condition
    # ------------------------------------------
    cqlStatement = Forward()

    # Regular cql condition notation 'index=term', or a parenthesised
    # subquery.
    regular_triple = (index + binop + term).setResultsName("triple")
    regular_subquery = ("(" + cqlStatement + ")").setResultsName("subquery")
    cqlConditionBase = Group(regular_triple | regular_subquery)

    # Value shortcut notations like 'index=(term)' or
    # 'index=(term1 and term2 or term3)'.
    # "term + NotAny(binop)" helps giving proper error messages like
    # "ParseException: Expected term (at char 4)" for erroneous queries
    # like "foo=".
    short_triple = (term + NotAny(binop)).setResultsName("triple-short")
    short_subquery = (index + binop + "(" + cqlStatement + ")")\
        .setResultsName("subquery-short")
    cqlConditionShortcut = Group(short_triple | short_subquery)

    cqlCondition = cqlConditionBase | cqlConditionShortcut

    # ------------------------------------------
    #   F. statement
    # ------------------------------------------
    cqlStatement << (cqlCondition + ZeroOrMore(self.booleans_or + cqlStatement))

    # apply SQL comment format
    cqlComment = "--" + restOfLine
    cqlStatement.ignore(cqlComment)

    self.parser = cqlStatement
Suppress(Literal("***"))).setParseAction(lambda t: [["bold-italic", t[0]]]) link_text = ( Suppress(Literal("[")) + ( Optional(url) + ZeroOrMore(Word(printables.replace( "]", ""))).setParseAction(lambda t: "text") # SkipTo(Literal("]")) ) + Suppress( Literal("]") + Literal("(") + SkipTo(Literal(")")) + Literal(")"))).setParseAction(lambda t: [["link", t[0]]]) # Ex: this is regular reg_text = ( OneOrMore( NotAny(italic_text | link_text) + Word(printables) | Suppress(Literal("\n")) + Word(printables)) # SkipTo(Literal("*")) | SkipTo(Literal("\n\n")) ).setParseAction(lambda t: [["regular", " ".join(t)]]) # Note: can be either two newlines or the end of the string line_break = Suppress( Literal("\n\n") + ZeroOrMore(Literal("\n")) | Literal("\n") + StringEnd() | StringEnd()) divider = Suppress(OneOrMore(Literal("_"))) text = OneOrMore(link_text | bold_italic_text | bold_text | italic_text | reg_text)
# Punctuation tokens: the closing bracket and the statement terminator are
# dropped from the parse results; the sign literals are kept.
RBRACKET = Suppress(']')
END = Suppress(';')
PLUS = Literal('+')
MINUS = Literal('-')

# A single data item: a number, a symbol, or a quoted string (either quote).
single = number | symbol | QuotedString('"') | QuotedString("'")
# A parenthesised, comma-separated list of single items.
tuple_ = Group(LPAREN + delimitedList(single) + RPAREN)
# A brace-enclosed list of symbols, exposed under the name 'subscripts'.
subscript_domain = LBRACE + Group(delimitedList(symbol)) \
    .setResultsName('subscripts') + RBRACE
data = single | tuple_
# One or more data items, optionally comma-separated.
# should not match a single (tr)
simple_data = Group(
    NotAny('(tr)') + data + ZeroOrMore(Optional(Suppress(',')) + data))
# the first element of a set data record cannot be 'dimen', or else
# these would match set_def_stmts
non_dimen_simple_data = ~Literal('dimen') + simple_data
# A matrix row: a single item followed by one or more '+' / '-' marks.
matrix_row = Group(single + OneOrMore(PLUS | MINUS))
# ": <columns> := <rows>"; results are passed to MatrixData.
matrix_data = ":" + OneOrMore(single).setResultsName('columns') \
    + ":=" + OneOrMore(matrix_row).setResultsName('data')
matrix_data.setParseAction(MatrixData)
# The same matrix form prefixed by "(tr)"; results go to mark_transposed.
tr_matrix_data = Suppress("(tr)") + matrix_data
tr_matrix_data.setParseAction(mark_transposed)
# A slice component is a number, a symbol, or the '*' wildcard.
set_slice_component = number | symbol | '*'
# A parenthesised slice; the NotAny keeps "(tr" from being read as a slice.
set_slice_record = LPAREN + NotAny('tr') + delimitedList(
    set_slice_component) + RPAREN
# Time and date components, built from range expressions defined earlier.
second = zeroThru59
minute = zeroThru59
hour = zeroThru23
day = oneThru31("day")
month = oneThru12("month")
# Month-day pairs, with the allowed day range depending on the month:
# up to 31, up to 30, and up to 29 for month "02".
monthDay = ((oneOf("01 03 05 07 08 10 12")("month") + "-" + oneThru31("day"))
            ^ (oneOf("04 06 09 11")("month") + "-" + oneThru30("day"))
            ^ (L("02")("month") + "-" + oneThru29("day")))
# 4 digits, 0 to 9
positiveYear = Word(nums, exact=4)
# Negative version of positive year, but "-0000" is illegal
negativeYear = NotAny(L("-0000")) + ("-" + positiveYear)
year = Combine(positiveYear ^ negativeYear)("year")
yearMonth = year + "-" + month
yearMonthDay = year + "-" + monthDay
# A date is a year, a year-month, or a full year-month-day; the combined
# expression is registered as the parser for Date.
date = Combine(year ^ yearMonth ^ yearMonthDay)("date")
Date.set_parser(date)
zoneOffsetHour = oneThru13
# Zone offset: "Z", or a sign followed by one of: hour with optional
# ":minute", the literal "14:00", or "00:" plus a non-zero minute.
# ("+" binds tighter than "^", so each "^" separates whole alternatives.)
zoneOffset = L("Z") ^ (Regex("[+-]") + (zoneOffsetHour + Optional(":" + minute)
                                        ^ L("14:00") ^ ("00:" + oneThru59)))
# hour:minute:second, or the literal "24:00:00" as an alternative.
baseTime = Combine(hour + ":" + minute + ":" + second ^ "24:00:00")
class SqlGrammarMSSQLServer(SqlGrammar):
    """
    SQL grammar (subclass of :class:`.SqlGrammar`) implementing Microsoft SQL
    Server syntax.

    The grammar is built at class-definition time as a set of class
    attributes (pyparsing expressions); the ``get_*`` class methods return
    those attributes. Only ``SELECT`` statements are covered.
    """

    # -------------------------------------------------------------------------
    # Forward declarations
    # -------------------------------------------------------------------------
    # Both are defined further down, once the expressions they refer to
    # exist.
    expr = Forward()
    select_statement = Forward()

    # -------------------------------------------------------------------------
    # Keywords
    # -------------------------------------------------------------------------
    # https://msdn.microsoft.com/en-us/library/ms189822.aspx
    sql_server_reserved_words = """
        ADD ALL ALTER AND ANY AS ASC AUTHORIZATION BACKUP BEGIN BETWEEN BREAK
        BROWSE BULK BY CASCADE CASE CHECK CHECKPOINT CLOSE CLUSTERED COALESCE
        COLLATE COLUMN COMMIT COMPUTE CONSTRAINT CONTAINS CONTAINSTABLE
        CONTINUE CONVERT CREATE CROSS CURRENT CURRENT_DATE CURRENT_TIME
        CURRENT_TIMESTAMP CURRENT_USER CURSOR DATABASE DBCC DEALLOCATE DECLARE
        DEFAULT DELETE DENY DESC DISK DISTINCT DISTRIBUTED DOUBLE DROP DUMP
        ELSE END ERRLVL ESCAPE EXCEPT EXEC EXECUTE EXISTS EXIT EXTERNAL FETCH
        FILE FILLFACTOR FOR FOREIGN FREETEXT FREETEXTTABLE FROM FULL FUNCTION
        GOTO GRANT GROUP HAVING HOLDLOCK IDENTITY IDENTITY_INSERT IDENTITYCOL
        IF IN INDEX INNER INSERT INTERSECT INTO IS JOIN KEY KILL LEFT LIKE
        LINENO LOAD MERGE NATIONAL NOCHECK NONCLUSTERED NOT NULL NULLIF OF OFF
        OFFSETS ON OPEN OPENDATASOURCE OPENQUERY OPENROWSET OPENXML OPTION OR
        ORDER OUTER OVER PERCENT PIVOT PLAN PRECISION PRIMARY PRINT PROC
        PROCEDURE PUBLIC RAISERROR READ READTEXT RECONFIGURE REFERENCES
        REPLICATION RESTORE RESTRICT RETURN REVERT REVOKE RIGHT ROLLBACK
        ROWCOUNT ROWGUIDCOL RULE SAVE SCHEMA SECURITYAUDIT SELECT
        SEMANTICKEYPHRASETABLE SEMANTICSIMILARITYDETAILSTABLE
        SEMANTICSIMILARITYTABLE SESSION_USER SET SETUSER SHUTDOWN SOME
        STATISTICS SYSTEM_USER TABLE TABLESAMPLE TEXTSIZE THEN TO TOP TRAN
        TRANSACTION TRIGGER TRUNCATE TRY_CONVERT TSEQUAL UNION UNIQUE UNPIVOT
        UPDATE UPDATETEXT USE USER VALUES VARYING VIEW WAITFOR WHEN WHERE
        WHILE WITH WITHIN WRITETEXT
    """
    # ... the referenced page lists "WITHIN GROUP" rather than "WITHIN";
    # only "WITHIN" is included here.
    odbc_reserved_words = """
        ABSOLUTE ACTION ADA ADD ALL ALLOCATE ALTER AND ANY ARE AS ASC
        ASSERTION AT AUTHORIZATION AVG BEGIN BETWEEN BIT BIT_LENGTH BOTH BY
        CASCADE CASCADED CASE CAST CATALOG CHAR CHAR_LENGTH CHARACTER
        CHARACTER_LENGTH CHECK CLOSE COALESCE COLLATE COLLATION COLUMN COMMIT
        CONNECT CONNECTION CONSTRAINT CONSTRAINTS CONTINUE CONVERT
        CORRESPONDING COUNT CREATE CROSS CURRENT CURRENT_DATE CURRENT_TIME
        CURRENT_TIMESTAMP CURRENT_USER CURSOR DATE DAY DEALLOCATE DEC DECIMAL
        DECLARE DEFAULT DEFERRABLE DEFERRED DELETE DESC DESCRIBE DESCRIPTOR
        DIAGNOSTICS DISCONNECT DISTINCT DOMAIN DOUBLE DROP ELSE END END-EXEC
        ESCAPE EXCEPT EXCEPTION EXEC EXECUTE EXISTS EXTERNAL EXTRACT FALSE
        FETCH FIRST FLOAT FOR FOREIGN FORTRAN FOUND FROM FULL GET GLOBAL GO
        GOTO GRANT GROUP HAVING HOUR IDENTITY IMMEDIATE IN INCLUDE INDEX
        INDICATOR INITIALLY INNER INPUT INSENSITIVE INSERT INT INTEGER
        INTERSECT INTERVAL INTO IS ISOLATION JOIN KEY LANGUAGE LAST LEADING
        LEFT LEVEL LIKE LOCAL LOWER MATCH MAX MIN MINUTE MODULE MONTH NAMES
        NATIONAL NATURAL NCHAR NEXT NO NONE NOT NULL NULLIF NUMERIC
        OCTET_LENGTH OF ON ONLY OPEN OPTION OR ORDER OUTER OUTPUT OVERLAPS PAD
        PARTIAL PASCAL POSITION PRECISION PREPARE PRESERVE PRIMARY PRIOR
        PRIVILEGES PROCEDURE PUBLIC READ REAL REFERENCES RELATIVE RESTRICT
        REVOKE RIGHT ROLLBACK ROWS SCHEMA SCROLL SECOND SECTION SELECT SESSION
        SESSION_USER SET SIZE SMALLINT SOME SPACE SQL SQLCA SQLCODE SQLERROR
        SQLSTATE SQLWARNING SUBSTRING SUM SYSTEM_USER TABLE TEMPORARY THEN
        TIME TIMESTAMP TIMEZONE_HOUR TIMEZONE_MINUTE TO TRAILING TRANSACTION
        TRANSLATE TRANSLATION TRIM TRUE UNION UNIQUE UNKNOWN UPDATE UPPER
        USAGE USER USING VALUE VALUES VARCHAR VARYING VIEW WHEN WHENEVER WHERE
        WITH WORK WRITE YEAR ZONE
    """
    # ... who thought "END-EXEC" was a good one?

    # Then some more:
    # - WITH ROLLUP: https://technet.microsoft.com/en-us/library/ms189305(v=sql.90).aspx  # noqa
    # - SOUNDEX: https://msdn.microsoft.com/en-us/library/ms187384.aspx
    # NOTE(review): this string is defined but is not included in
    # sql_server_keywords below -- confirm whether that is intentional.
    rnc_extra_sql_server_keywords = """
        ROLLUP SOUNDEX
    """
    # Sorted, de-duplicated union of the SQL Server, ODBC and ANSI-92
    # reserved-word lists.
    sql_server_keywords = sorted(
        list(
            set(sql_server_reserved_words.split() +
                odbc_reserved_words.split() +
                ANSI92_RESERVED_WORD_LIST.split())))
    # log.critical(sql_server_keywords)
    keyword = make_words_regex(sql_server_keywords,
                               caseless=True,
                               name="keyword")

    # -------------------------------------------------------------------------
    # Comments
    # -------------------------------------------------------------------------
    # https://msdn.microsoft.com/en-us/library/ff848807.aspx
    comment = ansi_comment

    # -----------------------------------------------------------------------------
    # identifier
    # -----------------------------------------------------------------------------
    # http://dev.mysql.com/doc/refman/5.7/en/identifiers.html
    # NOTE(review): the link above is a MySQL reference, in a SQL Server
    # grammar -- confirm it is the intended source.
    # Only the ANSI-92 list (not sql_server_keywords) is excluded from bare
    # identifiers.
    bare_identifier_word = make_regex_except_words(r"\b[a-zA-Z0-9$_]*\b",
                                                   ANSI92_RESERVED_WORD_LIST,
                                                   caseless=True,
                                                   name="bare_identifier_word")
    # A bare word, or anything inside [square brackets]; the brackets are
    # kept in the result (unquoteResults=False).
    identifier = (bare_identifier_word | QuotedString(
        quoteChar="[", endQuoteChar="]",
        unquoteResults=False)).setName("identifier")
    # Independent copies of the identifier expression, one per role.
    collation_name = identifier.copy()
    column_name = identifier.copy()
    column_alias = identifier.copy()
    table_name = identifier.copy()
    table_alias = identifier.copy()
    schema_name = identifier.copy()
    index_name = identifier.copy()
    function_name = identifier.copy()
    parameter_name = identifier.copy()
    database_name = identifier.copy()

    # Negative lookahead: the spec must not be followed by a further '.'.
    no_dot = NotAny('.')
    # Up to three dotted parts: [database.][schema.]table
    table_spec = (
        Combine(database_name + '.' + schema_name + '.' + table_name + no_dot)
        | Combine(schema_name + '.' + table_name + no_dot) |
        table_name + no_dot).setName("table_spec")
    # Up to four dotted parts: [[[database.]schema.]table.]column
    column_spec = (
        Combine(database_name + '.' + schema_name + '.' + table_name + '.' +
                column_name + no_dot)
        | Combine(schema_name + '.' + table_name + '.' + column_name + no_dot)
        | Combine(table_name + '.' + column_name + no_dot) |
        column_name + no_dot).setName("column_spec")
    # I'm unsure if SQL Server allows keywords in the parts after dots, like
    # MySQL does.
    # - http://stackoverflow.com/questions/285775/how-to-deal-with-sql-column-names-that-look-like-sql-keywords  # noqa

    bind_parameter = Literal('?')
    variable = Regex(r"@[a-zA-Z0-9\.$_]+").setName("variable")

    # The parse action joins the parsed arguments with ', '.
    argument_list = (delimitedList(expr).setName("arglist").setParseAction(
        ', '.join))
    # Combine() around name + LPAR: "myfunc(" must be adjacent.
    function_call = Combine(function_name + LPAR) + argument_list + RPAR

    # Not supported: index hints
    # ... http://stackoverflow.com/questions/11016935/how-can-i-force-a-query-to-not-use-a-index-on-a-given-table  # noqa

    # -----------------------------------------------------------------------------
    # CASE
    # -----------------------------------------------------------------------------
    # Two forms: "CASE expr WHEN ..." and "CASE WHEN ...".
    case_expr = ((CASE + expr + OneOrMore(WHEN + expr + THEN + expr) +
                  Optional(ELSE + expr) + END) |
                 (CASE + OneOrMore(WHEN + expr + THEN + expr) +
                  Optional(ELSE + expr) + END)).setName("case_expr")

    # -----------------------------------------------------------------------------
    # Expressions
    # -----------------------------------------------------------------------------
    aggregate_function = (
        # https://msdn.microsoft.com/en-us/library/ms173454.aspx
        AVG | CHECKSUM_AGG | COUNT | COUNT_BIG | GROUPING | GROUPING_ID | MAX |
        MIN | STDEV | STDEV_P | SUM | VAR | VARP)
    # The operands of the infix expression grammar; alternatives are tried
    # in the order written.
    expr_term = (
        INTERVAL + expr + time_unit |
        Optional(EXISTS) + LPAR + select_statement + RPAR |
        # ... e.g. mycol = EXISTS(SELECT ...)
        # ... e.g. mycol IN (SELECT ...)
        LPAR + delim_list(expr) + RPAR |
        # ... e.g. mycol IN (1, 2, 3)
        case_expr | bind_parameter | variable | function_call | literal_value |
        column_spec  # not just identifier
    )
    UNARY_OP, BINARY_OP, TERNARY_OP = 1, 2, 3
    expr << infixNotation(
        expr_term,
        [
            # Having lots of operations in the list here SLOWS IT DOWN A LOT.
            # Just combine them into an ordered list.
            (COLLATE | oneOf('! - + ~'), UNARY_OP, opAssoc.RIGHT),
            (
                (
                    oneOf('^ * / %') |
                    oneOf('+ - << >> & | = <=> >= > <= < <> !=') |
                    (IS + Optional(NOT)) | LIKE | (Optional(NOT) + IN) |
                    SOUNDEX  # RNC; presumably at same level as LIKE
                ),
                BINARY_OP,
                opAssoc.LEFT),
            ((BETWEEN, AND), TERNARY_OP, opAssoc.LEFT),
            # CASE handled above (hoping precedence is not too much of a problem)
            (NOT, UNARY_OP, opAssoc.RIGHT),
            (AND | '&&' | OR | '||' | ':=', BINARY_OP, opAssoc.LEFT),
        ],
        lpar=LPAR,
        rpar=RPAR)
    # ignores LIKE [ESCAPE]

    # -------------------------------------------------------------------------
    # SELECT
    # -------------------------------------------------------------------------
    compound_operator = UNION + Optional(ALL | DISTINCT)
    ordering_term = (expr + Optional(COLLATE + collation_name) +
                     Optional(ASC | DESC))
    join_constraint = Optional(
        Group((ON + expr) | (USING + LPAR + delim_list(column_name) + RPAR)))
    join_op = Group(COMMA | NATURAL +
                    (Optional(LEFT | RIGHT) + Optional(OUTER)) + JOIN |
                    (INNER | CROSS) + JOIN |
                    Optional(LEFT | RIGHT) + Optional(OUTER) + JOIN)

    join_source = Forward()
    # Every table named here is collected under "from_tables".
    # NOTE(review): the second alternative is joined to "(join_source)" with
    # "+", not "|", so a sub-select must be followed by a parenthesised join
    # source -- confirm this is intended.
    single_source = ((
        table_spec.copy().setResultsName("from_tables", listAllMatches=True) +
        Optional(Optional(AS) + table_alias)
        # Optional(index_hint_list)  # not supported yet
    ) | (select_statement + Optional(AS) + table_alias) +
                     (LPAR + join_source + RPAR))
    join_source << Group(single_source + ZeroOrMore(
        join_op + single_source + join_constraint))("join_source")
    # ... must have a Group to append to it later, it seems
    # ... but name it "join_source" here, or it gets enclosed in a further list
    #     when you name it later

    result_base = (
        # Aggregate functions: e.g. "MAX(" allowed, "MAX (" not allowed
        Combine(COUNT + LPAR) + '*' + RPAR |  # special aggregate function
        Combine(COUNT + LPAR) + DISTINCT + expr + RPAR |  # special aggregate function  # noqa
        Combine(aggregate_function + LPAR) + expr + RPAR | expr | '*' |
        Combine(table_name + '.' + '*') | column_spec | literal_value)
    # Every result column is collected under "select_columns".
    result_column = (result_base +
                     Optional(Optional(AS) + column_alias)).setResultsName(
                         "select_columns", listAllMatches=True)

    # -------------------------------------------------------------------------
    # SELECT
    # -------------------------------------------------------------------------
    where_expr = Group(expr).setResultsName("where_expr")
    # The WHERE part is optional, but the "where_clause" group is always
    # present in the results.
    where_clause = Group(Optional(WHERE +
                                  where_expr)).setResultsName("where_clause")
    select_core = (
        SELECT + Optional(TOP + integer) +
        Group(Optional(ALL | DISTINCT))("select_specifier") +
        Group(delim_list(result_column))("select_expression") +
        Optional(FROM + join_source + where_clause +
                 Optional(GROUP + BY +
                          delim_list(ordering_term + Optional(ASC | DESC))
                          ("group_by_term") + Optional(WITH + ROLLUP)) +
                 Optional(HAVING + expr("having_expr"))))
    select_statement << (
        select_core + ZeroOrMore(compound_operator + select_core) +
        Optional(ORDER + BY + delim_list(ordering_term + Optional(ASC | DESC))
                 ("order_by_terms")) +
        # PROCEDURE ignored
        # rest ignored
        Optional(';'))
    select_statement.ignore(comment)

    # https://msdn.microsoft.com/en-us/library/ms175874.aspx
    # ... approximately (and conservatively):
    # Characters that force quoting: as the first character, and anywhere.
    MSSQL_INVALID_FIRST_IF_UNQUOTED = re.compile(r"[^a-zA-Z_@#]")
    MSSQL_INVALID_IF_UNQUOTED = re.compile(r"[^a-zA-Z0-9_@#$]")

    def __init__(self):
        """Initialise via the parent class; no extra state is set here."""
        super().__init__()

    @classmethod
    def quote_identifier(cls, identifier: str) -> str:
        """Return ``identifier`` wrapped in square brackets."""
        return f"[{identifier}]"

    @classmethod
    def is_quoted(cls, identifier: str) -> bool:
        """Return True if ``identifier`` starts with ``[`` and ends with
        ``]``."""
        return identifier.startswith("[") and identifier.endswith("]")

    @classmethod
    def requires_quoting(cls, identifier: str) -> bool:
        """Return True if ``identifier`` cannot be used unquoted.

        That is the case when it contains a character matched by
        ``MSSQL_INVALID_IF_UNQUOTED``, when its first character is matched
        by ``MSSQL_INVALID_FIRST_IF_UNQUOTED``, or when its upper-cased form
        is in ``sql_server_keywords``. An empty identifier fails the
        assertion.
        """
        assert identifier, "Empty identifier"
        if cls.MSSQL_INVALID_IF_UNQUOTED.search(identifier):
            return True
        firstchar = identifier[0]
        if cls.MSSQL_INVALID_FIRST_IF_UNQUOTED.search(firstchar):
            return True
        if identifier.upper() in cls.sql_server_keywords:
            return True
        return False

    @classmethod
    def get_grammar(cls) -> ParserElement:
        """Return the top-level grammar, i.e. ``select_statement``."""
        # Grammar (here, just SELECT)
        return cls.select_statement

    @classmethod
    def get_column_spec(cls):
        """Return the ``column_spec`` expression."""
        return cls.column_spec

    @classmethod
    def get_result_column(cls):
        """Return the ``result_column`` expression."""
        return cls.result_column

    @classmethod
    def get_join_op(cls):
        """Return the ``join_op`` expression."""
        return cls.join_op

    @classmethod
    def get_table_spec(cls):
        """Return the ``table_spec`` expression."""
        return cls.table_spec

    @classmethod
    def get_join_constraint(cls):
        """Return the ``join_constraint`` expression."""
        return cls.join_constraint

    @classmethod
    def get_select_statement(cls):
        """Return the ``select_statement`` expression."""
        return cls.select_statement

    @classmethod
    def get_expr(cls):
        """Return the ``expr`` expression."""
        return cls.expr

    @classmethod
    def get_where_clause(cls):
        """Return the ``where_clause`` expression."""
        return cls.where_clause

    @classmethod
    def get_where_expr(cls):
        """Return the ``where_expr`` expression."""
        return cls.where_expr

    @classmethod
    def test_dialect_specific_2(cls):
        """Run ``test_succeed`` / ``test_fail`` checks on sample strings for
        the identifier, table_spec, column_spec, variable, argument_list,
        function_call and case_expr expressions."""
        log.info("Testing Microsoft SQL Server-specific aspects...")

        log.info("Testing quoted identifiers")
        test_succeed(cls.identifier, "[FROM]")
        test_succeed(cls.identifier, "[SELECT FROM]")

        log.info("Testing table_spec")
        # SQL Server uses up to: db.schema.table.column
        test_succeed(cls.table_spec, "mytable")
        test_succeed(cls.table_spec, "mydb.mytable")
        test_succeed(cls.table_spec, "mydb.[my silly table]")
        test_succeed(cls.table_spec, "mydb.myschema.mytable")
        test_fail(cls.table_spec, "mydb . mytable")
        test_fail(cls.table_spec, "mydb.myschema.mytable.mycol")

        log.info("Testing column_spec")
        test_succeed(cls.column_spec, "mycol")
        test_succeed(cls.column_spec, "forename")
        test_succeed(cls.column_spec, "mytable.mycol")
        test_succeed(cls.column_spec, "t1.a")
        test_succeed(cls.column_spec, "[my silly table].[my silly column]")
        test_succeed(cls.column_spec, "mydb.myschema.mytable.mycol")
        test_succeed(cls.column_spec, "myschema.mytable.mycol")
        test_fail(cls.column_spec, "myschema . mytable . mycol")

        log.info("Testing variable")
        test_succeed(cls.variable, "@myvar")

        log.info("Testing argument_list")
        test_succeed(cls.argument_list, "@myvar, 5")

        log.info("Testing function_call")
        test_succeed(cls.function_call, "myfunc(@myvar, 5)")

        # ---------------------------------------------------------------------
        # Expressions
        # ---------------------------------------------------------------------

        log.info("Testing case_expr")
        test_succeed(
            cls.case_expr, """
            CASE v
              WHEN 2 THEN x
              WHEN 3 THEN y
              ELSE -99
            END
        """)
for i in range(len(args), 0, -1))) NAME = Word(alphas, alphanums + '_') INTEGER = Word(nums).setName('integer') INTEGER_K = Combine(INTEGER + Optional('_' + (INTEGER | NAME))) EOL = p.LineEnd() FortranComment = Regex(r'!.*$') FortranComment.setParseAction(lambda s, loc, toks: [' ' + toks[0]]) EOLL = Optional(FortranComment) + EOL precision = Combine('.' + INTEGER) exponent = Combine(oneOf('d e D E') + Optional(oneOf('+ -')) + INTEGER) REAL = Combine(INTEGER + ((precision + exponent) | precision | exponent)) STRING = quotedString comp_op = Forward() user_op = NotAny(comp_op | oneOf('.not. .and. .or. .eqv. .neqv. ** // % .true. .false.')) \ + Combine('.' + NAME + '.') atom = Forward() calllist = Forward() array_sub = '(' + Optional(atom) + ':' + Optional(atom) + Optional(':' + atom) + ')' type_sub = '%' + NAME trailer = p.Or((calllist, array_sub, type_sub)) user_monadic_expr = Forward() user_monadic_expr << ((user_op + user_monadic_expr) | atom) factor = Forward() power = user_monadic_expr + ZeroOrMore(trailer) + Optional('**' + factor) factor << power
CaselessLiteral("}arroba}"), CaselessLiteral("{arroba"), CaselessLiteral("arroba{"), CaselessLiteral("}arroba"), CaselessLiteral("arroba}"), # additional patterns CaselessLiteral("-at-"), CaselessLiteral("-et-"), CaselessLiteral("-arroba-"), ]) + Optional(White())).addParseAction(replaceWith("@")) more_at_fanging_patterns = (not_uppercase_word_regex + Combine( Optional(White()) + Or([Literal( "AT"), Literal("ET"), Literal("ARROBA")]) + Optional(White())).addParseAction(replaceWith("@")) + NotAny(uppercase_word)) colon_slash_slash_fanging_patterns = Combine( Optional(White()) + Or([ # '://' - enclosed with ( and ) CaselessLiteral("(://("), CaselessLiteral("(://)"), CaselessLiteral(")://("), CaselessLiteral(")://)"), CaselessLiteral("(://"), CaselessLiteral("://("), CaselessLiteral(")://"), CaselessLiteral("://)"), # '://' - enclosed with [ and ] CaselessLiteral("[://["), CaselessLiteral("[://]"),
def create(cls, base_shader_path, base_texture_path):
    """
    Assemble the pyparsing grammar for a Stanford polygon (PLY) header and
    wrap it in a new parser instance.

    :param base_shader_path: passed through unchanged to the ``cls`` constructor
    :param base_texture_path: passed through unchanged to the ``cls`` constructor
    :return: ``cls(header_grammar, base_shader_path, base_texture_path)``
    """
    # Numeric primitives
    float_value = pyparsing_common.real()
    int_value = pyparsing_common.integer()

    # Header delimiters
    header_open = cls._or(cls.begin_header_keyword, suppress=True)
    header_close = cls._or(cls.end_header_keyword, suppress=True)

    # ---- comment statements ----
    comment_kw = cls._or(cls.comment_keyword, suppress=True)

    def file_comment(tag, results_name):
        # "<comment keyword> <tag> <file name>", grouped under results_name
        statement = (comment_kw + Suppress(CaselessKeyword(tag)) +
                     Word(alphanums + ".-_"))
        return Group(statement)(results_name)

    vertex_shader_comment = file_comment("VertexShaderFile",
                                         "vertex_shader_file")
    fragment_shader_comment = file_comment("FragmentShaderFile",
                                           "fragment_shader_file")
    texture_comment = file_comment("TextureFile", "texture_file")
    # Any other comment: free text, as long as it is not a TextureFile line
    free_comment = comment_kw + NotAny("TextureFile") + Word(printables + " ")

    # ---- format statement ----
    format_kw = cls._or(cls.format_keyword, suppress=True)
    file_type = cls._or(cls.format_type_map)
    format_stmt = Group(
        format_kw + file_type("file_type") + float_value("version")
    )("format")

    # ---- property statements ----
    property_kw = cls._or(cls.property_keyword, suppress=True)
    list_kw = cls._or(cls.list_keyword, suppress=True)
    data_type = cls._or(cls.data_type_map)
    scalar_prefix = property_kw + data_type("data_type")

    def scalar_property(name, *keywords):
        # A named run of scalar properties sharing the "property <type>" prefix
        return cls._aggregate_property(name, scalar_prefix, *keywords)

    def color_channel(short_name, long_name):
        # One colour channel, spelled either way, recorded under "name"
        spelling = MatchFirst(
            (CaselessKeyword(short_name), CaselessKeyword(long_name)))
        return Group(scalar_prefix + spelling("name"))

    position_prop = scalar_property(
        "position", *(cls._or(axis) for axis in ("x", "y", "z")))

    # red, green and blue are mandatory; alpha may be absent
    color_prop = Group(And([
        color_channel("r", "red"),
        color_channel("g", "green"),
        color_channel("b", "blue"),
        Optional(color_channel("a", "alpha")),
    ]))("color")

    material_props = []
    for group_name, channel_names in (
        ("ambient_color",
         ("ambient_red", "ambient_green", "ambient_blue", "ambient_alpha")),
        ("diffuse_color",
         ("diffuse_red", "diffuse_green", "diffuse_blue", "diffuse_alpha")),
        ("specular_color",
         ("specular_red", "specular_green", "specular_blue",
          "specular_alpha")),
    ):
        material_props.append(
            scalar_property(group_name, *map(cls._or, channel_names)))
    ambient_prop, diffuse_prop, specular_prop = material_props

    # Each texture coordinate accepts several alternative spellings
    texture_prop = scalar_property(
        "texture",
        *(cls._or(*spellings)
          for spellings in (("s", "u", "tx"), ("t", "v", "ty"))))
    normal_prop = scalar_property(
        "normal", *(cls._or(axis) for axis in ("nx", "ny", "nz")))
    specular_power_prop = scalar_property(
        "specular_power", CaselessKeyword("specular_power"))
    opacity_prop = scalar_property("opacity", CaselessKeyword("opacity"))

    # List properties: "property list <index type> <data type> <name>"
    list_prefix = (property_kw + list_kw + data_type("index_type") +
                   data_type("data_type"))
    vertex_index_prop = cls._aggregate_property(
        "vertex_index", list_prefix,
        cls._or("vertex_index", "vertex_indices"))
    material_index_prop = cls._aggregate_property(
        "material_index", list_prefix,
        cls._or("material_index", "material_indices"))

    # ---- element statements ----
    element_kw = cls._or(cls.element_keyword, suppress=True)
    vertex_properties = OneOrMore(
        position_prop | color_prop | ambient_prop | diffuse_prop |
        specular_prop | texture_prop | normal_prop | specular_power_prop |
        opacity_prop)
    vertex_element = Group(
        element_kw + CaselessKeyword("vertex")("name") + int_value("count") +
        Group(vertex_properties)("properties"))
    face_properties = vertex_index_prop | material_index_prop
    face_element = Group(
        element_kw + CaselessKeyword("face")("name") + int_value("count") +
        Group(face_properties)("properties"))
    any_element = vertex_element | face_element

    # ---- whole header ----
    comment_block = Group(ZeroOrMore(
        vertex_shader_comment | fragment_shader_comment | texture_comment |
        free_comment))("comments")
    element_block = Group(OneOrMore(any_element))("elements")
    header_body = format_stmt + comment_block + element_block
    header_grammar = header_open + header_body + header_close

    return cls(header_grammar, base_shader_path, base_texture_path)
def _rest_after(prefix):
    """Build a parser that drops the literal ``prefix`` and yields the rest of the line."""
    return Suppress(prefix) + restOfLine


# Simple "<keyword> <anything>" configuration lines
parse_ipv6_destinationguard = _rest_after('destination-guard ')
parse_ipv6_dhcpguard = _rest_after('dhcp guard ')
parse_lldp = _rest_after('lldp ')
parse_username = _rest_after('username ')
parse_aaa = _rest_after('aaa ')
parse_stp = _rest_after('spanning-tree ')
# parse_vtp = Suppress('vtp ') + restOfLine
parse_line = _rest_after('line ')
parse_ip_ssh = _rest_after('ip ssh ')
parse_arp_proxy = _rest_after('ip arp proxy ')

parse_vstack = Suppress('no') + 'vstack'

# "enable secret|password [<number> ]<ignored remainder>"
parse_enable_password = (
    Suppress('enable')
    + MatchFirst(['secret', 'password'])
    + Optional(Word(nums) + Suppress(White(exact=1)))
    + Suppress(restOfLine)
)

# The NotAny(White()) guard rejects input that starts with whitespace
parse_ip_dhcp = (
    NotAny(White())
    + Suppress('ip dhcp snooping')
    + Optional(
        Suppress('vlan') + Word(nums) + ZeroOrMore(Suppress(',') + Word(nums))
    )
)
parse_ip_arp = (
    NotAny(White())
    + Suppress('ip arp inspection')
    + Suppress('vlan')
    + Word(nums)
    + ZeroOrMore(Suppress(',') + Word(nums))
)
parse_ip_service = (
    NotAny(White())
    + Suppress('ip')
    + MatchFirst(['finger', 'identd', 'source-route', 'bootp server'])
)
parse_ip_http = NotAny(White()) + _rest_after('ip http ')

# Sub-commands following "aaa"
# aaa_authorization = Suppress('authorization ') + restOfLine
aaa_authentication = _rest_after('authentication ')
aaa_accounting = _rest_after('accounting ')
aaa_groups = _rest_after('group server ')


def utill(parse_meth, featur_str):
    """Parse ``featur_str`` with ``parse_meth`` and return the tokens as a plain list."""
    return parse_meth.parseString(featur_str).asList()
def __init__(self):
    """
    Build every pyparsing element of the grammar and store each one as an
    instance attribute.

    Elements are created in dependency order: keywords, punctuation and
    literal tokens, operator groups, forward-declared productions, operands,
    expressions, declarations and finally statements.  Two entry points are
    assigned at the end: ``self.grammar`` (function declarations and
    ';'-terminated sequence declarations up to end of input) and
    ``self.grammar_test`` (any statement sequence up to end of input).

    NOTE(review): this method reads ``self.AST``, ``self.semantic_analyser``
    and ``self.nest_operand_pairs``, none of which is assigned here (only
    ``self._AST`` is) -- presumably they are properties/methods defined
    elsewhere on the class; confirm.
    """
    self._AST = Syntax_tree()
    # keywords
    self.int_ = Keyword('Int')
    self.false_ = Keyword('False')
    self.true_ = Keyword('True')
    # 'Bit' with an optional leading '@', joined into a single token
    self.bit_ = Combine(Optional(Literal("@")) + Keyword('Bit'))
    self.sbox_ = Keyword('Sbox')
    self.l_shift_ = Keyword('<<')
    self.r_shift_ = Keyword('>>')
    self.circ_l_shift_ = Keyword('<<<')
    self.circ_r_shift_ = Keyword('>>>')
    self.bit_val = self.false_ ^ self.true_
    self.if_ = Keyword('if')
    self.for_ = Keyword('for')
    self.return_ = Keyword('return')
    self.void_ = Keyword('void')
    # Identifier: a word not preceded (lookahead) by one of the listed
    # keywords.  self.sbox_ appears twice in the lookahead; self.return_ and
    # self.void_ are not part of it.
    self.ID = NotAny(self.sbox_ ^ self.int_ ^ self.bit_ ^ self.false_ ^ self.true_ ^ self.if_ ^ self.for_ ^ self.sbox_) + Word(alphas + '_', alphanums + '_')  # NOQA
    # Same definition as self.ID but a separate element: the 'ID'-tagging
    # parse action further down is attached to self.ID only.
    self.ID_ = NotAny(self.sbox_ ^ self.int_ ^ self.bit_ ^ self.false_ ^ self.true_ ^ self.if_ ^ self.for_ ^ self.sbox_) + Word(alphas + '_', alphanums + '_')
    # Other Tokens
    self.l_bracket = Literal('(')
    self.r_bracket = Literal(')')
    self.eq_set = Literal('=')("set")
    self.term_st = Literal(';')
    # Integer literals: binary (0b...), hexadecimal (0x...) and decimal.
    # The binary and hex forms are passed through
    # semantic_analyser.convert_base_to_str.
    self.b_2_num = Combine(Literal("0b") + Word("01"))
    self.b_2_num.setParseAction(self.semantic_analyser.convert_base_to_str)
    self.b_16_num = Combine(Literal("0x") + Word(srange("[0-9a-fA-F]")))
    self.b_16_num.setParseAction(self.semantic_analyser.convert_base_to_str)
    self.b_10_num = Word(nums)
    self.bit_and = Literal('&')
    self.bit_or = Keyword('|')
    self.bit_xor = Keyword('^')
    self.bit_not = Literal('~')
    self.eq_compare = Literal('==')
    self.neq_compare = Literal('!=')
    self.l_brace = Literal('{')
    self.r_brace = Literal('}')
    self.bin_add = Literal('+')
    self.bin_mult = Literal('*')
    self.bin_sub = Literal('-')
    self.bin_mod = Literal('%')
    self.bin_div = Literal('/')
    self.g_than = Literal('>')
    self.ge_than = Literal('>=')
    self.l_than = Literal('<')
    self.le_than = Literal('<=')
    self.log_and = Keyword('&&')
    self.log_or = Keyword('||')
    self.l_sq_b = Literal('[')
    self.r_sq_b = Literal(']')
    # Operator Productions
    self.log_op = self.log_and ^ self.log_or
    self.comparison_op = self.g_than ^ self.ge_than ^ self.l_than ^ self.le_than ^ self.eq_compare ^ self.neq_compare
    self.arith_op = self.bin_add ^ self.bin_mult ^ self.bin_sub ^ self.bin_mod ^ self.bin_div
    self.bitwise_op = self.bit_and ^ self.bit_or ^ self.bit_xor ^ self.bit_not ^ self.l_shift_ ^ self.r_shift_ ^ self.circ_l_shift_ ^ self.circ_r_shift_
    # Grammar
    # Forward declarations for mutually recursive productions.  Those filled
    # in later with '<<' are stmt, for_loop, cast, seq_val, function_call,
    # index_select, seq_ and seq_range; expr and operand are instead rebound
    # by plain assignment below.
    self.stmt = Forward()
    self.for_loop = Forward()
    self.cast = Forward()
    self.seq_val = Forward()
    self.int_value = self.b_2_num ^ self.b_16_num ^ self.b_10_num
    self.expr = Forward()
    self.function_call = Forward()
    self.index_select = Forward()
    self.seq_ = Forward()
    self.operand = Forward()
    self.seq_range = Forward()
    # #######Operands
    # "<ID or sequence literal>.Sbox(<ID or integer>)"; the ~White() guards
    # forbid whitespace at those positions
    self.sbox_call = Group((self.ID ^ self.seq_val) + ~White() + Literal(".") + ~White() + self.sbox_ + ~White() + self.l_bracket + (self.ID ^ self.int_value) + self.r_bracket)
    # Rebinds self.operand: the Forward() assigned above is replaced
    self.operand = self.index_select | self.seq_val | self.function_call | self.ID | self.int_value | self.cast | self.bit_val
    # Each parse action below wraps the matched tokens as [<tag>, [tokens]]
    self.seq_val.setParseAction(lambda t: ['Seq_val'] + [t.asList()])
    self.index_select.setParseAction(lambda t: ['index_select'] + [t.asList()])
    self.function_call.setParseAction(lambda t: ['function_call'] + [t.asList()])
    self.ID.setParseAction(lambda t: ['ID'] + [t.asList()])
    self.int_value.setParseAction(lambda t: ['Int_val'] + [t.asList()])
    self.cast.setParseAction(lambda t: ['cast'] + [t.asList()])
    self.bit_val.setParseAction(lambda t: ['Bit_val'] + [t.asList()])
    self.seq_range.setParseAction(lambda t: ['seq_range'] + [t.asList()])
    # #######Expressions
    # Rebinds self.expr (the Forward() above is replaced).  Operator levels
    # are listed in the order bitwise, comparison, logical, arithmetic; all
    # are binary and left-associative.
    self.expr = Group(infixNotation(Group(self.operand), [(self.bitwise_op, 2, opAssoc.LEFT, self.nest_operand_pairs), (self.comparison_op, 2, opAssoc.LEFT, self.nest_operand_pairs), (self.log_op, 2, opAssoc.LEFT, self.nest_operand_pairs), (self.arith_op, 2, opAssoc.LEFT, self.nest_operand_pairs)]))
    # self.expr.setParseAction(self.expr_p)
    # Sized types: "[@]Int(<expr>)" and "Sbox(<expr>)", no whitespace before '('
    self.int_size = Combine(Optional(Literal("@")) + self.int_)("decl") + ~White() + Suppress(self.l_bracket) + self.expr + Suppress(self.r_bracket)
    self.sbox_size = self.sbox_ + ~White() + Suppress(self.l_bracket) + self.expr + Suppress(self.r_bracket)
    # Fill in the forward-declared productions.  '+' binds tighter than '<<',
    # so the whole right-hand side is what gets assigned to each Forward.
    self.seq_range << self.expr + Suppress(Literal(":")) + self.expr
    self.seq_val << Suppress(self.l_sq_b) + Optional(Group(delimitedList(self.expr))) + Suppress(self.r_sq_b)
    # Sequence type: an element type followed by one or more "[<expr>]" sizes
    self.seq_ << (self.int_size | self.bit_ | self.sbox_size)("type") +\
        Group(OneOrMore(~White() + Suppress(self.l_sq_b) + self.expr + Suppress(self.r_sq_b)))("seq_size")
    self.function_call << self.ID("function_name") + ~White() + Suppress(self.l_bracket) +\
        Optional(Group(delimitedList(self.expr)))("param_list") + Suppress(self.r_bracket)
    # Cast: "(<type>) <expr>"
    self.cast << Suppress(self.l_bracket) + Group((self.seq_ | self.int_size | self.bit_)) +\
        Suppress(self.r_bracket) + (self.expr)("target")
    # Index selection on an ID or a parenthesised cast; each "[...]" holds a
    # comma-separated list of expressions or "<expr>:<expr>" ranges
    self.index_select << (self.ID("ID") ^ (Suppress(self.l_bracket) + self.cast + Suppress(self.r_bracket))("cast")) + ~White() +\
        Group(OneOrMore(Suppress(self.l_sq_b) + Group(delimitedList(self.expr ^ Group(Group(self.seq_range))))("index") + Suppress(self.r_sq_b)))
    # ####### Declarations
    # Assignment to an indexed target or a plain identifier
    self.id_set = Group((Group(self.index_select) | self.ID_) + self.eq_set + self.expr)
    self.id_set.setParseAction(self.AST.id_set)
    self.int_decl = Group(self.int_size + delimitedList(Group((self.ID_("ID") + self.eq_set + self.expr("set_value")) | self.ID_("ID")))("value"))  # NOQA
    self.int_decl.setParseAction(self.AST.int_decl)
    self.bit_decl = Group(self.bit_("decl") + delimitedList(Group(self.ID_("ID")) ^ Group(self.ID_("ID") + self.eq_set + self.expr("set_value")))("value"))
    self.bit_decl.setParseAction(self.AST.bit_decl)
    self.seq_decl = Group(self.seq_("decl") + Group(self.ID)("ID") + Optional(self.eq_set + Group(self.expr))("value"))
    self.seq_decl.setParseAction(self.AST.seq_decl)
    self.decl = self.bit_decl ^ self.int_decl ^ self.seq_decl
    # ###### Statements
    self.return_stmt = Group(self.return_ + self.expr)
    self.return_stmt.setParseAction(self.AST.return_stmt)
    # Separate '{' literal so that its own parse action runs when a function
    # body opens
    self.function_start = Literal("{")
    self.function_start.setParseAction(self.AST.function_start)
    # Not referenced again in this method: function_decl closes with
    # self.r_brace instead
    self.function_end = Literal("}")
    self.function_decl = Group((Group(self.seq_) | Group(self.int_size) | Group(self.bit_) | Group(self.void_))("return_type") + Group(self.ID)("func_ID") + Suppress(self.l_bracket) +
                               Group(Optional(delimitedList(Group((self.seq_ | self.int_size | self.bit_) + Group(self.ID)))))("func_param") +  # NOQA
                               Suppress(self.r_bracket) + Suppress(self.function_start) + Group(self.stmt)("body") + Suppress(self.r_brace))
    self.function_decl.setParseAction(self.AST.function_decl)
    # The punctuation of a for-loop uses dedicated literal elements so that
    # each position can carry its own AST parse action
    self.for_init = Literal('(')
    self.for_init.setParseAction(self.AST.begin_for)
    self.for_terminator = Literal(';')
    self.for_terminator.setParseAction(self.AST.for_terminator)
    self.for_increment = Literal(';')
    self.for_increment.setParseAction(self.AST.for_increment)
    # Same operand and operators as self.expr, but the levels are listed in
    # the order logical, bitwise, comparison, arithmetic
    self.terminator_expr = Group(infixNotation(Group(self.operand), [(self.log_op, 2, opAssoc.LEFT, self.nest_operand_pairs), (self.bitwise_op, 2, opAssoc.LEFT, self.nest_operand_pairs), (self.comparison_op, 2, opAssoc.LEFT, self.nest_operand_pairs), (self.arith_op, 2, opAssoc.LEFT, self.nest_operand_pairs)]))
    self.terminator_expr.setParseAction(self.AST.terminator_expr)
    self.for_body = Literal('{')
    self.for_body.setParseAction(self.AST.for_body)
    self.end_for = Literal('}')
    self.end_for.setParseAction(self.AST.end_for)
    # "for(<init>; <condition>; <increment>) { <statements> }"; all three
    # header parts are optional
    self.for_loop << Group(self.for_ + ~White() + Suppress(self.for_init) + Optional(delimitedList(self.decl ^ self.id_set))("init") + Suppress(self.for_terminator) + Optional(self.terminator_expr) + Suppress(self.for_increment) + Optional(delimitedList(self.id_set))("increm") + Suppress(self.r_bracket) + Suppress(self.for_body) + self.stmt("loop_body") + Suppress(self.end_for))
    self.if_condition = Suppress(self.l_bracket) + self.expr + Suppress(self.r_bracket)
    self.if_condition.setParseAction(self.AST.if_cond)
    # Attached to the shared 'if' keyword element created at the top
    self.if_.setParseAction(self.AST.begin_if)
    self.if_body_st = Literal('{')
    self.if_body_st.setParseAction(self.AST.if_body_st)
    self.if_body_end = Literal('}')
    self.if_body_end.setParseAction(self.AST.if_body_end)
    self.if_stmt = Group(self.if_ + self.if_condition("if_cond") + Suppress(self.if_body_st) + Group(self.stmt).setResultsName("body") + Suppress(self.if_body_end))
    self.single_expr = self.expr + Suppress(self.term_st)
    self.single_expr.setParseAction(self.AST.stand_alone_expr)
    # '+' binds tighter than '^', so each alternative below is a complete
    # "<production> + ';'" (or a bare production where no ';' is appended)
    self.stmt << ZeroOrMore(self.decl + Suppress(self.term_st) ^ self.function_decl ^ self.id_set + Suppress(self.term_st) ^ self.single_expr ^ self.for_loop ^ self.if_stmt ^ self.return_stmt + Suppress(self.term_st) ^ self.sbox_call + Suppress(self.term_st))
    self.grammar_test = self.stmt + StringEnd()  # Allows single statements to be parsed
    # Top-level entry point: only function declarations and ';'-terminated
    # sequence declarations, up to end of input
    self.grammar = ZeroOrMore(self.function_decl ^ self.seq_decl + Suppress(self.term_st)) + StringEnd()
def handle_data(token): global current_block current_block.add_data(token[0]) eol = LineEnd().suppress() begin = Keyword('begin').suppress() end = Keyword('end').suppress() comment = (Literal('#') + restOfLine).suppress() data_value = Combine(OneOrMore(CharsNotIn('#\n\r'))) data = LineStart() + Optional(data_value) + Optional(comment) + eol block_name = Word(alphas, alphanums + '_') begin_block = LineStart() + begin + block_name + Optional(comment) + eol end_block = LineStart() + end + block_name + Optional(comment) + eol junk = ZeroOrMore(LineStart() + NotAny(begin) + restOfLine + eol).suppress() block_def = begin_block + Group(ZeroOrMore(NotAny(end) + data)) + end_block block_defs = junk + OneOrMore(block_def + junk) begin_block.addParseAction(create_block) end_block.addParseAction(finish_block) data_value.addParseAction(handle_data) test_str = ''' bla bla begin block_1 # bla bla 0.17 # suspicious value # comment line -7.34 end block_1 # and noe the second block
if n == 0: return Empty() else: return Group((Suppress(funOrbNumber(n)) + funCoefficients(n)).setResultsName("lastCoeffs")) # ====================> Basis File <========================== comment = Literal("#") + restOfLine parseAtomLabel = Word(srange("[A-Z]"), max=1) + Optional( Word(srange("[a-z]"), max=1)) parserBasisName = Word(alphanums + "-") + Suppress(restOfLine) parserFormat = OneOrMore(natural + NotAny(FollowedBy(point))) parserKey = (parseAtomLabel.setResultsName("atom") + parserBasisName.setResultsName("basisName") + Suppress(Literal("1"))) parserBasisData = OneOrMore(floatNumber) parserBasis = (parserKey + parserFormat.setResultsName("format") + parserBasisData.setResultsName("coeffs")) topParseBasis = OneOrMore(Suppress(comment)) + OneOrMore( Group(parserBasis + Suppress(Optional(OneOrMore(comment))))) # ===============================<>==================================== # Parsing From File
class SqlGrammarMySQL(SqlGrammar):
    """
    pyparsing grammar for the MySQL dialect of SQL (SELECT statements only).

    All grammar elements are class attributes built once when the class body
    executes; the ``get_*`` class methods expose the main ones.  The
    ``test_dialect_specific_*`` methods exercise individual elements through
    the module's ``test_succeed`` / ``test_fail`` helpers.
    """

    # -------------------------------------------------------------------------
    # Forward declarations
    # -------------------------------------------------------------------------
    expr = Forward()
    select_statement = Forward()

    # -------------------------------------------------------------------------
    # Keywords
    # -------------------------------------------------------------------------
    # https://dev.mysql.com/doc/refman/5.7/en/keywords.html
    mysql_reserved_words = """
        ACCESSIBLE ADD ALL ALTER ANALYZE AND AS ASC ASENSITIVE
        BEFORE BETWEEN BIGINT BINARY BLOB BOTH BY
        CALL CASCADE CASE CHANGE CHAR CHARACTER CHECK COLLATE COLUMN CONDITION
        CONSTRAINT CONTINUE CONVERT CREATE CROSS CURRENT_DATE CURRENT_TIME
        CURRENT_TIMESTAMP CURRENT_USER CURSOR
        DATABASE DATABASES DAY_HOUR DAY_MICROSECOND DAY_MINUTE DAY_SECOND DEC
        DECIMAL DECLARE DEFAULT DELAYED DELETE DESC DESCRIBE DETERMINISTIC
        DISTINCT DISTINCTROW DIV DOUBLE DROP DUAL
        EACH ELSE ELSEIF ENCLOSED ESCAPED EXISTS EXIT EXPLAIN
        FALSE FETCH FLOAT FLOAT4 FLOAT8 FOR FORCE FOREIGN FROM FULLTEXT
        GENERATED GET GRANT GROUP
        HAVING HIGH_PRIORITY HOUR_MICROSECOND HOUR_MINUTE HOUR_SECOND
        IF IGNORE IN INDEX INFILE INNER INOUT INSENSITIVE INSERT INT INT1 INT2
        INT3 INT4 INT8 INTEGER INTERVAL INTO IO_AFTER_GTIDS IO_BEFORE_GTIDS IS
        ITERATE
        JOIN
        KEY KEYS KILL
        LEADING LEAVE LEFT LIKE LIMIT LINEAR LINES LOAD LOCALTIME
        LOCALTIMESTAMP LOCK LONG LONGBLOB LONGTEXT LOOP LOW_PRIORITY
        MASTER_BIND MASTER_SSL_VERIFY_SERVER_CERT MATCH MAXVALUE MEDIUMBLOB
        MEDIUMINT MEDIUMTEXT MIDDLEINT MINUTE_MICROSECOND MINUTE_SECOND MOD
        MODIFIES
        NATURAL NOT NO_WRITE_TO_BINLOG NULL NUMERIC
        ON OPTIMIZE OPTIMIZER_COSTS OPTION OPTIONALLY OR ORDER OUT OUTER
        OUTFILE
        PARTITION PRECISION PRIMARY PROCEDURE PURGE
        RANGE READ READS READ_WRITE REAL REFERENCES REGEXP RELEASE RENAME
        REPEAT REPLACE REQUIRE RESIGNAL RESTRICT RETURN REVOKE RIGHT RLIKE
        SCHEMA SCHEMAS SECOND_MICROSECOND SELECT SENSITIVE SEPARATOR SET SHOW
        SIGNAL SMALLINT SPATIAL SPECIFIC SQL SQLEXCEPTION SQLSTATE SQLWARNING
        SQL_BIG_RESULT SQL_CALC_FOUND_ROWS SQL_SMALL_RESULT SSL STARTING
        STORED STRAIGHT_JOIN
        TABLE TERMINATED THEN TINYBLOB TINYINT TINYTEXT TO TRAILING TRIGGER
        TRUE
        UNDO UNION UNIQUE UNLOCK UNSIGNED UPDATE USAGE USE USING UTC_DATE
        UTC_TIME UTC_TIMESTAMP
        VALUES VARBINARY VARCHAR VARCHARACTER VARYING VIRTUAL
        WHEN WHERE WHILE WITH WRITE
        XOR
        YEAR_MONTH
        ZEROFILL
    """
    mysql_nonreserved_keywords = """
        ACCOUNT ACTION AFTER AGAINST AGGREGATE ALGORITHM ALWAYS ANALYSE ANY
        ASCII AT AUTOEXTEND_SIZE AUTO_INCREMENT AVG AVG_ROW_LENGTH
        BACKUP BEGIN BINLOG BIT BLOCK BOOL BOOLEAN BTREE BYTE
        CACHE CASCADED CATALOG_NAME CHAIN CHANGED CHANNEL CHARSET CHECKSUM
        CIPHER CLASS_ORIGIN CLIENT CLOSE COALESCE CODE COLLATION COLUMNS
        COLUMN_FORMAT COLUMN_NAME COMMENT COMMIT COMMITTED COMPACT COMPLETION
        COMPRESSED COMPRESSION CONCURRENT CONNECTION CONSISTENT
        CONSTRAINT_CATALOG CONSTRAINT_NAME CONSTRAINT_SCHEMA CONTAINS CONTEXT
        CPU CUBE CURRENT CURSOR_NAME
        DATA DATAFILE DATE DATETIME DAY DEALLOCATE DEFAULT_AUTH DEFINER
        DELAY_KEY_WRITE DES_KEY_FILE DIAGNOSTICS DIRECTORY DISABLE DISCARD
        DISK DO DUMPFILE DUPLICATE DYNAMIC
        ENABLE ENCRYPTION END ENDS ENGINE ENGINES ENUM ERROR ERRORS ESCAPE
        EVENT EVENTS EVERY EXCHANGE EXECUTE EXPANSION EXPIRE EXPORT EXTENDED
        EXTENT_SIZE
        FAST FAULTS FIELDS FILE FILE_BLOCK_SIZE FILTER FIRST FIXED FLUSH
        FOLLOWS FORMAT FOUND FULL FUNCTION
        GENERAL GEOMETRY GEOMETRYCOLLECTION GET_FORMAT GLOBAL GRANTS
        GROUP_REPLICATION
        HANDLER HASH HELP HOST HOSTS HOUR
        IDENTIFIED IGNORE_SERVER_IDS IMPORT INDEXES INITIAL_SIZE INSERT_METHOD
        INSTALL INSTANCE INVOKER IO IO_THREAD IPC ISOLATION ISSUER
        JSON
        KEY_BLOCK_SIZE
        LANGUAGE LAST LEAVES LESS LEVEL LINESTRING LIST LOCAL LOCKS LOGFILE
        LOGS
        MASTER MASTER_AUTO_POSITION MASTER_CONNECT_RETRY MASTER_DELAY
        MASTER_HEARTBEAT_PERIOD MASTER_HOST MASTER_LOG_FILE MASTER_LOG_POS
        MASTER_PASSWORD MASTER_PORT MASTER_RETRY_COUNT MASTER_SERVER_ID
        MASTER_SSL MASTER_SSL_CA MASTER_SSL_CAPATH MASTER_SSL_CERT
        MASTER_SSL_CIPHER MASTER_SSL_CRL MASTER_SSL_CRLPATH MASTER_SSL_KEY
        MASTER_TLS_VERSION MASTER_USER MAX_CONNECTIONS_PER_HOUR
        MAX_QUERIES_PER_HOUR MAX_ROWS MAX_SIZE MAX_STATEMENT_TIME
        MAX_UPDATES_PER_HOUR MAX_USER_CONNECTIONS MEDIUM MEMORY MERGE
        MESSAGE_TEXT MICROSECOND MIGRATE MINUTE MIN_ROWS MODE MODIFY MONTH
        MULTILINESTRING MULTIPOINT MULTIPOLYGON MUTEX MYSQL_ERRNO
        NAME NAMES NATIONAL NCHAR NDB NDBCLUSTER NEVER NEW NEXT NO NODEGROUP
        NONBLOCKING NONE NO_WAIT NUMBER NVARCHAR
        OFFSET OLD_PASSWORD ONE ONLY OPEN OPTIONS OWNER
        PACK_KEYS PAGE PARSER PARSE_GCOL_EXPR PARTIAL PARTITIONING PARTITIONS
        PASSWORD PHASE PLUGIN PLUGINS PLUGIN_DIR POINT POLYGON PORT PRECEDES
        PREPARE PRESERVE PREV PRIVILEGES PROCESSLIST PROFILE PROFILES PROXY
        QUARTER QUERY QUICK
        READ_ONLY REBUILD RECOVER REDOFILE REDO_BUFFER_SIZE REDUNDANT RELAY
        RELAYLOG RELAY_LOG_FILE RELAY_LOG_POS RELAY_THREAD RELOAD REMOVE
        REORGANIZE REPAIR REPEATABLE REPLICATE_DO_DB REPLICATE_DO_TABLE
        REPLICATE_IGNORE_DB REPLICATE_IGNORE_TABLE REPLICATE_REWRITE_DB
        REPLICATE_WILD_DO_TABLE REPLICATE_WILD_IGNORE_TABLE REPLICATION RESET
        RESTORE RESUME RETURNED_SQLSTATE RETURNS REVERSE ROLLBACK ROLLUP
        ROTATE ROUTINE ROW ROWS ROW_COUNT ROW_FORMAT RTREE
        SAVEPOINT SCHEDULE SCHEMA_NAME SECOND SECURITY SERIAL SERIALIZABLE
        SERVER SESSION SHARE SHUTDOWN SIGNED SIMPLE SLAVE SLOW SNAPSHOT SOCKET
        SOME SONAME SOUNDS SOURCE SQL_AFTER_GTIDS SQL_AFTER_MTS_GAPS
        SQL_BEFORE_GTIDS SQL_BUFFER_RESULT SQL_CACHE SQL_NO_CACHE SQL_THREAD
        SQL_TSI_DAY SQL_TSI_HOUR SQL_TSI_MINUTE SQL_TSI_MONTH SQL_TSI_QUARTER
        SQL_TSI_SECOND SQL_TSI_WEEK SQL_TSI_YEAR STACKED START STARTS
        STATS_AUTO_RECALC STATS_PERSISTENT STATS_SAMPLE_PAGES STATUS STOP
        STORAGE STRING SUBCLASS_ORIGIN SUBJECT SUBPARTITION SUBPARTITIONS
        SUPER SUSPEND SWAPS SWITCHES
        TABLES TABLESPACE TABLE_CHECKSUM TABLE_NAME TEMPORARY TEMPTABLE TEXT
        THAN TIME TIMESTAMP TIMESTAMPADD TIMESTAMPDIFF TRANSACTION TRIGGERS
        TRUNCATE TYPE TYPES
        UNCOMMITTED UNDEFINED UNDOFILE UNDO_BUFFER_SIZE UNICODE UNINSTALL
        UNKNOWN UNTIL UPGRADE USER USER_RESOURCES USE_FRM
        VALIDATION VALUE VARIABLES VIEW
        WAIT WARNINGS WEEK WEIGHT_STRING WITHOUT WORK WRAPPER
        X509 XA XID XML
        YEAR
    """
    # Sorted, de-duplicated union of the MySQL reserved words and the ANSI-92
    # reserved words; the non-reserved keywords above are not included.
    mysql_keywords = sorted(set(
        mysql_reserved_words.split() + ANSI92_RESERVED_WORD_LIST.split()
    ))
    # log.critical(mysql_keywords)
    keyword = make_words_regex(mysql_keywords, caseless=True, name="keyword")

    # -------------------------------------------------------------------------
    # Comments
    # -------------------------------------------------------------------------
    # http://dev.mysql.com/doc/refman/5.7/en/comments.html
    comment = (ansi_comment | bash_comment | cStyleComment)

    # -------------------------------------------------------------------------
    # identifier
    # -------------------------------------------------------------------------
    # http://dev.mysql.com/doc/refman/5.7/en/identifiers.html
    # "bare" identifiers exclude the keywords; "liberal" ones do not.
    bare_identifier_word = make_regex_except_words(
        r"\b[a-zA-Z0-9$_]*\b",
        mysql_keywords,
        caseless=True,
        name="bare_identifier_word"
    )
    liberal_identifier_word = make_pyparsing_regex(
        r"\b[a-zA-Z0-9$_]*\b",
        caseless=True,
        name="liberal_identifier_word"
    )
    identifier = (
        bare_identifier_word |
        QuotedString(quoteChar="`", unquoteResults=False)
    ).setName("identifier")
    liberal_identifier = (
        liberal_identifier_word |
        QuotedString(quoteChar="`", unquoteResults=False)
    ).setName("liberal_identifier")
    # http://dev.mysql.com/doc/refman/5.7/en/charset-collate.html
    collation_name = identifier.copy()
    column_name = identifier.copy()
    column_alias = identifier.copy()
    table_name = identifier.copy()
    table_alias = identifier.copy()
    index_name = identifier.copy()
    function_name = identifier.copy()
    parameter_name = identifier.copy()
    database_name = identifier.copy()
    partition_name = identifier.copy()

    no_dot = NotAny('.')
    # MySQL allows keywords in the later parts of combined identifiers;
    # therefore, for example, "count.thing.thing" is not OK, but
    # "thing.thing.count" is.
    table_spec = (
        Combine(database_name + '.' + liberal_identifier + no_dot) |
        table_name + no_dot
    ).setName("table_spec")
    column_spec = (
        Combine(database_name + '.' + liberal_identifier + '.' +
                liberal_identifier + no_dot) |
        Combine(table_name + '.' + liberal_identifier + no_dot) |
        Combine(column_name + no_dot)
    ).setName("column_spec")

    # http://dev.mysql.com/doc/refman/5.7/en/expressions.html
    bind_parameter = Literal('?')

    # http://dev.mysql.com/doc/refman/5.7/en/user-variables.html
    variable = Regex(r"@[a-zA-Z0-9\.$_]+").setName("variable")

    # http://dev.mysql.com/doc/refman/5.7/en/functions.html
    argument_list = (
        delimitedList(expr).setName("arglist").setParseAction(', '.join)
    )
    # ... we don't care about sub-parsing the argument list, so use combine=True
    # or setParseAction: http://stackoverflow.com/questions/37926516
    function_call = Combine(function_name + LPAR) + argument_list + RPAR

    # http://dev.mysql.com/doc/refman/5.7/en/partitioning-selection.html
    partition_list = (
        LPAR + delim_list(partition_name, combine=True) + RPAR
    ).setName("partition_list")

    # http://dev.mysql.com/doc/refman/5.7/en/index-hints.html
    index_list = delim_list(index_name, combine=False)
    # ... see pyparsing_bugtest_delimited_list_combine
    # The index list is optional for USE but mandatory for IGNORE and FORCE.
    index_hint = (
        (
            USE + (INDEX | KEY) +
            Optional(FOR + (JOIN | (ORDER + BY) | (GROUP + BY))) +
            LPAR + Optional(index_list) + RPAR
        ) |
        (
            IGNORE + (INDEX | KEY) +
            Optional(FOR + (JOIN | (ORDER + BY) | (GROUP + BY))) +
            LPAR + index_list + RPAR
        ) |
        (
            FORCE + (INDEX | KEY) +
            Optional(FOR + (JOIN | (ORDER + BY) | (GROUP + BY))) +
            LPAR + index_list + RPAR
        )
    )
    index_hint_list = delim_list(index_hint, combine=True).setName(
        "index_hint_list")

    # -------------------------------------------------------------------------
    # CASE
    # -------------------------------------------------------------------------
    # NOT THIS: https://dev.mysql.com/doc/refman/5.7/en/case.html
    # THIS: https://dev.mysql.com/doc/refman/5.7/en/control-flow-functions.html#operator_case  # noqa
    case_expr = (
        (
            CASE + expr +
            OneOrMore(WHEN + expr + THEN + expr) +
            Optional(ELSE + expr) +
            END
        ) |
        (
            CASE +
            OneOrMore(WHEN + expr + THEN + expr) +
            Optional(ELSE + expr) +
            END
        )
    ).setName("case_expr")

    # -------------------------------------------------------------------------
    # MATCH
    # -------------------------------------------------------------------------
    # https://dev.mysql.com/doc/refman/5.7/en/fulltext-search.html#function_match
    search_modifier = (
        (IN + NATURAL + LANGUAGE + MODE + Optional(WITH + QUERY + EXPANSION)) |
        (IN + BOOLEAN + MODE) |
        (WITH + QUERY + EXPANSION)
    )
    match_expr = (
        MATCH + LPAR + delim_list(column_spec) + RPAR +
        AGAINST + LPAR + string_literal + Optional(search_modifier) + RPAR
    ).setName("match_expr")
    # ... don't use "expr"; MATCH AGAINST uses restricted expressions, and we
    # don't want it to think that "MATCH ... AGAINST ('+keyword' IN
    # BOOLEAN MODE)" resembles the IN in "WHERE something IN (SELECT ...)"

    # -------------------------------------------------------------------------
    # Expressions
    # -------------------------------------------------------------------------
    # http://dev.mysql.com/doc/refman/5.7/en/expressions.html
    # https://pyparsing.wikispaces.com/file/view/select_parser.py
    # http://dev.mysql.com/doc/refman/5.7/en/operator-precedence.html
    expr_term = (
        INTERVAL + expr + time_unit |
        # "{" + identifier + expr + "}" |  # see MySQL notes; antique ODBC syntax  # noqa
        Optional(EXISTS) + LPAR + select_statement + RPAR |
        # ... e.g. mycol = EXISTS(SELECT ...)
        # ... e.g. mycol IN (SELECT ...)
        LPAR + delim_list(expr) + RPAR |
        # ... e.g. mycol IN (1, 2, 3) -- "(1, 2, 3)" being a term here
        case_expr |
        match_expr |
        bind_parameter |
        variable |
        function_call |
        literal_value |
        column_spec  # not just identifier
    )
    UNARY_OP, BINARY_OP, TERNARY_OP = 1, 2, 3
    expr << infixNotation(expr_term, [
        # https://pythonhosted.org/pyparsing/
        # Having lots of operations in the list here SLOWS IT DOWN A LOT.
        # Just combine them into an ordered list.
        (BINARY | COLLATE | oneOf('! - + ~'), UNARY_OP, opAssoc.RIGHT),
        (
            (
                oneOf('^ * / %') | DIV | MOD |
                oneOf('+ - << >> & | = <=> >= > <= < <> !=') |
                (IS + Optional(NOT)) | LIKE | REGEXP | (Optional(NOT) + IN) |
                (SOUNDS + LIKE)
            ),  # RNC; presumably at same level as LIKE
            BINARY_OP,
            opAssoc.LEFT
        ),
        ((BETWEEN, AND), TERNARY_OP, opAssoc.LEFT),
        # CASE handled above (hoping precedence is not too much of a problem)
        (NOT, UNARY_OP, opAssoc.RIGHT),
        (AND | '&&' | XOR | OR | '||' | ':=', BINARY_OP, opAssoc.LEFT),
    ], lpar=LPAR, rpar=RPAR)
    # ignores LIKE [ESCAPE]

    # -------------------------------------------------------------------------
    # SELECT
    # -------------------------------------------------------------------------
    compound_operator = UNION + Optional(ALL | DISTINCT)
    # no INTERSECT or EXCEPT in MySQL?

    ordering_term = (
        expr + Optional(COLLATE + collation_name) + Optional(ASC | DESC)
    )
    # ... COLLATE can appear in lots of places;
    # http://dev.mysql.com/doc/refman/5.7/en/charset-collate.html

    join_constraint = Optional(Group(  # join_condition in MySQL grammar
        (ON + expr) |
        (USING + LPAR + delim_list(column_name) + RPAR)
    ))

    # http://dev.mysql.com/doc/refman/5.7/en/join.html
    join_op = Group(
        COMMA |
        STRAIGHT_JOIN |
        NATURAL + (Optional(LEFT | RIGHT) + Optional(OUTER)) + JOIN |
        (INNER | CROSS) + JOIN |
        Optional(LEFT | RIGHT) + Optional(OUTER) + JOIN
        # ignores antique ODBC "{ OJ ... }" syntax
    )

    join_source = Forward()
    # NOTE(review): the last two parenthesised parts are joined with "+", so
    # the second alternative is "select_statement [AS] alias ( join_source )".
    # MySQL's table_factor lists "table_subquery [AS] alias" and
    # "( table_references )" as separate alternatives, so "|" may have been
    # intended -- left unchanged here; confirm before altering the grammar.
    single_source = (
        (
            table_spec.copy().setResultsName("from_tables",
                                             listAllMatches=True) +
            Optional(PARTITION + partition_list) +
            Optional(Optional(AS) + table_alias) +
            Optional(index_hint_list)
        ) |
        (select_statement + Optional(AS) + table_alias) +
        (LPAR + join_source + RPAR)
    )
    join_source << Group(
        single_source + ZeroOrMore(join_op + single_source + join_constraint)
    )("join_source")
    # ... must have a Group to append to it later, it seems
    # ... but name it "join_source" here, or it gets enclosed in a further list
    # when you name it later

    aggregate_function = (
        # https://dev.mysql.com/doc/refman/5.7/en/group-by-functions.html
        AVG |
        BIT_AND | BIT_OR | BIT_XOR |
        COUNT |  # also: special handling for COUNT(DISTINCT ...), see below
        GROUP_CONCAT |
        MAX | MIN |
        STD | STDDEV | STDDEV_POP | STDDEV_SAMP |
        SUM |
        VAR_POP | VAR_SAMP | VARIANCE
    )
    result_base = (
        # Aggregate functions: e.g. "MAX(" allowed, "MAX (" not allowed
        Combine(COUNT + LPAR) + '*' + RPAR |  # special aggregate function
        Combine(COUNT + LPAR) + DISTINCT + expr + RPAR |  # special aggregate function  # noqa
        Combine(aggregate_function + LPAR) + expr + RPAR |
        expr |
        '*' |
        Combine(table_name + '.' + '*') |
        column_spec |
        literal_value
    )
    result_column = (
        result_base + Optional(Optional(AS) + column_alias)
    ).setResultsName("select_columns", listAllMatches=True)

    # -------------------------------------------------------------------------
    # SELECT
    # -------------------------------------------------------------------------
    # http://dev.mysql.com/doc/refman/5.7/en/select.html
    """
    SELECT
        [ALL | DISTINCT | DISTINCTROW ]
          [HIGH_PRIORITY]
          [MAX_STATEMENT_TIME = N]
          [STRAIGHT_JOIN]
          [SQL_SMALL_RESULT] [SQL_BIG_RESULT] [SQL_BUFFER_RESULT]
          [SQL_CACHE | SQL_NO_CACHE] [SQL_CALC_FOUND_ROWS]
        select_expr [, select_expr ...]
        [FROM table_references
          [PARTITION partition_list]
        [WHERE where_condition]
        [GROUP BY {col_name | expr | position}
          [ASC | DESC], ... [WITH ROLLUP]]
        [HAVING where_condition]
        [ORDER BY {col_name | expr | position}
          [ASC | DESC], ...]
        [LIMIT {[offset,] row_count | row_count OFFSET offset}]

    ... ignore below here...

        [PROCEDURE procedure_name(argument_list)]
        [INTO OUTFILE 'file_name'
            [CHARACTER SET charset_name]
            export_options
          | INTO DUMPFILE 'file_name'
          | INTO var_name [, var_name]]
        [FOR UPDATE | LOCK IN SHARE MODE]]
    """

    where_expr = Group(expr).setResultsName("where_expr")
    where_clause = Group(
        Optional(WHERE + where_expr)
    ).setResultsName("where_clause")
    select_core = (
        SELECT +
        Group(Optional(ALL | DISTINCT | DISTINCTROW))("select_specifier") +
        Optional(HIGH_PRIORITY) +
        Optional(MAX_STATEMENT_TIME + '=' + integer) +
        Optional(STRAIGHT_JOIN) +
        Optional(SQL_SMALL_RESULT) +
        Optional(SQL_BIG_RESULT) +
        Optional(SQL_BUFFER_RESULT) +
        Optional(SQL_CACHE | SQL_NO_CACHE) +
        Optional(SQL_CALC_FOUND_ROWS) +
        Group(delim_list(result_column))("select_expression") +
        Optional(
            FROM + join_source +
            Optional(PARTITION + partition_list) +
            where_clause +
            Optional(
                GROUP + BY +
                delim_list(ordering_term +
                           Optional(ASC | DESC))("group_by_term") +
                Optional(WITH + ROLLUP)
            ) +
            Optional(HAVING + expr("having_expr"))
        )
    )
    select_statement << (
        select_core +
        ZeroOrMore(compound_operator + select_core) +
        Optional(
            ORDER + BY +
            delim_list(ordering_term +
                       Optional(ASC | DESC))("order_by_terms")
        ) +
        Optional(LIMIT + (
            (Optional(integer("offset") + COMMA) + integer("row_count")) |
            (integer("row_count") + OFFSET + integer("offset"))
        )) +
        # PROCEDURE ignored
        # rest ignored
        Optional(';')
    )
    select_statement.ignore(comment)

    # http://dev.mysql.com/doc/refman/5.7/en/identifiers.html
    # ... approximately (and conservatively):
    MYSQL_INVALID_FIRST_IF_UNQUOTED = re.compile(r"[^a-zA-Z_$]")
    MYSQL_INVALID_IF_UNQUOTED = re.compile(r"[^a-zA-Z0-9_$]")

    def __init__(self):
        super().__init__()

    @classmethod
    def quote_identifier(cls, identifier: str) -> str:
        """
        Return ``identifier`` wrapped in backticks.

        Backticks inside the identifier are doubled, which is how MySQL
        represents the quote character within a quoted identifier; without
        this the result would end at the first embedded backtick.
        """
        return "`{}`".format(identifier.replace("`", "``"))

    @classmethod
    def is_quoted(cls, identifier: str) -> bool:
        """
        Return True if ``identifier`` starts and ends with a backtick.

        A lone "`" is not treated as quoted: the opening and closing quote
        must be two distinct characters.
        """
        return (
            len(identifier) >= 2 and
            identifier.startswith("`") and
            identifier.endswith("`")
        )

    @classmethod
    def requires_quoting(cls, identifier: str) -> bool:
        """
        Return True if ``identifier`` cannot be used unquoted.

        That is the case when it contains a character outside
        ``[a-zA-Z0-9_$]``, when its first character is outside
        ``[a-zA-Z_$]``, or when its upper-cased form is in
        ``mysql_keywords``.  An empty identifier fails the assertion.
        """
        assert identifier, "Empty identifier"
        if cls.MYSQL_INVALID_IF_UNQUOTED.search(identifier):
            return True
        firstchar = identifier[0]
        if cls.MYSQL_INVALID_FIRST_IF_UNQUOTED.search(firstchar):
            return True
        if identifier.upper() in cls.mysql_keywords:
            return True
        return False

    @classmethod
    def get_grammar(cls):
        """Return the top-level grammar (here, just SELECT)."""
        # Grammar (here, just SELECT)
        return cls.select_statement

    @classmethod
    def get_column_spec(cls):
        """Return the ``column_spec`` grammar element."""
        return cls.column_spec

    @classmethod
    def get_result_column(cls):
        """Return the ``result_column`` grammar element."""
        return cls.result_column

    @classmethod
    def get_join_op(cls):
        """Return the ``join_op`` grammar element."""
        return cls.join_op

    @classmethod
    def get_table_spec(cls):
        """Return the ``table_spec`` grammar element."""
        return cls.table_spec

    @classmethod
    def get_join_constraint(cls):
        """Return the ``join_constraint`` grammar element."""
        return cls.join_constraint

    @classmethod
    def get_select_statement(cls):
        """Return the ``select_statement`` grammar element."""
        return cls.select_statement

    @classmethod
    def get_expr(cls):
        """Return the ``expr`` grammar element."""
        return cls.expr

    @classmethod
    def get_where_clause(cls):
        """Return the ``where_clause`` grammar element."""
        return cls.where_clause

    @classmethod
    def get_where_expr(cls):
        """Return the ``where_expr`` grammar element."""
        return cls.where_expr

    @classmethod
    def test_dialect_specific_1(cls):
        """Check that several elements reject the text "one two three four"."""
        log.info("Testing MySQL-specific aspects (1/2)...")
        test_fail(cls.case_expr, "one two three four")
        test_fail(cls.match_expr, "one two three four")
        test_fail(cls.bind_parameter, "one two three four")
        test_fail(cls.variable, "one two three four")
        test_fail(cls.function_call, "one two three four")
        test_fail(literal_value, "one two three four")
        # test_fail(cls.column_spec, "one two three four")  # matches "one"

    @classmethod
    def test_dialect_specific_2(cls):
        """Run positive and negative parse checks on MySQL-specific elements."""
        log.info("Testing MySQL-specific aspects (2/2)...")

        log.info("Testing expr")
        test_succeed(cls.expr, "a DIV b")
        test_succeed(cls.expr, "a MOD b")

        log.info("Testing quoted identifiers")
        test_succeed(cls.identifier, "`a`")
        test_succeed(cls.identifier, "`FROM`")
        test_succeed(cls.identifier, "`SELECT FROM`")
        # MySQL uses up to: schema.table.column
        test_succeed(cls.table_spec, "mydb.`my silly table`")
        test_succeed(cls.table_spec, "myschema.mytable")
        test_fail(cls.table_spec, "mydb.myschema.mytable")
        # ... but not 4:
        test_succeed(cls.column_spec, "`my silly table`.`my silly column`")
        test_succeed(cls.column_spec, "myschema.mytable.mycol")
        test_succeed(cls.column_spec, "starfeeder.mass_event.thing")
        test_succeed(cls.column_spec, "starfeeder.mass_event.at")
        test_fail(cls.column_spec, "mydb.myschema.mytable.mycol")

        log.info("Testing variable")
        test_succeed(cls.variable, "@myvar")

        log.info("Testing argument_list")
        test_succeed(cls.argument_list, "@myvar, 5")

        log.info("Testing function_call")
        test_succeed(cls.function_call, "myfunc(@myvar, 5)")

        log.info("Testing index_list")
        test_succeed(cls.index_list, "idx1, idx2")

        log.info("Testing index_hint")
        test_succeed(cls.index_hint, "USE INDEX FOR JOIN (idx1, idx2)")

        log.info("Testing case_expr")
        test_succeed(cls.case_expr, """
            CASE v
              WHEN 2 THEN x
              WHEN 3 THEN y
              ELSE -99
            END
        """)

        log.info("Testing match_expr")
        test_succeed(cls.match_expr, """
            MATCH (content_field)
            AGAINST('+keyword1 +keyword2')
        """)
        test_succeed(cls.match_expr, """
            MATCH (content_field)
            AGAINST('+keyword1 +keyword2' IN BOOLEAN MODE)
        """)
ParserElement.enablePackrat() # Significant speedup. from . import operators from .units import Q_ # Definitions. ######################################################### # These are reserved words that cannot be variables, constants, or units. keyword = oneOf("to") # Special symbols for currencies. currency_symbols = oneOf("€ £ $ ₪ ¥ ¥ ₩ ₩ ฿ ₹") # Identifier, must start with unicode letter, can then contain unicode # letter, unicode number, or underscore. identifier = currency_symbols | (NotAny(keyword) + Regex(r'[^\W\d_]\w*')) # Operands integer = Word(nums).setParseAction(operators.process_int) float_ = Regex( r'''[0-9]+ # integer part (?: (?: # optional decimal part followed by e-part (?: \.[0-9]* )? [eE] [-\u2212+]? # U+2212 is unicode minus [0-9]+ ) | (?: \.[0-9]* ) # mandatory decimal part without e-part
# Grammar fragments for domain names and IP addresses.
#
# NOTE: the label definition ignores the fact that labels should not end in a hyphen.
label = Word(initChars=alphanums, bodyChars=alphanums + '-', max=63)
domain_tld = Or(tlds)
# One or more "label." pairs followed by a known TLD; the FollowedBy() lookahead
# requires more label characters after each dot. Matched tokens are passed
# through downcaseTokens.
domain_name = (
    alphanum_word_start
    + Combine(
        Combine(OneOrMore(label + ('.' + FollowedBy(Word(alphanums + '-')))))('domain_labels')
        + domain_tld('tld')
    )
    + alphanum_word_end
).setParseAction(downcaseTokens)

# One dotted-quad section: at most three digits, normalised through int()
# (which drops leading zeros) and accepted only when the value is below 256.
ipv4_section = (
    Word(nums, asKeyword=True, max=3)
    .setParseAction(lambda x: str(int(x[0])))
    .addCondition(lambda tokens: int(tokens[0]) < 256)
)
# Basically, the grammar below says: start at any word that starts with a '.' or
# a number. Words starting with '.' are allowed to begin a match on purpose:
# they fail later in the grammar, so nothing that starts with a '.' is matched.
ipv4_address = (
    alphanum_word_start
    + WordStart('.' + nums)
    + Combine((ipv4_section + '.') * 3 + ipv4_section)
    # Raw string: '\.' and '\S' are regex escapes, not Python string escapes.
    + NotAny(Regex(r'\.\S'))
    + alphanum_word_end
)

hexadectet = Word(hexnums, min=1, max=4)
ipv6_address_full = alphanum_word_start + Combine((hexadectet + ":") * 7 + hexadectet)
ipv6_shortened_word_start = copy.deepcopy(alphanum_word_start)
# The condition on the end of this grammar makes sure that any shortened IPv6
# address has '::' in it.
ipv6_address_shortened = Combine(
    OneOrMore(Or([hexadectet + Word(':'), Word(':')])) + hexadectet
).addCondition(lambda tokens: tokens[0].count('::') > 0)
# Either form must contain more than one ':' to be accepted.
ipv6_address = (
    Or([ipv6_address_full, ipv6_address_shortened]).addCondition(
        lambda tokens: tokens[0].count(':') > 1
    )
    + alphanum_word_end
)
for line in input_text.split("\n"): # print("Word " + str(i) + " --- " + line) line_p = line.replace("\xa0", " ") # line_p = unicodedata.normalize("NFC", line) word_text.append(line_p) word_bold = (Literal("**").suppress() + Concat( OneOrMore( Word(alphas) ^ Cleanup( Literal("(").suppress() + Word(alphas) + Literal(")").suppress()))) + Literal("**").suppress()) word_def = ( LineStart() + Optional(Word(nums + " /")).suppress() + Concat(SkipTo(Word("►¶"))).setResultsName("definition") + OneOrMore( Literal("►").suppress() + NotAny(Literal("►")).suppress() + Concat(SkipTo(oneOf(genders) ^ Word("|¶►") ^ LineEnd())).setResultsName("words") + Concat( Optional(OneOrMore( oneOf(genders) + Optional(Literal(" ")).suppress()), default="na").setResultsName("gender")) + Optional( ( SkipTo(Literal("¶")).suppress() + Literal("¶").suppress() + Concat(SkipTo(Literal("►") ^ LineEnd())) # SkipTo(Word("►¶")).suppress() ).setResultsName("sources"), default="na")) + Optional( (SkipTo(Literal("►►")).suppress() + Literal("►►").suppress() + SkipTo(Literal("►") ^ LineEnd())).setResultsName("misc"),
class Parser(object):
    """Lexical and syntax analysis.

    Builds a pyparsing grammar in ``__init__`` and wires its productions to
    callbacks on a ``Syntax_tree`` instance (exposed as ``AST``). Semantic
    analysis is delegated to ``AST.semantic_analyser``.
    """

    @property
    def semantic_analyser(self):
        """Semantic analyser owned by the current syntax tree."""
        return self._AST.semantic_analyser

    def __init__(self):
        """Create a fresh syntax tree and construct every grammar production.

        Statement order matters here: several productions are ``Forward``
        placeholders that are filled in with ``<<`` only after the
        expressions they depend on have been built.
        """
        self._AST = Syntax_tree()

        # keywords
        self.int_ = Keyword('Int')
        self.false_ = Keyword('False')
        self.true_ = Keyword('True')
        self.bit_ = Combine(Optional(Literal("@")) + Keyword('Bit'))
        self.sbox_ = Keyword('Sbox')
        self.l_shift_ = Keyword('<<')
        self.r_shift_ = Keyword('>>')
        self.circ_l_shift_ = Keyword('<<<')
        self.circ_r_shift_ = Keyword('>>>')
        self.bit_val = self.false_ ^ self.true_
        self.if_ = Keyword('if')
        self.for_ = Keyword('for')
        self.return_ = Keyword('return')
        self.void_ = Keyword('void')
        # ID and ID_ are built identically but are separate objects: only ID
        # gets a parse action attached further down.
        self.ID = NotAny(self.sbox_ ^ self.int_ ^ self.bit_ ^ self.false_ ^ self.true_ ^ self.if_ ^ self.for_ ^ self.sbox_) + Word(alphas + '_', alphanums + '_')  # NOQA
        self.ID_ = NotAny(self.sbox_ ^ self.int_ ^ self.bit_ ^ self.false_ ^ self.true_ ^ self.if_ ^ self.for_ ^ self.sbox_) + Word(alphas + '_', alphanums + '_')  # NOQA

        # Other Tokens
        self.l_bracket = Literal('(')
        self.r_bracket = Literal(')')
        self.eq_set = Literal('=')("set")
        self.term_st = Literal(';')
        self.b_2_num = Combine(Literal("0b") + Word("01"))
        self.b_2_num.setParseAction(self.semantic_analyser.convert_base_to_str)
        self.b_16_num = Combine(Literal("0x") + Word(srange("[0-9a-fA-F]")))
        self.b_16_num.setParseAction(self.semantic_analyser.convert_base_to_str)
        self.b_10_num = Word(nums)
        self.bit_and = Literal('&')
        self.bit_or = Keyword('|')
        self.bit_xor = Keyword('^')
        self.bit_not = Literal('~')
        self.eq_compare = Literal('==')
        self.neq_compare = Literal('!=')
        self.l_brace = Literal('{')
        self.r_brace = Literal('}')
        self.bin_add = Literal('+')
        self.bin_mult = Literal('*')
        self.bin_sub = Literal('-')
        self.bin_mod = Literal('%')
        self.bin_div = Literal('/')
        self.g_than = Literal('>')
        self.ge_than = Literal('>=')
        self.l_than = Literal('<')
        self.le_than = Literal('<=')
        self.log_and = Keyword('&&')
        self.log_or = Keyword('||')
        self.l_sq_b = Literal('[')
        self.r_sq_b = Literal(']')

        # Operator Productions
        self.log_op = self.log_and ^ self.log_or
        self.comparison_op = (self.g_than ^ self.ge_than ^ self.l_than ^ self.le_than
                              ^ self.eq_compare ^ self.neq_compare)
        self.arith_op = self.bin_add ^ self.bin_mult ^ self.bin_sub ^ self.bin_mod ^ self.bin_div
        self.bitwise_op = (self.bit_and ^ self.bit_or ^ self.bit_xor ^ self.bit_not
                           ^ self.l_shift_ ^ self.r_shift_
                           ^ self.circ_l_shift_ ^ self.circ_r_shift_)

        # Grammar
        self.stmt = Forward()
        self.for_loop = Forward()
        self.cast = Forward()
        self.seq_val = Forward()
        self.int_value = self.b_2_num ^ self.b_16_num ^ self.b_10_num
        # NOTE: expr and operand are rebound (not filled with <<) below, so
        # these two Forward placeholders are never referenced by the grammar.
        self.expr = Forward()
        self.function_call = Forward()
        self.index_select = Forward()
        self.seq_ = Forward()
        self.operand = Forward()
        self.seq_range = Forward()

        # #######Operands
        self.sbox_call = Group((self.ID ^ self.seq_val) + ~White() + Literal(".") + ~White()
                               + self.sbox_ + ~White() + self.l_bracket
                               + (self.ID ^ self.int_value) + self.r_bracket)
        self.operand = (self.index_select | self.seq_val | self.function_call | self.ID
                        | self.int_value | self.cast | self.bit_val)
        # Each operand kind is tagged with a leading label so later stages can
        # tell them apart.
        self.seq_val.setParseAction(lambda t: ['Seq_val'] + [t.asList()])
        self.index_select.setParseAction(lambda t: ['index_select'] + [t.asList()])
        self.function_call.setParseAction(lambda t: ['function_call'] + [t.asList()])
        self.ID.setParseAction(lambda t: ['ID'] + [t.asList()])
        self.int_value.setParseAction(lambda t: ['Int_val'] + [t.asList()])
        self.cast.setParseAction(lambda t: ['cast'] + [t.asList()])
        self.bit_val.setParseAction(lambda t: ['Bit_val'] + [t.asList()])
        self.seq_range.setParseAction(lambda t: ['seq_range'] + [t.asList()])

        # #######Expressions
        self.expr = Group(infixNotation(Group(self.operand), [
            (self.bitwise_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
            (self.comparison_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
            (self.log_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
            (self.arith_op, 2, opAssoc.LEFT, self.nest_operand_pairs)]))
        # self.expr.setParseAction(self.expr_p)
        self.int_size = (Combine(Optional(Literal("@")) + self.int_)("decl") + ~White()
                         + Suppress(self.l_bracket) + self.expr + Suppress(self.r_bracket))
        self.sbox_size = (self.sbox_ + ~White() + Suppress(self.l_bracket) + self.expr
                          + Suppress(self.r_bracket))
        # "+" binds tighter than "<<", so the parentheses below only make the
        # original grouping explicit.
        self.seq_range << (self.expr + Suppress(Literal(":")) + self.expr)
        self.seq_val << (Suppress(self.l_sq_b) + Optional(Group(delimitedList(self.expr)))
                         + Suppress(self.r_sq_b))
        self.seq_ << ((self.int_size | self.bit_ | self.sbox_size)("type")
                      + Group(OneOrMore(~White() + Suppress(self.l_sq_b) + self.expr
                                        + Suppress(self.r_sq_b)))("seq_size"))
        self.function_call << (self.ID("function_name") + ~White() + Suppress(self.l_bracket)
                               + Optional(Group(delimitedList(self.expr)))("param_list")
                               + Suppress(self.r_bracket))
        self.cast << (Suppress(self.l_bracket) + Group((self.seq_ | self.int_size | self.bit_))
                      + Suppress(self.r_bracket) + (self.expr)("target"))
        self.index_select << (
            (self.ID("ID") ^ (Suppress(self.l_bracket) + self.cast + Suppress(self.r_bracket))("cast"))
            + ~White()
            + Group(OneOrMore(Suppress(self.l_sq_b)
                              + Group(delimitedList(self.expr ^ Group(Group(self.seq_range))))("index")
                              + Suppress(self.r_sq_b))))

        # ####### Declarations
        self.id_set = Group((Group(self.index_select) | self.ID_) + self.eq_set + self.expr)
        self.id_set.setParseAction(self.AST.id_set)
        self.int_decl = Group(self.int_size + delimitedList(Group((self.ID_("ID") + self.eq_set + self.expr("set_value")) | self.ID_("ID")))("value"))  # NOQA
        self.int_decl.setParseAction(self.AST.int_decl)
        self.bit_decl = Group(self.bit_("decl") + delimitedList(Group(self.ID_("ID")) ^ Group(self.ID_("ID") + self.eq_set + self.expr("set_value")))("value"))  # NOQA
        self.bit_decl.setParseAction(self.AST.bit_decl)
        self.seq_decl = Group(self.seq_("decl") + Group(self.ID)("ID")
                              + Optional(self.eq_set + Group(self.expr))("value"))
        self.seq_decl.setParseAction(self.AST.seq_decl)
        self.decl = self.bit_decl ^ self.int_decl ^ self.seq_decl

        # ###### Statements
        self.return_stmt = Group(self.return_ + self.expr)
        self.return_stmt.setParseAction(self.AST.return_stmt)
        self.function_start = Literal("{")
        self.function_start.setParseAction(self.AST.function_start)
        self.function_end = Literal("}")
        self.function_decl = Group(
            (Group(self.seq_) | Group(self.int_size) | Group(self.bit_) | Group(self.void_))("return_type")
            + Group(self.ID)("func_ID") + Suppress(self.l_bracket)
            + Group(Optional(delimitedList(Group((self.seq_ | self.int_size | self.bit_) + Group(self.ID)))))("func_param")  # NOQA
            + Suppress(self.r_bracket) + Suppress(self.function_start)
            + Group(self.stmt)("body") + Suppress(self.r_brace))
        self.function_decl.setParseAction(self.AST.function_decl)

        # The for-loop punctuation gets dedicated Literal objects so each one
        # can carry its own AST callback.
        self.for_init = Literal('(')
        self.for_init.setParseAction(self.AST.begin_for)
        self.for_terminator = Literal(';')
        self.for_terminator.setParseAction(self.AST.for_terminator)
        self.for_increment = Literal(';')
        self.for_increment.setParseAction(self.AST.for_increment)
        self.terminator_expr = Group(infixNotation(Group(self.operand), [
            (self.log_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
            (self.bitwise_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
            (self.comparison_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
            (self.arith_op, 2, opAssoc.LEFT, self.nest_operand_pairs)]))
        self.terminator_expr.setParseAction(self.AST.terminator_expr)
        self.for_body = Literal('{')
        self.for_body.setParseAction(self.AST.for_body)
        self.end_for = Literal('}')
        self.end_for.setParseAction(self.AST.end_for)
        self.for_loop << Group(
            self.for_ + ~White() + Suppress(self.for_init)
            + Optional(delimitedList(self.decl ^ self.id_set))("init")
            + Suppress(self.for_terminator) + Optional(self.terminator_expr)
            + Suppress(self.for_increment)
            + Optional(delimitedList(self.id_set))("increm")
            + Suppress(self.r_bracket) + Suppress(self.for_body)
            + self.stmt("loop_body") + Suppress(self.end_for))

        self.if_condition = Suppress(self.l_bracket) + self.expr + Suppress(self.r_bracket)
        self.if_condition.setParseAction(self.AST.if_cond)
        self.if_.setParseAction(self.AST.begin_if)
        self.if_body_st = Literal('{')
        self.if_body_st.setParseAction(self.AST.if_body_st)
        self.if_body_end = Literal('}')
        self.if_body_end.setParseAction(self.AST.if_body_end)
        self.if_stmt = Group(self.if_ + self.if_condition("if_cond") + Suppress(self.if_body_st)
                             + Group(self.stmt).setResultsName("body")
                             + Suppress(self.if_body_end))

        self.single_expr = self.expr + Suppress(self.term_st)
        self.single_expr.setParseAction(self.AST.stand_alone_expr)

        # "+" binds tighter than "^": each alternative is a full statement
        # including its terminator where one is written.
        self.stmt << ZeroOrMore(self.decl + Suppress(self.term_st)
                                ^ self.function_decl
                                ^ self.id_set + Suppress(self.term_st)
                                ^ self.single_expr
                                ^ self.for_loop
                                ^ self.if_stmt
                                ^ self.return_stmt + Suppress(self.term_st)
                                ^ self.sbox_call + Suppress(self.term_st))
        self.grammar_test = self.stmt + StringEnd()  # Allows single statements to be parsed
        self.grammar = ZeroOrMore(self.function_decl
                                  ^ self.seq_decl + Suppress(self.term_st)) + StringEnd()

    def nest_operand_pairs(self, tokens):
        """Re-nest a flat infix match into left-nested groups.

        ``tokens[0]`` is the flat sequence ``operand op operand op operand ...``.
        The first three items form the innermost group; every following
        ``(op, operand)`` pair wraps the result so far one level deeper.

        :param tokens: parse results handed over by ``infixNotation``.
        :return: single-element list holding the nested ``ParseResults``.
        """
        tokens = tokens[0]
        ret = ParseResults(tokens[:3])
        remaining = iter(tokens[3:])
        while True:
            next_pair = (next(remaining, None), next(remaining, None))
            if next_pair == (None, None):
                break
            ret = ParseResults([ret])
            ret += ParseResults(list(next_pair))
        return [ret]

    @property
    def AST(self):
        """Syntax tree that receives the grammar's parse-action callbacks."""
        return self._AST

    @AST.setter
    def AST(self, value):
        self._AST = value

    def analyse_tree_test(self, AST):
        """Run the semantic analyser over ``AST`` and return its result."""
        return self.semantic_analyser.analyse(AST)

    def parse_test_unit(self, data_in):
        """Parse single statements.

        :param data_in: source text to parse with ``grammar_test``.
        :return: ``[results, True]`` on success, ``False`` on a parse error
            (the error is printed).
        """
        try:
            res = self.grammar_test.parseString(data_in)
        except ParseException as details:
            print("The following error occured:")
            print(details)
            return False
        # The result is deliberately not indexed here: the statement grammar is
        # a ZeroOrMore, so an input without statements yields an empty result
        # and ``res[0]`` would raise IndexError.
        return [res, True]

    def parse_test_AST_semantic(self, data_in):
        """Parse single statements and return the AST.

        :return: the syntax tree on success, ``False`` on a parse error.
        """
        try:
            self.grammar_test.parseString(data_in)
        except ParseException as details:
            print("The following error occured:")
            print(details)
            return False
        return self.AST

    def parse_test_integration(self, data_in):
        """Parse with the full grammar (function and sequence declarations only).

        :return: ``[results, True]`` on success, ``False`` on a parse error.
        """
        try:
            res = self.grammar.parseString(data_in)
        except ParseException as details:
            print("The following error occured:")
            print(details)
            return False
        return [res, True]

    def parse(self, data_in):
        """Prod parsing entry point.

        Parses ``data_in`` with the full grammar (a ``ParseException``
        propagates to the caller), then runs semantic analysis.

        :return: the translated IR when analysis returns ``True``,
            otherwise ``None``.
        """
        self.grammar.parseString(data_in)
        if self.semantic_analyser.analyse(self.AST, True) is True:
            return self.semantic_analyser.IR.translate()
        return None
def get_parser(self):
    """Build and return the pyparsing grammar for a specification.

    The returned expression matches zero or more definitions (type
    definitions or constant definitions) and ignores ``#`` comments that
    run to the end of the line. Parse results are routed to this object's
    ``parse_*`` callbacks.
    """

    def hidden(text):
        # Literal that must be present in the input but is dropped from results.
        return Literal(text).suppress()

    def as_int(s, loc, toks):
        # Replace the matched digit string with its integer value.
        return [int(toks[0])]

    declaration = Forward()

    # Reserved words can never be used as identifiers.
    reserved_words = (
        "enum", "case", "struct", "default", "switch", "union", "const",
        "unsigned", "int", "hyper", "float", "double", "bool", "typedef",
        "opaque", "string", "void", "program", "version",
    )
    keyword = Keyword(reserved_words[0])
    for word in reserved_words[1:]:
        keyword = keyword | Keyword(word)

    first_chars = alphas + alphas.upper()
    other_chars = alphanums + alphanums.upper() + "_"
    identifier = NotAny(keyword) + Word(first_chars, other_chars, asKeyword=True)

    constant = Combine(Optional("-") + Word(nums))
    constant.setParseAction(as_int)
    value = constant | identifier

    # enum { NAME = value, ... }
    enum_body = (hidden("{") + identifier + hidden("=") + value
                 + ZeroOrMore(hidden(",") + identifier + hidden("=") + value)
                 + hidden("}"))
    enum_type_spec = hidden("enum") + enum_body
    enum_body.setParseAction(self.parse_enum)

    # struct { declaration; ... }
    struct_body = (hidden("{")
                   + OneOrMore(declaration + hidden(";"))
                   + hidden("}"))
    struct_type_spec = hidden("struct") + struct_body
    struct_body.setParseAction(self.parse_struct)

    # union switch (declaration) { case value: declaration; ... default: ... }
    # The "default" token itself is kept in the results, unlike "case".
    case_stmt = (hidden("case") + value + hidden(":")
                 + declaration + hidden(";"))
    default_stmt = (Literal("default") + hidden(":")
                    + declaration + hidden(";"))
    union_body = (hidden("switch") + hidden("(") + declaration + hidden(")")
                  + hidden("{")
                  + Group(OneOrMore(Group(case_stmt)) + Optional(Group(default_stmt)))
                  + hidden("}"))
    union_type_spec = hidden("union") + union_body
    union_body.setParseAction(self.parse_union)

    # const NAME = constant;
    constant_def = (hidden("const") + identifier + hidden("=")
                    + constant + hidden(";"))
    constant_def.setParseAction(self.parse_const)

    # Built-in scalar types all share one callback.
    int_type = (Optional(Literal("unsigned")) + Literal("int"))
    int_type.setParseAction(self.parse_builtin)
    hyper_type = (Optional(Literal("unsigned")) + Literal("hyper"))
    hyper_type.setParseAction(self.parse_builtin)
    float_type = Literal("float").setParseAction(self.parse_builtin)
    double_type = Literal("double").setParseAction(self.parse_builtin)
    bool_type = Literal("bool").setParseAction(self.parse_builtin)
    type_spec = (int_type
                 | hyper_type
                 | float_type
                 | double_type
                 | bool_type
                 | enum_type_spec
                 | struct_type_spec
                 | union_type_spec
                 | identifier)

    # return_type name(arg_type, ...) = number;
    proc_return = Literal("void") | type_spec
    procedure_def = (proc_return + identifier + hidden("(")
                     + (Literal("void") | type_spec)
                     + ZeroOrMore(hidden(",") + type_spec)
                     + hidden(")") + hidden("=") + constant + hidden(";"))
    procedure_def.setParseAction(self.parse_procedure_def)

    # version NAME { procedure ... } = number;
    version_def = (hidden("version") + identifier + hidden("{")
                   + OneOrMore(procedure_def)
                   + hidden("}") + hidden("=") + constant + hidden(";"))
    version_def.setParseAction(self.parse_version_def)

    program_body = hidden("{") + Group(OneOrMore(version_def)) + hidden("}")

    # Named type definitions; the leading keyword and trailing ";" stay in
    # the results here.
    typedef_form = Literal("typedef") + declaration + Literal(";")
    enum_form = Literal("enum") + identifier + enum_body + Literal(";")
    struct_form = Literal("struct") + identifier + struct_body + Literal(";")
    union_form = Literal("union") + identifier + union_body + Literal(";")
    program_form = (Literal("program") + identifier + program_body
                    + hidden("=") + constant + Literal(";"))
    type_def = ((typedef_form)
                | (enum_form)
                | (struct_form)
                | (union_form)
                | (program_form))
    type_def.setParseAction(self.parse_type_def)

    # Alternatives are tried in this order (first match wins).
    fixed_array = type_spec + identifier + Literal("[") + value + Literal("]")
    variable_array = type_spec + identifier + Literal("<") + value + Literal(">")
    plain = type_spec + identifier
    fixed_opaque = Literal("opaque") + identifier + Literal("[") + value + Literal("]")
    variable_opaque = Literal("opaque") + identifier + Literal("<") + value + Literal(">")
    bounded_string = Literal("string") + identifier + Literal("<") + value + Literal(">")
    optional_data = type_spec + Literal("*") + identifier
    declaration << ((fixed_array)
                    | (variable_array)
                    | (plain)
                    | (fixed_opaque)
                    | (variable_opaque)
                    | (bounded_string)
                    | (optional_data)
                    | Literal("void"))
    declaration.setParseAction(self.parse_decl)

    definition = type_def | constant_def
    specification = ZeroOrMore(definition)
    comment = (Literal("#") + restOfLine).suppress()
    specification.ignore(comment)
    return specification
KW_DEFAULT = Keyword("default")

# A single datum: a number or symbol (longest match of the two), otherwise a
# double- or single-quoted string.
single = (number ^ symbol) | QuotedString('"') | QuotedString("'")

# Parenthesised, comma-separated list of single values.
tuple_ = Group(LPAREN + delimitedList(single) + RPAREN)

# "<index> in" prefix of a subscript; matched but dropped from the results.
domain_index = Suppress(index + Keyword("in"))

subscript_domain = (
    LBRACE
    + delimitedList(Optional(domain_index) + symbol)("subscripts")
    + RBRACE
)

data = single | tuple_

# should not match a single (tr)
simple_data = Group(
    NotAny("(tr)")
    + data
    + ZeroOrMore(Optional(Suppress(",")) + data)
)

# the first element of a set data record cannot be 'dimen', or else
# these would match set_def_stmts
non_dimen_simple_data = NotAny(Keyword("dimen")) + simple_data

# One matrix row: a leading single value followed by one or more PLUS/MINUS marks.
matrix_row = Group(single + OneOrMore(PLUS | MINUS))

matrix_data = (
    ":"
    + OneOrMore(single)("columns")
    + ":="
    + OneOrMore(matrix_row)("data")
)
matrix_data.setParseAction(MatrixData)

# Same matrix layout prefixed by "(tr)"; the prefix is dropped and the result
# is passed through mark_transposed.
tr_matrix_data = Suppress("(tr)") + matrix_data
tr_matrix_data.setParseAction(mark_transposed)
class HLBParser():
    """Parser for HLB statements and for partially typed HLB statements.

    A complete statement has the shape::

        [when <events>] [wait <time>] <actors> <action> [emit <events>]

    The class-level grammars below describe the complete statement and the
    various incomplete prefixes; ``extract_partial`` uses them to work out
    which part of a statement is still missing and which hints to offer.
    """

    when_keyword = CaselessKeyword("when").suppress()
    wait_keyword = CaselessKeyword("wait").suppress()
    emit_keyword = CaselessKeyword("emit").suppress()
    # Lookahead guard: the next word must not be one of the three keywords.
    not_akeyword = (NotAny(emit_keyword) + NotAny(wait_keyword) +
                    NotAny(when_keyword))

    comma = Literal(",").suppress()
    newline = Literal("\n")
    wsp = White().suppress()

    actor = (not_akeyword + Word(alphanums))
    actors = actor + ZeroOrMore((comma + wsp + actor | comma + actor))
    action = (not_akeyword + Word(alphanums))
    event = (not_akeyword + Word(alphanums))
    events = event + ZeroOrMore((comma + wsp + event | comma + event))

    when_trigger = when_keyword + events("t_events")
    wait_time = (not_akeyword + Word(alphanums))
    wait_trigger = (wait_keyword + wait_time("wait_time"))
    trigger = (when_trigger + wsp + wait_trigger | when_trigger | wait_trigger)

    # Full complete statement.
    hlb_statement = (
        trigger + wsp + actors("actors") + wsp + action("action") +
        emit_keyword + events("e_events")
        | trigger + wsp + actors("actors") + wsp + action("action")
        | actors("actors") + wsp + action("action") + emit_keyword +
        events("e_events")
        | actors("actors") + wsp + action("action")) + LineEnd()

    # Unfinished spellings of the "when" / "wait" keywords.
    parts_of_when = (CaselessKeyword("w").suppress()
                     | CaselessKeyword("wh").suppress()
                     | CaselessKeyword("whe").suppress()) + LineEnd()
    parts_of_wait = (CaselessKeyword("w").suppress()
                     | CaselessKeyword("wa").suppress()
                     | CaselessKeyword("wai").suppress()) + LineEnd()

    # Partial statements (not incorrect, but not complete either.)
    hlb_partial_trigger_word = (
        when_trigger + wsp + parts_of_wait) | parts_of_when | parts_of_wait
    hlb_missing_when_list = (when_keyword) + LineEnd()
    hlb_missing_wait_time = (when_trigger + wsp + wait_keyword + LineEnd()
                             | wait_keyword + LineEnd())
    # NOTE(review): "|" takes the first alternative that matches, so the
    # longer second alternative is only tried when a bare trigger fails --
    # which also makes the second alternative fail. Confirm the intent.
    hlb_missing_actors = (trigger("trigger")
                          | trigger("trigger") + actors("actors") + comma +
                          LineEnd())
    hlb_missing_actors_whenstmt = when_trigger + LineEnd()
    hlb_missing_actors_waitstmt = wait_trigger + LineEnd(
    ) | when_trigger + wsp + wait_trigger + LineEnd()
    hlb_missing_action = (trigger("trigger") + wsp + actors("actors") +
                          LineEnd()
                          | actors("actors") + LineEnd())
    hlb_missing_emit_list = (
        trigger("trigger") + wsp + actors("actors")
        | actors("actors")) + wsp + action("action") + emit_keyword + LineEnd()

    def new_return_dict(self,
                        t_events=None,
                        actors=None,
                        action=None,
                        e_events=None,
                        wait_time=None):
        """Bundle the extracted statement parts into one dictionary.

        :param t_events: trigger events (iterable) or None.
        :param actors: actors (iterable) or None.
        :param action: action as a string or an iterable of string pieces.
        :param e_events: emitted events (iterable) or None.
        :param wait_time: wait time as a string or an iterable of pieces.
        :return: dict with keys ``t_events``, ``actors``, ``action``,
            ``e_events`` and ``wait_time``. The list-valued entries are a
            list or None. ``action`` and ``wait_time`` are always strings:
            anything that cannot be joined (including None) goes through
            ``str()``, so a missing value is reported as the text ``"None"``.
        """
        v = {}
        # Should be None or list
        v['t_events'] = self.if_not_none_return_list(t_events)
        # Should be None or list
        v['actors'] = self.if_not_none_return_list(actors)
        # Joined when iterable, str() otherwise (see docstring).
        try:
            v['action'] = ''.join(action)
        except TypeError:
            v['action'] = str(action)
        # Should be None or list
        v['e_events'] = self.if_not_none_return_list(e_events)
        # Joined when iterable, str() otherwise (see docstring).
        try:
            v['wait_time'] = ''.join(wait_time)
        except TypeError:
            v['wait_time'] = str(wait_time)
        return v

    def if_not_none_return_list(self, item):
        """Return ``list(item)``, or None when ``item`` is None."""
        if item is not None:
            return list(item)
        return None

    def parse_stmt(self, statement):
        """Parse a complete HLB statement.

        :param statement: statement text.
        :return: 5-tuple ``(t_events, actors, action, e_events, wait_time)``.
            Parts that are empty in the parse result are reported as None;
            all five are None when the statement does not parse.
        """
        try:
            parsed = self.hlb_statement.parseString(statement)
        except ParseException:
            return (None, None, None, None, None)

        if parsed.e_events is None:
            parsed.e_events = []
        return_tuple = (list(parsed.t_events), list(parsed.actors),
                        list(parsed.action), list(parsed.e_events),
                        str(parsed.wait_time))
        # Normalise "nothing there" (empty list / empty string) to None.
        return (tuple(r if r != [] and r != "" else None
                      for r in return_tuple))

    def transitionBstate2(self, ll):
        """Map the partial statement ``ll`` to a state name.

        The hint type from ``extract_partial`` is translated to a string.
        When actors or an action are required and every actor parsed so far
        is listed in ``globals.actors``, "actor" / "action" is returned
        instead of the table entry.
        """
        hint_type, vals, _hints = self.extract_partial(ll)

        translate = {
            HLBHintType.OPT_EMIT_STMT: "no hint",
            HLBHintType.REQ_EMIT_LIST: "emit",
            HLBHintType.REQ_ACTION: "action",
            HLBHintType.REQ_ACTORS: "nactor",
            HLBHintType.REQ_WAIT_TIME: "waitd",
            HLBHintType.REQ_WHEN_LIST: "whene",
            HLBHintType.REQ_ACTORS_HAVEWHEN: "when",
            HLBHintType.REQ_ACTORS_HAVEWAIT: "wait",
            HLBHintType.NO_HINT: "no hint",
            HLBHintType.BLANK: "start",
            HLBHintType.UNKNOWN_ERROR: "no hint",
        }
        rval = translate[hint_type]

        needs_actor_or_action = (hint_type == HLBHintType.REQ_ACTORS
                                 or hint_type == HLBHintType.REQ_ACTION)
        if (needs_actor_or_action and vals['actors'] is not None
                and all(a.strip() in globals.actors for a in vals['actors'])):
            if hint_type == HLBHintType.REQ_ACTORS:
                return "actor"
            return "action"
        return rval

    def extract_partial(self, partial):
        """Classify a possibly incomplete statement and suggest completions.

        :param partial: text typed so far.
        :return: 3-tuple ``(hint_type, values, hint_list)`` where
            ``hint_type`` is an ``HLBHintType`` member, ``values`` is a
            dictionary from ``new_return_dict`` and ``hint_list`` is a list
            of suggestion strings.
        """
        if len(partial.strip().split()) == 0:
            return (HLBHintType.BLANK, self.new_return_dict(),
                    ['WHEN', 'WAIT', '<ACTOR>'])

        t_events, actors, action, e_events, wait_time = self.parse_stmt(
            partial)

        # In case we have a 2 word action with a <keyword string> pattern:
        try:
            action = ''.join(action)
        except TypeError:
            action = str(action)

        if actors is not None and e_events is not None:
            # Complete statement with emit.
            return (HLBHintType.NO_HINT,
                    self.new_return_dict(t_events=t_events,
                                         actors=actors,
                                         action=action,
                                         e_events=e_events,
                                         wait_time=wait_time), [])
        if actors is not None:
            # Complete statement, with no emit.
            hint_list = ['EMIT ' + action + 'Signal', 'EMIT']
            return (HLBHintType.OPT_EMIT_STMT,
                    self.new_return_dict(t_events=t_events,
                                         actors=actors,
                                         action=action,
                                         wait_time=wait_time), hint_list)

        # From here on the statement is not complete or not correct
        # (actors is None because the full parse failed).
        #
        # WARNING: The following is a longest match first. The order of the
        # checks must be kept, and each successful check must return.

        # Do we have an emit, but missing emit event list?
        try:
            parsed = self.hlb_missing_emit_list.parseString(partial)
        except ParseException:
            pass
        else:
            # Build the hint from the action found by THIS parse: the
            # ``action`` local comes from the failed full parse and is only
            # the text "None" at this point.
            hint_list = [''.join(parsed.action) + 'Signal',
                         '<LIST, OF, TRIGGERS>']
            return (HLBHintType.REQ_EMIT_LIST,
                    self.new_return_dict(t_events=parsed.t_events,
                                         actors=parsed.actors,
                                         action=parsed.action,
                                         wait_time=parsed.wait_time),
                    hint_list)

        # Do we have a wait? but missing the wait time?
        try:
            parsed = self.hlb_missing_wait_time.parseString(partial)
        except ParseException:
            pass
        else:
            hint_list = ['<VARNAME_FOR_WAIT_TIME>']
            return (HLBHintType.REQ_WAIT_TIME,
                    self.new_return_dict(t_events=parsed.t_events),
                    hint_list)

        # Do we have a when? but missing the when event triggers?
        try:
            self.hlb_missing_when_list.parseString(partial)
        except ParseException:
            pass
        else:
            hint_list = ['<TRIGGER_NAME>']
            return (HLBHintType.REQ_WHEN_LIST, self.new_return_dict(),
                    hint_list)

        # Do we just have a start of a keyword?
        if " " not in partial:
            return (HLBHintType.BLANK, self.new_return_dict(), [])
        try:
            self.hlb_partial_trigger_word.parseString(partial)
        except ParseException:
            pass
        else:
            return (HLBHintType.BLANK, self.new_return_dict(), [])

        # Are we missing an action?
        try:
            parsed = self.hlb_missing_action.parseString(partial)
        except ParseException:
            pass
        else:
            hint_list = ['<METHOD_NAME>']
            for a in globals.actors:
                if a != "":
                    hint_list.append(', ' + a.strip())
            return (HLBHintType.REQ_ACTION,
                    self.new_return_dict(t_events=parsed.t_events,
                                         actors=parsed.actors,
                                         wait_time=parsed.wait_time),
                    hint_list)

        # Do we have a trigger, but no actors?
        try:
            parsed = self.hlb_missing_actors.parseString(partial)
        except ParseException:
            return (HLBHintType.UNKNOWN_ERROR, self.new_return_dict(), [])

        hint_list = []
        for a in globals.actors:
            if a != "":
                hint_list.append(a.strip())

        try:
            # We may have a 'when xxx' statement, in which case we should
            # also suggest a 'wait' as a hint.
            self.hlb_missing_actors_whenstmt.parseString(partial)
        except ParseException:
            pass
        else:
            hint_list.append('WAIT')
            return (HLBHintType.REQ_ACTORS_HAVEWHEN,
                    self.new_return_dict(t_events=parsed.t_events),
                    hint_list)

        # We don't just have a 'when xxxx'. We either have a 'wait xxx' or
        # a 'when xxx wait xxx'.
        try:
            self.hlb_missing_actors_waitstmt.parseString(partial)
        except ParseException:
            return (HLBHintType.REQ_ACTORS,
                    self.new_return_dict(t_events=parsed.t_events,
                                         wait_time=parsed.wait_time),
                    hint_list)
        return (HLBHintType.REQ_ACTORS_HAVEWAIT,
                self.new_return_dict(t_events=parsed.t_events,
                                     wait_time=parsed.wait_time),
                hint_list)
Combine(OneOrMore(label + ('.' + FollowedBy(Word(alphanums + '-')))))('domain_labels') + domain_tld('tld') ) + alphanum_word_end ).setParseAction(downcaseTokens) ipv4_section = ( Word(nums, asKeyword=True, max=3) .setParseAction(lambda x: str(int(x[0]))) .addCondition(lambda tokens: int(tokens[0]) < 256) ) # basically, the grammar below says: start any words that start with a '.' or a number; I want to match words that start with a '.' because this will fail later in the grammar and I do not want to match anything that start with a '.' ipv4_address = ( alphanum_word_start + WordStart('.' + nums) + Combine((ipv4_section + '.') * 3 + ipv4_section) + NotAny(Regex('\.\S')) + alphanum_word_end ) hexadectet = Word(hexnums, min=1, max=4) ipv6_address_full = alphanum_word_start + Combine((hexadectet + ":") * 7 + hexadectet) # todo: the ipv6_address_shortened grammar needs some fine-tuning so it doesn't pull in content too broadly ipv6_address_shortened = Combine(OneOrMore(Or([hexadectet + Word(':'), Word(':')])) + hexadectet) ipv6_address = Or([ipv6_address_full, ipv6_address_shortened]) + alphanum_word_end complete_email_comment = Combine('(' + Word(printables.replace(')', '')) + ')') # the complete_email_local_part grammar ignores the fact that characters like <<<(),:;<>@[\] >>> are possible in a quoted complete_email_local_part (and the double-quotes and backslash should be preceded by a backslash) complete_email_local_part = Combine( Optional(complete_email_comment)('email_address_comment') + Word(alphanums + "!#$%&'*+-/=?^_`{|}~." + '"') + Optional(complete_email_comment)('email_address_comment')
'xn--wgbl6a', 'xn--xhq521b', 'xn--xkc2al3hye2a', 'xn--xkc2dl3a5ee0h', 'xn--y9a3aq', 'xn--yfro4i67o', 'xn--ygbi2ammx', 'xn--zfr164b', 'xxx', 'xyz', 'yachts', 'yahoo', 'yamaxun', 'yandex', 'ye', 'yodobashi', 'yoga', 'yokohama', 'you', 'youtube', 'yt', 'yun', 'za', 'zappos', 'zara', 'zero', 'zip', 'zippo', 'zm', 'zone', 'zuerich', 'zw' ] domain_tld = Or(tlds) domain_name = alphanum_word_start + Combine( Combine(OneOrMore(label + ('.')))('domain_labels') + domain_tld('tld')) + alphanum_word_end ipv4_section = Word( nums, asKeyword=True).addCondition(lambda tokens: int(tokens[0]) < 256) # basically, the grammar below says: start any words that start with a '.' or a number; I want to match words that start with a '.' because this will fail later in the grammar and I do not want to match anything that start with a '.' ipv4_address = alphanum_word_start + WordStart('.' + nums) + Combine( (ipv4_section + '.') * 3 + ipv4_section) + NotAny( Regex('\.\S')) + alphanum_word_end hexadectet = Word(hexnums, min=1, max=4) ipv6_address_full = alphanum_word_start + Combine((hexadectet + ":") * 7 + hexadectet) # todo: the ipv6_address_shortened grammar needs some fine-tuning so it doesn't pull in content too broadly ipv6_address_shortened = Combine( OneOrMore(Or([hexadectet + Word(':'), Word(':')])) + hexadectet) ipv6_address = Or([ipv6_address_full, ipv6_address_shortened ]) + alphanum_word_end complete_email_comment = Combine('(' + Word(printables.replace(')', '')) + ')') # the complete_email_local_part grammar ignores the fact that characters like <<<(),:;<>@[\] >>> are possible in a quoted complete_email_local_part (and the double-quotes and backslash should be preceded by a backslash) complete_email_local_part = Combine( Optional(complete_email_comment)('email_address_comment') + Word(alphanums + "!#$%&'*+-/=?^_`{|}~." + '"') +
def comma_list(x):
    """Return a grammar matching one or more ``x`` separated by commas.

    ``OW`` (defined elsewhere in this file; presumably "optional
    whitespace" -- confirm) is allowed on both sides of every comma.
    """
    return x + ZeroOrMore(OW + ',' + OW + x)


# Prefix operators: the symbolic ones plus the NOT keyword.
unary_op = oneOf('- + ~', caseless=True)
unary_op |= CKeyword('NOT')

# TODO this does not encode precedence
binary_op = oneOf("|| * / % + - << >> & | < <= > >= = == != <>", caseless=True)
# The keyword operators are OR-ed together with `|`, which tries the
# alternatives in order and keeps the first one that matches.  'IS NOT' must
# therefore be listed before 'IS'; otherwise the shorter keyword always wins
# and the two-word operator can never be selected.
# NOTE(review): assumes CKeyword builds a (caseless) keyword matcher -- confirm.
binary_op |= reduce(lambda x, y: x | y,
                    [CKeyword(x) for x in 'IS NOT,IS,IN,LIKE,GLOB,MATCH,REGEXP,AND,OR'.split(',')])

# these direct from the SQLite docs
KEYWORDS = 'ABORT ACTION ADD AFTER ALL ALTER ANALYZE AND AS ASC ATTACH AUTOINCREMENT BEFORE BEGIN BETWEEN BY CASCADE CASE CAST CHECK COLLATE COLUMN COMMIT CONFLICT CONSTRAINT CREATE CROSS CURRENT_DATE CURRENT_TIME CURRENT_TIMESTAMP DATABASE DEFAULT DEFERRABLE DEFERRED DELETE DESC DETACH DISTINCT DROP EACH ELSE END ESCAPE EXCEPT EXCLUSIVE EXISTS EXPLAIN FAIL FOR FOREIGN FROM FULL GLOB GROUP HAVING IF IGNORE IMMEDIATE IN INDEX INDEXED INITIALLY INNER INSERT INSTEAD INTERSECT INTO IS ISNULL JOIN KEY LEFT LIKE LIMIT MATCH NATURAL NO NOT NOTNULL NULL OF OFFSET ON OR ORDER OUTER PLAN PRAGMA PRIMARY QUERY RAISE REFERENCES REGEXP REINDEX RELEASE RENAME REPLACE RESTRICT RIGHT ROLLBACK ROW SAVEPOINT SELECT SET TABLE TEMP TEMPORARY THEN TO TRANSACTION TRIGGER UNION UNIQUE UPDATE USING VACUUM VALUES VIEW VIRTUAL WHEN WHERE'

# TODO probably not right charset & does not account for escaping identifiers
# https://www.sqlite.org/lang_keywords.html
# An identifier is a plain name that is NOT one of the reserved keywords:
# the NotAny lookahead rejects the position if any keyword matches there,
# and only then is the name pattern consumed.
identifier = NotAny(
    reduce(lambda x, y: x | y, [CKeyword(x) for x in KEYWORDS.split(' ')])
) + Regex('[a-zA-Z_][a-zA-Z0-9_]*')

# for the purposes of attaching parse actions to these
# objects they need to all be separate. table_in_column
# is to distinguish between tables as found in the grammar
# and those specifically found (optionally) in a column spec
# (which gets triggered whether there's actually a table part
# or not.)
table_name = identifier.copy()
table_in_column = table_name.copy()
database_name = identifier.copy()
column_name = identifier.copy()

# A column reference: [database '.'] [table '.'] column
column = Optional(database_name + '.') + Optional(table_in_column + '.') + column_name