# This file is part of the Printrun suite. # # Printrun is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Printrun is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Printrun. If not, see <http://www.gnu.org/licenses/>. """ CSS at-rules""" from pyparsing import Literal, Combine from .identifier import identifier atkeyword = Combine(Literal("@") + identifier)
def ip4_subnet_range_check(strg, loc, toks):
    """Parse action validating an IPv4 subnet prefix length.

    strg = the original string being parsed
    loc = the location of the matching substring
    toks = a list of the matched tokens, packaged as a ParseResults object

    Prints a diagnostic when the parsed value is outside 1-31; always
    returns None (the matched tokens are left unchanged).
    """
    value = int(toks[0])
    if (value < 1) or (value > 31):
        print("IPv4 subnet is out of range: %d" % value)
        print("strg: %s" % strg)
        print("loc: %s" % loc)
    return None


# Four 1-3 digit groups joined by dots; Combine keeps it one token.
dotted_decimal = Combine(
    Word(nums, max=3) + Literal('.')
    + Word(nums, max=3) + Literal('.')
    + Word(nums, max=3) + Literal('.')
    + Word(nums, max=3))

# Bind9 naming convention
ip4_addr = pyparsing_common.ipv4_address
ip4_addr.setName('<ip4_addr>')

# 1-2 digit prefix length (e.g. the "24" in 10.0.0.0/24).
ip4s_subnet = Word(nums, min=1, max=2)
ip4s_subnet.setName('<ip4_or_ip4_subnet>')

# "-" after '/' makes the subnet part mandatory once the slash is seen
# (pyparsing's ErrorStop: no backtracking past the '/').
ip4s_prefix = Combine(ip4_addr + '/' - ip4s_subnet)
ip4s_prefix.setName('<ip4subnet>')

# Device Index (aka Unix sin6_scope_id) can be 32-bit integer or 64-char readable device name
# _ip6_device_index = r'%([0-9]{1,10})|([a-zA-Z0-9\.\-_]{1,64})'
_ip6_device_index = r'%' + \
def convertToFloat(s, loc, toks):
    """Parse action turning a matched numeric token into a float.

    s = the original string being parsed
    loc = the location of the matching substring
    toks = matched tokens; toks[0] is the numeric text

    Returns the float value, or raises ParseException when the token
    cannot be converted.
    """
    try:
        return float(toks[0])
    except (ValueError, TypeError) as e:
        # Fix 1: catch only conversion errors — the previous bare
        # `except BaseException` also swallowed KeyboardInterrupt/SystemExit.
        # Fix 2: pyparsing's signature is ParseException(pstr, loc, msg);
        # the old call passed `loc` as the parse string and dropped `s`.
        raise ParseException(s, loc, "invalid float format %s" % toks[0]) from e


exponent = CaselessLiteral("e") + Optional(sign) + Word(nums)

# note that almost all these fields are optional,
# and this can match almost anything. We rely on Pythons built-in
# float() function to clear out invalid values - loosely matching like this
# speeds up parsing quite a lot
floatingPointConstant = Combine(
    Optional(sign)
    + Optional(Word(nums))
    + Optional(Literal(".") + Optional(Word(nums)))
    + Optional(exponent))
floatingPointConstant.setParseAction(convertToFloat)

number = floatingPointConstant

# same as FP constant but don't allow a - sign
nonnegativeNumber = Combine(
    Optional(Word(nums))
    + Optional(Literal(".") + Optional(Word(nums)))
    + Optional(exponent))
nonnegativeNumber.setParseAction(convertToFloat)

coordinate = number

# comma or whitespace can separate values all over the place in SVG
class CreateParser(object):
    """
    This class can take a plain "CREATE TABLE" SQL as input and parse it into
    a Table object, so that we have more insight on the detail of this SQL.
    Example:
        sql = 'create table foo ( bar int primary key )'
        parser = CreateParser(sql)
        try:
            tbl_obj = parser.parse()
        except ParseError:
            log.error("Failed to parse SQL")

    This set of BNF rules are basically translated from the MySQL manual:
    http://dev.mysql.com/doc/refman/5.6/en/create-table.html
    If you don't know how to change the rule or fix the bug,
    <Getting Started with Pyparsing> is probably the best book to start with.
    Also this wiki has all supported functions listed:
    https://pyparsing.wikispaces.com/HowToUsePyparsing
    If you want have more information how these characters are matching, add
    .setDebug(True) after the specific token you want to debug
    """

    # Lazily-built singleton parser instances (see get_parser /
    # get_partitions_parser).
    _parser = None
    _partitions_parser = None

    # Basic token
    WORD_CREATE = CaselessLiteral("CREATE").suppress()
    WORD_TABLE = CaselessLiteral("TABLE").suppress()
    COMMA = Literal(",").suppress()
    DOT = Literal(".")
    LEFT_PARENTHESES = Literal("(").suppress()
    RIGHT_PARENTHESES = Literal(")").suppress()
    QUOTE = Literal("'") | Literal('"')
    BACK_QUOTE = Optional(Literal("`")).suppress()
    LENGTH = Word(nums)
    DECIMAL = Combine(Word(nums) + DOT + Word(nums))
    OBJECT_NAME = Word(alphanums + "_" + "-" + "<" + ">" + ":")
    QUOTED_STRING_WITH_QUOTE = QuotedString(
        quoteChar="'", escQuote="''", escChar="\\", multiline=True, unquoteResults=False
    ) | QuotedString(
        quoteChar='"', escQuote='""', escChar="\\", multiline=True, unquoteResults=False
    )
    QUOTED_STRING = QuotedString(
        quoteChar="'", escQuote="''", escChar="\\", multiline=True
    ) | QuotedString(quoteChar='"', escQuote='""', escChar="\\", multiline=True)
    # Start of a create table statement
    # Sample: this part of rule will match following section
    # `table_name` IF NOT EXISTS
    IF_NOT_EXIST = Optional(
        CaselessLiteral("IF") + CaselessLiteral("NOT") + CaselessLiteral("EXISTS")
    ).suppress()
    TABLE_NAME = (
        QuotedString(quoteChar="`", escQuote="``", escChar="\\", unquoteResults=True)
        | OBJECT_NAME
    )("table_name")
    # Column definition
    # Sample: this part of rule will match following section
    # `id` bigint(20) unsigned NOT NULL DEFAULT '0',
    COLUMN_NAME = (
        QuotedString(quoteChar="`", escQuote="``", escChar="\\", unquoteResults=True)
        | OBJECT_NAME
    )("column_name")
    COLUMN_NAME_WITH_QUOTE = (
        QuotedString(quoteChar="`", escQuote="``", escChar="\\", unquoteResults=False)
        | OBJECT_NAME
    )("column_name")
    UNSIGNED = Optional(CaselessLiteral("UNSIGNED"))("unsigned")
    ZEROFILL = Optional(CaselessLiteral("ZEROFILL"))("zerofill")
    COL_LEN = Combine(LEFT_PARENTHESES + LENGTH + RIGHT_PARENTHESES, adjacent=False)(
        "length"
    )
    INT_TYPE = (
        CaselessLiteral("TINYINT")
        | CaselessLiteral("SMALLINT")
        | CaselessLiteral("MEDIUMINT")
        | CaselessLiteral("INT")
        | CaselessLiteral("INTEGER")
        | CaselessLiteral("BIGINT")
        | CaselessLiteral("BINARY")
        | CaselessLiteral("BIT")
    )
    INT_DEF = INT_TYPE("column_type") + Optional(COL_LEN) + UNSIGNED + ZEROFILL
    VARBINARY_DEF = CaselessLiteral("VARBINARY")("column_type") + COL_LEN
    FLOAT_TYPE = (
        CaselessLiteral("REAL")
        | CaselessLiteral("DOUBLE")
        | CaselessLiteral("FLOAT")
        | CaselessLiteral("DECIMAL")
        | CaselessLiteral("NUMERIC")
    )
    FLOAT_LEN = Combine(
        LEFT_PARENTHESES + LENGTH + Optional(COMMA + LENGTH) + RIGHT_PARENTHESES,
        adjacent=False,
        joinString=", ",
    )("length")
    FLOAT_DEF = FLOAT_TYPE("column_type") + Optional(FLOAT_LEN) + UNSIGNED + ZEROFILL
    # time type definition. They contain type_name and an optional FSP section
    # Sample: DATETIME[(fsp)]
    FSP = COL_LEN
    DT_DEF = (
        Combine(CaselessLiteral("TIME") + Optional(CaselessLiteral("STAMP")))
        | CaselessLiteral("DATETIME")
    )("column_type") + Optional(FSP)
    SIMPLE_DEF = (
        CaselessLiteral("DATE")
        | CaselessLiteral("YEAR")
        | CaselessLiteral("TINYBLOB")
        | CaselessLiteral("BLOB")
        | CaselessLiteral("MEDIUMBLOB")
        | CaselessLiteral("LONGBLOB")
        | CaselessLiteral("BOOLEAN")
        | CaselessLiteral("BOOL")
        | CaselessLiteral("JSON")
    )("column_type")
    OPTIONAL_COL_LEN = Optional(COL_LEN)
    BINARY = Optional(CaselessLiteral("BINARY"))("binary")
    CHARSET_NAME = (
        Optional(QUOTE).suppress()
        + Word(alphanums + "_")("charset")
        + Optional(QUOTE).suppress()
    )
    COLLATION_NAME = (
        Optional(QUOTE).suppress()
        + Word(alphanums + "_")("collate")
        + Optional(QUOTE).suppress()
    )
    CHARSET_DEF = CaselessLiteral("CHARACTER SET").suppress() + CHARSET_NAME
    COLLATE_DEF = CaselessLiteral("COLLATE").suppress() + COLLATION_NAME
    CHAR_DEF = CaselessLiteral("CHAR")("column_type") + OPTIONAL_COL_LEN + BINARY
    VARCHAR_DEF = CaselessLiteral("VARCHAR")("column_type") + COL_LEN + BINARY
    TEXT_TYPE = (
        CaselessLiteral("TINYTEXT")
        | CaselessLiteral("TEXT")
        | CaselessLiteral("MEDIUMTEXT")
        | CaselessLiteral("LONGTEXT")
        | CaselessLiteral("DOCUMENT")
    )
    TEXT_DEF = TEXT_TYPE("column_type") + BINARY
    ENUM_VALUE_LIST = Group(
        QUOTED_STRING_WITH_QUOTE + ZeroOrMore(COMMA + QUOTED_STRING_WITH_QUOTE)
    )("enum_value_list")
    ENUM_DEF = (
        CaselessLiteral("ENUM")("column_type")
        + LEFT_PARENTHESES
        + ENUM_VALUE_LIST
        + RIGHT_PARENTHESES
    )
    SET_VALUE_LIST = Group(
        QUOTED_STRING_WITH_QUOTE + ZeroOrMore(COMMA + QUOTED_STRING_WITH_QUOTE)
    )("set_value_list")
    SET_DEF = (
        CaselessLiteral("SET")("column_type")
        + LEFT_PARENTHESES
        + SET_VALUE_LIST
        + RIGHT_PARENTHESES
    )
    DATA_TYPE = (
        INT_DEF
        | FLOAT_DEF
        | DT_DEF
        | SIMPLE_DEF
        | TEXT_DEF
        | CHAR_DEF
        | VARCHAR_DEF
        | ENUM_DEF
        | SET_DEF
        | VARBINARY_DEF
    )
    # Column attributes come after column type and length
    NULLABLE = CaselessLiteral("NULL") | CaselessLiteral("NOT NULL")
    DEFAULT_VALUE = CaselessLiteral("DEFAULT").suppress() + (
        Optional(Literal("b"))("is_bit") + QUOTED_STRING_WITH_QUOTE("default")
        | Combine(
            CaselessLiteral("CURRENT_TIMESTAMP")("default") + Optional(COL_LEN)("ts_len")
        )
        | DECIMAL("default")
        | Word(alphanums + "_" + "-" + "+")("default")
    )
    ON_UPDATE = (
        CaselessLiteral("ON")
        + CaselessLiteral("UPDATE")
        + (
            CaselessLiteral("CURRENT_TIMESTAMP")("on_update")
            + Optional(COL_LEN)("on_update_ts_len")
        )
    )
    AUTO_INCRE = CaselessLiteral("AUTO_INCREMENT")
    UNIQ_KEY = CaselessLiteral("UNIQUE") + Optional(CaselessLiteral("KEY")).suppress()
    PRIMARY_KEY = (
        CaselessLiteral("PRIMARY") + Optional(CaselessLiteral("KEY")).suppress()
    )
    COMMENT = Combine(
        CaselessLiteral("COMMENT").suppress() + QUOTED_STRING_WITH_QUOTE, adjacent=False
    )
    COLUMN_DEF = Group(
        COLUMN_NAME
        + DATA_TYPE
        + ZeroOrMore(
            NULLABLE("nullable")
            | DEFAULT_VALUE
            | ON_UPDATE
            | AUTO_INCRE("auto_increment")
            | UNIQ_KEY("uniq_key")
            | PRIMARY_KEY("primary")
            | COMMENT("comment")
            | CHARSET_DEF
            | COLLATE_DEF
        )
    )
    COLUMN_LIST = Group(COLUMN_DEF + ZeroOrMore(COMMA + COLUMN_DEF))("column_list")
    DOCUMENT_PATH = Combine(
        COLUMN_NAME_WITH_QUOTE + ZeroOrMore(DOT + COLUMN_NAME_WITH_QUOTE)
    )
    IDX_COL = (
        Group(
            DOCUMENT_PATH
            + CaselessLiteral("AS")
            + (CaselessLiteral("INT") | CaselessLiteral("STRING"))
            + Optional(COL_LEN, default="")
        )
    ) | (Group(COLUMN_NAME + Optional(COL_LEN, default="")))
    # Primary key section
    COL_NAME_LIST = Group(IDX_COL + ZeroOrMore(COMMA + IDX_COL))
    IDX_COLS = LEFT_PARENTHESES + COL_NAME_LIST + RIGHT_PARENTHESES
    WORD_PRI_KEY = (
        CaselessLiteral("PRIMARY").suppress() + CaselessLiteral("KEY").suppress()
    )
    KEY_BLOCK_SIZE = (
        CaselessLiteral("KEY_BLOCK_SIZE").suppress()
        + Optional(Literal("="))
        + Word(nums)("idx_key_block_size")
    )
    INDEX_USING = CaselessLiteral("USING").suppress() + (
        CaselessLiteral("BTREE") | CaselessLiteral("HASH")
    )("idx_using")
    INDEX_OPTION = ZeroOrMore(KEY_BLOCK_SIZE | COMMENT("idx_comment") | INDEX_USING)
    PRI_KEY_DEF = COMMA + WORD_PRI_KEY + IDX_COLS("pri_list") + INDEX_OPTION
    # Index section
    KEY_TYPE = (CaselessLiteral("FULLTEXT") | CaselessLiteral("SPATIAL"))("key_type")
    WORD_UNIQUE = CaselessLiteral("UNIQUE")("unique")
    WORD_KEY = CaselessLiteral("INDEX").suppress() | CaselessLiteral("KEY").suppress()
    IDX_NAME = Optional(COLUMN_NAME)
    IDX_DEF = (
        ZeroOrMore(
            Group(
                COMMA
                + Optional(WORD_UNIQUE | KEY_TYPE)
                + WORD_KEY
                + IDX_NAME("index_name")
                + IDX_COLS("index_col_list")
                + INDEX_OPTION
            )
        )
    )("index_section")
    # Constraint section as this is not a recommended way of using MySQL
    # we'll treat the whole section as a string
    CONSTRAINT = Combine(
        ZeroOrMore(
            COMMA
            + Optional(CaselessLiteral("CONSTRAINT"))
            +
            # foreign key name except the key word 'FOREIGN'
            Optional((~CaselessLiteral("FOREIGN") + COLUMN_NAME))
            + CaselessLiteral("FOREIGN")
            + CaselessLiteral("KEY")
            + LEFT_PARENTHESES
            + COL_NAME_LIST
            + RIGHT_PARENTHESES
            + CaselessLiteral("REFERENCES")
            + COLUMN_NAME
            + LEFT_PARENTHESES
            + COL_NAME_LIST
            + RIGHT_PARENTHESES
            + ZeroOrMore(Word(alphanums))
        ),
        adjacent=False,
        joinString=" ",
    )("constraint")
    # Table option section
    ENGINE = (
        CaselessLiteral("ENGINE").suppress()
        + Optional(Literal("=")).suppress()
        + COLUMN_NAME("engine").setParseAction(upcaseTokens)
    )
    DEFAULT_CHARSET = (
        Optional(CaselessLiteral("DEFAULT")).suppress()
        + (
            (
                CaselessLiteral("CHARACTER").suppress()
                + CaselessLiteral("SET").suppress()
            )
            | (CaselessLiteral("CHARSET").suppress())
        )
        + Optional(Literal("=")).suppress()
        + Word(alphanums + "_")("charset")
    )
    TABLE_COLLATE = (
        Optional(CaselessLiteral("DEFAULT")).suppress()
        + CaselessLiteral("COLLATE").suppress()
        + Optional(Literal("=")).suppress()
        + COLLATION_NAME
    )
    ROW_FORMAT = (
        CaselessLiteral("ROW_FORMAT").suppress()
        + Optional(Literal("=")).suppress()
        + Word(alphanums + "_")("row_format").setParseAction(upcaseTokens)
    )
    TABLE_KEY_BLOCK_SIZE = (
        CaselessLiteral("KEY_BLOCK_SIZE").suppress()
        + Optional(Literal("=")).suppress()
        + Word(nums)("key_block_size").setParseAction(lambda s, l, t: [int(t[0])])
    )
    COMPRESSION = (
        CaselessLiteral("COMPRESSION").suppress()
        + Optional(Literal("=")).suppress()
        + Word(alphanums + "_")("compression").setParseAction(upcaseTokens)
    )
    # Parse and make sure auto_increment is an integer
    # parseAction function is defined as fn( s, loc, toks ), where:
    # s is the original parse string
    # loc is the location in the string where matching started
    # toks is the list of the matched tokens, packaged as a ParseResults_
    # object
    TABLE_AUTO_INCRE = (
        CaselessLiteral("AUTO_INCREMENT").suppress()
        + Optional(Literal("=")).suppress()
        + Word(nums)("auto_increment").setParseAction(lambda s, l, t: [int(t[0])])
    )
    TABLE_COMMENT = (
        CaselessLiteral("COMMENT").suppress()
        + Optional(Literal("=")).suppress()
        + QUOTED_STRING_WITH_QUOTE("comment")
    )
    TABLE_OPTION = ZeroOrMore(
        (
            ENGINE
            | DEFAULT_CHARSET
            | TABLE_COLLATE
            | ROW_FORMAT
            | TABLE_KEY_BLOCK_SIZE
            | COMPRESSION
            | TABLE_AUTO_INCRE
            | TABLE_COMMENT
        )
        # Table attributes could be comma separated too.
        + Optional(COMMA).suppress()
    )
    # Partition section
    PARTITION = Optional(
        Combine(
            Combine(Optional(Literal("/*!") + Word(nums)))
            + CaselessLiteral("PARTITION")
            + CaselessLiteral("BY")
            + SkipTo(StringEnd()),
            adjacent=False,
            joinString=" ",
        )("partition")
    )
    # Parse partitions in detail
    # From https://dev.mysql.com/doc/refman/8.0/en/create-table.html
    PART_FIELD_NAME = (
        QuotedString(quoteChar="`", escQuote="``", escChar="\\", unquoteResults=True)
        | OBJECT_NAME
    )
    PART_FIELD_LIST = delimitedList(PART_FIELD_NAME)("field_list")
    # e.g 1, 2, 3
    # and 'a', 'b', 'c'
    # and `NULL`
    # and _binary 0x123aBc
    HEX_VALUE = Literal("0x") + OneOrMore(Regex("[0-9a-fA-F]"))
    day = Word(nums)
    month = Word(nums)
    year = Word(nums)
    dateday = Combine(year + "-" + month + "-" + day)
    to_days = Combine("to_days('" + dateday + "')")
    PART_VALUE_LIST = Group(
        LEFT_PARENTHESES
        + (
            delimitedList(
                Word(nums)  # e.g. (1, 2, 3)
                | QUOTED_STRING_WITH_QUOTE  # e.g. ('a', 'b')
                | CaselessLiteral("NULL").setParseAction(upcaseTokens)  # e.g. (NULL)
                | to_days  # e.g. to_days('2010-11-07')
            )
            | (
                LEFT_PARENTHESES
                + (
                    delimitedList(
                        QUOTED_STRING_WITH_QUOTE
                        | CaselessLiteral("NULL").setParseAction(upcaseTokens)
                    )
                )
                + RIGHT_PARENTHESES
            )(
                "is_tuple"
            )  # e.g. (("a", "b")), See test_parts_list_in_tuple15
            # e.g. `_binary 0xdeadbeef123`, See test_parts_list_by_cols_with_binary17
            # turns to: `_BINARY 0xdeadbeef123`
            | Combine(
                CaselessLiteral("_binary").setParseAction(upcaseTokens)
                + White(" ").setParseAction(replaceWith(" "))
                + HEX_VALUE
            )
        )
        + RIGHT_PARENTHESES
    )
    PART_VALUES_IN = (CaselessLiteral("IN").suppress() + PART_VALUE_LIST)("p_values_in")
    # Note: No expr support although full syntax (allowed by mysql8) is
    # LESS THAN {(expr | value_list) | MAXVALUE}
    PART_VALUES_LESSTHAN = (
        CaselessLiteral("LESS").suppress()
        + CaselessLiteral("THAN").suppress()
        + (CaselessLiteral("MAXVALUE").setParseAction(upcaseTokens) | PART_VALUE_LIST)
    )("p_values_less_than")
    PART_NAME = (
        QuotedString(quoteChar="`", escQuote="``", escChar="\\", unquoteResults=True)
        | OBJECT_NAME
    )("part_name")
    # Options for partition definitions - engine/comments only for now.
    # DO NOT re-use QUOTED_STRING_WITH_QUOTE for these -
    # *seems* to trigger a pyparsing bug?
    P_ENGINE = (
        QuotedString(quoteChar="'", escQuote="''", escChar="\\", unquoteResults=True)
        | QuotedString(
            quoteChar='"',
            escQuote='""',
            escChar="\\",
            multiline=False,
            unquoteResults=True,
        )
        | CaselessLiteral("innodb")
        | CaselessLiteral("ndb")
        | CaselessLiteral("rocksdb")
    )
    P_COMMENT = QuotedString(
        quoteChar="'", escQuote="''", escChar="\\", multiline=True, unquoteResults=False
    ) | QuotedString(
        quoteChar='"', escQuote='""', escChar="\\", multiline=True, unquoteResults=False
    )
    P_OPT_ENGINE = (
        Optional(CaselessLiteral("STORAGE")).suppress()
        + CaselessLiteral("ENGINE").suppress()
        + Optional(Literal("=")).suppress()
        + P_ENGINE.setParseAction(upcaseTokens)("pdef_engine")
    )
    P_OPT_COMMENT = (
        CaselessLiteral("COMMENT").suppress()
        + Optional(Literal("=")).suppress()
        + P_COMMENT("pdef_comment")
    )
    PDEF_OPTIONS = ZeroOrMore((P_OPT_ENGINE | P_OPT_COMMENT))
    # e.g. PARTITION p99 VALUES (LESS THAN|IN) ...
    PART_DEFS = delimitedList(
        Group(
            CaselessLiteral("PARTITION").suppress()
            + PART_NAME
            + CaselessLiteral("VALUES").suppress()
            + (PART_VALUES_LESSTHAN | PART_VALUES_IN)
            + PDEF_OPTIONS
        )
    )
    # No fancy expressions yet, just a list of cols OR something nested in ()
    PART_EXPR = (
        (
            LEFT_PARENTHESES
            + delimitedList(
                QuotedString(
                    quoteChar="`", escQuote="``", escChar="\\", unquoteResults=True
                )
                | OBJECT_NAME
            )
            + RIGHT_PARENTHESES
        )("via_list")
        # `RANGE expr` support (test_parts_range_with_expr)
        | nestedExpr()("via_nested_expr")
    )("p_expr")
    SUBTYPE_LINEAR = (Optional(CaselessLiteral("LINEAR")).setParseAction(upcaseTokens))(
        "p_subtype"
    )
    # Match: [LINEAR] HASH (expr)
    PTYPE_HASH = (
        SUBTYPE_LINEAR
        + (CaselessLiteral("HASH").setParseAction(upcaseTokens))("part_type")
        + nestedExpr()("p_hash_expr")  # Lousy approximation, needs post processing
    )
    # Match: [LINEAR] KEY [ALGORITHM=1|2] (column_list)
    PART_ALGO = (
        CaselessLiteral("ALGORITHM").suppress()
        + Literal("=").suppress()
        + Word(alphanums)
    )("p_algo")
    PTYPE_KEY = (
        SUBTYPE_LINEAR
        + (CaselessLiteral("KEY").setParseAction(upcaseTokens))("part_type")
        + Optional(PART_ALGO)
        + Literal("(")  # don't suppress here
        + Optional(PART_FIELD_LIST)  # e.g. `PARTITION BY KEY() PARTITIONS 2` is valid
        + Literal(")")
    )
    PART_COL_LIST = (
        (CaselessLiteral("COLUMNS").setParseAction(upcaseTokens))("p_subtype")
        + LEFT_PARENTHESES
        + PART_FIELD_LIST
        + RIGHT_PARENTHESES
    )
    PTYPE_RANGE = (CaselessLiteral("RANGE").setParseAction(upcaseTokens))(
        "part_type"
    ) + (PART_COL_LIST | PART_EXPR)
    PTYPE_LIST = (CaselessLiteral("LIST").setParseAction(upcaseTokens))("part_type") + (
        PART_COL_LIST | PART_EXPR
    )

    @classmethod
    def generate_rule(cls):
        """Build the final pyparsing rule for a whole CREATE TABLE statement."""
        # The final rule for the whole statement match
        return (
            cls.WORD_CREATE
            + cls.WORD_TABLE
            + cls.IF_NOT_EXIST
            + cls.TABLE_NAME
            + cls.LEFT_PARENTHESES
            + cls.COLUMN_LIST
            + Optional(cls.PRI_KEY_DEF)
            + cls.IDX_DEF
            + cls.CONSTRAINT
            + cls.RIGHT_PARENTHESES
            + cls.TABLE_OPTION("table_options")
            + cls.PARTITION
        )

    @classmethod
    def get_parser(cls):
        """Return the CREATE TABLE parser, building it on first use."""
        if not cls._parser:
            cls._parser = cls.generate_rule()
        return cls._parser

    @classmethod
    def gen_partitions_parser(cls):
        """Build the detailed parser for the PARTITION BY clause."""
        # Init full parts matcher only on demand
        # invalid_partition_prefix - used to detect any invalid prefix
        # attached to the number of partitions. The prefix is used
        # later on to flag invalid schemas.
        return (
            Combine(Optional(Literal("/*!") + Word(nums))).suppress()
            + CaselessLiteral("PARTITION")
            + CaselessLiteral("BY")
            + (cls.PTYPE_HASH | cls.PTYPE_KEY | cls.PTYPE_RANGE | cls.PTYPE_LIST)
            + Optional(
                CaselessLiteral("PARTITIONS")
                + Optional(Combine(Regex("[^0-9]")))("invalid_partition_prefix")
                + Word(nums)("num_partitions")
            )
            + Optional(
                cls.LEFT_PARENTHESES
                + cls.PART_DEFS("part_defs")
                + cls.RIGHT_PARENTHESES
            )
        )

    @classmethod
    def get_partitions_parser(cls):
        """Return the partitions parser, building it on first use."""
        if not cls._partitions_parser:
            cls._partitions_parser = cls.gen_partitions_parser()
        return cls._partitions_parser

    @classmethod
    def parse_partitions(cls, parts) -> ParseResults:
        """Parse a raw PARTITION BY string; raises ParseError on bad syntax."""
        try:
            return cls.get_partitions_parser().parseString(parts)
        except ParseException as e:
            raise ParseError(f"Error parsing partitions: {e.line}, {e.column}")

    @classmethod
    def parse(cls, sql):
        """Parse a CREATE TABLE statement into a models.Table object.

        Accepts str or bytes (bytes are decoded as UTF-8).
        Raises ParseError on unsupported syntax.
        """
        try:
            if not isinstance(sql, str):
                sql = sql.decode("utf-8")
            result = cls.get_parser().parseString(sql)
        except ParseException as e:
            raise ParseError(
                "Failed to parse SQL, unsupported syntax: {}".format(e),
                e.line,
                e.column,
            )
        inline_pri_exists = False
        table = models.Table()
        table.name = result.table_name
        table_options = [
            "engine",
            "charset",
            "collate",
            "row_format",
            "key_block_size",
            "compression",
            "auto_increment",
            "comment",
        ]
        for table_option in table_options:
            if table_option in result:
                setattr(table, table_option, result.get(table_option))
        if "partition" in result:
            # pyparsing will convert newline into two after parsing. So we
            # need to dedup here
            table.partition = result.partition.replace("\n\n", "\n")
            try:
                presult = cls.parse_partitions(table.partition)
                table.partition_config = cls.partition_to_model(presult)
            except ParseException as e:
                raise ParseError(
                    f"Failed to parse partitions config, unsupported syntax {e},"
                    f" line: {e.line} col {e.column}"
                )
            except PartitionParseError as mpe:
                raise ParseError(
                    f"Failed to init model from partitions config: {mpe}, "
                    f"ParseResult: {presult.dump()}\nRaw: {table.partition}"
                )
        if "constraint" in result:
            table.constraint = result.constraint
        for column_def in result.column_list:
            if column_def.column_type == "ENUM":
                column = models.EnumColumn()
                for enum_value in column_def.enum_value_list:
                    column.enum_list.append(enum_value)
            elif column_def.column_type == "SET":
                column = models.SetColumn()
                for set_value in column_def.set_value_list:
                    column.set_list.append(set_value)
            elif column_def.column_type in ("TIMESTAMP", "DATETIME"):
                column = models.TimestampColumn()
                if "on_update" in column_def:
                    if "on_update_ts_len" in column_def:
                        column.on_update_current_timestamp = "{}({})".format(
                            column_def.on_update, column_def.on_update_ts_len
                        )
                    else:
                        column.on_update_current_timestamp = column_def.on_update
            else:
                column = models.Column()
            column.name = column_def.column_name
            column.column_type = column_def.column_type
            if column.column_type == "JSON":
                table.has_80_features = True
            # We need to check whether each column property exist in the
            # create table string, because not specifying a "COMMENT" is
            # different from specifying "COMMENT" equals to empty string.
            # The former one will ends up being
            #   column=None
            # and the later one being
            #   column=''
            if "comment" in column_def:
                column.comment = column_def.comment
            if "nullable" in column_def:
                if column_def.nullable == "NULL":
                    column.nullable = True
                elif column_def.nullable == "NOT NULL":
                    column.nullable = False
            if "unsigned" in column_def:
                if column_def.unsigned == "UNSIGNED":
                    column.unsigned = True
            if "default" in column_def:
                if "ts_len" in column_def:
                    column.default = "{}({})".format(
                        column_def.default, column_def.ts_len
                    )
                else:
                    column.default = column_def.default
                if "is_bit" in column_def:
                    column.is_default_bit = True
            if "charset" in column_def:
                column.charset = column_def.charset
            if "length" in column_def:
                column.length = column_def.length
            if "collate" in column_def:
                column.collate = column_def.collate
            if "auto_increment" in column_def:
                column.auto_increment = True
            if "primary" in column_def:
                idx_col = models.IndexColumn()
                idx_col.name = column_def.column_name
                table.primary_key.column_list.append(idx_col)
                inline_pri_exists = True
            table.column_list.append(column)
        if "pri_list" in result:
            if inline_pri_exists:
                raise ParseError("Multiple primary keys defined")
            table.primary_key.name = "PRIMARY"
            for col in result.pri_list:
                for name, length in col:
                    idx_col = models.IndexColumn()
                    idx_col.name = name
                    if length:
                        idx_col.length = length
                    table.primary_key.column_list.append(idx_col)
            if "idx_key_block_size" in result:
                table.primary_key.key_block_size = result.pri_key_block_size
            if "idx_comment" in result:
                table.primary_key.comment = result.idx_comment
        if "index_section" in result:
            for idx_def in result.index_section:
                idx = models.TableIndex()
                idx.name = idx_def.index_name
                if "idx_key_block_size" in idx_def:
                    idx.key_block_size = idx_def.idx_key_block_size
                if "idx_comment" in idx_def:
                    idx.comment = idx_def.idx_comment
                if "idx_using" in idx_def:
                    idx.using = idx_def.idx_using
                if "key_type" in idx_def:
                    idx.key_type = idx_def.key_type
                if "unique" in idx_def:
                    idx.is_unique = True
                for col in idx_def.index_col_list:
                    for col_def in col:
                        if len(col_def) == 4 and col_def[1].upper() == "AS":
                            (document_path, word_as, key_type, length) = col_def
                            idx_col = models.DocStoreIndexColumn()
                            idx_col.document_path = document_path
                            idx_col.key_type = key_type
                            if length:
                                idx_col.length = length
                            idx.column_list.append(idx_col)
                        else:
                            (name, length) = col_def
                            idx_col = models.IndexColumn()
                            idx_col.name = name
                            if length:
                                idx_col.length = length
                            idx.column_list.append(idx_col)
                table.indexes.append(idx)
        return table

    @classmethod
    def partition_to_model(cls, presult: ParseResults) -> models.PartitionConfig:
        # Convert ParseResults from parsing a partitions config into a
        # model. This can throw a PartitionParseError
        mytype = presult.get("part_type", None)
        mysubtype = presult.get("p_subtype", None)
        if (
            (not mytype and not mysubtype)
            or mytype not in models.PartitionConfig.KNOWN_PARTITION_TYPES
            or (
                mysubtype is not None
                and mysubtype not in models.PartitionConfig.KNOWN_PARTITION_SUBTYPES
            )
        ):
            raise PartitionParseError(
                "partition_to_model Cannot init mode.PartitionConfig: "
                f"type {mytype} subtype {mysubtype}"
            )
        pc = models.PartitionConfig()
        pc.part_type = mytype
        pc.p_subtype = mysubtype

        # Recursively remove backticks from a field name or nested list of
        # field names.
        def _strip_ticks(fields: Union[str, List[str]]) -> Union[str, List[str]]:
            if isinstance(fields, str):
                return fields.replace("`", "")
            return [_strip_ticks(f) for f in fields]

        if presult.get("invalid_partition_prefix"):
            raise PartitionParseError(
                f"Partition type {pc.part_type} cannot "
                "have invalid partition number prefix defined"
            )
        # set fields_or_expr, full_type
        if (
            pc.part_type == models.PartitionConfig.PTYPE_LIST
            or pc.part_type == models.PartitionConfig.PTYPE_RANGE
        ):
            pc.num_partitions = len(presult.get("part_defs", []))
            if pc.num_partitions == 0:
                raise PartitionParseError(
                    f"Partition type {pc.part_type} MUST have partitions defined"
                )
            pc.part_defs = _process_partition_definitions(presult.part_defs)
            if not pc.p_subtype:
                pc.full_type = pc.part_type
                pc.via_nested_expr = (
                    "via_nested_expr" in presult and "via_list" not in presult
                )
                pc.fields_or_expr = presult.p_expr.asList()
                if pc.via_nested_expr:
                    # strip backticks e.g. to_days(`date`) -> [to_days, [date]]
                    pc.fields_or_expr = _strip_ticks(pc.fields_or_expr)
            else:
                pc.full_type = f"{pc.part_type} {pc.p_subtype}"
                pc.fields_or_expr = presult.field_list.asList()
        elif pc.part_type == models.PartitionConfig.PTYPE_KEY:
            pc.full_type = (
                pc.part_type if not pc.p_subtype else f"{pc.p_subtype} {pc.part_type}"
            )
            pc.num_partitions = int(presult.get("num_partitions", 1))
            fl = presult.get("field_list", None)
            pc.fields_or_expr = fl.asList() if fl else []
            # This is the only place p_algo is valid. algorithm_for_key
            algo_result = presult.get("p_algo")
            if algo_result and len(algo_result.asList()) > 0:
                pc.algorithm_for_key = int(algo_result.asList()[0])
        elif pc.part_type == models.PartitionConfig.PTYPE_HASH:
            pc.full_type = (
                pc.part_type if not pc.p_subtype else f"{pc.p_subtype} {pc.part_type}"
            )
            pc.num_partitions = int(presult.get("num_partitions", 1))
            hexpr = presult.get("p_hash_expr", None)
            if not hexpr:
                raise PartitionParseError(
                    f"Partition type {pc.part_type} MUST have p_hash_expr defined"
                )
            pc.fields_or_expr = _strip_ticks(hexpr.asList())
        else:
            # unreachable since we checked for all part_types earlier.
            raise PartitionParseError(f"Unknown partition type {pc.part_type}")

        # We avoid escaping fields/expr in partitions with backticks since
        # its tricky to distinguish between a list of columns and an expression
        # e.g. unix_timestamp(ts) - ts could be escaped but unix_ts cannot.
        # Our parser will strip out backticks wherever possible. For nestedExpr
        # usecases, this is done via _strip_ticks instead.
        def _has_backticks(fields: Union[str, List[str]]) -> bool:
            if isinstance(fields, list):
                return any(_has_backticks(f) for f in fields)
            return "`" in fields if isinstance(fields, str) else False

        if _has_backticks(pc.fields_or_expr):
            raise PartitionParseError(
                f"field_or_expr cannot have backticks {pc.fields_or_expr}"
            )
        if len(pc.part_defs) > 0 and any(
            pd.pdef_name.upper() == "NULL" for pd in pc.part_defs
        ):
            # We will disallow this even if raw sql passed in as e.g.
            # PARTITION `null` VALUES IN ...
            raise PartitionParseError("Partition names may not be literal `null`")
        return pc
except KeyError: raise MissingVariableException('no value supplied for {}'.format( self._path)) return val def __repr__(self): return 'SubstituteVal(%s)' % self._path # Grammar definition pathDelimiter = '.' # match gcloud's variable identifier = Combine( Optional('${') + Optional('_') + Word(alphas, alphanums + "_") + Optional('}')) # identifier = Word(alphas, alphanums + "_") propertyPath = delimitedList(identifier, pathDelimiter, combine=True) and_ = Keyword("and", caseless=True) or_ = Keyword("or", caseless=True) in_ = Keyword("in", caseless=True) lparen = Suppress('(') rparen = Suppress(')') binaryOp = oneOf("== != < > >= <= in notin", caseless=True)('operator') E = CaselessLiteral("E") numberSign = Word("+-", exact=1)
from evdev.ecodes import ecodes except ImportError: ecodes = None print('WARNING: evdev is not available') try: from functools import lru_cache except ImportError: # don't do caching on old python lru_cache = lambda: (lambda f: f) EOL = LineEnd().suppress() EMPTYLINE = LineEnd() COMMENTLINE = pythonStyleComment + EOL INTEGER = Word(nums) REAL = Combine((INTEGER + Optional('.' + Optional(INTEGER))) ^ ('.' + INTEGER)) SIGNED_REAL = Combine(Optional(Word('-+')) + REAL) UDEV_TAG = Word(string.ascii_uppercase, alphanums + '_') # Those patterns are used in type-specific matches TYPES = { 'mouse': ('usb', 'bluetooth', 'ps2', '*'), 'evdev': ('name', 'atkbd', 'input'), 'id-input': ('modalias'), 'touchpad': ('i8042', 'rmi', 'bluetooth', 'usb'), 'joystick': ('i8042', 'rmi', 'bluetooth', 'usb'), 'keyboard': ('name', ), 'sensor': ('modalias', ), } # Patterns that are used to set general properties on a device
from typing import Optional as Optional_
from typing import Union

from pyparsing import (Combine, Literal, Optional, ParseException,
                       ParserElement, ParseResults, Regex, SkipTo, Suppress,
                       Word, nums)

# Accept either pathlib.Path objects or plain strings wherever a path is
# expected. (Path is imported elsewhere in this module — presumably from
# pathlib; verify against the full file.)
PathLike = Union[Path, str]

# Literals
minus_or_plus = Literal('+') | Literal('-')

# Parsing Floats
natural = Word(nums)
integer = Combine(Optional(minus_or_plus) + natural)
# float with mandatory integer part, e.g. "1.", "-3.14", "2.5e-3"
float_number = Regex(r'(\-)?\d+(\.)(\d*)?([eE][\-\+]\d+)?')
# float where the integer part may be absent, e.g. ".5"
float_number_dot = Regex(r'(\-)?(\d+)?(\.)(\d*)?([eE][\-\+]\d+)?')

# Parse Utilities


def skip_supress(z: str) -> ParserElement:
    """Skip until `z` and suppress the skipped values."""
    # NOTE(review): name keeps the existing "supress" spelling since it is
    # part of this module's public API.
    return Suppress(SkipTo(z))


any_char = Regex('.')
skipany_char = Suppress(any_char)
# Skip (and discard) everything up to the end of the current line.
skip_line = Suppress(skip_supress('\n'))
val = "'" + val[1:-1].replace("''", "\\'") + "'" return {"literal": ast.literal_eval(val)} # NUMBERS realNum = Regex(r"[+-]?(\d+\.\d*|\.\d+)([eE][+-]?\d+)?").addParseAction( unquote) intNum = Regex(r"[+-]?\d+([eE]\+?\d+)?").addParseAction(unquote) # STRINGS, NUMBERS, VARIABLES sqlString = Regex(r"\'(\'\'|\\.|[^'])*\'").addParseAction(to_string) identString = Regex(r'\"(\"\"|\\.|[^"])*\"').addParseAction(unquote) mysqlidentString = Regex(r'\`(\`\`|\\.|[^`])*\`').addParseAction(unquote) ident = Combine(~RESERVED + (delimitedList(Literal("*") | Word(alphas + "_", alphanums + "_$") | identString | mysqlidentString, delim=".", combine=True))).setName("identifier") # EXPRESSIONS expr = Forward() # CASE case = (CASE + Group( ZeroOrMore((WHEN + expr("when") + THEN + expr("then")).addParseAction(to_when_call)))("case") + Optional(ELSE + expr("else")) + END).addParseAction(to_case_call) selectStmt = Forward() compound = ( (Keyword("not", caseless=True)("op").setDebugActions(*debug) +
def Verilog_BNF():
    """Build (once) and return the pyparsing grammar for Verilog source.

    The assembled grammar is cached in the module-global `verilogbnf`;
    subsequent calls return the cached ParserElement unchanged.
    """
    global verilogbnf
    if verilogbnf is None:
        # compiler directives (backtick directives are matched and later
        # ignore()d so they can appear anywhere in the input)
        compilerDirective = Combine(
            "`" +
            oneOf("define undef ifdef else endif default_nettype "
                  "include resetall timescale unconnected_drive "
                  "nounconnected_drive celldefine endcelldefine") +
            restOfLine).setName("compilerDirective")

        # primitives
        SEMI, COLON, LPAR, RPAR, LBRACE, RBRACE, LBRACK, RBRACK, DOT, COMMA, EQ = map(
            Literal, ";:(){}[].,=")

        identLead = alphas + "$_"
        identBody = alphanums + "$_"
        # plain (possibly hierarchical, dot-separated) identifier
        identifier1 = Regex(r"\.?[" + identLead + "][" + identBody +
                            r"]*(\.[" + identLead + "][" + identBody +
                            "]*)*").setName("baseIdent")
        # escaped identifier: backslash followed by any non-space chars;
        # the parse action strips the leading backslash
        identifier2 = Regex(r"\\\S+").setParseAction(
            lambda t: t[0][1:]).setName("escapedIdent")  #.setDebug()
        identifier = identifier1 | identifier2
        # NOTE(review): comparing a ParserElement to a str invokes
        # ParserElement.__eq__, which performs a match test — this is an
        # inline self-test of the escaped-identifier regex; confirm intended.
        assert (identifier2 == r'\abc')

        hexnums = nums + "abcdefABCDEF" + "_?"
        base = Regex("'[bBoOdDhH]").setName("base")
        basedNumber = Combine(Optional(Word(nums + "_")) + base +
                              Word(hexnums + "xXzZ"),
                              joinString=" ",
                              adjacent=False).setName("basedNumber")
        #~ number = ( basedNumber | Combine( Word( "+-"+spacedNums, spacedNums ) +
        #~ Optional( DOT + Optional( Word( spacedNums ) ) ) +
        #~ Optional( e + Word( "+-"+spacedNums, spacedNums ) ) ).setName("numeric") )
        number = (basedNumber |
                  Regex(r"[+-]?[0-9_]+(\.[0-9_]*)?([Ee][+-]?[0-9_]+)?")
                  ).setName("numeric")
        #~ decnums = nums + "_"
        #~ octnums = "01234567" + "_"
        expr = Forward().setName("expr")
        concat = Group(LBRACE + delimitedList(expr) + RBRACE)
        multiConcat = Group("{" + expr + concat + "}").setName("multiConcat")
        funcCall = Group(identifier + LPAR + Optional(delimitedList(expr)) +
                         RPAR).setName("funcCall")

        subscrRef = Group(LBRACK + delimitedList(expr, COLON) + RBRACK)
        subscrIdentifier = Group(identifier + Optional(subscrRef))
        #~ scalarConst = "0" | (( FollowedBy('1') + oneOf("1'b0 1'b1 1'bx 1'bX 1'B0 1'B1 1'Bx 1'BX 1") ))
        scalarConst = Regex("0|1('[Bb][01xX])?")
        mintypmaxExpr = Group(expr + COLON + expr + COLON +
                              expr).setName("mintypmax")
        primary = (number | (LPAR + mintypmaxExpr + RPAR) |
                   (LPAR + Group(expr) + RPAR).setName("nestedExpr") |
                   multiConcat | concat | dblQuotedString | funcCall |
                   subscrIdentifier)

        unop = oneOf("+ - ! ~ & ~& | ^| ^ ~^").setName("unop")
        binop = oneOf("+ - * / % == != === !== && "
                      "|| < <= > >= & | ^ ^~ >> << ** <<< >>>").setName(
                          "binop")
        # unary alternative must be tried first to avoid mis-parsing
        expr << ((unop + expr) |  # must be first!
                 (primary + "?" + expr + COLON + expr) |
                 (primary + Optional(binop + expr)))

        lvalue = subscrIdentifier | concat

        # keywords
        if_ = Keyword("if")
        else_ = Keyword("else")
        edge = Keyword("edge")
        posedge = Keyword("posedge")
        negedge = Keyword("negedge")
        specify = Keyword("specify")
        endspecify = Keyword("endspecify")
        fork = Keyword("fork")
        join = Keyword("join")
        begin = Keyword("begin")
        end = Keyword("end")
        default = Keyword("default")
        forever = Keyword("forever")
        repeat = Keyword("repeat")
        while_ = Keyword("while")
        for_ = Keyword("for")
        case = oneOf("case casez casex")
        endcase = Keyword("endcase")
        wait = Keyword("wait")
        disable = Keyword("disable")
        deassign = Keyword("deassign")
        force = Keyword("force")
        release = Keyword("release")
        assign = Keyword("assign")

        eventExpr = Forward()
        eventTerm = (posedge + expr) | (negedge + expr) | expr | (
            LPAR + eventExpr + RPAR)
        eventExpr << (Group(delimitedList(eventTerm, Keyword("or"))))
        eventControl = Group("@" + (
            (LPAR + eventExpr + RPAR) | identifier | "*")).setName("eventCtrl")

        delayArg = (number | Word(alphanums + "$_") |  #identifier |
                    (LPAR + Group(delimitedList(mintypmaxExpr | expr)) +
                     RPAR)).setName("delayArg")  #.setDebug()
        delay = Group("#" + delayArg).setName("delay")  #.setDebug()
        delayOrEventControl = delay | eventControl

        assgnmt = Group(lvalue + EQ + Optional(delayOrEventControl) +
                        expr).setName("assgnmt")
        nbAssgnmt = Group((lvalue + "<=" + Optional(delay) + expr) |
                          (lvalue + "<=" + Optional(eventControl) +
                           expr)).setName("nbassgnmt")

        range = LBRACK + expr + COLON + expr + RBRACK

        paramAssgnmt = Group(identifier + EQ + expr).setName("paramAssgnmt")
        parameterDecl = Group("parameter" + Optional(range) +
                              delimitedList(paramAssgnmt) +
                              SEMI).setName("paramDecl")

        inputDecl = Group("input" + Optional(range) +
                          delimitedList(identifier) + SEMI)
        outputDecl = Group("output" + Optional(range) +
                           delimitedList(identifier) + SEMI)
        inoutDecl = Group("inout" + Optional(range) +
                          delimitedList(identifier) + SEMI)

        regIdentifier = Group(identifier +
                              Optional(LBRACK + expr + COLON + expr + RBRACK))
        regDecl = Group("reg" + Optional("signed") + Optional(range) +
                        delimitedList(regIdentifier) +
                        SEMI).setName("regDecl")
        timeDecl = Group("time" + delimitedList(regIdentifier) + SEMI)
        integerDecl = Group("integer" + delimitedList(regIdentifier) + SEMI)

        strength0 = oneOf("supply0 strong0 pull0 weak0 highz0")
        strength1 = oneOf("supply1 strong1 pull1 weak1 highz1")
        driveStrength = Group(LPAR +
                              ((strength0 + COMMA + strength1) |
                               (strength1 + COMMA + strength0)) +
                              RPAR).setName("driveStrength")
        nettype = oneOf(
            "wire tri tri1 supply0 wand triand tri0 supply1 wor trior trireg")
        expandRange = Optional(oneOf("scalared vectored")) + range

        realDecl = Group("real" + delimitedList(identifier) + SEMI)

        eventDecl = Group("event" + delimitedList(identifier) + SEMI)

        blockDecl = (parameterDecl | regDecl | integerDecl | realDecl |
                     timeDecl | eventDecl)

        stmt = Forward().setName("stmt")  #.setDebug()
        stmtOrNull = stmt | SEMI
        caseItem = (delimitedList(expr) + COLON + stmtOrNull) | \
                   (default + Optional(":") + stmtOrNull)
        stmt << Group(
            (begin + Group(ZeroOrMore(stmt)) + end).setName("begin-end") |
            (if_ + Group(LPAR + expr + RPAR) + stmtOrNull +
             Optional(else_ + stmtOrNull)).setName("if") |
            (delayOrEventControl + stmtOrNull) |
            (case + LPAR + expr + RPAR + OneOrMore(caseItem) + endcase) |
            (forever + stmt) | (repeat + LPAR + expr + RPAR + stmt) |
            (while_ + LPAR + expr + RPAR + stmt) |
            (for_ + LPAR + assgnmt + SEMI + Group(expr) + SEMI + assgnmt +
             RPAR + stmt) | (fork + ZeroOrMore(stmt) + join) |
            (fork + COLON + identifier + ZeroOrMore(blockDecl) +
             ZeroOrMore(stmt) + end) |
            (wait + LPAR + expr + RPAR + stmtOrNull) |
            ("->" + identifier + SEMI) | (disable + identifier + SEMI) |
            (assign + assgnmt + SEMI) | (deassign + lvalue + SEMI) |
            (force + assgnmt + SEMI) | (release + lvalue + SEMI) |
            (begin + COLON + identifier + ZeroOrMore(blockDecl) +
             ZeroOrMore(stmt) + end).setName("begin:label-end") |
            # these *have* to go at the end of the list!!!
            (assgnmt + SEMI) | (nbAssgnmt + SEMI) |
            (Combine(Optional("$") + identifier) +
             Optional(LPAR + delimitedList(expr | empty) + RPAR) +
             SEMI)).setName("stmtBody")
        """
        x::=<blocking_assignment> ;
        x||= <non_blocking_assignment> ;
        x||= if ( <expression> ) <statement_or_null>
        x||= if ( <expression> ) <statement_or_null> else <statement_or_null>
        x||= case ( <expression> ) <case_item>+ endcase
        x||= casez ( <expression> ) <case_item>+ endcase
        x||= casex ( <expression> ) <case_item>+ endcase
        x||= forever <statement>
        x||= repeat ( <expression> ) <statement>
        x||= while ( <expression> ) <statement>
        x||= for ( <assignment> ; <expression> ; <assignment> ) <statement>
        x||= <delay_or_event_control> <statement_or_null>
        x||= wait ( <expression> ) <statement_or_null>
        x||= -> <name_of_event> ;
        x||= <seq_block>
        x||= <par_block>
        x||= <task_enable>
        x||= <system_task_enable>
        x||= disable <name_of_task> ;
        x||= disable <name_of_block> ;
        x||= assign <assignment> ;
        x||= deassign <lvalue> ;
        x||= force <assignment> ;
        x||= release <lvalue> ;
        """
        alwaysStmt = Group("always" + Optional(eventControl) +
                           stmt).setName("alwaysStmt")
        initialStmt = Group("initial" + stmt).setName("initialStmt")

        chargeStrength = Group(LPAR + oneOf("small medium large") +
                               RPAR).setName("chargeStrength")

        continuousAssign = Group(assign + Optional(driveStrength) +
                                 Optional(delay) + delimitedList(assgnmt) +
                                 SEMI).setName("continuousAssign")

        tfDecl = (parameterDecl | inputDecl | outputDecl | inoutDecl |
                  regDecl | timeDecl | integerDecl | realDecl)

        functionDecl = Group("function" +
                             Optional(range | "integer" | "real") +
                             identifier + SEMI + Group(OneOrMore(tfDecl)) +
                             Group(ZeroOrMore(stmt)) + "endfunction")

        inputOutput = oneOf("input output")
        netDecl1Arg = (nettype + Optional(expandRange) + Optional(delay) +
                       Group(delimitedList(~inputOutput + identifier)))
        netDecl2Arg = ("trireg" + Optional(chargeStrength) +
                       Optional(expandRange) + Optional(delay) +
                       Group(delimitedList(~inputOutput + identifier)))
        netDecl3Arg = (nettype + Optional(driveStrength) +
                       Optional(expandRange) + Optional(delay) +
                       Group(delimitedList(assgnmt)))
        netDecl1 = Group(netDecl1Arg + SEMI).setName("netDecl1")
        netDecl2 = Group(netDecl2Arg + SEMI).setName("netDecl2")
        netDecl3 = Group(netDecl3Arg + SEMI).setName("netDecl3")

        gateType = oneOf("and nand or nor xor xnor buf bufif0 bufif1 "
                         "not notif0 notif1 pulldown pullup nmos rnmos "
                         "pmos rpmos cmos rcmos tran rtran tranif0 "
                         "rtranif0 tranif1 rtranif1")
        gateInstance = Optional(Group(identifier + Optional(range))) + \
            LPAR + Group(delimitedList(expr)) + RPAR
        gateDecl = Group(gateType + Optional(driveStrength) +
                         Optional(delay) + delimitedList(gateInstance) +
                         SEMI)

        udpInstance = Group(
            Group(identifier + Optional(range | subscrRef)) + LPAR +
            Group(delimitedList(expr)) + RPAR)
        udpInstantiation = Group(identifier - Optional(driveStrength) +
                                 Optional(delay) +
                                 delimitedList(udpInstance) +
                                 SEMI).setName("udpInstantiation")

        parameterValueAssignment = Group(
            Literal("#") + LPAR + Group(delimitedList(expr)) + RPAR)
        namedPortConnection = Group(DOT + identifier + LPAR + expr +
                                    RPAR).setName(
                                        "namedPortConnection")  #.setDebug()
        # NOTE(review): another inline match self-test via
        # ParserElement.__eq__ — confirm intended.
        assert (r'.\abc (abc )' == namedPortConnection)
        modulePortConnection = expr | empty
        #~ moduleInstance = Group( Group ( identifier + Optional(range) ) +
        #~ ( delimitedList( modulePortConnection ) |
        #~ delimitedList( namedPortConnection ) ) )
        inst_args = Group(LPAR +
                          (delimitedList(namedPortConnection) |
                           delimitedList(modulePortConnection)) +
                          RPAR).setName("inst_args")
        moduleInstance = Group(
            Group(identifier + Optional(range)) + inst_args).setName(
                "moduleInstance")  #.setDebug()

        moduleInstantiation = Group(
            identifier + Optional(parameterValueAssignment) +
            delimitedList(moduleInstance).setName("moduleInstanceList") +
            SEMI).setName("moduleInstantiation")

        parameterOverride = Group("defparam" + delimitedList(paramAssgnmt) +
                                  SEMI)
        task = Group("task" + identifier + SEMI + ZeroOrMore(tfDecl) +
                     stmtOrNull + "endtask")

        specparamDecl = Group("specparam" + delimitedList(paramAssgnmt) +
                              SEMI)

        pathDescr1 = Group(LPAR + subscrIdentifier + "=>" +
                           subscrIdentifier + RPAR)
        pathDescr2 = Group(LPAR + Group(delimitedList(subscrIdentifier)) +
                           "*>" + Group(delimitedList(subscrIdentifier)) +
                           RPAR)
        pathDescr3 = Group(LPAR + Group(delimitedList(subscrIdentifier)) +
                           "=>" + Group(delimitedList(subscrIdentifier)) +
                           RPAR)
        pathDelayValue = Group(
            (LPAR + Group(delimitedList(mintypmaxExpr | expr)) + RPAR) |
            mintypmaxExpr | expr)
        pathDecl = Group((pathDescr1 | pathDescr2 | pathDescr3) + EQ +
                         pathDelayValue + SEMI).setName("pathDecl")

        portConditionExpr = Forward()
        portConditionTerm = Optional(unop) + subscrIdentifier
        portConditionExpr << portConditionTerm + Optional(
            binop + portConditionExpr)
        polarityOp = oneOf("+ -")
        levelSensitivePathDecl1 = Group(
            if_ + Group(LPAR + portConditionExpr + RPAR) +
            subscrIdentifier + Optional(polarityOp) + "=>" +
            subscrIdentifier + EQ + pathDelayValue + SEMI)
        levelSensitivePathDecl2 = Group(
            if_ + Group(LPAR + portConditionExpr + RPAR) + LPAR +
            Group(delimitedList(subscrIdentifier)) + Optional(polarityOp) +
            "*>" + Group(delimitedList(subscrIdentifier)) + RPAR + EQ +
            pathDelayValue + SEMI)
        levelSensitivePathDecl = levelSensitivePathDecl1 | levelSensitivePathDecl2
        edgeIdentifier = posedge | negedge
        edgeSensitivePathDecl1 = Group(
            Optional(if_ + Group(LPAR + expr + RPAR)) + LPAR +
            Optional(edgeIdentifier) + subscrIdentifier + "=>" + LPAR +
            subscrIdentifier + Optional(polarityOp) + COLON + expr + RPAR +
            RPAR + EQ + pathDelayValue + SEMI)
        edgeSensitivePathDecl2 = Group(
            Optional(if_ + Group(LPAR + expr + RPAR)) + LPAR +
            Optional(edgeIdentifier) + subscrIdentifier + "*>" + LPAR +
            delimitedList(subscrIdentifier) + Optional(polarityOp) + COLON +
            expr + RPAR + RPAR + EQ + pathDelayValue + SEMI)
        edgeSensitivePathDecl = edgeSensitivePathDecl1 | edgeSensitivePathDecl2

        edgeDescr = oneOf("01 10 0x x1 1x x0").setName("edgeDescr")

        timCheckEventControl = Group(posedge | negedge |
                                     (edge + LBRACK +
                                      delimitedList(edgeDescr) + RBRACK))
        timCheckCond = Forward()
        timCondBinop = oneOf("== === != !==")
        timCheckCondTerm = (expr + timCondBinop + scalarConst) | (
            Optional("~") + expr)
        timCheckCond << ((LPAR + timCheckCond + RPAR) | timCheckCondTerm)
        timCheckEvent = Group(
            Optional(timCheckEventControl) + subscrIdentifier +
            Optional("&&&" + timCheckCond))
        timCheckLimit = expr
        controlledTimingCheckEvent = Group(timCheckEventControl +
                                           subscrIdentifier +
                                           Optional("&&&" + timCheckCond))
        notifyRegister = identifier

        systemTimingCheck1 = Group("$setup" + LPAR + timCheckEvent + COMMA +
                                   timCheckEvent + COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        systemTimingCheck2 = Group("$hold" + LPAR + timCheckEvent + COMMA +
                                   timCheckEvent + COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        systemTimingCheck3 = Group("$period" + LPAR +
                                   controlledTimingCheckEvent + COMMA +
                                   timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        systemTimingCheck4 = Group("$width" + LPAR +
                                   controlledTimingCheckEvent + COMMA +
                                   timCheckLimit +
                                   Optional(COMMA + expr + COMMA +
                                            notifyRegister) + RPAR + SEMI)
        systemTimingCheck5 = Group("$skew" + LPAR + timCheckEvent + COMMA +
                                   timCheckEvent + COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        systemTimingCheck6 = Group("$recovery" + LPAR +
                                   controlledTimingCheckEvent + COMMA +
                                   timCheckEvent + COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        systemTimingCheck7 = Group("$setuphold" + LPAR + timCheckEvent +
                                   COMMA + timCheckEvent + COMMA +
                                   timCheckLimit + COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        # FollowedBy('$') is a cheap guard so the seven alternatives are
        # only attempted when the input actually starts with a $ task name
        systemTimingCheck = (FollowedBy('$') +
                             (systemTimingCheck1 | systemTimingCheck2 |
                              systemTimingCheck3 | systemTimingCheck4 |
                              systemTimingCheck5 | systemTimingCheck6 |
                              systemTimingCheck7)).setName(
                                  "systemTimingCheck")
        sdpd = if_ + Group(LPAR + expr + RPAR) + \
            (pathDescr1 | pathDescr2) + EQ + pathDelayValue + SEMI

        specifyItem = ~Keyword("endspecify") + (
            specparamDecl | pathDecl | levelSensitivePathDecl |
            edgeSensitivePathDecl | systemTimingCheck | sdpd)
        """
        x::= <specparam_declaration>
        x||= <path_declaration>
        x||= <level_sensitive_path_declaration>
        x||= <edge_sensitive_path_declaration>
        x||= <system_timing_check>
        x||= <sdpd>
        """
        specifyBlock = Group("specify" + ZeroOrMore(specifyItem) +
                             "endspecify").setName("specifyBlock")

        moduleItem = ~Keyword("endmodule") + (
            parameterDecl | inputDecl | outputDecl | inoutDecl | regDecl |
            netDecl3 | netDecl1 | netDecl2 | timeDecl | integerDecl |
            realDecl | eventDecl | gateDecl | parameterOverride |
            continuousAssign | specifyBlock | initialStmt | alwaysStmt |
            task | functionDecl |
            # these have to be at the end - they start with identifiers
            moduleInstantiation | udpInstantiation)
        """
        All possible moduleItems, from Verilog grammar spec
        x::= <parameter_declaration>
        x||= <input_declaration>
        x||= <output_declaration>
        x||= <inout_declaration>
        ?||= <net_declaration>  (spec does not seem consistent for this item)
        x||= <reg_declaration>
        x||= <time_declaration>
        x||= <integer_declaration>
        x||= <real_declaration>
        x||= <event_declaration>
        x||= <gate_declaration>
        x||= <UDP_instantiation>
        x||= <module_instantiation>
        x||= <parameter_override>
        x||= <continuous_assign>
        x||= <specify_block>
        x||= <initial_statement>
        x||= <always_statement>
        x||= <task>
        x||= <function>
        """
        portRef = subscrIdentifier
        portExpr = portRef | Group(LBRACE + delimitedList(portRef) + RBRACE)
        port = portExpr | Group(DOT + identifier + LPAR + portExpr + RPAR)

        moduleHdr = Group(
            oneOf("module macromodule") + identifier + Optional(
                LPAR + Group(
                    Optional(
                        delimitedList(
                            Group(
                                oneOf("input output") +
                                (netDecl1Arg | netDecl2Arg | netDecl3Arg))
                            | port))) + RPAR) + SEMI).setName("moduleHdr")

        module = Group(moduleHdr + Group(ZeroOrMore(moduleItem)) +
                       "endmodule").setName("module")  #.setDebug()

        udpDecl = outputDecl | inputDecl | regDecl
        #~ udpInitVal = oneOf("1'b0 1'b1 1'bx 1'bX 1'B0 1'B1 1'Bx 1'BX 1 0 x X")
        udpInitVal = (Regex("1'[bB][01xX]") |
                      Regex("[01xX]")).setName("udpInitVal")
        udpInitialStmt = Group("initial" + identifier + EQ + udpInitVal +
                               SEMI).setName("udpInitialStmt")

        levelSymbol = oneOf("0 1 x X ? b B")
        levelInputList = Group(
            OneOrMore(levelSymbol).setName("levelInpList"))
        outputSymbol = oneOf("0 1 x X")
        combEntry = Group(levelInputList + COLON + outputSymbol + SEMI)
        edgeSymbol = oneOf("r R f F p P n N *")
        edge = Group(LPAR + levelSymbol + levelSymbol + RPAR) | \
            Group(edgeSymbol)
        edgeInputList = Group(
            ZeroOrMore(levelSymbol) + edge + ZeroOrMore(levelSymbol))
        inputList = levelInputList | edgeInputList
        seqEntry = Group(inputList + COLON + levelSymbol + COLON +
                         (outputSymbol | "-") + SEMI).setName("seqEntry")
        udpTableDefn = Group("table" + OneOrMore(combEntry | seqEntry) +
                             "endtable").setName("table")
        """
        <UDP>
        ::= primitive <name_of_UDP> ( <name_of_variable>
        <,<name_of_variable>>* ) ;
        <UDP_declaration>+
        <UDP_initial_statement>?
        <table_definition>
        endprimitive
        """
        udp = Group("primitive" + identifier + LPAR +
                    Group(delimitedList(identifier)) + RPAR + SEMI +
                    OneOrMore(udpDecl) + Optional(udpInitialStmt) +
                    udpTableDefn + "endprimitive")

        verilogbnf = OneOrMore(module | udp) + StringEnd()

        # comments and compiler directives may appear anywhere
        verilogbnf.ignore(cppStyleComment)
        verilogbnf.ignore(compilerDirective)

    return verilogbnf
# NOTE(review): this `eval` reads self.value, so it is presumably a method of
# an EvalComparisonOp-style class whose header lies outside this chunk.
def eval(self):
    """Evaluate a chained comparison (e.g. ``a < b <= c``).

    The chain holds only if every pairwise comparison holds, so we can
    short-circuit on the first failing comparison.
    """
    val1 = self.value[0].eval()
    for op, val in operatorOperands(self.value[1:]):
        # look up the comparison function registered for this operator
        fn = EvalComparisonOp.opMap[op]
        val2 = val.eval()
        if not fn(val1, val2):
            break
        # the right operand becomes the left operand of the next comparison
        val1 = val2
    else:
        return True  # loop ran to completion: every comparison held
    return False  # `break` taken: some comparison failed


# define the parser
integer = Word(nums)
real = Combine(Word(nums) + "." + Word(nums))
variable = Word(alphas, exact=1)  # single-letter variable names only
operand = real | integer | variable

signop = oneOf('+ -')
multop = oneOf('* /')
plusop = oneOf('+ -')

# use parse actions to attach EvalXXX constructors to sub-expressions
operand.setParseAction(EvalConstant)
arith_expr = operatorPrecedence(operand, [
    (signop, 1, opAssoc.RIGHT, EvalSignOp),
    (multop, 2, opAssoc.LEFT, EvalMultOp),
    (plusop, 2, opAssoc.LEFT, EvalAddOp),
])
# # Copyright 2010, Paul McGuire # # A partial implementation of a parser of Excel formula expressions. # from pyparsing import (CaselessKeyword, Suppress, Word, alphas, alphanums, nums, Optional, Group, oneOf, Forward, Regex, operatorPrecedence, opAssoc, dblQuotedString, delimitedList, Combine, Literal, QuotedString) EQ, EXCL, LPAR, RPAR, COLON, COMMA = map(Suppress, '=!():,') EXCL, DOLLAR = map(Literal, "!$") sheetRef = Word(alphas, alphanums) | QuotedString("'", escQuote="''") colRef = Optional(DOLLAR) + Word(alphas, max=2) rowRef = Optional(DOLLAR) + Word(nums) cellRef = Combine( Group(Optional(sheetRef + EXCL)("sheet") + colRef("col") + rowRef("row"))) cellRange = (Group(cellRef("start") + COLON + cellRef("end"))("range") | cellRef | Word(alphas, alphanums)) expr = Forward() COMPARISON_OP = oneOf("< = > >= <= != <>") condExpr = expr + COMPARISON_OP + expr ifFunc = (CaselessKeyword("if") + LPAR + Group(condExpr)("condition") + COMMA + expr("if_true") + COMMA + expr("if_false") + RPAR) statFunc = lambda name: CaselessKeyword(name) + LPAR + delimitedList(expr ) + RPAR sumFunc = statFunc("sum")
acl_geoip_country_type = Word(alphanums + '-\'', max=64)('country_type') acl_geoip_region_type = Word(alphanums + '-\'', max=64)('region') acl_geoip_city_type = Word(alphanums + '-\'', max=64)('city') acl_geoip_continent_type = Word(alphanums, max=64)('continent') acl_geoip_postal_type = Word(alphanums + '--/', max=64)('postal') acl_geoip_metro_type = Word(alphanums + '-\'', max=64)('metro') acl_geoip_area_type = Word(alphanums + '-\'', max=64)('area') acl_geoip_tz_type = Word(alphanums + '-/', max=64)('tz') acl_geoip_isp_type = Word(alphanums + '-/.,=&!@#$%^&*()_+-=[]{}\\<>?', max=64)('isp') acl_geoip_org_type = Word(alphanums + '-/.,=&!@#$%^&*()_+-=[]{}\\<>?', max=64)('org') acl_geoip_asnum_type = Combine( CaselessLiteral('A') + CaselessLiteral('S') + Word(nums, max=6)) acl_geoip_domain_type = fqdn_name('domain') acl_geoip_netspeed_type = Word(alphanums + '-+><=,', max=64)('netspeed') acl_geoip_db_element = ( CaselessKeyword('db') + acl_geoip_db_field_value # No semicolon here! ) acl_geoip_country_element = ( CaselessKeyword('country').suppress() + (acl_geoip_country_type ^ acl_geoip_ISO_3166_alpha_3 ^ acl_geoip_ISO_3166_1_alpha_2))('country') acl_geoip_region_element = (CaselessKeyword('region').suppress() +
def label_BNF():
    """Build (once) and return the pyparsing grammar for dot record labels.

    The grammar is cached in the module-global `labelbnf`; subsequent calls
    return the same ParserElement.
    """
    global labelbnf
    if not labelbnf:
        # punctuation
        lparen = Literal("(")
        rparen = Literal(")")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lcbrack = Literal("{")
        rcbrack = Literal("}")
        bang = Literal("!")
        rvsep = Literal("|")
        equals = Literal("=")
        semi = Literal(";")
        colon = Literal(":")
        sharp = Literal("#")
        tilde = Literal("~")
        akrol = Literal("@")
        dollar = Literal("$")
        comma = Literal(",")
        nbsp = Literal(" ")
        # dot emits "\l" line breaks; match both the raw and escaped forms
        dotnewline = (Literal("\l") | Literal("\\l"))
        quote = "'"

        def exceptfor(x):
            # every printable character except those in x, plus whitespace
            return "".join([c for c in printables if c not in x]) + " \t"

        # single-quoted constant
        CONST = Combine(
            Literal(quote) + Word(exceptfor(quote)) + Literal(quote))
        BASICID = Combine(
            Word(alphanums + "_-") + Optional(Literal(".") + Word(nums)))
        senc = Literal("senc")
        aenc = Literal("aenc")
        KEYWORD = senc | aenc
        # identifier with an optional sort prefix ($, ~, #); must not be a keyword
        ID = ~KEYWORD + Combine(Optional(dollar | tilde | sharp) + BASICID)
        TIME = Group(akrol + Combine(sharp + BASICID))  # @#timepoint
        TERM = Forward()
        TERMLIST = TERM + ZeroOrMore(comma + TERM)
        TUPLE1 = Group(Literal('<') + TERMLIST + Literal('>'))
        TUPLE2 = Group(Literal('\<') + TERMLIST + Literal('\>'))
        TUPLE = TUPLE1 | TUPLE2
        ARG = Literal('(') + TERMLIST + Literal(')')
        FUNC = Group(ID + Optional(ARG))
        ENC = Group((senc | aenc) + ARG)  # symmetric/asymmetric encryption
        TERM << (ENC | FUNC | TUPLE | CONST)
        TPAREN = lparen + TERMLIST + rparen
        #TBRACK = Literal('[]')
        TBRACK = lbrack + Optional(TERMLIST) + rbrack
        FACT = Group(
            Combine(Optional(bang) + ID) + Optional(TPAREN | TBRACK) +
            Optional(TIME))
        PORT = Combine(Literal("<") + BASICID + Literal(">"))  # record port
        SINGLE = Optional(sharp + ID + colon) + (FACT | TERM)
        FIELDID = Group(Optional(PORT) + SINGLE)
        LABEL = Forward()
        # a field is either a nested {..} label or a single field entry
        FIELD = (lcbrack + LABEL + rcbrack) | FIELDID
        LABEL << FIELD + ZeroOrMore(rvsep + FIELD)
        labelbnf = LABEL
        # spaces and dot line breaks are insignificant in labels
        labelbnf.ignore(nbsp)
        labelbnf.ignore(dotnewline)
    return labelbnf
val1 = self._eval(context) if not val1: return val1 for _eval in self.operator_eval: val2 = _eval(context) val1 = val1 and val2 if not val1: return val1 return val1 word_characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_0123456789' expr = Forward() # define the parser integer = Word(nums) real = Combine(Word(nums) + "." + Word(nums)) constant = oneOf('True False None yes no') + WordEnd(word_characters) # TODO: expand on variable regex simple_variable = Regex(r'([a-zA-Z0-9_]+)') variable = Regex(r'([a-zA-Z0-9\._]+)') explicit_variable = '$' + Regex(r'([a-zA-Z0-9\._]+)') current_scope = Literal('$$') triple_string = (QuotedString("'''", escChar=None, unquoteResults=False) | QuotedString('"""', escChar=None, unquoteResults=False)) string = (QuotedString('"', escChar='\\', unquoteResults=False) | QuotedString("'", escChar="\\", unquoteResults=False))
def script(self):
    """Build and return the pyparsing grammar for an nginx configuration.

    Returns the top-level `sub_block` parser: one or more grouped
    statements (directives, includes, comments, and nested blocks).
    """
    # constants
    left_bracket = Suppress("{")
    right_bracket = Suppress("}")
    semicolon = Suppress(";")
    space = White().suppress()
    keyword = Word(alphanums + ".+-_/")
    path = Word(alphanums + ".-_/")
    variable = Word("$_-" + alphanums)
    # unquoted value: parenthesized runs, ${var} expansions, or any run of
    # characters that are not whitespace/structural
    value_wq = Regex(r'(?:\([^\s;]*\)|\$\{\w+\}|[^\s;(){}])+')
    # NOTE(review): NginxQuotedString is a project-local helper (not shown
    # here); presumably a QuotedString variant tuned for nginx escaping.
    value_sq = NginxQuotedString(quoteChar="'")
    value_dq = NginxQuotedString(quoteChar='"')
    value = (value_dq | value_sq | value_wq)
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    # "~*" must be tried before "~" so the longer match wins
    location_modifier = (Keyword("=") | Keyword("~*") | Keyword("~") |
                         Keyword("^~"))
    # modifier for if statement
    if_modifier = Combine(
        Optional("!") +
        (Keyword("=") | Keyword("~*") | Keyword("~") |
         (Literal("-") +
          (Literal("f") | Literal("d") | Literal("e") | Literal("x")))))
    condition_body = (
        (if_modifier + Optional(space) + value) |
        (variable + Optional(space + if_modifier + Optional(space) + value)))
    # This ugly workaround needed to parse unquoted regex with nested parentheses
    # pyparsing.nestedExpr doesn't work in some rare cases like: ($http_user_agent ~* \( )
    # so we capture all content between parentheses and then parse it:)
    # TODO(buglloc): may be use something better?
    condition = Regex(r'\(.*\)').setParseAction(
        lambda s, l, t: condition_body.parseString(t[0][1:-1]))

    # rules
    include = (Keyword("include") + space + value + semicolon)("include")
    directive = (keyword + ZeroOrMore(space + value) +
                 semicolon)("directive")
    # marker emitted by `nginx -T` style dumps to separate config files
    file_delimiter = (Suppress("# configuration file ") + path +
                      Suppress(":"))("file_delimiter")
    # NOTE(review): _fix_comment is a project-local parse action not shown
    # in this chunk.
    comment = (Regex(r"#.*"))("comment").setParseAction(_fix_comment)
    hash_value = Group(value + ZeroOrMore(space + value) +
                       semicolon)("hash_value")

    # forward declarations so blocks can nest recursively
    generic_block = Forward()
    if_block = Forward()
    location_block = Forward()
    hash_block = Forward()
    unparsed_block = Forward()

    sub_block = OneOrMore(
        Group(if_block | location_block | hash_block | generic_block |
              include | directive | file_delimiter | comment |
              unparsed_block))

    if_block << (
        Keyword("if") + Group(condition) +
        Group(left_bracket + Optional(sub_block) + right_bracket))("block")

    location_block << (Keyword("location") + Group(
        Optional(space + location_modifier) + Optional(space) + value) +
        Group(left_bracket + Optional(sub_block) +
              right_bracket))("block")

    # blocks like `map`/`geo` whose body is key/value entries, not directives
    hash_block << (keyword + Group(OneOrMore(space + variable)) +
                   Group(left_bracket + Optional(OneOrMore(hash_value)) +
                         right_bracket))("block")

    generic_block << (
        keyword + Group(ZeroOrMore(space + variable)) +
        Group(left_bracket + Optional(sub_block) + right_bracket))("block")

    # fallback: capture a brace-balanced body verbatim when it cannot be
    # parsed as any of the structured block forms above
    unparsed_block << (
        keyword + Group(ZeroOrMore(space + variable)) +
        nestedExpr(opener="{", closer="}"))("unparsed_block")

    return sub_block
pythonStyleComment, ParseBaseException) except ImportError: print('pyparsing is not available') sys.exit(77) try: from evdev.ecodes import ecodes except ImportError: ecodes = None print('WARNING: evdev is not available') EOL = LineEnd().suppress() EMPTYLINE = LineEnd() COMMENTLINE = pythonStyleComment + EOL INTEGER = Word(nums) REAL = Combine((INTEGER + Optional('.' + Optional(INTEGER))) ^ ('.' + INTEGER)) UDEV_TAG = Word(string.ascii_uppercase, alphanums + '_') TYPES = { 'libinput': ('name', 'touchpad', 'mouse', 'keyboard', 'tablet'), } @functools.lru_cache() def hwdb_grammar(): ParserElement.setDefaultWhitespaceChars('') prefix = Or(category + ':' + Or(conn) + ':' for category, conn in TYPES.items()) matchline = Combine(prefix + Word(printables + ' ' + '®')) + EOL propertyline = (White(' ', exact=1).suppress() +
def make_grammar():
    """Build and return the pyparsing grammar for the expression language.

    Returns the top-level `expr` ParserElement.  The grammar implements a
    C-like precedence ladder (unary, multiplicative, additive, shift,
    relational, equality, bitwise, logical) and attaches the project's AST
    constructors (Operator, UnaryOperator, Function, Number, String,
    Variable) as parse actions.
    """
    from pyparsing import (ParserElement, Literal, Word, Forward,
                           Optional, QuotedString, Combine,
                           ZeroOrMore, Keyword, alphas, alphanums, nums)

    # packrat memoization speeds up the heavily backtracking ladder below
    ParserElement.enablePackrat()

    # --- operator tokens --------------------------------------------------
    plus = Literal("+")
    minus = Literal("-")
    mul = Literal("*")
    div = Literal("/")
    floordiv = Literal("//")
    mod = Literal("%")

    lt = Literal("<")
    le = Literal("<=")
    gt = Literal(">")
    ge = Literal(">=")
    lshift = Literal("<<")
    rshift = Literal(">>")
    equal = Literal("==") | Literal("=") | Literal("!=")

    bitwise_not = Literal("~")
    bitwise_and = Literal("&")
    bitwise_or = Literal("|")
    bitwise_xor = Literal("^")

    logical_not = Literal("!") | Keyword("not")
    # BUGFIX: "and" must be a Keyword, not a Literal — a Literal would match
    # the prefix of an identifier such as "android", silently mis-parsing
    # `a android` as `a AND roid`.  "or"/"not" already use Keyword.
    logical_and = Literal("&&") | Keyword("and") | Keyword("AND")
    logical_or = Literal("||") | Keyword("or") | Keyword("OR")

    # --- terminals --------------------------------------------------------
    ident = Word(alphas + "_", alphanums + "_")
    functionname = Word(alphas + "_", alphanums + "_")
    unit = Word(alphas)  # optional unit suffix, e.g. "10px"
    int_number = Word(nums)
    float_number = Combine(Word(nums) + Optional(Literal(".") + Word(nums)))
    # NOTE: float_number also matches a bare integer, so int_number is
    # effectively shadowed and all numeric literals arrive as floats.
    number = (float_number | int_number) + Optional(unit)

    lparent = Literal("(").suppress()
    rparent = Literal(")").suppress()

    # BUGFIX: longest alternatives first.  MatchFirst ("|" / untried-order
    # tuples) tries alternatives left to right, so with (lt | le | gt | ge)
    # the "<" token consumed the first character of "<=" (and ">" of ">="),
    # making "<=" and ">=" unparsable.
    relational_op = (le | ge | lt | gt)
    shift = (lshift | rshift)
    add_op = (plus | minus)
    # floordiv ("//") is correctly tried before div ("/")
    mul_op = (mul | floordiv | div | mod)

    expr = Forward()
    string = (QuotedString('"') | QuotedString("'"))
    primary_expr = ident | number | string | (lparent + expr + rparent)

    def make_op(s, loc, toks):
        """Fold a flat [operand, op, operand, op, ...] list into a
        left-associative Operator tree."""
        if len(toks) == 1:
            return toks[0]
        else:
            def loop(lhs, rest):
                if len(rest) == 0:
                    return lhs
                else:
                    return loop(Operator(rest[0], lhs, rest[1]), rest[2:])
            return loop(Operator(toks[1], toks[0], toks[2]), toks[3:])

    def make_unary(s, loc, toks):
        """Fold stacked unary prefixes into nested UnaryOperator nodes."""
        if len(toks) == 1:
            return toks[0]
        else:
            return UnaryOperator(toks[0], make_unary(s, loc, toks[1:]))

    argument_expression_list = expr + ZeroOrMore(
        Literal(",").suppress() + expr)
    function_expression = (functionname + lparent +
                           argument_expression_list + rparent)
    postfix_expression = function_expression | primary_expr
    unary_expr = ZeroOrMore(bitwise_not | logical_not | minus |
                            plus) + postfix_expression
    cast_expression = unary_expr | postfix_expression
    # precedence ladder, tightest binding first
    mult_expr = cast_expression + ZeroOrMore(mul_op + cast_expression)
    add_expr = mult_expr + ZeroOrMore(add_op + mult_expr)
    shift_expr = add_expr + ZeroOrMore(shift + add_expr)
    relational_expr = shift_expr + ZeroOrMore(relational_op + shift_expr)
    equality_expr = relational_expr + ZeroOrMore(equal + relational_expr)
    bitwise_and_expr = equality_expr + ZeroOrMore(bitwise_and +
                                                  equality_expr)
    bitwise_xor_expr = bitwise_and_expr + ZeroOrMore(bitwise_xor +
                                                     bitwise_and_expr)
    bitwise_or_expr = bitwise_xor_expr + ZeroOrMore(bitwise_or +
                                                    bitwise_xor_expr)
    logical_and_expr = bitwise_or_expr + ZeroOrMore(logical_and +
                                                    bitwise_or_expr)
    logical_or_expr = logical_and_expr + ZeroOrMore(logical_or +
                                                    logical_and_expr)
    expr <<= logical_or_expr

    # attach the AST node constructors
    function_expression.setParseAction(Function)
    int_number.setParseAction(lambda s, loc, toks: int(toks[0]))
    float_number.setParseAction(lambda s, loc, toks: float(toks[0]))
    number.setParseAction(Number)
    string.setParseAction(String)
    ident.setParseAction(Variable)
    unary_expr.setParseAction(make_unary)
    mult_expr.setParseAction(make_op)
    add_expr.setParseAction(make_op)
    shift_expr.setParseAction(make_op)
    relational_expr.setParseAction(make_op)
    equality_expr.setParseAction(make_op)
    bitwise_and_expr.setParseAction(make_op)
    bitwise_xor_expr.setParseAction(make_op)
    bitwise_or_expr.setParseAction(make_op)
    logical_and_expr.setParseAction(make_op)
    logical_or_expr.setParseAction(make_op)

    return expr
def parse_algebra(self):
    """
    Parse an algebraic expression into a tree.

    Store a `pyparsing.ParseResult` in `self.tree` with proper groupings to
    reflect parenthesis and order of operations. Leave all operators in the
    tree and do not parse any strings of numbers into their float versions.

    Adding the groups and result names makes the `repr()` of the result
    really gross. For debugging, use something like
      print OBJ.tree.asXML()
    """
    # 0.33 or 7 or .34 or 16.
    number_part = Word(nums)
    inner_number = (number_part + Optional("." + Optional(number_part))) | (
        "." + number_part)
    # pyparsing allows spaces between tokens--`Combine` prevents that.
    inner_number = Combine(inner_number)

    # SI suffixes and percent.
    # NOTE(review): SUFFIXES is defined elsewhere in this module;
    # presumably a mapping/iterable of suffix strings like "k", "%".
    number_suffix = MatchFirst(Literal(k) for k in SUFFIXES)

    # 0.33k or 17
    plus_minus = Literal('+') | Literal('-')
    number = Group(
        Optional(plus_minus) + inner_number +
        Optional(CaselessLiteral("E") + Optional(plus_minus) + number_part) +
        Optional(number_suffix))
    number = number("number")

    # Predefine recursive variables.
    expr = Forward()

    # Handle variables passed in. They must start with a letter
    # and may contain numbers and underscores afterward.
    inner_varname = Combine(Word(alphas, alphanums + "_") + ZeroOrMore("'"))
    # Alternative variable name in tensor format
    # Tensor name must start with a letter, continue with alphanums
    # Indices may be alphanumeric
    # e.g., U_{ijk}^{123}
    upper_indices = Literal("^{") + Word(alphanums) + Literal("}")
    lower_indices = Literal("_{") + Word(alphanums) + Literal("}")
    tensor_lower = Combine(
        Word(alphas, alphanums) + lower_indices + ZeroOrMore("'"))
    tensor_mixed = Combine(
        Word(alphas, alphanums) + Optional(lower_indices) + upper_indices +
        ZeroOrMore("'"))

    # Test for mixed tensor first, then lower tensor alone, then generic variable name
    varname = Group(tensor_mixed | tensor_lower | inner_varname)("variable")
    varname.setParseAction(self.variable_parse_action)

    # Same thing for functions.
    function = Group(inner_varname + Suppress("(") + expr +
                     Suppress(")"))("function")
    function.setParseAction(self.function_parse_action)

    atom = number | function | varname | "(" + expr + ")"
    atom = Group(atom)("atom")

    # Do the following in the correct order to preserve order of operation.
    pow_term = atom + ZeroOrMore("^" + atom)
    pow_term = Group(pow_term)("power")

    par_term = pow_term + ZeroOrMore('||' + pow_term)  # 5k || 4k
    par_term = Group(par_term)("parallel")

    prod_term = par_term + ZeroOrMore(
        (Literal('*') | Literal('/')) + par_term)  # 7 * 5 / 4
    prod_term = Group(prod_term)("product")

    sum_term = Optional(plus_minus) + prod_term + ZeroOrMore(
        plus_minus + prod_term)  # -5 + 4 - 3
    sum_term = Group(sum_term)("sum")

    # Finish the recursion.
    expr << sum_term  # pylint: disable=pointless-statement
    # `stringEnd` forces the whole input to be consumed; parse errors raise.
    self.tree = (expr + stringEnd).parseString(self.math_expr)[0]
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from pyparsing import Combine
from pyparsing import hexnums
from pyparsing import Literal
from pyparsing import Word

from undebt.pattern.util import tokens_as_list


# Matches a hex literal such as "0x80", keeping only the digits after "0x".
grammar = Combine(Literal("0x").suppress() + Word(hexnums))


@tokens_as_list(assert_len=1)
def replace(tokens):
    """0x00000001 --> 1 << 0"""
    value = int(tokens[0], 16)
    # A positive power of two has exactly one set bit; anything else
    # (including zero) is left untouched by returning None.
    if value > 0 and value & (value - 1) == 0:
        return "1 << " + str(value.bit_length() - 1)
    return None
def parse_file(self):
    """Parses an existing namelist file and creates a deck of cards to
    hold the data. After this is executed, you need to call the
    ``load_model()`` method to extract the variables from this data
    structure."""

    # Read the whole file up front; a context manager guarantees the
    # handle is closed even if readlines() raises.
    with open(self.filename, 'r') as infile:
        data = infile.readlines()

    # Lots of numerical tokens for recognizing various kinds of numbers
    digits = Word(nums)
    dot = "."
    sign = oneOf("+ -")
    ee = CaselessLiteral('E') | CaselessLiteral('D')

    num_int = ToInteger(Combine(Optional(sign) + digits))

    num_float = ToFloat(Combine(
        Optional(sign) +
        ((digits + dot + Optional(digits)) | (dot + digits)) +
        Optional(ee + Optional(sign) + digits)
    ))

    # special case for a float written like "3e5"
    mixed_exp = ToFloat(Combine(digits + ee + Optional(sign) + digits))

    # I don't suppose we need these, but just in case (plus it's easy)
    nan = ToFloat(oneOf("NaN Inf -Inf"))

    numval = num_float | mixed_exp | num_int | nan
    strval = QuotedString(quoteChar='"') | QuotedString(quoteChar="'")
    b_list = "T TRUE True true F FALSE False false .TRUE. .FALSE. .T. .F."
    boolval = ToBool(oneOf(b_list))
    fieldval = Word(alphanums)

    # Tokens for parsing a line of data
    numstr_token = numval + ZeroOrMore(Suppress(',') + numval) \
        | strval
    data_token = numstr_token | boolval
    index_token = Suppress('(') + num_int + Suppress(')')

    # A card is "name [(index)] = [dim*] value [* dim]".
    card_token = Group(fieldval("name") +
                       Optional(index_token("index")) +
                       Suppress('=') +
                       Optional(num_int("dimension") + Suppress('*')) +
                       data_token("value") +
                       Optional(Suppress('*') + num_int("dimension")))
    multi_card_token = (card_token + ZeroOrMore(Suppress(',') + card_token))
    array_continuation_token = numstr_token.setResultsName("value")
    array2D_token = fieldval("name") + Suppress("(") + \
        Suppress(num_int) + Suppress(',') + \
        num_int("index") + Suppress(')') + \
        Suppress('=') + numval + \
        ZeroOrMore(Suppress(',') + numval)

    # Tokens for parsing the group head and tail
    group_end_token = Literal("/") | Literal("$END") | Literal("$end")
    group_name_token = (Literal("$") | Literal("&")) + \
        Word(alphanums).setResultsName("name") + \
        Optional(multi_card_token) + \
        Optional(group_end_token)

    # Comment Token
    comment_token = Literal("!")

    # Loop through each line and parse.
    current_group = None
    for line in data:
        line_base = line
        line = line.strip()

        # blank line: do nothing
        if not line:
            continue

        if current_group:
            # Skip comment cards
            if comment_token.searchString(line):
                pass
            # Process ordinary cards
            elif multi_card_token.searchString(line):
                cards = multi_card_token.parseString(line)
                for card in cards:
                    name, value = _process_card_info(card)
                    self.cards[-1].append(Card(name, value))
            # Catch 2D arrays like -> X(1,1) = 3,4,5
            elif array2D_token.searchString(line):
                card = array2D_token.parseString(line)
                name = card[0]
                index = card[1]
                value = array(card[2:])
                if index > 1:
                    # Row 2+ of a 2D array: stack under the previous rows.
                    old_value = self.cards[-1][-1].value
                    new_value = vstack((old_value, value))
                    self.cards[-1][-1].value = new_value
                else:
                    self.cards[-1].append(Card(name, value))
            # Arrays can be continued on subsequent lines.
            # The value of the most recent card must be turned into an
            # array and appended.
            elif array_continuation_token.searchString(line):
                card = array_continuation_token.parseString(line)
                if len(card) > 1:
                    element = array(card[0:])
                else:
                    element = card.value
                if isinstance(self.cards[-1][-1].value, ndarray):
                    new_value = append(self.cards[-1][-1].value, element)
                else:
                    new_value = array([self.cards[-1][-1].value, element])
                self.cards[-1][-1].value = new_value
            # Lastly, look for the group footer
            elif group_end_token.searchString(line):
                current_group = None
            # Everything else must be a pure comment
            else:
                # NOTE: was a Python 2 print statement; the parenthesized
                # single-argument form behaves identically on py2 and py3.
                print("Comment ignored: %s" % line.rstrip('\n'))

            # Group ending '/' can also conclude a data line.
            if line[-1] == '/':
                current_group = None
        else:
            group_name = group_name_token.searchString(line)

            # Group Header
            if group_name:
                group_name = group_name_token.parseString(line)
                current_group = group_name.name
                self.add_group(current_group)

                # Sometimes, variable definitions are included on the
                # same line as the namelist header
                if len(group_name) > 2:
                    cards = group_name[2:]
                    for card in cards:
                        # Sometimes an end card is on the same line.
                        if group_end_token.searchString(card):
                            current_group = None
                        else:
                            name, value = _process_card_info(card)
                            self.cards[-1].append(Card(name, value))
            # If there is an ungrouped card at the start, take it as the
            # title for the analysis
            elif len(self.cards) == 0 and self.title == '':
                self.title = line
            # All other ungrouped cards are saved as free-form (card-less)
            # groups.
            # Note that we can't lstrip because column spacing might be
            # important.
            else:
                self.add_group(line_base.rstrip())
def __init__(self):
    """
    Build the arithmetic grammar (stored in ``self.bnf``) and the
    operator/function dispatch tables (``self.opn`` / ``self.fn``).

    expop   :: '^'
    multop  :: '*' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+
    atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
    factor  :: atom [ expop factor ]*
    term    :: factor [ multop factor ]*
    expr    :: term [ addop term ]*
    """
    point = Literal(".")
    e = CaselessLiteral("E")
    # Signed real number with optional fraction and exponent.
    fnumber = Combine(
        Word("+-" + nums, nums) +
        Optional(point + Optional(Word(nums))) +
        Optional(e + Word("+-" + nums, nums)))
    ident = Word(alphas, alphas + nums + "_$")

    plus = Literal("+")
    minus = Literal("-")
    mult = Literal("*")
    div = Literal("/")
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    addop = plus | minus
    multop = mult | div
    expop = Literal("^")
    pi = CaselessLiteral("PI")

    expr = Forward()
    atom = ((Optional(oneOf("- +")) +
             (pi | e | fnumber | ident + lpar + expr + rpar).setParseAction(self.pushFirst))
            | Optional(oneOf("- +")) + Group(lpar + expr + rpar)
            ).setParseAction(self.pushUMinus)

    # by defining exponentiation as "atom [ ^ factor ]..." instead of
    # "atom [ ^ atom ]...", we get right-to-left exponents, instead of
    # left-to-right; that is, 2^3^2 = 2^(3^2), not (2^3)^2.
    factor = Forward()
    factor << atom + ZeroOrMore(
        (expop + factor).setParseAction(self.pushFirst))
    term = factor + ZeroOrMore(
        (multop + factor).setParseAction(self.pushFirst))
    expr << term + ZeroOrMore(
        (addop + term).setParseAction(self.pushFirst))
    self.bnf = expr

    # map operator symbols to corresponding arithmetic operations
    epsilon = 1e-12
    self.opn = {
        "+": operator.add,
        "-": operator.sub,
        "*": operator.mul,
        "/": operator.truediv,
        "^": operator.pow
    }
    self.fn = {
        "sin": math.sin,
        "cos": math.cos,
        "tan": math.tan,
        "abs": abs,
        "trunc": lambda a: int(a),
        "round": round,
        # Sign with an epsilon dead zone: -1, 0 or +1.
        # BUGFIX: the original used py2-only cmp(a, 0); this expression
        # reproduces `abs(a) > epsilon and cmp(a, 0) or 0` on Python 3.
        "sgn": lambda a: (a > epsilon) - (a < -epsilon)
    }
quotedString,
    Regex,
    Word,
    ZeroOrMore,
)  # (tail of the multi-line pyparsing import opened above)

grammar = Forward()
expression = Forward()

# Literals
intNumber = Regex(r'-?\d+')('integer')
floatNumber = Regex(r'-?\d+\.\d+')('float')
sciNumber = Combine((floatNumber | intNumber) + CaselessLiteral('e') + intNumber)('scientific')
aString = quotedString('string')

# Use lookahead to match only numbers in a list (can't remember why this
# is necessary)
afterNumber = FollowedBy(",") ^ FollowedBy(")") ^ FollowedBy(LineEnd())
# Order matters: scientific is tried before float, which is tried before
# int, so the longest numeric form wins.
number = Group((sciNumber + afterNumber) |
               (floatNumber + afterNumber) |
               (intNumber + afterNumber))('number')
boolean = Group(CaselessKeyword("true") | CaselessKeyword("false"))('boolean')

# Identifiers: a letter or underscore, then letters, digits or underscores.
argname = Word(alphas + '_', alphanums + '_')('argname')
funcname = Word(alphas + '_', alphanums + '_')('funcname')

# Symbols
# The aim of this parser is not to support database application, # but to create automagically a pgn annotated reading the log console file # of a lecture of ICC (Internet Chess Club), saved by Blitzin. # Of course you can modify the Abstract Syntax Tree to your purpose. # # Copyright 2004, by Alberto Santini http://www.albertosantini.it/chess/ # from pyparsing import alphanums, nums, quotedString from pyparsing import Combine, Forward, Group, Literal, oneOf, OneOrMore, Optional, Suppress, ZeroOrMore, White, Word from pyparsing import ParseException # # define pgn grammar # tag = Suppress("[") + Word(alphanums) + Combine(quotedString) + Suppress("]") comment = Suppress("{") + Word(alphanums + " ") + Suppress("}") dot = Literal(".") piece = oneOf("K Q B N R") file_coord = oneOf("a b c d e f g h") rank_coord = oneOf("1 2 3 4 5 6 7 8") capture = oneOf("x :") promote = Literal("=") castle_queenside = Literal("O-O-O") | Literal("0-0-0") | Literal("o-o-o") castle_kingside = Literal("O-O") | Literal("0-0") | Literal("o-o") move_number = Optional(comment) + Word(nums) + dot m1 = file_coord + rank_coord # pawn move e.g. d4 m2 = file_coord + capture + file_coord + rank_coord # pawn capture move e.g. dxe5 m3 = file_coord + "8" + promote + piece # pawn promotion e.g. e8=Q
bodyChars=alpha_upper + nums).setResultsName("mnemonic")

# XXX can't use pyparsing_common.signedInteger as the latest pyparsing 2.1.5
# has a bug which always converts them to floats. Remove this once 2.1.6 is
# published on PyPI.
signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(
    tokenMap(int))

variable = Word(alphas, bodyChars=alphanums)
# A stack item is comma-prefixed and is either a signed integer, a "*"
# placeholder (suppressed), or a variable name.
stack_item = Suppress(",") + (signed_integer | Suppress("*") | variable)

flag = oneOf(list(VTT_MNEMONIC_FLAGS.keys()))
# convert flag to binary string
flag.setParseAction(tokenMap(lambda t: VTT_MNEMONIC_FLAGS[t]))
# One or more flags, joined into a single string under "flags".
flags = Combine(OneOrMore(flag)).setResultsName("flags")

delta_point_index = pyparsing_common.integer.setResultsName("point_index")
delta_rel_ppem = pyparsing_common.integer.setResultsName("rel_ppem")
delta_step_no = signed_integer.setResultsName("step_no")
# the step denominator is only used in VTT's DELTA[CP]* instructions,
# and must always be 8 (sic!), so we can suppress it.
delta_spec = (delta_point_index + Suppress("@") + delta_rel_ppem +
              delta_step_no + Optional(Literal("/8")).suppress())
# A delta is a parenthesized spec, e.g. "(5 @10 -1/8)".
delta = nestedExpr("(", ")", delta_spec, ignoreExpr=None)
deltas = Group(OneOrMore(delta)).setResultsName("deltas")

# Instruction arguments are either a list of deltas or a run of flags.
args = deltas | flags
def __init__(self, query):
    """Compile ``query`` into a boolean search tree and build the
    log-line parser.

    :param query: free-text search query supporting and/or/not keywords,
        parentheses and quoted phrases; may be None or empty, in which
        case no query tree is built (``self._query_parser`` is False).
    """
    # Dispatch table mapping parse-tree node names to evaluators.
    self._methods = {
        'and': self.evaluate_and,
        'or': self.evaluate_or,
        'not': self.evaluate_not,
        'parenthesis': self.evaluate_parenthesis,
        'quotes': self.evaluate_quotes,
        'word': self.evaluate_word,
    }

    self.line = ''
    self.query = query.lower() if query else ''

    if self.query:
        # TODO: Cleanup
        operator_or = Forward()
        operator_word = Group(Word(alphanums)).setResultsName('word')

        operator_quotes_content = Forward()
        operator_quotes_content << (
            (operator_word + operator_quotes_content) | operator_word)

        operator_quotes = Group(
            Suppress('"') + operator_quotes_content + Suppress('"')
        ).setResultsName('quotes') | operator_word

        # BUGFIX: was ``.setResultsName0('parenthesis')`` -- not a
        # pyparsing method, raising AttributeError when a query is given.
        operator_parenthesis = Group(
            (Suppress('(') + operator_or + Suppress(")"))
        ).setResultsName('parenthesis') | operator_quotes

        operator_not = Forward()
        # NOTE(review): the keyword is 'no' while the result name and the
        # evaluator are 'not' -- confirm the intended query syntax.
        operator_not << (
            Group(Suppress(Keyword('no', caseless=True)) +
                  operator_not).setResultsName('not') |
            operator_parenthesis)

        operator_and = Forward()
        operator_and << (
            Group(operator_not + Suppress(Keyword('and', caseless=True)) +
                  operator_and).setResultsName('and') |
            # Adjacent terms without an explicit keyword are implicit AND.
            Group(operator_not + OneOrMore(~oneOf('and or') + operator_and)
                  ).setResultsName('and') |
            operator_not)

        operator_or << (
            Group(operator_and + Suppress(Keyword('or', caseless=True)) +
                  operator_or).setResultsName('or') |
            operator_and)

        self._query_parser = operator_or.parseString(self.query)[0]
    else:
        self._query_parser = False

    # Log lines start with "YYYY-MM-DD HH:MM", then level, plugin, a task
    # column (either padded with >=16 spaces, stripped, or a single word)
    # and the free-form message.
    integer = Word(nums).setParseAction(lambda t: int(t[0]))
    date = Combine((integer + '-' + integer + '-' + integer) +
                   ' ' + integer + ':' + integer)
    word = Word(printables)

    self._log_parser = (
        date.setResultsName('timestamp') +
        word.setResultsName('log_level') +
        word.setResultsName('plugin') +
        (White(min=16).setParseAction(
            lambda s, l, t: [t[0].strip()]).setResultsName('task') |
         (White(min=1).suppress() & word.setResultsName('task'))) +
        restOfLine.setResultsName('message'))
NilValue = "-"
# Exactly one ASCII space (the SP separator between header fields).
SP = Suppress(White(ws=' ', min=1, max=1))


def toInt(s, loc, toks):
    """Parse action: convert the matched token to ``int``."""
    return int(toks[0])


def maybeToInt(s, loc, toks):
    """Parse action: convert the token to ``int`` only if it is all
    digits; otherwise return it unchanged."""
    if all(x.isdigit() for x in toks[0]):
        return int(toks[0])
    else:
        return toks[0]


# PRI part, e.g. "<34>" -> 34 (one to three digits in angle brackets).
pri = Combine(Suppress(Literal("<")) + Word(nums, min=1, max=3) +
              Suppress(Literal(">"))).setParseAction(toInt)
version = Word(nums).setParseAction(toInt)

dash = Literal("-")
colon = Literal(":")
# Date "YYYY-MM-DD" and time "HH:MM:SS[.ffffff]".
rfc3164_date = Word(nums, min=4, max=4) + dash + Word(nums, min=2, max=2) + dash + Word(nums, min=2, max=2)
rfc3164_time = Word(nums, min=2, max=2) + colon + Word(nums, min=2, max=2) + colon + Word(nums, min=2, max=2) + \
    Optional(Literal(".") + Word(nums, min=1, max=6))
# Numeric UTC offset, e.g. "+02:00", or the literal "Z".
rfc3164_timenumoffset = (Literal("-") | Literal("+")) + Word(nums, min=2, max=2) + colon + Word(nums, min=2, max=2)
rfc3164_timeoffset = Literal("Z") | rfc3164_timenumoffset
# Full timestamp "YYYY-MM-DDTHH:MM:SS[.ffffff](Z|+HH:MM|-HH:MM)".
# NOTE(review): despite the rfc3164_* names this is the RFC 3339-style
# format used by RFC 5424 -- confirm the intended naming.
rfc3164_timestamp = Combine(rfc3164_date + Literal("T") + rfc3164_time + rfc3164_timeoffset)

# Header fields; each may be the nil value "-".
timestamp = NilValue | rfc3164_timestamp
hostname = NilValue | Word(printables, min=1, max=255)
appname = NilValue | Word(printables, min=1, max=48)
procname = NilValue | Word(printables, min=1, max=128).setParseAction(maybeToInt)
msgid = NilValue | Word(printables, min=1, max=32)
##---------------------------------------------------------------------- ## pyParsing tokens ##---------------------------------------------------------------------- ## Copyright (C) 2007-2015 The NOC Project ## See LICENSE for details ##---------------------------------------------------------------------- ## Third-party modules from pyparsing import (alphanums, Combine, Group, LineEnd, nums, Suppress, Word, restOfLine) # Match \s+ SPACE = Suppress(Word(" ").leaveWhitespace()) # Match \n\s+ INDENT = Suppress(LineEnd() + SPACE) # Skip whole line LINE = Suppress(restOfLine) # REST = SPACE + restOfLine # Sequence of numbers DIGITS = Word(nums) # Sequence of letters and numbers ALPHANUMS = Word(alphanums) # Number from 0 to 255 OCTET = Word(nums, max=3) # IPv4 address IPv4_ADDRESS = Combine(OCTET + "." + OCTET + "." + OCTET + "." + OCTET) # RD RD = Combine(Word(nums) + Word(":") + Word(nums))
# Only IDENTIFIER and EQUATIONS are ever used later ############################################################################### # Basic Elements ############################################################################### # identifiers like in C: can start with letter or underscore, then a # combination of letters, numbers and underscores # Note that the check_identifiers function later performs more checks, e.g. # names starting with underscore should only be used internally IDENTIFIER = Word(string.ascii_letters + '_', string.ascii_letters + string.digits + '_').setResultsName('identifier') # very broad definition here, expression will be analysed by sympy anyway # allows for multi-line expressions, where each line can have comments EXPRESSION = Combine(OneOrMore( (CharsNotIn(':#\n') + Suppress(Optional(LineEnd()))).ignore('#' + restOfLine)), joinString=' ').setResultsName('expression') # a unit # very broad definition here, again. Whether this corresponds to a valid unit # string will be checked later UNIT = Word(string.ascii_letters + string.digits + '*/.- ').setResultsName('unit') # a single Flag (e.g. "const" or "event-driven") FLAG = Word(string.ascii_letters, string.ascii_letters + '_- ' + string.digits) # Flags are comma-separated and enclosed in parantheses: "(flag1, flag2)" FLAGS = (Suppress('(') + FLAG + ZeroOrMore(Suppress(',') + FLAG) + Suppress(')')).setResultsName('flags')
# Single-character literal tokens used throughout the FQL grammar.
# NOTE(review): `underline_` and `check_` are referenced below but are
# defined earlier in this module (outside this section).
space_ = Literal(" ")
or_ = Literal("||")
and_ = Literal("&&")
star_ = Literal("*")
leftb_ = Literal("(")
rightb_ = Literal(")")
leftB_ = Literal("[")
rightB_ = Literal("]")
comma_ = Literal(",")
dot_ = Literal(".")
hashtag_ = Literal("#")
# NOTE(review): despite the name, slash_ matches "-" (a dash).
slash_ = Literal("-")
equal_ = Literal("=")

# format of version number for AS clause
version_ = Combine(OneOrMore(Optional(dot_) + Word(nums)))
# format of feature name for AS clause
feature_ = Combine(OneOrMore(Word(alphanums) \
    + Optional(OneOrMore(underline_^space_))))

# this is for the keywords to search
keyword_ = Optional(OneOrMore(underline_^leftb_^hashtag_^space_^dot_^slash_\
    ^equal_)) + Word(alphanums) + Optional(OneOrMore(underline_^leftb_\
    ^dot_^slash_^equal_))
keywords_ = OneOrMore(Combine(OneOrMore(keyword_)) \
    + Optional(space_) + Optional(or_^and_) + Optional(space_))

# Clauses in FQL
# CHECK clause: check(<keywords>) -- the parentheses are suppressed.
check = check_ + Optional(space_) + Suppress(leftb_) + Optional(space_) \
    + keywords_ + Optional(space_) + Suppress(rightb_)
# this is one WHERE clause
class SqlGrammarMSSQLServer(SqlGrammar): # ------------------------------------------------------------------------- # Forward declarations # ------------------------------------------------------------------------- expr = Forward() select_statement = Forward() # ------------------------------------------------------------------------- # Keywords # ------------------------------------------------------------------------- # https://msdn.microsoft.com/en-us/library/ms189822.aspx sql_server_reserved_words = """ ADD ALL ALTER AND ANY AS ASC AUTHORIZATION BACKUP BEGIN BETWEEN BREAK BROWSE BULK BY CASCADE CASE CHECK CHECKPOINT CLOSE CLUSTERED COALESCE COLLATE COLUMN COMMIT COMPUTE CONSTRAINT CONTAINS CONTAINSTABLE CONTINUE CONVERT CREATE CROSS CURRENT CURRENT_DATE CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR DATABASE DBCC DEALLOCATE DECLARE DEFAULT DELETE DENY DESC DISK DISTINCT DISTRIBUTED DOUBLE DROP DUMP ELSE END ERRLVL ESCAPE EXCEPT EXEC EXECUTE EXISTS EXIT EXTERNAL FETCH FILE FILLFACTOR FOR FOREIGN FREETEXT FREETEXTTABLE FROM FULL FUNCTION GOTO GRANT GROUP HAVING HOLDLOCK IDENTITY IDENTITY_INSERT IDENTITYCOL IF IN INDEX INNER INSERT INTERSECT INTO IS JOIN KEY KILL LEFT LIKE LINENO LOAD MERGE NATIONAL NOCHECK NONCLUSTERED NOT NULL NULLIF OF OFF OFFSETS ON OPEN OPENDATASOURCE OPENQUERY OPENROWSET OPENXML OPTION OR ORDER OUTER OVER PERCENT PIVOT PLAN PRECISION PRIMARY PRINT PROC PROCEDURE PUBLIC RAISERROR READ READTEXT RECONFIGURE REFERENCES REPLICATION RESTORE RESTRICT RETURN REVERT REVOKE RIGHT ROLLBACK ROWCOUNT ROWGUIDCOL RULE SAVE SCHEMA SECURITYAUDIT SELECT SEMANTICKEYPHRASETABLE SEMANTICSIMILARITYDETAILSTABLE SEMANTICSIMILARITYTABLE SESSION_USER SET SETUSER SHUTDOWN SOME STATISTICS SYSTEM_USER TABLE TABLESAMPLE TEXTSIZE THEN TO TOP TRAN TRANSACTION TRIGGER TRUNCATE TRY_CONVERT TSEQUAL UNION UNIQUE UNPIVOT UPDATE UPDATETEXT USE USER VALUES VARYING VIEW WAITFOR WHEN WHERE WHILE WITH WITHIN WRITETEXT """ # ... 
"WITHIN GROUP" is listed, not "WITHIN", but odbc_reserved_words = """ ABSOLUTE ACTION ADA ADD ALL ALLOCATE ALTER AND ANY ARE AS ASC ASSERTION AT AUTHORIZATION AVG BEGIN BETWEEN BIT BIT_LENGTH BOTH BY CASCADE CASCADED CASE CAST CATALOG CHAR CHAR_LENGTH CHARACTER CHARACTER_LENGTH CHECK CLOSE COALESCE COLLATE COLLATION COLUMN COMMIT CONNECT CONNECTION CONSTRAINT CONSTRAINTS CONTINUE CONVERT CORRESPONDING COUNT CREATE CROSS CURRENT CURRENT_DATE CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR DATE DAY DEALLOCATE DEC DECIMAL DECLARE DEFAULT DEFERRABLE DEFERRED DELETE DESC DESCRIBE DESCRIPTOR DIAGNOSTICS DISCONNECT DISTINCT DOMAIN DOUBLE DROP ELSE END END-EXEC ESCAPE EXCEPT EXCEPTION EXEC EXECUTE EXISTS EXTERNAL EXTRACT FALSE FETCH FIRST FLOAT FOR FOREIGN FORTRAN FOUND FROM FULL GET GLOBAL GO GOTO GRANT GROUP HAVING HOUR IDENTITY IMMEDIATE IN INCLUDE INDEX INDICATOR INITIALLY INNER INPUT INSENSITIVE INSERT INT INTEGER INTERSECT INTERVAL INTO IS ISOLATION JOIN KEY LANGUAGE LAST LEADING LEFT LEVEL LIKE LOCAL LOWER MATCH MAX MIN MINUTE MODULE MONTH NAMES NATIONAL NATURAL NCHAR NEXT NO NONE NOT NULL NULLIF NUMERIC OCTET_LENGTH OF ON ONLY OPEN OPTION OR ORDER OUTER OUTPUT OVERLAPS PAD PARTIAL PASCAL POSITION PRECISION PREPARE PRESERVE PRIMARY PRIOR PRIVILEGES PROCEDURE PUBLIC READ REAL REFERENCES RELATIVE RESTRICT REVOKE RIGHT ROLLBACK ROWS SCHEMA SCROLL SECOND SECTION SELECT SESSION SESSION_USER SET SIZE SMALLINT SOME SPACE SQL SQLCA SQLCODE SQLERROR SQLSTATE SQLWARNING SUBSTRING SUM SYSTEM_USER TABLE TEMPORARY THEN TIME TIMESTAMP TIMEZONE_HOUR TIMEZONE_MINUTE TO TRAILING TRANSACTION TRANSLATE TRANSLATION TRIM TRUE UNION UNIQUE UNKNOWN UPDATE UPPER USAGE USER USING VALUE VALUES VARCHAR VARYING VIEW WHEN WHENEVER WHERE WITH WORK WRITE YEAR ZONE """ # ... who thought "END-EXEC" was a good one? 
# Then some more: # - WITH ROLLUP: https://technet.microsoft.com/en-us/library/ms189305(v=sql.90).aspx # noqa # - SOUNDEX: https://msdn.microsoft.com/en-us/library/ms187384.aspx rnc_extra_sql_server_keywords = """ ROLLUP SOUNDEX """ sql_server_keywords = sorted(list(set( sql_server_reserved_words.split() + odbc_reserved_words.split() + ANSI92_RESERVED_WORD_LIST.split() ))) # log.critical(sql_server_keywords) keyword = make_words_regex(sql_server_keywords, caseless=True, name="keyword") # ------------------------------------------------------------------------- # Comments # ------------------------------------------------------------------------- # https://msdn.microsoft.com/en-us/library/ff848807.aspx comment = ansi_comment # ----------------------------------------------------------------------------- # identifier # ----------------------------------------------------------------------------- # http://dev.mysql.com/doc/refman/5.7/en/identifiers.html bare_identifier_word = make_regex_except_words( r"\b[a-zA-Z0-9$_]*\b", ANSI92_RESERVED_WORD_LIST, caseless=True, name="bare_identifier_word" ) identifier = ( bare_identifier_word | QuotedString(quoteChar="[", endQuoteChar="]", unquoteResults=False) ).setName("identifier") collation_name = identifier.copy() column_name = identifier.copy() column_alias = identifier.copy() table_name = identifier.copy() table_alias = identifier.copy() schema_name = identifier.copy() index_name = identifier.copy() function_name = identifier.copy() parameter_name = identifier.copy() database_name = identifier.copy() no_dot = NotAny('.') table_spec = ( Combine(database_name + '.' + schema_name + '.' + table_name + no_dot) | Combine(schema_name + '.' + table_name + no_dot) | table_name + no_dot ).setName("table_spec") column_spec = ( Combine(database_name + '.' + schema_name + '.' + table_name + '.' + column_name + no_dot) | Combine(schema_name + '.' + table_name + '.' + column_name + no_dot) | Combine(table_name + '.' 
+ column_name + no_dot) | column_name + no_dot ).setName("column_spec") # I'm unsure if SQL Server allows keywords in the parts after dots, like # MySQL does. # - http://stackoverflow.com/questions/285775/how-to-deal-with-sql-column-names-that-look-like-sql-keywords # noqa bind_parameter = Literal('?') variable = Regex(r"@[a-zA-Z0-9\.$_]+").setName("variable") argument_list = ( delimitedList(expr).setName("arglist").setParseAction(', '.join) ) function_call = Combine(function_name + LPAR) + argument_list + RPAR # Not supported: index hints # ... http://stackoverflow.com/questions/11016935/how-can-i-force-a-query-to-not-use-a-index-on-a-given-table # noqa # ----------------------------------------------------------------------------- # CASE # ----------------------------------------------------------------------------- case_expr = ( ( CASE + expr + OneOrMore(WHEN + expr + THEN + expr) + Optional(ELSE + expr) + END ) | ( CASE + OneOrMore(WHEN + expr + THEN + expr) + Optional(ELSE + expr) + END ) ).setName("case_expr") # ----------------------------------------------------------------------------- # Expressions # ----------------------------------------------------------------------------- aggregate_function = ( # https://msdn.microsoft.com/en-us/library/ms173454.aspx AVG | CHECKSUM_AGG | COUNT | COUNT_BIG | GROUPING | GROUPING_ID | MAX | MIN | STDEV | STDEV_P | SUM | VAR | VARP ) expr_term = ( INTERVAL + expr + time_unit | Optional(EXISTS) + LPAR + select_statement + RPAR | # ... e.g. mycol = EXISTS(SELECT ...) # ... e.g. mycol IN (SELECT ...) LPAR + delim_list(expr) + RPAR | # ... e.g. mycol IN (1, 2, 3) case_expr | bind_parameter | variable | function_call | literal_value | column_spec # not just identifier ) UNARY_OP, BINARY_OP, TERNARY_OP = 1, 2, 3 expr << infixNotation(expr_term, [ # Having lots of operations in the list here SLOWS IT DOWN A LOT. # Just combine them into an ordered list. (COLLATE | oneOf('! 
- + ~'), UNARY_OP, opAssoc.RIGHT), ( ( oneOf('^ * / %') | oneOf('+ - << >> & | = <=> >= > <= < <> !=') | (IS + Optional(NOT)) | LIKE | (Optional(NOT) + IN) | SOUNDEX # RNC; presumably at same level as LIKE ), BINARY_OP, opAssoc.LEFT ), ((BETWEEN, AND), TERNARY_OP, opAssoc.LEFT), # CASE handled above (hoping precedence is not too much of a problem) (NOT, UNARY_OP, opAssoc.RIGHT), (AND | '&&' | OR | '||' | ':=', BINARY_OP, opAssoc.LEFT), ], lpar=LPAR, rpar=RPAR) # ignores LIKE [ESCAPE] # ------------------------------------------------------------------------- # SELECT # ------------------------------------------------------------------------- compound_operator = UNION + Optional(ALL | DISTINCT) ordering_term = ( expr + Optional(COLLATE + collation_name) + Optional(ASC | DESC) ) join_constraint = Optional(Group( (ON + expr) | (USING + LPAR + delim_list(column_name) + RPAR) )) join_op = Group( COMMA | NATURAL + (Optional(LEFT | RIGHT) + Optional(OUTER)) + JOIN | (INNER | CROSS) + JOIN | Optional(LEFT | RIGHT) + Optional(OUTER) + JOIN ) join_source = Forward() single_source = ( ( table_spec.copy().setResultsName("from_tables", listAllMatches=True) + Optional(Optional(AS) + table_alias) # Optional(index_hint_list) # not supported yet ) | (select_statement + Optional(AS) + table_alias) + (LPAR + join_source + RPAR) ) join_source << Group( single_source + ZeroOrMore(join_op + single_source + join_constraint) )("join_source") # ... must have a Group to append to it later, it seems # ... but name it "join_source" here, or it gets enclosed in a further list # when you name it later result_base = ( # Aggregate functions: e.g. "MAX(" allowed, "MAX (" not allowed Combine(COUNT + LPAR) + '*' + RPAR | # special aggregate function Combine(COUNT + LPAR) + DISTINCT + expr + RPAR | # special aggregate function # noqa Combine(aggregate_function + LPAR) + expr + RPAR | expr | '*' | Combine(table_name + '.' 
+ '*') | column_spec | literal_value ) result_column = ( result_base + Optional(Optional(AS) + column_alias) ).setResultsName("select_columns", listAllMatches=True) # ------------------------------------------------------------------------- # SELECT # ------------------------------------------------------------------------- where_expr = Group(expr).setResultsName("where_expr") where_clause = Group( Optional(WHERE + where_expr) ).setResultsName("where_clause") select_core = ( SELECT + Optional(TOP + integer) + Group(Optional(ALL | DISTINCT))("select_specifier") + Group(delim_list(result_column))("select_expression") + Optional( FROM + join_source + where_clause + Optional( GROUP + BY + delim_list(ordering_term + Optional(ASC | DESC))("group_by_term") + Optional(WITH + ROLLUP) ) + Optional(HAVING + expr("having_expr")) ) ) select_statement << ( select_core + ZeroOrMore(compound_operator + select_core) + Optional( ORDER + BY + delim_list(ordering_term + Optional(ASC | DESC))("order_by_terms") ) + # PROCEDURE ignored # rest ignored Optional(';') ) select_statement.ignore(comment) # https://msdn.microsoft.com/en-us/library/ms175874.aspx # ... 
approximately (and conservatively): MSSQL_INVALID_FIRST_IF_UNQUOTED = re.compile(r"[^a-zA-Z_@#]") MSSQL_INVALID_IF_UNQUOTED = re.compile(r"[^a-zA-Z0-9_@#$]") def __init__(self): super().__init__() @classmethod def quote_identifier(cls, identifier: str) -> str: return "[{}]".format(identifier) @classmethod def is_quoted(cls, identifier: str) -> bool: return identifier.startswith("[") and identifier.endswith("]") @classmethod def requires_quoting(cls, identifier: str) -> bool: assert identifier, "Empty identifier" if cls.MSSQL_INVALID_IF_UNQUOTED.search(identifier): return True firstchar = identifier[0] if cls.MSSQL_INVALID_FIRST_IF_UNQUOTED.search(firstchar): return True if identifier.upper() in cls.sql_server_keywords: return True return False @classmethod def get_grammar(cls): # Grammar (here, just SELECT) return cls.select_statement @classmethod def get_column_spec(cls): return cls.column_spec @classmethod def get_result_column(cls): return cls.result_column @classmethod def get_join_op(cls): return cls.join_op @classmethod def get_table_spec(cls): return cls.table_spec @classmethod def get_join_constraint(cls): return cls.join_constraint @classmethod def get_select_statement(cls): return cls.select_statement @classmethod def get_expr(cls): return cls.expr @classmethod def get_where_clause(cls): return cls.where_clause @classmethod def get_where_expr(cls): return cls.where_expr @classmethod def test_dialect_specific_2(cls): log.info("Testing Microsoft SQL Server-specific aspects...") log.info("Testing quoted identifiers") test_succeed(cls.identifier, "[FROM]") test_succeed(cls.identifier, "[SELECT FROM]") log.info("Testing table_spec") # SQL Server uses up to: db.schema.table.column test_succeed(cls.table_spec, "mytable") test_succeed(cls.table_spec, "mydb.mytable") test_succeed(cls.table_spec, "mydb.[my silly table]") test_succeed(cls.table_spec, "mydb.myschema.mytable") test_fail(cls.table_spec, "mydb . 
mytable") test_fail(cls.table_spec, "mydb.myschema.mytable.mycol") log.info("Testing column_spec") test_succeed(cls.column_spec, "mycol") test_succeed(cls.column_spec, "forename") test_succeed(cls.column_spec, "mytable.mycol") test_succeed(cls.column_spec, "t1.a") test_succeed(cls.column_spec, "[my silly table].[my silly column]") test_succeed(cls.column_spec, "mydb.myschema.mytable.mycol") test_succeed(cls.column_spec, "myschema.mytable.mycol") test_fail(cls.column_spec, "myschema . mytable . mycol") log.info("Testing variable") test_succeed(cls.variable, "@myvar") log.info("Testing argument_list") test_succeed(cls.argument_list, "@myvar, 5") log.info("Testing function_call") test_succeed(cls.function_call, "myfunc(@myvar, 5)") # --------------------------------------------------------------------- # Expressions # --------------------------------------------------------------------- log.info("Testing case_expr") test_succeed(cls.case_expr, """ CASE v WHEN 2 THEN x WHEN 3 THEN y ELSE -99 END """)