def __init__(self, slack=None):
    """Set up the haiku plugin: generator, metadata, and trigger grammar.

    Args:
        slack: optional client handle; accepted for interface
            compatibility but not used here.
    """
    # The actual haiku generator this plugin delegates to.
    self.haiku = Haiku()
    # Plugin metadata (name shown to users, help text).
    self.haiku_name = 'Haiku'
    self.haiku_doc = "Generate a random haiku."
    # Trigger grammar: the bare word 'haiku' (case-insensitive) and
    # nothing after it.
    self.haiku_expr = CaselessLiteral('haiku') + StringEnd()
argument_list = ( (Group(expr) + ZeroOrMore(Suppress(',') + expr))).setResultsName('argumentlist') function = (Or(function_tokens) + lpar + argument_list + rpar).setParseAction( arg_number, pushFirst) primary = function | atom factor = Forward() factor << primary + ZeroOrMore((expop + factor).setParseAction(pushFirst)) term = factor + ZeroOrMore((multop + factor).setParseAction(pushFirst)) expr << term + ZeroOrMore((addop + term).setParseAction(pushFirst)) bnf = Optional((ident + assign).setParseAction(assignVar)) + expr pattern = bnf + StringEnd() # Map operator symbols to corresponding arithmetic operations opn = { "+": (lambda a, b: a + b), "-": (lambda a, b: a - b), "*": (lambda a, b: a * b), "/": (lambda a, b: a / b), "^": (lambda a, b: a**b) } # Recursive function that evaluates the stack #------------------------------------------------------------------------------ def evaluateStack(s, type, snap): op = s.pop()
class CreateParser(object):
    """
    This class can take a plain "CREATE TABLE" SQL as input and parse it into
    a Table object, so that we have more insight on the detail of this SQL.

    Example:
        sql = 'create table foo ( bar int primary key )'
        parser = CreateParser(sql)
        try:
            tbl_obj = parser.parse()
        except ParseError:
            log.error("Failed to parse SQL")

    This set of BNF rules are basically translated from the MySQL manual:
    http://dev.mysql.com/doc/refman/5.6/en/create-table.html

    If you don't know how to change the rule or fix the bug,
    <Getting Started with Pyparsing> is probably the best book to start with.
    Also this wiki has all supported functions listed:
    https://pyparsing.wikispaces.com/HowToUsePyparsing
    If you want have more information how these characters are matching,
    add .setDebug(True) after the specific token you want to debug
    """

    # Cached compiled grammar; built lazily by get_parser().
    _parser = None

    # Basic tokens
    WORD_CREATE = CaselessLiteral("CREATE").suppress()
    WORD_TABLE = CaselessLiteral("TABLE").suppress()
    COMMA = Literal(',').suppress()
    DOT = Literal('.')
    LEFT_PARENTHESES = Literal('(').suppress()
    RIGHT_PARENTHESES = Literal(')').suppress()
    QUOTE = Literal("'") | Literal('"')
    BACK_QUOTE = Optional(Literal('`')).suppress()
    LENGTH = Word(nums)
    OBJECT_NAME = Word(alphanums + "_" + "-" + "<" + ">" + ":")
    # Quoted string that keeps its surrounding quotes in the result
    QUOTED_STRING_WITH_QUOTE = QuotedString(
        quoteChar="'", escQuote="''", escChar='\\', multiline=True,
        unquoteResults=False) | QuotedString(
        quoteChar='"', escQuote='""', escChar='\\', multiline=True,
        unquoteResults=False)
    QUOTED_STRING = QuotedString(
        quoteChar="'", escQuote="''", escChar='\\',
        multiline=True) | QuotedString(
        quoteChar='"', escQuote='""', escChar='\\', multiline=True)

    # Start of a create table statement
    # Sample: this part of rule will match following section
    # `table_name` IF NOT EXISTS
    IF_NOT_EXIST = Optional(
        CaselessLiteral("IF") + CaselessLiteral("NOT") +
        CaselessLiteral("EXISTS")).suppress()
    TABLE_NAME = (QuotedString(
        quoteChar="`", escQuote="``", escChar='\\', unquoteResults=True) |
        OBJECT_NAME)('table_name')

    # Column definition
    # Sample: this part of rule will match following section
    # `id` bigint(20) unsigned NOT NULL DEFAULT '0',
    COLUMN_NAME = (QuotedString(
        quoteChar="`", escQuote="``", escChar='\\', unquoteResults=True) |
        OBJECT_NAME)('column_name')
    COLUMN_NAME_WITH_QUOTE = (QuotedString(
        quoteChar="`", escQuote="``", escChar='\\', unquoteResults=False) |
        OBJECT_NAME)('column_name')
    UNSIGNED = Optional(CaselessLiteral("UNSIGNED"))('unsigned')
    ZEROFILL = Optional(CaselessLiteral("ZEROFILL"))('zerofill')
    COL_LEN = Combine(LEFT_PARENTHESES + LENGTH + RIGHT_PARENTHESES,
                      adjacent=False)('length')
    # BUGFIX: the original rule spelled the keyword "INTERGER" (a typo that
    # can never appear in valid SQL) and omitted "INTEGER".  Since `|` is a
    # MatchFirst, "INTEGER" must also come *before* "INT", otherwise the
    # "INT" prefix matches and the trailing "EGER" breaks the parse.  The
    # old "INTERGER" literal is kept for strict backward compatibility.
    INT_TYPE = (CaselessLiteral("TINYINT") | CaselessLiteral("SMALLINT") |
                CaselessLiteral("MEDIUMINT") | CaselessLiteral("INTEGER") |
                CaselessLiteral("INTERGER") | CaselessLiteral("INT") |
                CaselessLiteral("BIGINT") | CaselessLiteral("BINARY") |
                CaselessLiteral("BIT"))
    INT_DEF = (INT_TYPE('column_type') + Optional(COL_LEN) + UNSIGNED +
               ZEROFILL)
    VARBINARY_DEF = (CaselessLiteral('VARBINARY')('column_type') + COL_LEN)
    FLOAT_TYPE = \
        CaselessLiteral("REAL") | CaselessLiteral("DOUBLE") |\
        CaselessLiteral("FLOAT") | CaselessLiteral("DECIMAL") |\
        CaselessLiteral("NUMERIC")
    # Floats may carry "(precision, scale)" rather than a single length
    FLOAT_LEN = Combine(LEFT_PARENTHESES + LENGTH +
                        Optional(COMMA + LENGTH) + RIGHT_PARENTHESES,
                        adjacent=False, joinString=', ')('length')
    FLOAT_DEF = (FLOAT_TYPE('column_type') + Optional(FLOAT_LEN) + UNSIGNED +
                 ZEROFILL)

    # time type definition. They contain type_name and an optional FSP
    # section
    # Sample: DATETIME[(fsp)]
    FSP = COL_LEN
    DT_DEF = (
        Combine(CaselessLiteral("TIME") +
                Optional(CaselessLiteral("STAMP"))) |
        CaselessLiteral("DATETIME"))('column_type') + Optional(FSP)
    SIMPLE_DEF = (CaselessLiteral("DATE") | CaselessLiteral("YEAR") |
                  CaselessLiteral("TINYBLOB") | CaselessLiteral("BLOB") |
                  CaselessLiteral("MEDIUMBLOB") |
                  CaselessLiteral("LONGBLOB") | CaselessLiteral("BOOL") |
                  CaselessLiteral("BOOLEAN"))('column_type')
    OPTIONAL_COL_LEN = Optional(COL_LEN)
    BINARY = Optional(CaselessLiteral("BINARY"))('binary')
    CHARSET_NAME = (Optional(QUOTE).suppress() +
                    Word(alphanums + '_')('charset') +
                    Optional(QUOTE).suppress())
    COLLATION_NAME = (Optional(QUOTE).suppress() +
                      Word(alphanums + '_')('collate') +
                      Optional(QUOTE).suppress())
    CHARSET_DEF = (CaselessLiteral("CHARACTER SET").suppress() +
                   CHARSET_NAME)
    COLLATE_DEF = (CaselessLiteral("COLLATE").suppress() + COLLATION_NAME)
    CHAR_DEF = (CaselessLiteral("CHAR")('column_type') + OPTIONAL_COL_LEN +
                BINARY)
    VARCHAR_DEF = (CaselessLiteral("VARCHAR")('column_type') + COL_LEN +
                   BINARY)
    TEXT_TYPE = (CaselessLiteral("TINYTEXT") | CaselessLiteral("TEXT") |
                 CaselessLiteral("MEDIUMTEXT") |
                 CaselessLiteral("LONGTEXT") |
                 CaselessLiteral("DOCUMENT"))
    TEXT_DEF = (TEXT_TYPE('column_type') + BINARY)
    ENUM_VALUE_LIST = Group(
        QUOTED_STRING_WITH_QUOTE +
        ZeroOrMore(COMMA + QUOTED_STRING_WITH_QUOTE))('enum_value_list')
    ENUM_DEF = (CaselessLiteral("ENUM")('column_type') + LEFT_PARENTHESES +
                ENUM_VALUE_LIST + RIGHT_PARENTHESES)
    SET_VALUE_LIST = Group(
        QUOTED_STRING_WITH_QUOTE +
        ZeroOrMore(COMMA + QUOTED_STRING_WITH_QUOTE))('set_value_list')
    SET_DEF = (CaselessLiteral("SET")('column_type') + LEFT_PARENTHESES +
               SET_VALUE_LIST + RIGHT_PARENTHESES)
    DATA_TYPE = (INT_DEF | FLOAT_DEF | DT_DEF | SIMPLE_DEF | TEXT_DEF |
                 CHAR_DEF | VARCHAR_DEF | ENUM_DEF | SET_DEF |
                 VARBINARY_DEF)

    # Column attributes come after column type and length
    NULLABLE = (CaselessLiteral("NULL") | CaselessLiteral("NOT NULL"))
    DEFAULT_VALUE = (CaselessLiteral("DEFAULT").suppress() + (
        Optional(Literal('b'))('is_bit') +
        QUOTED_STRING_WITH_QUOTE('default') | Combine(
            CaselessLiteral("CURRENT_TIMESTAMP")('default') +
            Optional(COL_LEN)('ts_len')) |
        Word(alphanums + '_' + '-' + '+')('default')))
    ON_UPDATE = (CaselessLiteral("ON") + CaselessLiteral("UPDATE") +
                 (CaselessLiteral("CURRENT_TIMESTAMP")('on_update') +
                  Optional(COL_LEN)('on_update_ts_len')))
    AUTO_INCRE = CaselessLiteral("AUTO_INCREMENT")
    UNIQ_KEY = (CaselessLiteral("UNIQUE") +
                Optional(CaselessLiteral("KEY")).suppress())
    PRIMARY_KEY = (CaselessLiteral("PRIMARY") +
                   Optional(CaselessLiteral("KEY")).suppress())
    COMMENT = Combine(CaselessLiteral("COMMENT").suppress() +
                      QUOTED_STRING_WITH_QUOTE, adjacent=False)
    COLUMN_DEF = Group(COLUMN_NAME + DATA_TYPE + ZeroOrMore(
        NULLABLE('nullable') | DEFAULT_VALUE | ON_UPDATE |
        AUTO_INCRE('auto_increment') | UNIQ_KEY('uniq_key') |
        PRIMARY_KEY('primary') | COMMENT('comment') | CHARSET_DEF |
        COLLATE_DEF))
    COLUMN_LIST = Group(COLUMN_DEF +
                        ZeroOrMore(COMMA + COLUMN_DEF))('column_list')
    DOCUMENT_PATH = Combine(COLUMN_NAME_WITH_QUOTE +
                            ZeroOrMore(DOT + COLUMN_NAME_WITH_QUOTE))
    IDX_COL = ((Group(DOCUMENT_PATH + CaselessLiteral('AS') +
                      (CaselessLiteral('INT') | CaselessLiteral('STRING')) +
                      Optional(COL_LEN, default=''))) |
               (Group(COLUMN_NAME + Optional(COL_LEN, default=''))))

    # Primary key section
    COL_NAME_LIST = Group(IDX_COL + ZeroOrMore(COMMA + IDX_COL))
    IDX_COLS = (LEFT_PARENTHESES + COL_NAME_LIST + RIGHT_PARENTHESES)
    WORD_PRI_KEY = (CaselessLiteral("PRIMARY").suppress() +
                    CaselessLiteral("KEY").suppress())
    KEY_BLOCK_SIZE = (CaselessLiteral("KEY_BLOCK_SIZE").suppress() +
                      Optional(Literal('=')) +
                      Word(nums)('idx_key_block_size'))
    INDEX_USING = (
        CaselessLiteral("USING").suppress() +
        (CaselessLiteral("BTREE") | CaselessLiteral("HASH"))('idx_using'))
    INDEX_OPTION = (ZeroOrMore(KEY_BLOCK_SIZE | COMMENT('idx_comment') |
                               INDEX_USING))
    PRI_KEY_DEF = (COMMA + WORD_PRI_KEY + IDX_COLS('pri_list') +
                   INDEX_OPTION)

    # Index section
    KEY_TYPE = (CaselessLiteral("FULLTEXT") |
                CaselessLiteral("SPATIAL"))('key_type')
    WORD_UNIQUE = CaselessLiteral("UNIQUE")('unique')
    WORD_KEY = (CaselessLiteral("INDEX").suppress() |
                CaselessLiteral("KEY").suppress())
    IDX_NAME = Optional(COLUMN_NAME)
    IDX_DEF = (ZeroOrMore(Group(
        COMMA + Optional(WORD_UNIQUE | KEY_TYPE) + WORD_KEY +
        IDX_NAME('index_name') + IDX_COLS('index_col_list') +
        INDEX_OPTION)))('index_section')

    # Constraint section as this is not a recommended way of using MySQL
    # we'll treat the whole section as a string
    CONSTRAINT = Combine(ZeroOrMore(
        COMMA + Optional(CaselessLiteral('CONSTRAINT')) +
        # foreign key name except the key word 'FOREIGN'
        Optional((~CaselessLiteral('FOREIGN') + COLUMN_NAME)) +
        CaselessLiteral('FOREIGN') + CaselessLiteral('KEY') +
        LEFT_PARENTHESES + COL_NAME_LIST + RIGHT_PARENTHESES +
        CaselessLiteral('REFERENCES') + COLUMN_NAME +
        LEFT_PARENTHESES + COL_NAME_LIST + RIGHT_PARENTHESES +
        ZeroOrMore(Word(alphanums))),
        adjacent=False, joinString=' ')('constraint')

    # Table option section
    ENGINE = (CaselessLiteral("ENGINE").suppress() +
              Optional(Literal('=')).suppress() +
              COLUMN_NAME('engine').setParseAction(upcaseTokens))
    DEFAULT_CHARSET = (Optional(CaselessLiteral("DEFAULT")).suppress() +
                       ((CaselessLiteral("CHARACTER").suppress() +
                         CaselessLiteral("SET").suppress()) |
                        (CaselessLiteral("CHARSET").suppress())) +
                       Optional(Literal('=')).suppress() +
                       Word(alphanums + '_')('charset'))
    TABLE_COLLATE = (Optional(CaselessLiteral("DEFAULT")).suppress() +
                     CaselessLiteral("COLLATE").suppress() +
                     Optional(Literal('=')).suppress() + COLLATION_NAME)
    ROW_FORMAT = (
        CaselessLiteral("ROW_FORMAT").suppress() +
        Optional(Literal('=')).suppress() +
        Word(alphanums + '_')('row_format').setParseAction(upcaseTokens))
    TABLE_KEY_BLOCK_SIZE = (
        CaselessLiteral("KEY_BLOCK_SIZE").suppress() +
        Optional(Literal('=')).suppress() +
        Word(nums)('key_block_size').setParseAction(
            lambda s, l, t: [int(t[0])]))
    COMPRESSION = (
        CaselessLiteral("COMPRESSION").suppress() +
        Optional(Literal('=')).suppress() +
        Word(alphanums + '_')('compression').setParseAction(upcaseTokens))

    # Parse and make sure auto_increment is an integer
    # parseAction function is defined as fn( s, loc, toks ), where:
    # s is the original parse string
    # loc is the location in the string where matching started
    # toks is the list of the matched tokens, packaged as a ParseResults_
    # object
    TABLE_AUTO_INCRE = (
        CaselessLiteral("AUTO_INCREMENT").suppress() +
        Optional(Literal('=')).suppress() +
        Word(nums)('auto_increment').setParseAction(
            lambda s, l, t: [int(t[0])]))
    TABLE_COMMENT = (CaselessLiteral("COMMENT").suppress() +
                     Optional(Literal('=')).suppress() +
                     QUOTED_STRING_WITH_QUOTE('comment'))
    TABLE_OPTION = ZeroOrMore(ENGINE | DEFAULT_CHARSET | TABLE_COLLATE |
                              ROW_FORMAT | TABLE_KEY_BLOCK_SIZE |
                              COMPRESSION | TABLE_AUTO_INCRE |
                              TABLE_COMMENT)

    # Partition section
    PARTITION = Optional(Combine(
        Combine(Optional(Literal('/*!') + Word(nums))) +
        CaselessLiteral("PARTITION") + CaselessLiteral("BY") +
        SkipTo(StringEnd()),
        adjacent=False, joinString=" ")('partition'))

    @classmethod
    def generate_rule(cls):
        # The final rule for the whole statement match
        return (cls.WORD_CREATE + cls.WORD_TABLE + cls.IF_NOT_EXIST +
                cls.TABLE_NAME + cls.LEFT_PARENTHESES + cls.COLUMN_LIST +
                Optional(cls.PRI_KEY_DEF) + cls.IDX_DEF + cls.CONSTRAINT +
                cls.RIGHT_PARENTHESES + cls.TABLE_OPTION('table_options') +
                cls.PARTITION)

    @classmethod
    def get_parser(cls):
        # Build the grammar once and cache it on the class
        if not cls._parser:
            cls._parser = cls.generate_rule()
        return cls._parser

    @classmethod
    def parse(cls, sql):
        """Parse a CREATE TABLE statement into a models.Table object.

        Raises:
            ParseError: when the SQL cannot be parsed, or when a primary
                key is defined both inline and as a separate section.
        """
        try:
            if not isinstance(sql, str):
                sql = sql.decode('utf-8')
            result = cls.get_parser().parseString(sql)
        except ParseException as e:
            raise ParseError(
                "Failed to parse SQL, unsupported syntax: {}".format(e),
                e.line, e.column)

        inline_pri_exists = False
        table = models.Table()
        table.name = result.table_name
        table_options = [
            'engine', 'charset', 'collate', 'row_format', 'key_block_size',
            'compression', 'auto_increment', 'comment'
        ]
        for table_option in table_options:
            if table_option in result:
                setattr(table, table_option, result.get(table_option))
        if 'partition' in result:
            # pyparsing will convert newline into two after parsing. So we
            # need to dedup here
            table.partition = result.partition.replace("\n\n", "\n")
        if 'constraint' in result:
            table.constraint = result.constraint
        for column_def in result.column_list:
            if column_def.column_type == 'ENUM':
                column = models.EnumColumn()
                for enum_value in column_def.enum_value_list:
                    column.enum_list.append(enum_value)
            elif column_def.column_type == 'SET':
                column = models.SetColumn()
                for set_value in column_def.set_value_list:
                    column.set_list.append(set_value)
            elif column_def.column_type in ('TIMESTAMP', 'DATETIME'):
                column = models.TimestampColumn()
                if 'on_update' in column_def:
                    if 'on_update_ts_len' in column_def:
                        column.on_update_current_timestamp = \
                            "{}({})".format(
                                column_def.on_update,
                                column_def.on_update_ts_len)
                    else:
                        column.on_update_current_timestamp = \
                            column_def.on_update
            else:
                column = models.Column()

            column.name = column_def.column_name
            column.column_type = column_def.column_type

            # We need to check whether each column property exist in the
            # create table string, because not specifying a "COMMENT" is
            # different from specifying "COMMENT" equals to empty string.
            # The former one will ends up being
            #   column=None
            # and the later one being
            #   column=''
            if 'comment' in column_def:
                column.comment = column_def.comment
            if 'nullable' in column_def:
                if column_def.nullable == 'NULL':
                    column.nullable = True
                elif column_def.nullable == 'NOT NULL':
                    column.nullable = False
            if 'unsigned' in column_def:
                if column_def.unsigned == 'UNSIGNED':
                    column.unsigned = True
            if 'default' in column_def:
                if 'ts_len' in column_def:
                    column.default = "{}({})".format(
                        column_def.default, column_def.ts_len)
                else:
                    column.default = column_def.default
                if 'is_bit' in column_def:
                    column.is_default_bit = True
            if 'charset' in column_def:
                column.charset = column_def.charset
            if 'length' in column_def:
                column.length = column_def.length
            if 'collate' in column_def:
                column.collate = column_def.collate
            if 'auto_increment' in column_def:
                column.auto_increment = True
            if 'primary' in column_def:
                # Primary key specified inline on the column itself
                idx_col = models.IndexColumn()
                idx_col.name = column_def.column_name
                table.primary_key.column_list.append(idx_col)
                inline_pri_exists = True
            table.column_list.append(column)

        if 'pri_list' in result:
            if inline_pri_exists:
                raise ParseError("Multiple primary keys defined")
            table.primary_key.name = 'PRIMARY'
            for col in result.pri_list:
                for name, length in col:
                    idx_col = models.IndexColumn()
                    idx_col.name = name
                    if length:
                        idx_col.length = length
                    table.primary_key.column_list.append(idx_col)
        if 'idx_key_block_size' in result:
            # BUGFIX: the grammar captures this value under the results
            # name 'idx_key_block_size' (see KEY_BLOCK_SIZE above); the
            # original read the non-existent 'pri_key_block_size', which
            # silently yielded an empty value.
            table.primary_key.key_block_size = result.idx_key_block_size
        if 'idx_comment' in result:
            table.primary_key.comment = result.idx_comment

        if 'index_section' in result:
            for idx_def in result.index_section:
                idx = models.TableIndex()
                idx.name = idx_def.index_name
                if 'idx_key_block_size' in idx_def:
                    idx.key_block_size = idx_def.idx_key_block_size
                if 'idx_comment' in idx_def:
                    idx.comment = idx_def.idx_comment
                if 'idx_using' in idx_def:
                    idx.using = idx_def.idx_using
                if 'key_type' in idx_def:
                    idx.key_type = idx_def.key_type
                if 'unique' in idx_def:
                    idx.is_unique = True
                for col in idx_def.index_col_list:
                    for col_def in col:
                        if len(col_def) == 4 and \
                                col_def[1].upper() == 'AS':
                            # Document-store index column:
                            # path AS INT|STRING [(len)]
                            (document_path, word_as, key_type,
                             length) = col_def
                            idx_col = models.DocStoreIndexColumn()
                            idx_col.document_path = document_path
                            idx_col.key_type = key_type
                            if length:
                                idx_col.length = length
                            idx.column_list.append(idx_col)
                        else:
                            (name, length) = col_def
                            idx_col = models.IndexColumn()
                            idx_col.name = name
                            if length:
                                idx_col.length = length
                            idx.column_list.append(idx_col)
                table.indexes.append(idx)
        return table
# NOTE: this is a Python 2 script (statement-form `print`).
# Demo of pyparsing results names: without listAllMatches only the LAST
# token matched under a name is kept; with listAllMatches=True every
# match is accumulated.
from pyparsing import oneOf, OneOrMore, printables, StringEnd

test = "The quick brown fox named 'Aloysius' lives at 123 Main Street (and jumps over lazy dogs in his spare time)."
# Every printable character that is not a letter (digits, punctuation, ...)
nonAlphas = [c for c in printables if not c.isalpha()]

print "Extract vowels, consonants, and special characters from this test string:"
print "'" + test + "'"
print

print "Define grammar using normal results names"
print "(only last matching symbol is saved)"
# Classify each character of the input as vowel / consonant / other.
vowels = oneOf(list("aeiouy"), caseless=True).setResultsName("vowels")
cons = oneOf(list("bcdfghjklmnpqrstvwxz"), caseless=True).setResultsName("cons")
other = oneOf(list(nonAlphas)).setResultsName("others")
letters = OneOrMore(cons | vowels | other) + StringEnd()
results = letters.parseString(test)
# Each named attribute holds only the last character matched in that class.
print results
print results.vowels
print results.cons
print results.others
print

print "Define grammar using results names, with listAllMatches=True"
print "(all matching symbols are saved)"
# Same classes, but now every match is collected into a list.
vowels = oneOf(list("aeiouy"), caseless=True).setResultsName("vowels",
                                                            listAllMatches=True)
cons = oneOf(list("bcdfghjklmnpqrstvwxz"),
             caseless=True).setResultsName("cons", listAllMatches=True)
other = oneOf(list(nonAlphas)).setResultsName("others", listAllMatches=True)
| Literal('uA') | Literal('b') | Literal('uB') | Literal('yM') | Literal('yS') | Literal('r') | Literal('wC') | Literal('wL') | empty) selector << Or( [(Literal('g') + selector)('global'), (Literal('<') + Optional(predicates) + Literal('>'))('window'), (Literal('[') + Optional(predicates) + Literal(']'))('workspace'), (Literal('{') + Optional(predicates) + Literal('}'))('application'), Literal('%')('current'), Literal('#')('prior')]) parser = Or([ Suppress(ZeroOrMore(White())), selector('selector') + command('command') + direction('direction'), other, ]) + ZeroOrMore(comment) + StringEnd() def parse(line): try: return parser.parseString(line) except ParseException, e: raise WimException("Parse Exception: " + e.msg)
def _create_dbc_grammar():
    """Create DBC grammar.

    Builds and returns a pyparsing grammar for CAN DBC files: one or more
    entries (version, symbols, nodes, messages, signals, comments,
    attributes, value tables, ...) followed by end-of-string.

    NOTE(review): the entry keywords (SIGNAL, MESSAGE, EVENT, COMMENT,
    ATTRIBUTE, NODES, DEFAULT_ATTR, ATTR_DEFINITION, CHOICE, VALUE_TABLE)
    are module-level constants defined elsewhere in this file — presumably
    the DBC keywords 'SG_', 'BO_', etc.; confirm against the module header.
    """
    # DBC file grammar.

    # Primitive tokens.  `word` excludes the ';' and ':' terminators.
    word = Word(printables.replace(';', '').replace(':', ''))
    integer = Group(Optional('-') + Word(nums))
    positive_integer = Word(nums)
    # Loose numeric token: digits plus '.', 'E'/'e' and signs.
    number = Word(nums + '.Ee-+')
    colon = Suppress(Literal(':'))
    scolon = Suppress(Literal(';'))
    pipe = Suppress(Literal('|'))
    at = Suppress(Literal('@'))
    sign = Literal('+') | Literal('-')
    lp = Suppress(Literal('('))
    rp = Suppress(Literal(')'))
    lb = Suppress(Literal('['))
    rb = Suppress(Literal(']'))
    comma = Suppress(Literal(','))
    # Node names stay on one line: only ' ' is skippable whitespace.
    node = Word(alphas + nums + '_-').setWhitespaceChars(' ')

    # VERSION "..."
    version = Group(Keyword('VERSION') + QuotedString('"', multiline=True))
    # NS_ : list of symbol names, one per line.
    symbol = Word(alphas + '_') + Suppress(LineEnd())
    symbols = Group(Keyword('NS_') + colon + Group(ZeroOrMore(symbol)))
    # BS_ : (bit timing) section is discarded.
    discard = Suppress(Keyword('BS_') + colon)
    # BU_ : list of network nodes.
    nodes = Group(Keyword('BU_') + colon + Group(ZeroOrMore(node)))
    # Signal definition: name, start|length@byte_order sign,
    # (scale, offset), [min|max], unit string, receiver list.
    signal = Group(
        Keyword(SIGNAL) + Group(word + Optional(word)) + colon +
        Group(positive_integer + pipe + positive_integer + at +
              positive_integer + sign) +
        Group(lp + number + comma + number + rp) +
        Group(lb + number + pipe + number + rb) +
        QuotedString('"', multiline=True) + Group(delimitedList(node)))
    # Message definition: frame id, name, length, sender, then its signals.
    message = Group(
        Keyword(MESSAGE) + positive_integer + word + colon +
        positive_integer + word + Group(ZeroOrMore(signal)))
    # Environment variables are parsed but discarded.
    event = Suppress(
        Keyword(EVENT) + word + colon + positive_integer + lb + number +
        pipe + number + rb + QuotedString('"', multiline=True) + number +
        number + word + node + scolon)
    # CM_ comments, attached to a message, signal, node or event.
    comment = Group(
        Keyword(COMMENT) +
        ((Keyword(MESSAGE) + positive_integer +
          QuotedString('"', multiline=True) + scolon) |
         (Keyword(SIGNAL) + positive_integer + word +
          QuotedString('"', multiline=True) + scolon) |
         (Keyword(NODES) + word +
          QuotedString('"', multiline=True) + scolon) |
         (Keyword(EVENT) + word +
          QuotedString('"', multiline=True) + scolon)))
    # Attribute value assignment (BA_-style).
    attribute = Group(
        Keyword(ATTRIBUTE) +
        ((QuotedString('"', multiline=True)) |
         (Keyword(SIGNAL) + QuotedString('"', multiline=True)) |
         (Keyword(MESSAGE) + QuotedString('"', multiline=True)) |
         (Keyword(EVENT) + QuotedString('"', multiline=True)) |
         (Keyword(NODES) + QuotedString('"', multiline=True))) +
        word +
        ((scolon) |
         (Group(ZeroOrMore(
             Group((comma | Empty()) +
                   QuotedString('"', multiline=True)))) + scolon) |
         (Group(ZeroOrMore(number)) + scolon)))
    # Attribute default value.
    default_attr = Group(
        Keyword(DEFAULT_ATTR) + QuotedString('"', multiline=True) +
        (positive_integer | QuotedString('"', multiline=True)) + scolon)
    # Attribute definition, optionally scoped to message/signal/node.
    attr_definition = Group(
        Keyword(ATTR_DEFINITION) + QuotedString('"', multiline=True) +
        Group(Optional((Keyword(MESSAGE) + positive_integer) |
                       (Keyword(SIGNAL) + positive_integer + word) |
                       (Keyword(NODES) + word))) +
        (QuotedString('"', multiline=True) | positive_integer) + scolon)
    # VAL_: enumeration of (value, "label") pairs for a signal.
    choice = Group(
        Keyword(CHOICE) + Optional(positive_integer) + word +
        Group(OneOrMore(
            Group(integer + QuotedString('"', multiline=True)))) + scolon)
    # VAL_TABLE_: named, reusable value table.
    value_table = Group(
        Keyword(VALUE_TABLE) + word +
        Group(OneOrMore(
            Group(integer + QuotedString('"', multiline=True)))) + scolon)

    # A DBC file is any sequence of the entries above.
    entry = (version | symbols | discard | nodes | message | comment |
             attribute | default_attr | attr_definition | choice |
             value_table | event)
    grammar = OneOrMore(entry) + StringEnd()

    return grammar
def __init__(self):
    """Build the main application window from the glade file and wire up
    all of its widgets (menu, search entry, buttons, status bar with an
    embedded progress bar, and the search-entry command parser).
    """
    # Load the UI definition and grab the top-level window.
    filename = os.path.join(paths.lib_dir(), 'bauble.glade')
    self.widgets = utils.load_widgets(filename)
    self.window = self.widgets.main_window
    self.window.hide()

    # restore the window size
    geometry = prefs[self.window_geometry_pref]
    if geometry is not None:
        self.window.set_default_size(*geometry)

    self.window.connect('delete-event', self.on_delete_event)
    self.window.connect("destroy", self.on_quit)
    self.window.set_title(self.title)

    # Window icon is optional; log and continue if it can't be loaded.
    try:
        pixbuf = gtk.gdk.pixbuf_new_from_file(bauble.default_icon)
        self.window.set_icon(pixbuf)
    except Exception:
        logger.warning(
            _('Could not load icon from %s' % bauble.default_icon))
        logger.warning(traceback.format_exc())

    # Menu bar.
    menubar = self.create_main_menu()
    self.widgets.menu_box.pack_start(menubar)

    # Main combo entry (search box) backed by a string list model.
    combo = self.widgets.main_comboentry
    model = gtk.ListStore(str)
    combo.set_model(model)
    self.populate_main_entry()

    main_entry = combo.child
    main_entry.connect('activate', self.on_main_entry_activate)
    # Ctrl+L focuses the search entry (browser-style shortcut).
    accel_group = gtk.AccelGroup()
    main_entry.add_accelerator("grab-focus", accel_group, ord('L'),
                               gtk.gdk.CONTROL_MASK, gtk.ACCEL_VISIBLE)
    self.window.add_accel_group(accel_group)

    go_button = self.widgets.go_button
    go_button.connect('clicked', self.on_go_button_clicked)

    query_button = self.widgets.query_button
    query_button.connect('clicked', self.on_query_button_clicked)

    self.set_default_view()

    # add a progressbar to the status bar
    # Warning: this relies on gtk.Statusbar internals and could break in
    # future versions of gtk
    statusbar = self.widgets.statusbar
    statusbar.set_spacing(10)
    statusbar.set_has_resize_grip(True)
    # Track context ids pushed onto the status bar.
    self._cids = []

    def on_statusbar_push(sb, cid, txt):
        # Remember each context id the first time it is pushed.
        if cid not in self._cids:
            self._cids.append(cid)
    statusbar.connect('text-pushed', on_statusbar_push)

    # remove label from frame
    frame = statusbar.get_children()[0]
    #frame.modify_bg(gtk.STATE_NORMAL, gtk.gdk.color_parse('#FF0000'))
    label = frame.get_children()[0]
    frame.remove(label)

    # replace label with hbox and put label and progress bar in hbox
    hbox = gtk.HBox(False, 5)
    frame.add(hbox)
    hbox.pack_start(label, True, True, 0)
    vbox = gtk.VBox(True, 0)
    hbox.pack_end(vbox, False, True, 15)
    self.progressbar = gtk.ProgressBar()
    vbox.pack_start(self.progressbar, False, False, 0)
    self.progressbar.set_size_request(-1, 10)
    vbox.show()
    hbox.show()

    # Grammar for the search entry:
    #   ':cmd'          -> bare command
    #   ':cmd=arg'      -> command with an argument
    #   anything else   -> plain search argument
    from pyparsing import StringStart, Word, alphanums, restOfLine, \
        StringEnd
    cmd = StringStart() + ':' + Word(alphanums + '-_').setResultsName('cmd')
    arg = restOfLine.setResultsName('arg')
    self.cmd_parser = (cmd + StringEnd()) | (cmd + '=' + arg) | arg

    combo.grab_focus()
def create_bnf(stack):
    """Build the pyparsing grammar for region selection expressions.

    Parse actions push tokens onto *stack* for later evaluation (RPN-style)
    rather than returning a parse tree.

    NOTE(review): `replace`, `replace_with_region`, `join_tokens` and
    `to_stack` are helper factories defined elsewhere in this file;
    presumably they build parse actions — confirm against their
    definitions.
    """
    # Numeric literals: integers, and signed floats with optional
    # fraction and exponent.
    point = Literal(".")
    e = CaselessLiteral("E")
    inumber = Word(nums)
    fnumber = Combine(Word("+-" + nums, nums) +
                      Optional(point + Optional(Word(nums))) +
                      Optional(e + Word("+-" + nums, nums)))
    # Keywords used inside selector phrases.
    _of = Literal('of')
    _in = Literal('in')
    _by = Literal('by')
    _copy = Literal('copy')

    # Region-algebra operators; each is rewritten to an opcode token.
    # -x: subtract, +x: add, *x: intersect, for
    # v(ertices)/e(dges)/f(aces)/c(ells)/s(urfaces).
    _mv = Literal('-v').setParseAction(replace('OA_SubV'))
    _me = Literal('-e').setParseAction(replace('OA_SubE'))
    _mf = Literal('-f').setParseAction(replace('OA_SubF'))
    _mc = Literal('-c').setParseAction(replace('OA_SubC'))
    _ms = Literal('-s').setParseAction(replace('OA_SubS'))
    _pv = Literal('+v').setParseAction(replace('OA_AddV'))
    _pe = Literal('+e').setParseAction(replace('OA_AddE'))
    _pf = Literal('+f').setParseAction(replace('OA_AddF'))
    _pc = Literal('+c').setParseAction(replace('OA_AddC'))
    _ps = Literal('+s').setParseAction(replace('OA_AddS'))
    _inv = Literal('*v').setParseAction(replace('OA_IntersectV'))
    _ine = Literal('*e').setParseAction(replace('OA_IntersectE'))
    _inf = Literal('*f').setParseAction(replace('OA_IntersectF'))
    _inc = Literal('*c').setParseAction(replace('OA_IntersectC'))
    _ins = Literal('*s').setParseAction(replace('OA_IntersectS'))
    regop = (_mv | _me | _mf | _mc | _ms | _pv | _pe | _pf | _pc | _ps |
             _inv | _ine | _inf | _inc | _ins)

    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()

    # Entity keywords.
    _all = Literal('all').setParseAction(replace('KW_All'))
    vertex = Literal('vertex')
    vertices = Literal('vertices')
    cell = Literal('cell')
    cells = Literal('cells')
    group = Literal('group')
    _set = Literal('set')
    surface = Literal('surface')

    ident = Word(alphas + '_.', alphanums + '_.')
    set_name = Word(nums) | ident

    function = Word(alphas + '_', alphanums + '_')
    function = Group(function).setParseAction(join_tokens)

    # Named region reference 'r.<name>', optionally prefixed by 'copy'.
    region = Combine(Literal('r.') +
                     Word(alphas + '_-', '_-' + alphas + nums + '.'))
    region = Group(Optional(_copy, default='nocopy') + region)
    region.setParseAction(replace('KW_Region', keep=True))

    # Coordinate relations, e.g. '(x < 0.5) & (y > 0)'.
    coor = oneOf('x y z')
    boolop = oneOf('& |')
    relop = oneOf('< > <= >= != ==')
    bool_term = (ZeroOrMore('(') + (coor | fnumber) + relop +
                 (coor | fnumber) + ZeroOrMore(')'))
    relation = Forward()
    relation << (ZeroOrMore('(') + bool_term +
                 ZeroOrMore(boolop + relation) + ZeroOrMore(')'))
    relation = Group(relation).setParseAction(join_tokens)

    # Selector phrases; each is tagged with an E_* opcode.
    nos = Group(vertices + _of + surface).setParseAction(replace('E_VOS'))
    nir = Group(vertices + _in + relation).setParseAction(
        replace('E_VIR', keep=True))
    nbf = Group(vertices + _by + function).setParseAction(
        replace('E_VBF', keep=True))
    ebf = Group(cells + _by + function).setParseAction(
        replace('E_CBF', keep=True))
    eog = Group(cells + _of + group + Word(nums)).setParseAction(
        replace('E_COG', keep=True))
    nog = Group(vertices + _of + group + Word(nums)).setParseAction(
        replace('E_VOG', keep=True))
    onir = Group(vertex + _in + region).setParseAction(
        replace_with_region('E_OVIR', 2))
    ni = Group(vertex + delimitedList(inumber)).setParseAction(
        replace('E_VI', keep=True))
    ei = Group(cell + delimitedList(inumber)).setParseAction(
        replace('E_CI', keep=True))
    noset = Group(vertices + _of + _set + set_name).setParseAction(
        replace('E_VOSET', keep=True))
    eoset = Group(cells + _of + _set + set_name).setParseAction(
        replace('E_COSET', keep=True))

    region_expression = Forward()

    # An atom is either a selector phrase or a parenthesized expression.
    atom1 = (_all | region | ni | onir | nos | nir | nbf |
             ei | ebf | eog | nog | noset | eoset)
    atom1.setParseAction(to_stack(stack))
    atom2 = (lpar + region_expression.suppress() + rpar)
    atom = (atom1 | atom2)

    # Operator followed by a sub-expression; also pushed to the stack.
    aux = (regop + region_expression)
    aux.setParseAction(to_stack(stack))
    region_expression << atom + ZeroOrMore(aux)
    # Anchor the expression to the whole input string.
    region_expression = StringStart() + region_expression + StringEnd()

    return region_expression
val = clampColourPerc(val) #normalize to bytes return int(255 * (val / 100.0)) colorByte = Optional(sign) + integerConstant.setParseAction( lambda t: clampColourByte(t[0])) colorPerc = number.setParseAction(parseColorPerc) + Literal("%").suppress() rgb = ( Literal("rgb(").setParseAction(lambda t: "RGB") + ( #integer constants, ie 255,255,255 Group(colorByte + comma + colorByte + comma + colorByte) ^ #percentage values, ie 100%, 50% Group(colorPerc + comma + colorPerc + comma + colorPerc)) + Literal(")").suppress() + StringEnd()) def parseShortHex(t): return tuple(int(x * 2, 16) for x in t[0]) doubleHex = Word(hexnums, exact=2).setParseAction(lambda t: int(t[0], 16)) hexLiteral = (Literal("#").setParseAction(lambda t: "RGB") + (Group(doubleHex + doubleHex + doubleHex) | Word(hexnums, exact=3).setParseAction(parseShortHex)) + StringEnd()) def parseNamedColour(t): try:
def _make_arabic_parser():
    """Build a pyparsing grammar for an Arabic/English search-query
    language and return its ``parseString`` bound method.

    Supports plain words, wildcards, ranges, synonyms (~), antonyms (#),
    derivation (</>), spelling errors (%), tashkil ('...'), tuples {...},
    quoted phrases, field prefixes (field:), boosting (^n), and the
    boolean operators AND/OR/NOT/ANDNOT in both English and Arabic.
    """
    escapechar = "//"
    # Arabic + Latin letters allowed in field names.
    # NOTE(review): the exact line breaks/indentation inside this literal
    # could not be recovered from the mangled source; only its character
    # *set* is used below (via Word), so whitespace members are inert
    # apart from also being accepted — confirm against upstream.
    alephba = u"""
    abcdefghijklmnopqrstuvwxyz_
    األآإـتنمكطدجحخهعغفقثصضشسيبئءؤرىةوزظذ
    """

    # Word characters: anything except query metacharacters; '//' escapes
    # a single following character (or a single whitespace char).
    wordtext = CharsNotIn(u'//*؟^():"{}[]$><%~#،,\' +-|')
    escape = Suppress(escapechar) \
        + (Word(printables, exact=1) | White(exact=1))
    wordtoken = Combine(OneOrMore(wordtext | escape))

    # A plain old word.
    plainWord = Group(wordtoken).setResultsName("Word")

    # A wildcard word containing * or ? (Arabic ؟ also accepted).
    wildchars = Word(u"؟?*")
    # Start with word chars and then have wild chars mixed in
    wildmixed = wordtoken + OneOrMore(wildchars + Optional(wordtoken))
    # Or, start with wildchars, and then either a mixture of word and wild
    # chars, or the next token
    wildstart = wildchars \
        + (OneOrMore(wordtoken + Optional(wildchars))
           | FollowedBy(White() | StringEnd()))
    wildcard = Group(Combine(wildmixed | wildstart)).setResultsName(
        "Wildcard")

    # A range of terms: [a TO b], [TO b], [a TO]; 'TO' in English or
    # Arabic (الى/إلى).
    startfence = Literal("[")
    endfence = Literal("]")
    rangeitem = QuotedString('"') | wordtoken
    to = Keyword(u"الى") \
        | Keyword(u"إلى") \
        | Keyword("To") \
        | Keyword("to") \
        | Keyword("TO")
    openstartrange = Group(Empty()) \
        + Suppress(to + White()) \
        + Group(rangeitem)
    openendrange = Group(rangeitem) \
        + Suppress(White() + to) \
        + Group(Empty())
    normalrange = Group(rangeitem) \
        + Suppress(White() + to + White()) \
        + Group(rangeitem)
    # NOTE: shadows the builtin `range` inside this function (kept as-is).
    range = Group(
        startfence
        + (normalrange | openstartrange | openendrange)
        + endfence).setResultsName("Range")

    # synonyms
    syn_symbol = Literal("~")
    synonym = Group(syn_symbol + wordtoken).setResultsName("Synonyms")

    # antonyms
    ant_symbol = Literal("#")
    antonym = Group(ant_symbol + wordtoken).setResultsName("Antonyms")

    # derivation level 1,2
    derive_symbole = Literal(u"<") | Literal(u">")
    derivation = Group(OneOrMore(derive_symbole)
                       + wordtoken).setResultsName("Derivation")

    # spellerrors
    # spellerrors=Group(QuotedString('\'')).setResultsName("Errors")
    spellerrors_symbole = Literal(u"%")
    spellerrors = Group(spellerrors_symbole
                        + wordtoken).setResultsName("SpellErrors")

    # shakl:must uplevel to boostable
    # Tashkil (vocalized text) between single quotes.
    tashkil_symbol = Literal("'")
    tashkil = Group(
        tashkil_symbol +
        ZeroOrMore(wordtoken | White()) +
        tashkil_symbol
    ).setResultsName("Tashkil")

    # tuple search (root,pattern,type); separator may be Arabic ، or ','.
    starttuple = Literal("{")
    endtuple = Literal("}")
    bettuple = Literal(u"،") | Literal(",")
    wordtuple = Group(Optional(wordtoken))
    # NOTE: shadows the builtin `tuple` inside this function (kept as-is).
    tuple = Group(
        starttuple +
        wordtuple +
        ZeroOrMore(bettuple + wordtuple) +
        endtuple
    ).setResultsName("Tuple")

    # A word-like thing
    generalWord = range | wildcard | plainWord | tuple | antonym | \
        synonym | derivation | tashkil | spellerrors

    # A quoted phrase
    quotedPhrase = Group(QuotedString('"')).setResultsName("Quotes")

    expression = Forward()

    # Parentheses can enclose (group) any expression
    parenthetical = Group(
        (Suppress("(") + expression + Suppress(")"))).setResultsName(
        "Group")

    boostableUnit = generalWord | quotedPhrase
    # term^weight, e.g. foo^2.5
    boostedUnit = Group(
        boostableUnit +
        Suppress("^") +
        Word("0123456789", ".0123456789")
    ).setResultsName("Boost")

    # The user can flag that a parenthetical group, quoted phrase, or word
    # should be searched in a particular field by prepending 'fn:', where
    # fn is the name of the field.
    fieldableUnit = parenthetical | boostedUnit | boostableUnit
    fieldedUnit = Group(
        (Word(alephba + "_") | Word(alphanums + "_")) +
        Suppress(':') +
        fieldableUnit
    ).setResultsName("Field")

    # Units of content
    unit = fieldedUnit | fieldableUnit

    # A unit may be "not"-ed (NOT or Arabic ليس).
    operatorNot = Group(
        Suppress(Keyword(u"ليس") | Keyword(u"NOT")) +
        Suppress(White()) +
        unit
    ).setResultsName("Not")

    generalUnit = operatorNot | unit

    # Boolean connectives, English and Arabic spellings.
    andToken = Keyword(u"و") | Keyword(u"AND")
    orToken = Keyword(u"أو") | Keyword(u"او") | Keyword(u"OR")
    andNotToken = Keyword(u"وليس") | Keyword(u"ANDNOT")

    # 'a AND b' or the shorthand 'a+b'.
    operatorAnd = Group(
        (generalUnit +
         Suppress(White()) +
         Suppress(andToken) +
         Suppress(White()) +
         expression) |
        (generalUnit +
         Suppress(Literal(u"+")) +
         expression)
    ).setResultsName("And")

    # 'a OR b' or the shorthand 'a|b'.
    operatorOr = Group(
        (generalUnit +
         Suppress(White()) +
         Suppress(orToken) +
         Suppress(White()) +
         expression) |
        (generalUnit +
         Suppress(Literal(u"|")) +
         expression)
    ).setResultsName("Or")

    # 'a ANDNOT b' or the shorthand 'a-b'.
    operatorAndNot = Group(
        (unit +
         Suppress(White()) +
         Suppress(andNotToken) +
         Suppress(White()) +
         expression) |
        (unit +
         Suppress(Literal(u"-")) +
         expression)
    ).setResultsName("AndNot")

    # An expression is any sequence of the above; Empty() accepts the
    # empty query.
    expression << (OneOrMore(operatorAnd | operatorOr | operatorAndNot |
                             generalUnit | Suppress(White())) | Empty())

    toplevel = Group(expression).setResultsName("Toplevel") + StringEnd()

    # Return the bound parse entry point rather than the grammar object.
    return toplevel.parseString
def build_parser(self):
    """Build and cache the full-input parser for this object.

    Wraps the grammar returned by ``define_operation()`` so that the
    whole input string must match (anchored with StringStart/StringEnd),
    and stores the result in ``self._parser``.
    """
    operation = self.define_operation()
    self._parser = StringStart() + operation + StringEnd()
def crn_document_setup(modular = False):
    """Parse a formal chemical reaction network.

    Args:
      modular <optional:bool>: Adds an additional nesting for modules within a
        CRN. Use one line per module (';' separates reactions).

    Format:
      # A list of reactions, optionally with reaction rates:
      # <- this is a comment!
      B + B -> C    # [k = 1]
      C + A <=> D   # [kf = 1, kr = 1]
      <=> A         [kf = 15, kr = 6]

      # Note that you can write multiple reactions in one line:
      A + 2C -> E [k = 13.78]; E + F <=> 2A [kf = 13, kr = 14]

    Returns:
      A pyparsing grammar object matching a whole CRN document.
    """
    # NOTE: If you want to add support for multiple modules per line, you can use
    # the '|' character.

    # Short aliases for the pyparsing combinators used below.
    W = Word
    G = Group
    S = Suppress
    O = Optional
    C = Combine
    L = Literal

    def T(x, tag):
        """ Return a *Tag* to distinguish (ir)reversible reactions """
        def TPA(tag):
            return lambda s, l, t: [tag] + t.asList()
        return x.setParseAction(TPA(tag))

    # Newlines are significant (they separate modules), so remove '\n' from
    # the default whitespace characters.
    crn_DWC = "".join(
        [x for x in ParseElementEnhance.DEFAULT_WHITE_CHARS if x != "\n"])
    ParseElementEnhance.setDefaultWhitespaceChars(crn_DWC)

    identifier = W(alphas, alphanums + "_")
    # Optional stoichiometric multiplier before a species name, e.g. "2C".
    multiplier = W(nums)
    species = G(O(multiplier) + identifier)

    number = W(nums, nums)
    num_flt = C(number + O(L('.') + number))
    num_sci = C(number + O(L('.') + number) + L('e') + O(L('-') | L('+')) + W(nums))
    gorf = num_sci | num_flt

    # Make specification of forward, backward, reverse more flexible
    kf = S('kf') | S('fw')
    kr = S('kr') | S('bw') | S('rv')

    # Rate annotations: "[k = 1]" for irreversible, "[kf = 1, kr = 2]"
    # (or positional "[1, 2]") for reversible reactions.
    k = G(S('[') + O(S('k') + S('=')) + gorf + S(']'))
    rev_k = G(S('[') + kf + S('=') + gorf + S(',') + kr + S('=') + gorf + S(']')) | G(S('[') + gorf + S(',') + gorf + S(']'))

    # "A @ initial 5" / "A @ constant 1" style concentration statements.
    concentration = T(species + S('@') + G(L("initial") | L("i") | L("constant") | L("c")) + G(gorf), 'concentration')

    # Either side of a reaction may be empty (e.g. "<=> A").
    reaction = T(G(O(delimitedList(species, "+"))) + S("->") +
                 G(O(delimitedList(species, "+"))) + O(k), 'irreversible')
    rev_reaction = T(G(O(delimitedList(species, "+"))) + S("<=>") +
                     G(O(delimitedList(species, "+"))) + O(rev_k), 'reversible')

    expr = G(reaction | rev_reaction | concentration)

    if modular:
        # Group the reactions of one line into a module.
        module = G(expr + ZeroOrMore(S(";") + expr))
    else:
        module = expr + ZeroOrMore(S(";") + expr)

    crn = OneOrMore(module + ZeroOrMore(S(LineEnd())))
    document = StringStart() + ZeroOrMore(S(LineEnd())) + crn + StringEnd()
    document.ignore(pythonStyleComment)
    return document
def show_ip_interface():
    """Build a pyparsing grammar for Cisco IOS 'show ip interface' output.

    Returns a parser that matches one or more interface sections, each
    yielding a Group of named results (interface_name, mtu, acl names,
    switching states, ...). Relies on helpers defined elsewhere in this
    file: ipaddress, ipprefix, ipaddress_list, comma_list.
    """
    # TODO move this to a seperate parser building blocks file
    # TODO combine them to IOS interface states??
    state_enabled_disabled = oneOf(['enabled', 'disabled'])
    state_always_never = oneOf(['always', 'never'])
    acl_name = Word(alphanums, bodyChars=alphanums + '_')
    interface_name = Word(alphas, bodyChars=alphanums + '/.-')
    interface_status = oneOf(['up', 'down', 'deleted'])
    line_status = oneOf(['up', 'down'])

    # An interface may borrow its address from another interface
    # ("Interface is unnumbered") instead of having its own prefix.
    interface_ip_unnumbered = Suppress('Interface is unnumbered. Using address of ') +\
        interface_name('interface_unnumbered_name') +\
        Suppress('(') + ipaddress('interface_unnumbered_ipaddress') + Suppress(')')
    interface_ip_prefix = Suppress('Internet address is') + ipprefix(
        'ipprefix')
    internet_address = (interface_ip_prefix | interface_ip_unnumbered)

    mtu = Word(nums).setParseAction(lambda tokens: int(tokens[0]))

    # FIXME Returns list of lists, instead of just a list with ipaddresses
    helper_addresses = Suppress('Helper addresses are') + ipaddress_list(
        'helper_addresses')
    # Single helper address is wrapped in a list so both the singular and
    # plural forms produce the same result shape.
    helper_address = ipaddress
    helper_address.setParseAction(lambda x: [x])
    helper_address = Suppress('Helper address is') + (
        Suppress('not set') | helper_address('helper_addresses'))
    ip_helper = (helper_address | helper_addresses)

    directed_broadcasts_acl = Suppress('- but restricted by access list'
                                       ) + acl_name('directed_broadcasts_acl')
    directed_broadcasts = Suppress('Directed broadcast forwarding is') + state_enabled_disabled('directed_broadcasts') +\
        Optional(directed_broadcasts_acl)

    outgoing_acl = Suppress('Outgoing access list is') + (
        Suppress('not set') | acl_name('outbound_acl'))
    inbound_acl = Suppress('Inbound access list is') + (
        Suppress('not set') | acl_name('inbound_acl'))
    proxyarp = Suppress('Proxy ARP is') + state_enabled_disabled('proxyarp')
    local_proxyarp = Suppress('Local Proxy ARP is') + state_enabled_disabled(
        'local_proxyarp')
    # TODO
    securitylevel = Suppress('Security level is default')
    splithorizon = Suppress('Split horizon is') + state_enabled_disabled(
        'splithorizon')
    icmp_redirects = Suppress('ICMP redirects are') + state_always_never(
        'icmp_redirects') + Suppress('sent')
    icmp_unreachables = Suppress('ICMP unreachables are') + state_always_never(
        'icmp_unreachables') + Suppress('sent')
    icmp_mask_replies = Suppress('ICMP mask replies are') + state_always_never(
        'icmp_maskreplies') + Suppress('sent')
    ip_fast_switching = Suppress(
        'IP fast switching is') + state_enabled_disabled('ip_fast_switching')
    ip_fast_switching_sameinterface = Suppress(
        'IP fast switching on the same interface is') + state_enabled_disabled(
            'ip_fast_switching_sameinterface')
    ip_flow_switching = Suppress(
        'IP Flow switching is') + state_enabled_disabled('ip_flow_switching')
    ip_cef_switching = Suppress(
        'IP CEF switching is') + state_enabled_disabled('ip_cef_switching')

    turbo_vector = oneOf(['CEF switching', 'CEF turbo switching', 'Null'])
    # Some IOS versions print the turbo-vector line twice; capture the
    # second occurrence under a separate result name.
    ip_turbo_vector = Suppress('IP') + turbo_vector(
        'ip_turbo_vector') + Suppress('turbo vector')
    ip_turbo_vector2 = Suppress('IP') + turbo_vector(
        'ip_turbo_vector2') + Suppress('turbo vector')

    # TODO What are valid chars for VRF names?
    vrfname = Word(alphanums)
    vrf = Suppress('VPN Routing/Forwarding "') + Optional(
        vrfname('vrf')) + Suppress('"')
    downstreamvrf = Suppress('Downstream VPN Routing/Forwarding "') + Optional(
        vrfname('downstreamvrf')) + Suppress('"')

    ip_multicast_fastswitching = Suppress('IP multicast fast switching is') +\
        state_enabled_disabled('ip_multicast_fastswitching')
    ip_multicast_distributed_fastswitching = Suppress('IP multicast distributed fast switching is') +\
        state_enabled_disabled('ip_multicast_distributed_fastswitching')
    ip_routecache_flags = Suppress('IP route-cache flags are') + comma_list(
        'ip_routecache_flags')
    routerdiscovery = Suppress('Router Discovery is') + state_enabled_disabled(
        'routerdiscovery')
    ip_output_accounting = Suppress(
        'IP output packet accounting is') + state_enabled_disabled(
            'ip_output_accounting')
    ip_violation_accounting = Suppress(
        'IP access violation accounting is') + state_enabled_disabled(
            'ip_violation_accounting')
    tcpip_header_compression = Suppress(
        'TCP/IP header compression is') + state_enabled_disabled(
            'tcpip_header_compression')
    rtpip_header_compression = Suppress(
        'RTP/IP header compression is') + state_enabled_disabled(
            'rtpip_header_compression')

    routemap_name = Word(alphas, bodyChars=alphanums + '_-')
    policy_routing_enabled = Suppress(', using route map') + routemap_name(
        'policy_routing_routemap')
    policy_routing = Suppress('Policy routing is') + state_enabled_disabled(
        'policy_routing') + Optional(policy_routing_enabled)

    nat_inside_outside = oneOf(['inside', 'outside'])
    nat_domain = Suppress(', interface in domain') + nat_inside_outside(
        'nat_domain')
    nat = Suppress('Network address translation is') + state_enabled_disabled(
        'nat_state') + Optional(nat_domain)

    bgp_policy_map = Word(alphanums, bodyChars=alphanums + '-')
    bgp_policies = Optional(Suppress('(output') + bgp_policy_map('bgp_policy_map_output') + Suppress(')')) +\
        Optional(Suppress('(input') + bgp_policy_map('bgp_policy_map_input') + Suppress(')'))
    bgp_policy = Suppress('BGP Policy Mapping is') + state_enabled_disabled(
        'bgp_policy') + bgp_policies

    input_features = Suppress('Input features:') + comma_list('input_features')
    output_features = Suppress('Output features:') + comma_list(
        'output_features')

    wccp_outbound = Optional(Suppress('IPv4')) + Suppress(
        'WCCP Redirect outbound is') + state_enabled_disabled('wccp_outbound')
    wccp_inbound = Optional(Suppress('IPv4')) + Suppress(
        'WCCP Redirect inbound is') + state_enabled_disabled('wccp_inbound')
    wccp_exclude = Optional(Suppress('IPv4')) + Suppress(
        'WCCP Redirect exclude is') + state_enabled_disabled('wccp_exclude')

    # Full section for an interface with IP enabled. The order of the
    # sub-parsers mirrors the fixed line order of the IOS output, so do
    # not reorder these terms.
    interface = interface_name('interface_name') + Suppress('is') + Optional(Suppress('administratively')) +\
        interface_status('interface_status') + Suppress(',') +\
        Suppress('line protocol is') + line_status('line_status') +\
        internet_address +\
        Suppress('Broadcast address is') + ipaddress('broadcast_address')+\
        Optional(Suppress('Address determined by setup command')) +\
        Suppress('MTU is') + mtu('mtu') + Suppress('bytes') +\
        ip_helper +\
        directed_broadcasts +\
        outgoing_acl +\
        inbound_acl +\
        proxyarp +\
        local_proxyarp +\
        securitylevel +\
        splithorizon +\
        icmp_redirects +\
        icmp_unreachables +\
        icmp_mask_replies +\
        ip_fast_switching +\
        ip_fast_switching_sameinterface +\
        ip_flow_switching +\
        ip_cef_switching +\
        ip_turbo_vector +\
        Optional(ip_turbo_vector2) +\
        Optional(vrf) +\
        Optional(downstreamvrf) +\
        ip_multicast_fastswitching +\
        ip_multicast_distributed_fastswitching +\
        ip_routecache_flags +\
        routerdiscovery +\
        ip_output_accounting +\
        ip_violation_accounting +\
        tcpip_header_compression +\
        rtpip_header_compression +\
        policy_routing +\
        nat +\
        bgp_policy +\
        input_features +\
        Optional(output_features) +\
        wccp_outbound +\
        wccp_inbound +\
        wccp_exclude

    # Short section printed for interfaces with IP processing disabled.
    interface_ip_disabled = interface_name('interface_name') + Suppress('is') + Optional(Suppress('administratively')) +\
        interface_status('interface_status') +\
        Suppress(',') +\
        Suppress('line protocol is') + line_status('line_status') +\
        Suppress('Internet protocol processing') + state_enabled_disabled('ip_state')

    parser = OneOrMore(Group(interface) | Group(interface_ip_disabled)) + StringEnd()
    return parser
^ hs_scalar[LATEST_VER] ^ trio_safe_string ^ Suppress(LineEnd())) hs_trio_tagpair = ( hs_id + Suppress(':') + (hs_trio_scalar + Suppress(hs_nl) ^ Suppress(hs_nl)) ).setParseAction(lambda toks: tuple(toks[:2])).setName('tagPair') hs_trio_tag = ((Optional(hs_id) + Suppress(hs_nl)) ^ hs_trio_tagpair).setName('tag') hs_row = hs_trio_tag | Suppress(Regex("//.*[\n\r]+").leaveWhitespace()) hs_record = (hs_row[...] + Suppress((LineStart() + Literal('-')[1, ...] + hs_nl) | StringEnd())).setParseAction(toks_to_dict) def _gen_grid(toks: Iterable[Entity]): grid = Grid(LATEST_VER) grid.extend(toks) grid.extends_columns() return grid hs_trio = ZeroOrMore(hs_record, stopOn=StringEnd()).setParseAction(_gen_grid) def parse_grid(grid_data: str, parse_all: bool = True) -> Grid: """Parse the incoming grid.
def parse_selector(cls, selector):
    """
    Parse a selector and return list of tokens

    Selector formats:
        ALL (or blank)              match any entity
        <val1> == <val2>            values are same
        <val1> in <val2>            second value is list containing 1st value,
                                    or values are same, or val1 is None.
        <val1> <name> <val2>        invoke comparison method from supplied
                                    FieldComparison object

    <val1> and <val2> may be:
        [<field-id>]                refers to field in entity under test
        <name>[<field-id>]          refers to field of context value, or None if the
                                    indicated context value or field is not defined.
        "<string>"                  literal string value.  Quotes within are escaped.

    <field_id> values are URIs or CURIEs, using characters defined by RFC3986,
    except "[" and "]"

    RFC3986:
       unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
       reserved      = gen-delims / sub-delims
       gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
       sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
                     / "*" / "+" / "," / ";" / "="

    Parser uses pyparsing combinators (cf. http://pyparsing.wikispaces.com).

    Returns a dict with keys 'val1', 'comp', 'val2', or None if the
    selector does not match the grammar.
    """
    def get_value(val_list):
        # Classify a parsed value group by its shape:
        #   ["literal"]              -> quoted string / bare id literal
        #   ["[", id, "]"]           -> field of the entity under test
        #   [name, "[", id, "]"]     -> field of a named context value
        if len(val_list) == 1:
            return {
                'type': 'literal',
                'name': None,
                'field_id': None,
                'value': val_list[0]
            }
        elif val_list[0] == '[':
            return {
                'type': 'entity',
                'name': None,
                'field_id': val_list[1],
                'value': None
            }
        elif val_list[1] == '[':
            return {
                'type': 'context',
                'name': val_list[0],
                'field_id': val_list[2],
                'value': None
            }
        else:
            return {
                'type': 'unknown',
                'name': None,
                'field_id': None,
                'value': None
            }
    p_name = Word(alphas + "_", alphanums + "_")
    # Body chars follow RFC3986 unreserved + sub-delims + gen-delims minus
    # "[" / "]".  NOTE(review): the trailing ")" duplicates the "(" ")" pair
    # already present earlier in the set — looks like a typo, but it is
    # harmless (duplicate chars are ignored by Word); confirm before removing.
    p_id = Word(alphas + "_@", alphanums + "_-.~:/?#@!$&'()*+,;=)")
    p_val = (Group(Literal("[") + p_id + Literal("]")) |
             Group(p_name + Literal("[") + p_id + Literal("]")) |
             Group(QuotedString('"', "\\")) |
             Group(QuotedString("'", "\\")) |
             Group(p_id))
    p_comp = (Literal("==") | Literal("in") | p_name)
    p_selector = (p_val + p_comp + p_val + StringEnd())
    try:
        resultlist = p_selector.parseString(selector).asList()
    except ParseException:
        # Unparseable selector: signal "no match" rather than raising.
        return None
    resultdict = {}
    if resultlist:
        resultdict['val1'] = get_value(resultlist[0])
        resultdict['comp'] = resultlist[1]
        resultdict['val2'] = get_value(resultlist[2])
    return resultdict
) raw = file.read() try: wordlist = nltk.word_tokenize(raw) lemmatizer = WordNetLemmatizer() print lemmatizer.lemmatize("ran") lanster = LancasterStemmer() porter = PorterStemmer() snowball = SnowballStemmer("english") isri = ISRIStemmer() rslp = RSLPStemmer() porter2 = Stemmer('english') endOfString = StringEnd() prefix = oneOf( "uni inter intro de con com anti pre pro per an ab ad af ac at as re in im ex en em un dis over sub syn out thermo philo geo for fore back" ) suffix = oneOf("ish") #suffix = oneOf("or er ed ish ian ary ation tion al ing ible able ate ly ment ism ous ness ent ic ive " # "ative tude ence ance ise ant age cide ium ion") word = (Optional(prefix)("prefixes") + SkipTo(suffix | suffix + FollowedBy(endOfString) | endOfString)("root") + ZeroOrMore(suffix | suffix + FollowedBy(endOfString))("suffix")) #word = (Optional(prefix)("prefixes") + SkipTo(FollowedBy(endOfString))("root")) for wd in wordlist: print wd
def parse(cls, content, basedir=None, resolve=True, unresolved_value=DEFAULT_SUBSTITUTION):
    """parse a HOCON content

    :param content: HOCON content to parse
    :type content: basestring
    :param basedir: base directory used to resolve relative ``include file(...)`` paths
    :param resolve: if true, resolve substitutions
    :type resolve: boolean
    :param unresolved_value: assigned value value to unresolved substitution.
    If overriden with a default value, it will replace all unresolved value to the default value.
    If it is set to to pyhocon.STR_SUBSTITUTION then it will replace the value by its substitution expression (e.g., ${x})
    :type unresolved_value: boolean
    :return: a ConfigTree or a list
    """

    unescape_pattern = re.compile(r'\\.')

    def replace_escape_sequence(match):
        # Map known escape sequences (\n, \t, ...) via cls.REPLACEMENTS;
        # unknown sequences are left as-is.
        value = match.group(0)
        return cls.REPLACEMENTS.get(value, value)

    def norm_string(value):
        return unescape_pattern.sub(replace_escape_sequence, value)

    def unescape_string(tokens):
        return ConfigUnquotedString(norm_string(tokens[0]))

    def parse_multi_string(tokens):
        # remove the first and last 3 "
        return tokens[0][3: -3]

    def convert_number(tokens):
        # Prefer int; fall back to float for values like "1.5" or "1e3".
        n = tokens[0]
        try:
            return int(n, 10)
        except ValueError:
            return float(n)

    def convert_period(tokens):
        # Convert "<value> <unit>" (e.g. "10 minutes") into a period object
        # using the unit aliases declared by get_supported_period_type_map().
        period_value = int(tokens.value)
        period_identifier = tokens.unit

        period_unit = next((single_unit for single_unit, values
                            in cls.get_supported_period_type_map().items()
                            if period_identifier in values))

        return period(period_value, period_unit)

    # ${path} or ${?path} for optional substitution
    SUBSTITUTION_PATTERN = r"\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}(?P<ws>[ \t]*)"

    def create_substitution(instring, loc, token):
        # remove the ${ and }
        match = re.match(SUBSTITUTION_PATTERN, token[0])
        variable = match.group('variable')
        ws = match.group('ws')
        optional = match.group('optional') == '?'
        substitution = ConfigSubstitution(variable, optional, ws, instring, loc)
        return substitution

    # ${path} or ${?path} for optional substitution
    STRING_PATTERN = '"(?P<value>(?:[^"\\\\]|\\\\.)*)"(?P<ws>[ \t]*)'

    def create_quoted_string(instring, loc, token):
        # remove the ${ and }
        match = re.match(STRING_PATTERN, token[0])
        value = norm_string(match.group('value'))
        ws = match.group('ws')
        return ConfigQuotedString(value, ws, instring, loc)

    def include_config(instring, loc, token):
        # Handle the various include forms:
        #   include "x"  /  include url("...")  /  include file("...")
        #   include package("...")  /  include required(...)
        url = None
        file = None
        required = False

        if token[0] == 'required':
            required = True
            final_tokens = token[1:]
        else:
            final_tokens = token

        if len(final_tokens) == 1:  # include "test"
            value = final_tokens[0].value if isinstance(final_tokens[0], ConfigQuotedString) else final_tokens[0]
            if value.startswith("http://") or value.startswith("https://") or value.startswith("file://"):
                url = value
            else:
                file = value
        elif len(final_tokens) == 2:  # include url("test") or file("test")
            value = final_tokens[1].value if isinstance(final_tokens[1], ConfigQuotedString) else final_tokens[1]
            if final_tokens[0] == 'url':
                url = value
            elif final_tokens[0] == 'package':
                file = asset.load(value).filename
            else:
                file = value

        if url is not None:
            logger.debug('Loading config from url %s', url)
            obj = ConfigFactory.parse_URL(
                url,
                resolve=False,
                required=required,
                unresolved_value=NO_SUBSTITUTION
            )
        elif file is not None:
            path = file if basedir is None else os.path.join(basedir, file)

            logger.debug('Loading config from file %s', path)
            obj = ConfigFactory.parse_file(
                path,
                resolve=False,
                required=required,
                unresolved_value=NO_SUBSTITUTION
            )
        else:
            raise ConfigException('No file or URL specified at: {loc}: {instring}', loc=loc, instring=instring)

        return ConfigInclude(obj if isinstance(obj, list) else obj.items())

    @contextlib.contextmanager
    def set_default_white_spaces():
        # HOCON treats newlines as significant separators, so temporarily
        # restrict pyparsing's default whitespace to spaces/tabs only.
        default = ParserElement.DEFAULT_WHITE_CHARS
        ParserElement.setDefaultWhitespaceChars(' \t')
        yield
        ParserElement.setDefaultWhitespaceChars(default)

    with set_default_white_spaces():
        assign_expr = Forward()
        true_expr = Keyword("true", caseless=True).setParseAction(replaceWith(True))
        false_expr = Keyword("false", caseless=True).setParseAction(replaceWith(False))
        null_expr = Keyword("null", caseless=True).setParseAction(replaceWith(NoneValue()))
        key = QuotedString('"', escChar='\\', unquoteResults=False) | Word(alphanums + alphas8bit + '._- /')

        eol = Word('\n\r').suppress()
        eol_comma = Word('\n\r,').suppress()
        comment = (Literal('#') | Literal('//')) - SkipTo(eol | StringEnd())
        comment_eol = Suppress(Optional(eol_comma) + comment)
        comment_no_comma_eol = (comment | eol).suppress()
        # Lookahead after the number ensures the token really ends the value
        # (followed by EOL, comma, comment, closing brace/bracket, ...).
        number_expr = Regex(r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE][+\-]?\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
                            re.DOTALL).setParseAction(convert_number)

        period_types = itertools.chain.from_iterable(cls.get_supported_period_type_map().values())
        period_expr = Regex(r'(?P<value>\d+)\s*(?P<unit>' + '|'.join(period_types) + ')$'
                            ).setParseAction(convert_period)

        # multi line string using """
        # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
        multiline_string = Regex('""".*?"*"""', re.DOTALL | re.UNICODE).setParseAction(parse_multi_string)
        # single quoted line string
        quoted_string = Regex(r'"(?:[^"\\\n]|\\.)*"[ \t]*', re.UNICODE).setParseAction(create_quoted_string)
        # unquoted string that takes the rest of the line until an optional comment
        # we support .properties multiline support which is like this:
        # line1  \
        # line2 \
        # so a backslash precedes the \n
        unquoted_string = Regex(r'(?:[^^`+?!@*&"\[\{\s\]\}#,=\$\\]|\\.)+[ \t]*', re.UNICODE).setParseAction(unescape_string)
        substitution_expr = Regex(r'[ \t]*\$\{[^\}]+\}[ \t]*').setParseAction(create_substitution)
        string_expr = multiline_string | quoted_string | unquoted_string

        value_expr = period_expr | number_expr | true_expr | false_expr | null_expr | string_expr

        include_content = (quoted_string | ((Keyword('url') | Keyword('file') | Keyword('package')) -
                                            Literal('(').suppress() - quoted_string - Literal(')').suppress()))
        include_expr = (
            Keyword("include", caseless=True).suppress() + (
                include_content | (
                    Keyword("required") - Literal('(').suppress() - include_content - Literal(')').suppress()
                )
            )
        ).setParseAction(include_config)

        root_dict_expr = Forward()
        dict_expr = Forward()
        list_expr = Forward()
        multi_value_expr = ZeroOrMore(comment_eol | include_expr | substitution_expr | dict_expr | list_expr | value_expr | (Literal(
            '\\') - eol).suppress())
        # for a dictionary : or = is optional
        # last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is dictionary concatenation
        inside_dict_expr = ConfigTreeParser(ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma))
        inside_root_dict_expr = ConfigTreeParser(ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma), root=True)
        dict_expr << Suppress('{') - inside_dict_expr - Suppress('}')
        root_dict_expr << Suppress('{') - inside_root_dict_expr - Suppress('}')
        list_entry = ConcatenatedValueParser(multi_value_expr)
        list_expr << Suppress('[') - ListParser(list_entry - ZeroOrMore(eol_comma - list_entry)) - Suppress(']')

        # special case when we have a value assignment where the string can potentially be the remainder of the line
        assign_expr << Group(
            key - ZeroOrMore(comment_no_comma_eol) -
            (dict_expr | (Literal('=') | Literal(':') | Literal('+=')) - ZeroOrMore(
                comment_no_comma_eol) - ConcatenatedValueParser(multi_value_expr))
        )

        # the file can be { ... } where {} can be omitted or []
        config_expr = ZeroOrMore(comment_eol | eol) + (list_expr | root_dict_expr | inside_root_dict_expr) + ZeroOrMore(
            comment_eol | eol_comma)
        config = config_expr.parseString(content, parseAll=True)[0]

        if resolve:
            allow_unresolved = resolve and unresolved_value is not DEFAULT_SUBSTITUTION and unresolved_value is not MANDATORY_SUBSTITUTION
            has_unresolved = cls.resolve_substitutions(config, allow_unresolved)
            if has_unresolved and unresolved_value is MANDATORY_SUBSTITUTION:
                raise ConfigSubstitutionException('resolve cannot be set to True and unresolved_value to MANDATORY_SUBSTITUTION')

        if unresolved_value is not NO_SUBSTITUTION and unresolved_value is not DEFAULT_SUBSTITUTION:
            cls.unresolve_substitutions_to_value(config, unresolved_value)
    return config
def _create_grammar_6_0():
    """Create the SYM 6.0 grammar.

    Builds and returns a pyparsing grammar matching a whole SYM 6.0 file:
    a FormatVersion/Title header followed by one or more {ENUMS},
    {SIGNALS}, {SEND}, {RECEIVE} or {SENDRECEIVE} sections.
    """
    word = Word(printables.replace(';', '').replace(':', ''))
    positive_integer = Word(nums)
    number = Word(nums + '.Ee-+')
    lp = Suppress(Literal('('))
    rp = Suppress(Literal(')'))
    lb = Suppress(Literal('['))
    rb = Suppress(Literal(']'))
    # Names may not span lines, hence the restricted whitespace chars.
    name = Word(alphas + nums + '_-').setWhitespaceChars(' ')
    assign = Suppress(Literal('='))
    comma = Suppress(Literal(','))
    type_ = name

    # NOTE: '-' (And with error stop) is used after keywords so parse
    # errors are reported at the failing token instead of backtracking.
    version = Group(Keyword('FormatVersion')
                    - assign
                    - Keyword('6.0'))

    title = Group(Keyword('Title')
                  - assign
                  - QuotedString('"'))

    enum_value = Group(number
                       + assign
                       + QuotedString('"'))

    enum = Group(Suppress(Keyword('Enum'))
                 - assign
                 - name
                 - Suppress(lp)
                 + Group(delimitedList(enum_value))
                 - Suppress(rp))

    # Per-signal attribute flags, e.g. "/u:volt" (unit), "/min:0".
    sig_unit = Group(Literal('/u:') + word)
    sig_factor = Group(Literal('/f:') + word)
    sig_offset = Group(Literal('/o:') + word)
    sig_min = Group(Literal('/min:') + word)
    sig_max = Group(Literal('/max:') + word)
    sig_default = Group(Literal('/d:') + word)
    sig_long_name = Group(Literal('/ln:') + word)
    sig_enum = Group(Literal('/e:') + word)

    signal = Group(Suppress(Keyword('Sig'))
                   - Suppress(assign)
                   - name
                   - type_
                   + Group(Optional(positive_integer))
                   + Group(Optional(Keyword('-m')))
                   + Group(Optional(sig_unit)
                           + Optional(sig_factor)
                           + Optional(sig_offset)
                           + Optional(sig_min)
                           + Optional(sig_max)
                           + Optional(sig_default)
                           + Optional(sig_long_name)
                           + Optional(sig_enum)))

    # A message definition: "[name]" header plus its attributes and the
    # list of "Sig=..." references into the {SIGNALS} section.
    symbol = Group(Suppress(lb)
                   - name
                   - Suppress(rb)
                   - Group(Optional(Keyword('ID')
                                    + assign
                                    + word))
                   - Group(Keyword('Len')
                           + assign
                           + positive_integer)
                   + Group(Optional(Keyword('Mux')
                                    + assign
                                    + word
                                    + positive_integer
                                    + comma
                                    + positive_integer
                                    + positive_integer))
                   + Group(Optional(Keyword('CycleTime')
                                    + assign
                                    + positive_integer))
                   + Group(Optional(Keyword('Timeout')
                                    + assign
                                    + positive_integer))
                   + Group(Optional(Keyword('MinInterval')
                                    + assign
                                    + positive_integer))
                   + Group(ZeroOrMore(Group(Keyword('Sig')
                                            + assign
                                            + name
                                            + positive_integer))))

    enums = Group(Keyword('{ENUMS}')
                  + Group(ZeroOrMore(enum)))
    signals = Group(Keyword('{SIGNALS}')
                    + Group(ZeroOrMore(signal)))
    send = Group(Keyword('{SEND}')
                 + Group(ZeroOrMore(symbol)))
    receive = Group(Keyword('{RECEIVE}')
                    + Group(ZeroOrMore(symbol)))
    sendreceive = Group(Keyword('{SENDRECEIVE}')
                        + Group(ZeroOrMore(symbol)))

    section = (enums
               | signals
               | send
               | receive
               | sendreceive)

    grammar = (version
               - title
               + Group(OneOrMore(section))
               + StringEnd())
    # "// ..." comments may appear anywhere and are ignored.
    grammar.ignore(dblSlashComment)

    return grammar
def __init__(self):
    """Build the arithmetic-expression grammar and the operator/function
    tables used by the evaluator.

    Parse actions (self.pushFirst / self.pushEnd — defined elsewhere on
    this class) push tokens onto an expression stack in RPN order as the
    input is parsed; self.pattern matches a complete expression.
    """
    # define grammar
    point = Literal('.')
    e = CaselessLiteral('E')
    plusorminus = Literal('+') | Literal('-')
    number = Word(nums)
    integer = Combine(Optional(plusorminus) + number)
    floatnumber = Combine(integer + Optional(point + Optional(number)) +
                          Optional(e + integer))
    # Identifiers are '$'-prefixed variable references, e.g. "$x_1".
    ident = Word('$', alphanums + '_')
    plus = Literal("+")
    minus = Literal("-")
    mult = Literal("*")
    div = Literal("/")
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    addop = plus | minus
    multop = mult | div
    expop = Literal("^")

    expr = Forward()

    def defineFunction(name, parameterCount=None):
        # Build a grammar fragment for a named function call.
        # parameterCount=None accepts any number of comma-separated args;
        # otherwise exactly parameterCount args are required.
        keyword = CaselessKeyword(name).setParseAction(self.pushEnd)
        funcPattern = keyword + lpar
        if parameterCount == None:
            funcPattern += Optional(expr + ZeroOrMore(Literal(',') + expr))
        elif parameterCount > 0:
            funcPattern += expr
            for i in range(parameterCount - 1):
                funcPattern += Literal(',') + expr
        funcPattern += rpar
        return funcPattern.setParseAction(self.pushFirst)

    maxFunc = defineFunction('max')
    minFunc = defineFunction('min')
    casesFunc = defineFunction('cases')
    cases1Func = defineFunction('cases1', parameterCount=5)
    cases2Func = defineFunction('cases2', parameterCount=8)
    cases3Func = defineFunction('cases3', parameterCount=11)
    cases333Func = defineFunction('cases333', parameterCount=11)
    round3downFunc = defineFunction('round3down', parameterCount=1)
    #func = (funcident.setParseAction(self.pushEnd)+lpar +Optional(expr+ZeroOrMore(Literal(',')+expr))+rpar).setParseAction(self.pushFirst)

    # NOTE(review): expr.suppress() discards the parenthesized expression's
    # tokens from the parse result, but its parse actions still fire and
    # push onto the stack, so evaluation works — confirm before changing.
    atom = (maxFunc | minFunc | casesFunc | cases1Func | cases2Func |
            cases3Func | cases333Func | round3downFunc |
            (e | floatnumber | integer | ident).setParseAction(
                self.pushFirst) | (lpar + expr.suppress() + rpar))

    # Standard precedence ladder: ^ binds tightest, then * /, then + -.
    factor = Forward()
    factor << atom + ZeroOrMore(
        (expop + factor).setParseAction(self.pushFirst))
    term = factor + ZeroOrMore(
        (multop + factor).setParseAction(self.pushFirst))
    expr << term + ZeroOrMore(
        (addop + term).setParseAction(self.pushFirst))

    self.pattern = expr + StringEnd()

    # map operator symbols to corresponding arithmetic operations
    # (handleNone wraps each op; none_survives propagates None operands).
    self.opn = {
        "+": self.handleNone(lambda a, b: a + b),
        "-": self.handleNone(lambda a, b: a - b),
        "*": self.handleNone(lambda a, b: a * b, none_survives=True),
        "/": self.handleNone(lambda a, b: a / b, none_survives=True),
        "^": self.handleNone(lambda a, b: a**b, none_survives=True)
    }
    # Dispatch table for the function keywords declared above.
    self.functions = {
        'max': max,
        'min': self.min,
        'cases': self.cases,
        'cases1': self.cases1,
        'cases2': self.cases2,
        'cases3': self.cases3,
        'cases333': self.cases333,
        'round3down': self.round3down
    }
# (originally I had pyparsing pulling out the $(Macro) references from inside names # as well, but the framework doesn't work especially well without whitespace delimiters between # tokens so we just do simple find/replace in a second pass pv_name = Word(alphanums + ":._$()") pv_value = (float_number | Word(alphanums)) pv_assignment = pv_name + pv_value comment = Literal("#") + Regex(r".*") macro = Group(Word(alphas) + Literal("=").suppress() + pv_name) macros = Optional(macro + ZeroOrMore(Word(";,").suppress() + macro)) #file_include = Literal("file") + pv_name + macros file_include = Literal("file") + \ (file_name | ignored_quote + file_name + ignored_quote) \ + Optional(ignored_comma) + macros def line(contents): return LineStart() + ZeroOrMore(Group(contents)) + LineEnd().suppress() req_line = line(file_include | comment.suppress() | pv_name) req_file = OneOrMore(req_line) + StringEnd().suppress() sav_line = line(comment.suppress() | Literal("<END>").suppress() | pv_assignment) sav_file = OneOrMore(sav_line) + StringEnd().suppress()
def parse_math_str(input_string, variables=None):
    """Parse and evaluate a simple arithmetic expression string.

    Supports + - * / ^, parentheses, float/scientific literals, the
    constants PI and E, named variables, and an optional leading
    assignment ("x = 1 + 2" stores the result under 'x').

    :param input_string: expression to evaluate; '' is a no-op.
    :param variables: dict of variable name -> value. Looked up for
        identifiers and updated in place with 'ans' (and any assigned
        name). Defaults to a fresh empty dict per call.
    :return: numeric result, or None on empty input / parse failure.
    """
    # Uncomment the line below for readline support on interactive terminal
    # import readline
    import re
    from pyparsing import Word, alphas, ParseException, Literal, CaselessLiteral, Combine, Optional, nums, Or, Forward, ZeroOrMore, StringEnd, alphanums
    import math

    # BUG FIX: the original used a mutable default argument (variables={})
    # and mutated it (variables['ans'] = ...), leaking state across calls.
    if variables is None:
        variables = {}

    # Debugging flag can be set to either "debug_flag=True" or "debug_flag=False"
    debug_flag = False

    exprStack = []
    varStack = []

    # Parse actions push tokens in RPN order onto exprStack / record the
    # assignment target on varStack. (Params renamed so the builtin 'str'
    # is not shadowed.)
    def pushFirst(s, loc, toks):
        exprStack.append(toks[0])

    def assignVar(s, loc, toks):
        varStack.append(toks[0])

    # define grammar
    point = Literal('.')
    e = CaselessLiteral('E')
    plusorminus = Literal('+') | Literal('-')
    number = Word(nums)
    integer = Combine(Optional(plusorminus) + number)
    floatnumber = Combine(integer + Optional(point + Optional(number)) +
                          Optional(e + integer))
    ident = Word(alphas, alphanums + '_')

    plus = Literal("+")
    minus = Literal("-")
    mult = Literal("*")
    div = Literal("/")
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    addop = plus | minus
    multop = mult | div
    expop = Literal("^")
    assign = Literal("=")

    expr = Forward()
    atom = ((e | floatnumber | integer | ident).setParseAction(pushFirst) |
            (lpar + expr.suppress() + rpar))

    # Precedence ladder: ^ binds tightest, then * /, then + -.
    factor = Forward()
    factor << atom + ZeroOrMore((expop + factor).setParseAction(pushFirst))
    term = factor + ZeroOrMore((multop + factor).setParseAction(pushFirst))
    expr << term + ZeroOrMore((addop + term).setParseAction(pushFirst))
    bnf = Optional((ident + assign).setParseAction(assignVar)) + expr
    pattern = bnf + StringEnd()

    # map operator symbols to corresponding arithmetic operations
    opn = {
        "+": (lambda a, b: a + b),
        "-": (lambda a, b: a - b),
        "*": (lambda a, b: a * b),
        "/": (lambda a, b: a / b),
        "^": (lambda a, b: a**b)
    }

    # Recursive function that evaluates the stack
    def evaluateStack(s):
        op = s.pop()
        if op in "+-*/^":
            op2 = evaluateStack(s)
            op1 = evaluateStack(s)
            return opn[op](op1, op2)
        elif op == "PI":
            return math.pi
        elif op == "E":
            return math.e
        elif re.search('^[a-zA-Z][a-zA-Z0-9_]*$', op):
            # Unknown identifiers evaluate to 0 rather than raising.
            if op in variables:
                return variables[op]
            else:
                return 0
        elif re.search('^[-+]?[0-9]+$', op):
            return int(op)
        else:
            return float(op)

    # Start with a blank exprStack and a blank varStack
    # (rebinding here also resets what the parse actions append to).
    exprStack = []
    varStack = []

    if input_string != '':
        # try parsing the input string
        try:
            L = pattern.parseString(input_string)
        except ParseException as err:
            L = ['Parse Failure', input_string]

        # show result of parsing the input string
        if debug_flag:
            print(input_string, "->", L)
        if len(L) == 0 or L[0] != 'Parse Failure':
            if debug_flag:
                print("exprStack=", exprStack)

            # calculate result , store a copy in ans , display the result to user
            result = evaluateStack(exprStack)
            variables['ans'] = result

            # Assign result to a variable if required.
            # BUG FIX: the original returned before this block, so
            # "x = 5"-style assignments were never stored.
            if debug_flag:
                print("var=", varStack)
            if len(varStack) == 1:
                variables[varStack.pop()] = result
            if debug_flag:
                print("variables=", variables)
            return result
        else:
            print('Parse Failure')
            print(err.line)
            print(" " * (err.column - 1) + "^")
            print(err)
def Verilog_BNF():
    """Build (once) and return the pyparsing BNF for a subset of Verilog.

    The finished parser is cached in the module-global ``verilogbnf``; the
    grammar is only constructed on the first call.  The rules are a fairly
    direct translation of the Verilog grammar (modules, UDP primitives,
    declarations, statements, specify blocks and system timing checks).

    NOTE: alternative ordering inside the `|` chains is deliberate and
    load-bearing (see the "must be first!" / "have to go at the end"
    comments) — pyparsing's MatchFirst takes the first alternative that
    matches, not the longest.
    """
    global verilogbnf
    if verilogbnf is None:
        # compiler directives: back-tick keyword plus the rest of the line
        compilerDirective = Combine( "`" + \
            oneOf("define undef ifdef else endif default_nettype "
                  "include resetall timescale unconnected_drive "
                  "nounconnected_drive celldefine endcelldefine") + \
            restOfLine ).setName("compilerDirective")

        # primitives (single-character punctuation tokens)
        SEMI, COLON, LPAR, RPAR, LBRACE, RBRACE, LBRACK, RBRACK, DOT, COMMA, EQ = map(
            Literal, ";:(){}[].,=")

        # Identifiers: ordinary dotted identifiers, or "escaped" identifiers
        # introduced by a backslash (the parse action strips the backslash).
        identLead = alphas + "$_"
        identBody = alphanums + "$_"
        identifier1 = Regex(r"\.?[" + identLead + "][" + identBody + r"]*(\.[" +
                            identLead + "][" + identBody + "]*)*").setName("baseIdent")
        identifier2 = Regex(r"\\\S+").setParseAction(
            lambda t: t[0][1:]).setName("escapedIdent")  #.setDebug()
        identifier = identifier1 | identifier2
        # Sanity check: pyparsing's ParserElement.__eq__ against a str runs a
        # full-string match, so this asserts that identifier2 matches r'\abc'.
        assert (identifier2 == r'\abc')

        # Numbers: sized/based literals (e.g. 8'hFF) or plain decimal/real.
        hexnums = nums + "abcdefABCDEF" + "_?"
        base = Regex("'[bBoOdDhH]").setName("base")
        basedNumber = Combine(Optional(Word(nums + "_")) + base +
                              Word(hexnums + "xXzZ"),
                              joinString=" ", adjacent=False).setName("basedNumber")
        #~ number = ( basedNumber | Combine( Word( "+-"+spacedNums, spacedNums ) +
        #~ Optional( DOT + Optional( Word( spacedNums ) ) ) +
        #~ Optional( e + Word( "+-"+spacedNums, spacedNums ) ) ).setName("numeric") )
        number = ( basedNumber | \
                   Regex(r"[+-]?[0-9_]+(\.[0-9_]*)?([Ee][+-]?[0-9_]+)?") \
                   ).setName("numeric")
        #~ decnums = nums + "_"
        #~ octnums = "01234567" + "_"

        # Expressions (defined recursively via Forward).
        expr = Forward().setName("expr")
        concat = Group(LBRACE + delimitedList(expr) + RBRACE)
        multiConcat = Group("{" + expr + concat + "}").setName("multiConcat")
        funcCall = Group(identifier + LPAR + Optional(delimitedList(expr)) +
                         RPAR).setName("funcCall")

        subscrRef = Group(LBRACK + delimitedList(expr, COLON) + RBRACK)
        subscrIdentifier = Group(identifier + Optional(subscrRef))
        #~ scalarConst = "0" | (( FollowedBy('1') + oneOf("1'b0 1'b1 1'bx 1'bX 1'B0 1'B1 1'Bx 1'BX 1") ))
        scalarConst = Regex("0|1('[Bb][01xX])?")
        mintypmaxExpr = Group(expr + COLON + expr + COLON +
                              expr).setName("mintypmax")
        primary = (number |
                   (LPAR + mintypmaxExpr + RPAR) |
                   (LPAR + Group(expr) + RPAR).setName("nestedExpr") |
                   multiConcat |
                   concat |
                   dblQuotedString |
                   funcCall |
                   subscrIdentifier)

        unop = oneOf("+ - ! ~ & ~& | ^| ^ ~^").setName("unop")
        binop = oneOf(
            "+ - * / % == != === !== && "
            "|| < <= > >= & | ^ ^~ >> << ** <<< >>>").setName(
                "binop")
        expr << ((unop + expr) |  # must be first!
                 (primary + "?" + expr + COLON + expr) |
                 (primary + Optional(binop + expr)))

        lvalue = subscrIdentifier | concat

        # keywords
        # NOTE(review): `specify` and `endspecify` below are never used as
        # names; specifyBlock matches the literal strings instead.
        if_ = Keyword("if")
        else_ = Keyword("else")
        edge = Keyword("edge")
        posedge = Keyword("posedge")
        negedge = Keyword("negedge")
        specify = Keyword("specify")
        endspecify = Keyword("endspecify")
        fork = Keyword("fork")
        join = Keyword("join")
        begin = Keyword("begin")
        end = Keyword("end")
        default = Keyword("default")
        forever = Keyword("forever")
        repeat = Keyword("repeat")
        while_ = Keyword("while")
        for_ = Keyword("for")
        case = oneOf("case casez casex")
        endcase = Keyword("endcase")
        wait = Keyword("wait")
        disable = Keyword("disable")
        deassign = Keyword("deassign")
        force = Keyword("force")
        release = Keyword("release")
        assign = Keyword("assign")

        # Event expressions: "@(posedge clk or negedge rst)" etc.
        eventExpr = Forward()
        eventTerm = (posedge + expr) | (negedge + expr) | expr | (
            LPAR + eventExpr + RPAR)
        eventExpr << (Group(delimitedList(eventTerm, Keyword("or"))))
        eventControl = Group("@" + (
            (LPAR + eventExpr + RPAR) | identifier | "*")).setName("eventCtrl")

        # Delay controls: "#10", "#(1:2:3)", ...
        delayArg = (
            number |
            Word(alphanums + "$_") |  #identifier |
            (LPAR + Group(delimitedList(mintypmaxExpr | expr)) +
             RPAR)).setName("delayArg")  #.setDebug()
        delay = Group("#" + delayArg).setName("delay")  #.setDebug()
        delayOrEventControl = delay | eventControl

        # Blocking and non-blocking assignments.
        assgnmt = Group(lvalue + EQ + Optional(delayOrEventControl) +
                        expr).setName("assgnmt")
        nbAssgnmt = Group((lvalue + "<=" + Optional(delay) + expr) |
                          (lvalue + "<=" + Optional(eventControl) +
                           expr)).setName("nbassgnmt")

        # NOTE(review): shadows the builtin `range` inside this function.
        range = LBRACK + expr + COLON + expr + RBRACK

        # Declarations.
        paramAssgnmt = Group(identifier + EQ + expr).setName("paramAssgnmt")
        parameterDecl = Group("parameter" + Optional(range) +
                              delimitedList(paramAssgnmt) +
                              SEMI).setName("paramDecl")

        inputDecl = Group("input" + Optional(range) +
                          delimitedList(identifier) + SEMI)
        outputDecl = Group("output" + Optional(range) +
                           delimitedList(identifier) + SEMI)
        inoutDecl = Group("inout" + Optional(range) +
                          delimitedList(identifier) + SEMI)

        regIdentifier = Group(identifier +
                              Optional(LBRACK + expr + COLON + expr + RBRACK))
        regDecl = Group("reg" + Optional("signed") + Optional(range) +
                        delimitedList(regIdentifier) + SEMI).setName("regDecl")
        timeDecl = Group("time" + delimitedList(regIdentifier) + SEMI)
        integerDecl = Group("integer" + delimitedList(regIdentifier) + SEMI)

        # Drive strengths for net declarations and gates.
        strength0 = oneOf("supply0 strong0 pull0 weak0 highz0")
        strength1 = oneOf("supply1 strong1 pull1 weak1 highz1")
        driveStrength = Group(LPAR + ((strength0 + COMMA + strength1) |
                                      (strength1 + COMMA + strength0)) +
                              RPAR).setName("driveStrength")
        nettype = oneOf(
            "wire tri tri1 supply0 wand triand tri0 supply1 wor trior trireg"
        )
        expandRange = Optional(oneOf("scalared vectored")) + range

        realDecl = Group("real" + delimitedList(identifier) + SEMI)

        eventDecl = Group("event" + delimitedList(identifier) + SEMI)

        blockDecl = (parameterDecl | regDecl | integerDecl | realDecl |
                     timeDecl | eventDecl)

        # Statements.
        stmt = Forward().setName("stmt")  #.setDebug()
        stmtOrNull = stmt | SEMI
        caseItem = ( delimitedList( expr ) + COLON + stmtOrNull ) | \
                   ( default + Optional(":") + stmtOrNull )
        stmt << Group(
            (begin + Group(ZeroOrMore(stmt)) + end).setName("begin-end") |
            (if_ + Group(LPAR + expr + RPAR) + stmtOrNull +
             Optional(else_ + stmtOrNull)).setName("if") |
            (delayOrEventControl + stmtOrNull) |
            (case + LPAR + expr + RPAR + OneOrMore(caseItem) + endcase) |
            (forever + stmt) |
            (repeat + LPAR + expr + RPAR + stmt) |
            (while_ + LPAR + expr + RPAR + stmt) |
            (for_ + LPAR + assgnmt + SEMI + Group(expr) + SEMI + assgnmt +
             RPAR + stmt) |
            (fork + ZeroOrMore(stmt) + join) |
            (fork + COLON + identifier + ZeroOrMore(blockDecl) +
             ZeroOrMore(stmt) + end) |
            (wait + LPAR + expr + RPAR + stmtOrNull) |
            ("->" + identifier + SEMI) |
            (disable + identifier + SEMI) |
            (assign + assgnmt + SEMI) |
            (deassign + lvalue + SEMI) |
            (force + assgnmt + SEMI) |
            (release + lvalue + SEMI) |
            (begin + COLON + identifier + ZeroOrMore(blockDecl) +
             ZeroOrMore(stmt) + end).setName("begin:label-end") |
            # these *have* to go at the end of the list!!!
            (assgnmt + SEMI) |
            (nbAssgnmt + SEMI) |
            (Combine(Optional("$") + identifier) +
             Optional(LPAR + delimitedList(expr | empty) + RPAR) +
             SEMI)).setName("stmtBody")
        """
        x::=<blocking_assignment> ;
        x||= <non_blocking_assignment> ;
        x||= if ( <expression> ) <statement_or_null>
        x||= if ( <expression> ) <statement_or_null> else <statement_or_null>
        x||= case ( <expression> ) <case_item>+ endcase
        x||= casez ( <expression> ) <case_item>+ endcase
        x||= casex ( <expression> ) <case_item>+ endcase
        x||= forever <statement>
        x||= repeat ( <expression> ) <statement>
        x||= while ( <expression> ) <statement>
        x||= for ( <assignment> ; <expression> ; <assignment> ) <statement>
        x||= <delay_or_event_control> <statement_or_null>
        x||= wait ( <expression> ) <statement_or_null>
        x||= -> <name_of_event> ;
        x||= <seq_block>
        x||= <par_block>
        x||= <task_enable>
        x||= <system_task_enable>
        x||= disable <name_of_task> ;
        x||= disable <name_of_block> ;
        x||= assign <assignment> ;
        x||= deassign <lvalue> ;
        x||= force <assignment> ;
        x||= release <lvalue> ;
        """
        alwaysStmt = Group("always" + Optional(eventControl) +
                           stmt).setName("alwaysStmt")
        initialStmt = Group("initial" + stmt).setName("initialStmt")

        chargeStrength = Group(LPAR + oneOf("small medium large") +
                               RPAR).setName("chargeStrength")

        continuousAssign = Group(assign + Optional(driveStrength) +
                                 Optional(delay) + delimitedList(assgnmt) +
                                 SEMI).setName("continuousAssign")

        # Declarations allowed inside tasks/functions.
        tfDecl = (parameterDecl | inputDecl | outputDecl | inoutDecl |
                  regDecl | timeDecl | integerDecl | realDecl)

        functionDecl = Group("function" + Optional(range | "integer" | "real") +
                             identifier + SEMI +
                             Group(OneOrMore(tfDecl)) +
                             Group(ZeroOrMore(stmt)) +
                             "endfunction")

        # Net declarations (three forms per the spec).
        inputOutput = oneOf("input output")
        netDecl1Arg = (nettype + Optional(expandRange) + Optional(delay) +
                       Group(delimitedList(~inputOutput + identifier)))
        netDecl2Arg = ("trireg" + Optional(chargeStrength) +
                       Optional(expandRange) + Optional(delay) +
                       Group(delimitedList(~inputOutput + identifier)))
        netDecl3Arg = (nettype + Optional(driveStrength) +
                       Optional(expandRange) + Optional(delay) +
                       Group(delimitedList(assgnmt)))
        netDecl1 = Group(netDecl1Arg + SEMI).setName("netDecl1")
        netDecl2 = Group(netDecl2Arg + SEMI).setName("netDecl2")
        netDecl3 = Group(netDecl3Arg + SEMI).setName("netDecl3")

        # Gate instantiations.
        gateType = oneOf("and nand or nor xor xnor buf bufif0 bufif1 "
                         "not notif0 notif1 pulldown pullup nmos rnmos "
                         "pmos rpmos cmos rcmos tran rtran tranif0 "
                         "rtranif0 tranif1 rtranif1")
        gateInstance = Optional( Group( identifier + Optional( range ) ) ) + \
                       LPAR + Group( delimitedList( expr ) ) + RPAR
        gateDecl = Group(gateType + Optional(driveStrength) + Optional(delay) +
                         delimitedList(gateInstance) + SEMI)

        # UDP instantiations.
        udpInstance = Group(
            Group(identifier + Optional(range | subscrRef)) +
            LPAR + Group(delimitedList(expr)) + RPAR)
        udpInstantiation = Group(identifier - Optional(driveStrength) +
                                 Optional(delay) +
                                 delimitedList(udpInstance) +
                                 SEMI).setName("udpInstantiation")

        # Module instantiations.
        parameterValueAssignment = Group(
            Literal("#") + LPAR + Group(delimitedList(expr)) + RPAR)
        namedPortConnection = Group(DOT + identifier + LPAR + expr +
                                    RPAR).setName(
                                        "namedPortConnection")  #.setDebug()
        # Sanity check: str == ParserElement runs a full-string match.
        assert (r'.\abc (abc )' == namedPortConnection)
        modulePortConnection = expr | empty
        #~ moduleInstance = Group( Group ( identifier + Optional(range) ) +
        #~ ( delimitedList( modulePortConnection ) |
        #~ delimitedList( namedPortConnection ) ) )
        inst_args = Group(LPAR + (delimitedList(namedPortConnection) |
                                  delimitedList(modulePortConnection)) +
                          RPAR).setName("inst_args")
        moduleInstance = Group(
            Group(identifier + Optional(range)) + inst_args).setName(
                "moduleInstance")  #.setDebug()

        moduleInstantiation = Group(
            identifier + Optional(parameterValueAssignment) +
            delimitedList(moduleInstance).setName("moduleInstanceList") +
            SEMI).setName("moduleInstantiation")

        parameterOverride = Group("defparam" + delimitedList(paramAssgnmt) +
                                  SEMI)

        task = Group("task" + identifier + SEMI + ZeroOrMore(tfDecl) +
                     stmtOrNull + "endtask")

        # Specify-block contents: path declarations and timing checks.
        specparamDecl = Group("specparam" + delimitedList(paramAssgnmt) + SEMI)

        pathDescr1 = Group(LPAR + subscrIdentifier + "=>" + subscrIdentifier +
                           RPAR)
        pathDescr2 = Group(LPAR + Group(delimitedList(subscrIdentifier)) +
                           "*>" + Group(delimitedList(subscrIdentifier)) +
                           RPAR)
        pathDescr3 = Group(LPAR + Group(delimitedList(subscrIdentifier)) +
                           "=>" + Group(delimitedList(subscrIdentifier)) +
                           RPAR)
        pathDelayValue = Group((
            LPAR + Group(delimitedList(mintypmaxExpr | expr)) + RPAR) |
            mintypmaxExpr | expr)
        pathDecl = Group((pathDescr1 | pathDescr2 | pathDescr3) + EQ +
                         pathDelayValue + SEMI).setName("pathDecl")

        portConditionExpr = Forward()
        portConditionTerm = Optional(unop) + subscrIdentifier
        portConditionExpr << portConditionTerm + Optional(binop +
                                                          portConditionExpr)
        polarityOp = oneOf("+ -")
        levelSensitivePathDecl1 = Group(if_ +
                                        Group(LPAR + portConditionExpr +
                                              RPAR) + subscrIdentifier +
                                        Optional(polarityOp) + "=>" +
                                        subscrIdentifier + EQ +
                                        pathDelayValue + SEMI)
        levelSensitivePathDecl2 = Group(
            if_ + Group(LPAR + portConditionExpr + RPAR) + LPAR +
            Group(delimitedList(subscrIdentifier)) + Optional(polarityOp) +
            "*>" + Group(delimitedList(subscrIdentifier)) + RPAR + EQ +
            pathDelayValue + SEMI)
        levelSensitivePathDecl = levelSensitivePathDecl1 | levelSensitivePathDecl2

        edgeIdentifier = posedge | negedge
        edgeSensitivePathDecl1 = Group(
            Optional(if_ + Group(LPAR + expr + RPAR)) + LPAR +
            Optional(edgeIdentifier) + subscrIdentifier + "=>" + LPAR +
            subscrIdentifier + Optional(polarityOp) + COLON + expr + RPAR +
            RPAR + EQ + pathDelayValue + SEMI)
        edgeSensitivePathDecl2 = Group(
            Optional(if_ + Group(LPAR + expr + RPAR)) + LPAR +
            Optional(edgeIdentifier) + subscrIdentifier + "*>" + LPAR +
            delimitedList(subscrIdentifier) + Optional(polarityOp) + COLON +
            expr + RPAR + RPAR + EQ + pathDelayValue + SEMI)
        edgeSensitivePathDecl = edgeSensitivePathDecl1 | edgeSensitivePathDecl2

        edgeDescr = oneOf("01 10 0x x1 1x x0").setName("edgeDescr")

        timCheckEventControl = Group(
            posedge | negedge |
            (edge + LBRACK + delimitedList(edgeDescr) + RBRACK))
        timCheckCond = Forward()
        timCondBinop = oneOf("== === != !==")
        timCheckCondTerm = (expr + timCondBinop + scalarConst) | (
            Optional("~") + expr)
        timCheckCond << ((LPAR + timCheckCond + RPAR) | timCheckCondTerm)
        timCheckEvent = Group(
            Optional(timCheckEventControl) + subscrIdentifier +
            Optional("&&&" + timCheckCond))
        timCheckLimit = expr
        controlledTimingCheckEvent = Group(timCheckEventControl +
                                           subscrIdentifier +
                                           Optional("&&&" + timCheckCond))
        notifyRegister = identifier

        # System timing checks ($setup, $hold, $period, $width, $skew,
        # $recovery, $setuphold).
        systemTimingCheck1 = Group("$setup" + LPAR + timCheckEvent + COMMA +
                                   timCheckEvent + COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        systemTimingCheck2 = Group("$hold" + LPAR + timCheckEvent + COMMA +
                                   timCheckEvent + COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        systemTimingCheck3 = Group("$period" + LPAR +
                                   controlledTimingCheckEvent + COMMA +
                                   timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        systemTimingCheck4 = Group("$width" + LPAR +
                                   controlledTimingCheckEvent + COMMA +
                                   timCheckLimit +
                                   Optional(COMMA + expr + COMMA +
                                            notifyRegister) + RPAR + SEMI)
        systemTimingCheck5 = Group("$skew" + LPAR + timCheckEvent + COMMA +
                                   timCheckEvent + COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        systemTimingCheck6 = Group("$recovery" + LPAR +
                                   controlledTimingCheckEvent + COMMA +
                                   timCheckEvent + COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        systemTimingCheck7 = Group("$setuphold" + LPAR + timCheckEvent +
                                   COMMA + timCheckEvent + COMMA +
                                   timCheckLimit + COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        # FollowedBy('$') is a cheap guard before trying each alternative.
        systemTimingCheck = (
            FollowedBy('$') +
            (systemTimingCheck1 | systemTimingCheck2 | systemTimingCheck3 |
             systemTimingCheck4 | systemTimingCheck5 | systemTimingCheck6 |
             systemTimingCheck7)).setName("systemTimingCheck")
        sdpd = if_ + Group(LPAR + expr + RPAR) + \
            ( pathDescr1 | pathDescr2 ) + EQ + pathDelayValue + SEMI

        specifyItem = ~Keyword("endspecify") + (
            specparamDecl | pathDecl | levelSensitivePathDecl |
            edgeSensitivePathDecl | systemTimingCheck | sdpd)
        """
        x::= <specparam_declaration>
        x||= <path_declaration>
        x||= <level_sensitive_path_declaration>
        x||= <edge_sensitive_path_declaration>
        x||= <system_timing_check>
        x||= <sdpd>
        """
        specifyBlock = Group("specify" + ZeroOrMore(specifyItem) +
                             "endspecify").setName("specifyBlock")

        moduleItem = ~Keyword("endmodule") + (
            parameterDecl | inputDecl | outputDecl | inoutDecl | regDecl |
            netDecl3 | netDecl1 | netDecl2 | timeDecl | integerDecl |
            realDecl | eventDecl | gateDecl | parameterOverride |
            continuousAssign | specifyBlock | initialStmt | alwaysStmt |
            task | functionDecl |
            # these have to be at the end - they start with identifiers
            moduleInstantiation | udpInstantiation)
        """
        All possible moduleItems, from Verilog grammar spec
        x::= <parameter_declaration>
        x||= <input_declaration>
        x||= <output_declaration>
        x||= <inout_declaration>
        ?||= <net_declaration>  (spec does not seem consistent for this item)
        x||= <reg_declaration>
        x||= <time_declaration>
        x||= <integer_declaration>
        x||= <real_declaration>
        x||= <event_declaration>
        x||= <gate_declaration>
        x||= <UDP_instantiation>
        x||= <module_instantiation>
        x||= <parameter_override>
        x||= <continuous_assign>
        x||= <specify_block>
        x||= <initial_statement>
        x||= <always_statement>
        x||= <task>
        x||= <function>
        """
        # Module header and body.
        portRef = subscrIdentifier
        portExpr = portRef | Group(LBRACE + delimitedList(portRef) + RBRACE)
        port = portExpr | Group((DOT + identifier + LPAR + portExpr + RPAR))

        moduleHdr = Group(
            oneOf("module macromodule") + identifier +
            Optional(LPAR + Group(Optional(delimitedList(
                Group(oneOf("input output") +
                      (netDecl1Arg | netDecl2Arg | netDecl3Arg)) |
                port))) + RPAR) + SEMI).setName("moduleHdr")

        module = Group(moduleHdr + Group(ZeroOrMore(moduleItem)) +
                       "endmodule").setName("module")  #.setDebug()

        # UDP (user-defined primitive) definitions.
        udpDecl = outputDecl | inputDecl | regDecl
        #~ udpInitVal = oneOf("1'b0 1'b1 1'bx 1'bX 1'B0 1'B1 1'Bx 1'BX 1 0 x X")
        udpInitVal = (Regex("1'[bB][01xX]") |
                      Regex("[01xX]")).setName("udpInitVal")
        udpInitialStmt = Group("initial" + identifier + EQ + udpInitVal +
                               SEMI).setName("udpInitialStmt")
        levelSymbol = oneOf("0 1 x X ? b B")
        levelInputList = Group(OneOrMore(levelSymbol).setName("levelInpList"))
        outputSymbol = oneOf("0 1 x X")
        combEntry = Group(levelInputList + COLON + outputSymbol + SEMI)
        edgeSymbol = oneOf("r R f F p P n N *")
        # NOTE(review): rebinds `edge`, previously Keyword("edge") above.
        edge = Group( LPAR + levelSymbol + levelSymbol + RPAR ) | \
               Group( edgeSymbol )
        edgeInputList = Group(
            ZeroOrMore(levelSymbol) + edge + ZeroOrMore(levelSymbol))
        inputList = levelInputList | edgeInputList
        seqEntry = Group(inputList + COLON + levelSymbol + COLON +
                         (outputSymbol | "-") + SEMI).setName("seqEntry")
        udpTableDefn = Group("table" + OneOrMore(combEntry | seqEntry) +
                             "endtable").setName("table")
        """
        <UDP>
        ::= primitive <name_of_UDP> ( <name_of_variable> <,<name_of_variable>>* ) ;
        <UDP_declaration>+
        <UDP_initial_statement>?
        <table_definition>
        endprimitive
        """
        udp = Group("primitive" + identifier + LPAR +
                    Group(delimitedList(identifier)) + RPAR + SEMI +
                    OneOrMore(udpDecl) + Optional(udpInitialStmt) +
                    udpTableDefn + "endprimitive")

        # A source file is one or more modules and/or primitives.
        verilogbnf = OneOrMore(module | udp) + StringEnd()

        verilogbnf.ignore(cppStyleComment)
        verilogbnf.ignore(compilerDirective)

    return verilogbnf
def parse_javascript_vars(data):
    '''Receives a string of JavaScript-like data and tries to parse it.
    Returns a dict with each var.

    Several assumptions are made:
    - Only the assignment operator '=' is supported.
    - The script is composed of one or more assignments, and nothing else.
    - The "var " prefix before an assignment is optional.
    - No variable is assigned more than once.
    - Comments should be correctly ignored, as well as whitespace.
    - Values can be numbers, strings, arrays or dictionaries.
    - Arrays and dictionaries can only contain number and strings.
    - Dictionary keys can be numbers, strings, or an identifier.

    Sample input for this grammar:

    var i = 0;  // Optional var, optional semicolon.
    j = 0x10    // 16
    k = -010    // -8
    f = 1.0
    g = +.9     // Optional leading 0, optional signal.
    s = 'single quoted'
    t = "double quoted"
    a = []
    b = [0, 1, 'string', "double", 3.14]
    c = {}
    d = { foo: 'without quotes', 'bar': "as a string", 3: 'as a number' }

    This code can parse cgi_adsl_info.cgi, but it can't parse
    cgi_atm_info.cgi.
    '''
    # Fix: `ParseResults` is now imported explicitly.  The original tested
    # `isinstance(subtree, pyparsing.ParseResults)` without ever importing
    # the `pyparsing` module itself (only names from it), which raised
    # NameError on the first array/dict value.
    from pyparsing import Combine, Dict, Group, Keyword, LineEnd, OneOrMore, \
        Optional, ParseResults, StringEnd, Suppress, White, Word, alphanums, \
        alphas, cppStyleComment, dblQuotedString, dblSlashComment, \
        delimitedList, hexnums, nums, removeQuotes, sglQuotedString

    # AKA identifier.
    varname = Word(alphas + '_$', alphanums + '_$')

    # This Optional(Suppress(White)) is required to because of the firstOf
    # operator when defining number.
    number_signal = Optional(Word('-+', exact=1)) + Optional(Suppress(White()))
    decimal_number = number_signal + Word('123456789', nums)
    # Scientific notation is not supported.
    float_number = number_signal + Optional(Word(nums)) + '.' + Word(nums)
    # For convenience, zero is considered an octal number.
    octal_number = number_signal + Word('0', '01234567')
    hex_number = number_signal + '0x' + Word(hexnums)
    number = Combine(float_number | decimal_number | hex_number | octal_number)

    def convert_number(toks):
        """Convert the combined numeric literal to an int or float."""
        s = toks[0]
        signal = s[0] if s[0] in '+-' else ''
        number = s[1:] if signal else s
        if '.' in s:
            return float(s)
        elif number.startswith('0x'):
            return int(signal + number[2:], base=16)
        elif number.startswith('0'):
            return int(s, base=8)
        else:
            return int(s, base=10)

    number.setParseAction(convert_number)

    string = (dblQuotedString.setParseAction(removeQuotes) |
              sglQuotedString.setParseAction(removeQuotes))

    # Nested arrays/dicts are not supported.
    array_list = Group(
        Suppress('[') + Optional(delimitedList(number | string)) +
        Suppress(']'))
    array_associative = Group(
        Dict(
            Suppress('{') + Optional(
                delimitedList(
                    Group((number | string | varname) + Suppress(':') +
                          (number | string)))) + Suppress('}')))

    value = number | string | array_list | array_associative

    assignment = Group(
        Optional(Suppress(Keyword('var'))) + varname + Suppress('=') + value +
        Suppress(';' | LineEnd()))
    parser = Dict(OneOrMore(assignment)) + StringEnd()
    parser.ignore(dblSlashComment)
    parser.ignore(cppStyleComment)

    tree = parser.parseString(data)

    # Converting the pyparsing.ParseResults tree into a simple Python dict.
    ret = {}
    for var, subtree in tree.asDict().items():
        if isinstance(subtree, ParseResults):
            try:
                # Using .asDict() converts all integer keys to strings.
                # ret[var] = subtree.asDict()
                # Using .asList() retains numbers as numbers.
                ret[var] = dict(subtree.asList())
            except TypeError:
                ret[var] = subtree.asList()
        else:
            # Most likely already a number or string.
            ret[var] = subtree
    return ret
# --- VCD (Value Change Dump) grammar fragments ---
# NOTE(review): relies on `s` (presumably a Suppress shorthand) and
# `identifier` being defined earlier in this module — confirm.

# $var <type> <size> <id-code> <name> $end
definition = Word(alphas)('type') + Word(nums)('size') + \
    identifier + Word(printables)('name')
signal = Group(s('$var') + definition + s('$end'))('signal')

# Generic "$<keyword> ... $end" section; body is captured raw.
content = SkipTo('$end')('content') + s('$end')
section = Group(s('$') + Word(alphas)('name') + content)('section')

# Header sections.
unit = s('1') + oneOf('s ms ns us ps fs')
timescale = (s('$timescale') + unit + s('$end'))('timescale')
scope = Group(s('$scope module') + Word(printables)('module') +
              s('$end'))('scope')
upscope = Group(s('$upscope') + s(content))('upscope')
enddefinitions = s('$enddefinitions' + content)

# Value changes: "#<time>" followed by scalar or vector values.
time = s('#') + Word(nums)('time')
# Fix: 'H' and '-' are distinct std_logic values (weak high / don't-care);
# the original "H-" fused them into a single two-character alternative so
# neither 'H' nor '-' alone could match.  The vector rule below already
# lists '-' as its own value character.
std_logic = oneOf('U X 0 1 Z W L H -')('std_logic')
std_logic_vector = Word('b', 'UX01ZWLH-')('std_logic_vector')
value = Group((std_logic | std_logic_vector) + identifier)('value')
step = Group(time + ZeroOrMore(value))('step')

headers = signal | timescale | scope | upscope
changes = enddefinitions + ZeroOrMore(step) + StringEnd()
vcd = ZeroOrMore(headers | changes | section)('vcd')
def SPICE_BNF():
    """Build (once) and return the pyparsing BNF for the SPICE protocol
    description language.

    The parser is cached in the module-global ``bnf``.  Parse actions
    construct the corresponding ``ptypes`` objects (EnumType, StructType,
    MessageType, ChannelType, ProtocolType, ...) so parsing a protocol
    file yields a typed object tree rather than raw tokens.

    NOTE: the `^` (Or / longest-match) combinator is used deliberately in
    several places — see the comment on `typeSpec` below.
    """
    global bnf

    if not bnf:

        # punctuation
        colon = Literal(":").suppress()
        lbrace = Literal("{").suppress()
        rbrace = Literal("}").suppress()
        lbrack = Literal("[").suppress()
        rbrack = Literal("]").suppress()
        lparen = Literal("(").suppress()
        rparen = Literal(")").suppress()
        equals = Literal("=").suppress()
        comma = Literal(",").suppress()
        semi = Literal(";").suppress()

        # primitive types — each keyword is replaced with its ptypes object
        int8_ = Keyword("int8").setParseAction(replaceWith(ptypes.int8))
        uint8_ = Keyword("uint8").setParseAction(replaceWith(ptypes.uint8))
        int16_ = Keyword("int16").setParseAction(replaceWith(ptypes.int16))
        uint16_ = Keyword("uint16").setParseAction(replaceWith(ptypes.uint16))
        int32_ = Keyword("int32").setParseAction(replaceWith(ptypes.int32))
        uint32_ = Keyword("uint32").setParseAction(replaceWith(ptypes.uint32))
        int64_ = Keyword("int64").setParseAction(replaceWith(ptypes.int64))
        uint64_ = Keyword("uint64").setParseAction(replaceWith(ptypes.uint64))

        # keywords — enum/flags keywords are replaced with their bit width
        enum32_ = Keyword("enum32").setParseAction(replaceWith(32))
        enum16_ = Keyword("enum16").setParseAction(replaceWith(16))
        enum8_ = Keyword("enum8").setParseAction(replaceWith(8))
        flags32_ = Keyword("flags32").setParseAction(replaceWith(32))
        flags16_ = Keyword("flags16").setParseAction(replaceWith(16))
        flags8_ = Keyword("flags8").setParseAction(replaceWith(8))
        channel_ = Keyword("channel")
        server_ = Keyword("server")
        client_ = Keyword("client")
        protocol_ = Keyword("protocol")
        typedef_ = Keyword("typedef")
        struct_ = Keyword("struct")
        message_ = Keyword("message")
        image_size_ = Keyword("image_size")
        bytes_ = Keyword("bytes")
        cstring_ = Keyword("cstring")
        switch_ = Keyword("switch")
        default_ = Keyword("default")
        case_ = Keyword("case")

        identifier = Word( alphas, alphanums + "_" )
        enumname = Word( alphanums + "_" )

        # Hex (0x...) or signed decimal integers, converted by cvtInt.
        integer = ( Combine( CaselessLiteral("0x") +
                             Word( nums+"abcdefABCDEF" ) ) |
                    Word( nums+"+-", nums ) ).setName("int").setParseAction(cvtInt)

        # A reference to a (possibly not-yet-defined) named type.
        typename = identifier.copy().setParseAction(
            lambda toks: ptypes.TypeRef(str(toks[0])))

        # This is just normal "types", i.e. not channels or messages
        typeSpec = Forward()

        # @attribute or @attribute(arg, ...)
        attributeValue = integer ^ identifier
        attribute = Group(Combine ("@" + identifier) +
                          Optional(lparen + delimitedList(attributeValue) +
                                   rparen))
        attributes = Group(ZeroOrMore(attribute))
        # Array size specifiers: [n], [ident], [image_size(...)],
        # [bytes(...)], [cstring()], or [] (default "").
        arraySizeSpecImage = Group(image_size_ + lparen +
                                   integer + comma + identifier + comma +
                                   identifier + rparen)
        arraySizeSpecBytes = Group(bytes_ + lparen +
                                   identifier + comma + identifier + rparen)
        arraySizeSpecCString = Group(cstring_ + lparen + rparen)
        arraySizeSpec = lbrack + \
            Optional(identifier ^ integer ^ arraySizeSpecImage ^
                     arraySizeSpecBytes ^arraySizeSpecCString,
                     default="") + rbrack
        # <type> [*] <name> [size-spec] [@attrs...] ;
        # (`-` makes the trailing `;` mandatory once this far matched)
        variableDef = Group(typeSpec + Optional("*", default=None) +
                            identifier + Optional(arraySizeSpec, default=None) +
                            attributes - semi) \
            .setParseAction(parseVariableDef)

        # switch/case blocks inside structs/messages
        switchCase = Group(Group(OneOrMore(
            default_.setParseAction(replaceWith(None)) + colon |
            Group(case_.suppress() + Optional("!", default="") +
                  identifier) + colon)) + variableDef) \
            .setParseAction(lambda toks: ptypes.SwitchCase(toks[0][0], toks[0][1]))
        switchBody = Group(switch_ + lparen +
                           delimitedList(identifier,delim='.', combine=True) +
                           rparen + lbrace + Group(OneOrMore(switchCase)) +
                           rbrace + identifier + attributes - semi) \
            .setParseAction(lambda toks: ptypes.Switch(toks[0][1],
                                                       toks[0][2],
                                                       toks[0][3],
                                                       toks[0][4]))
        messageBody = structBody = Group(lbrace +
                                         ZeroOrMore(variableDef | switchBody) +
                                         rbrace)
        structSpec = Group(struct_ + identifier + structBody +
                           attributes).setParseAction(
            lambda toks: ptypes.StructType(toks[0][1], toks[0][2], toks[0][3]))

        # have to use longest match for type, in case a user-defined type name starts with a keyword type, like "channel_type"
        typeSpec << ( structSpec ^ int8_ ^ uint8_ ^ int16_ ^ uint16_ ^
                      int32_ ^ uint32_ ^ int64_ ^ uint64_ ^
                      typename).setName("type")

        flagsBody = enumBody = Group(lbrace +
                                     delimitedList(Group (enumname +
                                                          Optional(equals +
                                                                   integer))) +
                                     Optional(comma) + rbrace)

        # Inline (anonymous) message body, or a reference to a named type.
        messageSpec = Group(message_ + messageBody +
                            attributes).setParseAction(
            lambda toks: ptypes.MessageType(None,
                                            toks[0][1],
                                            toks[0][2])) | typename

        # Channel definitions: optional parent, then server:/client: sections
        # of messages.
        channelParent = Optional(colon + typename, default=None)
        channelMessage = Group(messageSpec + identifier +
                               Optional(equals + integer, default=None) +
                               semi) \
            .setParseAction(lambda toks: ptypes.ChannelMember(toks[0][1],
                                                              toks[0][0],
                                                              toks[0][2]))
        channelBody = channelParent + Group(lbrace +
                                            ZeroOrMore( server_ + colon |
                                                        client_ + colon |
                                                        channelMessage) +
                                            rbrace)

        # Top-level definitions, each producing a ptypes object.
        enum_ = (enum32_ | enum16_ | enum8_)
        flags_ = (flags32_ | flags16_ | flags8_)
        enumDef = Group(enum_ + identifier + enumBody + attributes -
                        semi).setParseAction(
            lambda toks: ptypes.EnumType(toks[0][0], toks[0][1],
                                         toks[0][2], toks[0][3]))
        flagsDef = Group(flags_ + identifier + flagsBody + attributes -
                         semi).setParseAction(
            lambda toks: ptypes.FlagsType(toks[0][0], toks[0][1],
                                          toks[0][2], toks[0][3]))
        messageDef = Group(message_ + identifier + messageBody + attributes -
                           semi).setParseAction(
            lambda toks: ptypes.MessageType(toks[0][1], toks[0][2],
                                            toks[0][3]))
        channelDef = Group(channel_ + identifier + channelBody + attributes -
                           semi).setParseAction(
            lambda toks: ptypes.ChannelType(toks[0][1], toks[0][2],
                                            toks[0][3], toks[0][4]))
        structDef = Group(struct_ + identifier + structBody + attributes -
                          semi).setParseAction(
            lambda toks: ptypes.StructType(toks[0][1], toks[0][2],
                                           toks[0][3]))
        typedefDef = Group(typedef_ + identifier + typeSpec + attributes -
                           semi).setParseAction(
            lambda toks: ptypes.TypeAlias(toks[0][1], toks[0][2],
                                          toks[0][3]))

        definitions = typedefDef | structDef | enumDef | flagsDef | \
            messageDef | channelDef

        # The protocol block lists its channels.
        protocolChannel = Group(typename + identifier +
                                Optional(equals + integer, default=None) +
                                semi) \
            .setParseAction(lambda toks: ptypes.ProtocolMember(toks[0][1],
                                                               toks[0][0],
                                                               toks[0][2]))
        protocolDef = Group(protocol_ + identifier +
                            Group(lbrace + ZeroOrMore(protocolChannel) +
                                  rbrace) + semi) \
            .setParseAction(lambda toks: ptypes.ProtocolType(toks[0][1],
                                                             toks[0][2]))

        # A file is any number of definitions followed by one protocol block.
        bnf = ZeroOrMore (definitions) + protocolDef + StringEnd()

        singleLineComment = "//" + restOfLine
        bnf.ignore( singleLineComment )
        bnf.ignore( cStyleComment )

    return bnf
elements = ['Ac', 'Ag', 'Al', 'Am', 'Ar', 'As', 'At', 'Au', 'B', 'Ba', 'Be', 'Bh', 'Bi', 'Bk', 'Br', 'C', 'Ca', 'Cd', 'Ce', 'Cf', 'Cl', 'Cm', 'Cn', 'Co', 'Cr', 'Cs', 'Cu', 'Db', 'Ds', 'Dy', 'Er', 'Es', 'Eu', 'F', 'Fe', 'Fl', 'Fm', 'Fr', 'Ga', 'Gd', 'Ge', 'H', 'He', 'Hf', 'Hg', 'Ho', 'Hs', 'I', 'In', 'Ir', 'K', 'Kr', 'La', 'Li', 'Lr', 'Lu', 'Lv', 'Md', 'Mg', 'Mn', 'Mo', 'Mt', 'N', 'Na', 'Nb', 'Nd', 'Ne', 'Ni', 'No', 'Np', 'O', 'Os', 'P', 'Pa', 'Pb', 'Pd', 'Pm', 'Po', 'Pr', 'Pt', 'Pu', 'Ra', 'Rb', 'Re', 'Rf', 'Rg', 'Rh', 'Rn', 'Ru', 'S', 'Sb', 'Sc', 'Se', 'Sg', 'Si', 'Sm', 'Sn', 'Sr', 'Ta', 'Tb', 'Tc', 'Te', 'Th', 'Ti', 'Tl', 'Tm', 'U', 'Uuo', 'Uup', 'Uus', 'Uut', 'V', 'W', 'Xe', 'Y', 'Yb', 'Zn', 'Zr'] digits = map(str, range(10)) symbols = list("[](){}^+-/") phases = ["(s)", "(l)", "(g)", "(aq)"] tokens = reduce(lambda a, b: a ^ b, map(Literal, elements + digits + symbols + phases)) tokenizer = OneOrMore(tokens) + StringEnd() def _orjoin(l): return "'" + "' | '".join(l) + "'" ## Defines an NLTK parser for tokenized expressions grammar = """ S -> multimolecule | multimolecule '+' S multimolecule -> count molecule | molecule count -> number | number '/' number molecule -> unphased | unphased phase unphased -> group | paren_group_round | paren_group_square element -> """ + _orjoin(elements) + """ digit -> """ + _orjoin(digits) + """ phase -> """ + _orjoin(phases) + """
def __init__(self, money_channels, currency_name, slack=None):
    """Set up the currency-game command grammars and metadata.

    Args:
        money_channels: Iterable of channel names where money commands are
            allowed; also preserved in order for display purposes.
        currency_name: Display name of the currency, interpolated into the
            user-facing help strings.
        slack: Optional Slack client (accepted for API compatibility).
    """
    self.currency_name = currency_name
    self.channel_order = money_channels
    # Attribute used for slack API limiting of commands
    self.channels = set(money_channels)
    # Command-name -> index/flag tables driving the pyparsing grammars below.
    self.work_commands = {'bootstraps': 0, 'study': 1, 'utilizesynergy': 2}
    self.level_commands = {'college': 0, 'graduate': 1}
    self.level_costs = (250, 50)
    self.check_commands = {'bank': False, 'transcript': True}
    # Fix: the original called .format(currency_name) on a literal with no
    # placeholders — a no-op; the resulting string is identical.
    self.check_name = 'Check your status'
    self.check_expr = reduce(or_, (CaselessLiteral(s) for s in self.check_commands)).setResultsName('command') + StringEnd()
    self.check_doc = 'Check the amount of {} or grades you have'.format(currency_name)
    self.gain_name = 'Income'
    self.gain_expr = reduce(or_, (CaselessLiteral(s) for s in self.work_commands)).setResultsName('command') + StringEnd()
    # Dict unpacking yields the command names in insertion order.
    self.gain_doc = ('Earn your way through:\n' +
                     '\t{} | {} | {}'.format(*self.work_commands))
    self.level_name = 'Increase your status'
    self.level_expr = reduce(or_, (CaselessLiteral(s) for s in self.level_commands)).setResultsName('command') + StringEnd()
    self.level_doc = ('Work your way up:\n' +
                      '\t{} | {}'.format(*self.level_commands))
    self.give_name = 'Give {}'.format(currency_name)
    self.give_expr = CaselessLiteral('give') + symbols.user_name.setResultsName('user') + symbols.int_num.setResultsName('amount') + StringEnd()
    self.give_doc = 'Create {} in a user\'s account'.format(currency_name)
def _create_grammar():
    """Create the DBC grammar.

    Returns a pyparsing element matching a complete DBC file: one or more
    top-level entries (messages, comments, attributes, value tables, ...)
    followed by end-of-string. The '-' operator (And with error stop) is
    used after each keyword so that a malformed entry raises a parse error
    at that entry instead of backtracking.

    NOTE(review): `QuotedString()` is called with no arguments throughout;
    pyparsing's QuotedString requires a quoteChar, so this presumably is a
    project-local wrapper with defaults -- confirm.
    """

    # Primitive tokens shared by all rules below.
    word = Word(printables.replace(';', '').replace(':', ''))
    integer = Group(Optional('-') + Word(nums))
    positive_integer = Word(nums).setName('positive integer')
    number = Word(nums + '.Ee-+')
    colon = Suppress(Literal(':'))
    scolon = Suppress(Literal(';'))
    pipe = Suppress(Literal('|'))
    at = Suppress(Literal('@'))
    sign = Literal('+') | Literal('-')
    lp = Suppress(Literal('('))
    rp = Suppress(Literal(')'))
    lb = Suppress(Literal('['))
    rb = Suppress(Literal(']'))
    comma = Suppress(Literal(','))
    node = Word(alphas + nums + '_-').setWhitespaceChars(' ')
    # Converted to int by the setParseAction() near the end of this function.
    frame_id = Word(nums).setName('frame id')

    # VERSION "..."
    version = Group(Keyword('VERSION') - QuotedString())
    version.setName(VERSION)

    # NS_ : followed by one symbol name per line.
    symbol = Word(alphas + '_') + Suppress(LineEnd())
    symbols = Group(Keyword('NS_') - colon - Group(ZeroOrMore(symbol)))
    symbols.setName('NS_')

    # BS_ : (bit timing section) -- content is discarded entirely.
    discard = Suppress(Keyword('BS_') - colon).setName('BS_')

    # BU_ : list of node names.
    nodes = Group(Keyword('BU_') - colon - Group(ZeroOrMore(node)))
    nodes.setName('BU_')

    # Signal: name [mux] : start|length@byteorder sign (scale,offset)
    #         [min|max] "unit" receiver-list
    signal = Group(
        Keyword(SIGNAL)
        - Group(word + Optional(word))
        - colon
        - Group(positive_integer
                - pipe
                - positive_integer
                - at
                - positive_integer
                - sign)
        - Group(lp - number - comma - number - rp)
        - Group(lb - number - pipe - number - rb)
        - QuotedString()
        - Group(delimitedList(node)))
    signal.setName(SIGNAL)

    # Message: frame id, name, size, sender, then its signals.
    message = Group(
        Keyword(MESSAGE)
        - frame_id
        - word
        - colon
        - positive_integer
        - word
        - Group(ZeroOrMore(signal)))
    message.setName(MESSAGE)

    # Environment variable entries are matched but suppressed.
    event = Suppress(
        Keyword(EVENT)
        - word
        - colon
        - positive_integer
        - lb
        - number
        - pipe
        - number
        - rb
        - QuotedString()
        - number
        - number
        - word
        - node
        - scolon)
    event.setName(EVENT)

    # CM_ comments, attached to a signal, message, event, node, or global.
    comment = Group(
        Keyword(COMMENT)
        - ((Keyword(SIGNAL) - frame_id - word - QuotedString() - scolon).setName(SIGNAL)
           | (Keyword(MESSAGE) - frame_id - QuotedString() - scolon).setName(MESSAGE)
           | (Keyword(EVENT) - word - QuotedString() - scolon).setName(EVENT)
           | (Keyword(NODES) - word - QuotedString() - scolon).setName(NODES)
           | (QuotedString() - scolon).setName('QuotedString')))
    comment.setName(COMMENT)

    # BA_DEF_: attribute definition, optionally scoped to an object kind,
    # with a value spec that is either empty, an enum list, or numbers.
    attribute_definition = Group(
        Keyword(ATTRIBUTE_DEFINITION)
        - ((QuotedString())
           | (Keyword(SIGNAL)
              | Keyword(MESSAGE)
              | Keyword(EVENT)
              | Keyword(NODES))
           + QuotedString())
        - word
        - (scolon
           | (Group(ZeroOrMore(Group(
               (comma | Empty()) + QuotedString()))) + scolon)
           | (Group(ZeroOrMore(number)) + scolon)))
    attribute_definition.setName(ATTRIBUTE_DEFINITION)

    attribute_definition_default = Group(
        Keyword(ATTRIBUTE_DEFINITION_DEFAULT)
        - QuotedString()
        - (number | QuotedString())
        - scolon)
    attribute_definition_default.setName(ATTRIBUTE_DEFINITION_DEFAULT)

    # BA_: attribute value, optionally scoped to a message/signal/node.
    attribute = Group(
        Keyword(ATTRIBUTE)
        - QuotedString()
        - Group(Optional((Keyword(MESSAGE) + frame_id)
                         | (Keyword(SIGNAL) + frame_id + word)
                         | (Keyword(NODES) + word)))
        - (QuotedString() | number)
        - scolon)
    attribute.setName(ATTRIBUTE)

    # VAL_: value descriptions for a signal (or an event when no frame id).
    choice = Group(
        Keyword(CHOICE)
        - Group(Optional(frame_id))
        - word
        - Group(OneOrMore(Group(integer + QuotedString())))
        - scolon)
    choice.setName(CHOICE)

    value_table = Group(
        Keyword(VALUE_TABLE)
        - word
        - Group(OneOrMore(Group(integer + QuotedString())))
        - scolon)
    value_table.setName(VALUE_TABLE)

    signal_type = Group(
        Keyword(SIGNAL_TYPE)
        - frame_id
        - word
        - colon
        - positive_integer
        - scolon)
    signal_type.setName(SIGNAL_TYPE)

    # NOTE(review): the upper bound of each multiplexer range is Suppress'd,
    # so only the lower bounds survive in the parse results -- confirm this
    # is intentional.
    signal_multiplexer_values = Group(
        Keyword(SIGNAL_MULTIPLEXER_VALUES)
        - frame_id
        - word
        - word
        - Group(delimitedList(
            positive_integer - Suppress('-') - Suppress(positive_integer)))
        - scolon)
    signal_multiplexer_values.setName(SIGNAL_MULTIPLEXER_VALUES)

    # BO_TX_BU_: additional transmitting nodes for a message.
    message_add_sender = Group(
        Keyword(MESSAGE_TX_NODE)
        - frame_id
        - colon
        - Group(delimitedList(node))
        - scolon)
    message_add_sender.setName(MESSAGE_TX_NODE)

    # Relation-scoped attribute definitions/defaults/values (BA_*_REL_).
    attribute_definition_rel = Group(
        Keyword(ATTRIBUTE_DEFINITION_REL)
        - (QuotedString()
           | (Keyword(NODES_REL) + QuotedString()))
        - word
        - (scolon
           | (Group(ZeroOrMore(Group(
               (comma | Empty()) + QuotedString()))) + scolon)
           | (Group(ZeroOrMore(number)) + scolon)))
    attribute_definition_rel.setName(ATTRIBUTE_DEFINITION_REL)

    attribute_definition_default_rel = Group(
        Keyword(ATTRIBUTE_DEFINITION_DEFAULT_REL)
        - QuotedString()
        - (number | QuotedString())
        - scolon)
    attribute_definition_default_rel.setName(ATTRIBUTE_DEFINITION_DEFAULT_REL)

    attribute_rel = Group(
        Keyword(ATTRIBUTE_REL)
        - QuotedString()
        - Keyword(NODES_REL)
        - word
        - Keyword(SIGNAL)
        - frame_id
        - word
        - (positive_integer | QuotedString())
        - scolon)
    attribute_rel.setName(ATTRIBUTE_REL)

    signal_group = Group(
        Keyword(SIGNAL_GROUP)
        - frame_id
        - word
        - integer
        - colon
        - OneOrMore(word)
        - scolon)
    signal_group.setName(SIGNAL_GROUP)

    # Any one top-level DBC entry. Order matters: MatchFirst ('|') takes
    # the first alternative whose leading keyword matches.
    entry = (message
             | comment
             | attribute
             | choice
             | attribute_definition
             | attribute_definition_default
             | attribute_rel
             | attribute_definition_rel
             | attribute_definition_default_rel
             | signal_group
             | event
             | message_add_sender
             | value_table
             | signal_type
             | signal_multiplexer_values
             | discard
             | nodes
             | symbols
             | version)

    # All frame ids come back as ints.
    frame_id.setParseAction(lambda _s, _l, t: int(t[0]))

    return OneOrMore(entry) + StringEnd()
# Search-pattern grammar: bare terms (with optional '*' wildcards and an
# optional "meta:" prefix), quoted phrases, and and/or/- boolean operators.
# Characters allowed in a bare term: all printables except parentheses,
# which are reserved for grouping.
NO_BRTS = printables.replace('(', '').replace(')', '')
SINGLE = Word(NO_BRTS.replace('*', ''))
# A term optionally wrapped in leading/trailing '*' wildcards.
WILDCARDS = Optional('*') + SINGLE + Optional('*') + WordEnd(wordChars=NO_BRTS)
QUOTED = quotedString.setParseAction(removeQuotes)

OPER_AND = CaselessLiteral('and')
OPER_OR = CaselessLiteral('or')
OPER_NOT = '-'

# One search term: optional 'meta:' field prefix, then a quoted string or
# wildcard pattern; createQ converts the match into a Django Q object.
TERM = Combine(
    Optional(Word(alphas).setResultsName('meta') + ':') +
    (QUOTED.setResultsName('query') | WILDCARDS.setResultsName('query')))
TERM.setParseAction(createQ)

# Precedence, tightest first: unary NOT ('-'), then OR, then AND -- where
# AND is optional so two adjacent terms combine as an implicit 'and'.
EXPRESSION = operatorPrecedence(
    TERM,
    [(OPER_NOT, 1, opAssoc.RIGHT),
     (OPER_OR, 2, opAssoc.LEFT),
     (Optional(OPER_AND, default='and'), 2, opAssoc.LEFT)])
EXPRESSION.setParseAction(unionQ)

# The full query: one or more expressions, consuming the whole input.
QUERY = OneOrMore(EXPRESSION) + StringEnd()
QUERY.setParseAction(unionQ)


def advanced_search(pattern):
    """
    Parse the grammar of a pattern
    and build a queryset with it.
    """
    # parseString raises pyparsing.ParseException on invalid patterns;
    # callers are expected to handle it.
    query_parsed = QUERY.parseString(pattern)
    # query_parsed[0] is the combined Q object produced by createQ/unionQ.
    return Entry.published.filter(query_parsed[0]).distinct()
class CreateParser(object):
    """
    This class can take a plain "CREATE TABLE" SQL as input and parse it into
    a Table object, so that we have more insight on the detail of this SQL.
    Example:
    sql = 'create table foo ( bar int primary key )'
    parser = CreateParser(sql)
    try:
        tbl_obj = parser.parse()
    except ParseError:
        log.error("Failed to parse SQL")

    This set of BNF rules are basically translated from the MySQL manual:
    http://dev.mysql.com/doc/refman/5.6/en/create-table.html
    If you don't know how to change the rule or fix the bug,
    <Getting Started with Pyparsing> is probably the best book to start with.
    Also this wiki has all supported functions listed:
    https://pyparsing.wikispaces.com/HowToUsePyparsing
    If you want have more information how these characters are matching, add
    .setDebug(True) after the specific token you want to debug
    """

    # Lazily-built, cached parser singletons (see get_parser /
    # get_partitions_parser).
    _parser = None
    _partitions_parser = None

    # Basic token
    WORD_CREATE = CaselessLiteral("CREATE").suppress()
    WORD_TABLE = CaselessLiteral("TABLE").suppress()
    COMMA = Literal(",").suppress()
    DOT = Literal(".")
    LEFT_PARENTHESES = Literal("(").suppress()
    RIGHT_PARENTHESES = Literal(")").suppress()
    QUOTE = Literal("'") | Literal('"')
    BACK_QUOTE = Optional(Literal("`")).suppress()
    LENGTH = Word(nums)
    DECIMAL = Combine(Word(nums) + DOT + Word(nums))
    OBJECT_NAME = Word(alphanums + "_" + "-" + "<" + ">" + ":")
    # Quoted string, quotes preserved in the result.
    QUOTED_STRING_WITH_QUOTE = QuotedString(
        quoteChar="'", escQuote="''", escChar="\\", multiline=True, unquoteResults=False
    ) | QuotedString(
        quoteChar='"', escQuote='""', escChar="\\", multiline=True, unquoteResults=False
    )
    # Quoted string, quotes stripped from the result.
    QUOTED_STRING = QuotedString(
        quoteChar="'", escQuote="''", escChar="\\", multiline=True
    ) | QuotedString(quoteChar='"', escQuote='""', escChar="\\", multiline=True)

    # Start of a create table statement
    # Sample: this part of rule will match following section
    # `table_name` IF NOT EXISTS
    IF_NOT_EXIST = Optional(
        CaselessLiteral("IF") + CaselessLiteral("NOT") + CaselessLiteral("EXISTS")
    ).suppress()
    TABLE_NAME = (
        QuotedString(quoteChar="`", escQuote="``", escChar="\\", unquoteResults=True)
        | OBJECT_NAME
    )("table_name")

    # Column definition
    # Sample: this part of rule will match following section
    # `id` bigint(20) unsigned NOT NULL DEFAULT '0',
    COLUMN_NAME = (
        QuotedString(quoteChar="`", escQuote="``", escChar="\\", unquoteResults=True)
        | OBJECT_NAME
    )("column_name")
    COLUMN_NAME_WITH_QUOTE = (
        QuotedString(quoteChar="`", escQuote="``", escChar="\\", unquoteResults=False)
        | OBJECT_NAME
    )("column_name")
    UNSIGNED = Optional(CaselessLiteral("UNSIGNED"))("unsigned")
    ZEROFILL = Optional(CaselessLiteral("ZEROFILL"))("zerofill")
    COL_LEN = Combine(LEFT_PARENTHESES + LENGTH + RIGHT_PARENTHESES, adjacent=False)(
        "length"
    )
    INT_TYPE = (
        CaselessLiteral("TINYINT")
        | CaselessLiteral("SMALLINT")
        | CaselessLiteral("MEDIUMINT")
        | CaselessLiteral("INT")
        | CaselessLiteral("INTEGER")
        | CaselessLiteral("BIGINT")
        | CaselessLiteral("BINARY")
        | CaselessLiteral("BIT")
    )
    INT_DEF = INT_TYPE("column_type") + Optional(COL_LEN) + UNSIGNED + ZEROFILL
    VARBINARY_DEF = CaselessLiteral("VARBINARY")("column_type") + COL_LEN
    FLOAT_TYPE = (
        CaselessLiteral("REAL")
        | CaselessLiteral("DOUBLE")
        | CaselessLiteral("FLOAT")
        | CaselessLiteral("DECIMAL")
        | CaselessLiteral("NUMERIC")
    )
    # Optional "(M, D)" precision/scale spec for float types.
    FLOAT_LEN = Combine(
        LEFT_PARENTHESES + LENGTH + Optional(COMMA + LENGTH) + RIGHT_PARENTHESES,
        adjacent=False,
        joinString=", ",
    )("length")
    FLOAT_DEF = FLOAT_TYPE("column_type") + Optional(FLOAT_LEN) + UNSIGNED + ZEROFILL

    # time type definition. They contain type_name and an optional FSP section
    # Sample: DATETIME[(fsp)]
    FSP = COL_LEN
    DT_DEF = (
        Combine(CaselessLiteral("TIME") + Optional(CaselessLiteral("STAMP")))
        | CaselessLiteral("DATETIME")
    )("column_type") + Optional(FSP)
    SIMPLE_DEF = (
        CaselessLiteral("DATE")
        | CaselessLiteral("YEAR")
        | CaselessLiteral("TINYBLOB")
        | CaselessLiteral("BLOB")
        | CaselessLiteral("MEDIUMBLOB")
        | CaselessLiteral("LONGBLOB")
        | CaselessLiteral("BOOLEAN")
        | CaselessLiteral("BOOL")
        | CaselessLiteral("JSON")
    )("column_type")
    OPTIONAL_COL_LEN = Optional(COL_LEN)
    BINARY = Optional(CaselessLiteral("BINARY"))("binary")
    CHARSET_NAME = (
        Optional(QUOTE).suppress()
        + Word(alphanums + "_")("charset")
        + Optional(QUOTE).suppress()
    )
    COLLATION_NAME = (
        Optional(QUOTE).suppress()
        + Word(alphanums + "_")("collate")
        + Optional(QUOTE).suppress()
    )
    CHARSET_DEF = CaselessLiteral("CHARACTER SET").suppress() + CHARSET_NAME
    COLLATE_DEF = CaselessLiteral("COLLATE").suppress() + COLLATION_NAME
    CHAR_DEF = CaselessLiteral("CHAR")("column_type") + OPTIONAL_COL_LEN + BINARY
    VARCHAR_DEF = CaselessLiteral("VARCHAR")("column_type") + COL_LEN + BINARY
    TEXT_TYPE = (
        CaselessLiteral("TINYTEXT")
        | CaselessLiteral("TEXT")
        | CaselessLiteral("MEDIUMTEXT")
        | CaselessLiteral("LONGTEXT")
        | CaselessLiteral("DOCUMENT")
    )
    TEXT_DEF = TEXT_TYPE("column_type") + BINARY
    ENUM_VALUE_LIST = Group(
        QUOTED_STRING_WITH_QUOTE + ZeroOrMore(COMMA + QUOTED_STRING_WITH_QUOTE)
    )("enum_value_list")
    ENUM_DEF = (
        CaselessLiteral("ENUM")("column_type")
        + LEFT_PARENTHESES
        + ENUM_VALUE_LIST
        + RIGHT_PARENTHESES
    )
    SET_VALUE_LIST = Group(
        QUOTED_STRING_WITH_QUOTE + ZeroOrMore(COMMA + QUOTED_STRING_WITH_QUOTE)
    )("set_value_list")
    SET_DEF = (
        CaselessLiteral("SET")("column_type")
        + LEFT_PARENTHESES
        + SET_VALUE_LIST
        + RIGHT_PARENTHESES
    )
    DATA_TYPE = (
        INT_DEF
        | FLOAT_DEF
        | DT_DEF
        | SIMPLE_DEF
        | TEXT_DEF
        | CHAR_DEF
        | VARCHAR_DEF
        | ENUM_DEF
        | SET_DEF
        | VARBINARY_DEF
    )

    # Column attributes come after column type and length
    NULLABLE = CaselessLiteral("NULL") | CaselessLiteral("NOT NULL")
    DEFAULT_VALUE = CaselessLiteral("DEFAULT").suppress() + (
        Optional(Literal("b"))("is_bit") + QUOTED_STRING_WITH_QUOTE("default")
        | Combine(
            CaselessLiteral("CURRENT_TIMESTAMP")("default") + Optional(COL_LEN)("ts_len")
        )
        | DECIMAL("default")
        | Word(alphanums + "_" + "-" + "+")("default")
    )
    ON_UPDATE = (
        CaselessLiteral("ON")
        + CaselessLiteral("UPDATE")
        + (
            CaselessLiteral("CURRENT_TIMESTAMP")("on_update")
            + Optional(COL_LEN)("on_update_ts_len")
        )
    )
    AUTO_INCRE = CaselessLiteral("AUTO_INCREMENT")
    UNIQ_KEY = CaselessLiteral("UNIQUE") + Optional(CaselessLiteral("KEY")).suppress()
    PRIMARY_KEY = (
        CaselessLiteral("PRIMARY") + Optional(CaselessLiteral("KEY")).suppress()
    )
    COMMENT = Combine(
        CaselessLiteral("COMMENT").suppress() + QUOTED_STRING_WITH_QUOTE, adjacent=False
    )
    COLUMN_DEF = Group(
        COLUMN_NAME
        + DATA_TYPE
        + ZeroOrMore(
            NULLABLE("nullable")
            | DEFAULT_VALUE
            | ON_UPDATE
            | AUTO_INCRE("auto_increment")
            | UNIQ_KEY("uniq_key")
            | PRIMARY_KEY("primary")
            | COMMENT("comment")
            | CHARSET_DEF
            | COLLATE_DEF
        )
    )
    COLUMN_LIST = Group(COLUMN_DEF + ZeroOrMore(COMMA + COLUMN_DEF))("column_list")
    # Document-store path, e.g. `doc`.`a`.`b`
    DOCUMENT_PATH = Combine(
        COLUMN_NAME_WITH_QUOTE + ZeroOrMore(DOT + COLUMN_NAME_WITH_QUOTE)
    )
    # Index column: either a typed document path ("path AS INT(n)") or a
    # plain column with an optional prefix length.
    IDX_COL = (
        Group(
            DOCUMENT_PATH
            + CaselessLiteral("AS")
            + (CaselessLiteral("INT") | CaselessLiteral("STRING"))
            + Optional(COL_LEN, default="")
        )
    ) | (Group(COLUMN_NAME + Optional(COL_LEN, default="")))

    # Primary key section
    COL_NAME_LIST = Group(IDX_COL + ZeroOrMore(COMMA + IDX_COL))
    IDX_COLS = LEFT_PARENTHESES + COL_NAME_LIST + RIGHT_PARENTHESES
    WORD_PRI_KEY = (
        CaselessLiteral("PRIMARY").suppress() + CaselessLiteral("KEY").suppress()
    )
    KEY_BLOCK_SIZE = (
        CaselessLiteral("KEY_BLOCK_SIZE").suppress()
        + Optional(Literal("="))
        + Word(nums)("idx_key_block_size")
    )
    INDEX_USING = CaselessLiteral("USING").suppress() + (
        CaselessLiteral("BTREE") | CaselessLiteral("HASH")
    )("idx_using")
    INDEX_OPTION = ZeroOrMore(KEY_BLOCK_SIZE | COMMENT("idx_comment") | INDEX_USING)
    PRI_KEY_DEF = COMMA + WORD_PRI_KEY + IDX_COLS("pri_list") + INDEX_OPTION

    # Index section
    KEY_TYPE = (CaselessLiteral("FULLTEXT") | CaselessLiteral("SPATIAL"))("key_type")
    WORD_UNIQUE = CaselessLiteral("UNIQUE")("unique")
    WORD_KEY = CaselessLiteral("INDEX").suppress() | CaselessLiteral("KEY").suppress()
    IDX_NAME = Optional(COLUMN_NAME)
    IDX_DEF = (
        ZeroOrMore(
            Group(
                COMMA
                + Optional(WORD_UNIQUE | KEY_TYPE)
                + WORD_KEY
                + IDX_NAME("index_name")
                + IDX_COLS("index_col_list")
                + INDEX_OPTION
            )
        )
    )("index_section")

    # Constraint section as this is not a recommended way of using MySQL
    # we'll treat the whole section as a string
    CONSTRAINT = Combine(
        ZeroOrMore(
            COMMA
            + Optional(CaselessLiteral("CONSTRAINT"))
            +
            # foreign key name except the key word 'FOREIGN'
            Optional((~CaselessLiteral("FOREIGN") + COLUMN_NAME))
            + CaselessLiteral("FOREIGN")
            + CaselessLiteral("KEY")
            + LEFT_PARENTHESES
            + COL_NAME_LIST
            + RIGHT_PARENTHESES
            + CaselessLiteral("REFERENCES")
            + COLUMN_NAME
            + LEFT_PARENTHESES
            + COL_NAME_LIST
            + RIGHT_PARENTHESES
            + ZeroOrMore(Word(alphanums))
        ),
        adjacent=False,
        joinString=" ",
    )("constraint")

    # Table option section
    ENGINE = (
        CaselessLiteral("ENGINE").suppress()
        + Optional(Literal("=")).suppress()
        + COLUMN_NAME("engine").setParseAction(upcaseTokens)
    )
    DEFAULT_CHARSET = (
        Optional(CaselessLiteral("DEFAULT")).suppress()
        + (
            (
                CaselessLiteral("CHARACTER").suppress()
                + CaselessLiteral("SET").suppress()
            )
            | (CaselessLiteral("CHARSET").suppress())
        )
        + Optional(Literal("=")).suppress()
        + Word(alphanums + "_")("charset")
    )
    TABLE_COLLATE = (
        Optional(CaselessLiteral("DEFAULT")).suppress()
        + CaselessLiteral("COLLATE").suppress()
        + Optional(Literal("=")).suppress()
        + COLLATION_NAME
    )
    ROW_FORMAT = (
        CaselessLiteral("ROW_FORMAT").suppress()
        + Optional(Literal("=")).suppress()
        + Word(alphanums + "_")("row_format").setParseAction(upcaseTokens)
    )
    TABLE_KEY_BLOCK_SIZE = (
        CaselessLiteral("KEY_BLOCK_SIZE").suppress()
        + Optional(Literal("=")).suppress()
        + Word(nums)("key_block_size").setParseAction(lambda s, l, t: [int(t[0])])
    )
    COMPRESSION = (
        CaselessLiteral("COMPRESSION").suppress()
        + Optional(Literal("=")).suppress()
        + Word(alphanums + "_")("compression").setParseAction(upcaseTokens)
    )

    # Parse and make sure auto_increment is an integer
    # parseAction function is defined as fn( s, loc, toks ), where:
    # s is the original parse string
    # loc is the location in the string where matching started
    # toks is the list of the matched tokens, packaged as a ParseResults_
    # object
    TABLE_AUTO_INCRE = (
        CaselessLiteral("AUTO_INCREMENT").suppress()
        + Optional(Literal("=")).suppress()
        + Word(nums)("auto_increment").setParseAction(lambda s, l, t: [int(t[0])])
    )
    TABLE_COMMENT = (
        CaselessLiteral("COMMENT").suppress()
        + Optional(Literal("=")).suppress()
        + QUOTED_STRING_WITH_QUOTE("comment")
    )
    TABLE_OPTION = ZeroOrMore(
        (
            ENGINE
            | DEFAULT_CHARSET
            | TABLE_COLLATE
            | ROW_FORMAT
            | TABLE_KEY_BLOCK_SIZE
            | COMPRESSION
            | TABLE_AUTO_INCRE
            | TABLE_COMMENT
        )
        # Table attributes could be comma separated too.
        + Optional(COMMA).suppress()
    )

    # Partition section
    # Captures the whole "PARTITION BY ..." tail (optionally inside a
    # /*!NNNNN ... */ version comment) as a single string; it is re-parsed
    # in detail by the partitions parser below.
    PARTITION = Optional(
        Combine(
            Combine(Optional(Literal("/*!") + Word(nums)))
            + CaselessLiteral("PARTITION")
            + CaselessLiteral("BY")
            + SkipTo(StringEnd()),
            adjacent=False,
            joinString=" ",
        )("partition")
    )

    # Parse partitions in detail
    # From https://dev.mysql.com/doc/refman/8.0/en/create-table.html
    PART_FIELD_NAME = (
        QuotedString(quoteChar="`", escQuote="``", escChar="\\", unquoteResults=True)
        | OBJECT_NAME
    )
    PART_FIELD_LIST = delimitedList(PART_FIELD_NAME)("field_list")

    # e.g 1, 2, 3
    # and 'a', 'b', 'c'
    # and `NULL`
    # and _binary 0x123aBc
    HEX_VALUE = Literal("0x") + OneOrMore(Regex("[0-9a-fA-F]"))
    day = Word(nums)
    month = Word(nums)
    year = Word(nums)
    dateday = Combine(year + "-" + month + "-" + day)
    to_days = Combine("to_days('" + dateday + "')")
    PART_VALUE_LIST = Group(
        LEFT_PARENTHESES
        + (
            delimitedList(
                Word(nums)  # e.g. (1, 2, 3)
                | QUOTED_STRING_WITH_QUOTE  # e.g. ('a', 'b')
                | CaselessLiteral("NULL").setParseAction(upcaseTokens)  # e.g. (NULL)
                | to_days  # e.g. to_days('2010-11-07')
            )
            | (
                LEFT_PARENTHESES
                + (
                    delimitedList(
                        QUOTED_STRING_WITH_QUOTE
                        | CaselessLiteral("NULL").setParseAction(upcaseTokens)
                    )
                )
                + RIGHT_PARENTHESES
            )(
                "is_tuple"
            )  # e.g. (("a", "b")), See test_parts_list_in_tuple15
            # e.g. `_binary 0xdeadbeef123`, See test_parts_list_by_cols_with_binary17
            # turns to: `_BINARY 0xdeadbeef123`
            | Combine(
                CaselessLiteral("_binary").setParseAction(upcaseTokens)
                + White(" ").setParseAction(replaceWith(" "))
                + HEX_VALUE
            )
        )
        + RIGHT_PARENTHESES
    )
    PART_VALUES_IN = (CaselessLiteral("IN").suppress() + PART_VALUE_LIST)("p_values_in")

    # Note: No expr support although full syntax (allowed by mysql8) is
    # LESS THAN {(expr | value_list) | MAXVALUE}
    PART_VALUES_LESSTHAN = (
        CaselessLiteral("LESS").suppress()
        + CaselessLiteral("THAN").suppress()
        + (CaselessLiteral("MAXVALUE").setParseAction(upcaseTokens) | PART_VALUE_LIST)
    )("p_values_less_than")
    PART_NAME = (
        QuotedString(quoteChar="`", escQuote="``", escChar="\\", unquoteResults=True)
        | OBJECT_NAME
    )("part_name")

    # Options for partition definitions - engine/comments only for now.
    # DO NOT re-use QUOTED_STRING_WITH_QUOTE for these -
    # *seems* to trigger a pyparsing bug?
    P_ENGINE = (
        QuotedString(quoteChar="'", escQuote="''", escChar="\\", unquoteResults=True)
        | QuotedString(
            quoteChar='"',
            escQuote='""',
            escChar="\\",
            multiline=False,
            unquoteResults=True,
        )
        | CaselessLiteral("innodb")
        | CaselessLiteral("ndb")
        | CaselessLiteral("rocksdb")
    )
    P_COMMENT = QuotedString(
        quoteChar="'", escQuote="''", escChar="\\", multiline=True, unquoteResults=False
    ) | QuotedString(
        quoteChar='"', escQuote='""', escChar="\\", multiline=True, unquoteResults=False
    )
    P_OPT_ENGINE = (
        Optional(CaselessLiteral("STORAGE")).suppress()
        + CaselessLiteral("ENGINE").suppress()
        + Optional(Literal("=")).suppress()
        + P_ENGINE.setParseAction(upcaseTokens)("pdef_engine")
    )
    P_OPT_COMMENT = (
        CaselessLiteral("COMMENT").suppress()
        + Optional(Literal("=")).suppress()
        + P_COMMENT("pdef_comment")
    )
    PDEF_OPTIONS = ZeroOrMore((P_OPT_ENGINE | P_OPT_COMMENT))

    # e.g. PARTITION p99 VALUES (LESS THAN|IN) ...
    PART_DEFS = delimitedList(
        Group(
            CaselessLiteral("PARTITION").suppress()
            + PART_NAME
            + CaselessLiteral("VALUES").suppress()
            + (PART_VALUES_LESSTHAN | PART_VALUES_IN)
            + PDEF_OPTIONS
        )
    )

    # No fancy expressions yet, just a list of cols OR something nested in ()
    PART_EXPR = (
        (
            LEFT_PARENTHESES
            + delimitedList(
                QuotedString(
                    quoteChar="`", escQuote="``", escChar="\\", unquoteResults=True
                )
                | OBJECT_NAME
            )
            + RIGHT_PARENTHESES
        )("via_list")
        # `RANGE expr` support (test_parts_range_with_expr)
        | nestedExpr()("via_nested_expr")
    )("p_expr")
    SUBTYPE_LINEAR = (Optional(CaselessLiteral("LINEAR")).setParseAction(upcaseTokens))(
        "p_subtype"
    )

    # Match: [LINEAR] HASH (expr)
    PTYPE_HASH = (
        SUBTYPE_LINEAR
        + (CaselessLiteral("HASH").setParseAction(upcaseTokens))("part_type")
        + nestedExpr()("p_hash_expr")  # Lousy approximation, needs post processing
    )

    # Match: [LINEAR] KEY [ALGORITHM=1|2] (column_list)
    PART_ALGO = (
        CaselessLiteral("ALGORITHM").suppress()
        + Literal("=").suppress()
        + Word(alphanums)
    )("p_algo")
    PTYPE_KEY = (
        SUBTYPE_LINEAR
        + (CaselessLiteral("KEY").setParseAction(upcaseTokens))("part_type")
        + Optional(PART_ALGO)
        + Literal("(")  # don't suppress here
        + Optional(PART_FIELD_LIST)  # e.g. `PARTITION BY KEY() PARTITIONS 2` is valid
        + Literal(")")
    )
    PART_COL_LIST = (
        (CaselessLiteral("COLUMNS").setParseAction(upcaseTokens))("p_subtype")
        + LEFT_PARENTHESES
        + PART_FIELD_LIST
        + RIGHT_PARENTHESES
    )
    PTYPE_RANGE = (CaselessLiteral("RANGE").setParseAction(upcaseTokens))(
        "part_type"
    ) + (PART_COL_LIST | PART_EXPR)
    PTYPE_LIST = (CaselessLiteral("LIST").setParseAction(upcaseTokens))("part_type") + (
        PART_COL_LIST | PART_EXPR
    )

    @classmethod
    def generate_rule(cls):
        # The final rule for the whole statement match
        return (
            cls.WORD_CREATE
            + cls.WORD_TABLE
            + cls.IF_NOT_EXIST
            + cls.TABLE_NAME
            + cls.LEFT_PARENTHESES
            + cls.COLUMN_LIST
            + Optional(cls.PRI_KEY_DEF)
            + cls.IDX_DEF
            + cls.CONSTRAINT
            + cls.RIGHT_PARENTHESES
            + cls.TABLE_OPTION("table_options")
            + cls.PARTITION
        )

    @classmethod
    def get_parser(cls):
        """Return the cached CREATE TABLE parser, building it on first use."""
        if not cls._parser:
            cls._parser = cls.generate_rule()
        return cls._parser

    @classmethod
    def gen_partitions_parser(cls):
        # Init full parts matcher only on demand
        # invalid_partition_prefix - used to detect any invalid prefix
        # attached to the number of partitions. The prefix is used
        # later on to flag invalid schemas.
        return (
            Combine(Optional(Literal("/*!") + Word(nums))).suppress()
            + CaselessLiteral("PARTITION")
            + CaselessLiteral("BY")
            + (cls.PTYPE_HASH | cls.PTYPE_KEY | cls.PTYPE_RANGE | cls.PTYPE_LIST)
            + Optional(
                CaselessLiteral("PARTITIONS")
                + Optional(Combine(Regex("[^0-9]")))("invalid_partition_prefix")
                + Word(nums)("num_partitions")
            )
            + Optional(
                cls.LEFT_PARENTHESES
                + cls.PART_DEFS("part_defs")
                + cls.RIGHT_PARENTHESES
            )
        )

    @classmethod
    def get_partitions_parser(cls):
        """Return the cached partitions parser, building it on first use."""
        if not cls._partitions_parser:
            cls._partitions_parser = cls.gen_partitions_parser()
        return cls._partitions_parser

    @classmethod
    def parse_partitions(cls, parts) -> ParseResults:
        """Parse a "PARTITION BY ..." string; raises ParseError on failure."""
        try:
            return cls.get_partitions_parser().parseString(parts)
        except ParseException as e:
            raise ParseError(f"Error parsing partitions: {e.line}, {e.column}")

    @classmethod
    def parse(cls, sql):
        """Parse a CREATE TABLE statement into a models.Table object.

        Args:
            sql: the statement as str (or bytes, decoded as UTF-8).

        Returns:
            A populated models.Table, including columns, primary key,
            indexes, table options and (if present) partition config.

        Raises:
            ParseError: on unsupported syntax, multiple primary keys, or
                an invalid partitions section.
        """
        try:
            if not isinstance(sql, str):
                sql = sql.decode("utf-8")
            result = cls.get_parser().parseString(sql)
        except ParseException as e:
            raise ParseError(
                "Failed to parse SQL, unsupported syntax: {}".format(e),
                e.line,
                e.column,
            )

        inline_pri_exists = False
        table = models.Table()
        table.name = result.table_name
        table_options = [
            "engine",
            "charset",
            "collate",
            "row_format",
            "key_block_size",
            "compression",
            "auto_increment",
            "comment",
        ]
        for table_option in table_options:
            if table_option in result:
                setattr(table, table_option, result.get(table_option))
        if "partition" in result:
            # pyparsing will convert newline into two after parsing. So we
            # need to dedup here
            table.partition = result.partition.replace("\n\n", "\n")
            try:
                presult = cls.parse_partitions(table.partition)
                table.partition_config = cls.partition_to_model(presult)
            except ParseException as e:
                raise ParseError(
                    f"Failed to parse partitions config, unsupported syntax {e},"
                    f" line: {e.line} col {e.column}"
                )
            except PartitionParseError as mpe:
                raise ParseError(
                    f"Failed to init model from partitions config: {mpe}, "
                    f"ParseResult: {presult.dump()}\nRaw: {table.partition}"
                )
        if "constraint" in result:
            table.constraint = result.constraint
        for column_def in result.column_list:
            if column_def.column_type == "ENUM":
                column = models.EnumColumn()
                for enum_value in column_def.enum_value_list:
                    column.enum_list.append(enum_value)
            elif column_def.column_type == "SET":
                column = models.SetColumn()
                for set_value in column_def.set_value_list:
                    column.set_list.append(set_value)
            elif column_def.column_type in ("TIMESTAMP", "DATETIME"):
                column = models.TimestampColumn()
                if "on_update" in column_def:
                    if "on_update_ts_len" in column_def:
                        column.on_update_current_timestamp = "{}({})".format(
                            column_def.on_update, column_def.on_update_ts_len
                        )
                    else:
                        column.on_update_current_timestamp = column_def.on_update
            else:
                column = models.Column()

            column.name = column_def.column_name
            column.column_type = column_def.column_type
            if column.column_type == "JSON":
                table.has_80_features = True

            # We need to check whether each column property exist in the
            # create table string, because not specifying a "COMMENT" is
            # different from specifying "COMMENT" equals to empty string.
            # The former one will ends up being
            #   column=None
            # and the later one being
            #   column=''
            if "comment" in column_def:
                column.comment = column_def.comment
            if "nullable" in column_def:
                if column_def.nullable == "NULL":
                    column.nullable = True
                elif column_def.nullable == "NOT NULL":
                    column.nullable = False
            if "unsigned" in column_def:
                if column_def.unsigned == "UNSIGNED":
                    column.unsigned = True
            if "default" in column_def:
                if "ts_len" in column_def:
                    column.default = "{}({})".format(
                        column_def.default, column_def.ts_len
                    )
                else:
                    column.default = column_def.default
                if "is_bit" in column_def:
                    column.is_default_bit = True
            if "charset" in column_def:
                column.charset = column_def.charset
            if "length" in column_def:
                column.length = column_def.length
            if "collate" in column_def:
                column.collate = column_def.collate
            if "auto_increment" in column_def:
                column.auto_increment = True
            if "primary" in column_def:
                # Inline "PRIMARY KEY" on the column definition itself.
                idx_col = models.IndexColumn()
                idx_col.name = column_def.column_name
                table.primary_key.column_list.append(idx_col)
                inline_pri_exists = True
            table.column_list.append(column)

        if "pri_list" in result:
            if inline_pri_exists:
                raise ParseError("Multiple primary keys defined")
            table.primary_key.name = "PRIMARY"
            for col in result.pri_list:
                for name, length in col:
                    idx_col = models.IndexColumn()
                    idx_col.name = name
                    if length:
                        idx_col.length = length
                    table.primary_key.column_list.append(idx_col)
        if "idx_key_block_size" in result:
            # BUG FIX: this previously read result.pri_key_block_size, a
            # results name that is never set by the grammar (only
            # "idx_key_block_size" exists), so the value was always ''.
            table.primary_key.key_block_size = result.idx_key_block_size
        if "idx_comment" in result:
            table.primary_key.comment = result.idx_comment
        if "index_section" in result:
            for idx_def in result.index_section:
                idx = models.TableIndex()
                idx.name = idx_def.index_name
                if "idx_key_block_size" in idx_def:
                    idx.key_block_size = idx_def.idx_key_block_size
                if "idx_comment" in idx_def:
                    idx.comment = idx_def.idx_comment
                if "idx_using" in idx_def:
                    idx.using = idx_def.idx_using
                if "key_type" in idx_def:
                    idx.key_type = idx_def.key_type
                if "unique" in idx_def:
                    idx.is_unique = True
                for col in idx_def.index_col_list:
                    for col_def in col:
                        # A 4-tuple with "AS" in second position is a typed
                        # document-store path; otherwise (name, length).
                        if len(col_def) == 4 and col_def[1].upper() == "AS":
                            (document_path, word_as, key_type, length) = col_def
                            idx_col = models.DocStoreIndexColumn()
                            idx_col.document_path = document_path
                            idx_col.key_type = key_type
                            if length:
                                idx_col.length = length
                            idx.column_list.append(idx_col)
                        else:
                            (name, length) = col_def
                            idx_col = models.IndexColumn()
                            idx_col.name = name
                            if length:
                                idx_col.length = length
                            idx.column_list.append(idx_col)
                table.indexes.append(idx)
        return table

    @classmethod
    def partition_to_model(cls, presult: ParseResults) -> models.PartitionConfig:
        # Convert ParseResults from parsing a partitions config into a
        # model. This can throw a PartitionParseError
        mytype = presult.get("part_type", None)
        mysubtype = presult.get("p_subtype", None)
        if (
            (not mytype and not mysubtype)
            or mytype not in models.PartitionConfig.KNOWN_PARTITION_TYPES
            or (
                mysubtype is not None
                and mysubtype not in models.PartitionConfig.KNOWN_PARTITION_SUBTYPES
            )
        ):
            raise PartitionParseError(
                "partition_to_model Cannot init mode.PartitionConfig: "
                f"type {mytype} subtype {mysubtype}"
            )
        pc = models.PartitionConfig()
        pc.part_type = mytype
        pc.p_subtype = mysubtype

        # Recursively remove backticks from a string or (nested) list.
        def _strip_ticks(fields: Union[str, List[str]]) -> Union[str, List[str]]:
            if isinstance(fields, str):
                return fields.replace("`", "")
            return [_strip_ticks(f) for f in fields]

        if presult.get("invalid_partition_prefix"):
            raise PartitionParseError(
                f"Partition type {pc.part_type} cannot "
                "have invalid partition number prefix defined"
            )

        # set fields_or_expr, full_type
        if (
            pc.part_type == models.PartitionConfig.PTYPE_LIST
            or pc.part_type == models.PartitionConfig.PTYPE_RANGE
        ):
            pc.num_partitions = len(presult.get("part_defs", []))
            if pc.num_partitions == 0:
                raise PartitionParseError(
                    f"Partition type {pc.part_type} MUST have partitions defined"
                )
            pc.part_defs = _process_partition_definitions(presult.part_defs)
            if not pc.p_subtype:
                pc.full_type = pc.part_type
                pc.via_nested_expr = (
                    "via_nested_expr" in presult and "via_list" not in presult
                )
                pc.fields_or_expr = presult.p_expr.asList()
                if pc.via_nested_expr:
                    # strip backticks e.g. to_days(`date`) -> [to_days, [date]]
                    pc.fields_or_expr = _strip_ticks(pc.fields_or_expr)
            else:
                pc.full_type = f"{pc.part_type} {pc.p_subtype}"
                pc.fields_or_expr = presult.field_list.asList()
        elif pc.part_type == models.PartitionConfig.PTYPE_KEY:
            pc.full_type = (
                pc.part_type if not pc.p_subtype else f"{pc.p_subtype} {pc.part_type}"
            )
            pc.num_partitions = int(presult.get("num_partitions", 1))
            fl = presult.get("field_list", None)
            pc.fields_or_expr = fl.asList() if fl else []
            # This is the only place p_algo is valid. algorithm_for_key
            algo_result = presult.get("p_algo")
            if algo_result and len(algo_result.asList()) > 0:
                pc.algorithm_for_key = int(algo_result.asList()[0])
        elif pc.part_type == models.PartitionConfig.PTYPE_HASH:
            pc.full_type = (
                pc.part_type if not pc.p_subtype else f"{pc.p_subtype} {pc.part_type}"
            )
            pc.num_partitions = int(presult.get("num_partitions", 1))
            hexpr = presult.get("p_hash_expr", None)
            if not hexpr:
                raise PartitionParseError(
                    f"Partition type {pc.part_type} MUST have p_hash_expr defined"
                )
            pc.fields_or_expr = _strip_ticks(hexpr.asList())
        else:
            # unreachable since we checked for all part_types earlier.
            raise PartitionParseError(f"Unknown partition type {pc.part_type}")

        # We avoid escaping fields/expr in partitions with backticks since
        # its tricky to distinguish between a list of columns and an expression
        # e.g. unix_timestamp(ts) - ts could be escaped but unix_ts cannot.
        # Our parser will strip out backticks wherever possible. For nestedExpr
        # usecases, this is done via _strip_ticks instead.
        def _has_backticks(fields: Union[str, List[str]]) -> bool:
            if isinstance(fields, list):
                return any(_has_backticks(f) for f in fields)
            return "`" in fields if isinstance(fields, str) else False

        if _has_backticks(pc.fields_or_expr):
            raise PartitionParseError(
                f"field_or_expr cannot have backticks {pc.fields_or_expr}"
            )
        if len(pc.part_defs) > 0 and any(
            pd.pdef_name.upper() == "NULL" for pd in pc.part_defs
        ):
            # We will disallow this even if raw sql passed in as e.g.
            # PARTITION `null` VALUES IN ...
            raise PartitionParseError("Partition names may not be literal `null`")
        return pc