def __init__(self, global_config, logger, out_dir="/tmp/"): self.hexify = global_config.hexify self.logger = logger self.out_dir = out_dir self.my_tables = {} self.table_file = {} self.mysql_con = mysql_connection(global_config) self.mysql_con.connect_db() self.get_table_metadata() self.my_streamer = None self.replica_batch_size = self.mysql_con.replica_batch_size self.master_status = [] self.id_batch = None self.sql_token = sql_token() self.pause_on_reindex = global_config.pause_on_reindex
def __init__(self, global_config, logger):
    """
    Class constructor
    """
    self.hexify = global_config.hexify
    self.logger = logger
    self.out_dir = global_config.out_dir
    self.my_tables = {}
    self.table_file = {}
    self.mysql_con = mysql_connection(global_config)
    self.mysql_con.connect_db()
    self.get_table_metadata()
    self.my_streamer = None
    self.replica_batch_size = self.mysql_con.replica_batch_size
    self.master_status = []
    self.id_batch = None
    self.sql_token = sql_token()
    self.pause_on_reindex = global_config.pause_on_reindex
    self.stat_skip = ['BEGIN', 'COMMIT']
    self.tables_limit = global_config.tables_limit
    self.my_schema = global_config.my_database
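# ---------------------------------------------------------------------------
# Hypothetical sketch of the global_config attributes the constructor above
# reads directly. The attribute names come from the code above; the values are
# placeholders, and mysql_connection(global_config) also needs its own MySQL
# connection settings that are not visible in this fragment, so this namespace
# only illustrates the expected shape of the object.
# ---------------------------------------------------------------------------
from types import SimpleNamespace

example_config = SimpleNamespace(
    hexify=['blob', 'tinyblob', 'mediumblob', 'longblob', 'binary', 'varbinary', 'geometry'],  # column types replayed as hex strings
    out_dir='/tmp/',
    tables_limit=[],
    my_database='sakila',
    pause_on_reindex=False,
)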
ALTER TABLE `test` DROP PRIMARY KEY;
"""
#statement="""alter table test_pk drop primary key;"""
#statement="""ALTER TABLE test ADD COLUMN `dkdkd` timestamp NULL;"""
#statement="""create table test_pk (id int ,PRIMARY KEY (id) ); """
#statement="""alter table test change date_create_new date_create_new timestamp;"""
#statement="""ALTER TABLE `test_table` MODIFY `test_column` bigint(20) DEFAULT NULL;
#ALTER TABLE table2 CHANGE column1 column2 bigint(20);
#ALTER TABLE `test_table` MODIFY `test_column` enum('blah','dd') DEFAULT NULL; """
#statement="""ALTER TABLE `test_table` ADD UNIQUE INDEX `idx_unique` (`log`, `status`);"""
#statement = """CREATE TABLE test (id integer null auto_increment primary key, `test_col` bigint(20)) ;"""
#statement = """CREATE TABLE test (id integer auto_increment, primary key(id) )"""
statement = """TRUNCATE table `test`;"""
token_sql = sql_token()
token_sql.parse_sql(statement)
print(token_sql.tokenised)
for token in token_sql.tokenised:
    print(token)
#    if token["command"] == "ALTER TABLE":
#        alter_cmd = token["alter_cmd"][0]
#        if alter_cmd["command"] == "MODIFY" and alter_cmd["type"] == 'enum':
#            print(alter_cmd["dimension"].split(','))
#else:
def read_replica_stream(self, batch_data):
    """
    Stream the replica using the batch data. This method evaluates the different events streamed from MySQL
    and manages them accordingly.
    The BinLogStreamReader function is called with the only_events parameter, which restricts the event types
    received by the streamer.
    The events managed are the following.
    RotateEvent, which occurs when MySQL restarts or the binary log file changes.
    QueryEvent, which occurs when a new row image comes in (BEGIN statement) or a DDL is executed.
    The BEGIN is always skipped. The DDL is parsed using the sql_token class.
    [Write,Update,Delete]RowEvents are the row images pulled from the MySQL replica.

    The RotateEvent and the QueryEvent cause the batch to be closed.

    The for loop reads the row events, builds a dictionary carrying information such as the destination schema
    and the binlog coordinates, and stores it in the group_insert list.
    When the number of events exceeds replica_batch_size, the group_insert list is written into PostgreSQL.
    The batch is not closed in that case and the method exits only when there are no more rows available in
    the stream. Therefore replica_batch_size sets only the maximum size of a single insert and of the batch
    replayed on PostgreSQL. A binlog switch or a captured DDL determines whether a batch is closed and processed.

    The update row event stores the row image before the update in the separate key event_before. This is
    required to support updates where the primary key is updated as well.

    Each row event is scanned for data types requiring conversion to hex string.

    :param batch_data: The list with the master's batch data.
    :return: the batch's data composed of binlog name, binlog position and last event timestamp read from the
        mysql replica stream.
    :rtype: dictionary
    """
    skip_tables = None
    size_insert = 0
    if self.skip_tables:
        skip_tables = [table.split('.')[1] for table in self.skip_tables]

    sql_tokeniser = sql_token()
    table_type_map = self.get_table_type_map()
    inc_tables = self.pg_engine.get_inconsistent_tables()
    close_batch = False
    master_data = {}
    group_insert = []

    id_batch = batch_data[0][0]
    log_file = batch_data[0][1]
    log_position = batch_data[0][2]
    log_table = batch_data[0][3]

    my_stream = BinLogStreamReader(
        connection_settings=self.replica_conn,
        server_id=self.my_server_id,
        only_events=[RotateEvent, DeleteRowsEvent, WriteRowsEvent, UpdateRowsEvent, QueryEvent],
        log_file=log_file,
        log_pos=log_position,
        resume_stream=True,
        only_schemas=self.schema_replica,
        ignored_tables=skip_tables,
    )
    self.logger.debug("log_file %s, log_position %s. id_batch: %s " % (log_file, log_position, id_batch))

    for binlogevent in my_stream:
        if isinstance(binlogevent, RotateEvent):
            event_time = binlogevent.timestamp
            binlogfile = binlogevent.next_binlog
            position = binlogevent.position
            self.logger.debug("ROTATE EVENT - binlogfile %s, position %s. " % (binlogfile, position))
            if log_file != binlogfile:
                close_batch = True
            if close_batch:
                if log_file != binlogfile:
                    master_data["File"] = binlogfile
                    master_data["Position"] = position
                    master_data["Time"] = event_time
                if len(group_insert) > 0:
                    self.pg_engine.write_batch(group_insert)
                    group_insert = []
                my_stream.close()
                return [master_data, close_batch]
        elif isinstance(binlogevent, QueryEvent):
            event_time = binlogevent.timestamp
            try:
                schema_query = binlogevent.schema.decode()
            except:
                schema_query = binlogevent.schema
            if binlogevent.query.strip().upper() not in self.statement_skip and schema_query in self.schema_mappings:
                close_batch = True
                destination_schema = self.schema_mappings[schema_query]
                log_position = binlogevent.packet.log_pos
                master_data["File"] = binlogfile
                master_data["Position"] = log_position
                master_data["Time"] = event_time
                if len(group_insert) > 0:
                    self.pg_engine.write_batch(group_insert)
                    group_insert = []
                self.logger.info("QUERY EVENT - binlogfile %s, position %s.\n--------\n%s\n--------" % (binlogfile, log_position, binlogevent.query))
                sql_tokeniser.parse_sql(binlogevent.query)
                for token in sql_tokeniser.tokenised:
                    write_ddl = True
                    table_name = token["name"]
                    table_key_dic = "%s.%s" % (destination_schema, table_name)
                    if table_key_dic in inc_tables:
                        write_ddl = False
                        log_seq = int(log_file.split('.')[1])
                        log_pos = int(log_position)
                        table_dic = inc_tables[table_key_dic]
                        if log_seq > table_dic["log_seq"]:
                            write_ddl = True
                        elif log_seq == table_dic["log_seq"] and log_pos >= table_dic["log_pos"]:
                            write_ddl = True
                        if write_ddl:
                            self.logger.info("CONSISTENT POINT FOR TABLE %s REACHED - binlogfile %s, position %s" % (table_key_dic, binlogfile, log_position))
                            self.pg_engine.set_consistent_table(table_name, destination_schema)
                            inc_tables = self.pg_engine.get_inconsistent_tables()
                    if write_ddl:
                        event_time = binlogevent.timestamp
                        self.logger.debug("TOKEN: %s" % (token))
                        if len(token) > 0:
                            query_data = {
                                "binlog": log_file,
                                "logpos": log_position,
                                "schema": destination_schema,
                                "batch_id": id_batch,
                                "log_table": log_table
                            }
                            self.pg_engine.write_ddl(token, query_data, destination_schema)
                sql_tokeniser.reset_lists()
                if close_batch:
                    my_stream.close()
                    return [master_data, close_batch]
        else:
            for row in binlogevent.rows:
                event_after = {}
                event_before = {}
                event_insert = {}
                add_row = True
                log_file = binlogfile
                log_position = binlogevent.packet.log_pos
                table_name = binlogevent.table
                event_time = binlogevent.timestamp
                schema_row = binlogevent.schema
                destination_schema = self.schema_mappings[schema_row]
                table_key_dic = "%s.%s" % (destination_schema, table_name)
                if table_key_dic in inc_tables:
                    table_consistent = False
                    log_seq = int(log_file.split('.')[1])
                    log_pos = int(log_position)
                    table_dic = inc_tables[table_key_dic]
                    if log_seq > table_dic["log_seq"]:
                        table_consistent = True
                    elif log_seq == table_dic["log_seq"] and log_pos >= table_dic["log_pos"]:
                        table_consistent = True
                        self.logger.info("CONSISTENT POINT FOR TABLE %s REACHED - binlogfile %s, position %s" % (table_key_dic, binlogfile, log_position))
                    if table_consistent:
                        add_row = True
                        self.pg_engine.set_consistent_table(table_name, destination_schema)
                        inc_tables = self.pg_engine.get_inconsistent_tables()
                    else:
                        add_row = False
                column_map = table_type_map[schema_row][table_name]
                global_data = {
                    "binlog": log_file,
                    "logpos": log_position,
                    "schema": destination_schema,
                    "table": table_name,
                    "batch_id": id_batch,
                    "log_table": log_table,
                    "event_time": event_time
                }
                if add_row:
                    if isinstance(binlogevent, DeleteRowsEvent):
                        global_data["action"] = "delete"
                        event_after = row["values"]
                    elif isinstance(binlogevent, UpdateRowsEvent):
                        global_data["action"] = "update"
                        event_after = row["after_values"]
                        event_before = row["before_values"]
                    elif isinstance(binlogevent, WriteRowsEvent):
                        global_data["action"] = "insert"
                        event_after = row["values"]
                    for column_name in event_after:
                        column_type = column_map[column_name]
                        if column_type in self.hexify and event_after[column_name]:
                            event_after[column_name] = binascii.hexlify(event_after[column_name]).decode()
                        elif column_type in self.hexify and isinstance(event_after[column_name], bytes):
                            event_after[column_name] = ''
                    for column_name in event_before:
                        column_type = column_map[column_name]
                        if column_type in self.hexify and event_before[column_name]:
                            event_before[column_name] = binascii.hexlify(event_before[column_name]).decode()
                        elif column_type in self.hexify and isinstance(event_before[column_name], bytes):
                            event_before[column_name] = ''
                    event_insert = {"global_data": global_data, "event_after": event_after, "event_before": event_before}
                    size_insert += len(str(event_insert))
                    group_insert.append(event_insert)
                master_data["File"] = log_file
                master_data["Position"] = log_position
                master_data["Time"] = event_time
                if len(group_insert) >= self.replica_batch_size:
                    self.logger.info("Max rows per batch reached. Writing %s rows. Size in bytes: %s" % (len(group_insert), size_insert))
                    self.logger.debug("Master coordinates: %s" % (master_data, ))
                    self.pg_engine.write_batch(group_insert)
                    size_insert = 0
                    group_insert = []
                    close_batch = True

    my_stream.close()
    if len(group_insert) > 0:
        self.logger.debug("writing the last %s events" % (len(group_insert), ))
        self.pg_engine.write_batch(group_insert)
        close_batch = True

    return [master_data, close_batch]
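# ---------------------------------------------------------------------------
# A minimal, self-contained sketch (not part of pg_chameleon) of the event-loop
# pattern documented in read_replica_stream above: stream row images from the
# MySQL binlog with python-mysql-replication and flush them in bounded batches,
# the way read_replica_stream caps each write with replica_batch_size.
# The connection settings, server_id and batch size are placeholder assumptions.
# ---------------------------------------------------------------------------
from pymysqlreplication import BinLogStreamReader
from pymysqlreplication.row_event import (
    DeleteRowsEvent,
    UpdateRowsEvent,
    WriteRowsEvent,
)


def stream_row_images(batch_size=1000):
    """Read row events and flush them in groups of at most batch_size rows."""
    stream = BinLogStreamReader(
        connection_settings={"host": "localhost", "port": 3306, "user": "replica", "passwd": "secret"},
        server_id=100,
        only_events=[DeleteRowsEvent, UpdateRowsEvent, WriteRowsEvent],
        resume_stream=True,
        blocking=False,
    )
    group_insert = []
    for binlogevent in stream:
        for row in binlogevent.rows:
            # Update events also carry the row image before the change, exactly
            # as read_replica_stream stores it under the event_before key.
            if isinstance(binlogevent, UpdateRowsEvent):
                event_after, event_before = row["after_values"], row["before_values"]
            else:
                event_after, event_before = row["values"], {}
            group_insert.append({"event_after": event_after, "event_before": event_before})
            if len(group_insert) >= batch_size:
                print("flushing %s rows" % len(group_insert))  # stand-in for pg_engine.write_batch
                group_insert = []
    stream.close()
    if group_insert:
        print("flushing the last %s rows" % len(group_insert))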
    ADD COLUMN new_enum ENUM('asd','r') NOT NULL AFTER `log`,
    ADD COLUMN status INT(10) UNSIGNED NOT NULL AFTER `new_enum`;
ALTER TABLE `test`
    DROP COLUMN `count`,
    ADD COLUMN newstatus INT(10) UNSIGNED NOT NULL AFTER `log`;
ALTER TABLE `test` DROP PRIMARY KEY;
"""
# statement is reassigned several times below; only the last assignment is parsed.
statement = """ALTER TABLE t_user_info ADD (
    group_id INT(11) UNSIGNED DEFAULT NULL,
    contact_phone VARCHAR(20) DEFAULT NULL
);"""
statement = """ALTER TABLE foo RENAME TO bar;"""
statement = """RENAME TABLE `sakila`.`test_partition` TO `sakila`.`_test_partition_old`, `_test_partition_new` TO `test_partition`;"""
#statement="""ALTER TABLE foo MODIFY bar INT UNSIGNED DEFAULT NULL;"""
#statement="""ALTER TABLE foo change bar bar INT UNSIGNED;"""
statement = """ALTER TABLE `some_sch`.`my_great_table` CHANGE COLUMN `IMEI` `IMEI` VARCHAR(255) NULL DEFAULT NULL COMMENT 'IMEI datatype changed'"""
token_sql = sql_token()
token_sql.parse_sql(statement)
print(token_sql.tokenised)
#for token in token_sql.tokenised:
#    print(token)
#    for column in token["columns"]:
#        print(column)
#else:
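# A hedged follow-up example: the replica code above consumes sql_token's output
# through the "command" and "name" keys, so a compact way to inspect a parsed
# statement is to print just those fields. Any other keys (and the exact command
# strings emitted) depend on the statement type and are not assumed here.
for token in token_sql.tokenised:
    print("command=%s table=%s" % (token.get("command"), token.get("name")))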