Example #1
    def __init__(self, global_config, logger, out_dir="/tmp/"):
        self.hexify = global_config.hexify
        self.logger = logger
        self.out_dir = out_dir
        self.my_tables = {}
        self.table_file = {}
        self.mysql_con = mysql_connection(global_config)
        self.mysql_con.connect_db()
        self.get_table_metadata()
        self.my_streamer = None
        self.replica_batch_size = self.mysql_con.replica_batch_size
        self.master_status = []
        self.id_batch = None
        self.sql_token = sql_token()
        self.pause_on_reindex = global_config.pause_on_reindex
Example #2
    def __init__(self, global_config, logger):
        """
			Class constructor
		"""
        self.hexify = global_config.hexify
        self.logger = logger
        self.out_dir = global_config.out_dir
        self.my_tables = {}
        self.table_file = {}
        self.mysql_con = mysql_connection(global_config)
        self.mysql_con.connect_db()
        self.get_table_metadata()
        self.my_streamer = None
        self.replica_batch_size = self.mysql_con.replica_batch_size
        self.master_status = []
        self.id_batch = None
        self.sql_token = sql_token()
        self.pause_on_reindex = global_config.pause_on_reindex
        self.stat_skip = ['BEGIN', 'COMMIT']
        self.tables_limit = global_config.tables_limit
        self.my_schema = global_config.my_database
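
A hedged construction sketch for the constructor above (not taken from the project): it assumes the method belongs to pg_chameleon's MySQL engine class (called mysql_engine here) and stubs global_config with only the attributes the constructor reads; the values are illustrative.

import logging

class DummyGlobalConfig:
    """Hypothetical stand-in for pg_chameleon's global configuration object."""
    hexify = ["blob", "binary", "varbinary"]   # illustrative list of column types to convert to hex strings
    out_dir = "/tmp/"
    pause_on_reindex = True
    tables_limit = []
    my_database = "sakila"
    # the connection parameters consumed by mysql_connection() would live here as well

logger = logging.getLogger("replica")
# Instantiating opens the MySQL connection and loads the table metadata, so it needs a reachable database:
# my_engine = mysql_engine(DummyGlobalConfig(), logger)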
Example #3
# tail of a longer multi-statement example (the beginning is not shown)
statement = """
ALTER TABLE `test` DROP PRIMARY KEY;
"""
#statement="""alter table test_pk drop primary key;"""
#statement="""ALTER TABLE test ADD COLUMN `dkdkd` timestamp NULL;"""
#statement="""create table test_pk (id int ,PRIMARY KEY  (id) ); """
#statement="""alter table test change   date_create_new date_create_new timestamp;"""
#statement="""ALTER TABLE `test_table` MODIFY `test_column` bigint(20) DEFAULT NULL; 
#ALTER TABLE table2 CHANGE column1 column2 bigint(20);
#ALTER TABLE `test_table` MODIFY `test_column` enum('blah','dd') DEFAULT NULL; """
#statement="""ALTER TABLE `test_table` ADD UNIQUE INDEX `idx_unique` (`log`, `status`);"""
#statement = """CREATE TABLE test (id integer null auto_increment primary key, `test_col` bigint(20)) ;"""
#statement = """CREATE TABLE test (id integer auto_increment, primary key(id)  )"""
statement = """TRUNCATE table     


`test`;"""
token_sql=sql_token()
token_sql.parse_sql(statement)
print (token_sql.tokenised)
for token in token_sql.tokenised:
	print(token)
#	if   token["command"]=="ALTER TABLE":
#		alter_cmd = token["alter_cmd"][0]
#		if alter_cmd["command"] == "MODIFY" and alter_cmd["type"] == 'enum':
#			print(alter_cmd["dimension"].split(','))
	#else:	
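
A hedged, runnable variant of the commented-out branch above; the key names ("command", "alter_cmd", "type", "dimension") come from that comment, while the exact token layout depends on the sql_token version in use.

for token in token_sql.tokenised:
	if token.get("command") == "ALTER TABLE":
		# alter_cmd is a list of sub-commands parsed from the ALTER TABLE statement
		for alter_cmd in token.get("alter_cmd", []):
			# enum columns carry their allowed values in the "dimension" field
			if alter_cmd.get("command") == "MODIFY" and alter_cmd.get("type") == "enum":
				print(alter_cmd["dimension"].split(","))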
	
Example #4
	def read_replica_stream(self, batch_data):
		"""
		Stream the replica using the given batch data. This method evaluates the different events streamed from MySQL
		and manages them accordingly. The BinLogStreamReader is created with the only_events parameter, which
		restricts the event types received by the streamer.
		The events managed are the following.
		RotateEvent, which happens when mysql restarts or when the binary log file changes.
		QueryEvent, which happens when a new row image comes in (BEGIN statement) or when a DDL is executed.
		The BEGIN is always skipped. The DDL is parsed using the sql_token class.
		[Write,Update,Delete]RowsEvents are the row images pulled from the mysql replica.

		The RotateEvent and the QueryEvent cause the batch to be closed.

		The for loop reads the row events, builds the dictionary carrying information like the destination schema and
		the binlog coordinates, and stores it in the group_insert list.
		When the number of events exceeds replica_batch_size, the group_insert list is written to PostgreSQL.
		The batch is not closed in that case and the method exits only if there are no more rows available in the stream.
		Therefore replica_batch_size is just the maximum size of a single insert and of the batch replayed on PostgreSQL.
		The binlog switch or a captured DDL determines whether a batch is closed and processed.

		The update row event stores the row image before the update in a separate key, event_before. This is required
		to support updates where the primary key is changed as well.

		Each row event is scanned for data types requiring conversion to a hex string.

		:param batch_data: The list with the master's batch data.
		:return: the batch's master data (binlog name, binlog position and last event timestamp read from the mysql replica stream) along with the close_batch flag.
		:rtype: list
		"""
		skip_tables = None
		size_insert=0
		if self.skip_tables:
			skip_tables = [table.split('.')[1] for table in self.skip_tables]
		
		sql_tokeniser = sql_token()
		table_type_map = self.get_table_type_map()	
		inc_tables = self.pg_engine.get_inconsistent_tables()
		close_batch = False
		master_data = {}
		group_insert = []
		
		id_batch = batch_data[0][0]
		log_file = batch_data[0][1]
		log_position = batch_data[0][2]
		log_table = batch_data[0][3]
		
		my_stream = BinLogStreamReader(
			connection_settings = self.replica_conn, 
			server_id = self.my_server_id, 
			only_events = [RotateEvent, DeleteRowsEvent, WriteRowsEvent, UpdateRowsEvent, QueryEvent], 
			log_file = log_file, 
			log_pos = log_position, 
			resume_stream = True, 
			only_schemas = self.schema_replica, 
			ignored_tables = skip_tables, 
		)
		self.logger.debug("log_file %s, log_position %s. id_batch: %s " % (log_file, log_position, id_batch))
		
		for binlogevent in my_stream:
			if isinstance(binlogevent, RotateEvent):
				event_time = binlogevent.timestamp
				binlogfile = binlogevent.next_binlog
				position = binlogevent.position
				self.logger.debug("ROTATE EVENT - binlogfile %s, position %s. " % (binlogfile, position))
				if log_file != binlogfile:
					close_batch = True
				if close_batch:
					if log_file!=binlogfile:
						master_data["File"]=binlogfile
						master_data["Position"]=position
						master_data["Time"]=event_time
					if len(group_insert)>0:
						self.pg_engine.write_batch(group_insert)
						group_insert=[]
					my_stream.close()
					return [master_data, close_batch]
			elif isinstance(binlogevent, QueryEvent):
				event_time = binlogevent.timestamp
				try:
					schema_query = binlogevent.schema.decode()
				except:
					schema_query = binlogevent.schema
				
				if binlogevent.query.strip().upper() not in self.statement_skip and schema_query in self.schema_mappings: 
					close_batch=True
					destination_schema = self.schema_mappings[schema_query]
					log_position = binlogevent.packet.log_pos
					master_data["File"] = binlogfile
					master_data["Position"] = log_position
					master_data["Time"] = event_time
					if len(group_insert)>0:
						self.pg_engine.write_batch(group_insert)
						group_insert=[]
					self.logger.info("QUERY EVENT - binlogfile %s, position %s.\n--------\n%s\n-------- " % (binlogfile, log_position, binlogevent.query))
					sql_tokeniser.parse_sql(binlogevent.query)
					for token in sql_tokeniser.tokenised:
						write_ddl = True
						table_name = token["name"] 
						table_key_dic = "%s.%s" % (destination_schema, table_name)
						if table_key_dic in inc_tables:
							write_ddl = False
							log_seq = int(log_file.split('.')[1])
							log_pos = int(log_position)
							table_dic = inc_tables[table_key_dic]
							if log_seq > table_dic["log_seq"]:
								write_ddl = True
							elif log_seq == table_dic["log_seq"] and log_pos >= table_dic["log_pos"]:
								write_ddl = True
							if write_ddl:
								self.logger.info("CONSISTENT POINT FOR TABLE %s REACHED  - binlogfile %s, position %s" % (table_key_dic, binlogfile, log_position))
								self.pg_engine.set_consistent_table(table_name, destination_schema)
								inc_tables = self.pg_engine.get_inconsistent_tables()
						if write_ddl:
							event_time = binlogevent.timestamp
							self.logger.debug("TOKEN: %s" % (token))
							
							if len(token)>0:
								query_data={
									"binlog":log_file, 
									"logpos":log_position, 
									"schema": destination_schema, 
									"batch_id":id_batch, 
									"log_table":log_table
								}
								self.pg_engine.write_ddl(token, query_data, destination_schema)
								
							
						
					sql_tokeniser.reset_lists()
				if close_batch:
					my_stream.close()
					return [master_data, close_batch]
			else:
				
				for row in binlogevent.rows:
					event_after={}
					event_before={}
					event_insert = {}
					add_row = True
					log_file=binlogfile
					log_position=binlogevent.packet.log_pos
					table_name=binlogevent.table
					event_time=binlogevent.timestamp
					schema_row = binlogevent.schema
					destination_schema = self.schema_mappings[schema_row]
					table_key_dic = "%s.%s" % (destination_schema, table_name)
					if table_key_dic in inc_tables:
						table_consistent = False
						log_seq = int(log_file.split('.')[1])
						log_pos = int(log_position)
						table_dic = inc_tables[table_key_dic]
						if log_seq > table_dic["log_seq"]:
							table_consistent = True
						elif log_seq == table_dic["log_seq"] and log_pos >= table_dic["log_pos"]:
							table_consistent = True
							self.logger.info("CONSISTENT POINT FOR TABLE %s REACHED  - binlogfile %s, position %s" % (table_key_dic, binlogfile, log_position))
						if table_consistent:
							add_row = True
							self.pg_engine.set_consistent_table(table_name, destination_schema)
							inc_tables = self.pg_engine.get_inconsistent_tables()
						else:
							add_row = False
					
					column_map=table_type_map[schema_row][table_name]
					global_data={
										"binlog":log_file, 
										"logpos":log_position, 
										"schema": destination_schema, 
										"table": table_name, 
										"batch_id":id_batch, 
										"log_table":log_table, 
										"event_time":event_time
									}
					if add_row:
						if isinstance(binlogevent, DeleteRowsEvent):
							global_data["action"] = "delete"
							event_after=row["values"]
						elif isinstance(binlogevent, UpdateRowsEvent):
							global_data["action"] = "update"
							event_after=row["after_values"]
							event_before=row["before_values"]
						elif isinstance(binlogevent, WriteRowsEvent):
							global_data["action"] = "insert"
							event_after=row["values"]
						for column_name in event_after:
							column_type=column_map[column_name]
							if column_type in self.hexify and event_after[column_name]:
								event_after[column_name]=binascii.hexlify(event_after[column_name]).decode()
							elif column_type in self.hexify and isinstance(event_after[column_name], bytes):
								event_after[column_name] = ''
						for column_name in event_before:
							column_type=column_map[column_name]
							if column_type in self.hexify and event_before[column_name]:
								event_before[column_name]=binascii.hexlify(event_before[column_name]).decode()
							elif column_type in self.hexify and isinstance(event_before[column_name], bytes):
								event_before[column_name] = ''
						event_insert={"global_data":global_data,"event_after":event_after,  "event_before":event_before}
						size_insert += len(str(event_insert))
						group_insert.append(event_insert)
						
					master_data["File"]=log_file
					master_data["Position"]=log_position
					master_data["Time"]=event_time
					
					if len(group_insert)>=self.replica_batch_size:
						self.logger.info("Max rows per batch reached. Writing %s. rows. Size in bytes: %s " % (len(group_insert), size_insert))
						self.logger.debug("Master coordinates: %s" % (master_data, ))
						self.pg_engine.write_batch(group_insert)
						size_insert=0
						group_insert=[]
						close_batch=True
						
						
						
		my_stream.close()
		if len(group_insert)>0:
			self.logger.debug("writing the last %s events" % (len(group_insert), ))
			self.pg_engine.write_batch(group_insert)
			close_batch=True
		
		return [master_data, close_batch]
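
A minimal, self-contained sketch of the hexify conversion described in the docstring above; the hexify list and the column/type map are assumptions, not values taken from the project.

import binascii

hexify = ["blob", "binary", "varbinary"]          # assumed stand-in for self.hexify
column_map = {"id": "int", "payload": "blob"}     # hypothetical column -> MySQL type map
event_after = {"id": 1, "payload": b"\x00\xff"}   # row image as pulled from the binlog stream

for column_name in event_after:
	column_type = column_map[column_name]
	if column_type in hexify and event_after[column_name]:
		# bytes payloads are converted to their hex representation before being written to PostgreSQL
		event_after[column_name] = binascii.hexlify(event_after[column_name]).decode()
	elif column_type in hexify and isinstance(event_after[column_name], bytes):
		# empty bytes payloads are replaced with an empty string
		event_after[column_name] = ""

print(event_after)   # {'id': 1, 'payload': '00ff'}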
Example #5
# tail of a longer multi-statement example (the beginning is not shown)
statement = """
ADD COLUMN new_enum ENUM('asd','r') NOT NULL AFTER `log`,
ADD COLUMN status INT(10) UNSIGNED NOT NULL AFTER `new_enum`;


ALTER TABLE `test`
DROP COLUMN `count` ,
ADD COLUMN newstatus INT(10) UNSIGNED NOT NULL AFTER `log`;

ALTER TABLE `test` DROP PRIMARY KEY;
"""
statement="""ALTER TABLE t_user_info ADD (
group_id INT(11) UNSIGNED DEFAULT NULL,
contact_phone VARCHAR(20) DEFAULT NULL
);"""
statement = """ALTER TABLE foo RENAME TO bar;"""
statement = """RENAME TABLE `sakila`.`test_partition` TO `sakila`.`_test_partition_old`, `_test_partition_new` TO `test_partition`;"""
#statement="""ALTER TABLE foo MODIFY bar INT UNSIGNED DEFAULT NULL;"""
#statement="""ALTER TABLE foo change bar bar INT UNSIGNED;"""
statement="""ALTER TABLE `some_sch`.`my_great_table` CHANGE COLUMN `IMEI` `IMEI` VARCHAR(255) NULL DEFAULT NULL COMMENT 'IMEI datatype changed'"""

token_sql=sql_token()
token_sql.parse_sql(statement)
print (token_sql.tokenised)
#for token in token_sql.tokenised:
	#print (token)
#	for column in token["columns"]:
#		print(column)
	#else: