def __init_info(self, des=None):
    '''
    Initialize the primary database connection(s).

    When ``des`` is truthy, opens the destination connection (Phoenix via
    InitPhoenixDB, anything else via InitMyDB), registers it in
    ``des_thread_list`` and builds the ``Dump`` helper; otherwise only the
    source connection is opened through ``init_conn``.
    :param des: truthy -> also initialize the destination side
    :return:
    '''
    if des:
        if self.destination_type == 'phoenix':
            self.des_mysql_conn = InitPhoenixDB(user=self.des_conn_info['mysql_user'],
                                                passwd=self.des_conn_info['mysql_password'],
                                                host=self.des_conn_info['mysql_host'],
                                                port=self.des_conn_info['mysql_port'],
                                                jar=self.jar, jar_conf=self.jar_conf).Init()
            #self.des_mysql_cur = self.des_mysql_conn.cursor()
            # DictCursor so rows come back as dicts (matches pymysql usage elsewhere)
            self.des_mysql_cur = self.des_mysql_conn.cursor(cursor_factory=phoenixdb.cursor.DictCursor)
        else:
            self.des_mysql_conn = InitMyDB(**dict(self.des_kwargs, **{'type': self.destination_type})).Init()
            self.des_mysql_cur = self.des_mysql_conn.cursor()
            # Only a MySQL destination understands sql_log_bin / wait_timeout.
            if self.destination_type and self.destination_type != 'mysql':
                pass
            else:
                if self.binlog is None:
                    # default: do not write these changes to the destination binlog
                    self.des_mysql_cur.execute('set sql_log_bin=0')
                self.des_mysql_cur.execute('SET SESSION wait_timeout = 2147483;')
        self.des_thread_list.append({'conn': self.des_mysql_conn, 'cur': self.des_mysql_cur})
        # NOTE(review): self.cur is expected to be set by the caller/class
        # before this branch runs — confirm against the enclosing class.
        self.dump = Dump(cur=self.cur, des_conn=self.des_mysql_conn, des_cur=self.des_mysql_cur,
                         destination_type=self.destination_type, table_column_struct=self.table_column_struct,
                         des_conn_info=self.des_conn_info, jar=self.jar, jar_conf=self.jar_conf, binlog=self.binlog)
    else:
        self.conn, self.cur = self.init_conn(primary_t=True)
def __init_status(self):
    '''
    First-time connection to the status database.

    Makes up to 60 attempts, one second apart; if every attempt fails
    the whole process exits.
    :return:
    '''
    attempt = 0
    while attempt < 60:
        attempt += 1
        try:
            self.connection = InitMyDB(mysql_host=self.host,
                                       mysql_port=self.port,
                                       mysql_user=self.user,
                                       mysql_password=self.passwd,
                                       unix_socket=self.socket,
                                       auto_commit=False).Init()
            if self.connection:
                self.cur = self.connection.cursor()
                if self.binlog is None:
                    # keep these maintenance writes out of the binlog
                    self.cur.execute('set sql_log_bin=0;')
                break
        except pymysql.Error as e:
            Logging(msg=e.args, level='error')
            time.sleep(1)
    else:
        Logging(
            msg='retry 60 time on status db is failed,exist thread now',
            level='error')
        sys.exit()
def __init_slave_conn(self):
    '''
    Initialize the destination-database connection needed for replication.

    Up to 60 attempts, one second apart; on total failure the error queue
    is signalled and the thread exits.
    :return:
    '''
    attempt = 0
    while attempt < 60:
        attempt += 1
        try:
            self.destination_conn = InitMyDB(mysql_host=self.dhost,
                                             mysql_port=self.dport,
                                             mysql_user=self.duser,
                                             mysql_password=self.dpasswd,
                                             auto_commit=False).Init()
            self.destination_cur = self.destination_conn.cursor()
            if self.binlog is None:
                # do not record the applied changes in the destination binlog
                self.destination_cur.execute('set sql_log_bin=0;')
            self.destination_cur.execute('SET SESSION wait_timeout = 2147483;')
            break
        except pymysql.Error as e:
            Logging(msg=e.args, level='error')
            time.sleep(1)
    else:
        Logging(msg='retry 60 time on slave db is failed,exist thread now',
                level='error')
        self.error_queue.put(1)
        sys.exit()
def __retry_conn(self):
    '''
    Try to reconnect to the status database: 60 attempts, 1 second apart.
    :return: True once reconnected; the process exits on total failure.
    '''
    for _ in range(60):
        Logging(msg='retry connection for status db again!!', level='error')
        try:
            self.connection = InitMyDB(mysql_host=self.host,
                                       mysql_port=self.port,
                                       mysql_user=self.user,
                                       mysql_password=self.passwd,
                                       unix_socket=self.socket,
                                       auto_commit=False).Init()
            if self.connection:
                self.cur = self.connection.cursor()
                if not self.binlog:
                    # keep these maintenance writes out of the binlog
                    self.cur.execute('set sql_log_bin=0;')
                return True
        except pymysql.Error:
            Logging(msg=traceback.format_exc(), level='error')
            time.sleep(1)
    Logging(
        msg='retry 60 time on status db is failed,exist thread now',
        level='error')
    sys.exit()
def __retry_connection_destion(self):
    '''
    Re-establish the destination (postgresql) connection, up to 60
    attempts, one per second.  If the destination cannot be reached in
    that window it is assumed down (e.g. a failover that never finished)
    and the whole program exits after signalling the error queue.
    :return: True on success
    '''
    import time
    attempt = 0
    while attempt < 60:
        attempt += 1
        time.sleep(1)
        Logging(msg='connection to destination db try agian!!!',
                level='info')
        try:
            self.destination_conn = InitMyDB(mysql_host=self.dhost,
                                             mysql_port=self.dport,
                                             mysql_user=self.duser,
                                             mysql_password=self.dpasswd,
                                             auto_commit=False,
                                             type='postgresql').Init()
            self.destination_cur = self.destination_conn.cursor()
            Logging(msg='connection success!!!', level='info')
            # if self.binlog is None:
            #     self.destination_cur.execute('set sql_log_bin=0;')
            # self.destination_cur.execute('SET SESSION wait_timeout = 2147483;')
            return True
        except:
            Logging(msg=traceback.format_exc(), level='error')
    Logging(
        msg=
        'try 60 times to fail for conncetion destination db,exist now',
        level='error')
    self.error_queue.put(1)
    sys.exit()
def __retry_connection_destion(self):
    '''
    Re-establish the destination connection (Phoenix or MySQL-compatible),
    up to 60 attempts.  Closes any half-dead connection first, then waits
    10 seconds before retrying; exits the process if all attempts fail.
    :return: True on success
    '''
    import time
    if self.des_mysql_conn:
        # best-effort close of the broken connection; failures only logged
        try:
            Logging(msg='close destination db connection', level='info')
            self.des_mysql_conn.close()
            self.des_mysql_cur.close()
        except:
            Logging(msg=traceback.format_exc(), level='error')
    # NOTE(review): placed at function level so we always back off before
    # reconnecting — confirm it was not meant to run only on close failure.
    time.sleep(10)
    for i in range(60):
        Logging(msg='connection to destination db try agian!!!',
                level='info')
        if self.destination_type == 'phoenix':
            self.des_mysql_conn = InitPhoenixDB(
                host=self.des_conn_info['mysql_host'],
                port=self.des_conn_info['mysql_port'],
                user=self.des_conn_info['mysql_user'],
                passwd=self.des_conn_info['mysql_password'],
                jar=self.jar,
                jar_conf=self.jar_conf).Init()
            if self.des_mysql_conn:
                try:
                    self.des_mysql_cur = self.des_mysql_conn.cursor()
                    return True
                except:
                    Logging(msg=traceback.format_exc(), level='error')
        else:
            self.des_mysql_conn = InitMyDB(
                **dict(self.des_conn_info, **
                       {'type': self.destination_type})).Init()
            if self.des_mysql_conn:
                try:
                    self.des_mysql_cur = self.des_mysql_conn.cursor()
                    # sql_log_bin / wait_timeout only make sense on MySQL
                    if self.destination_type and self.destination_type != 'mysql':
                        pass
                    else:
                        if self.binlog is None:
                            self.des_mysql_cur.execute(
                                'set sql_log_bin=0;')
                        self.des_mysql_cur.execute(
                            'SET SESSION wait_timeout = 2147483;')
                    return True
                except:
                    Logging(msg=traceback.format_exc(), level='error')
        time.sleep(1)
    else:
        Logging(
            msg=
            'try 60 times to fail for conncetion destination db,exist now',
            level='error')
        sys.exit()
def __init__(self, block=None, server_id=None, log_file=None, log_pos=None):
    '''
    Store binlog-reader settings and open the source connection.

    :param block: blocking-read flag; defaults to False when omitted
    :param server_id: replication server id; defaults to 133 when omitted
    :param log_file: binlog file name to start from (may be None)
    :param log_pos: binlog position to start from (may be None)
    '''
    self._log_file = log_file
    self._log_pos = log_pos
    # PEP 8: compare against None with `is`/`is not`, never `==`/`!=`.
    self.block = block if block is not None else False
    self.server_id = server_id if server_id is not None else 133
    self.connection = InitMyDB().Init()
def init_des_conn(self, binlog=None):
    '''
    Initialize the destination-database connections used by the online
    export worker threads (one per extra thread).

    binlog is NOT recorded on the destination unless the --binlog option
    was given; the session wait_timeout is raised to 2147483 seconds.
    :param binlog: None -> suppress binlog on the destination session
    :return:
    '''
    for i in range(self.threads - 1):
        # Reset per iteration: previously a failed connect/cursor left
        # `cur` unbound (UnboundLocalError) or reused the cursor from the
        # previous loop iteration when appending below.
        conn = None
        cur = None
        if self.destination_type == 'phoenix':
            conn = InitPhoenixDB(
                host=self.des_conn_info['mysql_host'],
                port=self.des_conn_info['mysql_port'],
                user=self.des_conn_info['mysql_user'],
                passwd=self.des_conn_info['mysql_password'],
                jar=self.jar,
                jar_conf=self.jar_conf).Init()
            if conn:
                try:
                    cur = conn.cursor()
                except:
                    Logging(msg=traceback.format_exc(), level='error')
        else:
            conn = InitMyDB(**self.des_conn_info).Init()
            if conn:
                try:
                    cur = conn.cursor()
                    if binlog is None:
                        cur.execute('set sql_log_bin=0;')
                    cur.execute('SET SESSION wait_timeout = 2147483;')
                except:
                    Logging(msg=traceback.format_exc(), level='error')
                    cur = None
        # Register only fully-initialized connections.
        if conn and cur:
            self.des_thread_list.append({'conn': conn, 'cur': cur})
def __init_info(self):
    '''
    Set up the primary destination connection, the source connection and
    the Dump helper that copies data between them.
    :return:
    '''
    self.des_mysql_conn = InitMyDB(**self.des_kwargs).Init()
    self.des_mysql_cur = self.des_mysql_conn.cursor()
    self.des_thread_list.append({
        'conn': self.des_mysql_conn,
        'cur': self.des_mysql_cur
    })
    if self.binlog is None:
        # default: keep the copied rows out of the destination binlog
        self.des_mysql_cur.execute('set sql_log_bin=0')
    self.des_mysql_cur.execute('SET SESSION wait_timeout = 2147483;')
    self.conn, self.cur = self.init_conn(primary_t=True)
    self.dump = Dump(cur=self.cur,
                     des_conn=self.des_mysql_conn,
                     des_cur=self.des_mysql_cur)
def init_des_conn(self, binlog=None):
    '''
    Build the destination-database connections for online export
    (one per extra worker thread).

    binlog is not written on the destination unless --binlog was given;
    the session wait_timeout is raised to 2147483 seconds.
    :param binlog: None -> suppress binlog on the destination session
    :return:
    '''
    for _ in range(self.threads - 1):
        conn = InitMyDB(**self.des_conn_info).Init()
        if not conn:
            continue
        try:
            cur = conn.cursor()
            if binlog is None:
                cur.execute('set sql_log_bin=0;')
            cur.execute('SET SESSION wait_timeout = 2147483;')
            self.des_thread_list.append({'conn': conn, 'cur': cur})
        except:
            Logging(msg=traceback.format_exc(), level='error')
class GetStruct:
    '''Read a table's column layout from INFORMATION_SCHEMA.'''

    def __init__(self):
        self.connection = InitMyDB().Init()
        self.cur = self.connection.cursor()

    def GetColumn(self, *args):
        '''args order: database, tablename'''
        sql = 'select COLUMN_NAME,COLUMN_KEY from INFORMATION_SCHEMA.COLUMNS where table_schema=%s and table_name=%s order by ORDINAL_POSITION;'
        self.cur.execute(sql, args=args)
        rows = self.cur.fetchall()
        column_list = []
        pk_idex = None
        for position, row in enumerate(rows):
            column_list.append(row['COLUMN_NAME'])
            if row['COLUMN_KEY'] == 'PRI':
                # remember the ordinal of the primary-key column
                pk_idex = position
        self.cur.close()
        self.connection.close()
        return column_list, pk_idex
def init_conn(self, primary_t=None):
    '''
    Create database connections and register every successful one in
    the connection list.
    :param primary_t: truthy -> open the single primary connection
    :return: (conn, cur) when primary_t succeeds, otherwise None
    '''
    if primary_t:
        connection = InitMyDB(**self.db_conn_info).Init()
        if connection:
            try:
                cursor = connection.cursor()
                # the primary connection must get a transaction; abort otherwise
                if self.__init_transaction(cur=cursor, primary_t=True) is None:
                    sys.exit()
                self.thread_list.append({'conn': connection, 'cur': cursor})
                return connection, cursor
            except pymysql.Error:
                Logging(msg=traceback.format_exc(), level='error')
                sys.exit()
    else:
        for _ in range(self.threads - 1):
            connection = InitMyDB(**self.db_conn_info).Init()
            if not connection:
                continue
            try:
                cursor = connection.cursor()
                if self.__init_transaction(cur=cursor):
                    self.thread_list.append({'conn': connection, 'cur': cursor})
            except:
                Logging(msg=traceback.format_exc(), level='error')
class GetStruct:
    '''
    Helper bound to the status/source MySQL server: reads column layout
    and persists the dump progress into dump2db.dump_status.
    '''
    def __init__(self, host=None, port=None, user=None, passwd=None, socket=None):
        self.connection = InitMyDB(mysql_host=host, mysql_port=port, mysql_user=user, mysql_password=passwd, unix_socket=socket).Init()
        self.cur = self.connection.cursor()
        # status writes must not appear in the binlog
        self.cur.execute('set sql_log_bin=0;')

    def GetColumn(self, *args):
        '''args order: database, tablename'''
        column_list = []
        column_type_list = []
        sql = 'select COLUMN_NAME,COLUMN_KEY,COLUMN_TYPE from INFORMATION_SCHEMA.COLUMNS where table_schema=%s and table_name=%s order by ORDINAL_POSITION;'
        self.cur.execute(sql, args=args)
        result = self.cur.fetchall()
        pk_idex = []
        for idex, row in enumerate(result):
            column_list.append(row['COLUMN_NAME'])
            column_type_list.append(row['COLUMN_TYPE'])
            if row['COLUMN_KEY'] == 'PRI':
                # collect ordinals of every primary-key column
                pk_idex.append(idex)
        return column_list, pk_idex, column_type_list

    def CreateTmp(self):
        self.cur.execute('CREATE DATABASE IF NOT EXISTS dump2db;')  # create the scratch database
        self.cur.execute('DROP TABLE IF EXISTS dump2db.dump_status;')  # drop any stale status table
        self.cur.execute('CREATE TABLE dump2db.dump_status(id INT,exe_gtid VARCHAR(50),logname VARCHAR(100),at_pos BIGINT,next_pos BIGINT,PRIMARY KEY(id));')  # create the status table

    def SaveStatus(self, logname, at_pos, next_pos, server_id, gtid=None):
        # Upsert the current binlog position (and GTID when available),
        # keyed by server_id, then commit.
        if gtid:
            sql = 'INSERT INTO dump2db.dump_status(id,exe_gtid,logname,at_pos,next_pos) VALUES(%s,%s,%s,%s,%s) ON DUPLICATE KEY UPDATE exe_gtid=%s,logname=%s,at_pos=%s,next_pos=%s;'
            self.cur.execute(sql, (server_id, gtid, logname, at_pos, next_pos, gtid, logname, at_pos, next_pos))
        else:
            sql = 'INSERT INTO dump2db.dump_status(id,logname,at_pos,next_pos) VALUES(%s,%s,%s,%s) ON DUPLICATE KEY UPDATE logname=%s,at_pos=%s,next_pos=%s;'
            self.cur.execute(sql, (server_id, logname, at_pos, next_pos, logname, at_pos, next_pos))
        self.connection.commit()

    def close(self):
        self.cur.close()
        self.connection.close()
class Dump(escape):
    '''
    Copies table data from the source MySQL server to the destination
    (MySQL-compatible or Phoenix), chunk by chunk, with reconnect/retry
    handling for the destination side.
    '''
    def __init__(self, **kwargs):
        super(Dump, self).__init__()
        self.mysql_cur = kwargs['cur']                    # source cursor
        self.des_conn_info = kwargs['des_conn_info']      # destination connect kwargs
        self.des_mysql_conn = kwargs['des_conn'] if 'des_conn' in kwargs else None
        self.des_mysql_cur = kwargs['des_cur'] if 'des_cur' in kwargs else None
        self.destination_type = kwargs['destination_type']
        self.table_column_struct = kwargs['table_column_struct']
        self.result = None       # last fetched chunk of source rows
        self.jar = kwargs['jar'] if 'jar' in kwargs else None
        self.jar_conf = kwargs['jar_conf'] if 'jar_conf' in kwargs else None
        self.binlog = kwargs['binlog']
        self.database = None
        self.sql = None          # last INSERT/UPSERT sql, replayed on retry
        self.queal_struct = None # truthy -> column mapping dict is in use

    def prepare_structe(self, database, tablename):
        '''
        Prepare the matching database/table structure on the destination.
        The destination table is DROPPED if it already exists — callers
        must make sure any existing data may be discarded.
        :param database:
        :param tablename:
        :return: True
        '''
        if self.destination_type != 'phoenix':
            self.__check_stat(self.__raise_sql(sql='CREATE DATABASE IF NOT EXISTS {}'.format(database), retry=True))
            self.mysql_cur.execute('SHOW CREATE TABLE {}.{}'.format(database, tablename))
            result = self.mysql_cur.fetchall()
            create_sql = result[0]['Create Table']
            self.__check_stat(self.__raise_sql(sql='USE {}'.format(database), retry=True))
            self.__check_stat(self.__raise_sql(sql='DROP TABLE IF EXISTS {}'.format(tablename), retry=True))
            self.__check_stat(self.__raise_sql(sql=create_sql, retry=True))
        return True

    def __column_join(self, cols):
        # Build the column list for SELECT/INSERT; Phoenix names are not
        # backquoted, MySQL names are.  When queal_struct is set, `cols`
        # is a mapping and the mapped value is used.
        if self.destination_type == 'phoenix':
            if self.queal_struct:
                return ','.join(['{}'.format(cols[col]) for col in cols])
            return ','.join(['{}'.format(col) for col in cols])
        if self.queal_struct:
            return ','.join(['`{}`'.format(cols[col]) for col in cols])
        return ','.join(['`{}`'.format(col) for col in cols])

    def dump_to_new_db(self, database, tablename, idx, pri_idx, chunk_list=None, bytes_col_list=None, tbl=None, cols=None):
        # Copy each [start, end] index range of `chunk_list` in batches of
        # up to 2000 rows, inserting into the destination (UPSERT on Phoenix).
        self.database = database
        for cl in chunk_list:
            start_num = cl[0]
            end_num = cl[1]
            limit_num = 0
            while True:
                '''
                第一次使用分块大小limit N,M,
                N代表起始个数位置(chunks大小),M代表条数
                第一次执行之后获取最大主键或唯一索引值范围查找
                每个线程查询一次累加条数当剩余条数小于1000时调用__get_from_source_db_list
                每个chunk剩余条数大于1000固定调用__get_from_source_db_limit1000
                '''
                # Select the next window of rows ordered by the index column.
                if tbl and cols:
                    self.queal_struct = True
                    _cols = ','.join(['`{}`'.format(c) for c in cols])
                    sql = 'SELECT {} FROM {}.{} WHERE {}>=%s and {}<=%s ORDER BY {} LIMIT {},%s'.format(_cols, database, tablename, idx, idx, idx, limit_num)
                else:
                    sql = 'SELECT {} FROM {}.{} WHERE {}>=%s and {}<=%s ORDER BY {} LIMIT {},%s'.format(self.__column_join(self.table_column_struct), database, tablename, idx, idx, idx, limit_num)
                self.__get_from_source_db_limit2000(sql=sql, args_value=[start_num, end_num])
                '''======================================================================================================'''
                '''
                拼接行数据为pymysql格式化列表
                如果返回数据为空直接退出
                '''
                # Flatten the fetched rows into one pymysql parameter list;
                # stop when the source returned nothing.
                all_value = []
                sql_type = 'INSERT'
                if self.result:
                    _len = len(self.result[0])
                    _num = len(self.result)
                    if self.destination_type != 'phoenix':
                        for row in self.result:
                            all_value += row.values()
                    else:
                        sql_type = 'UPSERT'
                else:
                    #Logging(msg='return value is empty',level='warning')
                    break
                if tbl:
                    if cols:
                        sql = '{} INTO {}.{}({}) VALUES'.format(sql_type, tbl[0], tbl[1], self.__column_join(cols))
                    else:
                        sql = '{} INTO {}.{}({}) VALUES'.format(sql_type, tbl[0], tbl[1], self.__column_join(self.table_column_struct))
                else:
                    sql = '{} INTO {}.{}({}) VALUES'.format(sql_type, database, tablename, self.__column_join(self.table_column_struct))
                if self.destination_type == 'phoenix':
                    # Phoenix: one UPSERT per row, values escaped and
                    # interpolated (no parameter binding).
                    for row in self.result:
                        _len = len(row)
                        _values = [_ for _ in row.values()]
                        _sql = sql + '{}'.format(self.__combination_value_format(_len=_len, _num=None))
                        _values = self.escape_string(_values)
                        _values = self.escape_args(_values)
                        self.sql = _sql % tuple(_values)
                        self.__check_stat(self.__raise_sql(sql=self.sql, retry=True))
                        # NOTE(review): commit-per-row reconstructed from the
                        # collapsed source — confirm it was not once-per-batch.
                        self.__check_stat(self.__raise_sql(sql='commit'))
                else:
                    # for row in self.result:
                    #     _len = len(row)
                    #     _values = [_ for _ in row.values()]
                    #     _sql = sql + '{}'.format(self.__combination_value_format(_len=_len,_num=None))
                    #     self.__check_stat(self.__raise_sql(sql=_sql, args=_values,retry=True))
                    #     self.__check_stat(self.__raise_sql(sql='commit'))
                    # multi-row INSERT with bound parameters, one commit per batch
                    self.sql = sql + '{}'.format(self.__combination_value_format(_len=_len, _num=_num))
                    self.__check_stat(self.__raise_sql(sql=self.sql, args=all_value))
                    self.__check_stat(self.__raise_sql(sql='commit'))
                '''
                每次循环结束计算该线程还剩未处理的条数(limit_num)
                当返回条数少于1000条时将退出整个循环
                '''
                # Advance the window; a short batch means the chunk is done.
                return_len = len(self.result)
                limit_num += return_len
                if return_len < 2000:
                    break
                '''=========================================='''

    def __get_from_source_db_list(self, sql, pri_value=None):
        # Fetch rows from the source; any pymysql error is fatal.
        try:
            self.mysql_cur.execute(sql, pri_value)
            self.result = self.mysql_cur.fetchall()
        except pymysql.Error:
            Logging(msg=traceback.format_exc(), level='error')
            sys.exit()

    def __get_from_source_db_limit2000(self, sql, args_value):
        # Fetch up to 2000 rows from the source for the current window.
        try:
            if args_value:
                self.mysql_cur.execute(sql, args_value + [2000])
            else:
                self.mysql_cur.execute(sql, 2000)
            self.result = self.mysql_cur.fetchall()
        except pymysql.Error:
            Logging(msg=traceback.format_exc(), level='error')
            sys.exit()

    # def __split_data(self,start,end,idx_name,db,table):
    #     '''
    #     该函数主要作用是在数据量大时在各个线程中继续对数据进行分区
    #     在大于10000条时才会进行继续拆分
    #     用10000作为拆分的基数条件获取模糊的分区数
    #     再利用去重后的索引值获取每个分区最大最小值
    #     '''
    #     sql = 'select count(*) as count from {}.{} where {}>=%s and {}<=%s'.format(idx_name,db,table,idx_name,idx_name)
    #     self.mysql_cur.execute(sql,args=[start,end])
    #     result = self.mysql_cur.fetchall()
    #     total_rows = result[0]['count']
    #     split_list = []
    #     if total_rows > 20000:
    #         split_chunks = int(total_rows/10000)
    #         _sort_values = sorted(set(values), key=values.index)
    #         _nums = int(_sort_values/split_chunks)
    #         _n = 0
    #         for v in range(split_chunks):
    #             if v == (split_chunks -1):
    #                 _t = _sort_values[_n:-1]
    #                 split_list.append([_t[0],_t[-1]])
    #             else:
    #                 _t = _sort_values[_n:_n+_nums]
    #                 split_list.append([_t[0],_t[-1]])
    #             _n += _nums
    #     else:
    #         split_list.append[start,end]
    #     return split_list

    def __combination_value_format(self, _len, _num):
        '''Build the "(%s,...),(%s,...)" placeholder template.'''
        one_format = '({})'.format(','.join(['%s' for i in range(_len)]))
        if _num:
            all_ = ','.join(one_format for i in range(_num))
            return all_
        return one_format

    def __raise_sql(self, sql, args=[], retry=None):
        '''
        Execute one statement (or commit) on the destination.
        :param sql: statement text, or the literal 'commit'
        :param args: bound parameters
        :param retry: passed through to __retry_execute on recoverable errors
        :return: True on success, None on unrecoverable failure
        '''
        try:
            args = self.escape_string(args) if args else []
        except:
            Logging(msg=traceback.format_exc(), level='error')
        try:
            if sql == 'commit':
                self.des_mysql_conn.commit()
            else:
                self.des_mysql_cur.execute(sql, args)
        except pymysql.Error as e:
            Logging(msg=e.args, level='error')
            # ErrorCode maps MySQL error numbers to "retryable" flags
            if e.args[0] in ErrorCode:
                if ErrorCode[e.args[0]]:
                    if sql == 'commit':
                        self.__retry_execute(retry=retry)
                    else:
                        self.__retry_execute(sql=sql, args=args, retry=retry)
                    return True
            #Logging(msg='sql:{},values:{}'.format(sql, args), level='error')
            Logging(msg=e, level='error')
            return None
        except:
            #Logging(msg='sql:{},values:{}'.format(sql, args), level='error')
            Logging(msg=traceback.format_exc(), level='error')
            return None
        return True

    def __check_stat(self, state):
        # Abort the whole process when a destination statement failed.
        if state:
            return
        else:
            Logging(msg='desthread failed!!!!', level='error')
            sys.exit()

    def __retry_execute(self, sql=None, args=None, retry=None):
        '''
        Retry after an error: reconnect, then either replay the given sql
        (retry=True) or replay the last recorded self.sql plus commit.
        :param sql:
        :param args:
        :param retry: truthy -> re-run only the supplied statement
        :return:
        '''
        self.__retry_connection_destion()
        if retry:
            self.__raise_sql(sql=sql, args=args, retry=retry)
            return
        else:
            #Logging(msg='sql={},args={},retry={},type={}'.format(sql, args,retry, type), level='info')
            Logging(msg='retry execute sql {}'.format(self.sql), level='info')
            self.__raise_sql(sql=self.sql, args=args, retry=True)
            self.__raise_sql('commit')
            return

    def __retry_connection_destion(self):
        '''
        Reconnect to the destination, up to 60 attempts one second apart;
        exit the whole program when every attempt fails.
        :return: True on success
        '''
        import time
        for i in range(60):
            Logging(msg='connection to destination db try agian!!!', level='info')
            if self.destination_type == 'phoenix':
                self.des_mysql_conn = InitPhoenixDB(host=self.des_conn_info['mysql_host'],
                                                    port=self.des_conn_info['mysql_port'],
                                                    user=self.des_conn_info['mysql_user'],
                                                    passwd=self.des_conn_info['mysql_password'],
                                                    jar=self.jar,
                                                    jar_conf=self.jar_conf).Init()
                if self.des_mysql_conn:
                    try:
                        self.des_mysql_cur = self.des_mysql_conn.cursor()
                        return True
                    except:
                        Logging(msg=traceback.format_exc(), level='error')
            else:
                self.des_mysql_conn = InitMyDB(**self.des_conn_info).Init()
                if self.des_mysql_conn:
                    try:
                        self.des_mysql_cur = self.des_mysql_conn.cursor()
                        if self.binlog is None:
                            self.des_mysql_cur.execute('set sql_log_bin=0;')
                        self.des_mysql_cur.execute('SET SESSION wait_timeout = 2147483;')
                        return True
                    except:
                        Logging(msg=traceback.format_exc(), level='error')
            time.sleep(1)
        else:
            Logging(msg='try 60 times to fail for conncetion destination db,exist now', level='error')
            sys.exit()
class GetStruct:
    '''
    Status-database accessor: keeps the replication progress in
    dump2db.dump_status, optionally committing it in lock-step with a
    destination connection ("XA-like" two-step commit).
    '''
    def __init__(self, host=None, port=None, user=None, passwd=None, socket=None, binlog=None):
        self.host = host
        self.port = port
        self.user = user
        self.passwd = passwd
        self.socket = socket
        self.binlog = binlog
        self.connection = None
        self.cur = None
        self.__init_status()
        self.apply_conn = None          # destination connection for the two-step commit
        self.insert_sql_list = []       # [sql, args] of the pending status upsert, replayed on reconnect

    def __init_status(self):
        '''
        First-time connection to the status database; 60 attempts one
        second apart, exiting the process on total failure.
        :return:
        '''
        for i in range(60):
            try:
                self.connection = InitMyDB(mysql_host=self.host,
                                           mysql_port=self.port,
                                           mysql_user=self.user,
                                           mysql_password=self.passwd,
                                           unix_socket=self.socket,
                                           auto_commit=False).Init()
                if self.connection:
                    self.cur = self.connection.cursor()
                    if self.binlog is None:
                        # keep status writes out of the binlog
                        self.cur.execute('set sql_log_bin=0;')
                    break
            except pymysql.Error as e:
                Logging(msg=e.args, level='error')
                time.sleep(1)
        else:
            Logging(
                msg='retry 60 time on status db is failed,exist thread now',
                level='error')
            sys.exit()

    def CreateTmp(self):
        self.__raise('CREATE DATABASE IF NOT EXISTS dump2db;')  # create the scratch database
        #self.__raise('DROP TABLE IF EXISTS dump2db.dump_status;')
        self.__raise(
            'CREATE TABLE IF NOT EXISTS dump2db.dump_status(id INT,excute_gtid json,logname VARCHAR(100),'
            'at_pos BIGINT,next_pos BIGINT,gtid_uid varchar(64),PRIMARY KEY(id));'
        )  # create the status table

    def SaveStatus(self, logname, at_pos, next_pos, server_id, gtid=None, apply_conn=None, gno_uid=None):
        # Persist the current position.  With apply_conn given, commit the
        # destination first and the status second (see __xa_transaction).
        if apply_conn:
            return self.__xa_transaction(apply_conn, server_id, logname, at_pos, next_pos, gtid, gno_uid)
        else:
            if gtid:
                sql = 'INSERT INTO dump2db.dump_status(id,excute_gtid,logname,at_pos,next_pos,gtid_uid) VALUES(%s,%s,%s,%s,%s,%s)' \
                      ' ON DUPLICATE KEY UPDATE excute_gtid=%s,logname=%s,at_pos=%s,next_pos=%s,gtid_uid=%s;'
                self.__raise(
                    sql, (server_id, gtid, logname, at_pos, next_pos, gno_uid,
                          gtid, logname, at_pos, next_pos, gno_uid))
                return self.__raise('commit')
            else:
                sql = 'INSERT INTO dump2db.dump_status(id,logname,at_pos,next_pos,gtid_uid) VALUES(%s,%s,%s,%s,%s) ' \
                      'ON DUPLICATE KEY UPDATE logname=%s,at_pos=%s,next_pos=%s,gtid_uid=%s;'
                self.__raise(sql, (server_id, logname, at_pos, next_pos, gno_uid,
                                   logname, at_pos, next_pos, gno_uid))
                return self.__raise('commit')

    def close(self):
        self.cur.close()
        self.connection.close()

    def __raise(self, sql, args=None):
        '''
        Execute (or commit) with automatic reconnect-and-retry until it
        succeeds or the recursion limit is hit; syntax-class errors abort
        the whole program.
        :param sql: statement text, or the literal 'commit'
        :return: True on success
        '''
        try:
            if sql == 'commit':
                self.connection.commit()
            else:
                self.cur.execute(sql, args)
        except pymysql.Error as e:
            Logging(msg=traceback.format_exc(), level='error')
            Logging(msg='error code {}'.format(e.args[0]), level='error')
            if ErrorCode[e.args[0]]:
                # recoverable: reconnect and replay
                self.__retry_conn()
                Logging(msg='connection success on status db', level='info')
                if sql == 'commit':
                    # a lost commit must replay the pending status upsert first
                    self.__raise(self.insert_sql_list[0], self.insert_sql_list[1])
                    self.__raise('commit')
                else:
                    self.__raise(sql, args)
                return True
            else:
                Logging(msg='error code test', level='error')
                Logging(msg=e, level='error')
                sys.exit()
        except:
            Logging(msg='error code test status db', level='error')
            Logging(msg=traceback.format_exc(), level='error')
            sys.exit()
        return True

    def __retry_conn(self):
        '''
        Reconnect to the status database: 60 attempts, 1 second apart;
        exits the process on total failure.
        :return: True once reconnected
        '''
        for i in range(60):
            Logging(msg='retry connection for status db again!!', level='error')
            try:
                self.connection = InitMyDB(mysql_host=self.host,
                                           mysql_port=self.port,
                                           mysql_user=self.user,
                                           mysql_password=self.passwd,
                                           unix_socket=self.socket,
                                           auto_commit=False).Init()
                if self.connection:
                    self.cur = self.connection.cursor()
                    if self.binlog:
                        pass
                    else:
                        self.cur.execute('set sql_log_bin=0;')
                    return True
            except pymysql.Error:
                Logging(msg=traceback.format_exc(), level='error')
                time.sleep(1)
        else:
            Logging(
                msg='retry 60 time on status db is failed,exist thread now',
                level='error')
            sys.exit()

    def __xa_transaction(self, apply_conn, server_id, logname, at_pos, next_pos, gtid=None, gno_uid=None):
        '''
        The destination data and the status row live in different servers,
        so a fully atomic commit is impossible.  Commit the destination
        data FIRST, then save the status: with unique keys/primary keys on
        every table, a lost status row only causes duplicate-insert errors
        on replay, whereas committing the status first could silently skip
        data if the destination commit is lost.
        :param apply_conn:
        :param server_id:
        :param logname:
        :param at_pos:
        :param next_pos:
        :param gtid:
        :return: True when both commits succeeded, False otherwise
        '''
        self.apply_conn = apply_conn
        if gtid:
            sql = 'INSERT INTO dump2db.dump_status(id,excute_gtid,logname,at_pos,next_pos,gtid_uid) VALUES(%s,%s,%s,%s,%s,%s)' \
                  ' ON DUPLICATE KEY UPDATE excute_gtid=%s,logname=%s,at_pos=%s,next_pos=%s,gtid_uid=%s;'
            # remember the upsert so __raise can replay it after a lost commit
            self.insert_sql_list = [
                sql,
                [
                    server_id, gtid, logname, at_pos, next_pos, gno_uid,
                    gtid, logname, at_pos, next_pos, gno_uid
                ]
            ]
            self.__raise(sql, (server_id, gtid, logname, at_pos, next_pos,
                               gno_uid, gtid, logname, at_pos, next_pos,
                               gno_uid))
        else:
            sql = 'INSERT INTO dump2db.dump_status(id,logname,at_pos,next_pos,gtid_uid) VALUES(%s,%s,%s,%s,%s) ' \
                  'ON DUPLICATE KEY UPDATE logname=%s,at_pos=%s,next_pos=%s,gtid_uid=%s;'
            self.insert_sql_list = [
                sql,
                [
                    server_id, logname, at_pos, next_pos, gno_uid, logname,
                    at_pos, next_pos, gno_uid
                ]
            ]
            self.__raise(sql, (server_id, logname, at_pos, next_pos, gno_uid,
                               logname, at_pos, next_pos, gno_uid))
        try:
            # data first, status second (see docstring)
            apply_conn.commit()
            self.__raise('commit')
            return True
        except:
            Logging(msg=traceback.format_exc(), level='error')
            return False

    def get_daemon_info(self, server_id):
        '''
        On restart, read back the binlog position already processed for
        this server_id.
        :return: (logname, next_pos, gtid_dict, gtid_uid) — only when an
                 executed-GTID value is stored; exits when the select fails
        '''
        state = self.__raise(
            'select logname,next_pos,excute_gtid,gtid_uid from dump2db.dump_status where id = %s;',
            server_id)
        if state:
            try:
                result = self.cur.fetchall()
            except pymysql.Error as e:
                if ErrorCode[e.args[0]]:
                    self.get_daemon_info(server_id)
            _gtid = []
            gtid_uid = result[0]['gtid_uid']
            if result[0]['excute_gtid']:
                # HACK: eval() of a DB-stored JSON string — trusted data
                # only; json.loads would be the safe equivalent.
                gtid = eval(result[0]['excute_gtid'])
                #_gtid = ['{}:{}'.format(uuid,gtid[uuid]) for uuid in gtid]
                #return result[0]['logname'],result[0]['next_pos'],','.join(_gtid),gtid_uid,gno_id
                return result[0]['logname'], result[0]['next_pos'], gtid, gtid_uid
        else:
            sys.exit()
class desthread(GetSql):
    '''
    Destination-apply worker: pulls transactions from its queue, replays
    them on the destination database, and records per-table progress in
    repl_mark.mark_status.  Used as a context manager (__enter__ runs the
    worker loop, __exit__ closes the connection).
    '''
    def __init__(self, **kwargs):
        super(desthread, self).__init__(
            table_pk_idex_list=kwargs['table_pk_idex_list'],
            table_struct_list=kwargs['table_struct_list'],
            map_conf=kwargs['map_conf'],
            queal_struct=kwargs['queal_struct'])
        self.thread_lock = kwargs['thread_lock']
        self.thread_lock_queue = kwargs['thread_lock_queue']
        self.chunk_list_status_th = kwargs['chunk_list_status_th']
        self.table_struct_list = kwargs['table_struct_list']
        self.table_pk_idex_list = kwargs['table_pk_idex_list']
        self.thread_id = kwargs['thread_id']
        self.queue = kwargs['queue']
        self.dhost, self.dport, self.duser, self.dpasswd = kwargs[
            'dhost'], kwargs['dport'], kwargs['duser'], kwargs[
                'dpassword']  # destination-connection info
        self.binlog = kwargs['binlog']  # whether to record binlog on the destination
        self.destination_conn = None
        self.destination_cur = None
        self.server_id = kwargs['server_id']
        self.trancaction_list = []  # sql executed in the open transaction, replayed after reconnect
        self.queue_list = []  # queue list, one queue per thread: [queue1, queue2]
        self.group_list = [
        ]  # db.tbl groups per queue, parallel to queue_list: [['db:tbl','db:tbl'],['db:tbl']]
        self.status_row = [
        ]  # db/tbl already present in the mark table; enables plain UPDATE and lowers deadlock risk
        self.error_queue = kwargs['error_queue']
        self.kwargs = None
        self.__init_slave_conn()

    def __init_slave_conn(self):
        '''
        Initialize the status/destination connections needed for sync;
        60 attempts one second apart, then signal the error queue and exit.
        :return:
        '''
        for i in range(60):
            try:
                self.destination_conn = InitMyDB(mysql_host=self.dhost,
                                                 mysql_port=self.dport,
                                                 mysql_user=self.duser,
                                                 mysql_password=self.dpasswd,
                                                 auto_commit=False).Init()
                self.destination_cur = self.destination_conn.cursor()
                if self.binlog is None:
                    self.destination_cur.execute(
                        'set sql_log_bin=0;')  # set the binlog parameter
                self.destination_cur.execute(
                    'SET SESSION wait_timeout = 2147483;')
                break
            except pymysql.Error as e:
                Logging(msg=e.args, level='error')
                time.sleep(1)
        else:
            Logging(msg='retry 60 time on slave db is failed,exist thread now',
                    level='error')
            self.error_queue.put(1)
            sys.exit()

    def __restart_trancaction_sql(self):
        '''
        After a disconnect during commit, re-execute every statement of
        the open transaction.
        :return: True when all replayed, None on first failure
        '''
        #self.__retry_connection_destion()
        for row in self.trancaction_list:
            if self.__raise_sql(sql=row[0], args=row[1], tmp=True) is None:
                return None
        return True

    def __raise_sql(self, sql, args=[], retry=None, type=None, tmp=None):
        '''
        Apply one binlog-derived statement (or commit) to the destination.
        :param sql: statement text, or the literal 'commit'
        :param args: bound parameters
        :param retry: True -> replay the whole transaction on error (used for commit)
        :param type: True -> re-execute this sql on error (used for select)
        :param tmp: True -> this is itself a retry; return None on error
        :return: True on success, None on unrecoverable failure
        '''
        try:
            args = self.escape_string(args) if args else []
        except:
            Logging(msg=traceback.format_exc(), level='error')
            self.error_queue.put(1)
        try:
            if sql == 'commit':
                self.destination_conn.commit()
            else:
                self.destination_cur.execute(sql, args)
        except pymysql.Error as e:
            Logging(msg=e.args, level='error')
            if tmp:
                return None
            if e.args[0] in RetryErrorCode:
                # transient (e.g. deadlock): re-run the same statement
                Logging(msg='sql: {} args:{}'.format(sql, args), level='error')
                self.__raise_sql(sql=sql, args=args, retry=retry, type=type)
            elif e.args[0] in ErrorCode:
                # connection-class error: drop the connection and retry
                self.__rollback()
                if ErrorCode[e.args[0]]:
                    if sql == 'commit':
                        self.__retry_execute(retry=retry)
                    else:
                        self.__retry_execute(sql=sql, args=args, retry=retry, type=type)
                    return True
            Logging(msg='sql:{},values:{}'.format(sql, args), level='error')
            Logging(msg=traceback.format_exc(), level='error')
            return None
        except:
            Logging(msg='sql:{},values:{}'.format(sql, args), level='error')
            Logging(msg=traceback.format_exc(), level='error')
            return None
        return True

    def __rollback(self):
        '''
        Close the connection (middleware-compatible way to discard the
        open transaction).
        :return:
        '''
        try:
            Logging(msg='close connection for db now !!!', level='warning')
            self.destination_cur.close()
            self.destination_conn.close()
        except pymysql.Error as e:
            Logging(msg=e.args, level='warning')
        except:
            Logging(msg=traceback.format_exc(), level='warning')

    def __retry_execute(self, sql=None, args=None, retry=None, type=None):
        '''
        Retry after an error: reconnect, then replay either the single
        statement (type), the transaction list without commit (retry is
        None), or the whole transaction plus commit (retry).
        :param sql:
        :param args:
        :param type: re-execute only the supplied sql
        :param retry: replay the whole transaction then commit
        :return:
        '''
        self.__retry_connection_destion()
        if type:
            self.__raise_sql(sql=sql, args=args, type=type, retry=retry)
            return
        elif retry is None:
            Logging(msg='sql={},args={},retry={},type={}'.format(
                sql, args, retry, type),
                    level='info')
            Logging(
                msg='retry execute trancaction list, list length {}'.format(
                    len(self.trancaction_list)),
                level='info')
            for row in self.trancaction_list:
                Logging(msg='retry sql : {} args:{}'.format(row[0], row[1]),
                        level='warning')
                if self.__raise_sql(row[0], row[1], tmp=True) is None:
                    # replay failed mid-transaction: start over
                    self.__rollback()
                    self.__retry_execute()
                    return
            return
        elif retry:
            Logging(msg='sql={},args={},retry={},type={}'.format(
                sql, args, retry, type),
                    level='info')
            Logging(msg='retry all trancaction list, list length {}'.format(
                len(self.trancaction_list)),
                    level='info')
            if self.__restart_trancaction_sql() is None:
                self.__retry_execute(retry=True)
                self.__raise_sql('commit', retry=True)
                return
            self.__raise_sql('commit', retry=True)
            return

    def __retry_connection_destion(self):
        '''
        Reconnect to the destination, up to 60 attempts one second apart;
        on total failure (destination assumed down) signal the error
        queue and exit the whole program.
        :return: True on success
        '''
        import time
        for i in range(60):
            time.sleep(1)
            Logging(msg='connection to destination db try agian!!!',
                    level='info')
            try:
                self.destination_conn = InitMyDB(mysql_host=self.dhost,
                                                 mysql_port=self.dport,
                                                 mysql_user=self.duser,
                                                 mysql_password=self.dpasswd,
                                                 auto_commit=False).Init()
                self.destination_cur = self.destination_conn.cursor()
                Logging(msg='connection success!!!', level='info')
                if self.binlog is None:
                    self.destination_cur.execute(
                        'set sql_log_bin=0;')  # set the binlog parameter
                self.destination_cur.execute(
                    'SET SESSION wait_timeout = 2147483;')
                return True
            except:
                Logging(msg=traceback.format_exc(), level='error')
        else:
            Logging(
                msg=
                'try 60 times to fail for conncetion destination db,exist now',
                level='error')
            self.error_queue.put(1)
            sys.exit()

    def __check_stat(self, state, only_state=None):
        # Abort the worker (and signal the error queue) on a failed statement.
        if state:
            if only_state:
                return
        else:
            Logging(msg='desthread failed!!!!', level='error')
            self.error_queue.put(1)
            sys.exit()

    def __check_queue(self):
        '''
        Check whether the queue is full — a full queue may mean the
        pipeline is blocked, and it also keeps one table from falling far
        behind the rest.
        :return: False when full, True otherwise
        '''
        if self.queue.full():
            return False
        return True

    def __set_mark(self, db_name, tbl_name, gtid, gno_id, at_pos, binlog):
        '''
        Mark/status bookkeeping: record the gtid/position this table has
        reached in repl_mark.mark_status (UPDATE when the row is known to
        exist, otherwise probe and INSERT).
        :param db_name:
        :param tbl_name:
        :param gtid:
        :param gno_id:
        :param at_pos:
        :return:
        '''
        _name = '{}:{}:{}'.format(db_name, tbl_name, self.server_id)
        if _name in self.status_row:
            # row known to exist: plain UPDATE (cheaper, less deadlock-prone)
            sql = 'UPDATE repl_mark.mark_status SET gno_uid=%s,gno_id=%s,at_pos=%s,binlog=%s where db_name=%s and tbl_name=%s and server_id = %s;'
            args = [
                gtid, gno_id, at_pos, binlog, db_name, tbl_name,
                self.server_id
            ]
        else:
            sql = 'select 1 from repl_mark.mark_status where db_name = %s and tbl_name = %s and server_id=%s;'
            if self.__raise_sql(sql=sql,
                                args=[db_name, tbl_name, self.server_id],
                                type=True):
                _s = self.destination_cur.fetchall()
                if _s:
                    sql = 'UPDATE repl_mark.mark_status SET gno_uid=%s,gno_id=%s,binlog=%s,at_pos=%s where db_name=%s and tbl_name = %s and server_id=%s;'
                    args = [
                        gtid, gno_id, binlog, at_pos, db_name, tbl_name,
                        self.server_id
                    ]
                else:
                    sql = 'INSERT INTO repl_mark.mark_status(db_name,tbl_name,gno_uid,gno_id,binlog,at_pos,server_id) VALUES(%s,%s,%s,%s,%s,%s,%s);'
                    args = [
                        db_name, tbl_name, gtid, gno_id, binlog, at_pos,
                        self.server_id
                    ]
                self.status_row.append(_name)
            else:
                Logging(
                    msg='execute sql [{}] error , exit now!!!!'.format(sql),
                    level='error')
                self.error_queue.put(1)
                sys.exit()
        self.trancaction_list.append([sql, args])
        self.__check_stat(self.__raise_sql(sql=sql, args=args))

    def __get_fetch_all(self):
        '''
        On restart, fetch the already-applied position for every
        db/table/server_id from the mark table.
        :return: {'db:tbl:server_id': [gno_uid, gno_id, at_pos, binlog]}
        '''
        sql = 'SELECT db_name,tbl_name,gno_uid,gno_id,binlog,at_pos,server_id FROM repl_mark.mark_status'
        self.__check_stat(self.__raise_sql(sql=sql, type=True))
        result = self.destination_cur.fetchall()
        fetch_value = {}
        for row in result:
            fetch_value['{}:{}:{}'.format(row['db_name'], row['tbl_name'],
                                          row['server_id'])] = [
                                              row['gno_uid'], row['gno_id'],
                                              row['at_pos'], row['binlog']
                                          ]
        return fetch_value

    def __check_lock(self, db_tbl_name, lock_state=None):
        '''
        Per-db/table ordering lock: threads working on the same table are
        queued in thread_lock_queue and proceed in FIFO order.

        NOTE(review): the nesting below was reconstructed from a
        whitespace-mangled source — verify the recursion/acquire flow
        against the original before relying on it.
        :param db_tbl_name:
        :return:
        '''
        if db_tbl_name in self.thread_lock_queue:
            if lock_state:
                self.thread_lock.release()
                if self.thread_id in self.thread_lock_queue[db_tbl_name]:
                    # spin until this thread reaches the head of the queue
                    while 1:
                        if self.thread_lock_queue[db_tbl_name][
                                0] == self.thread_id:
                            return True
                        time.sleep(0.001)
                else:
                    self.thread_lock_queue[db_tbl_name].append(self.thread_id)
                    self.__check_lock(db_tbl_name=db_tbl_name)
        else:
            if lock_state:
                self.thread_lock_queue[db_tbl_name] = [self.thread_id]
                self.thread_lock.release()
                self.__check_lock(db_tbl_name=db_tbl_name)
        self.thread_lock.acquire()
        self.__check_lock(db_tbl_name=db_tbl_name, lock_state=True)

    def __enter__(self):
        # Worker loop: runs forever, draining transactions off the queue
        # and applying them in order per db/table.
        fetch_value = self.__get_fetch_all()
        __mark_status = None
        while 1:
            if not self.queue.empty():
                queue_value = self.queue.get()
                db_tbl_name = queue_value[0]
                trancaction_all = queue_value[1]
                _uuid = queue_value[2]
                if trancaction_all:
                    self.__check_lock(db_tbl_name)
                    for trancaction in trancaction_all:
                        tran_len = len(trancaction_all)
                        value_list = trancaction['value_list']
                        if value_list:
                            db_name, tbl_name = value_list[1], value_list[2]
                            gno_uid, gno_id, binlog, at_pos = trancaction[
                                'gno_uid'], trancaction['gno_id'], trancaction[
                                    'binlog'], value_list[3]
                            db_tbl_name_serverid = '{}:{}'.format(
                                db_tbl_name, self.server_id)
                            # skip events already applied before a restart
                            if db_tbl_name_serverid in fetch_value:
                                _state = self.__fetch_check(
                                    fetch_value[db_tbl_name_serverid],
                                    gno_uid, gno_id, at_pos, binlog)
                                if _state:
                                    del fetch_value[db_tbl_name_serverid]
                                else:
                                    continue
                            # multi-statement transaction: mark once up front,
                            # single-statement: mark every time
                            if tran_len >= 2:
                                if __mark_status is None:
                                    self.__set_mark(db_name=db_name,
                                                    tbl_name=tbl_name,
                                                    gtid=gno_uid,
                                                    gno_id=gno_id,
                                                    at_pos=at_pos,
                                                    binlog=binlog)
                                    __mark_status = True
                            else:
                                self.__set_mark(db_name=db_name,
                                                tbl_name=tbl_name,
                                                gtid=gno_uid,
                                                gno_id=gno_id,
                                                binlog=binlog,
                                                at_pos=at_pos)
                            sql_list = self.GetSQL(_values=value_list[0],
                                                   event_code=value_list[4],
                                                   database_name=db_name,
                                                   table_name=tbl_name)
                            if sql_list:
                                for sql in sql_list:
                                    self.trancaction_list.append(
                                        [sql[0], sql[1]])
                                    self.__check_stat(
                                        self.__raise_sql(sql=sql[0],
                                                         args=sql[1]))
                    if __mark_status:
                        # refresh the mark with the final position of the transaction
                        self.__set_mark(db_name=db_name,
                                        tbl_name=tbl_name,
                                        gtid=gno_uid,
                                        gno_id=gno_id,
                                        at_pos=at_pos,
                                        binlog=binlog)
                    self.__raise_sql('commit', retry=True)
                    self.chunk_list_status_th[_uuid].append(self.thread_id)
                    self.trancaction_list = []
                    __mark_status = None
                    # release our slot in the per-table ordering queue
                    self.thread_lock_queue[db_tbl_name].pop(0)
            else:
                time.sleep(0.01)

    def __fetch_check(self, fetch, gno_uid, gno_id, at_pos, binlog):
        '''
        When restarting or taking over, compare the per-table position
        stored in the mark table against the incoming binlog event.  The
        mark table is flushed before the status database, so on mismatch
        (e.g. after a failover changed the server uuid) the mark table
        wins.
        :param fetch: [gno_uid, gno_id, at_pos, binlog]
        :param gno_uid:
        :param gno_id:
        :param at_pos:
        :return: True -> event is new and must be applied, False -> skip
        '''
        if gno_uid:
            if str(gno_uid) == str(fetch[0]):
                if int(fetch[1]) > int(gno_id):
                    return False
                elif int(gno_id) == int(
                        fetch[1]) and int(fetch[2]) >= int(at_pos):
                    return False
                else:
                    return True
            elif str(gno_uid) != str(fetch[0]):
                # different server uuid: trust the mark table, skip
                return False
        else:
            if fetch[-1] == binlog:
                # NOTE(review): int(fetch[2] >= int(at_pos)) looks like a
                # misplaced parenthesis for int(fetch[2]) >= int(at_pos);
                # truthiness happens to coincide, but confirm intent.
                if int(fetch[2] >= int(at_pos)):
                    return False
                else:
                    return True
            else:
                # different binlog file: compare file sequence numbers
                binlog_num_cur = int(fetch[-1].split('.')[-1])
                binlog_num_sync = int(binlog.split('.')[-1])
                if binlog_num_cur < binlog_num_sync:
                    return False
                else:
                    return True

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.destination_cur.close()
        self.destination_conn.close()
class Dump(escape):
    '''
    Copies table data from a source MySQL cursor into a destination
    database (MySQL, PostgreSQL or Phoenix), chunk by chunk.
    '''

    def __init__(self, **kwargs):
        super(Dump, self).__init__()
        self.mysql_cur = kwargs['cur']                      # source cursor
        self.des_conn_info = kwargs['des_conn_info']        # destination conn params
        self.des_mysql_conn = kwargs[
            'des_conn'] if 'des_conn' in kwargs else None
        self.des_mysql_cur = kwargs['des_cur'] if 'des_cur' in kwargs else None
        self.destination_type = kwargs['destination_type']
        self.table_column_struct = kwargs['table_column_struct']
        self.result = None          # last fetched chunk of source rows
        self.jar = kwargs['jar'] if 'jar' in kwargs else None
        self.jar_conf = kwargs['jar_conf'] if 'jar_conf' in kwargs else None
        self.binlog = kwargs['binlog']
        self.database = None
        self.sql = None             # last INSERT/UPSERT statement (for retry)
        self.queal_struct = None    # True when a heterogeneous mapping is used
        # FIX: these imports were function-local and therefore invisible to
        # __raise_sql / __retry_connection_destion, which reference the
        # module names at call time; declare them global so they resolve for
        # the configured destination type.
        # NOTE(review): for a plain mysql destination neither module is
        # imported, so the phoenixdb/psycopg2 except clauses in __raise_sql
        # would still raise NameError if evaluated — confirm the module top
        # does not already import them unconditionally.
        if self.destination_type == 'postgresql':
            global psycopg2
            import psycopg2
        elif self.destination_type == 'phoenix':
            global phoenixdb, InitPhoenixDB
            import phoenixdb
            from mode.phoenix.InitDB import InitDB as InitPhoenixDB

    def prepare_structe(self, database, tablename):
        '''
        Prepare the matching database/table on the destination.
        The destination table is DROPPED if it exists — beware of data loss
        when the target table already holds rows.  Phoenix targets skip the
        DDL preparation entirely.
        :param database:
        :param tablename:
        :return: True on success
        '''
        if self.destination_type != 'phoenix':
            self.__check_stat(
                self.__raise_sql(
                    sql='CREATE DATABASE IF NOT EXISTS {}'.format(database),
                    retry=True))
            self.mysql_cur.execute('SHOW CREATE TABLE {}.{}'.format(
                database, tablename))
            result = self.mysql_cur.fetchall()
            create_sql = result[0]['Create Table']
            self.__check_stat(
                self.__raise_sql(sql='USE {}'.format(database), retry=True))
            self.__check_stat(
                self.__raise_sql(
                    sql='DROP TABLE IF EXISTS {}'.format(tablename),
                    retry=True))
            self.__check_stat(self.__raise_sql(sql=create_sql, retry=True))
        return True

    def __column_join(self, cols, source=None):
        # Build the column list for SELECT/INSERT; backquotes only for
        # MySQL-style identifiers (and for the source side of a phoenix/pg
        # copy).  With a heterogeneous mapping, cols is a dict whose values
        # are the destination column names.
        if self.destination_type in ('phoenix', 'postgresql'):
            if source:
                if self.queal_struct:
                    return ','.join(['`{}`'.format(cols[col]) for col in cols])
                return ','.join(['`{}`'.format(col) for col in cols])
            else:
                if self.queal_struct:
                    return ','.join(['{}'.format(cols[col]) for col in cols])
                return ','.join(['{}'.format(col) for col in cols])
        if self.queal_struct:
            return ','.join(['`{}`'.format(cols[col]) for col in cols])
        return ','.join(['`{}`'.format(col) for col in cols])

    def dump_to_new_db(self, database, tablename, idx, pri_idx,
                       chunk_list=None, bytes_col_list=None, tbl=None,
                       cols=None, iso=None):
        '''
        Stream the rows of database.tablename into the destination, one
        index-range chunk (from chunk_list) at a time, 2000 rows per query.
        :param database:   source database name
        :param tablename:  source table name
        :param idx:        index column used to range/paginate
        :param pri_idx:    primary index (unused here, kept for interface)
        :param chunk_list: [(start, end), ...] ranges over idx
        :param tbl:        (dest_db, dest_table) for heterogeneous copies
        :param cols:       column mapping for heterogeneous copies
        :param iso:        iso transform configuration
        '''
        self.database = database
        iso_value = Iso(**{'iso': iso, 'conf_cols': cols})
        if tbl and cols:
            self.queal_struct = True
            _cols, _dcols, _ucols = iso_value.get_cols()
            _cols = ','.join(['`{}`'.format(c) for c in _cols])
        sql_type = 'INSERT'
        if self.destination_type in ('phoenix', 'postgresql'):
            sql_type = 'UPSERT'
        # Build the destination INSERT/UPSERT prefix once.
        if tbl:
            if cols:
                if iso:
                    if self.destination_type == 'phoenix':
                        _cl = ','.join(['{}'.format(col) for col in _dcols])
                    else:
                        _cl = ','.join(['`{}`'.format(col) for col in _dcols])
                    insert_sql = '{} INTO {}.{}({}) VALUES'.format(
                        sql_type, tbl[0], tbl[1], _cl)
                else:
                    insert_sql = '{} INTO {}.{}({}) VALUES'.format(
                        sql_type, tbl[0], tbl[1], self.__column_join(cols))
            else:
                insert_sql = '{} INTO {}.{}({}) VALUES'.format(
                    sql_type, tbl[0], tbl[1],
                    self.__column_join(self.table_column_struct))
        else:
            insert_sql = '{} INTO {}.{}({}) VALUES'.format(
                sql_type, database, tablename,
                self.__column_join(self.table_column_struct))
        for cl in chunk_list:
            start_num = cl[0]
            end_num = cl[1]
            limit_num = 0
            while True:
                # Page through the chunk with LIMIT offset,2000; limit_num
                # accumulates the rows already copied from this chunk.
                if tbl and cols:
                    sql = 'SELECT {} FROM {}.{} WHERE {}>=%s and {}<=%s ORDER BY {} LIMIT {},%s'.format(
                        _cols, database, tablename, idx, idx, idx, limit_num)
                else:
                    sql = 'SELECT {} FROM {}.{} WHERE {}>=%s and {}<=%s ORDER BY {} LIMIT {},%s'.format(
                        self.__column_join(self.table_column_struct, True),
                        database, tablename, idx, idx, idx, limit_num)
                self.__get_from_source_db_limit2000(
                    sql=sql, args_value=[start_num, end_num])
                # Flatten the fetched rows into one executemany/execute call.
                self.all_value = []
                if self.result:
                    _len = len(self.result[0])
                    _num = len(self.result)
                    if self.destination_type == 'phoenix':
                        _valus_list = []
                        for row in self.result:
                            if self.queal_struct:
                                row = iso_value.iso_value(row)
                            _values = [_ for _ in row.values()]
                            _valus_list.append(self.escape_string(_values))
                        _len = len(_valus_list[0])
                        _sql = insert_sql + '{}'.format(
                            self.__combination_value_format(_len=_len,
                                                            _num=None))
                        self.__check_stat(
                            self.__raise_sql(sql=_sql,
                                             args=_valus_list,
                                             retry=True))
                    else:
                        for row in self.result:
                            if self.queal_struct:
                                row = iso_value.iso_value(valus=row)
                                self.all_value += row.values()
                                continue
                            self.all_value += row.values()
                        if self.queal_struct:
                            _len = len(_dcols)
                        self.sql = insert_sql + '{}'.format(
                            self.__combination_value_format(_len=_len,
                                                            _num=_num))
                        self.__check_stat(
                            self.__raise_sql(sql=self.sql,
                                             args=self.all_value))
                        self.__check_stat(self.__raise_sql(sql='commit'))
                else:
                    # no rows returned: chunk exhausted
                    break
                # Fewer than 2000 rows means the chunk is finished.
                return_len = len(self.result)
                limit_num += return_len
                if return_len < 2000:
                    break

    def __get_from_source_db_list(self, sql, pri_value=None):
        # Fetch rows from the source for an arbitrary parameterised SELECT.
        try:
            self.mysql_cur.execute(sql, pri_value)
            self.result = self.mysql_cur.fetchall()
        except pymysql.Error:
            Logging(msg=traceback.format_exc(), level='error')
            sys.exit()

    def __get_from_source_db_limit2000(self, sql, args_value):
        # Fetch up to 2000 rows from the source for one page of a chunk.
        try:
            if args_value:
                self.mysql_cur.execute(sql, args_value + [2000])
            else:
                # FIX: was `execute(sql, 2000)` — DB-API requires a
                # sequence of parameters, not a bare int.
                self.mysql_cur.execute(sql, [2000])
            self.result = self.mysql_cur.fetchall()
        except pymysql.Error:
            Logging(msg=traceback.format_exc(), level='error')
            sys.exit()

    def __combination_value_format(self, _len, _num):
        '''Build the "(%s,%s,...),(...)" placeholder tail ("?" for Phoenix).'''
        if self.destination_type == 'phoenix':
            one_format = '({})'.format(','.join(['?' for i in range(_len)]))
        else:
            one_format = '({})'.format(','.join(['%s' for i in range(_len)]))
        if _num:
            all_ = ','.join(one_format for i in range(_num))
            return all_
        return one_format

    def __raise_sql(self, sql, args=None, retry=None):
        '''
        Execute one statement (or commit) on the destination, retrying on
        recoverable connection errors.
        :param sql:  statement text, or the literal 'commit'
        :param args: parameter list (FIX: default was a mutable [])
        :param retry: propagate full-transaction retry semantics
        :return: True on success / handled retry, None on hard failure
        '''
        try:
            args = self.escape_string(args) if args else []
        except:
            Logging(msg=traceback.format_exc(), level='error')
        try:
            if sql == 'commit':
                self.des_mysql_conn.commit()
            else:
                if self.destination_type == 'phoenix':
                    self.des_mysql_cur.executemany(sql, args)
                else:
                    self.des_mysql_cur.execute(sql, args)
        except phoenixdb.errors.InternalError:
            Logging(msg=traceback.format_exc(), level='error')
            self.__retry_execute(sql=sql, args=args, retry=retry)
            return True
        except psycopg2.OperationalError as e:
            Logging(msg=e.args, level='error')
            if sql == 'commit':
                self.__retry_execute(retry=retry)
            else:
                self.__retry_execute(sql=sql, args=args, retry=retry)
            return True
        except psycopg2.InterfaceError as e:
            Logging(msg=e.args, level='error')
            if sql == 'commit':
                self.__retry_execute(retry=retry)
            else:
                self.__retry_execute(sql=sql, args=args, retry=retry)
            return True
        except pymysql.Error as e:
            Logging(msg=e.args, level='error')
            # Only the error codes whitelisted in ErrorCode are retryable.
            if e.args[0] in ErrorCode:
                if ErrorCode[e.args[0]]:
                    if sql == 'commit':
                        self.__retry_execute(retry=retry)
                    else:
                        self.__retry_execute(sql=sql, args=args, retry=retry)
                    return True
            Logging(msg=e, level='error')
            return None
        except:
            Logging(msg=traceback.format_exc(), level='error')
            return None
        return True

    def __check_stat(self, state):
        # Any falsy execution result is fatal for the dump.
        if state:
            return
        else:
            Logging(msg='desthread failed!!!!', level='error')
            sys.exit()

    def __retry_execute(self, sql=None, args=None, retry=None):
        '''
        Retry after a connection failure: reconnect, then either replay the
        given statement (retry=True) or replay the last buffered
        INSERT batch (self.sql / self.all_value) and commit.
        :param sql:  statement to replay
        :param args: its parameters
        :param retry: replay only the given statement when set
        '''
        self.__retry_connection_destion()
        if retry:
            self.__raise_sql(sql=sql, args=args, retry=retry)
            return
        else:
            Logging(msg='retry execute sql', level='info')
            self.__raise_sql(sql=self.sql, args=self.all_value, retry=True)
            self.__raise_sql('commit')
            return

    def __retry_connection_destion(self):
        '''
        Reconnect to the destination, retrying for up to 60 attempts (a
        destination failover should finish inside this window).  Exits the
        process when every attempt fails.
        :return: True on successful reconnect
        '''
        import time
        if self.des_mysql_conn:
            try:
                Logging(msg='close destination db connection', level='info')
                self.des_mysql_conn.close()
                self.des_mysql_cur.close()
            except:
                Logging(msg=traceback.format_exc(), level='error')
        time.sleep(10)
        for i in range(60):
            Logging(msg='connection to destination db try agian!!!',
                    level='info')
            if self.destination_type == 'phoenix':
                self.des_mysql_conn = InitPhoenixDB(
                    host=self.des_conn_info['mysql_host'],
                    port=self.des_conn_info['mysql_port'],
                    user=self.des_conn_info['mysql_user'],
                    passwd=self.des_conn_info['mysql_password'],
                    jar=self.jar,
                    jar_conf=self.jar_conf).Init()
                if self.des_mysql_conn:
                    try:
                        self.des_mysql_cur = self.des_mysql_conn.cursor()
                        return True
                    except:
                        Logging(msg=traceback.format_exc(), level='error')
            else:
                self.des_mysql_conn = InitMyDB(
                    **dict(self.des_conn_info, **
                           {'type': self.destination_type})).Init()
                if self.des_mysql_conn:
                    try:
                        self.des_mysql_cur = self.des_mysql_conn.cursor()
                        if self.destination_type and self.destination_type != 'mysql':
                            pass
                        else:
                            if self.binlog is None:
                                self.des_mysql_cur.execute(
                                    'set sql_log_bin=0;')
                            self.des_mysql_cur.execute(
                                'SET SESSION wait_timeout = 2147483;')
                        return True
                    except:
                        Logging(msg=traceback.format_exc(), level='error')
            time.sleep(1)
        else:
            Logging(
                msg=
                'try 60 times to fail for conncetion destination db,exist now',
                level='error')
            sys.exit()
class processdump(Prepare):
    '''
    Online dump driver: walks the selected databases/tables and copies them
    to the destination (MySQL / PostgreSQL / Phoenix), single- or
    multi-threaded, optionally through a heterogeneous table/column mapping.
    '''

    def __init__(self, threads=None, dbs=None, tables=None, src_kwargs=None,
                 des_kwargs=None, binlog=None, map_conf=None,
                 queal_struct=None, destination_type=None, jar=None,
                 jar_conf=None):
        super(processdump,
              self).__init__(threads=threads, src_kwargs=src_kwargs,
                             des_kwargs=des_kwargs, jar=jar,
                             jar_conf=jar_conf,
                             destination_type=destination_type)
        # heterogeneous mapping configuration
        self.map_conf = map_conf
        self.queal_struct = queal_struct
        self._tmp_queal_struct = None
        self.jar, self.jar_conf = jar, jar_conf
        self.destination_type = destination_type
        self.binlog = binlog
        self.des_kwargs = des_kwargs
        self.databases = dbs
        self.tables = tables
        self.queue = queue.Queue()
        self.des_mysql_conn = None
        self.des_mysql_cur = None
        self.conn, self.cur = None, None
        self.dump = None
        self.table_column_struct = []
        self.__init_info()

    def __init_info(self, des=None):
        '''
        Initialise the primary database connections: the source connection
        by default, the destination connection (plus the shared Dump helper)
        when des is set.
        :param des: initialise the destination side when truthy
        :return:
        '''
        if des:
            if self.destination_type == 'phoenix':
                # FIX: these imports previously happened only inside
                # __init__, where they bound function-locals invisible to
                # this method; import here, where the names are used.
                import phoenixdb.cursor
                from mode.phoenix.InitDB import InitDB as InitPhoenixDB
                self.des_mysql_conn = InitPhoenixDB(
                    user=self.des_conn_info['mysql_user'],
                    passwd=self.des_conn_info['mysql_password'],
                    host=self.des_conn_info['mysql_host'],
                    port=self.des_conn_info['mysql_port'],
                    jar=self.jar, jar_conf=self.jar_conf).Init()
                self.des_mysql_cur = self.des_mysql_conn.cursor(
                    cursor_factory=phoenixdb.cursor.DictCursor)
            else:
                self.des_mysql_conn = InitMyDB(
                    **dict(self.des_kwargs,
                           **{'type': self.destination_type})).Init()
                self.des_mysql_cur = self.des_mysql_conn.cursor()
                if self.destination_type and self.destination_type != 'mysql':
                    pass
                else:
                    if self.binlog is None:
                        self.des_mysql_cur.execute('set sql_log_bin=0')
                    self.des_mysql_cur.execute(
                        'SET SESSION wait_timeout = 2147483;')
            self.des_thread_list.append({
                'conn': self.des_mysql_conn,
                'cur': self.des_mysql_cur
            })
            self.dump = Dump(cur=self.cur, des_conn=self.des_mysql_conn,
                             des_cur=self.des_mysql_cur,
                             destination_type=self.destination_type,
                             table_column_struct=self.table_column_struct,
                             des_conn_info=self.des_conn_info, jar=self.jar,
                             jar_conf=self.jar_conf, binlog=self.binlog)
        else:
            self.conn, self.cur = self.init_conn(primary_t=True)

    def __get_queal_conf(self):
        '''Load the heterogeneous (source->dest) mapping configuration.'''
        from lib.get_conf import GetStruct
        self._tmp_queal_struct = GetStruct(self.map_conf).get()

    def __equeal(self, database, tables, threads=None):
        # Dump each mapped table, resolving its destination table/column/iso
        # mapping first.
        for table in tables:
            state = self.__queal_table_check(database, table)
            if state:
                tbl, cols, iso = self.__queal_col_check(database, table)
                _parmeter = [database, table, tbl, cols, iso]
                if threads:
                    self.__mul_dump_go(*_parmeter)
                else:
                    self.__dump_go(database=database, tablename=table,
                                   tbl=tbl, cols=cols, iso=iso)

    def __queal_start(self, threads=None, database=None):
        # Entry point for heterogeneous dumps of one database.
        if database in self._tmp_queal_struct:
            if self.tables:
                self.__equeal(database, self.tables, threads=threads)
            else:
                tables = self.__queal_table_check(database, check=True)
                if tables:
                    self.__equeal(database, tables, threads=threads)

    def __queal_table_check(self, database, tablename=None, check=None):
        '''
        Check whether a table exists in the heterogeneous mapping; with
        check set, list all mapped tables of the database instead.
        :param database:
        :param tablename:
        :param check: list mode
        :return: list of tables, True, or None
        '''
        db_struct = self._tmp_queal_struct[database]
        if check:
            tables = []
            for table in db_struct:
                tables.append(table)
            return tables
        if tablename in db_struct:
            return True
        return None

    def __queal_col_check(self, database, tablename):
        '''
        Return the mapped destination (table, columns, iso) for a source
        table.
        :param database:
        :param tablename:
        :return: (tbl, cols, iso)
        '''
        db_struct = self._tmp_queal_struct[database]
        tbl_info = db_struct[tablename]
        tbl = tbl_info['tbl']
        cols = None
        iso = None
        if 'cols' in tbl_info:
            cols = tbl_info['cols']
        if 'iso' in tbl_info:
            iso = tbl_info['iso']
        return tbl, cols, iso

    def start(self):
        '''
        Run the whole online dump and return the binlog coordinates.
        The binlog info is read after every connection is initialised: all
        connections use SNAPSHOT, so the primary connection holds a global
        read lock only very briefly, released once initialisation is done.
        :return: (binlog_file, binlog_pos, excute_gtid)
        '''
        if self.queal_struct:
            self.__get_queal_conf()
        binlog_file, binlog_pos, excute_gtid = self.master_info(cur=self.cur)
        if (binlog_file and binlog_pos) or (excute_gtid):
            pass
        else:
            self.cur.execute('UNLOCK TABLES')
            self.close(self.cur, self.conn)
            Logging(msg='invalid master info , file {} position {} gtid {}'.
                    format(binlog_file, binlog_pos, excute_gtid),
                    level='error')
            sys.exit()
        # Initialise all source connections before releasing the read lock.
        if self.threads and self.threads > 1:
            self.init_conn()
        self.cur.execute('UNLOCK TABLES')
        if self.threads and self.threads > 1:
            # multi-threaded dump
            for database in self.databases:
                if self.queal_struct:
                    self.__queal_start(threads=True, database=database)
                elif self.tables:
                    for tablename in self.tables:
                        _parmeter = [database, tablename]
                        self.__mul_dump_go(*_parmeter)
                else:
                    tables = self.get_tables(cur=self.cur, db=database)
                    for tablename in tables:
                        _parmeter = [database, tablename]
                        self.__mul_dump_go(*_parmeter)
        else:
            # single-threaded dump
            for database in self.databases:
                if self.queal_struct:
                    self.__queal_start(database=database)
                elif self.tables:
                    for tablename in self.tables:
                        _parameter = [database, tablename]
                        self.__dump_go(*_parameter)
                else:
                    # whole-database dump
                    tables = self.get_tables(cur=self.cur, db=database)
                    for table in tables:
                        _parameter = [database, table]
                        self.__dump_go(*_parameter)
        # close every connection once the dump is complete
        if self.threads and self.threads > 1:
            for thread in self.thread_list:
                self.close(thread['cur'], thread['conn'])
            for thread in self.des_thread_list:
                self.close(thread['cur'], thread['conn'])
        return binlog_file, binlog_pos, excute_gtid

    def __dump_go(self, database, tablename, idx_name=None, pri_idx=None,
                  max_min=None, bytes_col_list=None, tbl=None, cols=None,
                  iso=None):
        '''
        Single-threaded dump of one table.
        :param database:
        :param tablename:
        :return:
        '''
        self.__getcolumn(database, tablename)
        if len(self.des_thread_list) < 1:
            self.__init_info(des=True)
        stat = self.queal_struct if self.queal_struct else \
            self.dump.prepare_structe(database=database, tablename=tablename)
        if stat:
            if idx_name is None and pri_idx is None:
                idx_name, pri_idx = self.check_pri(cur=self.cur, db=database,
                                                   table=tablename)
            bytes_col_list = self.check_byte_col(cur=self.cur, db=database,
                                                 table=tablename)
            max_min = self.split_data(
                self.cur,
                self.get_max_min(cur=self.cur, databases=database,
                                 tables=tablename, index_name=idx_name),
                idx_name, database, tablename, None)
            if max_min and max_min[0]:
                dump = Dump(cur=self.cur, des_conn=self.des_mysql_conn,
                            des_cur=self.des_mysql_cur,
                            destination_type=self.destination_type,
                            table_column_struct=self.table_column_struct,
                            des_conn_info=self.des_conn_info, jar=self.jar,
                            jar_conf=self.jar_conf, binlog=self.binlog)
                dump.dump_to_new_db(database=database, tablename=tablename,
                                    idx=idx_name, pri_idx=pri_idx,
                                    chunk_list=max_min,
                                    bytes_col_list=bytes_col_list, tbl=tbl,
                                    cols=cols, iso=iso)
        else:
            Logging(msg='Initialization structure error', level='error')
            sys.exit()

    def __mul_dump_go(self, database, tablename, tbl=None, cols=None,
                      iso=None):
        '''
        Multi-threaded dump of one table: pick the best available index and
        split its value range into one interval per worker thread.
        :param database:
        :param tablename:
        :return:
        '''
        idx_name, pri_idx = self.check_pri(cur=self.cur, db=database,
                                           table=tablename)
        chunks_list, uli = self.get_chunks(cur=self.cur, databases=database,
                                           tables=tablename,
                                           index_name=idx_name)
        if len(self.des_thread_list) < 1:
            self.__init_info(des=True)
        stat = self.queal_struct if self.queal_struct else \
            self.dump.prepare_structe(database=database, tablename=tablename)
        if chunks_list is None:
            Logging(msg=
                    'this table {} chunks_list is None,maybe this table not data'
                    .format(tablename),
                    level='warning')
            return
        if uli:
            # parallel path: one Dump + thread per chunk
            if len(self.des_thread_list) <= 1:
                if self.threads and self.threads > 1:
                    self.init_des_conn(binlog=self.binlog)
            if stat:
                self.__getcolumn(database, tablename)
                for t in range(len(self.thread_list)):
                    dump = Dump(cur=self.thread_list[t]['cur'],
                                des_conn=self.des_thread_list[t]['conn'],
                                des_cur=self.des_thread_list[t]['cur'],
                                destination_type=self.destination_type,
                                table_column_struct=self.table_column_struct,
                                des_conn_info=self.des_conn_info,
                                jar=self.jar, jar_conf=self.jar_conf,
                                binlog=self.binlog)
                    __dict_ = [
                        self.queue, dump, chunks_list[t], database, tablename,
                        idx_name, pri_idx, tbl, cols, iso
                    ]
                    _t = ThreadDump(*__dict_)
                    _t.start()
                self.__get_queue()
            else:
                Logging(msg='Initialization structure error', level='error')
                sys.exit()
        else:
            # FIX: iso was silently dropped on the single-threaded fallback
            # while tbl/cols were forwarded; forward it too.
            self.__dump_go(database, tablename, idx_name, pri_idx,
                           chunks_list, tbl=tbl, cols=cols, iso=iso)

    def __get_queue(self):
        '''
        Drain completion tokens from the queue until every worker thread has
        reported back.
        :return:
        '''
        _count = len(self.thread_list)
        _tmp_count = 0
        while _tmp_count < _count:
            value = self.queue.get()
            if value:
                _tmp_count += 1

    def __getcolumn(self, *args):
        '''args order: database, tablename'''
        self.table_column_struct = []
        sql = 'select COLUMN_NAME from INFORMATION_SCHEMA.COLUMNS where table_schema=%s and table_name=%s order by ORDINAL_POSITION;'
        self.cur.execute(sql, args=args)
        result = self.cur.fetchall()
        for row in result:
            self.table_column_struct.append(row['COLUMN_NAME'])
def __init__(self,host=None,port=None,user=None,passwd=None,socket=None): self.connection = InitMyDB(mysql_host=host,mysql_port=port,mysql_user=user,mysql_password=passwd,unix_socket=socket).Init() self.cur = self.connection.cursor() self.cur.execute('set sql_log_bin=0;')
class processdump(Prepare):
    '''
    Simple (MySQL-to-MySQL only) online dump driver: copies the selected
    databases/tables to the destination, single- or multi-threaded, and
    returns the binlog coordinates captured under the snapshot.
    '''

    def __init__(self, threads=None, dbs=None, tables=None, src_kwargs=None,
                 des_kwargs=None, binlog=None):
        super(processdump, self).__init__(threads=threads,
                                          src_kwargs=src_kwargs,
                                          des_kwargs=des_kwargs)
        self.binlog = binlog
        self.des_kwargs = des_kwargs
        self.databases = dbs
        self.tables = tables
        self.queue = queue.Queue()
        self.des_mysql_conn = None
        self.des_mysql_cur = None
        self.conn, self.cur = None, None
        self.dump = None
        self.__init_info()

    def __init_info(self):
        '''
        Initialise the primary source and destination connections and the
        shared Dump helper.
        :return:
        '''
        self.des_mysql_conn = InitMyDB(**self.des_kwargs).Init()
        self.des_mysql_cur = self.des_mysql_conn.cursor()
        self.des_thread_list.append({
            'conn': self.des_mysql_conn,
            'cur': self.des_mysql_cur
        })
        if self.binlog is None:
            self.des_mysql_cur.execute('set sql_log_bin=0')
        self.des_mysql_cur.execute('SET SESSION wait_timeout = 2147483;')
        self.conn, self.cur = self.init_conn(primary_t=True)
        # NOTE(review): the Dump class visible in this module requires
        # des_conn_info/destination_type/... kwargs; confirm which Dump
        # variant this (older) class is meant to pair with.
        self.dump = Dump(cur=self.cur, des_conn=self.des_mysql_conn,
                         des_cur=self.des_mysql_cur)

    def start(self):
        '''
        Run the whole online dump and return the binlog coordinates.
        The binlog info is read after every connection is initialised: all
        connections use SNAPSHOT, so the primary connection holds a global
        read lock only very briefly, released once initialisation is done.
        :return: (binlog_file, binlog_pos)
        '''
        binlog_file, binlog_pos = self.master_info(cur=self.cur)
        if binlog_file and binlog_pos:
            pass
        else:
            self.cur.execute('UNLOCK TABLES')
            self.close(self.cur, self.conn)
            Logging(msg='invalid master info , file {} position {}'.format(
                binlog_file, binlog_pos),
                    level='error')
            sys.exit()
        # initialise every source/destination connection, then release lock
        if self.threads and self.threads > 1:
            self.init_conn()
            self.init_des_conn(binlog=self.binlog)
        self.cur.execute('UNLOCK TABLES')
        if self.threads and self.threads > 1:
            # multi-threaded dump
            for database in self.databases:
                if self.tables:
                    for tablename in self.tables:
                        _parmeter = [database, tablename]
                        self.__mul_dump_go(*_parmeter)
                else:
                    tables = self.get_tables(cur=self.cur, db=database)
                    for tablename in tables:
                        _parmeter = [database, tablename]
                        self.__mul_dump_go(*_parmeter)
        else:
            # single-threaded dump
            for database in self.databases:
                if self.tables:
                    for tablename in self.tables:
                        _parameter = [database, tablename]
                        self.__dump_go(*_parameter)
                else:
                    # whole-database dump
                    tables = self.get_tables(cur=self.cur, db=database)
                    for table in tables:
                        _parameter = [database, table]
                        self.__dump_go(*_parameter)
        # close every connection once the dump is complete
        if self.threads and self.threads > 1:
            for thread in self.thread_list:
                self.close(thread['cur'], thread['conn'])
            for thread in self.des_thread_list:
                self.close(thread['cur'], thread['conn'])
        return binlog_file, binlog_pos

    def __dump_go(self, database, tablename, idx_name=None, pri_idx=None,
                  max_min=None, bytes_col_list=None):
        '''
        Single-threaded dump of one table.
        :param database:
        :param tablename:
        :return:
        '''
        stat = self.dump.prepare_structe(database=database,
                                         tablename=tablename)
        if stat:
            if idx_name is None and pri_idx is None:
                idx_name, pri_idx = self.check_pri(cur=self.cur, db=database,
                                                   table=tablename)
            bytes_col_list = self.check_byte_col(cur=self.cur, db=database,
                                                 table=tablename)
            max_min = self.get_max_min(cur=self.cur, database=database,
                                       tables=tablename, index_name=idx_name)
            if max_min:
                self.dump.dump_to_new_db(database=database,
                                         tablename=tablename, idx=idx_name,
                                         pri_idx=pri_idx, chunk_list=max_min,
                                         bytes_col_list=bytes_col_list)
        else:
            Logging(msg='Initialization structure error', level='error')
            sys.exit()

    def __mul_dump_go(self, database, tablename):
        '''
        Multi-threaded dump of one table: pick the best available index and
        split its value range into one interval per worker thread.
        :param database:
        :param tablename:
        :return:
        '''
        idx_name, pri_idx = self.check_pri(cur=self.cur, db=database,
                                           table=tablename)
        chunks_list, uli = self.get_chunks(cur=self.cur, databases=database,
                                           tables=tablename,
                                           index_name=idx_name)
        if chunks_list is None:
            Logging(
                msg='this table {} chunks_list is None,maybe this table not data'
                .format(tablename),
                level='warning')
            # FIX: without this return, the code below dereferences
            # chunks_list[t] (or forwards None to __dump_go) and crashes;
            # the richer processdump variant in this module returns here.
            return
        if uli:
            # parallel path: one Dump + thread per chunk
            if self.dump.prepare_structe(database=database,
                                         tablename=tablename):
                for t in range(len(self.thread_list)):
                    dump = Dump(cur=self.thread_list[t]['cur'],
                                des_conn=self.des_thread_list[t]['conn'],
                                des_cur=self.des_thread_list[t]['cur'])
                    __dict_ = [
                        self.queue, dump, chunks_list[t], database, tablename,
                        idx_name, pri_idx
                    ]
                    _t = ThreadDump(*__dict_)
                    _t.start()
                self.__get_queue()
            else:
                Logging(msg='Initialization structure error', level='error')
                sys.exit()
        else:
            # single-threaded fallback
            self.__dump_go(database, tablename, idx_name, pri_idx,
                           chunks_list)

    def __get_queue(self):
        '''
        Drain completion tokens from the queue until every worker thread has
        reported back.
        :return:
        '''
        _count = len(self.thread_list)
        _tmp_count = 0
        while _tmp_count < _count:
            value = self.queue.get()
            if value:
                _tmp_count += 1
class ReplicationMysql: def __init__(self, block=None, server_id=None, log_file=None, log_pos=None): self._log_file = log_file self._log_pos = log_pos self.block = block if block != None else False self.server_id = server_id if server_id != None else 133 self.connection = InitMyDB().Init() def __checksum_enabled(self): """Return True if binlog-checksum = CRC32. Only for MySQL > 5.6""" with self.connection.cursor() as cur: sql = 'SHOW GLOBAL VARIABLES LIKE "BINLOG_CHECKSUM";' cur.execute(sql) result = cur.fetchone() if result is None: return False if 'Value' in result and result['Value'] is None: return False return True def __set_checksum(self): with self.connection.cursor() as cur: cur.execute( "set @master_binlog_checksum= @@global.binlog_checksum;") def GetFile(self): with self.connection.cursor() as cur: sql = "show master status;" cur.execute(sql) result = cur.fetchone() return result['File'], result['Position'] def PackeByte(self): ''' Format for mysql packet position file_length: 4bytes dump_type: 1bytes position: 4bytes flags: 2bytes 0: BINLOG_DUMP_BLOCK 1: BINLOG_DUMP_NON_BLOCK server_id: 4bytes log_file :return: ''' COM_BINLOG_DUMP = 0x12 if self._log_file is None: if self._log_pos is None: self._log_file, self._log_pos = self.GetFile() else: self._log_file, _ = self.GetFile() elif self._log_file and self._log_pos is None: self._log_pos = 4 prelude = struct.pack('<i', len(self._log_file) + 11) \ + struct.pack("!B", COM_BINLOG_DUMP) prelude += struct.pack('<I', self._log_pos) if self.block: prelude += struct.pack('<h', 0) else: prelude += struct.pack('<h', 1) prelude += struct.pack('<I', self.server_id) prelude += self._log_file.encode() return prelude def ReadPack(self): _packet = self.PackeByte() if self.__checksum_enabled(): self.__set_checksum() if pymysql.__version__ < "0.6": self.connection.wfile.write(_packet) self.connection.wfile.flush() else: self.connection._write_bytes(_packet) self.connection._next_seq_id = 1 return self.connection '''
def __init__(self): self.connection = InitMyDB().Init() self.cur = self.connection.cursor()