def __check_task_state(self):
    """Reconcile the desired task states in ``master_thread_list`` with the
    locally running thread registries.

    - 'started': spawn the task thread if it is not already running.
    - 'watch':   start a watcher; on success stamp ``start_time`` back into
      ``master_thread_list``.
    - 'stoped':  terminate the task's worker threads and its heartbeat thread,
      or its watcher thread.

    :return: None
    """
    for idx, task in enumerate(master_thread_list):
        if task['state'] == 'started':
            if task['task_name'] not in self.task_thread_list:
                Logging(msg='start task for {} now....'.format(task['task_name']), level='info')
                self.__create_task(task_name=task['task_name'])
        elif task['state'] == 'watch':
            if task['task_name'] not in self.watch_thread_list:
                Logging(msg='start watch task for {} now....'.format(task['task_name']), level='info')
                if self.__create_watch(task_name=task['task_name']):
                    Logging(msg='watch success .......', level='info')
                    task['start_time'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                    master_thread_list[idx] = task.copy()
                else:
                    Logging(msg='watch is failed, because this task not exists for zookeepr,start task for now....', level='warning')
        elif task['state'] == 'stoped':
            if task['task_name'] in self.task_thread_list:
                for t in self.task_thread_list[task['task_name']]:
                    t.terminate()
                # BUG FIX: the original indexed heart_thread_list with the
                # literal string 'task_name' instead of the task's actual name,
                # so the heartbeat thread was never terminated (KeyError).
                self.heart_thread_list[task['task_name']].terminate()
            elif task['task_name'] in self.watch_thread_list:
                self.watch_thread_list[task['task_name']].terminate()
def __check_state(self):
    """Poll worker/watch threads once per second and clean up dead ones.

    If any replication thread of a task dies, the whole task (all its threads
    plus its heartbeat thread) is torn down. A dead watch thread is replaced
    by restarting replication for that task.

    :return: never returns (infinite monitoring loop)
    """
    while True:
        if self.task_thread_list:
            # BUG FIX: iterate over a snapshot of the keys — the original
            # deleted entries from the dict while iterating it, which raises
            # "dictionary changed size during iteration".
            for task_name in list(self.task_thread_list):
                for thread in self.task_thread_list[task_name]:
                    if thread.is_alive():
                        continue
                    # 不管源库读取任务、目标库操作任务其中一个宕机将直接退出任务
                    # (if either the source-read or destination-write thread
                    # dies, the whole task is stopped)
                    Logging(msg='replication thread {} is down '.format(self.task_thread_list[task_name]), level='error')
                    for t in self.task_thread_list[task_name]:
                        try:
                            t.terminate()
                        except:
                            pass
                    del self.task_thread_list[task_name]
                    Logging(msg='stop heart thread {} is down '.format(self.heart_thread_list[task_name]), level='error')
                    self.heart_thread_list[task_name].terminate()
                    del self.heart_thread_list[task_name]
                    # BUG FIX: stop scanning this task's thread list once the
                    # task entry is deleted; a second dead thread in the same
                    # list would otherwise raise KeyError.
                    break
        if self.watch_thread_list:
            # Snapshot for the same reason as above.
            for task_name in list(self.watch_thread_list):
                if not self.watch_thread_list[task_name].is_alive():
                    del self.watch_thread_list[task_name]
                    self.__create_repl(task_name=task_name)
        time.sleep(1)
def __raise_sql(self, sql, args=None, retry=None, type=None):
    """Apply one replicated binlog statement (or 'commit') to the destination
    (Phoenix) database.

    :param sql: statement text, or the literal string 'commit'
    :param args: bind parameters (escaped via ``escape_string`` before use)
    :param retry: forwarded to ``__retry_execute`` (transaction-level replay)
    :param type: forwarded to ``__retry_execute`` (statement-level replay)
    :return: True on success or when a retry was dispatched; None on a
             non-retryable failure
    """
    # BUG FIX: the default was the mutable ``args=[]`` (shared across calls);
    # None is the safe sentinel and behaves identically below, since an empty
    # list is falsy in the ``if args`` check.
    try:
        args = self.escape_string(args) if args else []
        #args = self.escape_args(args)
    except:
        Logging(msg=traceback.format_exc(), level='error')
        self.error_queue.put(1)
    try:
        if sql == 'commit':
            self.destination_conn.commit()
        else:
            #sql = sql % tuple(args)
            self.destination_cur.execute(sql, args)
    except phoenixdb.errors.InternalError:
        # Phoenix internal errors are considered transient: reconnect & retry.
        Logging(msg=traceback.format_exc(), level='error')
        self.__retry_execute(sql=sql, args=args, retry=retry, type=type)
        return True
    except:
        Logging(msg='sql:{},values:{}'.format(sql, args), level='error')
        Logging(msg=traceback.format_exc(), level='error')
        return None
    return True
def prepare_structe(self, database, tablename):
    """Prepare the matching database/table structure on the destination.

    The destination table is DROPPED if it already exists — any existing data
    in it is lost, so callers must make sure that is acceptable.

    :param database: schema name to create/use on the destination
    :param tablename: table whose DDL is copied from the source
    :return: True on success, False on any pymysql error
    """
    try:
        self.des_mysql_cur.execute(
            'CREATE DATABASE IF NOT EXISTS {}'.format(database))
    except pymysql.Warning:
        # BUG FIX: traceback.format_list() requires a stack-frame list and
        # raised TypeError here; format_exc() is the intended call.
        Logging(msg=traceback.format_exc(), level='warning')
    except pymysql.Error:
        Logging(msg=traceback.format_exc(), level='error')
        return False
    self.mysql_cur.execute('SHOW CREATE TABLE {}.{}'.format(
        database, tablename))
    result = self.mysql_cur.fetchall()
    create_sql = result[0]['Create Table']
    try:
        self.des_mysql_cur.execute('USE {}'.format(database))
        self.des_mysql_cur.execute(
            'DROP TABLE IF EXISTS {}'.format(tablename))
        self.des_mysql_cur.execute(create_sql)
    except pymysql.Warning:
        Logging(msg=traceback.format_exc(), level='warning')
    except pymysql.Error:
        Logging(msg=traceback.format_exc(), level='error')
        return False
    return True
def __retry_execute(self, sql=None, args=None, retry=None, type=None):
    """Replay work on the destination after re-establishing the connection.

    :param sql: statement text to replay (statement-level retry)
    :param args: bind parameters for ``sql``
    :param type: truthy → replay just this statement
    :param retry: None → replay the whole buffered transaction list;
                  truthy → restart the transaction SQL and commit
    :return: None
    """
    self.__retry_connection_destion()
    # Guard-clause style: each retry mode handled and returned in turn.
    if type:
        self.__raise_sql(sql=sql, args=args, type=type, retry=retry)
        return
    if retry is None:
        Logging(msg='sql={},args={},retry={},type={}'.format(
            sql, args, retry, type), level='info')
        Logging(
            msg='retry execute trancaction list, list length {}'.format(
                len(self.trancaction_list)), level='info')
        for stmt, params in self.trancaction_list:
            self.__raise_sql(stmt, params)
        return
    if retry:
        self.__restart_trancaction_sql()
        self.__raise_sql('commit')
def __retry_connection_destion(self):
    """Reconnect to the destination DB, retrying up to 60 times one second
    apart (long enough to ride out a failover); if every attempt fails the
    whole program is aborted via the error queue and ``sys.exit``.

    :return: True once reconnected; otherwise the process exits
    """
    import time
    attempt = 0
    while attempt < 60:
        attempt += 1
        Logging(msg='connection to destination db try agian!!!', level='info')
        try:
            conn = InitDB(host=self.dhost,
                          port=self.dport,
                          user=self.duser,
                          passwd=self.dpasswd,
                          jar=self.jar,
                          jar_conf=self.jar_conf).Init()
            self.destination_conn = conn
            self.destination_cur = conn.cursor()
            Logging(msg='connection success!!!', level='info')
            return True
        except:
            Logging(msg=traceback.format_exc(), level='error')
            time.sleep(1)
    Logging(
        msg=
        'try 60 times to fail for conncetion destination db,exist now',
        level='error')
    self.error_queue.put(1)
    sys.exit()
def __init_status(self):
    """Open the initial connection to the status database, retrying up to 60
    times; disables binlog logging on the session unless ``self.binlog`` is
    set. Exits the thread if no attempt succeeds.

    :return: None
    """
    attempt = 0
    connected = False
    while attempt < 60 and not connected:
        attempt += 1
        try:
            self.connection = InitMyDB(mysql_host=self.host,
                                       mysql_port=self.port,
                                       mysql_user=self.user,
                                       mysql_password=self.passwd,
                                       unix_socket=self.socket,
                                       auto_commit=False).Init()
            if self.connection:
                self.cur = self.connection.cursor()
                if self.binlog is None:
                    self.cur.execute('set sql_log_bin=0;')
                connected = True
        except pymysql.Error as e:
            Logging(msg=e.args, level='error')
            time.sleep(1)
    if not connected:
        Logging(
            msg='retry 60 time on status db is failed,exist thread now',
            level='error')
        sys.exit()
def init_conn(self, primary_t=None):
    """Open source-database connections and register them in
    ``self.thread_list``.

    :param primary_t: truthy → open the single primary connection (returns it);
                      falsy → open ``threads - 1`` secondary connections
    :return: (conn, cur) for the primary connection, otherwise None
    """
    if primary_t:
        conn = InitMyDB(**self.db_conn_info).Init()
        if not conn:
            return
        try:
            cur = conn.cursor()
            # The primary connection must start its transaction successfully.
            if self.__init_transaction(cur=cur, primary_t=True) is None:
                sys.exit()
            self.thread_list.append({'conn': conn, 'cur': cur})
            return conn, cur
        except pymysql.Error:
            Logging(msg=traceback.format_exc(), level='error')
            sys.exit()
    # Secondary connections: best-effort, one per extra worker thread.
    for _ in range(self.threads - 1):
        conn = InitMyDB(**self.db_conn_info).Init()
        if not conn:
            continue
        try:
            cur = conn.cursor()
            if self.__init_transaction(cur=cur):
                self.thread_list.append({'conn': conn, 'cur': cur})
        except:
            Logging(msg=traceback.format_exc(), level='error')
def __retry_conn(self):
    """Retry the status-DB connection up to 60 times, one second apart;
    re-disables session binlog unless ``self.binlog`` is set. Exits the
    thread when all attempts fail.

    :return: True once reconnected; otherwise the process exits
    """
    attempt = 0
    while attempt < 60:
        attempt += 1
        Logging(msg='retry connection for status db again!!', level='error')
        try:
            self.connection = InitMyDB(mysql_host=self.host,
                                       mysql_port=self.port,
                                       mysql_user=self.user,
                                       mysql_password=self.passwd,
                                       unix_socket=self.socket,
                                       auto_commit=False).Init()
            if self.connection:
                self.cur = self.connection.cursor()
                if not self.binlog:
                    self.cur.execute('set sql_log_bin=0;')
                return True
        except pymysql.Error:
            Logging(msg=traceback.format_exc(), level='error')
            time.sleep(1)
    Logging(
        msg='retry 60 time on status db is failed,exist thread now',
        level='error')
    sys.exit()
def __raise_sql(self, sql, args=None, retry=None):
    """Apply one replicated binlog statement (or 'commit') to the destination
    MySQL database, dispatching a reconnect-and-retry for known-retryable
    error codes.

    :param sql: statement text, or the literal string 'commit'
    :param args: bind parameters (escaped via ``escape_string`` before use)
    :param retry: forwarded to ``__retry_execute``
    :return: True on success or when a retry was dispatched; None on failure
    """
    # BUG FIX: the default was the mutable ``args=[]`` (shared across calls);
    # None is safe and equivalent since [] is falsy in the check below.
    try:
        args = self.escape_string(args) if args else []
    except:
        # Best-effort: log the escape failure but still attempt execution,
        # matching the original behaviour.
        Logging(msg=traceback.format_exc(), level='error')
    try:
        if sql == 'commit':
            self.des_mysql_conn.commit()
        else:
            self.des_mysql_cur.execute(sql, args)
    except pymysql.Error as e:
        Logging(msg=e.args, level='error')
        # Retry only error codes whitelisted as retryable in ErrorCode.
        if e.args[0] in ErrorCode:
            if ErrorCode[e.args[0]]:
                if sql == 'commit':
                    self.__retry_execute(retry=retry)
                else:
                    self.__retry_execute(sql=sql, args=args, retry=retry)
                return True
        #Logging(msg='sql:{},values:{}'.format(sql, args), level='error')
        Logging(msg=e, level='error')
        return None
    except:
        #Logging(msg='sql:{},values:{}'.format(sql, args), level='error')
        Logging(msg=traceback.format_exc(), level='error')
        return None
    return True
def __retry_connection_destion(self):
    """Re-establish the destination connection (postgresql mode), retrying up
    to 60 times with a one-second pause before each attempt; aborts the whole
    program via the error queue when every attempt fails.

    :return: True once reconnected; otherwise the process exits
    """
    import time
    attempt = 0
    while attempt < 60:
        attempt += 1
        time.sleep(1)
        Logging(msg='connection to destination db try agian!!!', level='info')
        try:
            conn = InitMyDB(mysql_host=self.dhost,
                            mysql_port=self.dport,
                            mysql_user=self.duser,
                            mysql_password=self.dpasswd,
                            auto_commit=False,
                            type='postgresql').Init()
            self.destination_conn = conn
            self.destination_cur = conn.cursor()
            Logging(msg='connection success!!!', level='info')
            return True
        except:
            Logging(msg=traceback.format_exc(), level='error')
    Logging(
        msg=
        'try 60 times to fail for conncetion destination db,exist now',
        level='error')
    self.error_queue.put(1)
    sys.exit()
def __init_slave_conn(self):
    """Initialise the destination connection used by replication: disables
    session binlog (unless ``self.binlog`` is set) and raises the session
    wait_timeout. Up to 60 attempts; aborts via the error queue on failure.

    :return: None
    """
    ok = False
    for _ in range(60):
        try:
            self.destination_conn = InitMyDB(mysql_host=self.dhost,
                                             mysql_port=self.dport,
                                             mysql_user=self.duser,
                                             mysql_password=self.dpasswd,
                                             auto_commit=False).Init()
            self.destination_cur = self.destination_conn.cursor()
            if self.binlog is None:
                self.destination_cur.execute(
                    'set sql_log_bin=0;')  # do not re-log replicated rows
            self.destination_cur.execute(
                'SET SESSION wait_timeout = 2147483;')
            ok = True
            break
        except pymysql.Error as e:
            Logging(msg=e.args, level='error')
            time.sleep(1)
    if not ok:
        Logging(msg='retry 60 time on slave db is failed,exist thread now',
                level='error')
        self.error_queue.put(1)
        sys.exit()
def init_des_conn(self, binlog=None):
    """Initialise the destination-DB connection pool for online export.

    By default session binlog is disabled (pass ``binlog`` truthy to keep it)
    and wait_timeout is raised to 2147483 seconds.

    :param binlog: when None, run ``set sql_log_bin=0`` on each session
    :return: None (connections are appended to ``self.des_thread_list``)
    """
    for i in range(self.threads - 1):
        conn = None
        cur = None
        if self.destination_type == 'phoenix':
            conn = InitPhoenixDB(
                host=self.des_conn_info['mysql_host'],
                port=self.des_conn_info['mysql_port'],
                user=self.des_conn_info['mysql_user'],
                passwd=self.des_conn_info['mysql_password'],
                jar=self.jar,
                jar_conf=self.jar_conf).Init()
            if conn:
                try:
                    cur = conn.cursor()
                except:
                    Logging(msg=traceback.format_exc(), level='error')
                    cur = None
        else:
            conn = InitMyDB(**self.des_conn_info).Init()
            if conn:
                try:
                    cur = conn.cursor()
                    if binlog is None:
                        cur.execute('set sql_log_bin=0;')
                    cur.execute('SET SESSION wait_timeout = 2147483;')
                except:
                    Logging(msg=traceback.format_exc(), level='error')
                    cur = None
        # BUG FIX: the original appended unconditionally, so a failed Init()
        # put a None/stale entry (or raised NameError on an unbound ``cur``)
        # into the pool; only register fully usable connections.
        if conn and cur:
            self.des_thread_list.append({'conn': conn, 'cur': cur})
def dump_to_new_db(self, database, tablename, idx, pri_idx, chunk_list=None, bytes_col_list=None, tbl=None, cols=None, iso=None):
    """Copy the rows of one table, chunk by chunk, into the destination DB.

    BUG FIX (interface): callers (``__dump_go`` / ``__mul_dump_go``) pass
    ``tbl=``, ``cols=`` and ``iso=`` keyword arguments that the original
    signature rejected with TypeError; they are now accepted with None
    defaults (currently unused in this body), which is backward compatible.

    :param database: source/destination schema name
    :param tablename: table to copy
    :param idx: index column used for range predicates and ordering
    :param pri_idx: primary index name (unused here; kept for interface parity)
    :param chunk_list: list of [start, end] index-value ranges, one per chunk
    :param bytes_col_list: byte-typed columns (unused here)
    :return: None
    """
    for chunk in chunk_list:  # renamed from ``list`` — shadowed the builtin
        start_num = chunk[0]
        end_num = chunk[1]
        limit_num = 0
        while True:
            # First pass selects from offset 0; afterwards ``limit_num``
            # advances by the number of rows already fetched for this chunk.
            # NOTE(review): the SQL has three %s placeholders but only two
            # bind values are passed here — presumably
            # __get_from_source_db_limit2000 appends the page size; confirm.
            sql = 'SELECT * FROM {}.{} WHERE {}>=%s and {}<=%s ORDER BY {} LIMIT {},%s'.format(
                database, tablename, idx, idx, idx, limit_num)
            self.__get_from_source_db_limit2000(
                sql=sql, args_value=[start_num, end_num])
            # Flatten rows into one parameter list for a multi-row INSERT;
            # stop this chunk when the source returns nothing.
            all_value = []
            if self.result:
                _len = len(self.result[0])
                _num = len(self.result)
                for row in self.result:
                    all_value += row.values()
            else:
                Logging(msg='return value is empty', level='warning')
                break
            sql = 'INSERT INTO {}.{} VALUES{}'.format(
                database, tablename,
                self.__combination_value_format(_len=_len, _num=_num))
            try:
                self.des_mysql_cur.execute(sql, all_value)
                self.des_mysql_conn.commit()
            except pymysql.Warning:
                # BUG FIX: traceback.format_list() needs an argument and
                # raised TypeError; format_exc() is the intended call.
                Logging(msg=traceback.format_exc(), level='warning')
            except pymysql.Error:
                Logging(msg=traceback.format_exc(), level='error')
                self.__retry_(sql, all_value)
            # Advance the offset; fewer than a full page means the chunk is
            # exhausted.
            return_len = len(self.result)
            limit_num += return_len
            if return_len < 2000:
                break
def my_listener(state):
    """Kazoo connection-state listener: log LOST/SUSPENDED transitions and
    record a successful (re)connection on ``self.retry_state``.

    NOTE(review): defined as a closure — ``self`` is captured from the
    enclosing method's scope.

    :param state: a ``KazooState`` value delivered by the kazoo client
    """
    if state == KazooState.LOST:
        Logging(msg='Zookeeper session lost, state: CLOSED', level='info')
    elif state == KazooState.SUSPENDED:
        Logging(msg='Zookeeper session lost, state: SUSPENDED', level='info')
    else:
        # BUG FIX: this branch fires when the session is (re)connected, but
        # the original message claimed the session was lost.
        Logging(msg='Zookeeper session established, state: Connected', level='info')
        self.retry_state = "Connected"
def __rollback(self):
    """Close the destination cursor and connection; used for middleware
    compatibility. All close failures are logged as warnings, never raised.

    :return: None
    """
    try:
        Logging(msg='close connection for db now !!!', level='warning')
        for handle in (self.destination_cur, self.destination_conn):
            handle.close()
    except pymysql.Error as e:
        Logging(msg=e.args, level='warning')
    except:
        Logging(msg=traceback.format_exc(), level='warning')
def __retry_(self, sql, all_value):
    """Retry a failed insert transaction up to three times, one second apart;
    exit the whole migration program if every attempt fails.

    :param sql: INSERT statement text
    :param all_value: flattened bind-parameter list for the statement
    :return: None (process exits after three failures)
    """
    retry_num = 0
    while retry_num < 3:
        try:
            Logging(msg='retry ...............', level='warning')
            self.des_mysql_cur.execute(sql, all_value)
            self.des_mysql_conn.commit()
            break
        except pymysql.Error:
            # BUG FIX: retry_num was never incremented, so a persistent error
            # retried forever instead of giving up after three attempts; and
            # traceback.format_list() (no argument) raised TypeError —
            # format_exc() is the intended call.
            Logging(msg=traceback.format_exc(), level='error')
            retry_num += 1
            time.sleep(1)
    else:
        # while-else: reached only when all three retries failed.
        sys.exit()
def __get_from_source_db_list(self, sql, pri_value=None):
    """Run *sql* against the source DB and cache every returned row on
    ``self.result``; any pymysql error aborts the process.

    :param sql: query text with optional placeholders
    :param pri_value: bind parameters for the query
    :return: None (rows are stored on ``self.result``)
    """
    cursor = self.mysql_cur
    try:
        cursor.execute(sql, pri_value)
        self.result = cursor.fetchall()
    except pymysql.Error:
        Logging(msg=traceback.format_exc(), level='error')
        sys.exit()
def __enter__(self):
    """Initialise the cluster on context entry.

    Determines whether this is a first start or a crash recovery: with a
    saved ``cluster_status``, tasks this node was LEADING are re-asserted and
    other nodes are told to restart their leaders; with no saved status, a
    vote is broadcast for every task. In zk_mode the work is delegated to
    ``ClusterOp``.

    :return: self (the context-manager object)
    """
    if self.cluster_type == 'leader_mode':
        s = self.protocoludp()
        p_ser = ThreadDump(socke=s, global_queue=global_queue)
        p_ser.start()
        if p_ser.is_alive():
            self.checkstatus()
            print(self.global_status)
            if self.global_status:
                for _host in self.global_status:
                    if self.global_status[_host]['isself']:
                        # Re-assert leadership for tasks we were leading.
                        for name in self.global_status[_host]['task_list']:
                            if self.global_status[_host]['task_list'][
                                    name] == 'LEADING':
                                self.restartleader(name)
                    else:
                        self.restartleader(nodes=_host)
            else:
                # No saved status: hold an election for every task.
                for task in self.tasks:
                    self.vote(nodes=self.node_list, task_name=task)
        else:
            Logging(msg='starting udp server failed', level='error')
            sys.exit()
    elif self.cluster_type == 'zk_mode':
        with ClusterOp(task_list=self.tasks, zk_hosts=self.zk_hosts) as cz:
            pass
    # BUG FIX: __enter__ must return the object bound by ``with ... as``;
    # the original returned None. (Also dropped the unused ``vote_result``.)
    return self
def __dump_go(self,database,tablename,idx_name=None,pri_idx=None,max_min=None,bytes_col_list=None,tbl=None,cols=None,iso=None):
    '''
    Single-threaded export of one table.

    When idx_name/pri_idx are not supplied (direct call rather than fallback
    from __mul_dump_go), the index, byte columns and chunk ranges are
    discovered here. A fresh Dump worker then copies every chunk.

    :param database: source schema name
    :param tablename: table to export
    :param idx_name: pre-computed index column (None → discover)
    :param pri_idx: pre-computed primary index (None → discover)
    :param max_min: pre-computed chunk ranges (None → discover)
    :param bytes_col_list: pre-computed byte-typed columns
    :return: None (exits the process on structure-preparation failure)
    '''
    self.__getcolumn(database, tablename)
    # Lazily create the destination connection pool on first use.
    if len(self.des_thread_list) < 1:
        self.__init_info(des=True)
    # queal_struct set → structures already equal, skip re-creating them.
    stat = self.queal_struct if self.queal_struct else self.dump.prepare_structe(database=database, tablename=tablename)
    if stat:
        if idx_name is None and pri_idx is None:
            idx_name,pri_idx = self.check_pri(cur=self.cur, db=database, table=tablename)
            bytes_col_list = self.check_byte_col(cur=self.cur, db=database, table=tablename)
            max_min = self.split_data(
                self.cur,self.get_max_min(cur=self.cur,databases=database,tables=tablename,index_name=idx_name),
                idx_name,database,tablename,None)
        # max_min[0] empty means there is no data range to copy.
        if max_min and max_min[0]:
            dump = Dump(cur=self.cur, des_conn=self.des_mysql_conn,
                        des_cur=self.des_mysql_cur,
                        destination_type=self.destination_type,
                        table_column_struct=self.table_column_struct,
                        des_conn_info=self.des_conn_info, jar=self.jar,
                        jar_conf=self.jar_conf, binlog=self.binlog)
            dump.dump_to_new_db(database=database, tablename=tablename,
                                idx=idx_name, pri_idx=pri_idx,
                                chunk_list=max_min,bytes_col_list=bytes_col_list,tbl=tbl,cols=cols,iso=iso)
    else:
        Logging(msg='Initialization structure error', level='error')
        sys.exit()
def __set_mark(self, db_name, tbl_name, gtid, gno_id, at_pos):
    '''
    Record the replication progress mark (GTID / position) for one table in
    repl_mark.mark_status, and remember the statement for transaction replay.

    Tables already seen (cached in self.status_row) get a plain UPDATE;
    otherwise the destination is probed with a SELECT to decide between
    UPDATE and INSERT, and the table is added to the cache.

    :param db_name: schema of the replicated table
    :param tbl_name: replicated table name
    :param gtid: GTID server-uuid part (stored as gno_uid)
    :param gno_id: GTID transaction id
    :param at_pos: binlog position reached
    :return: None (exits the process if the probe SELECT fails)
    '''
    _name = '{}:{}'.format(db_name, tbl_name)
    if _name in self.status_row:
        sql = 'UPDATE repl_mark.mark_status SET gno_uid=%s,gno_id=%s,at_pos=%s where db_name=%s and tbl_name=%s;'
        args = [gtid, gno_id, at_pos, db_name, tbl_name]
    else:
        sql = 'select 1 from repl_mark.mark_status where db_name = %s and tbl_name = %s;'
        if self.__raise_sql(sql=sql, args=[db_name, tbl_name], type=True):
            _s = self.destination_cur.fetchall()
            if _s:
                sql = 'UPDATE repl_mark.mark_status SET gno_uid=%s,gno_id=%s,at_pos=%s where db_name=%s and tbl_name = %s;'
                args = [gtid, gno_id, at_pos, db_name, tbl_name]
            else:
                sql = 'INSERT INTO repl_mark.mark_status(db_name,tbl_name,gno_uid,gno_id,at_pos) VALUES(%s,%s,%s,%s,%s);'
                args = [db_name, tbl_name, gtid, gno_id, at_pos]
            # Cache the table so future marks skip the probe SELECT.
            # NOTE(review): placement reconstructed from a flattened source —
            # presumably it applies to both the UPDATE and INSERT branches;
            # confirm against the original file.
            self.status_row.append('{}:{}'.format(db_name, tbl_name))
        else:
            Logging(
                msg='execute sql [{}] error , exit now!!!!'.format(sql),
                level='error')
            self.error_queue.put(1)
            sys.exit()
    # Buffer the statement so the whole transaction can be replayed on retry.
    self.trancaction_list.append([sql, args])
    self.__check_stat(self.__raise_sql(sql=sql, args=args))
def protocoludp(self):
    """Create the cluster-broadcast UDP socket bound to 0.0.0.0:9999.

    UDP is used deliberately: the sender need not know whether peers are
    listening or received the datagram — liveness is judged from the packets
    that do arrive, avoiding TCP handshake overhead and spurious errors.

    :return: the bound socket, or None when binding fails
    """
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        s.bind(('0.0.0.0', 9999))
        # BUG FIX: a successful bind is informational, not an error — the
        # original logged it at level='error' (and misspelled "port").
        Logging(msg='Bind UDP on port:9999, listening 0.0.0.0',
                level='info')
        return s
    except:
        Logging(msg=traceback.format_exc(), level='error')
def init():
    """Program entry point: parse CLI/config options and launch either a
    cluster (zk_mode / leader_mode, one task per *.conf under conf/include)
    or a single task from the given config file.

    :return: None
    """
    options = get_conf.GetIso().get()
    if not options['cluster']:
        # Single-task mode: run one task straight from its config file.
        with SingeTask.SingeTask(**(start(options['config']))):
            pass
        return
    # Cluster mode: every *.conf under conf/include becomes one task.
    base_dir = os.path.abspath(os.path.join(os.path.dirname(__file__)))
    conf_names = []
    for _root, _dirs, names in os.walk('{}/conf/include'.format(base_dir)):
        conf_names.extend(n for n in names if n.split('.')[-1] == 'conf')
    task_list = {name.split('.')[0]: start(name) for name in conf_names}
    mode = options['cluster_type']
    if mode == 'zk_mode':
        extra = {'zk_hosts': options['zk_hosts'], 'cluster_type': mode}
        with InitCluster.ClusterEnt(**dict(task_list, **extra)):
            pass
    elif mode == 'leader_mode':
        extra = {'cluster_nodes': options['cluster_nodes'],
                 'cluster_type': mode,
                 'self_host': options['host']}
        with InitCluster.ClusterEnt(**dict(task_list, **extra)):
            pass
    else:
        Logging(msg='invalid option cluster_type {}'.format(mode),
                level='warning')
def __check_conn(self):
    """Probe every pooled source connection with ``select 1``; any failure
    aborts the whole process.

    :return: None
    """
    for entry in self.thread_list:
        try:
            entry['cur'].execute('select 1')
        except:
            Logging(msg=traceback.format_exc(), level='error')
            sys.exit()
def __mul_dump_go(self,database,tablename,tbl=None,cols=None,iso=None):
    '''
    Multi-threaded export of one table.

    Picks a suitable index and splits its value range into per-thread chunks;
    when no usable unique/ordered index (uli) exists, falls back to the
    single-threaded path.

    :param database: source schema name
    :param tablename: table to export
    :return: None (exits the process on structure-preparation failure)
    '''
    idx_name, pri_idx = self.check_pri(cur=self.cur, db=database, table=tablename)
    chunks_list,uli = self.get_chunks(cur=self.cur, databases=database, tables=tablename,index_name=idx_name)
    # Lazily create the destination connection pool on first use.
    if len(self.des_thread_list) < 1:
        self.__init_info(des=True)
    # queal_struct set → structures already equal, skip re-creating them.
    stat = self.queal_struct if self.queal_struct else self.dump.prepare_structe(database=database, tablename=tablename)
    #bytes_col_list = self.check_byte_col(cur=self.cur,db=database,table=tablename)
    if chunks_list is None:
        Logging(msg='this table {} chunks_list is None,maybe this table not data'.format(tablename),level='warning')
        return
    if uli:
        '''多线程'''
        '''初始化目标库所有并发链接及函数'''
        # Multi-threaded branch: make sure one destination connection exists
        # per worker before spawning the dump threads.
        if len(self.des_thread_list) <= 1:
            if self.threads and self.threads > 1:
                # self.init_conn()
                self.init_des_conn(binlog=self.binlog)
                # stat = self.queal_struct if self.queal_struct else self.dump.prepare_structe(database=database,
                #                                                                             tablename=tablename)
        if stat:
            self.__getcolumn(database,tablename)
            # One Dump worker per source connection, each handed its own
            # chunk range and destination connection.
            for t in range(len(self.thread_list)):
                dump = Dump(cur=self.thread_list[t]['cur'], des_conn=self.des_thread_list[t]['conn'],
                            des_cur=self.des_thread_list[t]['cur'],destination_type=self.destination_type,
                            table_column_struct=self.table_column_struct,des_conn_info=self.des_conn_info,
                            jar=self.jar,jar_conf=self.jar_conf,binlog=self.binlog)
                __dict_ = [self.queue, dump, chunks_list[t], database, tablename, idx_name, pri_idx,tbl,cols,iso]
                _t = ThreadDump(*__dict_)
                _t.start()
            # Wait for workers via the shared queue.
            self.__get_queue()
        else:
            Logging(msg='Initialization structure error', level='error')
            sys.exit()
    else:
        '''单线程'''
        # No usable index for chunked parallelism: single-threaded fallback.
        self.__dump_go(database,tablename,idx_name,pri_idx,chunks_list,tbl=tbl,cols=cols)
def __check_stat(self, state, only_state=None):
    """Abort the worker when *state* (a ``__raise_sql`` result) is falsy:
    log, signal the error queue, and exit. A truthy *state* is a no-op
    (``only_state`` merely short-circuits the return).

    :param state: result flag to check (None/False → fatal)
    :param only_state: truthy → return immediately on success
    :return: None
    """
    if not state:
        Logging(msg='desthread failed!!!!', level='error')
        self.error_queue.put(1)
        sys.exit()
    if only_state:
        return
def Init(self):
    """Open an autocommit connection to the Phoenix query server.

    :return: the phoenixdb connection, or None when connecting fails
    """
    try:
        url = 'http://{}:{}/'.format(self.host, self.port)
        return phoenixdb.connect(url, autocommit=True)
    except:
        Logging(msg=traceback.format_exc(), level='error')
        return None
def start(self):
    '''
    Run the whole online export and return the binlog coordinates.

    The binlog position is read first, while the primary connection still
    holds the (brief) global read lock taken during SNAPSHOT initialisation;
    the lock is released as soon as all worker connections exist. Export then
    proceeds table by table, multi- or single-threaded depending on
    ``self.threads``, and all pooled connections are closed at the end.

    :return: (binlog_file, binlog_pos) captured before the export started
    '''
    binlog_file, binlog_pos = self.master_info(cur=self.cur)
    if binlog_file and binlog_pos:
        pass
    else:
        # Cannot determine a consistent starting point: release the global
        # read lock, close up and abort.
        self.cur.execute('UNLOCK TABLES')
        self.close(self.cur, self.conn)
        Logging(msg='invalid master info , file {} position {}'.format(
            binlog_file, binlog_pos), level='error')
        sys.exit()
    '''初始化源库、目标库所有链接'''
    # Initialise all source and destination connections while the snapshot
    # is pinned, then release the global read lock.
    if self.threads and self.threads > 1:
        self.init_conn()
        self.init_des_conn(binlog=self.binlog)
    self.cur.execute('UNLOCK TABLES')
    if self.threads and self.threads > 1:
        '''多线程导出'''
        # Multi-threaded export, per database / per table.
        for database in self.databases:
            if self.tables:
                for tablename in self.tables:
                    _parmeter = [database, tablename]
                    self.__mul_dump_go(*_parmeter)
            else:
                tables = self.get_tables(cur=self.cur, db=database)
                for tablename in tables:
                    _parmeter = [database, tablename]
                    self.__mul_dump_go(*_parmeter)
    else:
        '''单线程导出'''
        # Single-threaded export.
        for database in self.databases:
            if self.tables:
                for tablename in self.tables:
                    _parameter = [database, tablename]
                    self.__dump_go(*_parameter)
            else:
                '''全库导出'''
                # Whole-database export: enumerate every table.
                tables = self.get_tables(cur=self.cur, db=database)
                for table in tables:
                    _parameter = [database, table]
                    self.__dump_go(*_parameter)
    '''操作完成关闭所有数据库链接'''
    # Export finished: close every pooled source/destination connection.
    if self.threads and self.threads > 1:
        for thread in self.thread_list:
            self.close(thread['cur'], thread['conn'])
        for thread in self.des_thread_list:
            self.close(thread['cur'], thread['conn'])
    return binlog_file, binlog_pos
def __raise(self, sql, args=None):
    '''
    Execute one statement (or 'commit') on the status DB, retrying via
    reconnect + recursion until it succeeds.

    Retryable error codes (per the ErrorCode table) trigger a reconnect and a
    recursive re-execution — for 'commit' the buffered insert in
    ``self.insert_sql_list`` is replayed first, then committed. Non-retryable
    errors (e.g. syntax errors) abort the whole program.

    NOTE(review): recursion depth is bounded only by Python's recursion
    limit, not an explicit retry cap.

    :param sql: statement text, or the literal string 'commit'
    :param args: bind parameters for the statement
    :return: True on (eventual) success; otherwise the process exits
    '''
    try:
        if sql == 'commit':
            self.connection.commit()
        else:
            self.cur.execute(sql, args)
    except pymysql.Error as e:
        Logging(msg=traceback.format_exc(), level='error')
        Logging(msg='error code {}'.format(e.args[0]), level='error')
        if ErrorCode[e.args[0]]:
            # Retryable: reconnect, then replay recursively.
            self.__retry_conn()
            Logging(msg='connection success on status db', level='info')
            if sql == 'commit':
                self.__raise(self.insert_sql_list[0], self.insert_sql_list[1])
                self.__raise('commit')
            else:
                self.__raise(sql, args)
            return True
        else:
            Logging(msg='error code test', level='error')
            Logging(msg=e, level='error')
            sys.exit()
    except:
        Logging(msg='error code test status db', level='error')
        Logging(msg=traceback.format_exc(), level='error')
        sys.exit()
    return True
def __check_thread_state(self):
    """Watch every replication thread once per second; if any thread dies,
    stop all of them and exit the process.

    :return: never returns normally (infinite monitoring loop)
    """
    while True:
        for worker in self.thread_list:
            #print(worker, worker.is_alive())
            if worker.is_alive():
                continue
            Logging(msg='replication thread {} is down,stop all thread!!'.format(worker), level='error')
            self.__stop_all_thread()
            sys.exit()
        time.sleep(1)