Example #1
 def __init_status_conn(self):
     self._status_conn = GetStruct(host=self.shost,
                                   port=self.sport,
                                   user=self.suser,
                                   passwd=self.spassword,
                                   binlog=self.sbinlog)  # initialize the status-DB connection helper
     self._status_conn.CreateTmp()
Example #2
    def Operation(self):
        '''
        :return:
        '''

        '''Entry point for the full dump'''
        if self.full_dump:
            des_mysql_info = {'mysql_host':self.dhost,'mysql_port':self.dport,'mysql_user':self.duser,
                              'mysql_password':self.dpasswd}
            src_mysql_info = {'mysql_host':self.host,'mysql_port':self.port,'mysql_user':self.user,
                              'mysql_password':self.passwd,'unix_socket':self.unix_socket}
            _binlog_file,_binlog_pos = processdump(threads=self.threads,dbs=self.databases,tables=self.tables,
                                                   src_kwargs=src_mysql_info,des_kwargs=des_mysql_info,binlog=self.binlog).start()
            if _binlog_file is None or _binlog_pos is None:
                sys.exit()
        '''============================================================================================================'''

        '''
        Build a master/slave connection to the source DB using the replication protocol.
        If a full dump was performed, use the binlog position recorded when the dump
        started; otherwise use the values passed in as parameters.
        '''
        self.__init_master_slave_conn()  # initialize the source/destination replication connections
        Logging(msg='replication to master.............', level='info')
        if self.full_dump:
            ReplConn = ReplicationMysql(log_file=_binlog_file, log_pos=_binlog_pos,mysql_connection=self.conn,
                                        server_id=self.server_id).ReadPack()
        else:
            ReplConn = ReplicationMysql(log_file=self.binlog_file, log_pos=self.start_position,
                                        mysql_connection=self.conn,server_id=self.server_id).ReadPack()
        '''============================================================================================================'''

        table_struce_key = None
        next_pos = None
        binlog_file_name = _binlog_file if self.full_dump else self.binlog_file

        _mysql_conn = GetStruct(host=self.host, port=self.port,user=self.user,passwd=self.passwd)
        _mysql_conn.CreateTmp()
        _gtid = None
        if ReplConn:
            Logging(msg='replication succeeded................', level='info')
            at_pos = _binlog_pos if self.full_dump else self.start_position

            '''
            Start the loop that reads binlog events.
            Only row_event, table_map_event, gtid_log_event, rotate_event and query_event are handled:
            row_event: extract the row data
            table_map_event: extract the database name, table name and column information
            gtid_log_event / rotate_event / query_event: extract basic binlog metadata and record it in dump2db
            '''
            while 1:
                try:
                    pkt = ReplConn._read_packet()
                    _parse_event = ParseEvent(packet=pkt,remote=True)
                    event_code, event_length, next_pos = _parse_event.read_header()
                    if event_code is None:
                        continue
                    if event_code in (binlog_events.WRITE_ROWS_EVENT,binlog_events.UPDATE_ROWS_EVENT,binlog_events.DELETE_ROWS_EVENT):
                        '''
                        Check the filtered thread_id and event type.
                        '''
                        if self.ithread:
                            if self.ithread == tmepdata.thread_id:
                                continue
                            if self.ignore_type and self.ignore[self.ignore_type] == event_code:
                                continue
                            self.__execute_code(_parse_event=_parse_event,event_code=event_code,
                                                    event_length=event_length,table_struce_key=table_struce_key)
                        else:
                            if self.ignore_type and self.ignore[self.ignore_type] == event_code:
                                continue
                            self.__execute_code(_parse_event=_parse_event, event_code=event_code,
                                                    event_length=event_length, table_struce_key=table_struce_key)

                    elif event_code == binlog_events.TABLE_MAP_EVENT:
                        tmepdata.database_name, tmepdata.table_name, tmepdata.cloums_type_id_list, tmepdata.metadata_dict = _parse_event.GetValue(type_code=event_code, event_length=event_length)  # extract event data
                        table_struce_key = '{}:{}'.format(tmepdata.database_name, tmepdata.table_name)
                        if table_struce_key not in tmepdata.table_struct_list:
                            if tmepdata.database_name in self.databases:
                                if self.tables:
                                    if tmepdata.table_name in self.tables:
                                        column_list, pk_idex, column_type_list = _mysql_conn.GetColumn(tmepdata.database_name, tmepdata.table_name)
                                        tmepdata.table_struct_list[table_struce_key] = column_list
                                        tmepdata.table_pk_idex_list[table_struce_key] = pk_idex
                                        tmepdata.table_struct_type_list[table_struce_key] = column_type_list
                                    continue
                                column_list, pk_idex, column_type_list = _mysql_conn.GetColumn(tmepdata.database_name, tmepdata.table_name)
                                tmepdata.table_struct_list[table_struce_key] = column_list
                                tmepdata.table_pk_idex_list[table_struce_key] = pk_idex
                                tmepdata.table_struct_type_list[table_struce_key] = column_type_list
                    elif event_code == binlog_events.ROTATE_EVENT:
                            binlog_file_name = _parse_event.read_rotate_log_event(event_length=event_length)
                    elif event_code == binlog_events.QUERY_EVENT:
                        if self.ithread:
                            tmepdata.thread_id,_,_ = _parse_event.read_query_event(event_length=event_length)
                    elif event_code == binlog_events.GTID_LOG_EVENT:
                        _gtid = _parse_event.read_gtid_event(event_length=event_length)
                except:
                    Logging(msg=traceback.format_exc(),level='error')
                    ReplConn.close()
                    break

                if _gtid and _gtid != tmepdata.gtid:
                    _mysql_conn.SaveStatus(logname=binlog_file_name, at_pos=at_pos, next_pos=next_pos,
                                           server_id=self.server_id,gtid=_gtid)
                    tmepdata.gtid=_gtid
                elif _gtid is None:
                    _mysql_conn.SaveStatus(logname=binlog_file_name, at_pos=at_pos, next_pos=next_pos,
                                           server_id=self.server_id, gtid=_gtid)
                    tmepdata.gtid = _gtid
                else:
                    _mysql_conn.SaveStatus(logname=binlog_file_name,at_pos=at_pos,next_pos=next_pos,server_id=self.server_id)

                at_pos = next_pos
        else:
            Logging(msg='replication failed................', level='error')
            _mysql_conn.close()
Example #3
class OperationDB(escape):
    def __init__(self,**kwargs):
        super(OperationDB,self).__init__()
        self.destnation_type = kwargs['destnation_type']
        self.jar = kwargs['jar']
        self.jar_conf = kwargs['jar_conf']

        self.map_conf = kwargs['map_conf']
        self.queal_struct = kwargs['queal_struct']
        self.ssl_auth = {'cert':kwargs['cert'],'key':kwargs['key']} if kwargs['ssl'] else None
        self.daemon = kwargs['daemon']
        self.queue = kwargs['queue']
        self.lookback = kwargs['lookback']
        #print('op: {}'.format(self.queue))
        self.full_dump = kwargs['full_dump']  # whether to perform a full dump
        self.threads = kwargs['threads']      # number of concurrent threads for the full dump

        self.host,self.port,self.user,self.passwd = kwargs['host'],kwargs['port'],kwargs['user'],kwargs['passwd']  # source DB connection info
        self.unix_socket = kwargs['socket']

        self.dhost,self.dport,self.duser,self.dpasswd = kwargs['dhost'],kwargs['dport'],kwargs['duser'],kwargs['dpasswd']  # destination DB connection info
        self.binlog = kwargs['binlog']  # whether to write binlog on the destination DB

        self.destination_conn = None
        self.destination_cur = None
        self.conn = None
        self.cur = None

        self.repl_mark = None         # set while reading the binlog if the current GTID transaction carries a mark operation
        self.repl_mark_status = None  # set while applying data once the mark operation has been done for this transaction
        self.apply_conn = None
        self.crc = None
        self._status_conn = None
        self.gno_uid = None  # server UUID of the current transaction's GTID read from the binlog
        self.gno_id = None   # GNO of the GTID read from the binlog

        self.at_pos = None

        self.trancaction_list = []  # SQL of executed transactions, replayed after a reconnect

        self.databases = kwargs['databases']
        self.tables = kwargs['tables']
        self.binlog_file = kwargs['binlog_file']
        self.start_position = kwargs['start_position']
        self.auto_position = kwargs['auto_position']
        self.gtid = kwargs['gtid']

        self.ithread = kwargs['ithread']
        self.ignore_type = kwargs['ignore_type'] if kwargs['ignore_type'] else []
        self.ignore = {'delete':binlog_events.DELETE_ROWS_EVENT,'update':binlog_events.UPDATE_ROWS_EVENT,'insert':binlog_events.WRITE_ROWS_EVENT}
        if self.ignore_type:
            self.ignore_code = [self.ignore[a] for a in self.ignore if a in self.ignore_type]
        self.server_id = kwargs['server_id']

        self.shost = kwargs['shost']
        self.sport = kwargs['sport']
        self.suser = kwargs['suser']
        self.spassword = kwargs['spassword']
        self.sbinlog = kwargs['sbinlog']

        #---- per-table filter configuration
        self.ignores = kwargs['ignores']
        self.check_att = None                           # whether the current table belongs to an attached task

        self.mysql_version = None                       # MySQL version number, used for binlog parsing differences

    def __init_master_slave_conn(self):
        '''
        Initialize the source and destination DB connections required for replication.
        :return:
        '''
        for i in range(60):
            self.conn = InitMyDB(mysql_host=self.host, mysql_port=self.port, mysql_user=self.user,
                                 mysql_password=self.passwd, unix_socket=self.unix_socket, ssl=self.ssl_auth).Init()

            try:
                self.cur = self.conn.cursor()
                self.cur.execute('SET SESSION wait_timeout = 2147483;')
                return
            except pymysql.Error as e:
                Logging(msg=e.args,level='error')
            time.sleep(1)
        else:
            Logging(msg='source db connection failed !!!',level='error')
            sys.exit()

    def __init_status_conn(self):

        self._status_conn = GetStruct(host=self.shost, port=self.sport, user=self.suser, passwd=self.spassword,
                                      binlog=self.sbinlog, server_id=self.server_id)  # initialize the status-DB connection helper
        self._status_conn.CreateTmp()

    def __execute_code(self,_parse_event,event_code,event_length,table_struce_key):
        '''
        The database and table have to be re-checked here, because a single GTID
        transaction may contain operations on databases/tables that are not wanted,
        which would invalidate the mark flag.
        :param _parse_event:
        :param event_code:
        :param event_length:
        :param table_struce_key:
        :return:
        '''

        _values = _parse_event.GetValue(type_code=event_code, event_length=event_length,
                            cloums_type_id_list=tmepdata.cloums_type_id_list,
                            metadata_dict=tmepdata.metadata_dict,
                            unsigned_list=self.__check_struce(table_struce_key))
        tmepdata.sql_list.append([_values,tmepdata.database_name,tmepdata.table_name,self.at_pos,event_code])

        # if tmepdata.database_name in self.databases:
        #     if self.tables:
        #         if tmepdata.table_name in self.tables:
        #             _values = _parse_event.GetValue(type_code=event_code, event_length=event_length,
        #                                 cloums_type_id_list=tmepdata.cloums_type_id_list,
        #                                 metadata_dict=tmepdata.metadata_dict,
        #                                 unsigned_list=self.__check_struce(table_struce_key))
        #             tmepdata.sql_list.append([_values,tmepdata.database_name,tmepdata.table_name,self.at_pos,event_code])
        #     else:
        #         _values = _parse_event.GetValue(type_code=event_code, event_length=event_length,
        #                                         cloums_type_id_list=tmepdata.cloums_type_id_list,
        #                                         metadata_dict=tmepdata.metadata_dict,
        #                                         unsigned_list=self.__check_struce(table_struce_key))
        #         tmepdata.sql_list.append([_values, tmepdata.database_name, tmepdata.table_name, self.at_pos,event_code])

    def __check_att_status(self,event_code=None,status_check=None):
        '''
        Check whether the table belongs to an attached task.
        :param event_code:
        :return:
        '''

        if status_check:
            for row in self.ignores:
                _db, _tb_list = row[0], row[1]
                if tmepdata.database_name == _db and tmepdata.table_name in _tb_list:
                    if self.repl_mark_status is None:
                        self.repl_mark_status = True
                    if self.check_att is None:
                        self.check_att = True
                    return True
            return False
        elif event_code:
            for row in self.ignores:
                _db, _tb_list = row[0], row[1]
                _ignore_code = [self.ignore[i] for i in row[2] if i in self.ignore]
                if tmepdata.database_name == _db and tmepdata.table_name in _tb_list:
                    if event_code in _ignore_code:
                        return True
                    return False



    def __check_struce(self,table_struce_key):
        if table_struce_key in tmepdata.table_struct_type_list:
            return tmepdata.table_struct_type_list[table_struce_key]
        else:
            Logging(msg='table struct key {} is not in table_struct_type_list'.format(table_struce_key),level='error')
            sys.exit()

    # def __check_att_repl_info(self,_binlog_file,_binlog_pos,_excute_gtid):
    #     '''
    #     Decide how to merge the attached replication info
    #     :param _binlog_file:
    #     :param _binlog_pos:
    #     :param _excute_gtid:
    #     :return:
    #     '''
    #
    #     if self.att_info:
    #         server_uuid = self.__get_server_uuid()
    #         _excute_gtid_dict = {}
    #         for uuid_gnoid in _excute_gtid.split(','):
    #             _ = uuid_gnoid.split(':')
    #             _excute_gtid_dict[_[0]] = _[1]
    #
    #         _att_gtid_dict = {}
    #         for att_uuid_gnoid in self.att_gtid.split(','):
    #             _ = att_uuid_gnoid.split(':')
    #             _att_gtid_dict[_[0]] = _[1]
    #
    #         if server_uuid in _excute_gtid_dict and server_uuid in _att_gtid_dict:
    #             _excuet_gnoid = _excute_gtid_dict[server_uuid].split('-')
    #             _att_gnoid = _excute_gtid_dict[server_uuid].split('-')
    #             if _excuet_gnoid[0] == _att_gnoid[0]:
    #                 if _excuet_gnoid[1] > _att_gnoid[1]:
    #                     _excute_gtid = self.att_gtid
    #             elif _excuet_gnoid[0] > _att_gnoid[0]:
    #                 _excute_gtid = self.att_gtid
    #         else:
    #             Logging(msg='att_gtid:{} is invalid'.format(self.att_gtid), level='error')
    #             sys.exit()
    #     elif self.att_binlog and self.att_pos:
    #         _att_binlog_file = int(self.att_binlog.split('.')[1])
    #         _excute_binlog_file = int(_binlog_file.split('.')[1])
    #         if _att_binlog_file == _excute_binlog_file:
    #             if int(self.att_pos) < int(_binlog_pos):
    #                 _binlog_pos = self.att_pos
    #         elif _att_binlog_file < _excute_binlog_file:
    #             _binlog_file, _binlog_pos = self.att_binlog, self.att_pos
    #
    #     return _binlog_file,_binlog_pos,_excute_gtid


    def Operation(self):
        '''
        :return:
        '''

        self.__init_status_conn()  # initialize the status DB
        if not self.daemon and not self._status_conn.checkserverid():
            Logging(msg='this server_id already exists in dump2db.dump_status. to ensure uniqueness, please confirm whether it has been abandoned.',level='error')
            sys.exit()

        '''Entry point for the full dump'''
        if self.daemon:
            pass
        elif self.full_dump:
            des_mysql_info = {'mysql_host':self.dhost,'mysql_port':self.dport,'mysql_user':self.duser,
                              'mysql_password':self.dpasswd}
            src_mysql_info = {'mysql_host':self.host,'mysql_port':self.port,'mysql_user':self.user,
                              'mysql_password':self.passwd,'unix_socket':self.unix_socket,'ssl':self.ssl_auth}
            _binlog_file,_binlog_pos,_excute_gtid = processdump(threads=self.threads,dbs=self.databases,tables=self.tables,
                                                   src_kwargs=src_mysql_info,des_kwargs=des_mysql_info,binlog=self.binlog,
                                                                map_conf=self.map_conf,queal_struct=self.queal_struct,
                                                                destination_type=self.destnation_type,jar=self.jar,
                                                                jar_conf=self.jar_conf).start()

            if _binlog_file is None or _binlog_pos is None:
                sys.exit()
        '''============================================================================================================'''

        '''
        Build a master/slave connection to the source DB using the replication protocol.
        If a full dump was performed, use the binlog position recorded when the dump
        started; otherwise use the values passed in as parameters.
        '''
        self.__init_master_slave_conn()  # initialize the source/destination replication connections
        self.__get_all_table_struct()
        self.__get_version()            # get the source MySQL version number
        # self.cur.close()
        self.queue.put({'table_struct': [tmepdata.table_struct_list, tmepdata.table_pk_idex_list]})

        if self.daemon:
            _binlog_file, _binlog_pos, _excute_gtid ,gtid_uid = self._status_conn.get_daemon_info(self.server_id)
            self._status_conn.close()
            _excute_gtid = ','.join(['{}:{}'.format(uuid, _excute_gtid[uuid]) for uuid in _excute_gtid])
        Logging(msg='replication to master.............', level='info')

        if self.daemon or self.full_dump:
            self.cur.close()
            rep_info = {'log_file': _binlog_file, 'log_pos': _binlog_pos, 'mysql_connection': self.conn,
                        'server_id': self.server_id, 'auto_position': self.auto_position, 'gtid': _excute_gtid}
            Logging(msg='binlog: {} position: {} gtid : {}'.format(_binlog_file, _binlog_pos, _excute_gtid),
                    level='info')
            '''initialize the GTID set to be recorded'''
            _gtid = self.__gtid_set(_excute_gtid) if _excute_gtid else None

            ReplConn = ReplicationMysql(**rep_info).ReadPack()
        else:
            self.__check_repl_info()
            self.cur.close()
            rep_info = {'log_file': self.binlog_file, 'log_pos': self.start_position, 'mysql_connection': self.conn,
                        'server_id': self.server_id, 'auto_position': self.auto_position, 'gtid': self.gtid}
            Logging(msg='binlog: {} position: {} gtid : {}'.format(self.binlog_file, self.start_position, self.gtid),
                    level='info')
            '''initialize the GTID set to be recorded'''
            _gtid = self.__gtid_set(self.gtid) if self.gtid else None
            ReplConn = ReplicationMysql(**rep_info).ReadPack()
        '''============================================================================================================'''

        table_struce_key = None
        binlog_file_name = _binlog_file if self.full_dump or self.daemon else self.binlog_file
        next_pos = _binlog_pos if self.full_dump or self.daemon else self.start_position  # binlog position to start reading from

        if ReplConn:
            Logging(msg='replication succeeded................', level='info')

            '''
            Start the loop that reads binlog events.
            Only row_event, table_map_event, gtid_log_event, rotate_event and query_event are handled:
            row_event: extract the row data
            table_map_event: extract the database name, table name and column information
            gtid_log_event / rotate_event / query_event: extract basic binlog metadata and record it in dump2db
            '''

            while 1:
                self.at_pos = next_pos
                try:
                    pkt = ReplConn._read_packet()
                except pymysql.OperationalError:
                    '''connection lost, re-register with the master'''
                    Logging(msg='retrying to register with master', level='error')
                    ReplConn = self.__retry_regist_master(gtid=tmepdata.excute_gtid, binlog=binlog_file_name,
                                                          position=self.at_pos)
                    continue
                except pymysql.Error as e:
                    Logging(msg=e.args,level='error')
                    if e.args[0] == 1236:
                        '''re-fetch the GTID set when it has been purged'''
                        if tmepdata.excute_gtid:
                            pass
                        else:
                            tmepdata.excute_gtid = _gtid.copy()
                        self.__get_gtid_info()
                        Logging(msg='retrying to register with master', level='error')
                        ReplConn = self.__retry_regist_master(gtid=tmepdata.excute_gtid, binlog=binlog_file_name,
                                                              position=self.at_pos)
                        continue
                except:
                    Logging(msg=traceback.format_exc(), level='error')
                    ReplConn.close()
                    break

                _parse_event = ParseEvent(packet=pkt,remote=True, mysql_version=self.mysql_version)
                event_code, event_length, next_pos = _parse_event.read_header()
                if event_code is None or event_code in (binlog_events.UNKNOWN_EVENT,binlog_events.START_EVENT_V3):
                    continue

                if event_code in (binlog_events.WRITE_ROWS_EVENT,binlog_events.UPDATE_ROWS_EVENT,binlog_events.DELETE_ROWS_EVENT):
                    '''
                    Check the filtered thread_id and event type.
                    '''
                    if self.lookback:
                        if self.repl_mark:
                            continue
                    if self.repl_mark_status:
                        if self.ithread:
                            if self.ithread == tmepdata.thread_id:
                                continue

                            if self.check_att:
                                if self.__check_att_status(event_code=event_code):
                                    continue
                            #if self.ignore_type and self.ignore[self.ignore_type] == event_code:
                            else:
                                if self.ignore_type and event_code in self.ignore_code:
                                    continue
                            self.__execute_code(_parse_event=_parse_event,event_code=event_code,
                                                    event_length=event_length,table_struce_key=table_struce_key)
                        else:
                            if self.check_att:
                                if self.__check_att_status(event_code=event_code):
                                    continue
                            #if self.ignore_type and self.ignore[self.ignore_type] == event_code:
                            else:
                                if self.ignore_type and event_code in self.ignore_code:
                                    continue
                            self.__execute_code(_parse_event=_parse_event, event_code=event_code,
                                                    event_length=event_length, table_struce_key=table_struce_key)

                elif event_code == binlog_events.TABLE_MAP_EVENT:
                    '''
                    The table_map event carries the table structure. What happens here:
                    1. Decide whether the database/table needs to be replicated.
                    2. If it does, row events follow the table_map event, so the repl_mark flag is handled here;
                       because one GTID may touch several tables, a state flag ensures the mark is written only
                       once per GTID transaction.
                    3. When data arrives, check whether the operation came from repl_mark itself; if so, set a flag
                       to skip every operation in the current GTID (loop-back control for bidirectional sync).
                    4. GTID mode may not be enabled, so the mark is written at table_map and finished at xid_event.
                    '''
                    tmepdata.database_name, tmepdata.table_name, tmepdata.cloums_type_id_list, tmepdata.metadata_dict = _parse_event.GetValue(type_code=event_code, event_length=event_length)  # extract event data
                    table_struce_key = '{}:{}'.format(tmepdata.database_name, tmepdata.table_name)
                    if self.lookback:
                        if self.repl_mark:
                            continue
                        elif tmepdata.database_name == 'repl_mark':
                            self.repl_mark = True
                            continue
                    if self.ignores:
                        if self.__check_att_status(status_check=True):
                            continue
                    if tmepdata.database_name in self.databases:
                        if self.tables:
                            if tmepdata.table_name in self.tables:
                                '''write the mark'''
                                if self.repl_mark_status is None:
                                    #self.__set_mark()
                                    self.repl_mark_status = True
                                    continue
                                else:
                                    continue
                        else:
                            '''write the mark'''
                            if self.repl_mark_status is None:
                                #self.__set_mark()
                                self.repl_mark_status = True
                                continue
                            else:
                                continue


                    '''not a wanted database; reset the state flags to None'''
                    if self.repl_mark_status:
                        self.repl_mark_status = None
                    if self.check_att:
                        self.check_att = None


                elif event_code == binlog_events.ROTATE_EVENT:
                        binlog_file_name = _parse_event.read_rotate_log_event(event_length=event_length)

                elif event_code == binlog_events.QUERY_EVENT:
                    if self.repl_mark:
                        continue
                    tmepdata.thread_id,_db,_statement = _parse_event.read_query_event(event_length=event_length)

                elif event_code == binlog_events.GTID_LOG_EVENT:
                    '''
                    Record the GTID of the current transaction. Nothing else is done here, because the
                    databases/tables that the transaction touches are not yet known.
                    '''
                    self.gno_uid,self.gno_id = _parse_event.read_gtid_event(event_length=event_length)
                    _gtid[self.gno_uid] = '1-{}'.format(self.gno_id)
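                    # record this server UUID's executed range as '1-<gno>', i.e. treat GNOs 1..gno_id as applied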

                elif event_code in (binlog_events.XID_EVENT,binlog_events.XA_PREPARE_LOG_EVENT):
                    '''
                    xid_event marks the end of the whole GTID transaction;
                    commit all of the queued work here.
                    '''
                    save_gtid_value = json.dumps(_gtid) if _gtid else None

                    tracaction_value = {'gtid':save_gtid_value,'gno_uid':self.gno_uid,'gno_id':self.gno_id,
                                        'binlog':binlog_file_name,'at_pos':self.at_pos,'next_pos':next_pos,
                                        'value_list':tmepdata.sql_list}
                    append_state = self.append_data(values=tracaction_value)
                    if append_state:
                        tmepdata.excute_gtid = _gtid.copy() if _gtid else None

                        tmepdata.sql_list = []
                        self.xa = None
                        self.repl_mark_status = None
                        self.repl_mark = None
                        self.check_att = None
                        continue
                    else:
                        Logging(msg='the binlog queue is full !!!!!',level='error')
                        sys.exit()

        else:
            Logging(msg='replication failed................', level='error')

            self.conn.close()


    def __set_mark(self):
        '''
        Set a transaction mark before every destination-DB operation; used for
        loop-back control in bidirectional replication.
        :return:
        '''
        sql = 'INSERT INTO repl_mark.mark_status(id,gtid,gno_id) VALUES(%s,%s,%s) ON DUPLICATE KEY UPDATE  gtid=%s,gno_id=%s;'
        tmepdata.sql_list.append([sql,[self.server_id,self.gno_uid,self.gno_id,self.gno_uid,self.gno_id]])


    def __get_all_table_struct(self):
        '''
        Fetch the structure of every table that needs to be replicated.
        :return:
        '''
        sql = 'select TABLE_SCHEMA,TABLE_NAME from INFORMATION_SCHEMA.COLUMNS GROUP BY TABLE_SCHEMA,TABLE_NAME;'
        self.__check_stat(self.__raise_sql(sql=sql))

        result = self.cur.fetchall()
        if result:
            for row in result:
                table_schema = row['TABLE_SCHEMA']
                table_name = row['TABLE_NAME']
                table_struce_key = '{}:{}'.format(table_schema, table_name)
                if table_schema in self.databases:
                    if self.tables:
                        if table_name in self.tables:
                            tmepdata.table_struct_list[table_struce_key],tmepdata.table_pk_idex_list[table_struce_key],\
                            tmepdata.table_struct_type_list[table_struce_key] = self.__getcolumn(table_schema,table_name)
                    else:
                        tmepdata.table_struct_list[table_struce_key], tmepdata.table_pk_idex_list[table_struce_key],\
                        tmepdata.table_struct_type_list[table_struce_key] = self.__getcolumn(table_schema, table_name)

                if self.ignores:
                    for _r in self.ignores:
                        db_name, tb_list = _r[0],_r[1]
                        if db_name == table_schema:
                            for _tb in tb_list:
                                _tble_struce_key = '{}:{}'.format(db_name,_tb)
                                if _tble_struce_key not in tmepdata.table_struct_list:
                                    tmepdata.table_struct_list[_tble_struce_key], \
                                    tmepdata.table_pk_idex_list[_tble_struce_key],\
                                    tmepdata.table_struct_type_list[_tble_struce_key] = self.__getcolumn(db_name,_tb)


    def __getcolumn(self,*args):
        '''args order: database, table name'''
        column_list = []
        column_type_list = []

        sql = 'select COLUMN_NAME,COLUMN_KEY,COLUMN_TYPE from INFORMATION_SCHEMA.COLUMNS where table_schema=%s and table_name=%s order by ORDINAL_POSITION;'
        self.__check_stat(self.__raise_sql(sql = sql,args=args))

        result = self.cur.fetchall()
        pk_idex = []
        for idex,row in enumerate(result):
            column_list.append(row['COLUMN_NAME'])
            column_type_list.append(row['COLUMN_TYPE'])
            # if row['COLUMN_NAME'] in ('user_id','msg_id'):
            #     pk_idex.append(idex)
            if row['COLUMN_KEY'] == 'PRI':
                pk_idex.append(idex)
        return column_list,pk_idex,column_type_list

    def __check_repl_info(self):
        '''
        If none of the binlog parameters needed for replication were provided,
        fetch the current master position and start replicating from there.
        :return:
        '''
        if all([self.binlog_file,self.start_position]):
            pass
        elif self.gtid:
            pass
        else:
            sql = 'show master status'
            self.__check_stat(self.__raise_sql(sql=sql))
            result = self.cur.fetchall()
            _re = result[0]
            self.binlog_file,self.start_position,self.gtid= _re['File'],_re['Position'],str(_re['Executed_Gtid_Set']).replace('\n','')

    def __get_server_uuid(self):
        '''
        Fetch the server_uuid of the current source node.
        :return:
        '''
        sql = 'select @@server_uuid as server_uuid;'
        self.__check_stat(self.__raise_sql(sql=sql))
        result = self.cur.fetchall()
        return result[0]['server_uuid']

    def __get_version(self):
        sql = 'select @@version as version;'
        self.__check_stat(self.__raise_sql(sql=sql))
        result = self.cur.fetchall()
        tmp = result[0]['version'].split('.')
        self.mysql_version = int(tmp[0])

    def __gtid_set(self,gtid):
        '''
        Convert a GTID set string into a dict keyed by server UUID.
        :param gtid:
        :return:
        '''
        _gtid_list = gtid.replace('\n','').split(',')
        _gtid_dict = {}
        for uuid in _gtid_list:
            _u = uuid.split(':')
            _gtid_dict[_u[0]] = _u[1]
        return _gtid_dict


    def __retry_regist_master(self,gtid=None,binlog=None,position=None):
        '''
        Try to re-register the master/slave connection.
        :param gtid:
        :param binlog:
        :param position:
        :return:
        '''
        if gtid:
            gtid = ','.join(['{}:{}'.format(uuid,gtid[uuid]) for uuid in gtid])
        while True:
            try:
                self.conn = InitMyDB(mysql_host=self.host, mysql_port=self.port, mysql_user=self.user,
                                     mysql_password=self.passwd, unix_socket=self.unix_socket,ssl=self.ssl_auth).Init()
                if self.conn:
                    rep_info = {'log_file': binlog, 'log_pos': position, 'mysql_connection': self.conn,
                                'server_id': self.server_id, 'auto_position': self.auto_position, 'gtid': gtid}
                    try:
                        Logging(msg='binlog: {} position: {} gtid : {}'.format(binlog, position,gtid),level='info')
                        ReplConn = ReplicationMysql(**rep_info).ReadPack()
                    except pymysql.Error as e:
                        Logging(msg=traceback.format_exc(),level='error')
                        if e.args[0] in ErrorCode and ErrorCode[e.args[0]]:
                            self.__retry_regist_master(gtid=gtid,binlog=binlog,position=position)
                    Logging(msg='registered with master ok!', level='info')
                    return ReplConn
            except pymysql.Error:
                Logging(msg=traceback.format_exc(), level='error')

            time.sleep(1)

    def __get_gtid_info(self):
        '''
        May be called after a master/slave switchover, for two reasons:
        1. The slave's GTIDs may have been purged over time and this program never read the slave's
           GTID set, so after the switchover the GTID sets are inconsistent.
        2. If the new master joined the cluster while this program was running, its GTID info was not
           saved at initialization and has to be fetched now.
        '''
        for i in range(10):
            try:
                conn = InitMyDB(mysql_host=self.host, mysql_port=self.port, mysql_user=self.user,
                                             mysql_password=self.passwd, unix_socket=self.unix_socket,ssl=self.ssl_auth).Init()
                cur = conn.cursor()
                cur.execute('select @@gtid_purged as purged;')
                gtid_purged = cur.fetchall()[0]['purged']
                gtid_purged = gtid_purged.split(',')
                for gtid in gtid_purged:
                    _gtid = gtid.replace('\n','').split(':')
                    if _gtid[0] in tmepdata.excute_gtid:
                        _e_noid = tmepdata.excute_gtid[_gtid[0]].split('-')[-1]
                        _noid = _gtid[-1].split('-')[-1]
                        if int(_e_noid) < int(_noid):
                            tmepdata.excute_gtid[_gtid[0]] = _gtid[-1]
                    else:
                        tmepdata.excute_gtid[_gtid[0]] = _gtid[-1]
                return True
            except pymysql.Error as e:
                Logging(msg=e.args,level='error')
            time.sleep(1)
        else:
            Logging(msg='failed to get gtid_purged info!!!!', level='error')
            return None


    def __raise_sql(self, sql, args=[]):
        '''
        Execute a SQL statement on the source connection, retrying on recoverable errors.
        :param sql:
        :param args:
        :return:
        '''
        args = self.escape_string(args) if args else []
        try:
            self.cur.execute(sql,args)
        except pymysql.Error as e:
            Logging(msg='{}'.format(e.args), level='error')
            if e.args[0] in ErrorCode:
                if ErrorCode[e.args[0]]:
                    self.__retry_execute(sql=sql, args=args)
                    return True
            Logging(msg='sql:{},values:{}'.format(sql, args), level='error')
            Logging(msg=e, level='error')
            return None
        except:
            Logging(msg=traceback.format_exc(), level='error')
            return None
        return True

    def __retry_execute(self, sql, args=[]):
        '''
        Retry after an error.
        :param sql: SQL statement
        :param args: argument list
        :return:
        '''
        self.__retry_connection_destion()
        self.__check_stat(self.__raise_sql(sql, args))


    def __retry_connection_destion(self):
        '''
        Retry the lost DB connection up to 60 times; if every attempt fails, exit the whole
        program. The limit exists because the database may be in the middle of a failover;
        if it cannot be reconnected within that window it is treated as down or unreachable.
        :return:
        '''
        for i in range(60):
            Logging(msg='connection to source db, trying again!!!', level='info')
            try:
                self.conn = InitMyDB(mysql_host=self.host, mysql_port=self.port,
                                     mysql_user=self.user, mysql_password=self.passwd,
                                     unix_socket=self.unix_socket, ssl=self.ssl_auth).Init()
                if self.conn:
                    self.cur = self.conn.cursor()
                    Logging(msg='connection success!!!', level='info')
                    try:
                        self.cur.execute('SET SESSION wait_timeout = 2147483;')
                        return True
                    except pymysql.Error as e:
                        Logging(msg=e.args,level='error')
                        if e.args[0] in ErrorCode and ErrorCode[e.args[0]]:
                            self.__retry_connection_destion()
            except:
                Logging(msg=traceback.format_exc(), level='error')
            time.sleep(1)
        else:
            Logging(msg='tried 60 times and failed to connect to source db, exiting now', level='error')
            sys.exit()

    def __check_stat(self, state):
        if not state:
            Logging(msg='failed!!!!', level='error')
            sys.exit()


    def append_data(self,values):
        '''
        Write a GTID transaction to the queue, with a 60 second timeout; if the queue has not
        been consumed within 60 seconds, give up.
        :param values: {'gtid':{},'gno_uid':'',gno_id:123,'binlog':'','at_pos':123,'next_pos':123,'sql_list':[[sql,[args...],db,tabl],....]}
        :return:
        '''
        for i in range(60):
            if self.queue.full():
                time.sleep(1)
                continue
            self.queue.put(values)
            return True
        else:
            return False
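
OperationDB keeps GTID sets as dicts keyed by server UUID (__gtid_set) and joins them back into 'uuid:interval' strings before re-registering (__retry_regist_master) or persisting state. Below is a self-contained sketch of that round trip, under the simplifying assumption of one interval per UUID; the function names are illustrative, not the project's API.

def gtid_set_to_dict(gtid):
    # 'uuid:interval,uuid:interval' (possibly containing newlines) -> {uuid: interval}
    result = {}
    for member in gtid.replace('\n', '').split(','):
        server_uuid, interval = member.split(':', 1)
        result[server_uuid] = interval
    return result

def dict_to_gtid_set(gtid_dict):
    # {uuid: interval} -> 'uuid:interval,uuid:interval'
    return ','.join('{}:{}'.format(u, gtid_dict[u]) for u in gtid_dict)

executed = '3e11fa47-71ca-11e1-9e33-c80aa9429562:1-5,\nb9f6a3e2-0000-0000-0000-000000000000:1-7'
parsed = gtid_set_to_dict(executed)
print(parsed['3e11fa47-71ca-11e1-9e33-c80aa9429562'])  # 1-5
print(dict_to_gtid_set(parsed))
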
Example #4
class destination(escape):
    def __init__(self, **kwargs):
        super(destination, self).__init__()

        self.destnation_type = kwargs[
            'destnation_type'] if 'destnation_type' in kwargs else None
        self.queue = kwargs['queue']
        #print('ds : {}'.format(self.queue))
        self.threads = kwargs['dthreads']
        self.save_append_status = {}

        self.server_id = kwargs['server_id']

        self.shost = kwargs['shost']
        self.sport = kwargs['sport']
        self.suser = kwargs['suser']
        self.spassword = kwargs['spassword']
        self.sbinlog = kwargs['sbinlog']

        self.error_queue = None
        self.kwargs = kwargs.copy()
        self.kwargs['queue'] = des_queue
        self.kwargs['error_queue'] = error_queue

        self.th_list = []
        self.__init_status_conn()

    def __init_status_conn(self):
        self._status_conn = GetStruct(host=self.shost,
                                      port=self.sport,
                                      user=self.suser,
                                      passwd=self.spassword,
                                      binlog=self.sbinlog)  # initialize the status-DB connection helper
        self._status_conn.CreateTmp()

    def __check_stat(self, state, only_state=None):
        if state:
            if only_state:
                return
        else:
            Logging(msg='consume queue is full, exiting now!!!!', level='error')
            if self.error_queue:
                self.error_queue.put(1)
            sys.exit()

    def __get_struct(self):
        global table_struct_list, table_pk_idex_list
        while 1:
            if not self.queue.empty():
                value = self.queue.get()
                if 'table_struct' in value:
                    table_struct_list = value['table_struct'][0]
                    table_pk_idex_list = value['table_struct'][1]
                    break

    def __enter__(self):
        '''
        Loop reading data from the queue.
        :return:
        '''
        self.__get_struct()

        # initialize the destination-DB worker threads
        for i in range(self.threads):
            p = ThreadDump(**dict(self.kwargs, **{'thread_id': i + 1}))
            if p:
                p.start()
                self.th_list.append(p)
            else:
                sys.exit()

        # initialize the status-DB thread
        p = ThreadDump(**{
            'server_id': self.server_id,
            'save_status': self._status_conn
        })
        p.start()
        self.th_list.append(p)

        group_sql = {}
        tmp_status = {}
        num = 0
        interval = int(time.time())

        while 1:
            if not self.queue.empty():
                trancaction = self.queue.get()
                if 'table_struct' in trancaction:
                    global table_struct_list, table_pk_idex_list
                    table_struct_list = trancaction['table_struct'][0]
                    table_pk_idex_list = trancaction['table_struct'][1]
                    continue
                value_list = trancaction['value_list']

                for _value in value_list:
                    db_name, tbl_name = _value[1], _value[2]
                    entry = {
                        'gtid': trancaction['gtid'],
                        'gno_uid': trancaction['gno_uid'],
                        'gno_id': trancaction['gno_id'],
                        'binlog': trancaction['binlog'],
                        'at_pos': trancaction['at_pos'],
                        'next_pos': trancaction['next_pos'],
                        'value_list': _value
                    }
                    group_sql.setdefault('{}:{}'.format(db_name, tbl_name), []).append(entry)
                '''hand the batch to the worker queues every 100 transactions or every 10 seconds'''
                num += 1
                tmp_status = {
                    'binlog': trancaction['binlog'],
                    'at_pos': trancaction['at_pos'],
                    'next_pos': trancaction['next_pos'],
                    'gtid': trancaction['gtid'],
                    'gno_uid': trancaction['gno_uid']
                }
                # if (num >= 100 or (int(time.time()) - interval) >= 10):
                #     if error_queue.empty():
                #         tmp_status = {'binlog':trancaction['binlog'],'at_pos':trancaction['at_pos'],
                #                       'next_pos':trancaction['next_pos'],'gtid':trancaction['gtid'],
                #                       'gno_uid':trancaction['gno_uid']}
                #         self.__check_stat(self.__put_queue(value=group_sql,tmp_status=tmp_status),only_state=True)
                #
                #     else:
                #         for th in self.th_list:
                #             th.isDaemon()
                #         Logging(msg='an exception occurred in the inbound thread on destination db...',level='error')
                #         sys.exit()
                if self.__put(group_sql=group_sql,
                              tmp_status=tmp_status,
                              num=num,
                              interval=interval):
                    interval = int(time.time())
                    num = 0
                    group_sql = {}
                continue
            else:
                if self.__put(group_sql=group_sql,
                              tmp_status=tmp_status,
                              num=num,
                              interval=interval):
                    interval = int(time.time())
                    num = 0
                    group_sql = {}
                time.sleep(0.001)

    def __put(self, group_sql, tmp_status, num=None, interval=None):
        if (num >= 100 or (int(time.time()) - interval) >= 10) and group_sql:
            if error_queue.empty():
                self.__check_stat(self.__put_queue(value=group_sql,
                                                   tmp_status=tmp_status),
                                  only_state=True)
                return True
            else:
                for th in self.th_list:
                    th.isDaemon()
                Logging(
                    msg=
                    'an exception occurred in the inbound thread on destination db...',
                    level='error')
                sys.exit()
        return False

    def __put_queue(self, value, tmp_status={}):
        '''
        First check whether the worker queue is full, with a 60 second timeout; if it is still
        full after 60 seconds the worker threads are assumed to have crashed. This avoids looping
        here forever and never noticing that the destination-DB threads have stopped.
        :param value:
        :return:
        '''
        for i in range(60):
            if self.__check_queue():
                _uuid = int(uuid.uuid1())
                chunk_list_status.append([_uuid, len(value), tmp_status])
                chunk_list_status_th[_uuid] = []
                for key in value:
                    des_queue.put([key, value[key], _uuid])
                return True
            time.sleep(1)
        else:
            return False

    def __check_queue(self):
        '''
        Check whether any of the queues is full; a full queue means consumption may be blocked.
        It also keeps any single table from falling too far behind.
        :return:
        '''
        if des_queue.full():
            return False
        return True

    def __exit__(self, exc_type, exc_val, exc_tb):
        pass
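
The consumer above groups incoming transactions by 'db:table' and hands a batch to the worker queues once it holds 100 transactions or once 10 seconds have passed since the last flush (__put). A minimal sketch of that flush rule in isolation; the class and parameter names are assumptions for illustration, not the project's API.

import time

class BatchFlusher(object):
    def __init__(self, flush, max_items=100, max_age=10):
        self.flush = flush            # callable that receives the accumulated batch
        self.max_items = max_items    # flush after this many items ...
        self.max_age = max_age        # ... or after this many seconds
        self.items = []
        self.last_flush = int(time.time())

    def add(self, item):
        self.items.append(item)
        if len(self.items) >= self.max_items or int(time.time()) - self.last_flush >= self.max_age:
            self.flush(self.items)
            self.items = []
            self.last_flush = int(time.time())

# Usage: flushes at 100 and 200 items; the remaining 50 stay buffered until a later
# add() call crosses the 10-second age limit.
flusher = BatchFlusher(lambda batch: print('flushing', len(batch), 'items'))
for n in range(250):
    flusher.add(n)
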