Beispiel #1
0
    def process_binlog(self):
        """Parse the binlog stream and emit (or buffer, for flashback) SQL.

        Reads events starting at the configured file/position, skips events
        outside the requested time/position window, prints DDL and DML SQL,
        and in flashback mode buffers DML into a temp file so that
        print_rollback_sql() can replay it in reverse. Returns True.
        """
        stream = BinLogStreamReader(connection_settings=self.conn_setting,
                                    server_id=self.server_id,
                                    log_file=self.start_file,
                                    log_pos=self.start_pos,
                                    only_schemas=self.only_schemas,
                                    only_tables=self.only_tables,
                                    resume_stream=True)

        reached_last_event = False
        # e_start_pos: position of the enclosing transaction's BEGIN;
        # prev_pos: position of the previous non-rotate/non-format event.
        e_start_pos = stream.log_pos
        prev_pos = stream.log_pos
        # to simplify code, we do not use flock for tmp_file.
        tmp_file = create_unique_file('%s.%s' % (self.conn_setting['host'], self.conn_setting['port']))
        with temp_open(tmp_file, "w") as f_tmp, self.connection as cursor:
            for binlog_event in stream:
                if not self.stop_never:
                    try:
                        event_time = datetime.datetime.fromtimestamp(binlog_event.timestamp)
                    except OSError:
                        # pre-epoch/invalid timestamps can raise on some platforms
                        event_time = datetime.datetime(1980, 1, 1, 0, 0)
                    at_end = stream.log_file == self.end_file and stream.log_pos == self.end_pos
                    at_eof = stream.log_file == self.eof_file and stream.log_pos == self.eof_pos
                    if at_end or at_eof:
                        reached_last_event = True
                    elif event_time < self.start_time:
                        # Before the requested window: just track the position.
                        if not isinstance(binlog_event, (RotateEvent, FormatDescriptionEvent)):
                            prev_pos = binlog_event.packet.log_pos
                        continue
                    elif (stream.log_file not in self.binlogList) or \
                            (self.end_pos and stream.log_file == self.end_file and stream.log_pos > self.end_pos) or \
                            (stream.log_file == self.eof_file and stream.log_pos > self.eof_pos) or \
                            (event_time >= self.stop_time):
                        break
                    # else:
                    #     raise ValueError('unknown binlog file or position')

                if isinstance(binlog_event, QueryEvent) and binlog_event.query == 'BEGIN':
                    e_start_pos = prev_pos

                if isinstance(binlog_event, QueryEvent) and not self.only_dml:
                    sql = concat_sql_from_binlog_event(cursor=cursor, binlog_event=binlog_event,
                                                       flashback=self.flashback, no_pk=self.no_pk)
                    if sql:
                        print(sql)
                elif is_dml_event(binlog_event) and event_type(binlog_event) in self.sql_type:
                    for row in binlog_event.rows:
                        sql = concat_sql_from_binlog_event(cursor=cursor, binlog_event=binlog_event, no_pk=self.no_pk,
                                                           row=row, flashback=self.flashback, e_start_pos=e_start_pos)
                        if self.flashback:
                            f_tmp.write(sql + '\n')
                        else:
                            print(sql)

                if not isinstance(binlog_event, (RotateEvent, FormatDescriptionEvent)):
                    prev_pos = binlog_event.packet.log_pos
                if reached_last_event:
                    break

            stream.close()
            f_tmp.close()
            if self.flashback:
                self.print_rollback_sql(filename=tmp_file)
        return True
    def process_binlog(self):
        """Stream binlog events and turn them into SQL statements.

        Runs with blocking=True, so with stop_never the reader keeps waiting
        for new events; otherwise parsing stops at the configured end/EOF
        position or stop_time. DDL (QueryEvent) SQL is printed directly; DML
        SQL is printed, or buffered into a temp file when flashback is enabled
        so print_rollback_sql() can replay it in reverse. Returns True.
        """
        stream = BinLogStreamReader(connection_settings=self.conn_setting, server_id=self.server_id,
                                    log_file=self.start_file, log_pos=self.start_pos, only_schemas=self.only_schemas,
                                    only_tables=self.only_tables, resume_stream=True, blocking=True)

        flag_last_event = False
        # e_start_pos: position of the current transaction's BEGIN;
        # last_pos: position of the previous non-rotate/non-format event.
        e_start_pos, last_pos = stream.log_pos, stream.log_pos
        # to simplify code, we do not use flock for tmp_file.
        tmp_file = create_unique_file('%s.%s' % (self.conn_setting['host'], self.conn_setting['port']))
        with temp_open(tmp_file, "w") as f_tmp, self.connection as cursor:
            for binlog_event in stream:
                # Continuously parse binlog. default: stop at the latest event when you start.
                if not self.stop_never:
                    try:
                        event_time = datetime.datetime.fromtimestamp(binlog_event.timestamp)
                    except OSError:
                        # pre-epoch/invalid timestamps can raise on some platforms
                        event_time = datetime.datetime(1980, 1, 1, 0, 0)
                    if (stream.log_file == self.end_file and stream.log_pos == self.end_pos) or \
                            (stream.log_file == self.eof_file and stream.log_pos == self.eof_pos):
                        flag_last_event = True
                    elif event_time < self.start_time:  # event precedes the requested start time
                        if not isinstance(binlog_event, (RotateEvent, FormatDescriptionEvent)):
                            last_pos = binlog_event.packet.log_pos
                        continue
                    elif (stream.log_file not in self.binlogList) or \
                            (self.end_pos and stream.log_file == self.end_file and stream.log_pos > self.end_pos) or \
                            (stream.log_file == self.eof_file and stream.log_pos > self.eof_pos) or \
                            (event_time >= self.stop_time):
                        break

                if isinstance(binlog_event, QueryEvent) and binlog_event.query == 'BEGIN':
                    e_start_pos = last_pos

                if isinstance(binlog_event, QueryEvent) and not self.only_dml:
                    sql = concat_sql_from_binlog_event(cursor=cursor, binlog_event=binlog_event,
                                                       flashback=self.flashback, no_pk=self.no_pk)
                    if sql:
                        # plain print; the original's f-string wrapper added nothing
                        print(sql)
                elif is_dml_event(binlog_event) and event_type(binlog_event) in self.sql_type:
                    for row in binlog_event.rows:
                        sql = concat_sql_from_binlog_event(cursor=cursor, binlog_event=binlog_event, no_pk=self.no_pk,
                                                           row=row, flashback=self.flashback, e_start_pos=e_start_pos)
                        if self.flashback:
                            f_tmp.write(sql + '\n')
                        else:
                            print(sql)

                if not isinstance(binlog_event, (RotateEvent, FormatDescriptionEvent)):
                    last_pos = binlog_event.packet.log_pos
                if flag_last_event:
                    break

            stream.close()
            f_tmp.close()
            if self.flashback:
                self.print_rollback_sql(filename=tmp_file)
        return True
    def process_binlog(self):
        """Parse binlog events and replay the generated DML on a destination DB.

        Same scan loop as the plain binlog2sql flow, but instead of printing
        DML it executes each generated statement on self.dest_connection,
        committing after every statement. Flashback output is currently
        disabled here (only a marker line is printed). Returns True.
        """
        stream = BinLogStreamReader(connection_settings=self.conn_setting, server_id=self.server_id,
                                    log_file=self.start_file, log_pos=self.start_pos, only_schemas=self.only_schemas,
                                    only_tables=self.only_tables, resume_stream=True, blocking=True)

        # True once the configured end/EOF position has been reached.
        flag_last_event = False

        e_start_pos, last_pos = stream.log_pos, stream.log_pos

        # Rollback temp-file support (IP+PORT-named file) is disabled in this variant.
        with self.connection as cursor, self.dest_connection as dest_cursor:
            for binlog_event in stream:

                # Not parsing continuously: evaluate the stop conditions.
                if not self.stop_never:
                    try:
                        event_time = datetime.datetime.fromtimestamp(binlog_event.timestamp)
                    except OSError:
                        # pre-epoch/invalid timestamps can raise on some platforms
                        event_time = datetime.datetime(1980, 1, 1, 0, 0)
                    if (stream.log_file == self.end_file and stream.log_pos == self.end_pos) or \
                            (stream.log_file == self.eof_file and stream.log_pos == self.eof_pos):
                        flag_last_event = True
                    elif event_time < self.start_time:
                        if not isinstance(binlog_event, (RotateEvent, FormatDescriptionEvent)):
                            last_pos = binlog_event.packet.log_pos
                        continue
                    elif (stream.log_file not in self.binlogList) or \
                            (self.end_pos and stream.log_file == self.end_file and stream.log_pos > self.end_pos) or \
                            (stream.log_file == self.eof_file and stream.log_pos > self.eof_pos) or \
                            (event_time >= self.stop_time):
                        break

                # Remember where the current transaction's BEGIN started.
                if isinstance(binlog_event, QueryEvent) and binlog_event.query == 'BEGIN':
                    e_start_pos = last_pos

                # DDL statements
                if isinstance(binlog_event, QueryEvent) and not self.only_dml:
                    sql = concat_sql_from_binlog_event(cursor=cursor, binlog_event=binlog_event,
                                                       flashback=self.flashback, no_pk=self.no_pk)
                    if sql:
                        print(sql)

                # DML statements
                elif is_dml_event(binlog_event) and event_type(binlog_event) in self.sql_type:
                    for row in binlog_event.rows:
                        sql = concat_sql_from_binlog_event(cursor=cursor, binlog_event=binlog_event, no_pk=self.no_pk,
                                                           row=row, flashback=self.flashback, e_start_pos=e_start_pos)
                        if self.flashback:
                            print("generate flashback sql.")
                        else:
                            # NOTE(review): here `sql` is treated as a mapping,
                            # unlike the string printed elsewhere — confirm
                            # concat_sql_from_binlog_event's contract.
                            for value in sql.values():
                                try:
                                    print(value)
                                    # FIX: execute the statement directly; the
                                    # original's `"%s" % value` re-formatting
                                    # was redundant.
                                    dest_cursor.execute(value)
                                    self.dest_connection.commit()
                                except Exception as e:
                                    # best-effort replay: report and continue
                                    print(e)

                # Track the position after each real event (rotate/format
                # events carry no useful position).
                if not isinstance(binlog_event, (RotateEvent, FormatDescriptionEvent)):
                    last_pos = binlog_event.packet.log_pos

                # Last event processed: stop (flag stays False with stop_never).
                if flag_last_event:
                    break

            stream.close()
        return True
Beispiel #4
0
    def process_binlog(self):
        """Scan the binlog and print generated SQL via print_line.

        In debug mode, dump the instance configuration and return without
        touching the binlog. Otherwise stream events (blocking) from the
        start file/position, skip ahead to start_time, and print DML row SQL
        and DDL SQL until the stop file/position/time is reached (unless
        stop_never is set).
        """
        if self.debug:
            # Dump config: timezone is reduced to its fixed offset; the live
            # DB connection object is skipped.
            for k, v in vars(self).items():
                if k == 'timezone':
                    print_line('{} = {}'.format(k, v._std_offset))
                elif k == 'connection':
                    pass
                else:
                    print_line('{} = {}'.format(k, v))
            return
        stream = BinLogStreamReader(connection_settings=self.conn_setting,
                                    server_id=self.server_id,
                                    log_file=self.start_file,
                                    log_pos=self.start_position,
                                    only_schemas=self.databases,
                                    only_tables=self.tables,
                                    resume_stream=True,
                                    blocking=True,
                                    skip_to_timestamp=self.start_time)
        with self.connection as cursor:
            sql = '# {} #\n# {} binlog2sql start! #\n# {} #'.format(
                '=' * 50, arrow.now(), '=' * 50)
            print_line(sql, self.output_file)

            # start_pos=4 is the first position after a binlog file header;
            # a progress line is printed at most every 10 minutes of binlog
            # time while scanning toward start_time.
            start_pos, print_interval, print_time = 4, 60 * 10, 0
            for binlog_event in stream:

                if (print_time + print_interval
                    ) < binlog_event.timestamp < self.start_time:
                    print_time = binlog_event.timestamp
                    sql = '# Binlog scan to {}'.format(
                        arrow.get(print_time).to(self.timezone))
                    print_line(sql, self.output_file)

                # Skip everything that happened before the requested window.
                if binlog_event.timestamp < self.start_time:
                    continue

                # dml
                # NOTE(review): this assumes is_dml_event() returns the event
                # type (e.g. 'INSERT') to be matched against sql_type, unlike
                # the boolean helper used by sibling variants — confirm.
                if is_dml_event(binlog_event) in self.sql_type:
                    for row in binlog_event.rows:
                        if self.json:
                            # Re-encode JSON columns (MySQL column type 245)
                            # so the generated SQL carries serialized text.
                            for column in binlog_event.columns:
                                if column.type == 245:
                                    for k, v in row.items():
                                        row[k][column.name] = json.dumps(
                                            type_convert(v[column.name]),
                                            ensure_ascii=False)
                        sql = generate_sql(cursor=cursor,
                                           binlog_event=binlog_event,
                                           no_pk=self.no_pk,
                                           row=row,
                                           e_start_pos=start_pos,
                                           flashback=self.flashback)
                        print_line(sql, self.output_file)
                # ddl
                elif is_ddl_event(binlog_event):
                    # Remember the DDL position; it becomes e_start_pos for
                    # subsequent DML.
                    start_pos = binlog_event.packet.log_pos
                    if not self.only_dml and binlog_event.query != 'BEGIN':
                        sql = generate_sql(cursor=cursor,
                                           binlog_event=binlog_event,
                                           no_pk=self.no_pk,
                                           e_start_pos=start_pos,
                                           flashback=self.flashback)
                        print_line(sql, self.output_file)

                # exceed the end position of the end binlog file
                if stream.log_file == self.stop_file and (
                        binlog_event.packet.log_pos >= self.stop_position
                        or binlog_event.timestamp >= self.stop_time
                ) and not self.stop_never:
                    sql = '# {} #\n# {} binlog2sql stop!  #\n# {} #'.format(
                        '=' * 50, arrow.now(), '=' * 50)
                    print_line(sql, self.output_file)
                    break
            stream.close()
    def process_binlog(self):
        """Parse the binlog into buffered SQL files (execute or rollback).

        Streams events from the start position, buffers generated SQL in
        memory and flushes it to a temp file every MAX_SQL_COUNT_PER_WRITE
        statements, tagging transaction boundaries with SPLIT_TRAN_FLAG.
        When pseudo_thread_id is set, only transactions opened by that
        session id are processed. Finally builds either the rollback files
        (flashback) or the execute file and prints their paths. Returns True.
        """
        stream = BinLogStreamReader(connection_settings=self.conn_setting,
                                    server_id=self.server_id,
                                    log_file=self.start_file,
                                    log_pos=self.start_pos,
                                    only_schemas=self.only_schemas,
                                    only_tables=self.only_tables,
                                    resume_stream=True,
                                    blocking=True)
        flag_last_event = False
        slave_proxy_id = 0
        e_start_pos, last_pos = stream.log_pos, stream.log_pos
        self.touch_tmp_sql_file()
        # to simplify code, we do not use flock for tmp_file.
        transaction_count = 0
        with self.connection as cursor:
            sql_list = []
            for binlog_event in stream:
                if not self.stop_never:
                    try:
                        event_time = datetime.datetime.fromtimestamp(
                            binlog_event.timestamp)
                    except OSError:
                        # pre-epoch/invalid timestamps can raise on some platforms
                        event_time = datetime.datetime(1980, 1, 1, 0, 0)
                    if (stream.log_file == self.end_file and stream.log_pos == self.end_pos) or \
                            (stream.log_file == self.eof_file and stream.log_pos == self.eof_pos):
                        flag_last_event = True
                    elif event_time < self.start_time:
                        if not isinstance(binlog_event, (RotateEvent, FormatDescriptionEvent)):
                            last_pos = binlog_event.packet.log_pos
                        continue
                    elif (stream.log_file not in self.binlogList) or \
                            (self.end_pos and stream.log_file == self.end_file and stream.log_pos > self.end_pos) or \
                            (stream.log_file == self.eof_file and stream.log_pos > self.eof_pos) or \
                            (event_time >= self.stop_time):
                        break
                if isinstance(binlog_event, QueryEvent) and binlog_event.query == 'BEGIN':
                    # Transaction boundary: remember where BEGIN started and
                    # which session (slave_proxy_id) opened it.
                    e_start_pos = last_pos
                    transaction_count += 1
                    if transaction_count % 100 == 0:
                        print("process binlog at {}".format(last_pos))
                    slave_proxy_id = binlog_event.slave_proxy_id
                    if len(sql_list) == 0 or sql_list[-1] != SPLIT_TRAN_FLAG:
                        sql_list.append(SPLIT_TRAN_FLAG)

                # Restrict output to one originating session when requested.
                if self.pseudo_thread_id > 0:
                    if self.pseudo_thread_id != slave_proxy_id:
                        continue
                if isinstance(binlog_event, QueryEvent) and not self.only_dml:
                    sql = concat_sql_from_binlog_event(
                        cursor=cursor,
                        binlog_event=binlog_event,
                        flashback=self.flashback,
                        no_pk=self.no_pk,
                        rollback_with_primary_key=self.rollback_with_primary_key,
                        rollback_with_changed_value=self.rollback_with_changed_value)
                    if sql:
                        sql_list.append(sql)
                        if len(sql_list) == MAX_SQL_COUNT_PER_WRITE:
                            self.write_tmp_sql(sql_list=sql_list)
                            # BUG FIX: reset the buffer after flushing. The
                            # original kept the flushed statements, so they
                            # were written again later, and since the trigger
                            # is `== MAX`, this branch never flushed again.
                            sql_list = []
                elif is_dml_event(binlog_event) and event_type(
                        binlog_event) in self.sql_type:
                    for row in binlog_event.rows:
                        sql = concat_sql_from_binlog_event(
                            cursor=cursor,
                            binlog_event=binlog_event,
                            no_pk=self.no_pk,
                            row=row,
                            flashback=self.flashback,
                            e_start_pos=e_start_pos,
                            rollback_with_primary_key=self.rollback_with_primary_key,
                            rollback_with_changed_value=self.rollback_with_changed_value)
                        sql_list.append(sql)
                        if len(sql_list) == MAX_SQL_COUNT_PER_WRITE:
                            self.write_tmp_sql(sql_list=sql_list)
                            sql_list = []

                if not isinstance(binlog_event, (RotateEvent, FormatDescriptionEvent)):
                    last_pos = binlog_event.packet.log_pos
                if flag_last_event:
                    break
            # Flush whatever remains in the buffer.
            self.write_tmp_sql(sql_list=sql_list)
            stream.close()

            if self.flashback:
                self.create_rollback_sql()
            else:
                self.create_execute_sql()
            print("===============================================")
            if not self.flashback:
                print("执行脚本文件:\n{0}".format(self.execute_sql_file))
            else:
                print("回滚脚本文件:")
                # Rollback files are replayed newest-first.
                new_file_list = list(reversed(self.rollback_sql_files))
                for tmp_file in new_file_list:
                    print(tmp_file)
            print("===============================================")
        return True
Beispiel #6
0
    def process_binlog(self):
        """Scan the binlog, log generated SQL, and report summary statistics.

        Mirrors the binlog2sql event loop: skips events before start_time,
        writes generated DML/DDL statements via print_line, and on reaching
        the stop position/time logs total/filtered row counts, per-operation
        DML totals, the DDL count, and elapsed time. In debug mode only the
        configuration snapshot is logged.
        """
        began_at = time.time()
        config = {}
        total_row = 0
        filter_row = 0
        dml = {'delete': 0, 'update': 0, 'insert': 0}
        ddl = 0
        # Build a loggable snapshot of the configuration: the timezone is
        # reduced to its fixed offset, the live DB connection is omitted.
        for name, value in vars(self).items():
            if name == 'timezone':
                config[name] = value._std_offset
            elif name != 'connection':
                config[name] = value
        self.logger.info(config)
        if self.debug:
            return
        stream = BinLogStreamReader(connection_settings=self.conn_setting, server_id=self.server_id,
                                    log_file=self.start_file, log_pos=self.start_position,
                                    only_schemas=self.only_schemas, only_tables=self.tables, resume_stream=True,
                                    blocking=True, skip_to_timestamp=self.start_time)
        with self.connection as cursor:
            self.logger.info('# binlog2sql start...')

            # start_pos=4 is the first position after the binlog file header;
            # progress is logged at most every 10 minutes of binlog time.
            start_pos = 4
            print_interval = 60 * 10
            print_time = 0
            for binlog_event in stream:
                total_row += 1
                if print_time + print_interval < binlog_event.timestamp < self.start_time:
                    print_time = binlog_event.timestamp
                    self.logger.info('# Binlog scan to {0}'.format(datetime.datetime.fromtimestamp(print_time)))

                # Ignore everything before the requested window.
                if binlog_event.timestamp < self.start_time:
                    continue

                # dml
                op = is_dml_event(binlog_event)
                if op in self.sql_type:
                    filter_row += 1
                    for row in binlog_event.rows:
                        # Count per affected row; flashback inverts
                        # INSERT/DELETE since the rollback does the opposite.
                        if op == 'UPDATE':
                            dml['update'] += 1
                        elif op in ('INSERT', 'DELETE'):
                            inverted = {'INSERT': 'delete', 'DELETE': 'insert'}
                            bucket = inverted[op] if self.flashback else op.lower()
                            dml[bucket] += 1
                        if self.json:
                            # Re-encode JSON columns (MySQL column type 245)
                            # so the generated SQL carries serialized text.
                            for column in binlog_event.columns:
                                if column.type == 245:
                                    for key, cell in row.items():
                                        row[key][column.name] = json.dumps(type_convert(cell[column.name]),
                                                                           ensure_ascii=False)
                        sql = generate_sql(cursor=cursor, binlog_event=binlog_event, no_pk=self.no_pk, row=row,
                                           e_start_pos=start_pos, flashback=self.flashback)
                        print_line(sql, self.output_file)
                # ddl
                elif is_ddl_event(binlog_event):
                    # Remember the DDL position; it becomes e_start_pos for
                    # subsequent DML.
                    start_pos = binlog_event.packet.log_pos
                    if not self.only_dml and binlog_event.query != 'BEGIN':
                        filter_row += 1
                        ddl += 1
                        sql = generate_sql(cursor=cursor, binlog_event=binlog_event, no_pk=self.no_pk,
                                           e_start_pos=start_pos, flashback=self.flashback)
                        print_line(sql, self.output_file)

                # exceed the end position of the end binlog file
                if stream.log_file == self.stop_file and (
                        binlog_event.packet.log_pos >= self.stop_position or binlog_event.timestamp >= self.stop_time
                ) and not self.stop_never:
                    self.logger.info({
                        "total_rows": total_row,
                        "filter_rows": filter_row,
                        "DML": dml,
                        "DDL": ddl,
                        "cost_time": time.time() - began_at,
                    })
                    self.logger.info('# binlog2sql stop!')
                    break
            stream.close()