def process_binlog(self):
    """Scan the binlog stream and emit (or stage for rollback) SQL.

    Reads events from ``self.start_file``/``self.start_pos`` up to the
    configured end/EOF position or ``self.stop_time``.  DDL (QueryEvent)
    and filtered DML statements are printed; in flashback mode the
    generated rollback SQL is written to a temp file and later replayed
    in reverse by ``print_rollback_sql``.

    Returns:
        True on completion.
    """
    stream = BinLogStreamReader(connection_settings=self.conn_setting, server_id=self.server_id,
                                log_file=self.start_file, log_pos=self.start_pos,
                                only_schemas=self.only_schemas, only_tables=self.only_tables,
                                resume_stream=True)

    flag_last_event = False
    e_start_pos, last_pos = stream.log_pos, stream.log_pos
    # to simplify code, we do not use flock for tmp_file.
    tmp_file = create_unique_file('%s.%s' % (self.conn_setting['host'], self.conn_setting['port']))
    with temp_open(tmp_file, "w") as f_tmp, self.connection as cursor:
        try:
            for binlog_event in stream:
                # Unless --stop-never is set, filter events by position/time.
                if not self.stop_never:
                    try:
                        event_time = datetime.datetime.fromtimestamp(binlog_event.timestamp)
                    except OSError:
                        # fromtimestamp() can reject odd timestamps; fall back
                        # to an obviously-old sentinel so filtering still works.
                        event_time = datetime.datetime(1980, 1, 1, 0, 0)
                    if (stream.log_file == self.end_file and stream.log_pos == self.end_pos) or \
                            (stream.log_file == self.eof_file and stream.log_pos == self.eof_pos):
                        # Reached the recorded end/EOF position: handle this
                        # event, then leave the loop.
                        flag_last_event = True
                    elif event_time < self.start_time:
                        # Before the requested window - skip, but keep
                        # tracking the position.
                        if not isinstance(binlog_event, (RotateEvent, FormatDescriptionEvent)):
                            last_pos = binlog_event.packet.log_pos
                        continue
                    elif (stream.log_file not in self.binlogList) or \
                            (self.end_pos and stream.log_file == self.end_file and stream.log_pos > self.end_pos) or \
                            (stream.log_file == self.eof_file and stream.log_pos > self.eof_pos) or \
                            (event_time >= self.stop_time):
                        break
                    # else:
                    #     raise ValueError('unknown binlog file or position')

                if isinstance(binlog_event, QueryEvent) and binlog_event.query == 'BEGIN':
                    # Transaction start: remember it for position comments.
                    e_start_pos = last_pos

                if isinstance(binlog_event, QueryEvent) and not self.only_dml:
                    # DDL / other statements.
                    sql = concat_sql_from_binlog_event(cursor=cursor, binlog_event=binlog_event,
                                                       flashback=self.flashback, no_pk=self.no_pk)
                    if sql:
                        print(sql)
                elif is_dml_event(binlog_event) and event_type(binlog_event) in self.sql_type:
                    # Row events (INSERT/UPDATE/DELETE).
                    for row in binlog_event.rows:
                        sql = concat_sql_from_binlog_event(cursor=cursor, binlog_event=binlog_event,
                                                           no_pk=self.no_pk, row=row,
                                                           flashback=self.flashback,
                                                           e_start_pos=e_start_pos)
                        if self.flashback:
                            f_tmp.write(sql + '\n')
                        else:
                            print(sql)

                if not isinstance(binlog_event, (RotateEvent, FormatDescriptionEvent)):
                    last_pos = binlog_event.packet.log_pos
                if flag_last_event:
                    break
        finally:
            # BUGFIX: close the replication connection even if event handling
            # raises mid-loop (previously leaked on exception).
            stream.close()
        f_tmp.close()

    if self.flashback:
        self.print_rollback_sql(filename=tmp_file)
    return True
def process_binlog(self):
    """Parse the binlog stream and print (or stage for flashback) SQL.

    Same flow as the non-blocking variant, but the stream is opened with
    ``blocking=True`` so ``--stop-never`` can keep tailing new events.

    Returns:
        True on completion.
    """
    stream = BinLogStreamReader(connection_settings=self.conn_setting, server_id=self.server_id,
                                log_file=self.start_file, log_pos=self.start_pos,
                                only_schemas=self.only_schemas, only_tables=self.only_tables,
                                resume_stream=True, blocking=True)
    flag_last_event = False
    e_start_pos, last_pos = stream.log_pos, stream.log_pos
    # to simplify code, we do not use flock for tmp_file.
    tmp_file = create_unique_file('%s.%s' % (self.conn_setting['host'], self.conn_setting['port']))
    with temp_open(tmp_file, "w") as f_tmp, self.connection as cursor:
        try:
            for binlog_event in stream:
                # stop_never: continuously parse binlog. Default behaviour is
                # to stop at the latest event present when we started.
                if not self.stop_never:
                    try:
                        event_time = datetime.datetime.fromtimestamp(binlog_event.timestamp)
                    except OSError:
                        # fromtimestamp() may reject odd timestamps; use an
                        # obviously-old sentinel so the < start_time test holds.
                        event_time = datetime.datetime(1980, 1, 1, 0, 0)
                    if (stream.log_file == self.end_file and stream.log_pos == self.end_pos) or \
                            (stream.log_file == self.eof_file and stream.log_pos == self.eof_pos):
                        # Reached the recorded end/EOF position.
                        flag_last_event = True
                    elif event_time < self.start_time:
                        # Current event is before the requested start time.
                        if not isinstance(binlog_event, (RotateEvent, FormatDescriptionEvent)):
                            last_pos = binlog_event.packet.log_pos
                        continue
                    elif (stream.log_file not in self.binlogList) or \
                            (self.end_pos and stream.log_file == self.end_file and stream.log_pos > self.end_pos) or \
                            (stream.log_file == self.eof_file and stream.log_pos > self.eof_pos) or \
                            (event_time >= self.stop_time):
                        break
                    # else:
                    #     raise ValueError('unknown binlog file or position')

                if isinstance(binlog_event, QueryEvent) and binlog_event.query == 'BEGIN':
                    # Transaction start: remember it for position comments.
                    e_start_pos = last_pos

                if isinstance(binlog_event, QueryEvent) and not self.only_dml:
                    # DDL / other statements.
                    sql = concat_sql_from_binlog_event(cursor=cursor, binlog_event=binlog_event,
                                                       flashback=self.flashback, no_pk=self.no_pk)
                    if sql:
                        print(sql)
                elif is_dml_event(binlog_event) and event_type(binlog_event) in self.sql_type:
                    # Row events (INSERT/UPDATE/DELETE).
                    for row in binlog_event.rows:
                        sql = concat_sql_from_binlog_event(cursor=cursor, binlog_event=binlog_event,
                                                           no_pk=self.no_pk, row=row,
                                                           flashback=self.flashback,
                                                           e_start_pos=e_start_pos)
                        if self.flashback:
                            f_tmp.write(sql + '\n')
                        else:
                            print(sql)

                if not isinstance(binlog_event, (RotateEvent, FormatDescriptionEvent)):
                    last_pos = binlog_event.packet.log_pos
                if flag_last_event:
                    break
        finally:
            # Always release the replication connection.
            stream.close()
        f_tmp.close()

    if self.flashback:
        self.print_rollback_sql(filename=tmp_file)
    return True
def process_binlog(self):
    """Parse the binlog and replay DML statements onto the destination DB.

    Unlike the print-only variants, non-flashback DML is executed against
    ``self.dest_connection`` (best-effort: individual statement failures
    are printed and skipped).  Flashback file output is disabled in this
    variant.

    Returns:
        True on completion.
    """
    stream = BinLogStreamReader(connection_settings=self.conn_setting, server_id=self.server_id,
                                log_file=self.start_file, log_pos=self.start_pos,
                                only_schemas=self.only_schemas, only_tables=self.only_tables,
                                resume_stream=True, blocking=True)
    # Set once the recorded end/EOF position is reached.
    flag_last_event = False
    e_start_pos, last_pos = stream.log_pos, stream.log_pos
    # Rollback-SQL temp file (host.port) is disabled in this replay variant:
    # tmp_file = create_unique_file('%s.%s' % (self.conn_setting['host'], self.conn_setting['port']))
    # with temp_open(tmp_file, "w") as f_tmp, self.connection as cursor, self.dest_connection as dest_cursor:
    with self.connection as cursor, self.dest_connection as dest_cursor:
        try:
            for binlog_event in stream:
                # Unless --stop-never, filter events by position/time window.
                if not self.stop_never:
                    try:
                        event_time = datetime.datetime.fromtimestamp(binlog_event.timestamp)
                    except OSError:
                        # fromtimestamp() may reject odd timestamps; use an
                        # obviously-old sentinel.
                        event_time = datetime.datetime(1980, 1, 1, 0, 0)
                    if (stream.log_file == self.end_file and stream.log_pos == self.end_pos) or \
                            (stream.log_file == self.eof_file and stream.log_pos == self.eof_pos):
                        flag_last_event = True
                    elif event_time < self.start_time:
                        if not isinstance(binlog_event, (RotateEvent, FormatDescriptionEvent)):
                            last_pos = binlog_event.packet.log_pos
                        continue
                    elif (stream.log_file not in self.binlogList) or \
                            (self.end_pos and stream.log_file == self.end_file and stream.log_pos > self.end_pos) or \
                            (stream.log_file == self.eof_file and stream.log_pos > self.eof_pos) or \
                            (event_time >= self.stop_time):
                        break
                    # else:
                    #     raise ValueError('unknown binlog file or position')

                if isinstance(binlog_event, QueryEvent) and binlog_event.query == 'BEGIN':
                    # Transaction start: remember it for position comments.
                    e_start_pos = last_pos

                # DDL / other statements: print only.
                if isinstance(binlog_event, QueryEvent) and not self.only_dml:
                    sql = concat_sql_from_binlog_event(cursor=cursor, binlog_event=binlog_event,
                                                       flashback=self.flashback, no_pk=self.no_pk)
                    if sql:
                        print(sql)
                # DML: execute each generated statement on the destination.
                elif is_dml_event(binlog_event) and event_type(binlog_event) in self.sql_type:
                    for row in binlog_event.rows:
                        sql = concat_sql_from_binlog_event(cursor=cursor, binlog_event=binlog_event,
                                                           no_pk=self.no_pk, row=row,
                                                           flashback=self.flashback,
                                                           e_start_pos=e_start_pos)
                        if self.flashback:
                            # f_tmp.write(sql + '\n')
                            print("generate flashback sql.")
                        else:
                            # NOTE(review): sql appears to be a mapping of
                            # generated statements here - confirm against the
                            # concat_sql_from_binlog_event helper.
                            for value in sql.values():
                                try:
                                    print(value)
                                    # Execute the statement text directly; the
                                    # former '"%s" % value' round-trip added
                                    # nothing.
                                    dest_cursor.execute(value)
                                    self.dest_connection.commit()
                                except Exception as e:
                                    # Deliberate best-effort replay: report the
                                    # failure and continue with the next one.
                                    print(e)

                # Track position across binlog rotation.
                if not isinstance(binlog_event, (RotateEvent, FormatDescriptionEvent)):
                    last_pos = binlog_event.packet.log_pos
                # Parsing finished; default (stop_never=False) exits here.
                if flag_last_event:
                    break
        finally:
            # Always release the replication connection.
            stream.close()
    # f_tmp.close()
    # if self.flashback:
    #     self.print_rollback_sql(filename=tmp_file)
    return True
def process_binlog(self):
    """Stream binlog events and print the generated SQL.

    In debug mode the parsed configuration is dumped instead and the
    method returns immediately.
    """
    if self.debug:
        # Dump every configured attribute except the live connection.
        for key, value in vars(self).items():
            if key == 'connection':
                continue
            shown = value._std_offset if key == 'timezone' else value
            print_line('{} = {}'.format(key, shown))
        return

    stream = BinLogStreamReader(connection_settings=self.conn_setting,
                                server_id=self.server_id,
                                log_file=self.start_file,
                                log_pos=self.start_position,
                                only_schemas=self.databases,
                                only_tables=self.tables,
                                resume_stream=True,
                                blocking=True,
                                skip_to_timestamp=self.start_time)
    with self.connection as cursor:
        banner = '# {} #\n# {} binlog2sql start! #\n# {} #'.format(
            '=' * 50, arrow.now(), '=' * 50)
        print_line(banner, self.output_file)

        ddl_start_pos = 4
        report_interval = 60 * 10
        last_report_ts = 0
        for event in stream:
            # Emit a progress line every ten minutes of binlog time while
            # skipping forward to start_time.
            if (last_report_ts + report_interval) < event.timestamp < self.start_time:
                last_report_ts = event.timestamp
                print_line('# Binlog scan to {}'.format(
                    arrow.get(last_report_ts).to(self.timezone)), self.output_file)
            if event.timestamp < self.start_time:
                continue

            # dml
            # NOTE(review): is_dml_event() appears to return the event-type
            # name here (membership-tested against sql_type) - confirm helper.
            if is_dml_event(event) in self.sql_type:
                for row in event.rows:
                    if self.json:
                        # Column type 245 is MySQL JSON; re-serialize values.
                        for column in event.columns:
                            if column.type == 245:
                                for k, v in row.items():
                                    row[k][column.name] = json.dumps(
                                        type_convert(v[column.name]), ensure_ascii=False)
                    statement = generate_sql(cursor=cursor, binlog_event=event,
                                             no_pk=self.no_pk, row=row,
                                             e_start_pos=ddl_start_pos,
                                             flashback=self.flashback)
                    print_line(statement, self.output_file)
            # ddl
            elif is_ddl_event(event):
                ddl_start_pos = event.packet.log_pos
                if not self.only_dml and event.query != 'BEGIN':
                    statement = generate_sql(cursor=cursor, binlog_event=event,
                                             no_pk=self.no_pk,
                                             e_start_pos=ddl_start_pos,
                                             flashback=self.flashback)
                    print_line(statement, self.output_file)

            # exceed the end position of the end binlog file
            past_stop = (event.packet.log_pos >= self.stop_position
                         or event.timestamp >= self.stop_time)
            if stream.log_file == self.stop_file and past_stop and not self.stop_never:
                banner = '# {} #\n# {} binlog2sql stop! #\n# {} #'.format(
                    '=' * 50, arrow.now(), '=' * 50)
                print_line(banner, self.output_file)
                break
    stream.close()
def process_binlog(self):
    """Parse the binlog into batched SQL temp files, then assemble scripts.

    Generated statements are buffered in ``sql_list`` and flushed to a
    temp file every ``MAX_SQL_COUNT_PER_WRITE`` entries; transactions are
    separated with ``SPLIT_TRAN_FLAG`` markers.  Afterwards either an
    execute script or a set of rollback scripts is produced.

    Returns:
        True on completion.
    """
    stream = BinLogStreamReader(connection_settings=self.conn_setting, server_id=self.server_id,
                                log_file=self.start_file, log_pos=self.start_pos,
                                only_schemas=self.only_schemas, only_tables=self.only_tables,
                                resume_stream=True, blocking=True)
    flag_last_event = False
    # Session thread id of the current transaction (for pseudo_thread_id filtering).
    slave_proxy_id = 0
    e_start_pos, last_pos = stream.log_pos, stream.log_pos
    self.touch_tmp_sql_file()
    # to simplify code, we do not use flock for tmp_file.
    transaction_count = 0
    with self.connection as cursor:
        sql_list = []
        for binlog_event in stream:
            # Unless --stop-never, filter events by position/time window.
            if not self.stop_never:
                try:
                    event_time = datetime.datetime.fromtimestamp(binlog_event.timestamp)
                except OSError:
                    # fromtimestamp() may reject odd timestamps; use an
                    # obviously-old sentinel.
                    event_time = datetime.datetime(1980, 1, 1, 0, 0)
                if (stream.log_file == self.end_file and stream.log_pos == self.end_pos) or \
                        (stream.log_file == self.eof_file and stream.log_pos == self.eof_pos):
                    flag_last_event = True
                elif event_time < self.start_time:
                    if not isinstance(binlog_event, (RotateEvent, FormatDescriptionEvent)):
                        last_pos = binlog_event.packet.log_pos
                    continue
                elif (stream.log_file not in self.binlogList) or \
                        (self.end_pos and stream.log_file == self.end_file and stream.log_pos > self.end_pos) or \
                        (stream.log_file == self.eof_file and stream.log_pos > self.eof_pos) or \
                        (event_time >= self.stop_time):
                    break
                # else:
                #     raise ValueError('unknown binlog file or position')

            if isinstance(binlog_event, QueryEvent) and binlog_event.query == 'BEGIN':
                e_start_pos = last_pos
                transaction_count += 1
                if transaction_count % 100 == 0:
                    print("process binlog at {}".format(last_pos))
                slave_proxy_id = binlog_event.slave_proxy_id
                # Mark the transaction boundary, avoiding duplicate markers.
                if len(sql_list) == 0 or sql_list[-1] != SPLIT_TRAN_FLAG:
                    sql_list.append(SPLIT_TRAN_FLAG)

            # Keep only events from the requested session thread, if any.
            if self.pseudo_thread_id > 0:
                if self.pseudo_thread_id != slave_proxy_id:
                    continue

            if isinstance(binlog_event, QueryEvent) and not self.only_dml:
                # DDL / other statements.
                sql = concat_sql_from_binlog_event(
                    cursor=cursor, binlog_event=binlog_event,
                    flashback=self.flashback, no_pk=self.no_pk,
                    rollback_with_primary_key=self.rollback_with_primary_key,
                    rollback_with_changed_value=self.rollback_with_changed_value)
                if sql:
                    sql_list.append(sql)
                    if len(sql_list) == MAX_SQL_COUNT_PER_WRITE:
                        self.write_tmp_sql(sql_list=sql_list)
                        # BUGFIX: reset the buffer after flushing, as the DML
                        # branch does; without this the final flush re-wrote
                        # already-written statements (duplicates in output).
                        sql_list = []
            elif is_dml_event(binlog_event) and event_type(binlog_event) in self.sql_type:
                # Row events (INSERT/UPDATE/DELETE).
                for row in binlog_event.rows:
                    sql = concat_sql_from_binlog_event(
                        cursor=cursor, binlog_event=binlog_event, no_pk=self.no_pk,
                        row=row, flashback=self.flashback, e_start_pos=e_start_pos,
                        rollback_with_primary_key=self.rollback_with_primary_key,
                        rollback_with_changed_value=self.rollback_with_changed_value)
                    sql_list.append(sql)
                    if len(sql_list) == MAX_SQL_COUNT_PER_WRITE:
                        self.write_tmp_sql(sql_list=sql_list)
                        sql_list = []

            if not isinstance(binlog_event, (RotateEvent, FormatDescriptionEvent)):
                last_pos = binlog_event.packet.log_pos
            if flag_last_event:
                break

        # Flush whatever remains in the buffer.
        self.write_tmp_sql(sql_list=sql_list)
    stream.close()

    if self.flashback:
        self.create_rollback_sql()
    else:
        self.create_execute_sql()
    print("===============================================")
    if not self.flashback:
        print("执行脚本文件:\n{0}".format(self.execute_sql_file))
    else:
        print("回滚脚本文件:")
        new_file_list = list(reversed(self.rollback_sql_files))
        for tmp_file in new_file_list:
            print(tmp_file)
    print("===============================================")
    return True
def process_binlog(self):
    """Parse the binlog, print generated SQL, and log summary statistics.

    In debug mode only the effective configuration is logged and the
    method returns immediately.
    """
    started_at = time.time()
    scanned = 0
    matched = 0
    ddl_count = 0
    dml_counts = {'delete': 0, 'update': 0, 'insert': 0}

    # Log the effective configuration (live connection object excluded).
    settings = {}
    for key, value in vars(self).items():
        if key == 'connection':
            continue
        settings[key] = value._std_offset if key == 'timezone' else value
    self.logger.info(settings)
    if self.debug:
        return

    stream = BinLogStreamReader(connection_settings=self.conn_setting,
                                server_id=self.server_id,
                                log_file=self.start_file,
                                log_pos=self.start_position,
                                only_schemas=self.only_schemas,
                                only_tables=self.tables,
                                resume_stream=True,
                                blocking=True,
                                skip_to_timestamp=self.start_time)
    with self.connection as cursor:
        self.logger.info('# binlog2sql start...')
        ddl_start_pos = 4
        scan_report_interval = 60 * 10
        last_report_ts = 0
        for binlog_event in stream:
            scanned += 1
            # Progress message while skipping forward to start_time.
            if (last_report_ts + scan_report_interval) < binlog_event.timestamp < self.start_time:
                last_report_ts = binlog_event.timestamp
                self.logger.info('# Binlog scan to {0}'.format(
                    datetime.datetime.fromtimestamp(last_report_ts)))
            if binlog_event.timestamp < self.start_time:
                continue

            # dml
            # NOTE(review): is_dml_event() appears to return the event-type
            # name ('INSERT'/'UPDATE'/'DELETE') in this fork - renamed the
            # local so it no longer shadows a helper named event_type.
            row_kind = is_dml_event(binlog_event)
            if row_kind in self.sql_type:
                matched += 1
                for row in binlog_event.rows:
                    # Per-row statement counters; flashback inverts
                    # INSERT/DELETE because the rollback statement is emitted.
                    if row_kind == 'UPDATE':
                        dml_counts['update'] += 1
                    elif row_kind == 'INSERT':
                        dml_counts['delete' if self.flashback else 'insert'] += 1
                    elif row_kind == 'DELETE':
                        dml_counts['insert' if self.flashback else 'delete'] += 1
                    if self.json:
                        # Column type 245 is MySQL JSON; re-serialize values.
                        for column in binlog_event.columns:
                            if column.type == 245:
                                for k, v in row.items():
                                    row[k][column.name] = json.dumps(
                                        type_convert(v[column.name]), ensure_ascii=False)
                    statement = generate_sql(cursor=cursor, binlog_event=binlog_event,
                                             no_pk=self.no_pk, row=row,
                                             e_start_pos=ddl_start_pos,
                                             flashback=self.flashback)
                    print_line(statement, self.output_file)
            # ddl
            elif is_ddl_event(binlog_event):
                ddl_start_pos = binlog_event.packet.log_pos
                if not self.only_dml and binlog_event.query != 'BEGIN':
                    matched += 1
                    ddl_count += 1
                    statement = generate_sql(cursor=cursor, binlog_event=binlog_event,
                                             no_pk=self.no_pk,
                                             e_start_pos=ddl_start_pos,
                                             flashback=self.flashback)
                    print_line(statement, self.output_file)

            # exceed the end position of the end binlog file
            if stream.log_file == self.stop_file and (
                    binlog_event.packet.log_pos >= self.stop_position
                    or binlog_event.timestamp >= self.stop_time) and not self.stop_never:
                self.logger.info({
                    "total_rows": scanned,
                    "filter_rows": matched,
                    "DML": dml_counts,
                    "DDL": ddl_count,
                    "cost_time": time.time() - started_at,
                })
                self.logger.info('# binlog2sql stop!')
                break
    stream.close()