def _single_pk_iterate(self):
    """Compare the next batch of rows for a table keyed by a single column.

    Reads up to ``Const.STEPS`` rows from the source pool that come after
    the stored ``last_max`` key (exclusive), advances ``last_max`` to the
    key of the last row fetched, then reads the same key range from the
    destination pool and hands both result sets to ``_cmp_md5``.

    Returns:
        A ``(fetched, not_matches)`` tuple: the number of source rows read
        in this batch and whatever ``_cmp_md5`` reported (an empty list
        when the source returned no rows).
    """
    pk = self.pks[0]
    lower_bound = pk['last_max']
    sql_fields = dict(table=wrap(self.table),
                      id=wrap(pk['name']),
                      md5_concat=self.md5_concat)

    # Source side: the next STEPS rows strictly after the previous maximum.
    src_rows = query(Const.SRC_POOL,
                     SQL_SINGLE_PK_GREATER_LIMIT_ITERATE.format(**sql_fields),
                     lower_bound, Const.STEPS)
    fetched = len(src_rows)
    if fetched == 0:
        return fetched, []

    # Remember the largest key seen so the next batch resumes after it.
    upper_bound = src_rows[-1][0]
    pk['last_max'] = upper_bound

    # Destination side: everything inside the key range just covered.
    dest_rows = query(Const.DEST_POOL,
                      SQL_SINGLE_PK_RANGE_ITERATE.format(**sql_fields),
                      lower_bound, upper_bound)

    # Compare the md5 values of the two result sets.
    return fetched, self._cmp_md5(src_rows, dest_rows)
def _composite_pk_iterate(self):
    """Compare the next batch of rows for a table with a composite key.

    Works like the single-key iteration but the cursor is a tuple of key
    columns.  With exactly two key columns the tuple comparison is spelled
    out as an ``OR``/``AND`` expression; otherwise a three-column row
    comparison ``(a, b, c) > (%s, %s, %s)`` is used.

    Returns:
        A ``(fetched, not_matches)`` tuple: the number of source rows read
        in this batch and whatever ``_cmp_md5`` reported (an empty list
        when the source returned no rows).
    """
    # Cursor left behind by the previous batch (exclusive lower bound).
    pk0_start = self.pks[0]['last_max']
    pk1_start = self.pks[1]['last_max']
    pk2_start = None

    if len(self.pks) == 2:
        # (A, B) > (x, y) is equivalent to A>x OR (A=x AND B>y).
        # Original author's measurement: the short row form also used the
        # index in MySQL but was still slower than the expanded form.
        quoted = dict(pk0=wrap(self.pks[0]['name']),
                      pk1=wrap(self.pks[1]['name']))
        begin_where = "{pk0}>%s OR ({pk0}=%s AND {pk1}>%s)".format(**quoted)
        end_where = "{pk0}<%s OR ({pk0}=%s AND {pk1}<=%s)".format(**quoted)
    else:
        # (A, B, C) > (x, y, z) is equivalent to
        # A>x OR (A=x AND (B>y OR (B=y AND C>z))).
        # Original author's note: tables with this many key columns all
        # hold fewer than 10000 rows, so the short row form is used.
        begin_where = '({pk_fields}) > (%s, %s, %s)'.format(
            pk_fields=self.pk_fields)
        end_where = '({pk_fields}) <= (%s, %s, %s)'.format(
            pk_fields=self.pk_fields)
        pk2_start = self.pks[2]['last_max']

    # Source side: the next STEPS rows strictly after the cursor.
    src_args = self._build_args([pk0_start, pk1_start, pk2_start])
    src_args.append(Const.STEPS)
    src_sql = SQL_COMPOSITE_PK_GREATER_LIMIT_ITERATE.format(
        table=wrap(self.table),
        md5_concat=self.md5_concat,
        pk_fields=self.pk_fields,
        begin_where=begin_where)
    src_rows = query(Const.SRC_POOL, src_sql, *src_args)

    fetched = len(src_rows)
    if fetched == 0:
        return fetched, []

    # Advance the cursor to the key of the last row fetched; the final
    # column of each row is the md5 value and is not part of the key.
    for idx, key_value in enumerate(src_rows[-1][:-1]):
        self.pks[idx]['last_max'] = key_value

    # Destination side: everything between the old and the new cursor.
    dest_sql = SQL_COMPOSITE_PK_RANGE_ITERATE.format(
        table=wrap(self.table),
        md5_concat=self.md5_concat,
        pk_fields=self.pk_fields,
        begin_where=begin_where,
        end_where=end_where)
    lower_args = self._build_args([pk0_start, pk1_start, pk2_start])
    upper_args = self._build_args([pk['last_max'] for pk in self.pks])
    dest_rows = query(Const.DEST_POOL, dest_sql, *(lower_args + upper_args))

    # Compare the md5 values of the two result sets.
    return fetched, self._cmp_md5(src_rows, dest_rows)
def _double_check_log_dingding(self, not_matches):
    """Re-query suspected mismatches by key and report the confirmed ones.

    Each entry of ``not_matches`` is looked up again in both pools using
    all of its values except the last one as the key.  A record is
    reported when it exists on only one side, or when the last column of
    the two fetched rows differs.  Confirmed problems are written to
    ``result_logger`` as JSON and passed to ``dingding_inconsistency``.

    Args:
        not_matches: Records flagged by an earlier comparison; the last
            element of each record is not used as a key value.

    Returns:
        None.  Nothing happens when ``not_matches`` is empty or when
        ``self.pk_type`` is not one of the key-based primary-key types.
    """
    if not not_matches or self.pk_type not in (
            PK_AUTO_INCREMENT, PK_VARCHAR_INT, PK_COMPOSITE):
        return

    placeholders = ', '.join(['%s'] * len(self.pks))
    sql = SQL_BY_ID.format(table=wrap(self.table),
                           pk_fields=self.pk_fields,
                           md5_concat=self.md5_concat,
                           value_params=placeholders)

    errors = []
    for record in not_matches:
        key_values = record[0:-1]
        src_rows = query(Const.SRC_POOL, sql, *key_values)
        dest_rows = query(Const.DEST_POOL, sql, *key_values)

        # Rows are stringified so that database-specific value types do
        # not break the JSON serialisation further down.
        detail = None
        if src_rows and dest_rows:
            if src_rows[0][-1] != dest_rows[0][-1]:
                detail = {
                    'error': Const.MD5_NOT_EQUAL,
                    'src': str(src_rows[0]),
                    'dest': str(dest_rows[0])
                }
        elif src_rows:
            detail = {
                'error': Const.NO_ID_IN_DEST,
                'src': str(src_rows[0])
            }
        elif dest_rows:
            detail = {
                'error': Const.NO_ID_IN_SRC,
                'dest': str(dest_rows[0])
            }

        if detail is None:
            # Either both rows now agree or the key is gone on both sides.
            continue

        error_dict = {
            'db_type': Const.DB_TYPE,
            'database': Const.DATABASE,
            'table': self.table,
            'pk_fields': self.pk_fields
        }
        error_dict.update(detail)
        errors.append(error_dict)

    if errors:
        result_logger.info(
            json.dumps(errors, ensure_ascii=False, cls=JsonEncoderX))
        dingding_inconsistency('data inconsistency', errors)
def _list_columns_pks(self, pool, db, table):
    """Return the column descriptions of ``table`` and its key columns.

    Args:
        pool: Connection pool handed to ``query``.
        db: Not used by this variant.
        table: Table name, passed as the only query parameter.

    Returns:
        A ``(columns, pks)`` tuple.  ``columns`` is a list of dicts with
        ``name`` and ``data_type``; ``pks`` holds the entries of
        ``columns`` selected by the 1-based positions that the primary-key
        query returns (several entries for a composite key).
    """
    # Describe every column of the table.
    column_rows = query(pool, SQL_LIST_COLUMNS, table)
    columns = [
        dict(name=col_name, data_type=col_type)
        for col_name, col_type in column_rows
    ]

    # The key query yields 1-based column positions, one per row.
    pk_rows = query(pool, SQL_GET_PK, table)
    pks = [columns[int(position) - 1] for (position, ) in pk_rows]

    return columns, pks
def _select_all_iterate(self):
    """Compare the whole table in one pass, without key-based paging.

    Runs the same select-all statement against the source pool and, if
    that returned any rows, against the destination pool, then hands both
    result sets to ``_cmp_md5``.

    Returns:
        A ``(fetched, not_matches)`` tuple: the number of source rows read
        and whatever ``_cmp_md5`` reported (an empty list when the source
        returned no rows).
    """
    sql = SQL_SELECT_ALL_ITERATE.format(table=wrap(self.table),
                                        md5_concat=self.md5_concat)

    src_rows = query(Const.SRC_POOL, sql)
    fetched = len(src_rows)
    if fetched == 0:
        # Nothing on the source side: the destination is not queried.
        return fetched, []

    dest_rows = query(Const.DEST_POOL, sql)

    # Compare the md5 values of the two result sets.
    return fetched, self._cmp_md5(src_rows, dest_rows)
def _list_columns_pks(self, pool, db, table):
    """Return the column descriptions of ``db.table`` and its key columns.

    Args:
        pool: Connection pool handed to ``query``.
        db: Database name, passed as the first query parameter.
        table: Table name, passed as the second query parameter.

    Returns:
        A ``(columns, pks)`` tuple.  ``columns`` is a list of dicts with
        ``name``, ``data_type``, ``key`` and ``extra``; ``pks`` holds the
        entries of ``columns`` named by the primary-key query (several
        entries for a composite key).  When that query yields nothing,
        the columns whose ``key`` equals ``'PRI'`` are used instead.
    """
    # Describe every column of the table.
    column_rows = query(pool, SQL_LIST_COLUMNS, db, table)
    columns = [
        dict(name=col_name, data_type=col_type, key=col_key, extra=col_extra)
        for col_name, col_type, col_key, col_extra in column_rows
    ]
    by_name = {col['name']: col for col in columns}

    # The key query yields (column name, sequence) pairs.
    pk_rows = query(pool, SQL_GET_PK, db, table)
    pks = [by_name[pk_name] for pk_name, _seq in pk_rows]

    if not pks:
        # No explicitly declared primary key: per the original author's
        # note, MySQL then treats an index column as the primary key, so
        # fall back to the columns flagged 'PRI'.
        pks = [col for col in columns if col['key'] == 'PRI']

    return columns, pks
def _table_structure(self, pool, db, table):
    """Return a canonical one-line description of the table's structure.

    Args:
        pool: Connection pool handed to ``query``.
        db: Not used by this variant.
        table: Table name, passed twice as query parameters.

    Returns:
        The single-column values returned by ``SQL_CREATE_TABLE``, sorted
        and joined with ``';'``, with every ``'public.'`` occurrence
        removed (presumably so schema-qualified names compare equal).
    """
    rows = query(pool, SQL_CREATE_TABLE, table, table)
    fragments = [fragment for (fragment, ) in rows]
    # Sort first so the result does not depend on row order.
    fragments.sort()
    joined = ';'.join(fragments)
    return joined.replace('public.', '')
def _list_tables(self, pool, db):
    """Return the table names produced by ``SQL_LIST_TABLES``.

    Args:
        pool: Connection pool handed to ``query``.
        db: Not used by this variant.

    Returns:
        A list holding the single column of every result row.
    """
    rows = query(pool, SQL_LIST_TABLES)
    return [table_name for (table_name, ) in rows]
def set_count(self):
    """Store the source table's row count in ``self.count`` and log it."""
    count_sql = SQL_COUNT.format(table=wrap(self.table))
    rows = query(Const.SRC_POOL, count_sql)
    # The count is the first column of the first row.
    self.count = rows[0][0]
    logger.info("===== %s, db=%s table=%s, count=%s", Const.DB_TYPE,
                Const.DATABASE, self.table, self.count)
def _table_structure(self, pool, db, table):
    """Return a canonical one-line description of the table's structure.

    See the MySQL ``CONCAT_WS`` documentation for the function the
    original author referenced for this query:
    https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_concat-ws

    Args:
        pool: Connection pool handed to ``query``.
        db: Database name, passed twice as a query parameter.
        table: Table name, passed twice as a query parameter.

    Returns:
        The single-column values returned by ``SQL_CREATE_TABLE``, sorted
        and joined with ``';'``.
    """
    rows = query(pool, SQL_CREATE_TABLE, db, table, db, table)
    # Sort before joining so the result does not depend on row order.
    ordered = sorted(fragment for (fragment, ) in rows)
    return ';'.join(ordered)