Python query Examples, samplings.db.base.query Python Examples

Example #1

0

Show file

    def _single_pk_iterate(self):
        """Compare the next batch of rows for a table with a single-column key.

        Source rows are read in key order, starting just after the largest key
        handled so far and limited to ``Const.STEPS`` rows.  The key range
        bounded by the previous and the new largest key is then read from the
        destination, and both result sets are handed to ``self._cmp_md5``.

        Returns:
            tuple: ``(fetched, mismatches)`` -- the number of source rows read
            in this batch and the result of ``self._cmp_md5`` (an empty list
            when the source returned no rows).
        """
        pk = self.pks[0]
        # Exclusive lower bound: the largest key left behind by the last batch.
        lower_bound = pk['last_max']
        # Both statements are rendered from the same three template fields.
        template_fields = {
            'table': wrap(self.table),
            'id': wrap(pk['name']),
            'md5_concat': self.md5_concat,
        }

        source_rows = query(
            Const.SRC_POOL,
            SQL_SINGLE_PK_GREATER_LIMIT_ITERATE.format(**template_fields),
            lower_bound,
            Const.STEPS)
        fetched = len(source_rows)
        if fetched == 0:
            # Nothing left to read: report zero rows and no mismatches.
            return fetched, []

        # Record the largest key of this batch; the first column of the last
        # row is taken as that key.
        upper_bound = source_rows[-1][0]
        pk['last_max'] = upper_bound

        # Read the matching key range from the destination table.
        destination_rows = query(
            Const.DEST_POOL,
            SQL_SINGLE_PK_RANGE_ITERATE.format(**template_fields),
            lower_bound,
            upper_bound)

        # Compare the md5 values of the two result sets.
        return fetched, self._cmp_md5(source_rows, destination_rows)

Example #2

0

Show file

    def _composite_pk_iterate(self):
        """Compare the next batch of rows for a table with a composite key.

        Builds "greater than start" / "up to end" predicates over the key
        columns, reads up to ``Const.STEPS`` source rows after the key values
        left by the previous batch, stores the key values of the last row read
        as the new starting point, reads the same key range from the
        destination and hands both result sets to ``self._cmp_md5``.

        Returns:
            tuple: ``(fetched, mismatches)`` -- the number of source rows read
            in this batch and the result of ``self._cmp_md5`` (an empty list
            when the source returned no rows).
        """
        # Key values this batch starts from (exclusive).
        first_start = self.pks[0]['last_max']
        second_start = self.pks[1]['last_max']
        third_start = None

        if len(self.pks) == 2:
            # (A, B) > (x, y) is equivalent to A>x OR (A=x AND B>y).
            # Measured: the short row form also uses the index in MySQL, yet
            # it is slower than the expanded form used here.
            column_names = {
                'pk0': wrap(self.pks[0]['name']),
                'pk1': wrap(self.pks[1]['name']),
            }
            begin_where = "{pk0}>%s OR ({pk0}=%s AND {pk1}>%s)".format(
                **column_names)
            end_where = "{pk0}<%s OR ({pk0}=%s AND {pk1}<=%s)".format(
                **column_names)
        else:
            # (A, B, C) > (x, y, z) is equivalent to
            # A>x OR (A=x AND (B>y OR (B=y AND C>z))).
            # Every table with more than 3 key columns holds fewer than 10000
            # rows, so the short row form is used.
            # NOTE(review): the placeholders are fixed at three values --
            # confirm how tables with more key columns are handled.
            begin_where = '({pk_fields}) > (%s, %s, %s)'.format(
                pk_fields=self.pk_fields)
            end_where = '({pk_fields}) <= (%s, %s, %s)'.format(
                pk_fields=self.pk_fields)
            third_start = self.pks[2]['last_max']

        start_keys = (first_start, second_start, third_start)

        # Read the next STEPS rows, in order, after the starting key values.
        source_args = self._build_args(list(start_keys))
        source_args.append(Const.STEPS)
        source_sql = SQL_COMPOSITE_PK_GREATER_LIMIT_ITERATE.format(
            table=wrap(self.table),
            md5_concat=self.md5_concat,
            pk_fields=self.pk_fields,
            begin_where=begin_where)
        source_rows = query(Const.SRC_POOL, source_sql, *source_args)

        fetched = len(source_rows)
        if fetched == 0:
            # Nothing left to read: report zero rows and no mismatches.
            return fetched, []

        # Store the key values of the last row as the new starting point; the
        # trailing column (md5_concat) is not part of the key.
        final_row = source_rows[-1]
        for position, key_value in enumerate(final_row[:-1]):
            self.pks[position]['last_max'] = key_value

        # Read the same key range from the destination table.
        destination_sql = SQL_COMPOSITE_PK_RANGE_ITERATE.format(
            table=wrap(self.table),
            md5_concat=self.md5_concat,
            pk_fields=self.pk_fields,
            begin_where=begin_where,
            end_where=end_where)
        range_args = (
            self._build_args(list(start_keys))
            + self._build_args([pk['last_max'] for pk in self.pks]))
        destination_rows = query(Const.DEST_POOL, destination_sql,
                                 *range_args)

        # Compare the md5 values of the two result sets.
        return fetched, self._cmp_md5(source_rows, destination_rows)

Example #3

0

Show file

    def _double_check_log_dingding(self, not_matches):
        """Re-query every mismatching record and report what is still wrong.

        Each entry of ``not_matches`` is looked up again by key in both the
        source and the destination.  A record is reported when it exists on
        only one side, or when the last column of the two rows differs.  All
        reports are written to ``result_logger`` as JSON and passed to
        ``dingding_inconsistency``.

        Nothing happens when ``not_matches`` is empty or when ``self.pk_type``
        is not one of ``PK_AUTO_INCREMENT``, ``PK_VARCHAR_INT`` or
        ``PK_COMPOSITE``.

        Args:
            not_matches: records to verify; every element except the last one
                of each record is passed as a lookup parameter.
        """
        if not not_matches or self.pk_type not in (
                PK_AUTO_INCREMENT, PK_VARCHAR_INT, PK_COMPOSITE):
            return

        # One "%s" placeholder per key column.
        placeholders = ', '.join(['%s'] * len(self.pks))
        lookup_sql = SQL_BY_ID.format(table=wrap(self.table),
                                      pk_fields=self.pk_fields,
                                      md5_concat=self.md5_concat,
                                      value_params=placeholders)

        reports = []
        for record in not_matches:
            key_values = record[0:-1]
            source_rows = query(Const.SRC_POOL, lookup_sql, *key_values)
            destination_rows = query(Const.DEST_POOL, lookup_sql, *key_values)

            # Rows are stringified so that database-specific column types
            # cannot break the JSON conversion.
            detail = None
            if source_rows and destination_rows:
                if source_rows[0][-1] != destination_rows[0][-1]:
                    detail = {
                        'error': Const.MD5_NOT_EQUAL,
                        'src': str(source_rows[0]),
                        'dest': str(destination_rows[0])
                    }
            elif source_rows:
                detail = {
                    'error': Const.NO_ID_IN_DEST,
                    'src': str(source_rows[0])
                }
            elif destination_rows:
                detail = {
                    'error': Const.NO_ID_IN_SRC,
                    'dest': str(destination_rows[0])
                }

            if detail is None:
                # Either missing on both sides or identical: nothing to report.
                continue

            common = {
                'db_type': Const.DB_TYPE,
                'database': Const.DATABASE,
                'table': self.table,
                'pk_fields': self.pk_fields
            }
            reports.append(dict(common, **detail))

        if reports:
            result_logger.info(
                json.dumps(reports, ensure_ascii=False, cls=JsonEncoderX))
            dingding_inconsistency('data inconsistency', reports)

Example #4

0

Show file

    def _list_columns_pks(self, pool, db, table):
        """Return the columns of ``table`` and the subset forming its primary key.

        Args:
            pool: connection pool handed to ``query``.
            db: not used by this implementation.
            table: table name passed as the query parameter.

        Returns:
            tuple: ``(columns, pks)``.  ``columns`` is a list of dicts with the
            keys ``name`` and ``data_type``; ``pks`` lists the entries of
            ``columns`` selected by the key query (several entries for a
            composite key).
        """
        # Describe every column of the table.
        columns = []
        for name, data_type in query(pool, SQL_LIST_COLUMNS, table):
            columns.append({'name': name, 'data_type': data_type})

        # Each row of the key query carries one value; it is converted with
        # int() and reduced by one to index into ``columns``.
        pks = []
        for (position, ) in query(pool, SQL_GET_PK, table):
            pks.append(columns[int(position) - 1])

        return (columns, pks)

Example #5

0

Show file

    def _select_all_iterate(self):
        """Compare a table by running one statement against both sides.

        The same ``SQL_SELECT_ALL_ITERATE`` statement is run against the source
        and, when the source returned rows, against the destination; both
        result sets are then handed to ``self._cmp_md5``.

        Returns:
            tuple: ``(fetched, mismatches)`` -- the number of source rows read
            and the result of ``self._cmp_md5`` (an empty list when the source
            returned no rows).
        """
        statement = SQL_SELECT_ALL_ITERATE.format(table=wrap(self.table),
                                                  md5_concat=self.md5_concat)
        source_rows = query(Const.SRC_POOL, statement)

        fetched = len(source_rows)
        if fetched == 0:
            # The destination is not queried when the source has no rows.
            return fetched, []

        # Run the same statement against the destination table.
        destination_rows = query(Const.DEST_POOL, statement)

        # Compare the md5 values of the two result sets.
        return fetched, self._cmp_md5(source_rows, destination_rows)

Example #6

0

Show file

File: mysql.py Project: moonrunwater/data-sampling

    def _list_columns_pks(self, pool, db, table):
        """Return the columns of ``db.table`` and the subset forming its primary key.

        Args:
            pool: connection pool handed to ``query``.
            db: database name passed as a query parameter.
            table: table name passed as a query parameter.

        Returns:
            tuple: ``(columns, pks)``.  ``columns`` is a list of dicts with the
            keys ``name``, ``data_type``, ``key`` and ``extra``; ``pks`` lists
            the entries of ``columns`` that make up the primary key (several
            entries for a composite key).
        """
        # Describe every column and index the descriptions by column name.
        columns = []
        columns_by_name = {}
        for name, data_type, key, extra in query(pool, SQL_LIST_COLUMNS, db,
                                                 table):
            description = {
                'name': name,
                'data_type': data_type,
                'key': key,
                'extra': extra
            }
            columns.append(description)
            columns_by_name[name] = description

        # Resolve the key columns by name; the second value of each row is
        # not needed here.
        pks = []
        for name, _seq in query(pool, SQL_GET_PK, db, table):
            pks.append(columns_by_name[name])

        if not pks:
            # No explicitly declared primary key: MySQL treats an index column
            # as the key, so fall back to the columns flagged 'PRI'.
            pks = [entry for entry in columns if entry['key'] == 'PRI']

        return (columns, pks)

Example #7

0

Show file

 def _table_structure(self, pool, db, table):
     """Return a single string built from the rows of ``SQL_CREATE_TABLE``.

     The single value of every result row is collected, the values are
     sorted and joined with ``';'``, and every occurrence of ``'public.'`` is
     removed from the joined text.

     Args:
         pool: connection pool handed to ``query``.
         db: not used by this implementation.
         table: table name, passed twice as a query parameter.
     """
     fragments = []
     for (fragment, ) in query(pool, SQL_CREATE_TABLE, table, table):
         fragments.append(fragment)
     # Sort before joining so the result does not depend on row order.
     fragments.sort()
     joined = ';'.join(fragments)
     return joined.replace('public.', '')

Example #8

0

Show file

 def _list_tables(self, pool, db):
     """Return the single value of every row produced by ``SQL_LIST_TABLES``.

     Args:
         pool: connection pool handed to ``query``.
         db: not used by this implementation.
     """
     tables = []
     for (table_name, ) in query(pool, SQL_LIST_TABLES):
         tables.append(table_name)
     return tables

Example #9

0

Show file

 def set_count(self):
     """Query the source for the row count of ``self.table`` and store it.

     The first column of the first result row is saved in ``self.count`` and
     then logged together with the database type, database and table name.
     """
     count_sql = SQL_COUNT.format(table=wrap(self.table))
     rows = query(Const.SRC_POOL, count_sql)
     self.count = rows[0][0]
     logger.info("===== %s, db=%s table=%s, count=%s", Const.DB_TYPE,
                 Const.DATABASE, self.table, self.count)

Example #10

0

Show file

File: mysql.py Project: moonrunwater/data-sampling

 def _table_structure(self, pool, db, table):
     """Return a single string built from the rows of ``SQL_CREATE_TABLE``.

     The single value of every result row is collected, the values are
     sorted and then joined with ``';'``.

     Args:
         pool: connection pool handed to ``query``.
         db: database name, passed twice as a query parameter.
         table: table name, passed twice as a query parameter.
     """
     # https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_concat-ws
     fragments = []
     for (fragment, ) in query(pool, SQL_CREATE_TABLE, db, table, db, table):
         fragments.append(fragment)
     # Sort before joining so the result does not depend on row order.
     fragments.sort()
     return ';'.join(fragments)