def read_table_name(self):

        while True:

            # 获取配置参数,带宽,map数
            get_config_sql = "select network,map from tb_copy_data_config where migration_batch='" + input_batch + "'"
            get_config_result = conn_db.select(get_config_sql)
            self.bandwidth = get_config_result[0][0]
            self.map_num = get_config_result[0][1]

            print '获取配置参数', get_config_result, self.bandwidth, self.map_num

            # 获取可以稽核表名列表,表里获取分区键
            get_task_sql = "select id, table_name ,partition_type,partition_time from tb_copy_get_partition_task where copy_status='0' and   migration_batch='" + input_batch + "' order by partition_time asc limit 10"

            print '获取配置参数sql:', get_config_sql
            print '获取任务sql:', get_task_sql

            select_result = conn_db.select(get_task_sql)
            print '获取任务:', select_result

            # 取不到任务
            if not select_result:
                print '无迁移任务'
                exit(0)

            # 遍历集合,更新此批次状态,status=1
            update_sql = "update tb_copy_get_partition_task  set copy_status='1'"
            in_condition = ''
            for i in select_result:
                in_condition = str(i[0]) + "," + in_condition

            update_sql = update_sql + "where id in (" + in_condition + "0)"

            print '更新此批次状态', update_sql
            result = conn_db.insert(update_sql)

            # 遍历表名,插日志
            for i in select_result:
                table_name = i[1]
                partition_type = i[2]
                partition_time = i[3]

                # 当前时间
                now_time = str(date_time.datetime.now())[0:19]

                insert_sql = "insert into tb_copy_data_log (data_source,table_name,partition_type,partition_time,copy_status,chk_status,start_time,end_time) values('%s','%s','%s','%s','%s','%s','%s','%s')" % (
                    config.data_source, table_name, partition_type,
                    partition_time, config.copy_status_1, config.chk_status_0,
                    now_time, '')

                result = conn_db.insert(insert_sql)

                # 删除分区,添加分区
                self.add_partition(table_name, partition_type, partition_time)

                # 拷贝数据
                self.copy_data(table_name, partition_type, partition_time)
コード例 #2
0
    def check_date(self, table_name, partition_date):
        # 检测该表是否存在
        check_table_sql = "select table_name from tb_copy_data_log where table_name='" + table_name + "\'"

        print '检测该表是否存在:', check_table_sql

        check_table_result = conn_db.select(check_table_sql)

        # 当前时间
        now_time = str(date_time.datetime.now())[0:19]

        if not check_table_result:
            # 初始化mysql同步状态
            insert_table_sql = "insert into tb_copy_data_log (data_source,table_name,partition_type,partition_time,copy_status,chk_status,start_time,end_time) values('%s','%s','%s','%s','%s','%s','%s','%s')" % (
                config.data_source, table_name, config.partition_statis_date,
                partition_date, config.copy_status_1, config.chk_status_0,
                now_time, '0')
            print '插入记录初始化:', insert_table_sql

            conn_db.insert(insert_table_sql)

            # 测试
            conn_db.insert("insert into test (id) values ('123')")
            print '完成初始化'

        # 检测该分区是否存在
        check_partition_sql = "select table_name from tb_copy_data_log where table_name='" + table_name + "' and partition_time='%s\'" % (
            partition_date)
        print check_partition_sql
        check_partition_result = conn_db.select(check_partition_sql)
        if not check_partition_result:
            # 插入分区
            insert_table_sql = "insert into tb_copy_data_log (data_source,table_name,partition_type,partition_time,copy_status,chk_status,start_time,end_time) values('%s','%s','%s','%s','%s','%s','%s','%s')" % (
                config.data_source, table_name, config.partition_statis_date,
                partition_date, config.copy_status_1, config.chk_status_0,
                now_time, '0')
            print '插入记录初始化:', insert_table_sql
            check_table_result = conn_db.insert(insert_table_sql)

        # 检测同步状态
        check_status_sql = " select table_name from tb_copy_data_log where table_name='" + table_name + "' and partition_time= '" + partition_date + "' and copy_status = '" + config.copy_status_0 + "\'"

        print check_status_sql
        select_result = conn_db.select(check_status_sql)

        print '检测结果:', select_result

        # 结果判空
        if select_result:

            return True
        else:
            return False
コード例 #3
0
    def check_exists(self, table_name):
        # 如果存在先删除,表名,周期,库名
        check_sql = "select id from tb_copy_data_count_check_log where table_name='%s' and db_name='%s'" % (
            table_name, self.db_name)

        print 'check_sql:', check_sql

        check_result = conn_db.select(check_sql)

        print 'check_result:', check_result

        # 存在
        if check_result:
            condition = ''
            if len(check_result) == 1:
                condition = "'%s'" % (check_result[0])
            else:
                for j in check_result:
                    condition = condition + ',' + "'%s'" % (j[0])

            if condition[0] == ',':
                condition = condition[1:]

            print 'condition:', condition

            delete_sql = "delete from tb_copy_data_count_check_log where id in (%s)" % (
                condition)

            print 'delete_sql:', delete_sql

            conn_db.insert(delete_sql)
コード例 #4
0
def get_int(table_name):
    get_int_sql = "select  column_name from columns where  data_type ='int' and table_name=\'" + table_name + '\''
    print 'get_int_sql:', get_int_sql
    result = conn_db.select(get_int_sql)

    print result[0]
    return result[0]
コード例 #5
0
def read_table_name():
    # 获取可以稽核表名列表
    get_task_sql = "select a.table_name from tb_copy_get_task a left join tb_copy_data_log b on a.table_name=b.table_name where b.copy_status='0' or b.table_name is null ;"

    print get_task_sql

    # get_task_sql_sh = mysql_sh + get_task_sql + '\' > ' + get_task_file

    select_result = conn_db.select(get_task_sql)
    print select_result
    print type(select_result)

    # 遍历查询结果
    for i in select_result:
        table_name = i[0]
        print '表名:', table_name

        # 调用迁移
        input_date(table_name)

    # 执行获取sql
    # os.popen(get_task_sql_sh)

    # get_task_list = open(get_task_file, 'r')

    multi_list = []
コード例 #6
0
    def read_table_name(self):

        get_task_sql = ''

        # 全量表
        if self.table_type == '1':
            # 获取可以稽核表名列表
            get_task_sql = "select table_name  from tb_copy_get_task where table_type='" + self.table_type + "' and size_type='" + self.size_type + "' and  migration_batch= '" + input_batch + "';"

        else:

            # 获取可以稽核表名列表,表里获取分区键
            get_task_sql = "select table_name  from tb_copy_get_task where start_partition is not null and end_partition is not null and ifnull(now_partition ,start_partition) <= end_partition and  migration_batch= " + input_batch + ';'

        print '获取任务sql:', get_task_sql

        select_result = conn_db.select(get_task_sql)
        print '获取任务:', select_result

        # 遍历任务列表
        for i in select_result:
            table_name = i[0]
            print '表名:', table_name

            # 调用迁移
            self.input_date(table_name)

        print '无迁移任务'
コード例 #7
0
    def read_table_name(self):

        while True:
            # 获取可以稽核表名列表,表里获取分区键,只稽核分区表
            # get_task_sql = "select id, table_name ,partition_type from tb_copy_data_count_check_task where  " + self.check_status + "='0' and   batch_num ='" + self.batch_num + "'  limit 10"

            # get_task_sql = "select id,table_name,partition_type,partition_time  from tb_copy_data_log where remark3 is null and start_time >'2020-08-13 10:00:51' and copy_status='2' and (%s is  NULL or %s ='')  order by partition_time desc,start_time asc limit 1000;" % (
            #             #     self.count_col, self.count_col)

            get_task_sql = "select id,table_name,partition_type,partition_time  from tb_copy_data_log where remark3 is null and start_time >'2020-08-13 10:00:51' and copy_status='2' and (%s is  NULL or %s ='')  limit 1000;" % (
                self.count_col, self.count_col)

            print '获取任务sql:', get_task_sql

            select_result = conn_db.select(get_task_sql)
            # print '获取任务:', select_result

            # 取不到任务
            if not select_result:
                print '无稽核任务'
                exit(0)

            # # 遍历集合,更新此批次状态,status=1
            update_status_list = []
            update_sql = "update tb_copy_data_log  set remark3='checked' "
            in_condition = ''
            for i in select_result:
                in_condition = str(i[0]) + "," + in_condition

            update_sql = update_sql + "where id in (" + in_condition + "0)"

            # print '更新此批次状态', update_sql
            result = conn_db.insert(update_sql)

            self.start_time = time.time()
            # 遍历表名,插日志
            for i in select_result:
                id, table_name, partition_type, partition_time = i[0], i[1], i[
                    2], i[3]

                # count_sql = "select %s,count(*) from %s group by %s " % (partition_type, table_name, partition_type)
                count_sql = "select partition_type,partition_time,count_num from tb_copy_data_count_check_log where db_name='%s' and table_name='%s' and partition_time='%s' " % (
                    self.db_name, table_name, partition_time)

                # print 'count_sql', count_sql

                # 获取稽核结果
                self.get_count_data(count_sql, id, table_name, partition_type,
                                    partition_time)

            # 更新mysql
            # print 'update_sql_list', self.update_sql_list
            conn_db.insert_batch(self.update_sql_list)

            print 'sleep 5s,更新数据量:', len(
                self.update_sql_list), '耗时:', time.time() - self.start_time
            self.update_sql_list = []

            time.sleep(3)
コード例 #8
0
def get_end_string(table_name):
    get_end_string_sql = "select a.column_name from columns a inner join ( select table_schema,table_name,max(column_id) mx_column_id from columns where data_type like 'varchar%'  group by 1,2) b on a.column_id = b.mx_column_id and a.table_name=\'" + table_name + '\''

    print 'get_end_string_sql:', get_end_string_sql
    result = conn_db.select(get_end_string_sql)

    print result[0][0]
    return result[0][0]
コード例 #9
0
def get_time():
    try:
        get_time_str_sql = "select time_str from tb_tmp_copy_data "

        result = conn_db.select(get_time_str_sql)[0][0]

        print 'get_time_str:', result

        return result

    except Exception as e:
        print e
        print '异常'
コード例 #10
0
def get_error_log():
    try:
        get_error_sql = "select table_name,partition_type,partition_time,copy_status from tb_copy_data_log where copy_status='3'"

        print 'get_error_sql', get_error_sql
        get_error_result = conn_db.select(get_error_sql)
        for i in get_error_result:
            table_name, partition_type, partition_time, copy_status = i[0], i[
                1], i[2], i[3]
            check_partition(table_name, partition_type, partition_time,
                            copy_status)
    except Exception as e:
        print e
        print '异常'
コード例 #11
0
def check_partition(table_name, partition_date):
    partition_str = "statis_date"

    get_partition_sql = "select  column_name from columns where column_name='" + partition_str + "'   data_type ='int' and table_name=\'" + table_name + '\''

    print 'get_end_string_sql:', get_partition_sql
    result = conn_db.select(get_partition_sql)

    # 无分区
    if len(result[0]) == 0:
        return ''

    # 有分区
    else:
        return partition_date
コード例 #12
0
ファイル: check_error.py プロジェクト: bradbann/bid_data_new
def get_error_log():
    try:
        get_error_sql = "select table_name,partition_type,partition_time,copy_status from tb_copy_data_log where  partition_type<>'all' and ( copy_status='3' or sy_count<>ocdp_count) limit 100"

        print 'get_error_sql', get_error_sql
        get_error_result = conn_db.select(get_error_sql)
        for i in get_error_result:
            try:
                table_name, partition_type, partition_time, copy_status = i[0], i[1], i[2], i[3]
                check_partition(table_name, partition_type, partition_time, copy_status)
            except Exception as e:
                print e
                print '异常2'
    except Exception as e:
        print e
        print '异常1'
コード例 #13
0
def read_table_name():
    # 获取可以稽核表名列表
    get_task_sql = "select table_name  from tb_copy_get_task where start_partition is not null and end_partition is not null and ifnull(now_partition ,start_partition) < end_partition;"

    print '获取任务sql:', get_task_sql

    select_result = conn_db.select(get_task_sql)
    print '获取任务:', select_result

    # 遍历任务列表
    for i in select_result:
        table_name = i[0]
        print '表名:', table_name

        # 调用迁移
        input_date(table_name)

    print '无迁移任务'
コード例 #14
0
    def get_count_data(self, count_sql, id, table_name, partition_type,
                       partition_time):
        """Run ``count_sql`` and forward a non-empty result to ``insert_data``.

        When the query returns nothing the method does nothing.  The
        table_name, partition_type and partition_time arguments are not used
        by the body.
        """
        rows = conn_db.select(count_sql)

        # Only a non-empty result is forwarded.
        if rows:
            self.insert_data(id, rows)
コード例 #15
0
ファイル: close_lock.py プロジェクト: Biking0/big_data_old
    def get_task(self):
        while True:
            # 获取可以稽核表名列表,表里获取分区键
            get_task_sql = "select id, table_name ,batch_num from tb_close_lock_get_task where copy_status='0' and   batch_num='" + self.input_batch + "' order by id asc limit 1"

            print '获取任务sql:', get_task_sql

            select_result = conn_db.select(get_task_sql)
            print '获取任务:', select_result

            # 取不到任务
            if not select_result:
                print '无迁移任务'
                exit(0)

            # 遍历集合,更新此批次状态,status=1
            update_sql = "update tb_close_lock_get_task  set copy_status='1'"
            in_condition = ''
            for i in select_result:
                in_condition = str(i[0]) + "," + in_condition

            update_sql = update_sql + "where id in (" + in_condition + "0)"

            print '更新此批次状态', update_sql
            result = conn_db.insert(update_sql)

            # 遍历表名,插日志,开始执行任务
            for i in select_result:
                table_name = i[1]

                # 当前时间
                now_time = str(date_time.datetime.now())[0:19]

                insert_sql = "insert into tb_close_lock_log (table_name,rename_status,create_status,insert_status,start_time,end_time) values('%s','%s','%s','%s','%s','%s')" % (
                    table_name, config.rename_status_0, config.create_status_0,
                    config.insert_status_0, now_time, '')

                result = conn_db.insert(insert_sql)

                self.check_name(table_name)

            print '无任务'
コード例 #16
0
    def read_table_name(self):

        # 获取可以稽核表名列表
        get_task_sql = "select a.table_name ,a.partition_type from tb_copy_get_task a left join tb_copy_data_log b on a.table_name=b.table_name where copy_status='" + config.copy_status_2 + "' and table_type='" + self.table_type + "' and size_type='" + self.size_type + "' and  migration_batch= '" + input_batch + "';"

        print '获取任务sql:', get_task_sql

        select_result = conn_db.select(get_task_sql)
        print '获取任务:', select_result

        # 全量表
        if self.table_type == '1':
            # 遍历任务表名列表
            for i in select_result:
                table_name = i[0]
                print '表名:', table_name

                # 调用迁移
                self.input_date(table_name, partition_date=None)

        # 日分区表
        elif self.table_type == '2':

            # 分区键
            partition_type = select_result[0][1]

            print '获取分区键', partition_type

            # 日表迁移33天
            day_partition_list = config.day_partition.reverse()
            for partition_date in day_partition_list:

                # 遍历任务列表
                for i in select_result:
                    table_name = i[0]
                    print '表名:', table_name

                    # 调用迁移
                    self.input_date(table_name, partition_date)

        print '无迁移任务'
コード例 #17
0
def input_date(table_name):
    get_date_sql = "select ifnull(now_partition,start_partition) as start_partition,end_partition from tb_copy_get_task where table_name='" + table_name + "\'"

    print '获取开始日期:', get_date_sql
    select_result = conn_db.select(get_date_sql)

    start_date = select_result[0][0]
    end_date = select_result[0][1]

    start_date_time = datetime.strptime(start_date, day_format)

    end_date_time = datetime.strptime(end_date, day_format)

    # 迁移周期跨度
    date_length = (end_date_time - start_date_time).days + 1

    print '迁移周期:', date_length

    # print type((end_date_time-start_date_time).days)

    partition_date_init = start_date_time

    # 遍历迁移周期
    for i in range(date_length):
        print i
        partition_date = str(
            (partition_date_init +
             date_time.timedelta(days=i)).date()).replace('-', '')
        print partition_date

        # 检测该周期是否已迁移完成
        if check_date(table_name, partition_date):
            # 返回结果不为空
            continue

        # 更新mysql同步状态
        update_copy_status = "update tb_copy_data_log set copy_status = '1' , where  "

        # 添加分区
        add_partition(table_name, partition_date)
コード例 #18
0
    def read_table_name(self):

        while True:
            # 获取可以稽核表名列表,表里获取分区键,只稽核分区表
            get_task_sql = "select id, table_name ,partition_type from tb_copy_data_count_check_task where  " + self.check_status + "='0' and   batch_num ='" + self.batch_num + "'  limit 1"

            print '获取任务sql:', get_task_sql

            select_result = conn_db.select(get_task_sql)
            print '获取任务:', select_result

            # 取不到任务
            if not select_result:
                print '无稽核任务'
                exit(0)

            # 遍历集合,更新此批次状态,status=1
            update_sql = "update tb_copy_data_count_check_task  set %s='1'" % (
                self.check_status)
            in_condition = ''
            for i in select_result:
                in_condition = str(i[0]) + "," + in_condition

            update_sql = update_sql + "where id in (" + in_condition + "0)"

            print '更新此批次状态', update_sql
            result = conn_db.insert(update_sql)

            # 遍历表名,插日志
            for i in select_result:
                id, table_name, partition_type = i[0], i[1], i[2]

                count_sql = "select %s,count(*) from %s group by %s " % (
                    partition_type, table_name, partition_type)

                print 'count_sql', count_sql

                # 获取稽核结果
                self.get_count_data(count_sql, id, table_name, partition_type)
コード例 #19
0
def read_table_name():
    # 获取可以稽核表名列表
    # get_task_sql = "select a.table_name from tb_copy_get_task a left join tb_copy_data_log b on a.table_name=b.table_name where  b.table_name is null ;"
    get_task_sql = "select table_name  from tb_copy_get_task where start_partition is not null and end_partition is not null and ifnull(now_partition ,start_partition) < end_partition;"

    print get_task_sql

    # get_task_sql_sh = mysql_sh + get_task_sql + '\' > ' + get_task_file

    select_result = conn_db.select(get_task_sql)
    print select_result
    print type(select_result)

    # 遍历查询结果
    for i in select_result:
        table_name = i[0]
        print '表名:', table_name

        # 调用迁移
        input_date(table_name)

    print '无迁移任务'
    def check_sy_data(self, table_name, partition_type, partition_time):

        # 检测老hive库数据量是为0
        count_sql = "select count_num from tb_copy_data_count_check_log where table_name ='%s' and db_name='old' and partition_time='%s' " % (
            table_name, partition_time)

        result = conn_db.select(count_sql)

        print 'result:', result

        if result:
            # 不为空
            result_count = int(result[0][0])
            if result_count > 0:

                update_sql = "update tb_copy_data_log set sy_count='%s',result_not_exist='%s' where table_name='%s' and partition_time='%s'" % (
                    result_count,result_count,table_name, partition_time)
                conn_db.insert(update_sql)

                return True
            else:

                # 老库为0,是否更新到mysql
                print 'sy库数量为0'

                update_sql = "update tb_copy_data_log set result_not_exist='0' where table_name='%s' and partition_time='%s'" % (
                    table_name, partition_time)
                conn_db.insert(update_sql)

                return False
        else:
            # 不存在
            print 'sy库不存在'

            update_sql = "update tb_copy_data_log set result_not_exist='no' where table_name='%s' and partition_time='%s'" % (
                table_name, partition_time)
            conn_db.insert(update_sql)
            return False
コード例 #21
0
def get_error_log():
    global start_time
    try:
        get_error_sql = "select id,table_name,partition_type,partition_time,copy_status from tb_copy_data_log where  remark2 is null and remark5 ='' and partition_type<>'all' and ( copy_status='3' or sy_count<>ocdp_count or sy_count is null or ocdp_count is null) limit 200"

        print 'get_error_sql', get_error_sql
        get_error_result = conn_db.select(get_error_sql)

        start_time = time.time()

        # # 遍历集合,更新此批次状态,status=1
        update_status_list = []
        update_sql = "update tb_copy_data_log  set remark5='checked' "
        in_condition = ''
        for i in get_error_result:
            in_condition = str(i[0]) + "," + in_condition

        update_sql = update_sql + "where id in (" + in_condition + "0)"

        # print '更新此批次状态', update_sql
        result = conn_db.insert(update_sql)

        for i in get_error_result:
            try:
                table_name, partition_type, partition_time, copy_status = i[
                    1], i[2], i[3], i[4]
                check_partition(table_name, partition_type, partition_time,
                                copy_status)
            except Exception as e:
                print e
                print '异常2'
        # 批量更新mysql
        update_mysql_batch()
    except Exception as e:
        print e
        print '异常1'
コード例 #22
0
    def check_date(self, table_name, partition_date, error):
        """Initialise or update the copy-log row and tell if copying is needed.

        Behaviour depends on whether tb_copy_data_log already has a row for
        ``table_name``, on ``self.table_type`` ('1' = full table) and on
        ``error`` (truthy = record a failed sync):

        * No log row: for full tables a row is inserted with copy_status_1
          (or copy_status_3 when ``error`` is truthy); returns True.
        * Log row exists, full table, no error: returns False when the first
          column of the first ``select *`` row equals '2', otherwise True.
        * Log row exists, full table, error: the row is updated to
          copy_status_3 and the method falls through, returning None.
        * Log row exists, other table types: makes sure a row exists for the
          partition and returns True when that row has copy_status_0.
        """
        print '初始化检测,更新同步状态'

        # Check whether the table already has a row in the log table
        # (preparation for initialisation).
        check_table_sql = "select table_name from tb_copy_data_log where table_name='" + table_name + "\'"

        print '检测该表是否存在:', check_table_sql

        check_table_result = conn_db.select(check_table_sql)

        print '检查结果:', check_table_result
        # Current time, truncated to whole seconds.
        now_time = str(date_time.datetime.now())[0:19]

        # Table not logged yet: initialise it as "copy in progress" and
        # return True so the caller runs the migration.
        if not check_table_result:

            # Full table.
            if self.table_type == '1':

                if not error:
                    # Initialise the MySQL sync status.
                    insert_table_sql = "insert into tb_copy_data_log (data_source,table_name,partition_type,partition_time,copy_status,chk_status,start_time,end_time) values('%s','%s','%s','%s','%s','%s','%s','%s')" % (
                        config.data_source, table_name, config.all_table,
                        partition_date, config.copy_status_1,
                        config.chk_status_0, now_time, '0')
                    print '插入记录初始化:', insert_table_sql

                    conn_db.insert(insert_table_sql)

                    # Test insert.
                    conn_db.insert("insert into test (id) values ('123')")
                    print '完成初始化'

                # Sync failed.
                else:
                    # Insert the log row directly with the failed status.
                    insert_table_sql = "insert into tb_copy_data_log (data_source,table_name,partition_type,partition_time,copy_status,chk_status,start_time,end_time) values('%s','%s','%s','%s','%s','%s','%s','%s')" % (
                        config.data_source, table_name, config.all_table,
                        partition_date, config.copy_status_3,
                        config.chk_status_0, now_time, '0')
                    print '插入记录同步失败:', insert_table_sql

                    conn_db.insert(insert_table_sql)

                    # Test insert.
                    conn_db.insert("insert into test (id) values ('123')")
                    print '完成失败状态同步'

            # Non-full table, todo.
            else:
                pass

            return True

        # Table already logged: inspect its state to decide whether it has
        # been migrated.
        else:
            # Full table.
            if self.table_type == '1':

                if not error:
                    # Fetch the migration status.
                    copy_status_sql = "select * from tb_copy_data_log where table_name='" + table_name + "';"

                    copy_status = conn_db.select(copy_status_sql)

                    # Already synced.
                    # NOTE(review): this compares the FIRST column of
                    # `select *` with '2'; confirm that column really is
                    # copy_status in the table definition.
                    if copy_status[0][0] == '2':
                        return False

                    # Not synced / sync failed.
                    else:
                        return True
                # Record the failed sync.
                else:
                    # Update the row to the failed status and stamp end_time.
                    update_table_sql = "update tb_copy_data_log set copy_status='" + config.copy_status_3 + "' ,end_time='" + str(
                        date_time.datetime.now(
                        ))[0:19] + "' where table_name='" + table_name + "\'"

                    print '插入记录同步失败:', update_table_sql

                    conn_db.insert(update_table_sql)

                    # Test insert.
                    conn_db.insert("insert into test (id) values ('123')")
                    print '完成失败状态同步'
                    # No explicit return on this path: the method returns None.

            # Non-full table, todo.
            else:

                # Check whether the table is logged.
                # NOTE(review): check_table_result is truthy in this branch
                # (it is the else of the same test above), so this block
                # cannot run as written.
                if not check_table_result:
                    # Initialise the MySQL sync status.
                    insert_table_sql = "insert into tb_copy_data_log (data_source,table_name,partition_type,partition_time,copy_status,chk_status,start_time,end_time) values('%s','%s','%s','%s','%s','%s','%s','%s')" % (
                        config.data_source, table_name,
                        config.partition_statis_date, partition_date,
                        config.copy_status_1, config.chk_status_0, now_time,
                        '0')
                    print '插入记录初始化:', insert_table_sql

                    conn_db.insert(insert_table_sql)

                    # Test insert.
                    conn_db.insert("insert into test (id) values ('123')")
                    print '完成初始化'

                # Check whether this partition is already logged.
                check_partition_sql = "select table_name from tb_copy_data_log where table_name='" + table_name + "' and partition_time='%s\'" % (
                    partition_date)
                print check_partition_sql
                check_partition_result = conn_db.select(check_partition_sql)
                if not check_partition_result:
                    # Log the partition.
                    insert_table_sql = "insert into tb_copy_data_log (data_source,table_name,partition_type,partition_time,copy_status,chk_status,start_time,end_time) values('%s','%s','%s','%s','%s','%s','%s','%s')" % (
                        config.data_source, table_name,
                        config.partition_statis_date, partition_date,
                        config.copy_status_1, config.chk_status_0, now_time,
                        '0')
                    print '插入记录初始化:', insert_table_sql
                    check_table_result = conn_db.insert(insert_table_sql)

                # Look up the sync status of the partition.
                check_status_sql = " select table_name from tb_copy_data_log where table_name='" + table_name + "' and partition_time= '" + partition_date + "' and copy_status = '" + config.copy_status_0 + "\'"

                print check_status_sql
                select_result = conn_db.select(check_status_sql)

                print '检测结果:', select_result

                # True when a row with copy_status_0 exists, else False.
                if select_result:

                    return True
                else:
                    return False
コード例 #23
0
    with open(path) as json_file:
        return json.load(json_file)

# Load the settings from the JSON configuration file.
config_json = get_config_file('config.json')

# Open the connection with the values of the "database" section.
conn = get_connection(
    driver=config_json["database"]["driver"],
    server=config_json["database"]["server"],
    database=config_json["database"]["db_name"],
    username=config_json["database"]["username"],
    password=config_json["database"]["password"],
)


# Selecting elements: dump every row of the configured test table.
teste_select = select(
    connector=conn,
    query="SELECT * from {}".format(
        config_json["database"]["table_teste_name"]),
)
print(teste_select)
'''
# Inserting elements
teste_insert = insert(connector = conn, \
                    table = config_json["database"]["table_teste_name"], \
                    columns = ['t'], \
                    values = [[1],[1000],[876]])
print(teste_insert)

# Updating elements
teste_update = update(connector = conn, \
                    table = config_json["database"]["table_teste_name"], \
                    columns = ['t'], \
                    condition = 't', \
                    values = [(10,1000),(2,1),(888,876)])
コード例 #24
0
    def input_date(self, table_name, partition_date):

        # 全量表
        if self.table_type == '1':
            if self.check_date(table_name, partition_date=None, error=None):

                try:
                    self.add_partition(table_name, partition_date=None)
                except Exception as e:

                    print '全量表-出现异常:', table_name, e

                    self.check_date(table_name,
                                    partition_date=None,
                                    error=True)

                # 更新mysql失败

            # 该表已迁移
            else:
                print '该表已迁移', table_name
                return

        # 非全量表,todo
        else:

            # 获取分区键,分区时间
            get_date_sql = "select ifnull(now_partition,start_partition) as start_partition,end_partition from tb_copy_get_task where table_name='" + table_name + "\'"

            print '获取开始日期:', get_date_sql
            select_result = conn_db.select(get_date_sql)

            start_date = select_result[0][0]
            end_date = select_result[0][1]

            start_date_time = datetime.strptime(start_date, config.day_format)

            end_date_time = datetime.strptime(end_date, config.day_format)

            # 迁移周期跨度
            date_length = (end_date_time - start_date_time).days + 1

            print '迁移周期:', date_length

            partition_date_init = start_date_time

            # 遍历迁移周期
            for i in range(date_length):

                print i
                partition_date = str(
                    (partition_date_init +
                     date_time.timedelta(days=i)).date()).replace('-', '')

                print partition_date

                try:

                    # 检测该周期是否已迁移完成
                    if self.check_date(table_name, partition_date, error=None):
                        # 返回结果不为空
                        continue

                    # 添加分区
                    self.add_partition(table_name, partition_date)

                    # 单条测试,正式上线后删掉
                    # break
                except Exception as e:
                    print '全量表-出现异常:', table_name, partition_date
コード例 #25
0
import conn_db

# Input: table names to classify, one per line.
table_file = open('./table_list.txt', 'r')
# Outputs, both opened in append mode: tables without partitions, and
# partitioned tables together with their partition count.
no_file = open('./no_list.txt', 'a+')
yes_file = open('./yes_count_list.txt', 'a+')
# Read every line up front (newlines are still attached), then release the
# input file.
table_list = table_file.readlines()
table_file.close()

# 遍历表名
for i in range(len(table_list)):
    table_name = table_list[i].replace('\n', '')
    sql = "select count(*) from PARTITIONS where TBL_ID = ( select TBL_ID from TBLS where OWNER='hive' and tbl_name='%s' order by CREATE_TIME desc limit 1);" % (
        table_name)
    print 'sql:', sql

    result = conn_db.select(sql)

    print result
    print result[0][0]

    pt_count = int(result[0][0])

    if pt_count == 0:
        print '非分区表', table_name
        no_file.write(table_list[i])
    else:
        print '分区表', table_name
        yes_file.write(table_name + ' ' + str(pt_count) + '\n')

    # 测试
    # if i > 10: