def read_table_name(self): while True: # 获取配置参数,带宽,map数 get_config_sql = "select network,map from tb_copy_data_config where migration_batch='" + input_batch + "'" get_config_result = conn_db.select(get_config_sql) self.bandwidth = get_config_result[0][0] self.map_num = get_config_result[0][1] print '获取配置参数', get_config_result, self.bandwidth, self.map_num # 获取可以稽核表名列表,表里获取分区键 get_task_sql = "select id, table_name ,partition_type,partition_time from tb_copy_get_partition_task where copy_status='0' and migration_batch='" + input_batch + "' order by partition_time asc limit 10" print '获取配置参数sql:', get_config_sql print '获取任务sql:', get_task_sql select_result = conn_db.select(get_task_sql) print '获取任务:', select_result # 取不到任务 if not select_result: print '无迁移任务' exit(0) # 遍历集合,更新此批次状态,status=1 update_sql = "update tb_copy_get_partition_task set copy_status='1'" in_condition = '' for i in select_result: in_condition = str(i[0]) + "," + in_condition update_sql = update_sql + "where id in (" + in_condition + "0)" print '更新此批次状态', update_sql result = conn_db.insert(update_sql) # 遍历表名,插日志 for i in select_result: table_name = i[1] partition_type = i[2] partition_time = i[3] # 当前时间 now_time = str(date_time.datetime.now())[0:19] insert_sql = "insert into tb_copy_data_log (data_source,table_name,partition_type,partition_time,copy_status,chk_status,start_time,end_time) values('%s','%s','%s','%s','%s','%s','%s','%s')" % ( config.data_source, table_name, partition_type, partition_time, config.copy_status_1, config.chk_status_0, now_time, '') result = conn_db.insert(insert_sql) # 删除分区,添加分区 self.add_partition(table_name, partition_type, partition_time) # 拷贝数据 self.copy_data(table_name, partition_type, partition_time)
def check_date(self, table_name, partition_date): # 检测该表是否存在 check_table_sql = "select table_name from tb_copy_data_log where table_name='" + table_name + "\'" print '检测该表是否存在:', check_table_sql check_table_result = conn_db.select(check_table_sql) # 当前时间 now_time = str(date_time.datetime.now())[0:19] if not check_table_result: # 初始化mysql同步状态 insert_table_sql = "insert into tb_copy_data_log (data_source,table_name,partition_type,partition_time,copy_status,chk_status,start_time,end_time) values('%s','%s','%s','%s','%s','%s','%s','%s')" % ( config.data_source, table_name, config.partition_statis_date, partition_date, config.copy_status_1, config.chk_status_0, now_time, '0') print '插入记录初始化:', insert_table_sql conn_db.insert(insert_table_sql) # 测试 conn_db.insert("insert into test (id) values ('123')") print '完成初始化' # 检测该分区是否存在 check_partition_sql = "select table_name from tb_copy_data_log where table_name='" + table_name + "' and partition_time='%s\'" % ( partition_date) print check_partition_sql check_partition_result = conn_db.select(check_partition_sql) if not check_partition_result: # 插入分区 insert_table_sql = "insert into tb_copy_data_log (data_source,table_name,partition_type,partition_time,copy_status,chk_status,start_time,end_time) values('%s','%s','%s','%s','%s','%s','%s','%s')" % ( config.data_source, table_name, config.partition_statis_date, partition_date, config.copy_status_1, config.chk_status_0, now_time, '0') print '插入记录初始化:', insert_table_sql check_table_result = conn_db.insert(insert_table_sql) # 检测同步状态 check_status_sql = " select table_name from tb_copy_data_log where table_name='" + table_name + "' and partition_time= '" + partition_date + "' and copy_status = '" + config.copy_status_0 + "\'" print check_status_sql select_result = conn_db.select(check_status_sql) print '检测结果:', select_result # 结果判空 if select_result: return True else: return False
def check_exists(self, table_name): # 如果存在先删除,表名,周期,库名 check_sql = "select id from tb_copy_data_count_check_log where table_name='%s' and db_name='%s'" % ( table_name, self.db_name) print 'check_sql:', check_sql check_result = conn_db.select(check_sql) print 'check_result:', check_result # 存在 if check_result: condition = '' if len(check_result) == 1: condition = "'%s'" % (check_result[0]) else: for j in check_result: condition = condition + ',' + "'%s'" % (j[0]) if condition[0] == ',': condition = condition[1:] print 'condition:', condition delete_sql = "delete from tb_copy_data_count_check_log where id in (%s)" % ( condition) print 'delete_sql:', delete_sql conn_db.insert(delete_sql)
def get_int(table_name): get_int_sql = "select column_name from columns where data_type ='int' and table_name=\'" + table_name + '\'' print 'get_int_sql:', get_int_sql result = conn_db.select(get_int_sql) print result[0] return result[0]
def read_table_name(): # 获取可以稽核表名列表 get_task_sql = "select a.table_name from tb_copy_get_task a left join tb_copy_data_log b on a.table_name=b.table_name where b.copy_status='0' or b.table_name is null ;" print get_task_sql # get_task_sql_sh = mysql_sh + get_task_sql + '\' > ' + get_task_file select_result = conn_db.select(get_task_sql) print select_result print type(select_result) # 遍历查询结果 for i in select_result: table_name = i[0] print '表名:', table_name # 调用迁移 input_date(table_name) # 执行获取sql # os.popen(get_task_sql_sh) # get_task_list = open(get_task_file, 'r') multi_list = []
def read_table_name(self): get_task_sql = '' # 全量表 if self.table_type == '1': # 获取可以稽核表名列表 get_task_sql = "select table_name from tb_copy_get_task where table_type='" + self.table_type + "' and size_type='" + self.size_type + "' and migration_batch= '" + input_batch + "';" else: # 获取可以稽核表名列表,表里获取分区键 get_task_sql = "select table_name from tb_copy_get_task where start_partition is not null and end_partition is not null and ifnull(now_partition ,start_partition) <= end_partition and migration_batch= " + input_batch + ';' print '获取任务sql:', get_task_sql select_result = conn_db.select(get_task_sql) print '获取任务:', select_result # 遍历任务列表 for i in select_result: table_name = i[0] print '表名:', table_name # 调用迁移 self.input_date(table_name) print '无迁移任务'
def read_table_name(self): while True: # 获取可以稽核表名列表,表里获取分区键,只稽核分区表 # get_task_sql = "select id, table_name ,partition_type from tb_copy_data_count_check_task where " + self.check_status + "='0' and batch_num ='" + self.batch_num + "' limit 10" # get_task_sql = "select id,table_name,partition_type,partition_time from tb_copy_data_log where remark3 is null and start_time >'2020-08-13 10:00:51' and copy_status='2' and (%s is NULL or %s ='') order by partition_time desc,start_time asc limit 1000;" % ( # # self.count_col, self.count_col) get_task_sql = "select id,table_name,partition_type,partition_time from tb_copy_data_log where remark3 is null and start_time >'2020-08-13 10:00:51' and copy_status='2' and (%s is NULL or %s ='') limit 1000;" % ( self.count_col, self.count_col) print '获取任务sql:', get_task_sql select_result = conn_db.select(get_task_sql) # print '获取任务:', select_result # 取不到任务 if not select_result: print '无稽核任务' exit(0) # # 遍历集合,更新此批次状态,status=1 update_status_list = [] update_sql = "update tb_copy_data_log set remark3='checked' " in_condition = '' for i in select_result: in_condition = str(i[0]) + "," + in_condition update_sql = update_sql + "where id in (" + in_condition + "0)" # print '更新此批次状态', update_sql result = conn_db.insert(update_sql) self.start_time = time.time() # 遍历表名,插日志 for i in select_result: id, table_name, partition_type, partition_time = i[0], i[1], i[ 2], i[3] # count_sql = "select %s,count(*) from %s group by %s " % (partition_type, table_name, partition_type) count_sql = "select partition_type,partition_time,count_num from tb_copy_data_count_check_log where db_name='%s' and table_name='%s' and partition_time='%s' " % ( self.db_name, table_name, partition_time) # print 'count_sql', count_sql # 获取稽核结果 self.get_count_data(count_sql, id, table_name, partition_type, partition_time) # 更新mysql # print 'update_sql_list', self.update_sql_list conn_db.insert_batch(self.update_sql_list) print 'sleep 5s,更新数据量:', len( self.update_sql_list), '耗时:', 
time.time() - self.start_time self.update_sql_list = [] time.sleep(3)
def get_end_string(table_name): get_end_string_sql = "select a.column_name from columns a inner join ( select table_schema,table_name,max(column_id) mx_column_id from columns where data_type like 'varchar%' group by 1,2) b on a.column_id = b.mx_column_id and a.table_name=\'" + table_name + '\'' print 'get_end_string_sql:', get_end_string_sql result = conn_db.select(get_end_string_sql) print result[0][0] return result[0][0]
def get_time(): try: get_time_str_sql = "select time_str from tb_tmp_copy_data " result = conn_db.select(get_time_str_sql)[0][0] print 'get_time_str:', result return result except Exception as e: print e print '异常'
def get_error_log(): try: get_error_sql = "select table_name,partition_type,partition_time,copy_status from tb_copy_data_log where copy_status='3'" print 'get_error_sql', get_error_sql get_error_result = conn_db.select(get_error_sql) for i in get_error_result: table_name, partition_type, partition_time, copy_status = i[0], i[ 1], i[2], i[3] check_partition(table_name, partition_type, partition_time, copy_status) except Exception as e: print e print '异常'
def check_partition(table_name, partition_date): partition_str = "statis_date" get_partition_sql = "select column_name from columns where column_name='" + partition_str + "' data_type ='int' and table_name=\'" + table_name + '\'' print 'get_end_string_sql:', get_partition_sql result = conn_db.select(get_partition_sql) # 无分区 if len(result[0]) == 0: return '' # 有分区 else: return partition_date
def get_error_log(): try: get_error_sql = "select table_name,partition_type,partition_time,copy_status from tb_copy_data_log where partition_type<>'all' and ( copy_status='3' or sy_count<>ocdp_count) limit 100" print 'get_error_sql', get_error_sql get_error_result = conn_db.select(get_error_sql) for i in get_error_result: try: table_name, partition_type, partition_time, copy_status = i[0], i[1], i[2], i[3] check_partition(table_name, partition_type, partition_time, copy_status) except Exception as e: print e print '异常2' except Exception as e: print e print '异常1'
def read_table_name(): # 获取可以稽核表名列表 get_task_sql = "select table_name from tb_copy_get_task where start_partition is not null and end_partition is not null and ifnull(now_partition ,start_partition) < end_partition;" print '获取任务sql:', get_task_sql select_result = conn_db.select(get_task_sql) print '获取任务:', select_result # 遍历任务列表 for i in select_result: table_name = i[0] print '表名:', table_name # 调用迁移 input_date(table_name) print '无迁移任务'
def get_count_data(self, count_sql, id, table_name, partition_type,
                   partition_time):
    """Run ``count_sql`` and pass a non-empty result to ``self.insert_data``.

    When the query returns nothing the function does nothing.
    ``table_name``, ``partition_type`` and ``partition_time`` are accepted
    but not used here.
    """
    # Fetch the result from MySQL.
    rows = conn_db.select(count_sql)
    if rows:
        self.insert_data(id, rows)
def get_task(self): while True: # 获取可以稽核表名列表,表里获取分区键 get_task_sql = "select id, table_name ,batch_num from tb_close_lock_get_task where copy_status='0' and batch_num='" + self.input_batch + "' order by id asc limit 1" print '获取任务sql:', get_task_sql select_result = conn_db.select(get_task_sql) print '获取任务:', select_result # 取不到任务 if not select_result: print '无迁移任务' exit(0) # 遍历集合,更新此批次状态,status=1 update_sql = "update tb_close_lock_get_task set copy_status='1'" in_condition = '' for i in select_result: in_condition = str(i[0]) + "," + in_condition update_sql = update_sql + "where id in (" + in_condition + "0)" print '更新此批次状态', update_sql result = conn_db.insert(update_sql) # 遍历表名,插日志,开始执行任务 for i in select_result: table_name = i[1] # 当前时间 now_time = str(date_time.datetime.now())[0:19] insert_sql = "insert into tb_close_lock_log (table_name,rename_status,create_status,insert_status,start_time,end_time) values('%s','%s','%s','%s','%s','%s')" % ( table_name, config.rename_status_0, config.create_status_0, config.insert_status_0, now_time, '') result = conn_db.insert(insert_sql) self.check_name(table_name) print '无任务'
def read_table_name(self): # 获取可以稽核表名列表 get_task_sql = "select a.table_name ,a.partition_type from tb_copy_get_task a left join tb_copy_data_log b on a.table_name=b.table_name where copy_status='" + config.copy_status_2 + "' and table_type='" + self.table_type + "' and size_type='" + self.size_type + "' and migration_batch= '" + input_batch + "';" print '获取任务sql:', get_task_sql select_result = conn_db.select(get_task_sql) print '获取任务:', select_result # 全量表 if self.table_type == '1': # 遍历任务表名列表 for i in select_result: table_name = i[0] print '表名:', table_name # 调用迁移 self.input_date(table_name, partition_date=None) # 日分区表 elif self.table_type == '2': # 分区键 partition_type = select_result[0][1] print '获取分区键', partition_type # 日表迁移33天 day_partition_list = config.day_partition.reverse() for partition_date in day_partition_list: # 遍历任务列表 for i in select_result: table_name = i[0] print '表名:', table_name # 调用迁移 self.input_date(table_name, partition_date) print '无迁移任务'
def input_date(table_name): get_date_sql = "select ifnull(now_partition,start_partition) as start_partition,end_partition from tb_copy_get_task where table_name='" + table_name + "\'" print '获取开始日期:', get_date_sql select_result = conn_db.select(get_date_sql) start_date = select_result[0][0] end_date = select_result[0][1] start_date_time = datetime.strptime(start_date, day_format) end_date_time = datetime.strptime(end_date, day_format) # 迁移周期跨度 date_length = (end_date_time - start_date_time).days + 1 print '迁移周期:', date_length # print type((end_date_time-start_date_time).days) partition_date_init = start_date_time # 遍历迁移周期 for i in range(date_length): print i partition_date = str( (partition_date_init + date_time.timedelta(days=i)).date()).replace('-', '') print partition_date # 检测该周期是否已迁移完成 if check_date(table_name, partition_date): # 返回结果不为空 continue # 更新mysql同步状态 update_copy_status = "update tb_copy_data_log set copy_status = '1' , where " # 添加分区 add_partition(table_name, partition_date)
def read_table_name(self): while True: # 获取可以稽核表名列表,表里获取分区键,只稽核分区表 get_task_sql = "select id, table_name ,partition_type from tb_copy_data_count_check_task where " + self.check_status + "='0' and batch_num ='" + self.batch_num + "' limit 1" print '获取任务sql:', get_task_sql select_result = conn_db.select(get_task_sql) print '获取任务:', select_result # 取不到任务 if not select_result: print '无稽核任务' exit(0) # 遍历集合,更新此批次状态,status=1 update_sql = "update tb_copy_data_count_check_task set %s='1'" % ( self.check_status) in_condition = '' for i in select_result: in_condition = str(i[0]) + "," + in_condition update_sql = update_sql + "where id in (" + in_condition + "0)" print '更新此批次状态', update_sql result = conn_db.insert(update_sql) # 遍历表名,插日志 for i in select_result: id, table_name, partition_type = i[0], i[1], i[2] count_sql = "select %s,count(*) from %s group by %s " % ( partition_type, table_name, partition_type) print 'count_sql', count_sql # 获取稽核结果 self.get_count_data(count_sql, id, table_name, partition_type)
def read_table_name(): # 获取可以稽核表名列表 # get_task_sql = "select a.table_name from tb_copy_get_task a left join tb_copy_data_log b on a.table_name=b.table_name where b.table_name is null ;" get_task_sql = "select table_name from tb_copy_get_task where start_partition is not null and end_partition is not null and ifnull(now_partition ,start_partition) < end_partition;" print get_task_sql # get_task_sql_sh = mysql_sh + get_task_sql + '\' > ' + get_task_file select_result = conn_db.select(get_task_sql) print select_result print type(select_result) # 遍历查询结果 for i in select_result: table_name = i[0] print '表名:', table_name # 调用迁移 input_date(table_name) print '无迁移任务'
def check_sy_data(self, table_name, partition_type, partition_time): # 检测老hive库数据量是为0 count_sql = "select count_num from tb_copy_data_count_check_log where table_name ='%s' and db_name='old' and partition_time='%s' " % ( table_name, partition_time) result = conn_db.select(count_sql) print 'result:', result if result: # 不为空 result_count = int(result[0][0]) if result_count > 0: update_sql = "update tb_copy_data_log set sy_count='%s',result_not_exist='%s' where table_name='%s' and partition_time='%s'" % ( result_count,result_count,table_name, partition_time) conn_db.insert(update_sql) return True else: # 老库为0,是否更新到mysql print 'sy库数量为0' update_sql = "update tb_copy_data_log set result_not_exist='0' where table_name='%s' and partition_time='%s'" % ( table_name, partition_time) conn_db.insert(update_sql) return False else: # 不存在 print 'sy库不存在' update_sql = "update tb_copy_data_log set result_not_exist='no' where table_name='%s' and partition_time='%s'" % ( table_name, partition_time) conn_db.insert(update_sql) return False
def get_error_log(): global start_time try: get_error_sql = "select id,table_name,partition_type,partition_time,copy_status from tb_copy_data_log where remark2 is null and remark5 ='' and partition_type<>'all' and ( copy_status='3' or sy_count<>ocdp_count or sy_count is null or ocdp_count is null) limit 200" print 'get_error_sql', get_error_sql get_error_result = conn_db.select(get_error_sql) start_time = time.time() # # 遍历集合,更新此批次状态,status=1 update_status_list = [] update_sql = "update tb_copy_data_log set remark5='checked' " in_condition = '' for i in get_error_result: in_condition = str(i[0]) + "," + in_condition update_sql = update_sql + "where id in (" + in_condition + "0)" # print '更新此批次状态', update_sql result = conn_db.insert(update_sql) for i in get_error_result: try: table_name, partition_type, partition_time, copy_status = i[ 1], i[2], i[3], i[4] check_partition(table_name, partition_type, partition_time, copy_status) except Exception as e: print e print '异常2' # 批量更新mysql update_mysql_batch() except Exception as e: print e print '异常1'
def check_date(self, table_name, partition_date, error):
    """Initialise or update the copy-log row of a table and report its state.

    Behaviour as far as the visible code shows:

    * Table has no row in ``tb_copy_data_log``: for full tables
      (``self.table_type == '1'``) a row is inserted with
      ``config.copy_status_1`` (``error`` false) or ``config.copy_status_3``
      (``error`` true); other table types insert nothing. Returns True.
    * Table has a row, full table, ``error`` false: returns False when the
      first field of the first ``select *`` row equals ``'2'``, else True.
    * Table has a row, full table, ``error`` true: the row is updated to
      ``config.copy_status_3`` and the function falls through (returns None).
    * Table has a row, other table types: a row for ``partition_date`` is
      inserted if missing; returns True when a row with
      ``config.copy_status_0`` exists for that partition, else False.

    NOTE(review): the original source was collapsed onto two physical lines,
    so the nesting below is reconstructed from the comments and ``else``
    keywords - confirm against the real file.
    """
    print '初始化检测,更新同步状态'
    # Check whether the table already appears in the log table
    # (preparation for initialisation).
    check_table_sql = "select table_name from tb_copy_data_log where table_name='" + table_name + "\'"
    print '检测该表是否存在:', check_table_sql
    check_table_result = conn_db.select(check_table_sql)
    print '检查结果:', check_table_result
    # Current time, truncated to whole seconds.
    now_time = str(date_time.datetime.now())[0:19]
    # Table not in the log: return True, initialise, set the status to
    # "migrating" so that the migration runs.
    if not check_table_result:
        # Full table: check whether it is in the log table and whether the
        # logged status says the sync has finished.
        if self.table_type == '1':
            if not error:
                # Initialise the MySQL sync status.
                insert_table_sql = "insert into tb_copy_data_log (data_source,table_name,partition_type,partition_time,copy_status,chk_status,start_time,end_time) values('%s','%s','%s','%s','%s','%s','%s','%s')" % (
                    config.data_source, table_name, config.all_table,
                    partition_date, config.copy_status_1,
                    config.chk_status_0, now_time, '0')
                print '插入记录初始化:', insert_table_sql
                conn_db.insert(insert_table_sql)
                # Test
                conn_db.insert("insert into test (id) values ('123')")
                print '完成初始化'
            # Sync failed
            else:
                # Initialise the MySQL sync status (with the failure status).
                insert_table_sql = "insert into tb_copy_data_log (data_source,table_name,partition_type,partition_time,copy_status,chk_status,start_time,end_time) values('%s','%s','%s','%s','%s','%s','%s','%s')" % (
                    config.data_source, table_name, config.all_table,
                    partition_date, config.copy_status_3,
                    config.chk_status_0, now_time, '0')
                print '插入记录同步失败:', insert_table_sql
                conn_db.insert(insert_table_sql)
                # Test
                conn_db.insert("insert into test (id) values ('123')")
                print '完成失败状态同步'
        # Not a full table, todo
        else:
            pass
        return True
    # Table is in the log: check the partition and decide whether it has
    # already been migrated.
    else:
        # Full table: check whether it is in the log table and whether the
        # logged status says the sync has finished.
        if self.table_type == '1':
            if not error:
                # Fetch the migration status.
                copy_status_sql = "select * from tb_copy_data_log where table_name='" + table_name + "';"
                copy_status = conn_db.select(copy_status_sql)
                # Sync already finished.
                # NOTE(review): this compares the FIRST column returned by
                # "select *" with '2'; confirm that column is copy_status.
                if copy_status[0][0] == '2':
                    return False
                # Sync failed
                else:
                    return True
            # Update to the failed sync status.
            # NOTE(review): no return statement follows in this branch, so
            # the caller receives None here.
            else:
                # Update to the failed sync status.
                update_table_sql = "update tb_copy_data_log set copy_status='" + config.copy_status_3 + "' ,end_time='" + str(
                    date_time.datetime.now(
                    ))[0:19] + "' where table_name='" + table_name + "\'"
                print '插入记录同步失败:', update_table_sql
                conn_db.insert(update_table_sql)
                # Test
                conn_db.insert("insert into test (id) values ('123')")
                print '完成失败状态同步'
        # Not a full table, todo
        else:
            # Check whether the partition already exists.
            # NOTE(review): with the nesting assumed here this sits in the
            # branch where check_table_result is non-empty, so the condition
            # below looks like it can never hold - confirm.
            if not check_table_result:
                # Initialise the MySQL sync status.
                insert_table_sql = "insert into tb_copy_data_log (data_source,table_name,partition_type,partition_time,copy_status,chk_status,start_time,end_time) values('%s','%s','%s','%s','%s','%s','%s','%s')" % (
                    config.data_source, table_name,
                    config.partition_statis_date, partition_date,
                    config.copy_status_1, config.chk_status_0, now_time, '0')
                print '插入记录初始化:', insert_table_sql
                conn_db.insert(insert_table_sql)
                # Test
                conn_db.insert("insert into test (id) values ('123')")
                print '完成初始化'
            # Check whether this partition exists in the log.
            check_partition_sql = "select table_name from tb_copy_data_log where table_name='" + table_name + "' and partition_time='%s\'" % (
                partition_date)
            print check_partition_sql
            check_partition_result = conn_db.select(check_partition_sql)
            if not check_partition_result:
                # Insert the partition row.
                insert_table_sql = "insert into tb_copy_data_log (data_source,table_name,partition_type,partition_time,copy_status,chk_status,start_time,end_time) values('%s','%s','%s','%s','%s','%s','%s','%s')" % (
                    config.data_source, table_name,
                    config.partition_statis_date, partition_date,
                    config.copy_status_1, config.chk_status_0, now_time, '0')
                print '插入记录初始化:', insert_table_sql
                check_table_result = conn_db.insert(insert_table_sql)
            # Check the sync status.
            check_status_sql = " select table_name from tb_copy_data_log where table_name='" + table_name + "' and partition_time= '" + partition_date + "' and copy_status = '" + config.copy_status_0 + "\'"
            print check_status_sql
            select_result = conn_db.select(check_status_sql)
            print '检测结果:', select_result
            # Non-empty result -> True.
            if select_result:
                return True
            else:
                return False
with open(path) as json_file: return json.load(json_file) config_json = get_config_file('config.json') conn = get_connection( driver = config_json["database"]["driver"], server = config_json["database"]["server"], database = config_json["database"]["db_name"], username = config_json["database"]["username"], password = config_json["database"]["password"] ) # Selecting elements teste_select = select(connector = conn, \ query = '''SELECT * from {}'''.format(config_json["database"]["table_teste_name"])) print(teste_select) ''' # Inserting elements teste_insert = insert(connector = conn, \ table = config_json["database"]["table_teste_name"], \ columns = ['t'], \ values = [[1],[1000],[876]]) print(teste_insert) # Updating elements teste_update = update(connector = conn, \ table = config_json["database"]["table_teste_name"], \ columns = ['t'], \ condition = 't', \ values = [(10,1000),(2,1),(888,876)])
def input_date(self, table_name, partition_date): # 全量表 if self.table_type == '1': if self.check_date(table_name, partition_date=None, error=None): try: self.add_partition(table_name, partition_date=None) except Exception as e: print '全量表-出现异常:', table_name, e self.check_date(table_name, partition_date=None, error=True) # 更新mysql失败 # 该表已迁移 else: print '该表已迁移', table_name return # 非全量表,todo else: # 获取分区键,分区时间 get_date_sql = "select ifnull(now_partition,start_partition) as start_partition,end_partition from tb_copy_get_task where table_name='" + table_name + "\'" print '获取开始日期:', get_date_sql select_result = conn_db.select(get_date_sql) start_date = select_result[0][0] end_date = select_result[0][1] start_date_time = datetime.strptime(start_date, config.day_format) end_date_time = datetime.strptime(end_date, config.day_format) # 迁移周期跨度 date_length = (end_date_time - start_date_time).days + 1 print '迁移周期:', date_length partition_date_init = start_date_time # 遍历迁移周期 for i in range(date_length): print i partition_date = str( (partition_date_init + date_time.timedelta(days=i)).date()).replace('-', '') print partition_date try: # 检测该周期是否已迁移完成 if self.check_date(table_name, partition_date, error=None): # 返回结果不为空 continue # 添加分区 self.add_partition(table_name, partition_date) # 单条测试,正式上线后删掉 # break except Exception as e: print '全量表-出现异常:', table_name, partition_date
import conn_db table_file = open('./table_list.txt', 'r') no_file = open('./no_list.txt', 'a+') yes_file = open('./yes_count_list.txt', 'a+') table_list = table_file.readlines() table_file.close() # 遍历表名 for i in range(len(table_list)): table_name = table_list[i].replace('\n', '') sql = "select count(*) from PARTITIONS where TBL_ID = ( select TBL_ID from TBLS where OWNER='hive' and tbl_name='%s' order by CREATE_TIME desc limit 1);" % ( table_name) print 'sql:', sql result = conn_db.select(sql) print result print result[0][0] pt_count = int(result[0][0]) if pt_count == 0: print '非分区表', table_name no_file.write(table_list[i]) else: print '分区表', table_name yes_file.write(table_name + ' ' + str(pt_count) + '\n') # 测试 # if i > 10: