def save_data():
    """Export finance_official_bill rows (check_amount > 0, non-empty
    bill_code) to ``dest_file`` via paginated queries run on a gevent pool.

    Side effects: resets ``dest_file`` through ``init_file`` and appends rows
    through ``exec_task``; progress is logged via ``log``.
    """
    init_file(dest_file)
    # Date column for this table is account_period (kept for reference).
    columns_ls = ['finance_offical_id', 'bill_id', 'bill_code', 'check_amount']
    columns_str = ",".join(columns_ls)
    sql = ('select {columns_str} from 01_datamart_layer_007_h_cw_df.finance_official_bill '
           'where check_amount > 0 AND bill_code is not NULL AND bill_code !="" ').format(
               columns_str=columns_str)
    count_sql = 'select count(a.bill_id) from ({sql}) a'.format(sql=sql)
    log.info(count_sql)
    records = prod_execute_sql(conn_type=CONN_TYPE, sqltype='select', sql=count_sql)
    count_records = records[0][0]
    log.info(f'* count_records ==> {count_records}')

    max_size = 1 * 100000   # paginate only when at least this many rows
    limit_size = 1 * 10000  # page size

    page_sql_tpl = ("select {columns_str} from 01_datamart_layer_007_h_cw_df.finance_official_bill "
                    "where check_amount > 0 AND bill_code is not NULL AND bill_code !='' "
                    "ORDER BY check_amount limit {limit_size} offset {offset_size}")
    select_sql_ls = []
    if count_records >= max_size:
        offset_size = 0
        # Clamp the final page instead of duplicating the SQL template in two
        # branches; this also stops emitting a useless "limit 0" query when
        # count_records is an exact multiple of limit_size.
        while offset_size < count_records:
            page_size = min(limit_size, count_records - offset_size)
            select_sql_ls.append(page_sql_tpl.format(columns_str=columns_str,
                                                     limit_size=page_size,
                                                     offset_size=offset_size))
            offset_size += page_size
    else:
        # BUG FIX: the original statement was missing the "from" keyword and
        # passed the undefined name test_limit_cond to .format(), which
        # raised NameError whenever this branch was taken.
        tmp_sql = ("select {columns_str} from 01_datamart_layer_007_h_cw_df.finance_official_bill "
                   "where check_amount > 0 AND bill_code is not NULL AND bill_code !='' ").format(
                       columns_str=columns_str)
        select_sql_ls.append(tmp_sql)

    log.info('* check_49_plane_data 开始分页查询')
    start_time = time.perf_counter()
    pool = Pool(30)  # gevent pool: up to 30 concurrent page queries
    results = []
    for sel_sql in select_sql_ls:
        rst = pool.spawn(exec_task, sel_sql)
        results.append(rst)
    gevent.joinall(results)
    consumed_time = round(time.perf_counter() - start_time)
    log.info(f'* check_49 一共有数据 {count_records} 条,保存数据耗时 {consumed_time} sec')
def exec_plane_task(sql, dest_file):
    """Run *sql* and append each plane-ticket row to *dest_file* as CSV.

    Column order: finance_travel_id, bill_id, departure time, arrival time,
    origin, destination, ticket amount. Commas inside the origin/destination
    names are replaced with spaces so they cannot break the CSV layout.

    :param sql: SELECT statement yielding rows in the order above.
    :param dest_file: path of the CSV file to append to.
    """
    records = prod_execute_sql(conn_type=CONN_TYPE, sqltype='select', sql=sql)
    if not records:
        return
    # Open the output once for the whole batch; the original reopened the
    # file for every single record (O(rows) open/close syscalls).
    with open(dest_file, "a+", encoding='utf-8') as file:
        for record in records:
            finance_travel_id = str(record[0])
            bill_id = str(record[1])
            plane_beg_date = str(record[2])  # flight start time
            plane_end_date = str(record[3])  # flight end time
            plane_origin_name = str(record[4]).replace(',', ' ')  # origin
            plane_destin_name = str(record[5]).replace(',', ' ')  # destination
            plane_check_amount = str(record[6])  # ticket cost
            record_str = f'{finance_travel_id},{bill_id},{plane_beg_date},{plane_end_date},{plane_origin_name},{plane_destin_name},{plane_check_amount}'
            file.write(record_str + "\n")
def db_fetch_to_dict(sql, columns=None):
    """Run *sql* and return the rows as a list of dicts keyed by *columns*.

    Values are normalised for downstream use: "None" stays ``None``, JPype
    ``JDouble`` wrappers become ``float``, everything else (including
    ``java.lang.Long``) is stringified.

    :param sql: SELECT statement to execute.
    :param columns: column names zipped positionally with each row
        (default: no columns, producing empty dicts).
    :return: list of dicts, one per fetched row.
    """
    # BUG FIX: the original used a mutable default (columns=[]); use the
    # None-sentinel idiom instead.
    if columns is None:
        columns = []
    records = prod_execute_sql(conn_type=CONN_TYPE, sqltype='select', sql=sql)
    result = []
    # The original had two byte-identical branches for len(records) == 1 and
    # the general case; a single loop covers both.
    for record in records:
        result_row = dict(zip(columns, record))
        for key in result_row.keys():
            value = result_row[key]
            if str(value) == "None":
                result_row[key] = None
            elif str(type(value)) == "<java class 'JDouble'>":
                result_row[key] = float(value)
            else:
                # java.lang.Long and every other type are stringified.
                result_row[key] = str(value)
        result.append(result_row)
    return result
def query_travel():
    """Count finance_travel_bill rows from *year* onward that carry at least
    one seller/route attribute, and log the resulting count."""
    year = '2021'
    test_limit_cond = ''  # optional extra clause, empty in normal runs
    columns_str = ",".join([
        'destin_name', 'sales_name', 'sales_addressphone', 'sales_bank',
        'finance_travel_id', 'origin_name', 'invo_code', 'sales_taxno',
    ])
    sql = """ select {columns_str} from 01_datamart_layer_007_h_cw_df.finance_travel_bill where !(sales_name is null and sales_addressphone is null and sales_bank is null and origin_name is null and destin_name is null and sales_taxno is null ) AND left(account_period,4) >= '{year}' {test_limit_cond} """.format(
        columns_str=columns_str,
        year=year,
        test_limit_cond=test_limit_cond).replace('\n', '').replace('\r', '').strip()
    count_sql = 'select count(a.finance_travel_id) from ({sql}) a'.format(sql=sql)
    log.info(count_sql)
    rows = prod_execute_sql(conn_type=CONN_TYPE, sqltype='select', sql=count_sql)
    count_records = rows[0][0]
    log.info(f'* count_records ==> {count_records}')
def exec_task(self, sql):
    """Fetch hotel-expense rows with *sql* and append them to the global
    ``dest_file`` as CSV lines.

    Row layout: bill_id, trip city, province (resolved via
    ``self.province_service``), city grade, employee name, daily lodging
    standard, daily actual lodging cost. Commas in free-text fields are
    replaced by spaces so they cannot break the CSV.
    """
    records = prod_execute_sql(conn_type=CONN_TYPE, sqltype="select", sql=sql)
    if not records:
        return
    # Open once for the whole batch; the original re-opened dest_file for
    # every single record.
    with open(dest_file, "a+", encoding="utf-8") as file:
        for record in records:
            bill_id = str(record[0])
            city_name = str(record[1])              # trip city name
            city_grade_name = str(record[2])        # trip city grade
            emp_name = str(record[3])               # employee name
            stand_amount_perday = float(record[4])  # daily lodging standard
            hotel_amount_perday = float(record[5])  # daily lodging actually spent
            # Province the trip city belongs to; may require extra rows in
            # 01_datamart_layer_007_h_cw_df.finance_province_city.
            province = self.province_service.query_belong_province(city_name)
            city_name = city_name.replace(",", " ")
            emp_name = emp_name.replace(",", " ")
            record_str = f"{bill_id},{city_name},{province},{city_grade_name},{emp_name},{stand_amount_perday},{hotel_amount_perday}"
            file.write(record_str + "\n")
def demo3():
    """Ad-hoc check: count 2016 travel rows carrying seller/route data."""
    start_time0 = time.perf_counter()
    # Single-row lookup kept for manual experimentation (not executed below).
    prod_sql = """ select destin_name,sales_name,sales_addressphone,sales_bank,finance_travel_id,origin_name,invo_code from 01_datamart_layer_007_h_cw_df.finance_travel_bill where finance_travel_id='d8b37cb8-1b42-4de9-8cab-d1ed0586d120' """
    prod_sql2 = """ select count(finance_travel_id) from (select destin_name,sales_name,sales_addressphone,sales_bank,finance_travel_id,origin_name,invo_code,sales_taxno from 01_datamart_layer_007_h_cw_df.finance_travel_bill where !(sales_name is null and sales_addressphone is null and sales_bank is null and origin_name is null and destin_name is null and sales_taxno is null ) and left(account_period,4) ='2016') a """
    print(prod_sql2)
    rows = prod_execute_sql(conn_type='test', sqltype='select', sql=prod_sql2)
    print(len(rows))
    for row in rows:
        print(row)
    consumed_time0 = (time.perf_counter() - start_time0)
    print(f'* 取数耗时 => {consumed_time0} sec, records={len(rows)}')
def get_travel_keyword():
    """Extract keyword tags for travel expenses.

    Pulls every distinct commodity name, strips punctuation, digits and
    latin letters, then runs jieba TF-IDF extraction.

    :return: list of the top-80 keyword tags.
    """
    sql = "select distinct commodityname from 01_datamart_layer_007_h_cw_df.finance_travel_bill where commodityname is not null and commodityname !='' "
    records = prod_execute_sql(conn_type=CONN_TYPE, sqltype='select', sql=sql)
    text = ' '.join(str(record[0]) for record in records)
    # Scrub punctuation/digits, then latin letters in both cases.
    text = re.sub(r'[{}]+'.format(punctuation + digits), '', text)
    text = re.sub("[a-z]", "", text)
    text = re.sub("[A-Z]", "", text)
    jieba.analyse.set_stop_words(
        "/you_filed_algos/app/report/algorithm/stop_words.txt")
    jieba.analyse.set_idf_path(
        "/you_filed_algos/app/report/algorithm/userdict.txt")
    return analyse.extract_tags(text, topK=80, withWeight=False, allowPOS=())
def _query_province(self, keyword):
    """Look up an administrative area by exact name.

    :param keyword: area name to match exactly.
    :return: (area_id, area_name, parent_id, grade) as strings; every slot
        is None when the keyword is empty, nothing matches, or the query
        fails.
    """
    missing = (None, None, None, None)
    if keyword is None or keyword == 'None' or len(keyword) == 0:
        return missing
    try:
        # NOTE(review): keyword is interpolated straight into SQL — confirm
        # it is trusted upstream or move to a parameterised query.
        sel_sql = f"select area_id, area_name, parent_id, grade from 01_datamart_layer_007_h_cw_df.finance_province_city where area_name = '{keyword}'"
        rows = prod_execute_sql(conn_type=CONN_TYPE, sqltype='select', sql=sel_sql)
        if not rows:
            return missing
        first = rows[0]
        return tuple(str(col) if col else None for col in first[:4])
    except Exception as exc:
        print(exc)
        return missing
def _query_previous_province(self, area_id):
    """Find the parent administrative area record for *area_id*.

    :param area_id: id of the area whose record (incl. parent_id) is wanted.
    :return: (area_id, area_name, parent_id, grade) as strings; every slot
        is None when the id is missing, unmatched, or the query errors out.
    """
    not_found = (None, None, None, None)
    if area_id is None or area_id == 'None':
        return not_found
    try:
        sel_sql = (
            "select area_id, area_name, parent_id, grade "
            "from 01_datamart_layer_007_h_cw_df.finance_province_city "
            f"where area_id = '{area_id}'"
        )
        records = prod_execute_sql(conn_type=CONN_TYPE, sqltype='select', sql=sel_sql)
        if len(records) == 0:
            return not_found
        first = records[0]
        return tuple(str(v) if v else None for v in first[:4])
    except Exception as exc:
        print(exc)
        return not_found
def pagination_temp_api_bill_records():
    """Build the paged query over the temp_api_bill staging table.

    :return: (total row count, SELECT statement ordered by order_number,
        list of selected column names)
    """
    columns_ls = ['tem_api_id', 'order_number', 'target_classify', 'api_sql']
    columns_str = ",".join(columns_ls)
    sql = f"SELECT {columns_str} FROM 01_datamart_layer_007_h_cw_df.temp_api_bill "
    count_sql = f'SELECT count(1) FROM ({sql}) a'
    log.info(count_sql)
    count_records = prod_execute_sql(conn_type=CONN_TYPE,
                                     sqltype='select',
                                     sql=count_sql)[0][0]
    # Placeholder WHERE clause (1=1) so real conditions can be appended by
    # callers later without reshaping the statement.
    sql = sql + 'WHERE  1=1 ' + ' ORDER BY order_number ASC '
    return count_records, sql, columns_ls
def exec_task(self, sql):
    """Fetch travel itinerary rows with *sql* and append them to the global
    ``dest_file`` as CSV.

    Row layout: bill_code, origin, destination, trip city, trip start date,
    trip end date. Punctuation and digits inside the trip-city field are
    replaced with spaces so the CSV separator stays unambiguous.
    """
    records = prod_execute_sql(conn_type=CONN_TYPE, sqltype='select', sql=sql)
    if not records:
        return
    # Hoist loop invariants: compile the cleanup pattern once and open the
    # output file once (the original rebuilt the pattern and reopened the
    # file for every record).
    clean_pattern = re.compile(r'[{}]+'.format(punctuation + digits))
    with open(dest_file, "a+", encoding='utf-8') as file:
        for record in records:
            bill_code = str(record[0])
            origin_name = str(record[1])      # departure place
            destin_name = str(record[2])      # destination
            travel_city_name = clean_pattern.sub(' ', str(record[3]))  # trip city
            travel_beg_date = str(record[4])  # travel start date
            travel_end_date = str(record[5])  # travel end date
            record_str = f'{bill_code},{origin_name},{destin_name},{travel_city_name},{travel_beg_date},{travel_end_date}'
            file.write(record_str + "\n")
def execute_02_data():
    """Paginate finance_travel_bill rows that carry any seller attribute and
    hand the page queries to ``async_operate`` for concurrent export.

    Pagination kicks in at >= 100 000 rows with 1 000-row pages; below that
    a single unpaged query is used.
    """
    init_file()
    columns_ls = [
        'destin_name', 'sales_name', 'sales_addressphone', 'sales_bank',
        'finance_travel_id', 'origin_name', 'invo_code'
    ]
    columns_str = ",".join(columns_ls)
    sql = """ select {columns_str} from 01_datamart_layer_007_h_cw_df.finance_travel_bill where sales_name is not null or sales_addressphone is not null or sales_bank is not null """.format(
        columns_str=columns_str).replace('\n', '').replace('\r', '').strip()
    log.info(sql)
    count_sql = 'select count(a.finance_travel_id) from ({sql}) a'.format(sql=sql)
    log.info(count_sql)
    records = prod_execute_sql(conn_type='test', sqltype='select', sql=count_sql)
    count_records = records[0][0]
    log.info(f'* count_records ==> {count_records}')

    max_size = 10 * 10000  # paginate only at or above this row count
    limit_size = 1 * 1000  # page size

    page_tpl = ("select {columns_str} from 01_datamart_layer_007_h_cw_df.finance_travel_bill "
                "where sales_name is not null or sales_addressphone is not null or sales_bank is not null "
                "order by finance_travel_id limit {limit_size} offset {offset_size}")
    select_sql_ls = []
    if count_records >= max_size:
        offset_size = 0
        # Single clamped loop replaces the original's two duplicated SQL
        # branches; it also no longer emits a spurious "limit 0" page when
        # count_records is an exact multiple of limit_size.
        while offset_size < count_records:
            page_size = min(limit_size, count_records - offset_size)
            tmp_sql = page_tpl.format(columns_str=columns_str,
                                      limit_size=page_size,
                                      offset_size=offset_size)
            select_sql_ls.append(tmp_sql)
            offset_size += page_size
    else:
        tmp_sql = "select {columns_str} from 01_datamart_layer_007_h_cw_df.finance_travel_bill where sales_name is not null or sales_addressphone is not null or sales_bank is not null ".format(
            columns_str=columns_str)
        select_sql_ls.append(tmp_sql)
    print('*** tmp_sql => ', tmp_sql)
    log.info(f'*** 开始分页查询,一共 {len(select_sql_ls)} 页')
    async_operate(select_sql_ls)
def demo2():
    """Smoke-test production connectivity by running a small query twice."""
    start_time = time.perf_counter()
    try:
        prod_sql = 'select finance_travel_id,bill_id from 01_datamart_layer_007_h_cw_df.finance_travel_bill t limit 5'
        test_sql = 'select * from 01_datamart_layer_007_h_cw_df.payment_result_info limit 5'
        print(test_sql)
        rows = prod_execute_sql(conn_type='prod', sqltype='select', sql=prod_sql)
        print('111 *** query_kudu_data=>', len(rows))
        for row in rows:
            print(row)
        # Second execution exercises connection reuse.
        rows = prod_execute_sql(conn_type='prod', sqltype='select', sql=prod_sql)
        print('222 *** query_kudu_data=>', len(rows))
    except Exception as exc:
        print(exc)
def exec_step08(self):
    """Step 8: run the performance-API scripts.

    Clears the landing table, loads the staged SQL for the four expense
    categories, then executes each category batch on its own worker thread
    and waits for all four to finish.
    """
    # Clear the landing table first.
    sql = 'delete from 01_datamart_layer_007_h_cw_df.finance_performance_api'
    log.info('* 第8步,开始清空落地表数据')
    prod_execute_sql(conn_type=CONN_TYPE, sqltype='insert', sql=sql)
    log.info(
        '* 第8步,成功清空落地表数据 ==> 01_datamart_layer_007_h_cw_df.finance_performance_api'
    )
    # One (records, classify) batch per expense category, in a fixed order.
    fees = ['差旅费', '会议费', '办公费', '车辆使用费']
    batches = [(query_temp_performance_bill_by_target_classify(fee), fee)
               for fee in fees]
    threadPool = ThreadPoolExecutor(max_workers=4, thread_name_prefix="thr")
    all_task = [
        threadPool.submit(self.__exec_step08_task, recs, fee)
        for recs, fee in batches
    ]
    wait(all_task, return_when=ALL_COMPLETED)
    threadPool.shutdown(wait=True)
def exec_task(sql):
    """Fetch meeting-bill rows with *sql*, enrich them with invoice-location
    lookups, and append them to the global ``dest_file``.

    Fields are joined with the \\u0001 (SOH) separator; free-text fields go
    through ``process_invalid_content`` and the derived locations through
    ``match_area.filter_area``.
    """
    records = prod_execute_sql(conn_type=CONN_TYPE, sqltype='select', sql=sql)
    if not records:
        return
    result = []
    for record in records:
        finance_meeting_id = str(record[0])
        meet_addr = str(record[1])           # meeting address
        sales_name = str(record[2])          # invoicing company
        sales_addressphone = str(record[3])  # invoicing address & phone
        sales_bank = str(record[4])          # invoicing bank
        sales_taxno = str(record[5])         # taxpayer id
        invo_code = str(record[6])           # invoice code
        # Derive invoice issue place / city / province for the record.
        sales_address, receipt_city, receipt_province = operate_every_record(record)
        sales_taxno = process_invalid_content(sales_taxno)
        meet_addr = process_invalid_content(meet_addr)
        sales_name = process_invalid_content(sales_name)
        sales_addressphone = process_invalid_content(sales_addressphone)
        sales_bank = process_invalid_content(sales_bank)
        sales_address = match_area.filter_area(
            process_invalid_content(sales_address))
        receipt_city = match_area.filter_area(
            process_invalid_content(receipt_city))
        receipt_province = match_area.filter_area(
            process_invalid_content(receipt_province))
        pstng_date = '无'  # posting date is not provided by this query
        record_str = f'{finance_meeting_id}\u0001{sales_taxno}\u0001{invo_code}\u0001{meet_addr}\u0001{sales_name}\u0001{sales_addressphone}\u0001{sales_bank}\u0001{sales_address}\u0001{receipt_province}\u0001{receipt_city}\u0001{pstng_date}'
        result.append(record_str)
        # Flush every 100 rows so memory stays bounded on big result sets.
        # PERF FIX: the original reopened the file once per written line;
        # open once per flush and use writelines instead.
        if len(result) >= 100:
            with open(dest_file, "a+", encoding='utf-8') as file:
                file.writelines(item + "\n" for item in result)
            result = []
    if result:
        with open(dest_file, "a+", encoding='utf-8') as file:
            file.writelines(item + "\n" for item in result)
def __exec_step08_task(self, records, target_classify):
    """Run every staged step-8 SQL for one expense category and record the
    outcome in finance_data_process.

    :param records: rows of (performance_sql, order_number).
    :param target_classify: expense-category label used in the audit rows.
    """
    daily_start_date = get_current_time()
    # BUG FIX: pre-assign so the except block cannot raise NameError when a
    # failure happens before the first loop assignment (which would mask the
    # real error).
    sql = ''
    order_number = ''
    try:
        for record in records:
            sql = str(record[0])
            order_number = str(record[1])
            prod_execute_sql(conn_type=CONN_TYPE, sqltype='insert', sql=sql)
            log.info(
                f'* 第8步,成功执行序号为 {order_number} ,target_classify为 {target_classify} 的SQL'
            )
        process_status = 'sucess'
        daily_end_date = get_current_time()
        step_number = '8'
        operate_desc = f'成功执行绩效接口API表的目标分类为{target_classify}的SQL'
        orgin_source = 'kudu分析表/落地表'
        destin_source = '绩效API中间表'
        importdate = get_yyyymmdd_date()
        insert_finance_data_process(process_status, target_classify,
                                    daily_start_date, daily_end_date,
                                    step_number, operate_desc, orgin_source,
                                    destin_source, importdate)
    except Exception as e:
        log.error(
            f'* 执行第8步,序号为 {order_number} 的SQL报错,target_classify为 {target_classify} 的SQL'
        )
        print(e)
        process_status = 'false'
        daily_end_date = get_current_time()
        step_number = '8'
        operate_desc = str(e)
        orgin_source = 'kudu分析表/落地表'
        destin_source = '绩效API中间表'
        importdate = get_yyyymmdd_date()
        insert_finance_data_process(process_status, target_classify,
                                    daily_start_date, daily_end_date,
                                    step_number, operate_desc, orgin_source,
                                    destin_source, importdate)
def insert_finance_company_code(company_name, company_code, company_old_code, iscompany):
    """Insert one row into finance_company_code and return its generated id.

    :return: the freshly created uuid used as the row id.
    :raises RuntimeError: wraps any failure from the underlying execute.
    """
    row_id = create_uuid()
    try:
        log.info('*** insert_finance_company_code ***')
        # NOTE(review): values are interpolated directly into SQL — confirm
        # the inputs are trusted or switch to a parameterised insert.
        sql = f""" insert into 01_datamart_layer_007_h_cw_df.finance_company_code(id, company_name, company_code, company_old_code, iscompany) values("{row_id}", "{company_name}", "{company_code}", "{company_old_code}","{iscompany}" ) """.replace(
            '\n', '').replace('\r', '').strip()
        log.info(sql)
        prod_execute_sql(conn_type=CONN_TYPE, sqltype='insert', sql=sql)
        return row_id
    except Exception as exc:
        print(exc)
        raise RuntimeError(exc)
def exec_temp_performance_bill(performance_ids, is_log=True):
    """Execute every staged performance SQL selected by *performance_ids*.

    When *is_log* is true, a shell-daily audit row is written before the run
    and updated to 'done' / 'error' afterwards.

    :raises RuntimeError: wraps and re-raises any failure.
    """
    try:
        daily_start_date = get_current_time()
        if is_log:
            daily_id = insert_finance_shell_daily(
                daily_status='ok',
                daily_start_date=daily_start_date,
                daily_end_date='',
                unusual_point='',
                daily_source='sql',
                operate_desc=f'doing',
                unusual_infor='',
                task_status='doing',
                daily_type='绩效')
        # Each staged row carries the SQL to run in its first column.
        for record in query_temp_performance_bill(performance_ids):
            prod_execute_sql(conn_type=CONN_TYPE,
                             sqltype='insert',
                             sql=record[0])
        if is_log:
            update_finance_shell_daily(daily_id,
                                       get_current_time(),
                                       task_status='done',
                                       operate_desc=f'成功执行绩效表中的SQL')
    except Exception as exc:
        print(exc)
        if is_log:
            update_finance_shell_daily(daily_id,
                                       get_current_time(),
                                       task_status='error',
                                       operate_desc=str(exc))
        raise RuntimeError(exc)
def __exec_step06_task(self, records, target_classify):
    """Run every staged step-6 SQL for one invoice category and record the
    outcome in finance_data_process.

    :param records: rows of (sql, receipt_id).
    :param target_classify: invoice-category label used in the audit rows.
    """
    # BUG FIX: pre-assign so the except block cannot hit NameError when a
    # failure occurs before the first loop assignment (which would mask the
    # real error).
    sql = ''
    receipt_id = ''
    try:
        daily_start_date = get_current_time()
        for idx, record in enumerate(records):
            sql = str(record[0])
            receipt_id = str(record[1])
            prod_execute_sql(conn_type=CONN_TYPE, sqltype='insert', sql=sql)
            log.info(
                f'* 第6步,成功执行 target_classify为 {target_classify},receipt_id为 {receipt_id} 的SQL'
            )
        process_status = 'sucess'
        daily_end_date = get_current_time()
        step_number = '6'
        operate_desc = f'成功执行目标分类为{target_classify}的SQL'
        orgin_source = '发票信息hive表'
        destin_source = 'kudu分析表'
        importdate = get_yyyymmdd_date()
        insert_finance_data_process(process_status, target_classify,
                                    daily_start_date, daily_end_date,
                                    step_number, operate_desc, orgin_source,
                                    destin_source, importdate)
    except Exception as e:
        log.error(f'* 执行第6步,目标分类为{target_classify} ,序号为{receipt_id}的SQL报错')
        log.info(sql)
        print(e)
        process_status = 'false'
        daily_end_date = get_current_time()
        step_number = '6'
        operate_desc = str(e)
        orgin_source = '发票信息hive表'
        destin_source = 'kudu分析表'
        importdate = get_yyyymmdd_date()
        insert_finance_data_process(process_status, target_classify,
                                    daily_start_date, daily_end_date,
                                    step_number, operate_desc, orgin_source,
                                    destin_source, importdate)
def demo1():
    """Repair partitions of the meeting temp-analysis table (test conn)."""
    # Kept for manual experimentation; not executed below.
    sql = """ describe analytic_layer_zbyy_sjbyy_003_cwzbbg.finance_all_targets """
    print('===' * 30)
    sql1 = """ msck repair table 02_logical_layer_007_h_lf_cw.finance_meeting_linshi_analysis """
    prod_execute_sql(conn_type='test', sqltype='insert', sql=sql1)
def process_finance_shell_daily():
    """Dump every '数据处理' row from finance_shell_daily to stdout."""
    sql2 = 'select * from 01_datamart_layer_007_h_cw_df.finance_shell_daily where daily_type="数据处理" '
    log.info(sql2)
    for row in prod_execute_sql(conn_type=CONN_TYPE, sqltype='select', sql=sql2):
        print(row)
def demo5():
    """Print a 5-row sample of posting-date / account-period pairs, one
    blank line after each row."""
    sel_sql = """ select pstng_date, account_period from 01_datamart_layer_007_h_cw_df.finance_travel_bill limit 5 """
    rows = prod_execute_sql(conn_type=CONN_TYPE, sqltype='select', sql=sel_sql)
    for row in rows:
        print(row)
        print()
def demo3():
    """Ad-hoc inspection: print the latest finance_data_process row per
    (target_classify, step_number) for a fixed query date."""
    # Alternative probes kept for manual experimentation (not executed).
    sel_sql = 'SELECT unusual_shell,isalgorithm,unusual_id FROM 01_datamart_layer_007_h_cw_df.finance_unusual WHERE cost_project="会议费" AND sign_status="1" ORDER BY unusual_id ASC'
    sel_sql2 = 'SELECT id,company_name,company_code,company_old_code,iscompany FROM 01_datamart_layer_007_h_cw_df.finance_company_code '
    sel_sql3 = """ select * FROM 01_datamart_layer_007_h_cw_df.finance_data_process WHERE ( from_unixtime(unix_timestamp(to_date(importdate),'yyyy-MM-dd'),'yyyyMMdd') = '20220117' or importdate = '20220117' ) ORDER BY step_number ASC """
    query_date = '20220118'
    # row_number() keeps only the newest audit row per classify/step pair.
    sel_sql4 = f""" select * from ( select step_number, row_number() over (partition by target_classify,step_number order by daily_end_date desc) as numbers, process_id, process_status, target_classify, daily_start_date, daily_end_date, operate_desc, orgin_source, destin_source, importdate from 01_datamart_layer_007_h_cw_df.finance_data_process ) y where y.numbers=1 AND importdate = '{query_date}' order by step_number """
    print(sel_sql4)
    rows = prod_execute_sql(conn_type=CONN_TYPE, sqltype='select', sql=sel_sql4)
    print('records => ', len(rows))
    for row in rows:
        print(row)
def query_temp_performance_bill_by_target_classify(target_classify):
    """Load staged (performance_sql, order_number) rows for one category,
    ordered by execution order.

    :param target_classify: category label to filter the staging table by.
    :raises RuntimeError: wraps any query failure.
    """
    try:
        sql = f'SELECT performance_sql,order_number FROM 01_datamart_layer_007_h_cw_df.temp_performance_bill WHERE target_classify="{target_classify}" order by order_number asc'
        log.info(sql)
        return prod_execute_sql(conn_type=CONN_TYPE, sqltype='select', sql=sql)
    except Exception as exc:
        print(exc)
        raise RuntimeError(exc)
def process_finance_unusual2():
    """Print every (unusual_code, unusual_id) pair from finance_unusual.

    The UPDATE statement that would copy unusual_id into unusual_code is
    kept here but deliberately not executed.
    """
    update_sql = 'update 01_datamart_layer_007_h_cw_df.finance_unusual set unusual_code= unusual_id '
    sql2 = 'select unusual_code,unusual_id from 01_datamart_layer_007_h_cw_df.finance_unusual order by unusual_id asc '
    rows = prod_execute_sql(conn_type=CONN_TYPE, sqltype='select', sql=sql2)
    print('总记录数 =>', len(rows))
    for row in rows:
        print(row)
def execute_sql():
    """Return the distinct non-empty commodity names from finance_car_bill.

    :return: list of commodity-name strings.
    """
    sql = "select distinct commodityname from 01_datamart_layer_007_h_cw_df.finance_car_bill where commodityname is not null and commodityname !=''"
    records = prod_execute_sql(conn_type='test', sqltype='select', sql=sql)
    # Dropped the original's unused ``commodityname_str`` local and replaced
    # the append loop with a comprehension.
    return [str(record[0]) for record in records]
def exec_step07(self):
    """Step 7 (incremental flow): empty the result table, then run the base
    step-7 logic.

    :raises RuntimeError: wraps any failure from the delete or the parent
        implementation.
    """
    log.info("*" * 30)
    log.info('***** 执行第7步,增量数据流程 *****')
    log.info("*" * 30)
    try:
        # Clear the result table before the parent class repopulates it.
        delete_sql = 'delete from analytic_layer_zbyy_cwyy_014_cwzbbg.finance_all_targets'
        prod_execute_sql(conn_type=CONN_TYPE, sqltype='insert', sql=delete_sql)
        log.info('增量数据流程执行第7步,首先删除结果表中的数据')
        super().exec_step07()
    except Exception as exc:
        raise RuntimeError(exc)
def check_execute_step05(self):
    """
    Check the process table: if step 5 completed today but none of steps
    6-9 have run yet, kick off ``self.task()``.

    Reads finance_data_process rows whose importdate matches today's date
    (matched in both date encodings) and whose process_status is 'sucess'
    (typo preserved — that is the literal stored in the table).
    :return: None
    """
    try:
        columns_ls = [
            'process_id', 'process_status', 'daily_start_date',
            'daily_end_date', 'step_number', 'operate_desc', 'orgin_source',
            'destin_source', 'importdate'
        ]
        columns_str = ",".join(columns_ls)
        # Today's date as yyyymmdd, derived from "YYYY-MM-DD HH:MM:SS".
        t = get_current_time()
        data = t.split(' ')
        year_month_day = str(data[0]).replace('-', '')
        # year_month_day = '20220118'  # manual override used while testing
        sel_sql = f""" select {columns_str} FROM 01_datamart_layer_007_h_cw_df.finance_data_process WHERE ( from_unixtime(unix_timestamp(to_date(importdate),'yyyy-MM-dd'),'yyyyMMdd') = '{year_month_day}' OR importdate = '{year_month_day}' ) AND process_status = 'sucess' ORDER BY step_number ASC """
        log.info(sel_sql)
        records = prod_execute_sql(conn_type=CONN_TYPE,
                                   sqltype='select',
                                   sql=sel_sql)
        is_execute_step05 = False
        is_excute_step6789 = False
        if len(records) > 0:
            for record in records:
                # step_number is the 5th selected column (index 4).
                step_number = str(record[4])
                if step_number == '5':
                    is_execute_step05 = True
                if step_number in ['6', '7', '8', '9']:
                    is_excute_step6789 = True
        else:
            log.info('*** 没有查询数据 ***')
        # Steps 1-5 done and 6-9 not yet run today: start the task.
        if is_execute_step05 and not is_excute_step6789:
            self.task()
        else:
            log.info('*** 不执行第6,7,8,9 步的任务 ***')
    except Exception as e:
        print(e)
def query_province_names(self, grade='1'):
    """Return every area name at the given administrative *grade*,
    alphabetically ordered (grade '1' corresponds to provinces)."""
    sel_sql = f'select area_name from 01_datamart_layer_007_h_cw_df.finance_province_city where grade="{grade}" order by area_name '
    rows = prod_execute_sql(conn_type=CONN_TYPE, sqltype='select', sql=sel_sql)
    return [row[0] for row in rows]
def finance_unusual_update():
    """Flask handler: update the unusual_shell script of one finance_unusual
    row, identified by unusual_id (both values read from the POST form).

    Returns a JSON response: 200 on success, 500 when unusual_id is missing
    or the update fails.
    """
    log.info('----- test finance_unusual update -----')
    unusual_id = request.form.get('unusual_id') or None
    unusual_shell = request.form.get('unusual_shell') or None
    log.info(f'unusual_id={unusual_id}')
    # Escape/normalise the shell text before it is embedded in SQL.
    unusual_shell = transfer_content(unusual_shell)
    log.info(unusual_shell)
    if unusual_id is None:
        return jsonify({
            "result": "error",
            "details": "输入的 unusual_id 不能为空",
            "code": 500
        })
    sql = f""" update 01_datamart_layer_007_h_cw_df.finance_unusual set unusual_shell="{unusual_shell}" where unusual_id='{unusual_id}' """
    log.info(sql)
    try:
        prod_execute_sql(conn_type=CONN_TYPE, sqltype='insert', sql=sql)
        return jsonify({
            'result': 'ok',
            'code': 200,
            'details': "成功修改一条'检查点相关'记录"
        })
    except Exception as exc:
        print(exc)
        return jsonify({'result': 'error', 'code': 500, 'details': str(exc)})