def run_unzip_file(conf: ConfigData, the_date: str):
    """Unzip the two daily ZC archives ("<date>.zip" and "<date>_agt.zip")
    into the ZC data directory.

    :param conf: project configuration accessor.
    :param the_date: date string; normalized to "YYYYMMDD" and validated.
    :return: None; silently returns on an invalid date.
    """
    the_date = StrTool.get_the_date_str(the_date)
    # Guard: only an 8-character "YYYYMMDD" string is usable.
    if not (isinstance(the_date, str) and len(the_date) == 8):
        return
    root_path = conf.get_data("allinpay_data_zc")
    destdir = conf.get_data("allinpay_data_zc")  # archives are unpacked in place
    # ifile = '/home/testFolder/logflow/bl_shouyinbao/20181101/9999100000/t1_trxrecord_20181101_V2.csv'
    # ofile = '/home/testFolder/logflow/bl_shouyinbao/UTF8/20181101/9999100000/t1_trxrecord_20181101_V2.csv'
    print("Start\n")
    # The two archive flavours differ only in the zip-name id:
    # 3 = "<date>.zip", 5 = "<date>_agt.zip".  Handle both with one loop
    # instead of the original copy-pasted stanzas.
    for name_id in (3, 5):
        f_name = conf.get_zip_name(the_date, name_id)
        a_file = os.path.join(root_path, f_name)
        if MyLocalFile.check_file(a_file):
            # Only extract members whose names start with the date.
            MyLocalFile.unzip_the_file(a_file, destdir, p_name=the_date + "*")
def run_unzip_file(conf: ConfigData, p_date: str):
    """Locate the day's zip archive under the zip dir and extract the
    matching members into the day's data dir."""
    p_date = StrTool.get_the_date_str(p_date)
    # Guard clause: require an 8-character "YYYYMMDD" string.
    if type(p_date) is not str or len(p_date) != 8:
        return
    month_part = p_date[0:6]  # "YYYYMM"
    day_part = p_date[6:8]    # "DD"
    zip_dir = os.path.join(conf.get_zip_path(), p_date)
    out_dir = os.path.join(conf.get_data_path(), p_date)
    print("Start\n")
    archive = os.path.join(zip_dir, conf.get_zip_name(p_date))
    member_pattern = conf.get_file_name(p_date)
    if MyLocalFile.check_file(archive):
        MyLocalFile.unzip_the_file(archive, out_dir, p_name=member_pattern)
def runCopyFile(conf: ConfigData, isBaoli=True): thedate = conf.test_date() #"20181101" root_path = conf.get_zip_path(1) destdir = conf.get_data_path(1) destdir = os.path.join(destdir, thedate) f_name = conf.get_zip_name("*", 1) # "t1_trxrecord_" # "_V2.csv" print("Start\n") branchs = MyLocalFile.get_child(root_path) for aBranch in branchs: if MyLocalFile.check_branch(aBranch): monthes = MyLocalFile.get_child(aBranch) for aMonth in monthes: theMonth = MyLocalFile.check_month(aMonth) if theMonth > 0: days = MyLocalFile.get_child(aMonth) for aDay in days: theDay = MyLocalFile.check_day(aDay) if theDay > 0: files = MyLocalFile.get_child(aDay) for aFile in files: if MyLocalFile.check_file(aFile, p_name=f_name): copyTheFile(destdir, aBranch, theMonth, theDay, aFile, 1)
def run_remove_files(conf: ConfigData, the_date: str, delta_day=0):
    """Delete the day's local data and utf8 directories plus the matching
    directory on HDFS."""
    day_str = StrTool.get_the_date_str(the_date, delta_day)  # "20181101"
    doomed_dirs = (
        os.path.join(conf.get_data_path(1), day_str),
        os.path.join(conf.get_utf8_path(1), day_str),
    )
    for folder in doomed_dirs:
        shutil.rmtree(folder, ignore_errors=True)
    remote_dir = str(pathlib.PurePosixPath(conf.get_hdfs_path()).joinpath(day_str))
    MyClient(conf.hdfs_ip()).delete(remote_dir, recursive=True)  # "http://10.2.201.197:50070"
def run_remove_hive(conf: ConfigData, the_date: str, delta_day=0):
    """Delete the day's loaded files from the Hive warehouse table over SSH."""
    day_str = StrTool.get_the_date_str(the_date, delta_day)  # "20181101"
    target_table = conf.get_table_name()  # e.g. "rds_posflow.loginfo_rsp_bl"
    # Widen the pattern so suffixed variants before the extension also match.
    file_pattern = conf.get_file_name(day_str).replace('.', '*.')
    MyHdfsFile.delete_hive_ssh(conf.get_data("cdh_ip"),
                               table=target_table,
                               p_name=file_pattern,
                               username=conf.get_data("cdh_user"),
                               password=conf.get_data("cdh_pass"))
def run_hive(conf: ConfigData, the_date: str):
    """If the day's file exists on HDFS, LOAD it into the configured Hive table."""
    hdfs_client = Client(conf.hdfs_ip())  # "http://10.2.201.197:50070"
    conn = connect(host=conf.hive_ip(), port=conf.hive_port(),
                   auth_mechanism=conf.hive_auth(), user=conf.hive_user())
    cur = conn.cursor()
    print("Start\n")
    the_date = StrTool.get_the_date_str(the_date)  # "20181101"
    # e.g. "/data/posflow/allinpay_utf8_zc/20181101/"
    day_dir = pathlib.PurePosixPath(conf.get_hdfs_path()).joinpath(the_date)
    target_file = str(day_dir.joinpath(conf.get_file_name(the_date)))
    target_table = conf.get_table_name()
    if MyHdfsFile.isfile(hdfs_client, target_file):
        sql = "LOAD DATA INPATH '{}' INTO TABLE {}".format(target_file, target_table)
        print("OK" + " " + sql + "\n")
        cur.execute(sql)  # , async=True)
    cur.close()
    conn.close()
def run_remove_hive(conf: ConfigData, the_date: str, delta_day=0):
    """Remove the day's data from Hive, by project flavour.

    Project 1 deletes the loaded warehouse files over SSH
    ("/user/hive/warehouse/rds_posflow.db/.../t1_trxrecord_<date>_V2*.csv");
    project 2 drops the day's partition via ALTER TABLE.

    :param conf: project configuration accessor (carries m_project_id).
    :param the_date: base date string, normalized via StrTool.
    :param delta_day: day offset applied to the_date.
    """
    f_date_str = StrTool.get_the_date_str(the_date, delta_day)  # "20181101"
    del_table = conf.get_table_name()  # hive_table="rds_posflow.t1_trxrecprd_v2"
    # BUG FIX: both branches previously tested the module-global `the_conf`
    # instead of the `conf` parameter, breaking any call where they differ.
    if conf.m_project_id == 1:
        del_file = conf.get_file_name(f_date_str).replace('.', '*.')
        MyHdfsFile.delete_hive_ssh(conf.get_data("cdh_ip"),
                                   table=del_table,
                                   p_name=del_file,
                                   username=conf.get_data("cdh_user"),
                                   password=conf.get_data("cdh_pass"))
    if conf.m_project_id == 2:
        conn = connect(host=conf.hive_ip(), port=conf.hive_port(),
                       auth_mechanism=conf.hive_auth(), user=conf.hive_user())
        cur = conn.cursor()
        # BUG FIX: use the delta-adjusted f_date_str (not the raw the_date) so
        # delta_day is honoured, e.g.
        # "ALTER TABLE rds_posflow.t1_trxrecprd_v2_tmp DROP IF EXISTS PARTITION(p_date=20190208)"
        sql = "ALTER TABLE {} DROP IF EXISTS PARTITION( p_date={} )".format(
            del_table, f_date_str)
        print(sql)
        cur.execute(sql)
        cur.close()
        conn.close()
def run_hive(conf: ConfigData, the_date: str):
    """LOAD the day's four ZC response files into their respective Hive tables."""
    client = Client(conf.hdfs_ip())  # "http://10.2.201.197:50070"
    conn = connect(host=conf.hive_ip(), port=conf.hive_port(),
                   auth_mechanism=conf.hive_auth(), user=conf.hive_user())
    cur = conn.cursor()
    the_date = StrTool.get_the_date_str(the_date)  # "20181101"
    base = pathlib.PurePosixPath(conf.get_data("hdfs_dir_zc"))  # "/data/posflow/allinpay_utf8_zc/"
    print("Start\n")
    # Each file-extension config key is paired with the Hive table it loads into:
    # file_ext3 = _loginfo_rsp.txt, file_ext4 = _loginfo_rsp_agt.txt,
    # file_ext5 = _rxinfo_rsp.txt,  file_ext6 = _rxinfo_rsp_agt.txt
    pairs = (("file_ext3", "hive_table3"),
             ("file_ext4", "hive_table4"),
             ("file_ext5", "hive_table5"),
             ("file_ext6", "hive_table6"))
    for ext_key, table_key in pairs:
        hdfs_file = str(base.joinpath(the_date + conf.get_data(ext_key)))
        if MyHdfsFile.isfile(client, hdfs_file):
            sql = "LOAD DATA INPATH '{}' INTO TABLE {}".format(
                hdfs_file, conf.get_data(table_key))
            print("OK" + " " + sql + "\n")
            cur.execute(sql)  # , async=True)
    cur.close()
    conn.close()
def run_remove_files(conf: ConfigData, the_date: str, delta_day=0):
    """Remove the day's local data/utf8 directories and its HDFS directory."""
    day_str = StrTool.get_the_date_str(the_date, delta_day)  # "20181101"
    hdfs_client = MyClient(conf.hdfs_ip())  # "http://10.2.201.197:50070"
    # Local cleanup: allinpay_data_bl and allinpay_utf8_bl day folders.
    for local_root in (conf.get_data_path(), conf.get_utf8_path()):
        shutil.rmtree(os.path.join(local_root, day_str), ignore_errors=True)
    # Remote cleanup: hdfs_dir_bl day folder.
    hdfs_dir = str(pathlib.PurePosixPath(conf.get_hdfs_path()).joinpath(day_str))
    hdfs_client.delete(hdfs_dir, recursive=True)
def run_unzip_file(conf: ConfigData, the_date, folder_type=2):
    """Scan <zip_root>/<branch>/<month>/<day>/ for the requested day's zip
    files and extract each into the data dir.

    :param conf: project configuration accessor.
    :param the_date: date string; normalized to "YYYYMMDD".
    :param folder_type: 1 -> output layout data/<month>/<day>/<branch>,
        otherwise data/<monthday>/<branch>.
    """
    the_date = StrTool.get_the_date_str(the_date)
    if (type(the_date) is str) and len(the_date) == 8:
        m_month = the_date[0:6]  # "YYYYMM"
        m_day = the_date[6:8]    # "DD"
    else:
        return
    zip_path = conf.get_zip_path()
    data_path = conf.get_data_path()
    f_name = conf.get_zip_name("")  # name pattern: "t1_trxrecord_" + "" + "_V2.csv"
    print("Start\n")
    # Real SYB folders don't have a date folder at the top level, so walk
    # branch -> month -> day and filter to the requested date.
    branches = MyLocalFile.get_child_dir(zip_path)
    for aBranch in branches:
        if MyLocalFile.check_branch(aBranch):
            months = MyLocalFile.get_child_dir(aBranch)
            for aMonth in months:
                the_month = MyLocalFile.check_month(aMonth)
                # Only descend into the directory matching the requested month.
                if the_month > 0 and "{:0>6d}".format(the_month) == m_month:
                    day_list = MyLocalFile.get_child_dir(aMonth)
                    for aDay in day_list:
                        the_day = MyLocalFile.check_day(aDay)
                        if the_day > 0 and "{:0>2d}".format(the_day) == m_day:
                            files = MyLocalFile.get_child_file(aDay)
                            for aFile in files:
                                if MyLocalFile.check_file(aFile, p_name=f_name):
                                    short_name = os.path.basename(aBranch)
                                    if folder_type == 1:
                                        # data/<YYYYMM>/<DD>/<branch>
                                        new_path = os.path.join(
                                            data_path, m_month, m_day, short_name)
                                    else:
                                        # data/<YYYYMMDD>/<branch>
                                        new_path = os.path.join(
                                            data_path, m_month + m_day, short_name)
                                    p_name = conf.get_file_name(m_month + m_day)
                                    MyLocalFile.unzip_the_file(
                                        aFile, new_path, p_name)
def run_hive_test(conf: ConfigData):
    """Run the configured smoke-test query against Hive and print the result
    as a pandas DataFrame."""
    conn = connect(host=conf.hive_ip(),              # '10.2.201.197'
                   port=conf.hive_port(),            # 10000
                   auth_mechanism=conf.hive_auth(),  # 'PLAIN'
                   user=conf.hive_user(),            # "hdfs"
                   password='******')
    cur = conn.cursor()
    cur.execute(conf.hive_test())  # e.g. "select * from test.test1"
    frame = as_pandas(cur)
    print(frame)
    cur.close()
    conn.close()
def run_conv_file_local(conf: ConfigData, the_date: str, is_baoli=True):
    """Convert the day's per-branch files from the data dir into the UTF-8 dir."""
    the_date = StrTool.get_the_date_str(the_date)
    src_root = conf.get_data_path()
    out_root = conf.get_utf8_path()
    wanted = conf.get_file_name(the_date)  # "t1_trxrecord_" + the_date + "_V2.csv"
    print("Start\n")
    for branch in MyLocalFile.get_child(os.path.join(src_root, the_date)):
        if not MyLocalFile.check_branch(branch):
            continue
        for candidate in MyLocalFile.get_child(branch):
            if MyLocalFile.check_file(candidate, wanted):
                target = os.path.join(out_root, the_date,
                                      os.path.basename(branch), wanted)
                MyLocalFile.conv_file_local(candidate, target, True)
def run_hdfs_test(conf: ConfigData):
    """Smoke-test the HDFS connection by listing the root directory."""
    client = Client(conf.hdfs_ip())  # "http://10.2.201.197:50070"
    listing = client.list('/', status=False)
    print(listing)
def run_sftp_file(conf: ConfigData, the_date: str):
    """Download the day's "*.zip" files from the ZC SFTP server into the ZC
    data directory."""
    the_date = StrTool.get_the_date_str(the_date)
    # Guard clause: require an 8-character "YYYYMMDD" string.
    if type(the_date) is not str or len(the_date) != 8:
        return
    remote_dir = conf.get_data("allinpay_ftp_folder_zc")
    local_dir = conf.get_data("allinpay_data_zc")
    session = sftp_tool.Sftp_Tool(h=conf.get_data("allinpay_ftp_ip_zc"),
                                  p=int(conf.get_data("allinpay_ftp_port_zc")),
                                  u=conf.get_data("allinpay_ftp_user_zc"),
                                  s=conf.get_data("allinpay_ftp_pass_zc"),
                                  r=remote_dir,
                                  d=local_dir)
    session.openSFTP()
    session.download_files(from_dir=remote_dir, to_dir=local_dir,
                           p_name=the_date + "*.zip")
def run_conv_file_hdfs(conf: ConfigData, the_date: str, is_baoli=True):
    """Convert each of the day's per-branch files and push the result to HDFS."""
    the_date = StrTool.get_the_date_str(the_date)
    client = Client(conf.hdfs_ip())  # "http://10.2.201.197:50070"
    src_root = conf.get_data_path()  # e.g. 'D:/DATA/UNZIP/'
    hdfs_root = conf.get_hdfs_path()
    wanted = conf.get_file_name(the_date)  # "t1_trxrecord_" + the_date + "_V2.csv"
    print("Start\n")
    for branch in MyLocalFile.get_child(os.path.join(src_root, the_date)):
        if not MyLocalFile.check_branch(branch):
            continue
        for candidate in MyLocalFile.get_child(branch):
            if MyLocalFile.check_file(candidate, wanted):
                dest = os.path.join(hdfs_root, the_date,
                                    os.path.basename(branch), wanted)
                MyHdfsFile.conv_file_hdfs(candidate, dest, client)
def run_conv_file_local_to_hdfs(conf: ConfigData, the_date: str):
    """Convert the day's four ZC response files to UTF-8 locally and upload
    each to HDFS, opening up permissions on the uploaded file.

    :param conf: project configuration accessor.
    :param the_date: date string; normalized to "YYYYMMDD".
    """
    the_date = StrTool.get_the_date_str(the_date)
    client = MyClient(conf.hdfs_ip())  # "http://10.2.201.197:50070"
    root_path = conf.get_data("allinpay_data_zc")
    dest_dir1 = conf.get_data("allinpay_utf8_zc")  # local UTF-8 output dir
    dest_dir2 = conf.get_data("hdfs_dir_zc")       # HDFS target dir
    file_ext3 = conf.get_data("file_ext3")  # _loginfo_rsp.txt      e.g. 20181101_loginfo_rsp.txt
    file_ext4 = conf.get_data("file_ext4")  # _loginfo_rsp_agt.txt  e.g. 20181101_loginfo_rsp_agt.txt
    file_ext5 = conf.get_data("file_ext5")  # _rxinfo_rsp.txt       e.g. 20181101_rxinfo_rsp.txt
    file_ext6 = conf.get_data("file_ext6")  # _rxinfo_rsp_agt.txt   e.g. 20181101_rxinfo_rsp_agt.txt
    print("Start\n")
    files = MyLocalFile.get_child_file(root_path)
    for aFile in files:
        short_name = os.path.basename(aFile).lower()
        # Case-insensitive match against any of the four expected names.
        if short_name == (the_date + file_ext3).lower() or \
           short_name == (the_date + file_ext4).lower() or \
           short_name == (the_date + file_ext5).lower() or \
           short_name == (the_date + file_ext6).lower():
            to_file1 = str(pathlib.PurePath(dest_dir1).joinpath(pathlib.PurePath(aFile).name))
            to_file2 = str(pathlib.PurePosixPath(dest_dir2).joinpath(pathlib.PurePath(aFile).name))
            MyLocalFile.conv_file_local(aFile, to_file1, need_first_line=True)
            MyHdfsFile.safe_make_dir(client, to_file2)
            # client.newupload(to_file2, to_file1, encoding='utf-8')
            the_file = client.status(to_file2, strict=False)
            if the_file is None:
                # Not on HDFS yet: upload, then loosen permissions.
                # NOTE(review): permission is passed as int 777, not the octal
                # string "777" the webhdfs API usually expects -- confirm.
                client.upload(to_file2, to_file1)
                client.set_permission(to_file2, 777)
                # client.set_owner(thePath,owner='hdfs',group='supergroup')
            elif the_file['type'].lower() == 'file':  # vs 'directory'
                client.set_permission(to_file2, 777)
def run_conv_file_local_to_hdfs(conf: ConfigData, the_date: str):
    """Convert the day's BL file to UTF-8 (optionally prefixing each row with
    the date) and upload it into the day's HDFS directory.

    :param conf: project configuration accessor.
    :param the_date: date string; normalized to "YYYYMMDD".
    """
    the_date = StrTool.get_the_date_str(the_date)
    a_client = MyClient(conf.hdfs_ip())  # "http://10.2.201.197:50070"
    # allinpay_data_bl
    data_path = os.path.join(conf.get_data_path(), the_date)
    # allinpay_utf8_bl
    dest_dir1 = os.path.join(conf.get_utf8_path(), the_date)
    # hdfs_dir_bl
    dest_dir2 = str(
        pathlib.PurePosixPath(conf.get_hdfs_path()).joinpath(the_date))
    # file_ext7 = conf.get_data("file_ext7")  # _loginfo_rsp_bl_new.csv
    # file_ext8 = conf.get_data("file_ext8")  # _rsp_agt_bl_new.del
    # file_ext9 = conf.get_data("file_ext9")  # _rxinfo_rsp_bl.txt
    print("Start\n")
    # Expected file name for this project id ("file_ext" + str(conf.the_id)).
    file_name = conf.get_file_name(the_date).lower()
    files = MyLocalFile.get_child_file(data_path)
    for aFile in files:
        short_name = os.path.basename(aFile).lower()
        f_name = pathlib.PurePath(aFile).name
        # Case-insensitive comparison; the original casing is kept for output.
        if short_name == file_name:
            to_file1 = str(pathlib.PurePath(dest_dir1).joinpath(f_name))
            to_file2 = str(pathlib.PurePosixPath(dest_dir2).joinpath(f_name))
            f_add_date = conf.get_hive_add_date(the_date)
            f_need_head = conf.get_hive_head()
            MyLocalFile.conv_file_local(aFile, to_file1,
                                        need_first_line=f_need_head,
                                        p_add_head=f_add_date)
            MyHdfsFile.safe_make_dir(a_client, to_file2)
            # a_client.newupload(to_file2, to_file1, encoding='utf-8')
            the_file = a_client.status(to_file2, strict=False)
            if the_file is None:
                # Not on HDFS yet: upload, then loosen permissions.
                # NOTE(review): int 777 is decimal; webhdfs typically expects
                # the octal string "777" -- confirm.
                a_client.upload(to_file2, to_file1)
                a_client.set_permission(to_file2, 777)
                # a_client.set_owner(thePath,owner='hdfs',group='supergroup')
            elif the_file['type'].lower() == 'file':  # vs 'directory'
                a_client.set_permission(to_file2, 777)
def run_hive(conf: ConfigData, the_date: str, is_baoli=True): p_client = Client(conf.hdfs_ip()) # "http://10.2.201.197:50070" conn = connect(host=conf.hive_ip(), port=conf.hive_port(), auth_mechanism=conf.hive_auth(), user=conf.hive_user()) cur = conn.cursor() the_date = StrTool.get_the_date_str(the_date) # "20181101" root_path = conf.get_hdfs_path() # "/shouyinbao/bl_shouyinbao/UTF8/" f_name = conf.get_file_name( the_date) # "t1_trxrecord_" the_date # "_V2.csv" table_name = conf.get_table_name() print("Start\n") idn = 0 branches = MyHdfsFile.get_child(p_client, root_path + the_date) for aBranch in branches: if MyHdfsFile.check_branch(p_client, aBranch): files = MyHdfsFile.get_child(p_client, aBranch) f_a_branch = MyHdfsFile.get_name(aBranch) for aFile in files: if MyHdfsFile.check_file(p_client, aFile, f_name): # '/shouyinbao/bl_shouyinbao/UTF8/20181101/9999997900/t1_trxrecord_20181101_V2.csv' to_file2 = str( pathlib.PurePosixPath(root_path).joinpath( the_date, f_a_branch, f_name)) if conf.m_project_id == 1: sql = 'LOAD DATA INPATH \'{}\' INTO TABLE {}'.format( to_file2, table_name) # 'test.t1_trxrecprd_v2_zc' # '\' OVERWRITE INTO TABLE test.t1_trxrecprd_v2_bl2' elif conf.m_project_id == 2: sql = 'LOAD DATA INPATH \'{}\' INTO TABLE {} PARTITION ( p_branch=\'{}\', p_date={} )'.format( to_file2, table_name, f_a_branch, the_date) # 'test.t1_trxrecprd_v2_zc' idn += 1 print(str(idn) + " " + sql + "\n") cur.execute(sql) # , async=True) cur.close() conn.close()
def run_sftp_file(conf: ConfigData, the_date: str):
    """Fetch the day's files from the BL SFTP server into the local data dir."""
    the_date = StrTool.get_the_date_str(the_date)
    # allinpay_ftp_folder_bl_1 or allinpay_ftp_folder_bl_2
    remote_dir = conf.get_remote_path_ftp(the_date)
    # allinpay_data_bl/<date>
    local_dir = os.path.join(conf.get_local_path_ftp(), the_date)
    # name pattern: "file_ext" + str(conf.the_id)
    pattern = conf.get_ftp_name(the_date)
    session = sftp_tool.Sftp_Tool(h=conf.get_ftp_ip(),
                                  p=int(conf.get_ftp_port()),
                                  u=conf.get_ftp_user(),
                                  s=conf.get_ftp_pass(),
                                  r=remote_dir,
                                  d=local_dir)
    session.openSFTP()
    session.download_files(from_dir=remote_dir, to_dir=local_dir, p_name=pattern)
def copyTheFile(destdir, branch, month, day, file, foldertype=1):
    """Copy `file` into a destination layout derived from branch/month/day,
    but only when (month, day) equals the configured test date.

    :param destdir: destination root directory.
    :param branch: source branch directory; its basename becomes part of the layout.
    :param month: integer like 201811 (YYYYMM).
    :param day: integer day of month.
    :param file: full path of the source file.
    :param foldertype: 1 -> dest/<branch>/<YYYYMM>/<DD>, else dest/<YYYYMMDD>/<branch>.
    """
    theday = datetime.date(month // 100, month % 100, day)
    thedayStr = theday.strftime("%Y%m%d")
    # if month == 201811 and day == 1:
    # Only the configured test date is copied; everything else is skipped.
    # NOTE(review): test_date() is invoked on the ConfigData class, not an
    # instance -- this works only if it is a static/class method; confirm.
    if thedayStr == ConfigData.test_date():
        pass
    else:
        return
    shortname = os.path.basename(branch)
    if foldertype == 1:
        newpath = os.path.join(destdir, shortname, "{:0>6d}".format(month),
                               "{:0>2d}".format(day))
    else:
        newpath = os.path.join(destdir, "{:0>6d}{:0>2d}".format(month, day),
                               shortname)
    # If a *file* already occupies the target directory path, bail out.
    if os.path.isfile(newpath):
        return
    if not os.path.exists(newpath):
        pathlib.Path(newpath).mkdir(parents=True, exist_ok=True)
    toFile = os.path.join(newpath, os.path.basename(file))
    if not os.path.exists(toFile):
        shutil.copyfile(file, toFile)
        print("\nfile copied " + toFile)
def run_hive(conf: ConfigData, the_date: str):
    """Run the GanSu-bank "deduct union" overdue report against Hive and
    export the result to an Excel file under /home/data/deduct/.

    :param conf: project configuration accessor (hive port/auth/user).
    :param the_date: unused in this body -- the export name uses yesterday's
        date instead; TODO confirm the parameter can be dropped.
    """
    # NOTE(review): host is hard-coded instead of conf.hive_ip() -- confirm
    # this is intentional.
    conn = connect(host="10.91.1.20", port=conf.hive_port(),
                   auth_mechanism=conf.hive_auth(), user=conf.hive_user())
    cur = conn.cursor()
    sql = """
--deduct union sql created by Zhaohu on 14-Nov-2018 am 10:49
use loginfo
"""
    print("OK" + " " + sql + "\n")
    cur.execute(sql)  # , async=True)
    sql = "set mapreduce.job.queuename = deduct_union_gansu_mysql;"
    print("OK" + " " + sql + "\n")
    cur.execute(sql)  # , async=True)
    sql = "set hive.cli.print.header=true;"
    print("OK" + " " + sql + "\n")
    cur.execute(sql)  # , async=True)
    # NOTE(review): this unwindowed query is assigned but NEVER executed; only
    # sql2 below (identical except for the trailing 5-day window predicate)
    # runs. Confirm whether the full-history query was disabled on purpose.
    sql = """ select distinct case when creq.prod_code='8005000001' then '甘肃银行' else '' end as `产品类别`, creq.LMT_SERNO as `授信编号`, creq.CUST_NAME as `客户姓名`, creq.LIVE_ADDR as `居住地`, creq.ADDRESS as `客户联系地址`, creq.TEL_NO as `家庭电话(贷款申请表)`, creq.MOBILE as `手机号码(贷款申请表)`, creq.MOBILE as `客户手机`, creq.SPOUSE_NM as `配偶姓名`, creq.SPOUSE_PHONE as `配偶电话`, creq.EMER_NAME_1 as `第一联系人姓名`, case when creq.EMER_REL_1='1' or creq.EMER_REL_1='01' then '配偶' when creq.EMER_REL_1='2' or creq.EMER_REL_1='02' then '父母' when creq.EMER_REL_1='3' or creq.EMER_REL_1='03' then '子女' when creq.EMER_REL_1='4' or creq.EMER_REL_1='04' then '亲戚' when creq.EMER_REL_1='5' or creq.EMER_REL_1='05' then '朋友' when creq.EMER_REL_1='6' or creq.EMER_REL_1='06' then '其他' when creq.EMER_REL_1='7' or creq.EMER_REL_1='07' then '兄弟姐妹' when creq.EMER_REL_1='8' or creq.EMER_REL_1='08' then '同事' else '' end as `第一联系人关系`, creq.EMER_PHONE_1 as `第一联系人联系电话`, creq.EMER_NAME_2 as `第二联系人姓名`, case when creq.EMER_REL_2='1' or creq.EMER_REL_2='01' then '配偶' when creq.EMER_REL_2='2' or creq.EMER_REL_2='02' then '父母' when creq.EMER_REL_2='3' or creq.EMER_REL_2='03' then '子女' when creq.EMER_REL_2='4' or creq.EMER_REL_2='04' then '亲戚' when creq.EMER_REL_2='5' or creq.EMER_REL_2='05' then '朋友' when creq.EMER_REL_2='6' or creq.EMER_REL_2='06' then '其他' when creq.EMER_REL_2='7' or creq.EMER_REL_2='07' then '兄弟姐妹' when creq.EMER_REL_2='8' or creq.EMER_REL_2='08' then '同事' else '' end as `第二联系人关系`, creq.EMER_PHONE_2 as `第二联系人联系电话`, creq.EMER_NAME_3 as `第三联系人姓名`, case when creq.EMER_REL_3='1' or creq.EMER_REL_3='01' then '配偶' when creq.EMER_REL_3='2' or creq.EMER_REL_3='02' then '父母' when creq.EMER_REL_3='3' or creq.EMER_REL_3='03' then '子女' when creq.EMER_REL_3='4' or creq.EMER_REL_3='04' then '亲戚' when creq.EMER_REL_3='5' or creq.EMER_REL_3='05' then '朋友' when creq.EMER_REL_3='6' or creq.EMER_REL_3='06' then '其他' when creq.EMER_REL_3='7' or creq.EMER_REL_3='07' then '兄弟姐妹' when creq.EMER_REL_3='8' or creq.EMER_REL_3='08' then '同事' else '' end as `第三联系人关系`, creq.EMER_PHONE_3 as `第三联系人联系电话`, creq.CREDIT_CONTRACT as `合同编号`, trim(creq.CERT_NO) as `身份证号`, case when creq.sex='2' then '男' when creq.sex='3' then '女' else '' end as `性别`, loaninfo.limit_statr_date as `贷款开始日期`, ureq.APP_START_DATE as `支用开始SAS日期`, ureq.APP_END_DATE as `支用结束SAS日期`, ureq.USE_DATE as `支用申请期限`, merchant.contact_tel as `联系电话(APMS系统)`, merchant.legal_name as `法定代表人姓名`, merchant.finance_name as `财务联系人`, merchant.finance_hp_no as `财务联系人电话(APMS系统)`, merchant.stlm_acct as `还款银行卡号`, merchant.name_busi as `商户名称`, dict4.dict_name as `所在城市`, merchant.busi_addr as `营业地址`, dict1.dict_name as `分公司`, merchant.stlm_ins_city as `地市业务部`, merchant.contact as `客户姓名`, trim(creq.merchant_no) as `商户编号`, dict2.dict_name as `inst_oid中文`, dict3.dict_name as `mcc中文`, trim(deductall.bill_no) as `借据编号`, deductall.CURR_DATE as `截止日期`, loaninfo.LOAN_AMT as `放款金额`, loaninfo.LIMIT_END_DATE as `货款到期日`, loaninfo.REMAIN_LOAN_AMT as `贷款剩余本金`, loaninfo.REMAIN_LOAN_AMT as `应收未收本金`, loaninfo.OVERDUE_INTE_AMT as `应收未收利息`, case when compensate.BILL_NO is not null then '是' else '否' end as `代偿状态`, deductall.begin_late_date as `逾期开始日期`, date_add(deductall.begin_late_date,1) as `新增日期`, datediff(from_unixtime(unix_timestamp(deductall.CURR_DATE,'yyyymmdd'),'yyyy-mm-dd') ,deductall.begin_late_date) as `逾期天数` from loginfo.union_flag1 deductall left join loginfo.t_loan_use_req ureq on ureq.bill_no=trim(deductall.bill_no) left join loginfo.t_loan_credit_req creq on creq.LMT_SERNO=ureq.LMT_SERNO left join loginfo.merchant_zc merchant on creq.merchant_no=merchant.mcht_cd left JOIN loginfo.T_REOCN_LOANINFO loaninfo on trim(deductall.bill_no)=loaninfo.bill_no and loaninfo.current_data=trim(deductall.curr_date) left join loginfo.t_loan_compernsatory compensate on ureq.BILL_NO=compensate.BILL_NO left join loginfo.t_allinpay_dict dict1 on merchant.aip_bran_id = dict1.dict_id and dict1.dict_type='aip_bran_id' left join loginfo.t_allinpay_dict dict2 on merchant.inst_oid = dict2.dict_id and dict2.dict_type='inst_oid' left join loginfo.t_allinpay_dict dict3 on merchant.up_mcc_cd = dict3.dict_id and dict3.dict_type='up_mcc_cd' left join loginfo.t_allinpay_dict dict4 on merchant.city_cd = dict4.dict_id and dict4.dict_type='city_cd' where creq.prod_code='8005000001' """
    # Same query restricted to CURR_DATE within the last 5 days.
    sql2 = """ select distinct case when creq.prod_code='8005000001' then '甘肃银行' else '' end as `产品类别`, creq.LMT_SERNO as `授信编号`, creq.CUST_NAME as `客户姓名`, creq.LIVE_ADDR as `居住地`, creq.ADDRESS as `客户联系地址`, creq.TEL_NO as `家庭电话(贷款申请表)`, creq.MOBILE as `手机号码(贷款申请表)`, creq.MOBILE as `客户手机`, creq.SPOUSE_NM as `配偶姓名`, creq.SPOUSE_PHONE as `配偶电话`, creq.EMER_NAME_1 as `第一联系人姓名`, case when creq.EMER_REL_1='1' or creq.EMER_REL_1='01' then '配偶' when creq.EMER_REL_1='2' or creq.EMER_REL_1='02' then '父母' when creq.EMER_REL_1='3' or creq.EMER_REL_1='03' then '子女' when creq.EMER_REL_1='4' or creq.EMER_REL_1='04' then '亲戚' when creq.EMER_REL_1='5' or creq.EMER_REL_1='05' then '朋友' when creq.EMER_REL_1='6' or creq.EMER_REL_1='06' then '其他' when creq.EMER_REL_1='7' or creq.EMER_REL_1='07' then '兄弟姐妹' when creq.EMER_REL_1='8' or creq.EMER_REL_1='08' then '同事' else '' end as `第一联系人关系`, creq.EMER_PHONE_1 as `第一联系人联系电话`, creq.EMER_NAME_2 as `第二联系人姓名`, case when creq.EMER_REL_2='1' or creq.EMER_REL_2='01' then '配偶' when creq.EMER_REL_2='2' or creq.EMER_REL_2='02' then '父母' when creq.EMER_REL_2='3' or creq.EMER_REL_2='03' then '子女' when creq.EMER_REL_2='4' or creq.EMER_REL_2='04' then '亲戚' when creq.EMER_REL_2='5' or creq.EMER_REL_2='05' then '朋友' when creq.EMER_REL_2='6' or creq.EMER_REL_2='06' then '其他' when creq.EMER_REL_2='7' or creq.EMER_REL_2='07' then '兄弟姐妹' when creq.EMER_REL_2='8' or creq.EMER_REL_2='08' then '同事' else '' end as `第二联系人关系`, creq.EMER_PHONE_2 as `第二联系人联系电话`, creq.EMER_NAME_3 as `第三联系人姓名`, case when creq.EMER_REL_3='1' or creq.EMER_REL_3='01' then '配偶' when creq.EMER_REL_3='2' or creq.EMER_REL_3='02' then '父母' when creq.EMER_REL_3='3' or creq.EMER_REL_3='03' then '子女' when creq.EMER_REL_3='4' or creq.EMER_REL_3='04' then '亲戚' when creq.EMER_REL_3='5' or creq.EMER_REL_3='05' then '朋友' when creq.EMER_REL_3='6' or creq.EMER_REL_3='06' then '其他' when creq.EMER_REL_3='7' or creq.EMER_REL_3='07' then '兄弟姐妹' when creq.EMER_REL_3='8' or creq.EMER_REL_3='08' then '同事' else '' end as `第三联系人关系`, creq.EMER_PHONE_3 as `第三联系人联系电话`, creq.CREDIT_CONTRACT as `合同编号`, trim(creq.CERT_NO) as `身份证号`, case when creq.sex='2' then '男' when creq.sex='3' then '女' else '' end as `性别`, loaninfo.limit_statr_date as `贷款开始日期`, ureq.APP_START_DATE as `支用开始SAS日期`, ureq.APP_END_DATE as `支用结束SAS日期`, ureq.USE_DATE as `支用申请期限`, merchant.contact_tel as `联系电话(APMS系统)`, merchant.legal_name as `法定代表人姓名`, merchant.finance_name as `财务联系人`, merchant.finance_hp_no as `财务联系人电话(APMS系统)`, merchant.stlm_acct as `还款银行卡号`, merchant.name_busi as `商户名称`, dict4.dict_name as `所在城市`, merchant.busi_addr as `营业地址`, dict1.dict_name as `分公司`, merchant.stlm_ins_city as `地市业务部`, merchant.contact as `客户姓名`, trim(creq.merchant_no) as `商户编号`, dict2.dict_name as `inst_oid中文`, dict3.dict_name as `mcc中文`, trim(deductall.bill_no) as `借据编号`, deductall.CURR_DATE as `截止日期`, loaninfo.LOAN_AMT as `放款金额`, loaninfo.LIMIT_END_DATE as `货款到期日`, loaninfo.REMAIN_LOAN_AMT as `贷款剩余本金`, loaninfo.REMAIN_LOAN_AMT as `应收未收本金`, loaninfo.OVERDUE_INTE_AMT as `应收未收利息`, case when compensate.BILL_NO is not null then '是' else '否' end as `代偿状态`, deductall.begin_late_date as `逾期开始日期`, date_add(deductall.begin_late_date,1) as `新增日期`, datediff(from_unixtime(unix_timestamp(deductall.CURR_DATE,'yyyymmdd'),'yyyy-mm-dd') ,deductall.begin_late_date) as `逾期天数` from loginfo.union_flag1 deductall left join loginfo.t_loan_use_req ureq on ureq.bill_no=trim(deductall.bill_no) left join loginfo.t_loan_credit_req creq on creq.LMT_SERNO=ureq.LMT_SERNO left join loginfo.merchant_zc merchant on creq.merchant_no=merchant.mcht_cd left JOIN loginfo.T_REOCN_LOANINFO loaninfo on trim(deductall.bill_no)=loaninfo.bill_no and loaninfo.current_data=trim(deductall.curr_date) left join loginfo.t_loan_compernsatory compensate on ureq.BILL_NO=compensate.BILL_NO left join loginfo.t_allinpay_dict dict1 on merchant.aip_bran_id = dict1.dict_id and dict1.dict_type='aip_bran_id' left join loginfo.t_allinpay_dict dict2 on merchant.inst_oid = dict2.dict_id and dict2.dict_type='inst_oid' left join loginfo.t_allinpay_dict dict3 on merchant.up_mcc_cd = dict3.dict_id and dict3.dict_type='up_mcc_cd' left join loginfo.t_allinpay_dict dict4 on merchant.city_cd = dict4.dict_id and dict4.dict_type='city_cd' where creq.prod_code='8005000001' and from_unixtime(unix_timestamp(deductall.CURR_DATE,'yyyymmdd'))>=date_add(current_date(),-5) """
    print("OK" + " " + sql2 + "\n")
    cur.execute(sql2)  # , async=True)
    data = as_pandas(cur)
    print(len(data))
    # Export file is named after yesterday's date (delta_day=-1).
    name = '/home/data/deduct/deduct_gansu_late5_' + StrTool.get_the_date_str(
        '', -1) + '.xlsx'
    writer = pd.ExcelWriter(name)
    data.to_excel(writer, 'Sheet1')
    writer.save()
    cur.close()
    conn.close()
sdate = StrTool.get_the_date_str(the_date, delta_day) # "20181101" data_path = os.path.join(conf.get_data_path(1), sdate) utf8_path = os.path.join(conf.get_utf8_path(1), sdate) hdfs_path = str(pathlib.PurePosixPath(conf.get_hdfs_path()).joinpath(sdate)) shutil.rmtree(data_path, ignore_errors=True) shutil.rmtree(utf8_path, ignore_errors=True) client = MyClient(conf.hdfs_ip()) # "http://10.2.201.197:50070" client.delete(hdfs_path, recursive=True) # "/user/hive/warehouse/posflow.db/t1_trxrecprd_v2/t1_trxrecord_20181204_V2*.csv" # hive_table="posflow.t1_trxrecprd_v2", # file_pre1 = 't1_trxrecord_', # file_ext2 = "_V2.csv", if __name__ == "__main__": the_conf = ConfigData(p_is_test=False) client = Client(the_conf.hdfs_ip()) # "http://10.2.201.197:50070" a = MyHdfsFile.get_child(client, "/data/posflow/allinpay_utf8_zc") b = MyHdfsFile.get_child_file(client,"/data/posflow/allinpay_utf8_zc") c = MyHdfsFile.get_child_dir(client, "/data/posflow/allinpay_utf8_zc") # test # MyHdfsFile.delete(client, "/data/posflow/allinpay_utf8_zc", "*agt_cpy*") # test if the_conf.is_test(): day_str = the_conf.test_date() days = 9 else: day_str = StrTool.get_param_str(1, "")
left join loginfo.t_allinpay_dict dict1 on merchant.aip_bran_id = dict1.dict_id and dict1.dict_type='aip_bran_id' left join loginfo.t_allinpay_dict dict2 on merchant.inst_oid = dict2.dict_id and dict2.dict_type='inst_oid' left join loginfo.t_allinpay_dict dict3 on merchant.up_mcc_cd = dict3.dict_id and dict3.dict_type='up_mcc_cd' left join loginfo.t_allinpay_dict dict4 on merchant.city_cd = dict4.dict_id and dict4.dict_type='city_cd' where creq.prod_code='8005000001' and from_unixtime(unix_timestamp(deductall.CURR_DATE,'yyyymmdd'))>=date_add(current_date(),-5) """ print("OK" + " " + sql2 + "\n") cur.execute(sql2) # , async=True) data = as_pandas(cur) print(len(data)) name = '/home/data/deduct/deduct_gansu_late5_' + StrTool.get_the_date_str( '', -1) + '.xlsx' writer = pd.ExcelWriter(name) data.to_excel(writer, 'Sheet1') writer.save() cur.close() conn.close() if __name__ == "__main__": the_conf = ConfigData(p_is_test=False) run_hive(the_conf, the_date="")
# '\' OVERWRITE INTO TABLE test.t1_trxrecprd_v2_bl2' elif conf.m_project_id == 2: sql = 'LOAD DATA INPATH \'{}\' INTO TABLE {} PARTITION ( p_branch=\'{}\', p_date={} )'.format( to_file2, table_name, f_a_branch, the_date) # 'test.t1_trxrecprd_v2_zc' idn += 1 print(str(idn) + " " + sql + "\n") cur.execute(sql) # , async=True) cur.close() conn.close() if __name__ == "__main__": the_conf = ConfigData(p_is_test=False) if the_conf.is_test(): day_str = the_conf.test_date() days = 190 else: the_conf.m_project_id = StrTool.get_param_int(1, 1) day_str = StrTool.get_param_str(2, "") days = StrTool.get_param_int(3, 1) if the_conf.m_project_id == 1: return_code = subprocess.call("/app/code/posflow_loader/ftpcmd.sh", shell=True) print(return_code) f_delta = the_conf.get_data("file_date_delta" + str(the_conf.m_project_id),
def run_conv_file_local_to_hdfs(conf: ConfigData, the_date: str, is_baoli=True):
    """Convert every branch file of the day to UTF-8 locally, then upload each
    converted file into the day's HDFS directory and loosen its permissions.

    :param conf: project configuration accessor.
    :param the_date: date string; normalized to "YYYYMMDD".
    :param is_baoli: unused here -- TODO confirm and remove if dead.
    """
    the_date = StrTool.get_the_date_str(the_date)
    p_client = MyClient(url=conf.hdfs_ip())  # "http://10.2.201.197:50070"
    # webhdfs defaults to user "dr.who" and cannot impersonate other users;
    # adjust hadoop.http.staticuser.user=dr.who in the cluster config if needed.
    # https://www.cnblogs.com/peizhe123/p/5540845.html
    root_path = os.path.join(conf.get_data_path(), the_date)
    dest_dir1 = os.path.join(conf.get_utf8_path(), the_date)
    dest_dir2 = str(
        pathlib.PurePosixPath(conf.get_hdfs_path()).joinpath(the_date))
    f_name = conf.get_file_name(
        the_date)  # "t1_trxrecord_" + the_date + "_V2.csv"
    print("Start\n")
    branches = MyLocalFile.get_child_dir(root_path)
    for aBranch in branches:
        if MyLocalFile.check_branch(aBranch):
            files = MyLocalFile.get_child_file(aBranch)
            f_a_branch = os.path.basename(aBranch)
            for aFile in files:
                if MyLocalFile.check_file(aFile, f_name):
                    to_file1 = os.path.join(dest_dir1, f_a_branch, f_name)
                    to_file2 = str(
                        pathlib.PurePosixPath(dest_dir2).joinpath(
                            f_a_branch, f_name))
                    f_add_date = conf.get_hive_add_date(the_date)
                    f_need_head = conf.get_hive_head()  # False
                    MyLocalFile.conv_file_local(aFile, to_file1,
                                                need_first_line=f_need_head,
                                                p_add_head=f_add_date)
                    MyHdfsFile.safe_make_dir(p_client, to_file2)
                    # client.newupload(to_file2, to_file1, encoding='utf-8')
                    the_file = p_client.status(to_file2, strict=False)
                    if the_file is None:
                        # Not on HDFS yet: upload, then loosen permissions.
                        # NOTE(review): int 777 is decimal; webhdfs typically
                        # expects the octal string "777" -- confirm.
                        p_client.upload(to_file2, to_file1)
                        p_client.set_permission(to_file2, 777)
                        # client.set_owner(thePath,owner='hdfs',group='supergroup')
                    elif the_file['type'].lower() == 'file':  # vs 'directory'
                        p_client.set_permission(to_file2, 777)