def run_unzip_file(conf: ConfigData, the_date: str):
    """Unzip the day's two SYB archives (plain and "_agt") into the data folder.

    :param conf: project configuration accessor
    :param the_date: date value; normalized to a "YYYYMMDD" string via StrTool
    :return: None; returns silently when the date cannot be normalized
    """
    the_date = StrTool.get_the_date_str(the_date)
    # Guard: proceed only with a valid 8-character "YYYYMMDD" string.
    # (isinstance replaces the original `type(...) is str` check; the unused
    # month/day slices were removed.)
    if not (isinstance(the_date, str) and len(the_date) == 8):
        return

    root_path = conf.get_data("allinpay_data_zc")
    destdir = conf.get_data("allinpay_data_zc")  # extract in place under the same root

    #    ifile = '/home/testFolder/logflow/bl_shouyinbao/20181101/9999100000/t1_trxrecord_20181101_V2.csv'
    #    ofile = '/home/testFolder/logflow/bl_shouyinbao/UTF8/20181101/9999100000/t1_trxrecord_20181101_V2.csv'

    print("Start\n")

    # The real SYB folder has no per-date sub-folder, so archives sit directly
    # under root_path.

    f_name = conf.get_zip_name(the_date, 3)  # e.g. the_date + ".zip"
    a_file = os.path.join(root_path, f_name)
    if MyLocalFile.check_file(a_file):
        MyLocalFile.unzip_the_file(a_file, destdir, p_name=the_date + "*")

    f_name = conf.get_zip_name(the_date, 5)  # e.g. the_date + "_agt.zip"
    a_file = os.path.join(root_path, f_name)
    if MyLocalFile.check_file(a_file):
        MyLocalFile.unzip_the_file(a_file, destdir, p_name=the_date + "*")
def run_unzip_file(conf: ConfigData, p_date: str):
    """Extract the day's archive from <zip_path>/<date> into <data_path>/<date>."""
    p_date = StrTool.get_the_date_str(p_date)
    # Bail out unless normalization produced a "YYYYMMDD" string.
    if not ((type(p_date) is str) and len(p_date) == 8):
        return
    m_month = p_date[0:6]
    m_day = p_date[6:8]

    zip_dir = os.path.join(conf.get_zip_path(), p_date)
    out_dir = os.path.join(conf.get_data_path(), p_date)  # allinpay_data_zc

    print("Start\n")

    zip_name = conf.get_zip_name(p_date)      # e.g. date+".zip" or date+"_agt.zip"
    zip_file = os.path.join(zip_dir, zip_name)
    member_glob = conf.get_file_name(p_date)  # e.g. p_date + "*"
    if MyLocalFile.check_file(zip_file):
        MyLocalFile.unzip_the_file(zip_file, out_dir, p_name=member_glob)
# Example #3
def runCopyFile(conf: ConfigData, isBaoli=True):
    """Walk <zip_path>/<branch>/<month>/<day> and copy files matching the
    configured pattern into <data_path>/<test_date> via copyTheFile."""
    the_date = conf.test_date()  # e.g. "20181101"
    src_root = conf.get_zip_path(1)
    dst_root = os.path.join(conf.get_data_path(1), the_date)

    pattern = conf.get_zip_name("*", 1)  # e.g. "t1_trxrecord_*_V2.csv"

    print("Start\n")

    # Guard clauses replace the original nested-if pyramid; traversal order
    # and every call are unchanged.
    for branch in MyLocalFile.get_child(src_root):
        if not MyLocalFile.check_branch(branch):
            continue
        for month_dir in MyLocalFile.get_child(branch):
            month_num = MyLocalFile.check_month(month_dir)
            if month_num <= 0:
                continue
            for day_dir in MyLocalFile.get_child(month_dir):
                day_num = MyLocalFile.check_day(day_dir)
                if day_num <= 0:
                    continue
                for one_file in MyLocalFile.get_child(day_dir):
                    if MyLocalFile.check_file(one_file, p_name=pattern):
                        copyTheFile(dst_root, branch, month_num, day_num,
                                    one_file, 1)
def run_remove_files(conf: ConfigData, the_date: str, delta_day=0):
    """Delete the date's local data/utf8 directories and its HDFS directory."""
    day_str = StrTool.get_the_date_str(the_date, delta_day)  # e.g. "20181101"
    local_data = os.path.join(conf.get_data_path(1), day_str)
    local_utf8 = os.path.join(conf.get_utf8_path(1), day_str)
    remote_dir = str(pathlib.PurePosixPath(conf.get_hdfs_path()).joinpath(day_str))

    shutil.rmtree(local_data, ignore_errors=True)
    shutil.rmtree(local_utf8, ignore_errors=True)

    hdfs = MyClient(conf.hdfs_ip())  # e.g. "http://10.2.201.197:50070"
    hdfs.delete(remote_dir, recursive=True)
def run_remove_hive(conf: ConfigData, the_date: str, delta_day=0):
    """Delete the date's file for the configured Hive table via SSH on the CDH host."""
    day_str = StrTool.get_the_date_str(the_date, delta_day)  # e.g. "20181101"

    target_table = conf.get_table_name()  # e.g. "rds_posflow.loginfo_rsp_bl"
    # Widen the name into a glob: "x.csv" -> "x*.csv".
    target_glob = conf.get_file_name(day_str).replace('.', '*.')

    MyHdfsFile.delete_hive_ssh(conf.get_data("cdh_ip"),
                               table=target_table,
                               p_name=target_glob,
                               username=conf.get_data("cdh_user"),
                               password=conf.get_data("cdh_pass"))
def run_hive(conf: ConfigData, the_date: str):
    """If the date's file exists on HDFS, LOAD it into the configured Hive table."""
    hdfs = Client(conf.hdfs_ip())  # e.g. "http://10.2.201.197:50070"
    conn = connect(host=conf.hive_ip(),
                   port=conf.hive_port(),
                   auth_mechanism=conf.hive_auth(),
                   user=conf.hive_user())
    cur = conn.cursor()

    print("Start\n")

    the_date = StrTool.get_the_date_str(the_date)  # e.g. "20181101"
    # e.g. /data/posflow/allinpay_utf8_zc/20181101/<file_name>
    date_dir = str(
        pathlib.PurePosixPath(conf.get_hdfs_path()).joinpath(the_date))
    hdfs_file = str(
        pathlib.PurePosixPath(date_dir).joinpath(conf.get_file_name(the_date)))

    hive_table = conf.get_table_name()

    if MyHdfsFile.isfile(hdfs, hdfs_file):
        sql = "LOAD DATA INPATH '{}' INTO TABLE {}".format(hdfs_file, hive_table)
        print("OK" + "  " + sql + "\n")
        cur.execute(sql)

    cur.close()
    conn.close()
def run_remove_hive(conf: ConfigData, the_date: str, delta_day=0):
    """Remove the date's data from Hive: delete warehouse files over SSH
    (project 1) or drop the date partition (project 2).

    :param conf: project configuration accessor; m_project_id selects the path
    :param the_date: date string, normalized to "YYYYMMDD"
    :param delta_day: day offset applied during normalization
    """
    f_date_str = StrTool.get_the_date_str(the_date, delta_day)  # "20181101"
    # e.g. "/user/hive/warehouse/rds_posflow.db/t1_trxrecprd_v2/t1_trxrecord_20181204_V2*.csv"

    del_table = conf.get_table_name()  # e.g. "rds_posflow.t1_trxrecprd_v2"

    # BUG FIX: the original read `the_conf.m_project_id`, but the parameter is
    # named `conf`, so both branches raised NameError at runtime.
    if conf.m_project_id == 1:
        del_file = conf.get_file_name(f_date_str).replace('.', '*.')
        MyHdfsFile.delete_hive_ssh(conf.get_data("cdh_ip"),
                                   table=del_table,
                                   p_name=del_file,
                                   username=conf.get_data("cdh_user"),
                                   password=conf.get_data("cdh_pass"))

    if conf.m_project_id == 2:
        conn = connect(host=conf.hive_ip(),
                       port=conf.hive_port(),
                       auth_mechanism=conf.hive_auth(),
                       user=conf.hive_user())
        cur = conn.cursor()
        try:
            # NOTE(review): the partition key uses the raw `the_date` argument,
            # not the normalized f_date_str — preserved as-is; confirm intent.
            sql = "ALTER TABLE {} DROP IF EXISTS PARTITION( p_date={} )".format(
                del_table, the_date)
            print(sql)
            cur.execute(sql)
        finally:
            # Always release the cursor/connection, even if execute fails.
            cur.close()
            conn.close()
def run_hive(conf: ConfigData, the_date: str):
    """LOAD the four daily rsp/rxinfo files from HDFS into their Hive tables."""
    hdfs = Client(conf.hdfs_ip())  # e.g. "http://10.2.201.197:50070"
    conn = connect(host=conf.hive_ip(), port=conf.hive_port(),
                   auth_mechanism=conf.hive_auth(), user=conf.hive_user())
    cur = conn.cursor()

    the_date = StrTool.get_the_date_str(the_date)  # e.g. "20181101"
    hdfs_root = conf.get_data("hdfs_dir_zc")  # e.g. "/data/posflow/allinpay_utf8_zc/"

    # Each file-extension config key pairs with the Hive table key it loads
    # into, e.g. "file_ext3" (_loginfo_rsp.txt) -> "hive_table3".
    ext_keys = ("file_ext3", "file_ext4", "file_ext5", "file_ext6")
    table_keys = ("hive_table3", "hive_table4", "hive_table5", "hive_table6")

    print("Start\n")

    for ext_key, table_key in zip(ext_keys, table_keys):
        src = str(pathlib.PurePosixPath(hdfs_root).joinpath(
            the_date + conf.get_data(ext_key)))
        if MyHdfsFile.isfile(hdfs, src):
            sql = "LOAD DATA INPATH '{}' INTO TABLE {}".format(
                src, conf.get_data(table_key))
            print("OK" + "  " + sql + "\n")
            cur.execute(sql)

    cur.close()
    conn.close()
def run_remove_files(conf: ConfigData, the_date: str, delta_day=0):
    """Delete the date's local data/utf8 directories plus its HDFS directory."""
    day_str = StrTool.get_the_date_str(the_date, delta_day)  # e.g. "20181101"
    local_data = os.path.join(conf.get_data_path(), day_str)    # allinpay_data_bl
    local_utf8 = os.path.join(conf.get_utf8_path(), day_str)    # allinpay_utf8_bl
    remote_dir = str(                                           # hdfs_dir_bl
        pathlib.PurePosixPath(conf.get_hdfs_path()).joinpath(day_str))

    hdfs = MyClient(conf.hdfs_ip())  # e.g. "http://10.2.201.197:50070"

    shutil.rmtree(local_data, ignore_errors=True)
    shutil.rmtree(local_utf8, ignore_errors=True)
    hdfs.delete(remote_dir, recursive=True)
def run_unzip_file(conf: ConfigData, the_date, folder_type=2):
    """Scan <zip_path>/<branch>/<month>/<day> for archives matching the
    configured name pattern and unzip those belonging to `the_date` into
    the data path.

    :param conf: project configuration accessor
    :param the_date: date value; normalized to a "YYYYMMDD" string
    :param folder_type: 1 -> extract to <data>/<YYYYMM>/<DD>/<branch>,
                        otherwise -> <data>/<YYYYMMDD>/<branch>
    :return: None; returns silently when the date cannot be normalized
    """
    the_date = StrTool.get_the_date_str(the_date)
    if (type(the_date) is str) and len(the_date) == 8:
        m_month = the_date[0:6]  # "YYYYMM"
        m_day = the_date[6:8]    # "DD"
    else:
        return

    zip_path = conf.get_zip_path()
    data_path = conf.get_data_path()

    f_name = conf.get_zip_name("")  # "t1_trxrecord_" the_date # "_V2.csv"

    print("Start\n")

    # os.path.join(root_path, the_date) # real SYB folder don't have date folder
    branches = MyLocalFile.get_child_dir(zip_path)
    for aBranch in branches:
        if MyLocalFile.check_branch(aBranch):
            months = MyLocalFile.get_child_dir(aBranch)
            for aMonth in months:
                # check_month returns a positive month number only for
                # month-shaped folder names; match it against the target month.
                the_month = MyLocalFile.check_month(aMonth)
                if the_month > 0 and "{:0>6d}".format(the_month) == m_month:
                    day_list = MyLocalFile.get_child_dir(aMonth)
                    for aDay in day_list:
                        the_day = MyLocalFile.check_day(aDay)
                        if the_day > 0 and "{:0>2d}".format(the_day) == m_day:
                            files = MyLocalFile.get_child_file(aDay)
                            for aFile in files:
                                if MyLocalFile.check_file(aFile,
                                                          p_name=f_name):
                                    short_name = os.path.basename(aBranch)
                                    # Destination layout depends on folder_type.
                                    if folder_type == 1:
                                        new_path = os.path.join(
                                            data_path, m_month, m_day,
                                            short_name)
                                        # "{:0>6d}".format(month)  "{:0>2d}".format(day)
                                    else:
                                        new_path = os.path.join(
                                            data_path, m_month + m_day,
                                            short_name)
                                        # "{:0>6d}{:0>2d}".format(month, day)
                                    p_name = conf.get_file_name(m_month +
                                                                m_day)
                                    MyLocalFile.unzip_the_file(
                                        aFile, new_path, p_name)
# Example #11
def run_hive_test(conf: ConfigData):
    """Run the configured smoke-test query against Hive and print the result."""
    host = conf.hive_ip()    # e.g. '10.2.201.197'
    port = conf.hive_port()  # e.g. 10000
    user = conf.hive_user()  # e.g. "hdfs"
    auth = conf.hive_auth()  # e.g. 'PLAIN'
    test = conf.hive_test()  # e.g. "select * from test.test1"

    conn = connect(host=host, port=port, auth_mechanism=auth, user=user,
                   password='******')
    cur = conn.cursor()

    cur.execute(test)
    print(as_pandas(cur))

    cur.close()
    conn.close()
def run_conv_file_local(conf: ConfigData, the_date: str, is_baoli=True):
    """Convert each branch's daily file under <data_path>/<date> into a copy
    under <utf8_path>/<date>/<branch>."""
    the_date = StrTool.get_the_date_str(the_date)
    src_root = conf.get_data_path()
    utf8_root = conf.get_utf8_path()

    f_name = conf.get_file_name(the_date)  # e.g. "t1_trxrecord_<date>_V2.csv"

    print("Start\n")

    for branch in MyLocalFile.get_child(os.path.join(src_root, the_date)):
        if not MyLocalFile.check_branch(branch):
            continue
        for src_file in MyLocalFile.get_child(branch):
            if MyLocalFile.check_file(src_file, f_name):
                dst_file = os.path.join(utf8_root, the_date,
                                        os.path.basename(branch), f_name)
                MyLocalFile.conv_file_local(src_file, dst_file, True)
# Example #13
def run_hdfs_test(conf: ConfigData):
    """Connectivity smoke test: list the HDFS root directory and print it."""
    hdfs = Client(conf.hdfs_ip())  # e.g. "http://10.2.201.197:50070"
    entries = hdfs.list('/', status=False)
    print(entries)
def run_sftp_file(conf: ConfigData, the_date: str):
    """Download the date's *.zip files from the allinpay SFTP server into the
    local data folder."""
    the_date = StrTool.get_the_date_str(the_date)
    if not ((type(the_date) is str) and len(the_date) == 8):
        return
    m_month = the_date[0:6]
    m_day = the_date[6:8]

    remote_dir = conf.get_data("allinpay_ftp_folder_zc")
    local_dir = conf.get_data("allinpay_data_zc")
    sftp = sftp_tool.Sftp_Tool(h=conf.get_data("allinpay_ftp_ip_zc"),
                               p=int(conf.get_data("allinpay_ftp_port_zc")),
                               u=conf.get_data("allinpay_ftp_user_zc"),
                               s=conf.get_data("allinpay_ftp_pass_zc"),
                               r=remote_dir,
                               d=local_dir)
    sftp.openSFTP()
    sftp.download_files(from_dir=remote_dir, to_dir=local_dir,
                        p_name=the_date + "*.zip")
def run_conv_file_hdfs(conf: ConfigData, the_date: str, is_baoli=True):
    """Convert each branch's daily file and push the result to HDFS under
    <hdfs_path>/<date>/<branch>."""
    the_date = StrTool.get_the_date_str(the_date)
    hdfs = Client(conf.hdfs_ip())   # e.g. "http://10.2.201.197:50070"
    src_root = conf.get_data_path()  # e.g. 'D:/DATA/UNZIP/'
    hdfs_root = conf.get_hdfs_path()

    f_name = conf.get_file_name(the_date)  # e.g. "t1_trxrecord_<date>_V2.csv"

    print("Start\n")

    for branch in MyLocalFile.get_child(os.path.join(src_root, the_date)):
        if not MyLocalFile.check_branch(branch):
            continue
        for src_file in MyLocalFile.get_child(branch):
            if MyLocalFile.check_file(src_file, f_name):
                dst = os.path.join(hdfs_root, the_date,
                                   os.path.basename(branch), f_name)
                MyHdfsFile.conv_file_hdfs(src_file, dst, hdfs)
def run_conv_file_local_to_hdfs(conf: ConfigData, the_date: str):
    """Convert the date's four rsp/rxinfo files to a local UTF-8 copy, then
    upload each copy to HDFS and open its permissions.

    :param conf: project configuration accessor
    :param the_date: date value; normalized to a "YYYYMMDD" string
    :return: None
    """
    the_date = StrTool.get_the_date_str(the_date)
    client = MyClient(conf.hdfs_ip())  # "http://10.2.201.197:50070"
    root_path = conf.get_data("allinpay_data_zc")
    dest_dir1 = conf.get_data("allinpay_utf8_zc")
    dest_dir2 = conf.get_data("hdfs_dir_zc")
    file_ext3 = conf.get_data("file_ext3")  # _loginfo_rsp.txt          # 20181101_loginfo_rsp.txt
    file_ext4 = conf.get_data("file_ext4")  # _loginfo_rsp_agt.txt      # 20181101_loginfo_rsp_agt.txt
    file_ext5 = conf.get_data("file_ext5")  # _rxinfo_rsp.txt           # 20181101_rxinfo_rsp.txt
    file_ext6 = conf.get_data("file_ext6")  # _rxinfo_rsp_agt.txt      # 20181101_rxinfo_rsp_agt.txt

    print("Start\n")

    files = MyLocalFile.get_child_file(root_path)
    for aFile in files:
        # Case-insensitive match against the four expected daily file names.
        short_name = os.path.basename(aFile).lower()
        if short_name == (the_date + file_ext3).lower() or \
                short_name == (the_date + file_ext4).lower() or \
                short_name == (the_date + file_ext5).lower() or \
                short_name == (the_date + file_ext6).lower():
            # Local UTF-8 copy and the HDFS destination share the file name.
            to_file1 = str(pathlib.PurePath(dest_dir1).joinpath(pathlib.PurePath(aFile).name))
            to_file2 = str(pathlib.PurePosixPath(dest_dir2).joinpath(pathlib.PurePath(aFile).name))
            MyLocalFile.conv_file_local(aFile, to_file1, need_first_line=True)
            MyHdfsFile.safe_make_dir(client, to_file2)
            # client.newupload(to_file2, to_file1, encoding='utf-8')
            # Upload only when the target does not exist yet; in both cases
            # make sure the permission is opened up.
            the_file = client.status(to_file2, strict=False)
            if the_file is None:
                client.upload(to_file2, to_file1)
                client.set_permission(to_file2, 777)
            # client.set_owner(thePath,owner='hdfs',group='supergroup')
            elif the_file['type'].lower() == 'file':  # 'directory'
                client.set_permission(to_file2, 777)
def run_conv_file_local_to_hdfs(conf: ConfigData, the_date: str):
    """Convert the date's configured daily file to a local UTF-8 copy, then
    upload it to HDFS and open its permissions.

    :param conf: project configuration accessor
    :param the_date: date value; normalized to a "YYYYMMDD" string
    :return: None
    """
    the_date = StrTool.get_the_date_str(the_date)
    a_client = MyClient(conf.hdfs_ip())  # "http://10.2.201.197:50070"
    # allinpay_data_bl
    data_path = os.path.join(conf.get_data_path(), the_date)
    # allinpay_utf8_bl
    dest_dir1 = os.path.join(conf.get_utf8_path(), the_date)
    # hdfs_dir_bl
    dest_dir2 = str(
        pathlib.PurePosixPath(conf.get_hdfs_path()).joinpath(the_date))
    # file_ext7 = conf.get_data("file_ext7")  # _loginfo_rsp_bl_new.csv   # 20181101_loginfo_rsp_bl_new.csv
    # file_ext8 = conf.get_data("file_ext8")  # _rsp_agt_bl_new.del       # 20181101_rsp_agt_bl_new.del
    # file_ext9 = conf.get_data("file_ext9")  # _rxinfo_rsp_bl.txt        # 20181101_rxinfo_rsp_bl.txt

    # f_list = [file_ext7, file_ext8, file_ext9]

    print("Start\n")

    # "file_ext" + str(conf.the_id)
    file_name = conf.get_file_name(the_date).lower()
    files = MyLocalFile.get_child_file(data_path)
    for aFile in files:
        # Case-insensitive match against the single expected daily file name.
        short_name = os.path.basename(aFile).lower()
        f_name = pathlib.PurePath(aFile).name
        if short_name == file_name:
            to_file1 = str(pathlib.PurePath(dest_dir1).joinpath(f_name))
            to_file2 = str(pathlib.PurePosixPath(dest_dir2).joinpath(f_name))
            # Conversion options come from config: optional date header line
            # and whether the first line is kept.
            f_add_date = conf.get_hive_add_date(the_date)
            f_need_head = conf.get_hive_head()
            MyLocalFile.conv_file_local(aFile,
                                        to_file1,
                                        need_first_line=f_need_head,
                                        p_add_head=f_add_date)
            MyHdfsFile.safe_make_dir(a_client, to_file2)
            # a_client.newupload(to_file2, to_file1, encoding='utf-8')
            # Upload only when the target does not exist yet; in both cases
            # make sure the permission is opened up.
            the_file = a_client.status(to_file2, strict=False)
            if the_file is None:
                a_client.upload(to_file2, to_file1)
                a_client.set_permission(to_file2, 777)
            # a_client.set_owner(thePath,owner='hdfs',group='supergroup')
            elif the_file['type'].lower() == 'file':  # 'directory'
                a_client.set_permission(to_file2, 777)
def run_hive(conf: ConfigData, the_date: str, is_baoli=True):
    """Walk <hdfs_path>/<date>/<branch> on HDFS and LOAD every matching file
    into the configured Hive table (project 2 loads into a per-branch/date
    partition).

    :param conf: project configuration accessor; m_project_id selects SQL form
    :param the_date: date string, normalized to "YYYYMMDD"
    :param is_baoli: unused here; kept for interface compatibility
    """
    p_client = Client(conf.hdfs_ip())  # "http://10.2.201.197:50070"
    conn = connect(host=conf.hive_ip(),
                   port=conf.hive_port(),
                   auth_mechanism=conf.hive_auth(),
                   user=conf.hive_user())
    cur = conn.cursor()

    the_date = StrTool.get_the_date_str(the_date)  # "20181101"
    root_path = conf.get_hdfs_path()  # e.g. "/shouyinbao/bl_shouyinbao/UTF8/"
    f_name = conf.get_file_name(the_date)  # e.g. "t1_trxrecord_<date>_V2.csv"
    table_name = conf.get_table_name()

    print("Start\n")

    idn = 0
    branches = MyHdfsFile.get_child(p_client, root_path + the_date)
    for aBranch in branches:
        if MyHdfsFile.check_branch(p_client, aBranch):
            files = MyHdfsFile.get_child(p_client, aBranch)
            f_a_branch = MyHdfsFile.get_name(aBranch)
            for aFile in files:
                if MyHdfsFile.check_file(p_client, aFile, f_name):
                    # e.g. '/shouyinbao/bl_shouyinbao/UTF8/20181101/9999997900/t1_trxrecord_20181101_V2.csv'
                    to_file2 = str(
                        pathlib.PurePosixPath(root_path).joinpath(
                            the_date, f_a_branch, f_name))
                    if conf.m_project_id == 1:
                        sql = 'LOAD DATA INPATH \'{}\' INTO TABLE {}'.format(
                            to_file2, table_name)  # 'test.t1_trxrecprd_v2_zc'
                    elif conf.m_project_id == 2:
                        sql = 'LOAD DATA INPATH \'{}\' INTO TABLE {} PARTITION ( p_branch=\'{}\', p_date={} )'.format(
                            to_file2, table_name, f_a_branch,
                            the_date)  # 'test.t1_trxrecprd_v2_zc'
                    else:
                        # BUG FIX: any other project id previously left `sql`
                        # unbound and raised UnboundLocalError below; skip.
                        continue
                    idn += 1
                    print(str(idn) + "  " + sql + "\n")
                    cur.execute(sql)  # , async=True)

    cur.close()
    conn.close()
def run_sftp_file(conf: ConfigData, the_date: str):
    """Fetch the date's files from the configured SFTP folder into the local
    FTP landing directory."""
    the_date = StrTool.get_the_date_str(the_date)

    # e.g. allinpay_ftp_folder_bl_1 or allinpay_ftp_folder_bl_2
    remote_dir = conf.get_remote_path_ftp(the_date)
    # e.g. allinpay_data_bl/<date>
    local_dir = os.path.join(conf.get_local_path_ftp(), the_date)
    # "file_ext" + str(conf.the_id)
    pattern = conf.get_ftp_name(the_date)

    sftp = sftp_tool.Sftp_Tool(h=conf.get_ftp_ip(),
                               p=int(conf.get_ftp_port()),
                               u=conf.get_ftp_user(),
                               s=conf.get_ftp_pass(),
                               r=remote_dir,
                               d=local_dir)
    sftp.openSFTP()
    sftp.download_files(from_dir=remote_dir, to_dir=local_dir, p_name=pattern)
# Example #20
def copyTheFile(destdir, branch, month, day, file, foldertype=1):
    """Copy `file` into the destination layout for (month, day), but only when
    that day equals the configured test date.

    foldertype 1 -> <destdir>/<branch>/<YYYYMM>/<DD>
    otherwise   -> <destdir>/<YYYYMMDD>/<branch>
    """
    # `month` is packed as YYYYMM (e.g. 201811); split it into a real date.
    day_str = datetime.date(month // 100, month % 100, day).strftime("%Y%m%d")
    # NOTE(review): ConfigData.test_date() is called on the class here but as
    # an instance method elsewhere in this file — confirm it is class/static.
    if day_str != ConfigData.test_date():
        return

    branch_name = os.path.basename(branch)
    if foldertype == 1:
        target_dir = os.path.join(destdir, branch_name,
                                  "{:0>6d}".format(month),
                                  "{:0>2d}".format(day))
    else:
        target_dir = os.path.join(destdir,
                                  "{:0>6d}{:0>2d}".format(month, day),
                                  branch_name)
    if os.path.isfile(target_dir):
        # A plain file occupies the directory path; give up silently.
        return
    if not os.path.exists(target_dir):
        pathlib.Path(target_dir).mkdir(parents=True, exist_ok=True)
    target_file = os.path.join(target_dir, os.path.basename(file))
    if not os.path.exists(target_file):
        shutil.copyfile(file, target_file)
        print("\nfile copied " + target_file)
# Example #21
def run_hive(conf: ConfigData, the_date: str):
    """Run the Gansu "deduct union" overdue report against Hive and export the
    result to a dated Excel file under /home/data/deduct/.

    :param conf: project configuration accessor (port/auth/user for Hive)
    :param the_date: unused by the query; kept for interface compatibility
    """
    conn = connect(host="10.91.1.20",
                   port=conf.hive_port(),
                   auth_mechanism=conf.hive_auth(),
                   user=conf.hive_user())
    cur = conn.cursor()

    # Session setup: select database, queue and header printing.
    sql = """
    --deduct union sql created by Zhaohu on 14-Nov-2018 am 10:49
use loginfo
"""
    print("OK" + "  " + sql + "\n")
    cur.execute(sql)  # , async=True)

    sql = "set mapreduce.job.queuename = deduct_union_gansu_mysql;"
    print("OK" + "  " + sql + "\n")
    cur.execute(sql)  # , async=True)
    sql = "set hive.cli.print.header=true;"
    print("OK" + "  " + sql + "\n")
    cur.execute(sql)  # , async=True)

    # BUG FIX: the original also built a ~100-line variant of the query below
    # (identical except for the trailing 5-day CURR_DATE filter) into `sql`
    # without ever executing it; that dead assignment was removed — recover it
    # from version history if the unfiltered report is ever needed.
    sql2 = """
    select distinct 
case when creq.prod_code='8005000001' then '甘肃银行' else '' end as `产品类别`,
creq.LMT_SERNO as `授信编号`,
creq.CUST_NAME as `客户姓名`,
creq.LIVE_ADDR as `居住地`,
creq.ADDRESS as `客户联系地址`,
creq.TEL_NO as `家庭电话(贷款申请表)`,
creq.MOBILE as `手机号码(贷款申请表)`,
creq.MOBILE as `客户手机`,
creq.SPOUSE_NM as `配偶姓名`,
creq.SPOUSE_PHONE as `配偶电话`,
creq.EMER_NAME_1 as `第一联系人姓名`,
case when creq.EMER_REL_1='1' or creq.EMER_REL_1='01' then '配偶'
when creq.EMER_REL_1='2' or creq.EMER_REL_1='02' then '父母'
when creq.EMER_REL_1='3' or creq.EMER_REL_1='03' then '子女'
when creq.EMER_REL_1='4' or creq.EMER_REL_1='04' then '亲戚'
when creq.EMER_REL_1='5' or creq.EMER_REL_1='05' then '朋友'
when creq.EMER_REL_1='6' or creq.EMER_REL_1='06' then '其他'
when creq.EMER_REL_1='7' or creq.EMER_REL_1='07' then '兄弟姐妹'
when creq.EMER_REL_1='8' or creq.EMER_REL_1='08' then '同事'
else '' end as `第一联系人关系`,
creq.EMER_PHONE_1 as `第一联系人联系电话`,
creq.EMER_NAME_2 as `第二联系人姓名`,
case when creq.EMER_REL_2='1' or creq.EMER_REL_2='01' then '配偶'
when creq.EMER_REL_2='2' or creq.EMER_REL_2='02' then '父母'
when creq.EMER_REL_2='3' or creq.EMER_REL_2='03' then '子女'
when creq.EMER_REL_2='4' or creq.EMER_REL_2='04' then '亲戚'
when creq.EMER_REL_2='5' or creq.EMER_REL_2='05' then '朋友'
when creq.EMER_REL_2='6' or creq.EMER_REL_2='06' then '其他'
when creq.EMER_REL_2='7' or creq.EMER_REL_2='07' then '兄弟姐妹'
when creq.EMER_REL_2='8' or creq.EMER_REL_2='08' then '同事'
else '' end as `第二联系人关系`,
creq.EMER_PHONE_2 as `第二联系人联系电话`,
creq.EMER_NAME_3 as `第三联系人姓名`,
case when creq.EMER_REL_3='1' or creq.EMER_REL_3='01' then '配偶'
when creq.EMER_REL_3='2' or creq.EMER_REL_3='02' then '父母'
when creq.EMER_REL_3='3' or creq.EMER_REL_3='03' then '子女'
when creq.EMER_REL_3='4' or creq.EMER_REL_3='04' then '亲戚'
when creq.EMER_REL_3='5' or creq.EMER_REL_3='05' then '朋友'
when creq.EMER_REL_3='6' or creq.EMER_REL_3='06' then '其他'
when creq.EMER_REL_3='7' or creq.EMER_REL_3='07' then '兄弟姐妹'
when creq.EMER_REL_3='8' or creq.EMER_REL_3='08' then '同事'
else '' end as `第三联系人关系`,
creq.EMER_PHONE_3 as `第三联系人联系电话`,
creq.CREDIT_CONTRACT as `合同编号`,
trim(creq.CERT_NO) as `身份证号`,
case when creq.sex='2' then '男' when creq.sex='3' then '女' else '' end as `性别`,
loaninfo.limit_statr_date as `贷款开始日期`,
ureq.APP_START_DATE as `支用开始SAS日期`,
ureq.APP_END_DATE as `支用结束SAS日期`,
ureq.USE_DATE as `支用申请期限`,

merchant.contact_tel as `联系电话(APMS系统)`,
merchant.legal_name as `法定代表人姓名`,
merchant.finance_name as `财务联系人`,
merchant.finance_hp_no as `财务联系人电话(APMS系统)`,
merchant.stlm_acct as `还款银行卡号`,
merchant.name_busi as `商户名称`,
dict4.dict_name as `所在城市`,
merchant.busi_addr as `营业地址`,
dict1.dict_name as `分公司`,	
merchant.stlm_ins_city as `地市业务部`,
merchant.contact as `客户姓名`,
trim(creq.merchant_no) as `商户编号`,
dict2.dict_name as `inst_oid中文`,
dict3.dict_name as `mcc中文`,

trim(deductall.bill_no) as `借据编号`,
deductall.CURR_DATE as `截止日期`,

loaninfo.LOAN_AMT as `放款金额`,
loaninfo.LIMIT_END_DATE as `货款到期日`,
loaninfo.REMAIN_LOAN_AMT as `贷款剩余本金`,
loaninfo.REMAIN_LOAN_AMT as `应收未收本金`,
loaninfo.OVERDUE_INTE_AMT as `应收未收利息`,



case when compensate.BILL_NO is not null then '是'	else '否' end as `代偿状态`,
deductall.begin_late_date as `逾期开始日期`,
date_add(deductall.begin_late_date,1) as `新增日期`,
datediff(from_unixtime(unix_timestamp(deductall.CURR_DATE,'yyyymmdd'),'yyyy-mm-dd')
,deductall.begin_late_date) as `逾期天数`

from loginfo.union_flag1 deductall 
left join loginfo.t_loan_use_req  ureq 
on ureq.bill_no=trim(deductall.bill_no)
 left join loginfo.t_loan_credit_req creq on creq.LMT_SERNO=ureq.LMT_SERNO	
 left join loginfo.merchant_zc merchant on creq.merchant_no=merchant.mcht_cd
 left JOIN loginfo.T_REOCN_LOANINFO loaninfo on trim(deductall.bill_no)=loaninfo.bill_no and loaninfo.current_data=trim(deductall.curr_date)
left join loginfo.t_loan_compernsatory compensate on ureq.BILL_NO=compensate.BILL_NO
left join loginfo.t_allinpay_dict dict1 on merchant.aip_bran_id = dict1.dict_id and dict1.dict_type='aip_bran_id'
left join loginfo.t_allinpay_dict dict2 on merchant.inst_oid = dict2.dict_id and dict2.dict_type='inst_oid'
left join loginfo.t_allinpay_dict dict3 on merchant.up_mcc_cd = dict3.dict_id and dict3.dict_type='up_mcc_cd'
left join loginfo.t_allinpay_dict dict4 on merchant.city_cd = dict4.dict_id and dict4.dict_type='city_cd'
where creq.prod_code='8005000001' 
and from_unixtime(unix_timestamp(deductall.CURR_DATE,'yyyymmdd'))>=date_add(current_date(),-5)
    """
    print("OK" + "  " + sql2 + "\n")
    cur.execute(sql2)  # , async=True)
    data = as_pandas(cur)

    print(len(data))

    # Export the result, named after yesterday's date.
    name = '/home/data/deduct/deduct_gansu_late5_' + StrTool.get_the_date_str(
        '', -1) + '.xlsx'

    writer = pd.ExcelWriter(name)

    data.to_excel(writer, 'Sheet1')

    writer.save()

    cur.close()
    conn.close()
    # BUG FIX: the original continued after conn.close() with statements pasted
    # in from a different function (rmtree/HDFS delete) that referenced the
    # undefined name `delta_day` and would always raise NameError; removed.
    # file_pre1 = 't1_trxrecord_',
    # file_ext2 = "_V2.csv",


if __name__ == "__main__":
    # Build the production (non-test) configuration for this run.
    the_conf = ConfigData(p_is_test=False)

    # Probe HDFS and enumerate the UTF-8 output directory.
    # NOTE(review): `Client` is used here while other entry points use
    # `MyClient` — confirm both wrap the same WebHDFS endpoint.
    # NOTE(review): a/b/c are never used below; this looks like leftover
    # connectivity-test code.
    client = Client(the_conf.hdfs_ip())  # "http://10.2.201.197:50070"
    a = MyHdfsFile.get_child(client, "/data/posflow/allinpay_utf8_zc")
    b = MyHdfsFile.get_child_file(client,"/data/posflow/allinpay_utf8_zc")
    c = MyHdfsFile.get_child_dir(client, "/data/posflow/allinpay_utf8_zc")

    # test
    # MyHdfsFile.delete(client, "/data/posflow/allinpay_utf8_zc", "*agt_cpy*")
    # test

    # Pick the run date: the fixed test date in test mode, otherwise the
    # first command-line argument (empty string falls through to "today").
    if the_conf.is_test():
        day_str = the_conf.test_date()
        days = 9
    else:
        # NOTE(review): `days` is not assigned on this branch in the visible
        # code — the snippet appears truncated here; verify against the
        # original source before relying on `days` downstream.
        day_str = StrTool.get_param_str(1, "")
# Example #23
# 0
left join loginfo.t_allinpay_dict dict1 on merchant.aip_bran_id = dict1.dict_id and dict1.dict_type='aip_bran_id'
left join loginfo.t_allinpay_dict dict2 on merchant.inst_oid = dict2.dict_id and dict2.dict_type='inst_oid'
left join loginfo.t_allinpay_dict dict3 on merchant.up_mcc_cd = dict3.dict_id and dict3.dict_type='up_mcc_cd'
left join loginfo.t_allinpay_dict dict4 on merchant.city_cd = dict4.dict_id and dict4.dict_type='city_cd'
where creq.prod_code='8005000001' 
and from_unixtime(unix_timestamp(deductall.CURR_DATE,'yyyymmdd'))>=date_add(current_date(),-5)
    """
    print("OK" + "  " + sql2 + "\n")
    cur.execute(sql2)  # , async=True)
    data = as_pandas(cur)

    print(len(data))

    name = '/home/data/deduct/deduct_gansu_late5_' + StrTool.get_the_date_str(
        '', -1) + '.xlsx'

    writer = pd.ExcelWriter(name)

    data.to_excel(writer, 'Sheet1')

    writer.save()

    cur.close()
    conn.close()


if __name__ == "__main__":
    # Script entry point: run the Hive export with the production
    # (non-test) configuration; an empty date string means "use today".
    run_hive(ConfigData(p_is_test=False), the_date="")
                    # '\' OVERWRITE INTO TABLE test.t1_trxrecprd_v2_bl2'
                    elif conf.m_project_id == 2:
                        sql = 'LOAD DATA INPATH \'{}\' INTO TABLE {} PARTITION ( p_branch=\'{}\', p_date={} )'.format(
                            to_file2, table_name, f_a_branch,
                            the_date)  # 'test.t1_trxrecprd_v2_zc'
                    idn += 1
                    print(str(idn) + "  " + sql + "\n")
                    cur.execute(sql)  # , async=True)

    cur.close()
    conn.close()


if __name__ == "__main__":

    the_conf = ConfigData(p_is_test=False)

    if the_conf.is_test():
        day_str = the_conf.test_date()
        days = 190
    else:
        the_conf.m_project_id = StrTool.get_param_int(1, 1)
        day_str = StrTool.get_param_str(2, "")
        days = StrTool.get_param_int(3, 1)

    if the_conf.m_project_id == 1:
        return_code = subprocess.call("/app/code/posflow_loader/ftpcmd.sh",
                                      shell=True)
        print(return_code)

    f_delta = the_conf.get_data("file_date_delta" + str(the_conf.m_project_id),
def run_conv_file_local_to_hdfs(conf: ConfigData,
                                the_date: str,
                                is_baoli=True):
    """
    Convert the day's per-branch CSV files to UTF-8 locally, then upload
    each converted file to the corresponding HDFS path.

    # client.upload('/shouyinbao/', "/home/testFolder/logflow/bl_shouyinbao/UTF8/20181101/9999100000/t1_trxrecord_20181101_V2.csv", cleanup=True)
    # dat = client.list('/shouyinbao/', status=False)
    # print(dat)

    # root_path = "/home/bd/桌面/201811_flow/zc_shouyinbao/UNZIP/"
    # dest_dir1 = "/home/bd/桌面/201811_flow/zc_shouyinbao/UTF8/"
    # dest_dir2 = "/shouyinbao/zc_shouyinbao/UTF8/"

    # root_path = "/home/testFolder/logflow/bl_shouyinbao/UNZIP/"
    # dest_dir1 = "/home/testFolder/logflow/bl_shouyinbao/UTF8/"
    # dest_dir2 = "/shouyinbao/zc_shouyinbao/UTF8/"

    # i_file = '/home/testFolder/logflow/bl_shouyinbao/20181101/9999100000/t1_trxrecord_20181101_V2.csv'
    # o_file = '/home/testFolder/logflow/bl_shouyinbao/UTF8/20181101/9999100000/t1_trxrecord_20181101_V2.csv'

    :param conf: project configuration (paths, HDFS endpoint, file naming)
    :param the_date: date string, normalized via StrTool.get_the_date_str
    :param is_baoli: unused in the visible body — presumably selects a data
        source variant; TODO confirm against the full function
    :return:
    """
    # Normalize the incoming date (empty string presumably means "today" —
    # see StrTool.get_the_date_str).
    the_date = StrTool.get_the_date_str(the_date)
    p_client = MyClient(url=conf.hdfs_ip())  # "http://10.2.201.197:50070"
    # webhdfs defaults to user "dr.who" and cannot impersonate other users;
    # adjust via hadoop.http.staticuser.user=dr.who in the Hadoop config.
    # https://www.cnblogs.com/peizhe123/p/5540845.html
    # Local source dir, local UTF-8 destination dir, and HDFS destination
    # dir — each suffixed with the run date.
    root_path = os.path.join(conf.get_data_path(), the_date)
    dest_dir1 = os.path.join(conf.get_utf8_path(), the_date)
    dest_dir2 = str(
        pathlib.PurePosixPath(conf.get_hdfs_path()).joinpath(the_date))

    f_name = conf.get_file_name(
        the_date)  # "t1_trxrecord_" the_date # "_V2.csv"

    print("Start\n")

    # One subdirectory per branch; each branch dir holds the day's CSVs.
    branches = MyLocalFile.get_child_dir(root_path)
    for aBranch in branches:
        if MyLocalFile.check_branch(aBranch):
            files = MyLocalFile.get_child_file(aBranch)
            f_a_branch = os.path.basename(aBranch)
            for aFile in files:
                # Only process the file matching the expected day/name.
                if MyLocalFile.check_file(aFile, f_name):
                    # Local UTF-8 output path and matching HDFS target path.
                    to_file1 = os.path.join(dest_dir1, f_a_branch, f_name)
                    to_file2 = str(
                        pathlib.PurePosixPath(dest_dir2).joinpath(
                            f_a_branch, f_name))
                    f_add_date = conf.get_hive_add_date(the_date)
                    f_need_head = conf.get_hive_head()  # False
                    # Convert encoding locally, optionally stripping the
                    # header line and prepending the date column for Hive.
                    MyLocalFile.conv_file_local(aFile,
                                                to_file1,
                                                need_first_line=f_need_head,
                                                p_add_head=f_add_date)
                    MyHdfsFile.safe_make_dir(p_client, to_file2)
                    # client.newupload(to_file2, to_file1, encoding='utf-8')
                    # Upload only if the target does not already exist;
                    # strict=False makes status() return None instead of
                    # raising when the path is missing.
                    the_file = p_client.status(to_file2, strict=False)
                    if the_file is None:
                        # NOTE(review): permission is passed as the int 777,
                        # not the string "777" — verify MyClient accepts
                        # this form; most WebHDFS clients expect an octal
                        # string here.
                        p_client.upload(to_file2, to_file1)
                        p_client.set_permission(to_file2, 777)
                    # client.set_owner(thePath,owner='hdfs',group='supergroup')
                    elif the_file['type'].lower() == 'file':  # 'directory'
                        p_client.set_permission(to_file2, 777)