예제 #1
0
def run_unzip_file(conf: ConfigData, p_date: str):
    """Unzip the archive of one date into that date's data folder.

    The date is first normalised by ``StrTool.get_the_date_str``. If the
    result is not an 8-character ``str`` the function returns without
    doing anything.

    :param conf: configuration object providing the zip path, the data
        path, the archive name and the member file name
    :param p_date: the date to process
    :return: None
    """
    p_date = StrTool.get_the_date_str(p_date)
    # Guard clause: only an exact ``str`` of length 8 is accepted.
    if type(p_date) is not str or len(p_date) != 8:
        return

    archive_dir = os.path.join(conf.get_zip_path(), p_date)
    target_dir = os.path.join(conf.get_data_path(), p_date)

    print("Start\n")

    archive_file = os.path.join(archive_dir, conf.get_zip_name(p_date))
    member_name = conf.get_file_name(p_date)
    # Nothing to extract when the archive does not pass the file check.
    if not MyLocalFile.check_file(archive_file):
        return
    MyLocalFile.unzip_the_file(archive_file, target_dir, p_name=member_name)
예제 #2
0
def runCopyFile(conf: ConfigData, isBaoli=True):
    """Copy every matching file found in the branch/month/day folder tree.

    The tree below ``conf.get_zip_path(1)`` is walked level by level
    (branch, month, day, file). Each file accepted by
    ``MyLocalFile.check_file`` is handed to ``copyTheFile`` together with
    the destination folder, the branch path and the month/day values
    returned by the checks.

    :param conf: configuration object providing the date, the paths and
        the file name pattern
    :param isBaoli: not used inside this function
    :return: None
    """
    thedate = conf.test_date()
    root_path = conf.get_zip_path(1)
    destdir = os.path.join(conf.get_data_path(1), thedate)
    f_name = conf.get_zip_name("*", 1)

    print("Start\n")

    for branch_dir in MyLocalFile.get_child(root_path):
        if not MyLocalFile.check_branch(branch_dir):
            continue
        for month_dir in MyLocalFile.get_child(branch_dir):
            month_no = MyLocalFile.check_month(month_dir)
            # Only positive results from the month check are followed.
            if not (month_no > 0):
                continue
            for day_dir in MyLocalFile.get_child(month_dir):
                day_no = MyLocalFile.check_day(day_dir)
                # Only positive results from the day check are followed.
                if not (day_no > 0):
                    continue
                for a_file in MyLocalFile.get_child(day_dir):
                    if not MyLocalFile.check_file(a_file, p_name=f_name):
                        continue
                    copyTheFile(destdir, branch_dir, month_no, day_no,
                                a_file, 1)
예제 #3
0
def run_remove_files(conf: ConfigData, the_date: str, delta_day=0):
    """Remove the local and HDFS folders belonging to one date.

    The date string comes from ``StrTool.get_the_date_str(the_date,
    delta_day)``. The local data folder and the local UTF-8 folder are
    removed first (errors are ignored), then the HDFS client is asked to
    delete the date folder recursively.

    :param conf: configuration object providing the paths and the HDFS
        address
    :param the_date: the reference date
    :param delta_day: passed through to ``StrTool.get_the_date_str``
    :return: None
    """
    sdate = StrTool.get_the_date_str(the_date, delta_day)
    local_dirs = (
        os.path.join(conf.get_data_path(1), sdate),
        os.path.join(conf.get_utf8_path(1), sdate),
    )
    # HDFS paths always use forward slashes, hence PurePosixPath.
    hdfs_dir = pathlib.PurePosixPath(conf.get_hdfs_path()) / sdate

    for local_dir in local_dirs:
        shutil.rmtree(local_dir, ignore_errors=True)

    MyClient(conf.hdfs_ip()).delete(str(hdfs_dir), recursive=True)
예제 #4
0
def run_remove_files(conf: ConfigData, the_date: str, delta_day=0):
    """Delete one date's local data/UTF-8 folders and its HDFS folder.

    The HDFS client is created before anything is removed. The two local
    folders are then removed with errors ignored, and finally the HDFS
    date folder is deleted recursively through the client.

    :param conf: configuration object providing the paths and the HDFS
        address
    :param the_date: the reference date
    :param delta_day: passed through to ``StrTool.get_the_date_str``
    :return: None
    """
    f_date_str = StrTool.get_the_date_str(the_date, delta_day)

    local_data_dir = os.path.join(conf.get_data_path(), f_date_str)
    local_utf8_dir = os.path.join(conf.get_utf8_path(), f_date_str)
    # HDFS paths always use forward slashes, hence PurePosixPath.
    hdfs_dir = str(pathlib.PurePosixPath(conf.get_hdfs_path()) / f_date_str)

    a_client = MyClient(conf.hdfs_ip())

    for local_dir in (local_data_dir, local_utf8_dir):
        shutil.rmtree(local_dir, ignore_errors=True)
    a_client.delete(hdfs_dir, recursive=True)
예제 #5
0
def run_conv_file_local_to_hdfs(conf: ConfigData, the_date: str):
    """
    Convert the date's data file locally, then upload the result to HDFS.

    Every file directly inside the date's data folder whose lower-cased
    base name equals the lower-cased ``conf.get_file_name(the_date)`` is
    converted into the UTF-8 folder with ``MyLocalFile.conv_file_local``.
    The converted file is uploaded when nothing exists at the HDFS target
    yet; permission 777 is applied to a freshly uploaded or already
    existing file. An existing non-file entry is left untouched.

    :param conf: configuration object providing paths, file name and the
        conversion options
    :param the_date: the date to process
    :return: None
    """
    the_date = StrTool.get_the_date_str(the_date)
    a_client = MyClient(conf.hdfs_ip())
    data_path = os.path.join(conf.get_data_path(), the_date)
    local_out_dir = os.path.join(conf.get_utf8_path(), the_date)
    # HDFS paths always use forward slashes, hence PurePosixPath.
    hdfs_out_dir = str(
        pathlib.PurePosixPath(conf.get_hdfs_path()) / the_date)

    print("Start\n")

    wanted_name = conf.get_file_name(the_date).lower()
    for src_file in MyLocalFile.get_child_file(data_path):
        lowered_name = os.path.basename(src_file).lower()
        real_name = pathlib.PurePath(src_file).name
        # The comparison is case-insensitive; the original spelling of the
        # name is kept for the output files.
        if lowered_name != wanted_name:
            continue

        local_file = str(pathlib.PurePath(local_out_dir) / real_name)
        hdfs_file = str(pathlib.PurePosixPath(hdfs_out_dir) / real_name)
        add_date = conf.get_hive_add_date(the_date)
        need_head = conf.get_hive_head()
        MyLocalFile.conv_file_local(src_file,
                                    local_file,
                                    need_first_line=need_head,
                                    p_add_head=add_date)
        # NOTE(review): presumably makes sure the HDFS target folder
        # exists - confirm against MyHdfsFile.safe_make_dir.
        MyHdfsFile.safe_make_dir(a_client, hdfs_file)

        remote_info = a_client.status(hdfs_file, strict=False)
        if remote_info is None:
            a_client.upload(hdfs_file, local_file)
        elif remote_info['type'].lower() != 'file':
            # Something that is not a file already occupies the target.
            continue
        a_client.set_permission(hdfs_file, 777)
예제 #6
0
def run_unzip_file(conf: ConfigData, the_date, folder_type=2):
    """Unzip the matching archives of one date for every branch folder.

    The tree below ``conf.get_zip_path()`` is walked as
    branch / month / day / file. Only the month and day folders whose
    checked numbers, zero-padded to 6 and 2 digits, equal the first six
    and last two characters of the date string are followed. Each accepted
    archive is extracted below the data path into

    * ``<month>/<day>/<branch>`` when ``folder_type == 1``
    * ``<month><day>/<branch>`` otherwise

    The function returns without doing anything when the normalised date
    is not an 8-character ``str``.

    :param conf: configuration object providing paths and file names
    :param the_date: the date to process
    :param folder_type: selects the output folder layout (see above)
    :return: None
    """
    the_date = StrTool.get_the_date_str(the_date)
    if type(the_date) is not str or len(the_date) != 8:
        return
    m_month, m_day = the_date[:6], the_date[6:]

    zip_path = conf.get_zip_path()
    data_path = conf.get_data_path()
    f_name = conf.get_zip_name("")

    print("Start\n")

    for branch_dir in MyLocalFile.get_child_dir(zip_path):
        if not MyLocalFile.check_branch(branch_dir):
            continue
        for month_dir in MyLocalFile.get_child_dir(branch_dir):
            month_no = MyLocalFile.check_month(month_dir)
            if not (month_no > 0 and format(month_no, "0>6d") == m_month):
                continue
            for day_dir in MyLocalFile.get_child_dir(month_dir):
                day_no = MyLocalFile.check_day(day_dir)
                if not (day_no > 0 and format(day_no, "0>2d") == m_day):
                    continue
                for zip_file in MyLocalFile.get_child_file(day_dir):
                    if not MyLocalFile.check_file(zip_file, p_name=f_name):
                        continue
                    branch_name = os.path.basename(branch_dir)
                    # Layout 1 nests month and day; any other value
                    # uses one combined date folder.
                    if folder_type == 1:
                        date_parts = (m_month, m_day)
                    else:
                        date_parts = (m_month + m_day,)
                    new_path = os.path.join(data_path, *date_parts,
                                            branch_name)
                    p_name = conf.get_file_name(m_month + m_day)
                    MyLocalFile.unzip_the_file(zip_file, new_path, p_name)
예제 #7
0
def run_conv_file_local(conf: ConfigData, the_date: str, is_baoli=True):
    """Convert the date's file of every branch folder into the UTF-8 tree.

    For each accepted branch folder below ``<data path>/<date>``, every
    child accepted by ``MyLocalFile.check_file`` is converted with
    ``MyLocalFile.conv_file_local`` into
    ``<utf8 path>/<date>/<branch>/<file name>``.

    :param conf: configuration object providing paths and the file name
    :param the_date: the date to process
    :param is_baoli: not used inside this function
    :return: None
    """
    the_date = StrTool.get_the_date_str(the_date)
    root_path = conf.get_data_path()
    dest_dir = conf.get_utf8_path()
    f_name = conf.get_file_name(the_date)

    print("Start\n")

    date_dir = os.path.join(root_path, the_date)
    for branch_dir in MyLocalFile.get_child(date_dir):
        if not MyLocalFile.check_branch(branch_dir):
            continue
        for src_file in MyLocalFile.get_child(branch_dir):
            if not MyLocalFile.check_file(src_file, f_name):
                continue
            target_file = os.path.join(dest_dir, the_date,
                                       os.path.basename(branch_dir), f_name)
            MyLocalFile.conv_file_local(src_file, target_file, True)
예제 #8
0
def run_conv_file_hdfs(conf: ConfigData, the_date: str, is_baoli=True):
    """Convert the date's file of every branch folder straight into HDFS.

    For each accepted branch folder below ``<data path>/<date>``, every
    child accepted by ``MyLocalFile.check_file`` is passed to
    ``MyHdfsFile.conv_file_hdfs`` with the HDFS target
    ``<hdfs path>/<date>/<branch>/<file name>``.

    The HDFS target is joined with ``posixpath`` instead of ``os.path``.
    ``os.path.join`` uses the separator of the local operating system, so
    on Windows it would put backslashes into the HDFS path. On POSIX
    hosts the resulting path is the same as before.

    :param conf: configuration object providing paths, the file name and
        the HDFS address
    :param the_date: the date to process
    :param is_baoli: not used inside this function
    :return: None
    """
    # Function-scope import so the block does not depend on the file's
    # import section.
    import posixpath

    the_date = StrTool.get_the_date_str(the_date)
    client = Client(conf.hdfs_ip())  # e.g. "http://10.2.201.197:50070"
    root_path = conf.get_data_path()  # local path, e.g. 'D:/DATA/UNZIP/'
    dest_dir = conf.get_hdfs_path()

    f_name = conf.get_file_name(the_date)

    print("Start\n")

    branches = MyLocalFile.get_child(os.path.join(root_path, the_date))
    for aBranch in branches:
        if not MyLocalFile.check_branch(aBranch):
            continue
        # The branch folder is a local path, so os.path is correct here.
        branch_name = os.path.basename(aBranch)
        for aFile in MyLocalFile.get_child(aBranch):
            if MyLocalFile.check_file(aFile, f_name):
                # HDFS side: always forward slashes, whatever the host OS.
                hdfs_file = posixpath.join(dest_dir, the_date, branch_name,
                                           f_name)
                MyHdfsFile.conv_file_hdfs(aFile, hdfs_file, client)
예제 #9
0
def run_conv_file_local_to_hdfs(conf: ConfigData,
                                the_date: str,
                                is_baoli=True):
    """
    Convert each branch's file locally, then upload the result to HDFS.

    For every accepted branch folder below ``<data path>/<date>``, each
    file accepted by ``MyLocalFile.check_file`` is converted into
    ``<utf8 path>/<date>/<branch>/<file name>``. That converted file is
    uploaded to ``<hdfs path>/<date>/<branch>/<file name>`` when nothing
    exists there yet. Permission 777 is applied to a freshly uploaded or
    already existing file; an existing non-file entry is left untouched.

    :param conf: configuration object providing paths, the file name, the
        conversion options and the HDFS address
    :param the_date: the date to process
    :param is_baoli: not used inside this function
    :return: None
    """
    the_date = StrTool.get_the_date_str(the_date)
    p_client = MyClient(url=conf.hdfs_ip())
    # WebHDFS acts as user "dr.who" by default and cannot impersonate
    # other users; this can be changed in the Hadoop configuration via
    # hadoop.http.staticuser.user=dr.who
    # https://www.cnblogs.com/peizhe123/p/5540845.html
    root_path = os.path.join(conf.get_data_path(), the_date)
    local_out_dir = os.path.join(conf.get_utf8_path(), the_date)
    # HDFS paths always use forward slashes, hence PurePosixPath.
    hdfs_out_dir = pathlib.PurePosixPath(conf.get_hdfs_path()) / the_date

    f_name = conf.get_file_name(the_date)

    print("Start\n")

    for branch_dir in MyLocalFile.get_child_dir(root_path):
        if not MyLocalFile.check_branch(branch_dir):
            continue
        branch_files = MyLocalFile.get_child_file(branch_dir)
        branch_name = os.path.basename(branch_dir)
        for src_file in branch_files:
            if not MyLocalFile.check_file(src_file, f_name):
                continue

            local_file = os.path.join(local_out_dir, branch_name, f_name)
            hdfs_file = str(hdfs_out_dir / branch_name / f_name)
            add_date = conf.get_hive_add_date(the_date)
            need_head = conf.get_hive_head()
            MyLocalFile.conv_file_local(src_file,
                                        local_file,
                                        need_first_line=need_head,
                                        p_add_head=add_date)
            # NOTE(review): presumably makes sure the HDFS target folder
            # exists - confirm against MyHdfsFile.safe_make_dir.
            MyHdfsFile.safe_make_dir(p_client, hdfs_file)

            remote_info = p_client.status(hdfs_file, strict=False)
            if remote_info is None:
                p_client.upload(hdfs_file, local_file)
            elif remote_info['type'].lower() != 'file':
                # Something that is not a file already occupies the target.
                continue
            p_client.set_permission(hdfs_file, 777)