예제 #1
0
def run_unzip_file(conf: ConfigData, the_date: str):
    """Extract the daily archives for ``the_date`` inside the local data folder.

    The date is first normalised with ``StrTool.get_the_date_str``; if the
    result is not an 8-character string the function returns without doing
    anything.  For each archive name returned by ``conf.get_zip_name`` (kinds
    3 and 5) that exists under the ``allinpay_data_zc`` folder, the members
    matching ``<date>*`` are extracted into that same folder.

    :param conf: configuration object supplying the folder and archive names.
    :param the_date: date to process, in any form accepted by
        ``StrTool.get_the_date_str``.
    :return: None
    """
    the_date = StrTool.get_the_date_str(the_date)
    if not (isinstance(the_date, str) and len(the_date) == 8):
        return

    # Source and destination are the same configured folder, so one lookup
    # is enough.
    root_path = conf.get_data("allinpay_data_zc")
    destdir = root_path

    print("Start\n")

    # According to the original inline notes: kind 3 is "<date>.zip" and
    # kind 5 is "<date>_agt.zip".
    for zip_kind in (3, 5):
        a_file = os.path.join(root_path, conf.get_zip_name(the_date, zip_kind))
        if MyLocalFile.check_file(a_file):
            MyLocalFile.unzip_the_file(a_file, destdir, p_name=the_date + "*")
예제 #2
0
def run_hive(conf: ConfigData, the_date: str):
    """Load the daily HDFS files for ``the_date`` into their Hive tables.

    Four candidate files ``<hdfs_dir_zc>/<date><file_extN>`` (N = 3..6) are
    checked; each one that exists on HDFS is loaded with
    ``LOAD DATA INPATH`` into the table named by config key ``hive_tableN``.

    The Hive cursor and connection are always closed, including when a
    statement fails part-way through.

    :param conf: configuration object supplying HDFS/Hive endpoints, the
        HDFS folder, file suffixes and table names.
    :param the_date: date to process, in any form accepted by
        ``StrTool.get_the_date_str``.
    :return: None
    """
    client = Client(conf.hdfs_ip())  # e.g. "http://10.2.201.197:50070"
    conn = connect(host=conf.hive_ip(),
                   port=conf.hive_port(),
                   auth_mechanism=conf.hive_auth(),
                   user=conf.hive_user())
    try:
        cur = conn.cursor()
        try:
            the_date = StrTool.get_the_date_str(the_date)  # e.g. "20181101"
            # e.g. "/data/posflow/allinpay_utf8_zc/"
            hdfs_root = pathlib.PurePosixPath(conf.get_data("hdfs_dir_zc"))

            # Pair every file-suffix config key with its target-table key.
            # Suffix examples from the original notes: "_loginfo_rsp.txt",
            # "_loginfo_rsp_agt.txt", "_rxinfo_rsp.txt", "_rxinfo_rsp_agt.txt".
            ext_keys = ("file_ext3", "file_ext4", "file_ext5", "file_ext6")
            table_keys = ("hive_table3", "hive_table4",
                          "hive_table5", "hive_table6")
            hdfs_files = [
                str(hdfs_root.joinpath(the_date + conf.get_data(ext_key)))
                for ext_key in ext_keys
            ]

            print("Start\n")

            for hdfs_file, table_key in zip(hdfs_files, table_keys):
                if not MyHdfsFile.isfile(client, hdfs_file):
                    continue
                # The table name is only looked up for files that exist.
                sql = ("LOAD DATA INPATH '" + hdfs_file +
                       "' INTO TABLE " + conf.get_data(table_key))
                print("OK" + "  " + sql + "\n")
                cur.execute(sql)
        finally:
            cur.close()
    finally:
        conn.close()
예제 #3
0
def run_remove_hive(conf: ConfigData, the_date: str, delta_day=0):
    """Remove one day's data from the Hive table configured in ``conf``.

    The target date is ``the_date`` shifted by ``delta_day`` days, as computed
    by ``StrTool.get_the_date_str``.  What is removed depends on
    ``conf.m_project_id``:

    * ``1`` - the table's files matching the day's file name (with ``*``
      inserted before every ``.``) are deleted through
      ``MyHdfsFile.delete_hive_ssh``.
    * ``2`` - the ``p_date=<date>`` partition is dropped with
      ``ALTER TABLE ... DROP IF EXISTS PARTITION``.

    Any other project id does nothing.

    :param conf: configuration object; its ``m_project_id`` selects the
        removal strategy.
    :param the_date: base date, in any form accepted by
        ``StrTool.get_the_date_str``.
    :param delta_day: day offset applied to ``the_date``.
    :return: None
    """
    f_date_str = StrTool.get_the_date_str(the_date, delta_day)  # "20181101"
    del_table = conf.get_table_name()  # e.g. "rds_posflow.t1_trxrecprd_v2"

    # Use the configuration that was passed in, not a module-level global.
    project_id = conf.m_project_id

    if project_id == 1:
        # e.g. "t1_trxrecord_20181204_V2.csv" -> "t1_trxrecord_20181204_V2*.csv"
        del_file = conf.get_file_name(f_date_str).replace('.', '*.')
        MyHdfsFile.delete_hive_ssh(conf.get_data("cdh_ip"),
                                   table=del_table,
                                   p_name=del_file,
                                   username=conf.get_data("cdh_user"),
                                   password=conf.get_data("cdh_pass"))
    elif project_id == 2:
        conn = connect(host=conf.hive_ip(),
                       port=conf.hive_port(),
                       auth_mechanism=conf.hive_auth(),
                       user=conf.hive_user())
        try:
            cur = conn.cursor()
            try:
                # Format the normalised, offset date so delta_day is honoured
                # here just as it is in the project-1 branch.
                # e.g. "ALTER TABLE rds_posflow.t1_trxrecprd_v2_tmp
                #       DROP IF EXISTS PARTITION( p_date=20190208 )"
                sql = "ALTER TABLE {} DROP IF EXISTS PARTITION( p_date={} )".format(
                    del_table, f_date_str)
                print(sql)
                cur.execute(sql)
            finally:
                cur.close()
        finally:
            # Close the connection even if the statement failed.
            conn.close()
예제 #4
0
def run_remove_hive(conf: ConfigData, the_date: str, delta_day=0):
    """Delete one day's files of the configured Hive table over SSH.

    The target date is ``the_date`` shifted by ``delta_day`` days, as computed
    by ``StrTool.get_the_date_str``.  The day's file name from
    ``conf.get_file_name`` is turned into a pattern by inserting ``*`` before
    every ``.`` and handed to ``MyHdfsFile.delete_hive_ssh`` together with the
    table name and the ``cdh_*`` connection settings.

    :param conf: configuration object supplying table, file name and
        connection settings.
    :param the_date: base date, in any form accepted by
        ``StrTool.get_the_date_str``.
    :param delta_day: day offset applied to ``the_date``.
    :return: None
    """
    date_str = StrTool.get_the_date_str(the_date, delta_day)  # "20181101"
    table_name = conf.get_table_name()  # e.g. "rds_posflow.loginfo_rsp_bl"

    # Splitting on "." and re-joining with "*." puts a wildcard before every
    # dot in the file name.
    file_pattern = "*.".join(conf.get_file_name(date_str).split("."))

    host = conf.get_data("cdh_ip")
    user = conf.get_data("cdh_user")
    secret = conf.get_data("cdh_pass")
    MyHdfsFile.delete_hive_ssh(host,
                               table=table_name,
                               p_name=file_pattern,
                               username=user,
                               password=secret)
예제 #5
0
def run_sftp_file(conf: ConfigData, the_date: str):
    """Download the day's ``<date>*.zip`` archives from the SFTP server.

    The date is first normalised with ``StrTool.get_the_date_str``; if the
    result is not an 8-character string the function returns without
    connecting.  Otherwise an ``Sftp_Tool`` session is opened with the
    ``allinpay_ftp_*_zc`` settings and the matching files are downloaded from
    ``allinpay_ftp_folder_zc`` into ``allinpay_data_zc``.

    :param conf: configuration object supplying the SFTP settings and folders.
    :param the_date: date to process, in any form accepted by
        ``StrTool.get_the_date_str``.
    :return: None
    """
    the_date = StrTool.get_the_date_str(the_date)
    if not (isinstance(the_date, str) and len(the_date) == 8):
        return

    # Both folders are used for the constructor and for the download call,
    # so each is looked up once.
    remote_dir = conf.get_data("allinpay_ftp_folder_zc")
    local_dir = conf.get_data("allinpay_data_zc")

    a = sftp_tool.Sftp_Tool(h=conf.get_data("allinpay_ftp_ip_zc"),
                            p=int(conf.get_data("allinpay_ftp_port_zc")),
                            u=conf.get_data("allinpay_ftp_user_zc"),
                            s=conf.get_data("allinpay_ftp_pass_zc"),
                            r=remote_dir,
                            d=local_dir)
    a.openSFTP()
    # NOTE(review): the session is never explicitly closed here; whether
    # Sftp_Tool needs or offers a close call is not visible from this block.
    a.download_files(from_dir=remote_dir,
                     to_dir=local_dir,
                     p_name=the_date + "*.zip")
예제 #6
0
def run_conv_file_local_to_hdfs(conf: ConfigData, the_date: str):
    """Convert the day's local data files and publish them to HDFS.

    Every file under the ``allinpay_data_zc`` folder whose base name equals
    ``<date><file_extN>`` (N = 3..6, compared case-insensitively) is:

    1. converted with ``MyLocalFile.conv_file_local`` into the
       ``allinpay_utf8_zc`` folder (presumably re-encoded to UTF-8, judging
       by the config key - confirm against the helper);
    2. uploaded to the ``hdfs_dir_zc`` folder if nothing exists at the target
       path yet, then given permission 777;
    3. only given permission 777 if the target already exists as a file.

    :param conf: configuration object supplying folders, suffixes and the
        HDFS endpoint.
    :param the_date: date to process, in any form accepted by
        ``StrTool.get_the_date_str``.
    :return: None
    """
    the_date = StrTool.get_the_date_str(the_date)
    client = MyClient(conf.hdfs_ip())  # e.g. "http://10.2.201.197:50070"
    local_src_dir = conf.get_data("allinpay_data_zc")
    local_conv_dir = conf.get_data("allinpay_utf8_zc")
    hdfs_dir = conf.get_data("hdfs_dir_zc")
    # Suffix examples from the original notes: "_loginfo_rsp.txt",
    # "_loginfo_rsp_agt.txt", "_rxinfo_rsp.txt", "_rxinfo_rsp_agt.txt".
    suffixes = (
        conf.get_data("file_ext3"),
        conf.get_data("file_ext4"),
        conf.get_data("file_ext5"),
        conf.get_data("file_ext6"),
    )

    print("Start\n")

    for src_path in MyLocalFile.get_child_file(local_src_dir):
        base_lower = os.path.basename(src_path).lower()
        # Lazy generator: the suffixes are tried in order and evaluation stops
        # at the first match.
        wanted = any(
            base_lower == (the_date + suffix).lower() for suffix in suffixes
        )
        if not wanted:
            continue

        file_name = pathlib.PurePath(src_path).name
        local_target = str(pathlib.PurePath(local_conv_dir).joinpath(file_name))
        # HDFS paths always use POSIX separators.
        hdfs_target = str(pathlib.PurePosixPath(hdfs_dir).joinpath(file_name))

        MyLocalFile.conv_file_local(src_path, local_target,
                                    need_first_line=True)
        MyHdfsFile.safe_make_dir(client, hdfs_target)

        status = client.status(hdfs_target, strict=False)
        if status is None:
            # Nothing at the target path yet: upload, then open permissions.
            client.upload(hdfs_target, local_target)
            client.set_permission(hdfs_target, 777)
        elif status['type'].lower() == 'file':
            # Already present as a file: only reset the permissions.
            client.set_permission(hdfs_target, 777)
예제 #7
0
    # Driver fragment: pick the project and date, optionally run the FTP
    # script, then call run_remove_files over two ranges of days.
    the_conf = ConfigData(p_is_test=False)

    if the_conf.is_test():
        # Test mode: the date comes from the config and the window is fixed.
        day_str = the_conf.test_date()
        days = 190
    else:
        # Values come from StrTool.get_param_*(index, default); presumably
        # positional command-line arguments - TODO confirm.
        the_conf.m_project_id = StrTool.get_param_int(1, 1)
        day_str = StrTool.get_param_str(2, "")
        days = StrTool.get_param_int(3, 1)

    if the_conf.m_project_id == 1:
        # Project 1 first runs the external FTP script and prints its exit code.
        return_code = subprocess.call("/app/code/posflow_loader/ftpcmd.sh",
                                      shell=True)
        print(return_code)

    # Per-project day offset ("file_date_delta<project id>", default "0"),
    # applied negated to the reference date.
    f_delta = the_conf.get_data("file_date_delta" + str(the_conf.m_project_id),
                                "0")
    day_str = StrTool.get_the_date_str(day_str, -int(f_delta))

    del_range = 30  # time range of old data to delete, in days
    keep_range = 7  # time range of recent old data to keep, in days

    # First pass: offsets run from -(days + keep_range + del_range - 1)
    # up to -(days + keep_range), relative to day_str.
    for i in range(0, del_range):
        run_remove_files(the_conf, day_str,
                         -(days + keep_range + del_range - 1 - i))

    # Second pass: for each of the last `days` dates ending at day_str
    # (oldest first), call run_remove_files with an offset of -keep_range.
    date1 = StrTool.get_the_date(day_str)
    for i in range(0, days):
        delta = days - i - 1
        date2 = date1 - datetime.timedelta(days=delta)
        day_str2 = date2.strftime("%Y%m%d")
        run_remove_files(the_conf, day_str2, -keep_range)