Example #1
# ConfigData and MyHdfsFile are project-local helpers; the imports below cover
# the third-party pieces this snippet relies on.
import pathlib

from hdfs import InsecureClient   # WebHDFS client (pip install hdfs)
from impala.dbapi import connect  # HiveServer2 connection via impyla (pip install impyla)


def run_hive(configData: ConfigData):
    a_client = InsecureClient(url=configData.hdfs_ip(), user="******")  # "http://10.2.201.197:50070"
    conn = connect(host=configData.hive_ip(), port=configData.hive_port(), auth_mechanism=configData.hive_auth(), user=configData.hive_user())
    cur = conn.cursor()

    f_date_str = configData.get_f_date()  # "20181101"
    p_date_str = configData.get_p_date()  # "2018-11-01"

    root_path = configData.get_hdfs_path()  # "/shouyinbao/bl_shouyinbao/UTF8/"
    file_name = configData.get_file_name(f_date_str)  # "t1_trxrecord_" + f_date_str + "_V2.csv"
    table_name = configData.get_table_name()

    print("Start\n")

    idn = 0
    branches = MyHdfsFile.get_child(a_client, str(pathlib.PurePosixPath(root_path).joinpath(f_date_str)))
    for aBranch in branches:
        if MyHdfsFile.check_branch(a_client, aBranch):
            files = MyHdfsFile.get_child(a_client, aBranch)
            f_a_branch = MyHdfsFile.get_name(aBranch)
            for aFile in files:
                if MyHdfsFile.check_file(a_client, aFile, file_name):
                    # '/shouyinbao/bl_shouyinbao/UTF8/20181101/9999997900/t1_trxrecord_20181101_V2.csv'
                    to_file2 = str(pathlib.PurePosixPath(root_path).joinpath(f_date_str, f_a_branch, file_name))
                    if not configData.get_has_partition():
                        # e.g. table_name = 'test.t1_trxrecprd_v2_zc'
                        sql = "LOAD DATA INPATH '{}' INTO TABLE {}".format(to_file2, table_name)
                        # (an "... OVERWRITE INTO TABLE test.t1_trxrecprd_v2_bl2" variant was used elsewhere)
                    else:
                        sql = "LOAD DATA INPATH '{}' INTO TABLE {} PARTITION ( p_date='{}' )".format(to_file2, table_name, p_date_str)
                    idn += 1
                    print(str(idn) + "  " + sql + "\n")
                    cur.execute(sql)  # older impyla accepted execute(sql, async=True); 'async' is reserved since Python 3.7

    cur.close()
    conn.close()
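
The MyHdfsFile helpers are not shown in this listing. As a rough guide to what
get_child, get_name, check_branch, and check_file presumably do, here is a
minimal sketch built only on the public API of the hdfs package (client.list
and client.status); the names and exact behavior are assumptions, not the
project's actual implementation.

import posixpath
from hdfs import InsecureClient

def get_child(client: InsecureClient, path: str):
    # client.list() returns child names; rebuild absolute paths.
    return [posixpath.join(path, name) for name in client.list(path)]

def get_name(path: str) -> str:
    # Last path component, e.g. the branch (merchant) id.
    return posixpath.basename(path)

def check_branch(client: InsecureClient, path: str) -> bool:
    # A "branch" is assumed to be a subdirectory, one per merchant id.
    return client.status(path)["type"] == "DIRECTORY"

def check_file(client: InsecureClient, path: str, file_name: str) -> bool:
    # Match the exact file name produced by get_file_name().
    return (client.status(path)["type"] == "FILE"
            and posixpath.basename(path) == file_name)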
Example #2
# ConfigData, MyHdfsFile, and StrTool are project-local helpers; third-party imports below.
import pathlib

from hdfs import Client           # WebHDFS client (pip install hdfs)
from impala.dbapi import connect  # HiveServer2 connection via impyla (pip install impyla)


def run_hive(conf: ConfigData, the_date: str, is_baoli=True):
    p_client = Client(conf.hdfs_ip())  # "http://10.2.201.197:50070"
    conn = connect(host=conf.hive_ip(),
                   port=conf.hive_port(),
                   auth_mechanism=conf.hive_auth(),
                   user=conf.hive_user())
    cur = conn.cursor()

    the_date = StrTool.get_the_date_str(the_date)  # "20181101"
    root_path = conf.get_hdfs_path()  # "/shouyinbao/bl_shouyinbao/UTF8/"
    f_name = conf.get_file_name(the_date)  # "t1_trxrecord_" + the_date + "_V2.csv"
    table_name = conf.get_table_name()

    print("Start\n")

    idn = 0
    branches = MyHdfsFile.get_child(p_client, root_path + the_date)  # root_path ends with '/'
    for aBranch in branches:
        if MyHdfsFile.check_branch(p_client, aBranch):
            files = MyHdfsFile.get_child(p_client, aBranch)
            f_a_branch = MyHdfsFile.get_name(aBranch)
            for aFile in files:
                if MyHdfsFile.check_file(p_client, aFile, f_name):
                    # '/shouyinbao/bl_shouyinbao/UTF8/20181101/9999997900/t1_trxrecord_20181101_V2.csv'
                    to_file2 = str(
                        pathlib.PurePosixPath(root_path).joinpath(
                            the_date, f_a_branch, f_name))
                    if conf.m_project_id == 1:
                        # e.g. table_name = 'test.t1_trxrecprd_v2_zc'
                        sql = "LOAD DATA INPATH '{}' INTO TABLE {}".format(
                            to_file2, table_name)
                        # (an "... OVERWRITE INTO TABLE test.t1_trxrecprd_v2_bl2" variant was used elsewhere)
                    elif conf.m_project_id == 2:
                        sql = ("LOAD DATA INPATH '{}' INTO TABLE {} "
                               "PARTITION ( p_branch='{}', p_date={} )").format(
                            to_file2, table_name, f_a_branch, the_date)
                    # NOTE: sql stays unbound for any other project id, so the
                    # execute() below would raise UnboundLocalError in that case.
                    idn += 1
                    print(str(idn) + "  " + sql + "\n")
                    cur.execute(sql)  # older impyla accepted execute(sql, async=True); 'async' is reserved since Python 3.7

    cur.close()
    conn.close()
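
The commented-out async=True argument in both examples dates from an older
impyla API; async became a reserved word in Python 3.7. With a recent impyla,
a non-blocking submit would look roughly like the sketch below, assuming the
cursor's execute_async() and is_executing() methods; treat it as an outline,
not a drop-in replacement.

import time
from impala.dbapi import connect

def load_async(host: str, port: int, sql: str, auth_mechanism: str = "PLAIN"):
    # Submit a statement without blocking, then poll until HiveServer2 is done.
    conn = connect(host=host, port=port, auth_mechanism=auth_mechanism)
    cur = conn.cursor()
    cur.execute_async(sql)
    while cur.is_executing():
        time.sleep(1)
    cur.close()
    conn.close()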
Example #3
    # (fragment: the enclosing function definition and the data_path / utf8_path
    # variables are not included in this listing)
    hdfs_path = str(pathlib.PurePosixPath(conf.get_hdfs_path()).joinpath(sdate))
    shutil.rmtree(data_path, ignore_errors=True)  # clear the local raw-data dir
    shutil.rmtree(utf8_path, ignore_errors=True)  # clear the local UTF-8 output dir
    client = MyClient(conf.hdfs_ip())  # "http://10.2.201.197:50070"
    client.delete(hdfs_path, recursive=True)  # remove the day's HDFS directory
    # "/user/hive/warehouse/posflow.db/t1_trxrecprd_v2/t1_trxrecord_20181204_V2*.csv"
    # hive_table="posflow.t1_trxrecprd_v2",
    # file_pre1 = 't1_trxrecord_',
    # file_ext2 = "_V2.csv",


if __name__ == "__main__":
    the_conf = ConfigData(p_is_test=False)

    client = Client(the_conf.hdfs_ip())  # "http://10.2.201.197:50070"
    a = MyHdfsFile.get_child(client, "/data/posflow/allinpay_utf8_zc")       # all children
    b = MyHdfsFile.get_child_file(client, "/data/posflow/allinpay_utf8_zc")  # files only
    c = MyHdfsFile.get_child_dir(client, "/data/posflow/allinpay_utf8_zc")   # directories only

    # test
    # MyHdfsFile.delete(client, "/data/posflow/allinpay_utf8_zc", "*agt_cpy*")
    # test

    if the_conf.is_test():
        day_str = the_conf.test_date()
        days = 9
    else:
        day_str = StrTool.get_param_str(1, "")
        days = StrTool.get_param_int(2, 1)

    day_str = StrTool.get_the_date_str(day_str)
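
StrTool's parameter helpers are project-local. Judging by how they are called
above (a positional index plus a default), a minimal stand-in could look like
the sketch below; this is an assumption about their behavior, not the actual
implementation.

import sys

def get_param_str(index: int, default: str = "") -> str:
    # Return sys.argv[index] if present, otherwise the default.
    return sys.argv[index] if len(sys.argv) > index else default

def get_param_int(index: int, default: int = 0) -> int:
    # Same as get_param_str, but parsed as an int with a fallback.
    try:
        return int(sys.argv[index])
    except (IndexError, ValueError):
        return default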