Example #1
0
    # Script tail (the enclosing scope header is not visible in this chunk):
    # merge local output files, run main(), then push the resulting JSON
    # files to HDFS, replacing any previous copies.

    # merge_file() is defined elsewhere; a falsy return is treated as
    # failure — TODO confirm its contract.
    stat = merge_file()
    if not stat:
        # NOTE(review): this is an error path but is logged at INFO level;
        # logging.error would be more appropriate.
        logging.info('Get file from HDFS error!')
        sys.exit(1)

    # NOTE(review): NODE is imported here but not used in the visible
    # lines; it may be needed by code outside this view.
    from utils import NODE

    # NOTE(review): `int(sys.argv[1]) or 10` silently falls back to 10 when
    # the argument is "0", and raises IndexError/ValueError when it is
    # missing or non-numeric — confirm whether that is intended.
    level = int(sys.argv[1]) or 10
    output_rel_path = BASE_FILE_PATH.get_output_rel_path()
    output_eid_mapping_path = BASE_FILE_PATH.get_output_eid_mapping_path()

    # Produce the relation / eid-mapping output files at the paths above.
    main(output_rel_path, output_eid_mapping_path, level)

    # Existence check: `hdfs dfs -stat` prints a line only when the path
    # exists, so a non-empty readlines() result means "already present".
    tmp = os.popen('hdfs dfs -stat %s' %
                   BASE_SPARK.get_hdfs_rel_json_path()).readlines()
    if len(tmp):
        # Remove the stale copy before uploading the fresh one.
        os.system('hdfs dfs -rm %s' % BASE_SPARK.get_hdfs_rel_json_path())
    os.system('hdfs dfs -put %s %s' %
              (output_rel_path, BASE_SPARK.get_hdfs_rel_json_path()))

    # Same remove-then-put sequence for the eid-mapping JSON.
    tmp = os.popen('hdfs dfs -stat %s' %
                   BASE_SPARK.get_hdfs_eid_mapping_json_path()).readlines()
    if len(tmp):
        os.system('hdfs dfs -rm %s' %
                  BASE_SPARK.get_hdfs_eid_mapping_json_path())
    os.system(
        'hdfs dfs -put %s %s' %
        (output_eid_mapping_path, BASE_SPARK.get_hdfs_eid_mapping_json_path()))

    logging.info('=====Processing done at %s!!!=====' % get_date())
Example #2
0
            .getOrCreate()

        # Interior of a try-block whose opening (and the SparkSession
        # builder chain that the line above completes) is outside this
        # view. Flow: load each JSON from HDFS into a Spark temp view,
        # save it as a managed Hive table, then copy selected columns
        # into the test.* tables via the hive CLI.
        hdfs_rel_json = BASE_SPARK.get_hdfs_rel_json_path()

        # `hdfs dfs -stat` emits output only for an existing path, so a
        # non-empty readlines() result means the JSON file is present.
        temp = os.popen('hdfs dfs -stat %s' % hdfs_rel_json).readlines()
        if len(temp):
            json_data = spark.read.json('hdfs://%s' % hdfs_rel_json)
            json_data.createOrReplaceTempView('output_table')

            # Materialize the temp view as a managed Hive table,
            # replacing any previous run's contents.
            spark.table('output_table').write.mode('overwrite').saveAsTable(
                BASE_SPARK.get_output_rel_table())
            # NOTE(review): shells out to the hive CLI with an
            # interpolated table name — confirm the name comes from
            # trusted configuration only.
            os.system(
                'hive -e "INSERT OVERWRITE TABLE test.grnt1 SELECT id, value FROM {0};"'
                .format(BASE_SPARK.get_output_rel_table()))

        # Same existence-check / load / overwrite sequence for the
        # eid-mapping JSON -> test.grnt1_eid.
        hdfs_eid_mapping_json = BASE_SPARK.get_hdfs_eid_mapping_json_path()

        temp = os.popen('hdfs dfs -stat %s' %
                        hdfs_eid_mapping_json).readlines()
        if len(temp):
            json_data = spark.read.json('hdfs://%s' % hdfs_eid_mapping_json)
            json_data.createOrReplaceTempView('eid_mapping')

            spark.table('eid_mapping').write.mode('overwrite').saveAsTable(
                BASE_SPARK.get_output_eid_mapping_table())
            os.system(
                'hive -e "INSERT OVERWRITE TABLE test.grnt1_eid SELECT eid, union_id FROM {0};"'
                .format(BASE_SPARK.get_output_eid_mapping_table()))

    # BUG(review): `except ():` is an EMPTY exception tuple and therefore
    # catches nothing — any exception raised in the try-body propagates
    # and the handler below never runs. Almost certainly this was meant
    # to be `except Exception:`. (Handler body continues past this view.)
    except ():
        e = traceback.format_exc()