stat = merge_file()
if not stat:
    logging.error('Get file from HDFS error!')
    sys.exit(1)

from utils import NODE

# Relation depth passed on the command line; default to 10 when absent.
level = int(sys.argv[1]) if len(sys.argv) > 1 else 10

output_rel_path = BASE_FILE_PATH.get_output_rel_path()
output_eid_mapping_path = BASE_FILE_PATH.get_output_eid_mapping_path()
main(output_rel_path, output_eid_mapping_path, level)

# Replace the relation JSON on HDFS: remove any existing copy, then upload.
tmp = os.popen('hdfs dfs -stat %s' % BASE_SPARK.get_hdfs_rel_json_path()).readlines()
if len(tmp):
    os.system('hdfs dfs -rm %s' % BASE_SPARK.get_hdfs_rel_json_path())
os.system('hdfs dfs -put %s %s'
          % (output_rel_path, BASE_SPARK.get_hdfs_rel_json_path()))

# Same replace-if-exists sequence for the eid-mapping JSON.
tmp = os.popen('hdfs dfs -stat %s' % BASE_SPARK.get_hdfs_eid_mapping_json_path()).readlines()
if len(tmp):
    os.system('hdfs dfs -rm %s' % BASE_SPARK.get_hdfs_eid_mapping_json_path())
os.system('hdfs dfs -put %s %s'
          % (output_eid_mapping_path, BASE_SPARK.get_hdfs_eid_mapping_json_path()))

logging.info('=====Processing done at %s!!!=====' % get_date())
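# --- Illustrative sketch, not part of the original pipeline ---
# The stat/rm/put sequence above repeats for every artifact pushed to HDFS.
# A hypothetical helper, hdfs_replace_file, shows one way to wrap it with
# subprocess so that exit codes are actually checked; the helper name and
# structure are assumptions, only the `hdfs dfs` subcommands come from above.
import subprocess

def hdfs_replace_file(local_path, hdfs_path):
    """Upload local_path to hdfs_path, removing any existing copy first."""
    # `hdfs dfs -stat` exits non-zero when the path does not exist.
    exists = subprocess.run(
        ['hdfs', 'dfs', '-stat', hdfs_path],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    ).returncode == 0
    if exists:
        subprocess.run(['hdfs', 'dfs', '-rm', hdfs_path], check=True)
    subprocess.run(['hdfs', 'dfs', '-put', local_path, hdfs_path], check=True)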
        .getOrCreate()

    # Load the relation JSON from HDFS (if present) and publish it as a Hive table.
    hdfs_rel_json = BASE_SPARK.get_hdfs_rel_json_path()
    temp = os.popen('hdfs dfs -stat %s' % hdfs_rel_json).readlines()
    if len(temp):
        json_data = spark.read.json('hdfs://%s' % hdfs_rel_json)
        json_data.createOrReplaceTempView('output_table')
        spark.table('output_table').write.mode('overwrite').saveAsTable(
            BASE_SPARK.get_output_rel_table())
        os.system(
            'hive -e "INSERT OVERWRITE TABLE test.grnt1 SELECT id, value FROM {0};"'
            .format(BASE_SPARK.get_output_rel_table()))

    # Same flow for the eid-mapping JSON.
    hdfs_eid_mapping_json = BASE_SPARK.get_hdfs_eid_mapping_json_path()
    temp = os.popen('hdfs dfs -stat %s' % hdfs_eid_mapping_json).readlines()
    if len(temp):
        json_data = spark.read.json('hdfs://%s' % hdfs_eid_mapping_json)
        json_data.createOrReplaceTempView('eid_mapping')
        spark.table('eid_mapping').write.mode('overwrite').saveAsTable(
            BASE_SPARK.get_output_eid_mapping_table())
        os.system(
            'hive -e "INSERT OVERWRITE TABLE test.grnt1_eid SELECT eid, union_id FROM {0};"'
            .format(BASE_SPARK.get_output_eid_mapping_table()))
except Exception:
    # Capture the full traceback for logging downstream.
    e = traceback.format_exc()
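# --- Illustrative sketch, not part of the original pipeline ---
# Assuming the (truncated) builder chain above enabled Hive support, the
# `hive -e` shell-outs could instead go through spark.sql(), keeping the
# INSERT OVERWRITE in the same Spark session rather than spawning a Hive CLI
# process. `publish_table` is a hypothetical helper name.
def publish_table(spark, target_table, columns, source_table):
    # INSERT OVERWRITE via Spark SQL instead of a separate `hive -e` call.
    spark.sql('INSERT OVERWRITE TABLE %s SELECT %s FROM %s'
              % (target_table, columns, source_table))

# e.g. publish_table(spark, 'test.grnt1', 'id, value',
#                    BASE_SPARK.get_output_rel_table())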