import datetime
import os
import subprocess
import sys

# Util, worker_conf and ingest_type are assumed to be provided by the
# surrounding module (shared helper utilities and the worker configuration).


def process_new_binary_file(new_file):

    # get file from hdfs
    get_file_cmd = "hadoop fs -get {0} ../stage/.".format(new_file)
    print get_file_cmd
    subprocess.call(get_file_cmd, shell=True)

    # get file name and date
    binary_year, binary_month, binary_day, binary_hour, binary_date_path, file_name = Util.build_hdfs_path(new_file, ingest_type)

    # build process cmd.
    post_process_cmd = None
    process_opt = worker_conf[ingest_type]['process_opt']
    if ingest_type == 'dns':
        post_process_cmd = "tshark -r ../stage/{0} {1} >> ../stage/{0}.csv".format(file_name, process_opt)
    elif ingest_type == 'flow':
        post_process_cmd = "nfdump -o csv -r ../stage/{0} {1} > ../stage/{0}.csv".format(file_name, process_opt)
    else:
        print "Unsupported ingest type"
        sys.exit(1)
    print post_process_cmd
    subprocess.call(post_process_cmd, shell=True)

    # create folder if it does not exist
    h_base_path = "{0}/{1}".format(os.getenv('HUSER', '/user/oni'), ingest_type)
    h_csv_path = "{0}/csv".format(h_base_path)
    create_folder_cmd = "hadoop fs -mkdir -p {0}/y={1}/m={2}/d={3}/h={4}".format(h_csv_path, binary_year, binary_month, binary_day, binary_hour)
    print create_folder_cmd
    subprocess.call(create_folder_cmd, shell=True)

    # move to hdfs.
    upld_cmd = "hadoop fs -moveFromLocal ../stage/{0}.csv {1}/y={2}/m={3}/d={4}/h={5}/.".format(file_name, h_csv_path, binary_year, binary_month, binary_day, binary_hour)
    print upld_cmd
    subprocess.call(upld_cmd, shell=True)

    # make tmp folder in stage (timestamp is minute + second + centiseconds)
    h_stage_timestamp = datetime.datetime.now().strftime('%M%S%f')[:-4]
    h_stage_path = "{0}/stage/{1}".format(h_base_path, h_stage_timestamp)
    create_tmp_cmd = "hadoop fs -mkdir -p {0}".format(h_stage_path)
    print create_tmp_cmd
    subprocess.call(create_tmp_cmd, shell=True)

    # load to avro
    load_avro_cmd = "hive -hiveconf dbname={6} -hiveconf y={0} -hiveconf m={1} -hiveconf d={2} -hiveconf h={3} -hiveconf data_location='{4}' -f oni/load_{5}_avro_parquet.hql".format(binary_year, binary_month, binary_day, binary_hour, h_stage_path, ingest_type, os.getenv('DBNAME', 'default'))
    print load_avro_cmd
    subprocess.call(load_avro_cmd, shell=True)

    # remove from stage
    rm_tmp_cmd = "hadoop fs -rm -R -skipTrash {0}".format(h_stage_path)
    print rm_tmp_cmd
    subprocess.call(rm_tmp_cmd, shell=True)

    # can this delete other files when all is running on the same edge server?
    rm_tmp = "rm ../stage/{0}*".format(file_name)
    subprocess.call(rm_tmp, shell=True)

    print datetime.datetime.now()
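# NOTE: every step above shells out with subprocess.call(cmd, shell=True) and
# ignores the exit status, so a failed "hadoop fs -get" silently cascades into
# the post-processing and upload steps. Below is a minimal sketch of a checked
# wrapper; the run_cmd name is hypothetical, not part of this module.
def run_cmd(cmd):
    # Echo the command, run it through the shell, and abort on a non-zero
    # exit code instead of continuing with missing or stale data.
    print cmd
    rc = subprocess.call(cmd, shell=True)
    if rc != 0:
        print "command failed with exit code {0}: {1}".format(rc, cmd)
        sys.exit(rc)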
def _load_to_hdfs(self, file):

    # get file name and date
    binary_year, binary_month, binary_day, binary_hour, binary_date_path, file_name = Util.build_hdfs_path(file, 'flow')

    # hdfs path with timestamp.
    hdfs_path = "{0}/{1}/{2}".format(self._hdfs_root_path, binary_date_path, binary_hour)
    Util.creat_hdfs_folder(hdfs_path)

    # load to hdfs.
    Util.load_to_hdfs(file_name, file, hdfs_path)

    # send the notification to rabbitmq server.
    hadoop_pcap_file = "{0}/{1}".format(hdfs_path, file_name)
    Util.send_new_file_notification(hadoop_pcap_file, self._queue_name)

    print "Done !!!!!"
def _load_to_hdfs(self, file):

    # get file name and date
    binary_year, binary_month, binary_day, binary_hour, binary_date_path, file_name = Util.build_hdfs_path(file, 'flow')

    # hdfs path with timestamp.
    hdfs_path = "{0}/binary/{1}/{2}".format(self._hdfs_root_path, binary_date_path, binary_hour)
    Util.creat_hdfs_folder(hdfs_path)

    # load to hdfs.
    Util.load_to_hdfs(file_name, file, hdfs_path)

    # send the notification to rabbitmq server.
    hadoop_pcap_file = "{0}/{1}".format(hdfs_path, file_name)
    Util.send_new_file_notification(hadoop_pcap_file, self._queue_name)

    print "Done !!!!!"
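# The Util helpers called above are not shown in this section. Judging from
# the shell commands used by process_new_binary_file, they plausibly reduce to
# "hadoop fs -mkdir -p" and "hadoop fs -moveFromLocal" calls. The sketch below
# is illustrative only, under that assumption; it is not the actual Util code.
import subprocess

def creat_hdfs_folder(hdfs_path):
    # Assumed equivalent: create the target partition folder if it is missing.
    subprocess.call("hadoop fs -mkdir -p {0}".format(hdfs_path), shell=True)

def load_to_hdfs(file_name, local_file, hdfs_path):
    # Assumed equivalent: move the local capture file into the HDFS folder.
    mv_cmd = "hadoop fs -moveFromLocal {0} {1}/{2}".format(local_file, hdfs_path, file_name)
    subprocess.call(mv_cmd, shell=True)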
def process_new_binary_file(new_file):

    # get file from hdfs
    get_file_cmd = "hadoop fs -get {0} ../stage/.".format(new_file)
    print get_file_cmd
    subprocess.call(get_file_cmd, shell=True)

    # get file name and date
    binary_year, binary_month, binary_day, binary_hour, binary_date_path, file_name = Util.build_hdfs_path(new_file, ingest_type)

    # build process cmd.
    post_process_cmd = None
    process_opt = worker_conf[ingest_type]['process_opt']
    if ingest_type == 'dns':
        post_process_cmd = "tshark -r ../stage/{0} {1} >> ../stage/{0}.csv".format(file_name, process_opt)
    elif ingest_type == 'flow':
        post_process_cmd = "nfdump -o csv -r ../stage/{0} {1} > ../stage/{0}.csv".format(file_name, process_opt)
    else:
        print "Unsupported ingest type"
        sys.exit(1)
    print post_process_cmd
    subprocess.call(post_process_cmd, shell=True)

    # create folder if it does not exist
    h_base_path = "{0}/{1}".format(worker_conf['huser'], ingest_type)
    h_csv_path = "{0}/csv".format(h_base_path)
    create_folder_cmd = "hadoop fs -mkdir -p {0}/y={1}/m={2}/d={3}/h={4}".format(h_csv_path, binary_year, binary_month, binary_day, binary_hour)
    print create_folder_cmd
    subprocess.call(create_folder_cmd, shell=True)

    # move to hdfs.
    upld_cmd = "hadoop fs -moveFromLocal ../stage/{0}.csv {1}/y={2}/m={3}/d={4}/h={5}/.".format(file_name, h_csv_path, binary_year, binary_month, binary_day, binary_hour)
    print upld_cmd
    subprocess.call(upld_cmd, shell=True)

    # make tmp folder in stage
    h_stage_timestamp = datetime.datetime.now().strftime('%M%S%f')[:-4]
    h_stage_path = "{0}/stage/{1}".format(h_base_path, h_stage_timestamp)
    create_tmp_cmd = "hadoop fs -mkdir -p {0}".format(h_stage_path)
    print create_tmp_cmd
    subprocess.call(create_tmp_cmd, shell=True)

    # move to stage.
    mv_to_stage = "hadoop fs -cp {0}/y={1}/m={2}/d={3}/h={4}/{5}.csv {6}/.".format(h_csv_path, binary_year, binary_month, binary_day, binary_hour, file_name, h_stage_path)
    print mv_to_stage
    subprocess.call(mv_to_stage, shell=True)

    # load to avro
    load_avro_cmd = "hive -hiveconf dbname={6} -hiveconf y={0} -hiveconf m={1} -hiveconf d={2} -hiveconf h={3} -hiveconf data_location='{4}' -f oni/load_{5}_avro_parquet.hql".format(binary_year, binary_month, binary_day, binary_hour, h_stage_path, ingest_type, worker_conf['dbname'])
    print load_avro_cmd
    subprocess.call(load_avro_cmd, shell=True)

    # remove from stage
    rm_tmp_cmd = "hadoop fs -rm -R -skipTrash {0}".format(h_stage_path)
    print rm_tmp_cmd
    subprocess.call(rm_tmp_cmd, shell=True)

    # can this delete other files when all is running on the same edge server?
    rm_tmp = "rm ../stage/{0}*".format(file_name)
    subprocess.call(rm_tmp, shell=True)

    print datetime.datetime.now()
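# A hedged sketch of how process_new_binary_file might be driven. The
# worker_conf shape and the ingest_type value are inferred from the lookups
# inside the function; the HDFS path and the empty process_opt values are
# illustrative placeholders, not real configuration.
ingest_type = 'flow'
worker_conf = {
    'huser': '/user/oni',
    'dbname': 'oni',
    'flow': {'process_opt': ''},  # real deployments pass nfdump options here
    'dns': {'process_opt': ''},   # ...and tshark options here
}

process_new_binary_file('/user/oni/flow/binary/2016/07/14/04/nfcapd.201607140400')

# Inside load_flow_avro_parquet.hql the partition values are presumably read
# back with Hive's ${hiveconf:y}-style variable substitution.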