def _load_to_hdfs(self, file):
    """Upload one local flow file to HDFS and announce it on the queue.

    The destination folder is ``<self._hdfs_root_path>/<date path>/<hour>``,
    where the date path, hour and bare file name all come from
    ``util.build_hdfs_path(file, 'flow')``.

    :param file: local path of the file to upload (passed unchanged to
        ``util.build_hdfs_path`` and ``util.load_to_hdfs``).
    """
    # build_hdfs_path returns six values; only the hour, the date path and
    # the file name are needed here.
    path_parts = util.build_hdfs_path(file, 'flow')
    _year, _month, _day, hour, date_path, file_name = path_parts

    # Destination folder, partitioned by date and hour.
    hdfs_path = "{root}/{date}/{hour}".format(
        root=self._hdfs_root_path, date=date_path, hour=hour)
    util.creat_hdfs_folder(hdfs_path)

    # Copy the local file into that folder.
    util.load_to_hdfs(file_name, file, hdfs_path)

    # Publish the full HDFS location of the new file on self._queue_name.
    uploaded_file = "{0}/{1}".format(hdfs_path, file_name)
    util.send_new_file_notification(uploaded_file, self._queue_name)

    # A parenthesised single string prints identically under the Python 2
    # print statement used by the rest of this file.
    print("Done !!!!!")
def _split_pcap_file(self, file_name, file_local_path, hdfs_path):
    """Split a large pcap with ``editcap``, upload the pieces, drop the original.

    ``editcap -c <self._pkt_num>`` writes the pieces into
    ``self._pcap_split_staging`` using ``<stem>_split.pcap`` as the output
    name, where ``<stem>`` is ``file_name`` up to its first dot.  Every
    ``.pcap`` file under the staging directory whose name contains
    ``<stem>_split`` is then loaded into ``hdfs_path`` and announced on
    ``self._queue_name``.

    The commands are run without a shell, so paths containing spaces or
    shell metacharacters are passed through literally.  The original
    capture is deleted only when ``editcap`` exited with status 0; if
    ``editcap`` cannot be started at all, nothing is uploaded or deleted.

    :param file_name: bare name of the capture file.
    :param file_local_path: local path of the capture file.
    :param hdfs_path: HDFS folder that receives the pieces.
    """
    name = file_name.split('.')[0]
    split_tag = "{0}_split".format(name)
    split_target = "{0}/{1}.pcap".format(self._pcap_split_staging, split_tag)

    # An argument list (no shell) keeps odd file names from being
    # re-interpreted; joined with spaces it prints the same command line
    # the shell-string version used to print.
    split_args = ["editcap", "-c", str(self._pkt_num), file_local_path, split_target]
    print(" ".join(split_args))
    try:
        status = subprocess.call(split_args)
    except OSError as err:
        # editcap is missing or not executable: no pieces were produced,
        # so the original capture must not be removed.
        print("Unable to run editcap: {0}".format(err))
        return

    # NOTE(review): pieces are matched by name only, so leftovers from an
    # earlier run with the same stem would be uploaded again - confirm
    # whether util.load_to_hdfs removes the local copy.
    for currdir, _subdirs, files in os.walk(self._pcap_split_staging):
        for piece in files:
            if not piece.endswith(".pcap") or split_tag not in piece:
                continue

            # load file to hdfs.
            util.load_to_hdfs(piece, os.path.join(currdir, piece), hdfs_path)

            # send the queue notification for this piece.
            hadoop_pcap_file = "{0}/{1}".format(hdfs_path, piece)
            util.send_new_file_notification(hadoop_pcap_file, self._queue_name)

    if status != 0:
        # The split reported a failure: keep the only complete copy.
        print("editcap exited with status {0}; keeping {1}".format(status, file_local_path))
        return

    print("rm {0}".format(file_local_path))
    try:
        os.remove(file_local_path)
    except OSError as err:
        # The old `rm` call never raised; stay best-effort here too.
        print("Unable to remove {0}: {1}".format(file_local_path, err))
def _process_pcap_file(self, file_name, file_local_path, hdfs_root_path):
    """Send one pcap file to HDFS, splitting it first when it is too large.

    The HDFS folder is ``<hdfs_root_path>/<date>/<hour>``.  Both parts are
    sliced from the file name's stem (the text before its first dot):
    ``<date>`` is the 8 characters at ``[-12:-4]`` and ``<hour>`` the 2
    characters at ``[-4:-2]``.
    NOTE(review): this presumably expects the stem to end in a
    YYYYMMDDHHMM timestamp - confirm against whatever names the captures.

    :param file_name: bare name of the capture file.
    :param file_local_path: local path of the capture file.
    :param hdfs_root_path: HDFS root under which the date/hour folder is
        created.
    """
    # Files strictly larger than this many bytes (about 1.07 GiB) are
    # handed to _split_pcap_file instead of being uploaded whole.
    split_threshold_bytes = 1145498644

    # get timestamp from the file name.
    stamp = file_name.split('.')[0]
    hour_dir = stamp[-4:-2]
    date_dir = stamp[-12:-4]

    # hdfs path with timestamp.
    hdfs_path = "{0}/{1}/{2}".format(hdfs_root_path, date_dir, hour_dir)
    util.creat_hdfs_folder(hdfs_path)

    if os.stat(file_local_path).st_size > split_threshold_bytes:
        # The split routine uploads and announces each piece itself.
        self._split_pcap_file(file_name, file_local_path, hdfs_path)
        return

    # Small enough: upload as-is, then announce the new HDFS file.
    util.load_to_hdfs(file_name, file_local_path, hdfs_path)
    uploaded_file = "{0}/{1}".format(hdfs_path, file_name)
    util.send_new_file_notification(uploaded_file, self._queue_name)