Code example #1
    def _process_pcap_file(self, file_name, file_local_path, hdfs_root_path):

        # get timestamp from the file name.
        file_date = file_name.split('.')[0]
        pcap_hour = file_date[-4:-2]
        pcap_date_path = file_date[-12:-4]

        # hdfs path with timestamp.
        hdfs_path = "{0}/{1}/{2}".format(hdfs_root_path, pcap_date_path,
                                         pcap_hour)
        Util.creat_hdfs_folder(hdfs_path)

        # get file size; files larger than ~1.1 GB (1145498644 bytes) get split.
        file_size = os.stat(file_local_path)
        if file_size.st_size > 1145498644:

            # split file.
            self._split_pcap_file(file_name, file_local_path, hdfs_path)
        else:
            # load file to hdfs
            Util.load_to_hdfs(file_name, file_local_path, hdfs_path)

            # send rabbitmq notification.
            hadoop_pcap_file = "{0}/{1}".format(hdfs_path, file_name)
            Util.send_new_file_notification(hadoop_pcap_file, self._queue_name)
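
The slicing above assumes the local capture name ends in a YYYYMMDDHHMM timestamp right before its first dot. With a hypothetical file name, the indices work out as follows:

# Hypothetical capture name; the real naming scheme may differ.
file_name = "capture_201603181430.pcap"

file_date = file_name.split('.')[0]   # "capture_201603181430"
pcap_hour = file_date[-4:-2]          # "14" -> hour folder
pcap_date_path = file_date[-12:-4]    # "20160318" -> date folder

print("{0}/{1}".format(pcap_date_path, pcap_hour))  # 20160318/14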
Code example #2
    def _load_to_hdfs(self, file):

        # get file name and date
        binary_year, binary_month, binary_day, binary_hour, binary_date_path, file_name = Util.build_hdfs_path(file, 'flow')

        # hdfs path with timestamp.
        hdfs_path = "{0}/{1}/{2}".format(self._hdfs_root_path, binary_date_path, binary_hour)
        Util.creat_hdfs_folder(hdfs_path)

        # load to hdfs.
        Util.load_to_hdfs(file_name, file, hdfs_path)

        # send the notification to rabbitmq server.
        hadoop_pcap_file = "{0}/{1}".format(hdfs_path, file_name)
        Util.send_new_file_notification(hadoop_pcap_file, self._queue_name)

        print "Done !!!!!"
Code example #3
File: flow_master.py  Project: ronkuhl/oni-ingest
    def _load_to_hdfs(self, file):

        # get file name and date
        binary_year, binary_month, binary_day, binary_hour, binary_date_path, file_name = Util.build_hdfs_path(
            file, 'flow')

        # hdfs path with timestamp.
        hdfs_path = "{0}/binary/{1}/{2}".format(self._hdfs_root_path,
                                                binary_date_path, binary_hour)
        Util.creat_hdfs_folder(hdfs_path)

        # load to hdfs.
        Util.load_to_hdfs(file_name, file, hdfs_path)

        # send the notification to rabbitmq server.
        hadoop_pcap_file = "{0}/{1}".format(hdfs_path, file_name)
        Util.send_new_file_notification(hadoop_pcap_file, self._queue_name)

        print "Done !!!!!"
Code example #4
    def _split_pcap_file(self, file_name, file_local_path, hdfs_path):

        # split file.
        name = file_name.split('.')[0]
        split_cmd = "editcap -c {0} {1} {2}/{3}_split.pcap".format(self._pkt_num, file_local_path, self._pcap_split_staging, name)
        print split_cmd
        subprocess.call(split_cmd, shell=True)

        for currdir, subdir, files in os.walk(self._pcap_split_staging):
            for file in files:
                if file.endswith(".pcap") and "{0}_split".format(name) in file:
                    # load file to hdfs.
                    Util.load_to_hdfs(file, os.path.join(currdir, file), hdfs_path)

                    # send rabbitmq notification.
                    hadoop_pcap_file = "{0}/{1}".format(hdfs_path, file)
                    Util.send_new_file_notification(hadoop_pcap_file, self._queue_name)

        rm_big_file = "rm {0}".format(file_local_path)
        print rm_big_file
        subprocess.call(rm_big_file, shell=True)
Code example #5
    def _process_pcap_file(self, file_name, file_local_path, hdfs_root_path):

        # get timestamp from the file name.
        file_date = file_name.split('.')[0]
        pcap_hour = file_date[-4:-2]
        pcap_date_path = file_date[-12:-4]

        # hdfs path with timestamp.
        hdfs_path = "{0}/{1}/{2}".format(hdfs_root_path, pcap_date_path, pcap_hour)
        Util.creat_hdfs_folder(hdfs_path)

        # get file size.
        file_size = os.stat(file_local_path)
        if file_size.st_size > 1145498644:

            # split file.
            self._split_pcap_file(file_name, file_local_path, hdfs_path)
        else:
            # load file to hdfs
            Util.load_to_hdfs(file_name, file_local_path, hdfs_path)

            # send rabbitmq notification.
            hadoop_pcap_file = "{0}/{1}".format(hdfs_path, file_name)
            Util.send_new_file_notification(hadoop_pcap_file, self._queue_name)
Code example #6
    def _split_pcap_file(self, file_name, file_local_path, hdfs_path):

        # split file.
        name = file_name.split('.')[0]
        split_cmd = "editcap -c {0} {1} {2}/{3}_split.pcap".format(
            self._pkt_num, file_local_path, self._pcap_split_staging, name)
        print split_cmd
        subprocess.call(split_cmd, shell=True)

        for currdir, subdir, files in os.walk(self._pcap_split_staging):
            for file in files:
                if file.endswith(".pcap") and "{0}_split".format(name) in file:
                    # load file to hdfs.
                    Util.load_to_hdfs(file, os.path.join(currdir, file),
                                      hdfs_path)

                    # send rabbitmq notification.
                    hadoop_pcap_file = "{0}/{1}".format(hdfs_path, file)
                    Util.send_new_file_notification(hadoop_pcap_file,
                                                    self._queue_name)

        rm_big_file = "rm {0}".format(file_local_path)
        print rm_big_file
        subprocess.call(rm_big_file, shell=True)
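
For context, editcap -c <n> (from Wireshark) splits a capture into output files of at most n packets each, and the pieces typically keep the given output name with a numeric suffix inserted before the extension, which is what the "{0}_split" filter in the os.walk loop matches. The string-built, shell=True command works, but a list-form call avoids shell quoting issues with unusual paths. A minimal sketch, with made-up stand-ins for the instance attributes:

import subprocess

# Made-up stand-ins for self._pkt_num, self._pcap_split_staging and the input path.
pkt_num = 650000
file_local_path = "/captures/big_201603181430.pcap"
split_target = "/tmp/pcap_staging/big_201603181430_split.pcap"

# Same editcap invocation as above, without going through a shell.
subprocess.call(["editcap", "-c", str(pkt_num), file_local_path, split_target])

Similarly, the final cleanup could use os.remove(file_local_path) instead of shelling out to rm.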