def main():
    """Entry point for the pcap-processing driver.

    Parses command-line options, validates the input/output directories and
    the device-list JSON, discovers the pcap files under each requested
    experiment directory, splits them round-robin across ``process_count``
    worker processes, and runs ``run`` in each worker.

    Example:
        # manually set the in/out directories, experiment_list, device_list
        # defaults below, then call:
        python3 process_traces.py --override True -p 1
    """
    global device_list, verbose
    start_time = time.time()
    print("Processing pcaps")
    print("Start time: %s\n" %
          time.strftime("%A %d %B %Y %H:%M:%S %Z", time.localtime(start_time)))

    # Options.  Defaults are hard-coded for a local dataset layout and are
    # meant to be edited or overridden on the command line.  add_help=False
    # because -h is redefined to route through print_usage().
    parser = argparse.ArgumentParser(
        usage="Look in process_traces.py to see supported arguments",
        add_help=False)
    parser.add_argument(
        "-o", dest="output_dir",
        default="/home/robin/datasets/scratch/test/moniotr_test5")
    parser.add_argument(
        "-i", dest="input_dir",
        default="/home/robin/datasets/yourthings")
    parser.add_argument(
        "-d", dest="device_list_path",
        default="/home/robin/datasets/yourthings/devices.json")
    # experiment_list is a comma-separated list of paths (relative to the
    # input dir, or a single pcap file) traversed recursively for pcaps.
    # Default is a small (76 MB) capture; takes about 40 s.
    parser.add_argument(
        "-l", dest="experiment_list",
        default="11/eth1-20180411.0000.1523422800")
    # NOTE(review): -v takes a value, so any non-empty argument (even
    # "False") is truthy; kept as-is to preserve the existing CLI contract.
    parser.add_argument("-v", dest="verbose", default=True)
    parser.add_argument("-p", dest="process_count", default=4)
    parser.add_argument("--override", dest="override_results", default=False)
    parser.add_argument("-h", dest="help", action="store_true", default=False)

    # Parse arguments
    args = parser.parse_args()
    if args.help:
        print_usage(0)

    errors = False
    experiment_list = []
    if args.experiment_list is not None:
        experiment_list = args.experiment_list.split(",")
    if len(experiment_list) == 0:
        errors = True
        print("Error: experiment list was empty!", file=sys.stderr)

    # check -i input_dir
    if args.input_dir == "":
        errors = True
        print("Error: Pcap input directory (-i) required.", file=sys.stderr)
    elif Util.check_dir(args.input_dir, "Input pcap directory"):
        errors = True

    # check -o output_dir
    if args.output_dir == "":
        errors = True
        print("Error: output directory (-o) required.", file=sys.stderr)
    elif Util.check_dir(args.output_dir, "Output directory"):
        errors = True

    # check -d device list (JSON array of device descriptors)
    device_list = []
    if args.device_list_path == "":
        errors = True
        print("Error: device list (-d) required.", file=sys.stderr)
    else:
        if not os.path.isfile(args.device_list_path):
            errors = True
            print("Error: specified device list file does not exist. "
                  + str(args.device_list_path), file=sys.stderr)
        else:
            with open(args.device_list_path, "r") as f:
                device_list = json.loads(f.read())
            if len(device_list) == 0:
                errors = True
                print("Error: device list file was empty or not JSON. "
                      + str(args.device_list_path), file=sys.stderr)
            else:
                device_list = Device.DeviceList(device_list)

    if errors:
        print_usage(1)

    process_count = int(args.process_count)
    override_results = args.override_results
    verbose = args.verbose

    # One bucket of input files per worker process.
    raw_files = [[] for _ in range(process_count)]

    # TODO: adjust this logic for when we start processing actual batches
    def walk_directory(dir_path, output_list, output_index):
        """Collect pcaps under dir_path into output_list, round-robin.

        Returns the next bucket index so sequential calls keep the
        round-robin distribution going.

        BUG FIX: the original recursed into each subdirectory manually *in
        addition to* os.walk's own recursive traversal, so files in nested
        directories were enqueued (and hence processed) multiple times.
        os.walk alone already visits every subdirectory.  It also ignored
        the output_list parameter and closed over raw_files directly.
        """
        print("Walk dir " + dir_path)
        if os.path.isfile(dir_path):
            # An explicit file path is accepted as-is, without the
            # extension filter applied to directory scans.
            output_list[output_index].append(dir_path)
            return (output_index + 1) % process_count
        for root, dirs, files in os.walk(dir_path):
            for filename in files:
                # one of the datasets has pcaps without extensions but
                # with names starting with "eth"
                if ((filename.endswith(".pcap")
                     and not filename.startswith("."))
                        or filename.endswith(".pcapng")
                        or filename.startswith("eth")):
                    output_list[output_index].append(
                        os.path.join(root, filename))
                    output_index = (output_index + 1) % process_count
        return output_index

    for experiment in experiment_list:
        walk_directory(args.input_dir + os.path.sep + experiment,
                       raw_files, 0)

    if override_results:
        if verbose:
            print("Clearing output directory %s" % args.output_dir)
        for root, dirs, files in os.walk(args.output_dir):
            for f in files:
                os.unlink(os.path.join(root, f))
            for d in dirs:
                shutil.rmtree(os.path.join(root, d))

    gc.collect()
    if verbose:
        print("Analyzing input pcap files...")

    # TODO: refactor this into a proper class
    params = {
        "device_list": device_list,
        "output_dir": args.output_dir,
        "verbose": verbose
    }

    # Run the analysis with process_count worker processes.
    procs = []
    for pid, files in enumerate(raw_files):
        p = Process(target=run, args=(pid, files, params))
        procs.append(p)
        p.start()
    for p in procs:
        p.join()

    end_time = time.time()
    print("\nEnd time: %s" %
          time.strftime("%A %d %B %Y %H:%M:%S %Z", time.localtime(end_time)))

    # Calculate elapsed time
    elapsed = round(end_time - start_time)
    hrs, rem = divmod(elapsed, 3600)
    minute, sec = divmod(rem, 60)
    print("Elapsed time: %s hours %s minutes %s seconds" % (hrs, minute, sec))
    if verbose:
        print("\nAnalysis finished.")
def main():
    """Entry point for the raw-capture-to-JSON conversion driver.

    Parses command-line options, validates the input/output directories,
    gathers the pcap files under the input directory, and runs ``run`` over
    them in worker processes (currently pinned to a single process).
    """
    global device_list, verbose
    start_time = time.time()
    print("Creating intermediate JSON files for faster processing later")
    print("Start time: %s\n" %
          time.strftime("%A %d %B %Y %H:%M:%S %Z", time.localtime(start_time)))

    # Options.  add_help=False because -h is redefined to route through
    # print_usage().
    parser = argparse.ArgumentParser(
        usage="Look in create_json.py to see supported arguments",
        add_help=False)
    parser.add_argument("-i", dest="input_dir",
                        default="/home/robin/datasets/yourthings/11")
    parser.add_argument("-o", dest="output_dir",
                        default="/home/robin/datasets/yourthings/11_json")
    # NOTE(review): -v takes a value, so any non-empty argument (even
    # "False") is truthy; kept as-is to preserve the existing CLI contract.
    parser.add_argument("-v", dest="verbose", default=True)
    parser.add_argument("--override", dest="override_results", default=True)
    parser.add_argument("-h", dest="help", action="store_true", default=False)

    # Parse arguments
    args = parser.parse_args()
    if args.help:
        print_usage(0)

    errors = False

    # check -i input_dir
    if args.input_dir == "":
        errors = True
        print("Error: Results input directory (-i) required.",
              file=sys.stderr)
    elif Util.check_dir(args.input_dir, "Results directory"):
        errors = True

    # check -o output_dir
    if args.output_dir == "":
        errors = True
        print("Error: output directory (-o) required.", file=sys.stderr)
    elif Util.check_dir(args.output_dir, "Output directory"):
        errors = True

    if errors:
        print_usage(1)

    process_count = 1  # args.process_count
    override_results = args.override_results
    verbose = args.verbose

    # One bucket of input files per worker process.
    raw_files = [[] for _ in range(process_count)]

    # TODO: adjust this logic for when we start processing actual batches
    def walk_directory(dir_path, output_list, output_index):
        """Collect pcaps under dir_path into output_list, round-robin.

        Returns the next bucket index so sequential calls keep the
        round-robin distribution going.

        BUG FIX: the original recursed into each subdirectory manually *in
        addition to* os.walk's own recursive traversal, so files in nested
        directories were enqueued (and hence processed) multiple times.
        os.walk alone already visits every subdirectory.  It also ignored
        the output_list parameter and closed over raw_files directly.
        """
        print("Walk dir " + dir_path + "," + str(output_index))
        for root, dirs, files in os.walk(dir_path):
            for filename in files:
                # one of the datasets has pcaps without extensions but
                # with names starting with "eth"
                if ((filename.endswith(".pcap")
                     and not filename.startswith("."))
                        or filename.endswith(".pcapng")
                        or filename.startswith("eth")):
                    output_list[output_index].append(
                        os.path.join(root, filename))
                    output_index = (output_index + 1) % process_count
        return output_index

    walk_directory(args.input_dir, raw_files, 0)

    if override_results:
        if verbose:
            print("Clearing output directory %s" % args.output_dir)
        for root, dirs, files in os.walk(args.output_dir):
            for f in files:
                os.unlink(os.path.join(root, f))
            for d in dirs:
                shutil.rmtree(os.path.join(root, d))

    gc.collect()
    if verbose:
        print("Summarizing device result files...")

    # TODO: refactor this into a proper class
    params = {"output_dir": args.output_dir, "verbose": verbose}

    # Run the transformation with process_count worker processes.
    procs = []
    for pid, files in enumerate(raw_files):
        p = Process(target=run, args=(pid, files, params))
        procs.append(p)
        p.start()
    for p in procs:
        p.join()

    end_time = time.time()
    print("\nEnd time: %s" %
          time.strftime("%A %d %B %Y %H:%M:%S %Z", time.localtime(end_time)))

    # Calculate elapsed time
    elapsed = round(end_time - start_time)
    hrs, rem = divmod(elapsed, 3600)
    minute, sec = divmod(rem, 60)
    print("Elapsed time: %s hours %s minutes %s seconds" % (hrs, minute, sec))
    if verbose:
        print("\nTransformation finished.")