def write_log_file(filename, node, exp_name):
    """Writes all the log data from the node to a HDF5 file."""
    import datetime
    import wlan_exp.log.util_hdf as hdf_util
    import wlan_exp.log.util     as log_util

    data_buffer = node.log_get_all_new(log_tail_pad=0)

    try:
        print(" {0}".format(filename))

        # Get the byte log_data out of the Buffer
        data = data_buffer.get_bytes()

        # Example Attribute Dictionary for the HDF5 file
        attr_dict = {'exp_name'  : exp_name,
                     'exp_time'  : log_util.convert_datetime_to_log_time_str(datetime.datetime.utcnow()),
                     'node_desc' : node.description}

        # Write the byte Log_data to the file
        hdf_util.log_data_to_hdf5(log_data=data, filename=filename, attr_dict=attr_dict)

    except AttributeError as err:
        print("Error writing log file: {0}".format(err))
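# For context, a minimal usage sketch of write_log_file().  The collect_logs()
# helper, its parameters, and the filename pattern are hypothetical; it assumes
# `nodes` is a list of already-initialized wlan_exp node objects that are
# actively logging.
import time

def collect_logs(nodes, exp_name, num_trials=5, interval=10.0):
    """Snapshot each node's log to its own HDF5 file every `interval` seconds."""
    for trial in range(num_trials):
        for n_idx, node in enumerate(nodes):
            filename = "log_{0}_node{1}_trial{2:04d}.hdf5".format(exp_name, n_idx, trial)
            write_log_file(filename, node, exp_name)
        time.sleep(interval)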
def write_log_file(file_name, data_buffer):
    """Writes log data to a HDF5 file."""
    try:
        print(" {0}".format(file_name))

        # Get the byte log_data out of the WARPNet buffer
        data = data_buffer.get_bytes()

        # Write the byte Log_data to the file
        hdf_util.log_data_to_hdf5(data, file_name)

    except AttributeError as err:
        print("Error writing log file: {0}".format(err))
def log_file_update(filename):
    """Update the log file."""
    #---------------------------------------------------------------------
    # Read input file
    #

    # Get the log_data from the file
    log_bytes = bytearray(hdf_util.hdf5_to_log_data(filename=filename))

    # Get the raw_log_index from the file
    raw_log_index = hdf_util.hdf5_to_log_index(filename=filename)

    # Get the user attributes from the file
    log_attr_dict = hdf_util.hdf5_to_attr_dict(filename=filename)

    #---------------------------------------------------------------------
    # Print information about the file
    #
    log_util.print_log_index_summary(raw_log_index, "Log Index Summary:")

    #---------------------------------------------------------------------
    # Write output file
    #

    # Write the log to a new HDF5 file
    (fn_fldr, fn_file) = os.path.split(filename)

    # Find the last '.' in the file name and classify everything after that as the <ext>
    ext_i = fn_file.rfind('.')
    if (ext_i != -1):
        # Remember the original file extension
        fn_ext  = fn_file[ext_i:]
        fn_base = fn_file[0:ext_i]
    else:
        fn_ext  = ''
        fn_base = fn_file

    newfilename = os.path.join(fn_fldr, fn_base + "_update" + fn_ext)

    print("Writing new file {0} ...".format(newfilename))

    # Copy any user attributes to the new file
    hdf_util.log_data_to_hdf5(log_bytes, newfilename, attr_dict=log_attr_dict)

    return
def log_convert(filename):
    """Convert the log."""
    #####################
    # Read input file

    # Get the log_data from the file
    log_bytes = bytearray(hdf_util.hdf5_to_log_data(filename=filename))

    # Get the user attributes from the file
    log_attr_dict = hdf_util.hdf5_to_attr_dict(filename=filename)

    #####################
    # Write output file
    # NOTE: The raw_log_index will be automatically regenerated

    # Write the log to a new HDF5 file
    (fn_fldr, fn_file) = os.path.split(filename)

    # Find the last '.' in the file name and classify everything after that as the <ext>
    ext_i = fn_file.rfind('.')
    if (ext_i != -1):
        # Remember the original file extension
        fn_ext  = fn_file[ext_i:]
        fn_base = fn_file[0:ext_i]
    else:
        fn_ext  = ''
        fn_base = fn_file

    newfilename = os.path.join(fn_fldr, fn_base + "_new" + fn_ext)

    print("Writing new file {0} ...".format(newfilename))

    # Copy any user attributes to the new file
    hdf_util.log_data_to_hdf5(log_bytes, newfilename, attr_dict=log_attr_dict)

    return
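# The file-processing utilities in this section (log_file_update, log_convert,
# and the log_anonymize variants below) reference several module-level names
# that are not shown here: os, time, struct, hdf_util, log_util, entry_types,
# the print_time flag, the running all_addrs list, and the addr_to_replace()
# helper.  A minimal sketch of what those could look like follows; the real
# scripts' imports and helper definition may differ.
import os
import time
import struct

import wlan_exp.log.util        as log_util
import wlan_exp.log.util_hdf    as hdf_util
import wlan_exp.log.entry_types as entry_types

print_time = True     # Print per-step timing information
all_addrs  = list()   # Running list of MAC addresses seen across all input files

def addr_to_replace(addr, byte_idx, addr_idx_map):
    """Record one occurrence of a MAC address and where it lives in log_bytes.

    `addr` is a 6-tuple of byte values (tuples are hashable; bytearray slices
    are not), and `byte_idx` is the offset of the address within log_bytes.
    """
    if addr not in addr_idx_map:
        # First time this address has been seen in any file
        all_addrs.append(addr)
        addr_idx_map[addr] = list()
    addr_idx_map[addr].append(byte_idx)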
def log_anonymize(filename):
    """Anonymize the log."""
    global all_addrs

    # Get the log_data from the file
    log_bytes = bytearray(hdf_util.hdf5_to_log_data(filename=filename))

    # Get the raw_log_index from the file
    raw_log_index = hdf_util.hdf5_to_log_index(filename=filename)

    # Get the user attributes from the file
    log_attr_dict = hdf_util.hdf5_to_attr_dict(filename=filename)

    # Generate the index of log entry locations sorted by log entry type
    #   Merge the Rx / Tx subtypes that can be processed together
    log_index = log_util.filter_log_index(raw_log_index,
                                          merge={'RX_OFDM': ['RX_OFDM', 'RX_OFDM_LTG'],
                                                 'TX_HIGH': ['TX_HIGH', 'TX_HIGH_LTG'],
                                                 'TX_LOW' : ['TX_LOW',  'TX_LOW_LTG']})

    # Re-initialize the address-byteindex map per file using the running
    # list of known MAC addresses
    addr_idx_map = dict()
    for addr in all_addrs:
        addr_idx_map[addr] = list()

    log_util.print_log_index_summary(log_index, "Log Index Summary (merged):")

    #---------------------------------------------------------------------
    # Step 1: Build a dictionary of all MAC addresses in the log, then
    #         map each address to a unique anonymous address
    #         Uses tuple(bytearray slice) since bytearray isn't hashable as-is
    #
    print("Anonymizing file step 1 ...")
    start_time = time.time()

    #----------------------------------
    # Rx DSSS entries
    #
    try:
        print(" Anonymizing {0} RX_DSSS entries".format(len(log_index['RX_DSSS'])))

        pyld_start = struct.calcsize(''.join(entry_types.entry_rx_dsss.get_field_struct_formats()[:-1]))

        for idx in log_index['RX_DSSS']:
            # 6-byte addresses at offsets 4, 10, 16 in the mac_payload
            for o in (4, 10, 16):
                addr_to_replace(tuple(log_bytes[idx+pyld_start+o : idx+pyld_start+o+6]),
                                idx+pyld_start+o, addr_idx_map)
    except KeyError:
        pass

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #----------------------------------
    # Rx OFDM entries
    #
    try:
        print(" Anonymizing {0} RX_OFDM entries".format(len(log_index['RX_OFDM'])))

        pyld_start = struct.calcsize(''.join(entry_types.entry_rx_ofdm.get_field_struct_formats()[:-1]))

        for idx in log_index['RX_OFDM']:
            # 6-byte addresses at offsets 4, 10, 16 in the mac_payload
            for o in (4, 10, 16):
                addr_to_replace(tuple(log_bytes[idx+pyld_start+o : idx+pyld_start+o+6]),
                                idx+pyld_start+o, addr_idx_map)
    except KeyError:
        pass

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #----------------------------------
    # Tx entries
    #
    try:
        print(" Anonymizing {0} TX_HIGH entries".format(len(log_index['TX_HIGH'])))

        pyld_start = struct.calcsize(''.join(entry_types.entry_tx_high.get_field_struct_formats()[:-1]))

        for idx in log_index['TX_HIGH']:
            # 6-byte addresses at offsets 4, 10, 16 in the mac_payload
            for o in (4, 10, 16):
                addr_to_replace(tuple(log_bytes[idx+pyld_start+o : idx+pyld_start+o+6]),
                                idx+pyld_start+o, addr_idx_map)
    except KeyError:
        pass

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #----------------------------------
    # Tx Low entries
    #
    try:
        print(" Anonymizing {0} TX_LOW entries".format(len(log_index['TX_LOW'])))

        pyld_start = struct.calcsize(''.join(entry_types.entry_tx_low.get_field_struct_formats()[:-1]))

        for idx in log_index['TX_LOW']:
            # 6-byte addresses at offsets 4, 10, 16 in the mac_payload
            for o in (4, 10, 16):
                addr_to_replace(tuple(log_bytes[idx+pyld_start+o : idx+pyld_start+o+6]),
                                idx+pyld_start+o, addr_idx_map)
    except KeyError:
        pass

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #---------------------------------------------------------------------
    # Step 2: Enumerate actual MAC addresses and their anonymous replacements
    #
    print("Anonymizing file step 2 ...")
    print(" Enumerate MAC addresses and their anonymous replacements")

    addr_map = dict()

    for ii, addr in enumerate(all_addrs):
        # Address should not have a first octet that is odd, as this indicates
        # the address is multicast.  Hence, use 0xFE as the first octet.
        #
        # Due to FCS errors, the number of addresses in a log file is
        # potentially large.  Therefore, the anonymizer supports 2^24 unique
        # addresses.
        #
        anon_addr = (0xFE, 0xFF, 0xFF, (ii // (256**2)), ((ii // 256) % 256), (ii % 256))
        addr_map[addr] = anon_addr

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #---------------------------------------------------------------------
    # Step 3: Replace all MAC addresses in the log
    #
    print("Anonymizing file step 3 ...")
    print(" Replace all MAC addresses in the log")

    for old_addr in addr_idx_map.keys():
        new_addr = bytearray(addr_map[old_addr])
        for byte_idx in addr_idx_map[old_addr]:
            log_bytes[byte_idx:byte_idx+6] = new_addr

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #---------------------------------------------------------------------
    # Step 4: Other anonymization steps
    #
    print("Anonymizing file step 4 ...")
    print(" Remove all payloads")

    # Overwrite all payloads with zeros
    try:
        for key in log_index.keys():
            log_util.overwrite_payloads(log_bytes, log_index[key])
    except:
        pass

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #---------------------------------------------------------------------
    # Write output files
    #

    # Write the modified log to a new HDF5 file
    (fn_fldr, fn_file) = os.path.split(filename)

    # Find the last '.' in the file name and classify everything after that as the <ext>
    ext_i = fn_file.rfind('.')
    if (ext_i != -1):
        # Remember the original file extension
        fn_ext  = fn_file[ext_i:]
        fn_base = fn_file[0:ext_i]
    else:
        fn_ext  = ''
        fn_base = fn_file

    newfilename = os.path.join(fn_fldr, fn_base + "_anon" + fn_ext)

    print("Writing new file {0} ...".format(newfilename))

    # Copy any user attributes to the new anonymized file
    hdf_util.log_data_to_hdf5(log_bytes, newfilename, attr_dict=log_attr_dict)

    return
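# A quick sanity check of the step-2 mapping above: with 2^24 unique addresses
# supported, log-address index ii = 70000 maps to fe:ff:ff:01:11:70.
ii = 70000
anon_addr = (0xFE, 0xFF, 0xFF, (ii // (256**2)), ((ii // 256) % 256), (ii % 256))
print(':'.join('{0:02x}'.format(b) for b in anon_addr))   # -> fe:ff:ff:01:11:70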
def log_anonymize(filename):
    """Anonymize the log."""
    global all_addrs

    # Get the log_data from the file
    log_bytes = bytearray(hdf_util.hdf5_to_log_data(filename=filename))

    # Get the raw_log_index from the file
    raw_log_index = hdf_util.hdf5_to_log_index(filename=filename)

    # Get the user attributes from the file
    log_attr_dict = hdf_util.hdf5_to_attr_dict(filename=filename)

    # Generate the index of log entry locations sorted by log entry type
    log_index = log_util.filter_log_index(raw_log_index)

    # Re-initialize the address-byteindex map per file using the running
    # list of known MAC addresses
    addr_idx_map = dict()
    for addr in all_addrs:
        addr_idx_map[addr] = list()

    log_util.print_log_index_summary(log_index)

    #####################
    # Step 1: Build a dictionary of all MAC addresses in the log, then
    #         map each address to a unique anonymous address
    #         Uses tuple(bytearray slice) since bytearray isn't hashable as-is
    print("Anonymizing file step 1 ...")
    start_time = time.time()

    # Station Info entries
    print(" Anonymizing STATION_INFO entries")
    try:
        for idx in log_index['STATION_INFO']:
            # 6-byte address at offset 8
            o = 8
            addr_to_replace(tuple(log_bytes[idx+o:idx+o+6]), idx+o, addr_idx_map)
    except KeyError:
        pass

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    # Tx/Rx Statistics entries
    print(" Anonymizing TXRX_STATS entries")
    try:
        for idx in log_index['TXRX_STATS']:
            # 6-byte address at offset 16
            o = 16
            addr_to_replace(tuple(log_bytes[idx+o:idx+o+6]), idx+o, addr_idx_map)
    except KeyError:
        pass

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    # Rx DSSS entries
    print(" Anonymizing RX_DSSS entries")
    try:
        for idx in log_index['RX_DSSS']:
            # 6-byte addresses at offsets 28, 34, 40
            for o in (28, 34, 40):
                addr_to_replace(tuple(log_bytes[idx+o:idx+o+6]), idx+o, addr_idx_map)
    except KeyError:
        pass

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    # Rx OFDM entries
    print(" Anonymizing RX_OFDM entries")
    try:
        for idx in log_index['RX_OFDM']:
            # 6-byte addresses at offsets 284, 290, 296
            for o in (284, 290, 296):
                addr_to_replace(tuple(log_bytes[idx+o:idx+o+6]), idx+o, addr_idx_map)
    except KeyError:
        pass

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    # Tx entries
    print(" Anonymizing TX entries")
    try:
        for idx in log_index['TX']:
            # 6-byte addresses at offsets 44, 50, 56
            for o in (44, 50, 56):
                addr_to_replace(tuple(log_bytes[idx+o:idx+o+6]), idx+o, addr_idx_map)
    except KeyError:
        pass

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    # Tx Low entries
    print(" Anonymizing TX_LOW entries")
    try:
        for idx in log_index['TX_LOW']:
            # 6-byte addresses at offsets 40, 46, 52
            for o in (40, 46, 52):
                addr_to_replace(tuple(log_bytes[idx+o:idx+o+6]), idx+o, addr_idx_map)
    except KeyError:
        pass

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #####################
    # Step 2: Enumerate actual MAC addresses and their anonymous replacements
    print("Anonymizing file step 2 ...")
    print(" Enumerate MAC addresses and their anonymous replacements")

    addr_map = dict()

    for ii, addr in enumerate(all_addrs):
        anon_addr = (0xFF, 0xFF, 0xFF, 0xFF, (ii // 256), (ii % 256))
        addr_map[addr] = anon_addr

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #####################
    # Step 3: Replace all MAC addresses in the log
    print("Anonymizing file step 3 ...")
    print(" Replace all MAC addresses in the log")

    for old_addr in addr_idx_map.keys():
        new_addr = bytearray(addr_map[old_addr])
        for byte_idx in addr_idx_map[old_addr]:
            log_bytes[byte_idx:byte_idx+6] = new_addr

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #####################
    # Step 4: Other anonymization steps
    print("Anonymizing file step 4 ...")
    print(" Replace STATION_INFO hostnames")

    # Station info entries contain "hostname", the DHCP client hostname field
    # Replace these with a string version of the new anonymous MAC addr
    try:
        for idx in log_index['STATION_INFO']:
            # 6-byte MAC addr (already anonymized) at offset 8
            # 20 character ASCII string at offset 16
            addr_o = 8
            name_o = 16

            addr = log_bytes[idx+addr_o : idx+addr_o+6]

            new_name = "AnonNode {0:02x}_{1:02x}".format(addr[4], addr[5])
            new_name = new_name + '\x00' * (20 - len(new_name))

            log_bytes[idx+name_o : idx+name_o+20] = bytearray(new_name.encode("UTF-8"))
    except KeyError:
        pass

    print(" Remove all WN_CMD_INFO entries")

    # WARPNet Command info entries contain command arguments that could possibly
    # contain sensitive information.  Replace with NULL entries.
    try:
        log_util.overwrite_entries_with_null_entry(log_bytes, log_index['WN_CMD_INFO'])
    except:
        pass

    print(" Remove all payloads")

    # Overwrite all payloads with zeros
    try:
        for key in log_index.keys():
            log_util.overwrite_payloads(log_bytes, log_index[key])
    except:
        pass

    if print_time:
        print(" Time = {0:.3f}s".format(time.time() - start_time))

    #####################
    # Write output files

    # Write the modified log to a new HDF5 file
    (fn_fldr, fn_file) = os.path.split(filename)

    # Find the last '.' in the file name and classify everything after that as the <ext>
    ext_i = fn_file.rfind('.')
    if (ext_i != -1):
        # Remember the original file extension
        fn_ext  = fn_file[ext_i:]
        fn_base = fn_file[0:ext_i]
    else:
        fn_ext  = ''
        fn_base = fn_file

    newfilename = os.path.join(fn_fldr, fn_base + "_anon" + fn_ext)

    print("Writing new file {0} ...".format(newfilename))

    # Copy any user attributes to the new anonymized file
    hdf_util.log_data_to_hdf5(log_bytes, newfilename, attr_dict=log_attr_dict)

    return
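# A hypothetical command-line driver for the anonymizer above; the real
# scripts' argument handling may differ.
if __name__ == '__main__':
    import sys

    if len(sys.argv) < 2:
        print("Usage: python log_anonymize.py <log_file.hdf5> [<log_file.hdf5> ...]")
        sys.exit(1)

    for fname in sys.argv[1:]:
        if not os.path.isfile(fname):
            print("File not found: {0}".format(fname))
            continue
        log_anonymize(fname)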