Ejemplo n.º 1
0
def log_file_update(filename):
    """Re-write the log in ``filename`` to a sibling ``<name>_update<ext>`` file.

    The log data, raw log index and user attributes are read from
    ``filename``.  A summary of the raw log index is printed, then the log
    data and the user attributes are handed to ``hdf_util.log_data_to_hdf5``
    to produce the new file in the same folder as the input.

    Args:
        filename: Path of the HDF5 log file to read.
    """
    # ------------------------------------------------------------------
    # Load the log data, its index and the user attributes
    # ------------------------------------------------------------------
    log_bytes = bytearray(hdf_util.hdf5_to_log_data(filename=filename))
    raw_log_index = hdf_util.hdf5_to_log_index(filename=filename)
    log_attr_dict = hdf_util.hdf5_to_attr_dict(filename=filename)

    # ------------------------------------------------------------------
    # Describe what was read
    # ------------------------------------------------------------------
    log_util.print_log_index_summary(raw_log_index, "Log Index Summary:")

    # ------------------------------------------------------------------
    # Build the output name: insert "_update" before the last '.' of the
    # file name, or append it when the name has no '.' at all
    # ------------------------------------------------------------------
    folder, name = os.path.split(filename)
    stem, dot, tail = name.rpartition('.')
    if not dot:
        # rpartition() puts the whole name in the last slot when no '.' exists
        stem, tail = tail, ''

    newfilename = os.path.join(folder, stem + "_update" + dot + tail)

    print("Writing new file {0} ...".format(newfilename))

    # The user attributes travel with the log data into the new file
    hdf_util.log_data_to_hdf5(log_bytes, newfilename, attr_dict=log_attr_dict)
Ejemplo n.º 2
0
else:
    print("Reading log file '{0}' ({1:5.1f} MB)\n".format(
        LOGFILE, (os.path.getsize(LOGFILE) / 2**20)))

#-----------------------------------------------------------------------------
# Main script
#-----------------------------------------------------------------------------

# Load the log data from the HDF5 file
log_data = hdf_util.hdf5_to_log_data(filename=LOGFILE)

# Load the raw log index and print a summary of its contents
raw_log_index = hdf_util.hdf5_to_log_index(filename=LOGFILE)
log_util.print_log_index_summary(raw_log_index, "Log Index Contents:")

# Entry types kept in the filtered index: node / time info plus Rx and Tx
entry_types_to_keep = ['NODE_INFO', 'TIME_INFO', 'RX_OFDM', 'TX_HIGH', 'TX_LOW']

# Each LTG variant is merged into its base entry type
ltg_merge_map = {
    'RX_OFDM': ['RX_OFDM', 'RX_OFDM_LTG'],
    'TX_HIGH': ['TX_HIGH', 'TX_HIGH_LTG'],
    'TX_LOW': ['TX_LOW', 'TX_LOW_LTG']
}

log_index = log_util.filter_log_index(raw_log_index,
                                      include_only=entry_types_to_keep,
                                      merge=ltg_merge_map)

log_util.print_log_index_summary(log_index, "Filtered Log Index:")

# Unpack the log into numpy structured arrays
#   log_data_to_np_arrays returns a dictionary with one key-value pair per
Ejemplo n.º 3
0
def log_anonymize(filename):
    """Anonymize the log in ``filename`` and write it to ``<name>_anon<ext>``.

    Processing steps:
      1. Locate every MAC address in the RX_DSSS / RX_OFDM / TX_HIGH / TX_LOW
         entries (LTG variants are merged into their base type first).
      2. Assign each known address a unique anonymous replacement.
      3. Overwrite every recorded address location with its replacement.
      4. Overwrite the payloads of all entries via
         ``log_util.overwrite_payloads``.

    The result, together with the original user attributes, is written next
    to the input file.

    Uses names defined elsewhere in this file: ``all_addrs`` (the running
    list of known MAC addresses), ``print_time`` and ``addr_to_replace``.

    Args:
        filename: Path of the HDF5 log file to anonymize.

    Raises:
        ValueError: If more than 2^24 addresses are known, which is the
            capacity of the anonymous address scheme.
    """
    global all_addrs

    # Get the log data, the raw log index and the user attributes from the file
    log_bytes = bytearray(hdf_util.hdf5_to_log_data(filename=filename))
    raw_log_index = hdf_util.hdf5_to_log_index(filename=filename)
    log_attr_dict = hdf_util.hdf5_to_attr_dict(filename=filename)

    # Generate the index of log entry locations sorted by log entry type
    #    Merge the Rx / Tx subtypes that can be processed together
    log_index = log_util.filter_log_index(
        raw_log_index,
        merge={
            'RX_OFDM': ['RX_OFDM', 'RX_OFDM_LTG'],
            'TX_HIGH': ['TX_HIGH', 'TX_HIGH_LTG'],
            'TX_LOW': ['TX_LOW', 'TX_LOW_LTG']
        })

    # Re-initialize the address-byteindex map per file using the running
    #   list of known MAC addresses
    addr_idx_map = dict((addr, list()) for addr in all_addrs)

    log_util.print_log_index_summary(log_index, "Log Index Summary (merged):")

    start_time = None

    def _report_elapsed():
        # Elapsed time is always measured from the start of step 1
        if print_time:
            print("        Time = {0:.3f}s".format(time.time() - start_time))

    #---------------------------------------------------------------------
    # Step 1: Build a dictionary of all MAC addresses in the log, then
    #   map each addresses to a unique anonymous address
    #   Uses tuple(bytearray slice) since bytearray isn't hashable as-is
    #
    print("Anonmyizing file step 1 ...")

    start_time = time.time()

    # (log index key, name of the matching entry definition in entry_types)
    addr_sources = (
        ('RX_DSSS', 'entry_rx_dsss'),
        ('RX_OFDM', 'entry_rx_ofdm'),
        ('TX_HIGH', 'entry_tx_high'),
        ('TX_LOW', 'entry_tx_low'),
    )

    # 6-byte addresses at offsets 4, 10, 16 in the mac_payload (all types)
    addr_offsets = (4, 10, 16)

    for key, type_name in addr_sources:
        # Only the lookup is guarded: an entry type that is absent from this
        # log is skipped, any other failure is allowed to surface.
        try:
            entry_locs = log_index[key]
        except KeyError:
            entry_locs = None

        if entry_locs is not None:
            print("    Anonmyizing {0} {1} entries".format(
                len(entry_locs), key))

            # The payload is the last field of the entry, so it starts after
            # the combined size of all preceding fields
            entry_type = getattr(entry_types, type_name)
            pyld_start = struct.calcsize(''.join(
                entry_type.get_field_struct_formats()[:-1]))

            for idx in entry_locs:
                for o in addr_offsets:
                    addr_start = idx + pyld_start + o
                    addr_to_replace(
                        tuple(log_bytes[addr_start:addr_start + 6]),
                        addr_start, addr_idx_map)

        _report_elapsed()

    #---------------------------------------------------------------------
    # Step 2: Enumerate actual MAC addresses and their anonymous replacements
    #
    print("Anonmyizing file step 2 ...")

    print("    Enumerate MAC addresses and their anonymous replacements")

    # Fail early with a clear message instead of an out-of-range byte error
    # when the replacement address is built in step 3
    if len(all_addrs) > 256**3:
        raise ValueError(
            "Cannot anonymize {0} addresses; at most 2^24 are supported".format(
                len(all_addrs)))

    addr_map = dict()
    for ii, addr in enumerate(all_addrs):
        # Address should not have a first octet that is odd, as this indicates
        # the address is multicast.  Hence, use 0xFE as the first octet.
        #
        # Due to FCS errors, the number of addresses in a log file is
        # potentially large.  Therefore, the anonymizer supports 2^24 unique
        # addresses.
        #
        addr_map[addr] = (0xFE, 0xFF, 0xFF, (ii // (256**2)),
                          ((ii // 256) % 256), (ii % 256))

    _report_elapsed()

    #---------------------------------------------------------------------
    # Step 3: Replace all MAC addresses in the log
    #
    print("Anonmyizing file step 3 ...")

    print("    Replace all MAC addresses in the log")

    for old_addr, byte_idxs in addr_idx_map.items():
        new_addr = bytearray(addr_map[old_addr])
        for byte_idx in byte_idxs:
            log_bytes[byte_idx:byte_idx + 6] = new_addr

    _report_elapsed()

    #---------------------------------------------------------------------
    # Step 4: Other anonymization steps
    #
    print("Anonmyizing file step 4 ...")

    print("    Remove all payloads")

    # Overwrite all payloads.  Each entry type is handled on its own so that
    # a failure for one type neither stops the remaining types nor goes
    # unnoticed: a silently skipped type would leave payload data in a file
    # that is labelled as anonymized.
    for key in log_index.keys():
        try:
            log_util.overwrite_payloads(log_bytes, log_index[key])
        except Exception as err:
            print("    WARNING: payloads of {0} entries were not removed: "
                  "{1}".format(key, err))

    _report_elapsed()

    #---------------------------------------------------------------------
    # Write output files
    #

    # Write the modified log to a new HDF5 file
    (fn_fldr, fn_file) = os.path.split(filename)

    # Everything from the last '.' of the file name onwards is the <ext>
    ext_i = fn_file.rfind('.')
    if ext_i != -1:
        fn_base, fn_ext = fn_file[:ext_i], fn_file[ext_i:]
    else:
        fn_base, fn_ext = fn_file, ''

    newfilename = os.path.join(fn_fldr, fn_base + "_anon" + fn_ext)

    print("Writing new file {0} ...".format(newfilename))

    # Copy any user attributes to the new anonymized file
    hdf_util.log_data_to_hdf5(log_bytes, newfilename, attr_dict=log_attr_dict)

    return
else:
    print("Reading log file '{0}' ({1:5.1f} MB)\n".format(os.path.split(LOGFILE)[1], (os.path.getsize(LOGFILE)/1E6)))


#-----------------------------------------------------------------------------
# Main script 
#-----------------------------------------------------------------------------

# Load the log data from the HDF5 file
log_data = hdf_util.hdf5_to_log_data(filename=LOGFILE)

# Load the raw log index and print a summary of its contents
raw_log_index = hdf_util.hdf5_to_log_index(filename=LOGFILE)
log_util.print_log_index_summary(raw_log_index, "Raw Log Index Contents:")

# Restrict the index to the node / time info entries, the OFDM receptions and
# the transmissions.  No merge is requested here, so each kept entry type
# retains its own key.
entry_types_to_keep = ['NODE_INFO', 'TIME_INFO', 'RX_OFDM', 'TX']
log_index = log_util.filter_log_index(raw_log_index,
                                      include_only=entry_types_to_keep)

log_util.print_log_index_summary(log_index, "Filtered Log Index:")

# Unpack the log into numpy structured arrays
#   log_data_to_np_arrays returns a dictionary with one key-value pair per
#    entry type included in the log_index argument; the log_index keys are
#    reused as the output dictionary keys and each value is a numpy record
#    array.  See wlan_exp_log.log_entries.py for the definition of each
#    record array datatype.
log_np = log_util.log_data_to_np_arrays(log_data, log_index)

###############################################################################
Ejemplo n.º 5
0
def log_anonymize(filename):
    """Anonymize the log in ``filename`` and write it to ``<name>_anon<ext>``.

    Processing steps:
      1. Locate every MAC address in the STATION_INFO, TXRX_STATS, RX_DSSS,
         RX_OFDM, TX and TX_LOW entries (fixed byte offsets per entry type).
      2. Assign each known address a unique anonymous replacement.
      3. Overwrite every recorded address location with its replacement.
      4. Replace STATION_INFO hostnames, replace WN_CMD_INFO entries with
         NULL entries and overwrite the payloads of all entries.

    The result, together with the original user attributes, is written next
    to the input file.

    Uses names defined elsewhere in this file: ``all_addrs`` (the running
    list of known MAC addresses), ``print_time`` and ``addr_to_replace``.

    Args:
        filename: Path of the HDF5 log file to anonymize.

    Raises:
        ValueError: If more than 2^24 addresses are known, which is the
            capacity of the anonymous address scheme.
    """
    global all_addrs

    # Get the log data, the raw log index and the user attributes from the file
    log_bytes = bytearray(hdf_util.hdf5_to_log_data(filename=filename))
    raw_log_index = hdf_util.hdf5_to_log_index(filename=filename)
    log_attr_dict = hdf_util.hdf5_to_attr_dict(filename=filename)

    # Generate the index of log entry locations sorted by log entry type
    log_index = log_util.filter_log_index(raw_log_index)

    # Re-initialize the address-byteindex map per file using the running
    #   list of known MAC addresses
    addr_idx_map = dict((addr, list()) for addr in all_addrs)

    log_util.print_log_index_summary(log_index)

    start_time = None

    def _report_elapsed():
        # Elapsed time is always measured from the start of step 1
        if print_time:
            print("        Time = {0:.3f}s".format(time.time() - start_time))

    def _entry_locations(key):
        # Locations of the given entry type; empty when the log has none
        try:
            return log_index[key]
        except KeyError:
            return ()

    #####################
    # Step 1: Build a dictionary of all MAC addresses in the log, then
    #   map each addresses to a unique anonymous address
    #   Uses tuple(bytearray slice) since bytearray isn't hashable as-is

    print("Anonmyizing file step 1 ...")

    start_time = time.time()

    # (log index key, byte offsets of the 6-byte addresses within an entry)
    addr_fields = (
        ('STATION_INFO', (8,)),
        ('TXRX_STATS', (16,)),
        ('RX_DSSS', (28, 34, 40)),
        ('RX_OFDM', (284, 290, 296)),
        ('TX', (44, 50, 56)),
        ('TX_LOW', (40, 46, 52)),
    )

    for key, offsets in addr_fields:
        print("    Anonmyizing {0} entries".format(key))

        for idx in _entry_locations(key):
            for o in offsets:
                addr_to_replace(tuple(log_bytes[idx+o:idx+o+6]), idx+o, addr_idx_map)

        _report_elapsed()

    #####################
    # Step 2: Enumerate actual MAC addresses and their anonymous replacements

    print("Anonmyizing file step 2 ...")

    print("    Enumerate MAC addresses and their anonymous replacements")

    # Fail early with a clear message instead of an out-of-range byte error
    # when the replacement address is built in step 3
    if len(all_addrs) > 256**3:
        raise ValueError(
            "Cannot anonymize {0} addresses; at most 2^24 are supported".format(
                len(all_addrs)))

    addr_map = dict()
    for ii, addr in enumerate(all_addrs):
        # An odd first octet marks an address as multicast, and an all-0xFF
        # address is the broadcast address, so the anonymous prefix is
        # 0xFE 0xFF 0xFF.  The remaining three octets carry the index, giving
        # room for 2^24 unique addresses.
        addr_map[addr] = (0xFE, 0xFF, 0xFF, (ii // (256**2)),
                          ((ii // 256) % 256), (ii % 256))

    _report_elapsed()

    #####################
    # Step 3: Replace all MAC addresses in the log

    print("Anonmyizing file step 3 ...")

    print("    Replace all MAC addresses in the log")

    for old_addr, byte_idxs in addr_idx_map.items():
        new_addr = bytearray(addr_map[old_addr])
        for byte_idx in byte_idxs:
            log_bytes[byte_idx:byte_idx+6] = new_addr

    _report_elapsed()

    #####################
    # Step 4: Other anonymization steps

    print("Anonmyizing file step 4 ...")

    print("    Replace STATION_INFO hostnames")

    # Station info entries contain "hostname", the DHCP client hostname field
    #   Replace these with a string version of the new anonymous MAC addr
    addr_o = 8     # 6-byte MAC addr (already anonymized)
    name_o = 16    # 20 character ASCII string
    for idx in _entry_locations('STATION_INFO'):
        addr = log_bytes[idx+addr_o:idx+addr_o+6]

        # The name is built from the last two address octets and NUL-padded
        # to the full width of the hostname field
        new_name = "AnonNode {0:02x}_{1:02x}".format(addr[4], addr[5])
        new_name = new_name + '\x00' * (20 - len(new_name))
        log_bytes[idx+name_o:idx+name_o+20] = bytearray(new_name.encode("UTF-8"))

    print("    Remove all WN_CMD_INFO entries")

    # WARPNet Command info entries contain command arguments that could possibly
    #   contain sensitive information.  Replace with NULL entries.
    # A log without such entries is fine; a failed replacement is reported
    # because it would leave the command arguments in the output file.
    try:
        cmd_info_locs = log_index['WN_CMD_INFO']
    except KeyError:
        cmd_info_locs = None

    if cmd_info_locs is not None:
        try:
            log_util.overwrite_entries_with_null_entry(log_bytes, cmd_info_locs)
        except Exception as err:
            print("    WARNING: WN_CMD_INFO entries were not removed: "
                  "{0}".format(err))

    print("    Remove all payloads")

    # Overwrite all payloads.  Each entry type is handled on its own so that
    # a failure for one type neither stops the remaining types nor goes
    # unnoticed.
    for key in log_index.keys():
        try:
            log_util.overwrite_payloads(log_bytes, log_index[key])
        except Exception as err:
            print("    WARNING: payloads of {0} entries were not removed: "
                  "{1}".format(key, err))

    _report_elapsed()

    #####################
    # Write output files

    # Write the modified log to a new HDF5 file
    (fn_fldr, fn_file) = os.path.split(filename)

    # Everything from the last '.' of the file name onwards is the <ext>
    ext_i = fn_file.rfind('.')
    if ext_i != -1:
        fn_base, fn_ext = fn_file[:ext_i], fn_file[ext_i:]
    else:
        fn_base, fn_ext = fn_file, ''

    newfilename = os.path.join(fn_fldr, fn_base + "_anon" + fn_ext)

    print("Writing new file {0} ...".format(newfilename))

    # Copy any user attributes to the new anonymized file
    hdf_util.log_data_to_hdf5(log_bytes, newfilename, attr_dict=log_attr_dict)

    return
Ejemplo n.º 6
0
def log_anonymize(filename):
    """Anonymize the log in ``filename`` and write it to ``<name>_anon<ext>``.

    Processing steps:
      1. Locate every MAC address in the STATION_INFO, TXRX_STATS, RX_DSSS,
         RX_OFDM, TX and TX_LOW entries (fixed byte offsets per entry type).
      2. Assign each known address a unique anonymous replacement.
      3. Overwrite every recorded address location with its replacement.
      4. Replace STATION_INFO hostnames, replace WN_CMD_INFO entries with
         NULL entries and overwrite the payloads of all entries.

    The result, together with the original user attributes, is written next
    to the input file.

    Uses names defined elsewhere in this file: ``all_addrs`` (the running
    list of known MAC addresses), ``print_time`` and ``addr_to_replace``.

    Args:
        filename: Path of the HDF5 log file to anonymize.

    Raises:
        ValueError: If more than 2^24 addresses are known, which is the
            capacity of the anonymous address scheme.
    """
    global all_addrs

    # Get the log data, the raw log index and the user attributes from the file
    log_bytes = bytearray(hdf_util.hdf5_to_log_data(filename=filename))
    raw_log_index = hdf_util.hdf5_to_log_index(filename=filename)
    log_attr_dict = hdf_util.hdf5_to_attr_dict(filename=filename)

    # Generate the index of log entry locations sorted by log entry type
    log_index = log_util.filter_log_index(raw_log_index)

    # Re-initialize the address-byteindex map per file using the running
    #   list of known MAC addresses
    addr_idx_map = dict((addr, list()) for addr in all_addrs)

    log_util.print_log_index_summary(log_index)

    start_time = None

    def _report_elapsed():
        # Elapsed time is always measured from the start of step 1
        if print_time:
            print("        Time = {0:.3f}s".format(time.time() - start_time))

    def _entry_locations(key):
        # Locations of the given entry type; empty when the log has none
        try:
            return log_index[key]
        except KeyError:
            return ()

    #####################
    # Step 1: Build a dictionary of all MAC addresses in the log, then
    #   map each addresses to a unique anonymous address
    #   Uses tuple(bytearray slice) since bytearray isn't hashable as-is

    print("Anonmyizing file step 1 ...")

    start_time = time.time()

    # (log index key, byte offsets of the 6-byte addresses within an entry)
    addr_fields = (
        ('STATION_INFO', (8,)),
        ('TXRX_STATS', (16,)),
        ('RX_DSSS', (28, 34, 40)),
        ('RX_OFDM', (284, 290, 296)),
        ('TX', (44, 50, 56)),
        ('TX_LOW', (40, 46, 52)),
    )

    for key, offsets in addr_fields:
        print("    Anonmyizing {0} entries".format(key))

        for idx in _entry_locations(key):
            for o in offsets:
                addr_to_replace(tuple(log_bytes[idx + o:idx + o + 6]), idx + o,
                                addr_idx_map)

        _report_elapsed()

    #####################
    # Step 2: Enumerate actual MAC addresses and their anonymous replacements

    print("Anonmyizing file step 2 ...")

    print("    Enumerate MAC addresses and their anonymous replacements")

    # Fail early with a clear message instead of an out-of-range byte error
    # when the replacement address is built in step 3
    if len(all_addrs) > 256**3:
        raise ValueError(
            "Cannot anonymize {0} addresses; at most 2^24 are supported".format(
                len(all_addrs)))

    addr_map = dict()
    for ii, addr in enumerate(all_addrs):
        # An odd first octet marks an address as multicast, and an all-0xFF
        # address is the broadcast address, so the anonymous prefix is
        # 0xFE 0xFF 0xFF.  The remaining three octets carry the index, giving
        # room for 2^24 unique addresses.
        addr_map[addr] = (0xFE, 0xFF, 0xFF, (ii // (256**2)),
                          ((ii // 256) % 256), (ii % 256))

    _report_elapsed()

    #####################
    # Step 3: Replace all MAC addresses in the log

    print("Anonmyizing file step 3 ...")

    print("    Replace all MAC addresses in the log")

    for old_addr, byte_idxs in addr_idx_map.items():
        new_addr = bytearray(addr_map[old_addr])
        for byte_idx in byte_idxs:
            log_bytes[byte_idx:byte_idx + 6] = new_addr

    _report_elapsed()

    #####################
    # Step 4: Other anonymization steps

    print("Anonmyizing file step 4 ...")

    print("    Replace STATION_INFO hostnames")

    # Station info entries contain "hostname", the DHCP client hostname field
    #   Replace these with a string version of the new anonymous MAC addr
    addr_o = 8     # 6-byte MAC addr (already anonymized)
    name_o = 16    # 20 character ASCII string
    for idx in _entry_locations('STATION_INFO'):
        addr = log_bytes[idx + addr_o:idx + addr_o + 6]

        # The name is built from the last two address octets and NUL-padded
        # to the full width of the hostname field
        new_name = "AnonNode {0:02x}_{1:02x}".format(addr[4], addr[5])
        new_name = new_name + '\x00' * (20 - len(new_name))
        log_bytes[idx + name_o:idx + name_o + 20] = bytearray(
            new_name.encode("UTF-8"))

    print("    Remove all WN_CMD_INFO entries")

    # WARPNet Command info entries contain command arguments that could possibly
    #   contain sensitive information.  Replace with NULL entries.
    # A log without such entries is fine; a failed replacement is reported
    # because it would leave the command arguments in the output file.
    try:
        cmd_info_locs = log_index['WN_CMD_INFO']
    except KeyError:
        cmd_info_locs = None

    if cmd_info_locs is not None:
        try:
            log_util.overwrite_entries_with_null_entry(log_bytes,
                                                       cmd_info_locs)
        except Exception as err:
            print("    WARNING: WN_CMD_INFO entries were not removed: "
                  "{0}".format(err))

    print("    Remove all payloads")

    # Overwrite all payloads.  Each entry type is handled on its own so that
    # a failure for one type neither stops the remaining types nor goes
    # unnoticed.
    for key in log_index.keys():
        try:
            log_util.overwrite_payloads(log_bytes, log_index[key])
        except Exception as err:
            print("    WARNING: payloads of {0} entries were not removed: "
                  "{1}".format(key, err))

    _report_elapsed()

    #####################
    # Write output files

    # Write the modified log to a new HDF5 file
    (fn_fldr, fn_file) = os.path.split(filename)

    # Everything from the last '.' of the file name onwards is the <ext>
    ext_i = fn_file.rfind('.')
    if ext_i != -1:
        fn_base, fn_ext = fn_file[:ext_i], fn_file[ext_i:]
    else:
        fn_base, fn_ext = fn_file, ''

    newfilename = os.path.join(fn_fldr, fn_base + "_anon" + fn_ext)

    print("Writing new file {0} ...".format(newfilename))

    # Copy any user attributes to the new anonymized file
    hdf_util.log_data_to_hdf5(log_bytes, newfilename, attr_dict=log_attr_dict)

    return
Ejemplo n.º 7
0
def log_anonymize(filename):
    """Anonymize the log in ``filename`` and write it to ``<name>_anon<ext>``.

    Processing steps:
      1. Locate every MAC address in the RX_DSSS / RX_OFDM / TX_HIGH / TX_LOW
         entries (LTG variants are merged into their base type first).
      2. Assign each known address a unique anonymous replacement.
      3. Overwrite every recorded address location with its replacement.
      4. Overwrite the payloads of all entries via
         ``log_util.overwrite_payloads``.

    The result, together with the original user attributes, is written next
    to the input file.

    Uses names defined elsewhere in this file: ``all_addrs`` (the running
    list of known MAC addresses), ``print_time`` and ``addr_to_replace``.

    Args:
        filename: Path of the HDF5 log file to anonymize.

    Raises:
        ValueError: If more than 2^24 addresses are known, which is the
            capacity of the anonymous address scheme.
    """
    global all_addrs

    # Get the log data, the raw log index and the user attributes from the file
    log_bytes = bytearray(hdf_util.hdf5_to_log_data(filename=filename))
    raw_log_index = hdf_util.hdf5_to_log_index(filename=filename)
    log_attr_dict = hdf_util.hdf5_to_attr_dict(filename=filename)

    # Generate the index of log entry locations sorted by log entry type
    #    Merge the Rx / Tx subtypes that can be processed together
    log_index = log_util.filter_log_index(raw_log_index,
                                          merge={'RX_OFDM': ['RX_OFDM', 'RX_OFDM_LTG'],
                                                 'TX_HIGH': ['TX_HIGH', 'TX_HIGH_LTG'],
                                                 'TX_LOW' : ['TX_LOW', 'TX_LOW_LTG']})

    # Re-initialize the address-byteindex map per file using the running
    #   list of known MAC addresses
    addr_idx_map = dict((addr, list()) for addr in all_addrs)

    log_util.print_log_index_summary(log_index, "Log Index Summary (merged):")

    start_time = None

    def _report_elapsed():
        # Elapsed time is always measured from the start of step 1
        if print_time:
            print("        Time = {0:.3f}s".format(time.time() - start_time))

    #---------------------------------------------------------------------
    # Step 1: Build a dictionary of all MAC addresses in the log, then
    #   map each addresses to a unique anonymous address
    #   Uses tuple(bytearray slice) since bytearray isn't hashable as-is
    #
    print("Anonmyizing file step 1 ...")

    start_time = time.time()

    # (log index key, name of the matching entry definition in entry_types)
    addr_sources = (
        ('RX_DSSS', 'entry_rx_dsss'),
        ('RX_OFDM', 'entry_rx_ofdm'),
        ('TX_HIGH', 'entry_tx_high'),
        ('TX_LOW', 'entry_tx_low'),
    )

    # 6-byte addresses at offsets 4, 10, 16 in the mac_payload (all types)
    addr_offsets = (4, 10, 16)

    for key, type_name in addr_sources:
        # Only the lookup is guarded: an entry type that is absent from this
        # log is skipped, any other failure is allowed to surface.
        try:
            entry_locs = log_index[key]
        except KeyError:
            entry_locs = None

        if entry_locs is not None:
            print("    Anonmyizing {0} {1} entries".format(len(entry_locs), key))

            # The payload is the last field of the entry, so it starts after
            # the combined size of all preceding fields
            entry_type = getattr(entry_types, type_name)
            pyld_start = struct.calcsize(''.join(
                    entry_type.get_field_struct_formats()[:-1])
            )

            for idx in entry_locs:
                for o in addr_offsets:
                    addr_start = idx + pyld_start + o
                    addr_to_replace(tuple(log_bytes[addr_start:addr_start+6]), addr_start, addr_idx_map)

        _report_elapsed()

    #---------------------------------------------------------------------
    # Step 2: Enumerate actual MAC addresses and their anonymous replacements
    #
    print("Anonmyizing file step 2 ...")

    print("    Enumerate MAC addresses and their anonymous replacements")

    # Fail early with a clear message instead of an out-of-range byte error
    # when the replacement address is built in step 3
    if len(all_addrs) > 256**3:
        raise ValueError(
            "Cannot anonymize {0} addresses; at most 2^24 are supported".format(
                len(all_addrs)))

    addr_map = dict()
    for ii, addr in enumerate(all_addrs):
        # Address should not have a first octet that is odd, as this indicates
        # the address is multicast.  Hence, use 0xFE as the first octet.
        #
        # Due to FCS errors, the number of addresses in a log file is
        # potentially large.  Therefore, the anonymizer supports 2^24 unique
        # addresses.
        #
        addr_map[addr] = (0xFE, 0xFF, 0xFF, (ii//(256**2)), ((ii//256)%256), (ii%256))

    _report_elapsed()

    #---------------------------------------------------------------------
    # Step 3: Replace all MAC addresses in the log
    #
    print("Anonmyizing file step 3 ...")

    print("    Replace all MAC addresses in the log")

    for old_addr, byte_idxs in addr_idx_map.items():
        new_addr = bytearray(addr_map[old_addr])
        for byte_idx in byte_idxs:
            log_bytes[byte_idx:byte_idx+6] = new_addr

    _report_elapsed()

    #---------------------------------------------------------------------
    # Step 4: Other anonymization steps
    #
    print("Anonmyizing file step 4 ...")

    print("    Remove all payloads")

    # Overwrite all payloads.  Each entry type is handled on its own so that
    # a failure for one type neither stops the remaining types nor goes
    # unnoticed: a silently skipped type would leave payload data in a file
    # that is labelled as anonymized.
    for key in log_index.keys():
        try:
            log_util.overwrite_payloads(log_bytes, log_index[key])
        except Exception as err:
            print("    WARNING: payloads of {0} entries were not removed: "
                  "{1}".format(key, err))

    _report_elapsed()

    #---------------------------------------------------------------------
    # Write output files
    #

    # Write the modified log to a new HDF5 file
    (fn_fldr, fn_file) = os.path.split(filename)

    # Everything from the last '.' of the file name onwards is the <ext>
    ext_i = fn_file.rfind('.')
    if ext_i != -1:
        fn_base, fn_ext = fn_file[:ext_i], fn_file[ext_i:]
    else:
        fn_base, fn_ext = fn_file, ''

    newfilename = os.path.join(fn_fldr, fn_base + "_anon" + fn_ext)

    print("Writing new file {0} ...".format(newfilename))

    # Copy any user attributes to the new anonymized file
    hdf_util.log_data_to_hdf5(log_bytes, newfilename, attr_dict=log_attr_dict)

    return