def _dd(self, file_name, offset, size, extension, output_file_name=None):
    '''
    Extracts a file embedded inside the target file.

    @file_name        - Path to the target file.
    @offset           - Offset inside the target file where the embedded file begins.
    @size             - Number of bytes to extract.
    @extension        - The file extension to assign to the extracted file on disk.
    @output_file_name - The requested name of the output file.

    Returns the extracted file name.

    Raises an Exception if the data could not be extracted.
    '''
    total_size = 0

    if not output_file_name:
        # Default extracted file name is <hex offset>.<extension>
        bname = "%X" % offset
    else:
        # Strip the output file name of invalid/dangerous characters (like file paths)
        bname = os.path.basename(output_file_name)

    fname = unique_file_name(bname, extension)

    try:
        # Open the target file and seek to the offset
        fdin = open(file_name, "rb")
        try:
            fdin.seek(offset)

            # Open the output file. If the requested name fails (e.g., it contains
            # characters invalid on this filesystem), fall back to the default
            # <hex offset> based name. (The original code referenced an undefined
            # 'altname' here, which raised a NameError instead of recovering.)
            try:
                fdout = open(fname, "wb")
            except IOError:
                fname = unique_file_name("%X" % offset, extension)
                fdout = open(fname, "wb")

            try:
                # Read data from the target file in chunks and write it to the
                # extracted file; chunking bounds memory use for large extractions.
                while total_size < size:
                    block_size = size - total_size
                    if block_size > self.MAX_READ_SIZE:
                        block_size = self.MAX_READ_SIZE

                    fdout.write(fdin.read(block_size))
                    total_size += block_size
            finally:
                fdout.close()
        finally:
            fdin.close()
    except Exception as e:
        raise Exception("Extractor.dd failed to extract data from '%s' to '%s': %s" % (file_name, fname, str(e)))

    return fname
def _dd(self, file_name, offset, size, extension, output_file_name=None):
    '''
    Extracts a file embedded inside the target file.

    @file_name        - Path to the target file.
    @offset           - Offset inside the target file where the embedded file begins.
    @size             - Number of bytes to extract.
    @extension        - The file extension to assign to the extracted file on disk.
    @output_file_name - The requested name of the output file.

    Returns the extracted file name.

    Raises an Exception if the data could not be extracted.
    '''
    total_size = 0

    if not output_file_name:
        # Default extracted file name is <hex offset>.<extension>
        bname = "%X" % offset
    else:
        # Strip the output file name of invalid/dangerous characters (like file paths)
        bname = os.path.basename(output_file_name)

    fname = unique_file_name(bname, extension)

    try:
        # Open the target file and seek to the offset
        fdin = open(file_name, "rb")
        try:
            fdin.seek(offset)

            # Open the output file. If the requested name fails (e.g., it contains
            # characters invalid on this filesystem), fall back to the default
            # <hex offset> based name. (The original code referenced an undefined
            # 'altname' here, which raised a NameError instead of recovering.)
            try:
                fdout = open(fname, "wb")
            except IOError:
                fname = unique_file_name("%X" % offset, extension)
                fdout = open(fname, "wb")

            try:
                # Read data from the target file in chunks and write it to the
                # extracted file; chunking bounds memory use for large extractions.
                while total_size < size:
                    block_size = size - total_size
                    if block_size > self.MAX_READ_SIZE:
                        block_size = self.MAX_READ_SIZE

                    fdout.write(fdin.read(block_size))
                    total_size += block_size
            finally:
                fdout.close()
        finally:
            fdin.close()
    except Exception as e:
        raise Exception("Extractor.dd failed to extract data from '%s' to '%s': %s" % (file_name, fname, str(e)))

    return fname
def __init__(self, x, y, title='Entropy', average=0, file_results=None, show_legend=True, save=False):
    '''
    Plots entropy data.

    @x            - List of graph x-coordinates (i.e., data offsets).
    @y            - List of graph y-coordinates (i.e., entropy for each offset).
    @title        - Graph title.
    @average      - The average entropy.
    @file_results - Binwalk results, if any (a list of (offset, results) tuples).
    @show_legend  - Set to False to not generate a color-coded legend and plotted x coordinates for the graph.
    @save         - If set to True, graph will be saved to disk rather than displayed.

    Returns None.
    '''
    import matplotlib.pyplot as plt
    import numpy as np

    i = 0
    new_ticks = []
    color_mappings = {}

    plt.clf()

    if file_results:
        for (offset, results) in file_results:
            label = None
            description = results[0]['description'].split(',')[0]

            # Assign each unique description its own color, cycling through the
            # available colors if there are more descriptions than colors.
            # (dict.has_key() was removed in Python 3; 'in' works in both 2 and 3.)
            if description not in color_mappings:
                if show_legend:
                    label = description

                color_mappings[description] = self.COLORS[i]
                i += 1
                if i >= len(self.COLORS):
                    i = 0

            # Mark the result offset with a vertical line in its assigned color
            plt.axvline(x=offset, label=label, color=color_mappings[description], linewidth=self.LINE_WIDTH)
            new_ticks.append(offset)

        if show_legend:
            plt.legend()

            if new_ticks:
                new_ticks.sort()
                plt.xticks(np.array(new_ticks), new_ticks)

    plt.plot(x, y, linewidth=self.LINE_WIDTH)

    # Draw a dashed red line at the average entropy level
    if average:
        plt.plot(x, [average] * len(x), linestyle='--', color='r')

    plt.xlabel(self.XLABEL)
    plt.ylabel(self.YLABEL)
    plt.title(title)
    plt.ylim(self.YLIM_MIN, self.YLIM_MAX)

    if save:
        plt.savefig(common.unique_file_name(title, self.FILE_FORMAT))
    else:
        plt.show()
def scan(self, target_files, offset=0, length=0, show_invalid_results=False, callback=None, start_callback=None, end_callback=None, base_dir=None, matryoshka=1, plugins_whitelist=None, plugins_blacklist=None):
    '''
    Performs a binwalk scan on a file or list of files.

    @target_files         - File or list of files to scan.
    @offset               - Starting offset at which to start the scan.
    @length               - Number of bytes to scan. Specify -1 for streams.
    @show_invalid_results - Set to True to display invalid results.
    @callback             - Callback function to be invoked when matches are found.
    @start_callback       - Callback function to be invoked prior to scanning each file.
    @end_callback         - Callback function to be invoked after scanning each file.
    @base_dir             - Base directory for output files.
    @matryoshka           - Number of levels to traverse into the rabbit hole.
    @plugins_whitelist    - A list of plugin names to load. If not empty, only these plugins will be loaded.
    @plugins_blacklist    - A list of plugin names to not load.

    Returns a dictionary of :

        {
            'target file name' : [
                        (0, [{description : "LZMA compressed data..."}]),
                        (112, [{description : "gzip compressed data..."}])
            ]
        }
    '''
    # Prefix all directory names with an underscore. This prevents accidental deletion of the original file(s)
    # when the user is typing too fast and is trying to delete the extraction directory.
    prefix = '_'
    dir_extension = 'extracted'
    i = 0
    total_results = {}
    self.matryoshka = matryoshka

    # Avoid the mutable-default-argument pitfall; None means "no explicit list supplied".
    if plugins_whitelist is None:
        plugins_whitelist = []
    if plugins_blacklist is None:
        plugins_blacklist = []

    # For backwards compatibility, accept a single file name as well as a list
    if not isinstance(target_files, list):
        target_files = [target_files]

    if base_dir is None:
        base_dir = ''

    # Instantiate the Plugins class and load all plugins, if not disabled
    self.plugins = Plugins(self, whitelist=plugins_whitelist, blacklist=plugins_blacklist)
    if self.load_plugins:
        self.plugins._load_plugins()

    # Each matryoshka iteration re-scans the files extracted by the previous one
    while i < self.matryoshka:
        new_target_files = []

        # Scan each target file
        for target_file in target_files:
            ignore_files = []

            # On the first scan, add the base_dir value to dir_prefix. Subsequent
            # target_file values will have this value prepended already.
            if i == 0:
                dir_prefix = os.path.join(base_dir, prefix + os.path.basename(target_file))
            else:
                dir_prefix = os.path.join(os.path.dirname(target_file), prefix + os.path.basename(target_file))

            output_dir = unique_file_name(dir_prefix, dir_extension)

            # Set the output directory for extracted files to go to
            self.extractor.output_directory(output_dir)

            if start_callback is not None:
                start_callback(target_file)

            results = self.single_scan(target_file, offset=offset, length=length, show_invalid_results=show_invalid_results, callback=callback)

            if end_callback is not None:
                end_callback(target_file)

            # Get a list of extracted file names; don't scan them again.
            for (index, results_list) in results:
                for result in results_list:
                    if result['extract']:
                        ignore_files.append(result['extract'])

            # Find all newly created files and add them to new_target_files / new_target_directories
            for (dir_path, sub_dirs, files) in os.walk(output_dir):
                for fname in files:
                    fname = os.path.join(dir_path, fname)
                    if fname not in ignore_files:
                        new_target_files.append(fname)

                # Don't worry about sub-directories
                break

            total_results[target_file] = results

        target_files = new_target_files
        i += 1

    # Be sure to delete the Plugins instance so that there isn't a lingering reference to
    # this Binwalk class instance (lingering handles to this Binwalk instance cause the
    # __del__ deconstructor to not be called).
    if self.plugins is not None:
        del self.plugins
        self.plugins = None

    return total_results
def plot(self, x, y, average=0, file_results=None, show_legend=True, save=False):
    '''
    Plots entropy data.

    @x            - List of graph x-coordinates (i.e., data offsets).
    @y            - List of graph y-coordinates (i.e., entropy for each offset).
    @average      - The average entropy.
    @file_results - A list of tuples containing additional analysis data, as returned by Binwalk.single_scan.
    @show_legend  - Set to False to not generate a color-coded legend and plotted x coordinates for the graph.
    @save         - If set to True, graph will be saved to disk rather than displayed.

    Returns None.
    '''
    import matplotlib.pyplot as plt
    import numpy as np

    i = 0
    trigger = 0
    new_ticks = []
    colors = ['darkgreen', 'blueviolet', 'saddlebrown', 'deeppink', 'goldenrod', 'olive', 'black']
    color_mappings = {}

    plt.clf()

    # If no explicit results were supplied, derive rising/falling entropy edge
    # markers from the entropy data itself.
    if not file_results and show_legend and average:
        file_results = []

        # Typically the average entropy is used as the trigger level for rising/falling entropy edges.
        # If the average entropy is too low, false rising and falling edges will be marked; if this is
        # the case, and if there is at least one data point greater than ENTROPY_MAX, use ENTROPY_TRIGGER
        # as the trigger level to avoid false edges.
        if average < self.ENTROPY_TRIGGER:
            for point in y:
                if point > self.ENTROPY_MAX:
                    trigger = self.ENTROPY_TRIGGER
                    break

        if not trigger:
            trigger = average

        # Mark each offset where the entropy crosses the trigger level
        # (range starts at 1 because each point is compared to its predecessor).
        for j in range(1, len(x)):
            if y[j] >= trigger and y[j-1] < trigger:
                file_results.append((x[j], [{'description' : 'Entropy rising edge'}]))
            elif y[j] <= trigger and y[j-1] > trigger:
                file_results.append((x[j], [{'description' : 'Entropy falling edge'}]))

    if file_results:
        for (offset, results) in file_results:
            label = None
            description = results[0]['description'].split(',')[0]

            # Assign each unique description its own color, cycling through the
            # available colors if there are more descriptions than colors.
            # (dict.has_key() was removed in Python 3; 'in' works in both 2 and 3.)
            if description not in color_mappings:
                if show_legend:
                    label = description

                color_mappings[description] = colors[i]
                i += 1
                if i >= len(colors):
                    i = 0

            # Mark the result offset with a vertical line in its assigned color
            plt.axvline(x=offset, label=label, color=color_mappings[description], linewidth=1.5)
            new_ticks.append(offset)

        if show_legend:
            plt.legend()

            if new_ticks:
                new_ticks.sort()
                plt.xticks(np.array(new_ticks), new_ticks)

    plt.plot(x, y, linewidth=1.5)

    # Draw a dashed red line at the average entropy level
    if average:
        plt.plot(x, [average] * len(x), linestyle='--', color='r')

    plt.xlabel('Offset')
    plt.ylabel('Entropy')
    plt.title(self.fd.name)
    plt.ylim(0, 1.5)

    if save:
        plt.savefig(common.unique_file_name(os.path.join(os.path.dirname(self.fd.name), '_' + os.path.basename(self.fd.name)), self.FILE_FORMAT))
    else:
        plt.show()
def scan(self, target_files, offset=0, length=0, show_invalid_results=False, callback=None, start_callback=None, end_callback=None, base_dir=None, matryoshka=1, plugins_whitelist=None, plugins_blacklist=None):
    '''
    Performs a binwalk scan on a file or list of files.

    @target_files         - File or list of files to scan.
    @offset               - Starting offset at which to start the scan.
    @length               - Number of bytes to scan. Specify -1 for streams.
    @show_invalid_results - Set to True to display invalid results.
    @callback             - Callback function to be invoked when matches are found.
    @start_callback       - Callback function to be invoked prior to scanning each file.
    @end_callback         - Callback function to be invoked after scanning each file.
    @base_dir             - Base directory for output files.
    @matryoshka           - Number of levels to traverse into the rabbit hole.
    @plugins_whitelist    - A list of plugin names to load. If not empty, only these plugins will be loaded.
    @plugins_blacklist    - A list of plugin names to not load.

    Returns a dictionary of :

        {
            'target file name' : [
                        (0, [{description : "LZMA compressed data..."}]),
                        (112, [{description : "gzip compressed data..."}])
            ]
        }
    '''
    # Prefix all directory names with an underscore. This prevents accidental deletion of the original file(s)
    # when the user is typing too fast and is trying to delete the extraction directory.
    prefix = '_'
    dir_extension = 'extracted'
    i = 0
    total_results = {}
    self.matryoshka = matryoshka

    # Avoid the mutable-default-argument pitfall; None means "no explicit list supplied".
    if plugins_whitelist is None:
        plugins_whitelist = []
    if plugins_blacklist is None:
        plugins_blacklist = []

    # For backwards compatibility, accept a single file name as well as a list
    if not isinstance(target_files, list):
        target_files = [target_files]

    if base_dir is None:
        base_dir = ''

    # Instantiate the Plugins class and load all plugins, if not disabled
    self.plugins = Plugins(self, whitelist=plugins_whitelist, blacklist=plugins_blacklist)
    if self.load_plugins:
        self.plugins._load_plugins()

    # Each matryoshka iteration re-scans the files extracted by the previous one
    while i < self.matryoshka:
        new_target_files = []

        # Scan each target file
        for target_file in target_files:
            ignore_files = []

            # On the first scan, add the base_dir value to dir_prefix. Subsequent
            # target_file values will have this value prepended already.
            if i == 0:
                dir_prefix = os.path.join(base_dir, prefix + os.path.basename(target_file))
            else:
                dir_prefix = os.path.join(os.path.dirname(target_file), prefix + os.path.basename(target_file))

            output_dir = unique_file_name(dir_prefix, dir_extension)

            # Set the output directory for extracted files to go to
            self.extractor.output_directory(output_dir)

            if start_callback is not None:
                start_callback(target_file)

            results = self.single_scan(target_file, offset=offset, length=length, show_invalid_results=show_invalid_results, callback=callback)

            if end_callback is not None:
                end_callback(target_file)

            # Get a list of extracted file names; don't scan them again.
            for (index, results_list) in results:
                for result in results_list:
                    if result['extract']:
                        ignore_files.append(result['extract'])

            # Find all newly created files and add them to new_target_files / new_target_directories
            for (dir_path, sub_dirs, files) in os.walk(output_dir):
                for fname in files:
                    fname = os.path.join(dir_path, fname)
                    if fname not in ignore_files:
                        new_target_files.append(fname)

                # Don't worry about sub-directories
                break

            total_results[target_file] = results

        target_files = new_target_files
        i += 1

    # Be sure to delete the Plugins instance so that there isn't a lingering reference to
    # this Binwalk class instance (lingering handles to this Binwalk instance cause the
    # __del__ deconstructor to not be called).
    if self.plugins is not None:
        del self.plugins
        self.plugins = None

    return total_results