def lzma_cable_extractor(self, fname):
    '''
    Runs the original extraction command on an LZMA file, patching the file first if needed.

    The unmodified file is tried first. If the extractor reports failure (a false return value),
    a copy of the file is written with self.FAKE_LZMA_SIZE inserted after its first five bytes,
    the copy replaces the original file, and the extraction command is run a second time.

    @fname - Path to the LZMA file to extract.

    Returns None.
    '''
    # Try extracting the LZMA file without modification first
    if self.binwalk.extractor.execute(self.original_cmd, fname):
        return

    (base_name, extension) = os.path.splitext(fname)
    out_name = base_name + '-patched' + extension

    # Both handles are released even if a read or write fails part way through.
    fp_out = open(out_name, 'wb')
    try:
        fp_in = BlockFile(fname)
        try:
            # Each block should contain only its own bytes, since blocks are copied verbatim.
            # NOTE(review): assumes MAX_TRAILING_SIZE controls look-ahead data appended to a block - confirm against BlockFile.
            fp_in.MAX_TRAILING_SIZE = 0
            i = 0

            while i < fp_in.length:
                (data, dlen) = fp_in.read_block()

                # A read that makes no progress would otherwise loop forever (e.g. a file
                # shorter than its reported length).
                if not data or dlen == 0:
                    break

                if i == 0:
                    # Only the very first block carries the header that needs patching.
                    out_data = data[0:5] + self.FAKE_LZMA_SIZE + data[5:]
                else:
                    out_data = data

                fp_out.write(str2bytes(out_data))

                i += dlen
        finally:
            fp_in.close()
    finally:
        fp_out.close()

    # Overwrite the original file so that it can be cleaned up if -r was specified
    shutil.move(out_name, fname)
    self.binwalk.extractor.execute(self.original_cmd, fname)
def _dd(self, file_name, offset, size, extension, output_file_name=None):
    '''
    Extracts a file embedded inside the target file.

    @file_name        - Path to the target file.
    @offset           - Offset inside the target file where the embedded file begins.
    @size             - Number of bytes to extract. Capped at self.max_size when that is set.
    @extension        - The file extension to assign to the extracted file on disk.
    @output_file_name - The requested name of the output file. Only its base name is used;
                        if empty or None, a name derived from the hex offset is used instead.

    Returns the extracted file name.

    Raises an Exception describing the source and destination files if any part of the
    extraction fails.
    '''
    total_size = 0
    # Default extracted file name is <hex offset>.<extension>
    default_bname = "%X" % offset

    if self.max_size and size > self.max_size:
        size = self.max_size

    if output_file_name:
        # Strip the output file name of invalid/dangerous characters (like file paths)
        bname = os.path.basename(output_file_name)
    else:
        bname = default_bname

    fname = unique_file_name(bname, extension)

    fdin = None
    fdout = None

    try:
        try:
            # Open the target file and seek to the offset
            fdin = BlockFile(file_name, 'r', length=size)
            fdin.seek(offset)

            # Open the output file
            try:
                fdout = BlockFile(fname, 'w')
            except Exception:
                # Fall back to the default name if the requested name fails
                fname = unique_file_name(default_bname, extension)
                fdout = BlockFile(fname, 'w')

            while total_size < size:
                (data, dlen) = fdin.read_block()

                # Stop at end of input; without this a size that reaches past the available
                # data would never satisfy the loop condition.
                if not data or dlen == 0:
                    break

                fdout.write(str2bytes(data[:dlen]))
                total_size += dlen
        finally:
            # Cleanup, on both the success and the failure path
            if fdout is not None:
                fdout.close()
            if fdin is not None:
                fdin.close()
    except Exception as e:
        raise Exception("Extractor.dd failed to extract data from '%s' to '%s': %s" % (file_name, fname, str(e)))

    return fname
def _extractor(self, file_name):
    '''
    Inflates a file block by block and prints what was collected.

    The file is read in blocks of self.SIZE bytes. Each block is passed to
    self.tinfl.inflate_block; processing stops at the first block for which that call
    returns a false value, or when the input is exhausted.

    @file_name - Path to the file to inflate.

    Returns None. The collected data and a one-line summary are printed to stdout.
    '''
    processed = 0
    chunks = []

    fd = BlockFile(file_name, 'rb')
    try:
        fd.READ_BLOCK_SIZE = self.SIZE

        while processed < fd.length:
            (data, dlen) = fd.read_block()

            # A read that returns nothing cannot advance 'processed'; stop rather than spin.
            if not data or dlen == 0:
                break

            inflated_block = self.tinfl.inflate_block(data, dlen)
            if not inflated_block:
                break

            # NOTE(review): only the first 4 bytes of each inflated block are kept, so the
            # byte count printed below reflects those samples - confirm this is intended.
            chunks.append(ctypes.c_char_p(inflated_block).value[0:4])

            processed += dlen
    finally:
        fd.close()

    # c_char_p.value is a byte string on both Python 2 and 3, so join with a bytes separator.
    inflated_data = b''.join(chunks)

    # Parenthesised single-argument print behaves the same on Python 2 and Python 3.
    print(inflated_data)
    print("%s inflated to %d bytes" % (file_name, len(inflated_data)))
def single_scan(self, target_file='', fd=None, offset=0, length=0, show_invalid_results=False, callback=None, plugins_whitelist=[], plugins_blacklist=[]):
    '''
    Performs a binwalk scan on one target file or file descriptor.

    @target_file          - File to scan.
    @fd                   - A common.BlockFile object.
    @offset               - Starting offset at which to start the scan.
    @length               - Number of bytes to scan. Specify -1 for streams.
                            If 0, the length is set to the target file size minus the starting offset.
    @show_invalid_results - Set to True to display invalid results.
    @callback             - Callback function to be invoked when matches are found.
    @plugins_whitelist    - A list of plugin names to load. If not empty, only these plugins will be loaded.
    @plugins_blacklist    - A list of plugin names to not load.

    The callback function is passed two arguments, in this order: the offset at which a set of
    results was identified, and a list of result dictionaries containing the scan results
    (one result per dict). Example callback function:

        def my_callback(offset, results):
            print("Found %d results at offset %d:" % (len(results), offset))
            for result in results:
                print("    %s" % result['description'])

        # bw is a Binwalk instance
        bw.single_scan("firmware.bin", callback=my_callback)

    Upon completion, this method returns a sorted list of tuples, each containing the offset at
    which results were identified followed by the list of results dictionaries for that offset:

        scan_results = [
                (0, [{description : "LZMA compressed data..."}]),
                (112, [{description : "gzip compressed data..."}])
        ]

    If delayed extraction is enabled, the returned list is whatever self.extractor.delayed_extract
    returns for the sorted results.

    Raises an Exception if neither target_file nor fd is supplied.

    See SmartSignature.parse for a more detailed description of the results dictionary structure.
    '''
    # NOTE(review): plugins_whitelist / plugins_blacklist use mutable list defaults. They are only
    # passed through to Plugins here, not modified, but the default objects are shared across calls.
    scan_results = {}
    fsize = 0
    jump_offset = 0
    i_opened_fd = False
    i_loaded_plugins = False
    plugret = PLUGIN_CONTINUE
    plugret_start = PLUGIN_CONTINUE
    self.total_read = 0
    self.total_scanned = 0
    self.scan_length = length
    self.filter.show_invalid_results = show_invalid_results
    self.start_offset = offset

    # Check to make sure either a target file or a file descriptor was supplied
    if not target_file and fd is None:
        raise Exception("Must supply Binwalk.single_scan with a valid file path or BlockFile object")

    # Need the total size of the target file, even if we aren't scanning the whole thing
    if target_file:
        fsize = file_size(target_file)

    # If no length was specified, make the length the size of the target file minus the starting offset
    if self.scan_length == 0:
        self.scan_length = fsize - offset

    # Open the target file and seek to the specified start offset
    # NOTE(review): a file opened here is only closed at the very end of this method; an exception
    # raised in between leaves it open.
    if fd is None:
        fd = BlockFile(target_file, length=self.scan_length, offset=offset)
        i_opened_fd = True
        # If offset is negative (bytes from EOF), BlockFile class will automatically calculate the right offset
        offset = fd.offset

    # Seek to the starting offset.
    #fd.seek(offset)

    # If the Plugins class has not already been instantiated, do that now.
    if self.plugins is None:
        self.plugins = Plugins(self, blacklist=plugins_blacklist, whitelist=plugins_whitelist)
        i_loaded_plugins = True

        if self.load_plugins:
            self.plugins._load_plugins()

    # Invoke any pre-scan plugins
    plugret_start = self.plugins._pre_scan_callbacks(fd)

    # Load the default signatures if self.load_signatures has not already been invoked
    if self.magic is None:
        self.load_signatures()

    # Main loop, scan through all the data
    while not ((plugret | plugret_start) & PLUGIN_TERMINATE):
        i = 0

        # Read in the next block of data from the target file and make sure it's valid
        (data, dlen) = fd.read_block()
        if not data or dlen == 0:
            break

        # The total number of bytes scanned could be bigger than the total number
        # of bytes read from the file if the previous signature result specified a
        # jump offset that was beyond the end of the then current data block.
        #
        # If this is the case, we need to index into this data block appropriately in order to
        # resume the scan from the appropriate offset.
        #
        # Don't update dlen though, as it is the literal offset into the data block that we
        # are to scan up to in this loop iteration. It is also appended to self.total_scanned,
        # which is what we want (even if we have been told to skip part of the block, the skipped
        # part is still considered part of the total bytes scanned).
        if jump_offset > 0:
            total_check = self.total_scanned + dlen

            # Is the jump offset beyond the total amount of data that we've currently read in (i.e., in a future data block)?
            if jump_offset >= total_check:
                # A negative index makes the signature scan below skip this block entirely
                i = -1

                # Try to seek to the jump offset; this won't work if fd == sys.stdin
                # NOTE(review): the bare except below is a deliberate best-effort, but it also
                # swallows KeyboardInterrupt and SystemExit.
                try:
                    fd.seek(jump_offset)
                    self.total_read = jump_offset
                    # dlen is added back to self.total_scanned at the bottom of the loop
                    self.total_scanned = jump_offset - dlen
                except:
                    pass

            # Is the jump offset inside this block of data?
            elif jump_offset > self.total_scanned and jump_offset < total_check:
                # Index into this block appropriately; jump_offset is the file offset that
                # we need to jump to, and self.total_scanned is the file offset that starts
                # the beginning of the current block
                i = jump_offset - self.total_scanned

            # We're done with jump_offset, zero it out for the next round
            jump_offset = 0

        # Scan through each block of data looking for signatures
        if i >= 0 and i < dlen:

            # Scan this data block for a list of offsets which are candidates for possible valid signatures.
            # Signatures could be split across the block boundary; since data contains 1KB more than dlen,
            # pass up to dlen+MAX_SIGNATURE_SIZE to find_signature_candidates, but don't accept signatures that
            # start after the end of dlen.
            for candidate in self.parser.find_signature_candidates(data[i:dlen+self.MAX_SIGNATURE_SIZE], (dlen-i)):

                # If a previous signature specified a jump offset beyond this candidate signature offset, ignore it
                if (i + candidate + self.total_scanned) < jump_offset:
                    continue

                # Reset these values on each loop
                smart = {}
                results = []
                results_offset = -1

                # In python3 we need a bytes object to pass to magic.buffer
                candidate_data = str2bytes(data[i+candidate:i+candidate+fd.MAX_TRAILING_SIZE])

                # Pass the data to libmagic, and split out multiple results into a list
                for magic_result in self.parser.split(self.magic.buffer(candidate_data)):

                    i_set_results_offset = False

                    # Some signatures need to take into account the length of a given string
                    # when specifying additional offsets. Parse the string-len keyword to adjust
                    # for this prior to calling self.smart.parse.
                    magic_result = self.smart._parse_string_len(magic_result)

                    # Some file names are not NULL byte terminated, but rather their length is
                    # specified in a size field. To ensure these are not marked as invalid due to
                    # non-printable characters existing in the file name, parse the filename(s) and
                    # trim them to the specified filename length, if one was specified.
                    magic_result = self.smart._parse_raw_strings(magic_result)

                    # Invoke any pre-parser callback plugin functions
                    if not (plugret_start & PLUGIN_STOP_PLUGINS):
                        # The description is passed in a dict and read back from it afterwards
                        raw_result = {'description' : magic_result}
                        plugret = self.plugins._scan_pre_parser_callbacks(raw_result)
                        magic_result = raw_result['description']
                        if (plugret & PLUGIN_TERMINATE):
                            break

                    # Make sure this is a valid result before further processing
                    if not self.filter.invalid(magic_result):
                        # The smart filter parser returns a dictionary of keyword values and the signature description.
                        smart = self.smart.parse(magic_result)

                        # Validate the jump value and check if the response description should be displayed
                        if self._is_valid(smart, candidate+i, fsize):
                            # If multiple results are returned and one of them has smart['jump'] set to a non-zero value,
                            # the calculated results offset will be wrong since i will have been incremented. Only set the
                            # results_offset value when the first match is encountered.
                            if results_offset < 0:
                                results_offset = offset + i + candidate + smart['adjust'] + self.total_scanned
                                i_set_results_offset = True

                            # Double check to make sure the smart['adjust'] value is sane.
                            # If it makes results_offset negative, then it is not sane.
                            if results_offset >= 0:
                                smart['offset'] = results_offset

                                # Invoke any scan plugins
                                if not (plugret_start & PLUGIN_STOP_PLUGINS):
                                    plugret = self.plugins._scan_callbacks(smart)
                                    # Re-read the offset from the dict that was handed to the plugins
                                    results_offset = smart['offset']
                                    if (plugret & PLUGIN_TERMINATE):
                                        break

                                # Extract the result, if it matches one of the extract rules and is not a delayed extract.
                                if self.extractor.enabled and not (self.extractor.delayed and smart['delay']) and not ((plugret | plugret_start) & PLUGIN_NO_EXTRACT):
                                    # If the signature did not specify a size, extract to the end of the file.
                                    if not smart['size']:
                                        smart['size'] = fsize-results_offset

                                    smart['extract'] = self.extractor.extract(
                                        results_offset,
                                        smart['description'],
                                        target_file,
                                        smart['size'],
                                        name=smart['name'])

                                if not ((plugret | plugret_start) & PLUGIN_NO_DISPLAY):
                                    # This appears to be a valid result, so append it to the results list.
                                    results.append(smart)
                                # NOTE(review): this elif is paired with the PLUGIN_NO_DISPLAY check above;
                                # the nesting was reconstructed from whitespace-collapsed source - confirm.
                                elif i_set_results_offset:
                                    # This result set the offset but is not being displayed; clear the offset again.
                                    results_offset = -1

                # Did we find any valid results?
                if results_offset >= 0:
                    scan_results[results_offset] = results

                    if callback is not None:
                        callback(results_offset, results)

                    # If a relative jump offset was specified, update the absolute jump_offset variable
                    if has_key(smart, 'jump') and smart['jump'] > 0:
                        jump_offset = results_offset + smart['jump']

        # Track the total number of bytes scanned
        self.total_scanned += dlen
        # The starting offset only affects the reported offset for results
        # in the first block of data. Zero it out after the first block has
        # been processed.
        offset = 0

    # Sort the results before returning them
    scan_items = list(scan_results.items())
    scan_items.sort()

    # Do delayed extraction, if specified.
    if self.extractor.enabled and self.extractor.delayed:
        scan_items = self.extractor.delayed_extract(scan_items, target_file, fsize)

    # Invoke any post-scan plugins
    #if not (plugret_start & PLUGIN_STOP_PLUGINS):
    self.plugins._post_scan_callbacks(fd)

    # Be sure to delete the Plugins instance so that there isn't a lingering reference to
    # this Binwalk class instance (lingering handles to this Binwalk instance cause the
    # __del__ deconstructor to not be called).
    if i_loaded_plugins:
        del self.plugins
        self.plugins = None

    # Only close the file if it was opened by this method; a caller-supplied fd is left open
    if i_opened_fd:
        fd.close()

    return scan_items