Example #1
	def find_magic_file(self, fname, system_only=False, user_only=False):
		'''
		Finds the specified magic file name in the system / user magic file directories.

		@fname       - The name of the magic file.
		@system_only - If True, only the system magic file directory will be searched.
		@user_only   - If True, only the user magic file directory will be searched.

		If neither system_only nor user_only is set, the user directory is searched first, then the system directory.

		Returns the path to the file on success; returns None on failure.
		'''
		loc = None

		if not system_only:
			fpath = self._user_path(self.BINWALK_MAGIC_DIR, fname)
			if os.path.exists(fpath) and common.file_size(fpath) > 0:
				loc = fpath

		if loc is None and not user_only:
			fpath = self._system_path(self.BINWALK_MAGIC_DIR, fname)
			if os.path.exists(fpath) and common.file_size(fpath) > 0:
				loc = fpath

		return loc
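
A minimal usage sketch for find_magic_file; `config` below is assumed to be an instance of whatever class defines the method, and the magic file name 'binwalk' is illustrative:

	# Search the user directory first, then the system directory
	magic_path = config.find_magic_file('binwalk')
	if magic_path is not None:
		print("Loading signatures from %s" % magic_path)
	else:
		print("No usable magic file found")

	# Restrict the search to the user magic directory only
	user_magic = config.find_magic_file('binwalk', user_only=True)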
Example #2
	def hexdiff(self, file_names, length=0x100, offset=0, block=16, first=False):
		'''
		Display a hex diff of the specified files. If length is 0, the size of
		the first file is used; if block is 0, the default of 16 bytes per line
		is used. Set first to True to display only the first file's hex dump.
		'''
		if not length and len(file_names) > 0:
			length = file_size(file_names[0])
		if not block:
			block = 16

		HexDiff(self).display(file_names, offset=offset, size=length, block=block, show_first_only=first)
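
A usage sketch for hexdiff; `bw` is assumed to be an instance of the class defining the method, and the file names are hypothetical:

	# Diff the first 256 bytes of two firmware images, 16 bytes per line
	bw.hexdiff(['firmware-v1.bin', 'firmware-v2.bin'], length=0x100, block=16)

	# length=0 diffs the entire first file; first=True displays only the first file's hex dump
	bw.hexdiff(['firmware-v1.bin', 'firmware-v2.bin'], length=0, first=True)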
Example #3
	def extract(self, offset, description, file_name, size, name=None):
		'''
		Extract an embedded file from the target file, if it matches an extract rule.
		Called automatically by Binwalk.scan().

		@offset      - Offset inside the target file to begin the extraction.
		@description - Description of the embedded file to extract, as returned by libmagic.
		@file_name   - Path to the target file.
		@size        - Number of bytes to extract.
		@name        - Name to save the file as.

		Returns the name of the extracted file (blank string if nothing was extracted).
		'''
		fname = ''
		original_dir = os.getcwd()
		rules = self._match(description)

		# No extraction rules for this file
		if not rules:
			return fname

		if not os.path.exists(self.extract_path):
			os.mkdir(self.extract_path)

		file_path = os.path.realpath(file_name)
		
		if os.path.isfile(file_path):
			os.chdir(self.extract_path)
			
			# Loop through each extraction rule until one succeeds
			for i, rule in enumerate(rules):
				# Reset the cleanup flag for each rule attempt
				cleanup_extracted_fname = True

				# Copy out the data to disk, if we haven't already
				fname = self._dd(file_path, offset, size, rule['extension'], output_file_name=name)

				# If there was a command specified for this rule, try to execute it.
				# If execution fails, the next rule will be attempted.
				if rule['cmd']:

					# Many extraction utilities will extract the file to a new file, just without
					# the file extension (i.e., myfile.7z -> myfile). If the presumed resulting
					# file name already exists before executing the extract command, do not attempt 
					# to clean it up even if its resulting file size is 0.
					if self.remove_after_execute:
						extracted_fname = os.path.splitext(fname)[0]
						if os.path.exists(extracted_fname):
							cleanup_extracted_fname = False
	
					# Execute the specified command against the extracted file
					extract_ok = self.execute(rule['cmd'], fname)

					# Only clean up files if remove_after_execute was specified				
					if extract_ok and self.remove_after_execute:

						# Remove the original file that we extracted
						try:
							os.unlink(fname)
						except:
							pass

						# If the command worked, assume it removed the file extension from the extracted file
						# If the extracted file name file exists and is empty, remove it
						if cleanup_extracted_fname and os.path.exists(extracted_fname) and file_size(extracted_fname) == 0:
							try:
								os.unlink(extracted_fname)
							except:
								pass
					
					# If the command executed OK, don't try any more rules
					if extract_ok:
						break
					# Else, remove the extracted file if this isn't the last rule in the list.
					# If it is the last rule, leave the file on disk for the user to examine.
					elif i != (len(rules)-1):
						try:
							os.unlink(fname)
						except:
							pass

				# If there was no command to execute, just use the first rule
				else:
					break

			os.chdir(original_dir)

		# If a file was extracted, return the full path to that file	
		if fname:
			fname = os.path.join(self.extract_path, fname)

		return fname
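
A hypothetical invocation sketch for extract; `extractor` is assumed to be the object defining the method, with an extraction rule that matches gzip descriptions, and the offset, size, and description values mirror what Binwalk.scan() would pass:

	extracted = extractor.extract(0x1000, 'gzip compressed data, from Unix', 'firmware.bin', 0x4000)
	if extracted:
		print("Extracted data saved to %s" % extracted)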
Example #4
	def single_scan(self, target_file='', fd=None, offset=0, length=0, show_invalid_results=False, callback=None, plugins_whitelist=[], plugins_blacklist=[]):
		'''
		Performs a binwalk scan on one target file or file descriptor.

		@target_file          - File to scan.
		@fd                   - A common.BlockFile object.
		@offset               - Starting offset at which to start the scan.
		@length               - Number of bytes to scan. Specify -1 for streams.
		@show_invalid_results - Set to True to display invalid results.
		@callback             - Callback function to be invoked when matches are found.
		@plugins_whitelist    - A list of plugin names to load. If not empty, only these plugins will be loaded.
		@plugins_blacklist    - A list of plugin names not to load.

		The callback function is passed two arguments: the offset at which the results were identified, and a
		list of result dictionaries containing the scan results (one result per dict). Example callback function:

			def my_callback(offset, results):
				print "Found %d results at offset %d:" % (len(results), offset)
				for result in results:
					print "\t%s" % result['description']

			binwalk.Binwalk(callback=my_callback).scan("firmware.bin")

		Upon completion, this method returns a sorted list of tuples, each containing an offset and the list of
		result dictionaries identified at that offset:

			scan_results = [
					(0, [{description : "LZMA compressed data..."}]),
					(112, [{description : "gzip compressed data..."}])
			]

		See SmartSignature.parse for a more detailed description of the results dictionary structure.
		'''
		scan_results = {}
		fsize = 0
		jump_offset = 0
		i_opened_fd = False
		i_loaded_plugins = False
		plugret = PLUGIN_CONTINUE
		plugret_start = PLUGIN_CONTINUE
		self.total_read = 0
		self.total_scanned = 0
		self.scan_length = length
		self.filter.show_invalid_results = show_invalid_results
		self.start_offset = offset

		# Check to make sure either a target file or a file descriptor was supplied
		if not target_file and fd is None:
			raise Exception("Must supply Binwalk.single_scan with a valid file path or BlockFile object")

		# Need the total size of the target file, even if we aren't scanning the whole thing
		if target_file:
			fsize = file_size(target_file)
			
		# If no length was specified, make the length the size of the target file minus the starting offset
		if self.scan_length == 0:
			self.scan_length = fsize - offset

		# Open the target file and seek to the specified start offset
		if fd is None:
			fd = BlockFile(target_file, length=self.scan_length, offset=offset)
			i_opened_fd = True
			# If offset is negative (bytes from EOF), the BlockFile class will automatically calculate the correct offset
			offset = fd.offset

		
		# If the Plugins class has not already been instantiated, do that now.
		if self.plugins is None:
			self.plugins = Plugins(self, blacklist=plugins_blacklist, whitelist=plugins_whitelist)
			i_loaded_plugins = True
		
			if self.load_plugins:
				self.plugins._load_plugins()

		# Invoke any pre-scan plugins
		plugret_start = self.plugins._pre_scan_callbacks(fd)
		
		# Load the default signatures if self.load_signatures has not already been invoked
		if self.magic is None:
			self.load_signatures()

		# Main loop, scan through all the data
		while not ((plugret | plugret_start) & PLUGIN_TERMINATE):
			i = 0

			# Read in the next block of data from the target file and make sure it's valid
			(data, dlen) = fd.read_block()
			if not data or dlen == 0:
				break

			# The total number of bytes scanned could be bigger than the total number
			# of bytes read from the file if the previous signature result specified a 
			# jump offset that was beyond the end of the then current data block.
			#
			# If this is the case, we need to index into this data block appropriately in order to 
			# resume the scan from the appropriate offset.
			#
			# Don't update dlen though, as it is the literal offset into the data block that we
			# are to scan up to in this loop iteration. It is also appended to self.total_scanned,
			# which is what we want (even if we have been told to skip part of the block, the skipped
			# part is still considered part of the total bytes scanned).
			if jump_offset > 0:
				total_check = self.total_scanned + dlen

				# Is the jump offset beyond the total amount of data that we've currently read in (i.e., in a future data block)?
				if jump_offset >= total_check:
					i = -1
					
					# Try to seek to the jump offset; this won't work if fd == sys.stdin
					try:
						fd.seek(jump_offset)
						self.total_read = jump_offset
						self.total_scanned = jump_offset - dlen
					except:
						pass

				# Is the jump offset inside this block of data?
				elif jump_offset > self.total_scanned and jump_offset < total_check:
					# Index into this block appropriately; jump_offset is the file offset that
					# we need to jump to, and self.total_scanned is the file offset that starts
					# the beginning of the current block
					i = jump_offset - self.total_scanned

				# We're done with jump_offset, zero it out for the next round
				jump_offset = 0

			# Scan through each block of data looking for signatures
			if i >= 0 and i < dlen:

				# Scan this data block for a list of offsets which are candidates for possible valid signatures.
				# Signatures could be split across the block boundary; since data contains 1KB more than dlen,
				# pass up to dlen+MAX_SIGNATURE_SIZE to find_signature_candidates, but don't accept signatures that 
				# start after the end of dlen.
				for candidate in self.parser.find_signature_candidates(data[i:dlen+self.MAX_SIGNATURE_SIZE], (dlen-i)):

					# If a previous signature specified a jump offset beyond this candidate signature offset, ignore it
					if (i + candidate + self.total_scanned) < jump_offset:
						continue

					# Reset these values on each loop	
					smart = {}
					results = []
					results_offset = -1

					# In python3 we need a bytes object to pass to magic.buffer
					candidate_data = str2bytes(data[i+candidate:i+candidate+fd.MAX_TRAILING_SIZE])

					# Pass the data to libmagic, and split out multiple results into a list
					for magic_result in self.parser.split(self.magic.buffer(candidate_data)):

						i_set_results_offset = False

						# Some signatures need to take into account the length of a given string
						# when specifying additional offsets. Parse the string-len keyword to adjust
						# for this prior to calling self.smart.parse.
						magic_result = self.smart._parse_string_len(magic_result)

						# Some file names are not NULL byte terminated, but rather their length is
						# specified in a size field. To ensure these are not marked as invalid due to
						# non-printable characters existing in the file name, parse the filename(s) and
						# trim them to the specified filename length, if one was specified.
						magic_result = self.smart._parse_raw_strings(magic_result)

						# Invoke any pre-parser callback plugin functions
						if not (plugret_start & PLUGIN_STOP_PLUGINS):
							raw_result = {'description' : magic_result}
							plugret = self.plugins._scan_pre_parser_callbacks(raw_result)
							magic_result = raw_result['description']
							if (plugret & PLUGIN_TERMINATE):
								break
	
						# Make sure this is a valid result before further processing
						if not self.filter.invalid(magic_result):
							# The smart filter parser returns a dictionary of keyword values and the signature description.
							smart = self.smart.parse(magic_result)

							# Validate the jump value and check if the response description should be displayed
							if self._is_valid(smart, candidate+i, fsize):
								# If multiple results are returned and one of them has smart['jump'] set to a non-zero value,
								# the calculated results offset will be wrong since i will have been incremented. Only set the
								# results_offset value when the first match is encountered.
								if results_offset < 0:
									results_offset = offset + i + candidate + smart['adjust'] + self.total_scanned
									i_set_results_offset = True

								# Double check to make sure the smart['adjust'] value is sane. 
								# If it makes results_offset negative, then it is not sane.
								if results_offset >= 0:
									smart['offset'] = results_offset

									# Invoke any scan plugins 
									if not (plugret_start & PLUGIN_STOP_PLUGINS):
										plugret = self.plugins._scan_callbacks(smart)
										results_offset = smart['offset']
										if (plugret & PLUGIN_TERMINATE):
											break

									# Extract the result, if it matches one of the extract rules and is not a delayed extract.
									if self.extractor.enabled and not (self.extractor.delayed and smart['delay']) and not ((plugret | plugret_start) & PLUGIN_NO_EXTRACT):
										# If the signature did not specify a size, extract to the end of the file.
										if not smart['size']:
											smart['size'] = fsize-results_offset
										
										smart['extract'] = self.extractor.extract(results_offset,
																smart['description'],
																target_file,
																smart['size'],
																name=smart['name'])

									if not ((plugret | plugret_start) & PLUGIN_NO_DISPLAY):
										# This appears to be a valid result, so append it to the results list.
										results.append(smart)
									elif i_set_results_offset:
										results_offset = -1

					# Did we find any valid results?
					if results_offset >= 0:
						scan_results[results_offset] = results
					
						if callback is not None:
							callback(results_offset, results)
			
						# If a relative jump offset was specified, update the absolute jump_offset variable
						if has_key(smart, 'jump') and smart['jump'] > 0:
							jump_offset = results_offset + smart['jump']

			# Track the total number of bytes scanned
			self.total_scanned += dlen
			# The starting offset only affects the reported offset for results
			# in the first block of data. Zero it out after the first block has
			# been processed.
			offset = 0

		# Sort the results before returning them
		scan_items = list(scan_results.items())
		scan_items.sort()

		# Do delayed extraction, if specified.
		if self.extractor.enabled and self.extractor.delayed:
			scan_items = self.extractor.delayed_extract(scan_items, target_file, fsize)

		# Invoke any post-scan plugins
		#if not (plugret_start & PLUGIN_STOP_PLUGINS):
		self.plugins._post_scan_callbacks(fd)

		# Be sure to delete the Plugins instance so that there isn't a lingering reference to
		# this Binwalk class instance (lingering handles to this Binwalk instance prevent the
		# __del__ destructor from being called).
		if i_loaded_plugins:
			del self.plugins
			self.plugins = None

		if i_opened_fd:
			fd.close()

		return scan_items
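
A usage sketch for single_scan; `bw` is assumed to be a Binwalk instance and the file name is hypothetical. It scans the first 1MB of the file and walks the sorted (offset, results) tuples returned:

	for (offset, results) in bw.single_scan('firmware.bin', offset=0, length=0x100000):
		for result in results:
			print("0x%.8X    %s" % (offset, result['description']))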
Example #5
	def __init__(self, file_name, binwalk=None, length=0, offset=0, n=MIN_STRING_LENGTH, block=DEFAULT_ENTROPY_BLOCK, algorithm='gzip', plugins=None):
		'''
		Class constructor. This should preferably be invoked from the Strings class rather than directly.

		@file_name - The file name to perform a strings analysis on.
		@binwalk   - An instance of the Binwalk class.
		@length    - The number of bytes in the file to analyze.
		@offset    - The starting offset into the file to begin analysis.
		@n         - The minimum valid string length.
		@block     - The block size to use when performing entropy analysis. Set to None to skip entropy analysis.
		@algorithm - The entropy algorithm to use when performing entropy analysis.
		@plugins   - An instance of the Plugins class.

		Returns None.
		'''
		self.n = n
		self.binwalk = binwalk
		self.length = length
		self.start = offset
		self.data = ''
		self.dlen = 0
		self.i = 0
		self.total_read = 0
		self.entropy = {}
		self.valid_strings = []
		self.external_validators = []
		self.plugins = plugins
		self.block = block

		if not self.n:
			self.n = self.MIN_STRING_LENGTH

		if self.block is not None:
			# Perform an entropy analysis over the entire file (anything less may generate poor entropy data).
			# Pass a fake file_results list to prevent FileEntropy from doing too much analysis.
			with entropy.FileEntropy(file_name, block=self.block, file_results=['foo']) as e:
				(self.x, self.y, self.average_entropy) = e.analyze(algorithm=algorithm)
				for i in range(0, len(self.x)):
					self.entropy[self.x[i]] = self.y[i]
				# Make sure our block size matches the entropy analysis's block size
				self.block = e.block
			# Make sure the starting offset is a multiple of the block size; else, when later checking
			# the entropy analysis, block offsets won't line up.
			self.start -= (self.start % self.block)
		else:
			i = 0
			self.block = common.BlockFile.READ_BLOCK_SIZE
			
			# Fake the entropy scan
			while i < common.file_size(file_name):
				self.entropy[i] = 1.0
				i += self.block

		self.fd = common.BlockFile(file_name, 'r', length=length, offset=self.start)
		# TODO: This is not optimal. We should read larger chunks and process them in self.block-sized pieces.
		self.fd.READ_BLOCK_SIZE = self.block
		self.fd.MAX_TRAILING_SIZE = 0
		self.start = self.fd.offset

		# Set the total_scanned and scan_length values for plugins and status display messages
		if self.binwalk:
			self.binwalk.total_scanned = 0
			self.binwalk.scan_length = self.fd.length
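
A construction sketch for this class; the FileStrings name and the file name are hypothetical stand-ins, and the keyword arguments match the constructor signature above:

	# With block=None, entropy analysis is skipped and every block offset
	# is assigned a fake entropy of 1.0 (see the constructor above)
	fs = FileStrings('firmware.bin', n=4, block=None)

	# With the default block size, a gzip-based entropy analysis is run over the whole file first
	fs_entropy = FileStrings('firmware.bin', n=4, algorithm='gzip')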