Example #1
0
    def _dd(self, file_name, offset, size, extension, output_file_name=None):
        '''
        Extracts a file embedded inside the target file.

        @file_name        - Path to the target file.
        @offset           - Offset inside the target file where the embedded file begins.
        @size             - Number of bytes to extract.
        @extension        - The file extension to assign to the extracted file on disk.
        @output_file_name - The requested name of the output file.

        Returns the extracted file name.
        '''
        total_size = 0
        fdin = None
        fdout = None

        if not output_file_name:
            # Default extracted file name is <hex offset>.<extension>
            bname = "%X" % offset
        else:
            # Strip the output file name of invalid/dangerous characters (like file paths)
            bname = os.path.basename(output_file_name)

        fname = unique_file_name(bname, extension)

        try:
            # Open the target file and seek to the offset
            fdin = open(file_name, "rb")
            fdin.seek(offset)

            # Open the output file
            try:
                fdout = open(fname, "wb")
            except (IOError, OSError):
                # The requested name failed (e.g., it contains characters that
                # are invalid on this filesystem); fall back to the default
                # <hex offset>-based name. NOTE(review): the original code
                # referenced an undefined 'altname' here, which always raised
                # a NameError on this path.
                fname = unique_file_name("%X" % offset, extension)
                fdout = open(fname, "wb")

            # Read data from target file in chunks and write it to the extracted file
            while total_size < size:
                # Never read more than MAX_READ_SIZE bytes at once
                block_size = min(size - total_size, self.MAX_READ_SIZE)
                fdout.write(fdin.read(block_size))
                total_size += block_size
        except Exception as e:
            raise Exception(
                "Extractor.dd failed to extract data from '%s' to '%s': %s" %
                (file_name, fname, str(e)))
        finally:
            # Always release both file handles, even if extraction failed
            if fdout is not None:
                fdout.close()
            if fdin is not None:
                fdin.close()

        return fname
Example #2
0
	def _dd(self, file_name, offset, size, extension, output_file_name=None):
		'''
		Extracts a file embedded inside the target file.

		@file_name        - Path to the target file.
		@offset           - Offset inside the target file where the embedded file begins.
		@size             - Number of bytes to extract.
		@extension        - The file extension to assign to the extracted file on disk.
		@output_file_name - The requested name of the output file.

		Returns the extracted file name.
		'''
		total_size = 0
		fdin = None
		fdout = None

		if not output_file_name:
			# Default extracted file name is <hex offset>.<extension>
			bname = "%X" % offset
		else:
			# Strip the output file name of invalid/dangerous characters (like file paths)
			bname = os.path.basename(output_file_name)

		fname = unique_file_name(bname, extension)

		try:
			# Open the target file and seek to the offset
			fdin = open(file_name, "rb")
			fdin.seek(offset)

			# Open the output file
			try:
				fdout = open(fname, "wb")
			except (IOError, OSError):
				# The requested name failed (e.g., it contains characters that
				# are invalid on this filesystem); fall back to the default
				# <hex offset>-based name. NOTE(review): the original code
				# referenced an undefined 'altname' here, which always raised
				# a NameError on this path.
				fname = unique_file_name("%X" % offset, extension)
				fdout = open(fname, "wb")

			# Read data from target file in chunks and write it to the extracted file
			while total_size < size:
				# Never read more than MAX_READ_SIZE bytes at once
				block_size = min(size - total_size, self.MAX_READ_SIZE)
				fdout.write(fdin.read(block_size))
				total_size += block_size
		except Exception as e:
			raise Exception("Extractor.dd failed to extract data from '%s' to '%s': %s" % (file_name, fname, str(e)))
		finally:
			# Always release both file handles, even if extraction failed
			if fdout is not None:
				fdout.close()
			if fdin is not None:
				fdin.close()

		return fname
Example #3
0
    def __init__(self,
                 x,
                 y,
                 title='Entropy',
                 average=0,
                 file_results={},
                 show_legend=True,
                 save=False):
        '''
        Plots entropy data.

        @x            - List of graph x-coordinates (i.e., data offsets).
        @y            - List of graph y-coordinates (i.e., entropy for each offset).
        @title        - Graph title.
        @average      - The average entropy.
        @file_results - Binwalk results, if any (iterable of (offset, results) tuples).
        @show_legend  - Set to False to not generate a color-coded legend and plotted x coordinates for the graph.
        @save         - If set to True, graph will be saved to disk rather than displayed.

        Returns None.
        '''
        # NOTE: file_results is only iterated, never mutated, so the mutable
        # default argument is harmless here.
        import matplotlib.pyplot as plt
        import numpy as np

        i = 0
        new_ticks = []
        color_mappings = {}

        plt.clf()

        if file_results:
            for (offset, results) in file_results:
                label = None
                # Legend label is the first comma-separated component of the
                # first result's description.
                description = results[0]['description'].split(',')[0]

                # dict.has_key() was removed in Python 3; use 'in' instead
                if description not in color_mappings:
                    if show_legend:
                        label = description

                    # Assign the next color, wrapping around if there are more
                    # unique descriptions than available colors.
                    color_mappings[description] = self.COLORS[i]
                    i = (i + 1) % len(self.COLORS)

                plt.axvline(x=offset,
                            label=label,
                            color=color_mappings[description],
                            linewidth=self.LINE_WIDTH)
                new_ticks.append(offset)

            if show_legend:
                plt.legend()

                if new_ticks:
                    new_ticks.sort()
                    plt.xticks(np.array(new_ticks), new_ticks)

        plt.plot(x, y, linewidth=self.LINE_WIDTH)

        if average:
            # Dashed red horizontal line marking the average entropy level
            plt.plot(x, [average] * len(x), linestyle='--', color='r')

        plt.xlabel(self.XLABEL)
        plt.ylabel(self.YLABEL)
        plt.title(title)
        plt.ylim(self.YLIM_MIN, self.YLIM_MAX)
        if save:
            plt.savefig(common.unique_file_name(title, self.FILE_FORMAT))
        else:
            plt.show()
Example #4
0
	def scan(self, target_files, offset=0, length=0, show_invalid_results=False, callback=None, start_callback=None, end_callback=None, base_dir=None, matryoshka=1, plugins_whitelist=None, plugins_blacklist=None):
		'''
		Performs a binwalk scan on a file or list of files.

		@target_files         - File or list of files to scan.
		@offset               - Starting offset at which to start the scan.
		@length               - Number of bytes to scan. Specify -1 for streams.
		@show_invalid_results - Set to True to display invalid results.
		@callback             - Callback function to be invoked when matches are found.
		@start_callback       - Callback function to be invoked prior to scanning each file.
		@end_callback         - Callback function to be invoked after scanning each file.
		@base_dir             - Base directory for output files.
		@matryoshka           - Number of levels to traverse into the rabbit hole.
		@plugins_whitelist    - A list of plugin names to load. If not empty, only these plugins will be loaded.
		@plugins_blacklist    - A list of plugin names to not load.

		Returns a dictionary of :

			{
				'target file name' : [
							(0, [{description : "LZMA compressed data..."}]),
							(112, [{description : "gzip compressed data..."}])
				]
			}
		'''
		# Use None defaults for the plugin lists to avoid the shared mutable
		# default argument pitfall; normalize them to empty lists here.
		if plugins_whitelist is None:
			plugins_whitelist = []
		if plugins_blacklist is None:
			plugins_blacklist = []

		# Prefix all directory names with an underscore. This prevents accidental deletion of the original file(s)
		# when the user is typing too fast and is trying to delete the extraction directory.
		prefix = '_'
		dir_extension = 'extracted'
		i = 0
		total_results = {}
		self.matryoshka = matryoshka

		# For backwards compatibility, accept a single file name as well as a list
		if not isinstance(target_files, list):
			target_files = [target_files]

		if base_dir is None:
			base_dir = ''

		# Instantiate the Plugins class and load all plugins, if not disabled
		self.plugins = Plugins(self, whitelist=plugins_whitelist, blacklist=plugins_blacklist)
		if self.load_plugins:
			self.plugins._load_plugins()

		# Each pass over this loop scans one level of extracted files
		while i < self.matryoshka:
			new_target_files = []

			# Scan each target file
			for target_file in target_files:
				ignore_files = []

				# On the first scan, add the base_dir value to dir_prefix. Subsequent target_file values will have this value prepended already.
				if i == 0:
					dir_prefix = os.path.join(base_dir, prefix + os.path.basename(target_file))
				else:
					dir_prefix = os.path.join(os.path.dirname(target_file), prefix + os.path.basename(target_file))

				output_dir = unique_file_name(dir_prefix, dir_extension)

				# Set the output directory for extracted files to go to
				self.extractor.output_directory(output_dir)

				if start_callback is not None:
					start_callback(target_file)

				results = self.single_scan(target_file,
							offset=offset,
							length=length,
							show_invalid_results=show_invalid_results,
							callback=callback)

				if end_callback is not None:
					end_callback(target_file)

				# Get a list of extracted file names; don't scan them again.
				for (index, results_list) in results:
					for result in results_list:
						if result['extract']:
							ignore_files.append(result['extract'])

				# Find all newly created files and add them to new_target_files
				for (dir_path, sub_dirs, files) in os.walk(output_dir):
					for fname in files:
						fname = os.path.join(dir_path, fname)
						if fname not in ignore_files:
							new_target_files.append(fname)

					# Don't worry about sub-directories
					break

				total_results[target_file] = results

			target_files = new_target_files
			i += 1

		# Be sure to delete the Plugins instance so that there isn't a lingering reference to
		# this Binwalk class instance (lingering handles to this Binwalk instance cause the
		# __del__ deconstructor to not be called).
		if self.plugins is not None:
			del self.plugins
			self.plugins = None

		return total_results
Example #5
0
	def plot(self, x, y, average=0, file_results=[], show_legend=True, save=False):
		'''
		Plots entropy data.

		@x            - List of graph x-coordinates (i.e., data offsets).
		@y            - List of graph y-coordinates (i.e., entropy for each offset).
		@average      - The average entropy.
		@file_results - A list of tuples containing additional analysis data, as returned by Binwalk.single_scan.
		@show_legend  - Set to False to not generate a color-coded legend and plotted x coordinates for the graph.
		@save         - If set to True, graph will be saved to disk rather than displayed.

		Returns None.
		'''
		# NOTE: file_results is rebound (never mutated in place) before any
		# append, so the mutable default argument is safe here.
		import matplotlib.pyplot as plt
		import numpy as np

		i = 0
		trigger = 0
		new_ticks = []
		colors = ['darkgreen', 'blueviolet', 'saddlebrown', 'deeppink', 'goldenrod', 'olive', 'black']
		color_mappings = {}

		plt.clf()

		# If no explicit results were supplied, auto-detect rising/falling
		# entropy edges so the legend has something meaningful to show.
		if not file_results and show_legend and average:
			file_results = []

			# Typically the average entropy is used as the trigger level for rising/falling entropy edges.
			# If the average entropy is too low, false rising and falling edges will be marked; if this is
			# the case, and if there is at least one data point greater than ENTROPY_MAX, use ENTROPY_TRIGGER
			# as the trigger level to avoid false edges.
			if average < self.ENTROPY_TRIGGER:
				for point in y:
					if point > self.ENTROPY_MAX:
						trigger = self.ENTROPY_TRIGGER
						break

			if not trigger:
				trigger = average

			# Mark each crossing of the trigger level as a rising/falling edge
			for j in range(1, len(x)):
				if y[j] >= trigger and y[j-1] < trigger:
					file_results.append((x[j], [{'description' : 'Entropy rising edge'}]))
				elif y[j] <= trigger and y[j-1] > trigger:
					file_results.append((x[j], [{'description' : 'Entropy falling edge'}]))

		if file_results:
			for (offset, results) in file_results:
				label = None
				# Legend label is the first comma-separated component of the
				# first result's description.
				description = results[0]['description'].split(',')[0]

				# dict.has_key() was removed in Python 3; use 'in' instead
				if description not in color_mappings:
					if show_legend:
						label = description

					# Assign the next color, wrapping around if there are more
					# unique descriptions than available colors.
					color_mappings[description] = colors[i]
					i = (i + 1) % len(colors)

				plt.axvline(x=offset, label=label, color=color_mappings[description], linewidth=1.5)
				new_ticks.append(offset)

			if show_legend:
				plt.legend()

				if new_ticks:
					new_ticks.sort()
					plt.xticks(np.array(new_ticks), new_ticks)

		plt.plot(x, y, linewidth=1.5)

		if average:
			# Dashed red horizontal line marking the average entropy level
			plt.plot(x, [average] * len(x), linestyle='--', color='r')

		plt.xlabel('Offset')
		plt.ylabel('Entropy')
		plt.title(self.fd.name)
		plt.ylim(0, 1.5)
		if save:
			plt.savefig(common.unique_file_name(os.path.join(os.path.dirname(self.fd.name), '_' + os.path.basename(self.fd.name)), self.FILE_FORMAT))
		else:
			plt.show()
Example #6
0
	def __init__(self, x, y, title='Entropy', average=0, file_results={}, show_legend=True, save=False):
		'''
		Plots entropy data.

		@x            - List of graph x-coordinates (i.e., data offsets).
		@y            - List of graph y-coordinates (i.e., entropy for each offset).
		@title        - Graph title.
		@average      - The average entropy.
		@file_results - Binwalk results, if any (iterable of (offset, results) tuples).
		@show_legend  - Set to False to not generate a color-coded legend and plotted x coordinates for the graph.
		@save         - If set to True, graph will be saved to disk rather than displayed.

		Returns None.
		'''
		# NOTE: file_results is only iterated, never mutated, so the mutable
		# default argument is harmless here.
		import matplotlib.pyplot as plt
		import numpy as np

		i = 0
		new_ticks = []
		color_mappings = {}

		plt.clf()

		if file_results:
			for (offset, results) in file_results:
				label = None
				# Legend label is the first comma-separated component of the
				# first result's description.
				description = results[0]['description'].split(',')[0]

				# dict.has_key() was removed in Python 3; use 'in' instead
				if description not in color_mappings:
					if show_legend:
						label = description

					# Assign the next color, wrapping around if there are more
					# unique descriptions than available colors.
					color_mappings[description] = self.COLORS[i]
					i = (i + 1) % len(self.COLORS)

				plt.axvline(x=offset, label=label, color=color_mappings[description], linewidth=self.LINE_WIDTH)
				new_ticks.append(offset)

			if show_legend:
				plt.legend()

				if new_ticks:
					new_ticks.sort()
					plt.xticks(np.array(new_ticks), new_ticks)

		plt.plot(x, y, linewidth=self.LINE_WIDTH)

		if average:
			# Dashed red horizontal line marking the average entropy level
			plt.plot(x, [average] * len(x), linestyle='--', color='r')

		plt.xlabel(self.XLABEL)
		plt.ylabel(self.YLABEL)
		plt.title(title)
		plt.ylim(self.YLIM_MIN, self.YLIM_MAX)
		if save:
			plt.savefig(common.unique_file_name(title, self.FILE_FORMAT))
		else:
			plt.show()
Example #7
0
    def scan(self,
             target_files,
             offset=0,
             length=0,
             show_invalid_results=False,
             callback=None,
             start_callback=None,
             end_callback=None,
             base_dir=None,
             matryoshka=1,
             plugins_whitelist=None,
             plugins_blacklist=None):
        '''
        Performs a binwalk scan on a file or list of files.

        @target_files         - File or list of files to scan.
        @offset               - Starting offset at which to start the scan.
        @length               - Number of bytes to scan. Specify -1 for streams.
        @show_invalid_results - Set to True to display invalid results.
        @callback             - Callback function to be invoked when matches are found.
        @start_callback       - Callback function to be invoked prior to scanning each file.
        @end_callback         - Callback function to be invoked after scanning each file.
        @base_dir             - Base directory for output files.
        @matryoshka           - Number of levels to traverse into the rabbit hole.
        @plugins_whitelist    - A list of plugin names to load. If not empty, only these plugins will be loaded.
        @plugins_blacklist    - A list of plugin names to not load.

        Returns a dictionary of :

            {
                'target file name' : [
                            (0, [{description : "LZMA compressed data..."}]),
                            (112, [{description : "gzip compressed data..."}])
                ]
            }
        '''
        # Use None defaults for the plugin lists to avoid the shared mutable
        # default argument pitfall; normalize them to empty lists here.
        if plugins_whitelist is None:
            plugins_whitelist = []
        if plugins_blacklist is None:
            plugins_blacklist = []

        # Prefix all directory names with an underscore. This prevents accidental deletion of the original file(s)
        # when the user is typing too fast and is trying to delete the extraction directory.
        prefix = '_'
        dir_extension = 'extracted'
        i = 0
        total_results = {}
        self.matryoshka = matryoshka

        # For backwards compatibility, accept a single file name as well as a list
        if not isinstance(target_files, list):
            target_files = [target_files]

        if base_dir is None:
            base_dir = ''

        # Instantiate the Plugins class and load all plugins, if not disabled
        self.plugins = Plugins(self,
                               whitelist=plugins_whitelist,
                               blacklist=plugins_blacklist)
        if self.load_plugins:
            self.plugins._load_plugins()

        # Each pass over this loop scans one level of extracted files
        while i < self.matryoshka:
            new_target_files = []

            # Scan each target file
            for target_file in target_files:
                ignore_files = []

                # On the first scan, add the base_dir value to dir_prefix. Subsequent target_file values will have this value prepended already.
                if i == 0:
                    dir_prefix = os.path.join(
                        base_dir, prefix + os.path.basename(target_file))
                else:
                    dir_prefix = os.path.join(
                        os.path.dirname(target_file),
                        prefix + os.path.basename(target_file))

                output_dir = unique_file_name(dir_prefix, dir_extension)

                # Set the output directory for extracted files to go to
                self.extractor.output_directory(output_dir)

                if start_callback is not None:
                    start_callback(target_file)

                results = self.single_scan(
                    target_file,
                    offset=offset,
                    length=length,
                    show_invalid_results=show_invalid_results,
                    callback=callback)

                if end_callback is not None:
                    end_callback(target_file)

                # Get a list of extracted file names; don't scan them again.
                for (index, results_list) in results:
                    for result in results_list:
                        if result['extract']:
                            ignore_files.append(result['extract'])

                # Find all newly created files and add them to new_target_files
                for (dir_path, sub_dirs, files) in os.walk(output_dir):
                    for fname in files:
                        fname = os.path.join(dir_path, fname)
                        if fname not in ignore_files:
                            new_target_files.append(fname)

                    # Don't worry about sub-directories
                    break

                total_results[target_file] = results

            target_files = new_target_files
            i += 1

        # Be sure to delete the Plugins instance so that there isn't a lingering reference to
        # this Binwalk class instance (lingering handles to this Binwalk instance cause the
        # __del__ deconstructor to not be called).
        if self.plugins is not None:
            del self.plugins
            self.plugins = None

        return total_results