Example #1
# Imports required by this excerpt (per the binwalk source); the Deflate
# helper class referenced below is defined alongside this module in the
# same source file.
from binwalk.core.module import Module, Option, Kwarg


class RawCompression(Module):

    DECOMPRESSORS = {
        'deflate': Deflate,
    }

    TITLE = 'Raw Compression'

    CLI = [
        Option(short='X',
               long='deflate',
               kwargs={
                   'enabled': True,
                   'decompressor_class': 'deflate'
               },
               description='Scan for raw deflate compression streams'),
    ]

    KWARGS = [
        Kwarg(name='enabled', default=False),
        Kwarg(name='decompressor_class', default=None),
    ]

    def init(self):
        self.decompressor = self.DECOMPRESSORS[self.decompressor_class](self)

    def run(self):
        for fp in iter(self.next_file, None):

            fp.set_block_size(peek=self.decompressor.BLOCK_SIZE)

            self.header()

            while True:
                (data, dlen) = fp.read_block()
                if not data:
                    break

                for i in range(0, dlen):
                    description = self.decompressor.decompress(
                        data[i:i + self.decompressor.BLOCK_SIZE])
                    if description:
                        self.result(description=description,
                                    file=fp,
                                    offset=fp.tell() - dlen + i)

                self.status.completed = fp.tell() - fp.offset

            self.footer()
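
A module like this is normally driven through binwalk's Python API, where each Option's long name becomes a keyword argument to binwalk.scan(). A minimal usage sketch (the input file name is hypothetical):

import binwalk

# Roughly equivalent to running `binwalk -X firmware.bin`;
# quiet=True suppresses terminal output.
for module in binwalk.scan('firmware.bin', deflate=True, quiet=True):
    for result in module.results:
        print("0x%.8X    %s" % (result.offset, result.description))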
Example #2
# Imports required by this excerpt (per the binwalk source); the Deflate and
# LZMA helper classes referenced below are defined alongside this module in
# the same source file.
from binwalk.core.module import Module, Option, Kwarg


class RawCompression(Module):

    TITLE = 'Raw Compression'

    CLI = [
        Option(short='X',
               long='deflate',
               kwargs={
                   'enabled': True,
                   'scan_for_deflate': True
               },
               description='Scan for raw deflate compression streams'),
        Option(short='Z',
               long='lzma',
               kwargs={
                   'enabled': True,
                   'scan_for_lzma': True
               },
               description='Scan for raw LZMA compression streams'),
        Option(short='P',
               long='partial',
               kwargs={'partial_scan': True},
               description='Perform a superficial, but faster, scan'),
        Option(short='S',
               long='stop',
               kwargs={'stop_on_first_hit': True},
               description='Stop after the first result'),
    ]

    KWARGS = [
        Kwarg(name='enabled', default=False),
        Kwarg(name='partial_scan', default=False),
        Kwarg(name='stop_on_first_hit', default=False),
        Kwarg(name='scan_for_deflate', default=False),
        Kwarg(name='scan_for_lzma', default=False),
    ]

    def init(self):
        self.decompressors = []

        if self.scan_for_deflate:
            self.decompressors.append(Deflate(self))
        if self.scan_for_lzma:
            self.decompressors.append(LZMA(self))

    def run(self):
        for fp in iter(self.next_file, None):

            file_done = False

            self.header()

            while not file_done:
                (data, dlen) = fp.read_block()
                if not data:
                    break

                for i in range(0, dlen):
                    for decompressor in self.decompressors:
                        description = decompressor.decompress(
                            data[i:i + decompressor.BLOCK_SIZE])
                        if description:
                            self.result(description=description,
                                        file=fp,
                                        offset=fp.tell() - dlen + i)
                            if self.stop_on_first_hit:
                                file_done = True
                                break

                    if file_done:
                        break

                    self.status.completed += 1

                self.status.completed = fp.tell() - fp.offset

            self.footer()
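
The Deflate and LZMA helpers instantiated above each perform a trial decompression of the block they are handed and return a description string on success. A rough standalone sketch of the deflate check, using only the standard library (an approximation of the idea, not binwalk's exact implementation):

import zlib

def looks_like_deflate(data, min_output=32):
    # wbits=-15 tells zlib to expect a raw deflate stream
    # with no zlib/gzip header or checksum.
    try:
        decompressed = zlib.decompressobj(-15).decompress(data)
    except zlib.error:
        return None
    # Require a minimum amount of decompressed output to reduce false positives.
    if len(decompressed) >= min_output:
        return 'Raw deflate compression stream'
    return None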
Example #3
# Imports required by this excerpt (per the binwalk source).
import sys
import string

import binwalk.core.common as common
from binwalk.core.compat import iterator
from binwalk.core.module import Module, Option, Kwarg


class HexDiff(Module):

    COLORS = {
        'red': '31',
        'green': '32',
        'blue': '34',
    }

    SEPERATORS = ['\\', '/']
    DEFAULT_BLOCK_SIZE = 16

    SKIPPED_LINE = "*"
    CUSTOM_DISPLAY_FORMAT = "0x%.8X    %s"

    TITLE = "Binary Diffing"

    CLI = [
        Option(short='W',
               long='hexdump',
               kwargs={'enabled': True},
               description='Perform a hexdump / diff of a file or files'),
        Option(short='G',
               long='green',
               kwargs={'show_green': True},
               description='Only show lines containing bytes that are the same among all files'),
        Option(short='i',
               long='red',
               kwargs={'show_red': True},
               description='Only show lines containing bytes that are different among all files'),
        Option(short='U',
               long='blue',
               kwargs={'show_blue': True},
               description='Only show lines containing bytes that are different among some files'),
        Option(short='w',
               long='terse',
               kwargs={'terse': True},
               description='Diff all files, but only display a hex dump of the first file'),
    ]

    KWARGS = [
        Kwarg(name='show_red', default=False),
        Kwarg(name='show_blue', default=False),
        Kwarg(name='show_green', default=False),
        Kwarg(name='terse', default=False),
        Kwarg(name='enabled', default=False),
    ]

    RESULT_FORMAT = "%s\n"
    RESULT = ['display']

    def _no_colorize(self, c, color="red", bold=True):
        return c

    def _colorize(self, c, color="red", bold=True):
        attr = []

        attr.append(self.COLORS[color])
        if bold:
            attr.append('1')

        return "\x1b[%sm%s\x1b[0m" % (';'.join(attr), c)

    def _color_filter(self, data):
        red = '\x1b[' + self.COLORS['red'] + ';'
        green = '\x1b[' + self.COLORS['green'] + ';'
        blue = '\x1b[' + self.COLORS['blue'] + ';'

        if self.show_blue and blue in data:
            return True
        elif self.show_green and green in data:
            return True
        elif self.show_red and red in data:
            return True

        return False

    def hexascii(self, target_data, byte, offset):
        color = "green"

        for (fp_i, data_i) in iterator(target_data):
            diff_count = 0

            for (fp_j, data_j) in iterator(target_data):
                if fp_i == fp_j:
                    continue

                try:
                    if data_i[offset] != data_j[offset]:
                        diff_count += 1
                except IndexError as e:
                    diff_count += 1

            if diff_count == len(target_data) - 1:
                color = "red"
            elif diff_count > 0:
                color = "blue"
                break

        hexbyte = self.colorize("%.2X" % ord(byte), color)

        if byte not in string.printable or byte in string.whitespace:
            byte = "."

        asciibyte = self.colorize(byte, color)

        return (hexbyte, asciibyte)

    def diff_files(self, target_files):
        last_line = None
        loop_count = 0
        sep_count = 0

        # Figure out the maximum diff size (largest file size)
        self.status.total = 0
        for i in range(0, len(target_files)):
            if target_files[i].size > self.status.total:
                self.status.total = target_files[i].size
                self.status.fp = target_files[i]

        while True:
            line = ""
            done_files = 0
            block_data = {}
            seperator = self.SEPERATORS[sep_count % 2]

            for fp in target_files:
                block_data[fp] = fp.read(self.block)
                if not block_data[fp]:
                    done_files += 1

            # No more data from any of the target files? Done.
            if done_files == len(target_files):
                break

            for fp in target_files:
                hexline = ""
                asciiline = ""

                for i in range(0, self.block):
                    if i >= len(block_data[fp]):
                        hexbyte = "XX"
                        asciibyte = "."
                    else:
                        (hexbyte, asciibyte) = self.hexascii(
                            block_data, block_data[fp][i], i)

                    hexline += "%s " % hexbyte
                    asciiline += "%s" % asciibyte

                line += "%s |%s|" % (hexline, asciiline)

                if self.terse:
                    break

                if fp != target_files[-1]:
                    line += " %s " % seperator

            offset = fp.offset + (self.block * loop_count)

            if not self._color_filter(line):
                display = line = self.SKIPPED_LINE
            else:
                display = self.CUSTOM_DISPLAY_FORMAT % (offset, line)
                sep_count += 1

            if line != self.SKIPPED_LINE or last_line != line:
                self.result(offset=offset, description=line, display=display)

            last_line = line
            loop_count += 1
            self.status.completed += self.block

    def init(self):
        # To mimic expected behavior, if all options are False, we show
        # everything
        if not any([self.show_red, self.show_green, self.show_blue]):
            self.show_red = self.show_green = self.show_blue = True

        # Always disable terminal formatting, as it won't work properly with
        # colorized output
        self.config.display.fit_to_screen = False

        # Set the block size (aka, hexdump line size)
        self.block = self.config.block
        if not self.block:
            self.block = self.DEFAULT_BLOCK_SIZE

        # Build a list of files to hexdiff
        self.hex_target_files = []
        while True:
            f = self.next_file(close_previous=False)
            if not f:
                break
            else:
                self.hex_target_files.append(f)

        # Build the header format string
        header_width = (self.block * 4) + 2
        if self.terse:
            file_count = 1
        else:
            file_count = len(self.hex_target_files)
        self.HEADER_FORMAT = "OFFSET      " + \
            (("%%-%ds   " % header_width) * file_count) + "\n"

        # Build the header argument list
        self.HEADER = [fp.name for fp in self.hex_target_files]
        if self.terse and len(self.HEADER) > 1:
            self.HEADER = self.HEADER[0]

        # Set up the tty for colorization, if it is supported
        if (hasattr(sys.stderr, 'isatty') and sys.stderr.isatty()
                and not common.MSWindows()):
            import curses
            curses.setupterm()
            self.colorize = self._colorize
        else:
            self.colorize = self._no_colorize

    def run(self):
        if self.hex_target_files:
            self.header()
            self.diff_files(self.hex_target_files)
            self.footer()
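
Like the other modules, HexDiff is reachable from the Python API as well as the CLI; multiple target files are passed as positional arguments. A usage sketch (file names are hypothetical):

import binwalk

# Roughly equivalent to `binwalk -W firmware-v1.bin firmware-v2.bin`:
# identical bytes print green, bytes differing across all files red,
# and bytes differing across only some files blue.
binwalk.scan('firmware-v1.bin', 'firmware-v2.bin', hexdump=True)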
Example #4
# Imports required by this excerpt (per the binwalk source).
import os
import math
import zlib

import binwalk.core.common
from binwalk.core.compat import iterator, has_key, str2bytes
from binwalk.core.module import Module, Option, Kwarg


class Entropy(Module):

    XLABEL = 'Offset'
    YLABEL = 'Entropy'

    XUNITS = 'B'
    YUNITS = 'E'

    FILE_WIDTH = 1024
    FILE_FORMAT = 'png'

    COLORS = ['r', 'g', 'c', 'b', 'm']

    DEFAULT_BLOCK_SIZE = 1024
    DEFAULT_DATA_POINTS = 2048

    DEFAULT_TRIGGER_HIGH = .95
    DEFAULT_TRIGGER_LOW = .85

    TITLE = "Entropy Analysis"
    ORDER = 8

    # TODO: Add --dpoints option to set the number of data points?
    CLI = [
        Option(short='E',
               long='entropy',
               kwargs={'enabled': True},
               description='Calculate file entropy'),
        Option(short='F',
               long='fast',
               kwargs={'use_zlib': True},
               description='Use faster, but less detailed, entropy analysis'),
        Option(short='J',
               long='save',
               kwargs={'save_plot': True},
               description='Save plot as a PNG'),
        Option(short='Q',
               long='nlegend',
               kwargs={'show_legend': False},
               description='Omit the legend from the entropy plot graph'),
        Option(short='N',
               long='nplot',
               kwargs={'do_plot': False},
               description='Do not generate an entropy plot graph'),
        Option(short='H',
               long='high',
               type=float,
               kwargs={'trigger_high': DEFAULT_TRIGGER_HIGH},
               description='Set the rising edge entropy trigger threshold (default: %.2f)' % DEFAULT_TRIGGER_HIGH),
        Option(short='L',
               long='low',
               type=float,
               kwargs={'trigger_low': DEFAULT_TRIGGER_LOW},
               description='Set the falling edge entropy trigger threshold (default: %.2f)' % DEFAULT_TRIGGER_LOW),
    ]

    KWARGS = [
        Kwarg(name='enabled', default=False),
        Kwarg(name='save_plot', default=False),
        Kwarg(name='trigger_high', default=DEFAULT_TRIGGER_HIGH),
        Kwarg(name='trigger_low', default=DEFAULT_TRIGGER_LOW),
        Kwarg(name='use_zlib', default=False),
        Kwarg(name='display_results', default=True),
        Kwarg(name='do_plot', default=True),
        Kwarg(name='show_legend', default=True),
        Kwarg(name='block_size', default=0),
    ]

    # Run this module last so that it can process all other modules' results
    # and overlay them on the entropy graph
    PRIORITY = 0

    def init(self):
        self.HEADER[-1] = "ENTROPY"
        self.max_description_length = 0
        self.file_markers = {}

        if self.use_zlib:
            self.algorithm = self.gzip
        else:
            self.algorithm = self.shannon

        # Get a list of all other modules' results to mark on the entropy graph
        for (module, obj) in iterator(self.modules):
            for result in obj.results:
                if result.plot and result.file and result.description:
                    description = result.description.split(',')[0]

                    if not has_key(self.file_markers, result.file.name):
                        self.file_markers[result.file.name] = []

                    if len(description) > self.max_description_length:
                        self.max_description_length = len(description)

                    self.file_markers[result.file.name].append(
                        (result.offset, description))

        # If other modules have been run and they produced results, don't spam
        # the terminal with entropy results
        if self.file_markers:
            self.display_results = False

        if not self.block_size:
            if self.config.block:
                self.block_size = self.config.block
            else:
                self.block_size = None

    def _entropy_sigterm_handler(self, *args):
        print("F**k it all.")

    def run(self):
        # If generating a graphical plot, this function will never return, as it invokes
        # pg.exit. Calling pg.exit is pretty much required, but pg.exit calls os._exit in
        # order to work around QT cleanup issues.
        self._run()

    def _run(self):
        # Sanity check and warning if pyqtgraph isn't found
        if self.do_plot:
            try:
                import pyqtgraph as pg
            except ImportError as e:
                binwalk.core.common.warning(
                    "Failed to import pyqtgraph module, visual entropy graphing will be disabled"
                )
                self.do_plot = False

        for fp in iter(self.next_file, None):

            if self.display_results:
                self.header()

            self.calculate_file_entropy(fp)

            if self.display_results:
                self.footer()

        if self.do_plot:
            if not self.save_plot:
                from pyqtgraph.Qt import QtGui
                QtGui.QApplication.instance().exec_()
            pg.exit()

    def calculate_file_entropy(self, fp):
        # Tracks the last displayed rising/falling edge (0 for falling, 1 for
        # rising, None if nothing has been printed yet)
        last_edge = None
        # Auto-reset the trigger; if True, an entropy above/below
        # self.trigger_high/self.trigger_low will be printed
        trigger_reset = True

        # Clear results from any previously analyzed files
        self.clear(results=True)

        # If -K was not specified, calculate the block size to create
        # DEFAULT_DATA_POINTS data points
        if self.block_size is None:
            block_size = fp.size / self.DEFAULT_DATA_POINTS
            # Round up to the nearest DEFAULT_BLOCK_SIZE (1024)
            block_size = int(block_size +
                             ((self.DEFAULT_BLOCK_SIZE - block_size) %
                              self.DEFAULT_BLOCK_SIZE))
        else:
            block_size = self.block_size

        # Make sure block size is greater than 0
        if block_size <= 0:
            block_size = self.DEFAULT_BLOCK_SIZE

        binwalk.core.common.debug("Entropy block size (%d data points): %d" %
                                  (self.DEFAULT_DATA_POINTS, block_size))

        while True:
            file_offset = fp.tell()

            (data, dlen) = fp.read_block()
            if not data:
                break

            i = 0
            while i < dlen:
                entropy = self.algorithm(data[i:i + block_size])
                display = self.display_results
                description = "%f" % entropy

                if not self.config.verbose:
                    if last_edge in [None, 0] and entropy > self.trigger_low:
                        trigger_reset = True
                    elif last_edge in [None, 1] and entropy < self.trigger_high:
                        trigger_reset = True

                    if trigger_reset and entropy >= self.trigger_high:
                        description = "Rising entropy edge (%f)" % entropy
                        display = self.display_results
                        last_edge = 1
                        trigger_reset = False
                    elif trigger_reset and entropy <= self.trigger_low:
                        description = "Falling entropy edge (%f)" % entropy
                        display = self.display_results
                        last_edge = 0
                        trigger_reset = False
                    else:
                        display = False
                        description = "%f" % entropy

                r = self.result(offset=(file_offset + i),
                                file=fp,
                                entropy=entropy,
                                description=description,
                                display=display)

                i += block_size

        if self.do_plot:
            self.plot_entropy(fp.name)

    def shannon(self, data):
        '''
        Performs a Shannon entropy analysis on a given block of data.
        '''
        entropy = 0

        if data:
            length = len(data)

            seen = dict(((chr(x), 0) for x in range(0, 256)))
            for byte in data:
                seen[byte] += 1

            for x in range(0, 256):
                p_x = float(seen[chr(x)]) / length
                if p_x > 0:
                    entropy -= p_x * math.log(p_x, 2)

        # Shannon entropy of byte data maxes out at 8 bits per byte, so scale
        # the result down to the range 0..1
        return (entropy / 8)

    def gzip(self, data, truncate=True):
        '''
        Performs an entropy analysis based on zlib compression ratio.
        This is faster than the shannon entropy analysis, but not as accurate.
        '''
        # Entropy is a simple ratio of <zlib compressed size> / <original size>.
        # Repetitive data compresses to a small fraction of its input (ratio
        # near 0), while random data may "compress" to slightly more than its
        # input, hence the truncate option.
        e = float(len(zlib.compress(str2bytes(data), 9))) / float(len(data))

        if truncate and e > 1.0:
            e = 1.0

        return e

    def plot_entropy(self, fname):
        try:
            import numpy as np
            import pyqtgraph as pg
            import pyqtgraph.exporters as exporters
        except ImportError as e:
            return

        i = 0
        x = []
        y = []
        plotted_colors = {}

        for r in self.results:
            x.append(r.offset)
            y.append(r.entropy)

        plt = pg.plot(title=fname, clear=True)

        # Disable auto-ranging of the Y (entropy) axis, as it
        # can cause some very un-intuitive graphs, particularly
        # for files with only high-entropy data.
        plt.setYRange(0, 1)

        if self.show_legend and has_key(self.file_markers, fname):
            plt.addLegend(size=(self.max_description_length * 10, 0))

            for (offset, description) in self.file_markers[fname]:
                # If this description has already been plotted at a different offset, we need to
                # use the same color for the marker, but set the description to None to prevent
                # duplicate entries in the graph legend.
                #
                # Else, get the next color and use it to mark descriptions of
                # this type.
                if has_key(plotted_colors, description):
                    color = plotted_colors[description]
                    description = None
                else:
                    color = self.COLORS[i]
                    plotted_colors[description] = color

                    i += 1
                    if i >= len(self.COLORS):
                        i = 0

                plt.plot(x=[offset, offset],
                         y=[0, 1.1],
                         name=description,
                         pen=pg.mkPen(color, width=2.5))

        # Plot data points
        plt.plot(x, y, pen='y')

        # TODO: legend is not displayed properly when saving plots to disk
        if self.save_plot:
            # Save graph to CWD
            out_file = os.path.join(os.getcwd(), os.path.basename(fname))

            # exporters.ImageExporter is different in different versions of
            # pyqtgraph
            try:
                exporter = exporters.ImageExporter(plt.plotItem)
            except TypeError:
                exporter = exporters.ImageExporter.ImageExporter(plt.plotItem)
            exporter.parameters()['width'] = self.FILE_WIDTH
            exporter.export(
                binwalk.core.common.unique_file_name(out_file,
                                                     self.FILE_FORMAT))
        else:
            plt.setLabel('left', self.YLABEL, units=self.YUNITS)
            plt.setLabel('bottom', self.XLABEL, units=self.XUNITS)
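
To get a feel for the values shannon() produces, here is a self-contained re-implementation of the same normalized measure with two boundary cases (a sketch; the method above operates on binwalk's compat string type rather than bytes):

import math

def shannon(data):
    # H = -sum(p_x * log2(p_x)) over byte values, scaled from 0..8 bits to 0..1.
    if not data:
        return 0
    length = len(data)
    counts = {}
    for byte in data:
        counts[byte] = counts.get(byte, 0) + 1
    return -sum((c / length) * math.log(c / length, 2)
                for c in counts.values()) / 8

print(shannon(b'\x00' * 1024))                    # 0.0 -- constant data
print(round(shannon(bytes(range(256)) * 4), 3))   # 1.0 -- uniform byte distribution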
Example #5
# Imports required by this excerpt (per the binwalk source).
import binwalk.core.magic
import binwalk.core.common
from binwalk.core.module import Module, Option, Kwarg


class Signature(Module):

    TITLE = "Signature Scan"
    ORDER = 10

    CLI = [
        Option(short='B',
               long='signature',
               kwargs={
                   'enabled': True,
                   'explicit_signature_scan': True
               },
               description='Scan target file(s) for common file signatures'),
        Option(short='R',
               long='raw',
               kwargs={
                   'enabled': True,
                   'raw_bytes': []
               },
               type=list,
               dtype=str.__name__,
               description='Scan target file(s) for the specified sequence of bytes'),
        Option(short='A',
               long='opcodes',
               kwargs={
                   'enabled': True,
                   'search_for_opcodes': True
               },
               description='Scan target file(s) for common executable opcode signatures'),
        Option(short='m',
               long='magic',
               kwargs={
                   'enabled': True,
                   'magic_files': []
               },
               type=list,
               dtype='file',
               description='Specify a custom magic file to use'),
        Option(short='b',
               long='dumb',
               kwargs={'dumb_scan': True},
               description='Disable smart signature keywords'),
        Option(short='I',
               long='invalid',
               kwargs={'show_invalid': True},
               description='Show results marked as invalid'),
        Option(short='x',
               long='exclude',
               kwargs={'exclude_filters': []},
               type=list,
               dtype=str.__name__,
               description='Exclude results that match <str>'),
        Option(short='y',
               long='include',
               kwargs={'include_filters': []},
               type=list,
               dtype=str.__name__,
               description='Only show results that match <str>'),
    ]

    KWARGS = [
        Kwarg(name='enabled', default=False),
        Kwarg(name='show_invalid', default=False),
        Kwarg(name='include_filters', default=[]),
        Kwarg(name='exclude_filters', default=[]),
        Kwarg(name='raw_bytes', default=[]),
        Kwarg(name='search_for_opcodes', default=False),
        Kwarg(name='explicit_signature_scan', default=False),
        Kwarg(name='dumb_scan', default=False),
        Kwarg(name='magic_files', default=[]),
    ]

    VERBOSE_FORMAT = "%s    %d"

    def init(self):
        self.one_of_many = None

        # Append the user's magic file first so that those signatures take precedence
        if self.search_for_opcodes:
            self.magic_files = [
                self.config.settings.user.binarch,
                self.config.settings.system.binarch,
            ]

        # Use the system default magic file if no other was specified, or if -B was explicitly specified
        if (not self.magic_files
                and not self.raw_bytes) or self.explicit_signature_scan:
            self.magic_files += self.config.settings.user.magic + self.config.settings.system.magic

        # Initialize libmagic
        self.magic = binwalk.core.magic.Magic(include=self.include_filters,
                                              exclude=self.exclude_filters,
                                              invalid=self.show_invalid)

        # Create a signature from the raw bytes, if any
        if self.raw_bytes:
            raw_signatures = []
            for raw_bytes in self.raw_bytes:
                raw_signatures.append("0    string    %s    %s" %
                                      (raw_bytes, raw_bytes))
            binwalk.core.common.debug("Parsing raw signatures: %s" %
                                      str(raw_signatures))
            self.magic.parse(raw_signatures)

        # Parse the magic file(s)
        if self.magic_files:
            binwalk.core.common.debug("Loading magic files: %s" %
                                      str(self.magic_files))
            for f in self.magic_files:
                self.magic.load(f)

        self.VERBOSE = ["Signatures:", len(self.magic.signatures)]

    def validate(self, r):
        '''
        Called automatically by self.result.
        '''
        if self.show_invalid:
            r.valid = True
        elif r.valid:
            if not r.description:
                r.valid = False

            if r.size and (r.size + r.offset) > r.file.size:
                r.valid = False

            if r.jump and (r.jump + r.offset) > r.file.size:
                r.valid = False

        if r.valid:
            # Don't keep displaying signatures that repeat a bunch of times (e.g., JFFS2 nodes)
            if r.id == self.one_of_many:
                r.display = False
            elif r.many:
                self.one_of_many = r.id
            else:
                self.one_of_many = None

    def scan_file(self, fp):
        current_file_offset = 0

        while True:
            (data, dlen) = fp.read_block()
            if not data:
                break

            current_block_offset = 0
            block_start = fp.tell() - dlen
            self.status.completed = block_start - fp.offset

            # Scan this data block for magic signatures
            for r in self.magic.scan(data, dlen):
                # current_block_offset is set when a jump-to-offset keyword is encountered while
                # processing signatures. This points to an offset inside the current data block
                # that scanning should jump to, so ignore any subsequent candidate signatures that
                # occur before this offset inside the current data block.
                if r.offset < current_block_offset:
                    continue

                # Keep a record of the relative offset of this signature inside the current data block
                # (used later for setting current_block_offset).
                relative_offset = r.offset + r.adjust

                # Set the absolute offset inside the target file
                r.offset = block_start + relative_offset

                # Provide an instance of the current file object
                r.file = fp

                # Register the result for further processing/display.
                # self.result automatically calls self.validate for result validation.
                self.result(r=r)

                # Is this a valid result and did it specify a jump-to-offset keyword, and are we doing a "smart" scan?
                if r.valid and r.jump > 0 and not self.dumb_scan:
                    absolute_jump_offset = r.offset + r.jump
                    current_block_offset = relative_offset + r.jump

                    # If the jump-to-offset is beyond the confines of the current block, seek the file to
                    # that offset and quit processing this block of data.
                    if absolute_jump_offset >= fp.tell():
                        fp.seek(r.offset + r.jump)
                        break

    def run(self):
        for fp in iter(self.next_file, None):
            self.header()
            self.scan_file(fp)
            self.footer()
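
As noted in init() above, the -R/--raw path converts each user-supplied byte sequence into a one-line signature of the form `0 string <bytes> <bytes>` and feeds it to the same scan engine. A usage sketch through the Python API (the file name and byte pattern are hypothetical; a printable pattern keeps the generated signature line simple):

import binwalk

# Roughly equivalent to `binwalk -R ABCD firmware.bin`:
for module in binwalk.scan('firmware.bin', raw='ABCD', quiet=True):
    for result in module.results:
        print("0x%.8X    %s" % (result.offset, result.description))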
Example #6
# Imports required by this excerpt (per the binwalk source); the ExtractInfo
# helper class used below is defined alongside this module in the same
# source file.
import os
import re
import stat
import tempfile

import binwalk.core.common
from binwalk.core.compat import str2bytes
from binwalk.core.common import (BlockFile, file_size, file_md5,
                                 unique_file_name, has_key)
from binwalk.core.module import Module, Option, Kwarg


class Extractor(Module):
    '''
    Extractor class, responsible for extracting files from the target file and executing external applications, if requested.
    '''
    # Extract rules are delimited with a colon.
    # <case insensitive matching string>:<file extension>[:<command to run>]
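    #
    # For example, a rule of the form (hypothetical command)
    #
    #     gzip compressed data:gz:gunzip -f '%e'
    #
    # matches results whose description contains "gzip compressed data",
    # carves the matching data to a file with a .gz extension, and runs
    # gunzip against it.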
    RULE_DELIM = ':'

    # Comments in the extract.conf files start with a pound
    COMMENT_DELIM = '#'

    # Placeholder for the extracted file name in the command
    FILE_NAME_PLACEHOLDER = '%e'

    # Unique path delimiter, used for generating unique output file/directory names.
    # Useful when, for example, extracting two squashfs images (squashfs-root, squashfs-root-0).
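    # e.g. a rule command of "unsquashfs -d %%squashfs-root%% '%e'" has
    # %%squashfs-root%% replaced with a unique path before execution.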
    UNIQUE_PATH_DELIMITER = '%%'

    TITLE = 'Extraction'
    ORDER = 9
    PRIMARY = False

    CLI = [
            Option(short='e',
                   long='extract',
                   kwargs={'load_default_rules' : True, 'enabled' : True},
                   description='Automatically extract known file types'),
            Option(short='D',
                   long='dd',
                   type=list,
                   dtype='type:ext:cmd',
                   kwargs={'manual_rules' : [], 'enabled' : True},
                   description='Extract <type> signatures, give the files an extension of <ext>, and execute <cmd>'),
            Option(short='M',
                   long='matryoshka',
                   kwargs={'matryoshka' : 8},
                   description='Recursively scan extracted files'),
            Option(short='d',
                   long='depth',
                   type=int,
                   kwargs={'matryoshka' : 0},
                   description='Limit matryoshka recursion depth (default: 8 levels deep)'),
            Option(short='C',
                   long='directory',
                   type=str,
                   kwargs={'base_directory' : 0},
                   description='Extract files/folders to a custom directory (default: current working directory)'),
            Option(short='j',
                   long='size',
                   type=int,
                   kwargs={'max_size' : 0},
                   description='Limit the size of each extracted file'),
            Option(short='n',
                   long='count',
                   type=int,
                   kwargs={'max_count' : 0},
                   description='Limit the number of extracted files'),
            Option(short='r',
                   long='rm',
                   kwargs={'remove_after_execute' : True},
                   description='Delete carved files after extraction'),
            Option(short='z',
                   long='carve',
                   kwargs={'run_extractors' : False},
                   description="Carve data from files, but don't execute extraction utilities"),
    ]

    KWARGS = [
            Kwarg(name='max_size', default=None),
            Kwarg(name='max_count', default=None),
            Kwarg(name='base_directory', default=None),
            Kwarg(name='remove_after_execute', default=False),
            Kwarg(name='load_default_rules', default=False),
            Kwarg(name='run_extractors', default=True),
            Kwarg(name='manual_rules', default=[]),
            Kwarg(name='matryoshka', default=0),
            Kwarg(name='enabled', default=False),
    ]

    def load(self):
        # Holds a list of extraction rules loaded either from a file or when manually specified.
        self.extract_rules = []
        # The input file specific output directory path (default to CWD)
        if self.base_directory:
            self.directory = os.path.realpath(self.base_directory)
            if not os.path.exists(self.directory):
                os.makedirs(self.directory)
        else:
            self.directory = os.getcwd()
        # Key value pairs of input file path and output extraction path
        self.output = {}
        # Number of extracted files
        self.extraction_count = 0
        # Override the directory name used for extraction output directories
        self.output_directory_override = None

        if self.load_default_rules:
            self.load_defaults()

        for manual_rule in self.manual_rules:
            self.add_rule(manual_rule)

        if self.matryoshka:
            self.config.verbose = True

    def add_pending(self, f):
        # Ignore symlinks
        if os.path.islink(f):
            return

        # Get the file mode to check and see if it's a block/char device
        try:
            file_mode = os.stat(f).st_mode
        except OSError as e:
            return

        # Only add this to the pending list of files to scan
        # if the file is a regular file. Special files (block/character
        # devices) can be tricky; they may fail to open, or worse, simply
        # hang when an attempt to open them is made. So for recursive
        # extraction purposes, they are ignored, albeit with a warning to
        # the user.
        if stat.S_ISREG(file_mode):
            # Make sure we can open the file too...
            try:
                fp = binwalk.core.common.BlockFile(f)
                fp.close()
                self.pending.append(f)
            except IOError as e:
                binwalk.core.common.warning("Ignoring file '%s': %s" % (f, str(e)))
        else:
            binwalk.core.common.warning("Ignoring file '%s': Not a regular file" % f)

    def reset(self):
        # Holds a list of pending files that should be scanned; only populated if self.matryoshka == True
        self.pending = []
        # Holds a dictionary of extraction directories created for each scanned file.
        self.extraction_directories = {}
        # Holds a dictionary of the last directory listing for a given directory; used for identifying
        # newly created/extracted files that need to be appended to self.pending.
        self.last_directory_listing = {}

    def callback(self, r):
        # Make sure the file attribute is set to a compatible instance of binwalk.core.common.BlockFile
        try:
            r.file.size
        except KeyboardInterrupt as e:
            pass
        except Exception as e:
            return

        if not r.size:
            size = r.file.size - r.offset
        else:
            size = r.size

        # Only extract valid results that have been marked for extraction and displayed to the user.
        # Note that r.display is still True even if --quiet has been specified; it is False if the result has been
        # explicitly excluded via the -y/-x options.
        if r.valid and r.extract and r.display and (not self.max_count or self.extraction_count < self.max_count):
            # Create some extract output for this file, if it doesn't already exist
            if not binwalk.core.common.has_key(self.output, r.file.path):
                self.output[r.file.path] = ExtractInfo()

            # Attempt extraction
            binwalk.core.common.debug("Extractor callback for %s @%d [%s]" % (r.file.name, r.offset, r.description))
            (extraction_directory, dd_file, scan_extracted_files) = self.extract(r.offset, r.description, r.file.path, size, r.name)

            # If the extraction was successful, self.extract will have returned the output directory and name of the dd'd file
            if extraction_directory and dd_file:
                # Track the number of extracted files
                self.extraction_count += 1

                # Get the full path to the dd'd file and save it in the output info for this file
                dd_file_path = os.path.join(extraction_directory, dd_file)
                self.output[r.file.path].carved[r.offset] = dd_file_path
                self.output[r.file.path].extracted[r.offset] = []

                # Do a directory listing of the output directory
                directory_listing = set(os.listdir(extraction_directory))

                # If this is a newly created output directory, self.last_directory_listing won't have a record of it.
                # If we've extracted other files to this directory before, it will.
                if not has_key(self.last_directory_listing, extraction_directory):
                    self.last_directory_listing[extraction_directory] = set()

                # Loop through a list of newly created files (i.e., files that weren't listed in the last directory listing)
                for f in directory_listing.difference(self.last_directory_listing[extraction_directory]):
                    # Build the full file path and add it to the extractor results
                    file_path = os.path.join(extraction_directory, f)
                    real_file_path = os.path.realpath(file_path)
                    self.result(description=file_path, display=False)

                    # Also keep a list of files created by the extraction utility
                    if real_file_path != dd_file_path:
                        self.output[r.file.path].extracted[r.offset].append(real_file_path)

                    # If recursion was specified, and the file is not the same one we just dd'd
                    if (self.matryoshka and
                        file_path != dd_file_path and
                        scan_extracted_files and
                        self.directory in real_file_path):
                        # If the recursion level of this file is less than or equal to our desired recursion level
                        if len(real_file_path.split(self.directory)[1].split(os.path.sep)) <= self.matryoshka:
                            # If this is a directory and we are supposed to process directories for this extractor,
                            # then add all files under that directory to the list of pending files.
                            if os.path.isdir(file_path):
                                for root, dirs, files in os.walk(file_path):
                                    for f in files:
                                        full_path = os.path.join(root, f)
                                        self.add_pending(full_path)
                            # If it's just a file, add it to the list of pending files
                            else:
                                self.add_pending(file_path)

                # Update the last directory listing for the next time we extract a file to this same output directory
                self.last_directory_listing[extraction_directory] = directory_listing

    def append_rule(self, r):
        self.extract_rules.append(r.copy())

    def add_rule(self, txtrule=None, regex=None, extension=None, cmd=None, codes=[0, None], recurse=True):
        '''
        Adds a set of rules to the extraction rule list.

        @txtrule   - Rule string, or list of rule strings, in the format <regular expression>:<file extension>[:<command to run>]
        @regex     - If rule string is not specified, this is the regular expression string to use.
        @extension - If rule string is not specified, this is the file extension to use.
        @cmd       - If rule string is not specified, this is the command to run.
                     Alternatively a callable object may be specified, which will be passed one argument: the path to the file to extract.
        @codes     - A list of valid return codes for the extractor.
        @recurse   - If False, extracted directories will not be recursed into when the matryoshka option is enabled.

        Returns None.
        '''
        rules = []
        match = False
        r = {
            'extension'     : '',
            'cmd'           : '',
            'regex'         : None,
            'codes'         : codes,
            'recurse'       : recurse,
        }

        # Process single explicitly specified rule
        if not txtrule and regex and extension:
            r['extension'] = extension
            r['regex'] = re.compile(regex)
            if cmd:
                r['cmd'] = cmd

            self.append_rule(r)
            return

        # Process rule string, or list of rule strings
        if not isinstance(txtrule, type([])):
            rules = [txtrule]
        else:
            rules = txtrule

        for rule in rules:
            r['cmd'] = ''
            r['extension'] = ''

            try:
                values = self._parse_rule(rule)
                match = values[0]
                r['regex'] = re.compile(values[0])
                r['extension'] = values[1]
                r['cmd'] = values[2]
                r['codes'] = values[3]
                r['recurse'] = values[4]
            except KeyboardInterrupt as e:
                raise e
            except Exception:
                pass

            # Verify that the match string was retrieved.
            if match:
                self.append_rule(r)

    def remove_rules(self, description):
        '''
        Remove all rules that match a specified description.

        @description - The description to match against.

        Returns the number of rules removed.
        '''
        rm = []
        description = description.lower()

        for i in range(0, len(self.extract_rules)):
            if self.extract_rules[i]['regex'].search(description):
                rm.append(i)

        # Pop in reverse order so that earlier pops don't shift the indices
        # of entries that still need to be removed.
        for i in reversed(rm):
            self.extract_rules.pop(i)

        return len(rm)

    def edit_rules(self, description, key, value):
        '''
        Edit all rules that match a specified description.

        @description - The description to match against.
        @key         - The key to change for each matching rule.
        @value       - The new key value for each matching rule.

        Returns the number of rules modified.
        '''
        count = 0
        description = description.lower()

        for i in range(0, len(self.extract_rules)):
            if self.extract_rules[i]['regex'].search(description):
                if has_key(self.extract_rules[i], key):
                    self.extract_rules[i][key] = value
                    count += 1

        return count

    def clear_rules(self):
        '''
        Deletes all extraction rules.

        Returns None.
        '''
        self.extract_rules = []

    def get_rules(self, description=None):
        '''
        Returns a list of extraction rules that match a given description.

        @description - The description to match against.

        Returns a list of extraction rules that match the given description.
        If no description is provided, a list of all rules are returned.
        '''
        if description:
            rules = []
            description = description.lower()
            for i in range(0, len(self.extract_rules)):
                if self.extract_rules[i]['regex'].search(description):
                    rules.append(self.extract_rules[i])
        else:
            rules = self.extract_rules

        return rules

    def load_from_file(self, fname):
        '''
        Loads extraction rules from the specified file.

        @fname - Path to the extraction rule file.

        Returns None.
        '''
        try:
            # Process each line from the extract file, ignoring comments
            with open(fname, 'r') as f:
                for rule in f.readlines():
                    self.add_rule(rule.split(self.COMMENT_DELIM, 1)[0])
        except KeyboardInterrupt as e:
            raise e
        except Exception as e:
            raise Exception("Extractor.load_from_file failed to load file '%s': %s" % (fname, str(e)))

    def load_defaults(self):
        '''
        Loads default extraction rules from the user and system extract.conf files.

        Returns None.
        '''
        # Load the user extract file first to ensure its rules take precedence.
        extract_files = [
            self.config.settings.user.extract,
            self.config.settings.system.extract,
        ]

        for extract_file in extract_files:
            if extract_file:
                try:
                    self.load_from_file(extract_file)
                except KeyboardInterrupt as e:
                    raise e
                except Exception as e:
                    if binwalk.core.common.DEBUG:
                        raise Exception("Extractor.load_defaults failed to load file '%s': %s" % (extract_file, str(e)))

    def get_output_directory_override(self):
        '''
        Returns the current output directory basename override value.
        '''
        return self.output_directory_override

    def override_output_directory_basename(self, dirname):
        '''
        Allows the overriding of the default extraction directory basename.

        @dirname - The directory base name to use.

        Returns the current output directory basename override value.
        '''
        self.output_directory_override = dirname
        return self.output_directory_override

    def build_output_directory(self, path):
        '''
        Set the output directory for extracted files.

        @path - The path to the file that data will be extracted from.

        Returns None.
        '''
        # If we have not already created an output directory for this target file, create one now
        if not has_key(self.extraction_directories, path):
            basedir = os.path.dirname(path)
            basename = os.path.basename(path)

            if basedir != self.directory:
                # During recursive extraction, extracted files will be in subdirectories
                # of the CWD. This allows us to figure out the subdirectory by simply
                # splitting the target file's base directory on our known CWD.
                #
                # However, the very *first* file being scanned is not necessarily in the
                # CWD, so this will raise an IndexError. This is easy to handle though,
                # since the very first file being scanned needs to have its contents
                # extracted to ${CWD}/_basename.extracted, so we just set the subdir
                # variable to a blank string when an IndexError is encountered.
                try:
                    subdir = basedir.split(self.directory)[1][1:]
                except IndexError as e:
                    subdir = ""
            else:
                subdir = ""

            if self.output_directory_override:
                output_directory = os.path.join(self.directory, subdir, self.output_directory_override)
            else:
                outdir = os.path.join(self.directory, subdir, '_' + basename)
                output_directory = unique_file_name(outdir, extension='extracted')

            if not os.path.exists(output_directory):
                os.mkdir(output_directory)

            self.extraction_directories[path] = output_directory
            self.output[path].directory = os.path.realpath(output_directory) + os.path.sep
        # Else, just use the already created directory
        else:
            output_directory = self.extraction_directories[path]

        return output_directory

    def cleanup_extracted_files(self, tf=None):
        '''
        Set the action to take after a file is extracted.

        @tf - If set to True, extracted files will be cleaned up after running a command against them.
              If set to False, extracted files will not be cleaned up after running a command against them.
              If set to None or not specified, the current setting will not be changed.

        Returns the current cleanup status (True/False).
        '''
        if tf is not None:
            self.remove_after_execute = tf

        return self.remove_after_execute

    def extract(self, offset, description, file_name, size, name=None):
        '''
        Extract an embedded file from the target file, if it matches an extract rule.
        Called automatically by Binwalk.scan().

        @offset      - Offset inside the target file to begin the extraction.
        @description - Description of the embedded file to extract, as returned by libmagic.
        @file_name   - Path to the target file.
        @size        - Number of bytes to extract.
        @name        - Name to save the file as.

        Returns the name of the extracted file (blank string if nothing was extracted).
        '''
        fname = ''
        recurse = False
        original_dir = os.getcwd()
        rules = self.match(description)
        file_path = os.path.realpath(file_name)

        # No extraction rules for this file
        if not rules:
            return (None, None, False)
        else:
            binwalk.core.common.debug("Found %d matching extraction rules" % len(rules))

        # Generate the output directory name where extracted files will be stored
        output_directory = self.build_output_directory(file_name)

        # Extract to end of file if no size was specified
        if not size:
            size = file_size(file_path) - offset

        if os.path.isfile(file_path):
            os.chdir(output_directory)

            # Loop through each extraction rule until one succeeds
            for i in range(0, len(rules)):
                rule = rules[i]

                # Make sure we don't recurse into any extracted directories if instructed not to
                if rule['recurse'] in [True, False]:
                    recurse = rule['recurse']
                else:
                    recurse = True

                # Copy out the data to disk, if we haven't already
                fname = self._dd(file_path, offset, size, rule['extension'], output_file_name=name)

                # If there was a command specified for this rule, try to execute it.
                # If execution fails, the next rule will be attempted.
                if rule['cmd']:

                    # Note the hash of the original file; if --rm is specified and the
                    # extraction utility modifies the original file rather than creating
                    # a new one (AFAIK none currently do, but could happen in the future),
                    # we don't want to remove this file.
                    if self.remove_after_execute:
                        fname_md5 = file_md5(fname)

                    # Execute the specified command against the extracted file
                    if self.run_extractors:
                        extract_ok = self.execute(rule['cmd'], fname, rule['codes'])
                    else:
                        extract_ok = True

                    # Only clean up files if remove_after_execute was specified
                    if extract_ok == True and self.remove_after_execute:

                        # Remove the original file that we extracted,
                        # if it has not been modified by the extractor.
                        try:
                            if file_md5(fname) == fname_md5:
                                os.unlink(fname)
                        except KeyboardInterrupt as e:
                            raise e
                        except Exception as e:
                            pass

                    # If the command executed OK, don't try any more rules
                    if extract_ok == True:
                        break
                    # Else, remove the extracted file if this isn't the last rule in the list.
                    # If it is the last rule, leave the file on disk for the user to examine.
                    elif i != (len(rules)-1):
                        try:
                            os.unlink(fname)
                        except KeyboardInterrupt as e:
                            raise e
                        except Exception as e:
                            pass

                # If there was no command to execute, just use the first rule
                else:
                    break

            os.chdir(original_dir)

        return (output_directory, fname, recurse)

    def _entry_offset(self, index, entries, description):
        '''
        Gets the offset of the first entry that matches the description.

        @index       - Index into the entries list to begin searching.
        @entries     - Dictionary of result entries.
        @description - Case insensitive description.

        Returns the offset, if a matching description is found.
        Returns -1 if a matching description is not found.
        '''
        description = description.lower()

        for (offset, infos) in entries[index:]:
            for info in infos:
                if info['description'].lower().startswith(description):
                    return offset
        return -1

    def match(self, description):
        '''
        Check to see if the provided description string matches an extract rule.
        Called internally by self.extract().

        @description - Description string to check.

        Returns the associated rule dictionary if a match is found.
        Returns None if no match is found.
        '''
        rules = []
        description = description.lower()

        for rule in self.extract_rules:
            if rule['regex'].search(description):
                rules.append(rule)
        return rules

    def _parse_rule(self, rule):
        '''
        Parses an extraction rule.

        @rule - Rule string.

        Returns an array of ['<case insensitive matching string>', '<file extension>', '<command to run>', '<comma separated return codes>', <recurse into extracted directories: True|False>].
        '''
        values = rule.strip().split(self.RULE_DELIM, 4)

        if len(values) >= 4:
            codes = values[3].split(',')
            for i in range(0, len(codes)):
                try:
                    codes[i] = int(codes[i], 0)
                except ValueError as e:
                    binwalk.core.common.warning("The specified return code '%s' for extractor '%s' is not a valid number!" % (codes[i], values[0]))
            values[3] = codes

        if len(values) >= 5:
            values[4] = (values[4].lower() == 'true')

        return values
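
    # For example (hypothetical rule string):
    #
    #     _parse_rule("squashfs filesystem:squashfs:unsquashfs '%e':0,1:True")
    #
    # returns ['squashfs filesystem', 'squashfs', "unsquashfs '%e'",
    # [0, 1], True].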

    def _dd(self, file_name, offset, size, extension, output_file_name=None):
        '''
        Extracts a file embedded inside the target file.

        @file_name        - Path to the target file.
        @offset           - Offset inside the target file where the embedded file begins.
        @size             - Number of bytes to extract.
        @extension        - The file extension to assign to the extracted file on disk.
        @output_file_name - The requested name of the output file.

        Returns the extracted file name.
        '''
        total_size = 0
        # Default extracted file name is <displayed hex offset>.<extension>
        default_bname = "%X" % (offset + self.config.base)

        if self.max_size and size > self.max_size:
            size = self.max_size

        if not output_file_name:
            bname = default_bname
        else:
            # Strip the output file name of invalid/dangerous characters (like file paths)
            bname = os.path.basename(output_file_name)

        fname = unique_file_name(bname, extension)

        try:
            # If byte swapping is enabled, we need to start reading at a swap-size
            # aligned offset, then index into the read data appropriately.
            if self.config.swap_size:
                adjust = offset % self.config.swap_size
            else:
                adjust = 0

            offset -= adjust

            # Open the target file and seek to the offset
            fdin = self.config.open_file(file_name)
            fdin.seek(offset)

            # Open the output file
            try:
                fdout = BlockFile(fname, 'w')
            except KeyboardInterrupt as e:
                raise e
            except Exception as e:
                # Fall back to the default name if the requested name fails
                fname = unique_file_name(default_bname, extension)
                fdout = BlockFile(fname, 'w')

            while total_size < size:
                (data, dlen) = fdin.read_block()
                if not data:
                    break
                else:
                    total_size += (dlen-adjust)
                    if total_size > size:
                        dlen -= (total_size - size)
                    fdout.write(str2bytes(data[adjust:dlen]))
                    adjust = 0

            # Cleanup
            fdout.close()
            fdin.close()
        except KeyboardInterrupt as e:
            raise e
        except Exception as e:
            raise Exception("Extractor.dd failed to extract data from '%s' to '%s': %s" % (file_name, fname, str(e)))

        binwalk.core.common.debug("Carved data block 0x%X - 0x%X from '%s' to '%s'" % (offset, offset+size, file_name, fname))
        return fname

    def execute(self, cmd, fname, codes=[0, None]):
        '''
        Execute a command against the specified file.

        @cmd   - Command to execute.
        @fname - File to run command against.
        @codes - List of return codes indicating cmd success.

        Returns True on success, False on failure, or None if the external extraction utility could not be found.
        '''
        tmp = None
        rval = 0
        retval = True

        binwalk.core.common.debug("Running extractor '%s'" % str(cmd))

        try:
            if callable(cmd):
                try:
                    retval = cmd(fname)
                except KeyboardInterrupt as e:
                    raise e
                except Exception as e:
                    binwalk.core.common.warning("Internal extractor '%s' failed with exception: '%s'" % (str(cmd), str(e)))
            elif cmd:
                # If not in debug mode, create a temporary file to redirect stdout and stderr to
                if not binwalk.core.common.DEBUG:
                    tmp = tempfile.TemporaryFile()

                # Execute.
                for command in cmd.split("&&"):

                    # Generate unique file paths for all paths in the current command that are surrounded by UNIQUE_PATH_DELIMITER
                    while self.UNIQUE_PATH_DELIMITER in command:
                        need_unique_path = command.split(self.UNIQUE_PATH_DELIMITER)[1].split(self.UNIQUE_PATH_DELIMITER)[0]
                        unique_path = binwalk.core.common.unique_file_name(need_unique_path)
                        command = command.replace(self.UNIQUE_PATH_DELIMITER + need_unique_path + self.UNIQUE_PATH_DELIMITER, unique_path)

                    # Replace all instances of FILE_NAME_PLACEHOLDER in the command with fname
                    command = command.strip().replace(self.FILE_NAME_PLACEHOLDER, fname)

                    binwalk.core.common.debug("subprocess.call(%s, stdout=%s, stderr=%s)" % (command, str(tmp), str(tmp)))
                    rval = subprocess.call(shlex.split(command), stdout=tmp, stderr=tmp)

                    if rval in codes:
                        retval = True
                    else:
                        retval = False

                    binwalk.core.common.debug('External extractor command "%s" completed with return code %d (success: %s)' % (cmd, rval, str(retval)))

                    # TODO: Should errors from all commands in a command string be checked? Currently we only support
                    #       specifying one set of error codes, so at the moment, this is not done; it is up to the
                    #       final command to return success or failure (which presumably it will if previous necessary
                    #       commands were not successful, but this is an assumption).
                    #if retval == False:
                    #    break

        except KeyboardInterrupt as e:
            raise e
        except Exception as e:
            binwalk.core.common.warning("Extractor.execute failed to run external extractor '%s': %s, '%s' might not be installed correctly" % (str(cmd), str(e), str(cmd)))
            retval = None

        if tmp is not None:
            tmp.close()

        return retval
Example #7
class General(Module):

    TITLE = "General"
    ORDER = 0

    DEFAULT_DEPENDS = []

    CLI = [
        Option(long='length',
               short='l',
               type=int,
               kwargs={'length': 0},
               description='Number of bytes to scan'),
        Option(long='offset',
               short='o',
               type=int,
               kwargs={'offset': 0},
               description='Start scan at this file offset'),
        Option(long='base',
               short='O',
               type=int,
               kwargs={'base': 0},
               description='Add a base address to all printed offsets'),
        Option(long='block',
               short='K',
               type=int,
               kwargs={'block': 0},
               description='Set file block size'),
        Option(long='swap',
               short='g',
               type=int,
               kwargs={'swap_size': 0},
               description='Reverse every n bytes before scanning'),
        Option(long='log',
               short='f',
               type=argparse.FileType,
               kwargs={'log_file': None},
               description='Log results to file'),
        Option(long='csv',
               short='c',
               kwargs={'csv': True},
               description='Log results to file in CSV format'),
        Option(long='term',
               short='t',
               kwargs={'format_to_terminal': True},
               description='Format output to fit the terminal window'),
        Option(long='quiet',
               short='q',
               kwargs={'quiet': True},
               description='Suppress output to stdout'),
        Option(long='verbose',
               short='v',
               kwargs={'verbose': True},
               description='Enable verbose output'),
        Option(short='h',
               long='help',
               kwargs={'show_help': True},
               description='Show help output'),
        Option(short='a',
               long='finclude',
               type=str,
               kwargs={'file_name_include_regex': ""},
               description='Only scan files whose names match this regex'),
        Option(short='p',
               long='fexclude',
               type=str,
               kwargs={'file_name_exclude_regex': ""},
               description='Do not scan files whose names match this regex'),
        Option(short='s',
               long='status',
               type=int,
               kwargs={'status_server_port': 0},
               description='Enable the status server on the specified port'),
        Option(long=None,
               short=None,
               type=binwalk.core.common.BlockFile,
               kwargs={'files': []}),

        # Hidden, API-only arguments
        Option(long="string",
               hidden=True,
               kwargs={'subclass': binwalk.core.common.StringFile}),
    ]

    KWARGS = [
        Kwarg(name='length', default=0),
        Kwarg(name='offset', default=0),
        Kwarg(name='base', default=0),
        Kwarg(name='block', default=0),
        Kwarg(name='status_server_port', default=0),
        Kwarg(name='swap_size', default=0),
        Kwarg(name='log_file', default=None),
        Kwarg(name='csv', default=False),
        Kwarg(name='format_to_terminal', default=False),
        Kwarg(name='quiet', default=False),
        Kwarg(name='verbose', default=False),
        Kwarg(name='files', default=[]),
        Kwarg(name='show_help', default=False),
        Kwarg(name='keep_going', default=False),
        Kwarg(name='subclass', default=io.FileIO),
        Kwarg(name='file_name_include_regex', default=None),
        Kwarg(name='file_name_exclude_regex', default=None),
    ]

    PRIMARY = False

    def load(self):
        self.threads_active = False
        self.target_files = []

        # A special case for when we're loaded into IDA
        if self.subclass == io.FileIO and binwalk.core.idb.LOADED_IN_IDA:
            self.subclass = binwalk.core.idb.IDBFileIO

        # Order is important with these two methods
        self._open_target_files()
        self._set_verbosity()

        # Build file name filter regex rules
        if self.file_name_include_regex:
            self.file_name_include_regex = re.compile(
                self.file_name_include_regex)
        if self.file_name_exclude_regex:
            self.file_name_exclude_regex = re.compile(
                self.file_name_exclude_regex)

        self.settings = binwalk.core.settings.Settings()
        self.display = binwalk.core.display.Display(
            log=self.log_file,
            csv=self.csv,
            quiet=self.quiet,
            verbose=self.verbose,
            fit_to_screen=self.format_to_terminal)

        if self.show_help:
            show_help()
            if not binwalk.core.idb.LOADED_IN_IDA:
                sys.exit(0)

        if self.status_server_port > 0:
            self.parent.status_server(self.status_server_port)

    def reset(self):
        pass

    def _set_verbosity(self):
        '''
        Sets the appropriate verbosity.
        Must be called after self._open_target_files so that self.target_files is properly set.
        '''
        # If more than one target file was specified, enable verbose mode; else, there is
        # nothing in some outputs to indicate which scan corresponds to which
        # file.
        if len(self.target_files) > 1 and not self.verbose:
            self.verbose = True

    def file_name_filter(self, fp):
        '''
        Checks to see if a file should be scanned based on file name include/exclude filters.
        Most useful for matryoshka scans where only certain files are desired.

        @fp - An instance of binwalk.core.common.BlockFile

        Returns True if the file should be scanned, False if not.
        '''
        if self.file_name_include_regex and not self.file_name_include_regex.search(
                fp.name):
            return False
        if self.file_name_exclude_regex and self.file_name_exclude_regex.search(
                fp.name):
            return False

        return True

    def open_file(self,
                  fname,
                  length=None,
                  offset=None,
                  swap=None,
                  block=None,
                  peek=None):
        '''
        Opens the specified file with all pertinent configuration settings.
        '''
        if length is None:
            length = self.length
        if offset is None:
            offset = self.offset
        if swap is None:
            swap = self.swap_size

        return binwalk.core.common.BlockFile(fname,
                                             subclass=self.subclass,
                                             length=length,
                                             offset=offset,
                                             swap=swap,
                                             block=block,
                                             peek=peek)

    def _open_target_files(self):
        '''
        Checks if the target files can be opened.
        Any files that cannot be opened are removed from the self.target_files list.
        '''
        # Validate the target files listed in target_files
        for tfile in self.files:
            # Ignore directories.
            if self.subclass != io.FileIO or not os.path.isdir(tfile):
                # Make sure we can open the target files
                try:
                    fp = self.open_file(tfile)
                    fp.close()
                    self.target_files.append(tfile)
                except KeyboardInterrupt as e:
                    raise e
                except Exception as e:
                    self.error(
                        description="Cannot open file %s (CWD: %s) : %s" %
                        (tfile, os.getcwd(), str(e)))
Example #8
class Signature(Module):

    TITLE = "Signature Scan"
    ORDER = 10

    CLI = [
        Option(short='B',
               long='signature',
               kwargs={
                   'enabled': True,
                   'force_default_scan': True
               },
               description='Scan target file(s) for common file signatures'),
        Option(short='R',
               long='raw-bytes',
               kwargs={
                   'enabled': True,
                   'raw_bytes': ''
               },
               type=str,
               description=
               'Scan target file(s) for the specified sequence of bytes'),
        Option(
            short='A',
            long='opcodes',
            kwargs={
                'enabled': True,
                'search_for_opcodes': True
            },
            description='Scan target file(s) for common executable opcodes'),
        Option(
            short='C',
            long='cast',
            kwargs={
                'enabled': True,
                'cast_data_types': True
            },
            description=
            'Cast offsets as a given data type (use -y to specify the data type / endianness)'
        ),
        Option(short='m',
               long='magic',
               kwargs={'magic_files': []},
               type=list,
               dtype='file',
               description='Specify a custom magic file to use'),
        Option(short='b',
               long='dumb',
               kwargs={'dumb_scan': True},
               description='Disable smart signature keywords'),
    ]

    KWARGS = [
        Kwarg(name='enabled', default=False),
        Kwarg(name='raw_bytes', default=None),
        Kwarg(name='search_for_opcodes', default=False),
        Kwarg(name='cast_data_types', default=False),
        Kwarg(name='dumb_scan', default=False),
        Kwarg(name='force_default_scan', default=False),
        Kwarg(name='magic_files', default=[]),
    ]

    VERBOSE_FORMAT = "%s    %d"

    def init(self):
        # Create Signature and MagicParser class instances. These are mostly for internal use.
        self.smart = binwalk.core.smart.Signature(
            self.config.filter, ignore_smart_signatures=self.dumb_scan)
        self.parser = binwalk.core.parser.MagicParser(self.config.filter,
                                                      self.smart)

        # If a raw byte sequence was specified, build a magic file from that instead of using the default magic files
        if self.raw_bytes is not None:
            self.magic_files = [self.parser.file_from_string(self.raw_bytes)]

        # For each signature set, list the user's magic file before the system file so that user-defined signatures take precedence
        if self.search_for_opcodes:
            self.magic_files += [
                self.config.settings.get_file_path(
                    'user', self.config.settings.BINARCH_MAGIC_FILE),
                self.config.settings.get_file_path(
                    'system', self.config.settings.BINARCH_MAGIC_FILE),
            ]

        if self.cast_data_types:
            self.magic_files += [
                self.config.settings.get_file_path(
                    'user', self.config.settings.BINCAST_MAGIC_FILE),
                self.config.settings.get_file_path(
                    'system', self.config.settings.BINCAST_MAGIC_FILE),
            ]

        # Use the system default magic file if no other was specified, or if -B was explicitly specified
        if not self.magic_files or self.force_default_scan:
            self.magic_files += [
                self.config.settings.get_file_path(
                    'user', self.config.settings.BINWALK_MAGIC_FILE),
                self.config.settings.get_file_path(
                    'system', self.config.settings.BINWALK_MAGIC_FILE),
            ]

        # Parse the magic file(s) and initialize libmagic
        self.mfile = self.parser.parse(self.magic_files)
        self.magic = binwalk.core.magic.Magic(self.mfile)

        # Once the temporary magic files are loaded into libmagic, we don't need them anymore; delete the temp files
        self.parser.rm_magic_files()

        self.VERBOSE = ["Signatures:", self.parser.signature_count]

    def validate(self, r):
        '''
        Called automatically by self.result.
        '''
        if not r.description:
            r.valid = False

        if r.size and (r.size + r.offset) > r.file.size:
            r.valid = False

        if r.jump and (r.jump + r.offset) > r.file.size:
            r.valid = False

        # Only consult the filter if the result hasn't already been invalidated above
        if r.valid:
            r.valid = self.config.filter.valid_result(r.description)

    def scan_file(self, fp):
        current_file_offset = 0

        while True:
            (data, dlen) = fp.read_block()
            if not data:
                break

            current_block_offset = 0
            block_start = fp.tell() - dlen
            self.status.completed = block_start - fp.offset

            for candidate_offset in self.parser.find_signature_candidates(
                    data, dlen):

                # current_block_offset is set when a jump-to-offset keyword is encountered while
                # processing signatures. This points to an offset inside the current data block
                # that scanning should jump to, so ignore any subsequent candidate signatures that
                # occur before this offset inside the current data block.
                if candidate_offset < current_block_offset:
                    continue

                # Pass the data to libmagic for parsing
                magic_result = self.magic.buffer(
                    data[candidate_offset:candidate_offset +
                         fp.block_peek_size])
                if not magic_result:
                    continue

                # The smart filter parser returns a binwalk.core.module.Result object
                r = self.smart.parse(magic_result)

                # Set the absolute offset inside the target file
                r.offset = block_start + candidate_offset + r.adjust

                # Provide an instance of the current file object
                r.file = fp

                # Register the result for further processing/display
                # self.result automatically calls self.validate for result validation
                self.result(r=r)

                # Is this a valid result and did it specify a jump-to-offset keyword?
                if r.valid and r.jump > 0:
                    absolute_jump_offset = r.offset + r.jump
                    current_block_offset = candidate_offset + r.jump

                    # If the jump-to-offset is beyond the confines of the current block, seek the file to
                    # that offset and quit processing this block of data.
                    if absolute_jump_offset >= fp.tell():
                        fp.seek(r.offset + r.jump)
                        break

    def run(self):
        for fp in iter(self.next_file, None):
            self.header()
            self.scan_file(fp)
            self.footer()
Example #9
class Entropy(Module):

    XLABEL = 'Offset'
    YLABEL = 'Entropy'

    XUNITS = 'B'
    YUNITS = 'E'

    FILE_WIDTH = 1024
    FILE_FORMAT = 'png'

    COLORS = ['r', 'g', 'c', 'b', 'm']

    DEFAULT_BLOCK_SIZE = 1024
    DEFAULT_DATA_POINTS = 2048

    TITLE = "Entropy Analysis"
    ORDER = 8

    # TODO: Add --dpoints option to set the number of data points?
    CLI = [
        Option(short='E',
               long='entropy',
               kwargs={'enabled': True},
               description='Calculate file entropy'),
        Option(short='H',
               long='fast',
               kwargs={'use_zlib': True},
               description='Use faster, but less detailed, entropy analysis'),
        Option(short='J',
               long='save',
               kwargs={'save_plot': True},
               description='Save plot as a PNG'),
        Option(short='N',
               long='nplot',
               kwargs={'do_plot': False},
               description='Do not generate an entropy plot graph'),
        Option(short='Q',
               long='nlegend',
               kwargs={'show_legend': False},
               description='Omit the legend from the entropy plot graph'),
    ]

    KWARGS = [
        Kwarg(name='enabled', default=False),
        Kwarg(name='save_plot', default=False),
        Kwarg(name='use_zlib', default=False),
        Kwarg(name='display_results', default=True),
        Kwarg(name='do_plot', default=True),
        Kwarg(name='show_legend', default=True),
        Kwarg(name='block_size', default=0),
    ]

    # Run this module last so that it can process all other module's results and overlay them on the entropy graph
    PRIORITY = 0

    def init(self):
        self.HEADER[-1] = "ENTROPY"
        self.max_description_length = 0
        self.file_markers = {}

        if self.use_zlib:
            self.algorithm = self.gzip
        else:
            self.algorithm = self.shannon

        # Get a list of all other module's results to mark on the entropy graph
        for (module, obj) in iterator(self.modules):
            for result in obj.results:
                if result.plot and result.file and result.description:
                    description = result.description.split(',')[0]

                    if not has_key(self.file_markers, result.file.name):
                        self.file_markers[result.file.name] = []

                    if len(description) > self.max_description_length:
                        self.max_description_length = len(description)

                    self.file_markers[result.file.name].append(
                        (result.offset, description))

        # If other modules have been run and they produced results, don't spam the terminal with entropy results
        if self.file_markers:
            self.display_results = False

        if not self.block_size:
            if self.config.block:
                self.block_size = self.config.block
            else:
                self.block_size = None

    def run(self):
        for fp in iter(self.next_file, None):

            if self.display_results:
                self.header()

            self.calculate_file_entropy(fp)

            if self.display_results:
                self.footer()

        if self.do_plot:
            import pyqtgraph as pg

            if not self.save_plot:
                from pyqtgraph.Qt import QtGui
                QtGui.QApplication.instance().exec_()

            pg.exit()

    def calculate_file_entropy(self, fp):
        # Clear results from any previously analyzed files
        self.clear(results=True)

        # If -K was not specified, calculate the block size to create DEFAULT_DATA_POINTS data points
        if self.block_size is None:
            block_size = fp.size / self.DEFAULT_DATA_POINTS
            # Round up to the nearest DEFAULT_BLOCK_SIZE (1024)
            block_size = int(block_size +
                             ((self.DEFAULT_BLOCK_SIZE - block_size) %
                              self.DEFAULT_BLOCK_SIZE))
        else:
            block_size = self.block_size

        binwalk.core.common.debug("Entropy block size (%d data points): %d" %
                                  (self.DEFAULT_DATA_POINTS, block_size))

        while True:
            file_offset = fp.tell()

            (data, dlen) = fp.read_block()
            if not data:
                break

            i = 0
            while i < dlen:
                entropy = self.algorithm(data[i:i + block_size])
                r = self.result(offset=(file_offset + i),
                                file=fp,
                                entropy=entropy,
                                description=("%f" % entropy),
                                display=self.display_results)
                i += block_size

        if self.do_plot:
            self.plot_entropy(fp.name)

    def shannon(self, data):
        '''
        Performs a Shannon entropy analysis on a given block of data.
        '''
        entropy = 0

        if data:
            length = len(data)

            # data is expected to be a str with one character per byte
            # (as returned by binwalk's BlockFile reads), so byte counts
            # are keyed by single-character strings.
            seen = dict(((chr(x), 0) for x in range(0, 256)))
            for byte in data:
                seen[byte] += 1

            for x in range(0, 256):
                p_x = float(seen[chr(x)]) / length
                if p_x > 0:
                    entropy -= p_x * math.log(p_x, 2)

        return (entropy / 8)

    def gzip(self, data, truncate=True):
        '''
        Performs an entropy analysis based on zlib compression ratio.
        This is faster than the shannon entropy analysis, but not as accurate.
        '''
        # Entropy is a simple ratio of: <zlib compressed size> / <original size>
        e = float(len(zlib.compress(str2bytes(data), 9))) / float(len(data))

        if truncate and e > 1.0:
            e = 1.0

        return e

    def plot_entropy(self, fname):
        import numpy as np
        import pyqtgraph as pg
        import pyqtgraph.exporters as exporters

        i = 0
        x = []
        y = []
        plotted_colors = {}

        for r in self.results:
            x.append(r.offset)
            y.append(r.entropy)

        plt = pg.plot(title=fname, clear=True)

        if self.show_legend and has_key(self.file_markers, fname):
            plt.addLegend(size=(self.max_description_length * 10, 0))

            for (offset, description) in self.file_markers[fname]:
                # If this description has already been plotted at a different offset, we need to
                # use the same color for the marker, but set the description to None to prevent
                # duplicate entries in the graph legend.
                #
                # Else, get the next color and use it to mark descriptions of this type.
                if has_key(plotted_colors, description):
                    color = plotted_colors[description]
                    description = None
                else:
                    color = self.COLORS[i]
                    plotted_colors[description] = color

                    i += 1
                    if i >= len(self.COLORS):
                        i = 0

                plt.plot(x=[offset, offset],
                         y=[0, 1.1],
                         name=description,
                         pen=pg.mkPen(color, width=2.5))

        # Plot data points
        plt.plot(x, y, pen='y')

        # TODO: legend is not displayed properly when saving plots to disk
        if self.save_plot:
            exporter = exporters.ImageExporter.ImageExporter(plt.plotItem)
            exporter.parameters()['width'] = self.FILE_WIDTH
            exporter.export(
                binwalk.core.common.unique_file_name(os.path.basename(fname),
                                                     self.FILE_FORMAT))
        else:
            plt.setLabel('left', self.YLABEL, units=self.YUNITS)
            plt.setLabel('bottom', self.XLABEL, units=self.XUNITS)
Example #10
class HashMatch(Module):
    '''
    Class for fuzzy hash matching of files and directories.
    '''
    DEFAULT_CUTOFF = 0
    CONSERVATIVE_CUTOFF = 90

    TITLE = "Fuzzy Hash"

    CLI = [
        Option(short='F',
               long='fuzzy',
               kwargs={'enabled': True},
               description='Perform fuzzy hash matching on files/directories'),
        Option(short='u',
               long='cutoff',
               priority=100,
               type=int,
               kwargs={'cutoff': DEFAULT_CUTOFF},
               description='Set the cutoff percentage'),
        Option(
            short='S',
            long='strings',
            kwargs={'strings': True},
            description='Diff strings inside files instead of the entire file'
        ),
        Option(short='s',
               long='same',
               kwargs={
                   'same': True,
                   'cutoff': CONSERVATIVE_CUTOFF
               },
               description='Only show files that are the same'),
        Option(short='p',
               long='diff',
               kwargs={
                   'same': False,
                   'cutoff': CONSERVATIVE_CUTOFF
               },
               description='Only show files that are different'),
        Option(short='n',
               long='name',
               kwargs={'filter_by_name': True},
               description='Only compare files whose base names are the same'),
        Option(short='L',
               long='symlinks',
               kwargs={'symlinks': True},
               description="Don't ignore symlinks"),
    ]

    KWARGS = [
        Kwarg(name='cutoff', default=DEFAULT_CUTOFF),
        Kwarg(name='strings', default=False),
        Kwarg(name='same', default=True),
        Kwarg(name='symlinks', default=False),
        Kwarg(name='max_results', default=None),
        Kwarg(name='abspath', default=False),
        Kwarg(name='filter_by_name', default=False),
        Kwarg(name='enabled', default=False),
    ]

    # Requires the external fuzzy hashing shared library loaded below via LIBRARY_NAME
    LIBRARY_NAME = "infuzzy"
    LIBRARY_FUNCTIONS = [
        binwalk.core.C.Function(name="fuzzy_hash_buf", type=int),
        binwalk.core.C.Function(name="fuzzy_hash_filename", type=int),
        binwalk.core.C.Function(name="fuzzy_compare", type=int),
    ]

    # Max result is 148 (http://ssdeep.sourceforge.net/api/html/fuzzy_8h.html)
    FUZZY_MAX_RESULT = 150
    # Files smaller than this won't produce meaningful fuzzy results (from ssdeep.h)
    FUZZY_MIN_FILE_SIZE = 4096

    HEADER_FORMAT = "\n%s" + " " * 11 + "%s\n"
    RESULT_FORMAT = "%d%%" + " " * 17 + "%s\n"
    HEADER = ["SIMILARITY", "FILE NAME"]
    RESULT = ["percentage", "description"]

    def init(self):
        self.total = 0
        self.last_file1 = HashResult(None)
        self.last_file2 = HashResult(None)

        self.lib = binwalk.core.C.Library(self.LIBRARY_NAME,
                                          self.LIBRARY_FUNCTIONS)

    def _get_strings(self, fname):
        return ''.join(binwalk.core.common.strings(fname, minimum=10))

    def _show_result(self, match, fname):
        if self.abspath:
            fname = os.path.abspath(fname)

        # Add description string padding for alignment
        if match < 100:
            fname = ' ' + fname
        if match < 10:
            fname = ' ' + fname

        self.result(percentage=match, description=fname, plot=False)

    def _compare_files(self, file1, file2):
        '''
        Fuzzy diff two files.
            
        @file1 - The first file to diff.
        @file2 - The second file to diff.
    
        Returns the match percentage.    
        Returns None on error.
        '''
        status = 0
        file1_dup = False
        file2_dup = False

        if not self.filter_by_name or os.path.basename(
                file1) == os.path.basename(file2):
            if os.path.exists(file1) and os.path.exists(file2):

                hash1 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT)
                hash2 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT)

                # Check if the last file1 or file2 matches this file1 or file2; no need to re-hash if they match.
                if file1 == self.last_file1.name and self.last_file1.hash:
                    file1_dup = True
                else:
                    self.last_file1.name = file1

                if file2 == self.last_file2.name and self.last_file2.hash:
                    file2_dup = True
                else:
                    self.last_file2.name = file2

                try:
                    if self.strings:
                        if file1_dup:
                            file1_strings = self.last_file1.strings
                        else:
                            self.last_file1.strings = file1_strings = self._get_strings(
                                file1)

                        if file2_dup:
                            file2_strings = self.last_file2.strings
                        else:
                            self.last_file2.strings = file2_strings = self._get_strings(
                                file2)

                        if file1_strings == file2_strings:
                            return 100
                        else:
                            if file1_dup:
                                hash1 = self.last_file1.hash
                            else:
                                status |= self.lib.fuzzy_hash_buf(
                                    file1_strings, len(file1_strings), hash1)

                            if file2_dup:
                                hash2 = self.last_file2.hash
                            else:
                                status |= self.lib.fuzzy_hash_buf(
                                    file2_strings, len(file2_strings), hash2)

                    else:
                        if file1_dup:
                            hash1 = self.last_file1.hash
                        else:
                            status |= self.lib.fuzzy_hash_filename(
                                file1, hash1)

                        if file2_dup:
                            hash2 = self.last_file2.hash
                        else:
                            status |= self.lib.fuzzy_hash_filename(
                                file2, hash2)

                    if status == 0:
                        if not file1_dup:
                            self.last_file1.hash = hash1
                        if not file2_dup:
                            self.last_file2.hash = hash2

                        if hash1.raw == hash2.raw:
                            return 100
                        else:
                            return self.lib.fuzzy_compare(hash1, hash2)
                except Exception as e:
                    binwalk.core.common.warning(
                        "Exception while doing fuzzy hash: %s" % str(e))

        return None

    def is_match(self, match):
        '''
        Returns True if this is a good match.
        Returns False if this is not a good match.
        '''
        return (match is not None
                and ((match >= self.cutoff and self.same) or
                     (match < self.cutoff and not self.same)))

    def _get_file_list(self, directory):
        '''
        Generates a directory tree.

        @directory - The root directory to start from.

        Returns a set of file paths, excluding the root directory.
        '''
        file_list = []

        # Normalize directory path so that we can exclude it from each individual file path
        directory = os.path.abspath(directory) + os.path.sep

        for (root, dirs, files) in os.walk(directory):
            # Don't include the root directory in the file paths
            root = ''.join(root.split(directory, 1)[1:])

            # Get a list of files, with or without symlinks as specified during __init__.
            # Note the symlink check must be done against the full path, not the bare file name.
            files = [
                os.path.join(root, f) for f in files
                if self.symlinks or not os.path.islink(os.path.join(directory, root, f))
            ]

            file_list += files

        return set(file_list)

    def hash_files(self, needle, haystack):
        '''
        Compare one file against a list of other files.
        
        Matches are reported via self.result(); returns None.
        '''
        self.total = 0

        for f in haystack:
            m = self._compare_files(needle, f)
            if m is not None and self.is_match(m):
                self._show_result(m, f)

                self.total += 1
                if self.max_results and self.total >= self.max_results:
                    break

    def hash_file(self, needle, haystack):
        '''
        Search for one file inside one or more directories.

        Returns a list of (match percentage, file path) tuples.
        '''
        matching_files = []
        self.total = 0
        done = False

        for directory in haystack:
            for f in self._get_file_list(directory):
                f = os.path.join(directory, f)
                m = self._compare_files(needle, f)
                if m is not None and self.is_match(m):
                    self._show_result(m, f)
                    matching_files.append((m, f))

                    self.total += 1
                    if self.max_results and self.total >= self.max_results:
                        done = True
                        break
            if done:
                break

        return matching_files

    def hash_directories(self, needle, haystack):
        '''
        Compare the contents of one directory with the contents of other directories.

        Matches are reported via self.result(); returns None.
        '''
        done = False
        self.total = 0

        source_files = self._get_file_list(needle)

        for directory in haystack:
            dir_files = self._get_file_list(directory)

            for source_file in source_files:
                for dir_file in dir_files:
                    file1 = os.path.join(needle, source_file)
                    file2 = os.path.join(directory, dir_file)

                    m = self._compare_files(file1, file2)
                    if m is not None and self.is_match(m):
                        self._show_result(m, "%s => %s" % (file1, file2))

                        self.total += 1
                        if self.max_results and self.total >= self.max_results:
                            done = True
                            break
            if done:
                break

    def run(self):
        '''
        Main module method.
        '''
        # Access the raw self.config.files list directly here, since we accept both
        # files and directories and self.next_file only works for files.
        needle = self.config.files[0]
        haystack = self.config.files[1:]

        self.header()

        if os.path.isfile(needle):
            if os.path.isfile(haystack[0]):
                self.hash_files(needle, haystack)
            else:
                self.hash_file(needle, haystack)
        else:
            self.hash_directories(needle, haystack)

        self.footer()

        return True
Example #11
class Extractor(Module):
    '''
    Extractor class, responsible for extracting files from the target file and executing external applications, if requested.
    '''
    # Extract rules are delimited with a colon.
    # <case insensitive matching string>:<file extension>[:<command to run>]
    RULE_DELIM = ':'

    # Comments in the extract.conf files start with a pound
    COMMENT_DELIM = '#'

    # Place holder for the extracted file name in the command
    FILE_NAME_PLACEHOLDER = '%e'

    TITLE = 'Extraction'
    ORDER = 9
    PRIMARY = False

    CLI = [
        Option(short='e',
               long='extract',
               kwargs={
                   'load_default_rules': True,
                   'enabled': True
               },
               description='Automatically extract known file types'),
        Option(
            short='D',
            long='dd',
            type=list,
            dtype='type:ext:cmd',
            kwargs={
                'manual_rules': [],
                'enabled': True
            },
            description=
            'Extract <type> signatures, give the files an extension of <ext>, and execute <cmd>'
        ),
        Option(short='M',
               long='matryoshka',
               kwargs={'matryoshka': 8},
               description='Recursively scan extracted files'),
        Option(short='d',
               long='depth',
               type=int,
               kwargs={'matryoshka': 0},
               description=
               'Limit matryoshka recursion depth (default: 8 levels deep)'),
        Option(short='j',
               long='max-size',
               type=int,
               kwargs={'max_size': 0},
               description='Limit the size of each extracted file'),
        Option(
            short='r',
            long='rm',
            kwargs={'remove_after_execute': True},
            description='Cleanup extracted / zero-size files after extraction'
        ),
        Option(
            short='z',
            long='carve',
            kwargs={'run_extractors': False},
            description=
            "Carve data from files, but don't execute extraction utilities"),
    ]

    KWARGS = [
        Kwarg(name='max_size', default=None),
        Kwarg(name='remove_after_execute', default=False),
        Kwarg(name='load_default_rules', default=False),
        Kwarg(name='run_extractors', default=True),
        Kwarg(name='manual_rules', default=[]),
        Kwarg(name='matryoshka', default=0),
        Kwarg(name='enabled', default=False),
    ]

    def load(self):
        # Holds a list of extraction rules loaded either from a file or when manually specified.
        self.extract_rules = []

        if self.load_default_rules:
            self.load_defaults()

        for manual_rule in self.manual_rules:
            self.add_rule(manual_rule)

    def reset(self):
        # Holds a list of pending files that should be scanned; only populated if self.matryoshka == True
        self.pending = []
        # Holds a dictionary of extraction directories created for each scanned file.
        self.extraction_directories = {}
        # Holds a dictionary of the last directory listing for a given directory; used for identifying
        # newly created/extracted files that need to be appended to self.pending.
        self.last_directory_listing = {}
        # Set to the directory path of the first extracted directory; this allows us to track recursion depth.
        self.base_recursion_dir = ""

    def callback(self, r):
        # Make sure the file attribute is set to a compatible instance of binwalk.core.common.BlockFile
        try:
            r.file.size
        except KeyboardInterrupt as e:
            pass
        except Exception as e:
            return

        if not r.size:
            size = r.file.size - r.offset
        else:
            size = r.size

        # Only extract valid results displayed to the user and marked for extraction
        if r.valid and r.display and r.extract:
            # Do the extraction
            (extraction_directory,
             dd_file) = self.extract(r.offset, r.description, r.file.name,
                                     size, r.name)

            # If the extraction was successful, self.extract will have returned the output directory and name of the dd'd file
            if extraction_directory and dd_file:
                # Get the full path to the dd'd file
                dd_file_path = os.path.join(extraction_directory, dd_file)

                # Do a directory listing of the output directory
                directory_listing = set(os.listdir(extraction_directory))

                # If this is a newly created output directory, self.last_directory_listing won't have a record of it.
                # If we've extracted other files to this directory before, it will.
                if not has_key(self.last_directory_listing,
                               extraction_directory):
                    self.last_directory_listing[extraction_directory] = set()

                # Loop through a list of newly created files (i.e., files that weren't listed in the last directory listing)
                for f in directory_listing.difference(
                        self.last_directory_listing[extraction_directory]):
                    # Build the full file path and add it to the extractor results
                    file_path = os.path.join(extraction_directory, f)
                    real_file_path = os.path.realpath(file_path)
                    self.result(description=file_path, display=False)

                    # If recursion was specified, and the file is not the same one we just dd'd, and if it is not a directory
                    if self.matryoshka and file_path != dd_file_path and not os.path.isdir(
                            file_path):
                        # If the recursion level of this file is less than or equal to our desired recursion level
                        if len(
                                real_file_path.split(
                                    self.base_recursion_dir)[1].split(
                                        os.path.sep)) <= self.matryoshka:
                            # Add the file to our list of pending files
                            self.pending.append(file_path)

                # Update the last directory listing for the next time we extract a file to this same output directory
                self.last_directory_listing[
                    extraction_directory] = directory_listing

    def append_rule(self, r):
        self.extract_rules.append(r.copy())

    def add_rule(self, txtrule=None, regex=None, extension=None, cmd=None):
        '''
        Adds a set of rules to the extraction rule list.

        @txtrule   - Rule string, or list of rule strings, in the format <regular expression>:<file extension>[:<command to run>]
        @regex     - If rule string is not specified, this is the regular expression string to use.
        @extension - If rule string is not specified, this is the file extension to use.
        @cmd       - If rule string is not specified, this is the command to run.
                     Alternatively a callable object may be specified, which will be passed one argument: the path to the file to extract.

        Returns None.
        '''
        rules = []
        match = False
        r = {'extension': '', 'cmd': '', 'regex': None}

        # Process single explicitly specified rule
        if not txtrule and regex and extension:
            r['extension'] = extension
            r['regex'] = re.compile(regex)
            if cmd:
                r['cmd'] = cmd

            self.append_rule(r)
            return

        # Process rule string, or list of rule strings
        if not isinstance(txtrule, list):
            rules = [txtrule]
        else:
            rules = txtrule

        for rule in rules:
            # Reset per-rule state so a failed parse of this rule can't
            # inherit values from a previously parsed rule.
            match = False
            r['cmd'] = ''
            r['extension'] = ''

            try:
                values = self._parse_rule(rule)
                match = values[0]
                r['regex'] = re.compile(values[0])
                r['extension'] = values[1]
                r['cmd'] = values[2]
            except KeyboardInterrupt as e:
                raise e
            except Exception:
                pass

            # Verify that the match string was retrieved.
            if match:
                self.append_rule(r)

    def remove_rule(self, text):
        '''
        Remove all rules that match a specified text.

        @text - The text to match against.

        Returns the number of rules removed.
        '''
        rm = []

        for i in range(0, len(self.extract_rules)):
            if self.extract_rules[i]['regex'].match(text):
                rm.append(i)

        # Pop in reverse order so earlier removals don't shift the indices
        # of entries that still need to be removed.
        for i in reversed(rm):
            self.extract_rules.pop(i)

        return len(rm)

    def clear_rules(self):
        '''
        Deletes all extraction rules.

        Returns None.
        '''
        self.extract_rules = []

    def get_rules(self):
        '''
        Returns a list of all extraction rules.
        '''
        return self.extract_rules

    def load_from_file(self, fname):
        '''
        Loads extraction rules from the specified file.

        @fname - Path to the extraction rule file.
        
        Returns None.
        '''
        try:
            # Process each line from the extract file, ignoring comments
            with open(fname, 'r') as f:
                for rule in f.readlines():
                    self.add_rule(rule.split(self.COMMENT_DELIM, 1)[0])
        except KeyboardInterrupt as e:
            raise e
        except Exception as e:
            raise Exception(
                "Extractor.load_from_file failed to load file '%s': %s" %
                (fname, str(e)))

    def load_defaults(self):
        '''
        Loads default extraction rules from the user and system extract.conf files.

        Returns None.
        '''
        # Load the user extract file first to ensure its rules take precedence.
        extract_files = [
            self.config.settings.get_file_path(
                'user', self.config.settings.EXTRACT_FILE),
            self.config.settings.get_file_path(
                'system', self.config.settings.EXTRACT_FILE),
        ]

        for extract_file in extract_files:
            if extract_file:
                try:
                    self.load_from_file(extract_file)
                except KeyboardInterrupt as e:
                    raise e
                except Exception as e:
                    if self.config.verbose:
                        raise Exception(
                            "Extractor.load_defaults failed to load file '%s': %s"
                            % (extract_file, str(e)))

    def build_output_directory(self, path):
        '''
        Set the output directory for extracted files.

        @path - The path to the file that data will be extracted from.

        Returns None.
        '''
        # If we have not already created an output directory for this target file, create one now
        if not has_key(self.extraction_directories, path):
            output_directory = os.path.join(
                os.path.dirname(path),
                unique_file_name('_' + os.path.basename(path),
                                 extension='extracted'))

            if not os.path.exists(output_directory):
                os.mkdir(output_directory)

            self.extraction_directories[path] = output_directory
        # Else, just use the already created directory
        else:
            output_directory = self.extraction_directories[path]

        # Set the initial base extraction directory for later determining the level of recursion
        if not self.base_recursion_dir:
            self.base_recursion_dir = os.path.realpath(
                output_directory) + os.path.sep

        return output_directory

    def cleanup_extracted_files(self, tf=None):
        '''
        Set the action to take after a file is extracted.

        @tf - If set to True, extracted files will be cleaned up after running a command against them.
              If set to False, extracted files will not be cleaned up after running a command against them.
              If set to None or not specified, the current setting will not be changed.

        Returns the current cleanup status (True/False).
        '''
        if tf is not None:
            self.remove_after_execute = tf

        return self.remove_after_execute

    def extract(self, offset, description, file_name, size, name=None):
        '''
        Extract an embedded file from the target file, if it matches an extract rule.
        Called automatically by Binwalk.scan().

        @offset      - Offset inside the target file to begin the extraction.
        @description - Description of the embedded file to extract, as returned by libmagic.
        @file_name   - Path to the target file.
        @size        - Number of bytes to extract.
        @name        - Name to save the file as.

        Returns a tuple of (output directory, extracted file name).
        Returns (None, None) if nothing was extracted.
        '''
        fname = ''
        cleanup_extracted_fname = True
        original_dir = os.getcwd()
        rules = self._match(description)
        file_path = os.path.realpath(file_name)

        # No extraction rules for this file
        if not rules:
            return (None, None)

        output_directory = self.build_output_directory(file_name)

        # Extract to end of file if no size was specified
        if not size:
            size = file_size(file_path) - offset

        if os.path.isfile(file_path):
            os.chdir(output_directory)

            # Loop through each extraction rule until one succeeds
            for i in range(0, len(rules)):
                rule = rules[i]

                # Copy out the data to disk, if we haven't already
                fname = self._dd(file_path,
                                 offset,
                                 size,
                                 rule['extension'],
                                 output_file_name=name)

                # If there was a command specified for this rule, try to execute it.
                # If execution fails, the next rule will be attempted.
                if rule['cmd']:

                    # Many extraction utilities will extract the file to a new file, just without
                    # the file extension (i.e., myfile.7z -> myfile). If the presumed resulting
                    # file name already exists before executing the extract command, do not attempt
                    # to clean it up even if its resulting file size is 0.
                    if self.remove_after_execute:
                        extracted_fname = os.path.splitext(fname)[0]
                        if os.path.exists(extracted_fname):
                            cleanup_extracted_fname = False

                    # Execute the specified command against the extracted file
                    if self.run_extractors:
                        extract_ok = self.execute(rule['cmd'], fname)
                    else:
                        extract_ok = True

                    # Only clean up files if remove_after_execute was specified
                    if extract_ok == True and self.remove_after_execute:

                        # Remove the original file that we extracted
                        try:
                            os.unlink(fname)
                        except KeyboardInterrupt as e:
                            raise e
                        except Exception as e:
                            pass

                        # If the command worked, assume it removed the file extension from the extracted file
                        # If the extracted file name file exists and is empty, remove it
                        if cleanup_extracted_fname and os.path.exists(
                                extracted_fname) and file_size(
                                    extracted_fname) == 0:
                            try:
                                os.unlink(extracted_fname)
                            except KeyboardInterrupt as e:
                                raise e
                            except Exception as e:
                                pass

                    # If the command executed OK, don't try any more rules
                    if extract_ok == True:
                        break
                    # Else, remove the extracted file if this isn't the last rule in the list.
                    # If it is the last rule, leave the file on disk for the user to examine.
                    elif i != (len(rules) - 1):
                        try:
                            os.unlink(fname)
                        except KeyboardInterrupt as e:
                            raise e
                        except Exception as e:
                            pass

                # If there was no command to execute, just use the first rule
                else:
                    break

            os.chdir(original_dir)

        return (output_directory, fname)

    def _entry_offset(self, index, entries, description):
        '''
        Gets the offset of the first entry that matches the description.

        @index       - Index into the entries list to begin searching.
        @entries     - List of (offset, [info, ...]) result entries.
        @description - Case insensitive description.

        Returns the offset, if a matching description is found.
        Returns -1 if a matching description is not found.
        '''
        description = description.lower()

        for (offset, infos) in entries[index:]:
            for info in infos:
                if info['description'].lower().startswith(description):
                    return offset
        return -1
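
    # Usage sketch (hypothetical data; `entries` is a list of
    # (offset, [info, ...]) tuples as described above):
    #
    #   entries = [(0x100, [{'description': 'Zip archive data'}]),
    #              (0x400, [{'description': 'gzip compressed data'}])]
    #   self._entry_offset(0, entries, 'GZIP')   # -> 0x400 (case insensitive)
    #   self._entry_offset(0, entries, 'tar')    # -> -1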

    def _match(self, description):
        '''
        Check to see if the provided description string matches an extract rule.
        Called internally by self.extract().

        @description - Description string to check.

        Returns a list of the matching rule dictionaries.
        Returns an empty list if no match is found.
        '''
        rules = []
        description = description.lower()

        for rule in self.extract_rules:
            if rule['regex'].search(description):
                rules.append(rule)
        return rules

    def _parse_rule(self, rule):
        '''
        Parses an extraction rule.

        @rule - Rule string.

        Returns a list of ['<case insensitive matching string>', '<file extension>', '<command to run>'].
        '''
        return rule.strip().split(self.RULE_DELIM, 2)
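
    # A quick illustration (a sketch; assumes the colon-delimited rule format,
    # i.e. RULE_DELIM == ':', with '%e' as the file name placeholder):
    #
    #   self._parse_rule("gzip compressed data:gz:gunzip '%e'")
    #   # -> ['gzip compressed data', 'gz', "gunzip '%e'"]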

    def _dd(self, file_name, offset, size, extension, output_file_name=None):
        '''
        Extracts a file embedded inside the target file.

        @file_name        - Path to the target file.
        @offset           - Offset inside the target file where the embedded file begins.
        @size             - Number of bytes to extract.
        @extension        - The file extension to assign to the extracted file on disk.
        @output_file_name - The requested name of the output file.

        Returns the extracted file name.
        '''
        total_size = 0
        # Default extracted file name is <hex offset>.<extension>
        default_bname = "%X" % offset

        if self.max_size and size > self.max_size:
            size = self.max_size

        if not output_file_name:
            bname = default_bname
        else:
            # Strip the output file name of invalid/dangerous characters (like file paths)
            bname = os.path.basename(output_file_name)

        fname = unique_file_name(bname, extension)

        try:
            # Open the target file and seek to the offset
            fdin = self.config.open_file(file_name, length=size, offset=offset)

            # Open the output file
            try:
                fdout = BlockFile(fname, 'w')
            except KeyboardInterrupt as e:
                raise e
            except Exception as e:
                # Fall back to the default name if the requested name fails
                fname = unique_file_name(default_bname, extension)
                fdout = BlockFile(fname, 'w')

            while total_size < size:
                (data, dlen) = fdin.read_block()
                if not data:
                    break
                else:
                    fdout.write(str2bytes(data[:dlen]))
                    total_size += dlen

            # Cleanup
            fdout.close()
            fdin.close()
        except KeyboardInterrupt as e:
            raise e
        except Exception as e:
            raise Exception(
                "Extractor.dd failed to extract data from '%s' to '%s': %s" %
                (file_name, fname, str(e)))

        return fname

    def execute(self, cmd, fname):
        '''
        Execute a command against the specified file.

        @cmd   - Command to execute.
        @fname - File to run command against.

        Returns True on success, False on failure, or None if the external extraction utility could not be found.
        '''
        tmp = None
        retval = True

        try:
            if callable(cmd):
                try:
                    cmd(fname)
                except KeyboardInterrupt as e:
                    raise e
                except Exception as e:
                    sys.stderr.write(
                        "WARNING: Extractor.execute failed to run internal extractor '%s': %s\n"
                        % (str(cmd), str(e)))
            else:
                # If not in verbose mode, create a temporary file to redirect stdout and stderr to
                if not self.config.verbose:
                    tmp = tempfile.TemporaryFile()

                # Replace all instances of FILE_NAME_PLACEHOLDER in the command with fname
                cmd = cmd.replace(self.FILE_NAME_PLACEHOLDER, fname)

                # Execute.
                if subprocess.call(shlex.split(cmd), stdout=tmp,
                                   stderr=tmp) == 0:
                    retval = True
                else:
                    retval = False
        except KeyboardInterrupt as e:
            raise e
        except Exception as e:
            # Silently ignore no such file or directory errors. Why? Because these will inevitably be raised when
            # making the switch to the new firmware mod kit directory structure. We handle this elsewhere, but it's
            # annoying to see this spammed out to the console every time.
            if self.config.verbose or (not hasattr(e, 'errno')
                                       or e.errno != 2):
                sys.stderr.write(
                    "WARNING: Extractor.execute failed to run external extractor '%s': %s\n"
                    % (str(cmd), str(e)))
            retval = None

        if tmp is not None:
            tmp.close()

        return retval
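
How these pieces fit together, as a minimal sketch (hypothetical names and offsets; assumes an Extractor-like instance `ex` with its extraction rules already loaded and a signature hit at the given offset/size):

    for rule in ex._match('gzip compressed data, was "rootfs.bin"'):
        # Carve the matched region to disk as <hex offset>.<extension>
        fname = ex._dd('firmware.bin', 0x40, 0x1000, rule['extension'])
        # Run the rule's command against the carved file; on success,
        # execute() returns True and no further rules are attempted
        if ex.execute(rule['cmd'], fname) is True:
            break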
Example #12
0
File: entropy.py Project: kisbuddy/binwalk
class Entropy(Module):

    XLABEL = 'Offset'
    YLABEL = 'Entropy'

    XUNITS = 'B'
    YUNITS = 'E'

    FILE_WIDTH = 1024
    FILE_FORMAT = 'png'

    COLORS = ['g', 'r', 'c', 'm', 'y']

    DEFAULT_BLOCK_SIZE = 1024
    DEFAULT_DATA_POINTS = 2048

    DEFAULT_TRIGGER_HIGH = .95
    DEFAULT_TRIGGER_LOW = .85

    TITLE = "Entropy"
    ORDER = 8

    # TODO: Add --dpoints option to set the number of data points?
    CLI = [
        Option(short='E',
               long='entropy',
               kwargs={'enabled': True},
               description='Calculate file entropy'),
        Option(short='F',
               long='fast',
               kwargs={'use_zlib': True},
               description='Use faster, but less detailed, entropy analysis'),
        Option(short='J',
               long='save',
               kwargs={'save_plot': True},
               description='Save plot as a PNG'),
        Option(short='Q',
               long='nlegend',
               kwargs={'show_legend': False},
               description='Omit the legend from the entropy plot graph'),
        Option(short='N',
               long='nplot',
               kwargs={'do_plot': False},
               description='Do not generate an entropy plot graph'),
        Option(
            short='H',
            long='high',
            type=float,
            kwargs={'trigger_high': DEFAULT_TRIGGER_HIGH},
            description=
            'Set the rising edge entropy trigger threshold (default: %.2f)' %
            DEFAULT_TRIGGER_HIGH),
        Option(
            short='L',
            long='low',
            type=float,
            kwargs={'trigger_low': DEFAULT_TRIGGER_LOW},
            description=
            'Set the falling edge entropy trigger threshold (default: %.2f)' %
            DEFAULT_TRIGGER_LOW),
    ]

    KWARGS = [
        Kwarg(name='enabled', default=False),
        Kwarg(name='save_plot', default=False),
        Kwarg(name='trigger_high', default=DEFAULT_TRIGGER_HIGH),
        Kwarg(name='trigger_low', default=DEFAULT_TRIGGER_LOW),
        Kwarg(name='use_zlib', default=False),
        Kwarg(name='display_results', default=True),
        Kwarg(name='do_plot', default=True),
        Kwarg(name='show_legend', default=True),
        Kwarg(name='block_size', default=0),
    ]

    # Run this module last so that it can process all other modules' results
    # and overlay them on the entropy graph
    PRIORITY = 0

    def init(self):
        self.HEADER[-1] = "ENTROPY"
        self.max_description_length = 0
        self.file_markers = {}

        if self.use_zlib:
            self.algorithm = self.gzip
        else:
            self.algorithm = self.shannon

        # Get a list of all other modules' results to mark on the entropy graph
        for (module, obj) in iterator(self.modules):
            for result in obj.results:
                if result.plot and result.file and result.description:
                    description = result.description.split(',')[0]

                    if not has_key(self.file_markers, result.file.name):
                        self.file_markers[result.file.name] = []

                    if len(description) > self.max_description_length:
                        self.max_description_length = len(description)

                    self.file_markers[result.file.name].append(
                        (result.offset, description))

        # If other modules have been run and they produced results, don't spam
        # the terminal with entropy results
        if self.file_markers:
            self.display_results = False

        if not self.block_size:
            if self.config.block:
                self.block_size = self.config.block
            else:
                self.block_size = None

    def _entropy_sigterm_handler(self, *args):
        print("F**k it all.")

    def run(self):
        self._run()

    def _run(self):
        # Sanity check and warning if matplotlib isn't found
        if self.do_plot:
            try:
                # If we're saving the plot to a file, configure matplotlib
                # to use the Agg back-end. This does not require an X server,
                # allowing users to generate plot files on headless systems.
                if self.save_plot:
                    import matplotlib as mpl
                    mpl.use('Agg')
                import matplotlib.pyplot as plt
            except ImportError as e:
                binwalk.core.common.warning(
                    "Failed to import matplotlib module, visual entropy graphing will be disabled"
                )
                self.do_plot = False

        for fp in iter(self.next_file, None):

            if self.display_results:
                self.header()

            self.calculate_file_entropy(fp)

            if self.display_results:
                self.footer()

    def calculate_file_entropy(self, fp):
        # Tracks the last displayed rising/falling edge (0 for falling, 1 for
        # rising, None if nothing has been printed yet)
        last_edge = None
        # Auto-reset the trigger; if True, an entropy above/below
        # self.trigger_high/self.trigger_low will be printed
        trigger_reset = True

        # Clear results from any previously analyzed files
        self.clear(results=True)

        # If -K was not specified, calculate the block size to create
        # DEFAULT_DATA_POINTS data points
        if self.block_size is None:
            block_size = fp.size / self.DEFAULT_DATA_POINTS
            # Round up to the nearest DEFAULT_BLOCK_SIZE (1024)
            block_size = int(block_size +
                             ((self.DEFAULT_BLOCK_SIZE - block_size) %
                              self.DEFAULT_BLOCK_SIZE))
        else:
            block_size = self.block_size

        # Make sure block size is greater than 0
        if block_size <= 0:
            block_size = self.DEFAULT_BLOCK_SIZE

        binwalk.core.common.debug("Entropy block size (%d data points): %d" %
                                  (self.DEFAULT_DATA_POINTS, block_size))
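
        # Worked example (a sketch): a 1 MiB file gives 1048576 / 2048 = 512,
        # and 512 + ((1024 - 512) % 1024) = 1024; a 10 MiB file gives 5120,
        # which is already a multiple of 1024 and so is left unchanged.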

        while True:
            file_offset = fp.tell()

            (data, dlen) = fp.read_block()
            if dlen < 1:
                break

            i = 0
            while i < dlen:
                entropy = self.algorithm(data[i:i + block_size])
                display = self.display_results
                description = "%f" % entropy

                if not self.config.verbose:
                    if last_edge in [None, 0] and entropy > self.trigger_low:
                        trigger_reset = True
                    elif last_edge in [None, 1] and entropy < self.trigger_high:
                        trigger_reset = True

                    if trigger_reset and entropy >= self.trigger_high:
                        description = "Rising entropy edge (%f)" % entropy
                        display = self.display_results
                        last_edge = 1
                        trigger_reset = False
                    elif trigger_reset and entropy <= self.trigger_low:
                        description = "Falling entropy edge (%f)" % entropy
                        display = self.display_results
                        last_edge = 0
                        trigger_reset = False
                    else:
                        display = False
                        description = "%f" % entropy

                r = self.result(offset=(file_offset + i),
                                file=fp,
                                entropy=entropy,
                                description=description,
                                display=display)

                i += block_size

        if self.do_plot:
            self.plot_entropy(fp.name)

    def shannon(self, data):
        '''
        Performs a Shannon entropy analysis on a given block of data.
        '''
        entropy = 0

        if data:
            length = len(data)

            seen = dict(((chr(x), 0) for x in range(0, 256)))
            for byte in data:
                seen[byte] += 1

            for x in range(0, 256):
                p_x = float(seen[chr(x)]) / length
                if p_x > 0:
                    entropy -= p_x * math.log(p_x, 2)

        return (entropy / 8)
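
    # Sanity check (not part of the module): for uniformly random data each
    # p_x is 1/256, so entropy = -256 * (1/256) * log2(1/256) = 8 bits/byte,
    # which the final division by 8 normalizes to 1.0; a block consisting of
    # a single repeated byte value yields 0.0.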

    def gzip(self, data, truncate=True):
        '''
        Performs an entropy analysis based on zlib compression ratio.
        This is faster than the shannon entropy analysis, but not as accurate.
        '''
        # Entropy is a simple ratio of: <zlib compressed size> / <original
        # size>
        e = float(len(zlib.compress(str2bytes(data), 9))) / float(len(data))

        if truncate and e > 1.0:
            e = 1.0

        return e
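
    # E.g., a block of zeros compresses down to a few bytes (e near 0.0),
    # while already-compressed or encrypted data is incompressible (e near,
    # or slightly above, 1.0 before truncation).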

    def plot_entropy(self, fname):
        try:
            import matplotlib.pyplot as plt
        except ImportError as e:
            return

        i = 0
        x = []
        y = []
        plotted_colors = {}

        for r in self.results:
            x.append(r.offset)
            y.append(r.entropy)

        fig = plt.figure()

        # axisbg is deprecated, but older versions of matplotlib don't support facecolor.
        # This tries facecolor first, thus preventing the annoying deprecation warnings,
        # and falls back to axisbg if that fails.
        try:
            ax = fig.add_subplot(1, 1, 1, autoscale_on=True, facecolor='black')
        except AttributeError:
            ax = fig.add_subplot(1, 1, 1, autoscale_on=True, axisbg='black')

        ax.set_title(self.TITLE)
        ax.set_xlabel(self.XLABEL)
        ax.set_ylabel(self.YLABEL)
        ax.plot(x, y, 'y', lw=2)

        # Add a fake, invisible plot entry so that offsets at/near the
        # minimum x value (0) are actually visible on the plot.
        ax.plot(-(max(x) * .001), 1.1, lw=0)
        ax.plot(-(max(x) * .001), 0, lw=0)

        if self.show_legend and has_key(self.file_markers, fname):
            for (offset, description) in self.file_markers[fname]:
                # If this description has already been plotted at a different offset, we need to
                # use the same color for the marker, but set the description to None to prevent
                # duplicate entries in the graph legend.
                #
                # Else, get the next color and use it to mark descriptions of
                # this type.
                if has_key(plotted_colors, description):
                    color = plotted_colors[description]
                    description = None
                else:
                    color = self.COLORS[i]
                    plotted_colors[description] = color

                    i += 1
                    if i >= len(self.COLORS):
                        i = 0

                ax.plot([offset, offset], [0, 1.1],
                        '%s-' % color,
                        lw=2,
                        label=description)

            ax.legend(loc='lower right', shadow=True)

        if self.save_plot:
            out_file = os.path.join(os.getcwd(),
                                    os.path.basename(fname)) + '.png'
            fig.savefig(out_file)
        else:
            plt.show()
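
The long option names above double as keyword arguments in binwalk's scripting API. A minimal usage sketch (assuming the standard binwalk.scan() interface; 'firmware.bin' is a placeholder path):

    import binwalk

    # Roughly equivalent to: binwalk -E -F -J firmware.bin
    for module in binwalk.scan('firmware.bin', entropy=True, fast=True, save=True):
        for result in module.results:
            print("0x%.8X    %s" % (result.offset, result.description))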
Example #13
0
class General(Module):

    TITLE = "General"
    ORDER = 0

    DEFAULT_DEPENDS = []

    CLI = [
        Option(long='length',
               short='l',
               type=int,
               kwargs={'length': 0},
               description='Number of bytes to scan'),
        Option(long='offset',
               short='o',
               type=int,
               kwargs={'offset': 0},
               description='Start scan at this file offset'),
        Option(long='block',
               short='K',
               type=int,
               kwargs={'block': 0},
               description='Set file block size'),
        Option(
            long='continue',
            short='k',
            kwargs={'keep_going': True},
            description='Show all matches for every offset, not just the first'
        ),
        Option(long='swap',
               short='g',
               type=int,
               kwargs={'swap_size': 0},
               description='Reverse every n bytes before scanning'),
        Option(short='I',
               long='invalid',
               kwargs={'show_invalid': True},
               description='Show results marked as invalid'),
        Option(short='x',
               long='exclude',
               kwargs={'exclude_filters': []},
               type=list,
               dtype=str.__name__,
               description='Exclude results that match <str>'),
        Option(short='y',
               long='include',
               kwargs={'include_filters': []},
               type=list,
               dtype=str.__name__,
               description='Only show results that match <str>'),
        Option(long='log',
               short='f',
               type=argparse.FileType,
               kwargs={'log_file': None},
               description='Log results to file'),
        Option(long='csv',
               short='c',
               kwargs={'csv': True},
               description='Log results to file in CSV format'),
        Option(long='term',
               short='t',
               kwargs={'format_to_terminal': True},
               description='Format output to fit the terminal window'),
        Option(long='quiet',
               short='q',
               kwargs={'quiet': True},
               description='Suppress output to stdout'),
        Option(long='verbose',
               short='v',
               kwargs={'verbose': True},
               description='Enable verbose output'),
        Option(short='h',
               long='help',
               kwargs={'show_help': True},
               description='Show help output'),
        Option(long=None,
               short=None,
               type=binwalk.core.common.BlockFile,
               kwargs={'files': []}),
    ]

    KWARGS = [
        Kwarg(name='length', default=0),
        Kwarg(name='offset', default=0),
        Kwarg(name='block', default=0),
        Kwarg(name='swap_size', default=0),
        Kwarg(name='show_invalid', default=False),
        Kwarg(name='include_filters', default=[]),
        Kwarg(name='exclude_filters', default=[]),
        Kwarg(name='log_file', default=None),
        Kwarg(name='csv', default=False),
        Kwarg(name='format_to_terminal', default=False),
        Kwarg(name='quiet', default=False),
        Kwarg(name='verbose', default=False),
        Kwarg(name='files', default=[]),
        Kwarg(name='show_help', default=False),
        Kwarg(name='keep_going', default=False),
    ]

    PRIMARY = False

    def load(self):
        self.target_files = []

        # Order is important with these two methods
        self._open_target_files()
        self._set_verbosity()

        #self.filter = binwalk.core.filter.Filter(self._display_invalid)
        self.filter = binwalk.core.filter.Filter(self.show_invalid)

        # Set any specified include/exclude filters
        for regex in self.exclude_filters:
            self.filter.exclude(regex)
        for regex in self.include_filters:
            self.filter.include(regex)

        self.settings = binwalk.core.settings.Settings()
        self.display = binwalk.core.display.Display(
            log=self.log_file,
            csv=self.csv,
            quiet=self.quiet,
            verbose=self.verbose,
            filter=self.filter,
            fit_to_screen=self.format_to_terminal)

        if self.show_help:
            show_help()
            if not binwalk.core.idb.LOADED_IN_IDA:
                sys.exit(0)

    def reset(self):
        for fp in self.target_files:
            fp.reset()

    def __del__(self):
        self._cleanup()

    def __exit__(self, a, b, c):
        self._cleanup()

    def _cleanup(self):
        if hasattr(self, 'target_files'):
            for fp in self.target_files:
                fp.close()

    def _set_verbosity(self):
        '''
        Sets the appropriate verbosity.
        Must be called after self._open_target_files so that self.target_files is properly set.
        '''
        # If more than one target file was specified, enable verbose mode; otherwise,
        # some outputs would give no indication of which scan corresponds to which file.
        if len(self.target_files) > 1 and not self.verbose:
            self.verbose = True

    def open_file(self,
                  fname,
                  length=None,
                  offset=None,
                  swap=None,
                  block=None,
                  peek=None):
        '''
        Opens the specified file with all pertinent configuration settings.
        '''
        if length is None:
            length = self.length
        if offset is None:
            offset = self.offset
        if swap is None:
            swap = self.swap_size

        return binwalk.core.common.BlockFile(fname,
                                             length=length,
                                             offset=offset,
                                             swap=swap,
                                             block=block,
                                             peek=peek)

    def _open_target_files(self):
        '''
        Checks if the target files can be opened.
        Any files that cannot be opened are excluded from the self.target_files list.
        '''
        # Validate the target files listed in target_files
        for tfile in self.files:
            # Ignore directories.
            if not os.path.isdir(tfile):
                # Make sure we can open the target files
                try:
                    self.target_files.append(self.open_file(tfile))
                except KeyboardInterrupt as e:
                    raise e
                except Exception as e:
                    self.error(description="Cannot open file : %s" % str(e))
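
A note on the filter options above: -y/--include and -x/--exclude each take a regular expression and may be given multiple times. A sketch of an equivalent CLI invocation (file names are placeholders):

    # Show only filesystem-related results, except JFFS2 hits, across two
    # files; scanning more than one file also auto-enables verbose mode,
    # per _set_verbosity above
    binwalk -y 'filesystem' -x 'jffs2' firmware1.bin firmware2.bin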
Example #14
0
class HeuristicCompressionAnalyzer(Module):
    '''
    Performs a chi-square analysis of high entropy data blocks and attempts to
    classify them as either compressed or encrypted.
    '''

    BLOCK_SIZE = 32
    CHI_CUTOFF = 512
    ENTROPY_TRIGGER = .90
    MIN_BLOCK_SIZE = 4096
    BLOCK_OFFSET = 1024
    ENTROPY_BLOCK_SIZE = 1024

    TITLE = "Heuristic Compression"

    DEPENDS = [
        Dependency(name='Entropy',
                   attribute='entropy',
                   kwargs={
                       'enabled': True,
                       'do_plot': False,
                       'display_results': False,
                       'block_size': ENTROPY_BLOCK_SIZE
                   }),
    ]

    CLI = [
        Option(short='H',
               long='heuristic',
               kwargs={'enabled': True},
               description='Heuristically classify high entropy data'),
        Option(short='a',
               long='trigger',
               kwargs={'trigger_level': 0},
               type=float,
               description=
               'Set the entropy trigger level (0.0 - 1.0, default: %.2f)' %
               ENTROPY_TRIGGER),
    ]

    KWARGS = [
        Kwarg(name='enabled', default=False),
        Kwarg(name='trigger_level', default=ENTROPY_TRIGGER),
    ]

    def init(self):
        self.blocks = {}

        self.HEADER[-1] = "HEURISTIC ENTROPY ANALYSIS"

        # Trigger level sanity check
        if self.trigger_level > 1.0:
            self.trigger_level = 1.0
        elif self.trigger_level < 0.0:
            self.trigger_level = 0.0

        if self.config.block:
            self.block_size = self.config.block
        else:
            self.block_size = self.BLOCK_SIZE

        for result in self.entropy.results:
            if not has_key(self.blocks, result.file.name):
                self.blocks[result.file.name] = []

            if result.entropy >= self.trigger_level and (
                    not self.blocks[result.file.name]
                    or self.blocks[result.file.name][-1].end is not None):
                self.blocks[result.file.name].append(
                    EntropyBlock(start=result.offset + self.BLOCK_OFFSET))
            elif result.entropy < self.trigger_level \
                    and self.blocks[result.file.name] \
                    and self.blocks[result.file.name][-1].end is None:
                self.blocks[result.file.name][-1].end = result.offset - self.BLOCK_OFFSET

    def run(self):
        for fp in iter(self.next_file, None):

            if has_key(self.blocks, fp.name):

                self.header()

                for block in self.blocks[fp.name]:

                    if block.end is None:
                        block.length = fp.offset + fp.length - block.start
                    else:
                        block.length = block.end - block.start

                    if block.length >= self.MIN_BLOCK_SIZE:
                        self.analyze(fp, block)

                self.footer()

    def analyze(self, fp, block):
        '''
        Performs a chi-square analysis of the block and interprets the result
        as either compressed or encrypted data.
        '''
        i = 0
        num_error = 0
        analyzer_results = []

        chi = ChiSquare()
        fp.seek(block.start)

        while i < block.length:
            j = 0
            (d, dlen) = fp.read_block()
            if not d:
                break

            while j < dlen:
                chi.reset()

                data = d[j:j + self.block_size]
                if len(data) < self.block_size:
                    break

                chi.update(data)

                if chi.chisq() >= self.CHI_CUTOFF:
                    num_error += 1

                j += self.block_size

                if (j + i) > block.length:
                    break

            i += dlen

        if num_error > 0:
            verdict = 'Moderate entropy data, best guess: compressed'
        else:
            verdict = 'High entropy data, best guess: encrypted'

        desc = '%s, size: %d, %d low entropy blocks' % (verdict, block.length,
                                                        num_error)
        self.result(offset=block.start, description=desc, file=fp)
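
The ChiSquare helper used above implements a chi-square test of the byte histogram against a uniform distribution. The gist, as a standalone sketch (a hypothetical function, not binwalk's actual implementation):

    def chisq(block):
        # Uniformly random (e.g., encrypted) data stays near the statistic's
        # 255 degrees of freedom; structured or compressed data spikes higher.
        # Assumes a non-empty block.
        expected = len(block) / 256.0
        counts = [0] * 256
        for b in block:
            counts[b if isinstance(b, int) else ord(b)] += 1
        return sum((c - expected) ** 2 / expected for c in counts)

Each 32-byte block whose statistic reaches CHI_CUTOFF (512) increments num_error, and any such block tips the verdict from "encrypted" to "compressed".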
Example #15
0
class Plotter(Module):
    '''
    Base class for visualizing binaries in Qt.
    Other plotter classes are derived from this.
    '''
    VIEW_DISTANCE = 1024
    MAX_2D_PLOT_POINTS = 12500
    MAX_3D_PLOT_POINTS = 25000

    TITLE = "Binary Visualization"

    CLI = [
            Option(short='3',
                   long='3D',
                   kwargs={'axis' : 3, 'enabled' : True},
                   description='Generate a 3D binary visualization'),
            Option(short='2',
                   long='2D',
                   kwargs={'axis' : 2, 'enabled' : True},
                   description='Project data points onto 3D cube walls only'),
            Option(short='Z',
                   long='points',
                   type=int,
                   kwargs={'max_points' : 0},
                   description='Set the maximum number of plotted data points'),
#            Option(short='V',
#                   long='grids',
#                   kwargs={'show_grids' : True},
#                   description='Display the x-y-z grids in the resulting plot'),
    ]

    KWARGS = [
            Kwarg(name='axis', default=3),
            Kwarg(name='max_points', default=0),
            Kwarg(name='show_grids', default=False),
            Kwarg(name='enabled', default=False),
    ]

    # There isn't really any useful data to print to console. Disable header and result output.
    HEADER = None
    RESULT = None

    def init(self):
        import pyqtgraph.opengl as gl
        from pyqtgraph.Qt import QtGui

        self.verbose = self.config.verbose
        self.offset = self.config.offset
        self.length = self.config.length
        self.plane_count = -1
        self.plot_points = None

        if self.axis == 2:
            self.MAX_PLOT_POINTS = self.MAX_2D_PLOT_POINTS
            self._generate_data_point = self._generate_2d_data_point
        elif self.axis == 3:
            self.MAX_PLOT_POINTS = self.MAX_3D_PLOT_POINTS
            self._generate_data_point = self._generate_3d_data_point
        else:
            raise Exception("Invalid Plotter axis specified: %d. Must be one of: [2,3]" % self.axis)

        if not self.max_points:
            self.max_points = self.MAX_PLOT_POINTS

        self.app = QtGui.QApplication([])
        self.window = gl.GLViewWidget()
        self.window.opts['distance'] = self.VIEW_DISTANCE

        if len(self.config.target_files) == 1:
            self.window.setWindowTitle(self.config.target_files[0])

    def _print(self, message):
        '''
        Print console messages. For internal use only.
        '''
        if self.verbose:
            print(message)

    def _generate_plot_points(self, data_points):
        '''
        Generates plot points from a list of data points.

        @data_points - A dictionary containing each unique point and its frequency of occurrence.

        Returns a set of plot points.
        '''
        total = 0
        min_weight = 0
        weightings = {}
        plot_points = {}

        # If the number of data points exceeds the maximum number of allowed data points, use a
        # weighting system to eliminate data points that occur less frequently.
        if sum(data_points.values()) > self.max_points:

            # First, generate a set of weight values 1 - 10
            for i in range(1, 11):
                weightings[i] = 0

            # Go through every data point and the number of times that point occurs
            for (point, count) in iterator(data_points):
                # For each data point, compare it to each remaining weight value
                for w in get_keys(weightings):

                    # If the number of times this data point occurred is >= the weight value,
                    # then increment the weight value. Since weight values are ordered lowest
                    # to highest, this means that more frequent data points also increment lower
                    # weight values. Thus, the more high-frequency data points there are, the
                    # more lower-frequency data points are eliminated.
                    if count >= w:
                        weightings[w] += 1
                    else:
                        break

                    # Throw out weight values that exceed the maximum number of data points
                    if weightings[w] > self.max_points:
                        del weightings[w]

                # If there's only one weight value left, no sense in continuing the loop...
                if len(weightings) == 1:
                    break

            # The least weighted value is our minimum weight
            min_weight = min(weightings)

            # Get rid of all data points that occur less frequently than our minimum weight
            for point in get_keys(data_points):
                if data_points[point] < min_weight:
                    del data_points[point]

        for point in sorted(data_points, key=data_points.get, reverse=True):
            plot_points[point] = data_points[point]
            # Register this as a result in case future modules need access to the raw point information,
            # but mark plot as False to prevent the entropy module from attempting to overlay this data on its graph.
            self.result(point=point, plot=False)
            total += 1
            if total >= self.max_points:
                break

        return plot_points
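
    # Weighting walk-through (a sketch): with max_points = 2 and data_points =
    # {A: 5, B: 3, C: 1, D: 1, E: 1}, the counts sum to 11 > 2. Processing A
    # and B increments weights 1-5 and 1-3 respectively; processing C pushes
    # weightings[1] to 3 > max_points, so weight 1 is discarded. min(weightings)
    # is then 2, so C, D, and E (count < 2) are dropped, leaving only A and B
    # to be plotted.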

    def _generate_data_point(self, data):
        '''
        Subclasses must override this to return the appropriate data point.

        @data - A string of data, self.axis bytes in length.

        Returns a data point tuple.
        '''
        return (0,0,0)

    def _generate_data_points(self, fp):
        '''
        Generates a dictionary of data points and their frequency of occurrence.

        @fp - The BlockFile object to generate data points from.

        Returns a dictionary.
        '''
        i = 0
        data_points = {}

        self._print("Generating data points for %s" % fp.name)

        # We don't need any extra data from BlockFile
        fp.set_block_size(peek=0)

        while True:
            (data, dlen) = fp.read_block()
            if not data or not dlen:
                break

            i = 0
            while (i+(self.axis-1)) < dlen:
                point = self._generate_data_point(data[i:i+self.axis])
                if has_key(data_points, point):
                    data_points[point] += 1
                else:
                    data_points[point] = 1
                i += self.axis  # advance by the number of bytes consumed per point

        return data_points

    def _generate_plot(self, plot_points):
        import numpy as np
        import pyqtgraph.opengl as gl

        nitems = float(len(plot_points))

        # numpy requires integer dimensions; nitems stays a float for the
        # frequency percentage calculation below
        pos = np.empty((int(nitems), 3))
        size = np.empty(int(nitems))
        color = np.empty((int(nitems), 4))

        i = 0
        for (point, weight) in iterator(plot_points):
            r = 0.0
            g = 0.0
            b = 0.0

            pos[i] = point
            frequency_percentage = (weight / nitems)

            # Give points that occur more frequently a brighter color and larger point size.
            # Frequency is determined as a percentage of total unique data points.
            if frequency_percentage > .010:
                size[i] = .20
                r = 1.0
            elif frequency_percentage > .005:
                size[i] = .15
                b = 1.0
            elif frequency_percentage > .002:
                size[i] = .10
                g = 1.0
                r = 1.0
            else:
                size[i] = .05
                g = 1.0

            color[i] = (r, g, b, 1.0)

            i += 1

        scatter_plot = gl.GLScatterPlotItem(pos=pos, size=size, color=color, pxMode=False)
        scatter_plot.translate(-127.5, -127.5, -127.5)

        return scatter_plot

    def plot(self, wait=True):
        import pyqtgraph.opengl as gl

        self.window.show()

        if self.show_grids:
            xgrid = gl.GLGridItem()
            ygrid = gl.GLGridItem()
            zgrid = gl.GLGridItem()

            self.window.addItem(xgrid)
            self.window.addItem(ygrid)
            self.window.addItem(zgrid)

            # Rotate x and y grids to face the correct direction
            xgrid.rotate(90, 0, 1, 0)
            ygrid.rotate(90, 1, 0, 0)

            # Scale grids to the appropriate dimensions
            xgrid.scale(12.8, 12.8, 12.8)
            ygrid.scale(12.8, 12.8, 12.8)
            zgrid.scale(12.8, 12.8, 12.8)

        for fd in iter(self.next_file, None):
            data_points = self._generate_data_points(fd)

            self._print("Generating plot points from %d data points" % len(data_points))

            self.plot_points = self._generate_plot_points(data_points)
            del data_points

            self._print("Generating graph from %d plot points" % len(self.plot_points))

            self.window.addItem(self._generate_plot(self.plot_points))

        if wait:
            self.wait()

    def wait(self):
        from pyqtgraph.Qt import QtCore, QtGui

        t = QtCore.QTimer()
        t.start(50)
        QtGui.QApplication.instance().exec_()

    def _generate_3d_data_point(self, data):
        '''
        Plot data points within a 3D cube.
        '''
        return (ord(data[0]), ord(data[1]), ord(data[2]))

    def _generate_2d_data_point(self, data):
        '''
        Plot data points projected on each cube face.
        '''
        self.plane_count += 1
        if self.plane_count > 5:
            self.plane_count = 0

        if self.plane_count == 0:
            return (0, ord(data[0]), ord(data[1]))
        elif self.plane_count == 1:
            return (ord(data[0]), 0, ord(data[1]))
        elif self.plane_count == 2:
            return (ord(data[0]), ord(data[1]), 0)
        elif self.plane_count == 3:
            return (255, ord(data[0]), ord(data[1]))
        elif self.plane_count == 4:
            return (ord(data[0]), 255, ord(data[1]))
        elif self.plane_count == 5:
            return (ord(data[0]), ord(data[1]), 255)
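
    # E.g., the byte string '\x10\x80\xff' maps to the 3D point (16, 128, 255);
    # in 2D mode, successive byte pairs are projected round-robin onto the six
    # cube faces (the planes x=0, y=0, z=0, x=255, y=255, and z=255).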

    def run(self):
        self.plot()
        return True
Example #16
0
class Disasm(Module):

    THRESHOLD = 10
    DEFAULT_MIN_INSN_COUNT = 500

    TITLE = "Disassembly Scan"
    ORDER = 10

    CLI = [
            Option(short='Y',
                   long='disasm',
                   kwargs={'enabled' : True},
                   description='Identify the CPU architecture of a file using the capstone disassembler'),
            Option(short='T',
                   long='minsn',
                   type=int,
                   kwargs={'min_insn_count' : 0},
                   description='Minimum number of consecutive instructions to be considered valid (default: %d)' % DEFAULT_MIN_INSN_COUNT),
            Option(long='continue',
                   short='k',
                   kwargs={'keep_going' : True},
                   description="Don't stop at the first match"),
          ]

    KWARGS = [
                Kwarg(name='enabled', default=False),
                Kwarg(name='keep_going', default=False),
                Kwarg(name='min_insn_count', default=DEFAULT_MIN_INSN_COUNT),
             ]

    ARCHITECTURES = [
                    Architecture(type=capstone.CS_ARCH_ARM,
                                 mode=capstone.CS_MODE_ARM,
                                 endianess=capstone.CS_MODE_BIG_ENDIAN,
                                 description="ARM executable code, 32-bit, big endian"),
                    Architecture(type=capstone.CS_ARCH_ARM,
                                 mode=capstone.CS_MODE_ARM,
                                 endianess=capstone.CS_MODE_LITTLE_ENDIAN,
                                 description="ARM executable code, 32-bit, little endian"),
                    Architecture(type=capstone.CS_ARCH_ARM64,
                                 mode=capstone.CS_MODE_ARM,
                                 endianess=capstone.CS_MODE_BIG_ENDIAN,
                                 description="ARM executable code, 64-bit, big endian"),
                    Architecture(type=capstone.CS_ARCH_ARM64,
                                 mode=capstone.CS_MODE_ARM,
                                 endianess=capstone.CS_MODE_LITTLE_ENDIAN,
                                 description="ARM executable code, 64-bit, little endian"),

                    Architecture(type=capstone.CS_ARCH_PPC,
                                 mode=capstone.CS_MODE_BIG_ENDIAN,
                                 endianess=capstone.CS_MODE_BIG_ENDIAN,
                                 description="PPC executable code, 32/64-bit, big endian"),

                    Architecture(type=capstone.CS_ARCH_MIPS,
                                 mode=capstone.CS_MODE_64,
                                 endianess=capstone.CS_MODE_BIG_ENDIAN,
                                 description="MIPS executable code, 32/64-bit, big endian"),
                    Architecture(type=capstone.CS_ARCH_MIPS,
                                 mode=capstone.CS_MODE_64,
                                 endianess=capstone.CS_MODE_LITTLE_ENDIAN,
                                 description="MIPS executable code, 32/64-bit, little endian"),

                    Architecture(type=capstone.CS_ARCH_ARM,
                                 mode=capstone.CS_MODE_THUMB,
                                 endianess=capstone.CS_MODE_LITTLE_ENDIAN,
                                 description="ARM executable code, 16-bit (Thumb), little endian"),
                    Architecture(type=capstone.CS_ARCH_ARM,
                                 mode=capstone.CS_MODE_THUMB,
                                 endianess=capstone.CS_MODE_BIG_ENDIAN,
                                 description="ARM executable code, 16-bit (Thumb), big endian"),
                    ]

    def init(self):
        self.disassemblers = []

        if not self.min_insn_count:
            self.min_insn_count = self.DEFAULT_MIN_INSN_COUNT

        self.disasm_data_size = self.min_insn_count * 10

        for arch in self.ARCHITECTURES:
            self.disassemblers.append((capstone.Cs(arch.type, (arch.mode + arch.endianess)), arch.description))
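
        # Sizing note (an assumption, not stated in the source): 10 bytes is a
        # conservative upper bound on instruction length for the architectures
        # above, so disasm_data_size bytes should always be enough raw data to
        # yield min_insn_count instructions if the block really is code.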

    def scan_file(self, fp):
        total_read = 0

        while True:
            result = None

            (data, dlen) = fp.read_block()
            if not data:
                break

            # If this data block doesn't contain at least two different bytes, skip it
            # to prevent false positives (e.g., "\x00\x00\x00\x00" is a nop in MIPS).
            if len(set(data)) >= 2:
                block_offset = 0

                # Loop through the entire block, or until we're pretty sure we've found some valid code in this block
                while (block_offset < dlen) and (result is None or result.count < self.THRESHOLD):
                    # Don't pass the entire data block into disasm_lite, it's horribly inefficient
                    # to pass large strings around in Python. Break it up into smaller code blocks instead.
                    code_block = binwalk.core.compat.str2bytes(data[block_offset:block_offset+self.disasm_data_size])

                    # If this code block doesn't contain at least two different bytes, skip it
                    # to prevent false positives (e.g., "\x00\x00\x00\x00" is a nop in MIPS).
                    if len(set(code_block)) >= 2:
                        for (md, description) in self.disassemblers:
                            insns = [insn for insn in md.disasm_lite(code_block, (total_read+block_offset))]
                            binwalk.core.common.debug("0x%.8X   %s, at least %d valid instructions" % ((total_read+block_offset),
                                                                                                        description,
                                                                                                        len(insns)))

                            # Did we disassemble at least self.min_insn_count instructions?
                            if len(insns) >= self.min_insn_count:
                                # If we've already found the same type of code in this block, simply update the result counter
                                if result and result.description == description:
                                    result.count += 1
                                    if result.count >= self.THRESHOLD:
                                        break
                                else:
                                    result = ArchResult(offset=total_read+block_offset+fp.offset,
                                                        description=description,
                                                        insns=insns,
                                                        count=1)

                    block_offset += 1
                    self.status.completed += 1

                if result is not None:
                    r = self.result(offset=result.offset,
                                    file=fp,
                                    description=(result.description + ", at least %d valid instructions" % len(result.insns)))

                    if r.valid and r.display:
                        if self.config.verbose:
                            for (position, size, mnem, opnds) in result.insns:
                                self.result(offset=position, file=fp, description="%s %s" % (mnem, opnds))
                        if not self.keep_going:
                            return

            total_read += dlen
            self.status.completed = total_read

    def run(self):
        for fp in iter(self.next_file, None):
            self.header()
            self.scan_file(fp)
            self.footer()
Example #17
0
class General(Module):

    TITLE = "General"
    ORDER = 0

    DEFAULT_DEPENDS = []

    CLI = [
        Option(long='length',
               short='l',
               type=int,
               kwargs={'length': 0},
               description='Number of bytes to scan'),
        Option(long='offset',
               short='o',
               type=int,
               kwargs={'offset': 0},
               description='Start scan at this file offset'),
        Option(long='base',
               short='O',
               type=int,
               kwargs={'base': 0},
               description='Add a base address to all printed offsets'),
        Option(long='block',
               short='K',
               type=int,
               kwargs={'block': 0},
               description='Set file block size'),
        Option(long='swap',
               short='g',
               type=int,
               kwargs={'swap_size': 0},
               description='Reverse every n bytes before scanning'),
        Option(long='log',
               short='f',
               type=argparse.FileType,
               kwargs={'log_file': None},
               description='Log results to file'),
        Option(long='csv',
               short='c',
               kwargs={'csv': True},
               description='Log results to file in CSV format'),
        Option(long='term',
               short='t',
               kwargs={'format_to_terminal': True},
               description='Format output to fit the terminal window'),
        Option(long='quiet',
               short='q',
               kwargs={'quiet': True},
               description='Suppress output to stdout'),
        Option(long='verbose',
               short='v',
               kwargs={'verbose': True},
               description='Enable verbose output'),
        Option(short='h',
               long='help',
               kwargs={'show_help': True},
               description='Show help output'),
        Option(long=None,
               short=None,
               type=binwalk.core.common.BlockFile,
               kwargs={'files': []}),

        # Hidden, API-only arguments
        Option(long="string",
               hidden=True,
               kwargs={'subclass': binwalk.core.common.StringFile}),
    ]

    KWARGS = [
        Kwarg(name='length', default=0),
        Kwarg(name='offset', default=0),
        Kwarg(name='base', default=0),
        Kwarg(name='block', default=0),
        Kwarg(name='swap_size', default=0),
        Kwarg(name='log_file', default=None),
        Kwarg(name='csv', default=False),
        Kwarg(name='format_to_terminal', default=False),
        Kwarg(name='quiet', default=False),
        Kwarg(name='verbose', default=False),
        Kwarg(name='files', default=[]),
        Kwarg(name='show_help', default=False),
        Kwarg(name='keep_going', default=False),
        Kwarg(name='subclass', default=io.FileIO),
    ]

    PRIMARY = False

    def load(self):
        self.target_files = []

        # A special case for when we're loaded into IDA
        if self.subclass == io.FileIO and binwalk.core.idb.LOADED_IN_IDA:
            self.subclass = binwalk.core.idb.IDBFileIO

        # Order is important with these two methods
        self._open_target_files()
        self._set_verbosity()

        self.settings = binwalk.core.settings.Settings()
        self.display = binwalk.core.display.Display(
            log=self.log_file,
            csv=self.csv,
            quiet=self.quiet,
            verbose=self.verbose,
            fit_to_screen=self.format_to_terminal)

        if self.show_help:
            show_help()
            if not binwalk.core.idb.LOADED_IN_IDA:
                sys.exit(0)

    def reset(self):
        pass
        #for fp in self.target_files:
        #    fp.reset()

    def __del__(self):
        self._cleanup()

    def __exit__(self, a, b, c):
        self._cleanup()

    def _cleanup(self):
        pass
        #if hasattr(self, 'target_files'):
        #    for fp in self.target_files:
        #        fp.close()

    def _set_verbosity(self):
        '''
        Sets the appropriate verbosity.
        Must be called after self._open_target_files so that self.target_files is properly set.
        '''
        # If more than one target file was specified, enable verbose mode; otherwise,
        # some outputs would give no indication of which scan corresponds to which file.
        if len(self.target_files) > 1 and not self.verbose:
            self.verbose = True

    def open_file(self,
                  fname,
                  length=None,
                  offset=None,
                  swap=None,
                  block=None,
                  peek=None):
        '''
        Opens the specified file with all pertinent configuration settings.
        '''
        if length is None:
            length = self.length
        if offset is None:
            offset = self.offset
        if swap is None:
            swap = self.swap_size

        return binwalk.core.common.BlockFile(fname,
                                             subclass=self.subclass,
                                             length=length,
                                             offset=offset,
                                             swap=swap,
                                             block=block,
                                             peek=peek)

    def _open_target_files(self):
        '''
        Checks if the target files can be opened.
        Any files that cannot be opened are excluded from the self.target_files list.
        '''
        # Validate the target files listed in target_files
        for tfile in self.files:
            # Ignore directories.
            if not self.subclass == io.FileIO or not os.path.isdir(tfile):
                # Make sure we can open the target files
                try:
                    fp = self.open_file(tfile)
                    fp.close()
                    self.target_files.append(tfile)
                except KeyboardInterrupt as e:
                    raise e
                except Exception as e:
                    self.error(description="Cannot open file : %s" % str(e))
Example #18
0
class CodeID(Module):

    DEFAULT_MIN_INSN_COUNT = 500

    TITLE = "Disassembly Scan"
    ORDER = 10

    CLI = [
        Option(
            short='Y',
            long='code',
            kwargs={'enabled': True},
            description=
            'Attempts to identify the CPU architecture of a file using the capstone disassembler'
        ),
        Option(
            short='T',
            long='minsn',
            type=int,
            kwargs={'min_insn_count': 0},
            description=
            'Minimum number of consecutive instructions to be considered valid (default: %d)'
            % DEFAULT_MIN_INSN_COUNT),
        Option(short='V',
               long='disasm',
               kwargs={'show_disasm': True},
               description='Display the disassembled instructions'),
    ]

    KWARGS = [
        Kwarg(name='enabled', default=False),
        Kwarg(name='show_disasm', default=False),
        Kwarg(name='min_insn_count', default=DEFAULT_MIN_INSN_COUNT),
    ]

    ARCHITECTURES = [
        Architecture(type=capstone.CS_ARCH_MIPS,
                     mode=capstone.CS_MODE_32,
                     endianess=capstone.CS_MODE_BIG_ENDIAN,
                     description="MIPS executable code, 32-bit, big endian"),
        Architecture(
            type=capstone.CS_ARCH_MIPS,
            mode=capstone.CS_MODE_32,
            endianess=capstone.CS_MODE_LITTLE_ENDIAN,
            description="MIPS executable code, 32-bit, little endian"),
        Architecture(type=capstone.CS_ARCH_ARM,
                     mode=capstone.CS_MODE_ARM,
                     endianess=capstone.CS_MODE_BIG_ENDIAN,
                     description="ARM executable code, 32-bit, big endian"),
        Architecture(type=capstone.CS_ARCH_ARM,
                     mode=capstone.CS_MODE_ARM,
                     endianess=capstone.CS_MODE_LITTLE_ENDIAN,
                     description="ARM executable code, 32-bit, little endian"),
        Architecture(type=capstone.CS_ARCH_PPC,
                     mode=capstone.CS_MODE_BIG_ENDIAN,
                     endianess=capstone.CS_MODE_BIG_ENDIAN,
                     description="PPC executable code, 32/64-bit, big endian"),

        #Architecture(type=capstone.CS_ARCH_MIPS,
        #             mode=capstone.CS_MODE_16,
        #             endianess=capstone.CS_MODE_BIG_ENDIAN,
        #             description="MIPS executable code, 16-bit, big endian"),
        #Architecture(type=capstone.CS_ARCH_MIPS,
        #             mode=capstone.CS_MODE_16,
        #             endianess=capstone.CS_MODE_LITTLE_ENDIAN,
        #             description="MIPSEL executable code, 16-bit, little endian"),
        Architecture(
            type=capstone.CS_ARCH_ARM,
            mode=capstone.CS_MODE_THUMB,
            endianess=capstone.CS_MODE_LITTLE_ENDIAN,
            description="ARM executable code, 16-bit (Thumb), little endian"),
        Architecture(
            type=capstone.CS_ARCH_ARM,
            mode=capstone.CS_MODE_THUMB,
            endianess=capstone.CS_MODE_BIG_ENDIAN,
            description="ARM executable code, 16-bit (Thumb), big endian"),
        Architecture(type=capstone.CS_ARCH_MIPS,
                     mode=capstone.CS_MODE_64,
                     endianess=capstone.CS_MODE_BIG_ENDIAN,
                     description="MIPS executable code, 64-bit, big endian"),
        Architecture(
            type=capstone.CS_ARCH_MIPS,
            mode=capstone.CS_MODE_64,
            endianess=capstone.CS_MODE_LITTLE_ENDIAN,
            description="MIPS executable code, 64-bit, little endian"),
        Architecture(type=capstone.CS_ARCH_ARM64,
                     mode=capstone.CS_MODE_ARM,
                     endianess=capstone.CS_MODE_BIG_ENDIAN,
                     description="ARM executable code, 64-bit, big endian"),
        Architecture(type=capstone.CS_ARCH_ARM64,
                     mode=capstone.CS_MODE_ARM,
                     endianess=capstone.CS_MODE_LITTLE_ENDIAN,
                     description="ARM executable code, 64-bit, little endian"),
    ]

    def init(self):
        self.disassemblers = []

        if not self.min_insn_count:
            self.min_insn_count = self.DEFAULT_MIN_INSN_COUNT

        self.disasm_data_size = self.min_insn_count * 10

        for arch in self.ARCHITECTURES:
            self.disassemblers.append(
                (capstone.Cs(arch.type,
                             (arch.mode + arch.endianess)), arch.description))

    def scan_file(self, fp):
        total_read = 0

        while True:
            (data, dlen) = fp.read_block()
            if not data:
                break

            # If this data block doesn't contain at least two different bytes, skip it
            # to prevent false positives (e.g., "\x00\x00\x00\x00" is a nop in MIPS).
            if len(set(data)) >= 2:
                block_offset = 0
                while block_offset < dlen:
                    # Don't pass the entire data block into disasm_lite, it's horribly inefficient
                    # to pass large strings around in Python. Break it up into smaller code blocks instead.
                    code_block = binwalk.core.compat.str2bytes(
                        data[block_offset:block_offset +
                             self.disasm_data_size])

                    # If this code block doesn't contain at least two different bytes, skip it
                    # to prevent false positives (e.g., "\x00\x00\x00\x00" is a nop in MIPS).
                    if len(set(code_block)) >= 2:
                        for (md, description) in self.disassemblers:
                            insns = [
                                insn for insn in md.disasm_lite(
                                    code_block, (total_read + block_offset))
                            ]
                            binwalk.core.common.debug(
                                "0x%.8X   %s, at least %d valid instructions" %
                                ((total_read + block_offset), description,
                                 len(insns)))

                            if len(insns) >= self.min_insn_count:
                                r = self.result(
                                    offset=total_read + block_offset,
                                    file=fp,
                                    description=(
                                        description +
                                        ", at least %d valid instructions" %
                                        len(insns)))
                                if r.valid and r.display:
                                    if self.show_disasm:
                                        for (position, size, mnem,
                                             opnds) in insns:
                                            self.result(offset=position,
                                                        file=fp,
                                                        description="\t%s %s" %
                                                        (mnem, opnds))
                                    if not self.config.verbose:
                                        return

                    block_offset += 1

            total_read += dlen

    def run(self):
        for fp in iter(self.next_file, None):
            self.header()
            self.scan_file(fp)
            self.footer()