コード例 #1
0
ファイル: coverage.py プロジェクト: wgwjifeng/lighthouse
class DatabaseCoverage(object):
    """
    Database level coverage mapping.
    """
    def __init__(self, palette, name="", filepath=None, data=None):

        # color palette
        self.palette = palette

        # the name of the DatabaseCoverage object
        self.name = name

        # the filepath this coverage data was sourced from
        self.filepath = filepath

        #
        # this is the coverage mapping's reference to the underlying database
        # metadata. it will use this for all its mapping operations.
        #
        # here we simply populate the DatabaseCoverage object with a stub
        # DatabaseMetadata object, but at runtime we will inject a fully
        # collected DatabaseMetadata object as maintained by the director.
        #

        self._metadata = DatabaseMetadata()

        #
        # the address hitmap is a dictionary that effectively holds the lowest
        # level representation of the original coverage data loaded from disk.
        #
        # as the name implies, the hitmap will track the number of times a
        # given address appeared in the original coverage data.
        #
        #  Eg:
        #      hitmap =
        #      {
        #          0x8040100: 1,
        #          0x8040102: 1,
        #          0x8040105: 3,
        #          0x8040108: 3,  # 0x8040108 was executed 3 times...
        #          0x804010a: 3,
        #          0x804010f: 1,
        #          ...
        #      }
        #
        # the hitmap gives us an interesting degree of flexibility with regard
        # to what data sources we can load coverage data from, and how we
        # choose to consume it (eg, visualize coverage, heatmaps, ...)
        #
        # using hitmap.keys(), we effectively have a coverage bitmap of all
        # the addresses executed in the coverage log
        #

        self._hitmap = build_hitmap(data)

        #
        # the coverage hash is a simple hash of the coverage mask (hitmap keys)
        #
        # it is primarily used by the director as a means of quickly comparing
        # two database coverage objects against each other, and speculating on
        # the output of logical/arithmetic operations of their coverage data.
        #
        # this hash will need to be recomputed via _update_coverage_hash()
        # anytime new coverage data is introduced to this object, or when the
        # hitmap is otherwise modified internally.
        #
        # this is necessary because we cache the coverage hash. computing the
        # hash on demand is expensive, and it really shouldn't changne often.
        #
        # see the usage of 'coverage_hash' in director.py for more info
        #

        self.coverage_hash = 0
        self._update_coverage_hash()

        #
        # unmapped data is a list of addresses that we have coverage for, but
        # could not map to any defined function in the database.
        #
        # a shortcoming of lighthouse (as recently as v0.8) is that it does
        # *not* compute statistics for, or paint, loaded coverage that falls
        # outside of defined functions.
        #
        # under normal circumstances, one can just define a function at the
        # area of interest (assuming it was a disassembler issue) and refresh
        # the lighthouse metadata to 'map' the missing coverage.
        #
        # in cases of obfuscation, abnormal control flow, or self modifying
        # code, lighthouse will probably not perform well. but to be fair,
        # lighthouse was designed for displaying coverage more-so than hit
        # tracing or trace exploration.
        #
        # initially, all loaded coverage data is marked as unmapped
        #

        self._unmapped_data = set(self._hitmap.keys())
        self._unmapped_data.add(BADADDR)
        self._misaligned_data = set()

        #
        # at runtime, the map_coverage() member function of this class is
        # responsible for taking the unmapped_data mapping it on top of the
        # lifted database metadata (self._metadata).
        #
        # the process of mapping the raw coverage data will yield NodeCoverage
        # and FunctionCoverage objects. these are the buckets that the unmapped
        # coverage data is poured into during the mappinng process.
        #
        # NodeCoverage objects represent coverage at the node (basic block)
        # level and are owned by a respective FunctionCoverage object.
        #
        # FunctionCoverage represent coverage at the function level, grouping
        # children NodeCoverage objects and providing higher level statistics.
        #
        # self.nodes: address --> NodeCoverage
        # self.functions: address --> FunctionCoverage
        #

        self.nodes = {}
        self.functions = {}
        self.instruction_percent = 0.0

        #
        # we instantiate a single weakref of ourself (the DatbaseCoverage
        # object) such that we can distribute it to the children we create
        # without having to repeatedly instantiate new ones.
        #

        self._weak_self = weakref.proxy(self)

    #--------------------------------------------------------------------------
    # Properties
    #--------------------------------------------------------------------------

    @property
    def data(self):
        """
        Return the backing coverage data (a hitmap).
        """
        return self._hitmap

    @property
    def coverage(self):
        """
        Return the instruction-level coverage bitmap/mask.
        """
        return self._hitmap.viewkeys()

    @property
    def suspicious(self):
        """
        Return a bool indicating if the coverage seems badly mapped.
        """
        bad = 0
        total = len(self.nodes)

        #
        # count the number of nodes (basic blocks) that allegedly were executed
        # (they have coverage data) but don't actually have their first
        # instruction logged as executed.
        #
        # this is considered 'suspicious' and should be a red flag that the
        # provided coverage data is malformed, or for a different binary
        #

        for adddress, node_coverage in self.nodes.iteritems():
            if adddress in node_coverage.executed_instructions:
                continue
            bad += 1

        # compute a percentage of the 'bad nodes'
        percent = (bad / float(total)) * 100
        logger.debug("SUSPICIOUS: %5.2f%% (%u/%u)" % (percent, bad, total))

        #
        # if the percentage of 'bad' coverage nodes is too high, we consider
        # this database coverage as 'suspicious' or 'badly mapped'
        #
        # this number (2%) may need to be tuned. really any non-zero figure
        # is strange, but we will give some wiggle room for DBI or
        # disassembler fudginess.
        #

        return percent > 2.0

    #--------------------------------------------------------------------------
    # Metadata Population
    #--------------------------------------------------------------------------

    def update_metadata(self, metadata, delta=None):
        """
        Install a new databasee metadata object.
        """
        self._metadata = weakref.proxy(metadata)
        self.unmap_all()

    def refresh(self):
        """
        Refresh the mapping of our coverage data to the database metadata.
        """

        # rebuild our coverage mapping
        dirty_nodes, dirty_functions = self._map_coverage()

        # bake our coverage map
        self._finalize(dirty_nodes, dirty_functions)

        # update the coverage hash incase the hitmap changed
        self._update_coverage_hash()

        # dump the unmappable coverage data
        #self.dump_unmapped()

    def _finalize(self, dirty_nodes, dirty_functions):
        """
        Finalize the DatabaseCoverage statistics / data for use.
        """
        self._finalize_nodes(dirty_nodes)
        self._finalize_functions(dirty_functions)
        self._finalize_instruction_percent()

    def _finalize_nodes(self, dirty_nodes):
        """
        Finalize the NodeCoverage objects statistics / data for use.
        """
        for node_coverage in dirty_nodes.itervalues():
            node_coverage.finalize()

    def _finalize_functions(self, dirty_functions):
        """
        Finalize the FunctionCoverage objects statistics / data for use.
        """
        for function_coverage in dirty_functions.itervalues():
            function_coverage.finalize()

    def _finalize_instruction_percent(self):
        """
        Finalize the DatabaseCoverage's coverage % by instructions executed.
        """

        # sum all the instructions in the database metadata
        total = sum(f.instruction_count
                    for f in self._metadata.functions.itervalues())
        if not total:
            self.instruction_percent = 0.0
            return

        # sum the unique instructions executed across all functions
        executed = sum(f.instructions_executed
                       for f in self.functions.itervalues())

        # save the computed percentage of database instructions executed (0 to 1.0)
        self.instruction_percent = float(executed) / total

    #--------------------------------------------------------------------------
    # Data Operations
    #--------------------------------------------------------------------------

    def add_data(self, data, update=True):
        """
        Add an existing instruction hitmap to the coverage mapping.
        """

        # add the given runtime data to our data source
        for address, hit_count in data.iteritems():
            self._hitmap[address] += hit_count

        # do not update other internal structures if requested
        if not update:
            return

        # update the coverage hash in case the hitmap changed
        self._update_coverage_hash()

        # mark these touched addresses as dirty
        self._unmapped_data |= data.viewkeys()

    def add_addresses(self, addresses, update=True):
        """
        Add a list of instruction addresses to the coverage mapping.
        """

        # increment the hit count for an address
        for address in addresses:
            self._hitmap[address] += 1

        # do not update other internal structures if requested
        if not update:
            return

        # update the coverage hash in case the hitmap changed
        self._update_coverage_hash()

        # mark these touched addresses as dirty
        self._unmapped_data |= set(addresses)

    def subtract_data(self, data):
        """
        Subtract an existing instruction hitmap from the coverage mapping.
        """

        # subtract the given hitmap from our existing hitmap
        for address, hit_count in data.iteritems():
            self._hitmap[address] -= hit_count

            #
            # if there is no longer any hits for this address, delete its
            # entry from the hitmap dictionary. we don't want its entry to
            # hang around because we use self._hitmap.viewkeys() as a
            # coverage bitmap/mask
            #

            if not self._hitmap[address]:
                del self._hitmap[address]

        # update the coverage hash as the hitmap has probably changed
        self._update_coverage_hash()

        #
        # unmap everything because a complete re-mapping is easier with the
        # current implementation of things
        #

        self.unmap_all()

    def mask_data(self, coverage_mask):
        """
        Mask the hitmap data against a given coverage mask.

        Returns a new DatabaseCoverage containing the masked hitmap.
        """
        composite_data = collections.defaultdict(int)

        # preserve only hitmap data that matches the coverage mask
        for address in coverage_mask:
            composite_data[address] = self._hitmap[address]

        # done, return a new DatabaseCoverage masked with the given coverage
        return DatabaseCoverage(self.palette, data=composite_data)

    def _update_coverage_hash(self):
        """
        Update the hash of the coverage mask.
        """
        if self._hitmap:
            self.coverage_hash = hash(frozenset(self._hitmap.viewkeys()))
        else:
            self.coverage_hash = 0

    #--------------------------------------------------------------------------
    # Coverage Mapping
    #--------------------------------------------------------------------------

    def _map_coverage(self):
        """
        Map loaded coverage data to the underlying database metadata.
        """
        dirty_nodes = self._map_nodes()
        dirty_functions = self._map_functions(dirty_nodes)
        return (dirty_nodes, dirty_functions)

    def _map_nodes(self):
        """
        Map loaded coverage data to database defined nodes (basic blocks).
        """
        dirty_nodes = {}

        # the coverage data we will attempt to process in this function
        coverage_addresses = collections.deque(sorted(self._unmapped_data))

        #
        # the loop below is the core of our coverage mapping process.
        #
        # operating on whatever coverage data (instruction addresses) reside
        # within unmapped_data, this loop will attempt to bucket the coverage
        # into NodeCoverage objects where possible.
        #
        # the higher level coverage mappings (eg FunctionCoverage,
        # DatabaseCoverage) get built on top of the node mapping that we
        # perform here.
        #
        # since this loop is the most computationally expensive part of the
        # mapping process, it has been carefully profiled & optimized for
        # speed. please be careful if you wish to modify it...
        #

        while coverage_addresses:

            # get the next coverage address to map
            address = coverage_addresses.popleft()

            # get the node (basic block) metadata that this address falls in
            node_metadata = self._metadata.get_node(address)

            #
            # should we fail to locate node metadata for the coverage address
            # that we are trying to map, then the address must not fall inside
            # of a defined function.
            #
            # in this case, the coverage address will remain unmapped...
            #

            if not node_metadata:
                continue

            #
            # we found applicable node metadata for this address, now we will
            # try to find an existing bucket (NodeCoverage) for the address
            #

            if node_metadata.address in self.nodes:
                node_coverage = self.nodes[node_metadata.address]

            #
            # failed to locate an existing NodeCoverage object for this
            # address, it looks like this is the first time we have attempted
            # to bucket coverage for this node.
            #
            # create a new NodeCoverage bucket and use it now
            #

            else:
                node_coverage = NodeCoverage(node_metadata.address,
                                             self._weak_self)
                self.nodes[node_metadata.address] = node_coverage

            # compute the end address of the current basic block
            node_end = node_metadata.address + node_metadata.size

            #
            # the loop below is as an inlined fast-path that assumes the next
            # several coverage addresses will likely belong to the same node
            # that we just looked up (or created) in the code above
            #
            # we can simply re-use the current node and its coverage object
            # until the next address to be processed falls outside the node
            #

            while 1:

                #
                # map the hitmap data for the current address (an instruction)
                # to this NodeCoverage and mark the instruction as mapped by
                # discarding its address from the unmapped data list
                #

                if address in node_metadata.instructions:
                    node_coverage.executed_instructions[
                        address] = self._hitmap[address]
                    self._unmapped_data.discard(address)

                #
                # if the given address allegedly falls within this node's
                # address range, but doesn't line up with the known
                # instructions, log it as 'misaligned' / suspicious
                #

                else:
                    self._misaligned_data.add(address)

                # get the next address to attempt mapping on
                try:
                    address = coverage_addresses.popleft()

                # an IndexError implies there is nothing left to map...
                except IndexError:
                    break

                #
                # if the next address is not in this node, it's time break out
                # of this loop and send it through the full node lookup path
                #

                if not (node_metadata.address <= address < node_end):
                    coverage_addresses.appendleft(address)
                    break

            # the node was updated, so save its coverage as dirty
            dirty_nodes[node_metadata.address] = node_coverage

        # done, return a map of NodeCoverage objects that were modified
        return dirty_nodes

    def _map_functions(self, dirty_nodes):
        """
        Map loaded coverage data to database defined functions.
        """
        dirty_functions = {}

        #
        # thanks to the map_nodes(), we now have a repository of NodeCoverage
        # objects that are considered 'dirty' and can be used precisely to
        # build or update the function level coverage metadata
        #

        for node_coverage in dirty_nodes.itervalues():

            #
            # using a given NodeCoverage object, we retrieve its underlying
            # metadata so that we can perform a reverse lookup of its function
            # (parent) metadata.
            #

            function_metadata = self._metadata.nodes[
                node_coverage.address].function

            #
            # now we will attempt to retrieve the the FunctionCoverage object
            # that we need to parent the given NodeCoverage object to
            #

            function_coverage = self.functions.get(function_metadata.address,
                                                   None)

            #
            # if we failed to locate a FunctionCoverage for this node, it means
            # that this is the first time we have seen coverage for this
            # function. create a new coverage function object and use it now.
            #

            if not function_coverage:
                function_coverage = FunctionCoverage(function_metadata.address,
                                                     self._weak_self)
                self.functions[function_metadata.address] = function_coverage

            # add the NodeCoverage object to its parent FunctionCoverage
            function_coverage.mark_node(node_coverage)
            dirty_functions[function_metadata.address] = function_coverage

        # done, return a map of FunctionCoverage objects that were modified
        return dirty_functions

    def unmap_all(self):
        """
        Unmap all mapped coverage data.
        """
        self._unmapped_data = set(self._hitmap.keys())
        self._unmapped_data.add(BADADDR)
        self._misaligned_data = set()
        self.nodes = {}
        self.functions = {}

    #--------------------------------------------------------------------------
    # Debug
    #--------------------------------------------------------------------------

    def dump_unmapped(self):
        """
        Dump the unmapped coverage data.
        """
        lmsg("Unmapped Coverage:")
        for address in self._unmapped_data:
            lmsg(" * 0x%X" % address)
コード例 #2
0
class DatabaseCoverage(object):
    """
    Database level coverage mapping.
    """

    def __init__(self, data, palette):

        # color palette
        self.palette = palette

        #
        # the abstract above gives some background to the design employed by
        # Lighthouse to map coverage data to the database.
        #
        # coverage objects such as this (DatabaseCoverage) are basically
        # glorified mappings of coverage / runtime data on top of their
        # metadata counterparts. A coverage object by itself is mostly useless
        # without its corresponding metadata object.
        #
        # here we simply populate self._metadata with a stub metadata object,
        # but at runtime we will inject a fully collected DatabaseMetadata
        # object as maintained by the director.
        #

        self._metadata = DatabaseMetadata()

        #
        # the hitmap effectively holds the raw coverage data. the name
        # should speak for itself, but a hitmap will track the number of
        # times a given address / instruction was executed.
        #
        #  Eg:
        #      hitmap =
        #      {
        #          0x8040100: 1,
        #          0x8040102: 1,
        #          0x8040105: 3,
        #          0x8040108: 3,  # 0x8040108 was executed 3 times...
        #          0x804010a: 3,
        #          0x804010f: 1,
        #          ...
        #      }
        #
        # this structure gives us an interesting degree of flexibility with
        # regard to what data sources we can consue (inst trace, coverage, etc)
        # and ways we can leverage said data (visualize coverage, heatmaps)
        #

        self._hitmap = build_hitmap(data)

        #
        # the coverage hash is a simple hash of the coverage bitmap/mask.
        # it is primarily used by the director as a means of quickly comparing
        # coverage, and predicting outputs of logical / arithmetic operations.
        #
        # the hash will need to be updated via _update_coverage_hash() anytime
        # the hitmap is modified or changed internally. we cache a concrete
        # result of the coverage hash because computing the hash on demand can
        # be expensive in terms of time.
        #
        # see the usage of 'coverage_hash' in director.py for more info
        #

        self.coverage_hash = 0
        self._update_coverage_hash()

        #
        # Lighthouse will only compute coverage for code within defined
        # functions. therefore, all coverage / runtime data will get bucketed
        # into its appropriate NodeCoverage object (eg, a basic block) or it
        # will be considered 'unmapped'
        #
        # starting out, all coverage data is marked as unmapped
        #

        self._unmapped_data = set(self._hitmap.keys())
        self._unmapped_data.add(idaapi.BADADDR)

        #
        # self._map_coverage is responsible for mapping coverage data to the
        # database (via the lifted 'DatabaseMetadata' cache). The mapping
        # process will yield NodeCoverage & FunctionCoverage objects.
        #
        # NodeCoverage objects represent coverage at the node (basic block)
        # level and are owned by their respective FunctionCoverage objects.
        #
        # FunctionCoverage represent coverage at the function level by
        # leveraging their respective NodeCoverage children.
        #

        self.nodes     = {}
        self.functions = {}

        #
        # we instantiate a single weakref of ourself (the DatbaseMapping
        # object) such that we can distribute it to the children we create
        # without having to repeatedly instantiate new ones.
        #

        self._weak_self = weakref.proxy(self)

    #--------------------------------------------------------------------------
    # Propertiens
    #--------------------------------------------------------------------------

    @property
    def data(self):
        """
        The data (a hitmap) used by this mapping.
        """
        return self._hitmap

    @property
    def coverage(self):
        """
        The instruction-level coverage bitmap/mask of this mapping.
        """
        return self._hitmap.viewkeys()

    @property
    def instruction_percent(self):
        """
        The database coverage % by instructions executed in all defined functions.
        """
        num_funcs = len(self._metadata.functions)

        # avoid a zero division error
        if not num_funcs:
            return 0

        # sum all the function coverage %'s
        func_sum = sum(f.instruction_percent for f in self.functions.itervalues())

        # return the average function coverage % aka 'the database coverage %'
        return func_sum / num_funcs

    #--------------------------------------------------------------------------
    # Metadata Population
    #--------------------------------------------------------------------------

    def update_metadata(self, metadata, delta=None):
        """
        Install a new databasee metadata object.
        """

        # install the new metadata
        self._metadata = weakref.proxy(metadata)

        # unmap all the coverage affected by the metadata delta
        if delta:
            self._unmap_delta(delta)

    def refresh(self):
        """
        Refresh the mapping of our coverage data to the database metadata.
        """

        # rebuild our coverage mapping
        dirty_nodes, dirty_functions = self._map_coverage()

        # bake our coverage map
        self._finalize(dirty_nodes, dirty_functions)

        # dump the unmappable coverage data
        #self.dump_unmapped()

    def refresh_nodes(self):
        """
        Special fast-refresh of nodes as used in the un-painting process.
        """
        dirty_nodes = self._map_nodes()
        self._finalize_nodes(dirty_nodes)

    def _finalize(self, dirty_nodes, dirty_functions):
        """
        Finalize coverage objects for use.
        """
        self._finalize_nodes(dirty_nodes)
        self._finalize_functions(dirty_functions)

    def _finalize_nodes(self, dirty_nodes):
        """
        Finalize coverage nodes for use.
        """
        for node_coverage in dirty_nodes.itervalues():
            node_coverage.finalize()

    def _finalize_functions(self, dirty_functions):
        """
        Finalize coverage nodes for use.
        """
        for function_coverage in dirty_functions.itervalues():
            function_coverage.finalize()

    #--------------------------------------------------------------------------
    # Data Operations
    #--------------------------------------------------------------------------

    def add_data(self, data):
        """
        Add runtime data to this mapping.
        """

        # add the given runtime data to our data source
        for address, hit_count in data.iteritems():
            self._hitmap[address] += hit_count

        # update the coverage hash incase the hitmap changed
        self._update_coverage_hash()

        # mark these touched addresses as dirty
        self._unmapped_data |= data.viewkeys()

    def subtract_data(self, data):
        """
        Subtract runtime data from this mapping.
        """

        # subtract the given runtime data from our data source
        for address, hit_count in data.iteritems():
            self._hitmap[address] -= hit_count

            #assert self._hitmap[address] >= 0

            #
            # if there is no longer any hits for this address, delete its
            # entry from the source_data dictonary. we don't want its entry to
            # hang around because we use self._hitmap.viewkeys() as a
            # coverage bitmap/mask
            #

            if not self._hitmap[address]:
                del self._hitmap[address]

        # update the coverage hash incase the hitmap changed
        self._update_coverage_hash()

        #
        # unmap everything because a complete re-mapping is easier with the
        # current implementation of things
        #

        self._unmap_all()

    #--------------------------------------------------------------------------
    # Coverage Operations
    #--------------------------------------------------------------------------

    def mask_data(self, coverage_mask):
        """
        Mask the hitmap data against a given coverage mask.

        Returns a new DatabaseCoverage containing the masked hitmap.
        """
        composite_data = collections.defaultdict(int)

        # preserve only hitmap data that matches the coverage mask
        for address in coverage_mask:
            composite_data[address] = self._hitmap[address]

        # done, return a new DatabaseCoverage masked with the given coverage
        return DatabaseCoverage(composite_data, self.palette)

    def _update_coverage_hash(self):
        """
        Update the hash of the coverage mask.
        """
        if self._hitmap:
            self.coverage_hash = hash(frozenset(self._hitmap.viewkeys()))
        else:
            self.coverage_hash = 0

    #--------------------------------------------------------------------------
    # Coverage Mapping
    #--------------------------------------------------------------------------

    def _map_coverage(self):
        """
        Map loaded coverage data to the given database metadata.
        """

        # re-map any unmapped coverage to nodes
        dirty_nodes = self._map_nodes()

        # re-map nodes to functions
        dirty_functions = self._map_functions(dirty_nodes)

        # return the modified objects
        return (dirty_nodes, dirty_functions)

    def _map_nodes(self):
        """
        Map loaded runtime data to database defined nodes (basic blocks).
        """
        dirty_nodes = {}
        addresses_to_map = collections.deque(sorted(self._unmapped_data))

        #
        # This while loop is the core of our coverage mapping process.
        #
        # The '_unmapped_data' list is consumed by this loop, mapping
        # any unmapped runtime data maintained by this DatabaseCoverage
        # to the given database metadata.
        #
        # It should be noted that the rest of the database coverage
        # mapping (eg functions) gets built ontop of the mappings we build
        # for nodes here using the more or less raw/recycled runtime data.
        #

        while addresses_to_map:

            # get the next address to map
            address = addresses_to_map.popleft()

            # get the node (basic block) that contains this address
            try:
                node_metadata = self._metadata.get_node(address)

            #
            # failed to locate the node (basic block) for this address.
            # this address must not fall inside of a defined function...
            #

            except ValueError:
                continue

            #
            # we found applicable node metadata for this address, now try
            # to find the mapping object for this node address
            #

            if node_metadata.address in self.nodes:
                node_coverage = self.nodes[node_metadata.address]

            #
            # failed to locate a node coverage object, looks like this is
            # the first time we have identiied coverage for this node.
            # create a coverage node object and use it now.
            #

            else:
                node_coverage = NodeCoverage(node_metadata.address, self._weak_self)
                self.nodes[node_metadata.address] = node_coverage

            # compute the basic block end now to reduce overhead in the loop below
            node_end = node_metadata.address + node_metadata.size

            #
            # the loop below can be thought of almost as an inlined fast-path
            # where we expect the next several addresses to belong to the same
            # node (basic block).
            #
            # with the assumption of linear program execution, we can reduce
            # the heavier overhead of all the lookup code above by simply
            # checking if the next address in the queue (addresses_to_map)
            # falls into the same / current node (basic block).
            #
            # we can simply re-use the current node and its coverage object
            # until the next address to be processed falls outside our scope
            #

            while 1:

                #
                # map the hitmap data for the current address (an instruction)
                # to this node mapping and mark the instruction as mapped by
                # discarding its address from the unmapped data list
                #

                if address in node_metadata.instructions:
                    node_coverage.executed_instructions[address] = self._hitmap[address]
                    self._unmapped_data.discard(address)

                # get the next address to attempt mapping on
                address = addresses_to_map.popleft()

                #
                # if the next address is not in this node, it's time break out
                # of this loop and send it through the full node lookup path
                #

                if not (node_metadata.address <= address < node_end):
                    addresses_to_map.appendleft(address)
                    break

                #
                # the next address to be mapped DOES fall within our current
                # node, loop back around in the fast-path and map it
                #

                # ...

            # since we updated this node, ensure we're tracking it as dirty
            dirty_nodes[node_metadata.address] = node_coverage

        # done
        return dirty_nodes

    def _map_functions(self, dirty_nodes):
        """
        Map loaded coverage data to database defined functions.
        """
        dirty_functions = {}

        #
        # thanks to the _map_nodes function, we now have a repository of
        # node coverage objects that are considered 'dirty' and can be used
        # precisely guide the generation of our function level coverage
        #

        for node_coverage in dirty_nodes.itervalues():

            #
            # using the node_coverage object, we retrieve its underlying
            # metadata so that we can perform a reverse lookup of the fun
            #

            function_metadata = self._metadata.nodes[node_coverage.address].function

            #
            # now we can add this node to its respective function level
            # coverage mapping
            #

            function_coverage = self.functions.get(function_metadata.address, None)

            #
            # if we failed to locate a function coverage object, it means
            # that this is the first time we have identified coverage for this
            # function. create a new coverage function object and use it now.
            #

            if not function_coverage:
                function_coverage = FunctionCoverage(function_metadata.address, self._weak_self)
                self.functions[function_metadata.address] = function_coverage

            # mark this node as executed in the function level mappping
            function_coverage.mark_node(node_coverage)
            dirty_functions[function_metadata.address] = function_coverage

            # end of nodes loop

        # done
        return dirty_functions

    def _unmap_all(self):
        """
        Unmap all mapped data.
        """
        self._unmapped_data = set(self._hitmap.keys())
        self._unmapped_data.add(idaapi.BADADDR)
        self.nodes     = {}
        self.functions = {}

    def _unmap_delta(self, delta):
        """
        Unmap node & function coverage affected by the metadata delta.

        The metadata delta tells us exactly which parts of the database
        changed since our last coverage mapping. This function surgically
        unmaps the pieces of our coverage that may now be stale.

        This enables us to recompute only what is necessary upon refresh.
        """
        self._unmap_nodes(itertools.chain(delta.nodes_removed, delta.nodes_modified))
        self._unmap_functions(delta.functions_removed)

    def _unmap_nodes(self, node_addresses):
        """
        Unmap any data associated with a given list of node addresses.
        """

        #
        # using the metdata delta as a guide, we loop through all the nodes it
        # has noted as either modified, or deleted. it is in our best interest
        # unmap any of these dirty (stale) node addresses in OUR coverage
        # mapping so we can selectively regenerate their coverage later.
        #

        for node_address in node_addresses:

            #
            # if there's no coverage for this node, then we have nothing to do.
            # continue on to the next dirty node address
            #

            node_coverage = self.nodes.pop(node_address, None)
            if not node_coverage:
                continue

            # the node was found, unmap any of its tracked coverage blocks
            self._unmapped_data.update(
                node_coverage.executed_instructions.viewkeys()
            )

    def _unmap_functions(self, function_addresses):
        """
        Unmap any data associated with a given list of function addresses.
        """
        for function_address in function_addresses:
            self.functions.pop(function_address, None)

    #--------------------------------------------------------------------------
    # Debug
    #--------------------------------------------------------------------------

    def dump_unmapped(self):
        """
        Dump the unmapped coverage data.
        """
        lmsg("Unmapped Coverage:")
        for address in self._unmapped_data:
            lmsg(" * 0x%X" % address)
コード例 #3
0
ファイル: coverage.py プロジェクト: ww9210/lighthouse
class DatabaseCoverage(object):
    """
    Database level coverage mapping.
    """
    def __init__(self, base, indexed_data, palette):

        # the color palette used when painting this coverage
        self.palette = palette

        if not indexed_data:
            indexed_data = collections.defaultdict(int)

        self._base = base
        self.coverage_data = indexed_data
        self.unmapped_coverage = set(indexed_data.keys())
        self.unmapped_coverage.add(idaapi.BADADDR)

        # the metadata this coverage will be mapped to
        self._metadata = DatabaseMetadata(False)

        # maps to the child coverage objects
        self.nodes = {}
        self.functions = {}

        #
        # profiling revealed that letting every child (eg, FunctionCoverage
        # or NodeCoverage) create their own weakref to the parent/database
        # was actually adding a reasonable and unecessary overhead. There's
        # really no reason they need to do that anyway.
        #
        # we instantiate a single weakref of ourself (the DatbaseCoverage
        # object) such that we can distribute it to the children we create
        # without having to repeatedly instantiate new ones.
        #

        self._weak_self = weakref.proxy(self)

    #--------------------------------------------------------------------------
    # Operator Overloads
    #--------------------------------------------------------------------------

    @property
    def instruction_percent(self):
        """
        The coverage % by instructions executed.
        """
        try:
            return sum(f.instruction_percent
                       for f in self.functions.itervalues()) / len(
                           self._metadata.functions)
        except ZeroDivisionError:
            return 0.0

    #--------------------------------------------------------------------------
    # Operator Overloads
    #--------------------------------------------------------------------------

    def __or__(self, other):
        """
        Overload of '|' (logical or) operator.
        """

        if other is None:
            other = DatabaseCoverage(self._base, None, self.palette)
        elif not isinstance(other, DatabaseCoverage):
            raise NotImplementedError(
                "Cannot OR DatabaseCoverage against type '%s'" % type(other))

        # initialize
        composite_data = collections.defaultdict(int)

        #----------------------------------------------------------------------

        # TODO / v0.4.0: this will be refactored as a 'coverage add/or'

        # compute the union of the two coverage sets
        for address, hit_count in self.coverage_data.iteritems():
            composite_data[address] = hit_count
        for address, hit_count in other.coverage_data.iteritems():
            composite_data[address] += hit_count

        # done
        return DatabaseCoverage(self._base, composite_data, self.palette)

    def __and__(self, other):
        """
        Overload of '&' (logical and) operator.
        """

        if other is None:
            other = DatabaseCoverage(self._base, None, self.palette)
        elif not isinstance(other, DatabaseCoverage):
            raise NotImplementedError(
                "Cannot AND DatabaseCoverage against type '%s'" % type(other))

        # initialize the object
        composite_data = collections.defaultdict(int)

        #----------------------------------------------------------------------

        # compute the intersecting addresses of the two coverage sets
        intersected_addresses = self.coverage_data.viewkeys(
        ) & other.coverage_data.viewkeys()

        # TODO / v0.4.0: this will be refactored as a 'coverage and'

        # accumulate the hit counters for the intersecting coverage
        for address in intersected_addresses:
            composite_data[address] = self.coverage_data[
                address] + other.coverage_data[address]

        # done
        return DatabaseCoverage(self._base, composite_data, self.palette)

    def __sub__(self, other):
        """
        Overload of '-' (subtract) operator.
        """

        if other is None:
            other = DatabaseCoverage(self._base, None, self.palette)
        elif not isinstance(other, DatabaseCoverage):
            raise NotImplementedError(
                "Cannot SUB DatabaseCoverage against type '%s'" % type(other))

        # initialize the object
        composite_data = collections.defaultdict(int)

        #----------------------------------------------------------------------

        # compute the difference addresses of the two coverage sets
        difference_addresses = self.coverage_data.viewkeys(
        ) - other.coverage_data.viewkeys()

        #
        # NOTE:
        #   I'm not convinced I should acumulate the subtractee's hit counts,
        #   and I don't think it makes sense to? so for now we don't.
        #
        # TODO / v0.4.0: this will be refactored as a 'coverage subtract'
        #

        # build the new coverage data
        for address in difference_addresses:
            composite_data[address] = self.coverage_data[
                address]  #- other.coverage_data[address]

        # done
        return DatabaseCoverage(self._base, composite_data, self.palette)

    def hitmap_subtract(self, other):
        """
        Subtract hitmaps from each other.

        TODO: dirty hack that will be removed in v0.4.0
        """

        if other is None:
            other = DatabaseCoverage(self._base, None, self.palette)
        elif not isinstance(other, DatabaseCoverage):
            raise NotImplementedError(
                "Cannot SUB DatabaseCoverage hitmap against type '%s'" %
                type(other))

        # initialize the object
        composite_data = collections.defaultdict(int)

        #----------------------------------------------------------------------

        # build the new coverage data
        for address in self.coverage_data.viewkeys():
            composite_data[address] = self.coverage_data[address]
        for address in other.coverage_data.viewkeys():
            composite_data[address] -= other.coverage_data[address]
            if not composite_data[address]:
                del composite_data[address]

        # done
        return DatabaseCoverage(self._base, composite_data, self.palette)

    def __xor__(self, other):
        """
        Overload of '^' xor operator.
        """

        if other is None:
            other = DatabaseCoverage(self._base, None, self.palette)
        elif not isinstance(other, DatabaseCoverage):
            raise NotImplementedError(
                "Cannot XOR DatabaseCoverage against type '%s'" % type(other))

        # initialize the object
        composite_data = collections.defaultdict(int)

        #----------------------------------------------------------------------

        # compute the symmetric difference (xor) between two coverage sets
        xor_addresses = self.coverage_data.viewkeys(
        ) ^ other.coverage_data.viewkeys()

        # accumulate the hit counters for the xor'd coverage
        for address in xor_addresses & self.coverage_data.viewkeys():
            composite_data[address] = self.coverage_data[address]
        for address in xor_addresses & other.coverage_data.viewkeys():
            composite_data[address] = other.coverage_data[address]

        # done
        return DatabaseCoverage(self._base, composite_data, self.palette)

    def __ror__(self, other):
        return self.__or__(other)

    def __rand__(self, other):
        return self.__and__(other)

    #def __rsub__(self, other):
    #    return self.__sub__(other)

    def __rxor__(self, other):
        return self.__xor__(other)

    #--------------------------------------------------------------------------
    # Metadata Population
    #--------------------------------------------------------------------------

    def update_metadata(self, metadata, delta=None):
        """
        Update the installed metadata.
        """

        # install the new metadata
        self._metadata = weakref.proxy(metadata)

        # unmap all the coverage affected by the metadata delta
        if delta:
            self._unmap_dirty(delta)

    def refresh(self):
        """
        Refresh the mapping of our coverage data to the database metadata.
        """

        # rebuild our coverage mapping
        dirty_nodes, dirty_functions = self._map_coverage()

        # bake our coverage map
        self._finalize(dirty_nodes, dirty_functions)

    def refresh_nodes(self):
        """
        Special fast-refresh of nodes as used in the un-painting process.
        """
        dirty_nodes = self._map_nodes()
        self._finalize_nodes(dirty_nodes)

    def _finalize(self, dirty_nodes, dirty_functions):
        """
        Finalize coverage objects for use.
        """
        self._finalize_nodes(dirty_nodes)
        self._finalize_functions(dirty_functions)

    def _finalize_nodes(self, dirty_nodes):
        """
        Finalize coverage nodes for use.
        """
        for node_coverage in dirty_nodes.itervalues():
            node_coverage.finalize()

    def _finalize_functions(self, dirty_functions):
        """
        Finalize coverage nodes for use.
        """
        for function_coverage in dirty_functions.itervalues():
            function_coverage.finalize()

    #--------------------------------------------------------------------------
    # Coverage Mapping
    #--------------------------------------------------------------------------

    def _map_coverage(self):
        """
        Map loaded coverage data to the given database metadata.
        """

        # re-map any unmapped coverage to nodes
        dirty_nodes = self._map_nodes()

        # re-map nodes to functions
        dirty_functions = self._map_functions(dirty_nodes)

        # return the modified objects
        return (dirty_nodes, dirty_functions)

    def _map_nodes(self):
        """
        Map loaded coverage data to database defined nodes (basic blocks).
        """
        dirty_nodes = {}
        addresses_to_map = collections.deque(sorted(self.unmapped_coverage))

        #
        # This while loop is the core of our coverage mapping process.
        #
        # The 'unmapped_coverage' list is consumed by this loop, mapping
        # any unmapped coverage data maintained by this DatabaseCoverage
        # to the given database metadata.
        #
        # It should be noted that the rest of the database coverage
        # mapping (eg functions) gets built ontop of the mappings we build
        # for nodes here using the more or less raw/recycled coverage data.
        #

        while addresses_to_map:

            # get the next address to map
            address = addresses_to_map.popleft()

            # get the node (basic block) that contains this address
            try:
                node_metadata = self._metadata.get_node(address)

            #
            # failed to locate the node (basic block) for this address.
            # this address must not fall inside of a defined function...
            #

            except ValueError:
                continue

            #
            # we found applicable node metadata for this address, now try
            # to find the coverage object for this node address
            #

            if node_metadata.address in self.nodes:
                node_coverage = self.nodes[node_metadata.address]

            #
            # failed to locate a node coverage object, looks like this is
            # the first time we have identiied coverage for this node.
            # create a coverage node object and use it now.
            #

            else:
                node_coverage = NodeCoverage(node_metadata.address,
                                             self._weak_self)
                self.nodes[node_metadata.address] = node_coverage

            # compute the basic block end now to reduce overhead in the loop below
            node_end = node_metadata.address + node_metadata.size

            #
            # the loop below can be thought of almost as an inlined fast-path
            # where we expect the next several addresses to belong to the same
            # node (basic block).
            #
            # with the assumption of linear program execution, we can reduce
            # the heavier overhead of all the lookup code above by simply
            # checking if the next address in the queue (addresses_to_map)
            # falls into the same / current node (basic block).
            #
            # we can simply re-use the current node and its coverage object
            # until the next address to be processed falls outside our scope
            #

            while 1:

                # map the coverage data for the current address to this node
                node_coverage.executed_bytes.add(address)

                #
                # ownership has been transfered to node_coverage, so this
                # address is no longer considered 'unmapped'
                #

                self.unmapped_coverage.discard(address)

                # get the next address to attempt mapping on
                address = addresses_to_map.popleft()

                #
                # if the address is not in this node, it's time break out of
                # this loop and sned it back through the full node lookup path
                #

                if not (node_metadata.address <= address < node_end):
                    addresses_to_map.appendleft(address)
                    break

                #
                # the next address to be mapped DOES fall within our current
                # node, loop back around in the fast-path and map it
                #

                # ...

            # since we updated this node, ensure we're tracking it as dirty
            dirty_nodes[node_metadata.address] = node_coverage

        # done
        return dirty_nodes

    def _map_functions(self, dirty_nodes):
        """
        Map loaded coverage data to database defined functions.
        """
        dirty_functions = {}

        #
        # thanks to the _map_nodes function, we now have a repository of
        # node coverage objects that are considered 'dirty' and can be used
        # precisely guide the generation of our function level coverage
        #

        for node_coverage in dirty_nodes.itervalues():

            #
            # using the node_coverage object, we retrieve its underlying
            # metadata so that we can perform a reverse lookup of all the
            # functions in the database that reference it
            #

            functions = self._metadata.nodes[node_coverage.address].functions

            #
            # now we can loop through every function that references this
            # node and initialize or add this node to its respective
            # coverage mapping
            #

            for function_metadata in functions.itervalues():

                #
                # retrieve the coverage object for this function address
                #

                try:
                    function_coverage = self.functions[
                        function_metadata.address]

                #
                # failed to locate a function coverage object, looks like this
                # is the first time we have identiied coverage for this
                # function. creaate a coverage function object and use it now.
                #

                except KeyError as e:
                    function_coverage = FunctionCoverage(
                        function_metadata.address, self._weak_self)
                    self.functions[
                        function_metadata.address] = function_coverage

                # mark this node as executed in the function level mappping
                function_coverage.mark_node(node_coverage)
                dirty_functions[function_metadata.address] = function_coverage

                # end of functions loop

            # end of nodes loop

        # done
        return dirty_functions

    def _unmap_dirty(self, delta):
        """
        Unmap node & function coverage affected by the metadata delta.

        The metadata delta tells us exactly which parts of the database
        changed since our last coverage mapping. This function surgically
        unmaps the pieces of our coverage that may now be stale.

        This enables us to recompute only what is necessary upon refresh.
        """

        #
        # Dirty Nodes
        #

        #
        # using the metdata delta as a guide, we loop through all the nodes it
        # has noted as either modified, or deleted. it is in our best interest
        # unmap any of these dirty (stale) node addresses in OUR coverage
        # mapping so we can selectively regenerate their coverage later.
        #

        for node_address in itertools.chain(delta.nodes_removed,
                                            delta.nodes_modified):

            #
            # if there's no coverage for this node, then we have nothing to do.
            # continue on to the next dirty node address
            #

            node_coverage = self.nodes.pop(node_address, None)
            if not node_coverage:
                continue

            # the node was found, unmap any of its tracked coverage blocks
            self.unmapped_coverage.update(node_coverage.executed_bytes)

            #
            # NOTE:
            #
            #   since we pop'd node_coverage from the database-wide self.nodes
            #   list, this loop iteration owns the last remaining 'hard' ref to
            #   the object. once the loop rolls over, it will be released.
            #
            #   what is cool about this is that its corresponding entry for
            #   this node_coverage object in any FunctionCoverage objects that
            #   reference this node will also dissapear. This is because the
            #   executed_nodes dictionaries are built using WeakValueDictionary.
            #

        #
        # Dirty Functions
        #

        # delete function coverage objects for the allegedly deleted functions
        for function_address in delta.functions_removed:
            self.functions.pop(function_address, None)