예제 #1
0
def get_flowgraph_from(address):
    """
    Generates a flowgraph object that can be fed into FunctionSimSearch from a
    given address in IDA.

    Args:
        address: An address inside the function to convert. A falsy value
            (None or 0) falls back to the current cursor position, here().

    Returns:
        A functionsimsearch.FlowgraphWithInstructions with one node per IDA
        basic block plus one node per sub-block returned by
        split_instruction_list(instructions, "call"), and the edges between
        them.
    """
    # Use the address the caller asked for; only fall back to the cursor when
    # none was supplied.
    if not address:
        address = here()
    ida_flowgraph = idaapi.FlowChart(idaapi.get_func(address))
    flowgraph = functionsimsearch.FlowgraphWithInstructions()
    for block in ida_flowgraph:
        # Add all the ida-flowgraph-basic blocks. We do this up-front so we can
        # more easily add edges later, and adding a node twice does not hurt.
        flowgraph.add_node(block.start_ea)

    for block in ida_flowgraph:
        # Each entry is (address, mnemonic, (operand 0 text, operand 1 text)).
        instructions = [
            (ea, GetMnem(ea), (print_operand(ea, 0), print_operand(ea, 1)))
            for ea in Heads(block.start_ea, block.end_ea)
        ]
        small_blocks = split_instruction_list(instructions, "call")
        for small_block in small_blocks:
            # A sub-block is identified by the address of its first instruction.
            start_ea = small_block[0][0]
            flowgraph.add_node(start_ea)
            # Drop the leading address: only (mnemonic, operands) is stored.
            flowgraph.add_instructions(
                start_ea,
                tuple(instruction[1:] for instruction in small_block))
        # Chain consecutive sub-blocks of the same basic block together.
        for current, following in zip(small_blocks, small_blocks[1:]):
            flowgraph.add_edge(current[0][0], following[0][0])
        # The last sub-block inherits the basic block's outgoing edges.
        for successor_block in block.succs():
            flowgraph.add_edge(small_blocks[-1][0][0],
                               successor_block.start_ea)
    return flowgraph
    def test_json(self):
        """ Test JSON decoding of graphs.

        Loads a fixed CFG from its JSON serialization, hashes it with a
        default-constructed SimHasher and compares the first element of the
        resulting hash against a known-good value.
        """
        jsonstring = """{"edges":[{"destination":1518838580,"source":1518838565},{"destination":1518838572,"source":1518838565},{"destination":1518838578,"source":1518838572},{"destination":1518838574,"source":1518838572},{"destination":1518838580,"source":1518838574},{"destination":1518838578,"source":1518838574},{"destination":1518838580,"source":1518838578}],"name":"CFG","nodes":[{"address":1518838565,"instructions":[{"mnemonic":"xor","operands":["EAX","EAX"]},{"mnemonic":"cmp","operands":["[ECX + 4]","EAX"]},{"mnemonic":"jnle","operands":["5a87a334"]}]},{"address":1518838572,"instructions":[{"mnemonic":"jl","operands":["5a87a332"]}]},{"address":1518838574,"instructions":[{"mnemonic":"cmp","operands":["[ECX]","EAX"]},{"mnemonic":"jnb","operands":["5a87a334"]}]},{"address":1518838578,"instructions":[{"mnemonic":"mov","operands":["AL","1"]}]},{"address":1518838580,"instructions":[{"mnemonic":"ret near","operands":["[ESP]"]}]}]}"""

        fg = functionsimsearch.FlowgraphWithInstructions()
        fg.from_json(jsonstring)
        hasher = functionsimsearch.SimHasher()
        function_hash = hasher.calculate_hash(fg)
        # assertEqual reports both values on a mismatch, unlike assertTrue(a == b).
        self.assertEqual(function_hash[0], 0xa7ef296fa5dea3ee)
예제 #3
0
  def test_hasher_with_weights(self):
    """ Tests whether the loading of a weights file works.

    Hashes a fixed CFG with a SimHasher constructed from a weights file and
    compares the first element of the resulting hash against a known-good
    value.
    """
    jsonstring = """{"edges":[{"destination":1518838580,"source":1518838565},{"destination":1518838572,"source":1518838565},{"destination":1518838578,"source":1518838572},{"destination":1518838574,"source":1518838572},{"destination":1518838580,"source":1518838574},{"destination":1518838578,"source":1518838574},{"destination":1518838580,"source":1518838578}],"name":"CFG","nodes":[{"address":1518838565,"instructions":[{"mnemonic":"xor","operands":["EAX","EAX"]},{"mnemonic":"cmp","operands":["[ECX + 4]","EAX"]},{"mnemonic":"jnle","operands":["5a87a334"]}]},{"address":1518838572,"instructions":[{"mnemonic":"jl","operands":["5a87a332"]}]},{"address":1518838574,"instructions":[{"mnemonic":"cmp","operands":["[ECX]","EAX"]},{"mnemonic":"jnb","operands":["5a87a334"]}]},{"address":1518838578,"instructions":[{"mnemonic":"mov","operands":["AL","1"]}]},{"address":1518838580,"instructions":[{"mnemonic":"ret near","operands":["[ESP]"]}]}]}"""

    fg = functionsimsearch.FlowgraphWithInstructions()
    fg.from_json(jsonstring)
    # NOTE(review): the weights path is relative, so this presumably depends on
    # the directory the tests are run from - confirm.
    hasher = functionsimsearch.SimHasher("../testdata/weights.txt")
    function_hash = hasher.calculate_hash(fg)
    # assertEqual reports both values on a mismatch, unlike assertTrue(a == b).
    self.assertEqual(function_hash[0], 0xa6ef292a658e83ee)
예제 #4
0
    def extract_flowgraph_hash(self, function, minimum_size=5):
        """
        Generates a flowgraph object that can be fed into FunctionSimSearch
        from a given function in Binary Ninja and returns its similarity hash.

        Every basic block is cut after each 'call' instruction, so one basic
        block may contribute several flowgraph nodes. Only the mnemonic of
        each instruction is recorded; operands are left empty.

        Args:
            function: Iterable of basic blocks. Each block exposes `start`
                and `outgoing_edges` (items with `target.start`) and iterates
                over instructions, where `instruction[0][0].text` is the
                mnemonic and `instruction[1]` is the size added to the
                running address offset.
            minimum_size: Minimum value of
                `flowgraph.number_of_branching_nodes()` required for the
                function to be hashed.

        Returns:
            `(None, None)` when the flowgraph has fewer than `minimum_size`
            branching nodes, otherwise the result of
            `fss.SimHasher().calculate_hash(flowgraph)`.
        """

        nodes = []
        graph = []

        # Retrieve CFG data
        for block in function:
            pieces = []  # (start address, mnemonics) of this block's nodes
            mnemonics = []
            offset = 0
            piece_start = block.start

            for instruction in block:
                mnemonic = instruction[0][0].text
                mnemonics.append(mnemonic)
                offset += instruction[1]

                # Split on call with assumption that we only care about
                # x86/64 for now
                if mnemonic == 'call':
                    pieces.append((piece_start, mnemonics))
                    mnemonics = []
                    piece_start = block.start + offset

            if mnemonics:
                pieces.append((piece_start, mnemonics))
            if not pieces:
                # A block without instructions yields no node, hence no edges.
                continue

            nodes.extend(pieces)
            # Fall-through edges between consecutive pieces of this block.
            graph.extend(
                (current[0], following[0])
                for current, following in zip(pieces, pieces[1:]))
            # Successor edges always leave from the last real piece. This
            # matters when the block ends in a call: no node exists at the
            # address following it, so edges must not originate there.
            last_start = pieces[-1][0]
            graph.extend(
                (last_start, edge.target.start)
                for edge in block.outgoing_edges)

        # Generate flowgraph
        flowgraph = fss.FlowgraphWithInstructions()

        for start, node_mnemonics in nodes:
            flowgraph.add_node(start)
            # Format conversion: (mnemonic, operands) with no operands.
            flowgraph.add_instructions(
                start, tuple((mnemonic, ()) for mnemonic in node_mnemonics))

        for source, destination in graph:
            flowgraph.add_edge(source, destination)

        if flowgraph.number_of_branching_nodes() < minimum_size:
            return (None, None)
        hasher = fss.SimHasher()

        return hasher.calculate_hash(flowgraph)
예제 #5
0
  def test_construction(self):
    """ Tests that a small change to a graph only slightly changes its hash.

    Builds a five-node CFG by hand, hashes it, adds one extra node with an
    edge into the first node, hashes again and requires the two hashes to
    stay more than 70% similar.
    """
    flowgraph = functionsimsearch.FlowgraphWithInstructions()
    # Create an example CFG: (node address, ((mnemonic, operands), ...)).
    nodedata = [
      (0x5A5F6179, (("mov", ()), ("shr", ()), ("xor", ()), ("jz", ()))),
      (0x5A5F6187, (("mov", ()), ("and", ()), ("cmp", ()), ("jz", ()))),
      (0x5A5F6195, (("mov", ()), ("jmp", ()))),
      (0x5A5F6184, (("mov", ()), ("ret", ()))),
      (0x5A5F6182, (("xor", ()),)) ]

    # NOTE(review): the target of the last edge, 0x5A5F5195, matches no node
    # in nodedata (the closest is 0x5A5F6195) - possibly a typo; confirm
    # before changing, since it affects the graph being hashed.
    edgedata = [
      (0x5A5F6179, 0x5A5F6187),
      (0x5A5F6179, 0x5A5F6182),
      (0x5A5F6182, 0x5A5F6184),
      (0x5A5F6187, 0x5A5F6184),
      (0x5A5F6187, 0x5A5F5195)]

    for node_address, _ in nodedata:
      flowgraph.add_node(node_address)
    for source, destination in edgedata:
      flowgraph.add_edge(source, destination)
    for node_address, node_instructions in nodedata:
      flowgraph.add_instructions(node_address, node_instructions)

    # Now calculate the similarity hash of the graph.
    hasher = functionsimsearch.SimHasher()
    function_hash = hasher.calculate_hash(flowgraph)

    # Make a minor change to the graph (adding an extra node (5 node graph
    # becomes a 6-node graph).
    flowgraph.add_node(0)
    flowgraph.add_edge(0, nodedata[0][0])

    # Hash the changed version.
    function_hash2 = hasher.calculate_hash(flowgraph)

    # Calculate the distance between the two hashes - simply hamming distance
    # between bit vectors:
    distance = (popcount(function_hash[0] ^ function_hash2[0]) +
                popcount(function_hash[1] ^ function_hash2[1]))

    # Normalise by 128, the divisor this test uses for the two hash elements.
    similarity = 1.0 - (distance / 128.0)
    # assertGreater reports the actual similarity when the check fails.
    self.assertGreater(similarity, 0.7)
예제 #6
0
def get_flowgraph_from(address, ignore_instructions=False):
    """
    Build a FunctionSimSearch flowgraph for the IDA function at an address.

    Args:
        address: An address inside the function to convert. A falsy value
            (None or 0) falls back to the current cursor position, here().
        ignore_instructions: When true, only nodes and edges are added and no
            instructions are attached to the nodes.

    Returns:
        A functionsimsearch.FlowgraphWithInstructions with one node per IDA
        basic block plus one node per sub-block returned by
        split_instruction_list, and the edges between them.
    """
    # The mnemonic used for splitting is not a parameter; it is looked up as
    # a global name when the function starts.
    split_mnemonic = call_instruction_string
    if not address:
        address = here()
    ida_flowgraph = idaapi.FlowChart(idaapi.get_func(address))
    flowgraph = functionsimsearch.FlowgraphWithInstructions()

    def operand_text(ea, operand_index):
        # There seems to be no good way to get operands without IDA
        # substituting local variable names etc., so this is a very ugly hack
        # to deal with that.
        # TODO(thomasdullien): IDA 7.2 will provide a way to perform
        # print_operand without replacement (by providing empty type
        # arguments?), replace the hack here with a "proper" solution.
        return print_operand(ea, operand_index).replace("+var_", "-0x")

    # Register every IDA basic block first, so edges can be added later
    # without caring about ordering; adding a node twice does not hurt.
    for basic_block in ida_flowgraph:
        flowgraph.add_node(basic_block.start_ea)

    for basic_block in ida_flowgraph:
        # Each entry is (address, mnemonic, (operand 0 text, operand 1 text)).
        instructions = [
            (ea, GetMnem(ea), (operand_text(ea, 0), operand_text(ea, 1)))
            for ea in Heads(basic_block.start_ea, basic_block.end_ea)
        ]
        small_blocks = split_instruction_list(instructions, split_mnemonic)

        for piece in small_blocks:
            # A sub-block is identified by the address of its first instruction.
            piece_start = piece[0][0]
            flowgraph.add_node(piece_start)
            # Drop the leading address: only (mnemonic, operands) is stored.
            piece_instructions = tuple(entry[1:] for entry in piece)
            if ignore_instructions:
                continue
            flowgraph.add_instructions(piece_start, piece_instructions)

        # Chain consecutive sub-blocks of the same basic block together.
        for current, following in zip(small_blocks, small_blocks[1:]):
            flowgraph.add_edge(current[0][0], following[0][0])

        # The last sub-block inherits the basic block's outgoing edges.
        for successor_block in basic_block.succs():
            flowgraph.add_edge(small_blocks[-1][0][0],
                               successor_block.start_ea)

    return flowgraph