def test_json(self):
        """Test JSON decoding of graphs.

        Loads a serialized CFG through ``from_json``, hashes the resulting
        flowgraph with a default-constructed ``SimHasher`` and compares the
        first element of the hash with a known constant.
        """
        jsonstring = """{"edges":[{"destination":1518838580,"source":1518838565},{"destination":1518838572,"source":1518838565},{"destination":1518838578,"source":1518838572},{"destination":1518838574,"source":1518838572},{"destination":1518838580,"source":1518838574},{"destination":1518838578,"source":1518838574},{"destination":1518838580,"source":1518838578}],"name":"CFG","nodes":[{"address":1518838565,"instructions":[{"mnemonic":"xor","operands":["EAX","EAX"]},{"mnemonic":"cmp","operands":["[ECX + 4]","EAX"]},{"mnemonic":"jnle","operands":["5a87a334"]}]},{"address":1518838572,"instructions":[{"mnemonic":"jl","operands":["5a87a332"]}]},{"address":1518838574,"instructions":[{"mnemonic":"cmp","operands":["[ECX]","EAX"]},{"mnemonic":"jnb","operands":["5a87a334"]}]},{"address":1518838578,"instructions":[{"mnemonic":"mov","operands":["AL","1"]}]},{"address":1518838580,"instructions":[{"mnemonic":"ret near","operands":["[ESP]"]}]}]}"""

        fg = functionsimsearch.FlowgraphWithInstructions()
        fg.from_json(jsonstring)
        hasher = functionsimsearch.SimHasher()
        function_hash = hasher.calculate_hash(fg)
        # assertEqual reports both values on a mismatch, whereas
        # assertTrue(a == b) only reports "False is not true".
        self.assertEqual(function_hash[0], 0xa7ef296fa5dea3ee)
예제 #2
0
  def test_hasher_with_weights(self):
    """Tests whether the loading of a weights file works.

    Decodes a CFG from JSON, hashes it with a ``SimHasher`` constructed from
    a weights file and compares the first element of the hash with a known
    constant.
    """
    jsonstring = """{"edges":[{"destination":1518838580,"source":1518838565},{"destination":1518838572,"source":1518838565},{"destination":1518838578,"source":1518838572},{"destination":1518838574,"source":1518838572},{"destination":1518838580,"source":1518838574},{"destination":1518838578,"source":1518838574},{"destination":1518838580,"source":1518838578}],"name":"CFG","nodes":[{"address":1518838565,"instructions":[{"mnemonic":"xor","operands":["EAX","EAX"]},{"mnemonic":"cmp","operands":["[ECX + 4]","EAX"]},{"mnemonic":"jnle","operands":["5a87a334"]}]},{"address":1518838572,"instructions":[{"mnemonic":"jl","operands":["5a87a332"]}]},{"address":1518838574,"instructions":[{"mnemonic":"cmp","operands":["[ECX]","EAX"]},{"mnemonic":"jnb","operands":["5a87a334"]}]},{"address":1518838578,"instructions":[{"mnemonic":"mov","operands":["AL","1"]}]},{"address":1518838580,"instructions":[{"mnemonic":"ret near","operands":["[ESP]"]}]}]}"""

    fg = functionsimsearch.FlowgraphWithInstructions()
    fg.from_json(jsonstring)
    # NOTE(review): the weights path is relative, so this presumably only
    # works when the test is run from the expected working directory.
    hasher = functionsimsearch.SimHasher("../testdata/weights.txt")
    function_hash = hasher.calculate_hash(fg)
    # assertEqual reports both values on a mismatch, whereas
    # assertTrue(a == b) only reports "False is not true".
    self.assertEqual(function_hash[0], 0xa6ef292a658e83ee)
예제 #3
0
    def extract_flowgraph_hash(self, function, minimum_size=5):
        """
        Build a FunctionSimSearch flowgraph for ``function`` and hash it.

        ``function`` is iterated to obtain basic blocks. Each block must
        expose ``start`` and ``outgoing_edges`` and iterate to instruction
        entries where ``entry[0][0].text`` is the mnemonic and ``entry[1]``
        is the amount added to the running offset inside the block
        (presumably the instruction length in bytes).

        Returns ``(None, None)`` when the flowgraph reports fewer than
        ``minimum_size`` branching nodes; otherwise returns whatever
        ``fss.SimHasher().calculate_hash`` produces for the flowgraph.
        """
        node_list = []
        edge_list = []

        # Collect nodes and edges from the control-flow graph.
        for block in function:
            mnemonics = []
            consumed = 0
            node_start = block.start

            for instruction in block:
                mnemonic = instruction[0][0].text
                mnemonics.append(mnemonic)
                consumed += instruction[1]

                if mnemonic != 'call':
                    continue

                # Split on call with assumption that we only care about
                # x86/64 for now: close the current node, link it to the
                # address right after the call, and continue from there.
                split_address = block.start + consumed
                node_list.append((node_start, mnemonics))
                mnemonics = []
                edge_list.append((node_start, split_address))
                node_start = split_address

            edge_list.extend(
                (node_start, edge.target.start)
                for edge in block.outgoing_edges)

            if mnemonics:
                node_list.append((node_start, mnemonics))
            else:
                # No instructions remain for the trailing node (e.g. the
                # block ended on a call), so the most recently recorded
                # edge is dropped.
                edge_list.pop()

        # Populate the flowgraph object.
        flowgraph = fss.FlowgraphWithInstructions()

        for address, node_mnemonics in node_list:
            flowgraph.add_node(address)
            # Each instruction is passed as (mnemonic, operands) with an
            # empty operand tuple.
            flowgraph.add_instructions(
                address, tuple((name, ()) for name in node_mnemonics))

        for source, destination in edge_list:
            flowgraph.add_edge(source, destination)

        if flowgraph.number_of_branching_nodes() < minimum_size:
            return (None, None)

        return fss.SimHasher().calculate_hash(flowgraph)
예제 #4
0
  def test_construction(self):
    """Checks that a small change to a graph yields a similar hash.

    Builds a five-node CFG by hand, hashes it, adds one extra node and edge,
    hashes it again and asserts that the similarity derived from the Hamming
    distance of the two hashes exceeds 0.7.
    """
    flowgraph = functionsimsearch.FlowgraphWithInstructions()
    # Create an example CFG.
    nodedata = [
      (0x5A5F6179, (("mov", ()), ("shr", ()), ("xor", ()), ("jz", ()))),
      (0x5A5F6187, (("mov", ()), ("and", ()), ("cmp", ()), ("jz", ()))),
      (0x5A5F6195, (("mov", ()), ("jmp", ()))),
      (0x5A5F6184, (("mov", ()), ("ret", ()))),
      (0x5A5F6182, (("xor", ()),)) ]

    # NOTE(review): the last edge targets 0x5A5F5195, which matches no address
    # in nodedata (0x5A5F6195 looks like the intended target) - confirm before
    # changing, since it affects the computed hash.
    edgedata = [
      (0x5A5F6179, 0x5A5F6187),
      (0x5A5F6179, 0x5A5F6182),
      (0x5A5F6182, 0x5A5F6184),
      (0x5A5F6187, 0x5A5F6184),
      (0x5A5F6187, 0x5A5F5195)]

    for n in nodedata:
      flowgraph.add_node(n[0])
    for e in edgedata:
      flowgraph.add_edge(e[0], e[1])
    for n in nodedata:
      flowgraph.add_instructions(n[0], n[1])

    # Now calculate the similarity hash of the graph.
    hasher = functionsimsearch.SimHasher()
    function_hash = hasher.calculate_hash(flowgraph)

    # Make a minor change to the graph (adding an extra node, so the 5-node
    # graph becomes a 6-node graph).
    flowgraph.add_node(0)
    flowgraph.add_edge(0, nodedata[0][0])

    # Hash the changed version.
    function_hash2 = hasher.calculate_hash(flowgraph)

    # Calculate the distance between the two hashes - simply hamming distance
    # between bit vectors:
    distance = popcount(function_hash[0]^function_hash2[0]) +\
      popcount(function_hash[1]^function_hash2[1])

    # The distance is normalised by 128.0 (presumably the hash width in bits).
    # assertGreater reports the actual similarity when the check fails.
    similarity = 1.0 - (distance/128.0)
    self.assertGreater(similarity, 0.7)
예제 #5
0
        del hotkey_context_L
        del hotkey_context_H
        del hotkey_context_A
        del hotkey_context_M
    else:
        print("FunctionSimSearch: Hotkeys registered.")

    # Ask for a new index only when no index file exists on disk yet.
    create_index = True
    if os.path.isfile(index_file):
        create_index = False
    if os.path.isfile(metadata_file):
        print("Parsing meta_data from file %s" % metadata_file)
        meta_data = parse_function_meta_data(metadata_file)
        print("Parsed meta_data.")
        # Print the first ten keys, each formatted as a pair of hex values.
        # list() is required because dict views cannot be sliced on Python 3;
        # on Python 2 the result is unchanged.
        for i in list(meta_data.keys())[0:10]:
            print("%lx:%lx" % i)
    else:
        meta_data = {}
    print("Calling functionsimsearch.SimHashSearchIndex(\"%s\", %s, 50)" %
          (index_file, create_index))
    try:
        search_index = functionsimsearch.SimHashSearchIndex(
            index_file, create_index, 50)
        # Use the weights file when present, otherwise a default hasher.
        if os.path.isfile(weights_file):
            print("Calling functionsimsearch.SimHasher(\"%s\")" % weights_file)
            sim_hasher = functionsimsearch.SimHasher(weights_file)
        else:
            sim_hasher = functionsimsearch.SimHasher()
    except Exception as error:
        # Still best-effort, but KeyboardInterrupt/SystemExit are no longer
        # swallowed and the underlying cause is shown.
        print("Failure to create/open the search index!")
        print("Reason: %s" % error)
예제 #6
0
        del hotkey_context_L
    else:
        print("FunctionSimSearch: Failed to unregister hotkey L.")
    search_index
    sim_hasher
    del search_index
    del sim_hasher
except:
    # Register the save/load hotkeys; add_hotkey results of None are treated
    # as failure below.
    hotkey_context_S = idaapi.add_hotkey("Shift-S", save_function)
    hotkey_context_L = idaapi.add_hotkey("Shift-L", load_function)
    if hotkey_context_S is None or hotkey_context_L is None:
        print("FunctionSimSearch: Failed to register hotkeys.")
        del hotkey_context_S
        del hotkey_context_L
    else:
        print("FunctionSimSearch: Hotkeys registered.")
    # Ask for a new index only when no index file exists on disk yet.
    create_index = True
    if os.path.isfile('/tmp/example.simhash'):
        create_index = False
    if os.path.isfile('/tmp/example.simhash.meta'):
        print("Parsing meta_data")
        meta_data = parse_function_meta_data('/tmp/example.simhash.meta')
        print("Parsed meta_data")
        # Print the first ten keys, each formatted as a pair of hex values.
        # list() is required because dict views cannot be sliced on Python 3;
        # on Python 2 the result is unchanged.
        for i in list(meta_data.keys())[0:10]:
            print("%lx:%lx" % i)
    else:
        meta_data = {}
    search_index = functionsimsearch.SimHashSearchIndex(
        "/tmp/example.simhash", create_index, 28)
    sim_hasher = functionsimsearch.SimHasher()
예제 #7
0
        del hotkey_context_L
    else:
        print("FunctionSimSearch: Failed to unregister hotkey L.")
    search_index
    sim_hasher
    del search_index
    del sim_hasher
except:
    # Register the save/load hotkeys; add_hotkey results of None are treated
    # as failure below.
    hotkey_context_S = idaapi.add_hotkey("Shift-S", save_function)
    hotkey_context_L = idaapi.add_hotkey("Shift-L", load_function)
    if hotkey_context_S is None or hotkey_context_L is None:
        print("FunctionSimSearch: Failed to register hotkeys.")
        del hotkey_context_S
        del hotkey_context_L
    else:
        print("FunctionSimSearch: Hotkeys registered.")
    # Ask for a new index only when no index file exists on disk yet.
    create_index = True
    if os.path.isfile('/tmp/example.simhash'):
        create_index = False
    if os.path.isfile('/tmp/example.simhash.meta'):
        print("Parsing meta_data")
        meta_data = parse_function_meta_data('/tmp/example.simhash.meta')
        print("Parsed meta_data")
        # Print the first ten keys, each formatted as a pair of hex values.
        # list() is required because dict views cannot be sliced on Python 3;
        # on Python 2 the result is unchanged.
        for i in list(meta_data.keys())[0:10]:
            print("%lx:%lx" % i)
    else:
        meta_data = {}
    search_index = functionsimsearch.SimHashSearchIndex(
        "/tmp/example.simhash", create_index, 28)
    # The hasher is always built from the weights file at this fixed path.
    sim_hasher = functionsimsearch.SimHasher("/tmp/example.simhash.weights")