Esempio n. 1
0
    def test_open_empty(self):
        """A Metadata built on TESTFILE must expose the default entries.

        The intent is that TESTFILE does not exist at this point, so only the
        built-in defaults can be returned.
        """
        meta = Metadata(TESTFILE)

        # (key, expected default) pairs, checked in this order.
        expected_defaults = (
            ('antenna', 'unknown'),
            ('antenna-type', 'unknown'),
            ('receiver', 'RTL-SDR v3'),
            ('lna', 'none'),
            ('filter', 'none'),
        )
        for key, default in expected_defaults:
            self.assertEqual(meta.get(key), default)
Esempio n. 2
0
    def test_add_key(self):
        """Keys can be added, read back, and overwritten via set()."""
        meta = Metadata(TESTFILE)

        # Add two new keys and read them back.
        meta.set('foo', 'bar')
        meta.set('baz', 123)
        self.assertEqual(meta.get('foo'), 'bar')
        self.assertEqual(meta.get('baz'), 123)

        # Overwrite the same key several times; the last value must win.
        for value in (1, None, 123):
            meta.set('baz', value)
        self.assertEqual(meta.get('baz'), 123)
Esempio n. 3
0
    def test_del_key(self):
        """Deleting keys works for default, user-added and missing entries."""
        meta = Metadata(TESTFILE)
        meta.set('foo', 'bar')

        # Before deletion: one default entry and one user entry are present.
        self.assertEqual(meta.get('antenna'), 'unknown')
        self.assertEqual(meta.get('foo'), 'bar')

        for key in ('foo', 'antenna'):
            meta.delete(key)

        # After deletion both lookups yield the empty string.
        for key in ('foo', 'antenna'):
            self.assertEqual(meta.get(key), '')

        # Deleting a key that no longer exists must not raise.
        for _ in range(3):
            meta.delete('foo')
Esempio n. 4
0
class Plugin:
    """Binary Ninja plugin actions backed by a FunctionSimSearch index.

    The plugin stores SimHash values of functions in a search index file and
    keeps human-readable information (file name, function name) in a
    companion ``Metadata`` store located at ``<index path> + '.meta'``.
    """

    def __init__(self):
        # Path of the SimHash index file; filled in lazily by init_db().
        self.sim_hash_location = None
        # Metadata store paired with the index; filled in by init_db().
        self.metadata = None

    def init_db(self):
        """Ask the user for the SimHash database and open its metadata store.

        When the file dialog yields no path, ``default_sim_hash_location`` is
        used instead.
        """
        location = bn.interaction.get_open_filename_input(
            "Load SimHash database", ".simhash")
        if not location:
            bn.log_info(
                "[*] Using default location for SimHash database: {}".format(
                    default_sim_hash_location))
            location = default_sim_hash_location

        self.sim_hash_location = location
        self.metadata = Metadata(location + '.meta')

    def extract_flowgraph_hash(self, function, minimum_size=5):
        """Build a FunctionSimSearch flowgraph for ``function`` and hash it.

        Args:
            function: iterable of basic blocks; each block is iterated for
                its instructions and provides ``start`` and
                ``outgoing_edges``.
            minimum_size: minimum number of branching nodes the flowgraph
                must have to be hashed.

        Returns:
            The value of ``SimHasher.calculate_hash`` (callers unpack it into
            two hash values), or ``(None, None)`` when the flowgraph has
            fewer than ``minimum_size`` branching nodes.
        """
        nodes = []  # (node address, [mnemonic, ...])
        graph = []  # (source address, target address)

        # Retrieve CFG data
        for block in function:
            local_node = []
            shift = 0
            position = block.start

            for instruction in block:
                # instruction[0][0].text is the first token (the mnemonic);
                # instruction[1] is accumulated as the offset of the next
                # instruction from the start of the block.
                mnemonic = instruction[0][0].text
                local_node.append(mnemonic)
                shift += instruction[1]

                # Split on call with the assumption that we only care about
                # x86/64 for now.
                if mnemonic == 'call':
                    nodes.append((position, local_node))
                    local_node = []
                    graph.append((position, block.start + shift))
                    position = block.start + shift

            for edge in block.outgoing_edges:
                graph.append((position, edge.target.start))

            if local_node:
                nodes.append((position, local_node))
            else:
                # No instructions follow the last split, so there is no node
                # at `position`; drop the most recently recorded edge.
                # NOTE(review): if the block also has outgoing edges this
                # removes the last of those rather than the call-split edge,
                # and it raises IndexError on an empty edge list - confirm
                # the intended behaviour.
                graph.pop(-1)

        # Generate flowgraph
        flowgraph = fss.FlowgraphWithInstructions()

        for address, mnemonics in nodes:
            flowgraph.add_node(address)
            # Format conversion: each instruction becomes (mnemonic, ()).
            flowgraph.add_instructions(
                address, tuple((mnemonic, ()) for mnemonic in mnemonics))

        for source, target in graph:
            flowgraph.add_edge(source, target)

        if flowgraph.number_of_branching_nodes() < minimum_size:
            return (None, None)

        hasher = fss.SimHasher()
        return hasher.calculate_hash(flowgraph)

    def get_exec_id(self, filename):
        """Return an integer id for the file at ``filename``.

        The id is the first 16 hex digits (64 bits) of the SHA-256 digest of
        the file contents.
        """
        h = hashlib.sha256()
        # Binary mode: the digest must be computed over the raw bytes, and
        # hashlib rejects text strings on Python 3.
        with open(filename, 'rb') as fh:
            for chunk in iter(lambda: fh.read(65536), b''):
                h.update(chunk)

        # int() yields an arbitrary-precision integer on both Python 2 and 3.
        return int(h.hexdigest()[0:16], 16)

    def save_single_function_hash(self, bv, search_index, function,
                                  exec_id=None):
        """Save the hash of a given function into a given search index.

        Args:
            bv: binary view; ``bv.file.filename`` names the analysed file.
            search_index: index object providing ``add_function``.
            function: function to hash and store.
            exec_id: optional precomputed result of ``get_exec_id`` for the
                file, so bulk callers do not re-hash the file per function.
                Computed here when omitted.
        """
        # TODO: detect if we are opening database instead of binary
        if exec_id is None:
            exec_id = self.get_exec_id(bv.file.filename)

        h1, h2 = self.extract_flowgraph_hash(function)
        if h1 and h2:
            search_index.add_function(h1, h2, exec_id, function.start)
            bn.log_info(
                '[+] Added function <{:x}:0x{:x} {:x}-{:x}> to search index.'.
                format(exec_id, function.start, h1, h2))
            self.metadata.add(exec_id, function.start, bv.file.filename,
                              function.name)
        else:
            bn.log_info(
                '[-] Did not add function <{:x}:0x{:x}> to search index.'.
                format(exec_id, function.start))

    def init_index(self, bv, current_function):
        """Open (or create) the search index for the configured location.

        Runs ``init_db`` first when no location has been chosen yet.

        Returns:
            The ``SimHashSearchIndex`` object, or ``-1`` when the platform of
            ``bv`` is not listed in ``supported_arch``.
        """
        if not self.sim_hash_location:
            self.init_db()

        # Supported platform check
        if bv.platform.name not in supported_arch:
            bn.log_error(
                '[!] Right now this plugin supports only the following architectures: '
                + str(supported_arch))
            return -1

        # Create the index only when the file does not exist yet.
        create_index = not os.path.isfile(self.sim_hash_location)

        # NOTE(review): the meaning of the third argument (50) is not visible
        # here - confirm against the FunctionSimSearch bindings.
        return fss.SimHashSearchIndex(self.sim_hash_location, create_index,
                                      50)

    def save_hash(self, bv, current_function):
        """Save hash of current function into search index.

        Returns ``-1`` without saving when the index could not be opened.
        """
        search_index = self.init_index(bv, current_function)
        if search_index == -1:
            return -1
        self.save_single_function_hash(bv, search_index, current_function)

    def save_all_functions(self, bv, current_function):
        """Walk through all functions and save them into the index.

        Returns ``-1`` without saving when the index could not be opened.
        """
        search_index = self.init_index(bv, current_function)
        if search_index == -1:
            return -1

        # Hash the file once instead of once per function.
        exec_id = self.get_exec_id(bv.file.filename)
        for function in bv.functions:
            self.save_single_function_hash(bv, search_index, function,
                                           exec_id)

    def add_report_from_result(self,
                               results,
                               report,
                               address,
                               minimal_match=100):
        """Append a markdown section for ``results`` to ``report``.

        Args:
            results: sequence of entries indexed as ``r[0]`` (score),
                ``r[1]`` (executable id) and ``r[2]`` (function address).
            report: markdown text accumulated so far.
            address: address of the queried function, used in the heading.
            minimal_match: only entries with a score strictly greater than
                this value are reported.

        Returns:
            ``report`` extended by one heading and one bullet per reported
            entry, or ``report`` unchanged when no entry passes the filter.
        """
        results = [r for r in results if r[0] > minimal_match]
        if not results:
            return report

        lines = ["## Best match results for 0x{:x}".format(address)]
        for r in results:
            # Map the raw score onto the displayed value: scores up to 64
            # give 0.0 and a score of 128 gives 1.0.
            similarity = max(float(r[0]) / 128.0 - 0.5, 0.0) * 2
            m = self.metadata.get(r[1], r[2])  # file name, function name
            if not m:
                lines.append("- {:f} - {:x}:0x{:x}".format(
                    similarity, r[1], r[2]))
            else:
                lines.append("- {:f} - {:x}:0x{:x} {} '{}'".format(
                    similarity, r[1], r[2], m[0], m[1]))

        return report + "\n".join(lines) + "\n"

    def find_function_hash(self, bv, h1, h2, address, search_index, report):
        """Query the index for the top 5 matches and append them to ``report``."""
        results = search_index.query_top_N(h1, h2, 5)
        return self.add_report_from_result(results, report, address)

    def find_hash(self, bv, current_function):
        """Find functions similar to the current one and show a report.

        Returns ``-1`` without searching when the index could not be opened.
        """
        search_index = self.init_index(bv, current_function)
        if search_index == -1:
            return -1

        h1, h2 = self.extract_flowgraph_hash(current_function)
        if h1 and h2:
            report = self.find_function_hash(bv, h1, h2,
                                             current_function.start,
                                             search_index, "")
            bn.interaction.show_markdown_report(
                'Function Similarity Search Report', report)
        else:
            # No hash was produced, so there is nothing to query with.
            bn.log_info('[-] Did not search for function 0x{:x}.'.format(
                current_function.start))

    def find_all_hashes(self, bv, current_function):
        """Search the index for every function of ``bv`` and show one report.

        Returns ``-1`` without searching when the index could not be opened.
        """
        search_index = self.init_index(bv, current_function)
        if search_index == -1:
            return -1

        report = ""
        for function in bv.functions:
            h1, h2 = self.extract_flowgraph_hash(function)
            if h1 and h2:
                report = self.find_function_hash(bv, h1, h2, function.start,
                                                 search_index, report)
            else:
                bn.log_info('[-] Did not search for function 0x{:x}.'.format(
                    function.start))

        bn.interaction.show_markdown_report(
            'Function Similarity Search Report', report)
Esempio n. 5
0
def entry_exists(date):
    """Return True when the metadata for ``date`` holds data for that day.

    The Metadata object for the date's year and month is looked up, and the
    entry counts as existing when its data for the day is not None.
    """
    from metadata import Metadata

    month_metadata = Metadata.get(date.year, date.month)
    return month_metadata.get_data_for_day(date.day) is not None
Esempio n. 6
0
class Plugin:
  """Binary Ninja plugin actions backed by a FunctionSimSearch index.

  SimHash values of functions are stored in a search index file; file and
  function names are kept in a companion ``Metadata`` store located at
  ``<index path> + '.meta'``.
  """

  def __init__(self):
    # Path of the SimHash index file; filled in lazily by init_db().
    self.sim_hash_location = None
    # Metadata store paired with the index; filled in by init_db().
    self.metadata = None

  def init_db(self):
    """Ask the user for the SimHash database and open its metadata store.

    When the file dialog yields no path, ``default_sim_hash_location`` is
    used instead.
    """
    location = bn.interaction.get_open_filename_input("Load SimHash database", ".simhash")
    if not location:
      bn.log_info("[*] Using default location for SimHash database: {}".format(default_sim_hash_location))
      location = default_sim_hash_location

    self.sim_hash_location = location
    self.metadata = Metadata(location + '.meta')

  def extract_flowgraph_hash(self, function):
    """Build a FunctionSimSearch flowgraph for ``function`` and hash it.

    Args:
      function: iterable of basic blocks; each block is iterated for its
        instructions and provides ``start`` and ``outgoing_edges``.

    Returns:
      The value of ``SimHasher.calculate_hash`` for the flowgraph (callers
      unpack it into two hash values).
    """
    nodes = []  # (node address, [mnemonic, ...])
    graph = []  # (source address, target address)

    # Retrieve CFG data
    for block in function:
      local_node = []
      shift = 0
      position = block.start

      for instruction in block:
        # instruction[0][0].text is the first token (the mnemonic);
        # instruction[1] is accumulated as the offset of the next
        # instruction from the start of the block.
        mnemonic = instruction[0][0].text
        local_node.append(mnemonic)
        shift += instruction[1]

        # Split on call with the assumption that we only care about x86/64
        # for now.
        if mnemonic == 'call':
          nodes.append((position, local_node))
          local_node = []
          graph.append((position, block.start + shift))
          position = block.start + shift

      for edge in block.outgoing_edges:
        graph.append((position, edge.target.start))

      if local_node:
        nodes.append((position, local_node))
      else:
        # No instructions follow the last split, so there is no node at
        # `position`; drop the most recently recorded edge.
        # NOTE(review): if the block also has outgoing edges this removes the
        # last of those rather than the call-split edge, and it raises
        # IndexError on an empty edge list - confirm the intended behaviour.
        graph.pop(-1)

    # Generate flowgraph
    flowgraph = fss.FlowgraphWithInstructions()

    for address, mnemonics in nodes:
      flowgraph.add_node(address)
      # Format conversion: each instruction becomes (mnemonic, ()).
      flowgraph.add_instructions(address, tuple((mnemonic, ()) for mnemonic in mnemonics))

    for source, target in graph:
      flowgraph.add_edge(source, target)

    hasher = fss.SimHasher()
    return hasher.calculate_hash(flowgraph)

  def get_exec_id(self, filename):
    """Return an integer id for the file at ``filename``.

    The id is the first 16 hex digits (64 bits) of the SHA-256 digest of the
    file contents.
    """
    h = hashlib.sha256()
    # Binary mode: the digest must be computed over the raw bytes, and
    # hashlib rejects text strings on Python 3.
    with open(filename, 'rb') as fh:
      for chunk in iter(lambda: fh.read(65536), b''):
        h.update(chunk)

    # int() yields an arbitrary-precision integer on both Python 2 and 3.
    return int(h.hexdigest()[0:16], 16)

  def _open_search_index(self, bv):
    """Open (or create) the search index; return None on unsupported platform."""
    if not self.sim_hash_location:
      self.init_db()

    # Supported platform check
    if bv.platform.name not in supported_arch:
      bn.log_error('[!] Right now this plugin supports only the following architectures: ' + str(supported_arch))
      return None

    # Create the index only when the file does not exist yet.
    create_index = not os.path.isfile(self.sim_hash_location)

    # NOTE(review): the meaning of the third argument (28) is not visible
    # here - confirm against the FunctionSimSearch bindings.
    return fss.SimHashSearchIndex(self.sim_hash_location, create_index, 28)

  def save_hash(self, bv, current_function):
    """Save hash of current function into search index.

    Returns ``-1`` without saving when the platform of ``bv`` is not listed
    in ``supported_arch``.
    """
    search_index = self._open_search_index(bv)
    if search_index is None:
      return -1

    h1, h2 = self.extract_flowgraph_hash(current_function)

    # TODO: detect if we are opening database instead of binary
    exec_id = self.get_exec_id(bv.file.filename)
    search_index.add_function(h1, h2, exec_id, current_function.start)
    bn.log_info('[+] Added function <{:x}:0x{:x} {:x}-{:x}> to search index.'.format(exec_id, current_function.start, h1, h2))

    self.metadata.add(exec_id, current_function.start, bv.file.filename, current_function.name)

  def find_hash(self, bv, current_function):
    """Find functions similar to the current one and show a markdown report.

    Returns ``-1`` without searching when the platform of ``bv`` is not
    listed in ``supported_arch``.
    """
    search_index = self._open_search_index(bv)
    if search_index is None:
      return -1

    h1, h2 = self.extract_flowgraph_hash(current_function)
    results = search_index.query_top_N(h1, h2, 5)

    # TODO: refactor, possibly with report template
    if len(results) == 0:
      report = "# No similar functions found"
    else:
      # TODO: add better header, but that will require some refactoring of extract function
      lines = ["# Best match results"]
      for r in results:
        # Raw result kept as a debug log entry instead of a stray print.
        bn.log_debug(str(r))

        # Map the raw score onto the displayed value: scores up to 64 give
        # 0.0 and a score of 128 gives 1.0.
        similarity = max(float(r[0]) / 128.0 - 0.5, 0.0) * 2
        m = self.metadata.get(r[1], r[2])  # file name, function name

        # `not m` also covers a lookup that returns None.
        if not m:
          lines.append("- {:f} - {:x}:0x{:x}".format(similarity, r[1], r[2]))
        else:
          lines.append("- {:f} - {:x}:0x{:x} {} '{}'".format(similarity, r[1], r[2], m[0], m[1]))

      report = "\n".join(lines) + "\n"

    # Display results
    bn.interaction.show_markdown_report('Function Similarity Search Report', report)