Example 1
class Plugin:
  def __init__(self):
    self.sim_hash_location = None
    self.metadata = None

  def init_db(self):
    # Fetch location
    location = bn.interaction.get_open_filename_input("Load SimHash database", ".simhash")
    if not location:
      bn.log_info("[*] Using default location for SimHash database: {}".format(default_sim_hash_location))
      location = default_sim_hash_location

    # setup metadata class
    self.sim_hash_location = location
    self.metadata = Metadata(location + '.meta')

  def extract_flowgraph_hash(self, function):
    """
      Generates a flowgraph object that can be fed into FunctionSimSearch from a
      given address in Binary Ninja and returns a set of hashes.
    """

    nodes = []
    graph = []

    # Retrieve CFG data
    for block in function:
      local_node = []
      shift = 0
      position = block.start

      for instruction in block:
        local_node.append(instruction[0][0].text)
        shift += instruction[1]

        if instruction[0][0].text == 'call': # Split on call with assumption that we only care about x86/64 for now
          nodes.append((position, local_node))
          local_node = []
          graph.append((position, block.start+shift))
          position = block.start + shift
      
      for edge in block.outgoing_edges:
        graph.append((position, edge.target.start))

      if local_node:
        nodes.append((position, local_node))
      else:
        graph.pop(-1)

    # Generate flowgraph
    flowgraph = fss.FlowgraphWithInstructions()

    for node in nodes:
      flowgraph.add_node(node[0])
      flowgraph.add_instructions(node[0], tuple([(i, ()) for i in node[1]]))  # Format conversion

    for edge in graph:
      flowgraph.add_edge(edge[0], edge[1])
    
    hasher = fss.SimHasher()

    return hasher.calculate_hash(flowgraph)


  def get_exec_id(self, filename):
    h = hashlib.sha256()
    with open(filename, 'rb') as fh:  # read as bytes so the digest covers the raw binary
      h.update(fh.read())

    return int(h.hexdigest()[0:16], 16)


  def save_hash(self, bv, current_function):
    """
      Save hash of current function into search index.
    """
    if not self.sim_hash_location:
      self.init_db()

    # Supported platform check
    if bv.platform.name not in supported_arch:
      bn.log_error('[!] Right now this plugin supports only the following architectures: ' + str(supported_arch))
      return -1

    h1, h2 = self.extract_flowgraph_hash(current_function)

    if os.path.isfile(self.sim_hash_location):
      create_index = False
    else:
      create_index = True
    
    search_index = fss.SimHashSearchIndex(self.sim_hash_location, create_index, 28)
    # TODO: detect if we are opening database instead of binary
    exec_id = self.get_exec_id(bv.file.filename)
    search_index.add_function(h1, h2, exec_id, current_function.start)
    bn.log_info('[+] Added function <{:x}:0x{:x} {:x}-{:x}> to search index.'.format(exec_id, current_function.start, h1, h2))
    
    self.metadata.add(exec_id, current_function.start, bv.file.filename, current_function.name)

  def find_hash(self, bv, current_function):
    """
      Find functions similar to the current one.
    """
    if not self.sim_hash_location:
      self.init_db()

    # Supported platform check
    if bv.platform.name not in supported_arch:
      bn.log_error('[!] Right now this plugin supports only the following architectures: ' + str(supported_arch))
      return -1
    
    h1, h2 = self.extract_flowgraph_hash(current_function)

    if os.path.isfile(self.sim_hash_location):
      create_index = False
    else:
      create_index = True
    
    search_index = fss.SimHashSearchIndex(self.sim_hash_location, create_index, 28)
    results = search_index.query_top_N(h1, h2, 5)

    # TODO: refactor, possibly with report template
    report = ""

    if len(results) == 0:
      report += "# No similar functions found"
    else:
      #TODO: add better header, but that will require some refactoring of extract function
      report += "# Best match results\n"
      for r in results:
        print(r)
        m = self.metadata.get(r[1], r[2]) # file name, function name
        
        if len(m) == 0: 
          line = "- {:f} - {:x}:0x{:x}".format(max(float(r[0]) / 128.0 - 0.5, 0.0)*2, r[1], r[2])
        else:
          line = "- {:f} - {:x}:0x{:x} {} '{}'".format(max(float(r[0]) / 128.0 - 0.5, 0.0)*2, r[1], r[2], m[0], m[1])

        report += line + "\n"

    # Display results
    bn.interaction.show_markdown_report('Function Similarity Search Report', report)
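The listing above references a few module-level names that are not shown: the bn and fss imports, default_sim_hash_location, supported_arch, and the Metadata helper. A minimal sketch of how the two commands might be wired into Binary Ninja follows; the menu labels and the functionsimsearch module name are assumptions, not part of the original plugin.

# Sketch: registering the Plugin commands with Binary Ninja.
# Assumptions: the FunctionSimSearch Python bindings import as 'functionsimsearch';
# default_sim_hash_location, supported_arch and Metadata are defined elsewhere
# in the plugin module (referenced above but not shown).
import binaryninja as bn
import functionsimsearch as fss

plugin = Plugin()

bn.PluginCommand.register_for_function(
    "SimHash\\Save function hash",
    "Hash the current function and add it to the SimHash search index.",
    plugin.save_hash)

bn.PluginCommand.register_for_function(
    "SimHash\\Find similar functions",
    "Query the SimHash search index for functions similar to the current one.",
    plugin.find_hash)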
Example 2
class Plugin:
    def __init__(self):
        self.sim_hash_location = None
        self.metadata = None

    def init_db(self):
        # Fetch location
        location = bn.interaction.get_open_filename_input(
            "Load SimHash database", ".simhash")
        if not location:
            bn.log_info(
                "[*] Using default location for SimHash database: {}".format(
                    default_sim_hash_location))
            location = default_sim_hash_location

        # setup metadata class
        self.sim_hash_location = location
        self.metadata = Metadata(location + '.meta')

    def extract_flowgraph_hash(self, function, minimum_size=5):
        """
      Generates a flowgraph object that can be fed into FunctionSimSearch from a
      given address in Binary Ninja and returns set of hashes.
    """

        nodes = []
        graph = []

        # Retrieve CFG data
        for block in function:
            local_node = []
            shift = 0
            position = block.start

            for instruction in block:
                local_node.append(instruction[0][0].text)
                shift += instruction[1]

                # Split on call with the assumption that we only care about x86/64 for now
                if instruction[0][0].text == 'call':
                    nodes.append((position, local_node))
                    local_node = []
                    graph.append((position, block.start + shift))
                    position = block.start + shift

            for edge in block.outgoing_edges:
                graph.append((position, edge.target.start))

            if local_node:
                nodes.append((position, local_node))
            else:
                graph.pop(-1)

        # Generate flowgraph
        flowgraph = fss.FlowgraphWithInstructions()

        for node in nodes:
            flowgraph.add_node(node[0])
            flowgraph.add_instructions(
                node[0], tuple([(i, ()) for i in node[1]]))  # Format conversion

        for edge in graph:
            flowgraph.add_edge(edge[0], edge[1])

        if flowgraph.number_of_branching_nodes() < minimum_size:
            return (None, None)
        hasher = fss.SimHasher()

        return hasher.calculate_hash(flowgraph)

    def get_exec_id(self, filename):
        h = hashlib.sha256()
        with open(filename, 'rb') as fh:  # read as bytes so the digest covers the raw binary
            h.update(fh.read())

        return int(h.hexdigest()[0:16], 16)

    def save_single_function_hash(self, bv, search_index, function):
        """
      Save the hash of a given function into a given search index.
    """
        # TODO: detect if we are opening database instead of binary
        exec_id = self.get_exec_id(bv.file.filename)
        h1, h2 = self.extract_flowgraph_hash(function)
        if h1 and h2:
            search_index.add_function(h1, h2, exec_id, function.start)
            bn.log_info(
                '[+] Added function <{:x}:0x{:x} {:x}-{:x}> to search index.'.
                format(exec_id, function.start, h1, h2))
            self.metadata.add(exec_id, function.start, bv.file.filename,
                              function.name)
        else:
            bn.log_info(
                '[-] Did not add function <{:x}:0x{:x}> to search index.'.
                format(exec_id, function.start))

    def init_index(self, bv, current_function):
        if not self.sim_hash_location:
            self.init_db()

        # Supported platform check
        if bv.platform.name not in supported_arch:
            bn.log_error(
                '[!] Right now this plugin supports only the following architectures: '
                + str(supported_arch))
            return -1

        if os.path.isfile(self.sim_hash_location):
            create_index = False
        else:
            create_index = True

        search_index = fss.SimHashSearchIndex(self.sim_hash_location,
                                              create_index, 50)
        return search_index

    def save_hash(self, bv, current_function):
        """
      Save hash of current function into search index.
    """
        search_index = self.init_index(bv, current_function)
        self.save_single_function_hash(bv, search_index, current_function)

    def save_all_functions(self, bv, current_function):
        """
      Walk through all functions and save them into the index.
    """
        search_index = self.init_index(bv, current_function)
        for function in bv.functions:
            self.save_single_function_hash(bv, search_index, function)

    def add_report_from_result(self,
                               results,
                               report,
                               address,
                               minimal_match=100):
        results = [r for r in results if r[0] > minimal_match]
        if len(results) > 0:
            report += "## Best match results for 0x{:x}\n".format(address)
            for r in results:
                m = self.metadata.get(r[1], r[2])  # file name, function name
                if not m or len(m) == 0:
                    line = "- {:f} - {:x}:0x{:x}".format(
                        max(float(r[0]) / 128.0 - 0.5, 0.0) * 2, r[1], r[2])
                else:
                    line = "- {:f} - {:x}:0x{:x} {} '{}'".format(
                        max(float(r[0]) / 128.0 - 0.5, 0.0) * 2, r[1], r[2],
                        m[0], m[1])
                report += line + "\n"
        return report

    def find_function_hash(self, bv, h1, h2, address, search_index, report):
        results = search_index.query_top_N(h1, h2, 5)
        return self.add_report_from_result(results, report, address)

    def find_hash(self, bv, current_function):
        """
      Find functions similar to the current one.
    """
        search_index = self.init_index(bv, current_function)
        h1, h2 = self.extract_flowgraph_hash(current_function)
        if h1 and h2:
            report = self.find_function_hash(bv, h1, h2,
                                             current_function.start,
                                             search_index, "")
            bn.interaction.show_markdown_report(
                'Function Similarity Search Report', report)
        else:
            bn.log_info(
                '[-] Did not search for function 0x{:x}.'.format(
                    current_function.start))

    def find_all_hashes(self, bv, current_function):
        search_index = self.init_index(bv, current_function)
        report = ""
        for function in bv.functions:
            h1, h2 = self.extract_flowgraph_hash(function)
            if h1 and h2:
                report = self.find_function_hash(bv, h1, h2, function.start,
                                                 search_index, report)
            else:
                bn.log_info('[-] Did not search for function 0x{:x}.'.format(
                    function.start))

        bn.interaction.show_markdown_report(
            'Function Similarity Search Report', report)
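Both versions rely on a Metadata helper whose implementation is not shown; only its interface is visible above: it is constructed from the '<db>.meta' path, add() is called with an exec id, an address, a file name and a function name, and get() returns a (file name, function name) pair or an empty result. A minimal sketch under those assumptions, with the tab-separated on-disk format being a guess:

# Sketch of the Metadata helper used above; the storage format is an assumption.
import os

class Metadata:
    def __init__(self, path):
        self.path = path
        self.entries = {}  # (exec_id, address) -> (file_name, function_name)
        if os.path.isfile(path):
            with open(path, 'r') as fh:
                for line in fh:
                    exec_id, address, file_name, function_name = line.rstrip('\n').split('\t')
                    self.entries[(int(exec_id, 16), int(address, 16))] = (file_name, function_name)

    def add(self, exec_id, address, file_name, function_name):
        self.entries[(exec_id, address)] = (file_name, function_name)
        with open(self.path, 'a') as fh:
            fh.write('{:x}\t{:x}\t{}\t{}\n'.format(exec_id, address, file_name, function_name))

    def get(self, exec_id, address):
        # Returns (file_name, function_name), or an empty list when unknown,
        # matching the len(m) == 0 checks in the report code above.
        return self.entries.get((exec_id, address), [])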
Example 3
class FreyaFS(Operations):
    def __init__(self, root, mountpoint):
        self.root = root

        # Retrieve FreyaFS metadata
        self.metadata = Metadata(os.path.join(root, ".freyafs"))
        # Keep track of open files
        self.cache = Cache()

        print(f"[*] FreyaFS mounted")
        print(f"Now, through the FreyaFS mountpoint ({mountpoint}), you can use a Mix&Slice encrypted filesystem seemlessly.")
        print(f"FreyaFS will persist your encrypted data at {root}.")

    # --------------------------------------------------------------------- Helpers

    def _full_path(self, partial):
        partial = partial.lstrip("/")
        path = os.path.join(self.root, partial)
        return path

    def _is_file(self, path):
        if not os.path.exists(self._full_path(path)):
            return False

        attr = self.getattr(path)
        return attr['st_mode'] & stat.S_IFREG == stat.S_IFREG

    # --------------------------------------------------------------------- Filesystem methods

    def access(self, path, mode):
        full_path = self._full_path(path)
        if not os.access(full_path, mode):
            raise FuseOSError(errno.EACCES)

    def chmod(self, path, mode):
        full_path = self._full_path(path)
        return os.chmod(full_path, mode)

    def chown(self, path, uid, gid):
        full_path = self._full_path(path)
        return os.chown(full_path, uid, gid)

    # Attributes of a path (file or directory)
    def getattr(self, path, fh=None):
        full_path = self._full_path(path)

        st = os.lstat(full_path)

        if path not in self.metadata:
            return dict((key, getattr(st, key)) for key in ('st_atime', 'st_ctime',
                                                            'st_gid', 'st_mode', 'st_mtime', 'st_nlink', 'st_size', 'st_uid'))

        try:
            return {
                'st_mode': stat.S_IFREG | (st.st_mode & ~stat.S_IFDIR),
                'st_nlink': 1,
                'st_atime': st.st_atime,
                'st_ctime': st.st_ctime,
                'st_gid': st.st_gid,
                'st_mtime': st.st_mtime,
                'st_size': self.metadata[path].size,
                'st_uid': st.st_uid
            }
        except Exception:
            # Fall back to the real on-disk attributes if the metadata entry is unusable
            return dict((key, getattr(st, key)) for key in ('st_atime', 'st_ctime',
                                                            'st_gid', 'st_mode', 'st_mtime', 'st_nlink', 'st_size', 'st_uid'))

    def readdir(self, path, fh):
        full_path = self._full_path(path)
        dirents = ['.', '..']

        if os.path.isdir(full_path):
            real_stuff = os.listdir(full_path)
            virtual_stuff = [
                x for x in real_stuff if not is_metadata(x)]
            dirents.extend(virtual_stuff)

        for r in dirents:
            yield r

    def readlink(self, path):
        pathname = os.readlink(self._full_path(path))
        if pathname.startswith("/"):
            # Path name is absolute, sanitize it.
            return os.path.relpath(pathname, self.root)
        else:
            return pathname

    def mknod(self, path, mode, dev):
        return os.mknod(self._full_path(path), mode, dev)

    def rmdir(self, path):
        os.rmdir(self._full_path(path))

    def mkdir(self, path, mode):
        os.mkdir(self._full_path(path), mode)

    def statfs(self, path):
        full_path = self._full_path(path)
        stv = os.statvfs(full_path)
        return dict((key, getattr(stv, key)) for key in ('f_bavail', 'f_bfree',
                                                         'f_blocks', 'f_bsize', 'f_favail', 'f_ffree', 'f_files', 'f_flag',
                                                         'f_frsize', 'f_namemax'))

    def unlink(self, path):
        full_path = self._full_path(path)
        shutil.rmtree(full_path)
        self.metadata.remove(path)
        return

    def symlink(self, name, target):
        return os.symlink(name, self._full_path(target))

    def rename(self, old, new):
        full_old_path = self._full_path(old)
        full_new_path = self._full_path(new)

        if self._is_file(old):
            # Renaming a file
            if self._is_file(new):
                self.unlink(new)

            os.rename(full_old_path, full_new_path)

            if full_old_path in self.cache:
                self.cache.rename(full_old_path, full_new_path)
            self.metadata.rename(old, new)
        else:
            # Renaming a directory
            os.rename(full_old_path, full_new_path)
            self.metadata.renamedir(old, new)

    def link(self, target, name):
        return os.link(self._full_path(target), self._full_path(name))

    def utimens(self, path, times=None):
        os.utime(self._full_path(path), times)

    # --------------------------------------------------------------------- File methods

    def open(self, path, flags):
        full_path = self._full_path(path)
        info = self.metadata[path]
        attr = self.getattr(path)
        mtime = attr['st_mtime']
        self.cache.open(full_path, info.key, info.iv, mtime)
        return 0

    def create(self, path, mode, fi=None):
        full_path = self._full_path(path)
        key, iv = self.metadata.add(path)
        self.cache.create(full_path, key, iv)
        return 0

    def read(self, path, length, offset, fh):
        full_path = self._full_path(path)
        if full_path in self.cache:
            return self.cache.read_bytes(full_path, offset, length)

        os.lseek(fh, offset, os.SEEK_SET)
        return os.read(fh, length)

    def write(self, path, buf, offset, fh):
        full_path = self._full_path(path)
        if full_path in self.cache:
            bytes_written = self.cache.write_bytes(full_path, buf, offset)
            self.metadata.update(path, self.cache.get_size(full_path))
            return bytes_written

        os.lseek(fh, offset, os.SEEK_SET)
        return os.write(fh, buf)

    def truncate(self, path, length, fh=None):
        full_path = self._full_path(path)
        if full_path in self.cache:
            self.cache.truncate_bytes(full_path, length)
            self.metadata.update(path, length)
            return

        with open(full_path, 'r+') as f:
            f.truncate(length)

    def flush(self, path, fh):
        full_path = self._full_path(path)
        if full_path in self.cache:
            info = self.metadata[path]
            self.cache.flush(full_path, info.key, info.iv)
            return 0

        return os.fsync(fh)

    def release(self, path, fh):
        full_path = self._full_path(path)
        if full_path in self.cache:
            self.cache.release(full_path)
            return 0

        return os.close(fh)

    def fsync(self, path, fdatasync, fh):
        return self.flush(path, fh)
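FreyaFS subclasses fusepy's Operations and raises FuseOSError, so mounting it goes through fusepy's FUSE wrapper. A minimal entry-point sketch; the argument parsing and the foreground/nothreads options are assumptions, not part of the class above:

# Sketch: mounting FreyaFS with fusepy.
import argparse
from fuse import FUSE

def main():
    parser = argparse.ArgumentParser(
        description="Mount a Mix&Slice encrypted directory as a plain filesystem.")
    parser.add_argument("root", help="directory holding the encrypted data and the .freyafs metadata")
    parser.add_argument("mountpoint", help="directory where the decrypted view is exposed")
    args = parser.parse_args()

    # foreground=True keeps the process attached so the print output stays visible;
    # nothreads=True avoids concurrent access to the shared Cache and Metadata objects.
    FUSE(FreyaFS(args.root, args.mountpoint), args.mountpoint,
         foreground=True, nothreads=True)

if __name__ == "__main__":
    main()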