def test_open_empty(self):
    """A metadata object built for a missing file must expose the defaults."""
    # Ordered pairs (not a dict) so the assertions run in a fixed order.
    expected_defaults = (
        ('antenna', 'unknown'),
        ('antenna-type', 'unknown'),
        ('receiver', 'RTL-SDR v3'),
        ('lna', 'none'),
        ('filter', 'none'),
    )

    meta = Metadata(TESTFILE)
    for key, default in expected_defaults:
        self.assertEqual(meta.get(key), default)
def test_add_key(self):
    """New keys can be stored, and setting an existing key replaces it."""
    meta = Metadata(TESTFILE)

    meta.set('foo', 'bar')
    meta.set('baz', 123)
    self.assertEqual(meta.get('foo'), 'bar')
    self.assertEqual(meta.get('baz'), 123)

    # Write the same key repeatedly; only the final value may survive.
    for value in (1, None, 123):
        meta.set('baz', value)
    self.assertEqual(meta.get('baz'), 123)
def test_del_key(self):
    """Deleting present keys empties them; deleting absent keys is harmless."""
    meta = Metadata(TESTFILE)
    meta.set('foo', 'bar')

    self.assertEqual(meta.get('antenna'), 'unknown')  # entry from the defaults
    self.assertEqual(meta.get('foo'), 'bar')  # entry added by this test

    removed = ('foo', 'antenna')
    for key in removed:
        meta.delete(key)
    for key in removed:
        self.assertEqual(meta.get(key), '')

    # Repeated deletion of a key that is already gone must not raise.
    for _ in range(3):
        meta.delete('foo')
class Plugin:
    """Binary Ninja commands backed by a FunctionSimSearch SimHash index.

    The plugin hashes function flowgraphs, stores the hashes in an on-disk
    search index, and renders markdown reports of similar functions.
    """

    def __init__(self):
        # Both attributes are filled in by init_db() on first use.
        self.sim_hash_location = None
        self.metadata = None

    def init_db(self):
        """Pick the SimHash database file and create its metadata store.

        The user is asked for a ``.simhash`` file; when nothing is selected
        ``default_sim_hash_location`` is used. The metadata object is built
        from the same path with ``.meta`` appended.
        """
        location = bn.interaction.get_open_filename_input(
            "Load SimHash database", ".simhash")
        if not location:
            bn.log_info(
                "[*] Using default location for SimHash database: {}".format(
                    default_sim_hash_location))
            location = default_sim_hash_location

        self.sim_hash_location = location
        self.metadata = Metadata(location + '.meta')

    def extract_flowgraph_hash(self, function, minimum_size=5):
        """
        Generates a flowgraph object that can be fed into FunctionSimSearch
        from a given Binary Ninja function and returns its pair of hashes.

        Basic blocks are additionally split after every ``call`` instruction.
        Returns ``(None, None)`` when the flowgraph has fewer than
        ``minimum_size`` branching nodes.
        """
        nodes = []
        graph = []

        # Retrieve CFG data
        for block in function:
            local_node = []
            shift = 0
            position = block.start

            for instruction in block:
                mnemonic = instruction[0][0].text
                local_node.append(mnemonic)
                shift += instruction[1]

                # Split on call with assumption that we only care about
                # x86/64 for now
                if mnemonic == 'call':
                    nodes.append((position, local_node))
                    local_node = []
                    graph.append((position, block.start + shift))
                    position = block.start + shift

            for edge in block.outgoing_edges:
                graph.append((position, edge.target.start))

            if local_node:
                nodes.append((position, local_node))
            else:
                # No instructions were collected after the last split, so
                # the most recently recorded edge is dropped again.
                graph.pop(-1)

        # Generate flowgraph
        flowgraph = fss.FlowgraphWithInstructions()
        for address, mnemonics in nodes:
            flowgraph.add_node(address)
            # Format conversion: each instruction becomes (mnemonic, ()).
            flowgraph.add_instructions(
                address, tuple((mnemonic, ()) for mnemonic in mnemonics))

        for edge in graph:
            flowgraph.add_edge(edge[0], edge[1])

        if flowgraph.number_of_branching_nodes() < minimum_size:
            return (None, None)

        hasher = fss.SimHasher()
        return hasher.calculate_hash(flowgraph)

    def get_exec_id(self, filename):
        """Return an integer id built from the SHA-256 of the file contents.

        The id is the first 16 hex digits (64 bits) of the digest.
        """
        digest = hashlib.sha256()
        # Binary mode: the input is an executable, so text-mode newline
        # translation (or str decoding) would corrupt the hash.
        with open(filename, 'rb') as fh:
            for chunk in iter(lambda: fh.read(65536), b''):
                digest.update(chunk)
        # int() yields the same value as long() and also exists on Python 3.
        return int(digest.hexdigest()[0:16], 16)

    def save_single_function_hash(self, bv, search_index, function,
                                  exec_id=None):
        """
        Save the hash of a given function into a given search index.

        ``exec_id`` may be supplied by callers that already computed it for
        ``bv``; when omitted it is derived from ``bv.file.filename``.
        """
        # TODO: detect if we are opening database instead of binary
        if exec_id is None:
            exec_id = self.get_exec_id(bv.file.filename)

        h1, h2 = self.extract_flowgraph_hash(function)
        if h1 and h2:
            search_index.add_function(h1, h2, exec_id, function.start)
            bn.log_info(
                '[+] Added function <{:x}:0x{:x} {:x}-{:x}> to search index.'.
                format(exec_id, function.start, h1, h2))
            self.metadata.add(exec_id, function.start, bv.file.filename,
                              function.name)
        else:
            bn.log_info(
                '[-] Did not add function <{:x}:0x{:x}> to search index.'.
                format(exec_id, function.start))

    def init_index(self, bv, current_function):
        """Open (or create) the search index for the configured database.

        Returns the ``fss.SimHashSearchIndex`` object, or ``-1`` when
        ``bv.platform.name`` is not listed in ``supported_arch``.
        """
        if not self.sim_hash_location:
            self.init_db()

        # Supported platform check
        if bv.platform.name not in supported_arch:
            bn.log_error(
                '[!] Right now this plugin supports only the following architectures: '
                + str(supported_arch))
            return -1

        # Only ask for a fresh index when the database file does not exist.
        create_index = not os.path.isfile(self.sim_hash_location)
        return fss.SimHashSearchIndex(self.sim_hash_location, create_index,
                                      50)

    def save_hash(self, bv, current_function):
        """
        Save hash of current function into search index.

        Returns ``-1`` without saving when the index could not be opened.
        """
        search_index = self.init_index(bv, current_function)
        if search_index == -1:
            return -1
        self.save_single_function_hash(bv, search_index, current_function)

    def save_all_functions(self, bv, current_function):
        """
        Walk through all functions and save them into the index.

        Returns ``-1`` without saving when the index could not be opened.
        """
        search_index = self.init_index(bv, current_function)
        if search_index == -1:
            return -1

        # Hash the binary once instead of once per function.
        exec_id = self.get_exec_id(bv.file.filename)
        for function in bv.functions:
            self.save_single_function_hash(bv, search_index, function,
                                           exec_id)

    def add_report_from_result(self,
                               results,
                               report,
                               address,
                               minimal_match=100):
        """Append the matches for one function to a markdown report.

        Only results whose first element is greater than ``minimal_match``
        are listed. Each result is indexed as ``r[0]`` (raw score), ``r[1]``
        and ``r[2]`` (the two values used for the metadata lookup). Returns
        the extended report string; ``report`` is returned unchanged when no
        result passes the filter.
        """
        results = [r for r in results if r[0] > minimal_match]
        if not results:
            return report

        report += "## Best match results for 0x{:x}\n".format(address)
        for r in results:
            score = max(float(r[0]) / 128.0 - 0.5, 0.0) * 2
            m = self.metadata.get(r[1], r[2])  # file name, function name
            if not m:
                line = "- {:f} - {:x}:0x{:x}".format(score, r[1], r[2])
            else:
                line = "- {:f} - {:x}:0x{:x} {} '{}'".format(
                    score, r[1], r[2], m[0], m[1])
            report += line + "\n"
        return report

    def find_function_hash(self, bv, h1, h2, address, search_index, report):
        """Query the five best matches for a hash pair and add them to report."""
        results = search_index.query_top_N(h1, h2, 5)
        return self.add_report_from_result(results, report, address)

    def find_hash(self, bv, current_function):
        """
        Find functions similar to the current one.

        Returns ``-1`` when the index could not be opened.
        """
        search_index = self.init_index(bv, current_function)
        if search_index == -1:
            return -1

        h1, h2 = self.extract_flowgraph_hash(current_function)
        if h1 and h2:
            report = self.find_function_hash(
                bv, h1, h2, current_function.start, search_index, "")
            bn.interaction.show_markdown_report(
                'Function Similarity Search Report', report)
        else:
            # Same message as find_all_hashes; the previous text referenced
            # names that do not exist in this scope.
            bn.log_info('[-] Did not search for function 0x{:x}.'.format(
                current_function.start))

    def find_all_hashes(self, bv, current_function):
        """Search the index for every function of ``bv`` and show one report.

        Returns ``-1`` when the index could not be opened.
        """
        search_index = self.init_index(bv, current_function)
        if search_index == -1:
            return -1

        report = ""
        for function in bv.functions:
            h1, h2 = self.extract_flowgraph_hash(function)
            if h1 and h2:
                report = self.find_function_hash(
                    bv, h1, h2, function.start, search_index, report)
            else:
                bn.log_info('[-] Did not search for function 0x{:x}.'.format(
                    function.start))

        bn.interaction.show_markdown_report(
            'Function Similarity Search Report', report)
def entry_exists(date):
    """Tell whether the metadata holds data for the day of ``date``.

    Looks up the metadata for ``date.year`` / ``date.month`` and returns
    True when its data for ``date.day`` is not None.
    """
    from metadata import Metadata

    month_metadata = Metadata.get(date.year, date.month)
    day_data = month_metadata.get_data_for_day(date.day)
    return day_data is not None
class Plugin:
    """Binary Ninja commands backed by a FunctionSimSearch SimHash index.

    Offers saving the hash of the current function into an on-disk search
    index and reporting indexed functions similar to the current one.
    """

    def __init__(self):
        # Both attributes are filled in by init_db() on first use.
        self.sim_hash_location = None
        self.metadata = None

    def init_db(self):
        """Pick the SimHash database file and create its metadata store.

        The user is asked for a ``.simhash`` file; when nothing is selected
        ``default_sim_hash_location`` is used. The metadata object is built
        from the same path with ``.meta`` appended.
        """
        location = bn.interaction.get_open_filename_input(
            "Load SimHash database", ".simhash")
        if not location:
            bn.log_info(
                "[*] Using default location for SimHash database: {}".format(
                    default_sim_hash_location))
            location = default_sim_hash_location

        self.sim_hash_location = location
        self.metadata = Metadata(location + '.meta')

    def extract_flowgraph_hash(self, function):
        """
        Generates a flowgraph object that can be fed into FunctionSimSearch
        from a given Binary Ninja function and returns its pair of hashes.

        Basic blocks are additionally split after every ``call`` instruction.
        """
        nodes = []
        graph = []

        # Retrieve CFG data
        for block in function:
            local_node = []
            shift = 0
            position = block.start

            for instruction in block:
                mnemonic = instruction[0][0].text
                local_node.append(mnemonic)
                shift += instruction[1]

                # Split on call with assumption that we only care about
                # x86/64 for now
                if mnemonic == 'call':
                    nodes.append((position, local_node))
                    local_node = []
                    graph.append((position, block.start + shift))
                    position = block.start + shift

            for edge in block.outgoing_edges:
                graph.append((position, edge.target.start))

            if local_node:
                nodes.append((position, local_node))
            else:
                # No instructions were collected after the last split, so
                # the most recently recorded edge is dropped again.
                graph.pop(-1)

        # Generate flowgraph
        flowgraph = fss.FlowgraphWithInstructions()
        for address, mnemonics in nodes:
            flowgraph.add_node(address)
            # Format conversion: each instruction becomes (mnemonic, ()).
            flowgraph.add_instructions(
                address, tuple((mnemonic, ()) for mnemonic in mnemonics))

        for edge in graph:
            flowgraph.add_edge(edge[0], edge[1])

        hasher = fss.SimHasher()
        return hasher.calculate_hash(flowgraph)

    def get_exec_id(self, filename):
        """Return an integer id built from the SHA-256 of the file contents.

        The id is the first 16 hex digits (64 bits) of the digest.
        """
        digest = hashlib.sha256()
        # Binary mode: the input is an executable, so text-mode newline
        # translation (or str decoding) would corrupt the hash.
        with open(filename, 'rb') as fh:
            for chunk in iter(lambda: fh.read(65536), b''):
                digest.update(chunk)
        # int() yields the same value as long() and also exists on Python 3.
        return int(digest.hexdigest()[0:16], 16)

    def _ensure_ready(self, bv):
        """Load the database location if needed; return True if bv's platform is supported."""
        if not self.sim_hash_location:
            self.init_db()

        # Supported platform check
        if bv.platform.name not in supported_arch:
            bn.log_error(
                '[!] Right now this plugin supports only the following architectures: '
                + str(supported_arch))
            return False
        return True

    def _open_search_index(self):
        """Open the search index, asking for creation when the file is missing."""
        create_index = not os.path.isfile(self.sim_hash_location)
        return fss.SimHashSearchIndex(self.sim_hash_location, create_index,
                                      28)

    def save_hash(self, bv, current_function):
        """
        Save hash of current function into search index.

        Returns ``-1`` when the platform of ``bv`` is not supported.
        """
        if not self._ensure_ready(bv):
            return -1

        h1, h2 = self.extract_flowgraph_hash(current_function)
        search_index = self._open_search_index()

        # TODO: detect if we are opening database instead of binary
        exec_id = self.get_exec_id(bv.file.filename)
        search_index.add_function(h1, h2, exec_id, current_function.start)
        bn.log_info(
            '[+] Added function <{:x}:0x{:x} {:x}-{:x}> to search index.'.
            format(exec_id, current_function.start, h1, h2))
        self.metadata.add(exec_id, current_function.start, bv.file.filename,
                          current_function.name)

    def find_hash(self, bv, current_function):
        """
        Find functions similar to the current one.

        Queries the five best matches and shows them as a markdown report.
        Returns ``-1`` when the platform of ``bv`` is not supported.
        """
        if not self._ensure_ready(bv):
            return -1

        h1, h2 = self.extract_flowgraph_hash(current_function)
        search_index = self._open_search_index()
        results = search_index.query_top_N(h1, h2, 5)

        # TODO: refactor, possibly with report template
        report = ""
        if not results:
            report += "# No similar functions found"
        else:
            # TODO: add better header, but that will require some
            # refactoring of extract function
            report += "# Best match results\n"
            for r in results:
                # Raw match goes to the debug log instead of stdout.
                bn.log_debug('[*] Match: {!r}'.format(r))
                score = max(float(r[0]) / 128.0 - 0.5, 0.0) * 2
                m = self.metadata.get(r[1], r[2])  # file name, function name
                # `not m` also covers a lookup that returns None.
                if not m:
                    line = "- {:f} - {:x}:0x{:x}".format(score, r[1], r[2])
                else:
                    line = "- {:f} - {:x}:0x{:x} {} '{}'".format(
                        score, r[1], r[2], m[0], m[1])
                report += line + "\n"

        # Display results
        bn.interaction.show_markdown_report(
            'Function Similarity Search Report', report)