def load(self):
    outdir = config.get_cache_filename(
        config.CACHE_SIGNATURE_GROUPS_DIR, self._context.dumpname)
    inname = os.path.sep.join([outdir, self._name])
    self._similarities = utils.int_array_cache(inname)
    return
def graphStructureGroups(context, chains, originAddr=None):
    # TODO change generic fn
    chains.sort()
    decoder = dsa.FieldReverser(context.memory_handler)
    graph = networkx.DiGraph()
    for chain in chains:
        log.debug('\t[-] chain len:%d' % len(chain))
        if originAddr is not None:
            if originAddr not in chain:
                continue  # ignore chain if originAddr is not in it
        for addr in map(long, chain):
            record = context.get_record_for_address(addr)
            # record.decodeFields()  # can be long
            decoder.analyze_fields(record)
            print record.to_string()
            targets = set()
            pointer_fields = [f for f in record.get_fields() if f.is_pointer()]
            for f in pointer_fields:
                addr_child = f.get_value_for_field(record)
                # follow the pointer value, not the parent's own address
                child = context.get_record_at_address(addr_child)
                targets.add(('%x' % addr, '%x' % child.address))
            graph.add_edges_from(targets)
            print '#', '-' * 78
    networkx.readwrite.gexf.write_gexf(
        graph,
        config.get_cache_filename(config.CACHE_GRAPH, context.dumpname))
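# A minimal, self-contained sketch of the idea above: build a directed graph
# of "record -> pointed-to record" edges and dump it to GEXF. The record dict
# and addresses are hypothetical illustration data, not the haystack API;
# only the networkx calls are real.
import networkx


def _sketch_pointer_graph():
    # hypothetical records: address -> list of pointer values found in it
    records = {0x1000: [0x2000], 0x2000: [0x3000], 0x3000: []}
    graph = networkx.DiGraph()
    for addr, pointers in records.items():
        graph.add_edges_from(('%x' % addr, '%x' % target) for target in pointers)
    networkx.readwrite.gexf.write_gexf(graph, 'pointer_graph.gexf')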
def cacheSizes(self): """Find the number of different sizes, and creates that much numpyarray""" # if not os.access outdir = config.get_cache_filename( config.CACHE_SIGNATURE_SIZES_DIR, self._context.dumpname) if not os.path.isdir(outdir): os.mkdir(outdir) if not os.access(outdir, os.W_OK): raise IOError('cant write to %s' % (outdir)) # sizes = map(int, set(self._context._malloc_sizes)) arrays = dict([(s, []) for s in sizes]) # sort all addr in all sizes.. [arrays[self._context._malloc_sizes[i]].append( long(addr)) for i, addr in enumerate(self._context._malloc_addresses)] # saving all sizes dictionary in files... for size, lst in arrays.items(): fout = os.path.sep.join([outdir, 'size.%0.4x' % (size)]) arrays[size] = utils.int_array_save(fout, lst) # saved all sizes dictionaries. # tag it as done file( os.path.sep.join([outdir, config.CACHE_SIGNATURE_SIZES_DIR_TAG]), 'w') self._sizes = arrays return
def cacheSizes(self): """Find the number of different sizes, and creates that much numpyarray""" # if not os.access outdir = config.get_cache_filename(config.CACHE_SIGNATURE_SIZES_DIR, self._context.dumpname) if not os.path.isdir(outdir): os.mkdir(outdir) if not os.access(outdir, os.W_OK): raise IOError('cant write to %s' % (outdir)) # sizes = map(int, set(self._context._malloc_sizes)) arrays = dict([(s, []) for s in sizes]) # sort all addr in all sizes.. [ arrays[self._context._malloc_sizes[i]].append(long(addr)) for i, addr in enumerate(self._context._malloc_addresses) ] # saving all sizes dictionary in files... for size, lst in arrays.items(): fout = os.path.sep.join([outdir, 'size.%0.4x' % (size)]) arrays[size] = utils.int_array_save(fout, lst) # saved all sizes dictionaries. # tag it as done file(os.path.sep.join([outdir, config.CACHE_SIGNATURE_SIZES_DIR_TAG]), 'w') self._sizes = arrays return
def persist(self):
    outdir = config.get_cache_filename(
        config.CACHE_SIGNATURE_GROUPS_DIR, self._context.dumpname)
    config.create_cache_folder(outdir)
    outname = os.path.sep.join([outdir, self._name])
    utils.int_array_save(outname, self._similarities)
    return
def persist(self):
    outdir = config.get_cache_filename(
        config.CACHE_SIGNATURE_GROUPS_DIR, self._context.dumpname)
    if not os.path.isdir(outdir):
        os.mkdir(outdir)
    if not os.access(outdir, os.W_OK):
        raise IOError('cannot write to %s' % outdir)
    outname = os.path.sep.join([outdir, self._name])
    utils.int_array_save(outname, self._similarities)
    return
def _loadCache(self):
    outdir = config.get_cache_filename(
        config.CACHE_SIGNATURE_SIZES_DIR, self._context.dumpname)
    fdone = os.path.sep.join(
        [outdir, config.CACHE_SIGNATURE_SIZES_DIR_TAG])
    if not os.access(fdone, os.R_OK):
        return False
    for myfile in os.listdir(outdir):
        try:
            # FIXME: not sure it's '-',
            # and what this section is about in general.
            addr = int(myfile.split('-')[1], 16)
        except IndexError:
            continue  # ignore file
def cacheLoad(cls, memory_handler, heap_addr):
    dumpname = os.path.abspath(memory_handler.get_name())
    config.create_cache_folder_name(dumpname)
    context_cache = config.get_cache_filename(
        config.CACHE_CONTEXT, dumpname, heap_addr)
    try:
        with open(context_cache, 'rb') as fin:
            ctx = pickle.load(fin)
    except EOFError:
        # the pickle is truncated; drop it so the next run regenerates it
        os.remove(context_cache)
        log.error('Error in the context file. File cleaned. Please restart.')
        raise RuntimeError('Error in the context file. File cleaned. Please restart.')
    log.debug('\t[-] loaded my context from cache')
    ctx.config = config
    ctx.memory_handler = memory_handler
    ctx.heap = ctx.memory_handler.get_mapping_for_address(ctx._heap_start)
    # and initialize
    ctx._init2()
    return ctx
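# The recovery pattern used above, in isolation: try to unpickle a cache file,
# and if it is truncated (EOFError), delete it so the next run regenerates it.
# 'context.cache' is a hypothetical filename, not a haystack constant.
import os
import pickle


def _sketch_load_or_clean(cache_file='context.cache'):
    try:
        with open(cache_file, 'rb') as fin:
            return pickle.load(fin)
    except EOFError:
        os.remove(cache_file)
        raise RuntimeError('Error in the cache file. File cleaned. Please restart.')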
def get_filename_cache_strings(self):
    return config.get_cache_filename(
        config.CACHE_STRINGS, self.dumpname, self._heap_start)
def isPersisted(self):
    outdir = config.get_cache_filename(
        config.CACHE_SIGNATURE_GROUPS_DIR, self._context.dumpname)
    return os.access(os.path.sep.join([outdir, self._name]), os.F_OK)
def get_filename_cache_context(self):
    return config.get_cache_filename(
        config.CACHE_CONTEXT, self.dumpname, self._heap_start)
def get_filename_cache_signatures(self):
    return config.get_cache_filename(
        config.CACHE_SIGNATURE_GROUPS_DIR, self.dumpname, self._heap_start)
def get_filename_cache_allocations_sizes(self):
    return config.get_cache_filename(
        config.CACHE_MALLOC_CHUNKS_SIZES, self.dumpname, self._heap_start)
def clean(digraph):
    # remove isolated nodes
    isolates = networkx.algorithms.isolate.isolates(digraph)
    digraph.remove_nodes_from(isolates)
    # remove small connected components (1 to 3 nodes)
    graph = networkx.Graph(digraph)  # undirected
    subgraphs = networkx.algorithms.components.connected.connected_component_subgraphs(
        graph)
    isolates1 = set(utils.flatten(g.nodes() for g in subgraphs if len(g) == 1))  # self connected
    isolates2 = set(utils.flatten(g.nodes() for g in subgraphs if len(g) == 2))
    isolates3 = set(utils.flatten(g.nodes() for g in subgraphs if len(g) == 3))
    digraph.remove_nodes_from(isolates1)
    digraph.remove_nodes_from(isolates2)
    digraph.remove_nodes_from(isolates3)
    #
    # graph = digraph.to_undirected()
    # subgraphs = networkx.algorithms.components.connected.connected_component_subgraphs(graph)
    subgraphs = [g for g in subgraphs if len(g) > 3]
    isolatedGraphs = subgraphs[1:100]

    # group subgraphs by node count
    isoDict = defaultdict(list)
    for g in isolatedGraphs:
        isoDict[len(g)].append(g)

    # test isomorphism
    isoGraphs = dict()
    for numNodes, graphs in isoDict.items():
        numgraphs = len(graphs)
        if numgraphs == 1:
            continue
        isoGraph = networkx.Graph()
        # quick find isomorphisms
        todo = set(graphs)
        for i, g1 in enumerate(graphs):
            for g2 in graphs[i + 1:]:
                if networkx.is_isomorphic(g1, g2):
                    print 'numNodes:%d graphs %d, %d are isomorphic' % (numNodes, i, i + 1)
                    isoGraph.add_edge(g1, g2, {'isomorphic': True})
                    if g2 in todo:
                        todo.remove(g2)
                    if g1 in todo:
                        todo.remove(g1)
                    # we can stop here, chain comparison will work between g2
                    # and g3
                    break
        if len(isoGraph) > 0:
            isoGraphs[numNodes] = isoGraph

    # draw the isomorphisms
    for i, item in enumerate(isoGraphs.items()):
        num, g = item
        # networkx.draw(g)
        for rg in g.nodes():
            networkx.draw(rg)
        fname = os.path.sep.join(
            [config.imgCacheDir, 'isomorph_subgraphs_%d.png' % num])
        plt.savefig(fname)
        plt.clf()
    # need to use gephi-like for rendering nicely on the same pic

    bigGraph = networkx.DiGraph()
    bigGraph.add_edges_from(digraph.edges(subgraphs[0].nodes()))
    # FIXME: 'ctx' is not defined in this scope; it presumably comes from
    # module-level state.
    stack_addrs = utils.int_array_cache(
        config.get_cache_filename(
            config.CACHE_STACK_VALUES, ctx.dumpname, ctx._heap_addr))
    stack_addrs_txt = set(['%x' % addr for addr in stack_addrs])  # new, no long
    stacknodes = list(set(bigGraph.nodes()) & stack_addrs_txt)
    print 'stacknodes left', len(stacknodes)
    orig = list(set(graph.nodes()) & stack_addrs_txt)
    print 'stacknodes orig', len(orig)
    # identify strongly referenced allocators
    degreesList = [(bigGraph.in_degree(node), node) for node in bigGraph.nodes()]
    degreesList.sort(reverse=True)
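# A standalone sketch of the isomorphism bucketing used above: group graphs by
# node count first (a cheap invariant), then only run networkx.is_isomorphic
# within each bucket. The three toy graphs are hypothetical.
import networkx
from collections import defaultdict


def _sketch_find_isomorphic():
    g1 = networkx.path_graph(3)
    g2 = networkx.path_graph(3)
    g3 = networkx.complete_graph(4)
    by_size = defaultdict(list)
    for g in (g1, g2, g3):
        by_size[len(g)].append(g)
    pairs = []
    for graphs in by_size.values():
        for i, a in enumerate(graphs):
            for b in graphs[i + 1:]:
                if networkx.is_isomorphic(a, b):
                    pairs.append((a, b))
    return pairs  # here: [(g1, g2)]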
def get_filename_cache_pointers_values(self):
    return config.get_cache_filename(
        config.CACHE_HEAP_VALUES, self.dumpname, self._heap_start)
def get_filename_cache_pointers_addresses(self):
    return config.get_cache_filename(
        config.CACHE_HEAP_ADDRS, self.dumpname, self._heap_start)
def get_filename_cache_graph(self):
    return config.get_cache_filename(
        config.CACHE_GRAPH, self.dumpname, self._heap_start)
def get_filename_cache_headers(self):
    return config.get_cache_filename(
        config.CACHE_GENERATED_PY_HEADERS_VALUES, self.dumpname, self._heap_start)
def get_filename_cache_allocations_addresses(self):
    return config.get_cache_filename(
        config.CACHE_MALLOC_CHUNKS_ADDRS, self.dumpname, self._heap_start)
def get_filename_cache_headers(self):
    dumpname = self.memory_handler.get_name()
    return config.get_cache_filename(
        config.CACHE_GENERATED_PY_HEADERS_VALUES, dumpname)