def getAllPointers(dumpfilename, mappings):
    ''' Search all mmap pointers values in heap.
    records values and pointers address in heap.
    Returns (heap_addrs, heap_values): parallel tuples of pointer locations
    and the values stored there. Results are cached on disk per dump file.
    '''
    import pointerfinder
    # cache files: one for the addresses holding pointers, one for the pointed-to values
    F_HEAP_O = Config.getCacheFilename(Config.CACHE_ALL_PTRS_ADDRS, dumpfilename)
    F_HEAP_V = Config.getCacheFilename(Config.CACHE_ALL_PTRS_VALUES, dumpfilename)
    heap_addrs = int_array_cache(F_HEAP_O)
    heap_values = int_array_cache(F_HEAP_V)
    if heap_addrs is None or heap_values is None:
        # cache miss: enumerate every pointer in the heap mapping,
        # targeting all mappings (not just the heap).
        log.info('[+] Making new cache - all pointers')
        heap_enumerator = pointerfinder.PointerEnumerator(mappings.getHeap())
        heap_enumerator.setTargetMapping(mappings) # all pointers
        heap_enum = heap_enumerator.search()
        if len(heap_enum)>0:
            # search() yields (addr, value) pairs; transpose into two tuples
            heap_addrs, heap_values = zip(*heap_enum) # WTF
        else:
            # guard: zip(*[]) would not unpack into two names
            heap_addrs, heap_values = (),()
        log.info('\t[-] got %d pointers '%(len(heap_enum)) )
        # merge
        int_array_save(F_HEAP_O, heap_addrs)
        int_array_save(F_HEAP_V, heap_values)
    else:
        log.info('[+] Loading from cache %d pointers %d unique'%(len(heap_values), len(set(heap_values)) ))
    return heap_addrs, heap_values
def getAllocations(dumpfilename, mappings, heap, get_user_alloc=None): ''' Search malloc_chunks in heap . records addrs and sizes. ''' # TODO if linux # TODO from haystack.reverse import heapwalker import libc.ctypes_malloc f_addrs = Config.getCacheFilename('%x.%s'%(heap.start,Config.CACHE_MALLOC_CHUNKS_ADDRS), dumpfilename) f_sizes = Config.getCacheFilename('%x.%s'%(heap.start,Config.CACHE_MALLOC_CHUNKS_SIZES), dumpfilename) log.debug('reading from %s'%(f_addrs)) addrs = int_array_cache(f_addrs) sizes = int_array_cache(f_sizes) if addrs is None or sizes is None: log.info('[+] Making new cache - getting malloc_chunks from heap ') ### TODO : HeapWalker + order addresses ASC ... # allocations = sorted(heapwalker.get_user_allocations(mappings, heap)) ## TODO 2 , allocations should be triaged by mmapping ( heap.start ) before write2disk. ## Or the heap.start should be removed from the cache name.. it has no impact. ## heapwalker.getuserAllocations should parse ALL mmappings to get all user allocations. ### But in that case, there will/could be a problem when using utils.closestFloorValue... ### in case of a pointer ( bad allocation ) out of a mmapping space. ### But that is not possible, because we are reporting factual reference to existing address space. ### OK. heap.start should be deleted from the cache name. allocations = mappings.get_user_allocations(mappings, heap) addrs, sizes = zip(*allocations) int_array_save(f_addrs, addrs) int_array_save(f_sizes, sizes) else: log.info('[+] Loading from cache') log.info('\t[-] we have %d malloc_chunks'%(len(addrs)) ) return addrs, sizes
def getAllocations(dumpfilename, mappings, heap):
    ''' Search malloc_chunks in heap . records addrs and sizes.
    Returns (addrs, sizes) for in-use user allocations in this heap;
    results are cached on disk, keyed by dump name + heap.start.
    '''
    # TODO if linux
    import libc.ctypes_malloc
    f_addrs = Config.getCacheFilename(Config.CACHE_MALLOC_CHUNKS_ADDRS, dumpfilename + '.%x' % (heap.start))
    f_sizes = Config.getCacheFilename(Config.CACHE_MALLOC_CHUNKS_SIZES, dumpfilename + '.%x' % (heap.start))
    log.debug('reading from %s' % (f_addrs))
    addrs = int_array_cache(f_addrs)
    sizes = int_array_cache(f_sizes)
    if addrs is None or sizes is None:
        log.info('[+] Making new cache - getting malloc_chunks from heap ')
        # materialize so we can test emptiness before transposing
        allocations = list(libc.ctypes_malloc.getUserAllocations(mappings, heap, filterInuse=True))
        # BUGFIX: zip(*[]) does not unpack into two names (ValueError) when the
        # heap holds no in-use chunks; guard like the pointer-cache functions do.
        if len(allocations) > 0:
            addrs, sizes = zip(*allocations)
        else:
            addrs, sizes = (), ()
        int_array_save(f_addrs, addrs)
        int_array_save(f_sizes, sizes)
    else:
        log.info('[+] Loading from cache')
    log.info('\t[-] we have %d malloc_chunks' % (len(addrs)))
    return addrs, sizes
def cacheLoad(cls, mappings):
    ''' Load the pickled reverser context for this dump from its cache file.
    Removes the cache file and re-raises if it is truncated/corrupt.
    '''
    #from haystack.reverse.context import ReverserContext
    dumpname = os.path.normpath(mappings.name)
    Config.makeCache(dumpname)
    context_cache = Config.getCacheFilename(Config.CACHE_CONTEXT, dumpname)
    try:
        context = pickle.load(file(context_cache,'r'))
    except EOFError,e:
        # truncated pickle: clean the stale cache so the next run rebuilds it
        os.remove(context_cache)
        log.error('Error in the context file. File cleaned. Please restart.')
        raise e
    # NOTE(review): the success path (returning the loaded context) is not
    # visible in this snippet - confirm against the full source.
def reverseInstances(dumpname):
    ''' Run the full reversing pipeline on a dump: linked-list detection,
    field decoding, pointer-field typing, pointer-graph export and header
    generation. Each reverser enriches and returns the shared context.
    '''
    from haystack.reverse import context
    log.debug ('[+] Loading the memory dump ')
    ctx = context.get_context(dumpname)
    try:
        # make sure the per-structure cache directory exists
        if not os.access(Config.getStructsCacheDir(ctx.dumpname), os.F_OK):
            os.mkdir(Config.getStructsCacheDir(ctx.dumpname))
        # we use common allocators to find structures.
        #log.debug('Reversing malloc')
        #mallocRev = MallocReverser()
        #ctx = mallocRev.reverse(ctx)
        #mallocRev.check_inuse(ctx)
        # try to find some logical constructs.
        log.debug('Reversing DoubleLinkedListReverser')
        doublelink = DoubleLinkedListReverser()
        ctx = doublelink.reverse(ctx)
        # decode bytes contents to find basic types.
        log.debug('Reversing Fields')
        fr = FieldReverser()
        ctx = fr.reverse(ctx)
        # identify pointer relation between structures
        log.debug('Reversing PointerFields')
        pfr = PointerFieldReverser()
        ctx = pfr.reverse(ctx)
        # graph pointer relations between structures
        log.debug('Reversing PointerGraph')
        ptrgraph = PointerGraphReverser()
        ctx = ptrgraph.reverse(ctx)
        ptrgraph._saveStructures(ctx)
        #save to file
        save_headers(ctx)
        #fr._saveStructures(ctx)
        ##libRev = KnowStructReverser('libQt')
        ##ctx = libRev.reverse(ctx)
        # we have more enriched context
        # etc
    except KeyboardInterrupt,e:
        #except IOError,e:
        # report progress made before the interruption, then propagate
        log.warning(e)
        log.info('[+] %d structs extracted'%( context.structuresCount()) )
        raise e
    pass
def reverseInstances(dumpname):
    ''' Run the full reversing pipeline on a dump: linked-list detection,
    field decoding, pointer-field typing, pointer-graph export and header
    generation. Each reverser enriches and returns the shared context.
    '''
    from haystack.reverse import context
    log.debug('[+] Loading the memory dump ')
    ctx = context.get_context(dumpname)
    try:
        # make sure the per-structure cache directory exists
        if not os.access(Config.getStructsCacheDir(ctx.dumpname), os.F_OK):
            os.mkdir(Config.getStructsCacheDir(ctx.dumpname))
        # we use common allocators to find structures.
        #log.debug('Reversing malloc')
        #mallocRev = MallocReverser()
        #ctx = mallocRev.reverse(ctx)
        #mallocRev.check_inuse(ctx)
        # try to find some logical constructs.
        log.debug('Reversing DoubleLinkedListReverser')
        doublelink = DoubleLinkedListReverser()
        ctx = doublelink.reverse(ctx)
        # decode bytes contents to find basic types.
        log.debug('Reversing Fields')
        fr = FieldReverser()
        ctx = fr.reverse(ctx)
        # identify pointer relation between structures
        log.debug('Reversing PointerFields')
        pfr = PointerFieldReverser()
        ctx = pfr.reverse(ctx)
        # graph pointer relations between structures
        log.debug('Reversing PointerGraph')
        ptrgraph = PointerGraphReverser()
        ctx = ptrgraph.reverse(ctx)
        ptrgraph._saveStructures(ctx)
        #save to file
        save_headers(ctx)
        #fr._saveStructures(ctx)
        ##libRev = KnowStructReverser('libQt')
        ##ctx = libRev.reverse(ctx)
        # we have more enriched context
        # etc
    except KeyboardInterrupt, e:
        #except IOError,e:
        # report progress made before the interruption, then propagate
        log.warning(e)
        log.info('[+] %d structs extracted' % (context.structuresCount()))
        raise e
    pass
def getHeapPointers(dumpfilename, mappings):
    ''' Search Heap pointers values in stack and heap.
    records values and pointers address in heap.
    Returns (heap_addrs, heap_values): parallel tuples of the heap addresses
    holding pointers and the pointer values stored there. Results are cached
    on disk per dump file.
    '''
    import pointerfinder
    # NOTE: stack-pointer enumeration was disabled in the original; only
    # heap-internal pointers are scanned and cached here.
    F_HEAP_O = Config.getCacheFilename(Config.CACHE_HEAP_ADDRS, dumpfilename)
    F_HEAP_V = Config.getCacheFilename(Config.CACHE_HEAP_VALUES, dumpfilename)
    heap_addrs = int_array_cache(F_HEAP_O)
    heap_values = int_array_cache(F_HEAP_V)
    if heap_addrs is None or heap_values is None:
        log.info( '[+] Making new cache ')
        heap_enum = pointerfinder.PointerEnumerator( mappings.getHeap()).search()
        # BUGFIX: the unguarded `zip(*heap_enum)` raised ValueError when no
        # pointers were found; guard like the sibling implementation does.
        if len(heap_enum) > 0:
            heap_addrs, heap_values = zip(*heap_enum)
        else:
            heap_addrs, heap_values = (), ()
        log.info('\t[-] got %d pointers ' % (len(heap_enum)))
        int_array_save(F_HEAP_O, heap_addrs)
        int_array_save(F_HEAP_V, heap_values)
    else:
        log.info('[+] Loading from cache %d pointers %d unique' % (len(heap_values), len(set(heap_values))))
    return heap_addrs, heap_values #, stack_addrs, stack_values #values, aligned, not_aligned
def getHeapPointers(dumpfilename, mappings):
    ''' Search Heap pointers values in stack and heap.
    records values and pointers address in heap.
    Returns (heap_addrs, heap_values): parallel tuples of the heap addresses
    holding pointers and the pointer values stored there; cached on disk.
    '''
    import pointerfinder
    #F_VALUES = Config.getCacheFilename(Config.CACHE_HS_POINTERS_VALUES, dumpfilename)
    F_HEAP_O = Config.getCacheFilename(Config.CACHE_HEAP_ADDRS, dumpfilename)
    F_HEAP_V = Config.getCacheFilename(Config.CACHE_HEAP_VALUES, dumpfilename)
    #F_STACK_O = Config.getCacheFilename(Config.CACHE_STACK_ADDRS, dumpfilename)
    #F_STACK_V = Config.getCacheFilename(Config.CACHE_STACK_VALUES, dumpfilename)
    #log.debug('reading from %s'%(F_VALUES))
    #values = int_array_cache(F_VALUES)
    heap_addrs = int_array_cache(F_HEAP_O)
    heap_values = int_array_cache(F_HEAP_V)
    #stack_addrs = int_array_cache(F_STACK_O)
    #stack_values = int_array_cache(F_STACK_V)
    if heap_addrs is None or heap_values is None:
        log.info('[+] Making new cache - heap pointers')
        #- getting pointers values from stack')
        #stack_enumerator = pointerfinder.PointerEnumerator(mappings.getStack())
        #stack_enumerator.setTargetMapping(mappings.getHeap()) #only interested in heap pointers
        #stack_enum = stack_enumerator.search()
        #if len(stack_enum)>0:
        #  stack_offsets, stack_values = zip(*stack_enum)
        #else:
        #  stack_offsets, stack_values = (),()
        #log.info('\t[-] got %d pointers '%(len(stack_enum)) )
        #log.info('\t[-] merging pointers from heap')
        heap_enum = pointerfinder.PointerEnumerator(mappings.getHeap()).search()
        if len(heap_enum)>0:
            # search() yields (addr, value) pairs; transpose into two tuples
            heap_addrs, heap_values = zip(*heap_enum) # WTF
        else:
            # guard: zip(*[]) would not unpack into two names
            heap_addrs, heap_values = (),()
        log.info('\t[-] got %d pointers '%(len(heap_enum)) )
        # merge
        #values = sorted(set(heap_values+stack_values))
        #int_array_save(F_VALUES , values)
        int_array_save(F_HEAP_O, heap_addrs)
        int_array_save(F_HEAP_V, heap_values)
        #int_array_save(F_STACK_O, stack_addrs)
        #int_array_save(F_STACK_V, stack_values)
        #log.info('\t[-] we have %d unique pointers values out of %d orig.'%(len(values), len(heap_values)+len(stack_values)) )
    else:
        log.info('[+] Loading from cache %d pointers %d unique'%(len(heap_values), len(set(heap_values)) ))
        #log.info('\t[-] we have %d unique pointers values, and %d pointers in heap .'%(len(values), len(heap_addrs)) )
    #aligned = numpy.asarray(filter(lambda x: (x%4) == 0, values))
    #not_aligned = numpy.asarray(sorted( set(values)^set(aligned)))
    #log.info('\t[-] only %d are aligned values.'%(len(aligned) ) )
    return heap_addrs, heap_values #, stack_addrs, stack_values #values, aligned, not_aligned
def reverseLocalFonctionPointerNames(context):
    ''' reverse fn pointer names by trying to rebase the ptr value to a
    local ld_open.
    load local memdump
    map all librairies
    go through all pointers in librairies
    try to dl_addr the pointers by rebasing.
    Generator: yields (address, name) items; results are pickled to cache.
    '''
    fsave = Config.getCacheFilename(Config.CACHE_FUNCTION_NAMES, context.dumpname)
    if os.access(fsave, os.F_OK):
        # cache hit: replay the pickled vtable and stop
        import pickle
        vtable = pickle.load(file(fsave,'rb'))
        for x in vtable.items():
            yield x
        raise StopIteration
    import ctypes
    # pseudo-mappings with no backing library on the local system
    IGNORES = ['None', '[heap]', '[stack]','[vdso]']
    # XXX this is not portable.
    libdl = ctypes.CDLL('libdl.so')
    def getname(fnaddr):
        # resolve a function address to (symbol name, symbol address) via dladdr
        info = Dl_info()
        ret = libdl.dladdr( fnaddr, ctypes.byref(info))
        return info.dli_sname.string, info.dli_saddr
    mappings = context.mappings
    ldso = dict()
    for m in mappings:
        if m.pathname not in IGNORES and m.pathname not in ldso:
            try:
                ldso[m.pathname] = ctypes.CDLL(m.pathname)
            except OSError,e:
                # library not loadable locally: skip it from now on
                IGNORES.append(m.pathname)
    # NOTE(review): snippet truncated here; the rebase/dladdr loop over the
    # mapped libraries is not visible in this view.
def _reverse(self, context): import networkx #import code #code.interact(local=locals()) graph = networkx.DiGraph() graph.add_nodes_from([ '%x' % k for k in context.listStructuresAddresses() ]) # we only need the addresses... log.info('[+] Graph - added %d nodes' % (graph.number_of_nodes())) t0 = time.time() tl = t0 for i, ptr_value in enumerate(context.listStructuresAddresses()): struct = context.getStructureForAddr(ptr_value) #targets = set(( '%x'%ptr_value, '%x'%child.target_struct_addr ) for child in struct.getPointerFields()) #target_struct_addr targets = set( ('%x' % ptr_value, '%x' % child._child_addr) for child in struct.getPointerFields()) #target_struct_addr ## DEBUG if len(struct.getPointerFields()) > 0: if len(targets) == 0: raise ValueError ## DEBUG graph.add_edges_from(targets) if time.time() - tl > 30: tl = time.time() rate = ( (tl - t0) / (i)) #if decoded else ((tl-t0)/(fromcache)) log.info('%2.2f secondes to go (g:%d)' % ((len(graph) - (i)) * rate, i)) log.info('[+] Graph - added %d edges' % (graph.number_of_edges())) networkx.readwrite.gexf.write_gexf( graph, Config.getCacheFilename(Config.CACHE_GRAPH, context.dumpname)) context.parsed.add(str(self)) return
def _load_metadata(self):
    """ Load a memory dump's metadata: parse the mappings index file into
    self.metalines tuples and set the global word size (4 or 8) from the
    width of the address field. """
    mappingsFile = self._open_file(self.archive, self.indexFilename)
    self.metalines = []
    for l in mappingsFile.readlines():
        fields = l.strip().split(' ')
        if '' in fields:
            # collapse double spaces: drop one empty token per line
            fields.remove('')
        self.metalines.append( ( fields[0], fields[1], fields[2], fields[3], fields[4], fields[5], ' '.join(fields[6:]) ) )
    # test if x32 or x64
    # NOTE(review): this inspects `fields` from the last parsed line, so it
    # assumes the index file is non-empty - confirm callers guarantee that.
    if len(fields[0]) > 10:
        log.info('[+] WORDSIZE = 8 #x64 arch dump detected')
        Config.set_word_size(8)
    else:
        Config.set_word_size(4)
    return
def _reverse(self, context):
    ''' Decode the fields of every structure in the context and stream the
    generated headers to the dump's headers cache file. Already-resolved
    structures are counted as cache hits and not re-decoded.
    '''
    log.info('[+] FieldReverser: decoding fields')
    t0 = time.time()
    tl = t0
    decoded = 0
    fromcache = 0
    ## writing to file
    fout = file(Config.getCacheFilename(Config.CACHE_GENERATED_PY_HEADERS_VALUES, context.dumpname),'w')
    towrite=[]
    #for ptr_value,anon in context.structures.items():
    for ptr_value in context.listStructuresAddresses(): # lets try reverse
        anon = context.getStructureForAddr(ptr_value)
        if anon.isResolved(): # TODO this is a performance hit, unproxying...
            fromcache+=1
        else:
            decoded+=1
            anon.decodeFields()
            anon.saveme()
        ## output headers
        towrite.append(anon.toString())
        if time.time()-tl > 30: #i>0 and i%10000 == 0:
            # periodic progress estimate + flush of the header batch
            tl = time.time()
            rate = ((tl-t0)/(decoded+fromcache)) if decoded else ((tl-t0)/(fromcache))
            log.info('%2.2f secondes to go (d:%d,c:%d)'%( (context.structuresCount()-(fromcache+decoded))*rate, decoded,fromcache ) )
            fout.write('\n'.join(towrite) )
            towrite=[]
    log.info('[+] FieldReverser: finished %d structures in %2.0f (d:%d,c:%d)'%(fromcache+decoded, time.time()-t0, decoded,fromcache ) )
    context.parsed.add(str(self))
    return
def _reverse(self, context):
    ''' Decode the fields of every structure in the context using the
    DSASimple heuristic and stream the generated headers to the dump's
    headers cache file. Already-resolved structures are cache hits.
    '''
    log.info('[+] FieldReverser: decoding fields')
    t0 = time.time()
    tl = t0
    decoded = 0
    fromcache = 0
    ## writing to file
    fout = file(Config.getCacheFilename(Config.CACHE_GENERATED_PY_HEADERS_VALUES, context.dumpname),'w')
    towrite=[]
    from haystack.reverse.heuristics.dsa import DSASimple
    dsa = DSASimple()
    #for ptr_value,anon in context.structures.items():
    for ptr_value in context.listStructuresAddresses(): # lets try reverse
        anon = context.getStructureForAddr(ptr_value)
        if anon.is_resolved(): # TODO this is a performance hit, unproxying...
            fromcache+=1
        else:
            decoded+=1
            # field decoding delegated to the DSA heuristic
            dsa.analyze_fields(anon)
            anon.saveme()
        ## output headers
        towrite.append(anon.toString())
        if time.time()-tl > 30: #i>0 and i%10000 == 0:
            # periodic progress estimate + flush of the header batch
            tl = time.time()
            rate = ((tl-t0)/(decoded+fromcache)) if decoded else ((tl-t0)/(fromcache))
            log.info('%2.2f secondes to go (d:%d,c:%d)'%( (context.structuresCount()-(fromcache+decoded))*rate, decoded,fromcache ) )
            fout.write('\n'.join(towrite) )
            towrite=[]
    log.info('[+] FieldReverser: finished %d structures in %2.0f (d:%d,c:%d)'%(fromcache+decoded, time.time()-t0, decoded,fromcache ) )
    context.parsed.add(str(self))
    return
def load(self):
    """Reload this group's similarity array from the signature-groups cache."""
    cache_dir = Config.getCacheFilename(
        Config.CACHE_SIGNATURE_GROUPS_DIR, self._context.dumpname)
    cache_file = os.path.sep.join([cache_dir, self._name])
    self._similarities = utils.int_array_cache(cache_file)
def cacheSizes(self): """Find the number of different sizes, and creates that much numpyarray""" # if not os.access outdir = Config.getCacheFilename( Config.CACHE_SIGNATURE_SIZES_DIR, self._context.dumpname) if not os.path.isdir(outdir): os.mkdir(outdir) if not os.access(outdir, os.W_OK): raise IOError('cant write to %s' % (outdir)) # sizes = map(int, set(self._context._malloc_sizes)) arrays = dict([(s, []) for s in sizes]) # sort all addr in all sizes.. [arrays[self._context._malloc_sizes[i]].append( long(addr)) for i, addr in enumerate(self._context._malloc_addresses)] # saving all sizes dictionary in files... for size, lst in arrays.items(): fout = os.path.sep.join([outdir, 'size.%0.4x' % (size)]) arrays[size] = utils.int_array_save(fout, lst) # saved all sizes dictionaries. # tag it as done file( os.path.sep.join([outdir, Config.CACHE_SIGNATURE_SIZES_DIR_TAG]), 'w') self._sizes = arrays return
def _reverse(self, context): import networkx #import code #code.interact(local=locals()) graph = networkx.DiGraph() graph.add_nodes_from([ '%x'%k for k in context.listStructuresAddresses()]) # we only need the addresses... log.info('[+] Graph - added %d nodes'%(graph.number_of_nodes())) t0 = time.time() tl = t0 for i, ptr_value in enumerate(context.listStructuresAddresses()) : struct = context.getStructureForAddr(ptr_value) #targets = set(( '%x'%ptr_value, '%x'%child.target_struct_addr ) for child in struct.getPointerFields()) #target_struct_addr targets = set(( '%x'%ptr_value, '%x'%child._child_addr ) for child in struct.getPointerFields()) #target_struct_addr ## DEBUG if len(struct.getPointerFields()) >0: if len(targets) == 0: raise ValueError ## DEBUG graph.add_edges_from( targets ) if time.time()-tl > 30: tl = time.time() rate = ((tl-t0)/(i)) #if decoded else ((tl-t0)/(fromcache)) log.info('%2.2f secondes to go (g:%d)'%( (len(graph)-(i))*rate, i ) ) log.info('[+] Graph - added %d edges'%(graph.number_of_edges())) networkx.readwrite.gexf.write_gexf( graph, Config.getCacheFilename(Config.CACHE_GRAPH, context.dumpname)) context.parsed.add(str(self)) return
def _loadCache(self): outdir = Config.getCacheFilename(Config.CACHE_SIGNATURE_SIZES_DIR, self._context.dumpname) fdone = os.path.sep.join([outdir, Config.CACHE_SIGNATURE_SIZES_DIR_TAG]) if not os.access(fdone, os.R_OK): return False for myfile in os.listdir(outdir): try: addr = int( myfile.split(_)[1], 16 ) except IndexError,e: continue # ignore file
def cacheLoad(cls, mappings):
    ''' Load the pickled ReverserContext for this dump from its cache file.
    Removes the cache file and re-raises if it is truncated/corrupt.
    '''
    from haystack.reverse.reversers import ReverserContext
    dumpname = os.path.normpath(mappings.name)
    context_cache = Config.getCacheFilename(Config.CACHE_CONTEXT, dumpname)
    try:
        context = pickle.load(file(context_cache,'r'))
    except EOFError,e:
        # truncated pickle: clean the stale cache so the next run rebuilds it
        os.remove(context_cache)
        log.error('Error in the context file. File cleaned. Please restart.')
        raise e
    # NOTE(review): the success path (returning the loaded context) is not
    # visible in this snippet - confirm against the full source.
def persist(self):
    """Write this group's similarity array to the signature-groups cache dir,
    creating the directory first if needed."""
    cache_dir = Config.getCacheFilename(
        Config.CACHE_SIGNATURE_GROUPS_DIR, self._context.dumpname)
    if not os.path.isdir(cache_dir):
        os.mkdir(cache_dir)
    if not os.access(cache_dir, os.W_OK):
        raise IOError('cant write to %s' % (cache_dir))
    #
    cache_file = os.path.sep.join([cache_dir, self._name])
    utils.int_array_save(cache_file, self._similarities)
def getAllocations(dumpfilename, mappings, heap): ''' Search malloc_chunks in heap . records addrs and sizes. ''' # TODO if linux import libc.ctypes_malloc f_addrs = Config.getCacheFilename(Config.CACHE_MALLOC_CHUNKS_ADDRS, dumpfilename+'.%x'%(heap.start)) f_sizes = Config.getCacheFilename(Config.CACHE_MALLOC_CHUNKS_SIZES, dumpfilename+'.%x'%(heap.start)) log.debug('reading from %s'%(f_addrs)) addrs = int_array_cache(f_addrs) sizes = int_array_cache(f_sizes) if addrs is None or sizes is None: log.info('[+] Making new cache - getting malloc_chunks from heap ') allocations = libc.ctypes_malloc.getUserAllocations(mappings, heap, filterInuse=True) addrs, sizes = zip(*allocations) int_array_save(f_addrs, addrs) int_array_save(f_sizes, sizes) else: log.info('[+] Loading from cache') log.info('\t[-] we have %d malloc_chunks'%(len(addrs)) ) return addrs, sizes
def saveme(self):
    ''' Pickle this structure to its per-dump cache file, only when it has
    unsaved changes. '''
    if not self._dirty:
        return
    sdir = Config.getStructsCacheDir(self._context.dumpname)
    if not os.path.isdir(sdir):
        os.mkdir(sdir)
    fname = makeFilename(self._context, self)
    try:
        # FIXME : loops create pickle loops
        #print self.__dict__.keys()
        pickle.dump(self, file(fname,'w'))
    except RuntimeError,e:
        # typically recursion depth exceeded from self-referential fields
        log.error(e)
        print self.toString()
    # NOTE(review): snippet may be truncated here - the file handle from
    # file(fname,'w') is never closed explicitly.
def saveme(self):
    ''' Pickle this structure to its per-dump cache file, only when it has
    unsaved changes. '''
    if not self._dirty:
        return
    sdir = Config.getStructsCacheDir(self._context.dumpname)
    if not os.path.isdir(sdir):
        os.mkdir(sdir)
    fname = makeFilename(self._context, self)
    try:
        # FIXME : loops create pickle loops
        #print self.__dict__.keys()
        pickle.dump(self, file(fname, 'w'))
    except RuntimeError, e:
        # typically recursion depth exceeded from self-referential fields
        log.error(e)
        print self.toString()
    # NOTE(review): snippet may be truncated here - the file handle from
    # file(fname,'w') is never closed explicitly.
def saveSignatures(cache, structCache, dumpname):
    ''' cache is {} of sig: [structs].
    Write one commented header per signature group to the generated-headers
    cache file, largest groups first. Each entry shows the group size, the
    signature, and the rendered first member of the group.
    '''
    fout = file(Config.getCacheFilename(Config.CACHE_GENERATED_PY_HEADERS,dumpname),'w')
    # sort groups by popularity: biggest struct families first
    tuples = [(len(structs), sig, structs) for sig,structs in cache.items() ]
    tuples.sort(reverse=True)
    for l, sig,structs in tuples:
        # CLEANUP: removed unused locals `towrite` and `values` from the original
        s='''
# %d structs
#class %s
%s
'''%(len(structs), sig, structs[0].toString())
        fout.write(s)
    fout.close()
def _loadCache(self):
    ''' Load the per-size address caches if the 'done' tag file exists.
    Returns False when the cache is absent or incomplete.
    '''
    outdir = Config.getCacheFilename(
        Config.CACHE_SIGNATURE_SIZES_DIR,
        self._context.dumpname)
    fdone = os.path.sep.join(
        [outdir, Config.CACHE_SIGNATURE_SIZES_DIR_TAG])
    if not os.access(fdone, os.R_OK):
        return False
    for myfile in os.listdir(outdir):
        try:
            # FIXME: not sure its -
            # and what that section is about in general.
            addr = int(myfile.split('-')[1], 16)
        except IndexError as e:
            continue # ignore file
    # NOTE(review): snippet truncated here; what is done with `addr` is not
    # visible in this view.
def saveme(self):
    ''' Pickle this structure to its per-dump cache file, only when it has
    unsaved changes. On Ctrl-C, remove the partially-written file before
    re-raising so a stale pickle is never loaded later. '''
    if not self._dirty:
        return
    sdir = Config.getStructsCacheDir(self._context.dumpname)
    if not os.path.isdir(sdir):
        os.mkdir(sdir)
    fname = makeFilename(self._context, self)
    try:
        pickle.dump(self, file(fname, 'w'))
    except KeyboardInterrupt, e:
        # clean it, its stale
        os.remove(fname)
        log.warning('removing %s' % (fname))
        import sys
        # py2 three-argument raise: preserve the original traceback
        ex = sys.exc_info()
        raise ex[1], None, ex[2]
def saveme(self):
    ''' Pickle this structure to its per-dump cache file, only when it has
    unsaved changes. On Ctrl-C, remove the partially-written file before
    re-raising so a stale pickle is never loaded later. '''
    if not self._dirty:
        return
    sdir = Config.getStructsCacheDir(self._context.dumpname)
    if not os.path.isdir(sdir):
        os.mkdir(sdir)
    fname = makeFilename(self._context, self)
    try:
        pickle.dump(self, file(fname,'w'))
    except KeyboardInterrupt, e:
        # clean it, its stale
        os.remove(fname)
        log.warning('removing %s'%(fname))
        import sys
        # py2 three-argument raise: preserve the original traceback
        ex = sys.exc_info()
        raise ex[1], None, ex[2]
def saveSignatures(cache, structCache, dumpname):
    ''' cache is {} of sig: [structs].
    Writes one commented header per signature group to the generated-headers
    cache file, largest groups first.
    '''
    fout = file(
        Config.getCacheFilename(Config.CACHE_GENERATED_PY_HEADERS, dumpname),
        'w')
    towrite = []
    # sort groups by popularity: biggest struct families first
    tuples = [(len(structs), sig, structs) for sig, structs in cache.items()]
    tuples.sort(reverse=True)
    for l, sig, structs in tuples:
        values = ''
        s = '''
# %d structs
#class %s
%s
''' % (len(structs), sig, structs[0].toString())
        fout.write(s)
    fout.close()
def save_headers(context, addrs=None):
    ''' structs_addrs is sorted.
    Render each structure (all of them, or just the given addresses) to the
    generated-headers cache file, flushing in batches of 10000.
    '''
    log.info('[+] saving headers')
    fout = file(Config.getCacheFilename(Config.CACHE_GENERATED_PY_HEADERS_VALUES, context.dumpname),'w')
    towrite = []
    if addrs is None:
        addrs = iter(context.listStructuresAddresses())
    for vaddr in addrs:
        #anon = context._get_structures()[vaddr]
        anon = context.getStructureForAddr( vaddr )
        towrite.append(anon.toString())
        if len(towrite) >= 10000:
            # flush in batches to bound memory use
            try:
                fout.write('\n'.join(towrite) )
            except UnicodeDecodeError, e:
                # non-decodable bytes in a rendered struct: report and drop batch
                print 'ERROR on ',anon
            towrite = []
            fout.flush()
    # NOTE(review): snippet ends here; the final partial batch write and
    # fout.close() are not visible in this view - confirm in the full source.
def rewrite(structs_addrs, structCache, dumpname):
    ''' structs_addrs is sorted.
    Resolve pointers of each cached structure and rewrite all headers to the
    generated-headers cache file, flushing in batches of 10000.
    Note: sorts structs_addrs in place.
    '''
    structs_addrs.sort()
    fout = file(Config.getCacheFilename(Config.CACHE_GENERATED_PY_HEADERS_VALUES, dumpname),'w')
    towrite = []
    for vaddr in structs_addrs:
        ## debug
        # raise 'progressive' logger to DEBUG only for watched addresses
        if vaddr in DEBUG_ADDRS:
            logging.getLogger('progressive').setLevel(logging.DEBUG)
        else:
            logging.getLogger('progressive').setLevel(logging.INFO)
        anon = structCache[vaddr]
        anon.resolvePointers()
        towrite.append(anon.toString())
        if len(towrite) >= 10000:
            # flush in batches to bound memory use
            fout.write('\n'.join(towrite) )
            towrite = []
    # final partial batch
    fout.write('\n'.join(towrite) )
    fout.close()
    return
def graphStructureGroups(context, chains, originAddr=None): # TODO change generic fn chains.sort() import networkx graph = networkx.DiGraph() for chain in chains: log.debug('\t[-] chain len:%d'%len(chain) ) if originAddr is not None: if originAddr not in chain: continue # ignore chain if originAddr is not in it for addr in map(long,chain): context.getStructureForAddr(addr).decodeFields() # can be long print context.getStructureForAddr(addr).toString() targets = set() for f in context.getStructureForAddr(addr).getPointerFields(): addr_child = f._getValue(0) child = context.getStructureForOffset(addr) targets.add(( '%x'%addr, '%x'%child._vaddr ) ) graph.add_edges_from( targets ) print '#','-'*78 networkx.readwrite.gexf.write_gexf( graph, Config.getCacheFilename(Config.CACHE_GRAPH, context.dumpname))
def rewrite(structs_addrs, structCache, dumpname):
    ''' structs_addrs is sorted.
    Resolve pointers of each cached structure and rewrite all headers to the
    generated-headers cache file, flushing in batches of 10000.
    Note: sorts structs_addrs in place.
    '''
    structs_addrs.sort()
    fout = file(
        Config.getCacheFilename(Config.CACHE_GENERATED_PY_HEADERS_VALUES, dumpname),
        'w')
    towrite = []
    for vaddr in structs_addrs:
        ## debug
        # raise 'progressive' logger to DEBUG only for watched addresses
        if vaddr in DEBUG_ADDRS:
            logging.getLogger('progressive').setLevel(logging.DEBUG)
        else:
            logging.getLogger('progressive').setLevel(logging.INFO)
        anon = structCache[vaddr]
        anon.resolvePointers()
        towrite.append(anon.toString())
        if len(towrite) >= 10000:
            # flush in batches to bound memory use
            fout.write('\n'.join(towrite))
            towrite = []
    # final partial batch
    fout.write('\n'.join(towrite))
    fout.close()
    return
def cacheSizes(self): """Find the number of different sizes, and creates that much numpyarray""" # if not os.access outdir = Config.getCacheFilename(Config.CACHE_SIGNATURE_SIZES_DIR, self._context.dumpname) if not os.path.isdir(outdir): os.mkdir(outdir) if not os.access(outdir, os.W_OK): raise IOError('cant write to %s'%(outdir)) # sizes = map(int,set(self._context._malloc_sizes)) arrays = dict([(s,[]) for s in sizes]) #sort all addr in all sizes.. [arrays[ self._context._malloc_sizes[i] ].append( long(addr) ) for i, addr in enumerate(self._context._malloc_addresses) ] #saving all sizes dictionary in files... for size,lst in arrays.items(): fout = os.path.sep.join([outdir, 'size.%0.4x'%(size)]) arrays[size] = utils.int_array_save( fout , lst) #saved all sizes dictionaries. # tag it as done file(os.path.sep.join([outdir, Config.CACHE_SIGNATURE_SIZES_DIR_TAG]),'w') self._sizes = arrays return
def reverseLocalFonctionPointerNames(context):
    ''' reverse fn pointer names by trying to rebase the ptr value to a
    local ld_open.
    load local memdump
    map all librairies
    go through all pointers in librairies
    try to dl_addr the pointers by rebasing.
    Generator: yields (address, name) items; results are pickled to cache.
    '''
    fsave = Config.getCacheFilename(Config.CACHE_FUNCTION_NAMES, context.dumpname)
    if os.access(fsave, os.F_OK):
        # cache hit: replay the pickled vtable and stop
        import pickle
        vtable = pickle.load(file(fsave, 'rb'))
        for x in vtable.items():
            yield x
        raise StopIteration
    import ctypes
    # pseudo-mappings with no backing library on the local system
    IGNORES = ['None', '[heap]', '[stack]', '[vdso]']
    # XXX this is not portable.
    libdl = ctypes.CDLL('libdl.so')
    def getname(fnaddr):
        # resolve a function address to (symbol name, symbol address) via dladdr
        info = Dl_info()
        ret = libdl.dladdr(fnaddr, ctypes.byref(info))
        return info.dli_sname.string, info.dli_saddr
    mappings = context.mappings
    ldso = dict()
    for m in mappings:
        if m.pathname not in IGNORES and m.pathname not in ldso:
            try:
                ldso[m.pathname] = ctypes.CDLL(m.pathname)
            except OSError, e:
                # library not loadable locally: skip it from now on
                IGNORES.append(m.pathname)
    # NOTE(review): snippet truncated here; the rebase/dladdr loop over the
    # mapped libraries is not visible in this view.
def save(self):
    ''' Pickle this context to the dump's context cache file.
    We only need dumpfilename to reload mappings, and the addresses to
    reload cached structures.
    '''
    context_cache = Config.getCacheFilename(Config.CACHE_CONTEXT, self.dumpname)
    # BUGFIX: the original passed file(...,'w') inline and never closed it,
    # leaking the handle and risking an unflushed pickle on some interpreters.
    fout = file(context_cache, 'w')
    try:
        pickle.dump(self, fout)
    finally:
        fout.close()
#!/usr/bin/env python # -*- coding: utf-8 -*- """Tests for haystack.reverse.structure.""" import logging import struct import operator import os import unittest import pickle import sys from haystack.config import Config Config.set_word_size(4) # forcing it on these unittest from haystack.reverse import context from haystack.reverse.libc import ctypes_malloc as ctypes_alloc from haystack.reverse.libc import libcheapwalker from haystack import dump_loader __author__ = "Loic Jaquemet" __copyright__ = "Copyright (C) 2012 Loic Jaquemet" __license__ = "GPL" __maintainer__ = "Loic Jaquemet" __email__ = "*****@*****.**" __status__ = "Production" import ctypes
def load(self):
    """Reload this group's similarity array from its file in the
    signature-groups cache directory."""
    group_dir = Config.getCacheFilename(Config.CACHE_SIGNATURE_GROUPS_DIR,
                                        self._context.dumpname)
    self._similarities = utils.int_array_cache(
        os.path.sep.join([group_dir, self._name]))
def isPersisted(self):
    """Return True when a cache file already exists for this group."""
    group_dir = Config.getCacheFilename(Config.CACHE_SIGNATURE_GROUPS_DIR,
                                        self._context.dumpname)
    target = os.path.sep.join([group_dir, self._name])
    return os.access(target, os.F_OK)
def makeFilename(context, st):
    """Return the cache path for structure `st`, creating the per-dump
    structures cache directory on first use."""
    cache_dir = Config.getStructsCacheDir(context.dumpname)
    if not os.path.isdir(cache_dir):
        os.mkdir(cache_dir)
    return os.path.sep.join([cache_dir, str(st)])
#!/usr/bin/env python # -*- coding: utf-8 -*- # # Copyright (C) 2011 Loic Jaquemet [email protected] # __author__ = "Loic Jaquemet [email protected]" import struct import operator import os import unittest from haystack.config import Config Config.set_word_size(4) from haystack import memory_mapping from haystack.reverse import pointerfinder Config.MMAP_START = 0x0C00000 Config.MMAP_STOP = 0x0C01000 Config.MMAP_LENGTH = 4096 Config.STRUCT_OFFSET = 44 # Config.cacheDir = os.path.normpath('./outputs/') def accumulate(iterable, func=operator.add): it = iter(iterable) total = next(it) yield total
#!/usr/bin/env python # -*- coding: utf-8 -*- """Tests haystack.model .""" import logging import unittest import sys from haystack.config import Config Config.set_word_size(4) # force it from haystack import dump_loader from haystack import model from haystack import utils from haystack.reverse.win32 import win7heapwalker from haystack.utils import isCStringPointer, isPointerType, isVoidPointerType, isFunctionType, getaddress class TestReferenceBook(unittest.TestCase): ''' Test the reference book ''' def setUp(self): self.mappings = dump_loader.load('test/dumps/putty/putty.1.dump') heap = self.mappings.getHeap() # execute a loadMembers walker = win7heapwalker.Win7HeapWalker(self.mappings, heap, 0) self.heap_obj = walker._heap def tearDown(self): model.reset()
def test_reverseInstances(self):
    ''' Smoke test: clean the cache for the ssh dump and run the full
    reverseInstances pipeline on it. '''
    # NOTE(review): the context loaded here is immediately overwritten below,
    # and `ctx` is rebound to the cleanCache/reverseInstances results -
    # confirm whether the first get_context call is intentional warm-up.
    ctx = context.get_context('test/dumps/ssh/ssh.1')
    dumpname = 'test/dumps/ssh/ssh.1'
    ctx = Config.cleanCache(dumpname)
    ctx = reversers.reverseInstances(dumpname)
#!/usr/bin/env python # -*- coding: utf-8 -*- """Tests for haystack.reverse.structure.""" import logging import struct import operator import os import unittest import pickle import sys from haystack.config import Config Config.set_word_size(4) # forcing it on these unittest from haystack import utils, model from haystack.reverse.win32 import win7heapwalker, win7heap from haystack.reverse.win32.win7heap import HEAP, HEAP_ENTRY from haystack import dump_loader __author__ = "Loic Jaquemet" __copyright__ = "Copyright (C) 2012 Loic Jaquemet" __license__ = "GPL" __maintainer__ = "Loic Jaquemet" __email__ = "*****@*****.**" __status__ = "Production" import ctypes log = logging.getLogger('testwalker')
def save(self):
    ''' Pickle this context to the dump's context cache file. '''
    # we only need dumpfilename to reload mappings, addresses to reload cached structures
    context_cache = Config.getCacheFilename(Config.CACHE_CONTEXT, self.dumpname)
    pickle.dump(self, file(context_cache,'w'))

def reset(self):
    ''' Delete the pickled context cache and the per-structure cache tree.
    Missing files/directories are ignored. '''
    try:
        os.remove(Config.getCacheFilename(Config.CACHE_CONTEXT, self.dumpname) )
    except OSError,e:
        pass
    try:
        if not os.access(Config.CACHE_STRUCT_DIR, os.F_OK):
            return
        # bottom-up removal of the structures cache tree
        for r,d,files in os.walk( Config.getCacheFilename(Config.CACHE_STRUCT_DIR, self.dumpname)):
            for f in files:
                os.remove(os.path.join(r,f) )
            os.rmdir(r)
    except OSError,e:
        pass

def __getstate__(self):
    ''' Exclude unpicklable/reloadable state (mappings, heap, structure
    caches) from the pickled context. '''
    d = self.__dict__.copy()
    del d['mappings']
    del d['heap']
    del d['_structures']
    del d['_structures_addresses']
    #del d['_pointers_values']
    #del d['_pointers_offsets']
    # NOTE(review): snippet truncated here; the return of `d` is not visible
    # in this view.
def reset(self):
    """Delete the pickled context cache file; a missing file is not an error."""
    cache_path = Config.getCacheFilename(Config.CACHE_CONTEXT, self.dumpname)
    try:
        os.remove(cache_path)
    except OSError:
        pass
#!/usr/bin/env python # -*- coding: utf-8 -*- """Tests haystack.utils .""" import struct import operator import os import unittest from haystack.config import Config Config.set_word_size(4) import ctypes from haystack import memory_mapping from haystack.model import LoadableMembersStructure from haystack import utils __author__ = "Loic Jaquemet" __copyright__ = "Copyright (C) 2012 Loic Jaquemet" __email__ = "*****@*****.**" __license__ = "GPL" __maintainer__ = "Loic Jaquemet" __status__ = "Production" class St(LoadableMembersStructure): _fields_ = [('a', ctypes.c_int)] class St2(LoadableMembersStructure): _fields_ = [('a', ctypes.c_int)]
#!/usr/bin/env python # -*- coding: utf-8 -*- """Tests haystack.model .""" import logging import unittest import sys from haystack.config import Config Config.set_word_size(4) # force it from haystack import dump_loader from haystack import model from haystack import utils from haystack.reverse.win32 import win7heapwalker from haystack.utils import isCStringPointer, isPointerType, isVoidPointerType, isFunctionType, getaddress class TestReferenceBook(unittest.TestCase): ''' Test the reference book ''' def setUp(self): self.mappings = dump_loader.load('test/dumps/putty/putty.1.dump') heap = self.mappings.getHeap() # execute a loadMembers walker = win7heapwalker.Win7HeapWalker(self.mappings, heap, 0) self.heap_obj = walker._heap def tearDown(self): model.reset() self.mappings = None