def scan(filelist, conf=DEFAULTCONF):
    """Compute impfuzzy hashes for the PE32 files reported by libmagic.

    The libmagic results are taken from ``REQUIRES[0]``. Every entry whose
    libmagic description starts with ``PE32`` is hashed with pyimpfuzzy and
    the hash is stored together with its three colon-separated parts.
    ``conf`` is accepted but not used here.

    Returns:
        A ``(results, metadata)`` tuple; ``results`` is a list of
        ``(filename, document)`` pairs.
    """
    magic_results, _magic_meta = REQUIRES[0]
    results = []

    for fname, description in magic_results:
        if fname not in filelist:
            # Only reported; the entry is still processed below.
            print("DEBUG: File not in filelist")
        if description.startswith('PE32'):
            impfuzzy_hash = pyimpfuzzy.get_impfuzzy(fname)
            size_part, chunk, double_chunk = impfuzzy_hash.split(':')
            results.append((fname, {
                'impfuzzy_hash': impfuzzy_hash,
                'chunksize': int(size_part),
                'chunk': chunk,
                'double_chunk': double_chunk,
                'analyzed': 'false',
                'matches': {},
            }))

    metadata = {"Name": NAME, "Type": TYPE, "Include": False}
    return (results, metadata)
def _impfuzzy(self):
    """
    Return the impfuzzy hash of ``self.filepath`` as a string.

    Any exception raised while hashing is logged as a warning and
    ``None`` is returned instead.
    """
    # Hashing is delegated to pyimpfuzzy, see
    # https://github.com/JPCERTCC/impfuzzy
    try:
        return str(pyimpfuzzy.get_impfuzzy(self.filepath))
    except Exception as err:
        logger.warning(err)
        return None
def run(self):
    """Collect the reference impfuzzy hashes and return the result TreeGrid.

    The reference hashes come from the first configured option, checked in
    this order: ``impfuzzy`` (a single hash), ``impfuzzylist`` (a text file
    with one hash per line) or ``exefile`` (a file, or a directory that is
    walked recursively, hashed with pyimpfuzzy). The threshold is taken
    from the ``threshold`` option and defaults to 30. Hash list and
    threshold are handed to ``self._generator`` together with the process
    list filtered by the ``pid`` option.

    Raises:
        RuntimeError: if the pyimpfuzzy module is not available.
    """
    if not has_pyimpfuzzy:
        vollog.info(
            "Python pyimpfuzzy module not found, plugin (and dependent plugins) not available"
        )
        # The original bare `raise` had no active exception to re-raise,
        # which Python reports as a RuntimeError; raise it explicitly so
        # the failure carries a meaningful message.
        raise RuntimeError("pyimpfuzzy module not found")

    # This is a impfuzzys threshold
    threshold = self.config.get('threshold', None)
    if threshold is None:
        threshold = 30

    filter_func = pslist.PsList.create_pid_filter(
        [self.config.get('pid', None)])

    hashlist = []
    if self.config.get('impfuzzy', None) is not None:
        hashlist.append(self.config['impfuzzy'])
    elif self.config.get('impfuzzylist', None) is not None:
        # Context manager so the list file is closed deterministically.
        with open(self.config['impfuzzylist'], "r") as rf:
            hashlist.extend(line.rstrip() for line in rf)
    elif self.config.get('exefile', None) is not None:
        exefile = self.config['exefile']
        files = []
        if os.path.isdir(exefile):
            for root, dirs, filenames in os.walk(exefile):
                for name in filenames:
                    files.append(os.path.join(root, name))
        elif os.path.isfile(exefile):
            files.append(exefile)
        for path in files:
            hashlist.append(pyimpfuzzy.get_impfuzzy(path))

    return renderers.TreeGrid(
        [("PID", int), ("ImageFileName", str),
         ("Module Base", format_hints.Hex), ("Module Name", str),
         ("impfuzzy", str)],
        self._generator(
            pslist.PsList.list_processes(
                context=self.context,
                layer_name=self.config['primary'],
                symbol_table=self.config['nt_symbols'],
                filter_func=filter_func),
            hashlist,
            threshold))
def get_digest(self, file):
    """Return the scout import-table vector, impfuzzy hash and digests of *file*.

    Returns:
        A tuple ``(scout_result, impfuzzy, md5, sha1, sha256,
        scout_confidence)``. ``impfuzzy`` is ``""`` when pyimpfuzzy fails.
        ``scout_result`` and ``scout_confidence`` are ``None`` when *file*
        is not a regular file or the scout evaluation fails; in that case
        the path is appended to ``fail_list.txt`` in the current directory.

    Raises:
        OSError: if *file* cannot be opened for hashing.
    """
    # Best effort: any pyimpfuzzy failure yields an empty hash. Catching
    # Exception (not a bare except) lets Ctrl-C / SystemExit propagate.
    try:
        impfuzzy = pyimpfuzzy.get_impfuzzy(file)
    except Exception:
        impfuzzy = ""

    binary = None
    if os.path.isfile(file):
        with open(file, "rb") as f_binary:
            binary = f_binary.read()

    scout_result = None
    scout_confidence = None
    # A path that is not a regular file counts as a failed evaluation
    # (previously this relied on a swallowed NameError for `binary`).
    failed = binary is None
    if not failed:
        try:
            scout_ev = self.scout.evaluateImportTable(binary,
                                                      is_unmapped=True)
            vector = (self.scout.getWinApi1024Vectors(scout_ev).get(
                "import_table", {}).get("vector", None))
            scout_confidence = self.scout._apivector.getVectorConfidence(
                vector)
            # Only publish the vector once its confidence was computed too.
            scout_result = vector
        except Exception:
            failed = True
            scout_result = None
            scout_confidence = None
    if failed:
        with open("fail_list.txt", "a") as fail_log:
            fail_log.write(file + "\n")

    md5 = hashlib.md5()
    sha1 = hashlib.sha1()
    sha256 = hashlib.sha256()
    with open(file, "rb") as f:
        # Stream the file in small chunks until read() returns b"".
        for buf in iter(lambda: f.read(2047), b""):
            md5.update(buf)
            sha1.update(buf)
            sha256.update(buf)

    return (
        scout_result,
        impfuzzy,
        md5.hexdigest(),
        sha1.hexdigest(),
        sha256.hexdigest(),
        scout_confidence,
    )
def get_digest(file):
    """Return ``(impfuzzy, md5, sha1, sha256)`` for the file at path *file*.

    ``impfuzzy`` is the pyimpfuzzy hash, or ``""`` when pyimpfuzzy raises.
    The three digests are hex strings computed over the file contents.

    Raises:
        OSError: if *file* cannot be opened for reading.
    """
    # Best effort: any pyimpfuzzy failure yields an empty hash. Catching
    # Exception (not a bare except) lets Ctrl-C / SystemExit propagate.
    try:
        impfuzzy = pyimpfuzzy.get_impfuzzy(file)
    except Exception:
        impfuzzy = ""

    md5 = hashlib.md5()
    sha1 = hashlib.sha1()
    sha256 = hashlib.sha256()
    with open(file, "rb") as f:
        # Stream the file in small chunks until read() returns b"".
        for buf in iter(lambda: f.read(2047), b""):
            md5.update(buf)
            sha1.update(buf)
            sha256.update(buf)

    return impfuzzy, md5.hexdigest(), sha1.hexdigest(), sha256.hexdigest()
def getimphash(path):
    """Recursively collect the imphash and fuzzy imphash of the PE files under *path*.

    Files that cannot be parsed or read are skipped. The hashes are stored
    in a dictionary keyed by the file NAME (not the full path, so files
    sharing a name in different directories overwrite each other) with
    ``[imphash, impfuzzy]`` as value. The dictionary is printed and pickled
    to ``hashes.pkl`` in the current working directory.
    """
    hashes = {}
    for root, subdirs, files in os.walk(path):
        for f in files:
            filepath = os.path.join(root, f)  # full file path
            # Skip non-PE or non-readable files; Exception (instead of a
            # bare except) keeps Ctrl-C / SystemExit working.
            try:
                pe = pefile.PE(filepath)  # pe file object
                imphash = pe.get_imphash()  # get the imphash
                impfuzzy = pyimpfuzzy.get_impfuzzy(filepath)  # fuzzy hash
            except Exception:
                continue
            hashes[f] = [imphash, impfuzzy]  # file name: [imphash, fuzzyhash]
            # shutil.move(filepath, 'dumpfolder/'+ f)  # move the file to the dump folder

    # Parenthesised single argument prints identically on Python 2 and 3.
    print(hashes)

    # Save the dictionary to disk for clustering by similarity; the context
    # manager makes sure the file is flushed and closed.
    with open('hashes.pkl', 'wb') as out:
        pickle.dump(hashes, out)
def Main(exe):
    """Print a static-analysis report for the file at path *exe*.

    If pefile can parse *exe*, the report (written with ``print``) covers
    hashes, header fields, libmagic types, sections, resource languages,
    imports and exports, followed by the output of the helper scanners
    called at the end. Otherwise a single "only supports ... PEs" message
    is printed. Nothing is returned.
    """
    ape = False
    try:
        # fast_load parse is used only as an "is this a PE?" probe.
        ape = pefile.PE(exe, fast_load=True)
    except:
        pass
    if ape != False:
        ''' After successful verification that the sample is a valid PE, generate some static meta about the header characteristics. '''
        # Full parse; `sample` is presumably the raw file contents
        # (it is hashed and passed to len()) -- confirm in ReadSample.
        pe = pefile.PE(exe)
        sample = ReadSample(exe)
        print '# MD5 : ' + str(hashlib.md5(sample).hexdigest())
        print '# SHA1 : ' + str(hashlib.sha1(sample).hexdigest())
        print '# SHA256 : ' + str(hashlib.sha256(sample).hexdigest())
        print '# SSDEEP : ' + str(ssdeep.hash(sample))
        print '# Import Hash : ' + str(pe.get_imphash())
        # The fuzzy import hash is optional: any failure is ignored.
        try:
            print '# Fuzzy Import Hash: ' + str(pyimpfuzzy.get_impfuzzy(exe))
        except:
            pass
        print '# File Size : ' + str(len(sample))
        print '# Major Version : ' + str(
            pe.OPTIONAL_HEADER.MajorOperatingSystemVersion)
        print '# Minor Version : ' + str(
            pe.OPTIONAL_HEADER.MinorOperatingSystemVersion)
        print '# Compiletime UTC : ' + str(
            time.strftime('%m/%d/%Y %H:%M:%S',
                          time.gmtime(pe.FILE_HEADER.TimeDateStamp)))
        print '# Compiletime EPOCH: ' + str(pe.FILE_HEADER.TimeDateStamp)
        mime_magic = magic.Magic(mime=True)
        print '# MIME Type : ' + str(mime_magic.from_buffer(sample))
        full_magic = magic.Magic()
        print '# File Magic : ' + str(full_magic.from_buffer(sample))
        print '# PE Sections : '
        for section in pe.sections:
            print ' Name : ' + str(section.Name)
            print ' MD5 : ' + str(section.get_hash_md5())
            print ' Size : ' + str(section.SizeOfRawData)
            start = section.PointerToRawData
            endofdata = start + section.SizeOfRawData
            # NOTE(review): the slice end is start + size, not size, and
            # `start` is a raw file offset -- confirm this is what
            # get_data() expects.
            print ' SSDEEP: ' + str(
                ssdeep.hash(section.get_data(start)[:endofdata]))
            print ''
        # Collect the language / sub-language of every resource entry.
        lang = []
        subLang = []
        if hasattr(pe, 'DIRECTORY_ENTRY_RESOURCE'):
            for resource_type in pe.DIRECTORY_ENTRY_RESOURCE.entries:
                if hasattr(resource_type, 'directory'):
                    for resource_id in resource_type.directory.entries:
                        if hasattr(resource_id, 'directory'):
                            for resource_lang in resource_id.directory.entries:
                                language = pefile.LANG.get(
                                    resource_lang.data.lang, None)
                                sublanguage = pefile.get_sublang_name_for_lang(
                                    resource_lang.data.lang,
                                    resource_lang.data.sublang)
                                lang.append(language)
                                subLang.append(sublanguage)
        # set() de-duplicates, so each language is printed once.
        for l in set(lang):
            print '# Compiled Language : ' + str(l)
        for sl in set(subLang):
            print '# Compiled Sub-Language: ' + str(sl)
        print '# Imports:'
        # Any error while listing imports (e.g. no import directory)
        # silently ends the listing.
        try:
            for entry in pe.DIRECTORY_ENTRY_IMPORT:
                for imp in entry.imports:
                    print ' ' + str(entry.dll) + '!' + str(imp.name)
        except:
            pass
        print '# Exports:'
        # Any error while listing exports is reported as "<none>".
        try:
            for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols:
                print ' ' + str(exp.name)
        except:
            print ' <none>'
            pass
        ''' Look for Crpyto Constants with YARA '''
        print '# Scanning with YARA signatures'
        ScanWithYARA(sample)
        ''' The following functions look for cleartext and XOR encoded strings of interest [domains, IP's, email addresses, pdb paths, executables] and print them in seperate sections. The last function will attempt to carve out any single byte XOR encoded execcutables and give them the correct extension. '''
        # Patterns, in order: .pdb paths, dotted-quad IPv4 addresses,
        # e-mail addresses, names ending in dll/scr/exe/bat.
        regex = ['[a-zA-Z0-9-\s\\\.\:]+\.pdb', \
                 '(?:[\d]{1,3})\.(?:[\d]{1,3})\.(?:[\d]{1,3})\.(?:[\d]{1,3})', \
                 '[_a-z0-9-]+(\.[_a-z0-9-]+)*@[a-z0-9-]+(\.[a-z0-9-]+)*(\.[a-z]{2,4})', \
                 '[A-z|a-z|0-9]{1,}\.(dll|scr|exe|bat)']
        print '# Cleartext Interesting Strings'
        FindStrings(sample, regex)
        print '# XOR Encdoded Interesting Strings'
        FindXorStrings(sample, regex)
        print '# Possible Domain Names'
        DomainHunter(sample)
        print '# Carving Additional PEs'
        MultiByteXor(sample)
        print '# Looking for Common Shellcode Techniques'
        ShellcodeHunter(sample)
    else:
        print 'This application only supports analyzing Windows PEs'
def render_text(self, outfd, data):
    """Write the impfuzzy results in *data* to *outfd* as a table.

    *data* yields ``(offset, FileName, base, ModName, hash_result,
    fuzzy_result)`` tuples; rows whose ``fuzzy_result`` contains
    ``"Error"`` are skipped.

    Two modes are selected from the plugin configuration:

    * search (``EXEFILE`` or ``COMPIMPFUZZY`` set): reference hashes are
      computed from the given file/directory or read one per line from the
      given list file, and a row is written for every reference hash whose
      comparison score reaches the threshold (``THRESHOLD``, default 30).
    * list (``LISTIMPFUZZY`` set): every row is written with its hash.

    If none of the options is set, ``debug.error`` is called.
    """
    # This is a impfuzzys threshold
    ss_threshold = 30

    if not has_pyimpfuzzy:
        debug.error("pyimpfuzzy must be installed for this plugin")

    files = []
    impfuzzys = []

    if self._config.EXEFILE is not None:
        mode = "search"
        if os.path.isdir(self._config.EXEFILE):
            for root, dirs, filenames in os.walk(self._config.EXEFILE):
                for name in filenames:
                    files.append(os.path.join(root, name))
        elif os.path.isfile(self._config.EXEFILE):
            files.append(self._config.EXEFILE)

        for file in files:
            impfuzzys.append(pyimpfuzzy.get_impfuzzy(file))
    elif self._config.COMPIMPFUZZY is not None:
        mode = "search"
        # Context manager so the hash list file is always closed.
        with open(self._config.COMPIMPFUZZY, "r") as of:
            for line in of:
                impfuzzys.append(line.rstrip())
    elif self._config.LISTIMPFUZZY:
        mode = "list"
    else:
        debug.error(
            "Please set option -e (PE file or directory) or -i (impfuzzy hash list file) or -a (Listing the impfuzzy)")

    if self._config.THRESHOLD is not None:
        ss_threshold = self._config.THRESHOLD

    if mode == "search":
        self.table_header(outfd,
                          [("Process", "[addrpad]"),
                           ("Name", "20"),
                           ("Module Base", "[addrpad]"),
                           ("Module Name", "20"),
                           ("impfuzzy", "20"),
                           ("Compare", "7")])
        for offset, FileName, base, ModName, hash_result, fuzzy_result in data:
            if "Error" in fuzzy_result:
                continue
            for impfuzzy in impfuzzys:
                # Compare once and reuse the score for test and output.
                score = pyimpfuzzy.hash_compare(impfuzzy, fuzzy_result)
                if score >= ss_threshold:
                    self.table_row(outfd, offset, FileName, base, ModName,
                                   fuzzy_result, score)

    if mode == "list":
        self.table_header(outfd,
                          [("Process", "[addrpad]"),
                           ("Name", "20"),
                           ("Module Base", "[addrpad]"),
                           ("Module Name", "20"),
                           ("impfuzzy", "110")])
        for offset, FileName, base, ModName, hash_result, fuzzy_result in data:
            if not "Error" in fuzzy_result:
                self.table_row(outfd, offset, FileName, base, ModName,
                               fuzzy_result)
def _run_in_dir(directory, argv):
    """Run *argv* with the process cwd set to *directory*; return its stdout lines.

    The previous working directory is restored even when the command fails,
    so an exception cannot leave the process in the tool directory.
    """
    previous = os.getcwd()
    os.chdir(directory)
    try:
        output = subprocess.check_output(argv)
    finally:
        os.chdir(previous)
    return output.decode('utf-8').split('\n')


def analyse(filepath, pefiles_dir, pe, collection, useVT, api_key):
    """Collect hashes, tool output and metadata for one PE file.

    Values are appended to a list in the order of the module-level
    ``HEADER`` and finally zipped with it by position.

    Args:
        filepath: path of the file to analyse.
        pefiles_dir: directory in which ``<sha256>.txt`` marks an already
            analysed file.
        pe: parsed PE object for *filepath* (``get_imphash`` and the
            import/export directories are read from it).
        collection: database collection of earlier results; entries whose
            'nearest value' is lower than the new comparison score are
            updated.
        useVT: the string ``'on'`` enables the VirusTotal lookup.
        api_key: VirusTotal API key.

    Returns:
        ``(ret_dict, s256path)`` on success. If ``s256path`` already exists,
        or the VirusTotal report has no 'results', a
        ``(message, sha256_hexdigest)`` tuple is returned instead.

    Raises:
        subprocess.CalledProcessError: if an external tool exits non-zero.
        KeyError: if an expected key is missing from a tool's output.
    """
    ret_list = []

    # 'date', 'md5', 'sha1', 'sha256',
    m5 = hashlib.md5()
    s1 = hashlib.sha1()
    s256 = hashlib.sha256()
    with open(filepath, 'rb') as f:
        while True:
            c = f.read(8192 * s256.block_size)
            if len(c) == 0:
                break
            m5.update(c)
            s1.update(c)
            s256.update(c)
    s256path = pefiles_dir + '/' + s256.hexdigest() + '.txt'
    if os.path.isfile(s256path):
        print(filepath, ' already run this program.', file=sys.stderr)
        return os.path.basename(filepath) + 'はすでに解析を完了しています。', s256.hexdigest()
    ret_list.append(datetime.datetime.now().strftime('%Y%m%d'))
    ret_list.append(m5.hexdigest())
    ret_list.append(s1.hexdigest())
    ret_list.append(s256.hexdigest())

    # 'ssdeep', 'imphash', 'impfuzzy'
    ret_list.append(ssdeep.hash_from_file(filepath))
    ret_list.append(pe.get_imphash())
    ret_list.append(pyimpfuzzy.get_impfuzzy(filepath))

    # 'Totalhash', 'AnyMaster', 'AnyMaster_v1_0_1', 'EndGame', 'Crits', 'peHashNG'
    ret_list.append(pehash.totalhash_hex(filepath))
    ret_list.append(pehash.anymaster_hex(filepath))
    ret_list.append(pehash.anymaster_v1_0_1_hex(filepath))
    ret_list.append(pehash.endgame_hex(filepath))
    ret_list.append(pehash.crits_hex(filepath))
    ret_list.append(pehash.pehashng_hex(filepath))

    # 'Platform', 'GUI Program', 'Console Program', 'DLL', 'Packed', 'Anti-Debug'
    # 'mutex', 'contains base64', 'AntiDebugMethod', 'PEiD'
    cwd = os.getcwd()
    res = _run_in_dir(cwd + '/PEiD', ['./PEiD', filepath])
    line_re = re.compile(r'^\s+([^:]+)\s:\s(.+)$')
    res_dict = {}
    for s in res:
        m = line_re.match(s)
        if m:
            res_dict[m.group(1)] = m.group(2)
    res_contains = [s for s in res if s.startswith(' contains base64')]
    ret_list.append(res_dict['PE'])
    ret_list.append(res_dict['GUI Program'])
    ret_list.append(res_dict['Console Program'])
    ret_list.append(res_dict['DLL'])
    ret_list.append(res_dict['Packed'])
    ret_list.append(res_dict['Anti-Debug'])
    ret_list.append('yes' if 'mutex' in res_dict else 'no')
    ret_list.append('yes' if res_contains else '')
    # List-like values: strip brackets/quotes, separate items with '|'.
    for key in ('AntiDebug', 'PEiD'):
        if key in res_dict:
            ret_list.append(
                re.sub(r'[\[\]"]', '', res_dict[key]).replace(' ', '|'))
        else:
            ret_list.append('')

    # 'TrID'
    res = _run_in_dir(cwd + '/trid', ['./trid', filepath])
    res = [s for s in res if re.match(r'^\s*[0-9]+\.[0-9]%', s)]
    # Joined with a literal backslash-n, not a newline.
    res_trid = '\\n'.join(res)
    ret_list.append(res_trid)

    # nearest sha256, nearest value
    newfuzzy = ret_list[HEADER.index('impfuzzy')]
    nearest_sha256 = ''
    nearest_value = -1
    result = collection.find()
    if result.count() > 0:
        overwrites = []
        for doc in result:
            cmpval = pyimpfuzzy.hash_compare(newfuzzy, doc['impfuzzy'])
            if cmpval > nearest_value:
                nearest_sha256 = doc['sha256']
                nearest_value = cmpval
            if cmpval > doc['nearest value']:
                overwrites.append([{
                    'sha256': doc['sha256']
                }, {
                    '$set': {
                        'nearest sha256': s256.hexdigest(),
                        'nearest value': cmpval
                    }
                }])
        # Updates are applied only after the cursor was fully read.
        for selector, change in overwrites:
            collection.update(selector, change)
    ret_list.append(nearest_sha256)
    ret_list.append(nearest_value)

    # ismalware, VirusTotalLink
    if useVT == 'on':
        vt = VirusTotalPublicApi(api_key)
        res = vt.get_file_report(m5.hexdigest())
        if 'results' in res:
            if 'positives' in res['results']:
                if res['results']['positives'] > 0:
                    ret_list.append('True')
                else:
                    ret_list.append('False')
            else:
                ret_list.append('False')
            if 'permalink' in res['results']:
                ret_list.append(str(res['results']['permalink']))
            else:
                ret_list.append('')
        else:
            print('Server error occured.', file=sys.stderr)
            return 'VirusTotalでエラーが発生しました。', s256.hexdigest()
    else:
        ret_list.append('')
        ret_list.append('')

    # strings
    res = subprocess.check_output(['strings', filepath])
    res = res.decode('utf-8')
    ret_list.append(res)

    # import table
    imports = []
    # A PE without an import directory has no DIRECTORY_ENTRY_IMPORT
    # attribute; treat that as an empty import table.
    for entry in getattr(pe, 'DIRECTORY_ENTRY_IMPORT', []):
        for imp in entry.imports:
            # Best effort: imports without a decodable name are skipped.
            try:
                imports.append(imp.name.decode('utf-8'))
            except Exception:
                pass
    imports_str = '\n'.join(imports)
    ret_list.append(imports_str)

    # export table
    if hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
        # Ordinal-only exports carry no name and are skipped, mirroring
        # the import handling above.
        exports = [
            s.name.decode('utf-8') for s in pe.DIRECTORY_ENTRY_EXPORT.symbols
            if s.name is not None
        ]
        exports_str = '\n'.join(exports)
    else:
        exports_str = ''
    ret_list.append(exports_str)

    # 'Dynamic Base', 'ASLR', 'High Entropy VA', 'Force Integrity',
    # 'Isolation', 'NX', 'SEH', 'CFG', 'RFG', 'SafeSEH', 'GS',
    # 'Authenticode', '.NET'
    if isUbuntu:
        cwd = os.getcwd()
        res = _run_in_dir(cwd + '/winchecksec/build',
                          ['./winchecksec', filepath])
        pair_re = re.compile(r'([^:]+):\s"([^"]+)"')
        res_dict = {}
        for s in res:
            m = pair_re.match(s)
            if m:
                k = m.group(1).strip()
                res_dict[k if k != '.NET' else 'dotNET'] = m.group(2)
        for key in ('Dynamic Base', 'ASLR', 'High Entropy VA',
                    'Force Integrity', 'Isolation', 'NX', 'SEH', 'CFG',
                    'RFG', 'SafeSEH', 'GS', 'Authenticode', 'dotNET'):
            ret_list.append(res_dict[key])

    # Pair each HEADER name with the value collected at the same position
    # (indexing keeps the IndexError if fewer values were collected).
    ret_dict = {name: ret_list[i] for i, name in enumerate(HEADER)}
    return ret_dict, s256path
from dbhandler import *
import os
import pefile
import pyimpfuzzy
import ssdeep

# Directory whose entries are hashed and looked up in the database.
test_path = "D:\\SRC\\imphash\\test"

for i in os.listdir(test_path):
    fname = i
    print(i)
    fpath = "//".join((test_path, i))

    # Import hash, fuzzy import hash and ssdeep hash of the whole file.
    pe = pefile.PE(fpath)
    imph = pe.get_imphash()
    fuzimp = pyimpfuzzy.get_impfuzzy(fpath)
    fuzfhash = ssdeep.hash_from_file(fpath)

    print('--'.join((fpath, imph, fuzimp, fuzfhash)))
    dbsearch(fpath, fuzimp)
    # dbInsert(fname,md5,sha256,imph,fuzimp,fuzfhash)
def get_impfuzzy(self):
    """Return the impfuzzy hash pyimpfuzzy computes for ``self.filename``."""
    return pyimpfuzzy.get_impfuzzy(self.filename)