def impfuzzy_comp(list, list_new):
    """Compare impfuzzy hashes of new records with each other and with known ones.

    Args:
        list: Previously known records; may be empty or ``None``, in which
            case only the new records are compared among themselves. Each
            record is indexable, with the identifier at index 0 and the
            impfuzzy hash at index 2. (The name shadows the builtin but is
            kept so existing keyword callers keep working.)
        list_new: Newly added records with the same layout.

    Returns:
        A list of ``[new_id, other_id, score]`` entries, where ``score`` is
        the value returned by ``pyimpfuzzy.hash_compare``. Pairs inside
        ``list_new`` come first, followed by ``list_new`` x ``list`` pairs.

    Hashes that are not in ssdeep format (or are 150+ characters long) are
    skipped and reported on stdout.
    """
    ssdeep = re.compile(r"^[0-9]{1,5}:[0-9a-zA-Z\/\+]+:[0-9a-zA-Z\/\+]+$", re.DOTALL)

    def is_valid(value):
        # Same acceptance rule everywhere: ssdeep-shaped and shorter than 150 chars.
        return bool(ssdeep.search(value)) and len(value) < 150

    def warn(value):
        # The original built this message but never emitted it.
        print("[!] This impfuzzy hash is not ssdeep format: %s" % value)

    complist = []
    list_len = len(list_new)

    # Pass 1: every unordered pair inside list_new (each item against the
    # items that follow it).
    for i, item_new in enumerate(list_new, start=1):
        if not is_valid(item_new[2]):
            warn(item_new[2])
            continue
        for j in range(i, list_len):
            other = list_new[j]
            if is_valid(other[2]):
                complist.append([item_new[0], other[0],
                                 pyimpfuzzy.hash_compare(item_new[2], other[2])])

    # Pass 2: every new item against every previously known item.
    if list:
        for item_new in list_new:
            if not is_valid(item_new[2]):
                warn(item_new[2])
                continue
            for item in list:
                if is_valid(item[2]):
                    complist.append([item_new[0], item[0],
                                     pyimpfuzzy.hash_compare(item_new[2], item[2])])
                else:
                    warn(item[2])

    return complist
def dbsearch(path, finp):
    """Print the impfuzzy similarity of *finp* against every stored hash.

    Args:
        path: Label printed in front of each score (concatenated as a string).
        finp: impfuzzy hash to compare against the ``fuzImp`` column of the
            ``MalwareDump`` table.

    One line of the form ``<path>--<score>`` is printed per stored hash.
    """
    conn = dbConnect()
    cursor = conn.cursor()
    # Fetch from the cursor itself: DB-API leaves execute()'s return value
    # undefined, so it cannot be relied on to be the cursor.
    cursor.execute('select fuzImp from MalwareDump;')
    for row in cursor.fetchall():
        # Each row is a sequence of columns; the hash is the only column
        # selected. The original passed the whole row to hash_compare.
        stored_hash = row[0]
        if stored_hash is None:
            # NULL column: nothing to compare against.
            continue
        print(path + "--" + str(pyimpfuzzy.hash_compare(finp, stored_hash)))
    # NOTE(review): the connection is left open on purpose - it is not
    # visible here whether dbConnect() hands out a shared connection.
def run(self, obj, config):
    """Compute the impfuzzy hash of *obj* and report similar stored samples.

    Args:
        obj: Sample being analysed. The code reads ``filedata``, ``md5``,
            ``id``, ``mimetype`` and ``impfuzzy`` from it and may call
            ``save()`` on it.
        config: Mapping of service options; ``threshold`` (default 50) is the
            minimum ``hash_compare`` score for a candidate to be reported.

    Results are published through ``self._add_result``; nothing is returned.
    """
    self._info("Impfuzzy: run()")
    threshold = config.get("threshold", 50)

    target_impfuzzy = None
    try:
        target_impfuzzy = pyimpfuzzy.get_impfuzzy_data(obj.filedata.read())
    except Exception:
        # Best effort: any failure is reported just below as "no hash".
        pass
    target_md5 = obj.md5

    if not target_impfuzzy:
        # Call the logger; the original assigned to logger.error and thereby
        # replaced the method with a string.
        logger.error("impfuzzy: Could not generate impfuzzy value for sample: %s",
                     str(obj.id))
        self._error("Could not generate impfuzzy value for sample")
        return

    # Fill in the attribute only when it is missing. The original condition
    # was inverted relative to its own log messages.
    if not obj.impfuzzy:
        obj.impfuzzy = target_impfuzzy
        obj.save()
        self._info("impfuzzy: Filled-in in the impfuzzy")
    else:
        self._info("impfuzzy attribute already present, not overwriting")
    self._add_result('impfuzzy_hash', target_impfuzzy, {'impfuzzy': target_impfuzzy})

    # Set up the sample space to compare against.
    # First use the mimetype as a comparator if available.
    query_filter = {}
    target_mimetype = obj.mimetype
    if target_mimetype:
        query_filter['mimetype'] = target_mimetype

    # Then keep only samples whose chunk size is double, equal to, or half of
    # ours. The parentheses matter: "%" and "*" share precedence, so the
    # unparenthesised form repeated the formatted string twice.
    chunk_size = int(target_impfuzzy.split(":")[0])
    query_filter["$or"] = [
        {"impfuzzy": {"$regex": "^%d:" % (chunk_size * 2)}},
        {"impfuzzy": {"$regex": "^%d:" % chunk_size}},
        {"impfuzzy": {"$regex": "^%d:" % (chunk_size // 2)}},
    ]

    result_fields = ('md5', 'impfuzzy', 'description')
    candidate_space = Sample.objects(__raw__=query_filter).only(*result_fields)

    match_list = []
    for candidate in candidate_space:
        if "impfuzzy" not in candidate:
            continue
        score = pyimpfuzzy.hash_compare(target_impfuzzy, candidate["impfuzzy"])
        # Skip weak matches and the sample itself.
        if score >= threshold and candidate["md5"] != target_md5:
            # Keep the md5 and the description for the report below.
            match_list.append({'md5': candidate["md5"],
                               'description': candidate["description"],
                               'score': score})

    # Finally report the matches, best score first.
    match_list.sort(key=lambda sample: sample["score"], reverse=True)
    for match in match_list:
        self._add_result("impfuzzy_match", match["md5"],
                         {'description': match["description"],
                          'md5': match["md5"],
                          'score': match["score"]})
    self._info("impfuzzy run() done")
def run(self, obj, config):
    """Compute the impfuzzy hash of *obj* and report similar stored samples.

    Args:
        obj: Sample being analysed. The code reads ``filedata``, ``md5``,
            ``id``, ``mimetype`` and ``impfuzzy`` from it and may call
            ``save()`` on it.
        config: Mapping of service options; ``threshold`` (default 50) is the
            minimum ``hash_compare`` score for a candidate to be reported.

    Results are published through ``self._add_result``; nothing is returned.
    """
    threshold = config.get("threshold", 50)

    target_impfuzzy = None
    try:
        target_impfuzzy = pyimpfuzzy.get_impfuzzy_data(obj.filedata.read())
    except Exception:
        # Best effort: a failure here is reported just below as "no hash".
        pass
    target_md5 = obj.md5

    if not target_impfuzzy:
        # Must be a call: assigning to logger.error (as the original did)
        # silently replaces the logging method with a string.
        logger.error("impfuzzy: Could not generate impfuzzy value for sample: %s",
                     str(obj.id))
        self._error("Could not generate impfuzzy value for sample")
        return

    # Store the hash only if the sample does not have one yet; the original
    # test was inverted with respect to the two messages below.
    if not obj.impfuzzy:
        obj.impfuzzy = target_impfuzzy
        obj.save()
        self._info("impfuzzy: Filled-in in the impfuzzy")
    else:
        self._info("impfuzzy attribute already present, not overwriting")
    self._add_result('impfuzzy_hash', target_impfuzzy, {'impfuzzy': target_impfuzzy})

    # Set up the sample space to compare against.
    # First use the mimetype as a comparator if available.
    query_filter = {}
    target_mimetype = obj.mimetype
    if target_mimetype:
        query_filter['mimetype'] = target_mimetype

    # Then restrict to hashes whose chunk size is 2x, 1x or 0.5x ours.
    # "%" binds as tightly as "*", so the doubled value needs parentheses;
    # without them the formatted prefix was simply repeated.
    chunk_size = int(target_impfuzzy.split(":")[0])
    query_filter["$or"] = [
        {"impfuzzy": {"$regex": "^%d:" % (chunk_size * 2)}},
        {"impfuzzy": {"$regex": "^%d:" % chunk_size}},
        {"impfuzzy": {"$regex": "^%d:" % (chunk_size // 2)}},
    ]

    result_fields = ('md5', 'impfuzzy', 'description')
    candidate_space = Sample.objects(__raw__=query_filter).only(*result_fields)

    match_list = []
    for candidate in candidate_space:
        if "impfuzzy" not in candidate:
            continue
        score = pyimpfuzzy.hash_compare(target_impfuzzy, candidate["impfuzzy"])
        # Ignore weak matches and the sample itself.
        if score >= threshold and candidate["md5"] != target_md5:
            # Remember the md5 and the description for the report below.
            match_list.append({'md5': candidate["md5"],
                               'description': candidate["description"],
                               'score': score})

    # Finally report the matches, best score first.
    match_list.sort(key=lambda sample: sample["score"], reverse=True)
    for match in match_list:
        # Show the MD5 and the description.
        self._add_result("impfuzzy_match (MD5)", match["md5"],
                         {'description': match["description"],
                          'score': match["score"]})
fuzzy_lib = ctypes.cdll.LoadLibrary(str(fuzzy_lib_path)) result_buf = ctypes.create_string_buffer(FUZZY_MAX_RESULT) data_buf = ctypes.create_string_buffer(data) fuzzy_lib.fuzzy_hash_buf(data_buf, len(data_buf) - 1, result_buf) hash_value = result_buf.value.decode("ascii") return hash_value if len(sys.argv) == 4: hash_type = sys.argv[1] hash1 = sys.argv[2] hash2 = sys.argv[3] if hash_type == "compare": print(pyimpfuzzy.hash_compare(hash1, hash2), end="") elif len(sys.argv) == 2: hash_type = sys.argv[1] # Receive data data = sys.stdin.buffer.read() if hash_type == "ssdeep": print(ssdeep(data), end="") elif hash_type == "impfuzzy": file_type = magic.from_buffer(data) if file_type[:4] == "PE32": try: pe = None pe = pefile.PE(data=data)
import pyimpfuzzy
import sys

# Demo: print two impfuzzy hashes and their similarity score.
#
# The hashes can also be derived from PE files on disk instead of being
# hard-coded:
# hash1 = pyimpfuzzy.get_impfuzzy("D:\\SRC\\imphash\\samples\\Git-2.20.1-64-bit.exe")
# hash2 = pyimpfuzzy.get_impfuzzy("D:\\SRC\\imphash\\samples\\vlc-3.0.6-win64.exe")
hash1 = "48:o4/c+4QjuC5Q4FNO0MeAXGo4E/gjF5J/RscXr9ubudS19WOG/iB:oc94A5TNO0MHYXrMeS1oXiB"
hash2 = "48:I2/dKEE8QyoOttGarYau/LsquqQEHA9vPel1bEX/KA/6UyJra8ESvS55w+0o4Rn2:ImdKNstGoYa6ZGxfOBA"

# Show both inputs first, then the comparison result.
for line in ("ImpFuzzy1: %s" % hash1, "ImpFuzzy2: %s" % hash2):
    print(line)
print("Compare: %i" % pyimpfuzzy.hash_compare(hash1, hash2))
def render_text(self, outfd, data):
    """Render module impfuzzy hashes, either listed or filtered by similarity.

    Args:
        outfd: Output stream handed to ``table_header`` / ``table_row``.
        data: Iterable of 6-tuples unpacked as ``(offset, FileName, base,
            ModName, hash_result, fuzzy_result)``. Rows whose
            ``fuzzy_result`` contains ``"Error"`` are skipped.

    Mode is chosen from the plugin options: EXEFILE (a PE file or a directory
    of them) or COMPIMPFUZZY (a text file with one hash per line) select
    "search" mode; LISTIMPFUZZY selects "list" mode. THRESHOLD, when set,
    replaces the default minimum score of 30.
    """
    # Default impfuzzy similarity threshold.
    ss_threshold = 30

    if not has_pyimpfuzzy:
        debug.error("pyimpfuzzy must be installed for this plugin")

    files = []
    impfuzzys = []

    if self._config.EXEFILE is not None:
        mode = "search"
        if os.path.isdir(self._config.EXEFILE):
            for root, _dirs, filenames in os.walk(self._config.EXEFILE):
                files.extend(os.path.join(root, name) for name in filenames)
        elif os.path.isfile(self._config.EXEFILE):
            files.append(self._config.EXEFILE)

        for path in files:
            impfuzzys.append(pyimpfuzzy.get_impfuzzy(path))
    elif self._config.COMPIMPFUZZY is not None:
        mode = "search"
        # The context manager closes the list file; the original leaked the
        # handle.
        with open(self._config.COMPIMPFUZZY, "r") as hash_file:
            for line in hash_file:
                candidate = line.rstrip()
                # Blank lines are not hashes; do not feed them to hash_compare.
                if candidate:
                    impfuzzys.append(candidate)
    elif self._config.LISTIMPFUZZY:
        mode = "list"
    else:
        debug.error(
            "Please set option -e (PE file or directory) or -i (impfuzzy hash list file) or -a (Listing the impfuzzy)")
        # Defensive: without a mode there is nothing to render, and falling
        # through would hit an unbound ``mode`` if debug.error() returns.
        return

    if self._config.THRESHOLD is not None:
        ss_threshold = self._config.THRESHOLD

    if mode == "search":
        self.table_header(outfd,
                          [("Process", "[addrpad]"),
                           ("Name", "20"),
                           ("Module Base", "[addrpad]"),
                           ("Module Name", "20"),
                           ("impfuzzy", "20"),
                           ("Compare", "7")])
        for offset, FileName, base, ModName, hash_result, fuzzy_result in data:
            if "Error" in fuzzy_result:
                continue
            for impfuzzy in impfuzzys:
                # Compare once and reuse the score for both the filter and
                # the printed column.
                score = pyimpfuzzy.hash_compare(impfuzzy, fuzzy_result)
                if score >= ss_threshold:
                    self.table_row(outfd, offset, FileName, base, ModName,
                                   fuzzy_result, score)

    if mode == "list":
        self.table_header(outfd,
                          [("Process", "[addrpad]"),
                           ("Name", "20"),
                           ("Module Base", "[addrpad]"),
                           ("Module Name", "20"),
                           ("impfuzzy", "110")])
        for offset, FileName, base, ModName, hash_result, fuzzy_result in data:
            if "Error" not in fuzzy_result:
                self.table_row(outfd, offset, FileName, base, ModName, fuzzy_result)
def _generator(self, procs, hashlist, threshold):
    """Yield one result row per mapped PE image found in the given processes.

    Args:
        procs: Iterable of process objects to inspect.
        hashlist: Hashes to match against. When empty, every image is
            reported (including ones that could not be dumped); otherwise an
            image is reported once for each listed hash whose
            ``hash_compare`` score reaches *threshold*.
        threshold: Minimum ``hash_compare`` score for a match.

    Yields:
        ``(0, (pid, process name, image base, mapped file name, hash text))``.
    """
    pe_table_name = intermed.IntermediateSymbolTable.create(
        self.context,
        self.config_path,
        "windows",
        "pe",
        class_types=pe.class_types)

    # Without an explicit address nothing is filtered out; with one, every
    # VAD that does not start at that address is filtered.
    if self.config.get('address', None) is not None:
        def filter_func(x):
            return x.get_start() not in [self.config['address']]
    else:
        def filter_func(_):
            return False

    def build_row(process, region, text):
        # Assemble the row lazily so attributes are only read when a row is
        # actually emitted.
        return (0, (process.UniqueProcessId,
                    process.ImageFileName.cast(
                        "string",
                        max_length=process.ImageFileName.vol.count,
                        errors='replace'),
                    format_hints.Hex(region.get_start()),
                    region.get_file_name(),
                    text))

    for proc in procs:
        proc_id = "Unknown"
        try:
            proc_id = proc.UniqueProcessId
            proc_layer_name = proc.add_process_layer()
        except exceptions.InvalidAddressException as excp:
            vollog.debug(
                "Process {}: invalid address {} in layer {}".format(
                    proc_id, excp.invalid_address, excp.layer_name))
            continue

        for vad in vadinfo.VadInfo.list_vads(proc, filter_func=filter_func):
            # The address option is inherited from the VadInfo plugin: when
            # the user names an address, the DLL heuristics are bypassed.
            if self.config.get("address", None) is None:
                # The VADs are used instead of the PEB for DLLs: the PEB can
                # be swapped, needs special handling on wow64 processes and
                # is unreliable from an integrity standpoint.
                protection_string = vad.get_protection(
                    vadinfo.VadInfo.protect_values(
                        self.context,
                        self.config['primary'],
                        self.config['nt_symbols']),
                    vadinfo.winnt_protections)

                # DLLs are write copy and have mapped files.
                if (protection_string != "PAGE_EXECUTE_WRITECOPY"
                        or isinstance(vad.get_file_name(),
                                      interfaces.renderers.BaseAbsentValue)):
                    continue

            try:
                dos_header = self.context.object(
                    pe_table_name + constants.BANG + "_IMAGE_DOS_HEADER",
                    offset=vad.get_start(),
                    layer_name=proc_layer_name)

                # Rebuild the image in memory, section by section.
                with io.BytesIO() as pe_data:
                    for offset, data in dos_header.reconstruct():
                        pe_data.seek(offset)
                        pe_data.write(data)
                    pe_data_raw = pe_data.getvalue()

                result_text = self.calc_hash(pe_data_raw)
            except Exception:
                result_text = "Unable to dump PE at {0:#x}".format(
                    vad.get_start())

            if len(hashlist) == 0:
                yield build_row(proc, vad, result_text)
                continue

            if "Unable" in result_text:
                continue

            for known_hash in hashlist:
                if pyimpfuzzy.hash_compare(result_text, known_hash) >= threshold:
                    yield build_row(proc, vad, result_text)
def _tool_output_lines(tool_dir, executable, filepath):
    """Run ``./<executable> <filepath>`` inside *tool_dir*; return its stdout lines."""
    # cwd= confines the directory change to the child process, so this
    # process's working directory is never left altered if the tool fails.
    output = subprocess.check_output(['./' + executable, filepath], cwd=tool_dir)
    return output.decode('utf-8').split('\n')


def analyse(filepath, pefiles_dir, pe, collection, useVT, api_key):
    """Collect hashes, tool reports and similarity data for one PE file.

    Args:
        filepath: Path of the file to analyse.
        pefiles_dir: Directory holding ``<sha256>.txt`` marker/result files.
        pe: Parsed PE object; ``get_imphash()``, ``DIRECTORY_ENTRY_IMPORT``
            and ``DIRECTORY_ENTRY_EXPORT`` are read from it.
        collection: Document collection of earlier results, queried with
            ``find()`` and modified with ``update()``.
        useVT: The string ``'on'`` enables the VirusTotal lookup.
        api_key: VirusTotal API key (only used when *useVT* is ``'on'``).

    Returns:
        ``(ret_dict, s256path)`` on success, where ``ret_dict`` maps each
        ``HEADER`` name to its value and ``s256path`` is the result file path.
        ``(message, sha256_hex)`` when the file was already analysed or the
        VirusTotal lookup failed.

    Raises:
        subprocess.CalledProcessError: if one of the external tools fails.
    """
    ret_list = []

    # 'date', 'md5', 'sha1', 'sha256'
    m5 = hashlib.md5()
    s1 = hashlib.sha1()
    s256 = hashlib.sha256()
    with open(filepath, 'rb') as f:
        # Feed all three digests from a single pass over the file.
        for chunk in iter(lambda: f.read(8192 * s256.block_size), b''):
            m5.update(chunk)
            s1.update(chunk)
            s256.update(chunk)
    sha256_hex = s256.hexdigest()

    s256path = pefiles_dir + '/' + sha256_hex + '.txt'
    if os.path.isfile(s256path):
        print(filepath, ' already run this program.', file=sys.stderr)
        return os.path.basename(filepath) + 'はすでに解析を完了しています。', sha256_hex

    ret_list.append(datetime.datetime.now().strftime('%Y%m%d'))
    ret_list.append(m5.hexdigest())
    ret_list.append(s1.hexdigest())
    ret_list.append(sha256_hex)

    # 'ssdeep', 'imphash', 'impfuzzy'
    ret_list.append(ssdeep.hash_from_file(filepath))
    ret_list.append(pe.get_imphash())
    ret_list.append(pyimpfuzzy.get_impfuzzy(filepath))

    # 'Totalhash', 'AnyMaster', 'AnyMaster_v1_0_1', 'EndGame', 'Crits', 'peHashNG'
    ret_list.append(pehash.totalhash_hex(filepath))
    ret_list.append(pehash.anymaster_hex(filepath))
    ret_list.append(pehash.anymaster_v1_0_1_hex(filepath))
    ret_list.append(pehash.endgame_hex(filepath))
    ret_list.append(pehash.crits_hex(filepath))
    ret_list.append(pehash.pehashng_hex(filepath))

    # 'Platform', 'GUI Program', 'Console Program', 'DLL', 'Packed', 'Anti-Debug'
    # 'mutex', 'contains base64', 'AntiDebugMethod', 'PEiD'
    cwd = os.getcwd()
    res = _tool_output_lines(cwd + '/PEiD', 'PEiD', filepath)
    key_value = re.compile(r'^\s+([^:]+)\s:\s(.+)$')
    res_dict = {}
    for s in res:
        m = key_value.match(s)
        if m:
            res_dict[m.group(1)] = m.group(2)
    res_contains = [s for s in res if s.startswith(' contains base64')]
    for key in ('PE', 'GUI Program', 'Console Program', 'DLL', 'Packed', 'Anti-Debug'):
        ret_list.append(res_dict[key])
    ret_list.append('yes' if 'mutex' in res_dict else 'no')
    ret_list.append('yes' if res_contains else '')
    # List-valued fields: strip brackets and quotes, then separate with '|'.
    for key in ('AntiDebug', 'PEiD'):
        if key in res_dict:
            ret_list.append(re.sub(r'[\[\]"]', '', res_dict[key]).replace(' ', '|'))
        else:
            ret_list.append('')

    # 'TrID'
    res = _tool_output_lines(cwd + '/trid', 'trid', filepath)
    res = [s for s in res if re.match(r'^\s*[0-9]+\.[0-9]%', s)]
    # Joined with a literal backslash-n, exactly as before.
    ret_list.append('\\n'.join(res))

    # nearest sha256, nearest value
    newfuzzy = ret_list[HEADER.index('impfuzzy')]
    nearest_sha256 = ''
    nearest_value = -1
    overwrites = []
    # Iterating an empty result is a no-op, so no separate count query is
    # needed before the loop.
    for doc in collection.find():
        cmpval = pyimpfuzzy.hash_compare(newfuzzy, doc['impfuzzy'])
        if cmpval > nearest_value:
            nearest_sha256 = doc['sha256']
            nearest_value = cmpval
        if cmpval > doc['nearest value']:
            overwrites.append((
                {'sha256': doc['sha256']},
                {'$set': {'nearest sha256': sha256_hex,
                          'nearest value': cmpval}},
            ))
    # Updates are applied only after the scan above has finished.
    # NOTE(review): if this is a PyMongo collection, update() is the legacy
    # spelling of update_one() - confirm the driver version before changing.
    for selector, change in overwrites:
        collection.update(selector, change)
    ret_list.append(nearest_sha256)
    ret_list.append(nearest_value)

    # ismalware, VirusTotalLink
    if useVT == 'on':
        vt = VirusTotalPublicApi(api_key)
        res = vt.get_file_report(m5.hexdigest())
        if 'results' not in res:
            print('Server error occured.', file=sys.stderr)
            return 'VirusTotalでエラーが発生しました。', sha256_hex
        results = res['results']
        if 'positives' in results and results['positives'] > 0:
            ret_list.append('True')
        else:
            ret_list.append('False')
        if 'permalink' in results:
            ret_list.append(str(results['permalink']))
        else:
            ret_list.append('')
    else:
        ret_list.append('')
        ret_list.append('')

    # strings
    res = subprocess.check_output(['strings', filepath])
    ret_list.append(res.decode('utf-8'))

    # import table (guarded like the export table: the attribute is absent
    # when the file has no import directory)
    imports = []
    for entry in getattr(pe, 'DIRECTORY_ENTRY_IMPORT', []):
        for imp in entry.imports:
            try:
                imports.append(imp.name.decode('utf-8'))
            except (AttributeError, UnicodeDecodeError):
                # Name missing (e.g. import by ordinal) or not valid UTF-8:
                # skip the entry, as the former bare ``except`` did.
                continue
    ret_list.append('\n'.join(imports))

    # export table
    if hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
        # Exports without a name have nothing to decode; skip them.
        exports = [
            s.name.decode('utf-8')
            for s in pe.DIRECTORY_ENTRY_EXPORT.symbols
            if s.name is not None
        ]
        exports_str = '\n'.join(exports)
    else:
        exports_str = ''
    ret_list.append(exports_str)

    # 'Dynamic Base', 'ASLR', 'High Entropy VA', 'Force Integrity',
    # 'Isolation', 'NX', 'SEH', 'CFG', 'RFG', 'SafeSEH', 'GS',
    # 'Authenticode', '.NET'
    if isUbuntu:
        res = _tool_output_lines(os.getcwd() + '/winchecksec/build',
                                 'winchecksec', filepath)
        quoted_value = re.compile(r'([^:]+):\s"([^"]+)"')
        res_dict = {}
        for s in res:
            m = quoted_value.match(s)
            if m:
                k = m.group(1).strip()
                res_dict[k if k != '.NET' else 'dotNET'] = m.group(2)
        for key in ('Dynamic Base', 'ASLR', 'High Entropy VA', 'Force Integrity',
                    'Isolation', 'NX', 'SEH', 'CFG', 'RFG', 'SafeSEH', 'GS',
                    'Authenticode', 'dotNET'):
            ret_list.append(res_dict[key])

    # Pair each HEADER name with its value. Indexing (rather than zip) keeps
    # the IndexError when fewer values than headers were collected.
    ret_dict = {name: ret_list[i] for i, name in enumerate(HEADER)}
    return ret_dict, s256path