def calc_hash(cls, pe_data):
    """Return the impfuzzy hash of ``pe_data``, or a placeholder on failure.

    Args:
        cls: Not used by the body; kept so the call signature is unchanged.
        pe_data: Raw file contents handed to
            ``pyimpfuzzy.get_impfuzzy_data``.

    Returns:
        The value returned by ``pyimpfuzzy.get_impfuzzy_data``, or the
        string ``"Unable to calc impfuzzy"`` when that call raises.
    """
    try:
        return pyimpfuzzy.get_impfuzzy_data(pe_data)
    except Exception:
        # Best-effort: any hashing error maps to the placeholder string.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate instead of being reported as a hash failure.
        return "Unable to calc impfuzzy"
def compute_hashes_impl(sample, pe):
    """Collect cryptographic, fuzzy and PE-structure hashes for one sample.

    Args:
        sample: Raw sample contents; fed to ``hashlib``, ``ssdeep``,
            ``pyimpfuzzy`` and ``tlsh``.
        pe: Parsed PE object; passed as ``pe=`` to the ``pehash`` helpers
            and queried with ``get_imphash()``.

    Returns:
        dict mapping hash name to value, with keys ``md5``, ``sha1``,
        ``sha256``, ``ssdeep``, ``imphash``, ``impfuzzy``, ``tlsh``,
        ``totalhash``, ``anymaster``, ``anymaster_v1_0_1``, ``endgame``,
        ``crits`` and ``pehashng`` (in that order).
    """
    # Cryptographic digests over the raw bytes.
    md5_hex, sha1_hex, sha256_hex = [
        getattr(hashlib, algorithm)(sample).hexdigest()
        for algorithm in ("md5", "sha1", "sha256")
    ]

    # Similarity (fuzzy) hashes over the raw bytes.
    fuzzy_ssdeep = ssdeep.hash(sample)
    fuzzy_imports = pyimpfuzzy.get_impfuzzy_data(sample)
    fuzzy_tlsh = tlsh.hash(sample)

    # Structural PE hashes: each variant is exposed as ``<name>_hex``.
    structural = {}
    for variant in (
        "totalhash",
        "anymaster",
        "anymaster_v1_0_1",
        "endgame",
        "crits",
        "pehashng",
    ):
        structural[variant] = getattr(pehash, variant + "_hex")(pe=pe)

    import_hash = pe.get_imphash()

    report = {
        "md5": md5_hex,
        "sha1": sha1_hex,
        "sha256": sha256_hex,
        "ssdeep": fuzzy_ssdeep,
        "imphash": import_hash,
        "impfuzzy": fuzzy_imports,
        "tlsh": fuzzy_tlsh,
    }
    report.update(structural)
    return report
def run(self, obj, config):
    """Compute the sample's impfuzzy hash and report similar samples.

    Steps:
      1. Hash ``obj.filedata`` with ``pyimpfuzzy``; on failure report an
         error and stop.
      2. Store the hash on ``obj`` when it has none yet, and publish it as
         an ``impfuzzy_hash`` result.
      3. Query ``Sample`` for candidates with the same mimetype (when
         known) whose impfuzzy chunk size is double, equal to, or half of
         the target's chunk size.
      4. Publish every candidate whose comparison score reaches the
         threshold as an ``impfuzzy_match`` result, best score first.

    Args:
        obj: Sample being analysed; reads ``filedata``, ``md5``, ``id``,
            ``impfuzzy`` and ``mimetype``, and may set ``impfuzzy`` and
            call ``save()``.
        config: Mapping of service options; ``threshold`` (default 50) is
            the minimum comparison score for a match.

    Returns:
        None. Output goes through ``self._add_result`` / ``self._info`` /
        ``self._error``.
    """
    self._info("Impfuzzy: run()")
    threshold = config.get("threshold", 50)

    target_impfuzzy = None
    try:
        target_impfuzzy = pyimpfuzzy.get_impfuzzy_data(obj.filedata.read())
    except Exception:
        # Best-effort: an unhashable sample is reported just below.
        pass
    target_md5 = obj.md5

    if not target_impfuzzy:
        # Fixed: this used to *assign* a string to ``logger.error``,
        # clobbering the method instead of emitting a log record.
        logger.error(
            "impfuzzy: Could not generate impfuzzy value for sample: %s",
            obj.id,
        )
        self._error("Could not generate impfuzzy value for sample")
        return

    # Fixed: the condition was inverted, overwriting existing values and
    # never filling in missing ones (contradicting both messages).
    if not obj.impfuzzy:
        obj.impfuzzy = target_impfuzzy
        obj.save()
        self._info("impfuzzy: Filled-in in the impfuzzy")
    else:
        self._info("impfuzzy attribute already present, not overwriting")
    self._add_result('impfuzzy_hash', target_impfuzzy,
                     {'impfuzzy': target_impfuzzy})

    # setup the sample space to compare against
    # first use the mimetype as a comparator if available
    target_mimetype = obj.mimetype
    query_filter = {}
    if target_mimetype:
        query_filter['mimetype'] = target_mimetype

    # then use only samples with a multiple of chunksize
    chunk_size = int(target_impfuzzy.split(":")[0])
    # Fixed: ``"^%d:" % chunk_size * 2`` repeated the formatted string
    # ("^3:^3:") because ``%`` and ``*`` bind left-to-right; the doubled
    # chunk size needs parentheses.
    query_filter["$or"] = [
        {"impfuzzy": {"$regex": "^%d:" % size}}
        for size in (chunk_size * 2, chunk_size, chunk_size // 2)
    ]

    result_fields = ('md5', 'impfuzzy', 'description')
    candidate_space = Sample.objects(__raw__=query_filter).only(*result_fields)

    match_list = []
    for candidate in candidate_space:
        if "impfuzzy" not in candidate:
            continue
        score = pyimpfuzzy.hash_compare(target_impfuzzy, candidate["impfuzzy"])
        # Skip weak matches and the sample itself.
        if score >= threshold and candidate["md5"] != target_md5:
            # Grab the md5 and the description for later
            match_list.append({'md5': candidate["md5"],
                               'description': candidate["description"],
                               'score': score})

    # finally sort the results
    match_list.sort(key=lambda sample: sample["score"], reverse=True)
    for match in match_list:
        # Show the MD5 and the Description
        self._add_result("impfuzzy_match", match["md5"],
                         {'description': match["description"],
                          'md5': match["md5"],
                          'score': match["score"]})
    self._info("impfuzzy run() done")
def run(self, obj, config):
    """Compute the sample's impfuzzy hash and report similar samples.

    Steps:
      1. Hash ``obj.filedata`` with ``pyimpfuzzy``; on failure report an
         error and stop.
      2. Store the hash on ``obj`` when it has none yet, and publish it as
         an ``impfuzzy_hash`` result.
      3. Query ``Sample`` for candidates with the same mimetype (when
         known) whose impfuzzy chunk size is double, equal to, or half of
         the target's chunk size.
      4. Publish every candidate whose comparison score reaches the
         threshold as an ``impfuzzy_match (MD5)`` result, best score first.

    Args:
        obj: Sample being analysed; reads ``filedata``, ``md5``, ``id``,
            ``impfuzzy`` and ``mimetype``, and may set ``impfuzzy`` and
            call ``save()``.
        config: Mapping of service options; ``threshold`` (default 50) is
            the minimum comparison score for a match.

    Returns:
        None. Output goes through ``self._add_result`` / ``self._info`` /
        ``self._error``.
    """
    threshold = config.get("threshold", 50)

    target_impfuzzy = None
    try:
        target_impfuzzy = pyimpfuzzy.get_impfuzzy_data(obj.filedata.read())
    except Exception:
        # Best-effort: an unhashable sample is reported just below.
        pass
    target_md5 = obj.md5

    if not target_impfuzzy:
        # Fixed: this used to *assign* a string to ``logger.error``,
        # clobbering the method instead of emitting a log record.
        logger.error(
            "impfuzzy: Could not generate impfuzzy value for sample: %s",
            obj.id,
        )
        self._error("Could not generate impfuzzy value for sample")
        return

    # Fixed: the condition was inverted, overwriting existing values and
    # never filling in missing ones (contradicting both messages).
    if not obj.impfuzzy:
        obj.impfuzzy = target_impfuzzy
        obj.save()
        self._info("impfuzzy: Filled-in in the impfuzzy")
    else:
        self._info("impfuzzy attribute already present, not overwriting")
    self._add_result('impfuzzy_hash', target_impfuzzy,
                     {'impfuzzy': target_impfuzzy})

    # setup the sample space to compare against
    # first use the mimetype as a comparator if available
    target_mimetype = obj.mimetype
    query_filter = {}
    if target_mimetype:
        query_filter['mimetype'] = target_mimetype

    # then use only samples with a multiple of chunksize
    chunk_size = int(target_impfuzzy.split(":")[0])
    # Fixed: ``"^%d:" % chunk_size * 2`` repeated the formatted string
    # ("^3:^3:") because ``%`` and ``*`` bind left-to-right; the doubled
    # chunk size needs parentheses.
    query_filter["$or"] = [
        {"impfuzzy": {"$regex": "^%d:" % size}}
        for size in (chunk_size * 2, chunk_size, chunk_size // 2)
    ]

    result_fields = ('md5', 'impfuzzy', 'description')
    candidate_space = Sample.objects(__raw__=query_filter).only(*result_fields)

    match_list = []
    for candidate in candidate_space:
        if "impfuzzy" not in candidate:
            continue
        score = pyimpfuzzy.hash_compare(target_impfuzzy, candidate["impfuzzy"])
        # Skip weak matches and the sample itself.
        if score >= threshold and candidate["md5"] != target_md5:
            # Grab the md5 and the description for later
            match_list.append({'md5': candidate["md5"],
                               'description': candidate["description"],
                               'score': score})

    # finally sort the results
    match_list.sort(key=lambda sample: sample["score"], reverse=True)
    for match in match_list:
        # Show the MD5 and the Description
        self._add_result("impfuzzy_match (MD5)", match["md5"],
                         {'description': match["description"],
                          'score': match["score"]})
def migrate_4_to_5(self):
    """
    Migrate from schema 4 to 5.

    Runs the 3-to-4 migration first when the document is older than
    schema 4, then settles the ``impfuzzy`` attribute:

    * not a PE: ``impfuzzy`` is set to ``None``;
    * PE with a value already present: the value is kept;
    * PE without a value: it is computed from ``self.filedata`` with
      ``pyimpfuzzy``; any failure (including the module being absent)
      leaves it as ``None``.

    Finally bumps ``schema_version`` to 5, saves and reloads the document.
    """
    if self.schema_version < 4:
        migrate_3_to_4(self)

    if not self.is_pe():
        # not a PE, so no point in populating it
        self.impfuzzy = None
    elif not getattr(self, "impfuzzy", None):
        # Only import/compute when a value is actually needed. Previously
        # the import ran first inside the ``try``, so a missing
        # ``pyimpfuzzy`` wiped an impfuzzy value that was already stored.
        try:
            import pyimpfuzzy
            self.impfuzzy = pyimpfuzzy.get_impfuzzy_data(self.filedata.read())
        except Exception:
            self.impfuzzy = None

    self.schema_version = 5
    self.save()
    self.reload()
def _generate_file_metadata(self, data):
    """Populate file-derived metadata attributes on ``self`` from ``data``.

    Sets ``filetype``, ``mimetype``, ``size``, ``md5`` (only when not
    already set), ``sha1``, ``sha256``, ``ssdeep`` and ``impfuzzy``.
    Each detection/hashing step is best-effort: when a step raises, its
    attribute falls back to ``"Unavailable"`` (filetype, mimetype) or
    ``None`` (ssdeep, impfuzzy) instead of aborting the whole method.

    Args:
        data: Raw file contents.
    """
    import pydeep
    import magic
    from hashlib import md5, sha1, sha256
    try:
        import pyimpfuzzy
    except ImportError:
        # Optional dependency: impfuzzy is simply left as None below.
        pyimpfuzzy = None

    try:
        self.filetype = magic.from_buffer(data)
        if len(self.filetype) > 1000:
            self.filetype = self.filetype[0:1000] + '<TRUNCATED>'
    except Exception:
        self.filetype = "Unavailable"

    try:
        mimetype = magic.from_buffer(data, mime=True)
        if mimetype:
            # Keep only the media type, dropping anything after ";".
            self.mimetype = mimetype.split(";")[0]
        else:
            self.mimetype = "unknown"
    except Exception:
        self.mimetype = "Unavailable"

    self.size = len(data)
    # this is a shard key. you can't modify it once it's set.
    # MongoEngine will still mark the field as modified even if you set it
    # to the same value.
    if not self.md5:
        self.md5 = md5(data).hexdigest()
    self.sha1 = sha1(data).hexdigest()
    self.sha256 = sha256(data).hexdigest()

    try:
        self.ssdeep = pydeep.hash_bytes(data)
    except Exception:
        self.ssdeep = None

    if pyimpfuzzy is None:
        self.impfuzzy = None
    else:
        try:
            self.impfuzzy = pyimpfuzzy.get_impfuzzy_data(data)
        except Exception:
            self.impfuzzy = None
def migrate_4_to_5(self):
    """
    Migrate from schema 4 to 5.

    Runs the 3-to-4 migration first when the document is older than
    schema 4, then settles the ``impfuzzy`` attribute:

    * not a PE: ``impfuzzy`` is set to ``None``;
    * PE with a value already present: the value is kept;
    * PE without a value: it is computed from ``self.filedata`` with
      ``pyimpfuzzy``; any failure (including the module being absent)
      leaves it as ``None``.

    Finally bumps ``schema_version`` to 5, saves and reloads the document.
    """
    if self.schema_version < 4:
        migrate_3_to_4(self)

    if not self.is_pe():
        # not a PE, so no point in populating it
        self.impfuzzy = None
    elif not getattr(self, "impfuzzy", None):
        # Only import/compute when a value is actually needed. Previously
        # the import ran first inside the ``try``, so a missing
        # ``pyimpfuzzy`` wiped an impfuzzy value that was already stored.
        try:
            import pyimpfuzzy
            self.impfuzzy = pyimpfuzzy.get_impfuzzy_data(
                self.filedata.read())
        except Exception:
            self.impfuzzy = None

    self.schema_version = 5
    self.save()
    self.reload()
def calc_hash(self, pe_data, addr_space, base, proc, space):
    """Compute the imphash and impfuzzy values for a PE image.

    The imphash comes from ``pefile`` and the impfuzzy from ``pyimpfuzzy``.
    If either parser raises, the corresponding result becomes an
    ``"Error: ..."`` placeholder string.

    When the imphash comes back empty and ``self._config.FASTMODE`` is
    off, both values are recomputed from the API list returned by
    ``SearchImp.get_apilist``; if that list is ``None`` both become ``""``.
    An error placeholder is non-empty, so it does not trigger this
    fallback.

    Args:
        pe_data: Raw PE image contents.
        addr_space: Forwarded to ``SearchImp.get_apilist``.
        base: Forwarded to ``SearchImp.get_apilist``.
        proc: Process object; ``UniqueProcessId`` is read from it and it
            is forwarded to ``SearchImp.get_apilist``.
        space: Forwarded to ``SearchImp.get_apilist``.

    Returns:
        Tuple ``(hash_result, fuzzy_result)``.
    """
    # ``except Exception`` rather than a bare ``except`` so that
    # KeyboardInterrupt/SystemExit are not reported as "not a PE file".
    try:
        pe = pefile.PE(data=pe_data)
        hash_result = pe.get_imphash()
    except Exception:
        hash_result = "Error: This file is not PE file imphash"

    try:
        fuzzy_result = pyimpfuzzy.get_impfuzzy_data(pe_data)
    except Exception:
        fuzzy_result = "Error: This file is not PE file impfuzzy"

    if not hash_result and not self._config.FASTMODE:
        pid = proc.UniqueProcessId
        simp = SearchImp(self._config)
        apilists = simp.get_apilist(pid, addr_space, base, proc, space)
        if apilists is not None:
            hash_result = hashlib.md5(apilists).hexdigest()
            fuzzy_result = impfuzzyutil.hash_data(apilists)
        else:
            hash_result = ""
            fuzzy_result = ""

    return hash_result, fuzzy_result
# Receive data data = sys.stdin.buffer.read() if hash_type == "ssdeep": print(ssdeep(data), end="") elif hash_type == "impfuzzy": file_type = magic.from_buffer(data) if file_type[:4] == "PE32": try: pe = None pe = pefile.PE(data=data) if pe and hasattr(pe, "DIRECTORY_ENTRY_IMPORT"): print(pyimpfuzzy.get_impfuzzy_data(data), end="") except: pass # Do nothing if data is not valid PE file else: # Receive data data = sys.stdin.buffer.read() print("") print("CRC32: %x" % (zlib.crc32(data) & 0xffffffff)) print("MD5: %s" % hashlib.md5(data).hexdigest()) print("SHA1: %s" % hashlib.sha1(data).hexdigest()) print("SHA256: %s" % hashlib.sha256(data).hexdigest()) print("ssdeep: %s" % ssdeep(data)) file_type = magic.from_buffer(data)
def _analyzer_imp_fuzzy(self):
    """
    Compute the impfuzzy hash of ``self.file_buffer``.

    :return: the value produced by ``pyimpfuzzy.get_impfuzzy_data``
    """
    buffer = self.file_buffer
    fuzzy_hash = pyimpfuzzy.get_impfuzzy_data(buffer)
    return fuzzy_hash