def gen_ssdeep_hash(self, filepath, exclude=False):
    files = os.listdir(filepath)
    for file in files:
        if not os.path.isfile(filepath + file):
            print "[+] WARNING: %s is not a file and will not be analyzed." % (filepath + file)
            continue
        tmp_ssdeep_hash = ''
        if self.cluster_type == 'strings_ssdeep':
            data = os.popen('strings %s' % (filepath + file)).read()
            tmp_ssdeep_hash = ssdeep.hash(data)
        elif self.cluster_type == 'file_ssdeep':
            tmp_ssdeep_hash = ssdeep.hash_from_file(filepath + file)
        elif self.cluster_type == 'imp_exp_ssdeep':
            imp_exp_str = imp_exp_functions(filepath + file)
            if imp_exp_str:
                tmp_ssdeep_hash = ssdeep.hash(imp_exp_str)
        if tmp_ssdeep_hash:
            dst_file = self.tmpdir + str(self.count)
            f = open(dst_file, 'w')
            f.write(tmp_ssdeep_hash)
            f.close()
            self.count += 1
            if tmp_ssdeep_hash not in self.ssdeep_stats.keys():
                self.ssdeep_stats[tmp_ssdeep_hash] = []
            tmp_file_ssdeep = {}
            tmp_file_ssdeep['file_path'] = filepath + file
            tmp_file_ssdeep['file_md5'] = file_md5(filepath + file)
            tmp_file_ssdeep['cluster_type'] = self.cluster_type
            tmp_file_ssdeep['exclude'] = 1 if exclude else 0
            self.ssdeep_stats[tmp_ssdeep_hash].append(tmp_file_ssdeep)
def _calc_score(self, lt_new):
    try:
        import ssdeep
    except ImportError:
        raise ImportError(
            "ltgroup algorithm <ssdeep> needs python package ssdeep")
    ret = []
    h1 = ssdeep.hash(str(lt_new))
    if self._mem_hash:
        if len(self._d_hash) == 0:
            # initialize d_hash
            for lt in self._lttable:
                h = ssdeep.hash(str(lt))
                self._d_hash[lt.ltid] = h
        for ltid, lt_temp in enumerate(self._lttable):
            h2 = self._d_hash[lt_temp.ltid]
            score = ssdeep.compare(h1, h2)
            ret.append((ltid, score))
        self._d_hash[lt_new.ltid] = h1
    else:
        for lt_temp in self._lttable:
            ltid = lt_temp.ltid
            score = hash_score(str(lt_new), str(lt_temp))
            ret.append((ltid, score))
    return ret
def e_ssdeep(path, file_):
    pe = pefile.PE(path)
    # print type(pe.__data__[::])
    whole_file = ssdeep.hash(pe.__data__[::])
    # print whole_file
    base = pe.OPTIONAL_HEADER.ImageBase
    ep = pe.OPTIONAL_HEADER.AddressOfEntryPoint
    # print hex(ep)
    ep_section = None
    for section in pe.sections:
        size = section.Misc_VirtualSize
        if size == 0:
            size = section.SizeOfRawData
        if ep >= section.VirtualAddress and ep < section.VirtualAddress + size:
            # print section.Name
            # sect_data = pe.__data__[section.PointerToRawData:section.SizeOfRawData]
            # print sect_data[:10].encode("hex")
            ep_section = ssdeep.hash(pe.get_data(section.VirtualAddress))
            break
    if not ep_section:
        logging.error("Couldn't find EP section in {}".format(path))
    else:
        return database.SSDEEP(file=file_, whole_file=whole_file, ep_section=ep_section)
def simindex(sign, web, size):
    if size == 'H':
        sign1 = map(lambda x: ssdeep.hash(str(x)), sign)
        web1 = map(lambda x: ssdeep.hash(str(x)), web)
    else:
        sign1 = map(lambda x: str(x), sign)
        web1 = map(lambda x: str(x), web)
    table = []
    for s in sign1:
        table.append(map(lambda x: similarity.compare(x, s), web1))
    if len(web1) < len(sign1):
        table = map(lambda x: x + [0] * (len(sign1) - len(web1)), table)
    maxi = 0
    for t in range(0, len(table)):
        # print map(lambda x: len(x), table)
        m = max(table[t])
        maxi += m
        i = table[t].index(m)
        for k in range(t + 1, len(table)):
            del table[k][i]
    return maxi * 1.0 / len(sign)
def Detect_macro_script(self):
    distinction = True
    self.macro_parser_result_dic = {}
    try:
        script_data = self.hwp_info.openstream('Scripts/DefaultJScript').read()
        data = zlib.decompress(script_data, -15)
    except:
        data = script_data
    for index, value in enumerate(data):
        if eq(hex(value), '0x2f'):
            distinction = False
        if distinction == True:
            if eq(hex(value), '0x0') == False:
                self.variable_source.append(chr(value))
        if distinction == False:
            if eq(hex(value), '0x0') == False:
                self.macro_source.append(chr(value))
    variable_result = ''.join(
        (self.variable_source[i]) for i in range(1, len(self.variable_source) - 4))
    macro_result = ''.join(
        (self.macro_source[i]) for i in range(1, len(self.macro_source) - 4))
    variable_ssdeep = ssdeep.hash(variable_result)
    macro_ssdeep = ssdeep.hash(macro_result)
    # If the macro differs from the default
    if len(macro_result) != 47 and macro_result.find('function OnDocument_New()') == -1:
        ip_match = re.compile(Regular_IP, re.MULTILINE)
        email_match = re.compile(Regular_email, re.MULTILINE)
        url_match = re.compile(Regular_url, re.MULTILINE)
        self.macro_parser_result_dic['ip'] = ip_match.findall(macro_result)
        self.macro_parser_result_dic['email'] = email_match.findall(macro_result)
        self.macro_parser_result_dic['url'] = url_match.findall(macro_result)
        self.macro_parser_result_dic['variable_ssdeep'] = ssdeep.hash(variable_result)
        self.macro_parser_result_dic['macro_ssdeep'] = ssdeep.hash(macro_result)
    # If the macro is the same as the default
    else:
        self.macro_parser_result_dic['variable_ssdeep'] = 'None'
        self.macro_parser_result_dic['macro_ssdeep'] = 'None'
        self.macro_parser_result_dic['ip'] = 'None'
        self.macro_parser_result_dic['email'] = 'None'
        self.macro_parser_result_dic['url'] = 'None'
    return self.macro_parser_result_dic
def testComputeHash(self):
    self.assertEqual(
        ssdeep.hash("Also called fuzzy hashes, Ctph can match inputs that have homologies."),
        "3:AXGBicFlgVNhBGcL6wCrFQEv:AXGHsNhxLsr2C"
    )
    self.assertEqual(
        ssdeep.hash("Also called fuzzy hashes, CTPH can match inputs that have homologies."),
        "3:AXGBicFlIHBGcL6wCrFQEv:AXGH6xLsr2C"
    )
def main(): parser = argparse.ArgumentParser(description="Scan document for embedded objects.") parser.add_argument("file", help="File to process.") parser.add_argument('--extract', dest='extract', action='store_true', help="Extract ActiveMime Objects.") args = parser.parse_args() print 'ActiveMime Helper' print '-----------------' print ' [*] Loading file....%s ' % args.file if isfile(args.file): tmpf = open(args.file, 'rb') args.file = tmpf.read() amd = None if ActiveMimeDoc.is_activemime(args.file): amd = ActiveMimeDoc(args.file, ActiveMimeDoc.is_base64(args.file)) else: print formatmsg(' [*] File is not an ActiveMime Document', 'yellow') print " [*] Parsing as MIME Document" amd = process(args.file, False) if amd: print ' ------------------------------------------------------' print ' ActiveMime Document' print ' - {:18}{}'.format('Size:', len(amd.rawdoc)) print ' - {:18}{}'.format('Hash:', hashlib.sha1(amd.rawdoc).hexdigest()) if __has_ssdeep: print ' - {:18}{}'.format('ssdeep:', ssdeep.hash(amd.rawdoc)) print ' Payload Data' print ' - {:18}{}'.format('Compressed Size:',amd.compressed_size) print ' - {:18}{}'.format('Size:', amd.size) print ' - {:18}{}'.format('Hash:', hashlib.sha1(amd.data).hexdigest()) if __has_ssdeep: print ' - {:18}{}'.format('Data ssdeep:', ssdeep.hash(amd.data)) print ' - {:18}{}'.format('VBA Tail:', amd.has_vba_tail) print ' - {:18}{}'.format('OLE Doc:', amd.is_ole_doc) print ' ------------------------------------------------------' if args.extract: print ' [*] Writing decoded Project file' with open(hashlib.md5(amd.data).hexdigest(), 'wb') as out: out.write(amd.data) return 0 else: print formatmsg(' [!] File does not exist...exiting', 'red') return
def file_similarity(file1, file2):
    file1_cat = folder_category(file1)
    file2_cat = folder_category(file2)
    if file1_cat == "jadx":
        file1_codelist = parser.parser_cfr(file1)
    elif file1_cat == "cfr":
        file1_codelist = parser.parser_cfr(file1)
    elif file1_cat == "jdcore":
        file1_codelist = parser.parser_jdgui(file1)
    if file2_cat == "jadx":
        file2_codelist = parser.parser_cfr(file2)
    elif file2_cat == "cfr":
        file2_codelist = parser.parser_cfr(file2)
    elif file2_cat == "jdcore":
        file2_codelist = parser.parser_jdgui(file2)
    all_code = max(len(file1_codelist), len(file2_codelist))
    all_score = 0
    for file1_code in file1_codelist:
        temp = 0
        for file2_code in file2_codelist:
            if " {" not in file1_code or " {" not in file2_code:
                if isSamefunc(file1_code, file2_code):
                    h1 = ssdeep.hash(parser.trim(file1_code))
                    h2 = ssdeep.hash(parser.trim(file2_code))
                    score = ssdeep.compare(h1, h2)
                    all_score = all_score + score
                    # print(file1_code)
                    # print(file2_code)
                    # print(score)
            else:
                if isSamefunc(file1_code[:file1_code.index(" {")],
                              file2_code[:file2_code.index(" {")]):
                    # parser.trim(file1_code)
                    h1 = ssdeep.hash(parser.trim(file1_code))
                    h2 = ssdeep.hash(parser.trim(file2_code))
                    score = ssdeep.compare(h1, h2)
                    all_score = all_score + score
                    # print(file1_code)
                    # print(file2_code)
                    # print(score)
    # print(h_jadx)
    # print(h_cfr)
    return all_score / all_code
def ssdeepcompare(target, IP):
    try:
        ss_target = requests.get('http://{}/'.format(target))
        ssdeep_target_fuzz = ssdeep.hash(ss_target.text)
        print target, ssdeep_target_fuzz
        content = requests.get('https://{}'.format(IP), verify=False, timeout=5,
                               headers={'Host': target})
        ssdeep_fuzz = ssdeep.hash(content.text)
        print IP, ssdeep_fuzz
        print "ssdeep score for", IP, "is", ssdeep.compare(ssdeep_target_fuzz, ssdeep_fuzz)
    except (requests.exceptions.ConnectionError):
        print "cant connect to", IP
def getPerFunctionHash():
    """
    Iterates over the program's functions and, for each, computes
        - MD5 sum
        - SSDEEP
    """
    functions = Functions()
    hashes = {}
    for function in functions:
        funcCode = getFunctionCode(function)
        funcCode = cleanUpCode(function, funcCode)
        ssdeepstr = ssdeep.hash(funcCode)
        md5str = md5.new(funcCode).hexdigest()
        # lsh = LSHash(512, len(funcCode))
        # lsh.index(funcCode)
        # TODO ADD OTHER TYPE OF HASHES
        hashes[function] = {
            "md5": md5str,
            "ssdeep": ssdeepstr,
        }
        if debug:
            print "sub_%08x %s %s" % (function, md5str, ssdeepstr)  # DEBUG
    if dump:
        # save hash table in dump mode
        fd = open("./%s/%s.pickle" % (dumpdir, "hashes"), "w")
        pickle.dump(hashes, fd)
        fd.close()
    return hashes
def compute_ssdeep(fp):
    try:
        buff = fp.read()
    except AttributeError:
        pass
    else:
        return ssdeep.hash(buff)
def calc_hash(cls, pe_data):
    try:
        fuzzy_result = ssdeep.hash(pe_data)
    except:
        fuzzy_result = "Unable to calc ssdeep"
    return fuzzy_result
def build_sample(self, data, url=None): if not data: return None p = dict() p["type"] = self.get_sample_type(data) if p["type"] is None: return None p["md5"] = hashlib.md5(data).hexdigest() p["sha1"] = hashlib.sha1(data).hexdigest() if SSDEEP: p["ssdeep"] = ssdeep.hash(data) if p["type"] in ("PE",): imphash = self.get_imphash(data) if imphash: p["imphash"] = imphash if url: p["url"] = url p["data"] = base64.b64encode(data) return p
def getfuzzyhash(): """Returns fuzzy hash of spam. This function returns hash generated using the ssdeep library. Hash is generated using the combination of mail's body + subject. Msg length is being checked because SSDEEP has some issues with comparing hashes of small spams. If spam's body is very less or non existent, we add our randomText to body. There would be certain cases when there wouldn't be any html or text portion i.e. email body would be empty. Hence forth len = html/text + subject In shivamaindb.py if len < 10 then keeping comparision ratio higher """ if mailFields['html']: if len(mailFields['html']) < 150: data = mailFields['html'] + " " + mailFields['subject'] + randomText else: data = mailFields['html'] + " " + mailFields['subject'] mailFields['len'] = len(mailFields['html']) + len(mailFields['subject']) elif mailFields['text']: if len(mailFields['text']) < 150: data = mailFields['text'] + " " + mailFields['subject'] + randomText else: data = mailFields['text'] + " " + mailFields['subject'] mailFields['len'] = len(mailFields['text']) + len(mailFields['subject']) else: # Test mails without body and limited chars in subject data = mailFields['subject'] + mailFields['from'] + randomText mailFields['len'] = len(mailFields['subject']) return ssdeep.hash(data)
def should_parse(repo, state):
    if repo.repository.owner.login in state.bad_users:
        print(bcolors.FAIL + "Failed check: Ignore User" + bcolors.ENDC)
        return False
    if repo.repository.name in state.bad_repos:
        print(bcolors.FAIL + "Failed check: Ignore Repo" + bcolors.ENDC)
        return False
    if repo.name in state.bad_files:
        print(bcolors.FAIL + "Failed check: Ignore File" + bcolors.ENDC)
        return False

    # Fuzzy Hash Comparison
    try:
        candidate_sig = ssdeep.hash(repo.decoded_content)
        for sig in state.bad_signatures:
            similarity = ssdeep.compare(candidate_sig, sig)
            if similarity > SIMILARITY_THRESHOLD:
                print(bcolors.FAIL +
                      "Failed check: Ignore Fuzzy Signature on Contents "
                      "({}% Similarity)".format(similarity) + bcolors.ENDC)
                return False
    except github.UnknownObjectException:
        print(bcolors.FAIL +
              "API Error: File no longer exists on github.com" + bcolors.ENDC)
        return False
    return True
def fingerprints(data): """This function return the fingerprints of data: - md5 - sha1 - sha256 - sha512 - ssdeep """ # md5 md5 = hashlib.md5() md5.update(data) md5 = md5.hexdigest() # sha1 sha1 = hashlib.sha1() sha1.update(data) sha1 = sha1.hexdigest() # sha256 sha256 = hashlib.sha256() sha256.update(data) sha256 = sha256.hexdigest() # sha512 sha512 = hashlib.sha512() sha512.update(data) sha512 = sha512.hexdigest() # ssdeep ssdeep_ = ssdeep.hash(data) return md5, sha1, sha256, sha512, ssdeep_
def get_hash(path):
    f = open(path, 'r', encoding='utf-8')
    lines = [l.strip() for l in f.readlines()]
    codes = ""
    for line in lines:
        codes += line
    return ssdeep.hash(codes)
def extract_sections_privileges(self):
    section_dict = {}
    for section in self.pe.sections:
        try:
            # extract the section name
            section_name = section.Name.decode().split('\x00')[0]
            entropy = section.get_entropy()
            hash_ssdeep = ssdeep.hash(section.get_data())
            hash_md5 = hashlib.md5(section.get_data()).hexdigest().upper()
            offset = hex(section.PointerToRawData)
            character = hex(section.Characteristics)[2:]
            virtual_address = section.VirtualAddress
            virtual_size = section.Misc_VirtualSize
            raw_size = section.SizeOfRawData
        except:
            continue
        section_dict[section_name] = {
            'section_name': section_name,
            'entropy': entropy,
            'virtual_address': virtual_address,
            'virtual_size': virtual_size,
            'raw_size': raw_size,
            'hash_md5': hash_md5,
            'hash_ssdeep': hash_ssdeep,
            'offset': offset,
            'character': character
        }
    return section_dict
def get_spam_level(player, message_content):
    """
    Gets a spam level for a message using a fuzzy hash.
    > 50% means it's probably spam.
    """
    try:
        message_hash = ssdeep.hash(message_content)
        spam_level = 0
        spam_levels = [ssdeep.compare(message_hash, prior_hash)
                       for prior_hash in player.last_message_hashes
                       if prior_hash is not None]
        if len(spam_levels) > 0:
            spam_level = max(spam_levels)
        player.last_message_hashes.append(message_hash)
        if spam_level > SPAM_TOLERANCE:
            player.spam_detections += 1
            try:
                util.logger.info("Message '" + message_content + "' detected as SPAM!")
                util.logger.info("spam_level was " + str(spam_level) + ".")
                util.logger.info("Player " + player.name + " now has " +
                                 str(player.spam_detections) + " spam detections.")
            except Exception as e:
                util.logger.info("Failed to print spam detection debug message because " +
                                 str(e) + ".")
    except:
        spam_level = 0  # if ssdeep doesn't work
        try:
            util.logger.info("ssdeep failed to process message!")
            util.logger.info("Message was '" + message_content + "' by player " +
                             player.name + ".")
            util.logger.info("BTW, player " + player.name + " has " +
                             str(player.spam_detections) + " spam detections.")
        except Exception as e:
            util.logger.info("Failed to print ssdeep failure debug message because " +
                             str(e) + ".")
    return spam_level
def lambda_handler(event, context):
    r'''Handles the image files and directs them to ghiro for digital forensics.'''
    srcbucket = event['Records'][0]['s3']['bucket']['name']
    key = urllib.unquote_plus(
        event['Records'][0]['s3']['object']['key'].encode('utf8'))
    try:
        response = s3.get_object(Bucket=srcbucket, Key=key)
    except Exception as e:
        print('Error getting object {} from bucket {}. Make sure they exist and your '
              'bucket is in the same region as this function.'.format(key, srcbucket))
        raise e
    try:
        hashval = ssdeep.hash(response['Body'].read())
    except Exception as e:
        print('Error occurred while computing hash of the file {}'.format(key))
        raise e
    try:
        if isBadHash(hashval) == False:
            s3.copy_object(Bucket=dstbucket,
                           CopySource={'Bucket': srcbucket, 'Key': key},
                           Key=key)
    except Exception as e:
        print('Error occurred while comparing hash of the file {}'.format(key))
        raise e
def getFileProperties(self, filename, fc):
    self.logger.info("Getting file ID")
    fp = {'filename': filename}
    try:
        # File size
        fp['size'] = len(fc)
        # MD5
        m = hashlib.md5()
        m.update(fc)
        fp['md5'] = m.hexdigest()
        # SHA1
        m = hashlib.sha1()
        m.update(fc)
        fp['sha1'] = m.hexdigest()
        # SHA256
        m = hashlib.sha256()
        m.update(fc)
        fp['sha256'] = m.hexdigest()
        # SSDEEP
        fp['ssdeep'] = ssdeep.hash(fc)
        # Magic
        fp['magic'] = magic.from_buffer(fc)
        # Exiftool
        # NOTE: exiftool chokes on certain formats; needs proper handling someday
        fp['filetype'] = self.et.get_tag('FileType', self.incoming + filename)
        # Tag
        if fp['magic'] != '':
            fp['tags'] = [fp['magic'].split()[0].lower()]
        else:
            fp['tags'] = []
    except IOError as e:
        self.logger.error("IO Error", exc_info=True)
    return {'_id': fp['sha1'], 'id': fp}
def fingerprints(data):
    # md5
    md5 = hashlib.md5()
    md5.update(data)
    md5 = md5.hexdigest()

    # sha1
    sha1 = hashlib.sha1()
    sha1.update(data)
    sha1 = sha1.hexdigest()

    # sha256
    sha256 = hashlib.sha256()
    sha256.update(data)
    sha256 = sha256.hexdigest()

    # sha512
    sha512 = hashlib.sha512()
    sha512.update(data)
    sha512 = sha512.hexdigest()

    # ssdeep
    ssdeep_ = ssdeep.hash(data)

    return md5, sha1, sha256, sha512, ssdeep_
def info_file(path):
    BUF_SIZE = 65536
    md5 = hashlib.md5()
    sha1 = hashlib.sha1()
    sha256 = hashlib.sha256()
    sha512 = hashlib.sha512()
    content = b''
    with open(path, 'rb') as f:
        while True:
            data = f.read(BUF_SIZE)
            if not data:
                break
            md5.update(data)
            sha1.update(data)
            sha256.update(data)
            sha512.update(data)
            content += data
    # ssdeep and tlsh must be computed over the whole file, not a single chunk
    SSDEEP = ssdeep.hash(content)
    TLSH = tlsh.hash(content)
    print("MD5: {0}".format(md5.hexdigest()))
    print("SHA1: {0}".format(sha1.hexdigest()))
    print("SHA256: {0}".format(sha256.hexdigest()))
    print("SHA512: {0}".format(sha512.hexdigest()))
    print("SSDEEP: {0}".format(SSDEEP))
    print("TLSH: {0}".format(TLSH))
def make_request(url, quiet=False, raw_results=False):
    if not quiet:
        print "Requesting {0}".format(url)
    r = requests.get(url, allow_redirects=False, timeout=90)
    content = r.content
    return r.status_code, ssdeep.hash(content) if not raw_results else content.encode('hex')
def log_data(url, resp, now, loot_dir):
    page_data = resp.text
    page_data_binary = page_data.encode()
    sha2 = hashlib.sha256(page_data_binary).hexdigest()
    parsed = urlparse(url)
    if parsed.path == '' or parsed.path == '/':
        full_path = loot_dir + (parsed.netloc + '/') + ('index' + sha2)
    else:
        full_path = loot_dir + (parsed.netloc + '/') + (parsed.path + sha2)
    with open(full_path, 'a') as fd:
        _LOGGER.info('Writing out HTTP text body for %s' % url)
        fd.write(page_data)
    payload = {
        'status_code': resp.status_code,
        'domain': parsed.netloc,
        'path': parsed.path,
        'url': url,
        'time': str(now),
        'sha2': sha2,
        'md5': hashlib.md5(page_data.encode()).hexdigest(),
        'ssdeep': ssdeep.hash(page_data),
        'has_forms': len(get_forms(page_data)) > 0
    }
    with open('%s.json' % full_path, 'a') as fd:
        _LOGGER.info('Writing out metadata for %s' % url)
        json.dump(payload, fd, ensure_ascii=False, indent=4)
    return payload
def build_sample(self, data, url=None):
    if not data:
        return None
    p = dict()
    p['type'] = self.get_sample_type(data)
    if p['type'] is None:
        return None
    p['md5'] = hashlib.md5(data).hexdigest()
    p['sha1'] = hashlib.sha1(data).hexdigest()
    if SSDEEP:
        p['ssdeep'] = ssdeep.hash(data)
    if p['type'] in ('PE', ):
        imphash = self.get_imphash(data)
        if imphash:
            p['imphash'] = imphash
    if url:
        p['url'] = url
    p['data'] = base64.b64encode(data)
    return p
def getSsdeep(data):
    try:
        res = ssdeep.hash(data)
        return res
    except Exception, e:
        logging.exception(str(e))
        return ''
def analyze_info_data(self):
    sum1 = self.pkg1.package['info']['summary'] or ''
    sum2 = self.pkg2.package['info']['summary'] or ''
    self.flags['similar_description'] = (
        ssdeep.compare(ssdeep.hash(sum1), ssdeep.hash(sum2)) > 80)

    page1 = self.pkg1.package['info']['home_page'] or ''
    page2 = self.pkg2.package['info']['home_page'] or ''
    self.flags['same_homepage'] = (page1 == page2)

    docs1 = self.pkg1.package['info']['docs_url'] or ''
    docs2 = self.pkg2.package['info']['docs_url'] or ''
    self.flags['same_docs'] = (docs1 == docs2)

    releases1 = set(self.pkg1.package['releases'].keys())
    releases2 = set(self.pkg2.package['releases'].keys())
    self.flags['has_subreleases'] = (releases2.issubset(releases1))
def build_sample(self, data, url=None, sampletype=None):
    if not data:
        return None
    p = dict()
    if sampletype:
        p['type'] = sampletype
    else:
        p['type'] = self.get_sample_type(data)
    if p['type'] is None:
        return None
    p['md5'] = hashlib.md5(data).hexdigest()
    p['sha1'] = hashlib.sha1(data).hexdigest()
    p['sha256'] = hashlib.sha256(data).hexdigest()
    if SSDEEP:
        p['ssdeep'] = ssdeep.hash(data)
    if p['type'] in ('PE', ):
        imphash = self.get_imphash(data)
        if imphash:
            p['imphash'] = imphash
    if url:
        p['url'] = url
    p['data'] = base64.b64encode(data)
    return p
def getfuzzyhash(self):
    """
    Returns fuzzy hash of spam using the ssdeep library.

    Hash is generated using the combination of mail's body + subject.
    Msg length is being checked because SSDEEP has some issues with
    comparing hashes of small spams. If spam's body is very small or
    non-existent, we add our randomText to the body.
    There are certain cases when the email body is empty.
    Hence len = html/text + subject
    """
    if self.mailFields['html']:
        if len(self.mailFields['html']) < 150:
            data = self.mailFields['html'] + " " + self.mailFields['subject'] + self.randomText
        else:
            data = self.mailFields['html'] + " " + self.mailFields['subject']
        self.mailFields['len'] = len(self.mailFields['html']) + len(self.mailFields['subject'])
    elif self.mailFields['text']:
        if len(self.mailFields['text']) < 150:
            data = self.mailFields['text'] + " " + self.mailFields['subject'] + self.randomText
        else:
            data = self.mailFields['text'] + " " + self.mailFields['subject']
        self.mailFields['len'] = len(self.mailFields['text']) + len(self.mailFields['subject'])
    else:
        # Test mails without body and limited chars in subject
        data = self.mailFields['subject'] + self.mailFields['from'] + self.randomText
        self.mailFields['len'] = len(self.mailFields['subject'])
    return ssdeep.hash(data)
def fingerprints(data): """This function return the fingerprints of data. Args: data (string): raw data Returns: tuple: fingerprints md5, sha1, sha256, sha512, ssdeep """ # md5 md5 = hashlib.md5() md5.update(data) md5 = md5.hexdigest() # sha1 sha1 = hashlib.sha1() sha1.update(data) sha1 = sha1.hexdigest() # sha256 sha256 = hashlib.sha256() sha256.update(data) sha256 = sha256.hexdigest() # sha512 sha512 = hashlib.sha512() sha512.update(data) sha512 = sha512.hexdigest() # ssdeep ssdeep_ = ssdeep.hash(data) return md5, sha1, sha256, sha512, ssdeep_
def run_detect(self) -> t.Dict[str, str]:
    """Given a blob of data, run initial detection to gather metadata."""
    with open(self.filepath, "rb") as fd:
        data = fd.read()

    # generate table of hashes useful for an analyst
    hashes: t.Dict[str, str] = {}
    hashes["MD5"] = hashlib.md5(data).hexdigest()
    hashes["SHA256"] = hashlib.sha256(data).hexdigest()
    hashes["Similarity Hash (ssdeep)"] = ssdeep.hash(data)

    # VT checks are optional, and only occur if $VT_API is set
    vt_api: t.Optional[str] = os.environ.get("VT_API")
    if vt_api:
        params = {"apikey": vt_api}
        files = {"file": data}
        resp = requests.post(
            "https://www.virustotal.com/vtapi/v2/file/scan",
            files=files,
            params=params,
        )
        print(resp.json())
    return hashes
def ui_loop(repo, log_buf, state, is_gist=False):
    choice = input_handler(state, is_gist)
    if choice == "c":
        state.bad_signatures.append(ssdeep.hash(repo.decoded_content))
    elif choice == "u":
        state.bad_users.append(
            repo.owner.login if is_gist else repo.repository.owner.login)
    elif choice == "r" and not is_gist:
        state.bad_repos.append(repo.repository.name)
    elif choice == "f" and not is_gist:
        state.bad_files.append(repo.name)
    elif choice == "p":
        print_handler(repo.decoded_content)
        ui_loop(repo, log_buf, state, is_gist)
    elif choice == "s":
        save_state(state.query, state)
        ui_loop(repo, log_buf, state, is_gist)
    elif choice == "a":
        with open(state.logfile, "a") as fd:
            fd.write(log_buf)
    elif choice.startswith("/"):
        log_buf += regex_handler(choice, repo)
        ui_loop(repo, log_buf, state, is_gist)
    elif choice == "b":
        if state.index - 1 < state.lastInitIndex:
            print(bcolors.FAIL + "Can't go backwards past restore point "
                  "because of rate-limiting/API limitations" + bcolors.ENDC)
            ui_loop(repo, log_buf, state, is_gist)
        else:
            state.index -= 2
    elif choice == "q":
        sys.exit(0)
def should_parse(repo, state, is_gist=False):
    owner_login = repo.owner.login if is_gist else repo.repository.owner.login
    if owner_login in state.bad_users:
        print(bcolors.FAIL + "Failed check: Ignore User" + bcolors.ENDC)
        return False
    if not is_gist and repo.repository.name in state.bad_repos:
        print(bcolors.FAIL + "Failed check: Ignore Repo" + bcolors.ENDC)
        return False
    if not is_gist and repo.name in state.bad_files:
        print(bcolors.FAIL + "Failed check: Ignore File" + bcolors.ENDC)
        return False

    # Fuzzy Hash Comparison
    try:
        if not is_gist:
            # Temporary fix for PyGithub until fixed upstream (PyGithub#1178)
            repo._url.value = repo._url.value.replace(
                repo._path.value, urllib.parse.quote(repo._path.value))
        candidate_sig = ssdeep.hash(repo.decoded_content)
        for sig in state.bad_signatures:
            similarity = ssdeep.compare(candidate_sig, sig)
            if similarity > SIMILARITY_THRESHOLD:
                print(bcolors.FAIL +
                      "Failed check: Ignore Fuzzy Signature on Contents "
                      "({}% Similarity)".format(similarity) + bcolors.ENDC)
                return False
    except github.UnknownObjectException:
        print(bcolors.FAIL +
              "API Error: File no longer exists on github.com" + bcolors.ENDC)
        return False
    return True
def getfuzzyhash(): """Returns fuzzy hash of spam. This function returns hash generated using the ssdeep library. Hash is generated using the combination of mail's body + subject. Msg length is being checked because SSDEEP has some issues with comparing hashes of small spams. If spam's body is very less or non existent, we add our randomText to body. There would be certain cases when there wouldn't be any html or text portion i.e. email body would be empty. Hence forth len = html/text + subject In shivamaindb.py if len < 10 then keeping comparision ratio higher """ if mailFields['html']: if len(mailFields['html']) < 150: data = mailFields['html'] + " " + mailFields['subject'] + randomText else: data = mailFields['html'] + " " + mailFields['subject'] mailFields['len'] = len(mailFields['html']) + len( mailFields['subject']) elif mailFields['text']: if len(mailFields['text']) < 150: data = mailFields['text'] + " " + mailFields['subject'] + randomText else: data = mailFields['text'] + " " + mailFields['subject'] mailFields['len'] = len(mailFields['text']) + len( mailFields['subject']) else: # Test mails without body and limited chars in subject data = mailFields['subject'] + mailFields['from'] + randomText mailFields['len'] = len(mailFields['subject']) return ssdeep.hash(data)
def compute_hashes(self):
    """ Compute the file hashes """
    filename = self.get_file_path(self.sha256)
    # Make sure the file exists and is readable
    if not os.access(filename, os.R_OK):
        flash('There was an error while trying to analyse the file.', 'danger')
        return False
    with open(filename, 'rb') as f:
        buf = f.read()
    if self.sha256 is None:
        self.sha256 = hashlib.sha256(buf).hexdigest()
    if self.sha1 is None:
        self.sha1 = hashlib.sha1(buf).hexdigest()
    if self.md5 is None:
        self.md5 = hashlib.md5(buf).hexdigest()
    if self.ssdeep is None:
        self.ssdeep = ssdeep.hash(buf)
    if self.mime is None:
        try:
            self.mime = magic.from_buffer(buf, mime=True).decode('utf-8')
        except:
            self.mime = None
    if self.entropy is None:
        self.entropy = self.compute_entropy(buf)
def build_sample(self, data, url=None, sampletype=None):
    if not data:
        return None
    p = dict()
    if sampletype:
        p['type'] = sampletype
        if isinstance(data, str):
            data = data.encode()
    else:
        p['type'] = self.get_sample_type(data)
    if p['type'] is None:
        return None
    p['md5'] = hashlib.md5(data).hexdigest()
    p['sha1'] = hashlib.sha1(data).hexdigest()
    p['sha256'] = hashlib.sha256(data).hexdigest()
    if SSDEEP:
        p['ssdeep'] = ssdeep.hash(data)
    if p['type'] in ('PE', ):
        imphash = self.get_imphash(data)
        if imphash:
            p['imphash'] = imphash
    if url:
        p['url'] = url
    p['data'] = base64.b64encode(data).decode()
    return p
def get_hashes(self, address_code):
    """Given array of contract bytecodes returns array of corresponding
    ssdeep (CTPH) hashes

    keyword arguments:
    address_code -- Array of bytecodes as strings"""
    hash_arr = map(lambda x: ssdeep.hash(x), address_code)
    return hash_arr
def hash_data(file):
    md5 = hashlib.md5(file.content).hexdigest()
    sha128 = hashlib.sha1(file.content).hexdigest()
    sha256 = hashlib.sha256(file.content).hexdigest()
    sha512 = hashlib.sha512(file.content).hexdigest()
    ssdeep_hash = ssdeep.hash(file.content)
    hash = {'evil_finder_md5': md5,
            'evil_finder_sha128': sha128,
            'evil_finder_sha256': sha256,
            'evil_finder_sha512': sha512,
            'evil_finder_ssdeep': ssdeep_hash}
    return hash
def execute(self, input_data): raw_bytes = input_data["sample"]["raw_bytes"] self.meta["sha1"] = hashlib.sha1(raw_bytes).hexdigest() self.meta["sha256"] = hashlib.sha256(raw_bytes).hexdigest() self.meta["ssdeep"] = ssd.hash(raw_bytes) self.meta["entropy"] = self._entropy(raw_bytes) self.meta.update(input_data["meta"]) return self.meta
def execute(self, input_data):
    raw_bytes = input_data['sample']['raw_bytes']
    self.meta['sha1'] = hashlib.sha1(raw_bytes).hexdigest()
    self.meta['sha256'] = hashlib.sha256(raw_bytes).hexdigest()
    self.meta['ssdeep'] = ssd.hash(raw_bytes)
    self.meta['entropy'] = self._entropy(raw_bytes)
    self.meta.update(input_data['meta'])
    return self.meta
def main(): """Entry function.""" parser = argparse.ArgumentParser( description='Process Fuzzy hashing comparison between project url and \ return project urls') parser.add_argument("input_csv_file", help="Specify the csv file to read") parser.add_argument( "number_urls", help="Number of found urls to process fuzzy hashing \ (max = 10)") args = parser.parse_args() input_csv_file = args.input_csv_file number_urls = int(args.number_urls) header_names = ['acronym', 'title', 'projectUrl', 'foundProjectUrl1', 'foundProjectUrl2', 'foundProjectUrl3', 'foundProjectUrl4', 'foundProjectUrl5', 'foundProjectUrl6', 'foundProjectUrl7', 'foundProjectUrl8', 'foundProjectUrl9', 'foundProjectUrl10'] df = pd.read_csv(input_csv_file, sep=',', quotechar='"', names=header_names, index_col=False) df['projectUrlHash'] = np.nan df['foundProjectUrl1Hash'] = np.nan df['foundProjectUrl2Hash'] = np.nan df['foundProjectUrl3Hash'] = np.nan df['foundProjectUrl4Hash'] = np.nan df['foundProjectUrl5Hash'] = np.nan df['foundProjectUrl6Hash'] = np.nan df['foundProjectUrl7Hash'] = np.nan df['foundProjectUrl8Hash'] = np.nan df['foundProjectUrl9Hash'] = np.nan df['foundProjectUrl10Hash'] = np.nan df['MatchScore1'] = np.nan df['MatchScore2'] = np.nan df['MatchScore3'] = np.nan df['MatchScore4'] = np.nan df['MatchScore5'] = np.nan df['MatchScore6'] = np.nan df['MatchScore7'] = np.nan df['MatchScore8'] = np.nan df['MatchScore9'] = np.nan df['MatchScore10'] = np.nan for index, row in df.iterrows(): print "computing fuzzy hash for project %s" % row['acronym'] try: df.ix[index, 'projectUrlHash'] = ssdeep.hash( urllib2.urlopen(row['projectUrl'], timeout=10).read()) except urllib2.HTTPError, e: print e.code except urllib2.URLError, e: print e.reason
def _run(self, scanObject, result, depth, args):
    '''
    Assumes:
        There is a string-like object in scanObject.buffer.
    Ensures:
        Hash values are added using scanObject.addMetadata.

    Laika Config File Options:
        hashmd5:    "1" = md5.hexdigest,    "0" = omit
        hashSHA1:   "1" = sha1.hexdigest,   "0" = omit
        hashSHA256: "1" = sha256.hexdigest, "0" = omit
        hashSHA512: "1" = sha512.hexdigest, "0" = omit
        hashssdeep: "1" = ssdeep.hash,      "0" = omit

    Function Arguments:
    :param scanObject:<laikaboss.objectmodel.ScanObject>
    :param result:<laikaboss.objectmodel.ScanResult>
    :param depth:<int>
    :param args:<dict> --execution flow controls--
        Valid args names <str> (value must be 1, 0, "1", or "0")
            1/"1": Generate the hash of named type
            0/"0": Omit the hash of named type
        default args: {"md5":1, "SHA1":0, "SHA256":1, "SHA512":1, "ssdeep":0}
    :return: Always returns an empty list (no child objects)
    '''
    moduleResult = []
    metaDict = {}
    if int(get_option(args, 'md5', 'hashmd5', "md5" in self.module_defaults)):
        metaDict['md5'] = hashlib.md5(scanObject.buffer).hexdigest()
    if int(get_option(args, 'SHA1', 'hashSHA1', "SHA1" in self.module_defaults)):
        metaDict['SHA1'] = hashlib.sha1(scanObject.buffer).hexdigest()
    if int(get_option(args, 'SHA256', 'hashSHA256', "SHA256" in self.module_defaults)):
        metaDict['SHA256'] = hashlib.sha256(scanObject.buffer).hexdigest()
    if int(get_option(args, 'SHA512', 'hashSHA512', "SHA512" in self.module_defaults)):
        metaDict['SHA512'] = hashlib.sha512(scanObject.buffer).hexdigest()
    if int(get_option(args, 'ssdeep', 'hashssdeep', "ssdeep" in self.module_defaults)):
        # Only import ssdeep if dispatched.
        # Prevents an import error if you don't have/want the package;
        # python should keep handing you the original, minimal/no overhead.
        try:
            import ssdeep
            metaDict['ssdeep'] = ssdeep.hash(scanObject.buffer)
        except ImportError:
            metaDict['ssdeep'] = ""  # indicate ssdeep was configured but failed
    scanObject.addMetadata(self.module_name, "HASHES", metaDict)
    return moduleResult
def processMeta(pe, fc, profile):
    profile[PROFILE.STATIC][META.fileSize] = len(fc)
    profile[PROFILE.STATIC][META.timeStamp] = pe.FILE_HEADER.TimeDateStamp
    profile[PROFILE.STATIC][META.dll] = pe.FILE_HEADER.IMAGE_FILE_DLL
    profile[PROFILE.STATIC][META.numberSec] = pe.FILE_HEADER.NumberOfSections
    profile[PROFILE.STATIC][META.importHash] = pe.get_imphash()
    profile[PROFILE.STATIC][META.md5] = hashlib.md5(fc).hexdigest()
    profile[PROFILE.STATIC][META.sha1] = hashlib.sha1(fc).hexdigest()
    profile[PROFILE.STATIC][META.ssdeep] = ssdeep.hash(fc)
    return profile
def _run(self, scanObject, result, depth, args):
    moduleResult = []
    metaDict = {}
    # metaDict['SHA224'] = hashlib.sha224(scanObject.buffer).hexdigest()
    metaDict['SHA256'] = hashlib.sha256(scanObject.buffer).hexdigest()
    # metaDict['SHA384'] = hashlib.sha384(scanObject.buffer).hexdigest()
    metaDict['SHA512'] = hashlib.sha512(scanObject.buffer).hexdigest()[0:32]
    metaDict['ssdeep'] = ssdeep.hash(scanObject.buffer)
    scanObject.addMetadata(self.module_name, "HASHES", metaDict)
    return moduleResult
def META_BASIC_INFO(s, buff):
    BASIC_INFO = {
        'MD5': hashlib.md5(buff).hexdigest(),
        'SHA1': hashlib.sha1(buff).hexdigest(),
        'SHA256': hashlib.sha256(buff).hexdigest(),
        'SHA512': hashlib.sha512(buff).hexdigest(),
        'ssdeep': ssdeep.hash(buff),
        'Size': '%s bytes' % len(buff)
    }
    return BASIC_INFO
def META_BASIC_INFO(s, buff):
    BASIC_INFO = OrderedDict([('MD5', hashlib.md5(buff).hexdigest()),
                              ('SHA1', hashlib.sha1(buff).hexdigest()),
                              ('SHA256', hashlib.sha256(buff).hexdigest()),
                              ('SHA512', hashlib.sha512(buff).hexdigest()),
                              ('ssdeep', ssdeep.hash(buff)),
                              ('Size', '%s bytes' % len(buff))])
    return BASIC_INFO
def get_hash_tuple(functions, filename):
    """ Creates the binary tuple for use in Malfunction and Mallearn

    Results in the form:
        (Binary Hash, [**ssdeep hashes])"""
    function_hashes = []
    binary_hash = get_binary_hash(filename)
    for function in functions:
        function_hashes.append(ssdeep.hash(function))
    return (binary_hash, function_hashes)
def execute(self, input_data):
    raw_bytes = input_data['sample']['raw_bytes']
    sha1 = hashlib.sha1(raw_bytes).hexdigest()
    sha256 = hashlib.sha256(raw_bytes).hexdigest()
    ssdeep = ssd.hash(raw_bytes)
    entropy = self._entropy(raw_bytes)
    output = {name: value for name, value in locals().iteritems()
              if name not in ['self', 'input_data', 'raw_bytes']}
    output.update(input_data['meta'])
    return output
def build_apk_sample(self, data, url=None):
    sample = {
        "md5": hashlib.md5(data).hexdigest(),
        "sha1": hashlib.sha1(data).hexdigest(),
        "raw": data,
        "data": base64.b64encode(data),
        "type": "APK",
    }
    if SSDEEP:
        sample['ssdeep'] = ssdeep.hash(data)
    return sample
def set_hash(cid):
    c = Content.objects.get(id=cid)
    if c:
        if c.content:
            try:
                encoded = c.content.encode("utf-8")
                c.sha1 = str(hashlib.sha1(encoded).hexdigest())
                c.sha256 = str(hashlib.sha256(encoded).hexdigest())
                c.sha512 = str(hashlib.sha512(encoded).hexdigest())
                c.ssdeep = str(ssdeep.hash(encoded))
                c.save()
            except Exception as e:
                logger.error(str(e))
    return c
def get_hashes(self):
    hashes = {}
    # Calculate hashes
    with open(self.path) as handle:
        filedata = handle.read()
        hashes = {
            'md5': hashlib.md5(filedata).hexdigest(),
            'sha1': hashlib.sha1(filedata).hexdigest(),
            'sha256': hashlib.sha256(filedata).hexdigest(),
            'sha512': hashlib.sha512(filedata).hexdigest(),
            'crc32': "%08X" % (binascii.crc32(filedata) & 0xFFFFFFFF),
            'ssdeep': ssdeep.hash(filedata),
        }
    return hashes
def get_hashes(self): hashes = {} # Calculate hashes with open(self.path) as handle: filedata = handle.read() hashes = { "md5": hashlib.md5(filedata).hexdigest(), "sha1": hashlib.sha1(filedata).hexdigest(), "sha256": hashlib.sha256(filedata).hexdigest(), "sha512": hashlib.sha512(filedata).hexdigest(), "crc32": "%08X" % (binascii.crc32(filedata) & 0xFFFFFFFF), "ssdeep": ssdeep.hash(filedata), } return hashes
def get_ssdeep(payload):
    """
    Generate ssdeep hash of a payload

    :param payload: The payload to be hashed.

    :returns: ssdeep hash
    :rtype: str or None
    """
    try:
        fuzzy = ssdeep.hash(payload)
    except:
        fuzzy = None
    return fuzzy
def get_page_data(response):
    page = WebPage()
    page['uri'] = response.url
    page['status_code'] = response.status
    if 'screenshot' in response.meta:
        page['screenshot'] = response.meta['screenshot']
        page['ssdeep_pagesource'] = str(ssdeep.hash(response.body))
        try:
            screenshot_hash = ssdeep.hash_from_file(response.meta['screenshot'])
            page['ssdeep_screenshot'] = screenshot_hash
        except:
            log.msg("Could not create hash from screenshot: " +
                    response.meta['screenshot'], level=log.DEBUG)
    return page
def fileHash(self, filePath):
    try:
        with open(filePath, 'rb') as fh:
            data = fh.read()
            m = hashlib.md5()
            m.update(data)
            md5 = m.hexdigest()
            try:
                import ssdeep
                deep = ssdeep.hash(data)
            except:
                deep = "Null"
    except:
        md5 = "Null"
        deep = "Null"
    return md5, deep
def get_page_info(self):
    page = WebPage()
    page['uri'] = self.response.url
    page['status_code'] = self.response.status
    page['useragent'] = self.response.meta.get('User-Agent')
    page['referer'] = self.response.meta.get('Referer')
    if 'screenshot' in self.response.meta:
        page['screenshot'] = self.response.meta['screenshot']
        page['ssdeep_pagesource'] = str(ssdeep.hash(self.response.body))
        try:
            screenshot_hash = ssdeep.hash_from_file(self.response.meta['screenshot'])
            page['ssdeep_screenshot'] = screenshot_hash
        except:
            log.msg("Could not create hash from screenshot: " +
                    self.response.meta['screenshot'], level=log.DEBUG)
    return page
def load_data_from_results_file(path):
    c2_bases = {}
    with open(path, "r") as f:
        for line in f:
            line = line.strip()
            data = json.loads(line)
            data['content'] = data['content'].decode('hex')
            data['content_ssdeep'] = ssdeep.hash(data['content'])
            if data['base_url'] not in c2_bases:
                c2_bases[data['base_url']] = {}
            data["offset"] = data["url"][len(data["base_url"]):]
            print "{0} - {1} - {2}".format(data['code'], data['base_url'], data['offset'])
            c2_bases[data['base_url']][data['offset']] = data
    return c2_bases
def fileHash(self, filePath, mdHash, deepHash):
    try:
        with open(filePath, 'rb') as fh:
            data = fh.read()
    except:
        print "unable to open file %s" % filePath
        data = None
    if data != None and mdHash == True:
        m = hashlib.md5()
        m.update(data)
        md5 = m.hexdigest()
    else:
        md5 = "Null"
    if data != None and deepHash == True:
        import ssdeep
        deep = ssdeep.hash(data)
    else:
        deep = "Null"
    return md5, deep