def upload_view():
    upload_file = request.files['file']
    file_stream = upload_file.stream.read()
    if file_stream:
        #: Collect upload file data
        sample = {
            'filename': secure_filename(upload_file.filename),
            'sha1': hashlib.sha1(file_stream).hexdigest().upper(),
            'sha256': hashlib.sha256(file_stream).hexdigest().upper(),
            'md5': hashlib.md5(file_stream).hexdigest().upper(),
            'ssdeep': pydeep.hash_buf(file_stream),
            'filesize': len(file_stream),
            'filetype': magic.from_buffer(file_stream),
            'filemime': upload_file.mimetype,
            'upload_date': r.now(),
            'uploaded_by': "api",  # g.user
            'detection_ratio': dict(infected=0, count=0),
            'filestatus': "Processing"
        }
        insert_in_samples_db(sample)
        update_upload_file_metadata(sample)
        #: Run all configured scanners
        sample['detection_ratio'] = scan_upload(file_stream, sample)
        #: Done Processing File
        sample['filestatus'] = 'Complete'
        sample['scancomplete'] = r.now()
        update_sample_in_db(sample)
        found = is_hash_in_db(sample['md5'])
        if found:
            return jsonify(found)
        else:
            return jsonify(dict(error='Not a valid API end point.', response=404)), 404
    else:
        return jsonify(dict(error='Missing Parameters', response=400)), 400

def _pydeep_page(self):
    """Run pydeep and return the hash"""
    page_sig = None
    try:
        if self._config.SSDEEP_SIG:
            # s = self._config.YARA_RULES
            # ## Don't wrap hex or regex rules in quotes
            # if s[0] not in ("{", "/"): s = '"' + s + '"'
            # ## Scan for unicode strings
            # if self._config.WIDE: s += "wide"
            # rules = yara.compile(sources = {
            #     'n' : 'rule r1 {strings: $a = ' + s + ' condition: $a}'
            # })
            pass
        elif self._config.SSDEEP_FILE:
            # rules = yara.compile(self._config.YARA_FILE)
            pass
        elif self._config.SSDEEP_PIDOFF:
            (pid, base) = self._config.SSDEEP_PIDOFF.split(":")
            for proc in tasks.pslist(self._addr_space):
                if proc.UniqueProcessId == int(pid):
                    process_space = proc.get_process_address_space()
                    page_data = process_space.zread(int(base, 16), 0x1000)
                    page_sig = pydeep.hash_buf(page_data)
            if page_sig == "3::":
                debug.error("PID XXX and OFFSET YYY null or not found")
        else:
            debug.error("You must specify an ssdeep hash (-Y), a file to hash (-y), or a PID:BASE pair (-T)")
    except Exception as why:
        debug.error("Cannot compile rules: {0}".format(str(why)))
    return page_sig

def data_hashes(data, algo="sha256"):
    if not data:
        return None
    algo = algo.lower()
    if algo == "crc32":
        return int("%d" % (zlib.crc32(data) & 0xffffffff))
    elif algo == "adler32":
        return "%d" % (zlib.adler32(data) & 0xffffffff)
    elif algo == "md5":
        hasher = hashlib.md5()
    elif algo == "sha1":
        hasher = hashlib.sha1()
    elif algo == "sha224":
        hasher = hashlib.sha224()
    elif algo == "sha256":
        hasher = hashlib.sha256()
    elif algo == "sha384":
        hasher = hashlib.sha384()
    elif algo == "sha512":
        hasher = hashlib.sha512()
    elif algo == "ssdeep":
        if hasattr(pydeep, "hash_data"):
            return pydeep.hash_data(data)
        elif hasattr(pydeep, "hash_buf"):
            return pydeep.hash_buf(data)
        else:
            return None
    else:
        return None
    hasher.update(data)
    return hasher.hexdigest()

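# Hedged usage sketch (not from the original sources): exercising the
# data_hashes() dispatcher above. Assumes hashlib, zlib, and pydeep are
# imported at module level; the payload bytes are made up for illustration.
payload = b"The quick brown fox jumps over the lazy dog"
print(data_hashes(payload))             # sha256 hex digest (the default)
print(data_hashes(payload, "crc32"))    # crc32 as an integer
print(data_hashes(payload, "ssdeep"))   # fuzzy hash via pydeep
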
def import_dir(directory, r):
    p = r.pipeline(False)
    md5s = []
    sha1s = []
    sha256s = []
    # walk the directory argument (the original walked the global args.dir,
    # silently ignoring the parameter)
    for (dirpath, dirnames, filenames) in os.walk(directory):
        for filename in filenames:
            path = os.path.join(dirpath, filename)
            content = open(path, 'rb').read()
            md5 = hashlib.md5(content).hexdigest()
            sha1 = hashlib.sha1(content).hexdigest()
            sha256 = hashlib.sha256(content).hexdigest()
            ssdeep = pydeep.hash_buf(content)
            md5s.append(md5)
            sha1s.append(sha1)
            sha256s.append(sha256)
            p.hmset(sha256, {'md5': md5,
                             'sha1': sha1,
                             'filename': filename,
                             'path': path,
                             'ssdeep': ssdeep})
    p.execute()
    return md5s, sha1s, sha256s

def _load_meta(self, db, _object):
    content = base64.b64decode(_object["content"])
    entry = {
        "magic": magic.from_buffer(content),
        "ssdeep": pydeep.hash_buf(content),
        "md5": hashlib.md5(content).hexdigest(),
        "sha1": hashlib.sha1(content).hexdigest(),
        "sha256": hashlib.sha256(content).hexdigest()
    }
    if entry["magic"] == "MS-DOS executable":
        ### This is a weak application of magic
        try:
            pe_data = self._get_pe(content)
            for k, v in pe_data.iteritems():
                entry[k] = v
        except Exception, e:
            print e
    #entry_copy = copy.deepcopy(entry)
    #del entry
    #del content
    #gc.collect()
    db.table("content").get(_object["id"]).update({"load_meta": entry}).run()
    print "Loaded meta for object (%s) %s." % (_object["firmware_id"], _object["id"])

def generate_attributes(self):
    self.add_attribute('name', value=self.__section.name)
    size = self.add_attribute('size-in-bytes', value=self.__section.size)
    if int(size.value) > 0:
        # zero-filled sections can create too many correlations
        to_ids = float(self.__section.entropy) > 0
        disable_correlation = not to_ids
        self.add_attribute('entropy', value=self.__section.entropy)
        self.add_attribute('md5', value=md5(self.__data).hexdigest(),
                           disable_correlation=disable_correlation, to_ids=to_ids)
        self.add_attribute('sha1', value=sha1(self.__data).hexdigest(),
                           disable_correlation=disable_correlation, to_ids=to_ids)
        self.add_attribute('sha256', value=sha256(self.__data).hexdigest(),
                           disable_correlation=disable_correlation, to_ids=to_ids)
        self.add_attribute('sha512', value=sha512(self.__data).hexdigest(),
                           disable_correlation=disable_correlation, to_ids=to_ids)
        if HAS_PYDEEP and float(self.__section.entropy) > 0:
            if self.__section.name == '.rsrc':
                # ssdeep of .rsrc creates too many correlations
                disable_correlation = True
                to_ids = False
            self.add_attribute('ssdeep', value=pydeep.hash_buf(self.__data).decode(),
                               disable_correlation=disable_correlation, to_ids=to_ids)

def parse_dl(fname):
    try:
        filetype = magic.from_file(fname)
        filecontent = open(fname, 'rb').read()
        (mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime) = os.stat(fname)
        dl_file = {'filename': fname,
                   'mimetype': filetype,
                   'size': size,
                   'atime': time.strftime("%a, %d %b %Y %X GMT", time.gmtime(atime)),
                   'ctime': time.strftime("%a, %d %b %Y %X GMT", time.gmtime(ctime)),
                   'mtime': time.strftime("%a, %d %b %Y %X GMT", time.gmtime(mtime))}
        if filetype == 'HTML document, ASCII text, with CRLF line terminators' or filetype == 'XML document text':
            dl_file['jar_href'] = re.findall(r'\<jar\ href\=\"(.*?)\"', filecontent)[0]
            main_class_arr = re.findall('\<applet\-desc.*main\-class\=\"(.*?)\"', filecontent)
            if main_class_arr:
                dl_file['main_class'] = main_class_arr[0]
            dl_file['parameters'] = {}
            for param, value in re.findall(r'<param name="(.*?)" value="(.*?)"', filecontent):
                dl_file['parameters'][param] = value
        # computing md5
        m = hashlib.md5()
        m.update(filecontent)
        dl_file['md5'] = m.digest().encode('hex')
        # computing sha1
        s = hashlib.sha1()
        s.update(filecontent)
        dl_file['sha1'] = s.digest().encode('hex')
        # computing ssdeep
        dl_file['ssdeep'] = pydeep.hash_buf(filecontent)
        return dl_file
    except:
        print "Unable to stat the downloaded file"

def get_fuzzy(data):
    """
    Uses SSDeep's fuzzy.dll to return a fuzzy hash for a block of data
    Based off of http://codepaste.ru/13245/

    Arguments:
        data: binary data to perform hash of
    """
    error_code = ''
    try:
        import pydeep
        return pydeep.hash_buf(data)
    except ImportError:
        # Oh man, this is going to be ugly
        fuzzy_dll = os.path.join(SCRIPT_PATH, 'fuzzy.dll')
        if not file_exists(fuzzy_dll):
            root_fuzzy_dll = os.path.join(os.path.dirname(os.path.realpath(sys.argv[0])), fuzzy_dll)
            if file_exists(root_fuzzy_dll):
                fuzzy_dll = root_fuzzy_dll
        try:
            fuzzy = ctypes.CDLL(fuzzy_dll)
        except WindowsError, error:
            error = str(error)
            if '[Error ' in error:
                error_code = error.split()[1].split(']')[0]
        if error_code:
            if error_code == '193':
                py_bits = struct.calcsize('P') * 8
                return '[!] %s incompatible. Needs to be same as Python: %d-bits' % (fuzzy_dll, py_bits)
            elif error_code == '126':
                return '[!] %s not found' % fuzzy_dll
            else:
                return '[!] %s not loaded. Unknown error.'
        return

def upload():
    form = SearchForm(request.form)
    if request.method == 'POST':
        # TODO: use secure_filename
        for upload_file in request.files.getlist('files[]'):
            file_stream = upload_file.stream.read()
            file_md5 = hashlib.md5(file_stream).hexdigest().upper()
            #: Add file hash to Bloomfilter unless it is already there
            #: Check if user wishes to force a sample rescan
            if file_md5 not in bf or form.force.data:
                bf.add(file_md5)
                #: Collect upload file data
                sample = {'filename': secure_filename(upload_file.filename.encode('utf-8')),
                          'sha1': hashlib.sha1(file_stream).hexdigest().upper(),
                          'sha256': hashlib.sha256(file_stream).hexdigest().upper(),
                          'md5': file_md5,
                          'ssdeep': pydeep.hash_buf(file_stream),
                          'filesize': len(file_stream),
                          'filetype': magic.from_buffer(file_stream),
                          'filemime': upload_file.mimetype,
                          'upload_date': r.now(),
                          'uploaded_by': "jmaine",  # g.user
                          'detection_ratio': dict(infected=0, count=0),
                          'filestatus': 'Processing'}
                insert_in_samples_db(sample)
                update_upload_file_metadata(sample)
                #: Run all configured scanners
                sample['detection_ratio'] = scan_upload(file_stream, sample)
                #: Done Processing File
                sample['filestatus'] = 'Complete'
                sample['scancomplete'] = r.now()
                update_sample_in_db(sample)
        #: Once Finished redirect user to the samples page
        return redirect(url_for('.samples'))
    return render_template('samples.html')

def __init__(self, jsonObj):
    self.address = jsonObj['address']
    self.name = jsonObj['contract_name']
    self.bytecode = jsonObj['bytecode']
    self.hash = pydeep.hash_buf(self.bytecode)
    contract.contract_list.append(self)
    contract.address_list.append(self.address)
    contract.name_list.append(self.name)
    contract.hash_list.append(self.hash)

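# A possible follow-up sketch (an assumption, not from the original project):
# pair-wise ssdeep comparison over the class-level registries populated by
# __init__ above, flagging contracts with similar bytecode. pydeep.compare()
# returns a 0-100 similarity score; the threshold is arbitrary.
import pydeep

def similar_contracts(threshold=80):
    pairs = []
    hashes = contract.hash_list
    names = contract.name_list
    for i in range(len(hashes)):
        for j in range(i + 1, len(hashes)):
            score = pydeep.compare(hashes[i], hashes[j])
            if score >= threshold:
                pairs.append((names[i], names[j], score))
    return pairs
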
def test_pydeep():
    for test in testL:
        filename, filelen, filehash = test
        data = io.open(filename, 'rb').read()
        hash_buf = pydeep.hash_buf(data)
        hash_file = pydeep.hash_file(filename)
        assert len(data) == filelen, "File length error"
        assert hash_buf == filehash, "Error hashing %s using hash_buf" % filename
        assert hash_file == filehash, "Error hashing %s using hash_file" % filename

def upload():
    form = SearchForm(request.form)
    if request.method == 'POST':
        # TODO: use secure_filename
        for upload_file in request.files.getlist('files[]'):
            file_stream = upload_file.stream.read()
            file_md5 = hashlib.md5(file_stream).hexdigest().upper()
            #: Add file hash to Bloomfilter unless it is already there
            #: Check if user wishes to force a sample rescan
            if file_md5 not in bf or form.force.data:
                bf.add(file_md5)
                #: Collect upload file data
                sample = {
                    'filename': secure_filename(upload_file.filename.encode('utf-8')),
                    'sha1': hashlib.sha1(file_stream).hexdigest().upper(),
                    'sha256': hashlib.sha256(file_stream).hexdigest().upper(),
                    'md5': file_md5,
                    'ssdeep': pydeep.hash_buf(file_stream),
                    'filesize': len(file_stream),
                    'filetype': magic.from_buffer(file_stream),
                    'filemime': upload_file.mimetype,
                    'upload_date': r.now(),
                    'uploaded_by': "anonymous",  # g.user
                    'detection_ratio': dict(infected=0, count=0),
                    'filestatus': 'Processing'
                }
                insert_in_samples_db(sample)
                update_upload_file_metadata(sample)
                #: Run all configured scanners
                sample['detection_ratio'] = scan_upload(file_stream, sample)
                #: Done Processing File
                sample['filestatus'] = 'Complete'
                sample['scancomplete'] = r.now()
                update_sample_in_db(sample)
            else:
                # flash('File {0} already submitted. Visit: {1}'.format(secure_filename(upload_file.filename.encode('utf-8')), file_md5), 'error')
                flash('File {} already submitted.'.format(
                    secure_filename(upload_file.filename.encode('utf-8'))), 'error')
                return redirect(url_for('.index'))
        #: Once Finished redirect user to the samples page
        return redirect(url_for('.samples'))
    return render_template('samples.html')

def get_ssdeep(self):
    if not HAVE_SSDEEP:
        return None
    try:
        return pydeep.hash_buf(self.file_data)
    except MemoryError:
        logging.exception("Out of memory")
        sys.exit("Out of memory error")
    except Exception:
        return None

def generate_attributes(self):
    self.name = self.section_info['Name']['Value']
    self.size = self.section_info['SizeOfRawData']['Value']
    if self.size > 0:
        self.entropy = self.section_info['Entropy']
        self.md5 = self.section_info['MD5']
        self.sha1 = self.section_info['SHA1']
        self.sha256 = self.section_info['SHA256']
        self.sha512 = self.section_info['SHA512']
        self.ssdeep = pydeep.hash_buf(self.data).decode()

def prepare_hashes(r, buf, path):
    deephash = pydeep.hash_buf(buf)
    sha256 = hashlib.sha256(file(path, 'rb').read()).hexdigest()
    p = r.pipeline(False)
    p.hmset(sha256, {'path': path, 'ssdeep': deephash})
    p.sadd('hashes', sha256)
    block_size, chunk, double_chunk = preprocess_hash(deephash)
    add_chunks_db(p, block_size, chunk, sha256)
    add_chunks_db(p, block_size, double_chunk, sha256)
    p.execute()

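# Context sketch (an assumption, not from the original project, whose
# preprocess_hash() helper is not shown here): an ssdeep digest has the form
# "blocksize:chunk:double_chunk", which is presumably what gets split apart
# before the chunks are indexed in Redis for similarity lookups.
import pydeep

digest = pydeep.hash_buf(b"example buffer contents")
block_size, chunk, double_chunk = digest.decode().split(':')
print(block_size, chunk, double_chunk)
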
def generate_attributes(self):
    self.add_attribute('name', value=self.__section.name)
    size = self.add_attribute('size-in-bytes', value=self.__section.size)
    if int(size.value) > 0:
        self.add_attribute('entropy', value=self.__section.entropy)
        self.add_attribute('md5', value=md5(self.__data).hexdigest())
        self.add_attribute('sha1', value=sha1(self.__data).hexdigest())
        self.add_attribute('sha256', value=sha256(self.__data).hexdigest())
        self.add_attribute('sha512', value=sha512(self.__data).hexdigest())
        if HAS_PYDEEP:
            self.add_attribute('ssdeep', value=pydeep.hash_buf(self.__data).decode())

def generate_attributes(self):
    self.filename = os.path.basename(self.filepath)
    self.size = os.path.getsize(self.filepath)
    if self.size > 0:
        self.filetype = magic.from_buffer(self.data)
        self.entropy = self.__entropy_H(self.data)
        self.md5 = md5(self.data).hexdigest()
        self.sha1 = sha1(self.data).hexdigest()
        self.sha256 = sha256(self.data).hexdigest()
        self.sha512 = sha512(self.data).hexdigest()
        self.ssdeep = pydeep.hash_buf(self.data).decode()

def hash_mem_file(file):
    # binary mode so each 4096-byte page is read as raw bytes (the original
    # opened the dump in text mode)
    mem_file = open(file, "rb")
    mem_hashes = {}
    pos = 0
    mem_file.seek(pos)
    mem_buf = mem_file.read(4096)
    while len(mem_buf) > 0:
        mem_hashes[pos] = pydeep.hash_buf(mem_buf)
        pos += 4096
        mem_file.seek(pos)
        mem_buf = mem_file.read(4096)
    return mem_hashes

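# Hypothetical caller (assumes two raw memory dumps on disk; the file names
# are placeholders) showing how the per-page maps from hash_mem_file() above
# can be diffed with pydeep.compare(), which returns a 0-100 similarity score.
import pydeep

hashes_a = hash_mem_file("dump_a.bin")
hashes_b = hash_mem_file("dump_b.bin")
for offset in sorted(set(hashes_a) & set(hashes_b)):
    score = pydeep.compare(hashes_a[offset], hashes_b[offset])
    if score > 50:  # arbitrary reporting threshold
        print("page 0x%08x is %d%% similar" % (offset, score))
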
def generate_attributes(self):
    self.add_attribute('name', value=self.__section.name)
    self.add_attribute('type', value=str(self.__section.type).split('.')[1])
    for flag in self.__section.flags_list:
        self.add_attribute('flag', value=str(flag).split('.')[1])
    size = self.add_attribute('size-in-bytes', value=self.__section.size)
    if int(size.value) > 0:
        self.add_attribute('entropy', value=self.__section.entropy)
        self.add_attribute('md5', value=md5(self.__data).hexdigest())
        self.add_attribute('sha1', value=sha1(self.__data).hexdigest())
        self.add_attribute('sha256', value=sha256(self.__data).hexdigest())
        self.add_attribute('sha512', value=sha512(self.__data).hexdigest())
        if HAS_PYDEEP:
            self.add_attribute('ssdeep', value=pydeep.hash_buf(self.__data).decode())

def generate_attributes(self):
    self.add_attribute('filename', value=self.__filename)
    size = self.add_attribute('size-in-bytes', value=len(self.__data))
    if int(size.value) > 0:
        self.add_attribute('entropy', value=self.__entropy_H(self.__data))
        self.add_attribute('md5', value=md5(self.__data).hexdigest())
        self.add_attribute('sha1', value=sha1(self.__data).hexdigest())
        self.add_attribute('sha256', value=sha256(self.__data).hexdigest())
        self.add_attribute('sha512', value=sha512(self.__data).hexdigest())
        self.add_attribute('malware-sample', value=self.__filename, data=self.__pseudofile)
        if HAS_MAGIC:
            self.add_attribute('mimetype', value=magic.from_buffer(self.__data))
        if HAS_PYDEEP:
            self.add_attribute('ssdeep', value=pydeep.hash_buf(self.__data).decode())

def scan(self, offset, maxlen):
    # Start scanning from offset until maxlen:
    i = offset
    pydeep_hash = self.pydeep_hash
    while i < offset + maxlen:
        # Read some data and match it.
        data = self.address_space.zread(i, 0x1000)
        if data:
            data_hash = pydeep.hash_buf(data)
            alike = pydeep.compare(pydeep_hash, data_hash)
            if alike > 10:
                yield data_hash, i, alike
        i += 0x1000

def upload_form(request):
    if request.method == 'POST':
        form = UploadUrlForm(request.POST)
        if form.is_valid():
            uri = request.POST['uri']
            newurl = URL(
                uri=uri,
                ticket=request.POST['ticket'],
                md5=hashlib.md5(uri).hexdigest(),
                fuzzy=pydeep.hash_buf(uri),
                #html = handler.get_html(uri),
            )
            ua = request.POST['UserAgent']
            results = handler.get_thug(uri, ua, request.POST['ticket'])
            #newurl.ssdeep_compare = unicode(handler.ssdeep_compare(newurl.fuzzy, newurl.md5), 'utf-8', errors="replace")
            newurl.ssdeep_compare = handler.ssdeep_compare(newurl.fuzzy, newurl.md5)
            newurl.html = unicode(results['html'], 'utf-8', errors="replace")
            newurl.thug = unicode(results['thug_res'], 'utf-8', errors="replace")
            newurl.js = unicode(results['js'], 'utf-8', errors="replace")
            newurl.js_didier = unicode(results['js_didier'], 'utf-8', errors="replace")
            #newurl.js = handler.get_js(newurl.html)
            # If VirusTotal is activated, get vt results
            #URL['vt'] = handler.get_vt(url)
            newurl.save()
            newpage = "/uanalysis/url/" + newurl.md5
            return HttpResponseRedirect(newpage)
        else:
            form = UploadUrlForm()
            url = URL.objects.filter(created__lte=timezone.now()).order_by('-id')[:25]
            return render(request, 'uanalysis/upload_form.html', {'form': form, 'url': url})
    else:
        form = UploadUrlForm()
        url = URL.objects.filter(created__lte=timezone.now()).order_by('-id')[:25]
        return render(request, 'uanalysis/upload_form.html', {'form': form, 'url': url})

def get_hashes(self):
    crc = 0
    md5 = hashlib.md5()
    sha1 = hashlib.sha1()
    sha256 = hashlib.sha256()
    sha512 = hashlib.sha512()
    crc = binascii.crc32(self.artifact, crc)
    md5.update(self.artifact)
    sha1.update(self.artifact)
    sha256.update(self.artifact)
    sha512.update(self.artifact)
    self.hashes['crc32'] = ''.join('%02X' % ((crc >> i) & 0xff) for i in [24, 16, 8, 0])
    self.hashes['md5'] = md5.hexdigest()
    self.hashes['sha1'] = sha1.hexdigest()
    self.hashes['sha256'] = sha256.hexdigest()
    self.hashes['sha512'] = sha512.hexdigest()
    self.hashes['ssdeep'] = pydeep.hash_buf(self.artifact)

def get_fuzzy(data):
    """
    Uses SSDeep's fuzzy.dll to return a fuzzy hash for a block of data
    Based off of http://codepaste.ru/13245/

    Arguments:
        data: binary data to perform hash of
    """
    error_code = ''
    #try:
    if True:
        try:
            import pydeep
            return pydeep.hash_buf(data)
        except ImportError:
            return None
    #except ImportError:
    # Oh man, this is going to be ugly
    fuzzy_dll = os.path.join(SCRIPT_PATH, 'fuzzy.dll')
    if not file_exists(fuzzy_dll):
        root_fuzzy_dll = os.path.join(os.path.dirname(os.path.realpath(sys.argv[0])), fuzzy_dll)
        if file_exists(root_fuzzy_dll):
            fuzzy_dll = root_fuzzy_dll
    try:
        fuzzy = ctypes.CDLL(fuzzy_dll)
    except:
        return
    if error_code:
        if error_code == '193':
            py_bits = struct.calcsize('P') * 8
            return '[!] %s incompatible. Needs to be same as Python: %d-bits' % (fuzzy_dll, py_bits)
        elif error_code == '126':
            return '[!] %s not found' % fuzzy_dll
        else:
            return '[!] %s not loaded. Unknown error.'
        return
    out_buf = ctypes.create_string_buffer(b'\x00' * 1024)
    fuzzy.fuzzy_hash_buf(data, len(data), out_buf)
    return out_buf.value

key = {'md5': sample['md5']}
metadata = {}
logger.debug('[%s] Downloading data' % sampleno)
data = get_file(db, md5=sample['md5'])

# Do analysis
logger.debug('[%s] Analysing' % sampleno)
# metadata['md5'] = hashlib.md5(data).hexdigest()
metadata['sha1'] = hashlib.sha1(data).hexdigest()
metadata['sha256'] = hashlib.sha256(data).hexdigest()
metadata['sha512'] = hashlib.sha512(data).hexdigest()
metadata['ssdeep'] = pydeep.hash_buf(data)

# Store results
logger.debug('[%s] Storing results into MongoDB' % sampleno)
for (metakey, metaval) in metadata.iteritems():
    db.fs.files.update(key, {'$set': {metakey: metaval}}, upsert=True)

logger.debug('[%s] Removing temporary data' % sampleno)
del key
del metadata
del data

logger.info('[%s] Metadata updated' % sampleno)

def render_text(self, outfd, data):
    if not has_distorm3:
        debug.warning("For best results please install distorm3")
    if self._config.DUMP_DIR and not os.path.isdir(self._config.DUMP_DIR):
        debug.error(self._config.DUMP_DIR + " is not a directory")
    for task in data:
        for vad, address_space in task.get_vads(vad_filter=task._injection_filter):
            if self._is_vad_empty(vad, address_space):
                continue
            if self._config.SSDEEP and has_pydeep:
                skip = False
                # read the first page of the VAD then hash it
                inject_buf = address_space.zread(vad.Start, 0x1000)
                inject_hash = pydeep.hash_buf(inject_buf)
                # loop through all the whitelist hashes and compare
                for (whitelist_name, whitelist_hash) in whitelist_ssdeep:
                    alike = pydeep.compare(inject_hash, whitelist_hash)
                    # the comparison is greater than the threshold so display an informational message
                    # then skip the rest of the output in normal malfind
                    if alike > self._config.THRESHOLD:
                        outfd.write("Process: {0} Pid: {1} Address: {2:#x}\n".format(
                            task.ImageFileName, task.UniqueProcessId, vad.Start))
                        outfd.write("Injection is {0}% similar to whitelist hook {1}\n".format(alike, whitelist_name))
                        #outfd.write("       hook: {0}\n".format(inject_hash))
                        #outfd.write("  whitelist: {0}\n".format(whitelist_hash))
                        skip = True
                        continue
                if skip:
                    continue
            content = address_space.zread(vad.Start, 64)
            outfd.write("Process: {0} Pid: {1} Address: {2:#x}\n".format(
                task.ImageFileName, task.UniqueProcessId, vad.Start))
            outfd.write("Vad Tag: {0} Protection: {1}\n".format(
                vad.Tag, vadinfo.PROTECT_FLAGS.get(vad.u.VadFlags.Protection.v(), "")))
            outfd.write("Flags: {0}\n".format(str(vad.u.VadFlags)))
            outfd.write("\n")
            outfd.write("{0}\n".format("\n".join(
                ["{0:#010x} {1:<48} {2}".format(vad.Start + o, h, ''.join(c))
                 for o, h, c in utils.Hexdump(content)])))
            outfd.write("\n")
            outfd.write("\n".join(
                ["{0:#x} {1:<16} {2}".format(o, h, i)
                 for o, i, h in malfind.Disassemble(content, vad.Start)]))
            # Dump the data if --dump-dir was supplied
            if self._config.DUMP_DIR:
                filename = os.path.join(self._config.DUMP_DIR,
                                        "process.{0:#x}.{1:#x}.dmp".format(task.obj_offset, vad.Start))
                self.dump_vad(filename, vad, address_space)
            outfd.write("\n\n")

def _get_ssdeep(self):
    try:
        return pydeep.hash_buf(self._file_data)
    except Exception as e:
        print e
        return None

def get_fuzzy(fh):
    fuzzy = pydeep.hash_buf(fh.read())
    position = fh.seek(0, 0)
    return fuzzy

def parse_idx(fname):
    data = open(fname, 'rb')
    filecontent = open(fname, 'rb').read()
    filesize = os.path.getsize(fname)
    busy_byte = data.read(1)
    complete_byte = data.read(1)
    cache_ver = struct.unpack(">i", data.read(4))[0]
    idx_file = {'filename': fname,
                'mimetype': "IDX file: %s (IDX File Version %d.%02d)" % (fname, cache_ver / 100, cache_ver - 600)}
    if cache_ver not in (602, 603, 604, 605, 606):
        print "Invalid IDX header found"
        print "Found: 0x%s" % cache_ver
    # default the section lengths so unsupported versions don't hit a NameError below
    sec2_len = sec3_len = sec4_len = sec5_len = 0
    # computing md5
    m = hashlib.md5()
    m.update(filecontent)
    idx_file['md5'] = m.digest().encode('hex')
    # computing sha1
    s = hashlib.sha1()
    s.update(filecontent)
    idx_file['sha1'] = s.digest().encode('hex')
    # computing ssdeep
    idx_file['ssdeep'] = pydeep.hash_buf(filecontent)
    # Different IDX cache versions have data in different offsets
    if cache_ver in [602, 603, 604, 605]:
        if cache_ver in [602, 603, 604]:
            data.seek(8)
        elif cache_ver == 605:
            data.seek(6)
        is_shortcut_img = data.read(1)
        content_len = struct.unpack(">l", data.read(4))[0]
        last_modified_date = struct.unpack(">q", data.read(8))[0] / 1000
        expiration_date = struct.unpack(">q", data.read(8))[0] / 1000
        validation_date = struct.unpack(">q", data.read(8))[0] / 1000
        sec1 = {}
        sec1['last_modified_date'] = time.strftime("%a, %d %b %Y %X GMT", time.gmtime(last_modified_date))
        if expiration_date:
            sec1['expiration_date'] = time.strftime("%a, %d %b %Y %X GMT", time.gmtime(expiration_date))
        if validation_date and cache_ver > 602:
            # While 6.02 technically supports this, every sample I've seen just has 3 null bytes and skips to Section 2
            sec1['validation_date'] = time.strftime("%a, %d %b %Y %X GMT", time.gmtime(validation_date))
        if cache_ver == 602:
            sec2_len = 1
            sec3_len = 0
            sec4_len = 0
            sec5_len = 0
        elif cache_ver in [603, 604, 605]:
            known_to_be_signed = data.read(1)
            sec2_len = struct.unpack(">i", data.read(4))[0]
            sec3_len = struct.unpack(">i", data.read(4))[0]
            sec4_len = struct.unpack(">i", data.read(4))[0]
            sec5_len = struct.unpack(">i", data.read(4))[0]
            blacklist_timestamp = struct.unpack(">q", data.read(8))[0] / 1000
            cert_expiration_date = struct.unpack(">q", data.read(8))[0] / 1000
            class_verification_status = data.read(1)
            reduced_manifest_length = struct.unpack(">l", data.read(4))[0]
            #print "Section 2 length: %d" % sec2_len
            if sec3_len:
                print "Section 3 length: %d" % sec3_len
            if sec4_len:
                print "Section 4 length: %d" % sec4_len
            if sec5_len:
                print "Section 5 length: %d" % sec5_len
            if expiration_date:
                sec1['blacklist_date'] = time.strftime("%a, %d %b %Y %X GMT", time.gmtime(blacklist_timestamp))
            if cert_expiration_date:
                sec1['cert_expiration_date'] = time.strftime("%a, %d %b %Y %X GMT", time.gmtime(cert_expiration_date))
    else:
        print "Current file version, %d, is not supported at this time." % cache_ver
    if sec2_len:
        if cache_ver == 602:
            idx_file['sec2'] = sec2_parse_old(data)
        else:
            idx_file['sec2'] = sec2_parse(data)
    if sec3_len:
        #print "\n[*] Section 3 (Jar Manifest) found:"
        idx_file['sec3'] = sec3_parse(data, sec2_len, sec3_len, filesize)
    if sec4_len:
        #print "\n[*] Section 4 (Code Signer) found:"
        idx_file['sec4'] = sec4_parse(data, sec2_len, sec3_len, filesize)
    if sec5_len:
        print "\n[*] Section 5 found (offset 0x%X, length %d bytes)" % (128 + sec2_len + sec3_len + sec4_len, sec5_len)
    return idx_file

def _get_ssdeep(self):
    try:
        return pydeep.hash_buf(self._file_data).decode()
    except Exception as e:
        logging.warn(f"Error: {e}")
        return None

def get_ssdeep(file_bytes):
    """Returns the SSDEEP."""
    return pydeep.hash_buf(file_bytes).decode()

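# Minimal caller sketch for get_ssdeep() above; the file path is a placeholder.
with open("sample.bin", "rb") as fh:
    print(get_ssdeep(fh.read()))
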
def render_text(self, outfd, data):
    for process, module, hook, addr_space in data:
        if not self._config.NO_WHITELIST:
            if process:
                process_name = str(process.ImageFileName)
            else:
                process_name = ''
            if self.whitelist(hook.hook_mode | hook.hook_type,
                              process_name, hook.VictimModule,
                              hook.HookModule, hook.Function):
                #debug.debug("Skipping whitelisted function: {0} {1} {2} {3}".format(
                #    process_name, hook.VictimModule, hook.HookModule,
                #    hook.Function))
                continue
        if self._config.SSDEEP and has_pydeep:
            skip = False
            # read from the start of the page containing the hook, then hash it
            page_address = hook.hook_address & 0xFFFFF000
            hook_buf = addr_space.zread(page_address, 0x1000)
            hook_hash = pydeep.hash_buf(hook_buf)
            # loop through all the whitelist hashes and compare
            for (whitelist_name, whitelist_hash) in whitelist_ssdeep:
                alike = pydeep.compare(hook_hash, whitelist_hash)
                # the comparison is greater than the threshold so display an informational message
                # then skip the rest of the output in normal malfind
                if alike > self._config.THRESHOLD:
                    if process:
                        outfd.write('Process: {0} ({1})\n'.format(
                            process.UniqueProcessId, process.ImageFileName))
                    outfd.write("Hook at 0x{0:x} in page 0x{1:x} is {2}% similar to whitelist hook {3}\n".format(
                        hook.hook_address, page_address, alike, whitelist_name))
                    #outfd.write("       hook: {0}\n".format(hook_hash))
                    #outfd.write("  whitelist: {0}\n".format(whitelist_hash))
                    outfd.write("\n")
                    skip = True
                    continue
            if skip:
                continue
        outfd.write("*" * 72 + "\n")
        outfd.write("Hook mode: {0}\n".format(hook.Mode))
        outfd.write("Hook type: {0}\n".format(hook.Type))
        if process:
            outfd.write('Process: {0} ({1})\n'.format(
                process.UniqueProcessId, process.ImageFileName))
        outfd.write("Victim module: {0} ({1:#x} - {2:#x})\n".format(
            str(module.BaseDllName or '') or ntpath.basename(str(module.FullDllName or '')),
            module.DllBase, module.DllBase + module.SizeOfImage))
        outfd.write("Function: {0}\n".format(hook.Detail))
        outfd.write("Hook address: {0:#x}\n".format(hook.hook_address))
        outfd.write("Hooking module: {0}\n\n".format(hook.HookModule))
        for n, info in enumerate(hook.disassembled_hops):
            (address, data) = info
            s = ["{0:#x} {1:<16} {2}".format(o, h, i)
                 for o, i, h in malfind.Disassemble(data, int(address),
                     bits="32bit" if hook.decode_bits == distorm3.Decode32Bits else "64bit")]
            outfd.write("Disassembly({0}):\n{1}".format(n, "\n".join(s)))
            outfd.write("\n\n")

email: [email protected]
------------------------------
This takes a file and a name as input and will provide a list of tuples,
where each tuple is that name and the ssdeep hash of every 4096 byte chunk
of the file. These chunks correspond to a page of memory. This script is
intended for use with my volatility plugins malfinddeep and apihooksdeep.
'''

import argparse
import pydeep


def cliargs():
    '''Parse CLI args'''
    parser = argparse.ArgumentParser(description="hash_by_page.py -- return SSDeep hash of each 4096 byte chunk of a file in whitelist format for Volatility")
    parser.add_argument('-n', '--name', required=True, action='store', dest='name',
                        help='Name associated with file')
    parser.add_argument('-f', '--file', required=True, action='store', dest='filename',
                        help='File to hash')
    args = parser.parse_args()
    return args


args = cliargs()
fh = open(args.filename, "rb")  # binary mode so every page hashes byte-for-byte
buff = fh.read(0x1000)
while len(buff) > 0:
    hash_buff = pydeep.hash_buf(buff)
    if hash_buff != "3::":  # skip the degenerate hash produced by featureless (e.g. null) pages
        print "('" + args.name + "', '" + hash_buff + "'),"
    buff = fh.read(0x1000)

import pydeep

file1 = 'calc.exe'
file2 = 'notepad.exe'
file1hash = '1536:JEl14rQcWAkN7GAlqbkfAGQGV8aMbrNyrf1w+noPvLV6eBsCXKc:JYmZWXyaiedMbrN6pnoXL1BsC'
file2hash = '1536:0awOnbNQKLjWDyy1o5RefYMJUEbooPRrKKRl1P3:0YNQKPWDyDRefVJltZrpRl1P3'
data1 = open(file1).read()
data2 = open(file2).read()
assert len(data1) == 114688, "File length error"
assert len(data2) == 69120, "File length error"
hash01 = pydeep.hash_buf(data1)
hash02 = pydeep.hash_buf(data2)
assert hash01 == file1hash, "Error hashing file1"
assert hash02 == file2hash, "Error hashing file2"
hash1 = pydeep.hash_file(file1)
hash2 = pydeep.hash_file(file2)
assert hash1 == file1hash, "Error hashing file1"
assert hash2 == file2hash, "Error hashing file2"
assert pydeep.compare(hash1, hash2) == 0, "Error fuzzy compare value"
print 'Stuff looks fine..'

import pydeep

file1 = 'calc.exe'
file2 = 'notepad.exe'
file3 = 'bc'
file1hash = '1536:JEl14rQcWAkN7GAlqbkfAGQGV8aMbrNyrf1w+noPvLV6eBsCXKc:JYmZWXyaiedMbrN6pnoXL1BsC'
file2hash = '1536:0awOnbNQKLjWDyy1o5RefYMJUEbooPRrKKRl1P3:0YNQKPWDyDRefVJltZrpRl1P3'
file3hash = '1536:MsjYdR3Bul8hcURWhEcg4/btZzDcQflbCUPEBEh8wkcGDioxMYeo7:TYf8l8htRWA4ztZsGlWUPEBEh8wmxMYe'
data1 = open(file1).read()
data2 = open(file2).read()
data3 = open(file3).read()
assert len(data1) == 114688, "File length error"
assert len(data2) == 69120, "File length error"
assert len(data3) == 77168, "File length error"
hash01 = pydeep.hash_buf(data1)
hash02 = pydeep.hash_buf(data2)
hash03 = pydeep.hash_buf(data3)
assert hash01 == file1hash, "Error hashing file1"
assert hash02 == file2hash, "Error hashing file2"
assert hash03 == file3hash, "Error hashing file3"
hash1 = pydeep.hash_file(file1)
hash2 = pydeep.hash_file(file2)
hash3 = pydeep.hash_file(file3)
assert hash1 == file1hash, "Error hashing file1"
assert hash2 == file2hash, "Error hashing file2"
assert hash3 == file3hash, "Error hashing file3"
assert pydeep.compare(hash1, hash2) == 0, "Error fuzzy compare value"
print 'Stuff looks fine..'

def hash_mem_block(block):
    return pydeep.hash_buf(block)

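# End-to-end sketch (illustrative only, with made-up data): hash two
# mostly-identical 4 KiB blocks with hash_mem_block() above and score them
# with pydeep.compare(), which returns 0 (unrelated) to 100 (identical).
import os
import pydeep

block_a = os.urandom(4096)
block_b = block_a[:4000] + os.urandom(96)  # same block with the tail changed
print(pydeep.compare(hash_mem_block(block_a), hash_mem_block(block_b)))
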