def update_upload_file_metadata(sample):
    """Insert or refresh the metadata record for an uploaded sample.

    Looks the sample up by MD5 via ``is_hash_in_db()``. If a record exists
    its hash/size/type fields are refreshed; otherwise a new record keyed
    by md5 is created. In both cases this upload's details (filename,
    date, uploader, detection ratio) are appended to the record's
    ``user_uploads`` history and the record is written with ``db_insert()``.

    :param sample: dict with keys md5, sha1, sha256, ssdeep, filesize,
        filetype, filemime, filename, upload_date, uploaded_by,
        detection_ratio.
    """
    upload = {
        'filename': sample['filename'],
        'upload_date': sample['upload_date'],
        'uploaded_by': sample['uploaded_by'],
        'detection_ratio': sample['detection_ratio'],
    }
    record = is_hash_in_db(sample['md5'])
    if not record:
        # No existing record — start a fresh one keyed by md5.
        # (The original duplicated the whole dict literal here and bound it
        # to a local named `file`, shadowing the builtin.)
        record = {'md5': sample['md5']}
    # Refresh (or set) the file-level metadata in either case.
    for key in ('sha1', 'sha256', 'ssdeep', 'filesize', 'filetype', 'filemime'):
        record[key] = sample[key]
    record.setdefault('user_uploads', []).append(upload)
    db_insert(record)
def run_metascan(this_file, file_md5):
    """Scan a file stream with MetaScan and merge the results into the DB.

    Parses each AV engine's definition time, tallies an infected/count
    detection ratio, and appends the raw results to the latest entry of the
    record's ``user_uploads`` list.

    :param this_file: raw file bytes/stream to submit to MetaScan.
    :param file_md5: uppercase MD5 hex digest used as the DB key.
    :returns: the parsed MetaScan results dict, or None when MetaScan is
        unreachable or returns a non-200 status.
    """
    # TODO: remove these hardcoded fallback connection settings.
    if config.has_section('Metascan'):
        meta_scan = MetaScan(ip=config.get('Metascan', 'IP'),
                             port=config.get('Metascan', 'Port'))
    else:
        meta_scan = MetaScan(ip='127.0.0.1', port='8008')
    if not meta_scan.connected:
        return None
    results = meta_scan.scan_file_stream_and_get_results(this_file)
    if results.status_code != 200:
        print_error("MetaScan can not be reached.")
        return None
    metascan_results = results.json()
    #: Calculate AV Detection Ratio
    detection_ratio = dict(infected=0, count=0)
    # Alias the nested dict once; mutating it mutates metascan_results.
    scan_details = metascan_results[u'scan_results'][u'scan_details']
    for av in scan_details:
        scan_details[av][u'def_time'] = parser.parse(scan_details[av][u'def_time'])
        detection_ratio['count'] += 1
        if scan_details[av]['scan_result_i'] == 1:
            detection_ratio['infected'] += 1
    found = is_hash_in_db(file_md5)
    if found:
        found['user_uploads'][-1].setdefault('metascan_results', []).append(metascan_results)
        found['user_uploads'][-1]['detection_ratio']['infected'] += detection_ratio['infected']
        found['user_uploads'][-1]['detection_ratio']['count'] += detection_ratio['count']
        data = found
    else:
        # BUG FIX: the original indexed data['user_uploads'][-1] on a
        # brand-new dict, which raised KeyError. Seed the structure first.
        data = dict(md5=file_md5, user_uploads=[{}])
        data['user_uploads'][-1].setdefault('metascan_results', []).append(metascan_results)
    db_insert(data)
    return metascan_results
def scan_upload(file_stream, sample):
    """Run every configured scanner over an uploaded file.

    Runs the AV workers, the static-analysis scanners (EXIF, TrID, and
    PE/PDF analysis when the file type matches), and the intel hash
    search, then returns the detection ratio of the most recent upload
    entry for this hash.

    :param file_stream: raw bytes of the uploaded file.
    :param sample: dict describing the upload; must contain 'filename',
        'md5' and 'detection_ratio'.
    :returns: dict with 'infected' and 'count' keys.
    """
    # job = q.enqueue(run_workers, file_stream)
    # print job.result
    print("<< Now Scanning file: {} >>>>>>>>>>>>>>>>>>>>>>>>>>>>".format(sample['filename']))
    # print_info("Scanning with MetaScan now.")
    # if run_metascan(file_stream, sample['md5']):
    #     print_success("MetaScan Complete.")
    #: Run the AV workers on the file.
    print_info("Scanning with AV workers now.")
    if run_workers(file_stream):
        print_success("Malice AV scan Complete.")
    print_info("Performing file analysis now.")
    print_item("Scanning with EXIF now.", 1)
    exif_scan(file_stream, sample['md5'])
    print_item("Scanning with TrID now.", 1)
    trid_scan(file_stream, sample['md5'])
    if file_is_pe(file_stream):
        #: Run PE Analysis
        print_item("Scanning with PE Analysis now.", 1)
        pe_scan(file_stream, sample['md5'])
    if file_is_pdf(file_stream):
        #: Run PDF Analysis
        pdfparser_scan(file_stream, sample['md5'])
        pdfid_scan(file_stream, sample['md5'])
    #: Run Intel workers
    print_item("Searching for Intel now.", 1)
    single_hash_search(sample['md5'])
    print_success("File Analysis Complete.")
    found = is_hash_in_db(sample['md5'])
    if not found:
        # BUG FIX: the original unconditionally subscripted `found`, raising
        # TypeError if the record was missing; fall back to the sample's own
        # detection ratio instead.
        return sample['detection_ratio']
    return found['user_uploads'][-1]['detection_ratio']
def upload_view():
    """API endpoint: accept a file upload, record it, and scan it.

    Hashes the upload (SHA1/SHA256/MD5/ssdeep), stores the sample record,
    runs all configured scanners, and returns the resulting DB record as
    JSON. Responds 400 when the upload is missing or empty, 404 when the
    record cannot be found after scanning.
    """
    # BUG FIX: request.files['file'] raised KeyError (an HTTP 500) when the
    # 'file' field was absent from the request; report a client error instead.
    upload_file = request.files.get('file')
    if upload_file is None:
        return jsonify(dict(error='Missing Parameters', response=400)), 400
    file_stream = upload_file.stream.read()
    if not file_stream:
        return jsonify(dict(error='Missing Parameters', response=400)), 400
    #: Collect upload file data
    sample = {'filename': secure_filename(upload_file.filename),
              'sha1': hashlib.sha1(file_stream).hexdigest().upper(),
              'sha256': hashlib.sha256(file_stream).hexdigest().upper(),
              'md5': hashlib.md5(file_stream).hexdigest().upper(),
              'ssdeep': pydeep.hash_buf(file_stream),
              'filesize': len(file_stream),
              'filetype': magic.from_buffer(file_stream),
              'filemime': upload_file.mimetype,
              'upload_date': r.now(),
              'uploaded_by': "api",  # g.user
              'detection_ratio': dict(infected=0, count=0),
              'filestatus': "Processing"}
    insert_in_samples_db(sample)
    update_upload_file_metadata(sample)
    #: Run all configured scanners
    sample['detection_ratio'] = scan_upload(file_stream, sample)
    #: Done Processing File
    sample['filestatus'] = 'Complete'
    sample['scancomplete'] = r.now()
    update_sample_in_db(sample)
    found = is_hash_in_db(sample['md5'])
    if found:
        return jsonify(found)
    return jsonify(dict(error='Not a valid API end point.', response=404)), 404
def upload_view():
    """Handle a file upload over the API: hash it, store it, scan it.

    Builds the sample record from the upload stream, persists it, runs the
    full scanner pipeline, and replies with the final DB record as JSON.
    An empty upload yields a 400 response; a record that cannot be found
    after scanning yields a 404.
    """
    uploaded = request.files['file']
    contents = uploaded.stream.read()
    if not contents:
        return jsonify(dict(error='Missing Parameters', response=400)), 400
    #: Assemble the sample record from the raw upload bytes.
    sample = {
        'filename': secure_filename(uploaded.filename),
        'sha1': hashlib.sha1(contents).hexdigest().upper(),
        'sha256': hashlib.sha256(contents).hexdigest().upper(),
        'md5': hashlib.md5(contents).hexdigest().upper(),
        'ssdeep': pydeep.hash_buf(contents),
        'filesize': len(contents),
        'filetype': magic.from_buffer(contents),
        'filemime': uploaded.mimetype,
        'upload_date': r.now(),
        'uploaded_by': "api",  # g.user
        'detection_ratio': dict(infected=0, count=0),
        'filestatus': "Processing",
    }
    insert_in_samples_db(sample)
    update_upload_file_metadata(sample)
    #: Kick off every configured scanner.
    sample['detection_ratio'] = scan_upload(contents, sample)
    #: Mark processing finished and persist the final state.
    sample['filestatus'] = 'Complete'
    sample['scancomplete'] = r.now()
    update_sample_in_db(sample)
    record = is_hash_in_db(sample['md5'])
    if not record:
        return jsonify(
            dict(error='Not a valid API end point.', response=404)), 404
    return jsonify(record)
def scan_upload(file_stream, sample):
    """Run the full scanner pipeline on an uploaded file.

    Executes the AV workers, then the static analyzers (EXIF, TrID, plus
    PE or PDF analysis when applicable) and the intel hash lookup, and
    finally returns the detection ratio of the newest upload entry stored
    for this hash.
    """
    md5 = sample['md5']
    # job = q.enqueue(run_workers, file_stream)
    # print job.result
    print("<< Now Scanning file: {} >>>>>>>>>>>>>>>>>>>>>>>".format(
        sample['filename']))
    # print_info("Scanning with MetaScan now.")
    # if run_metascan(file_stream, sample['md5']):
    #     print_success("MetaScan Complete.")
    #: First pass: the AV workers.
    print_info("Scanning with AV workers now.")
    if run_workers(file_stream):
        print_success("Malice AV scan Complete.")
    print_info("Performing file analysis now.")
    print_item("Scanning with EXIF now.", 1)
    exif_scan(file_stream, md5)
    print_item("Scanning with TrID now.", 1)
    trid_scan(file_stream, md5)
    if file_is_pe(file_stream):
        #: PE-specific static analysis.
        print_item("Scanning with PE Analysis now.", 1)
        pe_scan(file_stream, md5)
    if file_is_pdf(file_stream):
        #: PDF-specific static analysis.
        pdfparser_scan(file_stream, md5)
        pdfid_scan(file_stream, md5)
    #: Query the intel sources for this hash.
    print_item("Searching for Intel now.", 1)
    single_hash_search(md5)
    print_success("File Analysis Complete.")
    record = is_hash_in_db(md5)
    return record['user_uploads'][-1]['detection_ratio']
def update_upload_file_metadata(sample):
    """Create or refresh the per-file metadata record for an upload.

    Looks the sample up by MD5; refreshes the hash/size/type fields of an
    existing record or builds a new one, appends this upload's details to
    the record's ``user_uploads`` history, and writes it back with
    ``db_insert()``.
    """
    # This upload's history entry is identical in both branches, so build
    # it once up front.
    upload_entry = {
        'filename': sample['filename'],
        'upload_date': sample['upload_date'],
        'uploaded_by': sample['uploaded_by'],
        'detection_ratio': sample['detection_ratio'],
    }
    existing = is_hash_in_db(sample['md5'])
    if existing:
        existing['sha1'] = sample['sha1']
        existing['sha256'] = sample['sha256']
        existing['ssdeep'] = sample['ssdeep']
        existing['filesize'] = sample['filesize']
        existing['filetype'] = sample['filetype']
        existing['filemime'] = sample['filemime']
        existing.setdefault('user_uploads', []).append(upload_entry)
        db_insert(existing)
    else:
        new_record = {
            'md5': sample['md5'],
            'sha1': sample['sha1'],
            'sha256': sample['sha256'],
            'ssdeep': sample['ssdeep'],
            'filesize': sample['filesize'],
            'filetype': sample['filetype'],
            'filemime': sample['filemime'],
        }
        new_record.setdefault('user_uploads', []).append(upload_entry)
        db_insert(new_record)
def single_hash_search(this_hash):
    """Ensure intel results (Bit9, VirusTotal) exist for a single hash.

    If the hash is unknown, queries all intel workers and re-reads the DB.
    If a record exists, queries only the missing sources, stores the
    record in the sessions table, and returns it.

    :param this_hash: the hash string to look up.
    :returns: the DB record for the hash (may be falsy if the intel
        queries produced nothing).
    """
    found = is_hash_in_db(this_hash)
    if not found:
        #: Run all Intel Workers on hash
        # TODO: Make these async with .delay(this_hash)
        single_query_bit9(this_hash)
        single_query_virustotal(this_hash)
        return is_hash_in_db(this_hash)
    #: Fill in the blanks — only query the sources we don't have yet.
    # Idiom fix: membership test directly on the dict instead of
    # `'Bit9' not in list(found.keys())`.
    if 'Bit9' not in found:
        single_query_bit9(this_hash)
    if 'VirusTotal' not in found:
        single_query_virustotal(this_hash)
    # `found` is always truthy in this branch, so the original's trailing
    # `if found: ... else: return False` had an unreachable else.
    # TODO: handle case where all fields are filled out (session not updating on first submission)
    r.table('sessions').insert(found).run(g.rdb_sess_conn)
    return found
def sample(id):
    """Render the analysis page for a single sample, looked up by hash."""
    #: The id must parse as a valid hash value.
    a_sample_id = parse_hash_list(id)
    if not a_sample_id:
        abort(404)
    #: ...and must already exist in the DB.
    found = is_hash_in_db(a_sample_id)
    if not found:
        abort(404)
    #: Unpack everything the analysis template needs from the record.
    (av_results, metascan_results, detection_ratio, exif, file_metadata,
     pe, tags, trid) = parse_sample_data(found)
    return render_template(
        'analysis.html',
        sample=found,
        file=file_metadata,
        tags=tags,
        pe=pe,
        exif=exif,
        trid=trid,
        av_results=av_results,
        metascan_results=metascan_results,
        detection_ratio=detection_ratio,
    )
def batch_search_hash(hash_list):
    """Resolve a batch of hashes, querying intel APIs only for unknowns.

    Hashes already in the DB are collected directly (and pushed to the
    sessions table); the remainder are sent to the Bit9 and VirusTotal
    batch queries, after which the DB is re-read for each.

    :param hash_list: iterable of hash strings.
    :returns: list of DB records found for the given hashes.
    """
    new_hash_list = []
    search_results = []
    #: Check DB for hashes; only unknown ones hit the external APIs.
    for a_hash in hash_list:
        found = is_hash_in_db(a_hash)
        if found:
            search_results.append(found)
            r.table('sessions').insert(found).run(g.rdb_sess_conn)
        else:
            new_hash_list.append(a_hash)
    if new_hash_list:
        batch_query_bit9(new_hash_list)
        # batch_query_bit9.delay(new_hash_list)
        batch_query_virustotal(new_hash_list)
        for a_new_hash in new_hash_list:
            found = is_hash_in_db(a_new_hash)
            if found:
                search_results.append(found)
    # Fix: both branches of the original `if new_hash_list` returned
    # search_results; a single return suffices.
    return search_results
def pe_scan(this_file, file_md5):
    """Run PE static analysis on a file and persist the results by MD5.

    Returns the updated DB record, or None when the file does not parse
    as a PE executable.
    """
    analyzer = pe.PE(this_file)
    if not analyzer.pe:
        print_error("PE Analysis Failed - This file might not be a PE Executable.")
        return
    key, pe_results = analyzer.scan()
    record = is_hash_in_db(file_md5)
    if not record:
        record = dict(md5=file_md5)
    record[key] = pe_results
    db_insert(record)
    return record
def trid_scan(file_stream, file_md5):
    """Run TrID file-type identification and persist the results by MD5.

    Returns the updated DB record, or None when the TrID wrapper could
    not be constructed.
    """
    identifier = trid.TrID(file_stream)
    if not identifier:
        print_error("TrID Analysis Failed.")
        return
    key, trid_results = identifier.scan()
    record = is_hash_in_db(file_md5)
    if not record:
        record = dict(md5=file_md5)
    record[key] = trid_results
    db_insert(record)
    return record
def exif_scan(file_stream, file_md5):
    """Extract EXIF metadata from a file and persist the results by MD5.

    Returns the updated DB record, or None when the Exif wrapper could
    not be constructed.
    """
    extractor = exif.Exif(file_stream)
    if not extractor:
        print_error("EXIF Analysis Failed.")
        return
    key, exif_results = extractor.scan()
    record = is_hash_in_db(file_md5)
    if not record:
        record = dict(md5=file_md5)
    record[key] = exif_results
    db_insert(record)
    return record
def sophos_scan(this_file):
    """Scan a file buffer with the Sophos engine and persist the verdict.

    Appends the normalized scan result to the latest ``user_uploads``
    entry's 'av_results' list and updates that entry's detection ratio.

    :param this_file: raw file bytes to scan.
    :returns: the DB record that was written.
    """
    my_sophos = sophos_engine()
    results = my_sophos.scan(PickleableFileSample.string_factory(this_file))
    file_md5_hash = hashlib.md5(this_file).hexdigest().upper()
    found = is_hash_in_db(file_md5_hash)
    if found:
        found['user_uploads'][-1].setdefault('av_results', []).append(scan_to_dict(results, 'Sophos'))
        # Every scan counts toward the ratio; only hits bump 'infected'.
        if results.infected:
            found['user_uploads'][-1]['detection_ratio']['infected'] += 1
        found['user_uploads'][-1]['detection_ratio']['count'] += 1
        data = found
    else:
        # BUG FIX: the original indexed data['user_uploads'][-1] on a fresh
        # dict, which raised KeyError. Seed the upload list first.
        data = dict(md5=file_md5_hash, user_uploads=[{}])
        data['user_uploads'][-1].setdefault('av_results', []).append(scan_to_dict(results, 'Sophos'))
    db_insert(data)
    return data
def avg_scan(this_file):
    """Scan a file buffer with the AVG engine and persist the verdict.

    On engine error, flashes the error to the UI and returns None.
    Otherwise appends the result dict to the latest ``user_uploads``
    entry's 'av_results' list and updates that entry's detection ratio.

    :param this_file: raw file bytes to scan.
    :returns: the DB record that was written, or None on engine error.
    """
    my_avg = avg_engine.AVG(this_file)
    result = my_avg.scan()
    # result = my_avg.scan(PickleableFileSample.string_factory(file))
    if 'error' in result[1]:
        flash(result[1]['error'], 'error')
        return None
    file_md5_hash = hashlib.md5(this_file).hexdigest().upper()
    found = is_hash_in_db(file_md5_hash)
    if found:
        found['user_uploads'][-1].setdefault('av_results', []).append(result[1])
        # Every scan counts toward the ratio; only hits bump 'infected'.
        if result[1]['infected']:
            found['user_uploads'][-1]['detection_ratio']['infected'] += 1
        found['user_uploads'][-1]['detection_ratio']['count'] += 1
        data = found
    else:
        # BUG FIX: the original indexed data['user_uploads'][-1] on a fresh
        # dict, which raised KeyError. Seed the upload list first.
        data = dict(md5=file_md5_hash, user_uploads=[{}])
        data['user_uploads'][-1].setdefault('av_results', []).append(result[1])
    db_insert(data)
    return data