def save_file_database(detected, mutation, url_sandbox, CSV, scanner):
    '''
        Structure manipulation and logic to update DB

        The mutation is copied from mod_path into evasion_path (when it was
        not detected) or detected_path (when it was detected), the CSV row is
        completed with the outcome and written to db/evasion.csv or
        db/detected.csv respectively.

        Input:
            detected: Boolean value whether malware mutation is detected
            mutation: Name of malware with path
            url_sandbox: URL to functionality report (default: Cuckoo sandbox)
            CSV: Structure to save in DB (updated in place)
            scanner: malware classifier

        Output:
            1 if the mutation evaded the scanner, 0 if it was detected
    '''

    # Zero-padded timestamp. The previous unpadded concatenation of the date
    # parts was ambiguous: 2021-1-11 and 2021-11-1 both started with "2021111",
    # so different moments could map to the same file name.
    name_file = datetime.now().strftime('%Y%m%d%H%M%S')

    if detected:
        # Valid but detected samples are tagged with the scanner name
        target = detected_path + CSV['Perturbations'] + 'm_' + name_file + scanner + '.exe'
        verdict = 'Detected'
        database = 'db/detected.csv'
    else:
        target = evasion_path + CSV['Perturbations'] + 'm_' + name_file + '.exe'
        verdict = 'Evasion'
        database = 'db/evasion.csv'

    # Archive the sample under its final name
    copyfile(mod_path + mutation, target)

    # Complete the row and persist it
    CSV['Manipulated_File'] = target
    CSV['Full_Analysis_Report'] = url_sandbox
    CSV['MF_Detections'] = verdict
    CSV['Full_Detections_Report'] = scanner
    CSV['Date_Reported'] = str(datetime.now())
    f.write_dict_CSV(database, CSV, fields)

    if detected:
        return 0

    print('Results: Evasion found for {}!\n'.format(scanner))
    return 1
def comparing(bin_bytes, sample, n, rounds, files_expected, detection_threshold, scanner):
    '''
        Run ARMED and AIMED back to back on the same sample to compare random
        vs. evolutionary performance finding adversarial examples.

        Each module is timed separately and one row with the results of both
        runs is written to db/compare.csv.
    '''

    overall_start = time()

    # ARMED run: only the second returned value (corrupt samples) is kept
    armed_start = time()
    _unused, armed_corrupt = armed(bin_bytes, sample, n, rounds,
                                   files_expected, detection_threshold, scanner)
    armed_elapsed = f.time_me(armed_start)

    # AIMED run with a fixed population size
    population = 4
    aimed_start = time()
    aimed_evasions, aimed_corrupt = aimed(bin_bytes, sample, population, n,
                                          files_expected, scanner)
    aimed_elapsed = f.time_me(aimed_start)

    # Column names and their values, kept in the same order
    columns = [
        'Sample', 'Perturbations',
        'Module 1', 'Time M1', 'Files M1', 'Corr M1',
        'Module 2', 'Time M2', 'Files M2', 'Corr M2',
        'Total Time',
    ]
    values = (
        sample, n,
        'ARMED', armed_elapsed, files_expected, armed_corrupt,
        'AIMED', aimed_elapsed, aimed_evasions, aimed_corrupt,
        strftime('%H:%M:%S', gmtime(time() - overall_start)),
    )
    row = dict(zip(columns, values))
    f.write_dict_CSV('db/compare.csv', row, columns)

    # Update short version CSV with time averages to use as input in LaTeX
    f.comparing_AXMED()
def malware_detection_VT(sample_report, CSV):
    '''
        Detecting malware samples using VirusTotal (remote)

        The report of the mutation identified by CSV['Mod_File_Hash'] is
        requested up to 20 times, waiting 60 s between attempts. Once the
        report is available the detections are printed, the sample is copied
        from mod_path into evasion_path, the CSV row is completed and written
        to db/database.csv.

        Input:
            sample_report: VT report of the original sample; its 'positives',
                           'total' and 'permalink' entries are used as benchmark
            CSV: Data structure with information to save on DB (updated in place)

        Output:
            Number of engines detecting the mutation, or None when no report
            became available within the attempt limit
    '''

    limit = 20
    start = time()

    # Comparing detections of both samples
    print('\n# Malware Detection Stage #')
    print('\nOriginal sample:')
    print('Detected by {} out of {} engines \n'.format(
        sample_report['positives'], sample_report['total']))
    print(sample_report['permalink'])
    print('\nStatus:')

    # Use loops and sleep to keep requests low and avoid API banned by VT (Limit: 100)
    for _ in range(limit):
        try:
            # Getting report of sample submitted via VT API - Rescan: False
            report = f.get_report_VT(CSV['Mod_File_Hash'], False)
        except (requests.ConnectionError, requests.Timeout,
                requests.ConnectTimeout) as e:
            print('Connection issues or API requests threshold reached: {}'.format(e))
            # A failed request counts as an attempt and backs off like any
            # other poll; otherwise a persistent outage would retry forever
            # without any delay.
            sleep(60)
            continue

        if report['response_code'] == 1:
            print('\nResults: New sample found')
            print('\nDetected by {} out of {} engines \n'.format(
                report['positives'], report['total']))

            # Print only engines detecting new sample
            print([engine for engine, result in report['scans'].items()
                   if result['detected'] == 1])

            # Provide link to sample detections report
            print('\n{}'.format(report['permalink']))

            # Calculate evasion rate based on original sample detections
            print('\n## Summary ##')
            baseline = (sample_report['positives'] / sample_report['total']
                        if sample_report['total'] else 0)
            current = (report['positives'] / report['total']
                       if report['total'] else 0)
            if baseline:
                print('\nEvasion rate: {:.2f}% of previous engines'.format(
                    (1 - current / baseline) * 100))
            else:
                # Without detections on the original sample the rate is
                # undefined; avoid a ZeroDivisionError.
                print('\nEvasion rate: not available (original sample had no detections)')

            # Show detection time in hh:mm:ss
            f.time_me(start)

            # Copy successful sample into evasion path. The timestamp is
            # zero-padded so that names are unambiguous and sortable.
            name_file = datetime.now().strftime('%Y%m%d%H%M%S')
            target = evasion_path + CSV['Perturbations'] + 'm_' + name_file + '.exe'
            copyfile(mod_path + CSV['Perturbations'] + '_m.exe', target)

            # Update database with sample's info
            CSV['Manipulated_File'] = target
            CSV['MF_Detections'] = str(report['positives']) + '/' + str(report['total'])
            CSV['Full_Detections_Report'] = str(report['permalink'])
            CSV['Date_Reported'] = str(report['scan_date'])
            f.write_dict_CSV('db/database.csv', CSV, fields)

            return report['positives']

        if report['response_code'] == -2:
            print('The sample is queued for analysis. Next update in 60 s')
        else:  # 'response_code' == 0
            print("Sample is not present in VirusTotal's dataset")
        sleep(60)

    # No report became available within the attempt limit
    return None
def malware_analysis_HA(mod_sample, json_send_HA, CSV):
    '''
        Analyze malware using remote service Hybrid Analysis

        The sandbox report is polled until the analysis ends with SUCCESS or
        ERROR. A sample whose report lists domains or compromised hosts is
        considered functional and is sent to VirusTotal. Otherwise, unless the
        sandbox reported an error, the sample is copied into fail_path and the
        CSV row is written to db/fail_database.csv.

        Input:
            mod_sample: modified sample, sent to VirusTotal when functional
            json_send_HA: answer to the submission; its 'sha256' and 'job_id'
                          entries are used. The value 429 signals an exceeded
                          submission quota
            CSV: Data structure with information to save on DB (updated in place)

        Output:
            (functionality, url_sample): whether the sample is considered
            functional and the URL of the full sandbox report

        The interpreter is exited (SystemExit) when no final state is reached
        after 30 polling attempts.
    '''

    loops = 0
    limit = 30
    start = time()
    functionality = False

    # Wait a few minutes if server did not accept further submissions
    # NOTE(review): nothing inside this loop re-submits the sample or
    # refreshes json_send_HA, so a 429 answer keeps waiting forever. The
    # submission should be retried here - confirm the helper to call.
    while json_send_HA == 429:
        print('Submission quota limit has been exceeded. Retry in 5 minutes.')
        sleep(301)

    # Retrieve report from Hybrid Analysis sandbox: report URL + Hash + Job ID
    url_sample = ('https://www.reverse.it/sample/' + json_send_HA['sha256']
                  + '/' + json_send_HA['job_id'])
    print('\nFull report: {}\n\nStatus:'.format(url_sample))

    # Use loops and sleep to keep requests low and avoid API banned by HA (Limit: 5/m)
    while True:
        try:
            # Server could return 403. The URL is checked once per attempt
            # so that no extra request is spent against the rate limit.
            reachable = f.url_ok(url_sample) in (200, 403)
            if reachable:
                report_HA = f.get_summary_HA(json_send_HA['sha256'])
        except (requests.ConnectionError, requests.Timeout,
                requests.ConnectTimeout) as e:
            print('Connection issues or API requests reached:\n{}'.format(e))
            # Failed requests wait and count like any other attempt;
            # otherwise a persistent outage would retry without pause and
            # never reach the limit.
            wait = 30
        else:
            if reachable:
                state = report_HA['state']
                if state == 'ERROR':
                    print('The sandbox environment returned {}.'.format(
                        report_HA['error_type']))
                    break
                if state == 'SUCCESS':
                    print('Analysis finished.')
                    break
                if state == 'IN_QUEUE':
                    print('Waiting in queue to be analyzed. Next update in 60 s')
                elif state == 'IN_PROGRESS':
                    print('Analysis in progress..')
                wait = 60
            else:
                print('Website not reachable. Next update in 30 s')
                wait = 30

        sleep(wait)
        loops += 1

        if loops == limit:
            print('ARMED exited because the limit of {} minutes has been reached.\n'
                  .format(limit))
            # Same effect as the interactive-only quit() helper
            raise SystemExit

    # Check the likelihood that malware runs based on report
    if report_HA['domains'] or report_HA['compromised_hosts']:
        functionality = True
        print('\nResults: WORKING')
        print('Malware connects to domains or contacts hosts.')

        # Show analysis time in hh:mm:ss
        f.time_me(start)

        # Send to VT to check detections
        print('Sent to VirusTotal!')
        f.send_VT(mod_sample)

    elif report_HA['state'] != 'ERROR':
        print('\nResults: Most likely not working')
        print('Check if manipulated sample runs before scanning.')
        print('Malware does not connect to domains or contacts hosts.')

        # Copy sample into failed path & tag with F. The timestamp is
        # zero-padded so that names are unambiguous and sortable.
        name_file = datetime.now().strftime('%Y%m%d%H%M')
        target = fail_path + CSV['Perturbations'] + 'F_' + name_file + '.exe'
        copyfile(mod_path + CSV['Perturbations'] + '_m.exe', target)

        # Update database with basic sample's info
        CSV['Manipulated_File'] = target
        CSV['Full_Analysis_Report'] = url_sample
        f.write_dict_CSV('db/fail_database.csv', CSV, fields)

        # Show analysis time in hh:mm:ss
        f.time_me(start)

    return functionality, url_sample
def malware_analysis(mod_sample, json_send, useVT, CSV):
    '''
        Analyze malware with sandbox Cuckoo

        The task status is polled until the sandbox has finished its report.
        The mutation is considered functional when the report does not contain
        the Cuckoo package error and the analysis lasted at least 15. Corrupt
        mutations are copied into fail_path and written to db/corrupted.csv.

        Input:
            mod_sample: Compiled version of modified malware mutation
            json_send: JSON status after sending mutation to local sandbox for analysis
            useVT: Boolean value indicating whether VirusTotal is used or detection
                   will be performed locally
            CSV: Data structure with information to save on DB (updated in place)

        Output:
            (functionality, url_sample): whether the mutation is considered
            functional and the URL of the full analysis report
    '''

    start = time()
    functionality = False
    task_id = json_send['task_id']

    # Show report from analysis sandbox: report URL + Job ID
    url_sample = 'http://localhost:8000/analysis/' + str(task_id) + '/summary'
    print('\nFull analysis report: {}\n\nStatus:'.format(url_sample))

    # Intermediate states are announced only the first time they are seen
    progress_messages = {
        'completed': 'Analysis finished. Generating report..',
        'pending': 'Waiting in queue to be analyzed..',
        'running': 'Analysis in progress..',
    }
    announced = set()

    # Using sleep in loop to space requests to sandbox may improve results
    while True:
        try:
            v = f.get_summary_local_sandbox(task_id, 'view')
        except (requests.ConnectionError, requests.Timeout,
                requests.ConnectTimeout) as e:
            print('Connection issues or API not available:\n{}'.format(e))
        else:
            view_status = v['task']['status']
            if view_status == 'reported':
                print('Report finished.')
                break
            if view_status in progress_messages and view_status not in announced:
                print(progress_messages[view_status])
                announced.add(view_status)
        # The pause also applies after a failed request, so an unreachable
        # sandbox is not polled in a tight loop.
        sleep(0.2)

    # Check the likelihood that malware runs based on report
    err = 'CuckooPackageError: Unable to execute the initial process, analysis aborted.\n'
    r = f.get_summary_local_sandbox(task_id, 'report')
    report = r['debug']['cuckoo']
    duration = r['info']['duration']

    if err in report:
        print('\nResults: Mutation is corrupt')

        # Copy sample into failed path & tag with letter F. The timestamp is
        # zero-padded so that names are unambiguous and sortable.
        name_file = datetime.now().strftime('%Y%m%d%H%M')
        target = fail_path + CSV['Perturbations'] + 'F_' + name_file + '.exe'
        copyfile(mod_path + CSV['Perturbations'] + '_m.exe', target)

        # Update database with basic sample's info
        CSV['Manipulated_File'] = target
        CSV['Full_Analysis_Report'] = url_sample
        CSV['Date_Reported'] = str(datetime.now())
        f.write_dict_CSV('db/corrupted.csv', CSV, fields)

        # Show analysis time in hh:mm:ss
        f.time_me(start)

    elif duration >= 15:
        functionality = True
        print('\nResults: WORKING')

        # Show analysis time in hh:mm:ss
        f.time_me(start)

        # Send to VT for detections (activate if local detection is not used)
        if useVT:
            print('Sending to VirusTotal!')
            f.send_VT(mod_sample)

    else:
        print('\nResults: It could not be determined (score = {} – duration = {})'
              .format(r['info']['score'], duration))

        # Show analysis time in hh:mm:ss
        f.time_me(start)

    return functionality, url_sample