Esempio n. 1
0
def save_file_database(detected, mutation, url_sandbox, CSV, scanner):
    '''
    Archive a mutation on disk and log it in the matching CSV database.

    The mutation is copied from mod_path into evasion_path (not detected)
    or detected_path (detected). CSV is then updated in place and written
    to db/evasion.csv or db/detected.csv respectively.

    Input:
        detected: Boolean value whether malware mutation is detected
        mutation: File name of the mutation, appended to mod_path
        url_sandbox: URL to functionality report, stored as Full_Analysis_Report
        CSV: Record to save in DB; CSV['Perturbations'] prefixes the file name
        scanner: malware classifier, stored as Full_Detections_Report and
                 appended to the file name of detected samples

    Output:
        1 when the mutation evaded the scanner, 0 when it was detected
    '''

    evaded = not detected

    # Timestamp built from the unpadded date/time components
    stamp = datetime.now()
    timestamp = ''.join(
        str(part) for part in (stamp.year, stamp.month, stamp.day,
                               stamp.hour, stamp.minute, stamp.second))

    source = mod_path + mutation
    if evaded:
        target = evasion_path + CSV['Perturbations'] + 'm_' + timestamp + '.exe'
        verdict = 'Evasion'
        db_file = 'db/evasion.csv'
    else:
        # Detected samples additionally carry the scanner name in the file name
        target = detected_path + CSV['Perturbations'] + 'm_' + timestamp + \
            scanner + '.exe'
        verdict = 'Detected'
        db_file = 'db/detected.csv'

    # Copy the sample into its destination folder
    copyfile(source, target)

    # Update the record and persist it
    CSV['Manipulated_File'] = target
    CSV['Full_Analysis_Report'] = url_sandbox
    CSV['MF_Detections'] = verdict
    CSV['Full_Detections_Report'] = scanner
    CSV['Date_Reported'] = str(datetime.now())
    f.write_dict_CSV(db_file, CSV, fields)

    if evaded:
        print('Results: Evasion found for {}!\n'.format(scanner))
        return 1

    return 0
Esempio n. 2
0
def comparing(bin_bytes, sample, n, rounds, files_expected,
              detection_threshold, scanner):
    '''
    Run ARMED and then AIMED on the same sample and log one comparison row
    (timings, file counts, corrupt counts, total time) to db/compare.csv.

    Input:
        bin_bytes, sample, n, files_expected, scanner: forwarded to both
            armed() and aimed()
        rounds, detection_threshold: forwarded to armed() only
    '''

    overall_start = time()

    # ARMED run; only the second returned value (corrupt samples) is recorded
    armed_start = time()
    _unused, armed_corrupt = armed(bin_bytes, sample, n, rounds,
                                   files_expected, detection_threshold,
                                   scanner)
    armed_elapsed = f.time_me(armed_start)

    # AIMED run with a fixed population size
    population = 4
    aimed_start = time()
    aimed_evasions, aimed_corrupt = aimed(bin_bytes, sample, population, n,
                                          files_expected, scanner)
    aimed_elapsed = f.time_me(aimed_start)

    # Pair each column with its value and write the comparison row
    columns = [
        'Sample', 'Perturbations', 'Module 1', 'Time M1', 'Files M1',
        'Corr M1', 'Module 2', 'Time M2', 'Files M2', 'Corr M2', 'Total Time'
    ]
    values = (
        sample,
        n,
        'ARMED',
        armed_elapsed,
        files_expected,
        armed_corrupt,
        'AIMED',
        aimed_elapsed,
        aimed_evasions,
        aimed_corrupt,
        strftime('%H:%M:%S', gmtime(time() - overall_start)),
    )
    row = dict(zip(columns, values))
    f.write_dict_CSV('db/compare.csv', row, columns)

    # Presumably refreshes the condensed averages CSV used for LaTeX
    # (helper not shown here)
    f.comparing_AXMED()
Esempio n. 3
0
def malware_detection_VT(sample_report, CSV):
    '''
    Detecting malware samples using VirusTotal (remote)

    Polls VirusTotal for the report of CSV['Mod_File_Hash'] at most 20
    times, waiting 60 s after every unsuccessful attempt (queued sample,
    unknown sample or connection problem). When the report is available,
    the detections are printed, the sample is copied from mod_path into
    evasion_path and CSV is updated and written to db/database.csv.

    Input:
        sample_report: VT report of the original sample; its 'positives',
                       'total' and 'permalink' entries are the benchmark
        CSV: Record with 'Mod_File_Hash' and 'Perturbations'; updated in place

    Output:
        Number of engines detecting the new sample, or None when no report
        became available within the attempt limit
    '''

    limit = 20
    start = time()

    # Comparing detections of both samples
    print('\n# Malware Detection Stage #')
    print('\nOriginal sample:')
    print('Detected by {} out of {} engines \n'.format(
        sample_report['positives'], sample_report['total']))
    print(sample_report['permalink'])
    print('\nStatus:')

    # Bounded attempts plus sleep keep requests low and avoid an API ban by VT.
    # Every attempt counts, including failed connections, so the loop always
    # terminates.
    for _ in range(limit):
        try:
            # Getting report of sample submitted via VT API - Rescan: False
            report = f.get_report_VT(CSV['Mod_File_Hash'], False)
        except (requests.ConnectionError, requests.Timeout,
                requests.ConnectTimeout) as e:
            print('Connection issues or API requests threshold reached: {}'.
                  format(e))
            # Back off before retrying instead of hammering the API
            sleep(60)
            continue

        if report['response_code'] == -2:
            print('The sample is queued for analysis. Next update in 60 s')
            sleep(60)
            continue

        if report['response_code'] != 1:  # 'response_code' == 0
            print("Sample is not present in VirusTotal's dataset")
            sleep(60)
            continue

        print('\nResults: New sample found')
        print('\nDetected by {} out of {} engines \n'.format(
            report['positives'], report['total']))

        # Print only engines detecting new sample
        print([
            engine for engine, result in report['scans'].items()
            if result['detected'] == 1
        ])

        # Provide link to sample detections report
        print('\n{}'.format(report['permalink']))

        # Calculate evasion rate based on original sample detections
        print('\n## Summary ##')
        try:
            evasion_rate = (
                1 - (report['positives'] / report['total']) /
                (sample_report['positives'] / sample_report['total'])) * 100
        except ZeroDivisionError:
            # A benchmark with zero detections (or an empty engine list)
            # gives no ratio; keep going so the sample is still recorded.
            evasion_rate = None
        if evasion_rate is None:
            print('\nEvasion rate: n/a (detection ratio is undefined)')
        else:
            print('\nEvasion rate: {:.2f}% of previous engines'.format(
                evasion_rate))

        # Show detection time in hh:mm:ss
        f.time_me(start)

        # Copy successful sample into evasion path
        now = datetime.now()
        name_file = str(now.year) + str(now.month) + str(now.day) + \
            str(now.hour) + str(now.minute) + str(now.second)
        target = evasion_path + CSV['Perturbations'] + 'm_' + name_file + '.exe'
        copyfile(mod_path + CSV['Perturbations'] + '_m.exe', target)

        # Update database with sample's info
        CSV['Manipulated_File'] = target
        CSV['MF_Detections'] = \
            str(report['positives']) + '/' + str(report['total'])
        CSV['Full_Detections_Report'] = str(report['permalink'])
        CSV['Date_Reported'] = str(report['scan_date'])
        f.write_dict_CSV('db/database.csv', CSV, fields)

        return report['positives']

    # Attempt limit exhausted without a usable report
    print('No VirusTotal report available after {} attempts.'.format(limit))
    return None
Esempio n. 4
0
def malware_analysis_HA(mod_sample, json_send_HA, CSV):
    '''
    Analyze malware using remote service Hybrid Analysis

    Polls the Hybrid Analysis summary of the submitted sample until it
    reports SUCCESS or ERROR, exiting the program once 30 attempts are
    used up. A sample whose report lists domains or compromised hosts is
    treated as working and sent to VirusTotal; otherwise (unless the
    sandbox reported ERROR) it is copied into fail_path and logged in
    db/fail_database.csv.

    Input:
        mod_sample: Manipulated sample, passed to f.send_VT when it works
        json_send_HA: Submission response; 'sha256' and 'job_id' are read,
                      the value 429 is treated as "quota exceeded"
        CSV: Record with 'Perturbations'; updated in place for failed samples

    Output:
        (functionality, url_sample): whether the sample appears to run, and
        the URL of the full Hybrid Analysis report
    '''

    loops = 0
    start = time()
    functionality = False

    # Wait a few minutes if server did not accept further submissions
    # NOTE(review): json_send_HA is never refreshed inside this loop, so a 429
    # waits forever; presumably the sample has to be resubmitted here -
    # confirm against the submission helper.
    while json_send_HA == 429:
        print('Submission quota limit has been exceeded. Retry in 5 minutes.')
        sleep(301)

    # Retrieve report from Hybrid Analysis sandbox: report URL + Hash + Job ID
    url_sample = 'https://www.reverse.it/sample/' + json_send_HA[
        'sha256'] + '/' + json_send_HA['job_id']
    print('\nFull report: {}\n\nStatus:'.format(url_sample))

    # Use loops and sleep to keep requests low and avoid API banned by HA (Limit: 5/m)
    limit = 30
    while loops < limit:
        try:
            # Server could return 403; query the URL once per attempt
            status_code = f.url_ok(url_sample)
            if status_code in (200, 403):
                report_HA = f.get_summary_HA(json_send_HA['sha256'])
                state = report_HA['state']
                if state == 'ERROR':
                    print('The sandbox environment returned {}.'.format(
                        report_HA['error_type']))
                    break
                if state == 'SUCCESS':
                    print('Analysis finished.')
                    break
                if state == 'IN_QUEUE':
                    print(
                        'Waiting in queue to be analyzed. Next update in 60 s')
                elif state == 'IN_PROGRESS':
                    print('Analysis in progress..')
                # Analysis not finished yet: wait before the next poll
                sleep(60)
            else:
                print('Website not reachable. Next update in 30 s')
                sleep(30)

        except (requests.ConnectionError, requests.Timeout,
                requests.ConnectTimeout) as e:
            print('Connection issues or API requests reached:\n{}'.format(e))
            # Back off before retrying instead of hammering the API
            sleep(30)

        # Checked on every attempt (failed connections included) so the loop
        # always ends, and report_HA is always bound once the loop is left.
        if loops == limit - 1:
            print(
                'ARMED exited because the limit of {} minutes has been reached.\n'
                .format(limit))
            quit()

        loops += 1

    # Check the likelihood that malware runs based on report
    if report_HA['domains'] or report_HA['compromised_hosts']:
        functionality = True
        print('\nResults: WORKING')
        print('Malware connects to domains or contacts hosts.')

        # Show analysis time in hh:mm:ss
        f.time_me(start)

        # Send to VT to check detections
        print('Sent to VirusTotal!')
        f.send_VT(mod_sample)

    elif report_HA['state'] != 'ERROR':
        print('\nResults: Most likely not working')
        print('Check if manipulated sample runs before scanning.')
        print('Malware does not connect to domains or contacts hosts.')

        # Copy sample into failed path & tag with F
        now = datetime.now()
        name_file = str(now.year) + str(now.month) + str(now.day) + str(
            now.hour) + str(now.minute)
        failed_copy = fail_path + CSV['Perturbations'] + 'F_' + name_file + '.exe'
        copyfile(mod_path + CSV['Perturbations'] + '_m.exe', failed_copy)

        # Update database with basic sample's info
        CSV['Manipulated_File'] = failed_copy
        CSV['Full_Analysis_Report'] = url_sample
        f.write_dict_CSV('db/fail_database.csv', CSV, fields)

        # Show analysis time in hh:mm:ss
        f.time_me(start)

    return functionality, url_sample
Esempio n. 5
0
def malware_analysis(mod_sample, json_send, useVT, CSV):
    '''
    Analyze malware with sandbox Cuckoo

    Polls the local sandbox until the task status is 'reported', then reads
    the report. A mutation is treated as working when the Cuckoo debug log
    lacks the "unable to execute" error and the run lasted at least 15
    (presumably seconds - unit not shown here). A mutation whose log holds
    that error is copied into fail_path and logged in db/corrupted.csv.

    Input:
        mod_sample: Compiled version of modified malware mutation
        json_send: JSON status after sending mutation to local sandbox for
                   analysis; its 'task_id' identifies the task
        useVT: Boolean value indicating whether VirusTotal is used or
               detection will be performed locally
        CSV: Data structure with information to save on DB

    Output:
        (functionality, url_sample): whether the mutation appears to run, and
        the URL of the full analysis report
    '''

    start = time()
    functionality = False

    # Show report from analysis sandbox: report URL + Job ID
    url_sample = 'http://localhost:8000/analysis/' + str(
        json_send['task_id']) + '/summary'
    print('\nFull analysis report: {}\n\nStatus:'.format(url_sample))

    # Each intermediate status is announced only the first time it is seen
    announcements = {
        'completed': 'Analysis finished. Generating report..',
        'pending': 'Waiting in queue to be analyzed..',
        'running': 'Analysis in progress..',
    }
    announced = set()

    # Using sleep in loop to space requests to sandbox may improve results
    while True:
        try:
            v = f.get_summary_local_sandbox(json_send['task_id'], 'view')
            view_status = v['task']['status']
            if view_status == 'reported':
                print('Report finished.')
                break
            if view_status in announcements and view_status not in announced:
                print(announcements[view_status])
                announced.add(view_status)

        except (requests.ConnectionError, requests.Timeout,
                requests.ConnectTimeout) as e:
            print('Connection issues or API not available:\n{}'.format(e))

        # Pause after every attempt, failed requests included, so an
        # unreachable sandbox does not turn this into a busy loop.
        sleep(0.2)

    # Check the likelihood that malware runs based on report
    err = 'CuckooPackageError: Unable to execute the initial process, analysis aborted.\n'
    r = f.get_summary_local_sandbox(json_send['task_id'], 'report')
    report = r['debug']['cuckoo']
    duration = r['info']['duration']
    corrupt = err in report

    if corrupt:
        print('\nResults: Mutation is corrupt')

        # Copy sample into failed path & tag with letter F
        now = datetime.now()
        name_file = str(now.year) + str(now.month) + str(now.day) + str(
            now.hour) + str(now.minute)
        failed_copy = fail_path + CSV['Perturbations'] + 'F_' + name_file + '.exe'
        copyfile(mod_path + CSV['Perturbations'] + '_m.exe', failed_copy)

        # Update database with basic sample's info
        CSV['Manipulated_File'] = failed_copy
        CSV['Full_Analysis_Report'] = url_sample
        CSV['Date_Reported'] = str(datetime.now())
        f.write_dict_CSV('db/corrupted.csv', CSV, fields)

        # Show analysis time in hh:mm:ss
        f.time_me(start)

    elif duration >= 15:
        functionality = True
        print('\nResults: WORKING')

        # Show analysis time in hh:mm:ss
        f.time_me(start)

        # Send to VT for detections (activate if local detection is not used)
        if useVT:
            print('Sending to VirusTotal!')
            f.send_VT(mod_sample)

    elif duration < 15:
        print(
            '\nResults: It could not be determined (score = {} – duration = {})'
            .format(r['info']['score'], duration))

        # Show analysis time in hh:mm:ss
        f.time_me(start)

    return functionality, url_sample