def test_no_config():
    """Scanning with no config file and no config dict falls back to defaults."""
    report = multiscanner.multiscan(filelist, configfile=None, config=None,
                                    recursive=None, module_list=module_list)
    results, metadata = report[0]
    assert metadata['conf'] == {'a': 'b', 'c': 'd'}
def test_config_api_with_real_file():
    """A config dict overrides values written to a real config file on disk.

    Creates a temporary config file via ``config_init``, scans with an
    in-memory override, and always removes the temp file afterwards.
    """
    config = {'test_conf': {'a': 'z'}}
    # mkstemp() returns (fd, path); the original kept only the path and
    # leaked the open file descriptor — close it explicitly.
    fd, config_file = tempfile.mkstemp()
    os.close(fd)
    try:
        multiscanner.config_init(config_file)
        results, metadata = multiscanner.multiscan(
            filelist, configfile=config_file, config=config,
            recursive=None, module_list=module_list)[0]
    finally:
        # Remove the temp file even if config_init/multiscan raises.
        os.remove(config_file)
    assert metadata['conf'] == {'a': 'z', 'c': 'd'}
def multiscanner_process(work_queue, exit_signal):
    '''Not used in distributed mode.

    Drain scan requests from ``work_queue`` in batches, run MultiScanner on
    them, rewrite report keys to original filenames, and store the reports.

    NOTE(review): ``exit_signal`` is accepted but never checked; the loop is
    ``while True`` so the trailing ``storage_handler.close()`` is unreachable.
    Relies on module-level globals (``batch_size``, ``batch_interval``,
    ``delete_after_scan``, ``db``, ``storage_handler``) — confirm at call site.
    Each queue item appears to be a tuple:
    (file_path, original_filename, task_id, ?, metadata_dict) — TODO confirm.
    '''
    metadata_list = []
    time_stamp = None  # time the first item of the current batch arrived
    while True:
        time.sleep(1)
        try:
            metadata_list.append(work_queue.get_nowait())
            if not time_stamp:
                time_stamp = time.time()
            # Keep pulling until we have a full batch or the queue empties.
            while len(metadata_list) < batch_size:
                metadata_list.append(work_queue.get_nowait())
        except queue.Empty:
            # Flush a partial batch only when it has aged past batch_interval.
            if metadata_list and time_stamp:
                if len(metadata_list) >= batch_size:
                    pass
                elif time.time() - time_stamp > batch_interval:
                    pass
                else:
                    continue
            else:
                continue

        filelist = [item[0] for item in metadata_list]
        # modulelist = [item[5] for item in metadata_list]
        resultlist = multiscanner.multiscan(
            filelist, configfile=multiscanner.CONFIG  # module_list
        )
        results = multiscanner.parse_reports(resultlist, python=True)

        scan_time = datetime.now().isoformat()

        # Optionally delete scanned files from disk (keys are file paths here).
        if delete_after_scan:
            for file_name in results:
                os.remove(file_name)

        for item in metadata_list:
            # Use the original filename as the index instead of the full path
            results[item[1]] = results[item[0]]
            del results[item[0]]
            results[item[1]]['Scan Metadata'] = item[4]
            results[item[1]]['Scan Metadata']['Scan Time'] = scan_time
            results[item[1]]['Scan Metadata']['Task ID'] = item[2]

            db.update_task(
                task_id=item[2],
                task_status='Complete',
                timestamp=scan_time,
            )
        metadata_list = []

        storage_handler.store(results, wait=False)

        filelist = []
        time_stamp = None
    storage_handler.close()
def celery_task(files, config=multiscanner.CONFIG): ''' Run multiscanner on the given file and store the results in the storage handler(s) specified in the storage configuration file. ''' # Get the storage config storage_conf = multiscanner.common.get_config_path(config, 'storage') storage_handler = multiscanner.storage.StorageHandler( configfile=storage_conf) resultlist = multiscanner.multiscan(list(files), configfile=config) results = multiscanner.parse_reports(resultlist, python=True) scan_time = datetime.now().isoformat() # Loop through files in a way compatible with Py 2 and 3, and won't be # affected by changing keys to original filenames for file_ in files: original_filename = files[file_]['original_filename'] task_id = files[file_]['task_id'] file_hash = files[file_]['file_hash'] metadata = files[file_]['metadata'] # Get the Scan Config that the task was run with and # add it to the task metadata scan_config_object = configparser.SafeConfigParser() scan_config_object.optionxform = str scan_config_object.read(config) full_conf = common.parse_config(scan_config_object) sub_conf = {} for key in full_conf: if key == 'main': continue sub_conf[key] = {} sub_conf[key]['ENABLED'] = full_conf[key]['ENABLED'] results[file_]['Scan Metadata'] = {} results[file_]['Scan Metadata']['Worker Node'] = gethostname() results[file_]['Scan Metadata']['Scan Config'] = sub_conf # Use the original filename as the value for the filename # in the report (instead of the tmp path assigned to the file # by the REST API) results[original_filename] = results[file_] del results[file_] results[original_filename]['Scan Time'] = scan_time results[original_filename]['Metadata'] = metadata # Update the task DB to reflect that the task is done db.update_task( task_id=task_id, task_status='Complete', timestamp=scan_time, ) # Save the reports to storage storage_handler.store(results, wait=False) storage_handler.close() return results
def test_config_api_no_file():
    """A config dict passed through the API is applied without a config file."""
    override = {'test_conf': {'a': 'z'}}
    scan_output = multiscanner.multiscan(filelist,
                                         configfile=None,
                                         config=override,
                                         recursive=None,
                                         module_list=module_list)
    _, metadata = scan_output[0]
    assert metadata['conf'] == {'a': 'z', 'c': 'd'}
def setup(self):
    """Scan the fixture file list and cache raw plus parsed reports."""
    scan_results = multiscanner.multiscan(
        self.filelist, recursive=False, configregen=False,
        configfile='.tmpfile.ini')
    self.result = scan_results
    self.report = multiscanner.parse_reports(
        scan_results, includeMetadata=False, python=True)
    self.report_m = multiscanner.parse_reports(
        scan_results, includeMetadata=True, python=True)
def test_subscan():
    """Verify the full report chain produced when a module triggers subscans.

    The test_subscan module scans 'fake.zip' and recursively re-scans the
    child entries it reports; the expected value pins the exact order of
    module results and the Parent/Children/'Created by' subscan records.
    """
    m = multiscanner.multiscan(
        ['fake.zip'], recursive=None, configfile=None,
        module_list=[os.path.join(CWD, 'modules', 'test_subscan.py')])
    assert m == [([(u'fake.zip', 0)], {
        'Type': 'Test',
        'Name': 'test_subscan'
    }), ([(u'fake.zip/0', u'fake.zip')], {
        u'Include': False,
        u'Type': u'subscan',
        u'Name': u'Parent'
    }), ([(u'fake.zip', [u'fake.zip/0'])], {
        u'Include': False,
        u'Type': u'subscan',
        u'Name': u'Children'
    }), ([(u'fake.zip/0', u'test_subscan')], {
        u'Include': False,
        u'Type': u'subscan',
        u'Name': u'Created by'
    }), ([(u'fake.zip/0', 1)], {
        'Type': 'Test',
        'Name': 'test_subscan'
    }), ([(u'fake.zip/0/1', u'fake.zip/0')], {
        u'Include': False,
        u'Type': u'subscan',
        u'Name': u'Parent'
    }), ([(u'fake.zip/0', [u'fake.zip/0/1'])], {
        u'Include': False,
        u'Type': u'subscan',
        u'Name': u'Children'
    }), ([(u'fake.zip/0/1', u'test_subscan')], {
        u'Include': False,
        u'Type': u'subscan',
        u'Name': u'Created by'
    }), ([(u'fake.zip/0/1', 2)], {
        'Type': 'Test',
        'Name': 'test_subscan'
    })]  # noqa: E501
def multiscanner_process(work_queue, exit_signal):
    """Batch-scan files pulled from ``work_queue`` and store the reports.

    NOTE(review): ``exit_signal`` is never checked and the loop is
    ``while True``, so ``storage_handler.close()`` at the end is unreachable.
    NOTE(review): ``metadata_list`` is never reset after a batch is stored,
    so already-processed items appear to be re-processed on the next pass —
    compare with the variants that do ``metadata_list = []``.
    Depends on module-level globals (``BATCH_SIZE``, ``WAIT_SECONDS``,
    ``db``, ``storage_handler``); queue items look like
    (file_path, original_filename, task_id, report_id) — TODO confirm.
    """
    metadata_list = []
    time_stamp = None  # arrival time of the first item in the current batch
    while True:
        time.sleep(1)
        try:
            metadata_list.append(work_queue.get_nowait())
            if not time_stamp:
                time_stamp = time.time()
            # Fill the batch until BATCH_SIZE or the queue runs dry.
            while len(metadata_list) < BATCH_SIZE:
                metadata_list.append(work_queue.get_nowait())
        except queue.Empty:
            # Flush a partial batch only once it is older than WAIT_SECONDS.
            if metadata_list and time_stamp:
                if len(metadata_list) >= BATCH_SIZE:
                    pass
                elif time.time() - time_stamp > WAIT_SECONDS:
                    pass
                else:
                    continue
            else:
                continue

        filelist = [item[0] for item in metadata_list]
        resultlist = multiscanner.multiscan(
            filelist, configfile=multiscanner.CONFIG
        )
        results = multiscanner.parse_reports(resultlist, python=True)
        # Delete the scanned files from disk (keys are still file paths here).
        for file_name in results:
            os.remove(file_name)
        for item in metadata_list:
            # Re-key the report from tmp path to original filename.
            results[item[1]] = results[item[0]]
            del results[item[0]]
            db.update_task(
                task_id=item[2],
                task_status='Complete',
                report_id=item[3]
            )
        storage_handler.store(results, wait=False)
        filelist = []
        time_stamp = None
    storage_handler.close()
def multiscanner_process(work_queue, exit_signal):
    """Batch-scan queued files, re-key reports to original names, and store.

    NOTE(review): ``exit_signal`` is unused and the loop never terminates,
    making the final ``storage_handler.close()`` unreachable. ``metadata_list``
    is also never cleared after storing a batch. Relies on module globals
    (``BATCH_SIZE``, ``WAIT_SECONDS``, ``db``, ``storage_handler``).
    Queue items look like (file_path, original_filename, task_id, report_id)
    — TODO confirm against the producer.
    """
    metadata_list = []
    time_stamp = None  # when the first item of the in-progress batch arrived
    while True:
        time.sleep(1)
        try:
            metadata_list.append(work_queue.get_nowait())
            if not time_stamp:
                time_stamp = time.time()
            # Keep collecting until the batch is full or the queue is empty.
            while len(metadata_list) < BATCH_SIZE:
                metadata_list.append(work_queue.get_nowait())
        except queue.Empty:
            # Only flush a partial batch after WAIT_SECONDS have elapsed.
            if metadata_list and time_stamp:
                if len(metadata_list) >= BATCH_SIZE:
                    pass
                elif time.time() - time_stamp > WAIT_SECONDS:
                    pass
                else:
                    continue
            else:
                continue

        filelist = [item[0] for item in metadata_list]
        resultlist = multiscanner.multiscan(filelist,
                                            configfile=multiscanner.CONFIG)
        results = multiscanner.parse_reports(resultlist, python=True)
        # Remove the scanned files from disk before re-keying the report.
        for file_name in results:
            os.remove(file_name)
        for item in metadata_list:
            # Replace the tmp-path key with the original filename.
            results[item[1]] = results[item[0]]
            del results[item[0]]
            db.update_task(task_id=item[2], task_status='Complete',
                           report_id=item[3])
        storage_handler.store(results, wait=False)
        filelist = []
        time_stamp = None
    storage_handler.close()
def multiscanner_process(work_queue, config, batch_size, wait_seconds, delete,
                         exit_signal):
    """Worker loop: batch files from ``work_queue``, scan, and store reports.

    Args:
        work_queue: queue of file paths to scan (``get_nowait`` consumer).
        config: path to the MultiScanner config file.
        batch_size: scan is triggered once this many files are queued.
        wait_seconds: a partial batch is flushed after this many seconds.
        delete: if truthy, scanned files are removed from disk.
        exit_signal: shared value; the loop exits when ``.value`` is truthy.
    """
    filelist = []
    time_stamp = None  # arrival time of the first file in the current batch
    storage_conf = multiscanner.common.get_config_path(config, 'storage')
    storage_handler = multiscanner.storage.StorageHandler(
        configfile=storage_conf)
    while not exit_signal.value:
        time.sleep(1)
        try:
            filelist.append(work_queue.get_nowait())
            if not time_stamp:
                time_stamp = time.time()
            # Fill up to batch_size or until the queue is drained.
            while len(filelist) < batch_size:
                filelist.append(work_queue.get_nowait())
        except queue.Empty:
            # Flush a partial batch only once it has aged past wait_seconds.
            if filelist and time_stamp:
                if len(filelist) >= batch_size:
                    pass
                elif time.time() - time_stamp > wait_seconds:
                    pass
                else:
                    continue
            else:
                continue

        resultlist = multiscanner.multiscan(filelist, configfile=config)
        results = multiscanner.parse_reports(resultlist, python=True)
        if delete:
            for file_name in results:
                os.remove(file_name)
        storage_handler.store(results, wait=False)
        print('Scanned', len(results), 'files')
        filelist = []
        time_stamp = None
    storage_handler.close()
def multiscanner_process(work_queue, config, batch_size, wait_seconds, delete,
                         exit_signal):
    """Worker loop: collect queued files into batches, scan them, store results.

    Args:
        work_queue: queue of file paths (consumed with ``get_nowait``).
        config: MultiScanner config file path.
        batch_size: number of files that triggers an immediate scan.
        wait_seconds: age after which a partial batch is scanned anyway.
        delete: if truthy, scanned files are deleted from disk.
        exit_signal: shared value; loop ends when ``.value`` becomes truthy.
    """
    filelist = []
    time_stamp = None  # when the first file of the pending batch arrived
    storage_conf = utils.get_config_path(config, 'storage')
    storage_handler = storage.StorageHandler(configfile=storage_conf)
    while not exit_signal.value:
        time.sleep(1)
        try:
            filelist.append(work_queue.get_nowait())
            if not time_stamp:
                time_stamp = time.time()
            # Accumulate until a full batch or the queue empties.
            while len(filelist) < batch_size:
                filelist.append(work_queue.get_nowait())
        except queue.Empty:
            # Only flush a partial batch after wait_seconds have elapsed.
            if filelist and time_stamp:
                if len(filelist) >= batch_size:
                    pass
                elif time.time() - time_stamp > wait_seconds:
                    pass
                else:
                    continue
            else:
                continue

        resultlist = multiscan(filelist, configfile=config)
        results = parse_reports(resultlist, python=True)
        if delete:
            for file_name in results:
                os.remove(file_name)
        storage_handler.store(results, wait=False)
        print('Scanned', len(results), 'files')
        filelist = []
        time_stamp = None
    storage_handler.close()
def test_subscan():
    """Pin the exact report sequence emitted when a module spawns subscans.

    'fake.zip' is scanned by the test_subscan module, which reports child
    entries; each child is then re-scanned, producing Parent / Children /
    'Created by' bookkeeping records interleaved with module results.
    """
    m = multiscanner.multiscan(
        ['fake.zip'], recursive=None, configfile=None,
        module_list=[os.path.join(CWD, 'modules', 'test_subscan.py')])
    assert m == [([(u'fake.zip', 0)], {'Type': 'Test', 'Name': 'test_subscan'}),
                 ([(u'fake.zip/0', u'fake.zip')], {u'Include': False, u'Type': u'subscan', u'Name': u'Parent'}),
                 ([(u'fake.zip', [u'fake.zip/0'])], {u'Include': False, u'Type': u'subscan', u'Name': u'Children'}),
                 ([(u'fake.zip/0', u'test_subscan')], {u'Include': False, u'Type': u'subscan', u'Name': u'Created by'}),
                 ([(u'fake.zip/0', 1)], {'Type': 'Test', 'Name': 'test_subscan'}),
                 ([(u'fake.zip/0/1', u'fake.zip/0')], {u'Include': False, u'Type': u'subscan', u'Name': u'Parent'}),
                 ([(u'fake.zip/0', [u'fake.zip/0/1'])], {u'Include': False, u'Type': u'subscan', u'Name': u'Children'}),
                 ([(u'fake.zip/0/1', u'test_subscan')], {u'Include': False, u'Type': u'subscan', u'Name': u'Created by'}),
                 ([(u'fake.zip/0/1', 2)], {'Type': 'Test', 'Name': 'test_subscan'})]  # noqa: E501
def setup(self):
    """Run a scan over self.filelist and keep both parsed report flavors."""
    self.result = multiscanner.multiscan(self.filelist,
                                         recursive=False,
                                         configregen=False,
                                         configfile='.tmpfile.ini')
    # Parsed report without metadata, then with metadata included.
    self.report = multiscanner.parse_reports(self.result,
                                             includeMetadata=False,
                                             python=True)
    self.report_m = multiscanner.parse_reports(self.result,
                                               includeMetadata=True,
                                               python=True)
def multiscanner_process(work_queue, exit_signal):
    '''Not used in distributed mode.

    Batch queued scan requests, run MultiScanner, attach scan metadata and
    tags, update the task DB, and store the reports.

    NOTE(review): ``exit_signal`` is never consulted; the ``while True`` loop
    makes the final ``storage_handler.close()`` unreachable. Uses module-level
    globals (``batch_size``, ``batch_interval``, ``delete_after_scan``,
    ``db``, ``storage_handler``, ``MS_CONFIG``). Queue items appear to be
    (file_path, original_filename, task_id, ?, metadata_dict) — TODO confirm.
    '''
    metadata_list = []
    time_stamp = None  # arrival time of the first item in the current batch
    while True:
        time.sleep(1)
        try:
            metadata_list.append(work_queue.get_nowait())
            if not time_stamp:
                time_stamp = time.time()
            # Fill the batch until batch_size or the queue runs dry.
            while len(metadata_list) < batch_size:
                metadata_list.append(work_queue.get_nowait())
        except queue.Empty:
            # Flush a partial batch only after batch_interval seconds.
            if metadata_list and time_stamp:
                if len(metadata_list) >= batch_size:
                    pass
                elif time.time() - time_stamp > batch_interval:
                    pass
                else:
                    continue
            else:
                continue

        filelist = [item[0] for item in metadata_list]
        # modulelist = [item[5] for item in metadata_list]
        resultlist = multiscan(
            filelist, configfile=MS_CONFIG  # module_list
        )
        results = parse_reports(resultlist, python=True)

        scan_time = datetime.now().isoformat()

        if delete_after_scan:
            for file_name in results:
                os.remove(file_name)

        for item in metadata_list:
            # Use the original filename as the index instead of the full path
            results[item[1]] = results[item[0]]
            del results[item[0]]
            results[item[1]]['Scan Metadata'] = item[4]
            results[item[1]]['Scan Metadata']['Scan Time'] = scan_time
            results[item[1]]['Scan Metadata']['Task ID'] = item[2]
            # Promote the comma-separated 'Tags' metadata entry to a
            # top-level 'tags' list, then drop it from Scan Metadata.
            results[item[1]]['tags'] = results[item[1]]['Scan Metadata'].get('Tags', '').split(',')
            results[item[1]]['Scan Metadata'].pop('Tags', None)

            db.update_task(
                task_id=item[2],
                task_status='Complete',
                timestamp=scan_time,
            )
        metadata_list = []

        storage_handler.store(results, wait=False)

        filelist = []
        time_stamp = None
    storage_handler.close()
def multiscanner_celery(file_, original_filename, task_id, file_hash, metadata, config=multiscanner.CONFIG, module_list=None): ''' Queue up multiscanner tasks Usage: from celery_worker import multiscanner_celery multiscanner_celery.delay(full_path, original_filename, task_id, hashed_filename, metadata, config, module_list) ''' # Initialize the connection to the task DB db.init_db() logger.info('\n\n{}{}Got file: {}.\nOriginal filename: {}.\n'.format('='*48, '\n', file_hash, original_filename)) # Get the storage config storage_conf = multiscanner.common.get_config_path(config, 'storage') storage_handler = multiscanner.storage.StorageHandler(configfile=storage_conf) resultlist = multiscanner.multiscan( [file_], configfile=config, module_list=module_list ) results = multiscanner.parse_reports(resultlist, python=True) scan_time = datetime.now().isoformat() # Get the Scan Config that the task was run with and # add it to the task metadata scan_config_object = configparser.SafeConfigParser() scan_config_object.optionxform = str scan_config_object.read(config) full_conf = common.parse_config(scan_config_object) sub_conf = {} # Count number of modules enabled out of total possible # and add it to the Scan Metadata total_enabled = 0 total_modules = 0 for key in full_conf: if key == 'main': continue sub_conf[key] = {} sub_conf[key]['ENABLED'] = full_conf[key]['ENABLED'] total_modules += 1 if sub_conf[key]['ENABLED'] is True: total_enabled += 1 results[file_]['Scan Metadata'] = {} results[file_]['Scan Metadata']['Worker Node'] = gethostname() results[file_]['Scan Metadata']['Scan Config'] = sub_conf results[file_]['Scan Metadata']['Modules Enabled'] = '{} / {}'.format( total_enabled, total_modules ) # Use the original filename as the value for the filename # in the report (instead of the tmp path assigned to the file # by the REST API) results[original_filename] = results[file_] del results[file_] results[original_filename]['Scan Time'] = scan_time 
results[original_filename]['Metadata'] = metadata # Update the task DB to reflect that the task is done db.update_task( task_id=task_id, task_status='Complete', timestamp=scan_time, ) # Save the reports to storage storage_handler.store(results, wait=False) storage_handler.close() logger.info('Completed Task #{}'.format(task_id)) return results
def multiscanner_celery(file_, original_filename, task_id, file_hash, metadata,
                        config=MS_CONFIG, module_list=None):
    '''
    Queue up multiscanner tasks

    Usage:
    from celery_worker import multiscanner_celery
    multiscanner_celery.delay(full_path, original_filename, task_id,
                              hashed_filename, metadata, config, module_list)

    Scans a single file, attaches scan metadata, re-keys the report to the
    original filename, stores the report, and only marks the task complete
    after storage succeeds. Raises ValueError if the report fails to index
    (cleanup is expected to happen in MultiScannerTask.on_failure — per the
    comment below; TODO confirm).
    '''
    # Initialize the connection to the task DB
    db.init_db()
    logger.info('\n\n{}{}Got file: {}.\nOriginal filename: {}.\n'.format('=' * 48, '\n', file_hash, original_filename))
    # Get the storage config
    storage_conf = utils.get_config_path(config, 'storage')
    storage_handler = storage.StorageHandler(configfile=storage_conf)
    resultlist = multiscan(
        [file_],
        configfile=config,
        module_list=module_list
    )
    results = parse_reports(resultlist, python=True)
    scan_time = datetime.now().isoformat()
    # Get the Scan Config that the task was run with and
    # add it to the task metadata
    scan_config_object = configparser.SafeConfigParser()
    scan_config_object.optionxform = str
    scan_config_object.read(config)
    full_conf = utils.parse_config(scan_config_object)
    sub_conf = {}
    # Count number of modules enabled out of total possible
    # and add it to the Scan Metadata
    total_enabled = 0
    # NOTE(review): this counts every config section, including 'main',
    # unlike the loop below which skips 'main'.
    total_modules = len(full_conf.keys())
    # Get the count of modules enabled from the module_list
    # if it exists, else count via the config
    if module_list:
        total_enabled = len(module_list)
    else:
        for key in full_conf:
            if key == 'main':
                continue
            sub_conf[key] = {}
            sub_conf[key]['ENABLED'] = full_conf[key]['ENABLED']
            if sub_conf[key]['ENABLED'] is True:
                total_enabled += 1
    results[file_]['Scan Metadata'] = metadata
    results[file_]['Scan Metadata']['Worker Node'] = gethostname()
    results[file_]['Scan Metadata']['Scan Config'] = sub_conf
    results[file_]['Scan Metadata']['Modules Enabled'] = '{} / {}'.format(
        total_enabled, total_modules
    )
    results[file_]['Scan Metadata']['Scan Time'] = scan_time
    results[file_]['Scan Metadata']['Task ID'] = task_id
    # Use the original filename as the value for the filename
    # in the report (instead of the tmp path assigned to the file
    # by the REST API)
    results[original_filename] = results[file_]
    del results[file_]
    # Save the reports to storage
    storage_ids = storage_handler.store(results, wait=False)
    storage_handler.close()
    # Only need to raise ValueError here,
    # Further cleanup will be handled by the on_failure method
    # of MultiScannerTask
    if not storage_ids:
        raise ValueError('Report failed to index')
    # Update the task DB to reflect that the task is done
    db.update_task(
        task_id=task_id,
        task_status='Complete',
        timestamp=scan_time,
    )
    logger.info('Completed Task #{}'.format(task_id))
    return results
    Parses arguments
    """
    import argparse
    # NOTE(review): 'import parser' pulls in the deprecated stdlib 'parser'
    # module and is immediately shadowed by the argparse object below —
    # it appears unused and could likely be removed.
    import parser
    #argparse stuff
    parser = argparse.ArgumentParser(description="Scan files and store results in elastic search")
    parser.add_argument("-r", "--recursive", action="store_true")
    parser.add_argument("-v", "--verbose", action="store_true")
    parser.add_argument('Files', help="Files and Directories to attach", nargs='+')
    return parser.parse_args()


def results2es(results):
    """
    Takes a dictionary of Filename: {Results} and stores it in elastic search.

    Each report is indexed under its SHA256 value, with the filename added
    to the document body. Index creation ignores the 'already exists' (400)
    response.
    """
    es = elasticsearch.Elasticsearch(hosts=ES_HOSTS)
    es.indices.create(index=ES_INDEX, ignore=400)
    for fname in results:
        result = results[fname]
        result['filename'] = fname
        es.index(index=ES_INDEX, doc_type=ES_DOCTYPE, id=result['SHA256'], body=result)


# Python 2 script entry point (bare print statements below).
if __name__ == '__main__':
    args = parse_args()
    print "Starting scan..."
    results = multiscanner.multiscan(args.Files, recursive=args.recursive)
    results = multiscanner.parseReports(results, python=True, includeMetadata=False)
    print "Storing results..."
    results2es(results)
    print "Done!"
def test_no_config():
    """Without any configuration, the scan reports the default conf values."""
    scan = multiscanner.multiscan(
        filelist,
        configfile=None,
        config=None,
        recursive=None,
        module_list=module_list,
    )
    _, metadata = scan[0]
    assert metadata['conf'] == {'a': 'b', 'c': 'd'}
def test_config_api_no_file():
    """An in-memory config dict overrides defaults when no file is given."""
    first_result = multiscanner.multiscan(
        filelist,
        configfile=None,
        config={'test_conf': {'a': 'z'}},
        recursive=None,
        module_list=module_list,
    )[0]
    metadata = first_result[1]
    assert metadata['conf'] == {'a': 'z', 'c': 'd'}