def __init__(self, configfile=MS_CONFIG, config=None, configregen=False):
    self.storage_lock = threading.Lock()
    self.storage_counter = ThreadCounter()
    # Load all storage classes
    storage_classes = _get_storage_classes()

    # Read in config
    if configfile:
        configfile = utils.get_config_path(MS_CONFIG, 'storage')
        config_object = configparser.SafeConfigParser()
        config_object.optionxform = str
        # Regen the config if needed or wanted
        if configregen or not os.path.isfile(configfile):
            _write_main_config(config_object)
            _rewrite_config(storage_classes, config_object, configfile)

        config_object.read(configfile)
        if config:
            file_conf = utils.parse_config(config_object)
            for key in config:
                if key not in file_conf:
                    file_conf[key] = config[key]
                    file_conf[key]['_load_default'] = True
                else:
                    file_conf[key].update(config[key])
            config = file_conf
        else:
            config = utils.parse_config(config_object)
    else:
        if config is None:
            config = {}
            for storage_name in storage_classes:
                config[storage_name] = {}
            config['_load_default'] = True

    self.sleep_time = config.get('main', {}).get('retry_time', DEFAULTCONF['retry_time'])
    self.num_retries = config.get('main', {}).get('retry_num', DEFAULTCONF['retry_num'])

    # Set the config inside of the storage classes
    for storage_name in storage_classes:
        if storage_name in config:
            if '_load_default' in config or '_load_default' in config[storage_name]:
                # Remove _load_default from config
                if '_load_default' in config[storage_name]:
                    del config[storage_name]['_load_default']
                # Update the default storage config
                storage_classes[storage_name].config = storage_classes[storage_name].DEFAULTCONF
                storage_classes[storage_name].config.update(config[storage_name])
            else:
                storage_classes[storage_name].config = config[storage_name]

    self.storage_classes = storage_classes
    self.loaded_storage = {}

    # Setup each enabled storage
    self.load_modules()
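# Hedged usage sketch (not part of the original module): how a caller might
# construct a StorageHandler with an inline config override instead of a file.
# The 'ElasticSearchStorage' key and the report dict below are illustrative
# assumptions; real keys come from each storage class's DEFAULTCONF.
#
#     handler = StorageHandler(
#         configfile=None,
#         config={'ElasticSearchStorage': {'ENABLED': True}})
#     handler.store({'/tmp/sample.bin': {'MD5': 'd41d8cd9'}}, wait=False)
#     handler.close()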
def __init__(self, debug=False):
    storage_conf = utils.get_config_path(MS_CONFIG, 'storage')
    config_object = configparser.SafeConfigParser()
    config_object.optionxform = str
    config_object.read(storage_conf)
    conf = utils.parse_config(config_object)
    storage_handler = storage.StorageHandler(configfile=storage_conf)
    es_handler = storage_handler.load_required_module('ElasticSearchStorage')
    if not es_handler:
        print('[!] ERROR: This analytic only works with the ES storage module.')
        sys.exit(1)  # exit nonzero on fatal error; bailing here is probably not ideal...

    self.es = es_handler.es
    self.index = conf['ElasticSearchStorage']['index']
    self.doc_type = '_doc'
    self.debug = debug
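# Illustrative sketch (an assumption, not in the original source): once
# constructed, the analytic reaches Elasticsearch directly through self.es.
# The class name below is assumed for demonstration purposes.
#
#     analytic = SSDeepAnalytic(debug=True)
#     hits = analytic.es.search(index=analytic.index,
#                               body={'query': {'match_all': {}}, 'size': 10})
#     print(hits['hits']['hits'])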
def multiscanner_process(work_queue, config, batch_size, wait_seconds, delete, exit_signal):
    filelist = []
    time_stamp = None
    storage_conf = utils.get_config_path(config, 'storage')
    storage_handler = storage.StorageHandler(configfile=storage_conf)
    while not exit_signal.value:
        time.sleep(1)
        try:
            filelist.append(work_queue.get_nowait())
            if not time_stamp:
                time_stamp = time.time()
            while len(filelist) < batch_size:
                filelist.append(work_queue.get_nowait())
        except queue.Empty:
            if filelist and time_stamp:
                if len(filelist) >= batch_size:
                    # Full batch collected; fall through and scan it
                    pass
                elif time.time() - time_stamp > wait_seconds:
                    # Partial batch that has waited long enough; scan it
                    pass
                else:
                    continue
            else:
                continue

        resultlist = multiscan(filelist, configfile=config)
        results = parse_reports(resultlist, python=True)
        if delete:
            for file_name in results:
                os.remove(file_name)

        storage_handler.store(results, wait=False)
        print('Scanned', len(results), 'files')

        filelist = []
        time_stamp = None
    storage_handler.close()
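# Hedged usage sketch: one way a parent process might drive
# multiscanner_process. The queue/exit-signal wiring below is an assumption
# for illustration, not code taken from this project.
#
#     import multiprocessing
#     work_queue = multiprocessing.Queue()
#     exit_signal = multiprocessing.Value('b', False)
#     proc = multiprocessing.Process(
#         target=multiscanner_process,
#         args=(work_queue, MS_CONFIG, 100, 30, False, exit_signal))
#     proc.start()
#     work_queue.put('/tmp/sample.bin')  # queued files are scanned in batches
#     exit_signal.value = True           # lets the loop finish and clean up
#     proc.join()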
class CustomJSONEncoder(JSONEncoder):
    def default(self, obj):
        if isinstance(obj, datetime):
            # Normalize timezone-aware datetimes to UTC before serializing
            if obj.utcoffset() is not None:
                obj = obj - obj.utcoffset()
            return str(obj)
        else:
            return JSONEncoder.default(self, obj)


app = Flask(__name__)
app.json_encoder = CustomJSONEncoder

api_config_object = configparser.SafeConfigParser()
api_config_object.optionxform = str
# TODO: Why does this use multiscanner.common instead of just common?
api_config_file = utils.get_config_path(MS_CONFIG, 'api')
api_config_object.read(api_config_file)
if not api_config_object.has_section('api') or not os.path.isfile(api_config_file):
    # Write default config
    api_config_object.add_section('api')
    for key in DEFAULTCONF:
        api_config_object.set('api', key, str(DEFAULTCONF[key]))
    conffile = codecs.open(api_config_file, 'w', 'utf-8')
    api_config_object.write(conffile)
    conffile.close()
api_config = utils.parse_config(api_config_object)

# TODO: fix this mess
# Needs api_config in order to function properly
from multiscanner.distributed.celery_worker import multiscanner_celery, ssdeep_compare_celery
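# Quick illustration (an assumption, not in the original source) of what
# CustomJSONEncoder above does with timezone-aware datetimes: the UTC offset
# is subtracted before serializing, so stored timestamps line up regardless
# of the submitter's timezone.
#
#     from datetime import timedelta, timezone
#     aware = datetime(2019, 1, 1, 12, 0,
#                      tzinfo=timezone(timedelta(hours=-5)))
#     json.dumps({'ts': aware}, cls=CustomJSONEncoder)
#     # -> '{"ts": "2019-01-01 17:00:00-05:00"}'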
def multiscanner_celery(file_, original_filename, task_id, file_hash, metadata,
                        config=MS_CONFIG, module_list=None):
    '''
    Queue up multiscanner tasks

    Usage:
    from celery_worker import multiscanner_celery
    multiscanner_celery.delay(full_path, original_filename, task_id,
                              hashed_filename, metadata, config, module_list)
    '''
    # Initialize the connection to the task DB
    db.init_db()

    logger.info('\n\n{}{}Got file: {}.\nOriginal filename: {}.\n'.format('=' * 48, '\n', file_hash, original_filename))

    # Get the storage config
    storage_conf = utils.get_config_path(config, 'storage')
    storage_handler = storage.StorageHandler(configfile=storage_conf)

    resultlist = multiscan(
        [file_],
        configfile=config,
        module_list=module_list
    )
    results = parse_reports(resultlist, python=True)

    scan_time = datetime.now().isoformat()

    # Get the Scan Config that the task was run with and
    # add it to the task metadata
    scan_config_object = configparser.SafeConfigParser()
    scan_config_object.optionxform = str
    scan_config_object.read(config)
    full_conf = utils.parse_config(scan_config_object)
    sub_conf = {}
    # Count number of modules enabled out of total possible
    # and add it to the Scan Metadata
    total_enabled = 0
    total_modules = len(full_conf.keys())

    # Get the count of modules enabled from the module_list
    # if it exists, else count via the config
    if module_list:
        total_enabled = len(module_list)
    else:
        for key in full_conf:
            if key == 'main':
                continue
            sub_conf[key] = {}
            sub_conf[key]['ENABLED'] = full_conf[key]['ENABLED']
            if sub_conf[key]['ENABLED'] is True:
                total_enabled += 1

    results[file_]['Scan Metadata'] = metadata
    results[file_]['Scan Metadata']['Worker Node'] = gethostname()
    results[file_]['Scan Metadata']['Scan Config'] = sub_conf
    results[file_]['Scan Metadata']['Modules Enabled'] = '{} / {}'.format(
        total_enabled, total_modules
    )
    results[file_]['Scan Metadata']['Scan Time'] = scan_time
    results[file_]['Scan Metadata']['Task ID'] = task_id

    # Use the original filename as the value for the filename
    # in the report (instead of the tmp path assigned to the file
    # by the REST API)
    results[original_filename] = results[file_]
    del results[file_]

    # Save the reports to storage
    storage_ids = storage_handler.store(results, wait=False)
    storage_handler.close()

    # Only need to raise ValueError here,
    # further cleanup will be handled by the on_failure method
    # of MultiScannerTask
    if not storage_ids:
        raise ValueError('Report failed to index')

    # Update the task DB to reflect that the task is done
    db.update_task(
        task_id=task_id,
        task_status='Complete',
        timestamp=scan_time,
    )
    logger.info('Completed Task #{}'.format(task_id))

    return results
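# Hedged example of enqueueing a scan (all paths, IDs, and hashes below are
# hypothetical): .delay() is Celery's shorthand for .apply_async() with
# default routing, matching the Usage note in the docstring above.
#
#     multiscanner_celery.delay(
#         '/tmp/uploads/9f86d081', 'evil.exe', 42, '9f86d081',
#         metadata={'Submitter Name': 'analyst'},
#         config=MS_CONFIG, module_list=None)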
logger = get_task_logger(__name__)

DEFAULTCONF = {
    'protocol': 'pyamqp',
    'host': 'localhost',
    'user': '******',
    'password': '',
    'vhost': '/',
    'flush_every': '100',
    'flush_interval': '10',
    'tz': 'US/Eastern',
}

config_object = configparser.SafeConfigParser()
config_object.optionxform = str
configfile = utils.get_config_path(MS_CONFIG, 'api')
config_object.read(configfile)

if not config_object.has_section('celery') or not os.path.isfile(configfile):
    # Write default config
    config_object.add_section('celery')
    for key in DEFAULTCONF:
        config_object.set('celery', key, str(DEFAULTCONF[key]))
    conffile = codecs.open(configfile, 'w', 'utf-8')
    config_object.write(conffile)
    conffile.close()

config = utils.parse_config(config_object)
api_config = config.get('api')
worker_config = config.get('celery')
db_config = config.get('Database')
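# Sketch (assumed wiring, not shown in this excerpt): the [celery] values
# above are typically assembled into a Celery broker URL along these lines.
#
#     broker_url = '{0}://{1}:{2}@{3}/{4}'.format(
#         worker_config['protocol'], worker_config['user'],
#         worker_config['password'], worker_config['host'],
#         worker_config['vhost'])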
"Submitter Email", "Submitter Organization", "Submitter Phone", ], 'TAGS': [ "Malware", "Benign" ] } app = Flask(__name__) # Finagle Flask to read config from .ini file instead of .py file web_config_object = configparser.SafeConfigParser() web_config_object.optionxform = str web_config_file = utils.get_config_path(MS_CONFIG, 'web') web_config_object.read(web_config_file) if not web_config_object.has_section('web') or not os.path.isfile(web_config_file): # Write default config web_config_object.add_section('web') for key in DEFAULTCONF: web_config_object.set('web', key, str(DEFAULTCONF[key])) conffile = codecs.open(web_config_file, 'w', 'utf-8') web_config_object.write(conffile) conffile.close() web_config = utils.parse_config(web_config_object)['web'] conf_tuple = namedtuple('WebConfig', web_config.keys())(*web_config.values()) app.config.from_object(conf_tuple) @app.context_processor
DEFAULTCONF = {
    'METADATA_FIELDS': [
        "Submitter Name",
        "Submission Description",
        "Submitter Email",
        "Submitter Organization",
        "Submitter Phone",
    ],
    'TAGS': ["Malware", "Benign"]
}

app = Flask(__name__)

# Finagle Flask to read config from .ini file instead of .py file
web_config_object = configparser.SafeConfigParser()
web_config_object.optionxform = str
web_config_file = utils.get_config_path(MS_CONFIG, 'web')
web_config_object.read(web_config_file)
if not web_config_object.has_section('web') or not os.path.isfile(web_config_file):
    # Write default config
    web_config_object.add_section('web')
    for key in DEFAULTCONF:
        web_config_object.set('web', key, str(DEFAULTCONF[key]))
    conffile = codecs.open(web_config_file, 'w', 'utf-8')
    web_config_object.write(conffile)
    conffile.close()
web_config = utils.parse_config(web_config_object)['web']
conf_tuple = namedtuple('WebConfig', web_config.keys())(*web_config.values())
app.config.from_object(conf_tuple)
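# Minimal illustration (an assumption, not in the original source) of the
# namedtuple trick above: Flask's from_object() copies every UPPERCASE
# attribute of the object it is given, and namedtuple fields are plain
# attributes, so a parsed .ini section can masquerade as a config object.
#
#     fields = {'DEBUG': True, 'TAGS': ['Malware', 'Benign']}
#     fake_cfg = namedtuple('WebConfig', fields.keys())(*fields.values())
#     app.config.from_object(fake_cfg)
#     # app.config['TAGS'] == ['Malware', 'Benign']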