def __init__(self, debug=False):
    storage_conf = utils.get_config_path(MS_CONFIG, 'storage')
    config_object = configparser.SafeConfigParser()
    config_object.optionxform = str
    config_object.read(storage_conf)
    conf = utils.parse_config(config_object)
    storage_handler = storage.StorageHandler(configfile=storage_conf)
    es_handler = storage_handler.load_required_module('ElasticSearchStorage')

    if not es_handler:
        print('[!] ERROR: This analytic only works with ES storage module.')
        sys.exit(0)  # probably not ideal...

    self.es = es_handler.es
    self.index = conf['ElasticSearchStorage']['index']
    self.doc_type = '_doc'
    self.debug = debug
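# A minimal usage sketch for the analytic above (hedged: the enclosing class
# name 'ElasticAnalytic' is hypothetical, and this assumes the storage config
# referenced by MS_CONFIG has the ElasticSearchStorage module enabled):
#
#     analytic = ElasticAnalytic(debug=True)
#     hits = analytic.es.search(index=analytic.index,
#                               body={'query': {'match_all': {}}})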
def multiscanner_process(work_queue, config, batch_size, wait_seconds, delete, exit_signal):
    filelist = []
    time_stamp = None
    storage_conf = utils.get_config_path(config, 'storage')
    storage_handler = storage.StorageHandler(configfile=storage_conf)
    while not exit_signal.value:
        time.sleep(1)
        try:
            # Pull files off the work queue until we have a full batch
            filelist.append(work_queue.get_nowait())
            if not time_stamp:
                time_stamp = time.time()
            while len(filelist) < batch_size:
                filelist.append(work_queue.get_nowait())
        except queue.Empty:
            # Scan a partial batch only if it is full or has waited long enough
            if filelist and time_stamp:
                if len(filelist) >= batch_size:
                    pass
                elif time.time() - time_stamp > wait_seconds:
                    pass
                else:
                    continue
            else:
                continue

        resultlist = multiscan(filelist, configfile=config)
        results = parse_reports(resultlist, python=True)
        if delete:
            for file_name in results:
                os.remove(file_name)

        storage_handler.store(results, wait=False)
        print('Scanned', len(results), 'files')

        # Reset the batch and its timer
        filelist = []
        time_stamp = None
    storage_handler.close()
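# A rough sketch of how this worker loop might be driven (hedged: the exact
# wiring in the surrounding service may differ, and the batch size and wait
# values below are illustrative only). 'exit_signal' is assumed to be a
# multiprocessing.Value, since the loop checks 'exit_signal.value', and
# 'work_queue' a multiprocessing.Queue, since queue.Empty is caught on
# get_nowait():
#
#     import multiprocessing
#     work_queue = multiprocessing.Queue()
#     exit_signal = multiprocessing.Value('b', False)
#     proc = multiprocessing.Process(
#         target=multiscanner_process,
#         args=(work_queue, MS_CONFIG, 100, 60, False, exit_signal))
#     proc.start()
#     work_queue.put('/path/to/sample')   # enqueue files as they arrive
#     exit_signal.value = True            # ask the loop to finish and stop
#     proc.join()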
def metricbeat_rollover(days, config=MS_CONFIG):
    '''
    Clean up old Elastic Beats indices
    '''
    try:
        # Get the storage config
        storage_configfile = utils.get_config_path(config, 'storage')
        storage_handler = storage.StorageHandler(configfile=storage_configfile)

        # Pull the ElasticSearchStorage section from the storage config
        config_object = configparser.SafeConfigParser()
        config_object.optionxform = str
        config_object.read(storage_configfile)
        es_storage_config = utils.parse_config(config_object).get('ElasticSearchStorage', {})

        metricbeat_enabled = es_storage_config.get('metricbeat_enabled', True)

        if not metricbeat_enabled:
            logger.debug('Metricbeat logging not enabled, exiting...')
            return

        if not days:
            days = es_storage_config.get('metricbeat_rollover_days')
        if not days:
            raise NameError(
                "name 'days' is not defined, check storage.ini for "
                "'metricbeat_rollover_days' setting"
            )

        # Find Elastic storage
        for handler in storage_handler.loaded_storage:
            if isinstance(handler, elasticsearch_storage.ElasticSearchStorage):
                ret = handler.delete_index(index_prefix='metricbeat', days=days)

                if ret is False:
                    logger.warn('Metricbeat Roller failed')
                else:
                    logger.info(
                        'Metricbeat indices older than {} days deleted'.format(days))
    except Exception as e:
        logger.warn(e)
    finally:
        storage_handler.close()
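# A hedged usage sketch: this cleanup task could be invoked directly or on a
# schedule (e.g. from a celery beat entry); the seven-day retention below is
# an arbitrary illustrative value, not a project default:
#
#     metricbeat_rollover(days=7)      # delete metricbeat indices older than 7 days
#     metricbeat_rollover(days=None)   # fall back to storage.ini 'metricbeat_rollover_days'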
def _main():
    global CONFIG, VERBOSE
    # Force all prints to go to stderr
    stdout = sys.stdout
    sys.stdout = sys.stderr

    # Get args
    args = _parse_args()

    # Set config or update locations
    if args.config is None:
        args.config = CONFIG
    else:
        CONFIG = args.config
        _update_DEFAULTCONF(DEFAULTCONF, CONFIG)

    # Set verbose
    if args.verbose:
        VERBOSE = args.verbose

    # Checks if user is trying to initialize
    if str(args.Files) == "['init']" and not os.path.isfile('init'):
        _init(args)

    if not os.path.isfile(args.config):
        config_init(args.config)

    # Make sure report is not a dir
    if args.json:
        if os.path.isdir(args.json):
            sys.exit('ERROR: {} is a directory, a file is expected'.format(args.json))

    # Parse the file list
    parsedlist = parseFileList(args.Files, recursive=args.recursive)

    # Unzip zip files if asked to
    if args.extractzips:
        for fname in parsedlist:
            if zipfile.is_zipfile(fname):
                unzip_dir = os.path.join('_tmp', os.path.basename(fname))
                z = zipfile.ZipFile(fname)
                if PY3:
                    args.password = bytes(args.password, 'utf-8')
                try:
                    z.extractall(path=unzip_dir, pwd=args.password)
                    for uzfile in z.namelist():
                        parsedlist.append(os.path.join(unzip_dir, uzfile))
                except RuntimeError as e:
                    print("ERROR: Failed to extract ", fname, ' - ', e, sep='')
                parsedlist.remove(fname)

    if not parsedlist:
        sys.exit("ERROR: No valid files found!")

    # Resume from report
    if args.resume:
        i = len(parsedlist)
        try:
            reportfile = codecs.open(args.json, 'r', 'utf-8')
        except Exception as e:
            sys.exit("ERROR: Could not open report file")
        for line in reportfile:
            line = json.loads(line)
            for fname in line:
                if fname in parsedlist:
                    parsedlist.remove(fname)
        reportfile.close()
        i = i - len(parsedlist)
        if VERBOSE:
            print("Skipping", i, "files which are in the report already")

    # Do multiple runs if there are too many files
    filelists = []
    if len(parsedlist) > args.numberper:
        while len(parsedlist) > args.numberper:
            filelists.append(parsedlist[:args.numberper])
            parsedlist = parsedlist[args.numberper:]
    if parsedlist:
        filelists.append(parsedlist)

    for filelist in filelists:
        # Record start time for metadata
        starttime = str(datetime.datetime.now())

        # Run the multiscan
        results = multiscan(filelist, configfile=args.config)

        # We need to read in the config for the parseReports call
        config = configparser.SafeConfigParser()
        config.optionxform = str
        config.read(args.config)
        config = _get_main_config(config)
        # Make sure we have a group-types
        if "group-types" not in config:
            config["group-types"] = []
        elif not config["group-types"]:
            config["group-types"] = []

        # Add in script metadata
        endtime = str(datetime.datetime.now())

        # For windows compatibility
        try:
            username = os.getlogin()
        except Exception as e:
            # TODO: log exception
            username = os.getenv('USERNAME')

        # Add metadata to the scan
        results.append((
            [],
            {
                "Name": "MultiScanner",
                "Start Time": starttime,
                "End Time": endtime,
                # "Command Line": list2cmdline(sys.argv),
                "Run by": username
            }))

        # Add tags if present
        if args.tag:
            tag_results = []
            for filename in filelist:
                tag_results.append((filename, args.tag))
            results.append((tag_results, {"Name": "tags", "Type": "Metadata"}))

        if args.show or not stdout.isatty():
            # TODO: Make this output something readable
            # Parse Results
            report = parse_reports(results, groups=config["group-types"],
                                   ugly=args.ugly, includeMetadata=args.metadata)

            # Print report
            try:
                print(convert_encoding(report, encoding='ascii', errors='replace'),
                      file=stdout)
                stdout.flush()
            except Exception as e:
                print('ERROR: Can\'t print report -', e)

        report = parse_reports(results, groups=config["group-types"],
                               includeMetadata=args.metadata, python=True)

        update_conf = None
        if args.json:
            update_conf = {'File': {'path': args.json}}
            if args.json.endswith('.gz') or args.json.endswith('.gzip'):
                update_conf['File']['gzip'] = True

        if 'storage-config' not in config:
            config["storage-config"] = None
        storage_handle = storage.StorageHandler(
            configfile=config["storage-config"], config=update_conf)
        storage_handle.store(report)
        storage_handle.close()

    # Cleanup zip extracted files
    if args.extractzips:
        shutil.rmtree('_tmp')
def multiscanner_celery(file_, original_filename, task_id, file_hash, metadata,
                        config=MS_CONFIG, module_list=None):
    '''
    Queue up multiscanner tasks

    Usage:
    from celery_worker import multiscanner_celery
    multiscanner_celery.delay(full_path, original_filename, task_id,
                              hashed_filename, metadata, config, module_list)
    '''
    # Initialize the connection to the task DB
    db.init_db()

    logger.info('\n\n{}{}Got file: {}.\nOriginal filename: {}.\n'.format(
        '=' * 48, '\n', file_hash, original_filename))

    # Get the storage config
    storage_conf = utils.get_config_path(config, 'storage')
    storage_handler = storage.StorageHandler(configfile=storage_conf)

    resultlist = multiscan([file_], configfile=config, module_list=module_list)
    results = parse_reports(resultlist, python=True)

    scan_time = datetime.now().isoformat()

    # Get the Scan Config that the task was run with and
    # add it to the task metadata
    scan_config_object = configparser.SafeConfigParser()
    scan_config_object.optionxform = str
    scan_config_object.read(config)
    full_conf = utils.parse_config(scan_config_object)
    sub_conf = {}
    # Count number of modules enabled out of total possible
    # and add it to the Scan Metadata
    total_enabled = 0
    total_modules = len(full_conf.keys())

    # Get the count of modules enabled from the module_list
    # if it exists, else count via the config
    if module_list:
        total_enabled = len(module_list)
    else:
        for key in full_conf:
            if key == 'main':
                continue
            sub_conf[key] = {}
            sub_conf[key]['ENABLED'] = full_conf[key]['ENABLED']
            if sub_conf[key]['ENABLED'] is True:
                total_enabled += 1

    results[file_]['Scan Metadata'] = metadata
    results[file_]['Scan Metadata']['Worker Node'] = gethostname()
    results[file_]['Scan Metadata']['Scan Config'] = sub_conf
    results[file_]['Scan Metadata']['Modules Enabled'] = '{} / {}'.format(
        total_enabled, total_modules)
    results[file_]['Scan Metadata']['Scan Time'] = scan_time
    results[file_]['Scan Metadata']['Task ID'] = task_id

    # Use the original filename as the value for the filename
    # in the report (instead of the tmp path assigned to the file
    # by the REST API)
    results[original_filename] = results[file_]
    del results[file_]

    # Save the reports to storage
    storage_ids = storage_handler.store(results, wait=False)
    storage_handler.close()

    # Only need to raise ValueError here,
    # Further cleanup will be handled by the on_failure method
    # of MultiScannerTask
    if not storage_ids:
        raise ValueError('Report failed to index')

    # Update the task DB to reflect that the task is done
    db.update_task(
        task_id=task_id,
        task_status='Complete',
        timestamp=scan_time,
    )

    logger.info('Completed Task #{}'.format(task_id))

    return results
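# For reference, a hedged sketch of the report structure this task stores
# (field values are illustrative, and the per-module result keys depend on
# which scan modules are enabled):
#
#     {
#         'original_name.exe': {
#             'Scan Metadata': {
#                 'Worker Node': 'worker01',
#                 'Scan Config': {'Cuckoo': {'ENABLED': False}, ...},
#                 'Modules Enabled': '12 / 20',
#                 'Scan Time': '2018-01-01T00:00:00',
#                 'Task ID': 42,
#             },
#             ...  # per-module results keyed by module name
#         }
#     }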