def __init__(self, debug=False):
    storage_conf = utils.get_config_path(MS_CONFIG, 'storage')
    config_object = configparser.SafeConfigParser()
    config_object.optionxform = str
    config_object.read(storage_conf)
    conf = utils.parse_config(config_object)
    storage_handler = storage.StorageHandler(configfile=storage_conf)
    es_handler = storage_handler.load_required_module('ElasticSearchStorage')

    if not es_handler:
        print('[!] ERROR: This analytic only works with ES storage module.')
        sys.exit(0)  # probably not ideal...

    self.es = es_handler.es
    self.index = conf['ElasticSearchStorage']['index']
    self.doc_type = '_doc'
    self.debug = debug
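# A minimal usage sketch for the analytic above (hedged: the enclosing class
# name 'ElasticAnalytic' is hypothetical, and this assumes the storage config
# referenced by MS_CONFIG has the ElasticSearchStorage module enabled):
#
#     analytic = ElasticAnalytic(debug=True)
#     hits = analytic.es.search(index=analytic.index,
#                               body={'query': {'match_all': {}}})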
def multiscanner_process(work_queue, config, batch_size, wait_seconds, delete, exit_signal):
    filelist = []
    time_stamp = None
    storage_conf = utils.get_config_path(config, 'storage')
    storage_handler = storage.StorageHandler(configfile=storage_conf)
    while not exit_signal.value:
        time.sleep(1)
        try:
            # Pull files off the work queue until we have a full batch
            filelist.append(work_queue.get_nowait())
            if not time_stamp:
                time_stamp = time.time()
            while len(filelist) < batch_size:
                filelist.append(work_queue.get_nowait())
        except queue.Empty:
            # Scan a partial batch only if it is full or has waited long enough
            if filelist and time_stamp:
                if len(filelist) >= batch_size:
                    pass
                elif time.time() - time_stamp > wait_seconds:
                    pass
                else:
                    continue
            else:
                continue

        resultlist = multiscan(filelist, configfile=config)
        results = parse_reports(resultlist, python=True)
        if delete:
            for file_name in results:
                os.remove(file_name)

        storage_handler.store(results, wait=False)
        print('Scanned', len(results), 'files')

        # Reset the batch and its timer
        filelist = []
        time_stamp = None
    storage_handler.close()
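# A rough sketch of how this worker loop might be driven (hedged: the exact
# wiring in the surrounding service may differ, and the batch size and wait
# values below are illustrative only). 'exit_signal' is assumed to be a
# multiprocessing.Value, since the loop checks 'exit_signal.value', and
# 'work_queue' a multiprocessing.Queue, since queue.Empty is caught on
# get_nowait():
#
#     import multiprocessing
#     work_queue = multiprocessing.Queue()
#     exit_signal = multiprocessing.Value('b', False)
#     proc = multiprocessing.Process(
#         target=multiscanner_process,
#         args=(work_queue, MS_CONFIG, 100, 60, False, exit_signal))
#     proc.start()
#     work_queue.put('/path/to/sample')   # enqueue files as they arrive
#     exit_signal.value = True            # ask the loop to finish and stop
#     proc.join()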
def metricbeat_rollover(days, config=MS_CONFIG):
    '''
    Clean up old Elastic Beats indices
    '''
    try:
        # Get the storage config
        storage_configfile = utils.get_config_path(config, 'storage')
        storage_handler = storage.StorageHandler(configfile=storage_configfile)

        # Pull the ElasticSearchStorage section from the storage config
        config_object = configparser.SafeConfigParser()
        config_object.optionxform = str
        config_object.read(storage_configfile)
        es_storage_config = utils.parse_config(config_object).get('ElasticSearchStorage', {})

        metricbeat_enabled = es_storage_config.get('metricbeat_enabled', True)

        if not metricbeat_enabled:
            logger.debug('Metricbeat logging not enabled, exiting...')
            return

        if not days:
            days = es_storage_config.get('metricbeat_rollover_days')
        if not days:
            raise NameError(
                "name 'days' is not defined, check storage.ini for "
                "'metricbeat_rollover_days' setting"
            )

        # Find Elastic storage
        for handler in storage_handler.loaded_storage:
            if isinstance(handler, elasticsearch_storage.ElasticSearchStorage):
                ret = handler.delete_index(index_prefix='metricbeat', days=days)

                if ret is False:
                    logger.warn('Metricbeat Roller failed')
                else:
                    logger.info(
                        'Metricbeat indices older than {} days deleted'.format(days))
    except Exception as e:
        logger.warn(e)
    finally:
        storage_handler.close()
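# A hedged usage sketch: this cleanup task could be invoked directly or on a
# schedule (e.g. from a celery beat entry); the seven-day retention below is
# an arbitrary illustrative value, not a project default:
#
#     metricbeat_rollover(days=7)      # delete metricbeat indices older than 7 days
#     metricbeat_rollover(days=None)   # fall back to storage.ini 'metricbeat_rollover_days'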
def _main():
    global CONFIG, VERBOSE
    # Force all prints to go to stderr
    stdout = sys.stdout
    sys.stdout = sys.stderr

    # Get args
    args = _parse_args()

    # Set config or update locations
    if args.config is None:
        args.config = CONFIG
    else:
        CONFIG = args.config
        _update_DEFAULTCONF(DEFAULTCONF, CONFIG)

    # Set verbose
    if args.verbose:
        VERBOSE = args.verbose

    # Checks if user is trying to initialize
    if str(args.Files) == "['init']" and not os.path.isfile('init'):
        _init(args)

    if not os.path.isfile(args.config):
        config_init(args.config)

    # Make sure report is not a dir
    if args.json:
        if os.path.isdir(args.json):
            sys.exit('ERROR: {} is a directory, a file is expected'.format(args.json))

    # Parse the file list
    parsedlist = parseFileList(args.Files, recursive=args.recursive)

    # Unzip zip files if asked to
    if args.extractzips:
        for fname in parsedlist:
            if zipfile.is_zipfile(fname):
                unzip_dir = os.path.join('_tmp', os.path.basename(fname))
                z = zipfile.ZipFile(fname)
                if PY3:
                    args.password = bytes(args.password, 'utf-8')
                try:
                    z.extractall(path=unzip_dir, pwd=args.password)
                    for uzfile in z.namelist():
                        parsedlist.append(os.path.join(unzip_dir, uzfile))
                except RuntimeError as e:
                    print("ERROR: Failed to extract ", fname, ' - ', e, sep='')
                parsedlist.remove(fname)

    if not parsedlist:
        sys.exit("ERROR: No valid files found!")

    # Resume from report
    if args.resume:
        i = len(parsedlist)
        try:
            reportfile = codecs.open(args.json, 'r', 'utf-8')
        except Exception as e:
            sys.exit("ERROR: Could not open report file")
        for line in reportfile:
            line = json.loads(line)
            for fname in line:
                if fname in parsedlist:
                    parsedlist.remove(fname)
        reportfile.close()
        i = i - len(parsedlist)
        if VERBOSE:
            print("Skipping", i, "files which are in the report already")

    # Do multiple runs if there are too many files
    filelists = []
    if len(parsedlist) > args.numberper:
        while len(parsedlist) > args.numberper:
            filelists.append(parsedlist[:args.numberper])
            parsedlist = parsedlist[args.numberper:]
    if parsedlist:
        filelists.append(parsedlist)

    for filelist in filelists:
        # Record start time for metadata
        starttime = str(datetime.datetime.now())

        # Run the multiscan
        results = multiscan(filelist, configfile=args.config)

        # We need to read in the config for the parseReports call
        config = configparser.SafeConfigParser()
        config.optionxform = str
        config.read(args.config)
        config = _get_main_config(config)
        # Make sure we have a group-types
        if "group-types" not in config:
            config["group-types"] = []
        elif not config["group-types"]:
            config["group-types"] = []

        # Add in script metadata
        endtime = str(datetime.datetime.now())

        # For windows compatibility
        try:
            username = os.getlogin()
        except Exception as e:
            # TODO: log exception
            username = os.getenv('USERNAME')

        # Add metadata to the scan
        results.append((
            [],
            {
                "Name": "MultiScanner",
                "Start Time": starttime,
                "End Time": endtime,
                # "Command Line": list2cmdline(sys.argv),
                "Run by": username
            }))

        # Add tags if present
        if args.tag:
            tag_results = []
            for filename in filelist:
                tag_results.append((filename, args.tag))
            results.append((tag_results, {"Name": "tags", "Type": "Metadata"}))

        if args.show or not stdout.isatty():
            # TODO: Make this output something readable
            # Parse Results
            report = parse_reports(results, groups=config["group-types"],
                                   ugly=args.ugly, includeMetadata=args.metadata)

            # Print report
            try:
                print(convert_encoding(report, encoding='ascii', errors='replace'),
                      file=stdout)
                stdout.flush()
            except Exception as e:
                print('ERROR: Can\'t print report -', e)

        report = parse_reports(results, groups=config["group-types"],
                               includeMetadata=args.metadata, python=True)

        update_conf = None
        if args.json:
            update_conf = {'File': {'path': args.json}}
            if args.json.endswith('.gz') or args.json.endswith('.gzip'):
                update_conf['File']['gzip'] = True

        if 'storage-config' not in config:
            config["storage-config"] = None
        storage_handle = storage.StorageHandler(
            configfile=config["storage-config"], config=update_conf)
        storage_handle.store(report)
        storage_handle.close()

    # Cleanup zip extracted files
    if args.extractzips:
        shutil.rmtree('_tmp')
def multiscanner_celery(file_, original_filename, task_id, file_hash, metadata,
                        config=MS_CONFIG, module_list=None):
    '''
    Queue up multiscanner tasks

    Usage:
    from celery_worker import multiscanner_celery
    multiscanner_celery.delay(full_path, original_filename, task_id,
                              hashed_filename, metadata, config, module_list)
    '''
    # Initialize the connection to the task DB
    db.init_db()

    logger.info('\n\n{}{}Got file: {}.\nOriginal filename: {}.\n'.format(
        '=' * 48, '\n', file_hash, original_filename))

    # Get the storage config
    storage_conf = utils.get_config_path(config, 'storage')
    storage_handler = storage.StorageHandler(configfile=storage_conf)

    resultlist = multiscan([file_], configfile=config, module_list=module_list)
    results = parse_reports(resultlist, python=True)

    scan_time = datetime.now().isoformat()

    # Get the Scan Config that the task was run with and
    # add it to the task metadata
    scan_config_object = configparser.SafeConfigParser()
    scan_config_object.optionxform = str
    scan_config_object.read(config)
    full_conf = utils.parse_config(scan_config_object)
    sub_conf = {}
    # Count number of modules enabled out of total possible
    # and add it to the Scan Metadata
    total_enabled = 0
    total_modules = len(full_conf.keys())

    # Get the count of modules enabled from the module_list
    # if it exists, else count via the config
    if module_list:
        total_enabled = len(module_list)
    else:
        for key in full_conf:
            if key == 'main':
                continue
            sub_conf[key] = {}
            sub_conf[key]['ENABLED'] = full_conf[key]['ENABLED']
            if sub_conf[key]['ENABLED'] is True:
                total_enabled += 1

    results[file_]['Scan Metadata'] = metadata
    results[file_]['Scan Metadata']['Worker Node'] = gethostname()
    results[file_]['Scan Metadata']['Scan Config'] = sub_conf
    results[file_]['Scan Metadata']['Modules Enabled'] = '{} / {}'.format(
        total_enabled, total_modules)
    results[file_]['Scan Metadata']['Scan Time'] = scan_time
    results[file_]['Scan Metadata']['Task ID'] = task_id

    # Use the original filename as the value for the filename
    # in the report (instead of the tmp path assigned to the file
    # by the REST API)
    results[original_filename] = results[file_]
    del results[file_]

    # Save the reports to storage
    storage_ids = storage_handler.store(results, wait=False)
    storage_handler.close()

    # Only need to raise ValueError here,
    # Further cleanup will be handled by the on_failure method
    # of MultiScannerTask
    if not storage_ids:
        raise ValueError('Report failed to index')

    # Update the task DB to reflect that the task is done
    db.update_task(
        task_id=task_id,
        task_status='Complete',
        timestamp=scan_time,
    )

    logger.info('Completed Task #{}'.format(task_id))

    return results
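# For reference, a hedged sketch of the report structure this task stores
# (field values are illustrative, and the per-module result keys depend on
# which scan modules are enabled):
#
#     {
#         'original_name.exe': {
#             'Scan Metadata': {
#                 'Worker Node': 'worker01',
#                 'Scan Config': {'Cuckoo': {'ENABLED': False}, ...},
#                 'Modules Enabled': '12 / 20',
#                 'Scan Time': '2018-01-01T00:00:00',
#                 'Task ID': 42,
#             },
#             ...  # per-module results keyed by module name
#         }
#     }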