def getsnapshot():
    logger.debug('Received getsnapshot request')
    framefile = tempfile.mktemp(prefix='frame.')
    try:
        args = {}
        try:
            crawl_options = json.loads(bottle.request.body.read())
            if crawl_options:
                args['options'] = crawl_options
        except Exception:
            pass  # benign: no JSON payload in the request body
        if bottle.request.query:
            value = bottle.request.query.get('features', None)
            if value:
                args['features'] = value
            value = bottle.request.query.get('since', None)
            if value and value in ['EPOCH', 'BOOT', 'LASTSNAPSHOT']:
                args['since'] = value
        args['url'] = 'file://{0}'.format(framefile)
        args['compress'] = False
        crawlutils.snapshot(**args)
        bottle.response.content_type = 'text/csv'
        with open(framefile, 'r') as fd:
            for line in fd:
                yield line
        os.remove(framefile)
    except Exception:
        if os.path.exists(framefile):
            os.remove(framefile)
        bottle.response.content_type = 'application/json'
        yield json.dumps({'success': False, 'stacktrace': traceback.format_exc().split('\n')}, indent=2)
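
# A minimal sketch of how a client might exercise the handler above. The
# '/snapshot' route, the host/port, and the use of the third-party requests
# library are assumptions (the bottle route decorator and the actual
# CRAWLER_HOST/CRAWLER_PORT values are not part of this excerpt); the
# 'features' and 'since' query parameters and the optional JSON options
# body are taken from the handler itself, and the feature names are
# borrowed from the usage examples at the end of this file.
'''
import json
import requests

crawl_options = {'file': {'root_dir': '/'}}   # hypothetical options payload

resp = requests.get(
    'http://localhost:9999/snapshot',          # assumed route and port
    params={'features': 'os,disk,process',     # feature list
            'since': 'EPOCH'},                 # EPOCH | BOOT | LASTSNAPSHOT
    data=json.dumps(crawl_options))            # parsed into args['options']
print resp.text  # CSV frame on success, JSON stacktrace on failure
'''
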
def start_autonomous_crawler(num_processes, logfile):

    if params['crawlmode'] == 'OUTCONTAINER':
        jobs = []

        for index in xrange(num_processes):
            # XXX use options.get() instead
            options['partition_strategy']['name'] = 'equally_by_pid'
            partition_args = options['partition_strategy']['args']
            partition_args['process_id'] = index
            partition_args['num_processes'] = num_processes
            """
            XXX(ricarkol): remember that when we finally get rid of these
            worker processes in favor of a proper pool of working threads,
            we have to move the caches of previous metrics somewhere out of
            the FeaturesCrawler objects. And that cache has to be shared among
            all the working threads.
            """
            p = multiprocessing.Process(name='crawler-%s' % index,
                                        target=crawler_worker,
                                        args=(index, logfile, params))
            jobs.append((p, index))
            p.start()
            logger.info('Crawler %s (pid=%s) started', index, p.pid)

        while jobs:
            for (index, (job, process_id)) in enumerate(jobs):
                if not job.is_alive():
                    exitcode = job.exitcode
                    pname = job.name
                    pid = job.pid
                    if job.exitcode:
                        logger.info(
                            '%s terminated unexpectedly with errorcode %s' %
                            (pname, exitcode))
                        for (other_job, process_id) in jobs:
                            if other_job != job:
                                logger.info('Terminating crawler %s (pid=%s)',
                                            process_id, other_job.pid)
                                os.kill(other_job.pid, 9)
                        logger.info('Exiting as all jobs were terminated.')
                        raise RuntimeError(
                            '%s terminated unexpectedly with errorcode %s' %
                            (pname, exitcode))
                    else:
                        logger.info('Crawler %s (pid=%s) exited normally.',
                                    process_id, pid)
                    # Remove the finished job and rescan from the top;
                    # deleting while iterating would skip entries.
                    del jobs[index]
                    break
            time.sleep(0.1)
        logger.info('Exiting as there are no more processes running.')
    else:

        # INVM, OUTVM, and others

        setup_logger('crawlutils', logfile, 0)
        crawlutils.snapshot(**params)
def crawler_worker(process_id, logfile, params):
    setup_logger('crawlutils', logfile, process_id)

    # Starting message

    logger.info('*' * 50)
    logger.info('Crawler #%d started.' % (process_id))
    logger.info('*' * 50)

    crawlutils.snapshot(**params)
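
# The 'equally_by_pid' partition strategy configured above is implemented
# inside crawlutils and is not shown in this excerpt. The sketch below is
# only a plausible illustration, assuming the strategy hands each worker
# every num_processes-th container; the helper name and the modulo scheme
# are hypothetical.
'''
def partition_equally_by_pid(container_pids, process_id, num_processes):
    # Give worker `process_id` every num_processes-th container.
    return [pid for i, pid in enumerate(sorted(container_pids))
            if i % num_processes == process_id]

# Three workers splitting seven containers:
pids = [101, 205, 309, 412, 518, 624, 731]
for worker in xrange(3):
    print worker, partition_equally_by_pid(pids, worker, 3)
'''
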
def start_autonomous_crawler(snapshot_params, process_count, logfile):

    params['parent_pid'] = int(os.getpid())
    if params['crawlmode'] == 'OUTCONTAINER':
        jobs = []

        for index in xrange(process_count):
            params['process_id'] = index
            params['process_count'] = process_count
            p = multiprocessing.Process(name="crawler-%s" % (index),
                            target=crawler_worker,
                            args=(index, logfile, snapshot_params))
            jobs.append((p, index))
            p.start()
            logger.info("Crawler %s (pid=%s) started", index, p.pid)

        """
        Monitor the children. The behavior is to wait for all children to
        terminate, or to exit and raise an exception when any of the processes
        crashes.
        """
        while jobs:
            for index, (job, process_id) in enumerate(jobs):
                if not job.is_alive():
                    exitcode = job.exitcode
                    pname = job.name
                    pid = job.pid
                    if job.exitcode:
                        logger.info("%s terminated unexpectedly with "
                                    "errorcode %s" % (pname, exitcode))
                        for other_job, process_id in jobs:
                            if other_job != job:
                                logger.info("Terminating crawler %s (pid=%s)",
                                            process_id, other_job.pid)
                                os.kill(other_job.pid, 9)
                        logger.info("Exiting as all jobs were terminated.")
                        raise RuntimeError("%s terminated unexpectedly with "
                                           "errorcode %s" % (pname, exitcode))
                    else:
                        logger.info("Crawler %s (pid=%s) exited normally.",
                                    process_id, pid)
                    # Remove the finished job and rescan from the top;
                    # deleting while iterating would skip entries.
                    del jobs[index]
                    break
            time.sleep(0.1)
        logger.info("Exiting as there are no more processes running.")
    else:
        # INVM, OUTVM, and others
        setup_logger("crawlutils", logfile, 0)
        crawlutils.snapshot(**params)
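
# A note on the cleanup above: crashed-sibling termination uses
# os.kill(pid, 9), i.e. SIGKILL, which gives the other workers no chance to
# clean up. A gentler pattern, sketched below on the assumption that the
# workers tolerate SIGTERM, asks first via Process.terminate() and only
# falls back to SIGKILL after a grace period. The stop_worker() helper is
# hypothetical, not part of the crawler.
'''
import os
import signal

def stop_worker(proc, grace_seconds=5):
    proc.terminate()          # sends SIGTERM on POSIX
    proc.join(grace_seconds)  # give it a moment to exit cleanly
    if proc.is_alive():
        os.kill(proc.pid, signal.SIGKILL)
        proc.join()

# In the monitoring loop above, stop_worker(other_job) would replace
# os.kill(other_job.pid, 9).
'''
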
def crawler_worker(process_id, logfile, snapshot_params):
    setup_logger("crawlutils", logfile, process_id)
    crawlutils.snapshot(**snapshot_params)

# Tail of the top-level command-line dispatch (the enclosing 'if' branch
# that selects multi-process mode is not shown in this excerpt):
        start_autonomous_crawler(params, args.numprocesses, args.logfile)
    else:
        print ''
        print 'Starting crawler at URL http://{0}:{1}'.format(
            CRAWLER_HOST, CRAWLER_PORT)
        print 'Log output will be in /var/log/crawler.log'
        print ''
        logging.basicConfig(filename='/var/log/crawler.log',
                            filemode='w',
                            format='%(asctime)s %(levelname)s : %(message)s',
                            level=logging.DEBUG)
        logger = logging.getLogger(__name__)
        logger.info('Started crawler at URL http://{0}:{1}'.format(
            CRAWLER_HOST, CRAWLER_PORT))
        logger.info('Log output will be in /var/log/crawler.log')
        app.run(host=CRAWLER_HOST, port=CRAWLER_PORT, quiet=True)

# Example Usage #1: crawl all features with default options
'''
crawlutils.snapshot()
'''

# Example Usage #2: crawl selected features with custom options, emit frame to local file
'''
my_crawl_commands = [
    ('os', None), ('disk', None), ('process', None), ('connection', None), # these features don't take options
    ('file', {'root_dir':'/', 'exclude_dirs':['boot', 'dev', 'mnt', 'proc', 'sys']}),
    ('config', {'root_dir':'/', 'known_config_files':['etc/passwd', 'etc/hosts', 'etc/issue', 'etc/mtab', 'etc/group'], 'discover_config_files': True})
]
crawlutils.snapshot(emit_to_url='file://frame.csv', crawl_commands=my_crawl_commands)
'''

# Example Usage #3 (UDeploy use case): crawl "file" features and use a customer root_dir_alias, emit frame to local file
'''