コード例 #1
0
    def __init__(self,
                 features=None,
                 environment='cloudsight',
                 user_list='ALL',
                 host_namespace='',
                 plugin_places=None,
                 options=None):
        """Initialise the crawler and (re)load the plugins it relies on.

        :param features: Features to crawl. Defaults to ``['os', 'cpu']``.
        :param environment: Environment name handed to
            ``plugins_manager.reload_env_plugin``; also stored on the
            instance.
        :param user_list: Stored on the instance as ``user_list``.
        :param host_namespace: Stored on the instance as ``host_namespace``.
        :param plugin_places: Locations searched for plugins. Defaults to
            ``['plugins']``.
        :param options: Options passed to the base class and to the
            container crawl plugin loader. Defaults to ``{}``.
        """
        # Build the container defaults per call: a list/dict literal in the
        # signature would be a single object shared by every instance, so a
        # mutation made by one crawler (or by a plugin) would leak into the
        # next one.
        if features is None:
            features = ['os', 'cpu']
        if plugin_places is None:
            plugin_places = ['plugins']
        if options is None:
            options = {}

        BaseCrawler.__init__(
            self,
            features=features,
            plugin_places=plugin_places,
            options=options)

        # The environment plugin is reloaded before the container crawl
        # plugins; the resulting plugin list is then cached on the instance.
        plugins_manager.reload_env_plugin(environment, plugin_places)
        plugins_manager.reload_container_crawl_plugins(
            features, plugin_places, options)
        self.plugins = plugins_manager.get_container_crawl_plugins(features)

        self.environment = environment
        self.host_namespace = host_namespace
        self.user_list = user_list
コード例 #2
0
def snapshot(
        urls=None,
        namespace=misc.get_host_ipaddr(),
        features=config_parser.get_config()['general']['features_to_crawl'],
        options=None,
        frequency=-1,
        crawlmode=Modes.INVM,
        format='csv',
        overwrite=False,
        first_snapshot_num=0,
        max_snapshots=-1):
    """Entrypoint for crawler functionality.

    This is the function executed by long running crawler processes. It
    reloads the crawl plugins, then loops: each iteration takes one snapshot
    using the function that matches `crawlmode`, and sleeps until the next
    iteration time computed by `_get_next_iteration_time`.

    :param urls: Output urls for the snapshot. Defaults to ``['stdout://']``.
    :param namespace: This a pointer to a specific system (e.g. IP for INVM).
    :param features: List of features to crawl.
    :param options: Tree of options with details like what config files.
        The keys 'environment' and 'plugin_places' override the values from
        the 'general' section of the configuration. Defaults to ``{}``.
    :param frequency: Target time period for iterations. A negative value
        means just one run.
    :param crawlmode: What's the system we want to crawl.
    :param format: The format of the frame, defaults to csv.
    :param overwrite: Forwarded unchanged to the per-mode snapshot function.
    :param first_snapshot_num: Number given to the first snapshot; it is
        incremented by one after every iteration.
    :param max_snapshots: The loop stops after the iteration whose snapshot
        number equals this value. With the default of -1 this never matches
        when counting up from a non-negative `first_snapshot_num`.
    :raises NotImplementedError: If `crawlmode` is not one of the handled
        modes.
    """

    global should_exit

    # Resolve the container defaults before capturing locals(): a list/dict
    # literal in the signature would be one object shared by every call, and
    # both values are handed to downstream snapshot functions.
    if urls is None:
        urls = ['stdout://']
    if options is None:
        options = {}

    saved_args = locals()
    logger.debug('snapshot args: %s', saved_args)

    environment = options.get(
        'environment',
        config_parser.get_config()['general']['environment'])
    plugin_places = options.get(
        'plugin_places',
        config_parser.get_config()['general']['plugin_places'])

    plugin_mode = config_parser.get_config()['general']['plugin_mode']

    plugins_manager.reload_env_plugin(plugin_places=plugin_places,
                                      environment=environment)

    plugins_manager.reload_container_crawl_plugins(plugin_places=plugin_places,
                                                   features=features,
                                                   plugin_mode=plugin_mode)

    plugins_manager.reload_vm_crawl_plugins(plugin_places=plugin_places,
                                            features=features,
                                            plugin_mode=plugin_mode)

    plugins_manager.reload_host_crawl_plugins(plugin_places=plugin_places,
                                              features=features,
                                              plugin_mode=plugin_mode)

    next_iteration_time = None

    snapshot_num = first_snapshot_num

    # Die if the parent dies: ask for SIGHUP to be delivered to this process
    # when its parent exits, and route SIGHUP to signal_handler_exit.
    # 1 is the prctl option number of PR_SET_PDEATHSIG on Linux.
    PR_SET_PDEATHSIG = 1
    try:
        libc.prctl(PR_SET_PDEATHSIG, signal.SIGHUP)
        signal.signal(signal.SIGHUP, signal_handler_exit)
    except AttributeError:
        logger.warning('prctl is not available. MacOS is not supported.')

    # Carried across iterations: snapshot_containers receives the previous
    # list and returns the one to use next time.
    containers = []

    # This is the main loop of the system, taking a snapshot and sleeping at
    # every iteration.

    while True:

        snapshot_time = int(time.time())

        if crawlmode == Modes.OUTCONTAINER:
            containers = snapshot_containers(
                containers=containers,
                urls=urls,
                snapshot_num=snapshot_num,
                features=features,
                options=options,
                format=format,
                overwrite=overwrite,
                host_namespace=namespace,
            )
        elif crawlmode == Modes.MESOS:
            # Note that `features` is not passed in this mode.
            snapshot_mesos(
                crawlmode=crawlmode,
                urls=urls,
                snapshot_num=snapshot_num,
                options=options,
                format=format,
                overwrite=overwrite,
                namespace=namespace,
            )
        elif crawlmode == Modes.OUTVM:
            snapshot_vms(
                urls=urls,
                snapshot_num=snapshot_num,
                features=features,
                options=options,
                format=format,
                overwrite=overwrite,
                namespace=namespace,
            )
        elif crawlmode in [Modes.INVM, Modes.MOUNTPOINT]:
            snapshot_generic(crawlmode=crawlmode,
                             urls=urls,
                             snapshot_num=snapshot_num,
                             features=features,
                             options=options,
                             format=format,
                             namespace=namespace,
                             overwrite=overwrite)
        else:
            raise NotImplementedError('Crawl mode %s is not implemented' %
                                      crawlmode)

        # Frequency < 0 means only one run. The check happens after the
        # crawl, so at least one snapshot is always taken.
        if (frequency < 0 or should_exit or snapshot_num == max_snapshots):
            logger.info('Bye')
            break

        time_to_sleep, next_iteration_time = _get_next_iteration_time(
            next_iteration_time, frequency, snapshot_time)
        if time_to_sleep > 0:
            time.sleep(time_to_sleep)

        snapshot_num += 1
コード例 #3
0
def snapshot(
    urls=None,
    namespace=misc.get_host_ipaddr(),
    features=defaults.DEFAULT_FEATURES_TO_CRAWL,
    options=defaults.DEFAULT_CRAWL_OPTIONS,
    since='BOOT',
    frequency=-1,
    crawlmode=Modes.INVM,
    inputfile='Undefined',
    format='csv',
    overwrite=False,
):
    """Entrypoint for crawler functionality.

    This is the function executed by long running crawler processes. It just
    loops sleeping until the next `frequency`-second boundary at each crawl
    interval.  During each interval, it collects the features listed in
    `features`, and sends them to the outputs listed in `urls`.

    :param urls: Output urls for the snapshot. Defaults to ``['stdout://']``.
    :param namespace: This a pointer to a specific system (e.g. IP for INVM).
    :param features: List of features to crawl. If it contains 'nofeatures',
        containers are still listed (and their log files linked when
        requested) but no container snapshot is taken.
    :param options: Tree of options with details like what config files.
        Must contain the key 'metadata'.
    :param since: Calculate deltas or not. XXX needs some work. With
        'LASTSNAPSHOT' the since-timestamp is moved to the start time of the
        snapshot that just finished.
    :param frequency: Target time period for iterations. A negative value
        means just one run; 0 means crawl again immediately, without
        sleeping.
    :param crawlmode: What's the system we want to crawl.
    :param inputfile: Applies to mode.FILE. The frame emitted is this file.
    :param format: The format of the frame, defaults to csv.
    :param overwrite: Forwarded unchanged to the per-mode snapshot function.
    :raises RuntimeError: If `crawlmode` is not one of the handled modes.
    :raises AssertionError: If 'metadata' is missing from `options`.
    """

    global should_exit

    # Resolve the default before capturing locals(): a list literal in the
    # signature would be one object shared by every call, and it is handed
    # to downstream snapshot functions.
    if urls is None:
        urls = ['stdout://']

    saved_args = locals()
    logger.debug('snapshot args: %s' % (saved_args))

    assert 'metadata' in options
    environment = options.get('environment', defaults.DEFAULT_ENVIRONMENT)
    # 'plugin_places' is a comma-separated string here.
    plugin_places = options.get('plugin_places',
                                defaults.DEFAULT_PLUGIN_PLACES).split(',')
    plugins_manager.reload_env_plugin(plugin_places=plugin_places,
                                      environment=environment)

    # The second returned value is not used by this function.
    since_timestamp, _ = get_initial_since_values(since)
    next_iteration_time = None

    snapshot_num = 0

    # Die if the parent dies: ask for SIGHUP to be delivered to this process
    # when its parent exits, and route SIGHUP to signal_handler_exit.
    # 1 is the prctl option number of PR_SET_PDEATHSIG on Linux.
    PR_SET_PDEATHSIG = 1
    libc.prctl(PR_SET_PDEATHSIG, signal.SIGHUP)
    signal.signal(signal.SIGHUP, signal_handler_exit)

    # Baseline list, used by the first iteration to detect containers that
    # disappeared. Only needed (and only defined) in OUTCONTAINER mode.
    if crawlmode == Modes.OUTCONTAINER:
        containers = get_filtered_list_of_containers(options, namespace)

    # This is the main loop of the system, taking a snapshot and sleeping at
    # every iteration.

    while True:

        snapshot_time = int(time.time())

        if crawlmode == Modes.OUTCONTAINER:

            curr_containers = get_filtered_list_of_containers(options,
                                                              namespace)
            # Containers seen on the previous pass that are gone now.
            deleted = [c for c in containers if c not in curr_containers]
            containers = curr_containers

            for container in deleted:
                if options.get('link_container_log_files', False):
                    try:
                        container.unlink_logfiles(options)
                    except NotImplementedError:
                        # Deliberate best effort: some container types do
                        # not implement log file (un)linking.
                        pass

            logger.debug('Crawling %d containers' % (len(containers)))

            for container in containers:

                logger.info(
                    'Crawling container %s %s %s' %
                    (container.pid, container.short_id, container.namespace))

                if options.get('link_container_log_files', False):
                    # This is a NOP if files are already linked (which is
                    # pretty much always).
                    try:
                        container.link_logfiles(options=options)
                    except NotImplementedError:
                        pass

                # no feature crawling
                if 'nofeatures' in features:
                    continue
                snapshot_container(
                    urls=urls,
                    snapshot_num=snapshot_num,
                    features=features,
                    options=options,
                    format=format,
                    inputfile=inputfile,
                    container=container,
                    since=since,
                    since_timestamp=since_timestamp,
                    overwrite=overwrite
                )

        elif crawlmode in (Modes.INVM,
                           Modes.MOUNTPOINT,
                           Modes.DEVICE,
                           Modes.FILE,
                           Modes.ISCSI):

            snapshot_generic(
                crawlmode=crawlmode,
                urls=urls,
                snapshot_num=snapshot_num,
                features=features,
                options=options,
                format=format,
                inputfile=inputfile,
                namespace=namespace,
                since=since,
                since_timestamp=since_timestamp,
                overwrite=overwrite
            )
        # Plain equality: `crawlmode in (Modes.MESOS)` is not a tuple
        # membership test (the parentheses are only grouping), so it would
        # test containment in Modes.MESOS itself.
        elif crawlmode == Modes.MESOS:
            # Note that `features` is not passed in this mode.
            snapshot_mesos(
                crawlmode=crawlmode,
                urls=urls,
                snapshot_num=snapshot_num,
                options=options,
                format=format,
                inputfile=inputfile,
                overwrite=overwrite,
                namespace=namespace,
                since=since,
                since_timestamp=since_timestamp
            )
        else:
            raise RuntimeError('Unknown Mode')

        if since == 'LASTSNAPSHOT':
            # Subsequent snapshots will update this value.
            since_timestamp = snapshot_time

        # frequency < 0 means only one run; a set should_exit flag also ends
        # the loop. The check happens after the crawl, so at least one
        # snapshot is always taken.
        if frequency < 0 or should_exit:
            logger.info('Bye')
            break
        elif frequency == 0:
            # Crawl again right away, without sleeping.
            # NOTE(review): this path skips `snapshot_num += 1`, so every
            # snapshot is taken with the same number - confirm that this is
            # intended.
            continue

        # Keep iterations on a fixed grid anchored at the first snapshot
        # instead of drifting by the time each crawl takes.
        if next_iteration_time is None:
            next_iteration_time = snapshot_time + frequency
        else:
            next_iteration_time = next_iteration_time + frequency

        # If the crawl overran, skip the intervals that are already
        # entirely in the past.
        while next_iteration_time + frequency < time.time():
            next_iteration_time = next_iteration_time + frequency

        time_to_sleep = next_iteration_time - time.time()
        if time_to_sleep > 0:
            time.sleep(time_to_sleep)

        snapshot_num += 1
コード例 #4
0
def snapshot(
    urls=None,
    namespace=misc.get_host_ipaddr(),
    features=defaults.DEFAULT_FEATURES_TO_CRAWL,
    options=defaults.DEFAULT_CRAWL_OPTIONS,
    since='BOOT',
    frequency=-1,
    crawlmode=Modes.INVM,
    inputfile='Undefined',
    format='csv',
    overwrite=False,
):
    """Entrypoint for crawler functionality.

    This is the function executed by long running crawler processes. It just
    loops sleeping until the next `frequency`-second boundary at each crawl
    interval.  During each interval, it collects the features listed in
    `features`, and sends them to the outputs listed in `urls`.

    :param urls: Output urls for the snapshot. Defaults to ``['stdout://']``.
    :param namespace: This a pointer to a specific system (e.g. IP for INVM).
    :param features: List of features to crawl. If it contains 'nofeatures',
        containers are still listed (and their log files linked when
        requested) but no container snapshot is taken.
    :param options: Tree of options with details like what config files.
        Must contain the key 'metadata'.
    :param since: Calculate deltas or not. XXX needs some work. With
        'LASTSNAPSHOT' the since-timestamp is moved to the start time of the
        snapshot that just finished.
    :param frequency: Target time period for iterations. A negative value
        means just one run; 0 means crawl again immediately, without
        sleeping.
    :param crawlmode: What's the system we want to crawl.
    :param inputfile: Applies to mode.FILE. The frame emitted is this file.
    :param format: The format of the frame, defaults to csv.
    :param overwrite: Forwarded unchanged to the per-mode snapshot function.
    :raises RuntimeError: If `crawlmode` is not one of the handled modes.
    :raises AssertionError: If 'metadata' is missing from `options`.
    """

    global should_exit

    # Resolve the default before capturing locals(): a list literal in the
    # signature would be one object shared by every call, and it is handed
    # to downstream snapshot functions.
    if urls is None:
        urls = ['stdout://']

    saved_args = locals()
    logger.debug('snapshot args: %s' % (saved_args))

    assert 'metadata' in options
    environment = options.get('environment', defaults.DEFAULT_ENVIRONMENT)
    # 'plugin_places' is a comma-separated string here.
    plugin_places = options.get('plugin_places',
                                defaults.DEFAULT_PLUGIN_PLACES).split(',')
    plugins_manager.reload_env_plugin(plugin_places=plugin_places,
                                      environment=environment)

    # The second returned value is not used by this function.
    since_timestamp, _ = get_initial_since_values(since)
    next_iteration_time = None

    snapshot_num = 0

    # Die if the parent dies: ask for SIGHUP to be delivered to this process
    # when its parent exits, and route SIGHUP to signal_handler_exit.
    # 1 is the prctl option number of PR_SET_PDEATHSIG on Linux.
    PR_SET_PDEATHSIG = 1
    libc.prctl(PR_SET_PDEATHSIG, signal.SIGHUP)
    signal.signal(signal.SIGHUP, signal_handler_exit)

    # Baseline list, used by the first iteration to detect containers that
    # disappeared. Only needed (and only defined) in OUTCONTAINER mode.
    if crawlmode == Modes.OUTCONTAINER:
        containers = get_filtered_list_of_containers(options, namespace)

    # This is the main loop of the system, taking a snapshot and sleeping at
    # every iteration.

    while True:

        snapshot_time = int(time.time())

        if crawlmode == Modes.OUTCONTAINER:

            curr_containers = get_filtered_list_of_containers(options,
                                                              namespace)
            # Containers seen on the previous pass that are gone now.
            deleted = [c for c in containers if c not in curr_containers]
            containers = curr_containers

            for container in deleted:
                if options.get('link_container_log_files', False):
                    try:
                        container.unlink_logfiles(options)
                    except NotImplementedError:
                        # Deliberate best effort: some container types do
                        # not implement log file (un)linking.
                        pass

            logger.debug('Crawling %d containers' % (len(containers)))

            for container in containers:

                logger.info(
                    'Crawling container %s %s %s' %
                    (container.pid, container.short_id, container.namespace))

                if options.get('link_container_log_files', False):
                    # This is a NOP if files are already linked (which is
                    # pretty much always).
                    try:
                        container.link_logfiles(options=options)
                    except NotImplementedError:
                        pass

                # no feature crawling
                if 'nofeatures' in features:
                    continue
                snapshot_container(
                    urls=urls,
                    snapshot_num=snapshot_num,
                    features=features,
                    options=options,
                    format=format,
                    inputfile=inputfile,
                    container=container,
                    since=since,
                    since_timestamp=since_timestamp,
                    overwrite=overwrite
                )

        elif crawlmode in (Modes.INVM,
                           Modes.MOUNTPOINT,
                           Modes.DEVICE,
                           Modes.FILE,
                           Modes.ISCSI):

            snapshot_generic(
                crawlmode=crawlmode,
                urls=urls,
                snapshot_num=snapshot_num,
                features=features,
                options=options,
                format=format,
                inputfile=inputfile,
                namespace=namespace,
                since=since,
                since_timestamp=since_timestamp,
                overwrite=overwrite
            )

        else:
            raise RuntimeError('Unknown Mode')

        if since == 'LASTSNAPSHOT':
            # Subsequent snapshots will update this value.
            since_timestamp = snapshot_time

        # frequency < 0 means only one run; a set should_exit flag also ends
        # the loop. The check happens after the crawl, so at least one
        # snapshot is always taken.
        if frequency < 0 or should_exit:
            logger.info('Bye')
            break
        elif frequency == 0:
            # Crawl again right away, without sleeping.
            # NOTE(review): this path skips `snapshot_num += 1`, so every
            # snapshot is taken with the same number - confirm that this is
            # intended.
            continue

        # Keep iterations on a fixed grid anchored at the first snapshot
        # instead of drifting by the time each crawl takes.
        if next_iteration_time is None:
            next_iteration_time = snapshot_time + frequency
        else:
            next_iteration_time = next_iteration_time + frequency

        # If the crawl overran, skip the intervals that are already
        # entirely in the past.
        while next_iteration_time + frequency < time.time():
            next_iteration_time = next_iteration_time + frequency

        time_to_sleep = next_iteration_time - time.time()
        if time_to_sleep > 0:
            time.sleep(time_to_sleep)

        snapshot_num += 1