def _get_feature_key(self, gpuhandle, gpuid):
    # Encode the host IP with '/' instead of '.' so that '.' can serve as
    # the separator between the key's components.
    hostip = get_host_ipaddr().replace('.', '/')
    key = '{}.gpu{}'.format(hostip, gpuid)
    cont_ids = self._get_container_id(gpuhandle)
    for cont_id in cont_ids:
        key = key + '.' + cont_id
    return key
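# To make the key format concrete: a minimal sketch of what the construction
# above produces, assuming get_host_ipaddr() returns '10.0.0.1' and
# _get_container_id() yields two IDs (all values below are hypothetical).
hostip_example = '10.0.0.1'.replace('.', '/')       # -> '10/0/0/1'
key_example = '{}.gpu{}'.format(hostip_example, 0)  # -> '10/0/0/1.gpu0'
for cont_id_example in ['aaa111', 'bbb222']:        # assumed container IDs
    key_example = key_example + '.' + cont_id_example
print(key_example)  # -> '10/0/0/1.gpu0.aaa111.bbb222'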
def get_containers(
    environment='cloudsight',
    host_namespace=misc.get_host_ipaddr(),
    user_list='ALL',
    ignore_raw_containers=True
):
    """
    Returns a list of all containers running in the host.

    XXX This list excludes non-docker containers when running in non-cloudsight
    environment. TODO: fix this weird behaviour.

    :param environment: this defines how the name (namespace) is constructed.
    :param host_namespace: string representing the host name (e.g. host IP)
    :param user_list: list of Docker container IDs. TODO: include rkt.
    :param ignore_raw_containers: if True, only include Docker or rkt.
    An example of a non-docker container is a chromium-browser process.
    :return: a list of Container objects.
    """
    filtered_list = []
    containers_list = list_all_containers(user_list, host_namespace,
                                          ignore_raw_containers)
    default_environment = 'cloudsight'
    for _container in containers_list:
        # Outside the default 'cloudsight' environment, keep only Docker
        # containers.
        if (environment != default_environment and
                not _container.is_docker_container()):
            continue

        filtered_list.append(_container)

    return filtered_list
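# A minimal usage sketch for get_containers(), assuming the module context
# above (list_all_containers etc.) is available; the 'kubernetes' environment
# name is purely illustrative.
containers = get_containers(environment='kubernetes', user_list='ALL')
for container in containers:
    # Outside the 'cloudsight' environment only Docker containers survive
    # the filter above, so this should print True for each entry.
    print(container.is_docker_container())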
def setUp(self):
    # Log INFO and above to stdout so test output is visible.
    root = logging.getLogger()
    root.setLevel(logging.INFO)
    ch = logging.StreamHandler(sys.stdout)
    ch.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    ch.setFormatter(formatter)
    root.addHandler(ch)
    self.container = {}
    self.container_name = 'LogLinkerContainer'
    self.host_namespace = get_host_ipaddr()
    # Remove any leftover log directory from a previous run.
    try:
        shutil.rmtree(os.path.join(HOST_LOG_BASEDIR, self.host_namespace,
                                   self.container_name))
    except OSError:
        pass
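# A matching tearDown is not shown in this example; a minimal sketch that
# mirrors the cleanup in setUp (same HOST_LOG_BASEDIR and naming assumptions)
# could look like this:
def tearDown(self):
    # Remove the per-container log directory so later tests start clean.
    try:
        shutil.rmtree(os.path.join(HOST_LOG_BASEDIR, self.host_namespace,
                                   self.container_name))
    except OSError:
        pass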

if __name__ == '__main__':

    euid = os.geteuid()
    if euid != 0:
        print('Need to run this as root.')
        exit(1)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--namespace',
        dest='namespace',
        type=str,
        nargs='?',
        default=misc.get_host_ipaddr(),
        help='Data source this crawler is associated with. Defaults to '
             'the host IP address.',
    )
    parser.add_argument(
        '--frequency',
        dest='frequency',
        type=int,
        default=-1,
        help='Target time period for iterations. Defaults to -1, which '
             'means run only one iteration.')
    parser.add_argument('--logfile',
                        dest='logfile',
                        type=str,
                        default='crawler.log',
                        help='Logfile path. Defaults to crawler.log')
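    # This example stops after defining its flags; a hedged sketch of how
    # they would parse, using purely illustrative command-line values:
    args = parser.parse_args(['--frequency', '60', '--logfile', '/tmp/c.log'])
    print(args.frequency)  # -> 60
    print(args.namespace)  # -> the host IP address (the default above)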

def main():

    euid = os.geteuid()
    if euid != 0:
        print('Need to run this as root.')
        exit(1)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--options',
        dest='options',
        type=json_parser,
        default={},
        help='JSON dict of options passed as arguments to the crawler '
             'plugins.'
    )
    parser.add_argument(
        '--url',
        dest='url',
        type=csv_list,
        default=['stdout://'],
        help='Send the snapshot data to URL. Defaults to the console.',
    )
    parser.add_argument(
        '--namespace',
        dest='namespace',
        type=str,
        nargs='?',
        default=misc.get_host_ipaddr(),
        help='Data source this crawler is associated with. Defaults to '
             'the host IP address.',
    )
    parser.add_argument(
        '--features',
        dest='features',
        type=csv_list,
        default=['os', 'cpu'],
        help='Comma-separated list of feature types to crawl. Defaults to '
             'os,cpu',
    )
    parser.add_argument(
        '--frequency',
        dest='frequency',
        type=int,
        default=-1,
        help='Target time period for iterations. Defaults to -1, which '
             'means run only one iteration.'
    )
    parser.add_argument(
        '--compress',
        dest='compress',
        action='store_true',
        default=False,
        help='GZIP-compress the output frame data. Defaults to false '
             '(flag absent).',
    )
    parser.add_argument('--logfile', dest='logfile', type=str,
                        default='crawler.log',
                        help='Logfile path. Defaults to crawler.log'
                        )
    parser.add_argument(
        '--crawlmode',
        dest='crawlmode',
        type=str,
        choices=[
            Modes.INVM,
            Modes.OUTVM,
            Modes.MOUNTPOINT,
            Modes.OUTCONTAINER,
            Modes.MESOS,
        ],
        default=Modes.INVM,
        help='The crawler mode: '
             '{INVM,OUTVM,MOUNTPOINT,OUTCONTAINER,MESOS}. '
             'Defaults to INVM',
    )
    parser.add_argument(
        '--mountpoint',
        dest='mountpoint',
        type=str,
        default='/',
        help='Mountpoint location used as the / for features like packages, '
             'files, and config.'
    )
    parser.add_argument(
        '--format',
        dest='format',
        type=str,
        default='csv',
        choices=['csv', 'graphite', 'json'],
        help='Emitted data format.',
    )
    parser.add_argument(
        '--crawlContainers',
        dest='crawlContainers',
        type=str,
        nargs='?',
        default='ALL',
        help='Comma-separated list of Docker container IDs to crawl '
             '(only Docker is supported at the moment). Defaults to all '
             'running containers. Example: --crawlContainers aaa,bbb',
    )
    parser.add_argument(
        '--crawlVMs',
        dest='vm_descs_list',
        nargs='+',
        default='ALL',
        help='List of VMs to crawl. Defaults to \'ALL\' VMs. Currently each '
             'VM must be given as '
             '\'vm_name,kernel_version_long,linux_flavour,arch\'. '
             'Automatic kernel version detection may come later, so that '
             'only VM names (\'ALL\' by default) need to be passed. '
             'Example: --crawlVMs '
             'vm1,3.13.0-24-generic_3.13.0-24.x86_64,ubuntu,x86_64 '
             'vm2,4.0.3.x86_64,vanilla,x86_64',
    )
    parser.add_argument(
        '--environment',
        dest='environment',
        type=str,
        default='cloudsight',
        help='This specifies some environment-specific behavior, like how '
             'to name a container. The way to add a new behavior is by '
             'implementing a plugin (see plugins/cloudsight_environment.py '
             'as an example). Defaults to "cloudsight".',
    )
    parser.add_argument(
        '--plugins',
        dest='plugin_places',
        type=csv_list,
        default=['plugins'],
        help='Comma-separated list of directories in which to find plugins. '
             'Each path can be absolute, or relative to the location of '
             'crawler.py. Defaults to "plugins"',
    )
    parser.add_argument(
        '--numprocesses',
        dest='numprocesses',
        type=int,
        default=1,
        help='Number of processes used for container crawling. Defaults '
             'to 1. NOT SUPPORTED.'
    )
    parser.add_argument(
        '--extraMetadata',
        dest='extraMetadata',
        type=json_parser,
        default={},
        help='JSON with data to annotate all features. It can be used to '
             'append a set of system identifiers to the metadata feature '
             'and, if --extraMetadataForAll is set, to all features.'
    )
    parser.add_argument(
        '--avoidSetns',
        dest='avoid_setns',
        action='store_true',
        default=False,
        help='Avoids the use of the setns() syscall to crawl containers. '
             'Some features like process will not work with this option. '
             'Only applies to the OUTCONTAINER mode.'
    )

    args = parser.parse_args()
    misc.setup_logger('crawlutils', args.logfile)
    misc.setup_logger('yapsy', 'yapsy.log')

    options = args.options
    options['avoid_setns'] = args.avoid_setns
    options['mountpoint'] = args.mountpoint

    emitters = EmittersManager(urls=args.url,
                               format=args.format,
                               compress=args.compress,
                               extra_metadata=args.extraMetadata)

    if args.crawlmode == Modes.OUTCONTAINER:
        crawler = ContainersCrawler(
            features=args.features,
            environment=args.environment,
            user_list=args.crawlContainers,
            host_namespace=args.namespace,
            plugin_places=args.plugin_places,
            options=options)
    elif args.crawlmode in (Modes.INVM, Modes.MOUNTPOINT):
        crawler = HostCrawler(
            features=args.features,
            namespace=args.namespace,
            plugin_places=args.plugin_places,
            options=options)
    elif args.crawlmode == Modes.OUTVM:
        crawler = VirtualMachinesCrawler(
            features=args.features,
            user_list=args.vm_descs_list,
            host_namespace=args.namespace,
            plugin_places=args.plugin_places,
            options=options)
    else:
        raise NotImplementedError('Invalid crawlmode')

    worker = Worker(emitters=emitters,
                    frequency=args.frequency,
                    crawler=crawler)

    try:
        worker.run()
    except KeyboardInterrupt:
        pass
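# csv_list and json_parser are used as argparse 'type' callables above but
# are not shown in these examples. A minimal sketch consistent with that
# usage (an assumption, not the project's actual implementation; argparse is
# assumed to be imported at module level, as the code above requires):
def csv_list(value):
    # Split a comma-separated string: 'os,cpu' -> ['os', 'cpu'].
    return [item.strip() for item in value.split(',') if item.strip()]


def json_parser(value):
    # Parse a JSON dict given on the command line, e.g. '{"k": "v"}'.
    import json
    try:
        return json.loads(value)
    except ValueError as exc:
        raise argparse.ArgumentTypeError('not valid JSON: {}'.format(exc))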
def main():

    euid = os.geteuid()
    if euid != 0:
        print('Need to run this as root.')
        exit(1)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--options',
        dest='options',
        type=json_parser,
        default={},
        help='JSON dict of options passed as arguments to the crawler '
             'plugins.'
    )
    parser.add_argument(
        '--url',
        dest='url',
        type=csv_list,
        default=['stdout://'],
        help='Send the snapshot data to URL. Defaults to the console.',
    )
    parser.add_argument(
        '--namespace',
        dest='namespace',
        type=str,
        nargs='?',
        default=misc.get_host_ipaddr(),
        help='Data source this crawler is associated with. Defaults to '
             'the host IP address.',
    )
    parser.add_argument(
        '--features',
        dest='features',
        type=csv_list,
        default=['os', 'cpu'],
        help='Comma-separated list of feature types to crawl. Defaults to '
             'os,cpu',
    )
    parser.add_argument(
        '--frequency',
        dest='frequency',
        type=int,
        default=-1,
        help='Target time period for iterations. Defaults to -1, which '
             'means run only one iteration.'
    )
    parser.add_argument(
        '--compress',
        dest='compress',
        action='store_true',
        default=False,
        help='GZIP-compress the output frame data. Defaults to false '
             '(flag absent).',
    )
    parser.add_argument('--logfile', dest='logfile', type=str,
                        default='crawler.log',
                        help='Logfile path. Defaults to crawler.log'
                        )
    parser.add_argument(
        '--crawlmode',
        dest='crawlmode',
        type=str,
        choices=[
            Modes.INVM,
            Modes.OUTVM,
            Modes.MOUNTPOINT,
            Modes.OUTCONTAINER,
            Modes.MESOS,
        ],
        default=Modes.INVM,
        help='The crawler mode: '
             '{INVM,OUTVM,MOUNTPOINT,OUTCONTAINER,MESOS}. '
             'Defaults to INVM',
    )
    parser.add_argument(
        '--mountpoint',
        dest='mountpoint',
        type=str,
        default='/',
        help='Mountpoint location used as the / for features like packages, '
             'files, and config.'
    )
    parser.add_argument(
        '--format',
        dest='format',
        type=str,
        default='csv',
        choices=['csv', 'graphite', 'json', 'logstash'],
        help='Emitted data format.',
    )
    parser.add_argument(
        '--crawlContainers',
        dest='crawlContainers',
        type=str,
        nargs='?',
        default='ALL',
        help='Comma-separated list of Docker container IDs to crawl '
             '(only Docker is supported at the moment). Defaults to all '
             'running containers. Example: --crawlContainers aaa,bbb',
    )
    parser.add_argument(
        '--crawlVMs',
        dest='vm_descs_list',
        nargs='+',
        default='ALL',
        help='List of VMs to crawl. Defaults to \'ALL\' VMs. Currently each '
             'VM must be given as '
             '\'vm_name,kernel_version_long,linux_flavour,arch\'. '
             'Automatic kernel version detection may come later, so that '
             'only VM names (\'ALL\' by default) need to be passed. '
             'Example: --crawlVMs '
             'vm1,3.13.0-24-generic_3.13.0-24.x86_64,ubuntu,x86_64 '
             'vm2,4.0.3.x86_64,vanilla,x86_64',
    )
    parser.add_argument(
        '--environment',
        dest='environment',
        type=str,
        default='cloudsight',
        help='This specifies some environment-specific behavior, like how '
             'to name a container. The way to add a new behavior is by '
             'implementing a plugin (see plugins/cloudsight_environment.py '
             'as an example). Defaults to "cloudsight".',
    )
    parser.add_argument(
        '--plugins',
        dest='plugin_places',
        type=csv_list,
        default=['plugins'],
        help='Comma-separated list of directories in which to find plugins. '
             'Each path can be absolute, or relative to the location of '
             'crawler.py. Defaults to "plugins"',
    )
    parser.add_argument(
        '--numprocesses',
        dest='numprocesses',
        type=int,
        default=1,
        help='Number of processes used for container crawling. Defaults '
             'to 1. NOT SUPPORTED.'
    )
    parser.add_argument(
        '--extraMetadata',
        dest='extraMetadata',
        type=json_parser,
        default={},
        help='JSON with data to annotate all features. It can be used to '
             'append a set of system identifiers to the metadata feature '
             'and, if --extraMetadataForAll is set, to all features.'
    )
    parser.add_argument(
        '--avoidSetns',
        dest='avoid_setns',
        action='store_true',
        default=False,
        help='Avoids the use of the setns() syscall to crawl containers. '
             'Some features like process will not work with this option. '
             'Only applies to the OUTCONTAINER mode.'
    )

    args = parser.parse_args()
    misc.setup_logger('crawlutils', args.logfile)
    misc.setup_logger('yapsy', 'yapsy.log')

    options = args.options
    options['avoid_setns'] = args.avoid_setns
    options['mountpoint'] = args.mountpoint

    emitters = EmittersManager(urls=args.url,
                               format=args.format,
                               compress=args.compress,
                               extra_metadata=args.extraMetadata,
                               plugin_places=args.plugin_places)

    if args.crawlmode == Modes.OUTCONTAINER:
        crawler = ContainersCrawler(
            features=args.features,
            environment=args.environment,
            user_list=args.crawlContainers,
            host_namespace=args.namespace,
            plugin_places=args.plugin_places,
            options=options)
    elif args.crawlmode in (Modes.INVM, Modes.MOUNTPOINT):
        crawler = HostCrawler(
            features=args.features,
            namespace=args.namespace,
            plugin_places=args.plugin_places,
            options=options)
    elif args.crawlmode == Modes.OUTVM:
        crawler = VirtualMachinesCrawler(
            features=args.features,
            user_list=args.vm_descs_list,
            host_namespace=args.namespace,
            plugin_places=args.plugin_places,
            options=options)
    else:
        raise NotImplementedError('Invalid crawlmode')

    worker = Worker(emitters=emitters,
                    frequency=args.frequency,
                    crawler=crawler)

    try:
        worker.run()
    except KeyboardInterrupt:
        pass
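# For reference, a plausible entry-point wiring for main(), with an
# illustrative invocation (flag values are examples only; root is required
# by the euid check above):
#
#   sudo python crawler.py --crawlmode INVM --features os,cpu \
#       --url stdout:// --format json --frequency -1
#
if __name__ == '__main__':
    main()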