Example #1
0
    def _crawl_without_setns(self, container_id):
        """Crawl ruby gem packages by scanning the container rootfs from
        the host, without entering the container's namespaces.

        Yields (pkg_name, feature_dict, 'ruby-package') tuples.
        """
        mountpoint = dockerutils.get_docker_container_rootfs_path(container_id)

        # Directories (relative to the rootfs) where gemspecs usually live.
        gem_dirs = (
            "usr/lib/",
            "usr/share/",
            "usr/local/lib/",
            "usr/local/share/",
            "usr/local/bundle/",
            "var/lib/",
        )

        gemspecs = []
        for rel_dir in gem_dirs:
            gemspecs.extend(
                self._crawl_files(os.path.join(mountpoint, rel_dir),
                                  ".gemspec"))

        # A gemspec file name looks like '<name>-<version>.gemspec'.
        for spec in gemspecs:
            match = re.match(r'(.*)-([\d\.]*)(\.gemspec)', spec)
            if match is None:
                continue
            pkg_name, pkg_version = match.group(1), match.group(2)
            yield (
                pkg_name,
                {"pkgname": pkg_name, "pkgversion": pkg_version},
                'ruby-package')
    def _get_logfiles_list(self,
                           host_log_dir,
                           options=defaults.DEFAULT_CRAWL_OPTIONS):
        """
        Returns list of log files as a list of dictionaries `{name, type,
        source, dest}` to be linked to `host_log_dir`.
        """

        # Get the rootfs of the container in the host

        rootfs_path = get_docker_container_rootfs_path(
            self.long_id, self.inspect)

        logs_list = []

        self._get_container_log_files(rootfs_path, options)
        for logdict in self.log_file_list:
            name = logdict['name']
            _type = logdict['type']

            # Default mapping: the log file lives under the container
            # rootfs as seen from the host.
            lname = rootfs_path + name
            if "*" in lname:
                src_dest = [(s, s.split(rootfs_path, 1)[1])
                            for s in glob.glob(lname)]
            else:
                src_dest = [(lname, name)]

            # If the log path is inside a bind mount, map it through the
            # mount source on the host instead. This fixes two defects in
            # the old code: src_dest was left undefined (NameError) when
            # 'Mounts' was empty, and a later non-matching mount could
            # clobber an earlier matching mount's mapping.
            # assuming mount source or destination does not contain '*'
            for mount in self.inspect.get('Mounts') or []:
                if name.startswith(mount['Destination']):
                    lname = name.replace(mount['Destination'],
                                         mount['Source'])
                    if "*" in lname:
                        src_dest = [(s, s.replace(mount['Source'],
                                                  mount['Destination']))
                                    for s in glob.glob(lname)]
                    else:
                        src_dest = [(lname, name)]
                    break

            for log_src, log_dest in src_dest:
                log_dest = host_log_dir + log_dest
                log = {
                    'name': name,
                    'type': _type,
                    'source': log_src,
                    'dest': log_dest}

                # Avoid duplicate entries.
                if log not in logs_list:
                    logs_list.append(log)

        logger.info('logmap %s' % logs_list)

        # Always link the container's docker json log as 'docker.log'.
        docker_log_source = get_docker_container_json_logs_path(
            self.long_id, self.inspect)
        name = 'docker.log'
        docker_log_dest = os.path.join(host_log_dir, name)
        logs_list.append({'name': name,
                          'type': None,
                          'source': docker_log_source,
                          'dest': docker_log_dest})

        return logs_list
    def crawl_packages(self, dbpath=None, root_dir='/', avoid_setns=False):
        """Crawl installed packages, yielding (key, feature) tuples.

        Tries the namespace-jumping crawl first; for OUTCONTAINER crawls
        that fail, retries by reading the container rootfs directly from
        the host (avoiding setns()).
        """
        if not (avoid_setns and self.crawl_mode == Modes.OUTCONTAINER):
            try:
                for (key, feature) in self._crawl_wrapper(
                        self._crawl_packages, ALL_NAMESPACES, dbpath, root_dir):
                    yield (key, feature)
                return
            except CrawlError:
                # Raise the exception unless we are crawling containers, in
                # that case, retry the crawl avoiding the setns() syscall. This
                # is needed for PPC where we can not jump into the container
                # and run its apt or rpm commands.
                if self.crawl_mode != Modes.OUTCONTAINER:
                    # Bare raise preserves the original traceback (the old
                    # `raise e` discarded it).
                    raise
                else:
                    avoid_setns = True

        # If we are here it's because we have to retry avoiding setns(), or we
        # were asked to avoid it
        assert(avoid_setns and self.crawl_mode == Modes.OUTCONTAINER)

        root_dir = dockerutils.get_docker_container_rootfs_path(
            self.container.long_id)
        for (key, feature) in self._crawl_packages(dbpath, root_dir):
            yield (key, feature)
Example #4
0
    def __init__(
        self,
        long_id,
        inspect=None,
        container_opts=None,
        process_namespace=None,
    ):
        """Build a docker container object.

        :param long_id: full docker container ID (string).
        :param inspect: optional `docker inspect` dict; fetched via
            exec_dockerinspect() when not supplied.
        :param container_opts: optional dict of environment options.
        :param process_namespace: optional pid namespace; looked up from
            the container pid when not supplied.
        :raises ContainerNonExistent: when no container has `long_id`.
        """
        # None sentinel avoids a shared mutable default argument.
        if container_opts is None:
            container_opts = {}

        # Some quick sanity checks
        if not isinstance(long_id, basestring):
            raise TypeError('long_id should be a string')
        if inspect and not isinstance(inspect, dict):
            raise TypeError('inspect should be a dict.')
        if container_opts and not isinstance(container_opts, dict):
            raise TypeError('container_opts should be a dict.')

        if not inspect:
            try:
                inspect = exec_dockerinspect(long_id)
            except HTTPError:
                raise ContainerNonExistent('No docker container with ID: %s' %
                                           long_id)

        state = inspect['State']
        self.image = inspect['Image']

        assert (long_id == inspect['Id'])
        self.long_id = long_id
        self.pid = str(state['Pid'])
        self.name = inspect['Name']
        self.running = state['Running']
        self.created = inspect['Created']
        self.network_settings = inspect['NetworkSettings']
        self.cmd = inspect['Config']['Cmd']
        self.mounts = inspect.get('Mounts')
        self.volumes = inspect.get('Volumes')
        self.inspect = inspect

        self.process_namespace = (process_namespace
                                  or namespace.get_pid_namespace(self.pid))

        # This short ID is mainly used for logging purposes
        self.short_id = long_id[:12]

        # Docker prepends a '/' to the name. Let's remove it.
        if self.name[0] == '/':
            self.name = self.name[1:]

        self._set_image_fields(inspect.get('RepoTag', ''))
        self._set_mounts_list()

        # A missing rootfs is tolerated; dependent features degrade.
        try:
            self.root_fs = get_docker_container_rootfs_path(self.long_id)
        except (HTTPError, RuntimeError, DockerutilsException) as e:
            logger.exception(e)
            self.root_fs = None

        self._set_logs_list_input()
        self._set_environment_specific_options(container_opts)
        self._set_logs_list()
Example #5
0
 def crawl_config_files(
     self,
     root_dir='/',
     exclude_dirs=None,
     root_dir_alias=None,
     known_config_files=None,
     discover_config_files=False,
     avoid_setns=False
 ):
     """Crawl configuration files, yielding (key, feature) tuples.

     `exclude_dirs` defaults to ['proc', 'mnt', 'dev', 'tmp'] and
     `known_config_files` to []; None sentinels avoid the shared mutable
     default argument pitfall.
     """
     if exclude_dirs is None:
         exclude_dirs = ['proc', 'mnt', 'dev', 'tmp']
     if known_config_files is None:
         known_config_files = []

     if avoid_setns and self.crawl_mode == Modes.OUTCONTAINER:
         # Handle this special case first (avoiding setns() for the
         # OUTCONTAINER mode).
         root_dir = dockerutils.get_docker_container_rootfs_path(
             self.container.long_id)
         for (key, feature) in self._crawl_config_files(
                 root_dir,
                 exclude_dirs,
                 root_dir_alias,
                 known_config_files,
                 discover_config_files):
             yield (key, feature)
     else:
         for (key, feature) in self._crawl_wrapper(
                 self._crawl_config_files,
                 ['mnt'],
                 root_dir,
                 exclude_dirs,
                 root_dir_alias,
                 known_config_files,
                 discover_config_files):
             yield (key, feature)
    def crawl_config_files(
        self,
        root_dir='/',
        exclude_dirs=None,
        root_dir_alias=None,
        known_config_files=None,
        discover_config_files=False,
        avoid_setns=False
    ):
        """Crawl configuration files, yielding (key, feature) tuples.

        `exclude_dirs` defaults to ['proc', 'mnt', 'dev', 'tmp'] and
        `known_config_files` to []; None sentinels avoid the shared
        mutable default argument pitfall.
        """
        if exclude_dirs is None:
            exclude_dirs = ['proc', 'mnt', 'dev', 'tmp']
        if known_config_files is None:
            known_config_files = []

        if avoid_setns and self.crawl_mode == Modes.OUTCONTAINER:
            # Handle this special case first (avoiding setns() for the
            # OUTCONTAINER mode).
            root_dir = dockerutils.get_docker_container_rootfs_path(
                self.container.long_id)
            for (key, feature) in self._crawl_config_files(
                    root_dir,
                    exclude_dirs,
                    root_dir_alias,
                    known_config_files,
                    discover_config_files):
                yield (key, feature)
        else:
            for (key, feature) in self._crawl_wrapper(
                    self._crawl_config_files,
                    ['mnt'],
                    root_dir,
                    exclude_dirs,
                    root_dir_alias,
                    known_config_files,
                    discover_config_files):
                yield (key, feature)
Example #7
0
    def crawl_packages(self, dbpath=None, root_dir='/', avoid_setns=False):
        """Crawl installed packages and yield (key, feature) tuples.

        Attempts the namespace-jumping crawl first; an OUTCONTAINER crawl
        that fails is retried against the container rootfs on the host.
        """
        use_rootfs = avoid_setns and self.crawl_mode == Modes.OUTCONTAINER

        if not use_rootfs:
            try:
                for pair in self._crawl_wrapper(
                        self._crawl_packages, ALL_NAMESPACES, dbpath, root_dir):
                    yield pair
                return
            except CrawlError as e:
                # Raise the exception unless we are crawling containers, in
                # that case, retry the crawl avoiding the setns() syscall. This
                # is needed for PPC where we can not jump into the container
                # and run its apt or rpm commands.
                if self.crawl_mode != Modes.OUTCONTAINER:
                    raise e
                avoid_setns = True

        # Retry path: read the packages straight from the container rootfs.
        assert(avoid_setns and self.crawl_mode == Modes.OUTCONTAINER)

        root_dir = dockerutils.get_docker_container_rootfs_path(
            self.container.long_id)
        for pair in self._crawl_packages(dbpath, root_dir):
            yield pair
Example #8
0
    def crawl_files(
        self,
        root_dir='/',
        exclude_dirs=None,
        root_dir_alias=None,
        avoid_setns=False,
    ):
        """Crawl files under `root_dir`, yielding (key, feature) tuples.

        `exclude_dirs` defaults to ['/proc', '/mnt', '/dev', '/tmp']; the
        None sentinel avoids a shared mutable default argument.

        :raises ValueError: with avoid_setns, when an exclude dir is not
            an absolute path.
        """
        if exclude_dirs is None:
            exclude_dirs = ['/proc', '/mnt', '/dev', '/tmp']

        if avoid_setns and self.crawl_mode == Modes.OUTCONTAINER:
            # Handle this special case first (avoiding setns() for the
            # OUTCONTAINER mode).
            rootfs_dir = dockerutils.get_docker_container_rootfs_path(
                self.container.long_id)

            for d in exclude_dirs:
                if not os.path.isabs(d):
                    # Fixed message: the adjacent literals previously
                    # joined without a space ("...takesabsolute...").
                    raise ValueError('crawl_files with avoidsetns only takes '
                                     'absolute paths in the exclude_dirs arg.')

            exclude_dirs = [misc.join_abs_paths(rootfs_dir, d)
                            for d in exclude_dirs]

            for (key, feature) in self._crawl_files(
                    root_dir=misc.join_abs_paths(rootfs_dir, root_dir),
                    exclude_dirs=exclude_dirs,
                    root_dir_alias=root_dir):
                yield (key, feature)
        else:
            for (key, feature) in self._crawl_wrapper(
                    self._crawl_files,
                    ['mnt'],
                    root_dir,
                    exclude_dirs,
                    root_dir_alias):
                yield (key, feature)
    def __init__(
        self,
        long_id,
        inspect=None,
        container_opts=None,
    ):
        """Build a docker container object from its inspect data.

        :param long_id: full docker container ID (string).
        :param inspect: optional `docker inspect` dict; fetched via
            exec_dockerinspect() when not supplied.
        :param container_opts: optional dict of environment options.
        :raises ContainerNonExistent: when no container has `long_id`.
        """
        # None sentinel avoids a shared mutable default argument.
        if container_opts is None:
            container_opts = {}

        if not inspect:
            try:
                inspect = exec_dockerinspect(long_id)
            except HTTPError:
                raise ContainerNonExistent('No docker container with ID: %s'
                                           % long_id)

        state = inspect['State']
        self.image = inspect['Image']

        assert(long_id == inspect['Id'])
        self.long_id = long_id
        self.pid = str(state['Pid'])
        self.name = inspect['Name']
        self.running = state['Running']
        self.created = inspect['Created']
        self.network_settings = inspect['NetworkSettings']
        self.cmd = inspect['Config']['Cmd']
        self.inspect = inspect

        # This short ID is mainly used for logging purposes
        self.short_id = long_id[:12]

        # Docker prepends a '/' to the name. Let's remove it.
        if self.name[0] == '/':
            self.name = self.name[1:]

        # Split 'registry/owner/name:tag' into its components.
        # (Indentation normalized: this section previously mixed tabs with
        # spaces, which breaks under Python 3.)
        repo_tag = inspect.get('RepoTag', '')
        self.docker_image_long_name = repo_tag
        self.docker_image_short_name = os.path.basename(repo_tag)
        if ':' in repo_tag and '/' not in repo_tag.rsplit(':', 1)[1]:
            self.docker_image_tag = repo_tag.rsplit(':', 1)[1]
        else:
            self.docker_image_tag = ''
        self.docker_image_registry = os.path.dirname(repo_tag).split('/')[0]
        try:
            # This is the 'abc' in 'registry/abc/bla:latest'
            self.owner_namespace = os.path.dirname(repo_tag).split('/', 1)[1]
        except IndexError:
            self.owner_namespace = ''

        # A missing rootfs is tolerated; dependent features degrade.
        try:
            self.root_fs = get_docker_container_rootfs_path(self.long_id)
        except HTTPError as e:
            # Dropped a leftover debug `print e` (Python-2-only statement);
            # logger.exception already records the failure.
            logger.exception(e)
            self.root_fs = None

        self._set_logfiles_links_source()
        self._set_environment_specific_options(container_opts)
        self._set_logfiles_links_source_and_dest()
    def _get_logfiles_list(self,
                           host_log_dir,
                           options=defaults.DEFAULT_CRAWL_OPTIONS):
        """
        Returns list of log files as a list of dictionaries `{name, type,
        source, dest}` to be linked to `host_log_dir`.
        """

        # Get the rootfs of the container in the host

        rootfs_path = get_docker_container_rootfs_path(
            self.long_id, self.inspect)

        logs_list = []

        self._get_container_log_files(rootfs_path, options)
        for logdict in self.log_file_list:
            name = logdict['name']
            _type = logdict['type']
            log_source = rootfs_path + name

            # Expand globs in the source path; a plain path is treated as
            # a one-element list. (Removed the unused `log_dest` variable
            # the old code computed here, and normalized the inconsistent
            # 3-space sub-indentation.)
            if "*" in log_source:
                source_unglob_list = glob.glob(log_source)
            else:
                source_unglob_list = [log_source]

            logger.debug('GLOB LIST %s' % source_unglob_list)
            for source_unglob in source_unglob_list:
                logger.debug('SOURCE GLOB %s' % source_unglob)
                # Map the host-side source back to a path relative to the
                # rootfs, then anchor it under host_log_dir.
                if rootfs_path in source_unglob:
                    dest_unglob = (host_log_dir +
                                   source_unglob.split(rootfs_path, 1)[1])
                else:
                    dest_unglob = host_log_dir + source_unglob

                logger.debug('DEST GLOB %s' % dest_unglob)

                log = {
                    'name': name,
                    'type': _type,
                    'source': source_unglob,
                    'dest': dest_unglob}

                # Avoid duplicate entries.
                if log not in logs_list:
                    logs_list.append(log)

        logger.debug('GLOB LOGSLIST %s' % logs_list)

        # Always link the container's docker json log as 'docker.log'.
        docker_log_source = get_docker_container_json_logs_path(
            self.long_id, self.inspect)
        name = 'docker.log'
        docker_log_dest = os.path.join(host_log_dir, name)
        logs_list.append({'name': name,
                          'type': None,
                          'source': docker_log_source,
                          'dest': docker_log_dest})

        return logs_list
    def crawl_os(self, mountpoint=None, avoid_setns=False):
        """Crawl OS information, yielding (key, feature) tuples."""
        if avoid_setns and self.crawl_mode == Modes.OUTCONTAINER:
            # Handle this special case first (avoiding setns() for the
            # OUTCONTAINER mode). (Comment indentation normalized: these
            # lines previously used tabs in a space-indented file.)
            mountpoint = dockerutils.get_docker_container_rootfs_path(
                self.container.long_id)
            # Temporarily switch to MOUNTPOINT mode; the finally clause
            # restores it even if the crawl fails midway.
            self.crawl_mode = Modes.MOUNTPOINT
            try:
                for (key, feature) in self._crawl_os(mountpoint):
                    yield (key, feature)
            finally:
                self.crawl_mode = Modes.OUTCONTAINER
        else:
            for (key, feature) in self._crawl_wrapper(
                    self._crawl_os, ALL_NAMESPACES, mountpoint):
                yield (key, feature)
Example #12
0
 def crawl_os(self, mountpoint=None, avoid_setns=False):
     """Crawl OS features and yield (key, feature) tuples."""
     setns_bypass = avoid_setns and self.crawl_mode == Modes.OUTCONTAINER

     if not setns_bypass:
         for pair in self._crawl_wrapper(
                 self._crawl_os, ALL_NAMESPACES, mountpoint):
             yield pair
         return

     # OUTCONTAINER crawl without setns(): read the container rootfs
     # directly from the host, restoring the crawl mode afterwards.
     mountpoint = dockerutils.get_docker_container_rootfs_path(
         self.container.long_id)
     self.crawl_mode = Modes.MOUNTPOINT
     try:
         for pair in self._crawl_os(mountpoint):
             yield pair
     finally:
         self.crawl_mode = Modes.OUTCONTAINER
Example #13
0
    def _get_logfiles_list(self,
                           host_log_dir,
                           options=defaults.DEFAULT_CRAWL_OPTIONS):
        """
        Return the log files to be linked under `host_log_dir` as a list
        of `{name, type, source, dest}` dictionaries.
        """
        # Locate the container rootfs as seen from the host.
        rootfs_path = get_docker_container_rootfs_path(self.long_id,
                                                       self.inspect)

        logs_list = []
        for entry in self._get_container_log_files(rootfs_path, options):
            candidate = {
                'name': entry['name'],
                'type': entry['type'],
                'source': rootfs_path + entry['name'],
                'dest': host_log_dir + entry['name'],
            }
            # Keep the list free of duplicates.
            if candidate not in logs_list:
                logs_list.append(candidate)

        # The container's docker json log is always linked as 'docker.log'.
        name = 'docker.log'
        logs_list.append({
            'name': name,
            'type': None,
            'source': get_docker_container_json_logs_path(self.long_id,
                                                          self.inspect),
            'dest': os.path.join(host_log_dir, name),
        })

        return logs_list
    def _get_logfiles_list(self,
                           host_log_dir,
                           options=defaults.DEFAULT_CRAWL_OPTIONS):
        """
        Returns list of log files as a list of dictionaries `{name, type,
        source, dest}` to be linked to `host_log_dir`.
        """
        # Rootfs of the container, as seen from the host.
        rootfs_path = get_docker_container_rootfs_path(
            self.long_id, self.inspect)

        logs_list = []
        for logdict in self._get_container_log_files(rootfs_path, options):
            log_name = logdict['name']
            log = {'name': log_name,
                   'type': logdict['type'],
                   'source': rootfs_path + log_name,
                   'dest': host_log_dir + log_name}
            # De-duplicate while preserving order.
            if log not in logs_list:
                logs_list.append(log)

        # Append the container's docker json log as 'docker.log'.
        name = 'docker.log'
        logs_list.append({'name': name,
                          'type': None,
                          'source': get_docker_container_json_logs_path(
                              self.long_id, self.inspect),
                          'dest': os.path.join(host_log_dir, name)})

        return logs_list
 def crawl_config_files(
         self,
         root_dir='/',
         exclude_dirs=DEFAULT_CONFIG_EXCLUDE_DIRS,
         root_dir_alias=None,
         known_config_files=DEFAULT_CONFIG_KNOWN_CONFIG_FILES,
         discover_config_files=False,
         avoid_setns=False):
     """Crawl configuration files and yield (key, feature) tuples."""
     bypass_setns = avoid_setns and self.crawl_mode == Modes.OUTCONTAINER

     if not bypass_setns:
         for pair in self._crawl_wrapper(
                 self._crawl_config_files, ['mnt'], root_dir, exclude_dirs,
                 root_dir_alias, known_config_files, discover_config_files):
             yield pair
         return

     # OUTCONTAINER crawl without setns(): crawl the rootfs from the host.
     # XXX: following overwrites root_dir sent as argument!
     # XXX: should it donw as in crawl_files()?
     root_dir = dockerutils.get_docker_container_rootfs_path(
         self.container.long_id)
     for pair in self._crawl_config_files(
             root_dir, exclude_dirs, root_dir_alias, known_config_files,
             discover_config_files):
         yield pair
 def _crawl_without_setns(self, container_id):
     """Crawl packages by scanning the container rootfs from the host,
     without entering the container's namespaces."""
     rootfs = dockerutils.get_docker_container_rootfs_path(container_id)
     return self._get_packages_by_extension(rootfs)
Example #17
0
    def __init__(
        self,
        long_id,
        inspect=None,
        container_opts=None,
    ):
        """Build a docker container object from its inspect data.

        :param long_id: full docker container ID (string).
        :param inspect: optional `docker inspect` dict; fetched via
            exec_dockerinspect() when not supplied.
        :param container_opts: optional dict of environment options.
        :raises ContainerNonExistent: when no container has `long_id`.
        """
        # None sentinel avoids a shared mutable default argument.
        if container_opts is None:
            container_opts = {}

        # Some quick sanity checks
        if not isinstance(long_id, basestring):
            raise TypeError('long_id should be a string')
        if inspect and not isinstance(inspect, dict):
            raise TypeError('inspect should be a dict.')
        if container_opts and not isinstance(container_opts, dict):
            raise TypeError('container_opts should be a dict.')

        if not inspect:
            try:
                inspect = exec_dockerinspect(long_id)
            except HTTPError:
                raise ContainerNonExistent('No docker container with ID: %s' %
                                           long_id)

        state = inspect['State']
        self.image = inspect['Image']

        assert (long_id == inspect['Id'])
        self.long_id = long_id
        self.pid = str(state['Pid'])
        self.name = inspect['Name']
        self.running = state['Running']
        self.created = inspect['Created']
        self.network_settings = inspect['NetworkSettings']
        self.cmd = inspect['Config']['Cmd']
        self.mounts = inspect.get('Mounts')
        self.volumes = inspect.get('Volumes')
        self.inspect = inspect

        # This short ID is mainly used for logging purposes
        self.short_id = long_id[:12]

        # Docker prepends a '/' to the name. Let's remove it.
        if self.name[0] == '/':
            self.name = self.name[1:]

        # Split 'registry/owner/name:tag' into its components.
        repo_tag = inspect.get('RepoTag', '')
        self.docker_image_long_name = repo_tag
        self.docker_image_short_name = os.path.basename(repo_tag)
        if ':' in repo_tag and '/' not in repo_tag.rsplit(':', 1)[1]:
            self.docker_image_tag = repo_tag.rsplit(':', 1)[1]
        else:
            self.docker_image_tag = ''
        self.docker_image_registry = os.path.dirname(repo_tag).split('/')[0]
        try:
            # This is the 'abc' in 'registry/abc/bla:latest'
            self.owner_namespace = os.path.dirname(repo_tag).split('/', 1)[1]
        except IndexError:
            self.owner_namespace = ''

        self._set_mounts_list()

        # A missing rootfs is tolerated; dependent features degrade.
        try:
            self.root_fs = get_docker_container_rootfs_path(self.long_id)
        except (HTTPError, RuntimeError, DockerutilsException) as e:
            logger.exception(e)
            self.root_fs = None

        self._set_logfiles_links_source()
        self._set_environment_specific_options(container_opts)
        self._set_logfiles_links_source_and_dest()
    def setup_namespace_and_metadata(self,
                                     container_opts=None,
                                     runtime_env=None):
        """Resolve this container's namespace, log prefix and log file
        list from the runtime environment metadata.

        :param container_opts: optional dict of environment options.
        :param runtime_env: required runtime environment object.
        :raises AlchemyInvalidContainer: for non-docker containers.
        :raises AlchemyInvalidMetadata: when metadata is missing/invalid.
        """
        # None sentinel avoids a shared mutable default argument.
        if container_opts is None:
            container_opts = {}

        logger.info('setup_namespace_and_metadata: long_id=' +
                    self.long_id)

        self.runtime_env = runtime_env
        assert(runtime_env)

        _map = container_opts.get('long_id_to_namespace_map', {})
        if self.long_id in _map:
            self.namespace = _map[self.long_id]
            # XXX assert that there are no logs being linked as that won't be
            # supported now
            return

        host_namespace = container_opts.get('host_namespace', 'undefined')
        environment = container_opts.get('environment', 'cloudsight')
        container_logs = container_opts.get('container_logs')

        # XXX-kollerr only alchemy and watson containers are meant to be docker
        # this check is wrong. This should only apply to watson and alchemy.
        #
        # Just in case, a linux container is any process running in a different
        # namespace than the host root namespace. So, there are other containers
        # running in the system besides docker containers.
        if not self.is_docker_container():
            # XXX-kollerr So if we are only doing Docker container stuff below,
            # everything below here should be in dockercontainer.py
            raise AlchemyInvalidContainer()

        if environment == 'watson':
            # XXX-kollerr only docker containers have a rootfs. This code is
            # supposed to be docker agnostic. Moreover, this really applies to
            # watson containers only.
            self.root_fs = get_docker_container_rootfs_path(self.long_id)
        else:
            self.root_fs = None

        try:
            _options = {'root_fs': self.root_fs, 'type': 'docker',
                        'name': self.name, 'host_namespace': host_namespace,
                        'container_logs': container_logs}
            namespace = self.runtime_env.get_container_namespace(
                self.long_id, _options)
            if not namespace:
                logger.warning('Container %s does not have alchemy '
                               'metadata.' % self.short_id)
                # XXX-kollerr this should not be alchemy specific either
                raise AlchemyInvalidMetadata()
            self.namespace = namespace

            self.log_prefix = self.runtime_env.get_container_log_prefix(
                self.long_id, _options)

            self.log_file_list = self.runtime_env.get_container_log_file_list(
                self.long_id, _options)
        except ValueError:
            # XXX-kollerr this ValueError looks suspiciously very specific
            # to alchemy. Are you sure watson.py will be throwing ValueError?
            logger.warning('Container %s does not have a valid alchemy '
                           'metadata json file.' % self.short_id)
            raise AlchemyInvalidMetadata()