def _crawl_without_setns(self, container_id):
    """
    Discover installed ruby gems without entering the container's
    namespaces: scan the container rootfs (as mounted on the host) for
    `.gemspec` files under the usual gem install locations.

    Yields (pkg_name, {"pkgname", "pkgversion"}, 'ruby-package') tuples.
    """
    rootfs = dockerutils.get_docker_container_rootfs_path(container_id)
    # Relative paths where ruby gems are typically installed.
    search_dirs = (
        "usr/lib/",
        "usr/share/",
        "usr/local/lib/",
        "usr/local/share/",
        "usr/local/bundle/",
        "var/lib/",
    )
    gemspec_files = []
    for rel_dir in search_dirs:
        gemspec_files.extend(
            self._crawl_files(os.path.join(rootfs, rel_dir), ".gemspec"))
    # A gemspec file name looks like `<name>-<version>.gemspec`.
    gemspec_re = re.compile(r'(.*)-([\d\.]*)(\.gemspec)')
    for gemspec in gemspec_files:
        match = gemspec_re.match(gemspec)
        if match is None:
            continue
        name = match.group(1)
        version = match.group(2)
        yield (name,
               {"pkgname": name, "pkgversion": version},
               'ruby-package')
def _get_logfiles_list(self, host_log_dir, options=defaults.DEFAULT_CRAWL_OPTIONS): """ Returns list of log files as a list of dictionaries `{name, type, source, dest}` to be linked to `host_log_dir`. """ # Get the rootfs of the container in the host rootfs_path = get_docker_container_rootfs_path( self.long_id, self.inspect) logs_list = [] self._get_container_log_files(rootfs_path, options) for logdict in self.log_file_list: name = logdict['name'] _type = logdict['type'] # assuming mount source or destination does not contain '*' for mount in self.inspect['Mounts']: if name.startswith(mount['Destination']): lname = name.replace(mount['Destination'], mount['Source']) if "*" in lname: src_dest = [(s, s.replace(mount['Source'], mount[ 'Destination'])) for s in glob.glob(lname)] else: src_dest = [(lname, name)] else: lname = rootfs_path + name if "*" in lname: src_dest = [(s, s.split(rootfs_path, 1)[1]) for s in glob.glob(lname)] else: src_dest = [(lname, name)] for log_src, log_dest in src_dest: log_dest = host_log_dir + log_dest log = { 'name': name, 'type': _type, 'source': log_src, 'dest': log_dest} if log not in logs_list: logs_list.append(log) logger.info('logmap %s' % logs_list) docker_log_source = get_docker_container_json_logs_path( self.long_id, self.inspect) name = 'docker.log' docker_log_dest = os.path.join(host_log_dir, name) logs_list.append({'name': name, 'type': None, 'source': docker_log_source, 'dest': docker_log_dest}) return logs_list
def crawl_packages(self, dbpath=None, root_dir='/', avoid_setns=False):
    """
    Crawl the installed software packages of the target.

    First tries the namespace-jumping path (setns); on CrawlError while
    crawling a container it retries by crawling the container rootfs
    directly from the host. This is needed for PPC where we can not jump
    into the container and run its apt or rpm commands.

    :param dbpath: optional package database path passed to the crawler.
    :param root_dir: root directory to crawl (overwritten with the
        container rootfs path on the no-setns retry).
    :param avoid_setns: skip the setns() path entirely.
    :yields: (key, feature) tuples describing packages.
    """
    if not (avoid_setns and self.crawl_mode == Modes.OUTCONTAINER):
        try:
            for (key, feature) in self._crawl_wrapper(
                    self._crawl_packages, ALL_NAMESPACES, dbpath, root_dir):
                yield (key, feature)
            return
        except CrawlError:
            # Re-raise unless we are crawling containers; in that case,
            # retry the crawl avoiding the setns() syscall.
            if self.crawl_mode != Modes.OUTCONTAINER:
                # Bare `raise` preserves the original traceback
                # (`raise e` would reset it in Python 2).
                raise
            else:
                avoid_setns = True

    # If we are here it's because we have to retry avoiding setns(), or we
    # were asked to avoid it
    assert(avoid_setns and self.crawl_mode == Modes.OUTCONTAINER)
    root_dir = dockerutils.get_docker_container_rootfs_path(
        self.container.long_id)
    for (key, feature) in self._crawl_packages(dbpath, root_dir):
        yield (key, feature)
def __init__(
        self,
        long_id,
        inspect=None,
        container_opts=None,
        process_namespace=None,
):
    """
    Build a container object from a docker long ID.

    :param long_id: full docker container ID (string).
    :param inspect: optional pre-fetched `docker inspect` dict; fetched
        via exec_dockerinspect() when not given.
    :param container_opts: optional dict of environment-specific options.
    :param process_namespace: optional pid namespace; derived from the
        container pid when not given.
    :raises TypeError: on wrongly-typed arguments.
    :raises ContainerNonExistent: when no container has this ID.
    """
    # Some quick sanity checks
    if not isinstance(long_id, basestring):
        raise TypeError('long_id should be a string')
    if inspect and not isinstance(inspect, dict):
        raise TypeError('inspect should be a dict.')
    if container_opts and not isinstance(container_opts, dict):
        raise TypeError('container_opts should be a dict.')
    # Avoid the shared-mutable-default pitfall: default was `{}`.
    if container_opts is None:
        container_opts = {}

    if not inspect:
        try:
            inspect = exec_dockerinspect(long_id)
        except HTTPError:
            raise ContainerNonExistent('No docker container with ID: %s'
                                       % long_id)

    state = inspect['State']
    self.image = inspect['Image']
    assert (long_id == inspect['Id'])
    self.long_id = long_id
    self.pid = str(state['Pid'])
    self.name = inspect['Name']
    self.running = state['Running']
    self.created = inspect['Created']
    self.network_settings = inspect['NetworkSettings']
    self.cmd = inspect['Config']['Cmd']
    self.mounts = inspect.get('Mounts')
    self.volumes = inspect.get('Volumes')
    self.inspect = inspect
    self.process_namespace = (process_namespace or
                              namespace.get_pid_namespace(self.pid))

    # This short ID is mainly used for logging purposes
    self.short_id = long_id[:12]

    # Docker prepends a '/' to the name. Let's remove it.
    if self.name[0] == '/':
        self.name = self.name[1:]

    self._set_image_fields(inspect.get('RepoTag', ''))
    self._set_mounts_list()
    try:
        self.root_fs = get_docker_container_rootfs_path(self.long_id)
    except (HTTPError, RuntimeError, DockerutilsException) as e:
        # Best effort: keep the container usable even without a rootfs path.
        logger.exception(e)
        self.root_fs = None
    self._set_logs_list_input()
    self._set_environment_specific_options(container_opts)
    self._set_logs_list()
def crawl_config_files(
    self,
    root_dir='/',
    exclude_dirs=None,
    root_dir_alias=None,
    known_config_files=None,
    discover_config_files=False,
    avoid_setns=False
):
    """
    Crawl configuration files of the target.

    :param root_dir: directory to start the crawl from.
    :param exclude_dirs: directory names to skip
        (defaults to ['proc', 'mnt', 'dev', 'tmp']).
    :param root_dir_alias: alias used to report paths.
    :param known_config_files: extra config-file paths to report
        (defaults to []).
    :param discover_config_files: heuristically detect config files.
    :param avoid_setns: crawl the container rootfs from the host instead
        of jumping into the container's namespaces.
    :yields: (key, feature) tuples.
    """
    # Mutable defaults were previously shared across calls; use None
    # sentinels with the same effective values instead.
    if exclude_dirs is None:
        exclude_dirs = ['proc', 'mnt', 'dev', 'tmp']
    if known_config_files is None:
        known_config_files = []

    if avoid_setns and self.crawl_mode == Modes.OUTCONTAINER:
        # Handle this special case first (avoiding setns() for the
        # OUTCONTAINER mode).
        root_dir = dockerutils.get_docker_container_rootfs_path(
            self.container.long_id)
        for (key, feature) in self._crawl_config_files(
                root_dir,
                exclude_dirs,
                root_dir_alias,
                known_config_files,
                discover_config_files):
            yield (key, feature)
    else:
        for (key, feature) in self._crawl_wrapper(
                self._crawl_config_files,
                ['mnt'],
                root_dir,
                exclude_dirs,
                root_dir_alias,
                known_config_files,
                discover_config_files):
            yield (key, feature)
def crawl_files(
    self,
    root_dir='/',
    exclude_dirs=None,
    root_dir_alias=None,
    avoid_setns=False,
):
    """
    Crawl the files of the target.

    :param root_dir: directory to start the crawl from.
    :param exclude_dirs: absolute paths to skip
        (defaults to ['/proc', '/mnt', '/dev', '/tmp']).
    :param root_dir_alias: alias used to report paths.
    :param avoid_setns: crawl the container rootfs from the host instead
        of jumping into the container's namespaces.
    :raises ValueError: when avoid_setns is used with a relative path in
        exclude_dirs.
    :yields: (key, feature) tuples.
    """
    # Mutable default was previously shared across calls.
    if exclude_dirs is None:
        exclude_dirs = ['/proc', '/mnt', '/dev', '/tmp']

    if avoid_setns and self.crawl_mode == Modes.OUTCONTAINER:
        # Handle this special case first (avoiding setns() for the
        # OUTCONTAINER mode).
        rootfs_dir = dockerutils.get_docker_container_rootfs_path(
            self.container.long_id)
        for d in exclude_dirs:
            if not os.path.isabs(d):
                # Fixed: adjacent string literals were missing a separating
                # space ("takesabsolute").
                raise ValueError('crawl_files with avoidsetns only takes '
                                 'absolute paths in the exclude_dirs arg.')
        # Re-anchor the exclude list under the container rootfs.
        exclude_dirs = [misc.join_abs_paths(rootfs_dir, d)
                        for d in exclude_dirs]
        for (key, feature) in self._crawl_files(
                root_dir=misc.join_abs_paths(rootfs_dir, root_dir),
                exclude_dirs=exclude_dirs,
                root_dir_alias=root_dir):
            yield (key, feature)
    else:
        for (key, feature) in self._crawl_wrapper(
                self._crawl_files,
                ['mnt'],
                root_dir,
                exclude_dirs,
                root_dir_alias):
            yield (key, feature)
def __init__(
        self,
        long_id,
        inspect=None,
        container_opts=None,
):
    """
    Build a container object from a docker long ID, deriving image-name
    fields (registry, namespace, tag) from the inspect RepoTag.

    :param long_id: full docker container ID (string).
    :param inspect: optional pre-fetched `docker inspect` dict.
    :param container_opts: optional dict of environment-specific options.
    :raises ContainerNonExistent: when no container has this ID.
    """
    # Avoid the shared-mutable-default pitfall: default was `{}`.
    if container_opts is None:
        container_opts = {}

    if not inspect:
        try:
            inspect = exec_dockerinspect(long_id)
        except HTTPError:
            raise ContainerNonExistent('No docker container with ID: %s'
                                       % long_id)

    state = inspect['State']
    self.image = inspect['Image']
    assert(long_id == inspect['Id'])
    self.long_id = long_id
    self.pid = str(state['Pid'])
    self.name = inspect['Name']
    self.running = state['Running']
    self.created = inspect['Created']
    self.network_settings = inspect['NetworkSettings']
    self.cmd = inspect['Config']['Cmd']
    self.inspect = inspect

    # This short ID is mainly used for logging purposes
    self.short_id = long_id[:12]

    # Docker prepends a '/' to the name. Let's remove it.
    if self.name[0] == '/':
        self.name = self.name[1:]

    repo_tag = inspect.get('RepoTag', '')
    self.docker_image_long_name = repo_tag
    self.docker_image_short_name = os.path.basename(repo_tag)
    # A ':' after the last '/' separates the tag (e.g. 'bla:latest');
    # a ':' before a '/' would be a registry port instead.
    if ':' in repo_tag and '/' not in repo_tag.rsplit(':', 1)[1]:
        self.docker_image_tag = repo_tag.rsplit(':', 1)[1]
    else:
        self.docker_image_tag = ''
    self.docker_image_registry = os.path.dirname(repo_tag).split('/')[0]
    try:
        # This is the 'abc' in 'registry/abc/bla:latest'
        self.owner_namespace = os.path.dirname(repo_tag).split('/', 1)[1]
    except IndexError:
        self.owner_namespace = ''

    try:
        self.root_fs = get_docker_container_rootfs_path(self.long_id)
    except HTTPError as e:
        # Removed leftover debug `print e`; logger.exception already
        # records the error with its traceback.
        logger.exception(e)
        self.root_fs = None
    self._set_logfiles_links_source()
    self._set_environment_specific_options(container_opts)
    self._set_logfiles_links_source_and_dest()
def _get_logfiles_list(self, host_log_dir,
                       options=defaults.DEFAULT_CRAWL_OPTIONS):
    """
    Returns list of log files as a list of dictionaries
    `{name, type, source, dest}` to be linked to `host_log_dir`.

    Log names may contain glob patterns ('*'), which are expanded against
    the container rootfs on the host; each expansion becomes its own
    entry. The container's docker json log is always appended as
    'docker.log'.
    """
    # Get the rootfs of the container in the host
    rootfs_path = get_docker_container_rootfs_path(
        self.long_id, self.inspect)
    logs_list = []
    # Side effect: populates self.log_file_list, which is iterated below.
    self._get_container_log_files(rootfs_path, options)
    for logdict in self.log_file_list:
        name = logdict['name']
        _type = logdict['type']
        log_source = rootfs_path + name
        # (removed a dead `log_dest = host_log_dir + name` assignment:
        # the destination is recomputed per glob expansion below)
        if "*" in log_source:
            source_unglob_list = glob.glob(log_source)
        else:
            source_unglob_list = [log_source]
        logger.debug('GLOB LIST %s' % source_unglob_list)
        for source_unglob in source_unglob_list:
            logger.debug('SOURCE GLOB %s' % source_unglob)
            # Strip the rootfs prefix so the dest mirrors the in-container
            # path under host_log_dir.
            if rootfs_path in source_unglob:
                dest_unglob = host_log_dir + source_unglob.split(
                    rootfs_path, 1)[1]
            else:
                dest_unglob = host_log_dir + source_unglob
            logger.debug('DEST GLOB %s' % dest_unglob)
            log = {
                'name': name,
                'type': _type,
                'source': source_unglob,
                'dest': dest_unglob}
            if log not in logs_list:
                logs_list.append(log)
    logger.debug('GLOB LOGSLIST %s' % logs_list)
    # Always link the docker daemon's json log for this container.
    docker_log_source = get_docker_container_json_logs_path(
        self.long_id, self.inspect)
    name = 'docker.log'
    docker_log_dest = os.path.join(host_log_dir, name)
    logs_list.append({'name': name,
                      'type': None,
                      'source': docker_log_source,
                      'dest': docker_log_dest})
    return logs_list
def crawl_os(self, mountpoint=None, avoid_setns=False):
    """
    Crawl OS-level features of the target.

    When avoid_setns is requested for a container, the container rootfs is
    crawled directly from the host: crawl_mode is temporarily switched to
    MOUNTPOINT and restored to OUTCONTAINER when the generator finishes.
    """
    bypass_setns = avoid_setns and self.crawl_mode == Modes.OUTCONTAINER
    if not bypass_setns:
        for (key, feature) in self._crawl_wrapper(
                self._crawl_os, ALL_NAMESPACES, mountpoint):
            yield (key, feature)
        return

    # Special case: crawl the rootfs from the host, no setns().
    mountpoint = dockerutils.get_docker_container_rootfs_path(
        self.container.long_id)
    self.crawl_mode = Modes.MOUNTPOINT
    try:
        for (key, feature) in self._crawl_os(mountpoint):
            yield (key, feature)
    finally:
        # Restore the mode even if the consumer abandons the generator.
        self.crawl_mode = Modes.OUTCONTAINER
def _get_logfiles_list(self, host_log_dir,
                       options=defaults.DEFAULT_CRAWL_OPTIONS):
    """
    Returns list of log files as a list of dictionaries
    `{name, type, source, dest}` to be linked to `host_log_dir`.
    """
    # Resolve the container rootfs as seen from the host.
    rootfs_path = get_docker_container_rootfs_path(self.long_id,
                                                   self.inspect)
    logs_list = []
    for entry in self._get_container_log_files(rootfs_path, options):
        log_name = entry['name']
        log = {
            'name': log_name,
            'type': entry['type'],
            'source': rootfs_path + log_name,
            'dest': host_log_dir + log_name,
        }
        # Keep the list duplicate-free.
        if log not in logs_list:
            logs_list.append(log)

    # The docker daemon's json log is always linked as 'docker.log'.
    docker_log_source = get_docker_container_json_logs_path(self.long_id,
                                                            self.inspect)
    logs_list.append({
        'name': 'docker.log',
        'type': None,
        'source': docker_log_source,
        'dest': os.path.join(host_log_dir, 'docker.log'),
    })
    return logs_list
def _get_logfiles_list(self, host_log_dir,
                       options=defaults.DEFAULT_CRAWL_OPTIONS):
    """
    Returns list of log files as a list of dictionaries
    `{name, type, source, dest}` to be linked to `host_log_dir`.
    """
    # Rootfs of the container as mounted on the host.
    container_rootfs = get_docker_container_rootfs_path(
        self.long_id, self.inspect)

    logs_list = []
    for logdict in self._get_container_log_files(container_rootfs,
                                                 options):
        candidate = {'name': logdict['name'],
                     'type': logdict['type'],
                     'source': container_rootfs + logdict['name'],
                     'dest': host_log_dir + logdict['name']}
        if candidate in logs_list:
            continue  # skip duplicates
        logs_list.append(candidate)

    # Append the docker daemon's json log for this container.
    json_log = get_docker_container_json_logs_path(
        self.long_id, self.inspect)
    logs_list.append({'name': 'docker.log',
                      'type': None,
                      'source': json_log,
                      'dest': os.path.join(host_log_dir, 'docker.log')})
    return logs_list
def crawl_config_files(
        self,
        root_dir='/',
        exclude_dirs=DEFAULT_CONFIG_EXCLUDE_DIRS,
        root_dir_alias=None,
        known_config_files=DEFAULT_CONFIG_KNOWN_CONFIG_FILES,
        discover_config_files=False,
        avoid_setns=False):
    """
    Crawl configuration files of the target, either through the
    namespace-jumping wrapper or directly over the container rootfs when
    setns() must be avoided.
    """
    if avoid_setns and self.crawl_mode == Modes.OUTCONTAINER:
        # Special case: avoid setns() for the OUTCONTAINER mode.
        # XXX: following overwrites root_dir sent as argument!
        # XXX: should it donw as in crawl_files()?
        root_dir = dockerutils.get_docker_container_rootfs_path(
            self.container.long_id)
        crawler = self._crawl_config_files(root_dir,
                                           exclude_dirs,
                                           root_dir_alias,
                                           known_config_files,
                                           discover_config_files)
    else:
        crawler = self._crawl_wrapper(self._crawl_config_files,
                                      ['mnt'],
                                      root_dir,
                                      exclude_dirs,
                                      root_dir_alias,
                                      known_config_files,
                                      discover_config_files)
    for (key, feature) in crawler:
        yield (key, feature)
def _crawl_without_setns(self, container_id):
    """
    Crawl packages directly from the container rootfs as mounted on the
    host, i.e. without jumping into the container via setns().
    """
    rootfs = dockerutils.get_docker_container_rootfs_path(container_id)
    return self._get_packages_by_extension(rootfs)
def __init__(
        self,
        long_id,
        inspect=None,
        container_opts=None,
):
    """
    Build a container object from a docker long ID, deriving image-name
    fields (registry, namespace, tag) from the inspect RepoTag.

    :param long_id: full docker container ID (string).
    :param inspect: optional pre-fetched `docker inspect` dict.
    :param container_opts: optional dict of environment-specific options.
    :raises TypeError: on wrongly-typed arguments.
    :raises ContainerNonExistent: when no container has this ID.
    """
    # Some quick sanity checks
    if not isinstance(long_id, basestring):
        raise TypeError('long_id should be a string')
    if inspect and not isinstance(inspect, dict):
        raise TypeError('inspect should be a dict.')
    if container_opts and not isinstance(container_opts, dict):
        raise TypeError('container_opts should be a dict.')
    # Avoid the shared-mutable-default pitfall: default was `{}`.
    if container_opts is None:
        container_opts = {}

    if not inspect:
        try:
            inspect = exec_dockerinspect(long_id)
        except HTTPError:
            raise ContainerNonExistent('No docker container with ID: %s'
                                       % long_id)

    state = inspect['State']
    self.image = inspect['Image']
    assert (long_id == inspect['Id'])
    self.long_id = long_id
    self.pid = str(state['Pid'])
    self.name = inspect['Name']
    self.running = state['Running']
    self.created = inspect['Created']
    self.network_settings = inspect['NetworkSettings']
    self.cmd = inspect['Config']['Cmd']
    self.mounts = inspect.get('Mounts')
    self.volumes = inspect.get('Volumes')
    self.inspect = inspect

    # This short ID is mainly used for logging purposes
    self.short_id = long_id[:12]

    # Docker prepends a '/' to the name. Let's remove it.
    if self.name[0] == '/':
        self.name = self.name[1:]

    repo_tag = inspect.get('RepoTag', '')
    self.docker_image_long_name = repo_tag
    self.docker_image_short_name = os.path.basename(repo_tag)
    # A ':' after the last '/' separates the tag (e.g. 'bla:latest');
    # a ':' before a '/' would be a registry port instead.
    if ':' in repo_tag and '/' not in repo_tag.rsplit(':', 1)[1]:
        self.docker_image_tag = repo_tag.rsplit(':', 1)[1]
    else:
        self.docker_image_tag = ''
    self.docker_image_registry = os.path.dirname(repo_tag).split('/')[0]
    try:
        # This is the 'abc' in 'registry/abc/bla:latest'
        self.owner_namespace = os.path.dirname(repo_tag).split('/', 1)[1]
    except IndexError:
        self.owner_namespace = ''

    self._set_mounts_list()
    try:
        self.root_fs = get_docker_container_rootfs_path(self.long_id)
    except (HTTPError, RuntimeError, DockerutilsException) as e:
        # Best effort: keep the container usable even without a rootfs path.
        logger.exception(e)
        self.root_fs = None
    self._set_logfiles_links_source()
    self._set_environment_specific_options(container_opts)
    self._set_logfiles_links_source_and_dest()
def setup_namespace_and_metadata(self, container_opts=None,
                                 runtime_env=None):
    """
    Resolve this container's namespace and log metadata through the
    runtime environment plugin.

    :param container_opts: optional dict of environment options
        (long_id_to_namespace_map, host_namespace, environment,
        container_logs).
    :param runtime_env: runtime-environment plugin (required).
    :raises AlchemyInvalidContainer: when this is not a docker container.
    :raises AlchemyInvalidMetadata: when namespace metadata is missing or
        unparsable.
    """
    logger.info('setup_namespace_and_metadata: long_id=' + self.long_id)
    # Avoid the shared-mutable-default pitfall: default was `{}`.
    if container_opts is None:
        container_opts = {}

    self.runtime_env = runtime_env
    assert(runtime_env)

    _map = container_opts.get('long_id_to_namespace_map', {})
    if self.long_id in _map:
        self.namespace = _map[self.long_id]
        # XXX assert that there are no logs being linked as that won't be
        # supported now
        return

    host_namespace = container_opts.get('host_namespace', 'undefined')
    environment = container_opts.get('environment', 'cloudsight')
    container_logs = container_opts.get('container_logs')

    # XXX-kollerr only alchemy and watson containers are meant to be docker
    # this check is wrong. This should only apply to watson and alchemy.
    #
    # Just in case, a linux container is any process running in a different
    # namespace than the host root namespace. So, there are other containers
    # running in the system besides docker containers.
    if not self.is_docker_container():
        # XXX-kollerr So if we are only doing Docker container stuff below,
        # everything below here should be in dockercontainer.py
        raise AlchemyInvalidContainer()

    if environment == 'watson':
        # XXX-kollerr only docker containers have a rootfs. This code is
        # supposed to be docker agnostic. Moreover, this really applies to
        # watson containers only.
        self.root_fs = get_docker_container_rootfs_path(self.long_id)
    else:
        self.root_fs = None

    try:
        _options = {'root_fs': self.root_fs, 'type': 'docker',
                    'name': self.name, 'host_namespace': host_namespace,
                    'container_logs': container_logs}
        namespace = self.runtime_env.get_container_namespace(
            self.long_id, _options)
        if not namespace:
            logger.warning('Container %s does not have alchemy '
                           'metadata.' % self.short_id)
            # XXX-kollerr this should not be alchemy specific either
            raise AlchemyInvalidMetadata()
        self.namespace = namespace
        self.log_prefix = self.runtime_env.get_container_log_prefix(
            self.long_id, _options)
        self.log_file_list = self.runtime_env.get_container_log_file_list(
            self.long_id, _options)
    except ValueError:
        # XXX-kollerr this ValueError looks suspiciously very specific
        # to alchemy. Are you sure watson.py will be throwing ValueError?
        logger.warning('Container %s does not have a valid alchemy '
                       'metadata json file.' % self.short_id)
        raise AlchemyInvalidMetadata()