def get_processes(gs, docker_host, container_id):
  """Gets the list of all processes in 'container_id' on 'docker_host'.

  If the container is not found, returns an empty list of processes.

  Args:
    gs: global state.
    docker_host: the Docker host running the container.
    container_id: the container running the processes.

  Returns:
    list of wrapped process objects.
    Each element in the list is the result of
    utilities.wrap_object(process, 'Process', ...)

  Raises:
    CollectorError: in case of failure to fetch data from Docker.
    Other exceptions may be raised due to execution errors.
  """
  processes_label = '%s/%s' % (docker_host, container_id)
  processes, timestamp_secs = gs.get_processes_cache().lookup(
      processes_label)
  if timestamp_secs is not None:
    gs.logger_info(
        'get_processes(docker_host=%s, container_id=%s) cache hit',
        docker_host, container_id)
    return processes

  container = get_one_container(gs, docker_host, container_id)
  if container is not None:
    assert utilities.is_wrapped_object(container, 'Container')
    container_short_hex_id = utilities.object_to_hex_id(
        container['properties'])
    assert utilities.valid_string(container_short_hex_id)
  else:
    # Parent container not found. Container might have crashed while we were
    # looking for it.
    return []

  # NOTE: there is no trailing /json in this URL - this looks like a bug in
  # the Docker API.
  url = ('http://{docker_host}:{port}/containers/{container_id}/top?'
         'ps_args=aux'.format(docker_host=docker_host,
                              port=gs.get_docker_port(),
                              container_id=container_id))

  # A typical value of 'docker_host' is:
  # k8s-guestbook-node-3.c.rising-apricot-840.internal
  # Use only the first period-separated element for the test file name.
  # A typical value of 'container_id' is:
  # k8s_php-redis.b317029a_guestbook-controller-ls6k1.default.api_f991d53e-b949-11e4-8246-42010af0c3dd_8dcdfec8
  # Use just the tail of the container ID after the last '_' sign.
  fname = '{host}-processes-{id}'.format(
      host=docker_host.split('.')[0], id=container_id.split('_')[-1])

  try:
    # TODO(vasbala): what should we do in cases where the container is gone
    # (and replaced by a different one)?
    result = fetch_data(gs, url, fname, expect_missing=True)
  except ValueError:
    # This container does not exist anymore.
    return []
  except collector_error.CollectorError:
    raise
  except:
    msg = 'fetching %s failed with exception %s' % (url, sys.exc_info()[0])
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  if not isinstance(utilities.get_attribute(result, ['Titles']),
                    types.ListType):
    invalid_processes(gs, url)
  if not isinstance(utilities.get_attribute(result, ['Processes']),
                    types.ListType):
    invalid_processes(gs, url)

  pstats = result['Titles']
  processes = []
  now = time.time()
  for pvalues in result['Processes']:
    process = {}
    if not isinstance(pvalues, types.ListType):
      invalid_processes(gs, url)
    if len(pstats) != len(pvalues):
      invalid_processes(gs, url)
    for pstat, pvalue in zip(pstats, pvalues):
      process[pstat] = pvalue

    # Prefix with the container ID to ensure uniqueness across the whole
    # graph.
    process_id = '%s/%s' % (container_short_hex_id, process['PID'])
    processes.append(utilities.wrap_object(
        process, 'Process', process_id, now, label=process['PID']))

  ret_value = gs.get_processes_cache().update(processes_label, processes, now)
  gs.logger_info(
      'get_processes(docker_host=%s, container_id=%s) returns %d processes',
      docker_host, container_id, len(processes))
  return ret_value
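# Illustration only: a rough sketch of the shape of the wrapped objects
# returned by utilities.wrap_object(), inferred from how they are accessed in
# this module ('id' and 'properties' below, and 'annotations'['label'] in
# get_containers()). The exact set of keys is an assumption; the real helper
# may store additional metadata (e.g. the type tag and timestamp passed to
# it).
_EXAMPLE_WRAPPED_PROCESS = {
    'id': 'd85b599c17d8/1234',         # unique graph ID (short hex ID + PID)
    'properties': {'PID': '1234'},     # the raw object fetched from Docker
    'annotations': {'label': '1234'},  # display label
}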
def get_image(gs, docker_host, container):
  """Gets the information of the given image in the given host.

  Args:
    gs: global state.
    docker_host: Docker host name. Must not be empty.
    container: the container which runs the image.

  Returns:
    If the image was found, returns the wrapped image object, which is the
    result of utilities.wrap_object(image, 'Image', ...)
    If the image was not found, returns None.

  Raises:
    CollectorError: in case of failure to fetch data from Docker.
    ValueError: in case the container does not contain a valid image ID.
    Other exceptions may be raised due to execution errors.
  """
  assert utilities.is_wrapped_object(container, 'Container')

  # The 'image_id' should be a long hexadecimal string.
  image_id = utilities.get_attribute(container, ['properties', 'Image'])
  if not utilities.valid_hex_id(image_id):
    msg = 'missing or invalid image ID in container ID=%s' % container['id']
    gs.logger_error(msg)
    raise ValueError(msg)

  # The 'image_name' should be a symbolic name (not a hexadecimal string).
  image_name = utilities.get_attribute(
      container, ['properties', 'Config', 'Image'])
  if ((not utilities.valid_string(image_name)) or
      utilities.valid_hex_id(image_name)):
    msg = 'missing or invalid image name in container ID=%s' % container['id']
    gs.logger_error(msg)
    raise ValueError(msg)

  cache_key = '%s|%s' % (docker_host, image_id)
  image, timestamp_secs = gs.get_images_cache().lookup(cache_key)
  if timestamp_secs is not None:
    gs.logger_info('get_image(docker_host=%s, image_id=%s) cache hit',
                   docker_host, image_id)
    return image

  # A typical value of 'docker_host' is:
  # k8s-guestbook-node-3.c.rising-apricot-840.internal
  # Use only the first period-separated element for the test file name.
  # A typical value of 'image_name' is:
  # brendanburns/php-redis
  # Convert embedded '/' and ':' characters to '-' to avoid interference with
  # the directory structure or the file system.
  url = 'http://{docker_host}:{port}/images/{image_id}/json'.format(
      docker_host=docker_host, port=gs.get_docker_port(), image_id=image_id)
  fname = '{host}-image-{id}'.format(
      host=docker_host.split('.')[0],
      id=image_name.replace('/', '-').replace(':', '-'))

  try:
    image = fetch_data(gs, url, fname, expect_missing=True)
  except ValueError:
    # Image not found.
    msg = 'image not found for image_id: %s' % image_id
    gs.logger_info(msg)
    return None
  except collector_error.CollectorError:
    raise
  except:
    msg = 'fetching %s failed with exception %s' % (url, sys.exc_info()[0])
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  now = time.time()

  # Compute the two labels of the image.
  # The first is the 12-digit hexadecimal number shown by "docker images".
  # The second is the symbolic name of the image.
  full_hex_label = image.get('Id')
  if not (isinstance(full_hex_label, types.StringTypes) and full_hex_label):
    msg = 'Image id=%s has an invalid "Id" attribute value' % image_id
    gs.logger_error(msg)
    raise collector_error.CollectorError(msg)

  short_hex_label = utilities.object_to_hex_id(image)
  if short_hex_label is None:
    msg = 'Could not compute short hex ID of image %s' % image_id
    gs.logger_error(msg)
    raise collector_error.CollectorError(msg)

  wrapped_image = utilities.wrap_object(
      image, 'Image', full_hex_label, now,
      label=short_hex_label, alt_label=image_name)

  ret_value = gs.get_images_cache().update(cache_key, wrapped_image, now)
  gs.logger_info('get_image(docker_host=%s, image_id=%s, image_name=%s)',
                 docker_host, image_id, image_name)
  return ret_value
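# Every collector function in this module follows the same caching pattern:
# lookup(key) returns a (value, timestamp_secs) pair where timestamp_secs is
# None on a miss, and update(key, value, now) stores the value and returns it,
# so callers can return the cached copy directly. The class below is a
# minimal, illustration-only cache that satisfies this contract; the real
# caches returned by gs.get_*_cache() presumably also handle expiration and
# concurrent access, which this sketch ignores.
class _ExampleSimpleCache(object):
  """Minimal dict-backed cache; for illustration only."""

  def __init__(self):
    self._data = {}  # maps key -> (value, timestamp_secs)

  def lookup(self, key):
    # Returns (value, timestamp_secs), or (None, None) on a miss.
    return self._data.get(key, (None, None))

  def update(self, key, value, timestamp_secs):
    # Stores the value and returns it, mirroring how the callers above use
    # the return value of update() as their own return value.
    self._data[key] = (value, timestamp_secs)
    return value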
def get_containers(gs, docker_host):
  """Gets the list of all containers in 'docker_host'.

  Args:
    gs: global state.
    docker_host: the Docker host running the containers.

  Returns:
    list of wrapped container objects.
    Each element in the list is the result of
    utilities.wrap_object(container, 'Container', ...)

  Raises:
    CollectorError: in case of failure to fetch data from Docker.
    Other exceptions may be raised due to execution errors.
  """
  containers, timestamp = gs.get_containers_cache().lookup(docker_host)
  if timestamp is not None:
    gs.logger_info(
        'get_containers(docker_host=%s) cache hit returns %d containers',
        docker_host, len(containers))
    return containers

  url = 'http://{docker_host}:{port}/containers/json'.format(
      docker_host=docker_host, port=gs.get_docker_port())

  # A typical value of 'docker_host' is:
  # k8s-guestbook-node-3.c.rising-apricot-840.internal
  # Use only the first period-separated element for the test file name.
  fname = '{host}-containers'.format(host=docker_host.split('.')[0])

  try:
    containers_list = fetch_data(gs, url, fname)
  except collector_error.CollectorError:
    raise
  except:
    msg = ('fetching %s or %s failed with exception %s' %
           (url, fname, sys.exc_info()[0]))
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  if not isinstance(containers_list, types.ListType):
    msg = 'invalid response from fetching %s' % url
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  containers = []
  timestamps = []
  for container_info in containers_list:
    # NOTE: container 'Name' is stable across container restarts, whereas
    # container 'Id' is not.
    # This may be because Kubernetes assigns the Name while Docker assigns
    # the Id (?)
    # The container Name is the only element of the 'Names' array -
    # why is Names an array here?
    # Skip the leading '/' in the Name.
    if not (isinstance(container_info.get('Names'), types.ListType) and
            container_info['Names'] and
            utilities.valid_string(container_info['Names'][0]) and
            container_info['Names'][0][0] == '/'):
      msg = 'invalid containers data format. docker_host=%s' % docker_host
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)

    container_id = container_info['Names'][0][1:]
    container, ts = _inspect_container(gs, docker_host, container_id)
    if container is None:
      continue

    if not utilities.valid_string(container.get('Name')):
      msg = ('missing or invalid Name attribute in container %s' %
             container_id)
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)

    if container['Name'] != ('/' + container_id):
      msg = ('container %s\'s Name attribute is "%s"; expecting "%s"' %
             (container_id, container['Name'], '/' + container_id))
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)

    short_hex_id = utilities.object_to_hex_id(container)
    if short_hex_id is None:
      msg = 'Could not compute short hex ID of container %s' % container_id
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)

    wrapped_container = utilities.wrap_object(
        container, 'Container', container_id, ts, label=short_hex_id)
    containers.append(wrapped_container)
    timestamps.append(ts)

    # Modify the container's label after the wrapped container was added
    # to the containers list.
    # Compute the container's short name to create a better container label:
    # short_container_name/short_hex_id.
    # For example: "cassandra/d85b599c17d8".
    parent_pod_id = utilities.get_parent_pod_id(wrapped_container)
    if parent_pod_id is None:
      continue
    parent_pod = kubernetes.get_one_pod(gs, docker_host, parent_pod_id)
    if parent_pod is None:
      continue
    short_container_name = utilities.get_short_container_name(
        wrapped_container, parent_pod)
    if not utilities.valid_string(short_container_name):
      continue
    wrapped_container['annotations']['label'] = (
        short_container_name + '/' + short_hex_id)

  ret_value = gs.get_containers_cache().update(
      docker_host, containers, min(timestamps) if timestamps else time.time())
  gs.logger_info('get_containers(docker_host=%s) returns %d containers',
                 docker_host, len(containers))
  return ret_value
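# A minimal sketch (illustration only, not called by the collector) of how
# the functions in this module are assumed to compose for a single Docker
# host: enumerate the containers, then collect the image and the processes of
# each one. The helper name and the returned dictionary layout are made up
# for this example; 'gs' is the same global-state object that every function
# above receives.
def _example_collect_host(gs, docker_host):
  snapshot = {'containers': [], 'images': [], 'processes': []}
  for wrapped_container in get_containers(gs, docker_host):
    snapshot['containers'].append(wrapped_container)
    # get_image() returns None when Docker no longer knows the image.
    wrapped_image = get_image(gs, docker_host, wrapped_container)
    if wrapped_image is not None:
      snapshot['images'].append(wrapped_image)
    # get_processes() returns an empty list when the container is gone.
    snapshot['processes'].extend(
        get_processes(gs, docker_host, wrapped_container['id']))
  return snapshot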
def get_processes(gs, docker_host, container_id):
  """Gets the list of all processes in 'container_id' on 'docker_host'.

  If the container is not found, returns an empty list of processes.

  Args:
    gs: global state.
    docker_host: the Docker host running the container.
    container_id: the container running the processes.

  Returns:
    list of wrapped process objects.
    Each element in the list is the result of
    utilities.wrap_object(process, 'Process', ...)

  Raises:
    CollectorError: in case of failure to fetch data from Docker.
    Other exceptions may be raised due to execution errors.
  """
  processes_label = '%s/%s' % (docker_host, container_id)
  processes, timestamp_secs = gs.get_processes_cache().lookup(
      processes_label)
  if timestamp_secs is not None:
    gs.logger_info(
        'get_processes(docker_host=%s, container_id=%s) cache hit',
        docker_host, container_id)
    return processes

  container = get_one_container(gs, docker_host, container_id)
  if container is not None:
    assert utilities.is_wrapped_object(container, 'Container')
    container_short_hex_id = utilities.object_to_hex_id(
        container['properties'])
    assert utilities.valid_string(container_short_hex_id)
  else:
    # Parent container not found. Container might have crashed while we were
    # looking for it.
    return []

  container_name = utilities.get_container_name(container)
  if not utilities.valid_string(container_name):
    msg = 'Invalid container "Name" attribute in container %s' % container_id
    gs.logger_error(msg)
    raise collector_error.CollectorError(msg)

  # NOTE: there is no trailing /json in this URL - this looks like a bug in
  # the Docker API.
  # Note that the {container_name} in the URL must be the internal container
  # name in container['properties']['Name'][1:] and not the container name in
  # container['id'], which may contain an extra suffix.
  url = ('http://{docker_host}:{port}/containers/{container_name}/top?'
         'ps_args=aux'.format(docker_host=docker_host,
                              port=gs.get_docker_port(),
                              container_name=container_name))
  fname = utilities.container_id_to_fname(
      docker_host, 'processes', container_name)

  try:
    # TODO(vasbala): what should we do in cases where the container is gone
    # (and replaced by a different one)?
    result = fetch_data(gs, url, fname, expect_missing=True)
  except ValueError:
    # This container does not exist anymore.
    return []
  except collector_error.CollectorError:
    raise
  except:
    msg = 'fetching %s failed with exception %s' % (url, sys.exc_info()[0])
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  if not isinstance(utilities.get_attribute(result, ['Titles']),
                    types.ListType):
    invalid_processes(gs, url)
  if not isinstance(utilities.get_attribute(result, ['Processes']),
                    types.ListType):
    invalid_processes(gs, url)

  pstats = result['Titles']
  processes = []
  now = time.time()
  for pvalues in result['Processes']:
    process = {}
    if not isinstance(pvalues, types.ListType):
      invalid_processes(gs, url)
    if len(pstats) != len(pvalues):
      invalid_processes(gs, url)
    for pstat, pvalue in zip(pstats, pvalues):
      process[pstat] = pvalue

    # Prefix with the container ID to ensure uniqueness across the whole
    # graph.
    process_id = '%s/%s' % (container_short_hex_id, process['PID'])
    processes.append(utilities.wrap_object(
        process, 'Process', process_id, now, label=process['PID']))

  ret_value = gs.get_processes_cache().update(processes_label, processes, now)
  gs.logger_info(
      'get_processes(docker_host=%s, container_id=%s) returns %d processes',
      docker_host, container_id, len(processes))
  return ret_value
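# Illustration only: an assumed example of the payload returned by the
# /containers/<name>/top?ps_args=aux endpoint, showing how the parsing loop
# in get_processes() folds it into per-process dictionaries. The column
# titles and values are made up for this sketch; the code above only relies
# on 'Titles' and 'Processes' being lists and on a 'PID' column being
# present.
_EXAMPLE_TOP_RESULT = {
    'Titles': ['USER', 'PID', '%CPU', '%MEM', 'COMMAND'],
    'Processes': [
        ['root', '1', '0.3', '1.2', 'redis-server *:6379'],
    ],
}
# zip(Titles, row) pairs each title with its value, so the single row above
# becomes {'USER': 'root', 'PID': '1', '%CPU': '0.3', '%MEM': '1.2',
# 'COMMAND': 'redis-server *:6379'}, and its graph-wide ID is
# '<container_short_hex_id>/1'.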