def get_pods(gs, node_id=None):
  """Gets the list of all pods in the given node or in the cluster.

  When 'node_id' is None, it returns the list of pods in the cluster.
  When 'node_id' is a non-empty string, it returns the list of pods in that
  node.

  Args:
    gs: global state.
    node_id: the parent node of the pods or None.

  Returns:
    list of wrapped pod objects.
    Each element in the list is the result of
    utilities.wrap_object(pod, 'Pod', ...)

  Raises:
    CollectorError: in case of failure to fetch data from Kubernetes.
    Other exceptions may be raised due to execution errors.
  """
  pods_label = '' if node_id is None else node_id
  pods, timestamp_secs = gs.get_pods_cache().lookup(pods_label)
  if timestamp_secs is not None:
    gs.logger_info('get_pods(pods_label=%s) cache hit returns %d pods',
                   pods_label, len(pods))
    return pods

  pods = []
  url = '{kubernetes}/pods'.format(kubernetes=KUBERNETES_API)
  try:
    result = fetch_data(gs, url)
  except Exception:
    # Catch Exception (not a bare 'except:') so SystemExit and
    # KeyboardInterrupt still propagate.
    msg = 'fetching %s failed with exception %s' % (url, sys.exc_info()[0])
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  now = time.time()
  # 'dict' is identical to the deprecated 'types.DictType' in Python 2
  # and is portable to Python 3.
  if not (isinstance(result, dict) and 'items' in result):
    msg = 'invalid result when fetching %s' % url
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  for pod in result['items']:
    name = utilities.get_attribute(pod, ['metadata', 'name'])
    if not utilities.valid_string(name):
      # an invalid pod without a valid pod ID value.
      continue
    wrapped_pod = utilities.wrap_object(pod, 'Pod', name, now)
    if node_id:
      # pod['spec']['host'] may be missing if the pod is in "Waiting"
      # status.
      if utilities.get_attribute(pod, ['spec', 'host']) == node_id:
        pods.append(wrapped_pod)
    else:
      # append pod to output if 'node_id' is not specified.
      pods.append(wrapped_pod)

  ret_value = gs.get_pods_cache().update(pods_label, pods, now)
  # Log the actual cache label used ('' for the whole cluster), matching
  # the cache-hit message above.
  gs.logger_info('get_pods(pods_label=%s) returns %d pods',
                 pods_label, len(pods))
  return ret_value
def get_containers_from_pod(pod):
    """Builds synthesized container resources from a wrapped pod object.

  Containers that have no status entry are omitted (the pod may still be
  pending).
  """
    assert utilities.is_wrapped_object(pod, "Pod")
    specs = utilities.get_attribute(pod, ["properties", "spec", "containers"])
    statuses = utilities.get_attribute(pod, ["properties", "status", "containerStatuses"])
    timestamp = pod["timestamp"]

    # Index the per-container specs by container name; the name itself is
    # removed from the (copied) spec entry.
    specs_by_name = {}
    if specs:
        for entry in specs:
            entry = entry.copy()
            specs_by_name[entry.pop("name")] = entry

    results = []
    if statuses:
        for entry in statuses:
            entry = entry.copy()
            container_name = entry.pop("name")
            # Fall back to the container name when no containerID is present.
            unique_id = entry.get("containerID", container_name)
            synthesized = {
                "metadata": {"name": container_name},
                "spec": specs_by_name.get(container_name, {}),
                "status": entry,
            }
            results.append(
                utilities.wrap_object(synthesized, "Container", unique_id,
                                      timestamp, label=container_name))

    return results
def get_image_from_container(container):
    """Builds a synthesized image resource from a wrapped container."""
    assert utilities.is_wrapped_object(container, "Container")
    status = container["properties"]["status"]
    image_name = status["image"]
    # The image ID is the stable unique identifier; the name is the label.
    return utilities.wrap_object(
        {"metadata": {"name": image_name}},
        "Image",
        status["imageID"],
        container["timestamp"],
        label=image_name)
Example #4
0
def _do_compute_other_nodes(gs, cluster_guid, nodes_list, oldest_timestamp, g):
  """Adds placeholder nodes for pods whose host is not in 'nodes_list'.

  Pods may run on nodes that do not appear in the node list (for example,
  the master node); a dummy Node resource is added to the graph for each
  such node so those pods have a parent.

  Args:
    gs: the global state.
    cluster_guid: the cluster's ID.
    nodes_list: a list of wrapped Node objects.
    oldest_timestamp: the timestamp of the oldest Node object.
    g: the context graph under construction.
  """
  assert isinstance(gs, global_state.GlobalState)
  assert utilities.valid_string(cluster_guid)
  assert isinstance(nodes_list, list)
  assert utilities.valid_string(oldest_timestamp)
  assert isinstance(g, ContextGraph)

  # Names of the nodes we already know about.
  known_node_ids = set()
  for wrapped_node in nodes_list:
    assert utilities.is_wrapped_object(wrapped_node, 'Node')
    known_node_ids.add(wrapped_node['id'])

  # Node names referenced by pods but absent from the known set.
  # This set may be empty.
  missing_node_ids = set()
  for pod in kubernetes.get_pods(gs):
    assert utilities.is_wrapped_object(pod, 'Pod')
    # pod.properties.spec.nodeName may be missing if the pod is waiting.
    host = utilities.get_attribute(pod, ['properties', 'spec', 'nodeName'])
    if utilities.valid_string(host) and host not in known_node_ids:
      # This pod's host is not one of the known nodes.
      missing_node_ids.add(host)

  # Add a placeholder Node resource for every missing node.
  for node_id in missing_node_ids:
    # The empty object is just a placeholder for metric annotations.
    placeholder = utilities.wrap_object({}, 'Node', node_id, time.time())
    metrics.annotate_node(placeholder)
    node_guid = 'Node:' + node_id
    g.add_resource(node_guid, placeholder['annotations'], 'Node',
                   oldest_timestamp, {})
    g.add_relation(cluster_guid, node_guid, 'contains')  # Cluster contains Node
Example #5
0
def get_image_from_container(container):
    """Builds a synthesized image resource from a wrapped container."""
    assert utilities.is_wrapped_object(container, 'Container')
    status = container['properties']['status']
    image_name = status['image']
    obj = {'metadata': {'name': image_name}}
    # The image ID is the stable unique identifier; the name is the label.
    return utilities.wrap_object(obj, 'Image', status['imageID'],
                                 container['timestamp'], label=image_name)
Example #6
0
def get_rcontrollers(gs):
    """Returns the cluster's replication controllers as wrapped objects.

    Serves the cached list when it is still fresh; otherwise fetches
    '/replicationcontrollers' from Kubernetes, wraps each valid item via
    utilities.wrap_object(rcontroller, 'ReplicationController', ...),
    refreshes the cache, and returns the new list.

    Args:
      gs: global state.

    Raises:
      CollectorError: if fetching from Kubernetes fails or the response
        is malformed.
    """
    cached, cached_at = gs.get_rcontrollers_cache().lookup('')
    if cached_at is not None:
        app.logger.debug(
            'get_rcontrollers() cache hit returns %d rcontrollers',
            len(cached))
        return cached

    url = get_kubernetes_base_url() + '/replicationcontrollers'
    try:
        result = fetch_data(gs, url)
    except Exception:
        msg = 'fetching %s failed with exception %s' % (url, sys.exc_info()[0])
        app.logger.exception(msg)
        raise collector_error.CollectorError(msg)

    now = time.time()
    if not (isinstance(result, dict) and 'items' in result):
        msg = 'invalid result when fetching %s' % url
        app.logger.exception(msg)
        raise collector_error.CollectorError(msg)

    rcontrollers = []
    for item in result['items']:
        name = utilities.get_attribute(item, ['metadata', 'name'])
        if not utilities.valid_string(name):
            # Skip controllers that lack a usable name.
            continue
        rcontrollers.append(
            utilities.wrap_object(item, 'ReplicationController', name, now))

    ret_value = gs.get_rcontrollers_cache().update('', rcontrollers, now)
    app.logger.info('get_rcontrollers() returns %d rcontrollers',
                    len(rcontrollers))
    return ret_value
Example #7
0
def get_nodes(gs):
    """Gets the list of all nodes in the current cluster.

  Args:
    gs: global state.

  Returns:
    list of wrapped node objects.
    Each element in the list is the result of
    utilities.wrap_object(node, 'Node', ...)

  Raises:
    CollectorError: in case of failure to fetch data from Kubernetes.
    Other exceptions may be raised due to execution errors.
  """
    nodes, timestamp_secs = gs.get_nodes_cache().lookup('')
    if timestamp_secs is not None:
        gs.logger_info('get_nodes() cache hit returns %d nodes', len(nodes))
        return nodes

    nodes = []
    url = '{kubernetes}/nodes'.format(kubernetes=KUBERNETES_API)
    try:
        result = fetch_data(gs, url)
    except Exception:
        # Catch Exception (not a bare 'except:') so SystemExit and
        # KeyboardInterrupt still propagate.
        msg = 'fetching %s failed with exception %s' % (url, sys.exc_info()[0])
        gs.logger_exception(msg)
        raise collector_error.CollectorError(msg)

    now = time.time()
    # 'dict' is identical to the deprecated 'types.DictType' in Python 2
    # and is portable to Python 3.
    if not (isinstance(result, dict) and 'items' in result):
        msg = 'invalid result when fetching %s' % url
        gs.logger_exception(msg)
        raise collector_error.CollectorError(msg)

    for node in result['items']:
        name = utilities.get_attribute(node, ['metadata', 'name'])
        if not utilities.valid_string(name):
            # an invalid node without a valid node ID value.
            continue
        wrapped_node = utilities.wrap_object(
            node,
            'Node',
            name,
            now,
            label=utilities.node_id_to_host_name(name))
        nodes.append(wrapped_node)

    ret_value = gs.get_nodes_cache().update('', nodes, now)
    gs.logger_info('get_nodes() returns %d nodes', len(nodes))
    return ret_value
Example #8
0
def get_rcontrollers(gs):
  """Gets the list of replication controllers in the current cluster.

  Args:
    gs: global state.

  Returns:
    list of wrapped replication controller objects.
    Each element in the list is the result of
    utilities.wrap_object(rcontroller, 'ReplicationController', ...)

  Raises:
    CollectorError: in case of failure to fetch data from Kubernetes.
    Other exceptions may be raised due to execution errors.
  """
  rcontrollers, ts = gs.get_rcontrollers_cache().lookup('')
  if ts is not None:
    gs.logger_info(
        'get_rcontrollers() cache hit returns %d rcontrollers',
        len(rcontrollers))
    return rcontrollers

  rcontrollers = []
  url = get_kubernetes_base_url() + '/replicationcontrollers'

  try:
    result = fetch_data(gs, url)
  except Exception:
    # Catch Exception (not a bare 'except:') so SystemExit and
    # KeyboardInterrupt still propagate.
    msg = 'fetching %s failed with exception %s' % (url, sys.exc_info()[0])
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  now = time.time()
  # 'dict' is identical to the deprecated 'types.DictType' in Python 2
  # and is portable to Python 3.
  if not (isinstance(result, dict) and 'items' in result):
    msg = 'invalid result when fetching %s' % url
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  for rcontroller in result['items']:
    name = utilities.get_attribute(rcontroller, ['metadata', 'name'])
    if not utilities.valid_string(name):
      # an invalid replication controller without a valid rcontroller ID.
      continue

    rcontrollers.append(utilities.wrap_object(
        rcontroller, 'ReplicationController', name, now))

  ret_value = gs.get_rcontrollers_cache().update('', rcontrollers, now)
  gs.logger_info(
      'get_rcontrollers() returns %d rcontrollers', len(rcontrollers))
  return ret_value
  def make_same_node(self, seconds):
    """Makes the same wrapped node object with the given timestamp.

    Args:
      seconds: timestamp in seconds since the epoch.

    Returns:
      A wrapped Node object with the given 'timestamp' and
      'lastHeartbeatTime'.
    """
    assert isinstance(seconds, (int, long, float))
    node = {
        'uid': KEY,
        'lastHeartbeatTime': utilities.seconds_to_timestamp(seconds),
    }
    return utilities.wrap_object(node, 'Node', KEY, seconds)
  def make_different_node(self, seconds):
    """Makes a different wrapped node object with the given timestamp.

    Args:
      seconds: timestamp in seconds since the epoch.

    Returns:
      A wrapped Node object with the given 'timestamp' and
      'creationTimestamp'.
    """
    assert isinstance(seconds, (int, long, float))
    node = {
        'uid': KEY,
        'creationTimestamp': utilities.seconds_to_timestamp(seconds),
    }
    return utilities.wrap_object(node, 'Node', KEY, seconds)
Example #11
0
def get_services(gs):
  """Gets the list of services in the current cluster.

  Args:
    gs: global state.

  Returns:
    list of wrapped service objects.
    Each element in the list is the result of
    utilities.wrap_object(service, 'Service', ...)

  Raises:
    CollectorError: in case of failure to fetch data from Kubernetes.
    Other exceptions may be raised due to execution errors.
  """
  services, timestamp_secs = gs.get_services_cache().lookup('')
  if timestamp_secs is not None:
    gs.logger_info('get_services() cache hit returns %d services',
                   len(services))
    return services

  services = []
  url = '{kubernetes}/services'.format(kubernetes=KUBERNETES_API)
  try:
    result = fetch_data(gs, url)
  except Exception:
    # Catch Exception (not a bare 'except:') so SystemExit and
    # KeyboardInterrupt still propagate.
    msg = 'fetching %s failed with exception %s' % (url, sys.exc_info()[0])
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  now = time.time()
  # 'dict' is identical to the deprecated 'types.DictType' in Python 2
  # and is portable to Python 3.
  if not (isinstance(result, dict) and 'items' in result):
    msg = 'invalid result when fetching %s' % url
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  for service in result['items']:
    name = utilities.get_attribute(service, ['metadata', 'name'])
    if not utilities.valid_string(name):
      # an invalid service without a valid service ID.
      continue
    services.append(
        utilities.wrap_object(service, 'Service', name, now))

  ret_value = gs.get_services_cache().update('', services, now)
  gs.logger_info('get_services() returns %d services', len(services))
  return ret_value
Example #12
0
    def make_different_node(self, seconds):
        """Makes a different wrapped node object with the given timestamp.

        Args:
          seconds: timestamp in seconds since the epoch.

        Returns:
          A wrapped Node object with the given 'timestamp' and
          'creationTimestamp'.
        """
        assert isinstance(seconds, (int, long, float))
        node = {'uid': KEY,
                'creationTimestamp': utilities.seconds_to_timestamp(seconds)}
        return utilities.wrap_object(node, 'Node', KEY, seconds)
Example #13
0
  def test_timeless_json_hash(self):
    """Tests timeless_json_hash() with multiple similar and dissimilar objects.

    The assertions below show that hashes are equal for objects that
    differ only in time-valued attributes ('timestamp', 'lastProbeTime'),
    and unequal for objects with different identities.
    """
    a = {'uid': 'A', 'creationTimestamp': '2015-02-20T21:39:34Z'}

    # 'b1' and 'b2' differs just by the value of the 'lastProbeTime' attribute.
    b1 = {'uid': 'B', 'lastProbeTime': '2015-03-13T22:32:15Z'}
    b2 = {'uid': 'B', 'lastProbeTime': utilities.now()}

    # 'wrapped_xxx' objects look like the objects we normally keep in the cache.
    # The difference between 'wrapped_a1' and 'wrapped_a2' is the value of the
    # 'timestamp' attribute.
    wrapped_a1 = utilities.wrap_object(a, 'Node', 'aaa', time.time())
    wrapped_a2 = utilities.wrap_object(a, 'Node', 'aaa', time.time() + 100)

    # The difference between the 'wrapped_b1', 'wrapped_b2' and 'wrapped_b3'
    # objects are the values of the 'timestamp' and 'lastProbeTime' attributes.
    now = time.time()
    wrapped_b1 = utilities.wrap_object(b1, 'Node', 'bbb', now)
    wrapped_b2 = utilities.wrap_object(b2, 'Node', 'bbb', now)
    wrapped_b3 = utilities.wrap_object(b2, 'Node', 'bbb', now + 100)

    # Objects differing only in time-valued attributes must hash equal.
    self.assertEqual(utilities.timeless_json_hash(wrapped_a1),
                     utilities.timeless_json_hash(wrapped_a2))
    self.assertEqual(utilities.timeless_json_hash(wrapped_b1),
                     utilities.timeless_json_hash(wrapped_b2))
    self.assertEqual(utilities.timeless_json_hash(wrapped_b1),
                     utilities.timeless_json_hash(wrapped_b3))

    # Verify that the hash values of lists of objects behaves as expected.
    self.assertEqual(utilities.timeless_json_hash([wrapped_a1, wrapped_b3]),
                     utilities.timeless_json_hash([wrapped_a2, wrapped_b1]))

    # Verify that the hash value of dissimilar objects is not equal.
    self.assertTrue(utilities.timeless_json_hash(wrapped_a1) !=
                    utilities.timeless_json_hash(wrapped_b1))
  def test_timeless_json_hash(self):
    """Tests timeless_json_hash() with multiple similar and dissimilar objects.

    Variant using datetime.datetime.now().isoformat() for the changing
    'lastProbeTime' value. The assertions show that hashes are equal for
    objects differing only in time-valued attributes, and unequal for
    objects with different identities.
    """
    a = {'uid': 'A', 'creationTimestamp': '2015-02-20T21:39:34Z'}

    # 'b1' and 'b2' differs just by the value of the 'lastProbeTime' attribute.
    b1 = {'uid': 'B', 'lastProbeTime': '2015-03-13T22:32:15Z'}
    b2 = {'uid': 'B', 'lastProbeTime': datetime.datetime.now().isoformat()}

    # 'wrapped_xxx' objects look like the objects we normally keep in the cache.
    # The difference between 'wrapped_a1' and 'wrapped_a2' is the value of the
    # 'timestamp' attribute.
    wrapped_a1 = utilities.wrap_object(a, 'Node', 'aaa', time.time())
    wrapped_a2 = utilities.wrap_object(a, 'Node', 'aaa', time.time() + 100)

    # The difference between the 'wrapped_b1', 'wrapped_b2' and 'wrapped_b3'
    # objects are the values of the 'timestamp' and 'lastProbeTime' attributes.
    now = time.time()
    wrapped_b1 = utilities.wrap_object(b1, 'Node', 'bbb', now)
    wrapped_b2 = utilities.wrap_object(b2, 'Node', 'bbb', now)
    wrapped_b3 = utilities.wrap_object(b2, 'Node', 'bbb', now + 100)

    # Objects differing only in time-valued attributes must hash equal.
    self.assertEqual(utilities.timeless_json_hash(wrapped_a1),
                     utilities.timeless_json_hash(wrapped_a2))
    self.assertEqual(utilities.timeless_json_hash(wrapped_b1),
                     utilities.timeless_json_hash(wrapped_b2))
    self.assertEqual(utilities.timeless_json_hash(wrapped_b1),
                     utilities.timeless_json_hash(wrapped_b3))

    # Verify that the hash values of lists of objects behaves as expected.
    self.assertEqual(utilities.timeless_json_hash([wrapped_a1, wrapped_b3]),
                     utilities.timeless_json_hash([wrapped_a2, wrapped_b1]))

    # Verify that the hash value of dissimilar objects is not equal.
    self.assertTrue(utilities.timeless_json_hash(wrapped_a1) !=
                    utilities.timeless_json_hash(wrapped_b1))
Example #15
0
    def make_same_node(self, seconds):
        """Makes the same wrapped node object with the given timestamp.

        Args:
          seconds: timestamp in seconds since the epoch.

        Returns:
          A wrapped Node object with the given 'timestamp' and
          'lastHeartbeatTime'.
        """
        assert isinstance(seconds, (int, long, float))
        node = {'uid': KEY,
                'lastHeartbeatTime': utilities.seconds_to_timestamp(seconds)}
        return utilities.wrap_object(node, 'Node', KEY, seconds)
Example #16
0
def get_nodes(gs):
  """Gets the list of all nodes in the current cluster.

  Args:
    gs: global state.

  Returns:
    list of wrapped node objects.
    Each element in the list is the result of
    utilities.wrap_object(node, 'Node', ...)

  Raises:
    CollectorError: in case of failure to fetch data from Kubernetes.
    Other exceptions may be raised due to execution errors.
  """
  nodes, timestamp_secs = gs.get_nodes_cache().lookup('')
  if timestamp_secs is not None:
    gs.logger_info('get_nodes() cache hit returns %d nodes', len(nodes))
    return nodes

  nodes = []
  url = get_kubernetes_base_url() + '/nodes'
  try:
    result = fetch_data(gs, url)
  except Exception:
    # Catch Exception (not a bare 'except:') so SystemExit and
    # KeyboardInterrupt still propagate.
    msg = 'fetching %s failed with exception %s' % (url, sys.exc_info()[0])
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  now = time.time()
  # 'dict' is identical to the deprecated 'types.DictType' in Python 2
  # and is portable to Python 3.
  if not (isinstance(result, dict) and 'items' in result):
    msg = 'invalid result when fetching %s' % url
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  for node in result['items']:
    name = utilities.get_attribute(node, ['metadata', 'name'])
    if not utilities.valid_string(name):
      # an invalid node without a valid node ID value.
      continue
    wrapped_node = utilities.wrap_object(
        node, 'Node', name, now,
        label=utilities.node_id_to_host_name(name))
    nodes.append(wrapped_node)

  ret_value = gs.get_nodes_cache().update('', nodes, now)
  gs.logger_info('get_nodes() returns %d nodes', len(nodes))
  return ret_value
Example #17
0
def get_services(gs):
    """Returns the cluster's services as wrapped objects.

    Serves the cached list when it is still fresh; otherwise fetches
    '/services' from Kubernetes, wraps each valid item via
    utilities.wrap_object(service, 'Service', ...), refreshes the cache,
    and returns the new list.

    Args:
      gs: global state.

    Raises:
      CollectorError: if fetching from Kubernetes fails or the response
        is malformed.
    """
    cached, cached_at = gs.get_services_cache().lookup('')
    if cached_at is not None:
        app.logger.debug('get_services() cache hit returns %d services',
                         len(cached))
        return cached

    url = get_kubernetes_base_url() + '/services'
    try:
        result = fetch_data(gs, url)
    except Exception:
        msg = 'fetching %s failed with exception %s' % (url, sys.exc_info()[0])
        app.logger.exception(msg)
        raise collector_error.CollectorError(msg)

    now = time.time()
    if not (isinstance(result, dict) and 'items' in result):
        msg = 'invalid result when fetching %s' % url
        app.logger.exception(msg)
        raise collector_error.CollectorError(msg)

    services = []
    for item in result['items']:
        name = utilities.get_attribute(item, ['metadata', 'name'])
        if not utilities.valid_string(name):
            # Skip services that lack a usable name.
            continue
        services.append(utilities.wrap_object(item, 'Service', name, now))

    ret_value = gs.get_services_cache().update('', services, now)
    app.logger.info('get_services() returns %d services', len(services))
    return ret_value
Example #18
0
def get_rcontrollers(gs):
    """Returns the cluster's replication controllers as wrapped objects.

    Serves the cached list when it is still fresh; otherwise fetches
    '/replicationcontrollers' from Kubernetes, wraps each valid item via
    utilities.wrap_object(rcontroller, 'ReplicationController', ...),
    refreshes the cache, and returns the new list.

    Args:
      gs: global state.

    Raises:
      CollectorError: if fetching from Kubernetes fails or the response
        is malformed.
    """
    cached, cached_at = gs.get_rcontrollers_cache().lookup("")
    if cached_at is not None:
        app.logger.debug("get_rcontrollers() cache hit returns %d rcontrollers", len(cached))
        return cached

    url = get_kubernetes_base_url() + "/replicationcontrollers"
    try:
        result = fetch_data(gs, url)
    except Exception:
        msg = "fetching %s failed with exception %s" % (url, sys.exc_info()[0])
        app.logger.exception(msg)
        raise collector_error.CollectorError(msg)

    now = time.time()
    if not (isinstance(result, dict) and "items" in result):
        msg = "invalid result when fetching %s" % url
        app.logger.exception(msg)
        raise collector_error.CollectorError(msg)

    rcontrollers = []
    for item in result["items"]:
        name = utilities.get_attribute(item, ["metadata", "name"])
        if utilities.valid_string(name):
            rcontrollers.append(
                utilities.wrap_object(item, "ReplicationController", name, now))
        # Controllers without a usable name are silently skipped.

    ret_value = gs.get_rcontrollers_cache().update("", rcontrollers, now)
    app.logger.info("get_rcontrollers() returns %d rcontrollers", len(rcontrollers))
    return ret_value
Example #19
0
def get_services(gs):
    """Returns the cluster's services as wrapped objects.

    Serves the cached list when it is still fresh; otherwise fetches
    '/services' from Kubernetes, wraps each valid item via
    utilities.wrap_object(service, 'Service', ...), refreshes the cache,
    and returns the new list.

    Args:
      gs: global state.

    Raises:
      CollectorError: if fetching from Kubernetes fails or the response
        is malformed.
    """
    cached, cached_at = gs.get_services_cache().lookup("")
    if cached_at is not None:
        app.logger.debug("get_services() cache hit returns %d services", len(cached))
        return cached

    url = get_kubernetes_base_url() + "/services"
    try:
        result = fetch_data(gs, url)
    except Exception:
        msg = "fetching %s failed with exception %s" % (url, sys.exc_info()[0])
        app.logger.exception(msg)
        raise collector_error.CollectorError(msg)

    now = time.time()
    if not (isinstance(result, dict) and "items" in result):
        msg = "invalid result when fetching %s" % url
        app.logger.exception(msg)
        raise collector_error.CollectorError(msg)

    services = []
    for item in result["items"]:
        name = utilities.get_attribute(item, ["metadata", "name"])
        if utilities.valid_string(name):
            services.append(utilities.wrap_object(item, "Service", name, now))
        # Services without a usable name are silently skipped.

    ret_value = gs.get_services_cache().update("", services, now)
    app.logger.info("get_services() returns %d services", len(services))
    return ret_value
Example #20
0
def get_pods(gs):
    """Returns all pods in the cluster as wrapped objects.

    Serves the cached pod list when fresh; otherwise fetches '/pods'
    from Kubernetes, wraps every valid item via
    utilities.wrap_object(pod, 'Pod', ...), updates the cache, and
    returns the refreshed list.

    Args:
      gs: global state.

    Raises:
      CollectorError: if fetching from Kubernetes fails or the response
        is malformed.
    """
    cached, cached_at = gs.get_pods_cache().lookup("")
    if cached_at is not None:
        app.logger.debug("get_pods() cache hit returns %d pods", len(cached))
        return cached

    url = get_kubernetes_base_url() + "/pods"
    try:
        result = fetch_data(gs, url)
    except Exception:
        msg = "fetching %s failed with exception %s" % (url, sys.exc_info()[0])
        app.logger.exception(msg)
        raise collector_error.CollectorError(msg)

    now = time.time()
    if not (isinstance(result, dict) and "items" in result):
        msg = "invalid result when fetching %s" % url
        app.logger.exception(msg)
        raise collector_error.CollectorError(msg)

    pods = []
    for item in result["items"]:
        name = utilities.get_attribute(item, ["metadata", "name"])
        if not utilities.valid_string(name):
            # Skip pods that lack a usable name.
            continue
        pods.append(utilities.wrap_object(item, "Pod", name, now))

    ret_value = gs.get_pods_cache().update("", pods, now)
    app.logger.info("get_pods() returns %d pods", len(pods))
    return ret_value
Example #21
0
def get_pods(gs):
    """Collects every pod in the cluster, using the cache when possible.

    Each element of the returned list is the result of
    utilities.wrap_object(pod, 'Pod', ...).

    Args:
      gs: global state.

    Raises:
      CollectorError: if fetching from Kubernetes fails or the response
        is malformed.
    """
    pods, hit_time = gs.get_pods_cache().lookup('')
    if hit_time is not None:
        app.logger.debug('get_pods() cache hit returns %d pods', len(pods))
        return pods

    url = get_kubernetes_base_url() + '/pods'
    try:
        result = fetch_data(gs, url)
    except Exception:
        msg = 'fetching %s failed with exception %s' % (url, sys.exc_info()[0])
        app.logger.exception(msg)
        raise collector_error.CollectorError(msg)

    now = time.time()
    if not (isinstance(result, dict) and 'items' in result):
        msg = 'invalid result when fetching %s' % url
        app.logger.exception(msg)
        raise collector_error.CollectorError(msg)

    pods = []
    for pod in result['items']:
        pod_name = utilities.get_attribute(pod, ['metadata', 'name'])
        if utilities.valid_string(pod_name):
            pods.append(utilities.wrap_object(pod, 'Pod', pod_name, now))
        # Pods without a usable name are silently skipped.

    ret_value = gs.get_pods_cache().update('', pods, now)
    app.logger.info('get_pods() returns %d pods', len(pods))
    return ret_value
Example #22
0
def get_containers_from_pod(pod):
    """Builds synthesized container resources from a wrapped pod object.

  Containers that have no status entry are omitted (the pod may still be
  pending).
  """
    assert utilities.is_wrapped_object(pod, 'Pod')
    specs = utilities.get_attribute(pod, ['properties', 'spec', 'containers'])
    statuses = utilities.get_attribute(
        pod, ['properties', 'status', 'containerStatuses'])
    timestamp = pod['timestamp']

    # Index the per-container specs by container name; the name itself is
    # removed from the (copied) spec entry.
    specs_by_name = {}
    if specs:
        for entry in specs:
            entry = entry.copy()
            specs_by_name[entry.pop('name')] = entry

    results = []
    if statuses:
        for entry in statuses:
            entry = entry.copy()
            container_name = entry.pop('name')
            # Fall back to the container name when no containerID is present.
            unique_id = entry.get('containerID', container_name)
            synthesized = {
                'metadata': {'name': container_name},
                'spec': specs_by_name.get(container_name, {}),
                'status': entry,
            }
            results.append(
                utilities.wrap_object(synthesized, 'Container', unique_id,
                                      timestamp, label=container_name))

    return results
Example #23
0
def get_containers(gs, docker_host):
  """Gets the list of all containers in 'docker_host'.

  Args:
    gs: global state.
    docker_host: the Docker host running the containers.

  Returns:
    list of wrapped container objects.
    Each element in the list is the result of
    utilities.wrap_object(container, 'Container', ...)

  Raises:
    CollectorError: in case of failure to fetch data from Docker.
    Other exceptions may be raised due to exectution errors.
  """
  # Serve the per-host cached list when a fresh cache entry exists.
  containers, timestamp = gs.get_containers_cache().lookup(docker_host)
  if timestamp is not None:
    gs.logger_info(
        'get_containers(docker_host=%s) cache hit returns '
        '%d containers', docker_host, len(containers))
    return containers

  url = 'http://{docker_host}:{port}/containers/json'.format(
      docker_host=docker_host, port=gs.get_docker_port())
  # A typical value of 'docker_host' is:
  # k8s-guestbook-node-3.c.rising-apricot-840.internal
  # Use only the first period-seperated element for the test file name.
  fname = '{host}-containers'.format(host=docker_host.split('.')[0])
  try:
    containers_list = fetch_data(gs, url, fname)
  except collector_error.CollectorError:
    raise
  except:
    # Any other failure is logged and converted into a CollectorError.
    msg = ('fetching %s or %s failed with exception %s' %
           (url, fname, sys.exc_info()[0]))
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  if not isinstance(containers_list, types.ListType):
    msg = 'invalid response from fetching %s' % url
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  containers = []
  timestamps = []
  for container_info in containers_list:
    # NOTE: container 'Name' is stable across container re-starts whereas
    # container 'Id' is not.
    # This may be because Kubernertes assigns the Name while Docker assigns
    # the Id (?)
    # The container Name is the only element of the array 'Names' -
    # why is Names an array here?
    # skip the leading / in the Name
    if not (isinstance(container_info.get('Names'), types.ListType) and
            container_info['Names'] and
            utilities.valid_string(container_info['Names'][0]) and
            container_info['Names'][0][0] == '/'):
      msg = 'invalid containers data format. docker_host=%s' % docker_host
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)

    container_id = container_info['Names'][0][1:]
    container, ts = _inspect_container(gs, docker_host, container_id)
    if container is None:
      # The container vanished between the listing and the inspection.
      continue

    if not utilities.valid_string(container.get('Name')):
      msg = ('missing or invalid Name attribute in container %s' %
             container_id)
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)

    if container['Name'] != ('/' + container_id):
      msg = ('container %s\'s Name attribute is "%s"; expecting "%s"' %
             (container_id, container['Name'], '/' + container_id))
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)

    short_hex_id = utilities.object_to_hex_id(container)
    if short_hex_id is None:
      msg = 'Could not compute short hex ID of container %s' % container_id
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)

    wrapped_container = utilities.wrap_object(
        container, 'Container', container_id, ts, label=short_hex_id)
    containers.append(wrapped_container)
    timestamps.append(ts)

    # Modify the container's label after the wrapped container was added
    # to the containers list.
    # Compute the container's short name to create a better container label:
    # short_container_name/short_hex_id.
    # For example: "cassandra/d85b599c17d8".
    # Mutating 'wrapped_container' here also updates the element already
    # appended to 'containers', because both names refer to the same object.
    parent_pod_id = utilities.get_parent_pod_id(wrapped_container)
    if parent_pod_id is None:
      continue
    parent_pod = kubernetes.get_one_pod(gs, docker_host, parent_pod_id)
    if parent_pod is None:
      continue
    short_container_name = utilities.get_short_container_name(
        wrapped_container, parent_pod)
    if not utilities.valid_string(short_container_name):
      continue
    wrapped_container['annotations']['label'] = (short_container_name + '/' +
                                                 short_hex_id)

  # Stamp the cache entry with the oldest inspection timestamp (or "now"
  # when no containers were collected) -- presumably so the entry is no
  # fresher than its stalest container; verify against the cache semantics.
  ret_value = gs.get_containers_cache().update(
      docker_host, containers,
      min(timestamps) if timestamps else time.time())
  gs.logger_info(
      'get_containers(docker_host=%s) returns %d containers',
      docker_host, len(containers))
  return ret_value
Example #24
0
def get_containers(gs, docker_host):
    """Gets the list of all containers in 'docker_host'.

  Args:
    gs: global state.
    docker_host: the Docker host running the containers.

  Returns:
    list of wrapped container objects.
    Each element in the list is the result of
    utilities.wrap_object(container, 'Container', ...)

  Raises:
    CollectorError: in case of failure to fetch data from Docker.
    Other exceptions may be raised due to exectution errors.
  """
    # Return the cached container list for this host on a cache hit.
    containers, timestamp = gs.get_containers_cache().lookup(docker_host)
    if timestamp is not None:
        gs.logger_info(
            'get_containers(docker_host=%s) cache hit returns '
            '%d containers', docker_host, len(containers))
        return containers

    url = 'http://{docker_host}:{port}/containers/json'.format(
        docker_host=docker_host, port=gs.get_docker_port())
    # A typical value of 'docker_host' is:
    # k8s-guestbook-node-3.c.rising-apricot-840.internal
    # Use only the first period-seperated element for the test file name.
    fname = '{host}-containers'.format(host=docker_host.split('.')[0])
    try:
        containers_list = fetch_data(gs, url, fname)
    except collector_error.CollectorError:
        raise
    except:
        # Log unexpected failures and re-raise them as CollectorError.
        msg = ('fetching %s or %s failed with exception %s' %
               (url, fname, sys.exc_info()[0]))
        gs.logger_exception(msg)
        raise collector_error.CollectorError(msg)

    if not isinstance(containers_list, types.ListType):
        msg = 'invalid response from fetching %s' % url
        gs.logger_exception(msg)
        raise collector_error.CollectorError(msg)

    containers = []
    timestamps = []
    for container_info in containers_list:
        # NOTE: container 'Name' is stable across container re-starts whereas
        # container 'Id' is not.
        # This may be because Kubernertes assigns the Name while Docker assigns
        # the Id (?)
        # The container Name is the only element of the array 'Names' -
        # why is Names an array here?
        # skip the leading / in the Name
        if not (isinstance(container_info.get('Names'), types.ListType)
                and container_info['Names']
                and utilities.valid_string(container_info['Names'][0])
                and container_info['Names'][0][0] == '/'):
            msg = 'invalid containers data format. docker_host=%s' % docker_host
            gs.logger_error(msg)
            raise collector_error.CollectorError(msg)

        container_id = container_info['Names'][0][1:]
        container, ts = _inspect_container(gs, docker_host, container_id)
        if container is None:
            # Container disappeared between the listing and its inspection.
            continue

        if not utilities.valid_string(container.get('Name')):
            msg = ('missing or invalid Name attribute in container %s' %
                   container_id)
            gs.logger_error(msg)
            raise collector_error.CollectorError(msg)

        if container['Name'] != ('/' + container_id):
            msg = ('container %s\'s Name attribute is "%s"; expecting "%s"' %
                   (container_id, container['Name'], '/' + container_id))
            gs.logger_error(msg)
            raise collector_error.CollectorError(msg)

        short_hex_id = utilities.object_to_hex_id(container)
        if short_hex_id is None:
            msg = 'Could not compute short hex ID of container %s' % container_id
            gs.logger_error(msg)
            raise collector_error.CollectorError(msg)

        wrapped_container = utilities.wrap_object(container,
                                                  'Container',
                                                  container_id,
                                                  ts,
                                                  label=short_hex_id)
        containers.append(wrapped_container)
        timestamps.append(ts)

        # Modify the container's label after the wrapped container was added
        # to the containers list.
        # Compute the container's short name to create a better container label:
        # short_container_name/short_hex_id.
        # For example: "cassandra/d85b599c17d8".
        # The object appended to 'containers' above is the same object being
        # mutated here, so the list element picks up the new label as well.
        parent_pod_id = utilities.get_parent_pod_id(wrapped_container)
        if parent_pod_id is None:
            continue
        parent_pod = kubernetes.get_one_pod(gs, docker_host, parent_pod_id)
        if parent_pod is None:
            continue
        short_container_name = utilities.get_short_container_name(
            wrapped_container, parent_pod)
        if not utilities.valid_string(short_container_name):
            continue
        wrapped_container['annotations']['label'] = (short_container_name +
                                                     '/' + short_hex_id)

    # The cache entry is stamped with the earliest inspection time, or the
    # current time when nothing was collected -- presumably to keep the entry
    # no fresher than its stalest member; confirm against the cache code.
    ret_value = gs.get_containers_cache().update(
        docker_host, containers,
        min(timestamps) if timestamps else time.time())
    gs.logger_info('get_containers(docker_host=%s) returns %d containers',
                   docker_host, len(containers))
    return ret_value
Example #25
0
def get_processes(gs, docker_host, container_id):
    """Gets the list of all processes in the 'docker_host' and 'container_id'.

  If the container is not found, returns an empty list of processes.

  Args:
    gs: global state.
    docker_host: the Docker host running the container.
    container_id: the container running the processes.

  Returns:
    list of wrapped process objects.
    Each element in the list is the result of
    utilities.wrap_object(process, 'Process', ...)

  Raises:
    CollectorError in case of failure to fetch data from Docker.
    Other exceptions may be raised due to exectution errors.
  """
    # Return the cached process list for this (host, container) pair when
    # a fresh cache entry exists.
    processes_label = '%s/%s' % (docker_host, container_id)
    processes, timestamp_secs = gs.get_processes_cache().lookup(
        processes_label)
    if timestamp_secs is not None:
        gs.logger_info(
            'get_processes(docker_host=%s, container_id=%s) cache hit',
            docker_host, container_id)
        return processes

    # The container's short hex ID is needed below to make process IDs
    # unique across the whole graph.
    container = get_one_container(gs, docker_host, container_id)
    if container is not None:
        assert utilities.is_wrapped_object(container, 'Container')
        container_short_hex_id = utilities.object_to_hex_id(
            container['properties'])
        assert utilities.valid_string(container_short_hex_id)
    else:
        # Parent container not found. Container might have crashed while we were
        # looking for it.
        return []

    # NOTE: there is no trailing /json in this URL - this looks like a bug in the
    # Docker API
    url = ('http://{docker_host}:{port}/containers/{container_id}/top?'
           'ps_args=aux'.format(docker_host=docker_host,
                                port=gs.get_docker_port(),
                                container_id=container_id))
    # A typical value of 'docker_host' is:
    # k8s-guestbook-node-3.c.rising-apricot-840.internal
    # Use only the first period-seperated element for the test file name.
    # The typical value of 'container_id' is:
    # k8s_php-redis.b317029a_guestbook-controller-ls6k1.default.api_f991d53e-b949-11e4-8246-42010af0c3dd_8dcdfec8
    # Use just the tail of the container ID after the last '_' sign.
    fname = '{host}-processes-{id}'.format(host=docker_host.split('.')[0],
                                           id=container_id.split('_')[-1])

    try:
        # TODO(vasbala): what should we do in cases where the container is gone
        # (and replaced by a different one)?
        result = fetch_data(gs, url, fname, expect_missing=True)
    except ValueError:
        # this container does not exist anymore
        return []
    except collector_error.CollectorError:
        raise
    except:
        # Convert any other failure into a CollectorError after logging it.
        msg = 'fetching %s failed with exception %s' % (url, sys.exc_info()[0])
        gs.logger_exception(msg)
        raise collector_error.CollectorError(msg)

    # NOTE(review): the code below proceeds as if invalid_processes() never
    # returns; presumably it raises CollectorError -- verify its definition.
    if not isinstance(utilities.get_attribute(result, ['Titles']),
                      types.ListType):
        invalid_processes(gs, url)
    if not isinstance(utilities.get_attribute(result, ['Processes']),
                      types.ListType):
        invalid_processes(gs, url)

    # 'Titles' holds the column names of the "ps aux" output; each entry in
    # 'Processes' holds the corresponding column values for one process.
    pstats = result['Titles']
    processes = []
    now = time.time()
    for pvalues in result['Processes']:
        process = {}
        if not isinstance(pvalues, types.ListType):
            invalid_processes(gs, url)
        if len(pstats) != len(pvalues):
            invalid_processes(gs, url)
        for pstat, pvalue in zip(pstats, pvalues):
            process[pstat] = pvalue

        # Prefix with container Id to ensure uniqueness across the whole graph.
        process_id = '%s/%s' % (container_short_hex_id, process['PID'])
        processes.append(
            utilities.wrap_object(process,
                                  'Process',
                                  process_id,
                                  now,
                                  label=process['PID']))

    ret_value = gs.get_processes_cache().update(processes_label, processes,
                                                now)
    gs.logger_info(
        'get_processes(docker_host=%s, container_id=%s) returns %d processes',
        docker_host, container_id, len(processes))
    return ret_value
Example #26
0
def get_image(gs, docker_host, container):
    """Fetches the information of the image run by 'container' on 'docker_host'.

  Args:
    gs: global state.
    docker_host: Docker host name. Must not be empty.
    container: the container which runs the image.

  Returns:
    The wrapped image object (the result of
    utilities.wrap_object(image, 'Image', ...)) when the image is found,
    or None when it is not.

  Raises:
    CollectorError: in case of failure to fetch data from Docker.
    ValueError: in case the container does not contain a valid image ID.
    Other exceptions may be raised due to exectution errors.
  """
    assert utilities.is_wrapped_object(container, 'Container')

    # 'image_id' must be a long hexadecimal string.
    image_id = utilities.get_attribute(container, ['properties', 'Image'])
    if not utilities.valid_hex_id(image_id):
        msg = 'missing or invalid image ID in container ID=%s' % container['id']
        gs.logger_error(msg)
        raise ValueError(msg)

    # 'image_name' must be a symbolic (non-hexadecimal) name.
    image_name = utilities.get_attribute(container,
                                         ['properties', 'Config', 'Image'])
    if ((not utilities.valid_string(image_name))
            or utilities.valid_hex_id(image_name)):
        msg = 'missing or invalid image name in container ID=%s' % container[
            'id']
        gs.logger_error(msg)
        raise ValueError(msg)

    # Serve a cached result for this (host, image) pair when available.
    cache_key = '%s|%s' % (docker_host, image_id)
    cached_image, cached_at = gs.get_images_cache().lookup(cache_key)
    if cached_at is not None:
        gs.logger_info('get_image(docker_host=%s, image_id=%s) cache hit',
                       docker_host, image_id)
        return cached_image

    # A typical value of 'docker_host' is:
    # k8s-guestbook-node-3.c.rising-apricot-840.internal
    # Use only the first period-seperated element for the test file name.
    # The typical value of 'image_name' is:
    # brendanburns/php-redis
    # Embedded '/' and ':' characters are mapped to '-' so the name cannot
    # interfere with the directory structure or file system.
    url = 'http://{docker_host}:{port}/images/{image_id}/json'.format(
        docker_host=docker_host, port=gs.get_docker_port(), image_id=image_id)
    fname = '{host}-image-{id}'.format(
        host=docker_host.split('.')[0],
        id=image_name.replace('/', '-').replace(':', '-'))

    try:
        image = fetch_data(gs, url, fname, expect_missing=True)
    except ValueError:
        # The Docker host does not know this image.
        msg = 'image not found for image_id: %s' % image_id
        gs.logger_info(msg)
        return None
    except collector_error.CollectorError:
        raise
    except:
        msg = 'fetching %s failed with exception %s' % (url, sys.exc_info()[0])
        gs.logger_exception(msg)
        raise collector_error.CollectorError(msg)

    now = time.time()
    # The image carries two labels: the full hexadecimal ID and the 12-digit
    # short hexadecimal ID shown by "docker images". The symbolic image name
    # becomes the alternate label.
    full_hex_label = image.get('Id')
    if not (isinstance(full_hex_label, types.StringTypes) and full_hex_label):
        msg = 'Image id=%s has an invalid "Id" attribute value' % image_id
        gs.logger_error(msg)
        raise collector_error.CollectorError(msg)

    short_hex_label = utilities.object_to_hex_id(image)
    if short_hex_label is None:
        msg = 'Could not compute short hex ID of image %s' % image_id
        gs.logger_error(msg)
        raise collector_error.CollectorError(msg)

    wrapped_image = utilities.wrap_object(
        image, 'Image', full_hex_label, now,
        label=short_hex_label, alt_label=image_name)

    ret_value = gs.get_images_cache().update(cache_key, wrapped_image, now)
    gs.logger_info('get_image(docker_host=%s, image_id=%s, image_name=%s)',
                   docker_host, image_id, image_name)
    return ret_value
Example #27
0
def get_processes(gs, docker_host, container_id):
  """Gets the list of all processes in the 'docker_host' and 'container_id'.

  If the container is not found, returns an empty list of processes.

  Args:
    gs: global state.
    docker_host: the Docker host running the container.
    container_id: the container running the processes.

  Returns:
    list of wrapped process objects.
    Each element in the list is the result of
    utilities.wrap_object(process, 'Process', ...)

  Raises:
    CollectorError in case of failure to fetch data from Docker.
    Other exceptions may be raised due to exectution errors.
  """
  # Serve from the cache when an entry for this (host, container) exists.
  processes_label = '%s/%s' % (docker_host, container_id)
  processes, timestamp_secs = gs.get_processes_cache().lookup(
      processes_label)
  if timestamp_secs is not None:
    gs.logger_info(
        'get_processes(docker_host=%s, container_id=%s) cache hit',
        docker_host, container_id)
    return processes

  # The short hex ID of the container is used below to build globally
  # unique process IDs.
  container = get_one_container(gs, docker_host, container_id)
  if container is not None:
    assert utilities.is_wrapped_object(container, 'Container')
    container_short_hex_id = utilities.object_to_hex_id(container['properties'])
    assert utilities.valid_string(container_short_hex_id)
  else:
    # Parent container not found. Container might have crashed while we were
    # looking for it.
    return []

  # NOTE: there is no trailing /json in this URL - this looks like a bug in the
  # Docker API
  url = ('http://{docker_host}:{port}/containers/{container_id}/top?'
         'ps_args=aux'.format(docker_host=docker_host,
                              port=gs.get_docker_port(),
                              container_id=container_id))
  # A typical value of 'docker_host' is:
  # k8s-guestbook-node-3.c.rising-apricot-840.internal
  # Use only the first period-seperated element for the test file name.
  # The typical value of 'container_id' is:
  # k8s_php-redis.b317029a_guestbook-controller-ls6k1.default.api_f991d53e-b949-11e4-8246-42010af0c3dd_8dcdfec8
  # Use just the tail of the container ID after the last '_' sign.
  fname = '{host}-processes-{id}'.format(
      host=docker_host.split('.')[0], id=container_id.split('_')[-1])

  try:
    # TODO(vasbala): what should we do in cases where the container is gone
    # (and replaced by a different one)?
    result = fetch_data(gs, url, fname, expect_missing=True)
  except ValueError:
    # this container does not exist anymore
    return []
  except collector_error.CollectorError:
    raise
  except:
    # Log and convert any other failure into a CollectorError.
    msg = 'fetching %s failed with exception %s' % (url, sys.exc_info()[0])
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  # NOTE(review): execution continues after invalid_processes() as if it
  # never returns; presumably it raises CollectorError -- verify.
  if not isinstance(utilities.get_attribute(result, ['Titles']),
                    types.ListType):
    invalid_processes(gs, url)
  if not isinstance(utilities.get_attribute(result, ['Processes']),
                    types.ListType):
    invalid_processes(gs, url)

  # 'Titles' lists the "ps aux" column names; each 'Processes' entry lists
  # the matching column values for a single process.
  pstats = result['Titles']
  processes = []
  now = time.time()
  for pvalues in result['Processes']:
    process = {}
    if not isinstance(pvalues, types.ListType):
      invalid_processes(gs, url)
    if len(pstats) != len(pvalues):
      invalid_processes(gs, url)
    for pstat, pvalue in zip(pstats, pvalues):
      process[pstat] = pvalue

    # Prefix with container Id to ensure uniqueness across the whole graph.
    process_id = '%s/%s' % (container_short_hex_id, process['PID'])
    processes.append(utilities.wrap_object(
        process, 'Process', process_id, now, label=process['PID']))

  ret_value = gs.get_processes_cache().update(
      processes_label, processes, now)
  gs.logger_info(
      'get_processes(docker_host=%s, container_id=%s) returns %d processes',
      docker_host, container_id, len(processes))
  return ret_value
Example #28
0
def get_processes(gs, docker_host, container_id):
  """Gets the list of all processes in the 'docker_host' and 'container_id'.

  If the container is not found, returns an empty list of processes.

  Args:
    gs: global state.
    docker_host: the Docker host running the container.
    container_id: the container running the processes.

  Returns:
    list of wrapped process objects.
    Each element in the list is the result of
    utilities.wrap_object(process, 'Process', ...)

  Raises:
    CollectorError in case of failure to fetch data from Docker.
    Other exceptions may be raised due to exectution errors.
  """
  # Return the cached list for this (host, container) pair on a cache hit.
  processes_label = '%s/%s' % (docker_host, container_id)
  processes, timestamp_secs = gs.get_processes_cache().lookup(
      processes_label)
  if timestamp_secs is not None:
    gs.logger_info(
        'get_processes(docker_host=%s, container_id=%s) cache hit',
        docker_host, container_id)
    return processes

  # The container's short hex ID is needed below to build globally unique
  # process IDs.
  container = get_one_container(gs, docker_host, container_id)
  if container is not None:
    assert utilities.is_wrapped_object(container, 'Container')
    container_short_hex_id = utilities.object_to_hex_id(container['properties'])
    assert utilities.valid_string(container_short_hex_id)
  else:
    # Parent container not found. Container might have crashed while we were
    # looking for it.
    return []

  container_name = utilities.get_container_name(container)
  if not utilities.valid_string(container_name):
    msg = 'Invalid container "Name" attribute in container %s' % container_id
    gs.logger_error(msg)
    raise collector_error.CollectorError(msg)

  # NOTE: there is no trailing /json in this URL - this looks like a bug in the
  # Docker API
  # Note that the {container_id} in the URL must be the internal container
  # name in container['properties']['Name'][1:]
  # and not the container name in container['id'] which may contain an extra
  # suffix.
  url = ('http://{docker_host}:{port}/containers/{container_name}/top?'
         'ps_args=aux'.format(docker_host=docker_host,
                              port=gs.get_docker_port(),
                              container_name=container_name))
  fname = utilities.container_id_to_fname(
      docker_host, 'processes', container_name)

  try:
    # TODO(vasbala): what should we do in cases where the container is gone
    # (and replaced by a different one)?
    result = fetch_data(gs, url, fname, expect_missing=True)
  except ValueError:
    # this container does not exist anymore
    return []
  except collector_error.CollectorError:
    raise
  except:
    # Log and convert any other failure into a CollectorError.
    msg = 'fetching %s failed with exception %s' % (url, sys.exc_info()[0])
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  # NOTE(review): the code below assumes invalid_processes() does not
  # return; presumably it raises CollectorError -- verify its definition.
  if not isinstance(utilities.get_attribute(result, ['Titles']),
                    types.ListType):
    invalid_processes(gs, url)
  if not isinstance(utilities.get_attribute(result, ['Processes']),
                    types.ListType):
    invalid_processes(gs, url)

  # 'Titles' holds the "ps aux" column names; each 'Processes' entry holds
  # the matching column values for one process.
  pstats = result['Titles']
  processes = []
  now = time.time()
  for pvalues in result['Processes']:
    process = {}
    if not isinstance(pvalues, types.ListType):
      invalid_processes(gs, url)
    if len(pstats) != len(pvalues):
      invalid_processes(gs, url)
    for pstat, pvalue in zip(pstats, pvalues):
      process[pstat] = pvalue

    # Prefix with container Id to ensure uniqueness across the whole graph.
    process_id = '%s/%s' % (container_short_hex_id, process['PID'])
    processes.append(utilities.wrap_object(
        process, 'Process', process_id, now, label=process['PID']))

  ret_value = gs.get_processes_cache().update(
      processes_label, processes, now)
  gs.logger_info(
      'get_processes(docker_host=%s, container_id=%s) returns %d processes',
      docker_host, container_id, len(processes))
  return ret_value
Example #29
0
def get_pods(gs, node_id=None):
    """Returns all pods in the cluster or the pods of a single node.

  A 'node_id' of None selects every pod in the cluster; a non-empty
  'node_id' selects only the pods scheduled on that node.

  Args:
    gs: global state.
    node_id: the parent node of the pods or None.

  Returns:
    list of wrapped pod objects.
    Each element in the list is the result of
    utilities.wrap_object(pod, 'Pod', ...)

  Raises:
    CollectorError: in case of failure to fetch data from Kubernetes.
    Other exceptions may be raised due to exectution errors.
  """
    cache_key = '' if node_id is None else node_id
    cached_pods, cached_at = gs.get_pods_cache().lookup(cache_key)
    if cached_at is not None:
        gs.logger_info('get_pods(pods_label=%s) cache hit returns %d pods',
                       cache_key, len(cached_pods))
        return cached_pods

    pods = []
    url = '{kubernetes}/pods'.format(kubernetes=KUBERNETES_API)
    try:
        result = fetch_data(gs, url)
    except:
        msg = 'fetching %s failed with exception %s' % (url, sys.exc_info()[0])
        gs.logger_exception(msg)
        raise collector_error.CollectorError(msg)

    now = time.time()
    if not (isinstance(result, types.DictType) and 'items' in result):
        msg = 'invalid result when fetching %s' % url
        gs.logger_exception(msg)
        raise collector_error.CollectorError(msg)

    for pod_object in result['items']:
        pod_name = utilities.get_attribute(pod_object, ['metadata', 'name'])
        if not utilities.valid_string(pod_name):
            # Skip entries that lack a usable pod identifier.
            continue
        wrapped = utilities.wrap_object(pod_object, 'Pod', pod_name, now)
        if not node_id:
            # No node filter: keep every pod.
            pods.append(wrapped)
        elif utilities.get_attribute(pod_object, ['spec', 'host']) == node_id:
            # Keep only pods scheduled on the requested node. Note that
            # 'spec.host' may be absent while the pod is still "Waiting".
            pods.append(wrapped)

    ret_value = gs.get_pods_cache().update(cache_key, pods, now)
    gs.logger_info('get_pods(node_id=%s) returns %d pods', cache_key,
                   len(pods))
    return ret_value
Example #30
0
def get_image(gs, docker_host, container):
  """Gets the information of the given image in the given host.

  Args:
    gs: global state.
    docker_host: Docker host name. Must not be empty.
    container: the container which runs the image.

  Returns:
    If image was found, returns the wrapped image object, which is the result of
    utilities.wrap_object(image, 'Image', ...)
    If the image was not found, returns None.

  Raises:
    CollectorError: in case of failure to fetch data from Docker.
    ValueError: in case the container does not contain a valid image ID.
    Other exceptions may be raised due to exectution errors.
  """
  assert utilities.is_wrapped_object(container, 'Container')
  # The 'image_id' should be a long hexadecimal string.
  image_id = utilities.get_attribute(container, ['properties', 'Image'])
  if not utilities.valid_hex_id(image_id):
    msg = 'missing or invalid image ID in container ID=%s' % container['id']
    gs.logger_error(msg)
    raise ValueError(msg)

  # The 'image_name' should be a symbolic name (not a hexadecimal string).
  image_name = utilities.get_attribute(
      container, ['properties', 'Config', 'Image'])

  if ((not utilities.valid_string(image_name)) or
      utilities.valid_hex_id(image_name)):
    msg = 'missing or invalid image name in container ID=%s' % container['id']
    gs.logger_error(msg)
    raise ValueError(msg)

  # Serve from the cache when an entry for this (host, image) pair exists.
  cache_key = '%s|%s' % (docker_host, image_id)
  image, timestamp_secs = gs.get_images_cache().lookup(cache_key)
  if timestamp_secs is not None:
    gs.logger_info('get_image(docker_host=%s, image_id=%s) cache hit',
                   docker_host, image_id)
    return image

  # A typical value of 'docker_host' is:
  # k8s-guestbook-node-3.c.rising-apricot-840.internal
  # Use only the first period-seperated element for the test file name.
  # The typical value of 'image_name' is:
  # brendanburns/php-redis
  # We convert embedded '/' and ':' characters to '-' to avoid interference with
  # the directory structure or file system.
  url = 'http://{docker_host}:{port}/images/{image_id}/json'.format(
      docker_host=docker_host, port=gs.get_docker_port(), image_id=image_id)
  fname = '{host}-image-{id}'.format(
      host=docker_host.split('.')[0],
      id=image_name.replace('/', '-').replace(':', '-'))

  try:
    image = fetch_data(gs, url, fname, expect_missing=True)
  except ValueError:
    # image not found.
    msg = 'image not found for image_id: %s' % image_id
    gs.logger_info(msg)
    return None
  except collector_error.CollectorError:
    raise
  except:
    # Log and convert any other failure into a CollectorError.
    msg = 'fetching %s failed with exception %s' % (url, sys.exc_info()[0])
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  now = time.time()
  # compute the two labels of the image.
  # The first is a 12-digit hexadecimal number shown by "docker images".
  # The second is the symbolic name of the image.
  full_hex_label = image.get('Id')
  if not (isinstance(full_hex_label, types.StringTypes) and full_hex_label):
    msg = 'Image id=%s has an invalid "Id" attribute value' % image_id
    gs.logger_error(msg)
    raise collector_error.CollectorError(msg)

  short_hex_label = utilities.object_to_hex_id(image)
  if short_hex_label is None:
    msg = 'Could not compute short hex ID of image %s' % image_id
    gs.logger_error(msg)
    raise collector_error.CollectorError(msg)

  wrapped_image = utilities.wrap_object(
      image, 'Image', full_hex_label, now,
      label=short_hex_label, alt_label=image_name)

  ret_value = gs.get_images_cache().update(cache_key, wrapped_image, now)
  gs.logger_info('get_image(docker_host=%s, image_id=%s, image_name=%s)',
                 docker_host, image_id, image_name)
  return ret_value
Example #31
0
def _do_compute_master_pods(gs, cluster_guid, nodes_list, oldest_timestamp, g):
  """Adds pods running on the master node to the graph.

  These pods do not have a valid parent node, because the nodes list
  does not include the master node.

  This routine adds a dummy master node, and then adds the pods running
  on the master node to the graph. It does not add information about
  containers, processes, or images of these nodes, because there is no
  minion collector running on the master node.

  Note that in some configurations (for example, GKE), there is no
  master node.

  Args:
    gs: the global state.
    cluster_guid: the cluster's ID.
    nodes_list: a list of wrapped Node objects.
    oldest_timestamp: the timestamp of the oldest Node object.
    g: the context graph under construction.
  """
  assert isinstance(gs, global_state.GlobalState)
  assert utilities.valid_string(cluster_guid)
  assert isinstance(nodes_list, types.ListType)
  assert utilities.valid_string(oldest_timestamp)
  assert isinstance(g, ContextGraph)

  # Compute the set of known Node names.
  # 'project_id' is overwritten on every iteration, so it ends up holding
  # the project of the last node in the list; if the list is empty it
  # stays '_unknown_'.
  known_node_ids = set()
  project_id = '_unknown_'
  for node in nodes_list:
    assert utilities.is_wrapped_object(node, 'Node')
    known_node_ids.add(node['id'])
    project_id = utilities.node_id_to_project_id(node['id'])

  # Compute the set of Nodes referenced by pods but not in the known set.
  # The set of unknown node names may be empty.
  assert utilities.valid_string(project_id)
  missing_node_ids = set()
  for pod in kubernetes.get_pods(gs):
    assert utilities.is_wrapped_object(pod, 'Pod')
    # pod.properties.spec.nodeName may be missing if the pod is waiting.
    # NOTE(review): get_pods() in this file filters per-node pods on
    # ['spec', 'host'], while this code reads 'nodeName' — confirm the
    # two agree for the Kubernetes API version in use.
    parent_node_id = utilities.get_attribute(
        pod, ['properties', 'spec', 'nodeName'])
    if not utilities.valid_string(parent_node_id):
      continue

    if parent_node_id in known_node_ids:
      continue

    # Found a pod that does not belong to any of the known nodes.
    missing_node_ids.add(parent_node_id)

  # Process the pods in each of the missing nodes.
  for node_id in missing_node_ids:
    # Create a dummy node object just as a placeholder for metric
    # annotations. The wrapped object has an empty properties dict; only
    # its id, label, and timestamp carry information.
    node = utilities.wrap_object(
        {}, 'Node', node_id, time.time(),
        label=utilities.node_id_to_host_name(node_id))

    # The project_id may be '_unknown_'. This is not a big
    # deal, since the aggregator knows the project ID.
    metrics.annotate_node(project_id, node)
    node_guid = 'Node:' + node_id
    g.add_resource(node_guid, node['annotations'], 'Node', oldest_timestamp, {})
    g.add_relation(cluster_guid, node_guid, 'contains')  # Cluster contains Node
    # Add each pod that runs on this (master) node to the graph as a
    # child of the dummy node.
    for pod in kubernetes.get_pods(gs, node_id):
      _do_compute_pod(gs, cluster_guid, node_guid, pod, g)