Example #1
class PolicyAgent():
    def __init__(self):
        """
        Set up the event queue, the Kubernetes API connection settings, the
        Calico datastore client and the table of watch-event handlers.
        """
        # Queue to populate with events from API watches.
        self._event_queue = Queue.Queue()

        # Scheme, IP and port of the Kubernetes API.
        self.k8s_api = os.environ.get("K8S_API", DEFAULT_API)

        # Auth token to use when accessing the API.  The token is a
        # credential, so only log whether one is configured - never the
        # value itself.
        self.auth_token = os.environ.get("K8S_AUTH_TOKEN", read_token_file())
        _log.debug("Using auth token: %s",
                   "<redacted>" if self.auth_token else "<none>")

        # True if a CA cert has been mounted by Kubernetes.
        self.ca_crt_exists = os.path.exists(CA_CERT_PATH)

        # Client for accessing the Calico datastore.
        self._client = DatastoreClient()

        # Handlers for watch events, keyed by (resource_type, event_type).
        # NOTE(review): the listing this was restored from only showed the
        # namespace-added and pod registrations; the network policy and
        # namespace-deleted registrations are reconstructed from the handler
        # methods defined on this class - confirm against upstream.
        self._handlers = {}
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_ADDED,
                         self._add_update_network_policy)
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_DELETED,
                         self._delete_network_policy)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_ADDED,
                         self._add_update_namespace)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_DELETED,
                         self._delete_namespace)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_ADDED, self._add_update_pod)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_DELETED, self._delete_pod)

    def add_handler(self, resource_type, event_type, handler):
        """
        Adds an event handler for the given event type (ADD, DELETE) for the
        given resource type.  Any handler previously registered for the same
        (resource_type, event_type) pair is replaced.

        :param resource_type: The type of resource this handler applies to.
        :param event_type: The type of event this handler applies to.
        :param handler: The callable to store for this pair.
        :return: None
        """
        _log.info("Setting %s %s handler: %s", resource_type, event_type,
                  handler)
        key = (resource_type, event_type)
        self._handlers[key] = handler

    def get_handler(self, resource_type, event_type):
        """
        Gets the handler registered for the given resource / event type.

        :param resource_type: The type of resource that needs handling.
        :param event_type: The type of event that needs handling.
        :return: The handler stored by add_handler for this pair.
        :raises KeyError: If no handler is registered for this pair.
        """
        key = (resource_type, event_type)
        _log.debug("Looking up handler for event: %s", key)
        return self._handlers[key]

    def run(self):
        """
        PolicyAgent.run() is called at program init to spawn watch threads,
        then loops to read updates from the event queue as they come in.

        For each resource type the existing objects are listed and processed
        as TYPE_ADDED events, after which a daemon thread is started to watch
        the API from the resourceVersion returned by the listing.

        :return: None.  Returns early if listing a resource type fails;
          otherwise does not return, since read_updates() loops forever.
        """
        # NOTE(review): the contents of this list were missing from the
        # listing this was restored from; reconstructed from the resource
        # types that have handlers - confirm the set and order upstream.
        resources = [RESOURCE_TYPE_NETWORK_POLICY,
                     RESOURCE_TYPE_NAMESPACE,
                     RESOURCE_TYPE_POD]
        for resource_type in resources:
            # Get existing resources from the API.
            _log.info("Getting existing %s objects", resource_type)
            get_url = GET_URLS[resource_type] % self.k8s_api
            resp = self._api_get(get_url, stream=False)
            _log.info("Response: %s", resp)

            if resp.status_code != 200:
                # Without a successful listing there are no items and no
                # resourceVersion to watch from, so stop here rather than
                # fall through.  Log the raw body: an error response is not
                # guaranteed to be valid JSON.
                _log.error("Error querying API: %s", resp.text)
                return

            # Parse the body once and reuse it.
            body = resp.json()
            updates = body["items"]
            metadata = body.get("metadata", {})
            resource_version = metadata.get("resourceVersion")
            _log.debug("%s metadata: %s", resource_type, metadata)

            # Process the existing resources.
            _log.info("%s existing %s(s)", len(updates), resource_type)
            for update in updates:
                _log.debug("Processing existing resource: %s",
                           json.dumps(update, indent=2))
                self._process_update(TYPE_ADDED, resource_type, update)

            # Start watching for updates from the last resourceVersion.
            watch_url = WATCH_URLS[resource_type] % self.k8s_api
            t = Thread(target=self._watch_api,
                       args=(watch_url, resource_version))
            t.daemon = True
            t.start()
            _log.info("Started watch on: %s", resource_type)

        # Loop and read updates from the queue.
        _log.info("Reading from event queue")
        self.read_updates()

    def read_updates(self):
        """
        Reads from the update queue and processes each update in turn.

        Each update is expected to be a dict with a "type" key and an
        "object" key whose value carries a "kind".  Updates which are missing
        these keys are logged and dropped.

        :return: Does not return - loops forever.
        """
        update = None

        while True:
            try:
                # There may be an update already, since we do a blocking get
                # in the `except Queue.Empty` block.  If we have an update,
                # just process it before trying to read from the queue again.
                if not update:
                    _log.info("Non-blocking read from event queue")
                    update = self._event_queue.get(block=False)

                # We've received an update - process it.
                _log.debug("Read update from queue: %s",
                           json.dumps(update, indent=2))
                self._process_update(update["type"], update["object"]["kind"],
                                     update["object"])
                update = None
            except Queue.Empty:
                _log.info("Queue empty, waiting for updates")
                update = self._event_queue.get(block=True)
            except KeyError:
                # We'll hit this if we fail to parse an invalid update.
                # Set update = None so we don't continue parsing the
                # invalid update.
                _log.exception("Invalid update: %s", update)
                update = None

    def _process_update(self, event_type, resource_type, resource):
        """
        Takes an event and updates our state accordingly by calling the
        handler registered for the resource / event type.

        :param event_type: The watch event type.  TYPE_MODIFIED is handled
          as TYPE_ADDED.
        :param resource_type: The kind of resource, used to find the handler.
        :param resource: The API object; must contain metadata.name.
        :return: None
        :raises KeyError: If the resource has no "metadata" or no name.
        """
        _log.info("Processing '%s' for kind '%s'", event_type, resource_type)

        # Determine the key for this object using namespace and name.
        # This is simply used for easy identification in logs, etc.
        name = resource["metadata"]["name"]
        namespace = resource["metadata"].get("namespace")
        key = (namespace, name)

        # Treat "modified" as "added".
        if event_type == TYPE_MODIFIED:
            _log.info("Treating 'MODIFIED' as 'ADDED'")
            event_type = TYPE_ADDED

        # Call the right handler.
        try:
            handler = self.get_handler(resource_type, event_type)
        except KeyError:
            _log.warning("No %s handlers for: %s", event_type, resource_type)
        else:
            _log.debug("Calling handler: %s", handler)
            try:
                handler(key, resource)
            except KeyError:
                # The handler hit a missing field in the resource.
                _log.exception("Invalid %s: %s", resource_type,
                               json.dumps(resource, indent=2))

    def _add_update_network_policy(self, key, policy):
        """
        Takes a new network policy from the Kubernetes API and
        creates the corresponding Calico policy configuration.

        :param key: (namespace, name) tuple, used for logging.
        :param policy: The NetworkPolicy API object.
        :return: None.  If the policy's rules cannot be parsed the error is
          logged and nothing is written to the datastore.
        :raises KeyError: If the policy is missing spec.podSelector or the
          metadata name / namespace.
        """
        _log.info("Adding new network policy: %s", key)

        # Parse this network policy so we can convert it to the appropriate
        # Calico policy.  First, get the selector from the API object.
        # A null selector is treated as an empty one.
        k8s_selector = policy["spec"]["podSelector"] or {}

        # Build the appropriate Calico label selector.  This is done using
        # the labels provided in the NetworkPolicy, as well as the
        # NetworkPolicy's namespace.
        namespace = policy["metadata"]["namespace"]
        selectors = [
            "%s == '%s'" % (k, v) for k, v in k8s_selector.iteritems()
        ]
        selectors += ["%s == '%s'" % (K8S_NAMESPACE_LABEL, namespace)]
        selector = " && ".join(selectors)

        # Determine the name for this global policy.
        name = "net_policy-%s" % policy["metadata"]["name"]

        # Build the Calico rules.
        try:
            inbound_rules = self._calculate_inbound_rules(policy)
        except Exception:
            # It is possible bad rules will be passed - we don't want to
            # crash the agent, but we do want to indicate a problem in the
            # logs, so that the policy can be fixed.
            _log.exception("Error parsing policy: %s",
                           json.dumps(policy, indent=2))
            return
        else:
            # NOTE(review): the arguments after `id=` were missing from the
            # listing this was restored from.  The keyword names and the
            # allow-all outbound rule are reconstructed - confirm upstream.
            rules = Rules(id=name,
                          inbound_rules=inbound_rules,
                          outbound_rules=[Rule(action="allow")])

        # Create the network policy using the calculated selector and rules.
        self._client.create_global_policy(NET_POL_GROUP_NAME, name, selector,
                                          rules)
        _log.info("Updated global policy '%s' for NetworkPolicy %s", name, key)

    def _delete_network_policy(self, key, policy):
        """
        Takes a deleted network policy and removes the corresponding
        configuration from the Calico datastore.

        :param key: (namespace, name) tuple, used for logging.
        :param policy: The deleted NetworkPolicy API object.
        :return: None
        :raises KeyError: If the policy has no "metadata" or no name.
        """
        _log.info("Deleting network policy: %s", key)

        # Determine the name for this global policy.
        name = "net_policy-%s" % policy["metadata"]["name"]

        # Delete the corresponding Calico policy.
        try:
            self._client.remove_global_policy(NET_POL_GROUP_NAME, name)
        except KeyError:
            _log.info("Unable to find policy '%s' - already deleted", key)

    def _calculate_inbound_rules(self, policy):
        """
        Takes a NetworkPolicy object from the API and returns a list of
        Calico Rule objects which should be applied on ingress.

        One "allow" rule is produced for every combination of protocol /
        ports entry and "from" clause in each ingress rule.

        :param policy: The NetworkPolicy API object.
        :return: A list of Rule objects (empty if there are no ingress rules).
        :raises PolicyError: If a "from" clause has both "pods" and
          "namespaces" selectors.
        :raises KeyError: If the policy has no metadata namespace or spec.
        """
        _log.debug("Calculating inbound rules")

        # Store the rules to return.
        rules = []

        # Get this policy's namespace.
        policy_ns = policy["metadata"]["namespace"]

        # Iterate through each inbound rule and create the appropriate
        # rules.
        allow_incomings = policy["spec"].get("ingress") or []
        _log.info("Found %s ingress rules", len(allow_incomings))
        for r in allow_incomings:
            # Determine the destination ports to allow.  If no ports are
            # specified, allow all port / protocol combinations.
            _log.debug("Processing ingress rule: %s", r)
            ports_by_protocol = {}
            for to_port in r.get("ports", []):
                # Keep a dict of ports exposed, keyed by protocol.
                protocol = to_port.get("protocol")
                port = to_port.get("port")
                ports = ports_by_protocol.setdefault(protocol, [])
                if port:
                    _log.debug("Allow to port: %s/%s", protocol, port)
                    ports.append(port)

            # Convert into arguments to be passed to a Rule object.
            to_args = []
            for protocol, ports in ports_by_protocol.iteritems():
                arg = {"protocol": protocol.lower()}
                if ports:
                    arg["dst_ports"] = ports
                to_args.append(arg)

            if not to_args:
                # There are no destination protocols / ports specified.
                # Allow to all protocols and ports.
                to_args = [{}]

            # Determine the from criteria.  If no "from" block is specified,
            # then we should allow from all sources.
            from_args = []
            for from_clause in r.get("from", []):
                # We need to check if the key exists, not just if there is
                # a non-null value.  The presence of the key with a null
                # value means "select all".
                pods_present = "pods" in from_clause
                namespaces_present = "namespaces" in from_clause
                _log.debug("Is 'pods:' present? %s", pods_present)
                _log.debug("Is 'namespaces:' present? %s", namespaces_present)

                if pods_present and namespaces_present:
                    # This is an error case according to the API.
                    msg = "Policy API does not support both 'pods' and " \
                          "'namespaces' selectors."
                    raise PolicyError(msg, policy)
                elif pods_present:
                    # There is a pod selector in this "from" clause.
                    pod_selector = from_clause["pods"] or {}
                    _log.debug("Allow from pods: %s", pod_selector)
                    selectors = [
                        "%s == '%s'" % (k, v)
                        for k, v in pod_selector.iteritems()
                    ]

                    # We can only select on pods in this namespace.  The
                    # value is quoted, matching every other selector built
                    # by this class.
                    selectors.append("%s == '%s'" %
                                     (K8S_NAMESPACE_LABEL, policy_ns))
                    selector = " && ".join(selectors)

                    # Append the selector to the from args.
                    _log.debug("Allowing pods which match: %s", selector)
                    from_args.append({"src_selector": selector})
                elif namespaces_present:
                    # There is a namespace selector.  Namespace labels are
                    # applied to each pod in the namespace using
                    # the per-namespace profile.  We can select on namespace
                    # labels using the NS_LABEL_KEY_FMT modifier.
                    namespaces = from_clause["namespaces"] or {}
                    _log.debug("Allow from namespaces: %s", namespaces)
                    selectors = ["%s == '%s'" % (NS_LABEL_KEY_FMT % k, v)
                                 for k, v in namespaces.iteritems()]
                    selector = " && ".join(selectors)
                    if selector:
                        # Allow from the selected namespaces.
                        _log.debug("Allowing from namespaces which match: %s",
                                   selector)
                        from_args.append({"src_selector": selector})
                    else:
                        # Allow from all pods in all namespaces.
                        _log.debug("Allowing from all pods in all namespaces")
                        selector = "has(%s)" % K8S_NAMESPACE_LABEL
                        from_args.append({"src_selector": selector})

            if not from_args:
                # There are no match criteria specified.  We should allow
                # from all sources to the given ports.
                from_args = [{}]

            # A rule per-protocol, per-from-clause.
            for to_arg in to_args:
                for from_arg in from_args:
                    # Create a rule by combining a 'from' argument with
                    # the protocol / ports arguments.  Build a fresh dict
                    # each time: updating from_arg in place would leak the
                    # protocol / ports of one to_arg into the next.
                    rule_args = dict(from_arg)
                    rule_args.update(to_arg)
                    rule_args["action"] = "allow"
                    rules.append(Rule(**rule_args))

        _log.debug("Calculated rules: %s", rules)
        return rules

    def _add_update_namespace(self, key, namespace):
        """
        Configures the necessary policy in Calico for this
        namespace.  Uses the `net.alpha.kubernetes.io/network-isolation`
        annotation (presumably the value of NS_POLICY_ANNOTATION - confirm)
        to decide whether inbound traffic is denied or allowed.

        :param key: (namespace, name) tuple, used for logging.
        :param namespace: The Namespace API object.
        :return: None
        :raises KeyError: If the namespace has no "metadata" or no name.
        """
        _log.info("Adding/updating namespace: %s", key)

        # Determine the type of network-isolation specified by this namespace.
        # This defaults to no isolation.
        annotations = namespace["metadata"].get("annotations") or {}
        _log.debug("Namespace %s has annotations: %s", key, annotations)
        net_isolation = annotations.get(NS_POLICY_ANNOTATION, "no") == "yes"
        _log.info("Namespace %s has network-isolation? %s", key, net_isolation)

        # Determine the profile name to create.
        namespace_name = namespace["metadata"]["name"]
        profile_name = NS_PROFILE_FMT % namespace_name

        # Determine the rules to use.
        outbound_rules = [Rule(action="allow")]
        if net_isolation:
            inbound_rules = [Rule(action="deny")]
        else:
            inbound_rules = [Rule(action="allow")]
        # NOTE(review): the keyword names after `id=` were missing from the
        # listing this was restored from - confirm against the Rules type.
        rules = Rules(id=profile_name,
                      inbound_rules=inbound_rules,
                      outbound_rules=outbound_rules)

        # Create the Calico policy to represent this namespace, or
        # update it if it already exists.  Namespace policies select each
        # pod within that namespace.
        self._client.create_profile(profile_name, rules)

        # Assign labels to the profile.  We modify the keys to use
        # a special prefix to indicate that these labels are inherited
        # from the namespace.  Build a new dict: adding and deleting keys
        # while iterating the original raises RuntimeError, and would also
        # modify the caller's namespace object.
        k8s_labels = namespace["metadata"].get("labels") or {}
        labels = dict((NS_LABEL_KEY_FMT % k, v)
                      for k, v in k8s_labels.iteritems())
        _log.debug("Generated namespace labels: %s", labels)

        # TODO: Actually assign labels to the profile.

        _log.info("Created/updated profile for namespace %s", namespace_name)

    def _delete_namespace(self, key, namespace):
        # Handler for a deleted namespace: derives the namespace's profile
        # name from NS_PROFILE_FMT and logs if the profile cannot be found.
        #
        # NOTE(review): this listing appears to have lost lines.  The
        # docstring delimiters, the `try:` that the `except KeyError` below
        # belongs to, and the datastore call which actually removes the
        # profile are not visible here - restore them from the upstream
        # file before relying on this method.
        Takes a deleted namespace and removes the corresponding
        configuration from the Calico datastore.
        _log.info("Deleting namespace: %s", key)

        # Delete the Calico policy which represents this namespace.
        # We need to make sure that there are no pods running
        # in this namespace first.
        namespace_name = namespace["metadata"]["name"]
        profile_name = NS_PROFILE_FMT % namespace_name
        except KeyError:
            # Presumably raised by the missing removal call when the
            # profile does not exist - TODO confirm.
            _log.info("Unable to find profile for namespace '%s'", key)

    def _add_update_pod(self, key, pod):
        # Handler for a new or updated pod: looks up the pod's Calico
        # endpoint by workload ID ("<namespace>.<name>"), sets the pod's
        # Kubernetes labels plus a namespace label on it, and computes the
        # name of the namespace profile.
        #
        # NOTE(review): this listing appears to have lost lines.  The
        # docstring delimiters, the `try:` for the endpoint lookup, the
        # trailing arguments of the two truncated calls below, any `return`
        # in the two except branches (without one `endpoint` is unbound
        # further down), and whatever call persists the endpoint / applies
        # `ns_profile` are not visible here - restore from upstream.
        Takes a new or updated pod from the Kubernetes API and 
        creates the corresponding Calico configuration.
        _log.info("Adding new pod: %s", key)

        # Get the Calico endpoint.  This may or may not have already been
        # created by the CNI plugin.  If it hasn't been created, we need to
        # wait until it has before we can do any meaningful work.
        namespace = pod["metadata"]["namespace"]
        name = pod["metadata"]["name"]
        workload_id = "%s.%s" % (namespace, name)
            _log.debug("Looking for endpoint that matches workload_id=%s",
            endpoint = self._client.get_endpoint(orchestrator_id="cni",
        except KeyError:
            # We don't need to do anything special here, just return.
            # We'll receive another update when the Pod enters running state.
            _log.warn("No endpoint for '%s', wait until running", workload_id)
        except MultipleEndpointsMatch:
            # We should never have multiple endpoints with the same
            # workload_id.  This could theoretically occur if the Calico
            # datastore is out-of-sync with what pods actually exist, but
            # this is an error state and indicates a problem elsewhere.
            _log.error("Multiple Endpoints found matching ID %s", workload_id)

        # Get Kubernetes labels.
        labels = pod["metadata"].get("labels", {})
        _log.debug("Pod '%s' has labels: %s", key, labels)

        # Add a special label for the Kubernetes namespace.  This is used
        # by selector-based policies to select all pods in a given namespace.
        # Note this adds the key to the pod's own labels dict when one exists.
        labels[K8S_NAMESPACE_LABEL] = namespace

        # Set the labels on the endpoint.
        endpoint.labels = labels
        _log.info("Updated labels on pod %s", key)

        # Configure this pod with its namespace profile.
        ns_profile = NS_PROFILE_FMT % namespace

    def _delete_pod(self, key, pod):
        """
        We don't need to do anything when a pod is deleted - the CNI plugin
        handles the deletion of the endpoint.

        :param key: (namespace, name) tuple, used for logging.
        :param pod: The deleted Pod API object (unused).
        :return: None
        """
        _log.info("Pod deleted: %s", key)

    def _watch_api(self, path, resource_version=None):
        """
        Entry point for a watch thread.  Runs the watch work loop and logs
        any exception which escapes it, so that a failure in the thread is
        recorded rather than lost.

        :param path: The API URL to watch.
        :param resource_version: The resourceVersion to start watching from.
        :return: None
        """
        try:
            self.__watch_api(path, resource_version)
        except Exception:
            _log.exception("Exception watching %s", path)

    def __watch_api(self, path, resource_version=None):
        # Work loop for a watch thread: repeatedly issues a GET against
        # `path` through _api_get and iterates over the lines of the
        # response.
        #
        # NOTE(review): this listing appears to have lost lines.  The
        # docstring delimiters, the `try:` for the request, the trailing
        # arguments of the `_api_get` call, whatever follows each error log
        # (as shown, `response` may be unbound or unusable after an error),
        # and the statement which puts each line on the event queue are not
        # visible here - restore them from the upstream file.
        Work loop for the watch thread.
        _log.info("Starting watch on path: %s", path)
        while True:
            # Attempt to stream API resources.
                response = self._api_get(path,
                _log.info("Watch response for %s: %s", path, response)
            except requests.ConnectionError:
                _log.exception("Error querying path: %s", path)

            # Check for successful response.
            if response.status_code != 200:
                _log.error("Error watching path: %s", response.text)

            # Success - add resources to the queue for processing.
            for line in response.iter_lines():
                # Filter out keep-alive new lines.
                if line:
                    _log.debug("Adding line to queue: %s", line)

    def _api_get(self, path, stream, resource_version=None):
        """
        Issue a GET against the Kubernetes API, optionally as a stream.

        :param path: The URL to query.
        :param stream: Passed to requests as `stream`; True to stream the
          response, e.g. a watch emitting JSON objects such as
          {"type": "MODIFIED"|"ADDED"|"DELETED", "object":{...}}
        :param resource_version: If set, sent as the `resourceVersion`
          query parameter to indicate where a watch should start.
        :return: The requests response object.
        """
        _log.info("Getting API resources '%s' at version '%s'. stream=%s",
                  path, resource_version, stream)

        # Append the resource version - this indicates where the
        # watch should start.  Use "&" if the URL already has a query
        # string so that existing parameters are not corrupted.
        if resource_version:
            separator = "&" if "?" in path else "?"
            path += "%sresourceVersion=%s" % (separator, resource_version)

        session = requests.Session()
        if self.auth_token:
            session.headers.update(
                {'Authorization': 'Bearer ' + self.auth_token})

        # NOTE(review): when no CA cert is mounted, TLS verification is
        # disabled entirely - confirm this fallback is intended.
        verify = CA_CERT_PATH if self.ca_crt_exists else False
        return session.get(path, verify=verify, stream=stream)
Example #2
class PolicyAgent():
    def __init__(self):
        """
        Set up the event queue, the Kubernetes API settings, one daemon
        watch thread per resource type (not yet started), the Calico
        datastore client and the internal state dicts.
        """
        # Queue to populate with events from API watches.
        self._event_queue = Queue.Queue()

        # Scheme, IP and port of the Kubernetes API.
        self.k8s_api = os.environ.get("K8S_API", "")

        # Auth token to use when accessing the API.
        self.auth_token = os.environ.get("K8S_AUTH_TOKEN")

        # NOTE(review): the arguments after `target=` were missing from the
        # listing this was restored from for all three threads below;
        # `args=(path,)` is reconstructed from the `path` computed just
        # before each one - confirm against upstream.

        # Thread which performs watch of Kubernetes API for changes to
        # NetworkPolicy objects.
        path = NET_POLICY_WATCH_PATH % self.k8s_api
        self._network_policy_thread = Thread(target=self._watch_api,
                                             args=(path,))
        self._network_policy_thread.daemon = True

        # Thread which performs watch of Kubernetes API for changes to
        # Namespace objects.
        path = NAMESPACE_WATCH_PATH % self.k8s_api
        self._namespace_thread = Thread(target=self._watch_api,
                                        args=(path,))
        self._namespace_thread.daemon = True

        # Thread which performs watch of Kubernetes API for changes to
        # Pod objects.
        path = POD_WATCH_PATH % self.k8s_api
        self._pod_thread = Thread(target=self._watch_api,
                                  args=(path,))
        self._pod_thread.daemon = True

        # Client for accessing the Calico datastore.
        self._client = DatastoreClient()

        # Store internal state.
        self._network_policies = {}
        self._namespaces = {}
        self._pods = {}

    def run(self):
        """
        PolicyAgent.run() is called at program init to spawn watch threads,
        then loops to read updates from the event queue as they come in.

        :return: Does not return, since read_updates() loops forever.
        """
        # Start threads to watch Kubernetes API.
        _log.info("Starting API watch on: NetworkPolicy, Pod, Namespace")
        self._network_policy_thread.start()
        self._namespace_thread.start()
        self._pod_thread.start()

        # Loop and read updates from the queue.
        _log.info("Reading from event queue")
        self.read_updates()

    def read_updates(self):
        """
        Reads from the update queue and processes each update in turn.
        Updates which raise KeyError while being processed are logged and
        dropped.

        :return: Does not return - loops forever.
        """
        update = None

        while True:
            try:
                # There may be an update already, since we do a blocking get
                # in the `except Queue.Empty` block.  If we have an update,
                # just process it before trying to read from the queue again.
                if not update:
                    _log.info("Non-blocking read from event queue")
                    update = self._event_queue.get(block=False)

                # We've received an update - process it.
                _log.debug("Read update from queue: %s",
                           json.dumps(update, indent=2))
                self._process_update(update)
                update = None
            except Queue.Empty:
                _log.info("Queue empty, waiting for updates")
                update = self._event_queue.get(block=True)
            except KeyError:
                # We'll hit this if we fail to parse an invalid update.
                # Set update = None so we don't continue parsing the
                # invalid update.
                _log.exception("Invalid update: %s", update)
                update = None

    def _process_update(self, update):
        """
        Takes an update from the queue and updates our state accordingly.

        Updates for resource types other than NetworkPolicy, Namespace and
        Pod are ignored.  Event types other than added / modified / deleted
        are logged and ignored.

        :param update: Parsed watch event with "type" and "object" keys.
        :return: None
        :raises KeyError: If the update is missing an expected field.
        """
        # Parse out the type of update and resource.
        update_type = update["type"]
        resource_type = update["object"]["kind"]
        _log.info("Processing '%s' for kind '%s'", update_type, resource_type)

        # Determine the key for this object.
        if resource_type == RESOURCE_TYPE_NAMESPACE:
            # Namespaces are just keyed off of their name.
            name = update["object"]["metadata"]["name"]
            key = (name,)
        else:
            # Objects are keyed off their name and namespace.
            name = update["object"]["metadata"]["name"]
            namespace = update["object"]["metadata"]["namespace"]
            key = (namespace, name)

        is_add_or_update = update_type in [TYPE_ADDED, TYPE_MODIFIED]
        if not is_add_or_update and update_type != TYPE_DELETED:
            # Explicit check rather than `assert`: asserts are stripped
            # under -O, and an AssertionError here would escape the read
            # loop, which only handles KeyError and Queue.Empty.
            _log.warning("Ignoring unexpected update type '%s' for %s",
                         update_type, key)
            return

        if resource_type == RESOURCE_TYPE_NETWORK_POLICY:
            # NetworkPolicy objects correspond directly to Calico
            # profiles - create, delete or update the corresponding Calico
            # profile for each NetworkPolicy update.
            if is_add_or_update:
                # Add or update network policy.
                self._add_new_network_policy(key, update)
            else:
                # Delete an existing network policy.
                try:
                    self._delete_network_policy(key, update)
                except KeyError:
                    _log.warning("Delete for unknown network policy: %s", key)
        elif resource_type == RESOURCE_TYPE_NAMESPACE:
            # Namespaces correspond directly to Calico profiles.
            if is_add_or_update:
                # Add or update namespace.
                self._add_new_namespace(key, update)
            else:
                # Delete an existing namespace.
                try:
                    self._delete_namespace(key, update)
                except KeyError:
                    _log.warning("Delete for unknown namespace: %s", key)
        elif resource_type == RESOURCE_TYPE_POD:
            # Pods have policy applied to them using Namespaces and
            # NetworkPolicy objects.  We must update the corresponding
            # endpoints in the Calico datastore to have the correct
            # labels applied.
            if is_add_or_update:
                # Add or update pod.
                self._add_update_pod(key, update)
            else:
                # Delete an existing pod.
                try:
                    self._delete_pod(key, update)
                except KeyError:
                    _log.warning("Delete for unknown pod: %s", key)

    def _add_new_network_policy(self, key, policy):
        """
        Takes a new network policy from the Kubernetes API and
        creates the corresponding Calico policy configuration.

        :param key: (namespace, name) tuple identifying the policy.
        :param policy: The watch event; the NetworkPolicy is under "object".
        :return: None.  If the policy's rules cannot be parsed the error is
          logged and nothing is written to the datastore.
        :raises KeyError: If the policy is missing an expected field.
        """
        _log.info("Adding new network policy: %s", key)
        self._network_policies[key] = policy

        # Parse this network policy so we can convert it to the appropriate
        # Calico policy.  First, get the selector from the API object.
        # A null selector is treated as an empty one.
        k8s_selector = policy["object"]["spec"]["podSelector"] or {}

        # Build the appropriate Calico label selector.  This is done using
        # the labels provided in the NetworkPolicy, as well as the
        # NetworkPolicy's namespace.
        namespace = policy["object"]["metadata"]["namespace"]
        selectors = ["%s == '%s'" % (k, v)
                     for k, v in k8s_selector.iteritems()]
        selectors += ["%s == '%s'" % (K8S_NAMESPACE_LABEL, namespace)]
        selector = " && ".join(selectors)

        # Determine the name for this global policy.
        name = "net_policy-%s" % policy["object"]["metadata"]["name"]

        # Build the Calico rules.
        try:
            inbound_rules = self._calculate_inbound_rules(policy)
        except Exception:
            # It is possible bad rules will be passed - we don't want to
            # crash the agent, but we do want to indicate a problem in the
            # logs, so that the policy can be fixed.
            _log.exception("Error parsing policy: %s",
                           json.dumps(policy, indent=2))
            return
        else:
            # NOTE(review): the arguments after `id=` were missing from the
            # listing this was restored from.  The keyword names and the
            # allow-all outbound rule are reconstructed - confirm upstream.
            rules = Rules(id=name,
                          inbound_rules=inbound_rules,
                          outbound_rules=[Rule(action="allow")])

        # Create the network policy using the calculated selector and rules.
        self._client.create_global_policy(NET_POL_GROUP_NAME, name, selector,
                                          rules)
        _log.info("Updated global policy '%s' for NetworkPolicy %s", name, key)

    def _delete_network_policy(self, key, policy):
        """
        Takes a deleted network policy and removes the corresponding
        configuration from the Calico datastore.

        :param key: (namespace, name) tuple identifying the policy.
        :param policy: The watch event; the NetworkPolicy is under "object".
        :return: None
        :raises KeyError: If the policy is missing its name, or if `key` is
          not in the internal dict of known policies.
        """
        _log.info("Deleting network policy: %s", key)

        # Determine the name for this global policy.
        name = "net_policy-%s" % policy["object"]["metadata"]["name"]

        # Delete the corresponding Calico policy.  Do this before touching
        # internal state (the same order _delete_namespace uses) so that a
        # policy this agent has no record of is still removed from the
        # datastore.
        self._client.remove_global_policy(NET_POL_GROUP_NAME, name)

        # Delete from internal dict.
        del self._network_policies[key]

    def _calculate_inbound_rules(self, policy):
        """
        Takes a NetworkPolicy object from the API and returns a list of
        Calico Rule objects which should be applied on ingress.

        One "allow" rule is produced for every combination of protocol /
        ports entry and "from" clause in each inbound rule.

        :param policy: The watch event; the NetworkPolicy is under "object".
        :return: A list of Rule objects.
        :raises KeyError: If the policy has no spec or no "inbound" list.
        """
        # Store the rules to return.
        rules = []

        # Iterate through each inbound rule and create the appropriate
        # rules.
        allow_incomings = policy["object"]["spec"]["inbound"]
        for r in allow_incomings:
            # Determine the destination ports to allow.  If no ports are
            # specified, allow all port / protocol combinations.
            ports_by_protocol = {}
            for to_port in r.get("ports", []):
                # Keep a dict of ports exposed, keyed by protocol.
                protocol = to_port.get("protocol")
                port = to_port.get("port")
                ports = ports_by_protocol.setdefault(protocol, [])
                if port:
                    _log.debug("Allow to port: %s/%s", protocol, port)
                    ports.append(port)

            # Convert into arguments to be passed to a Rule object.
            to_args = []
            for protocol, ports in ports_by_protocol.iteritems():
                arg = {"protocol": protocol.lower()}
                if ports:
                    arg["dst_ports"] = ports
                to_args.append(arg)

            if not to_args:
                # There are no destination protocols / ports specified.
                # Allow to all protocols and ports.
                to_args = [{}]

            # Determine the from criteria.  If no "from" block is specified,
            # then we should allow from all sources.
            from_args = []
            for from_clause in r.get("from", []):
                pod_selector = from_clause.get("pods", {})
                namespaces = from_clause.get("namespaces", {})
                if pod_selector:
                    # There is a pod selector in this "from" clause.
                    _log.debug("Allow from pods: %s", pod_selector)
                    selectors = ["%s == '%s'" % (k, v)
                                 for k, v in pod_selector.iteritems()]
                    selector = " && ".join(selectors)
                    from_args.append({"src_selector": selector})
                elif namespaces:
                    # NOTE(review): an ignored clause adds nothing to
                    # from_args, so a rule with only namespace clauses
                    # falls through to "allow from all sources" below -
                    # confirm that is acceptable.
                    _log.warning("'from: {namespaces: {}}' is not yet "
                                 "supported - ignoring %s", from_clause)

            if not from_args:
                # There are no match criteria specified.  We should allow
                # from all sources to the given ports.
                from_args = [{}]

            # A rule per-protocol, per-from-clause.
            for to_arg in to_args:
                for from_arg in from_args:
                    # Create a rule by combining a 'from' argument with
                    # the protocol / ports arguments.  Build a fresh dict
                    # each time: updating from_arg in place would leak the
                    # protocol / ports of one to_arg into the next.
                    rule_args = dict(from_arg)
                    rule_args.update(to_arg)
                    rule_args["action"] = "allow"
                    rules.append(Rule(**rule_args))

        _log.debug("Calculated rules: %s", rules)
        return rules

    def _add_new_namespace(self, key, namespace):
        """
        Takes a new namespace from the Kubernetes API and
        creates the corresponding Calico policy configuration.

        :param key: (name,) tuple identifying the namespace.
        :param namespace: The watch event; the Namespace is under "object".
        :return: None
        :raises KeyError: If the namespace is missing an expected field.
        """
        _log.info("Adding new namespace: %s", key)

        # Store the namespace.
        self._namespaces[key] = namespace

        # Determine the type of network-isolation specified by this namespace.
        # This defaults to no isolation.
        annotations = namespace["object"]["metadata"].get("annotations", {})
        _log.debug("Namespace %s has annotations: %s", key, annotations)
        net_isolation = annotations.get(NS_POLICY_ANNOTATION, "no") == "yes"
        _log.info("Namespace %s has: network-isolation=%s", key, net_isolation)

        # Determine the policy name to create.
        namespace_name = namespace["object"]["metadata"]["name"]
        policy_name = "k8s_ns-%s" % namespace_name

        # Determine the rules to use.
        outbound_rules = [Rule(action="allow")]
        if net_isolation:
            inbound_rules = [Rule(action="deny")]
        else:
            inbound_rules = [Rule(action="allow")]
        # NOTE(review): the keyword names after `id=` were missing from the
        # listing this was restored from - confirm against the Rules type.
        rules = Rules(id=policy_name,
                      inbound_rules=inbound_rules,
                      outbound_rules=outbound_rules)

        # Create the Calico policy to represent this namespace, or
        # update it if it already exists.  Namespace policies select each
        # pod within that namespace.
        selector = "%s == '%s'" % (K8S_NAMESPACE_LABEL, namespace_name)
        self._client.create_global_policy(NAMESPACE_GROUP_NAME, policy_name,
                                          selector, rules=rules)
        _log.info("Created/updated global policy for namespace %s",
                  namespace_name)

    def _delete_namespace(self, key, namespace):
        """
        Takes a deleted namespace and removes the corresponding
        configuration from the Calico datastore.

        :param key: (name,) tuple identifying the namespace.
        :param namespace: The watch event; the Namespace is under "object".
        :return: None
        :raises KeyError: If the namespace is missing its name, or if `key`
          is not in the internal dict of known namespaces.
        """
        _log.info("Deleting namespace: %s", key)

        # Delete the Calico policy which represents this namespace.
        # We need to make sure that there are no pods running
        # in this namespace first.
        namespace_name = namespace["object"]["metadata"]["name"]
        policy_name = "k8s_ns-%s" % namespace_name
        self._client.remove_global_policy(NAMESPACE_GROUP_NAME, policy_name)

        # Delete from internal dict.
        del self._namespaces[key]

    def _add_update_pod(self, key, pod):
        # NOTE(review): the two prose lines below read like a docstring that
        # has lost its triple-quote delimiters - TODO restore them.
        Takes a new or updated pod from the Kubernetes API and 
        creates the corresponding Calico configuration.
        _log.info("Adding new pod: %s", key)

        # Store the latest version of the API Pod.
        self._pods[key] = pod 

        # Get the Calico endpoint.  This may or may not have already been 
        # created by the CNI plugin.  If it hasn't been created, we need to 
        # wait until it has before we can do any meaningful work.
        # NOTE(review): the workload_id tuple, the debug call and the
        # get_endpoint() call below are all cut off mid-expression, and the
        # `try:` that pairs with the `except` clauses is missing - TODO
        # restore the missing lines.
        workload_id = "%s.%s" % (pod["object"]["metadata"]["namespace"],
            _log.debug("Looking for endpoint that matches workload_id=%s",
            endpoint = self._client.get_endpoint(
        except KeyError:
            # We don't need to do anything special here, just return.
            # We'll receive another update when the Pod enters running state.
            # NOTE(review): no `return` is visible after this log call, so
            # as written execution would continue without an endpoint.
            _log.warn("No endpoint for '%s', wait until running", workload_id)
        except MultipleEndpointsMatch:
            # We should never have multiple endpoints with the same
            # workload_id.  This could theoretically occur if the Calico
            # datastore is out-of-sync with what pods actually exist, but 
            # this is an error state and indicates a problem elsewhere.
            # NOTE(review): as above, no `return` is visible here.
            _log.error("Multiple Endpoints found matching ID %s", workload_id)

        # Get Kubernetes labels.
        labels = pod["object"]["metadata"].get("labels", {}) 
        _log.debug("Pod '%s' has labels: %s", key, labels)

        # Add a special label for the Kubernetes namespace.
        labels[K8S_NAMESPACE_LABEL] = pod["object"]["metadata"]["namespace"]

        # Set the labels on the endpoint.
        # NOTE(review): only the in-memory endpoint object is updated here;
        # no call that writes it back to the datastore is visible - confirm.
        endpoint.labels = labels
        _log.info("Updated labels on pod %s", key)

        # Remove the 'deny-inbound' profile from the pod now that 
        # it has been configured with labels.  It will match at least the 
        # per-namespace policy, and potentially others, which will 
        # define what connectivity is allowed.
        # NOTE(review): no code follows this comment - the profile removal
        # it describes is not visible here.

    def _delete_pod(self, key, pod):
        """
        Takes a deleted pod and removes the corresponding
        configuration from the Calico datastore.

        :param key: Identifier for this pod; the key into self._pods.
        :param pod: The pod event.  Not inspected by this handler.
        """
        _log.info("Deleting pod: %s", key)

        # Delete from internal dict.  A delete for a pod that was never
        # stored is not an error, so don't raise KeyError for it.
        self._pods.pop(key, None)

    def _watch_api(self, path, resource_version=None):
        # NOTE(review): the next line reads like a docstring that has lost
        # its triple-quote delimiters - TODO restore them.
        Work loop for the watch thread.
        _log.info("Starting watch on path: %s", path)
        # Loops forever, issuing a new streaming request on each pass.
        while True:
            # Attempt to stream API resources.
            # NOTE(review): the `try:` that pairs with the `except` below
            # is missing.
                response = self._get_api_stream(path, resource_version)
                _log.info("Watch response for %s: %s", path, response)
            except requests.ConnectionError:
                _log.exception("Error querying path: %s", path)
                # NOTE(review): nothing visible leaves this iteration, so
                # `response` below would be unbound (or stale) after a
                # connection error - confirm the intended return/continue.

            # Check for successful response.
            if response.status_code != 200:
                _log.error("Error watching path: %s", response.text)
                # NOTE(review): as written a failed response still falls
                # through to the iter_lines() loop below - confirm.

            # Success - add resources to the queue for processing.
            for line in response.iter_lines():
                # Filter out keep-alive new lines.
                if line:
                    _log.debug("Adding line to queue: %s", line)
                    # NOTE(review): only the log call is visible; no queue
                    # put appears here - TODO restore it.

    def _get_api_stream(self, path, resource_version=None):
        """
        Open a streaming GET request against the given API path.

        :param path: Full URL of the API path to stream from.
        :param resource_version: Optional resourceVersion to append as a
          query parameter - this indicates where the watch should start.
        :return: The response returned by session.get(..., stream=True).
          e.g. a stream of json objs
          {"type": "MODIFED"|"ADDED"|"DELETED", "object":{...}}
        """
        # Append the resource version - this indicates where the
        # watch should start.
        _log.info("Streaming API resources '%s' at version '%s'", path, resource_version)
        if resource_version:
            # Extend an existing query string rather than starting a
            # second one.
            separator = "&" if "?" in path else "?"
            path += "%sresourceVersion=%s" % (separator, resource_version)

        # The session is deliberately left open: the caller consumes the
        # streamed response after this method returns.
        session = requests.Session()
        if self.auth_token:
            # Never write the token itself to the logs.
            _log.debug("Using auth token for API request")
            session.headers.update({'Authorization': 'Bearer ' + self.auth_token})
        # NOTE(review): verify=False disables TLS certificate verification
        # for this request.
        return session.get(path, verify=False, stream=True)
    def _get_api_resource(self, path):
        """
        Get a resource from the API at the specified API path.

        :param path: Full URL of the API resource to fetch.
        :return: The response body decoded from JSON.
        :raises ValueError: If the response body is not valid JSON.
        """
        _log.debug("Getting API Resource: %s", path)
        # The body is read in full before the `with` block exits, so the
        # session (and its pooled connection) can be closed straight away.
        with requests.Session() as session:
            if self.auth_token:
                # Never write the token itself to the logs.
                _log.debug("Using auth token for API request")
                session.headers.update({'Authorization': 'Bearer ' + self.auth_token})
            # NOTE(review): verify=False disables TLS certificate
            # verification for this request.
            response = session.get(path, verify=False)
            body = response.text
        return json.loads(body)
class PolicyAgent():
    def __init__(self):
        # NOTE(review): each assignment below is followed by a bare prose
        # line that reads like an attribute docstring with its triple-quote
        # delimiters missing - TODO restore the delimiters or convert the
        # prose to comments.
        self._event_queue = Queue.Queue()
        Queue to populate with events from API watches.

        # Falls back to DEFAULT_API when K8S_API is not set.
        self.k8s_api = os.environ.get("K8S_API", DEFAULT_API)
        Scheme, IP and port of the Kubernetes API.

        # read_token_file() is evaluated even when K8S_AUTH_TOKEN is set,
        # because it is passed as the default argument.
        self.auth_token = os.environ.get("K8S_AUTH_TOKEN", read_token_file())
        Auth token to use when accessing the API.
        # NOTE(review): this writes the auth token to the debug log -
        # consider redacting it.
        _log.debug("Using auth token: %s", self.auth_token)

        self.ca_crt_exists = os.path.exists(CA_CERT_PATH)
        True if a CA cert has been mounted by Kubernetes.  

        self._client = DatastoreClient()
        Client for accessing the Calico datastore.

        # Maps (resource_type, event_type) -> handler; see add_handler().
        self._handlers = {}
        # NOTE(review): all four add_handler() calls below are cut off
        # before the handler argument - TODO restore the missing lines.
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_ADDED, 
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_DELETED, 
        self.add_handler(RESOURCE_TYPE_POD, TYPE_ADDED, 
        self.add_handler(RESOURCE_TYPE_POD, TYPE_DELETED, 
        Handlers for watch events.
    def add_handler(self, resource_type, event_type, handler):
        """
        Adds an event handler for the given event type (ADD, DELETE) for the
        given resource type.

        :param resource_type: The kind of resource the handler applies to.
        :param event_type: The event type the handler applies to.
        :param handler: Callable to store.  _process_update() invokes
          handlers as handler(key, resource).

        Registering a second handler for the same (resource_type,
        event_type) pair replaces the first.
        """
        _log.info("Setting %s %s handler: %s",
                  resource_type, event_type, handler)
        key = (resource_type, event_type)
        self._handlers[key] = handler

    def get_handler(self, resource_type, event_type):
        """
        Gets the correct handler.

        :param resource_type: The kind of resource being processed.
        :param event_type: The type of event being processed.
        :return: The handler registered for this pair via add_handler().
        :raises KeyError: If no handler has been registered for the pair.
        """
        key = (resource_type, event_type)
        _log.debug("Looking up handler for event: %s", key)
        return self._handlers[key]

    def run(self):
        # NOTE(review): the two prose lines below read like a docstring that
        # has lost its triple-quote delimiters - TODO restore them.
        PolicyAgent.run() is called at program init to spawn watch threads,
        Loops to read responses from the _watcher Queue as they come in.
        # NOTE(review): the list literal below is cut off after its first
        # element - TODO restore the remaining resource types.
        resources = [RESOURCE_TYPE_NETWORK_POLICY, 
        for resource_type in resources:
            # Get existing resources from the API.
            _log.info("Getting existing %s objects", resource_type)
            get_url = GET_URLS[resource_type] % self.k8s_api
            resp = self._api_get(get_url, stream=False)
            _log.info("Response: %s", resp)

            if resp.status_code != 200:
                _log.error("Error querying API: %s", resp.json())
                # NOTE(review): nothing visible stops processing after this
                # error, so the failed response is still parsed below -
                # confirm the intended return/continue.
            updates = resp.json()["items"]
            metadata = resp.json().get("metadata", {})
            # The list's resourceVersion is handed to the watch thread below
            # as its starting point.
            resource_version = metadata.get("resourceVersion")
            _log.debug("%s metadata: %s", resource_type, metadata)

            # Process the existing resources.  Each one is fed through
            # _process_update() as an ADDED event.
            _log.info("%s existing %s(s)", len(updates), resource_type)
            for update in updates:
                _log.debug("Processing existing resource: %s", 
                           json.dumps(update, indent=2))
                self._process_update(TYPE_ADDED, resource_type, update)

            # Start watching for updates from the last resourceVersion.
            watch_url = WATCH_URLS[resource_type] % self.k8s_api
            t = Thread(target=self._watch_api, 
                       args=(watch_url, resource_version))
            t.daemon = True
            # NOTE(review): no t.start() is visible before this log line,
            # so as written the thread is created but never started.
            _log.info("Started watch on: %s", resource_type)

        # Loop and read updates from the queue.
        # NOTE(review): only the log call is visible; the call that enters
        # the read loop (presumably read_updates()) is missing - confirm.
        _log.info("Reading from event queue")

    def read_updates(self):
        # NOTE(review): the next line reads like a docstring that has lost
        # its triple-quote delimiters - TODO restore them.
        Reads from the update queue.
        # Holds an update fetched by the blocking get in the Queue.Empty
        # handler until the next pass of the loop processes it.
        update = None

        while True:
            # NOTE(review): the `try:` that pairs with the `except` clauses
            # below is missing.
                # There may be an update already, since we do a blocking get
                # in the `except Queue.Empty` block.  If we have an update, 
                # just process it before trying to read from the queue again.
                if not update:
                    _log.info("Non-blocking read from event queue")
                    update = self._event_queue.get(block=False)

                # We've received an update - process it.
                _log.debug("Read update from queue: %s", json.dumps(update, indent=2))
                # NOTE(review): no call that actually processes the update
                # is visible between the log line above and the reset
                # below - TODO restore it.
                update = None
            except Queue.Empty:
                # Nothing queued: block until the next update arrives and
                # carry it into the next loop iteration.
                _log.info("Queue empty, waiting for updates")
                update = self._event_queue.get(block=True)
            except KeyError:
                # We'll hit this if we fail to parse an invalid update.
                # Set update = None so we don't continue parsing the 
                # invalid update.
                _log.exception("Invalid update: %s", update)
                update = None

    def _process_update(self, event_type, resource_type, resource):
        """
        Takes an event and updates our state accordingly.

        :param event_type: The type of event.  TYPE_MODIFIED is handled
          as TYPE_ADDED.
        :param resource_type: The kind of resource the event is for.
        :param resource: The API object (dict).  Must contain
          metadata.name; metadata.namespace is optional.
        :raises KeyError: If the resource has no "metadata" or no name.
          A KeyError raised by the handler itself is logged, not raised.
        """
        _log.info("Processing '%s' for kind '%s'", event_type, resource_type)

        # Determine the key for this object using namespace and name.
        # This is simply used for easy identification in logs, etc.
        name = resource["metadata"]["name"]
        namespace = resource["metadata"].get("namespace")
        key = (namespace, name)

        # Treat "modified" as "added".
        if event_type == TYPE_MODIFIED:
            _log.info("Treating 'MODIFIED' as 'ADDED'")
            event_type = TYPE_ADDED

        # Call the right handler.
        try:
            handler = self.get_handler(resource_type, event_type)
        except KeyError:
            _log.warning("No %s handlers for: %s",
                         event_type, resource_type)
        else:
            _log.debug("Calling handler: %s", handler)
            try:
                handler(key, resource)
            except KeyError:
                # The handler could not find a field it needs in the
                # resource - log the offending object and carry on.
                _log.exception("Invalid %s: %s", resource_type,
                               json.dumps(resource, indent=2))

    def _add_update_network_policy(self, key, policy):
        # NOTE(review): the two prose lines below read like a docstring that
        # has lost its triple-quote delimiters - TODO restore them.
        Takes a new network policy from the Kubernetes API and 
        creates the corresponding Calico policy configuration.
        _log.info("Adding new network policy: %s", key)

        # Parse this network policy so we can convert it to the appropriate
        # Calico policy.  First, get the selector from the API object.
        # A null podSelector is treated as an empty selector.
        k8s_selector = policy["spec"]["podSelector"]
        k8s_selector = k8s_selector or {}

        # Build the appropriate Calico label selector.  This is done using 
        # the labels provided in the NetworkPolicy, as well as the 
        # NetworkPolicy's namespace.
        namespace = policy["metadata"]["namespace"]
        selectors = ["%s == '%s'" % (k,v) for k,v in k8s_selector.iteritems()]
        selectors += ["%s == '%s'" % (K8S_NAMESPACE_LABEL, namespace)]
        selector = " && ".join(selectors)

        # Determine the name for this global policy.
        name = "net_policy-%s" % policy["metadata"]["name"]

        # Build the Calico rules.
        # NOTE(review): the `try:` that pairs with the `except` below is
        # missing.
            inbound_rules = self._calculate_inbound_rules(policy)
        except Exception:
            # It is possible bad rules will be passed - we don't want to 
            # crash the agent, but we do want to indicate a problem in the
            # logs, so that the policy can be fixed.
            _log.exception("Error parsing policy: %s", 
                           json.dumps(policy, indent=2))
            # NOTE(review): the Rules(...) call below is cut off after its
            # first argument, and it is unclear whether it belongs to this
            # `except` branch or to a missing `else:` branch - confirm.
            rules =  Rules(id=name,

        # Create the network policy using the calculated selector and rules.
        self._client.create_global_policy(NET_POL_GROUP_NAME, name, selector, rules)
        _log.info("Updated global policy '%s' for NetworkPolicy %s", name, key)

    def _delete_network_policy(self, key, policy):
        """
        Takes a deleted network policy and removes the corresponding
        configuration from the Calico datastore.

        :param key: Identifier for this policy; used in log messages.
        :param policy: The NetworkPolicy API object (dict).  Only
          metadata.name is read.
        :raises KeyError: If the policy has no "metadata" or no name.
        """
        _log.info("Deleting network policy: %s", key)

        # Determine the name for this global policy.
        name = "net_policy-%s" % policy["metadata"]["name"]

        # Delete the corresponding Calico policy.  A policy which is
        # already missing from the datastore is not an error.
        try:
            self._client.remove_global_policy(NET_POL_GROUP_NAME, name)
        except KeyError:
            _log.info("Unable to find policy '%s' - already deleted", key)

    def _calculate_inbound_rules(self, policy):
        """
        Takes a NetworkPolicy object from the API and returns a list of
        Calico Rules objects which should be applied on ingress.

        One "allow" Rule is produced for every combination of protocol and
        "from" clause within each ingress rule.

        :param policy: The NetworkPolicy API object (dict).
        :return: A list of Rule objects.  Empty if the policy has no
          ingress rules.
        :raises PolicyError: If a "from" clause specifies both 'pods'
          and 'namespaces'.
        :raises KeyError: If the policy has no metadata.namespace or no
          "spec".
        """
        _log.debug("Calculating inbound rules")

        # Store the rules to return.
        rules = []

        # Get this policy's namespace.
        policy_ns = policy["metadata"]["namespace"]

        # Iterate through each inbound rule and create the appropriate
        # rules.
        allow_incomings = policy["spec"].get("ingress") or []
        _log.info("Found %s ingress rules", len(allow_incomings))
        for r in allow_incomings:
            # Determine the destination ports to allow.  If no ports are
            # specified, allow all port / protocol combinations.
            _log.debug("Processing ingress rule: %s", r)
            ports_by_protocol = {}
            for to_port in r.get("ports") or []:
                # Keep a dict of ports exposed, keyed by protocol.  An
                # unspecified protocol is treated as TCP rather than
                # crashing on None.lower() below.
                protocol = to_port.get("protocol") or "TCP"
                port = to_port.get("port")
                ports = ports_by_protocol.setdefault(protocol, [])
                if port:
                    _log.debug("Allow to port: %s/%s", protocol, port)
                    ports.append(port)

            # Convert into arguments to be passed to a Rule object.
            to_args = []
            for protocol, ports in ports_by_protocol.iteritems():
                arg = {"protocol": protocol.lower()}
                if ports:
                    arg["dst_ports"] = ports
                to_args.append(arg)

            if not to_args:
                # There are no destination protocols / ports specified.
                # Allow to all protocols and ports.
                to_args = [{}]

            # Determine the from criteria.  If no "from" block is specified,
            # then we should allow from all sources.
            from_args = []
            for from_clause in r.get("from") or []:
                # We need to check if the key exists, not just if there is
                # a non-null value.  The presence of the key with a null
                # value means "select all".
                pods_present = "pods" in from_clause
                namespaces_present = "namespaces" in from_clause
                _log.debug("Is 'pods:' present? %s", pods_present)
                _log.debug("Is 'namespaces:' present? %s", namespaces_present)

                if pods_present and namespaces_present:
                    # This is an error case according to the API.
                    msg = "Policy API does not support both 'pods' and " \
                          "'namespaces' selectors."
                    raise PolicyError(msg, policy)
                elif pods_present:
                    # There is a pod selector in this "from" clause.
                    pod_selector = from_clause["pods"] or {}
                    _log.debug("Allow from pods: %s", pod_selector)
                    selectors = ["%s == '%s'" % (k, v)
                                 for k, v in pod_selector.iteritems()]

                    # We can only select on pods in this namespace.  The
                    # value is quoted, as in every other selector term.
                    selectors.append("%s == '%s'" % (K8S_NAMESPACE_LABEL,
                                                     policy_ns))
                    selector = " && ".join(selectors)

                    # Append the selector to the from args.
                    _log.debug("Allowing pods which match: %s", selector)
                    from_args.append({"src_selector": selector})
                elif namespaces_present:
                    # There is a namespace selector.  Namespace labels are
                    # applied to each pod in the namespace using
                    # the per-namespace profile.  We can select on namespace
                    # labels using the NS_LABEL_KEY_FMT modifier.
                    namespaces = from_clause["namespaces"] or {}
                    _log.debug("Allow from namespaces: %s", namespaces)
                    selectors = ["%s == '%s'" % (NS_LABEL_KEY_FMT % k, v)
                                 for k, v in namespaces.iteritems()]
                    selector = " && ".join(selectors)
                    if selector:
                        # Allow from the selected namespaces.
                        _log.debug("Allowing from namespaces which match: %s",
                                   selector)
                        from_args.append({"src_selector": selector})
                    else:
                        # Allow from all pods in all namespaces.
                        _log.debug("Allowing from all pods in all namespaces")
                        selector = "has(%s)" % K8S_NAMESPACE_LABEL
                        from_args.append({"src_selector": selector})

            if not from_args:
                # There are no match criteria specified.  We should allow
                # from all sources to the given ports.
                from_args = [{}]

            # A rule per-protocol, per-from-clause.
            for to_arg in to_args:
                for from_arg in from_args:
                    # Create a rule by combining a 'from' argument with
                    # the protocol / ports arguments.  Work on a copy so
                    # that one protocol's arguments cannot leak into the
                    # rule built for the next protocol.
                    rule_args = dict(from_arg)
                    rule_args.update(to_arg)
                    rule_args["action"] = "allow"
                    rules.append(Rule(**rule_args))

        _log.debug("Calculated rules: %s", rules)
        return rules

    def _add_update_namespace(self, key, namespace):
        """
        Configures the necessary policy in Calico for this
        namespace.  Uses the network-isolation annotation
        (NS_POLICY_ANNOTATION) to decide whether inbound traffic to the
        namespace is allowed or denied.

        :param key: Identifier for this namespace; used in log messages.
        :param namespace: The Namespace API object (dict).
        :raises KeyError: If the namespace has no "metadata" or no name.
        """
        _log.info("Adding/updating namespace: %s", key)

        # Determine the type of network-isolation specified by this namespace.
        # This defaults to no isolation.  A null "annotations" value is
        # treated the same as a missing one.
        metadata = namespace["metadata"]
        annotations = metadata.get("annotations") or {}
        _log.debug("Namespace %s has annotations: %s", key, annotations)
        net_isolation = annotations.get(NS_POLICY_ANNOTATION, "no") == "yes"
        _log.info("Namespace %s has network-isolation? %s", key, net_isolation)

        # Determine the profile name to create.
        namespace_name = metadata["name"]
        profile_name = NS_PROFILE_FMT % namespace_name

        # Determine the rules to use.  Outbound traffic is always allowed;
        # inbound traffic is denied only when isolation is switched on.
        outbound_rules = [Rule(action="allow")]
        if net_isolation:
            inbound_rules = [Rule(action="deny")]
        else:
            inbound_rules = [Rule(action="allow")]
        rules = Rules(id=profile_name,
                      inbound_rules=inbound_rules,
                      outbound_rules=outbound_rules)

        # Create the Calico profile to represent this namespace, or
        # update it if it already exists.
        self._client.create_profile(profile_name, rules)

        # Assign labels to the profile.  We modify the keys to use
        # a special prefix to indicate that these labels are inherited
        # from the namespace.  Build a new dict: re-keying the original
        # while iterating over it is unsafe, and would also modify the
        # caller's API object.
        api_labels = metadata.get("labels") or {}
        labels = {NS_LABEL_KEY_FMT % k: v for k, v in api_labels.iteritems()}
        _log.debug("Generated namespace labels: %s", labels)

        # TODO: Actually assign labels to the profile.

        _log.info("Created/updated profile for namespace %s", namespace_name)

    def _delete_namespace(self, key, namespace):
        # NOTE(review): the two prose lines below read like a docstring that
        # has lost its triple-quote delimiters - TODO restore them.
        Takes a deleted namespace and removes the corresponding
        configuration from the Calico datastore.
        _log.info("Deleting namespace: %s", key)

        # Delete the Calico policy which represents this namespace.
        # We need to make sure that there are no pods running 
        # in this namespace first.
        namespace_name = namespace["metadata"]["name"]
        profile_name = NS_PROFILE_FMT % namespace_name
        # NOTE(review): the `try:` block that should precede this `except`
        # is missing, including the call that removes the profile from the
        # datastore.  As written, profile_name is computed but never used -
        # TODO restore the missing lines.
        except KeyError:
            _log.info("Unable to find profile for namespace '%s'", key)

    def _add_update_pod(self, key, pod):
        # NOTE(review): the two prose lines below read like a docstring that
        # has lost its triple-quote delimiters - TODO restore them.
        Takes a new or updated pod from the Kubernetes API and 
        creates the corresponding Calico configuration.
        _log.info("Adding new pod: %s", key)

        # Get the Calico endpoint.  This may or may not have already been 
        # created by the CNI plugin.  If it hasn't been created, we need to 
        # wait until it has before we can do any meaningful work.
        namespace = pod["metadata"]["namespace"]
        name = pod["metadata"]["name"]
        workload_id = "%s.%s" % (namespace, name)
        # NOTE(review): the `try:` that pairs with the `except` clauses is
        # missing, and both calls below are cut off before their remaining
        # arguments - TODO restore the missing lines.
            _log.debug("Looking for endpoint that matches workload_id=%s",
            endpoint = self._client.get_endpoint(
        except KeyError:
            # We don't need to do anything special here, just return.
            # We'll receive another update when the Pod enters running state.
            # NOTE(review): no `return` is visible after this log call, so
            # as written execution would continue without an endpoint.
            _log.warn("No endpoint for '%s', wait until running", workload_id)
        except MultipleEndpointsMatch:
            # We should never have multiple endpoints with the same
            # workload_id.  This could theoretically occur if the Calico
            # datastore is out-of-sync with what pods actually exist, but 
            # this is an error state and indicates a problem elsewhere.
            # NOTE(review): as above, no `return` is visible here.
            _log.error("Multiple Endpoints found matching ID %s", workload_id)

        # Get Kubernetes labels.
        labels = pod["metadata"].get("labels", {}) 
        _log.debug("Pod '%s' has labels: %s", key, labels)

        # Add a special label for the Kubernetes namespace.  This is used
        # by selector-based policies to select all pods in a given namespace.
        labels[K8S_NAMESPACE_LABEL] = namespace 

        # Set the labels on the endpoint.
        # NOTE(review): only the in-memory endpoint object is updated here;
        # no call that writes it back to the datastore is visible - confirm.
        endpoint.labels = labels
        _log.info("Updated labels on pod %s", key)

        # Configure this pod with its namespace profile.
        # NOTE(review): ns_profile is computed but nothing visible applies
        # it to the endpoint - TODO restore the missing call.
        ns_profile = NS_PROFILE_FMT % namespace

    def _delete_pod(self, key, pod):
        """
        We don't need to do anything when a pod is deleted - the CNI plugin
        handles the deletion of the endpoint.

        :param key: Identifier for this pod; used in the log message.
        :param pod: The deleted Pod API object.  Not inspected.
        """
        _log.info("Pod deleted: %s", key)

    def _watch_api(self, path, resource_version=None):
        """
        Thread entry point for a watch.  Runs the watch loop and logs any
        exception that escapes it instead of letting it propagate out of
        the thread.

        :param path: The API path to watch.
        :param resource_version: Optional resourceVersion to start the
          watch from.
        """
        try:
            self.__watch_api(path, resource_version)
        except Exception:
            _log.exception("Exception watching %s", path)

    def __watch_api(self, path, resource_version=None):
        # NOTE(review): the next line reads like a docstring that has lost
        # its triple-quote delimiters - TODO restore them.
        Work loop for the watch thread.
        _log.info("Starting watch on path: %s", path)
        # Loops forever, issuing a new request on each pass.
        while True:
            # Attempt to stream API resources.
            # NOTE(review): the `try:` that pairs with the `except` below is
            # missing, and the _api_get() call is cut off after its first
            # argument - TODO restore the missing lines.
                response = self._api_get(path, 
                _log.info("Watch response for %s: %s", path, response)
            except requests.ConnectionError:
                _log.exception("Error querying path: %s", path)
                # NOTE(review): nothing visible leaves this iteration, so
                # `response` below would be unbound (or stale) after a
                # connection error - confirm the intended return/continue.

            # Check for successful response.
            if response.status_code != 200:
                _log.error("Error watching path: %s", response.text)
                # NOTE(review): as written a failed response still falls
                # through to the iter_lines() loop below - confirm.

            # Success - add resources to the queue for processing.
            for line in response.iter_lines():
                # Filter out keep-alive new lines.
                if line:
                    _log.debug("Adding line to queue: %s", line)
                    # NOTE(review): only the log call is visible; no queue
                    # put appears here - TODO restore it.

    def _api_get(self, path, stream, resource_version=None):
        """
        Issue a GET request against the given API path.

        :param path: Full URL of the API path to query.
        :param stream: Passed through to requests as `stream`.  When True
          the response body is left unread for the caller to iterate, e.g.
          a stream of json objs
          {"type": "MODIFED"|"ADDED"|"DELETED", "object":{...}}
        :param resource_version: Optional resourceVersion to append as a
          query parameter - this indicates where a watch should start.
        :return: The response returned by session.get().
        """
        # Append the resource version - this indicates where the
        # watch should start.
        _log.info("Getting API resources '%s' at version '%s'. stream=%s",
                  path, resource_version, stream)
        if resource_version:
            # Extend an existing query string rather than starting a
            # second one.
            separator = "&" if "?" in path else "?"
            path += "%sresourceVersion=%s" % (separator, resource_version)

        # The session is deliberately left open: a streamed response is
        # consumed by the caller after this method returns.
        session = requests.Session()
        if self.auth_token:
            session.headers.update({'Authorization': 'Bearer ' + self.auth_token})
        # Verify against the CA cert when one exists at CA_CERT_PATH;
        # otherwise certificate verification is disabled.
        verify = CA_CERT_PATH if self.ca_crt_exists else False
        return session.get(path, verify=verify, stream=stream)