Example #1
0
 def _attempt_cleanup(self):
     """
     Sweep this host's endpoint status subtree in etcd.

     Every leaf whose key parses as an endpoint ID is marked dirty; any
     other leaf that is a directory is handed to delete_empty_parents().
     Does nothing beyond logging if the subtree does not exist.
     """
     status_root = "/".join(
         (FELIX_STATUS_DIR, self._config.HOSTNAME, "workload"))
     try:
         # Recursively fetch every status report stored under our host.
         snapshot = self.client.read(status_root, recursive=True)
     except EtcdKeyNotFound:
         _log.info("No endpoint statuses found, nothing to clean up")
         return

     # Everything we find gets marked dirty, which results in any unknown
     # endpoints being cleaned up.
     for leaf in snapshot.leaves:
         ep_id = get_endpoint_id_from_key(leaf.key)
         if ep_id:
             _log.debug("Endpoint %s removed by resync, marking status key "
                        "for cleanup", ep_id)
             self._mark_endpoint_dirty(ep_id)
             continue
         if leaf.dir:
             # A leaf that is a directory is empty; try to remove it.  This
             # is safe even if another thread is adding keys back into the
             # directory.
             _log.debug("Found empty directory %s, cleaning up", leaf.key)
             delete_empty_parents(self.client, leaf.key, status_root)
Example #2
0
    def clean_up_endpoint_statuses(self, our_endpoints_ids):
        """
        Mark any endpoint status reports for non-existent endpoints
        for cleanup.

        Reads this host's status subtree from etcd.  Each leaf whose key
        parses as an endpoint ID that is not in our_endpoints_ids is marked
        dirty on the status reporter; leaves that are directories are passed
        to delete_empty_parents().  Returns early (after logging) when status
        reporting is disabled or the subtree does not exist.

        :param set our_endpoints_ids: Set of endpoint IDs for endpoints on
               this host.
        """
        if not self._config.REPORT_ENDPOINT_STATUS:
            _log.debug("Endpoint status reporting disabled, ignoring.")
            return

        our_host_dir = "/".join(
            [FELIX_STATUS_DIR, self._config.HOSTNAME, "workload"])
        try:
            # Grab all the existing status reports.
            response = self.client.read(our_host_dir, recursive=True)
        except EtcdKeyNotFound:
            _log.info("No endpoint statuses found, nothing to clean up")
            return

        for node in response.leaves:
            combined_id = get_endpoint_id_from_key(node.key)
            if combined_id and combined_id not in our_endpoints_ids:
                # We found an endpoint in our status reporting tree that
                # wasn't in the main tree.  Mark it as dirty so the status
                # reporting thread will clean it up.
                _log.debug("Endpoint %s removed by resync, marking "
                           "status key for cleanup", combined_id)
                # "async" is a reserved word from Python 3.7 onwards, so it
                # cannot be spelled as a literal keyword argument there.
                # Unpacking a dict makes the identical call and parses on
                # both Python 2 and Python 3.
                self._status_reporter.mark_endpoint_dirty(
                    combined_id, **{"async": True})
            elif node.dir:
                # This leaf is an empty directory, try to clean it up.
                # This is safe even if another thread is adding keys back
                # into the directory.
                _log.debug("Found empty directory %s, cleaning up", node.key)
                delete_empty_parents(self.client, node.key, our_host_dir)
Example #3
0
 def _attempt_cleanup(self):
     """
     Walk the etcd status directory for this host and tidy it up.

     Endpoint status keys are marked dirty via _mark_endpoint_dirty();
     directory leaves are passed to delete_empty_parents().  If the
     directory is missing, this only logs and returns.
     """
     path_parts = [FELIX_STATUS_DIR, self._config.HOSTNAME, "workload"]
     host_status_dir = "/".join(path_parts)

     try:
         # Pull back the complete set of existing status reports.
         result = self.client.read(host_status_dir, recursive=True)
     except EtcdKeyNotFound:
         _log.info("No endpoint statuses found, nothing to clean up")
         return

     # Mark each status we find as dirty so that any unknown endpoints end
     # up being cleaned up.
     for entry in result.leaves:
         endpoint_id = get_endpoint_id_from_key(entry.key)
         if endpoint_id:
             _log.debug(
                 "Endpoint %s removed by resync, marking status key for "
                 "cleanup",
                 endpoint_id)
             self._mark_endpoint_dirty(endpoint_id)
         elif entry.dir:
             # An empty directory shows up as a leaf; try to remove it.
             # Safe even if another thread is adding keys back into the
             # directory.
             _log.debug("Found empty directory %s, cleaning up", entry.key)
             delete_empty_parents(self.client, entry.key, host_status_dir)
Example #4
0
    def _on_snapshot_loaded(self, etcd_snapshot_response):
        """Called whenever a snapshot is loaded from etcd.

        Updates the driver with the current state.  The snapshot's leaves
        are walked twice: first to collect the hosts that have a Felix
        status key, then to process endpoint status keys.  Endpoints that
        are present in the previous cache but absent from the cache built
        during this pass are reported to the driver with a status of None.
        Finally the new cache replaces the old one.

        :param etcd_snapshot_response: etcd read result; each of its
            ``leaves`` is examined by ``key`` (and ``value`` for endpoint
            status keys).
        """
        LOG.info("Started processing status-reporting snapshot from etcd")
        endpoints_by_host = collections.defaultdict(set)
        hosts_with_live_felix = set()

        # First pass: find all the Felixes that are alive.
        for etcd_node in etcd_snapshot_response.leaves:
            felix_hostname = datamodel_v1.hostname_from_status_key(
                etcd_node.key)
            if felix_hostname:
                # Defer to the code for handling an event.
                hosts_with_live_felix.add(felix_hostname)
                self._on_status_set(etcd_node, felix_hostname)

        # Second pass: find all the endpoints associated with a live Felix.
        for etcd_node in etcd_snapshot_response.leaves:
            endpoint_id = datamodel_v1.get_endpoint_id_from_key(etcd_node.key)
            if not endpoint_id:
                continue
            if endpoint_id.host in hosts_with_live_felix:
                LOG.debug("Endpoint %s is on a host with a live Felix.",
                          endpoint_id)
                self._report_status(
                    endpoints_by_host,
                    endpoint_id,
                    etcd_node.value
                )
            else:
                LOG.debug("Endpoint %s is not on a host with live Felix; "
                          "marking it down.",
                          endpoint_id)
                self.calico_driver.on_port_status_changed(
                    endpoint_id.host,
                    endpoint_id.endpoint,
                    None,
                )

        # Find any removed endpoints.  items() (rather than the Python 2-only
        # iteritems()) keeps this loop working on both Python 2 and 3.
        for host, endpoints in self._endpoints_by_host.items():
            current_endpoints = endpoints_by_host.get(host, set())
            removed_endpoints = endpoints - current_endpoints
            for endpoint_id in removed_endpoints:
                # The endpoint must be passed as the format argument,
                # otherwise the literal "%s" is what gets logged.
                LOG.debug("Endpoint %s removed by resync.", endpoint_id)
                self.calico_driver.on_port_status_changed(
                    host,
                    endpoint_id.endpoint,
                    None,
                )

        # Swap in the newly-loaded state.
        self._endpoints_by_host = endpoints_by_host
        LOG.info("Finished processing status-reporting snapshot from etcd")
Example #5
0
    def _on_snapshot_loaded(self, etcd_snapshot_response):
        """Called whenever a snapshot is loaded from etcd.

        Updates the driver with the current state.  Processing happens in
        three steps: collect the hosts that have a Felix status key, process
        every endpoint status key, then report (with a status of None) each
        endpoint that is in the previous cache but not in the cache built
        from this snapshot.  The new cache then replaces the old one.

        :param etcd_snapshot_response: etcd read result; each of its
            ``leaves`` is examined by ``key`` (and ``value`` for endpoint
            status keys).
        """
        LOG.info("Started processing status-reporting snapshot from etcd")
        endpoints_by_host = collections.defaultdict(set)
        hosts_with_live_felix = set()

        # First pass: find all the Felixes that are alive.
        for etcd_node in etcd_snapshot_response.leaves:
            key = etcd_node.key
            felix_hostname = datamodel_v1.hostname_from_status_key(key)
            if felix_hostname:
                # Defer to the code for handling an event.
                hosts_with_live_felix.add(felix_hostname)
                self._on_status_set(etcd_node, felix_hostname)

        # Second pass: find all the endpoints associated with a live Felix.
        for etcd_node in etcd_snapshot_response.leaves:
            key = etcd_node.key
            endpoint_id = datamodel_v1.get_endpoint_id_from_key(key)
            if endpoint_id:
                if endpoint_id.host in hosts_with_live_felix:
                    LOG.debug("Endpoint %s is on a host with a live Felix.",
                              endpoint_id)
                    self._report_status(
                        endpoints_by_host,
                        endpoint_id,
                        etcd_node.value
                    )
                else:
                    # Note the space after the semicolon: the two literals
                    # are concatenated into one message.
                    LOG.debug("Endpoint %s is not on a host with live Felix; "
                              "marking it down.",
                              endpoint_id)
                    self.calico_driver.on_port_status_changed(
                        endpoint_id.host,
                        endpoint_id.endpoint,
                        None,
                    )

        # Find any removed endpoints.  items() works on both Python 2 and 3,
        # unlike iteritems().
        for host, endpoints in self._endpoints_by_host.items():
            current_endpoints = endpoints_by_host.get(host, set())
            removed_endpoints = endpoints - current_endpoints
            for endpoint_id in removed_endpoints:
                # Supply the endpoint for the "%s" placeholder so the log
                # line actually identifies what was removed.
                LOG.debug("Endpoint %s removed by resync.", endpoint_id)
                self.calico_driver.on_port_status_changed(
                    host,
                    endpoint_id.endpoint,
                    None,
                )

        # Swap in the newly-loaded state.
        self._endpoints_by_host = endpoints_by_host
        LOG.info("Finished processing status-reporting snapshot from etcd")
Example #6
0
def parse_if_endpoint(config, etcd_node):
    """
    Interpret an etcd node as an endpoint event if its key is an endpoint key.

    :param config: passed through unchanged to parse_endpoint().
    :param etcd_node: node whose key, action and value are examined.
    :returns: tuple (endpoint ID, endpoint).  The endpoint is None when the
        node's action is "delete"; both items are None when no endpoint ID
        can be extracted from the key.
    """
    endpoint_id = get_endpoint_id_from_key(etcd_node.key)
    if not endpoint_id:
        # Not an endpoint key at all.
        return None, None

    if etcd_node.action == "delete":
        _log.debug("Found deleted endpoint %s", endpoint_id)
        return endpoint_id, None

    # EndpointId does the interning for us.
    return endpoint_id, parse_endpoint(config, endpoint_id, etcd_node.value)
Example #7
0
def parse_if_endpoint(config, etcd_node):
    """
    Parse the given etcd node as an endpoint, if its key names one.

    :param config: forwarded to parse_endpoint().
    :param etcd_node: node providing the key, action and value.
    :returns: (endpoint ID, parsed endpoint), with None in place of the
        endpoint for a "delete" action, or (None, None) if the key does not
        yield an endpoint ID.
    """
    ep_id = get_endpoint_id_from_key(etcd_node.key)
    if ep_id:
        was_deleted = etcd_node.action == "delete"
        if was_deleted:
            _log.debug("Found deleted endpoint %s", ep_id)
            parsed = None
        else:
            parsed = parse_endpoint(config, ep_id, etcd_node.value)
        # EndpointId does the interning for us.
        return ep_id, parsed
    return None, None
Example #8
0
    def _on_ep_set(self, response, hostname, workload, endpoint):
        """Called when the status key for a particular endpoint is updated.

        Extracts the endpoint ID from the response key and passes it, with
        the response value and the endpoint cache, to _report_status().
        If no ID can be extracted the update is logged and ignored.
        """
        status_key = response.key
        endpoint_id = datamodel_v1.get_endpoint_id_from_key(status_key)
        if endpoint_id:
            self._report_status(
                self._endpoints_by_host, endpoint_id, response.value)
        else:
            LOG.error("Failed to extract endpoint ID from: %s.  Ignoring "
                      "update!", status_key)
Example #9
0
    def _on_ep_set(self, response, hostname, workload, endpoint):
        """Called when the status key for a particular endpoint is updated.

        Hands the endpoint's new status value to _report_status() along
        with the cache of known endpoints.  Updates whose key does not
        yield an endpoint ID are logged as errors and dropped.
        """
        parsed_id = datamodel_v1.get_endpoint_id_from_key(response.key)
        if parsed_id:
            known_endpoints = self._endpoints_by_host
            self._report_status(known_endpoints, parsed_id, response.value)
            return
        LOG.error(
            "Failed to extract endpoint ID from: %s.  Ignoring update!",
            response.key)
Example #10
0
    def _on_ep_delete(self, response, hostname, workload, endpoint):
        """Called when the status key for an endpoint is deleted.

        This typically means the endpoint has been deleted.  Drops the
        endpoint from the per-host cache (removing the host's entry once it
        is empty) and reports the deletion to the driver with a status of
        None.
        """
        LOG.debug("Port %s/%s/%s deleted", hostname, workload, endpoint)
        ep_id = datamodel_v1.get_endpoint_id_from_key(response.key)

        host_endpoints = self._endpoints_by_host[hostname]
        host_endpoints.discard(ep_id)
        if not host_endpoints:
            # Nothing left on this host; forget the host entirely.
            del self._endpoints_by_host[hostname]

        self.calico_driver.on_port_status_changed(hostname, endpoint, None)
Example #11
0
    def _on_ep_delete(self, response, hostname, workload, endpoint):
        """Called when the status key for an endpoint is deleted.

        This typically means the endpoint has been deleted.  The endpoint
        is discarded from the cached set for its host, an emptied host
        entry is removed from the cache, and the driver is told the port's
        status is now None.
        """
        LOG.debug("Port %s/%s/%s deleted", hostname, workload, endpoint)
        removed_id = datamodel_v1.get_endpoint_id_from_key(response.key)

        cache = self._endpoints_by_host
        remaining = cache[hostname]
        remaining.discard(removed_id)
        if len(remaining) == 0:
            del cache[hostname]

        status = None
        self.calico_driver.on_port_status_changed(hostname, endpoint, status)
Example #12
0
    def clean_up_endpoint_statuses(self, our_endpoints_ids):
        """
        Mark any endpoint status reports for non-existent endpoints
        for cleanup.

        Reads this host's status subtree from etcd.  Each leaf whose key
        parses as an endpoint ID that is not in our_endpoints_ids is marked
        dirty on the status reporter; leaves that are directories are passed
        to delete_empty_parents().  Returns early (after logging) when status
        reporting is disabled or the subtree does not exist.

        :param set our_endpoints_ids: Set of endpoint IDs for endpoints on
               this host.
        """
        if not self._config.REPORT_ENDPOINT_STATUS:
            _log.debug("Endpoint status reporting disabled, ignoring.")
            return

        our_host_dir = "/".join(
            [FELIX_STATUS_DIR, self._config.HOSTNAME, "workload"])
        try:
            # Grab all the existing status reports.
            response = self.client.read(our_host_dir, recursive=True)
        except EtcdKeyNotFound:
            _log.info("No endpoint statuses found, nothing to clean up")
            return

        for node in response.leaves:
            combined_id = get_endpoint_id_from_key(node.key)
            if combined_id and combined_id not in our_endpoints_ids:
                # We found an endpoint in our status reporting tree that
                # wasn't in the main tree.  Mark it as dirty so the status
                # reporting thread will clean it up.
                _log.debug(
                    "Endpoint %s removed by resync, marking "
                    "status key for cleanup", combined_id)
                # "async" became a reserved word in Python 3.7, so writing
                # it as a literal keyword argument is a SyntaxError there.
                # Passing it through dict unpacking makes the same call and
                # is valid on both Python 2 and Python 3.
                self._status_reporter.mark_endpoint_dirty(
                    combined_id, **{"async": True})
            elif node.dir:
                # This leaf is an empty directory, try to clean it up.
                # This is safe even if another thread is adding keys back
                # into the directory.
                _log.debug("Found empty directory %s, cleaning up",
                           node.key)
                delete_empty_parents(self.client, node.key, our_host_dir)