예제 #1
    def __init__(self, table, ip_version=4):
        """
        Set up an updater for one iptables table of one IP version.

        :param table: name of the iptables table this instance manages;
            stored as self.table and used in the stats counter name.
        :param ip_version: 4 (the default) or 6; selects the iptables or
            ip6tables family of commands.
        """
        super(IptablesUpdater, self).__init__(qualifier="v%d" % ip_version)
        self.table = table
        if ip_version == 4:
            self._restore_cmd = "iptables-restore"
            self._save_cmd = "iptables-save"
            self._iptables_cmd = "iptables"
        else:
            # Only IPv4 and IPv6 are supported; the check must live in the
            # else branch or it would reject the (default) IPv4 case.
            assert ip_version == 6
            self._restore_cmd = "ip6tables-restore"
            self._save_cmd = "ip6tables-save"
            self._iptables_cmd = "ip6tables"

        self._chains_in_dataplane = None
        """
        Set of chains that we know are actually in the dataplane.  Loaded
        at start of day and then kept in sync.
        """
        self._grace_period_finished = False
        """
        Flag that is set after the graceful restart window is over.
        """

        self._explicitly_prog_chains = set()
        """Set of chains that we've explicitly programmed."""

        self._required_chains = defaultdict(set)
        """Map from chain name to the set of names of chains that it
        depends on."""
        self._requiring_chains = defaultdict(set)
        """Map from chain to the set of chains that depend on it.
        Inverse of self.required_chains."""

        # Since it's fairly complex to keep track of the changes required
        # for a particular batch and still be able to roll-back the changes
        # to our data structures, we delegate to a per-batch object that
        # does that calculation.
        self._txn = None
        """:type _Transaction: object used to track index changes
        for this batch."""
        self._completion_callbacks = None
        """List of callbacks to issue once the current batch completes."""

        # Diagnostic counters.
        self._stats = StatCounter("IPv%s %s iptables updater" %
                                  (ip_version, table))

        # Avoid duplicating init logic.
        # NOTE(review): the statement that followed this comment is absent
        # from this excerpt; _reset_batched_work() is the method that
        # populates _txn and _completion_callbacks -- confirm.
        self._reset_batched_work()
예제 #2
class IptablesUpdater(Actor):
    """
    Actor that owns and applies updates to a particular iptables table.
    Supports batching updates for performance and dependency tracking
    between chains.

    iptables safety

    Concurrent access to the same table is not allowed by the
    underlying iptables architecture so there should be one instance of
    this class for each table.  Each IP version has its own set of
    non-conflicting tables.

    However, this class tries to be robust against concurrent access
    from outside the process by detecting and retrying such errors.

    Batching support

    This actor supports batching of multiple updates. It applies updates that
    are on the queue in one atomic batch. This is dramatically faster than
    issuing single iptables requests.

    If a request fails, it does a binary chop using the SplitBatchAndRetry
    mechanism to report the error to the correct request.

    Dependency tracking

    To offload a lot of coordination complexity from the classes that
    use this one, this class supports tracking dependencies between chains
    and programming stubs for missing chains:

    * When calling rewrite_chains() the caller must supply a dict that
      maps from chain to a set of chains it requires (i.e. the chains
      that appear in its --jump and --goto targets).

    * Any chains that are required but not present are created as "stub"
      chains, which drop all traffic. They are marked as such in the
      iptables rules with an iptables comment.

    * When a required chain is later explicitly created, the stub chain is
      replaced with the required contents of the chain.

    * If a required chain is explicitly deleted, it is rewritten as a stub.

    * If a chain exists only as a stub chain to satisfy a dependency, then it
      is cleaned up when the dependency is removed.
    """

    def __init__(self, table, ip_version=4):
        """
        Set up an updater for one iptables table of one IP version.

        :param table: name of the iptables table this instance manages;
            stored as self.table and used in the stats counter name.
        :param ip_version: 4 (the default) or 6; selects the iptables or
            ip6tables family of commands.
        """
        super(IptablesUpdater, self).__init__(qualifier="v%d" % ip_version)
        self.table = table
        if ip_version == 4:
            self._restore_cmd = "iptables-restore"
            self._save_cmd = "iptables-save"
            self._iptables_cmd = "iptables"
        else:
            # Only IPv4 and IPv6 are supported; the check must live in the
            # else branch or it would reject the (default) IPv4 case.
            assert ip_version == 6
            self._restore_cmd = "ip6tables-restore"
            self._save_cmd = "ip6tables-save"
            self._iptables_cmd = "ip6tables"

        self._chains_in_dataplane = None
        """
        Set of chains that we know are actually in the dataplane.  Loaded
        at start of day and then kept in sync.
        """
        self._grace_period_finished = False
        """
        Flag that is set after the graceful restart window is over.
        """

        self._explicitly_prog_chains = set()
        """Set of chains that we've explicitly programmed."""

        self._required_chains = defaultdict(set)
        """Map from chain name to the set of names of chains that it
        depends on."""
        self._requiring_chains = defaultdict(set)
        """Map from chain to the set of chains that depend on it.
        Inverse of self.required_chains."""

        # Since it's fairly complex to keep track of the changes required
        # for a particular batch and still be able to roll-back the changes
        # to our data structures, we delegate to a per-batch object that
        # does that calculation.
        self._txn = None
        """:type _Transaction: object used to track index changes
        for this batch."""
        self._completion_callbacks = None
        """List of callbacks to issue once the current batch completes."""

        # Diagnostic counters.
        self._stats = StatCounter("IPv%s %s iptables updater" %
                                  (ip_version, table))

        # Avoid duplicating init logic: _reset_batched_work() is what
        # populates _txn and _completion_callbacks.
        self._reset_batched_work()

    def _reset_batched_work(self):
        """Reset the per-batch state in preparation for a new batch."""
        self._txn = _Transaction(self._explicitly_prog_chains,
        self._completion_callbacks = []

    def _refresh_chains_in_dataplane(self):
        self._stats.increment("Refreshed chain list")
        raw_ipt_output = subprocess.check_output([self._save_cmd, "--table",
        self._chains_in_dataplane = _extract_our_chains(self.table,

    def _read_unreferenced_chains(self):
        """
        Read the list of chains in the dataplane which are not referenced.

        Runs the ip(6)tables command with --wait --list for this table and
        hands its raw output to _extract_our_unreffed_chains().

        :returns list[str]: list of chains currently in the dataplane that
            are not referenced by other chains.
        """
        raw_ipt_output = subprocess.check_output(
            [self._iptables_cmd, "--wait", "--list", "--table", self.table])
        return _extract_our_unreffed_chains(raw_ipt_output)

    def rewrite_chains(self, update_calls_by_chain,
                       dependent_chains, callback=None):
        """
        Atomically apply a set of updates to the table.

        :param update_calls_by_chain: map from chain name to list of
               iptables-style update calls,
               e.g. {"chain_name": ["-A chain_name -j ACCEPT"]}. Chain will
               be flushed.
        :param dependent_chains: map from chain name to a set of chains
               that that chain requires to exist. They will be created
               (with a default drop) if they don't exist.
        :param callback: optional callable; when given it is queued on the
               list of completion callbacks for the current batch.
        :raises FailedSystemCall if a problem occurred.
        """
        # We actually apply the changes in _finish_msg_batch().  Index the
        # changes by table and chain.
        _log.info("Iptables update: %s", update_calls_by_chain)
        _log.info("Iptables deps: %s", dependent_chains)
        self._stats.increment("Chain rewrites")
        for chain, updates in update_calls_by_chain.iteritems():
            # TODO: double-check whether this flush is needed.
            updates = ["--flush %s" % chain] + updates
            deps = dependent_chains.get(chain, set())
            self._txn.store_rewrite_chain(chain, updates, deps)
        if callback:
            # NOTE(review): the suite of this "if" is absent from the
            # excerpt; queuing the callback matches the documented purpose
            # of _completion_callbacks -- confirm.
            self._completion_callbacks.append(callback)

    # Does direct table manipulation, forbid batching with other messages.
    def ensure_rule_inserted(self, rule_fragment):
        """
        Runs the given rule fragment, prefixed with --insert. If the
        rule was already present, it is removed and reinserted at the
        start of the chain.

        This covers the case where we need to insert a rule into the
        pre-existing kernel chains (only). For chains that are owned by Felix,
        use the more robust approach of rewriting the whole chain using
        rewrite_chains().

        :param rule_fragment: fragment to be inserted. For example,
           "INPUT --jump felix-INPUT"
        :raises FailedSystemCall: if the fallback plain insert fails too.
        """
        self._stats.increment("Rule inserts")
        try:
            # Do an atomic delete + insert of the rule.  If the rule already
            # exists then the rule will be moved to the start of the chain.
            _log.info("Attempting to move any existing instance of rule %r "
                      "to top of chain.", rule_fragment)
            self._execute_iptables(['*%s' % self.table,
                                    '--delete %s' % rule_fragment,
                                    '--insert %s' % rule_fragment,
                                    'COMMIT'])
        except FailedSystemCall:
            # Assume the rule didn't exist. Try inserting it.
            _log.info("Didn't find any existing instance of rule %r, "
                      "inserting it instead.", rule_fragment)
            self._execute_iptables(['*%s' % self.table,
                                    '--insert %s' % rule_fragment,
                                    'COMMIT'])

    def ensure_rule_removed(self, rule_fragment):
        """
        Runs the given rule fragment, prefixed with --delete.

        The delete is repeated until it fails, so that every instance of the
        rule is removed.

        :param rule_fragment: fragment to be deleted. For example,
           "INPUT --jump felix-INPUT"
        :raises FailedSystemCall: if the very first delete fails for a
           reason other than the rule not being present.
        """
        _log.info("Removing rule %r", rule_fragment)
        num_instances = 0
        try:
            while True:  # Delete all instances of rule.
                self._execute_iptables(['*%s' % self.table,
                                        '--delete %s' % rule_fragment,
                                        'COMMIT'])
                num_instances += 1
        except FailedSystemCall as e:
            if num_instances == 0:
                # Line 1 of the input is the table, line 2 is the --delete.
                if "line 2 failed" in e.stderr:
                    # Rule was parsed OK but failed to apply, this means that
                    # it wasn't present.
                    _log.warning("Removal of rule %r failed; not present?",
                                 rule_fragment)
                else:
                    _log.exception("Unexpected failure when trying to "
                                   "delete rule %r", rule_fragment)
                    # NOTE(review): re-raising so an unexpected failure is
                    # not silently swallowed; the original statement here is
                    # absent from the excerpt -- confirm.
                    raise
            else:
                # At least one delete succeeded; the failure just means
                # there are no more instances left.
                _log.info("%s instances of rule %r removed", num_instances,
                          rule_fragment)

    def delete_chains(self, chain_names, callback=None):
        Deletes the named chains.

        :raises FailedSystemCall if a problem occurred.
        # We actually apply the changes in _finish_msg_batch().  Index the
        # changes by table and chain.
        _log.info("Deleting chains %s", chain_names)
        self._stats.increment("Chain deletes")
        for chain in chain_names:
        if callback:

    # It's much simpler to do cleanup in its own batch so that it doesn't have
    # to worry about in-flight updates.
    def cleanup(self):
        Tries to clean up any left-over chains from a previous run that
        are no longer required.
        _log.info("Cleaning up left-over iptables state.")
        self._stats.increment("Cleanups performed")

        # Start with the current state.

        required_chains = set(self._requiring_chains.keys())
        if not self._grace_period_finished:
            # Ensure that all chains that are required but not explicitly
            # programmed are stubs.
            # We have to do this at the end of the graceful restart period
            # during which we may have re-used old chains.
            chains_to_stub = (required_chains -
            _log.info("Graceful restart window finished, stubbing out "
                      "chains: %s", chains_to_stub)
            except NothingToDo:
            self._grace_period_finished = True

        # Now the generic cleanup, look for chains that we're not expecting to
        # be there and delete them.
        chains_we_tried_to_delete = set()
        finished = False
        while not finished:
            # Try to delete all the unreferenced chains, we use a loop to
            # ensure that we then clean up any chains that become unreferenced
            # when we delete the previous lot.
            unreferenced_chains = self._read_unreferenced_chains()
            orphans = (unreferenced_chains -
                       self._explicitly_prog_chains -
            if not chains_we_tried_to_delete.issuperset(orphans):
                _log.info("Cleanup found these unreferenced chains to "
                          "delete: %s", orphans)
                self._stats.increment("Orphans found during cleanup",
                # We've already tried to delete all the chains we found,
                # give up.
                _log.info("Cleanup finished, deleted %d chains, failed to "
                          "delete these chains: %s",
                          len(chains_we_tried_to_delete) - len(orphans),
                finished = True

        # Then some sanity checks:
        temp_chains = self._chains_in_dataplane
        if temp_chains != self._chains_in_dataplane:
            # We want to know about this but it's not fatal.
            _log.error("Chains in data plane inconsistent with calculated "
                       "index.  In dataplane but not in index: %s; In index: "
                       "but not dataplane: %s.",
                       self._chains_in_dataplane - temp_chains,
                       temp_chains - self._chains_in_dataplane)

        missing_chains = ((self._explicitly_prog_chains | required_chains) -
        if missing_chains:
            # This is fatal, some of our chains have disappeared.
            _log.error("Some of our chains disappeared from the dataplane: %s."
                       " Raising an exception.",
            raise IptablesInconsistent(
                "Felix chains missing from iptables: %s" % missing_chains)

    def _start_msg_batch(self, batch):
        """
        Return the given batch unchanged.

        NOTE(review): presumably the Actor-framework hook that runs before a
        batch of messages is processed (counterpart of _finish_msg_batch);
        no per-batch setup is visible here -- confirm against the base class.

        :param batch: the batch of messages about to be handled.
        :returns: the same batch object.
        """
        return batch

    def _finish_msg_batch(self, batch, results):
        start = time.time()
            # We use two passes to update the dataplane.  In the first pass,
            # we make any updates, create new chains and replace to-be-deleted
            # chains with stubs (in case we fail to delete them below).
                input_lines = self._calculate_ipt_modify_input()
            except NothingToDo:
                _log.info("%s no updates in this batch.", self)
                _log.info("%s Successfully processed iptables updates.", self)
        except (IOError, OSError, FailedSystemCall) as e:
            if isinstance(e, FailedSystemCall):
                rc = e.retcode
                rc = "unknown"
            if len(batch) == 1:
                # We only executed a single message, report the failure.
                _log.error("Non-retryable %s failure. RC=%s",
                           self._restore_cmd, rc)
                self._stats.increment("Messages failed due to iptables "
                if self._completion_callbacks:
                final_result = ResultOrExc(None, e)
                results[0] = final_result
                _log.error("Non-retryable error from a combined batch, "
                           "splitting the batch to narrow down culprit.")
                self._stats.increment("Split batch due to error")
                raise SplitBatchAndRetry()
            # Modify succeeded, update our indexes for next time.
            # Make a best effort to delete the chains we no longer want.
            # If we fail due to a stray reference from an orphan chain, we
            # should catch them on the next cleanup().
            for c in self._completion_callbacks:
            self._stats.increment("Batches finished")

        end = time.time()
        _log.debug("Batch time: %.2f %s", end - start, len(batch))

    def _delete_best_effort(self, chains):
        """
        Try to delete all the chains in the input list.

        The chains are first attempted as one batch.  When a batch fails it
        is split in half and each half retried, so that a single
        undeletable chain does not prevent the others from being removed.
        IOError, OSError and FailedSystemCall are logged and counted, never
        propagated to the caller.

        :param chains: iterable of chain names to delete; may be empty.
        """
        if not chains:
            return
        chain_batches = [list(chains)]
        while chain_batches:
            batch = chain_batches.pop(0)
            try:
                # Try the next batch of chains...
                _log.debug("Attempting to delete chains: %s", batch)
                self._attempt_delete(batch)
            except (IOError, OSError, FailedSystemCall):
                _log.warning("Deleting chains %s failed", batch)
                if len(batch) > 1:
                    # We were trying to delete multiple chains, split the
                    # batch in half and put the batches back on the queue to
                    # try again.
                    _log.info("Batch was of length %s, splitting", len(batch))
                    split_point = len(batch) // 2
                    first_half = batch[:split_point]
                    second_half = batch[split_point:]
                    assert len(first_half) + len(second_half) == len(batch)
                    if chain_batches:
                        # Fold the second half into the front of the next
                        # queued batch rather than queuing it separately.
                        chain_batches[0][:0] = second_half
                    else:
                        chain_batches[:0] = [second_half]
                    chain_batches[:0] = [first_half]
                else:
                    # Only trying to delete one chain, give up.  It must still
                    # be referenced.
                    _log.error("Failed to delete chain %s, giving up. Maybe "
                               "it is still referenced?", batch[0])
                    self._stats.increment("Chain delete failures")
            else:
                _log.debug("Deleted chains %s successfully, remaining "
                           "batches: %s", batch, len(chain_batches))

    def _stub_out_chains(self, chains):
        input_lines = self._calculate_ipt_stub_input(chains)

    def _attempt_delete(self, chains):
            input_lines = self._calculate_ipt_delete_input(chains)
        except NothingToDo:
            _log.debug("No chains to delete %s", chains)
            self._execute_iptables(input_lines, fail_log_level=logging.WARNING)
            self._chains_in_dataplane -= set(chains)

    def _update_indexes(self):
        Called after successfully processing a batch, updates the
        indices with the values calculated by the _Transaction.
        self._explicitly_prog_chains = self._txn.expl_prog_chains
        self._required_chains = self._txn.required_chns
        self._requiring_chains = self._txn.requiring_chns

    def _calculate_ipt_modify_input(self):
        Calculate the input for phase 1 of a batch, where we only modify and
        create chains.

        :raises NothingToDo: if the batch requires no modify operations.
        # Valid input looks like this.
        # *table
        # :chain_name
        # :chain_name_2
        # -F chain_name
        # -A chain_name -j ACCEPT
        # COMMIT
        # The chains are created if they don't exist.
        input_lines = []
        affected_chains = self._txn.affected_chains
        for chain in affected_chains:
            if (self._grace_period_finished or
                    chain not in self._chains_in_dataplane or
                    chain not in self._txn.chains_to_stub_out):
                # We're going to rewrite or delete this chain below, mark it
                # for creation/flush.
                input_lines.append(":%s -" % chain)
        for chain in self._txn.chains_to_stub_out:
            if (self._grace_period_finished or
                    chain not in self._chains_in_dataplane):
                # After graceful restart completes, we stub out all chains;
                # during the graceful restart, we reuse any existing chains
                # that happen to be there.
        for chain in self._txn.chains_to_delete:
            # Explicitly told to delete this chain.  Rather than delete it
            # outright, we stub it out first.  Then, if the delete fails
            # due to the chain still being referenced, at least the chain is
            # "safe".  Stubbing it out also stops it from referencing other
            # chains, accidentally keeping them alive.
        for chain, chain_updates in self._txn.updates.iteritems():
        if not input_lines:
            raise NothingToDo
        return ["*%s" % self.table] + input_lines + ["COMMIT"]

    def _calculate_ipt_delete_input(self, chains):
        Calculate the input for phase 2 of a batch, where we actually
        try to delete chains.

        :raises NothingToDo: if the batch requires no delete operations.
        input_lines = []
        found_delete = False
        input_lines.append("*%s" % self.table)
        for chain_name in chains:
            # Delete the chain
            input_lines.append(":%s -" % chain_name)
            input_lines.append("--delete-chain %s" % chain_name)
            found_delete = True
        if found_delete:
            return input_lines
            raise NothingToDo()

    def _calculate_ipt_stub_input(self, chains):
        Calculate input to replace the given chains with stubs.
        input_lines = []
        found_chain_to_stub = False
        input_lines.append("*%s" % self.table)
        for chain_name in chains:
            # Stub the chain
            input_lines.append(":%s -" % chain_name)
            found_chain_to_stub = True
        if found_chain_to_stub:
            return input_lines
            raise NothingToDo()

    def _execute_iptables(self, input_lines, fail_log_level=logging.ERROR):
        Runs ip(6)tables-restore with the given input.  Retries iff
        the COMMIT fails.

        :raises FailedSystemCall: if the command fails on a non-commit
            line or if it repeatedly fails and retries are exhausted.
        backoff = 0.01
        num_tries = 0
        success = False
        while not success:
            input_str = "\n".join(input_lines) + "\n"
            _log.debug("%s input:\n%s", self._restore_cmd, input_str)

            # Run iptables-restore in noflush mode so that it doesn't
            # blow away all the tables we're not touching.
            cmd = [self._restore_cmd, "--noflush", "--verbose"]
                futils.check_call(cmd, input_str=input_str)
            except FailedSystemCall as e:
                # Parse the output to determine if error is retryable.
                retryable, detail = _parse_ipt_restore_error(input_lines,
                num_tries += 1
                if retryable:
                    if num_tries < MAX_IPT_RETRIES:
                        _log.info("%s failed with retryable error. Retry in "
                                  "%.2fs", self._iptables_cmd, backoff)
                        self._stats.increment("iptables commit failure "
                        if backoff > MAX_IPT_BACKOFF:
                            backoff = MAX_IPT_BACKOFF
                        backoff *= (1.5 + random.random())
                            "Failed to run %s.  Out of retries: %s.\n"
                            "Input was:\n%s",
                            self._restore_cmd, detail, e.stdout, e.stderr,
                        self._stats.increment("iptables commit failure "
                                              "(out of retries)")
                        "%s failed with non-retryable error: %s.\n"
                        "Input was:\n%s",
                        self._restore_cmd, detail, e.stdout, e.stderr,
                    self._stats.increment("iptables non-retryable failure")
                self._stats.increment("iptables success")
                success = True
예제 #3
from calico.felix.futils import StatCounter

_log = logging.getLogger(__name__)

# Minimum gevent scheduling delay.  A delay of 0 should mean "yield" but
# gevent has a known issue that a greenlet that sleeps for 0 may be rescheduled
# immediately.  Any small positive value is enough to truly yield.
MIN_DELAY = 0.000001

ResultOrExc = collections.namedtuple("ResultOrExc", ("result", "exception"))

# Local storage to allow diagnostics.
actor_storage = gevent.local.local()

# Global diagnostic counters.
_stats = StatCounter("Actor framework counters")

class Actor(object):
    Class that contains a queue and a greenlet serving that queue.

    max_ops_before_yield = 1000
    """Number of calls to self._maybe_yield before it yields"""

    batch_delay = 0.01
    Minimum delay between schedules of this Actor.  Larger values encourage
    more batching of messages and reduce starvation (but introduce more
    latency when we're under load).
예제 #4
파일: fetcd.py 프로젝트: yarntime/calico
PER_ENDPOINT_KEY = ENDPOINT_DIR + "/<endpoint_id>"
CONFIG_PARAM_KEY = CONFIG_DIR + "/<config_param>"
PER_HOST_CONFIG_PARAM_KEY = PER_HOST_DIR + "/config/<config_param>"
TIER_DATA = POLICY_DIR + "/tier/<tier>/metadata"
TIERED_PROFILE = POLICY_DIR + "/tier/<tier>/policy/<policy_id>"

IPAM_V4_DIR = IPAM_DIR + "/v4"
POOL_V4_DIR = IPAM_V4_DIR + "/pool"
CIDR_V4_KEY = POOL_V4_DIR + "/<pool_id>"

# Max number of events from driver process before we yield to another greenlet.

# Global diagnostic counters.
_stats = StatCounter("Etcd counters")

class EtcdAPI(EtcdClientOwner, Actor):
    Our API to etcd.

    Since the python-etcd API is blocking, we defer API watches to
    a worker greenlet and communicate with it via Events.

    We use a second worker for writing our status back to etcd.  This
    avoids sharing the etcd client between reads and writes, which is
    problematic because we need to handle EtcdClusterIdChanged for polls
    but not for writes.
    def __init__(self, config, hosts_ipset):
예제 #5
파일: fiptables.py 프로젝트: sudeshm/calico
    def __init__(self, table, config, ip_version=4):
        """
        Set up an updater for one iptables table of one IP version.

        :param table: name of the iptables table this instance manages;
            stored as self.table and used in the actor qualifier and the
            stats counter name.
        :param config: object whose REFRESH_INTERVAL attribute is read; a
            value greater than zero starts the periodic refresh greenlet.
        :param ip_version: 4 (the default) or 6; selects the iptables or
            ip6tables family of commands.
        """
        super(IptablesUpdater, self).__init__(qualifier="v%d-%s" %
                                                        (ip_version, table))
        self.table = table
        self.refresh_interval = config.REFRESH_INTERVAL
        if ip_version == 4:
            self._restore_cmd = "iptables-restore"
            self._save_cmd = "iptables-save"
            self._iptables_cmd = "iptables"
        else:
            # Only IPv4 and IPv6 are supported; the check must live in the
            # else branch or it would reject the (default) IPv4 case.
            assert ip_version == 6
            self._restore_cmd = "ip6tables-restore"
            self._save_cmd = "ip6tables-save"
            self._iptables_cmd = "ip6tables"

        self._chains_in_dataplane = None
        """
        Set of chains that we know are actually in the dataplane.  Loaded
        at start of day and then kept in sync.
        """
        self._grace_period_finished = False
        """
        Flag that is set after the graceful restart window is over.
        """

        self._programmed_chain_contents = {}
        """Map from chain name to chain contents, only contains chains that
        have been explicitly programmed."""
        self._inserted_rule_fragments = set()
        """Special-case rule fragments that we've explicitly inserted."""
        self._removed_rule_fragments = set()
        """Special-case rule fragments that we've explicitly removed.
        We need to cache this to defend against other processes accidentally
        reverting our removal."""

        self._required_chains = defaultdict(set)
        """Map from chain name to the set of names of chains that it
        depends on."""
        self._requiring_chains = defaultdict(set)
        """Map from chain to the set of chains that depend on it.
        Inverse of self.required_chains."""

        # Since it's fairly complex to keep track of the changes required
        # for a particular batch and still be able to roll-back the changes
        # to our data structures, we delegate to a per-batch object that
        # does that calculation.
        self._txn = None
        """:type _Transaction: object used to track index changes
        for this batch."""
        self._completion_callbacks = None
        """List of callbacks to issue once the current batch completes."""

        # Diagnostic counters.
        self._stats = StatCounter("IPv%s %s iptables updater" %
                                  (ip_version, table))

        # Avoid duplicating init logic.
        # NOTE(review): the statement that followed this comment is absent
        # from this excerpt; _reset_batched_work() is presumed to be the
        # method that populates _txn and _completion_callbacks -- confirm.
        self._reset_batched_work()

        # Optionally, start periodic refresh timer.
        if self.refresh_interval > 0:
            _log.info("Periodic iptables refresh enabled, starting "
                      "resync greenlet")
            refresh_greenlet = gevent.spawn(self._periodic_refresh)
예제 #6
class IptablesUpdater(Actor):
    """
    Actor that owns and applies updates to a particular iptables table.
    Supports batching updates for performance and dependency tracking
    between chains.

    iptables safety
    ~~~~~~~~~~~~~~~

    Concurrent access to the same table is not allowed by the
    underlying iptables architecture so there should be one instance of
    this class for each table.  Each IP version has its own set of
    non-conflicting tables.

    However, this class tries to be robust against concurrent access
    from outside the process by detecting and retrying such errors.

    iptables manipulation guidelines
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Since any update to iptables is implemented by the iptables commands
    as a read-modify-write of the entire table, we try to batch (see below)
    as many updates into one call to iptables as possible.

    Rather than using individual iptables commands, we make use of
    iptables-restore to rewrite entire chains (or multiple chains) as a
    single atomic operation.

    This also allows us to avoid reading individual rules from iptables,
    which is a very tricky thing to get right (because iptables internally
    normalises rules, they don't always read back as-written).

    Batching support
    ~~~~~~~~~~~~~~~~

    This actor supports batching of multiple updates. It applies updates that
    are on the queue in one atomic batch. This is dramatically faster than
    issuing single iptables requests.

    If a request fails, it does a binary chop using the SplitBatchAndRetry
    mechanism to report the error to the correct request.  To allow a batch
    to be retried, the per-batch state is tracked using a dedicated
    _Transaction object, which can simply be thrown away if the batch fails.

    Dependency tracking
    ~~~~~~~~~~~~~~~~~~~

    To offload a lot of coordination complexity from the classes that
    use this one, this class supports tracking dependencies between chains
    and programming stubs for missing chains:

    * When calling rewrite_chains() the caller must supply a dict that
      maps from chain to a set of chains it requires (i.e. the chains
      that appear in its --jump and --goto targets).

    * Any chains that are required but not present are created as "stub"
      chains, which (by default) drop all traffic. They are marked as such
      in the iptables rules with an iptables comment.  To facilitate graceful
      restart after a failure, the default behaviour for a missing chain can
      be pre-configured via set_missing_chain_override().

    * When a required chain is later explicitly created, the stub chain is
      replaced with the required contents of the chain.

    * If a required chain is explicitly deleted, it is rewritten as a stub
      chain.

    * If a chain exists only as a stub chain to satisfy a dependency, then it
      is cleaned up when the dependency is removed.
    """

    def __init__(self, table, config, ip_version=4):
        """
        Set up the updater for one table of one IP version.

        :param table: name of the iptables table that this actor owns.
        :param config: configuration object; REFRESH_INTERVAL and
            plugins["iptables_generator"] are read from it.
        :param ip_version: 4 (iptables commands) or 6 (ip6tables commands).
        """
        super(IptablesUpdater, self).__init__(qualifier="v%d-%s" %
                                                        (ip_version, table))
        self.table = table
        self.refresh_interval = config.REFRESH_INTERVAL
        self.iptables_generator = config.plugins["iptables_generator"]
        self.ip_version = ip_version
        if ip_version == 4:
            self._restore_cmd = "iptables-restore"
            self._save_cmd = "iptables-save"
            self._iptables_cmd = "iptables"
        else:
            # Only versions 4 and 6 are supported.
            assert ip_version == 6
            self._restore_cmd = "ip6tables-restore"
            self._save_cmd = "ip6tables-save"
            self._iptables_cmd = "ip6tables"

        self._chains_in_dataplane = None
        """Set of chains that we know are actually in the dataplane.  Loaded
        at start of day and then kept in sync."""
        self._grace_period_finished = False
        """Flag that is set after the graceful restart window is over."""

        self._programmed_chain_contents = {}
        """Map from chain name to chain contents, only contains chains that
        have been explicitly programmed."""
        self._inserted_rule_fragments = set()
        """Special-case rule fragments that we've explicitly inserted."""
        self._removed_rule_fragments = set()
        """Special-case rule fragments that we've explicitly removed.
        We need to cache this to defend against other processes accidentally
        reverting our removal."""
        self._missing_chain_overrides = {}
        """Overrides for chain contents when we need to program a chain but
        it's missing."""

        self._required_chains = defaultdict(set)
        """Map from chain name to the set of names of chains that it
        depends on."""
        self._requiring_chains = defaultdict(set)
        """Map from chain to the set of chains that depend on it.
        Inverse of self.required_chains."""

        # Since it's fairly complex to keep track of the changes required
        # for a particular batch and still be able to roll-back the changes
        # to our data structures, we delegate to a per-batch object that
        # does that calculation.
        self._txn = None
        """:type _Transaction: object used to track index changes
        for this batch."""
        self._completion_callbacks = None
        """List of callbacks to issue once the current batch completes."""

        # Diagnostic counters.
        self._stats = StatCounter("IPv%s %s iptables updater" %
                                  (ip_version, table))

        # Avoid duplicating init logic.
        # NOTE(review): the statements under this comment were missing.  The
        # two helpers below are the ones that populate _txn /
        # _completion_callbacks and _chains_in_dataplane, which are still
        # None at this point -- confirm against the upstream file.
        self._reset_batched_work()
        self._load_chain_names_from_iptables()

        # Optionally, start periodic refresh timer.
        if self.refresh_interval > 0:
            _log.info("Periodic iptables refresh enabled, starting "
                      "resync greenlet")
            refresh_greenlet = gevent.spawn(self._periodic_refresh)
            # NOTE(review): refresh_greenlet was otherwise unused and
            # _on_worker_died had no caller; linking them is presumed to be
            # the original intent -- confirm.
            refresh_greenlet.link_exception(self._on_worker_died)

    @property
    def _explicitly_prog_chains(self):
        """
        Set of names of the chains that have been explicitly programmed.

        This is a property because cleanup() reads it as an attribute and
        applies set arithmetic to it; as a plain method those expressions
        would operate on a bound method and raise TypeError.

        :return: a new set built from the keys of
            self._programmed_chain_contents.
        """
        return set(self._programmed_chain_contents)

    def _reset_batched_work(self):
        """Reset the per-batch state in preparation for a new batch."""
        # NOTE(review): the tail of this call was missing.  The arguments
        # mirror the three indexes that _update_indexes() copies back from
        # the transaction (prog_chains, required_chns, requiring_chns) --
        # confirm against _Transaction.__init__.
        self._txn = _Transaction(self._programmed_chain_contents,
                                 self._required_chains,
                                 self._requiring_chains)
        self._completion_callbacks = []

    def _load_chain_names_from_iptables(self):
        """
        Loads the set of (our) chains that already exist from iptables.

        Runs the save command for this table and passes its output to
        _extract_our_chains().

        Populates self._chains_in_dataplane.
        """
        _log.debug("Loading chain names for iptables table %s, using "
                   "command %s", self.table, self._save_cmd)
        self._stats.increment("Refreshed chain list")
        raw_ipt_output = subprocess.check_output([self._save_cmd, "--table",
                                                  self.table])
        self._chains_in_dataplane = _extract_our_chains(self.table,
                                                        raw_ipt_output)

    def _get_unreferenced_chains(self):
        """
        Reads the list of chains in the dataplane which are not referenced.

        :returns: the chains currently in the dataplane that are not
            referenced by other chains, as produced by
            _extract_our_unreffed_chains().  cleanup() applies set
            difference to the result, so it is presumably a set -- confirm.
        """
        # NOTE(review): the opening of the argument list was missing.  Only
        # the command name is restored here; confirm whether further flags
        # preceded "--list" in the upstream file.
        raw_ipt_output = subprocess.check_output(
            [self._iptables_cmd,
             "--list",  # Action to perform.
             "--numeric",  # Avoid DNS lookups.
             "--table", self.table])
        return _extract_our_unreffed_chains(raw_ipt_output)

    def rewrite_chains(self, update_calls_by_chain,
                       dependent_chains, callback=None):
        """
        Atomically apply a set of updates to the table.

        :param update_calls_by_chain: map from chain name to list of
               iptables-style update calls,
               e.g. {"chain_name": ["-A chain_name -j ACCEPT"]}. Chain will
               be flushed.
        :param dependent_chains: map from chain name to a set of chains
               that that chain requires to exist. They will be created
               (with a default drop) if they don't exist.
        :param callback: optional callable; if given it is added to
               self._completion_callbacks for the current batch.
        :raises FailedSystemCall if a problem occurred.
        """
        # We actually apply the changes in _finish_msg_batch().  Index the
        # changes by table and chain.
        _log.info("iptables update to chains %s", update_calls_by_chain.keys())
        _log.debug("iptables update: %s", update_calls_by_chain)
        _log.debug("iptables deps: %s", dependent_chains)
        self._stats.increment("Chain rewrites")
        # items() rather than iteritems() so the loop runs on Python 2 and 3.
        for chain, updates in update_calls_by_chain.items():
            # TODO: double-check whether this flush is needed.
            updates = ["--flush %s" % chain] + updates
            deps = dependent_chains.get(chain, set())
            self._txn.store_rewrite_chain(chain, updates, deps)
        if callback:
            self._completion_callbacks.append(callback)

    def set_missing_chain_override(self, chain_name, fragments):
        """Sets the contents to program if the given chain is required but
        it hasn't yet been written.

        This is useful for graceful restart at start of day, where we want
        to leave a chain in place for as long as possible, but if it's
        missing, we need it to be default-RETURN.

        Must be called before the chain is used as a dependency.

        :param chain_name: name of the chain.
        :param fragments: list of iptables fragments, as used by
               rewrite_chains().  Must not be None.
        """
        _log.info("Storing missing chain override for %s", chain_name)
        assert fragments is not None, "Removal of overrides not implemented"
        assert chain_name not in self._requiring_chains, \
            "Missing chain override set after chain in use"
        self._missing_chain_overrides[chain_name] = fragments

    # Does direct table manipulation, forbid batching with other messages.
    def ensure_rule_inserted(self, rule_fragment):
        """
        Runs the given rule fragment, prefixed with --insert. If the
        rule was already present, it is removed and reinserted at the
        start of the chain.

        This covers the case where we need to insert a rule into the
        pre-existing kernel chains (only). For chains that are owned by Felix,
        use the more robust approach of rewriting the whole chain using
        rewrite_chains().

        :param rule_fragment: fragment to be inserted. For example,
           "INPUT --jump felix-INPUT"
        """
        self._stats.increment("Rule inserts")
        _log.info("Inserting rule %r", rule_fragment)
        # NOTE(review): the statements below were missing from the block.
        # They are restored from the visible helpers: the fragment caches
        # are what _finish_msg_batch() replays on a refresh and
        # _insert_rule() performs the actual insert.  The cache is updated
        # first so the desired state is remembered even if the insert
        # raises -- confirm the ordering.
        self._inserted_rule_fragments.add(rule_fragment)
        self._removed_rule_fragments.discard(rule_fragment)
        self._insert_rule(rule_fragment)

    def _insert_rule(self, rule_fragment, log_level=logging.INFO):
        """
        Execute the iptables commands to atomically (re)insert the
        given rule fragment into iptables.

        Has the side-effect of moving the rule to the top of the
        chain.

        :param rule_fragment: A rule fragment, starting with the chain
            name; will be prefixed with "--insert ", for example, to
            create the actual iptables line to execute.
        :param log_level: level at which progress messages are logged.
        :raises FailedSystemCall: if the fallback plain insert fails.
        """
        try:
            # Do an atomic delete + insert of the rule.  If the rule already
            # exists then the rule will be moved to the start of the chain.
            _log.log(log_level, "Attempting to move any existing instance "
                                "of rule %r to top of chain.", rule_fragment)
            # NOTE(review): the tail of this call was missing.  A failure
            # here is expected when the rule does not exist yet, so it is
            # logged at DEBUG -- confirm the level.
            self._execute_iptables(['*%s' % self.table,
                                    '--delete %s' % rule_fragment,
                                    '--insert %s' % rule_fragment,
                                    'COMMIT'],
                                   fail_log_level=logging.DEBUG)
        except FailedSystemCall:
            # Assume the rule didn't exist. Try inserting it.
            _log.log(log_level, "Didn't find any existing instance of rule "
                                "%r, inserting it instead.", rule_fragment)
            self._execute_iptables(['*%s' % self.table,
                                    '--insert %s' % rule_fragment,
                                    'COMMIT'])

    def ensure_rule_removed(self, rule_fragment):
        """
        If it exists, removes the given rule fragment.  Caches that the
        rule fragment should now not be present.

        WARNING: due to the caching, this is only suitable for a small
        number of static rules.  For example, to add and remove our
        "root" rules, which dispatch to our dynamic chains, from the
        top-level kernel chains.

        The caching is required to defend against other poorly-written
        processes, which use an iptables-save and then iptables-restore
        call to update their rules.  That clobbers our updates (including
        deletions).

        :param rule_fragment: fragment to be deleted. For example,
           "INPUT --jump felix-INPUT"
        """
        _log.info("Removing rule %r", rule_fragment)
        self._stats.increment("Rule removals")
        # NOTE(review): the statements below were missing from the block.
        # They are restored from the docstring (the removal is cached) and
        # from the visible _remove_rule() helper.  The cache is updated
        # first so the desired state is remembered even if the removal
        # raises -- confirm the ordering.
        self._inserted_rule_fragments.discard(rule_fragment)
        self._removed_rule_fragments.add(rule_fragment)
        self._remove_rule(rule_fragment)

    def _remove_rule(self, rule_fragment, log_level=logging.INFO):
        """
        Execute the iptables commands required to (atomically) remove
        the given rule_fragment if it is present.

        :param rule_fragment: A rule fragment, starting with the chain
            name; will be prefixed with "--delete " to create the
            actual iptables line to execute.
        :param log_level: level at which progress messages are logged.
        :raises FailedSystemCall: if the very first delete fails for a
            reason other than the rule (or a pre-requisite) being absent.
        """
        _log.log(log_level, "Ensuring rule is not present %r", rule_fragment)
        num_instances = 0
        try:
            while True:  # Delete all instances of rule.
                # The loop ends when a delete fails; that is the expected
                # exit once no instances are left, hence the DEBUG level.
                # NOTE(review): the tail of this call was missing -- confirm
                # the fail_log_level.
                self._execute_iptables(['*%s' % self.table,
                                        '--delete %s' % rule_fragment,
                                        'COMMIT'],
                                       fail_log_level=logging.DEBUG)
                num_instances += 1
                assert num_instances < 100, "Too many deletes, infinite loop?"
        except FailedSystemCall as e:
            if num_instances == 0:
                # Line 2 of the restore input is the --delete line.
                if "line 2 failed" in e.stderr:
                    # Rule was parsed OK but failed to apply, this means that
                    # it wasn't present.
                    _log.log(log_level, "Removal of rule %r rejected; not "
                                        "present?", rule_fragment)
                elif "at line: 2" in e.stderr and "doesn't exist" in e.stderr:
                    # Rule was rejected because some pre-requisite (such as an
                    # ipset) didn't exist.
                    _log.log(log_level, "Removal of rule %r failed due to "
                                        "missing pre-requisite; rule must "
                                        "not be present.", rule_fragment)
                else:
                    _log.exception("Unexpected failure when trying to "
                                   "delete rule %r", rule_fragment)
                    # NOTE(review): re-raise restored; _finish_msg_batch()
                    # catches FailedSystemCall around this helper -- confirm.
                    raise
            else:
                _log.log(log_level, "%s instances of rule %r removed",
                         num_instances, rule_fragment)

    def delete_chains(self, chain_names, callback=None):
        """
        Deletes the named chains.

        :param chain_names: iterable of names of the chains to delete.
        :param callback: optional callable; if given it is added to
            self._completion_callbacks for the current batch.
        :raises FailedSystemCall if a problem occurred.
        """
        # We actually apply the changes in _finish_msg_batch().  Index the
        # changes by table and chain.
        _log.info("Deleting chains %s", chain_names)
        self._stats.increment("Chain deletes")
        for chain in chain_names:
            # NOTE(review): the loop body was missing.  store_delete() is
            # assumed by analogy with _Transaction.store_rewrite_chain();
            # the method is not visible here -- confirm its name.
            self._txn.store_delete(chain)
        if callback:
            self._completion_callbacks.append(callback)

    # It's much simpler to do cleanup in its own batch so that it doesn't have
    # to worry about in-flight updates.
    def cleanup(self):
        """
        Tries to clean up any left-over chains from a previous run that
        are no longer required.

        On the first call this also ends the graceful restart window: chains
        that are required but were never explicitly programmed are stubbed
        out.  Afterwards, unreferenced chains that we neither programmed nor
        require are deleted, and the dataplane is compared with our index.

        NOTE(review): several statements of this method were missing and
        have been restored from the surrounding comments and helpers; each
        restored statement is marked below.
        """
        _log.info("Cleaning up left-over iptables state.")
        self._stats.increment("Cleanups performed")

        # Start with the current state.
        # NOTE(review): restored call; it is the helper that populates
        # self._chains_in_dataplane.
        self._load_chain_names_from_iptables()

        required_chains = set(self._requiring_chains.keys())
        if not self._grace_period_finished:
            # Ensure that all chains that are required but not explicitly
            # programmed are stubs.
            # We have to do this at the end of the graceful restart period
            # during which we may have re-used old chains.
            chains_to_stub = (required_chains -
                              self._explicitly_prog_chains)
            _log.info("Graceful restart window finished, stubbing out "
                      "chains: %s", chains_to_stub)
            try:
                self._stub_out_chains(chains_to_stub)
            except NothingToDo:
                pass
            self._grace_period_finished = True

        # Now the generic cleanup, look for chains that we're not expecting to
        # be there and delete them.
        chains_we_tried_to_delete = set()
        finished = False
        while not finished:
            # Try to delete all the unreferenced chains, we use a loop to
            # ensure that we then clean up any chains that become unreferenced
            # when we delete the previous lot.
            unreferenced_chains = self._get_unreferenced_chains()
            orphans = (unreferenced_chains -
                       self._explicitly_prog_chains -
                       required_chains)
            if not chains_we_tried_to_delete.issuperset(orphans):
                _log.info("Cleanup found these unreferenced chains to "
                          "delete: %s", orphans)
                # NOTE(review): the second argument was missing; "by" is an
                # assumed keyword of StatCounter.increment -- confirm.
                self._stats.increment("Orphans found during cleanup",
                                      by=len(orphans))
                # NOTE(review): restored; without recording the attempt the
                # loop could never terminate.
                chains_we_tried_to_delete.update(orphans)
                self._delete_best_effort(orphans)
            else:
                # We've already tried to delete all the chains we found,
                # give up.
                _log.info("Cleanup finished, deleted %d chains, failed to "
                          "delete these chains: %s",
                          len(chains_we_tried_to_delete) - len(orphans),
                          orphans)
                finished = True

        # Then some sanity checks:
        expected_chains = self._chains_in_dataplane
        # NOTE(review): restored reload; without it expected_chains and
        # loaded_chains would be the same object and the check vacuous.
        self._load_chain_names_from_iptables()
        loaded_chains = self._chains_in_dataplane
        missing_chains = ((self._explicitly_prog_chains | required_chains) -
                          self._chains_in_dataplane)
        if expected_chains != self._chains_in_dataplane or missing_chains:
            # This is serious, either there's a bug in our model of iptables
            # or someone else has changed iptables under our feet.
            _log.error("Chains in data plane inconsistent with calculated "
                       "index.  In dataplane but not in index: %s; In index: "
                       "but not dataplane: %s; missing from iptables: %s.  "
                       "Another process may have clobbered our updates.",
                       loaded_chains - expected_chains,
                       expected_chains - loaded_chains,
                       missing_chains)

            # Try to recover: trigger a full refresh of the dataplane to
            # bring it into sync.
            # NOTE(review): restored call -- confirm how the refresh is
            # meant to be dispatched.
            self.refresh_iptables()

    def _periodic_refresh(self):
        """
        Loop forever, sleeping between iterations for refresh_interval
        scaled by a random factor between 1.0 and 1.2.
        """
        # NOTE(review): as written the loop only sleeps.  Presumably each
        # iteration should also trigger refresh_iptables(); the statement
        # appears to be missing -- confirm how it should be dispatched.
        while True:
            # Jitter our sleep times by 20%.
            gevent.sleep(self.refresh_interval * (1 + random.random() * 0.2))

    def _on_worker_died(self, watch_greenlet):
        """
        Callback for the death of a worker greenlet: logs the failure and
        kills the process.

        :param watch_greenlet: the greenlet that died; only used in the
            log message.
        """
        # Imported locally because the file-level imports are not visible.
        import sys

        _log.critical("Worker greenlet died: %s; exiting.", watch_greenlet)
        # NOTE(review): the exit statement was missing although both the
        # docstring and the log message promise it -- confirm the exit code.
        sys.exit(1)

    def refresh_iptables(self):
        """
        Re-apply our iptables state to the kernel.

        The work is only recorded here; it is applied when the current
        batch is finished.
        """
        _log.info("Refreshing all our chains")
        # NOTE(review): the statement that records the refresh was missing.
        # _finish_msg_batch() reads self._txn.refresh, so the transaction
        # must be told about it; store_refresh() is assumed by analogy with
        # store_rewrite_chain() and is not visible here -- confirm its name.
        self._txn.store_refresh()

    def _start_msg_batch(self, batch):
        """
        Prepare for a new batch of messages.

        :param batch: the messages about to be processed.
        :return: the batch, unchanged.
        """
        # NOTE(review): restored call.  _reset_batched_work() is documented
        # as preparing the per-batch state "for a new batch", and the
        # message handlers dereference self._txn, which starts out as None.
        self._reset_batched_work()
        return batch

    def _finish_msg_batch(self, batch, results):
        """
        Apply the work accumulated in the current transaction to iptables.

        :param batch: the messages that made up this batch; only its
            length is used here.
        :param results: per-message result list; when a single-message
            batch fails, slot 0 is replaced with a ResultOrExc carrying the
            exception.
        :raises SplitBatchAndRetry: if a batch of more than one message
            fails, so that the failure can be narrowed down.

        NOTE(review): the try/else/finally skeleton and several statements
        of this method were missing and have been restored from the
        surrounding comments; each restored statement is marked below.
        """
        start = time.time()
        try:
            # We use two passes to update the dataplane.  In the first pass,
            # we make any updates, create new chains and replace to-be-deleted
            # chains with stubs (in case we fail to delete them below).
            try:
                input_lines = self._calculate_ipt_modify_input()
            except NothingToDo:
                _log.info("%s no updates in this batch.", self)
            else:
                self._execute_iptables(input_lines)
                _log.info("%s Successfully processed iptables updates.", self)
                # NOTE(review): restored bookkeeping.  Every ":<chain> -"
                # header line names a chain that the restore created or
                # flushed, so those chains now exist in the dataplane.
                self._chains_in_dataplane.update(
                    line[1:].split()[0] for line in input_lines
                    if line.startswith(":"))
        except (IOError, OSError, FailedSystemCall) as e:
            if isinstance(e, FailedSystemCall):
                rc = e.retcode
            else:
                rc = "unknown"
            if len(batch) == 1:
                # We only executed a single message, report the failure.
                _log.error("Non-retryable %s failure. RC=%s",
                           self._restore_cmd, rc)
                self._stats.increment("Messages failed due to iptables "
                                      "error")
                if self._completion_callbacks:
                    # NOTE(review): the callback invocation was missing; the
                    # callbacks are assumed to take the exception (or None
                    # on success) as their only argument -- confirm.
                    for c in self._completion_callbacks:
                        c(e)
                final_result = ResultOrExc(None, e)
                results[0] = final_result
            else:
                _log.error("Non-retryable error from a combined batch, "
                           "splitting the batch to narrow down culprit.")
                self._stats.increment("Split batch due to error")
                raise SplitBatchAndRetry()
        else:
            # Modify succeeded, update our indexes for next time.
            self._update_indexes()
            # Make a best effort to delete the chains we no longer want.
            # If we fail due to a stray reference from an orphan chain, we
            # should catch them on the next cleanup().
            self._delete_best_effort(self._txn.chains_to_delete)
            for c in self._completion_callbacks:
                # NOTE(review): see the callback note above.
                c(None)
            if self._txn.refresh:
                # Re-apply our inserts and deletions.  We do this after the
                # above processing because our inserts typically reference
                # our other chains and if the insert has been "rolled back"
                # by another process then it's likely that the referenced
                # chain was too.
                _log.info("Transaction included a refresh, re-applying our "
                          "inserts and deletions.")
                try:
                    for fragment in self._inserted_rule_fragments:
                        self._insert_rule(fragment, log_level=logging.DEBUG)
                    for fragment in self._removed_rule_fragments:
                        self._remove_rule(fragment, log_level=logging.DEBUG)
                except FailedSystemCall:
                    _log.error("Failed to refresh inserted/removed rules")
        finally:
            # NOTE(review): restored reset, so that the transaction is
            # thrown away whether or not the batch succeeded.
            self._reset_batched_work()
            self._stats.increment("Batches finished")

        end = time.time()
        _log.debug("Batch time: %.2f %s", end - start, len(batch))

    def _delete_best_effort(self, chains):
        """
        Try to delete all the chains in the input list. Any errors are silently
        swallowed.

        A failed multi-chain delete is split in half and retried, so that
        one undeletable chain does not prevent deletion of the others.

        :param chains: iterable of names of the chains to delete.
        """
        if not chains:
            return
        chain_batches = [list(chains)]
        while chain_batches:
            batch = chain_batches.pop(0)
            try:
                # Try the next batch of chains...
                _log.debug("Attempting to delete chains: %s", batch)
                self._attempt_delete(batch)
            except (IOError, OSError, FailedSystemCall):
                _log.warning("Deleting chains %s failed", batch)
                if len(batch) > 1:
                    # We were trying to delete multiple chains, split the
                    # batch in half and put the batches back on the queue to
                    # try again.
                    _log.info("Batch was of length %s, splitting", len(batch))
                    split_point = len(batch) // 2
                    first_half = batch[:split_point]
                    second_half = batch[split_point:]
                    assert len(first_half) + len(second_half) == len(batch)
                    if chain_batches:
                        # Merge the second half into the next queued batch.
                        chain_batches[0][:0] = second_half
                    else:
                        chain_batches[:0] = [second_half]
                    chain_batches[:0] = [first_half]
                else:
                    # Only trying to delete one chain, give up.  It must still
                    # be referenced.
                    _log.error("Failed to delete chain %s, giving up. Maybe "
                               "it is still referenced?", batch[0])
                    self._stats.increment("Chain delete failures")
            else:
                _log.debug("Deleted chains %s successfully, remaining "
                           "batches: %s", batch, len(chain_batches))

    def _stub_out_chains(self, chains):
        """
        Replace the given chains with stubs in the dataplane.

        :param chains: iterable of names of the chains to stub out.
        :raises NothingToDo: if no chains were given.
        """
        input_lines = self._calculate_ipt_stub_input(chains)
        # The computed input was previously discarded; apply it.
        self._execute_iptables(input_lines)

    def _attempt_delete(self, chains):
        """
        Delete the given chains in a single iptables-restore call.

        On success the chains are removed from self._chains_in_dataplane.

        :param chains: iterable of names of the chains to delete.
        :raises FailedSystemCall: if the restore command fails.
        """
        try:
            input_lines = self._calculate_ipt_delete_input(chains)
        except NothingToDo:
            _log.debug("No chains to delete %s", chains)
        else:
            self._execute_iptables(input_lines, fail_log_level=logging.WARNING)
            self._chains_in_dataplane -= set(chains)

    def _update_indexes(self):
        """
        Called after successfully processing a batch, updates the
        indices with the values calculated by the _Transaction.
        """
        self._programmed_chain_contents = self._txn.prog_chains
        self._required_chains = self._txn.required_chns
        self._requiring_chains = self._txn.requiring_chns

    def _calculate_ipt_modify_input(self):
        """
        Calculate the input for phase 1 of a batch, where we only modify and
        create chains.

        :return: list of iptables-restore input lines, starting with the
            "*<table>" header and ending with "COMMIT".
        :raises NothingToDo: if the batch requires no modify operations.
        """
        # Valid input looks like this.
        # *table
        # :chain_name
        # :chain_name_2
        # -F chain_name
        # -A chain_name -j ACCEPT
        # COMMIT
        # The chains are created if they don't exist.
        input_lines = []
        # Track the chains that we decide we need to touch so that we can
        # prepend the appropriate iptables header for each chain.
        modified_chains = set()
        # Generate rules to stub out chains.  We stub chains out if they're
        # referenced by another chain but they're not present for some reason.
        for chain in self._txn.chains_to_stub_out:
            if (self._grace_period_finished or
                    chain in self._txn.explicit_deletes or
                    chain not in self._chains_in_dataplane):
                # During graceful restart, we only stub out chains if
                # * the chain is genuinely missing from the dataplane, or
                # * we were told to delete the chain explicitly (but decided
                #   we couldn't because it was still referenced), implying
                #   that we now know the state of that chain and we should not
                #   wait for the end of graceful restart to clean it up.
                modified_chains.add(chain)
                input_lines.extend(self._missing_chain_stub_rules(chain))

        # Generate rules to stub out chains that we're about to delete, just
        # in case the delete fails later on.  Stubbing it out also stops it
        # from referencing other chains, accidentally keeping them alive.
        for chain in self._txn.chains_to_delete:
            modified_chains.add(chain)
            input_lines.extend(self._missing_chain_stub_rules(chain))

        # Now add the actual chain updates.
        # items() rather than iteritems() so the loop runs on Python 2 and 3.
        for chain, chain_updates in self._txn.updates.items():
            modified_chains.add(chain)
            input_lines.extend(chain_updates)

        # Finally, prepend the input with instructions that do an idempotent
        # create-and-flush operation for the chains that we need to create or
        # rewrite.
        input_lines[:0] = [":%s -" % chain for chain in modified_chains]

        if not input_lines:
            raise NothingToDo
        return ["*%s" % self.table] + input_lines + ["COMMIT"]

    def _calculate_ipt_delete_input(self, chains):
        """
        Calculate the input for phase 2 of a batch, where we actually
        try to delete chains.

        :param chains: iterable of names of the chains to delete.
        :return: list of iptables-restore input lines, starting with the
            "*<table>" header and ending with "COMMIT".
        :raises NothingToDo: if the batch requires no delete operations.
        """
        input_lines = ["*%s" % self.table]
        found_delete = False
        for chain_name in chains:
            # Delete the chain
            input_lines.append(":%s -" % chain_name)
            input_lines.append("--delete-chain %s" % chain_name)
            found_delete = True
        if not found_delete:
            raise NothingToDo()
        # iptables-restore only applies the table once it sees COMMIT.
        input_lines.append("COMMIT")
        return input_lines

    def _calculate_ipt_stub_input(self, chains):
        """
        Calculate input to replace the given chains with stubs.

        :param chains: iterable of names of the chains to stub out.
        :return: list of iptables-restore input lines, starting with the
            "*<table>" header and ending with "COMMIT".
        :raises NothingToDo: if no chains were given.
        """
        input_lines = ["*%s" % self.table]
        found_chain_to_stub = False
        for chain_name in chains:
            # Stub the chain
            input_lines.append(":%s -" % chain_name)
            input_lines.extend(self._missing_chain_stub_rules(chain_name))
            found_chain_to_stub = True
        if not found_chain_to_stub:
            raise NothingToDo()
        # iptables-restore only applies the table once it sees COMMIT.
        input_lines.append("COMMIT")
        return input_lines

    def _execute_iptables(self, input_lines, fail_log_level=logging.ERROR):
        """
        Runs ip(6)tables-restore with the given input.  Retries iff
        the COMMIT fails.

        :param input_lines: list of iptables-restore input lines; they are
            joined with newlines before being passed to the command.
        :param fail_log_level: level at which a final failure is logged.
        :raises FailedSystemCall: if the command fails on a non-commit
            line or if it repeatedly fails and retries are exhausted.

        NOTE(review): the try/else skeleton, the sleep, the re-raise and
        the heads of the two failure log calls were missing and have been
        restored; each restored statement is marked below.
        """
        backoff = 0.01
        num_tries = 0
        success = False
        while not success:
            input_str = "\n".join(input_lines) + "\n"
            _log.debug("%s input:\n%s", self._restore_cmd, input_str)

            # Run iptables-restore in noflush mode so that it doesn't
            # blow away all the tables we're not touching.
            cmd = [self._restore_cmd, "--noflush", "--verbose"]
            try:
                futils.check_call(cmd, input_str=input_str)
            except FailedSystemCall as e:
                # Parse the output to determine if error is retryable.
                # NOTE(review): the second argument was missing; stderr is
                # assumed -- confirm.
                retryable, detail = _parse_ipt_restore_error(input_lines,
                                                             e.stderr)
                num_tries += 1
                if retryable:
                    if num_tries < MAX_IPT_RETRIES:
                        _log.info("%s failed with retryable error. Retry in "
                                  "%.2fs", self._iptables_cmd, backoff)
                        # NOTE(review): the stat name suffix was missing;
                        # "(retryable)" is assumed by analogy with
                        # "(out of retries)" below.
                        self._stats.increment("iptables commit failure "
                                              "(retryable)")
                        # NOTE(review): restored sleep; the log line above
                        # announces the wait.
                        gevent.sleep(backoff)
                        if backoff > MAX_IPT_BACKOFF:
                            backoff = MAX_IPT_BACKOFF
                        backoff *= (1.5 + random.random())
                        continue
                    else:
                        # NOTE(review): the visible format string had fewer
                        # placeholders than arguments; the "Output"/"Error"
                        # lines and the trailing input_str argument are
                        # restored to match.
                        _log.log(
                            fail_log_level,
                            "Failed to run %s.  Out of retries: %s.\n"
                            "Output:\n%s\n"
                            "Error:\n%s\n"
                            "Input was:\n%s",
                            self._restore_cmd, detail, e.stdout, e.stderr,
                            input_str)
                        self._stats.increment("iptables commit failure "
                                              "(out of retries)")
                else:
                    _log.log(
                        fail_log_level,
                        "%s failed with non-retryable error: %s.\n"
                        "Output:\n%s\n"
                        "Error:\n%s\n"
                        "Input was:\n%s",
                        self._restore_cmd, detail, e.stdout, e.stderr,
                        input_str)
                    self._stats.increment("iptables non-retryable failure")
                # Both non-retry paths end here: propagate the failure, as
                # promised by the docstring.
                raise
            else:
                self._stats.increment("iptables success")
                success = True

    def _missing_chain_stub_rules(self, chain_name):
        """
        :param chain_name: name of the chain to stub out.
        :return: List of rule fragments to replace the given chain with a
            single drop rule, or the pre-configured override for the chain
            if one was stored via set_missing_chain_override().
        """
        if chain_name in self._missing_chain_overrides:
            _log.debug("Generating missing chain %s; override in place",
                       chain_name)
            fragment = self._missing_chain_overrides[chain_name]
        else:
            fragment = ["--flush %s" % chain_name]
            # NOTE(review): the head of this call was missing; only its
            # final argument and two closing parentheses were visible.
            # drop_rules() on the iptables generator plugin, with these
            # arguments, is an assumption -- confirm against the plugin
            # interface.
            fragment.extend(self.iptables_generator.drop_rules(
                self.ip_version,
                chain_name,
                None,
                'WARNING Missing chain'))
        return fragment
# Example #7
    def __init__(self, table, config, ip_version=4):
        """
        Set up the updater for one table of one IP version.

        :param table: name of the iptables table that this actor owns.
        :param config: configuration object; REFRESH_INTERVAL and
            plugins["iptables_generator"] are read from it.
        :param ip_version: 4 (iptables commands) or 6 (ip6tables commands).
        """
        super(IptablesUpdater, self).__init__(qualifier="v%d-%s" %
                                                        (ip_version, table))
        self.table = table
        self.refresh_interval = config.REFRESH_INTERVAL
        self.iptables_generator = config.plugins["iptables_generator"]
        self.ip_version = ip_version
        if ip_version == 4:
            self._restore_cmd = "iptables-restore"
            self._save_cmd = "iptables-save"
            self._iptables_cmd = "iptables"
        else:
            # Only versions 4 and 6 are supported.
            assert ip_version == 6
            self._restore_cmd = "ip6tables-restore"
            self._save_cmd = "ip6tables-save"
            self._iptables_cmd = "ip6tables"

        self._chains_in_dataplane = None
        """Set of chains that we know are actually in the dataplane.  Loaded
        at start of day and then kept in sync."""
        self._grace_period_finished = False
        """Flag that is set after the graceful restart window is over."""

        self._programmed_chain_contents = {}
        """Map from chain name to chain contents, only contains chains that
        have been explicitly programmed."""
        self._inserted_rule_fragments = set()
        """Special-case rule fragments that we've explicitly inserted."""
        self._removed_rule_fragments = set()
        """Special-case rule fragments that we've explicitly removed.
        We need to cache this to defend against other processes accidentally
        reverting our removal."""
        self._missing_chain_overrides = {}
        """Overrides for chain contents when we need to program a chain but
        it's missing."""

        self._required_chains = defaultdict(set)
        """Map from chain name to the set of names of chains that it
        depends on."""
        self._requiring_chains = defaultdict(set)
        """Map from chain to the set of chains that depend on it.
        Inverse of self.required_chains."""

        # Since it's fairly complex to keep track of the changes required
        # for a particular batch and still be able to roll-back the changes
        # to our data structures, we delegate to a per-batch object that
        # does that calculation.
        self._txn = None
        """:type _Transaction: object used to track index changes
        for this batch."""
        self._completion_callbacks = None
        """List of callbacks to issue once the current batch completes."""

        # Diagnostic counters.
        self._stats = StatCounter("IPv%s %s iptables updater" %
                                  (ip_version, table))

        # Avoid duplicating init logic.
        # NOTE(review): the statements under this comment were missing.  The
        # two helpers below are the ones that populate _txn /
        # _completion_callbacks and _chains_in_dataplane, which are still
        # None at this point -- confirm against the upstream file.
        self._reset_batched_work()
        self._load_chain_names_from_iptables()

        # Optionally, start periodic refresh timer.
        if self.refresh_interval > 0:
            _log.info("Periodic iptables refresh enabled, starting "
                      "resync greenlet")
            refresh_greenlet = gevent.spawn(self._periodic_refresh)
            # NOTE(review): refresh_greenlet was otherwise unused and
            # _on_worker_died had no caller; linking them is presumed to be
            # the original intent -- confirm.
            refresh_greenlet.link_exception(self._on_worker_died)