Ejemplo n.º 1
0
class Coordinator:
    """
    Class that coordinates the configuration, state and status reports
    for a single LVS instance
    """

    serverConfigUrl = 'file:///etc/pybal/squids'

    intvLoadServers = 60

    metric_keywords = {
        'labelnames': ('service', ),
        'namespace': 'pybal',
        'subsystem': 'service'
    }

    metrics = {
        'servers':
        Gauge('servers', 'Amount of servers', **metric_keywords),
        'servers_enabled':
        Gauge('servers_enabled', 'Amount of enabled servers',
              **metric_keywords),
        'servers_up':
        Gauge('servers_up', 'Amount of up servers', **metric_keywords),
        'servers_pooled':
        Gauge('servers_pooled', 'Amount of pooled servers', **metric_keywords),
        'can_depool':
        Gauge('can_depool', 'Can depool more servers', **metric_keywords),
        'pooled_down_servers':
        Gauge('pooled_down_servers',
              'Amount of down servers pooled because too many down',
              **metric_keywords),
        'could_not_depool_total':
        Counter('could_not_depool_total',
                'Pybal could not depool a server because too many down',
                **metric_keywords),
        'depool_threshold':
        Gauge(
            'depool_threshold',
            "Threshold of up servers vs total servers below which pybal can't depool any more",
            **metric_keywords),
    }

    def __init__(self, lvsservice, configUrl):
        """
        Sets up the per-service state, starts observing the server
        configuration and publishes the depool threshold metric.

        lvsservice: the LVS service being coordinated; its `name` becomes
            the 'service' metric label and its getDepoolThreshold() value
            seeds the 'depool_threshold' gauge.
        configUrl: location of the server list; handed to
            config.ConfigurationObserver.fromUrl() together with this
            coordinator.
        """

        # Maps host name -> Server (filled in by onConfigUpdate)
        self.servers = {}
        self.lvsservice = lvsservice
        self.metric_labels = {'service': self.lvsservice.name}
        # Servers that are down but had to stay pooled (see depool())
        self.pooledDownServers = set()
        self.configHash = None
        # Shadows the class-level default serverConfigUrl for this instance
        self.serverConfigUrl = configUrl
        # Placeholder; replaced by a DeferredList on every onConfigUpdate()
        self.serverInitDeferredList = defer.Deferred()
        self.configObserver = config.ConfigurationObserver.fromUrl(
            self, configUrl)
        # NOTE(review): presumably the observer calls back into
        # onConfigUpdate() once started - confirm in config module
        self.configObserver.startObserving()

        self.metrics['depool_threshold'].labels(**self.metric_labels).set(
            self.lvsservice.getDepoolThreshold())

    def __str__(self):
        """Returns the LVS service name wrapped in square brackets."""
        serviceName = self.lvsservice.name
        return "[%s]" % serviceName

    def assignServers(self):
        """
        Collects every server currently marked as pooled and hands the
        resulting set over to LVSService.
        """

        pooledServers = set()
        for candidate in self.servers.itervalues():
            if candidate.pooled:
                pooledServers.add(candidate)

        # Hand over the pooled servers to LVSService
        self.lvsservice.assignServers(pooledServers)

    def refreshModifiedServers(self):
        """
        Recomputes the up and pooled state of every server flagged as
        modified; unmodified servers are left untouched.
        """

        for server in self.servers.itervalues():
            if server.modified:
                server.up = server.calcStatus()
                # A server is only pooled when it is both enabled and up
                server.pooled = server.enabled and server.up

    def resultDown(self, monitor, reason=None):
        """
        Handles a 'down' result reported by a single monitoring instance:
        logs it and, if the server was considered up, marks it down and
        depools it when it was pooled.
        """

        server = monitor.server

        template = "Monitoring instance {monitor} " \
                   "reports server {host} ({status}) down: {reason}"
        text = template.format(
            service=self,
            monitor=monitor.name(),
            host=server.host,
            status=server.textStatus(),
            reason=(reason or '(reason unknown)'))
        log.error(text, system=self.lvsservice.name)

        # Nothing more to do when the server was already considered down
        if not server.up:
            return

        server.up = False
        if server.pooled:
            self.depool(server)

    def resultUp(self, monitor):
        """
        Handles an 'up' result reported by a single monitoring instance:
        when the server was down and its overall status is now up, marks
        it up and repools it if it is enabled and ready.
        """

        server = monitor.server

        # Ignore the result if the server is already up, or if its
        # calculated overall status is still not up
        if server.up or not server.calcStatus():
            return

        text = "Server {} ({}) is up".format(server.host, server.textStatus())
        log.info(text, system=self.lvsservice.name)
        server.up = True
        if server.enabled and server.ready:
            self.repool(server)

    def depool(self, server):
        """
        Removes a single pooled Server from the LVS service when the
        depool threshold allows it; otherwise keeps it pooled and records
        it as pooled-but-down.
        """

        assert server.pooled

        if not self.canDepool():
            # Too many servers are down already: keep this one pooled
            self.pooledDownServers.add(server)
            text = "Could not depool server " \
                   "{} because of too many down!".format(server.host)
            log.error(text, system=self.lvsservice.name)
            counter = self.metrics['could_not_depool_total']
            counter.labels(**self.metric_labels).inc()
        else:
            self.lvsservice.removeServer(server)
            self.pooledDownServers.discard(server)
            gauge = self.metrics['servers_pooled']
            gauge.labels(**self.metric_labels).dec()

        self._updatePooledDownMetrics()

    def repool(self, server):
        """
        Adds a single server back to the pool, then retries depooling the
        servers that previously had to stay pooled while down.
        """

        assert server.enabled and server.ready

        if server.pooled:
            # Down server that could not be depooled earlier: it never
            # left the pool, so there is nothing to add
            text = "Leaving previously pooled but down server {} pooled"
            log.info(text.format(server.host), system=self.lvsservice.name)
        else:
            self.lvsservice.addServer(server)
            self.metrics['servers_pooled'].labels(**self.metric_labels).inc()

        # The server is up again, so it no longer counts as pooled-but-down
        self.pooledDownServers.discard(server)
        self._updatePooledDownMetrics()

        # With one more server up, earlier refused depools may now succeed
        while self.pooledDownServers and self.canDepool():
            self.depool(self.pooledDownServers.pop())

    def canDepool(self):
        """
        Returns True when the number of servers that are up is at least
        the depool threshold fraction of all known servers.
        """

        totalCount = len(self.servers)

        # Servers not marked down are exactly those with a truthy 'up'
        upCount = sum(1 for server in self.servers.itervalues() if server.up)

        # The amount of up servers may never drop below the threshold
        return upCount >= totalCount * self.lvsservice.getDepoolThreshold()

    def onConfigUpdate(self, config):
        """
        Applies a new server list to this coordinator.

        config: mapping of host name -> host configuration. Note that
            inside this method the parameter shadows the `config` module
            name used by the constructor.

        Existing servers are merged with their new configuration, unknown
        hosts are built and initialized, and servers missing from the new
        list are destroyed and removed. Pool assignment happens later, in
        _serverInitDone(), once all new servers finished initializing.
        """

        # Start out assuming every known server is gone; entries are popped
        # from this copy as they are found in the new configuration
        delServers = self.servers.copy()  # Shallow copy

        # Results of initialize() for the servers added by this update
        initList = []

        for hostName, hostConfig in config.items():
            if hostName in self.servers:
                # Existing server. merge
                server = delServers.pop(hostName)
                server.merge(hostConfig)
                data = {
                    'status': (server.enabled and "enabled" or "disabled"),
                    'host': hostName,
                    'weight': server.weight
                }
                log.info(
                    "Merged {status} server {host}, weight {weight}".format(
                        **data),
                    system=self.lvsservice.name)
            else:
                # New server
                server = Server.buildServer(hostName, hostConfig,
                                            self.lvsservice)
                # Log data is captured before initServer() runs
                data = {
                    'status': (server.enabled and "enabled" or "disabled"),
                    'host': hostName,
                    'weight': server.weight
                }
                # Initialize with LVS service specific configuration
                self.lvsservice.initServer(server)
                self.servers[hostName] = server
                initList.append(server.initialize(self))
                log.info("New {status} server {host}, weight {weight}".format(
                    **data),
                         system=self.lvsservice.name)

        # Remove old servers
        for hostName, server in delServers.iteritems():
            # Unlike the messages above, this one is logged without system=
            log.info(
                "{} Removing server {} (no longer found in new configuration)".
                format(self, hostName))
            server.destroy()
            del self.servers[hostName]

        # Calculate up status for previously existing, modified servers
        self.refreshModifiedServers()

        # Wait for all new servers to finish initializing
        self.serverInitDeferredList = defer.DeferredList(initList).addCallback(
            self._serverInitDone)

        # Update metrics
        self._updateServerMetrics()
        self._updatePooledDownMetrics()

    def _serverInitDone(self, result):
        """
        Callback for the DeferredList of server initializations: assigns
        the pooled servers to the LVSService and refreshes pool metrics.
        """

        log.info("{} Initialization complete".format(self))

        # Assign the updated list of pooled servers to the LVSService
        self.assignServers()

        pooledCount = 0
        for server in self.servers.itervalues():
            if server.pooled:
                pooledCount += 1

        gauge = self.metrics['servers_pooled']
        gauge.labels(**self.metric_labels).set(pooledCount)
        self._updatePooledDownMetrics()

    def _updateServerMetrics(self):
        """Sets the total, enabled and up server gauges from self.servers"""
        totalCount = len(self.servers)
        self.metrics['servers'].labels(**self.metric_labels).set(totalCount)

        enabledCount = len(
            [server for server in self.servers.itervalues() if server.enabled])
        self.metrics['servers_enabled'].labels(**self.metric_labels).set(
            enabledCount)

        upCount = len(
            [server for server in self.servers.itervalues() if server.up])
        self.metrics['servers_up'].labels(**self.metric_labels).set(upCount)

    def _updatePooledDownMetrics(self):
        """Sets the pooled-but-down gauge and the can_depool (0/1) gauge"""
        pooledDownCount = len(self.pooledDownServers)
        self.metrics['pooled_down_servers'].labels(**self.metric_labels).set(
            pooledDownCount)

        # Gauges take numbers, so publish the boolean as 1 or 0
        canDepoolValue = 1 if self.canDepool() else 0
        self.metrics['can_depool'].labels(**self.metric_labels).set(
            canDepoolValue)
Ejemplo n.º 2
0
class RunCommandMonitoringProtocol(monitor.LoopingCheckMonitoringProtocol):
    """
    Monitor that checks server uptime by repeatedly fetching a certain URL
    """

    __name__ = 'RunCommand'

    INTV_CHECK = 60

    TIMEOUT_RUN = 20

    metric_labelnames = ('service', 'host', 'monitor')
    metric_keywords = {
        'namespace': 'pybal',
        'subsystem': 'monitor_' + __name__.lower()
    }

    runcommand_metrics = {
        'run_duration_seconds':
        Gauge('run_duration_seconds',
              'Command duration',
              labelnames=metric_labelnames + ('result', 'exitcode'),
              **metric_keywords)
    }

    def __init__(self, coordinator, server, configuration={}, reactor=None):
        """
        Reads the runcommand.* options and prepares the per-check state.

        coordinator, server, configuration, reactor: passed through
            unchanged to the ancestor constructor.

        Options read: 'timeout' (int, defaults to TIMEOUT_RUN), 'command'
        (string, required), 'arguments' (string or list of strings,
        optional) and 'log-output' (bool, defaults to True).
        """

        # Call ancestor constructor
        super(RunCommandMonitoringProtocol,
              self).__init__(coordinator, server, configuration, reactor)

        # Names the 'arguments' option may refer to when it is evaluated
        evalNames = {'server': server}

        self.timeout = self._getConfigInt('timeout', self.TIMEOUT_RUN)
        self.command = self._getConfigString('command')
        try:
            arguments = self._getConfigStringList('arguments',
                                                  locals=evalNames)
        except (KeyError, ValueError):
            # Default to empty stringlist if runcommand.arguments has not been
            # specified or if it is an empty list
            arguments = [""]

        if isinstance(arguments, str):
            # A single configured string comes back as a bare str, but
            # runCommand() concatenates self.arguments onto a list, which
            # would raise TypeError. Normalise to a one-element list.
            arguments = [arguments]
        self.arguments = arguments

        self.logOutput = self._getConfigBool('log-output', True)

        self.runningProcess = None
        self.runningProcessDeferred = None
        # Set by runCommand(); initialised here so the attribute always exists
        self.checkStartTime = None

    def stop(self):
        """Stops the monitor and kills the check process, if one is known"""

        super(RunCommandMonitoringProtocol, self).stop()

        process = self.runningProcess
        if process is None:
            return

        # Try to kill any running check
        try:
            process.signalProcess(signal.SIGKILL)
        except error.ProcessExitedAlready:
            # The process is gone already; nothing left to kill
            pass

    def runCommand(self):
        """
        Performs a single check by spawning the configured command.
        Returns the Deferred that processEnded() fires.
        """

        self.checkStartTime = seconds()

        # argv[0] is the command itself, followed by its arguments
        argv = [self.command] + self.arguments
        # A timeout of 0 is passed on as None
        timeout = self.timeout or None

        self.runningProcess = self._spawnProcess(
            self, self.command, argv, sessionLeader=True, timeout=timeout)

        pending = defer.Deferred()
        self.runningProcessDeferred = pending
        return pending

    check = runCommand

    def makeConnection(self, process):
        """
        No-op; the process argument is ignored.

        NOTE(review): presumably required because this object is handed to
        the spawned process as its process protocol - confirm.
        """
        pass

    def childDataReceived(self, childFD, data):
        """
        Reports data received from the child, with newline, carriage
        return and tab escaped. Does nothing when log-output is disabled.
        """
        if not self.logOutput:
            return

        # Escape control chars
        escapes = (('\n', r'\n'), ('\r', r'\r'), ('\t', r'\t'))
        for char, subst in escapes:
            data = data.replace(char, subst)

        self.report("Cmd stdout: " + data)

    def childConnectionLost(self, childFD):
        """No-op; the childFD argument is ignored."""
        pass

    def processEnded(self, reason):
        """
        Called when the process has ended. Reports the check result,
        records the run duration metric and fires the check's Deferred
        with the failure type.
        """

        duration = seconds() - self.checkStartTime

        # Labels stay None for any failure other than the two handled below
        result = None
        exitcode = None
        if reason.check(error.ProcessDone):
            self._resultUp()
            result, exitcode = 'successful', 0
        elif reason.check(error.ProcessTerminated):
            self._resultDown(reason.getErrorMessage())
            result, exitcode = 'failed', reason.value.exitCode

        gauge = self.runcommand_metrics['run_duration_seconds']
        gauge.labels(result=result, exitcode=exitcode,
                     **self.metric_labels).set(duration)

        self.runningProcessDeferred.callback(reason.type)
        # Re-raise anything that is not a normal or abnormal process exit
        reason.trap(error.ProcessDone, error.ProcessTerminated)

    def leftoverProcesses(self, allKilled):
        """
        Called when the child terminated cleanly, but left some of
        its child processes behind. Reports a warning that says whether
        all of them could be killed.
        """

        msg = ("Command %s %s left child processes behind, which have been killed!"
               if allKilled else
               "Command %s %s left child processes behind, and not all could be killed!")
        text = msg % (self.command, str(self.arguments))
        self.report(text, level=logging.WARN)

    def _spawnProcess(self,
                      processProtocol,
                      executable,
                      args=(),
                      env={},
                      path=None,
                      uid=None,
                      gid=None,
                      childFDs=None,
                      sessionLeader=False,
                      timeout=None):
        """
        Replacement for posixbase.PosixReactorBase.spawnProcess with added
        process group / session and timeout support, and support for
        non-POSIX platforms and PTYs removed.

        args and env are first passed through the default reactor's
        _checkProcessArgs(); everything else is handed unchanged to
        ProcessGroupProcess, whose instance is returned.

        NOTE(review): env={} is a shared mutable default; it is only
        passed on here, never modified in this method.
        """

        # Use the default reactor instead of self.reactor as not all (testing)
        # reactors provide _checkProcessArgs, and it's harmless anyway.
        args, env = twisted.internet.reactor._checkProcessArgs(args, env)
        # The process itself is still created on self.reactor
        return ProcessGroupProcess(self.reactor, executable, args, env, path,
                                   processProtocol, uid, gid, childFDs,
                                   sessionLeader, timeout)
Ejemplo n.º 3
0
class DNSQueryMonitoringProtocol(monitor.LoopingCheckMonitoringProtocol):
    """
    Monitor that checks a DNS server by doing repeated DNS queries
    """

    __name__ = 'DNSQuery'

    TIMEOUT_QUERY = 5

    catchList = (defer.TimeoutError, error.DomainError,
                 error.AuthoritativeDomainError, error.DNSFormatError, error.DNSNameError,
                 error.DNSQueryRefusedError, error.DNSQueryTimeoutError,
                 error.DNSServerError, error.DNSUnknownError)

    metric_labelnames = ('service', 'host', 'monitor')
    metric_keywords = {
        'namespace': 'pybal',
        'subsystem': 'monitor_' + __name__.lower()
    }

    dnsquery_metrics = {
        'request_duration_seconds': Gauge(
            'request_duration_seconds',
            'DNS query duration',
            labelnames=metric_labelnames + ('result',),
            **metric_keywords)
    }

    def __init__(self, coordinator, server, configuration, reactor=None):
        """
        Reads the dnsquery.* options and prepares the per-check state.

        coordinator, server, configuration, reactor: passed through
            unchanged to the ancestor constructor.

        Options read: 'timeout' (int, defaults to TIMEOUT_QUERY),
        'hostnames' (string or list of strings, required) and
        'fail-on-nxdomain' (bool, defaults to False).
        """

        # Call ancestor constructor
        super(DNSQueryMonitoringProtocol, self).__init__(
            coordinator,
            server,
            configuration,
            reactor=reactor)

        self.toQuery = self._getConfigInt('timeout', self.TIMEOUT_QUERY)

        hostnames = self._getConfigStringList('hostnames')
        if isinstance(hostnames, str):
            # A single configured string comes back as a bare str, and
            # check() draws from this with random.choice(), which would
            # pick one character of it. Normalise to a one-element list.
            hostnames = [hostnames]
        self.hostnames = hostnames

        self.failOnNXDOMAIN = self._getConfigBool('fail-on-nxdomain', False)

        # Created in run()
        self.resolver = None
        # Deferred of the query in flight, set by check()
        self.DNSQueryDeferred = None
        self.checkStartTime = None

    def run(self):
        """Starts the monitoring and creates the resolver used by check()"""

        super(DNSQueryMonitoringProtocol, self).run()

        # Create a resolver. Use the DNS server IPv4 addresses instead of
        # self.server.ip as Twisted's createResolver (< 17.1.0) does not
        # support querying a nameserver over IPv6.
        nameservers = [(address, 53) for address in self.server.ip4_addresses]
        self.resolver = client.createResolver(nameservers)

    def stop(self):
        """Stops the monitoring and cancels the pending query, if any"""
        super(DNSQueryMonitoringProtocol, self).stop()

        pending = self.DNSQueryDeferred
        if pending is None:
            return
        pending.cancel()

    def check(self):
        """
        Performs a single check: looks up a randomly chosen configured
        hostname, as either an A or an AAAA record, and returns the
        Deferred of that lookup.
        """

        hostname = random.choice(self.hostnames)
        recordType = random.choice([dns.A, dns.AAAA])
        query = dns.Query(hostname, type=recordType)

        self.checkStartTime = runtime.seconds()

        # Pick the resolver method matching the chosen record type
        lookup = None
        if query.type == dns.A:
            lookup = self.resolver.lookupAddress
        elif query.type == dns.AAAA:
            lookup = self.resolver.lookupIPV6Address

        if lookup is not None:
            self.DNSQueryDeferred = lookup(hostname, timeout=[self.toQuery])

        pending = self.DNSQueryDeferred
        pending = pending.addCallback(self._querySuccessful, query)
        pending = pending.addErrback(self._queryFailed, query)
        pending.addBoth(self._checkFinished)
        return self.DNSQueryDeferred

    def _querySuccessful(self, (answers, authority, additional), query):
        """Called when the DNS query finished successfully."""

        if query.type in (dns.A, dns.AAAA):
            addressFamily = query.type == dns.A and socket.AF_INET or socket.AF_INET6
            addresses = " ".join([socket.inet_ntop(addressFamily, r.payload.address)
                                  for r in answers
                                  if r.type == query.type])
            resultStr = "%s %s %s" % (query.name, dns.QUERY_TYPES[query.type], addresses)
        else:
            resultStr = None

        duration = runtime.seconds() - self.checkStartTime
        self.report('DNS query successful, %.3f s' % (duration)
                    + (resultStr and (': ' + resultStr) or ""))
        self._resultUp()

        self.dnsquery_metrics['request_duration_seconds'].labels(
            result='successful',
            **self.metric_labels
            ).set(duration)

        return answers, authority, additional
Ejemplo n.º 4
0
class ProxyFetchMonitoringProtocol(monitor.LoopingCheckMonitoringProtocol):
    """
    Monitor that checks server uptime by repeatedly fetching a certain URL
    """

    TIMEOUT_GET = 5

    HTTP_STATUS = 200

    __name__ = 'ProxyFetch'

    from twisted.internet import error
    from twisted.web import error as weberror
    catchList = (defer.TimeoutError, weberror.Error, error.ConnectError,
                 error.DNSLookupError)

    metric_labelnames = ('service', 'host', 'monitor')
    metric_keywords = {
        'namespace': 'pybal',
        'subsystem': 'monitor_' + __name__.lower()
    }

    proxyfetch_metrics = {
        'request_duration_seconds':
        Gauge(
            'request_duration_seconds',
            'HTTP(S) request duration',
            labelnames=metric_labelnames + ('result', ),  # TODO: statuscode
            **metric_keywords)
    }

    def __init__(self, coordinator, server, configuration={}, reactor=None):
        """
        Reads the proxyfetch.* options and prepares the per-check state.

        coordinator, server, configuration, reactor: passed through
            unchanged to the ancestor constructor.

        Options read: 'timeout' (int, defaults to TIMEOUT_GET),
        'http_status' (int, defaults to HTTP_STATUS) and 'url' (string or
        list of strings, required).
        """

        # Call ancestor constructor
        super(ProxyFetchMonitoringProtocol, self).__init__(coordinator,
                                                           server,
                                                           configuration,
                                                           reactor=reactor)

        self.toGET = self._getConfigInt('timeout', self.TIMEOUT_GET)
        self.expectedStatus = self._getConfigInt('http_status',
                                                 self.HTTP_STATUS)

        # Deferred of the request in flight, set by check()
        self.getPageDeferred = None

        self.checkStartTime = None

        urls = self._getConfigStringList('url')
        if isinstance(urls, str):
            # A single configured string comes back as a bare str, and
            # check() draws from this with random.choice(), which would
            # pick one character of it. Normalise to a one-element list.
            urls = [urls]
        self.URL = urls

    def stop(self):
        """Stops the monitor and cancels the pending request, if any"""

        super(ProxyFetchMonitoringProtocol, self).stop()

        pending = self.getPageDeferred
        if pending is None:
            return
        pending.cancel()

    def check(self):
        """
        Performs a single check: fetches a randomly chosen configured URL
        from the monitored server and returns the request's Deferred.
        Logs a warning and returns None when the monitor is not active.
        """

        if not self.active:
            log.warn(
                "ProxyFetchMonitoringProtocol.check() called while active == False"
            )
            return

        # FIXME: Use GET as a workaround for a Twisted bug with HEAD/Content-length
        # where it expects a body and throws a PartialDownload failure

        url = random.choice(self.URL)

        self.checkStartTime = seconds()
        request = self.getProxyPage(url,
                                    method='GET',
                                    host=self.server.ip,
                                    port=self.server.port,
                                    status=self.expectedStatus,
                                    timeout=self.toGET,
                                    followRedirect=False,
                                    reactor=self.reactor)
        request = request.addCallbacks(self._fetchSuccessful,
                                       self._fetchFailed)
        # Only stored once the whole callback chain has been attached
        self.getPageDeferred = request.addBoth(self._checkFinished)
        return self.getPageDeferred

    def _fetchSuccessful(self, result):
        """
        Callback for a successful fetch: reports it, marks the monitor up
        and records the request duration. Returns result unchanged.
        """

        elapsed = seconds() - self.checkStartTime
        self.report('Fetch successful, %.3f s' % (elapsed))
        self._resultUp()

        gauge = self.proxyfetch_metrics['request_duration_seconds']
        gauge.labels(result='successful', **self.metric_labels).set(elapsed)

        return result

    def _fetchFailed(self, failure):
        """
        Errback for a failed fetch: reports it, marks the monitor down and
        records the request duration. Failures outside catchList are
        re-raised; a cancelled request is ignored.
        """

        # Don't act as if the check failed if we cancelled it
        cancelled = failure.check(defer.CancelledError)
        if not cancelled:
            elapsed = seconds() - self.checkStartTime
            self.report('Fetch failed, %.3f s' % (elapsed),
                        level=logging.WARN)

            self._resultDown(failure.getErrorMessage())

            gauge = self.proxyfetch_metrics['request_duration_seconds']
            gauge.labels(result='failed', **self.metric_labels).set(elapsed)

            # Swallow the expected failure types, re-raise everything else
            failure.trap(*self.catchList)

        return None

    def _checkFinished(self, result):
        """
        Called when getProxyPage finished with either success or failure,
        to do after-check cleanups.

        Clears checkStartTime and returns result unchanged, so the value
        or failure is passed further down the callback chain.
        """

        self.checkStartTime = None

        return result

    @staticmethod
    def getProxyPage(url,
                     contextFactory=None,
                     host=None,
                     port=None,
                     status=None,
                     reactor=twisted.internet.reactor,
                     *args,
                     **kwargs):
        """Download a web page as a string. (modified from twisted.web.client.getPage)

        Download a page. Return a deferred, which will callback with a
        page (as a string) or errback with a description of the error.

        url: the URL to request.
        contextFactory: SSL context factory for https URLs; a default
            ssl.ClientContextFactory is created when omitted.
        host, port: where to connect to; each falls back to the value
            parsed from the URL when not given.
        status: expected HTTP status. A value strictly between 300 and 304
            selects RedirHTTPClientFactory; anything else, including None,
            selects client.HTTPClientFactory.
        reactor: reactor used to open the connection.

        See HTTPClientFactory to see what extra args can be passed.
        """
        # Guard against the None default explicitly instead of relying on
        # how None compares with integers
        expectsRedirect = status is not None and 300 < status < 304
        if expectsRedirect:
            factory = RedirHTTPClientFactory(url, *args, **kwargs)
        else:
            factory = client.HTTPClientFactory(url, *args, **kwargs)

        host = host or factory.host
        port = port or factory.port

        if factory.scheme == 'https':
            from twisted.internet import ssl
            if contextFactory is None:
                contextFactory = ssl.ClientContextFactory()
            reactor.connectSSL(host, port, factory, contextFactory)
        else:
            reactor.connectTCP(host, port, factory)
        return factory.deferred
Ejemplo n.º 5
0
class MonitoringProtocol(object):
    """
    Base class for all monitoring protocols. Declares a few obligatory
    abstract methods, and some commonly useful functions.
    """

    __name__ = ''

    metric_labelnames = ('service', 'host', 'monitor')
    metric_keywords = {
        'labelnames': metric_labelnames,
        'namespace': 'pybal',
        'subsystem': 'monitor'
    }

    metrics = {
        'up_transitions_total':
        Counter('up_transitions_total', 'Monitor up transition count',
                **metric_keywords),
        'down_transitions_total':
        Counter('down_transitions_total', 'Monitor down transition count',
                **metric_keywords),
        'up_results_total':
        Counter('up_results_total', 'Monitor up result count',
                **metric_keywords),
        'down_results_total':
        Counter('down_results_total', 'Monitor down result count',
                **metric_keywords),
        'status':
        Gauge('status', 'Monitor up status', **metric_keywords)
    }

    def __init__(self, coordinator, server, configuration={}, reactor=None):
        """
        Stores the collaborators and initializes the monitoring state.

        coordinator: notified through resultUp()/resultDown() on state
            changes; may be falsy, in which case nobody is notified.
        server: the monitored server; its host and lvsservice.name are
            used as metric labels.
        configuration: monitor options, read through the _getConfig*
            helpers. NOTE(review): the {} default is a shared mutable
            object, and a plain dict lacks the getboolean()/getint()
            methods those helpers call - confirm callers always pass one.
        reactor: reactor to use; defaults to twisted.internet.reactor.
        """

        self.coordinator = coordinator
        self.server = server
        self.configuration = configuration
        self.up = None  # None, False (Down) or True (Up)
        self.reactor = reactor or twisted.internet.reactor

        # True between run() and stop()
        self.active = False
        # Makes the very first result count as a state transition
        self.firstCheck = True
        # Handle of the 'before shutdown' trigger installed by run()
        self._shutdownTriggerID = None

        self.metric_labels = {
            'service': self.server.lvsservice.name,
            'host': self.server.host,
            'monitor': self.name()
        }

    def run(self):
        """
        Marks the monitor active and registers stop() to be called before
        reactor shutdown. Must not be called on an active monitor.
        """
        assert self.active is False
        self.active = True

        # Install cleanup handler
        triggerID = self.reactor.addSystemEventTrigger(
            'before', 'shutdown', self.stop)
        self._shutdownTriggerID = triggerID

    def stop(self):
        """
        Marks the monitor inactive and unregisters the shutdown trigger
        installed by run(), if there is one.
        """
        self.active = False

        triggerID = self._shutdownTriggerID
        if triggerID is None:
            return

        # Remove cleanup handler
        self.reactor.removeSystemEventTrigger(triggerID)
        self._shutdownTriggerID = None

    def name(self):
        """
        Returns a printable name for this monitor: the __name__ attribute,
        which is '' on this base class and overridden by subclasses.
        """
        return self.__name__

    def _resultUp(self):
        """
        Counts an 'up' result. On a state change (an active monitor that
        was down) or on the very first check, also sets the state to Up,
        notifies the coordinator and updates the transition metrics.
        """
        self.metrics['up_results_total'].labels(**self.metric_labels).inc()

        # 'and' binds tighter than 'or': the first check always counts
        isTransition = (self.active and self.up is False) or self.firstCheck
        if not isTransition:
            return

        self.up = True
        self.firstCheck = False
        if self.coordinator:
            self.coordinator.resultUp(self)

        transitions = self.metrics['up_transitions_total']
        transitions.labels(**self.metric_labels).inc()
        self.metrics['status'].labels(**self.metric_labels).set(1)

    def _resultDown(self, reason=None):
        """
        Counts a 'down' result. On a state change (an active monitor that
        was up) or on the very first check, also sets the state to Down,
        notifies the coordinator with the reason and updates the
        transition metrics.
        """
        self.metrics['down_results_total'].labels(**self.metric_labels).inc()

        # 'and' binds tighter than 'or': the first check always counts
        isTransition = (self.active and self.up is True) or self.firstCheck
        if not isTransition:
            return

        self.up = False
        self.firstCheck = False
        if self.coordinator:
            self.coordinator.resultDown(self, reason)

        transitions = self.metrics['down_transitions_total']
        transitions.labels(**self.metric_labels).inc()
        self.metrics['status'].labels(**self.metric_labels).set(0)

    def report(self, text, level=logging.DEBUG):
        """
        Logs text prefixed with the server host and status, tagged with
        the service and monitor names.
        """
        host = self.server.host
        status = self.server.textStatus()
        msg = "%s (%s): %s" % (host, status, text)

        system = "%s %s" % (self.server.lvsservice.name, self.__name__)
        _log(msg, level, system)

    def _getConfigBool(self, optionname, default=None):
        """Returns the boolean option '<monitor name>.<optionname>'"""
        key = '%s.%s' % (self.__name__.lower(), optionname)
        return self.configuration.getboolean(key, default)

    def _getConfigInt(self, optionname, default=None):
        """Returns the integer option '<monitor name>.<optionname>'"""
        key = '%s.%s' % (self.__name__.lower(), optionname)
        return self.configuration.getint(key, default)

    def _getConfigString(self, optionname):
        """
        Returns the option '<monitor name>.<optionname>', raising
        ValueError when its value is not exactly of type str.
        """
        key = self.__name__.lower() + '.' + optionname
        val = self.configuration[key]
        if type(val) != str:
            raise ValueError("Value of %s is not a string" % optionname)
        return val

    def _getConfigStringList(self, optionname, locals=None, globals=None):
        """Takes a (string) value, eval()s it and checks whether it
        consists of either a single string, or a single list of
        strings.

        Returns the evaluated value: a bare str, or a non-empty list of
        strings. Callers must be prepared for either shape.

        Raises KeyError (from the configuration lookup, for a plain
        mapping) when the option is missing, and ValueError when the
        evaluated value is neither a str nor a non-empty list of strings.
        """
        key = self.__name__.lower() + '.' + optionname
        # SECURITY NOTE(review): the option value is executed with eval(),
        # so anyone able to write the configuration can run arbitrary
        # code here. Confirm the configuration source is trusted.
        # NOTE(review): eval()'s signature is eval(expr, globals, locals),
        # so the `locals` parameter lands in eval's globals slot and
        # `globals` in its locals slot. Names still resolve either way;
        # swapping them would change what the expression can see.
        val = eval(self.configuration[key], locals, globals)
        if type(val) == str:
            return val
        elif (isinstance(val, list)
              and all(isinstance(x, basestring) for x in val) and val):
            # Checked that each list member is a string and that list is not
            # empty.
            return val
        else:
            raise ValueError("Value of %s is not a string or stringlist" %
                             optionname)
Ejemplo n.º 6
0
class BGPPeering(BGPFactory):
    """Class managing a BGP session with a peer"""

    implements(IBGPPeering, interfaces.IPushProducer)

    metric_labelnames = {'local_asn', 'peer'}
    metric_keywords = {
        'labelnames': metric_labelnames,
        'namespace': 'pybal',
        'subsystem': 'bgp'
    }

    metrics = {
        'bgp_session_established':
        Gauge('session_established', 'BGP session established',
              **metric_keywords)
    }

    def __init__(self, myASN=None, peerAddr=None):
        """
        Initializes the peering state and resets the session gauge to 0.

        myASN: local AS number; used as the 'local_asn' metric label.
        peerAddr: address of the peer; used as the 'peer' metric label.

        Every assignment below goes through the __setattr__ override.
        """
        self.myASN = myASN
        self.peerAddr = peerAddr
        self.peerId = None
        self.fsm = BGPFactory.FSM(self)
        # NOTE(review): set((a, b)) builds a set of the two individual
        # values, not a set holding one (AFI, SAFI) pair - confirm this is
        # what the consumers of addressFamilies expect.
        self.addressFamilies = set((AFI_INET, SAFI_UNICAST))
        self.inConnections = []
        self.outConnections = []
        self.estabProtocol = None  # reference to the BGPProtocol instance in ESTAB state
        self.consumers = set()

        self.metric_labels = {'local_asn': self.myASN, 'peer': self.peerAddr}
        # Instance attribute referring to the shared class-level dict
        self.metrics = BGPPeering.metrics
        self.metrics['bgp_session_established'].labels(
            **self.metric_labels).set(0)

    def __setattr__(self, name, value):
        """
        Stores the attribute and, when an already existing estabProtocol
        changes value, logs whether the BGP session was established or is
        gone and updates the session gauge (1 or 0).
        """
        # The `name in self.__dict__` test skips the very first assignment
        # made by the constructor
        if name == 'estabProtocol' and name in self.__dict__ and getattr(
                self, name) != value:
            if value:
                msg = 'established'
                metric_value = 1
            else:
                msg = 'gone'
                metric_value = 0
            self.log(
                "BGP session %s for ASN %s peer %s" %
                (msg, self.myASN, self.peerAddr), logging.INFO)
            self.metrics['bgp_session_established'].labels(
                **self.metric_labels).set(metric_value)
        #  old style class, super().__setattr__() doesn't work
        #  https://docs.python.org/2/reference/datamodel.html#customizing-attribute-access
        self.__dict__[name] = value

    def buildProtocol(self, addr):
        """
        Builds a BGP protocol instance, initializes it and records it as
        an outgoing connection. Returns None if no protocol was built.
        """

        self.log("Building a new BGP protocol instance")

        protocol = BGPFactory.buildProtocol(self, addr)
        if protocol is None:
            return protocol

        self._initProtocol(protocol, addr)
        self.outConnections.append(protocol)
        return protocol

    def takeServerConnection(self, addr):
        """
        Builds a BGP protocol instance for a server connection,
        initializes it and records it as an incoming connection. Returns
        None if no protocol was built.
        """

        protocol = BGPFactory.buildProtocol(self, addr)
        if protocol is None:
            return protocol

        self._initProtocol(protocol, addr)
        self.inConnections.append(protocol)
        return protocol

    def _initProtocol(self, protocol, addr):
        """
        Initializes a BGPProtocol instance: links it to this peering,
        hands over the current FSM, creates a fresh FSM for the peering
        and wires up the session callbacks.

        protocol: the freshly built protocol instance.
        addr: address of the connection; its port selects the initial
            FSM state of the protocol.
        """

        protocol.bgpPeering = self

        # Hand over the FSM
        protocol.fsm = self.fsm
        protocol.fsm.protocol = protocol

        # Create a new fsm for internal use for now
        self.fsm = BGPFactory.FSM(self)
        # Copied before the protocol's state is overwritten below
        self.fsm.state = protocol.fsm.state

        # NOTE(review): presumably a remote port equal to PORT means we
        # initiated the connection - confirm
        if addr.port == PORT:
            protocol.fsm.state = ST_CONNECT
        else:
            protocol.fsm.state = ST_ACTIVE

        # Set up callback and error handlers
        protocol.deferred.addCallbacks(self.sessionEstablished,
                                       self.protocolError)

    def clientConnectionFailed(self, connector, reason):
        """
        Called when the outgoing connection failed. Logs the failure and
        feeds a connectionFailed event to the peering's own FSM.

        connector: the connector that failed; unused here.
        reason: failure describing why the connection failed.
        """

        self.log("Client connection failed: %s" % reason.getErrorMessage(),
                 logging.INFO)

        # There is no protocol instance yet at this point.
        # Catch a possible NotificationException
        try:
            self.fsm.connectionFailed()
        except NotificationSent:
            # TODO: error handling
            # Deliberately ignored for now; the exception object was never
            # used, so it is no longer bound to a name
            pass
Ejemplo n.º 7
0
Archivo: fsm.py Proyecto: guoyu07/PyBal
class FSM(object):
    class BGPTimer(object):
        """
        Restartable timer built on reactor.callLater().

        Its interface differs slightly from the Twisted DelayedCall
        interface: cancel(), reset() and active() can all be used on a
        timer that has no usable delayed call.
        """

        def __init__(self, callable):
            # Passed to reactor.callLater() whenever a new call is scheduled
            self.callable = callable
            # No delayed call exists until the first reset()
            self.delayedCall = None

        def cancel(self):
            """Stops the timer; a timer that is not running is left alone."""

            try:
                self.delayedCall.cancel()
            except AttributeError:
                # Typically: no delayed call was ever scheduled
                pass
            except (error.AlreadyCalled, error.AlreadyCancelled):
                pass

        def reset(self, secondsFromNow):
            """
            Reschedules the timer to expire secondsFromNow seconds from now,
            starting it when there is no delayed call left to reschedule.
            """

            try:
                self.delayedCall.reset(secondsFromNow)
            except (AttributeError, error.AlreadyCalled,
                    error.AlreadyCancelled):
                # Nothing to reschedule, so schedule a fresh call instead
                self.delayedCall = reactor.callLater(
                    secondsFromNow, self.callable)

        def active(self):
            """Tells whether the timer is currently running."""

            try:
                running = self.delayedCall.active()
            except AttributeError:
                running = False
            return running

    # Class-level defaults; __init__() assigns self.protocol per instance,
    # while state stays at this default until it is first assigned.
    protocol = None

    state = ST_IDLE

    # Hold timer value (seconds) used right after an OPEN message was sent,
    # see connectionMade() and delayOpenEvent().
    largeHoldTime = 4 * 60
    # NOTE(review): not read anywhere inside this class.
    sendNotificationWithoutOpen = True

    # Event number -> name of the FSM method handling that event. The
    # numbers match the event numbers quoted in the method docstrings
    # (presumably the RFC 4271 event numbering -- TODO confirm).
    eventMethods = {
        1: 'manualStart',
        2: 'manualStop',
        3: 'automaticStart',
        9: 'connectRetryTimeEvent',
        10: 'holdTimeEvent',
        11: 'keepAliveEvent',
        12: 'delayOpenEvent',
        13: 'idleHoldTimeEvent',
        17: 'connectionMade',
        18: 'connectionFailed',
        19: 'openReceived',
        20: 'openReceived',
        21: 'headerError',
        22: 'openMessageError',
        23: 'openCollisionDump',
        24: 'versionError',
        25: 'notificationReceived',
        26: 'keepAliveReceived',
        27: 'updateReceived',
        28: 'updateError'
    }

    # Names of the BGPTimer attributes that __init__() creates
    bgpTimers = {
        'connectRetryTimer', 'holdTimer', 'keepAliveTimer', 'delayOpenTimer',
        'idleHoldTimer'
    }

    # Label names and keyword arguments shared by the metrics below
    metric_labelnames = {'local_asn', 'state', 'local_ip', 'remote_ip', 'side'}
    metric_keywords = {
        'labelnames': metric_labelnames,
        'namespace': 'pybal',
        'subsystem': 'bgp'
    }

    # Defined on the class, so all FSM instances update the same gauge;
    # __update_metrics() moves a session between its 'state' label values.
    metrics = {
        'bgp_session_state_count':
        Gauge('session_state_count',
              'Number of sessions in the specified state', **metric_keywords)
    }

    def __init__(self, bgpPeering=None, protocol=None):
        """
        Sets up the timers, counters and option flags of the state machine.

        bgpPeering and protocol are both optional; when a peering is given,
        its myASN is used as the 'local_asn' metric label. All timer
        intervals are in seconds.
        """

        self.bgpPeering = bgpPeering
        self.protocol = protocol

        # Option flags
        self.allowAutomaticStart = True
        self.allowAutomaticStop = False
        self.delayOpen = False
        self.dampPeerOscillations = True

        # Connect retry handling
        self.connectRetryCounter = 0
        self.connectRetryTime = 30
        self.connectRetryTimer = FSM.BGPTimer(self.connectRetryTimeEvent)

        # Hold and keepalive handling; keepalives go out at a third of
        # the hold time
        self.holdTime = 3 * 60
        self.holdTimer = FSM.BGPTimer(self.holdTimeEvent)
        self.keepAliveTime = self.holdTime / 3
        self.keepAliveTimer = FSM.BGPTimer(self.keepAliveEvent)

        # Delayed open and idle hold handling
        self.delayOpenTime = 30
        self.delayOpenTimer = FSM.BGPTimer(self.delayOpenEvent)
        self.idleHoldTime = 30
        self.idleHoldTimer = FSM.BGPTimer(self.idleHoldTimeEvent)

        # Labels for the session state gauge; the address and side labels
        # are not set here
        local_asn = self.bgpPeering.myASN if self.bgpPeering else None
        self.metric_labels = {
            'state': stateDescr[self.state],
            'local_asn': local_asn,
            'local_ip': None,
            'remote_ip': None,
            'side': None
        }

        # True until __update_metrics() has counted this session once
        self.initial_idle_state = True

    def log(self, msg, lvl=logging.DEBUG):
        """
        Logs msg at level lvl through _log().

        The log system string identifies this FSM instance, followed by
        the peer address taken from the protocol or, when there is no
        protocol, from the peering.
        """

        parts = ["bgp.FSM@{}".format(hex(id(self)))]
        if self.protocol is not None:
            parts.append(" peer {}".format(self.protocol.peerAddrStr()))
        elif self.bgpPeering is not None:
            parts.append(" peer {}".format(self.bgpPeering.peerAddr))
        _log(msg, lvl, "".join(parts))

    def __setattr__(self, name, value):
        """
        Stores the attribute; a change of 'state' to a different value is
        logged at INFO level and reflected in the metrics first.
        """

        state_changes = name == 'state' and value != getattr(self, name)
        if state_changes:
            self.log("State is now: %s" % stateDescr[value], logging.INFO)
            self.__update_metrics(value)
        super(FSM, self).__setattr__(name, value)

    def __update_metrics(self, new_state):
        """
        Moves this session to the gauge series of new_state.

        Does nothing until both the 'local_ip' and 'remote_ip' labels are
        set. The first update only increments the new state; later updates
        also decrement the state the session was counted under before.
        """

        labels = self.metric_labels
        if not (labels['local_ip'] and labels['remote_ip']):
            return

        gauge = self.metrics['bgp_session_state_count']
        if self.initial_idle_state:
            # Nothing has been counted for this session so far
            self.initial_idle_state = False
        else:
            gauge.labels(**labels).dec()

        labels['state'] = stateDescr[new_state]
        gauge.labels(**labels).inc()

    def manualStart(self):
        """
        Handles a BGP ManualStart event (event 1).

        In the Idle state the connect retry counter is cleared and the
        connect retry timer is started; in any other state nothing
        happens. No protocol instance exists yet at this point, so this
        method relies on support from BGPPeering.manualStart().
        """

        if self.state != ST_IDLE:
            return

        self.connectRetryCounter = 0
        self.connectRetryTimer.reset(self.connectRetryTime)

    def manualStop(self):
        """
        Handles a BGP ManualStop event (event 2).

        Outside the Idle state a Cease notification is sent, all timers
        are stopped, resources are released, the connection is closed and
        the state returns to Idle. NotificationSent is raised afterwards.
        In the Idle state nothing happens.
        """

        if self.state == ST_IDLE:
            return

        self.protocol.sendNotification(ERR_CEASE, 0)

        all_timers = (self.connectRetryTimer, self.holdTimer,
                      self.keepAliveTimer, self.delayOpenTimer,
                      self.idleHoldTimer)
        for running in all_timers:
            running.cancel()

        if self.bgpPeering is not None:
            self.bgpPeering.releaseResources(self.protocol)
        self._closeConnection()
        self.connectRetryCounter = 0
        self.state = ST_IDLE
        raise NotificationSent(self.protocol, ERR_CEASE, 0)

    def automaticStart(self, idleHold=False):
        """
        Should be called when a BGP Automatic Start event (event 3) is requested.

        With idleHold set, only the idle hold timer is started. Otherwise
        the connect retry counter is cleared and the connect retry timer
        is started.

        Returns True when BGPPeering should initiate a connection attempt
        and False otherwise, which includes every call made outside the
        Idle state.
        """

        if self.state != ST_IDLE:
            # The event is only acted upon in the Idle state. Return an
            # explicit False rather than falling off the end with None.
            return False

        if idleHold:
            self.idleHoldTimer.reset(self.idleHoldTime)
            return False

        self.connectRetryCounter = 0
        self.connectRetryTimer.reset(self.connectRetryTime)
        return True

    def connectionMade(self):
        """
        Handles a successfully established TCP connection with the peer.
        (events 16, 17)

        Only acted upon in the Connect and Active states. The connect
        retry timer is stopped; with delayOpen set the delay open timer is
        started, otherwise an Open message is sent, the hold timer is set
        to largeHoldTime and the state becomes OpenSent.
        """

        if self.state not in (ST_CONNECT, ST_ACTIVE):
            return

        # State Connect/Active, event 16 or 17
        self.connectRetryTimer.cancel()
        if self.delayOpen:
            self.delayOpenTimer.reset(self.delayOpenTime)
            return

        if self.bgpPeering:
            self.bgpPeering.completeInit(self.protocol)
        self.protocol.sendOpen()
        self.holdTimer.reset(self.largeHoldTime)
        self.state = ST_OPENSENT

    def connectionFailed(self):
        """
        Handles a failed or lost TCP connection. (event 18)

        What happens depends on the state the FSM is in when the event
        arrives; in the Idle state nothing happens.
        """

        current = self.state

        if current == ST_CONNECT:
            if self.delayOpenTimer.active():
                # Delay open was pending: retry later from the Active state
                self.connectRetryTimer.reset(self.connectRetryTime)
                self.delayOpenTimer.cancel()
                self.state = ST_ACTIVE
            else:
                # Give up on this connection and return to Idle
                self.connectRetryTimer.cancel()
                self._closeConnection()
                if self.bgpPeering:
                    self.bgpPeering.releaseResources(self.protocol)
                self.state = ST_IDLE
        elif current == ST_ACTIVE:
            self.connectRetryTimer.reset(self.connectRetryTime)
            self.delayOpenTimer.cancel()
            if self.bgpPeering:
                self.bgpPeering.releaseResources(self.protocol)
            self.connectRetryCounter += 1
            # TODO: osc damping
            self.state = ST_IDLE
        elif current == ST_OPENSENT:
            if self.bgpPeering:
                self.bgpPeering.releaseResources(self.protocol)
            self._closeConnection()
            self.connectRetryTimer.reset(self.connectRetryTime)
            self.state = ST_ACTIVE
        elif current in (ST_OPENCONFIRM, ST_ESTABLISHED):
            self._errorClose()

    def openReceived(self):
        """
        Should be called when a BGP Open message was received from
        the peer. (events 19, 20)

        Raises NotificationSent after a FSM error notification was sent
        to the peer and the connection was closed.
        """

        if self.state in (ST_CONNECT, ST_ACTIVE):
            if self.delayOpenTimer.active():
                # State Connect, event 20
                self.connectRetryTimer.cancel()
                if self.bgpPeering:
                    self.bgpPeering.completeInit(self.protocol)
                self.delayOpenTimer.cancel()
                self.protocol.sendOpen()
                self.protocol.sendKeepAlive()
                if self.holdTime != 0:
                    self.keepAliveTimer.reset(self.keepAliveTime)
                    self.holdTimer.reset(self.holdTime)
                else:  # holdTime == 0
                    self.keepAliveTimer.cancel()
                    self.holdTimer.cancel()

                self.state = ST_OPENCONFIRM
            else:
                # State Connect, event 19
                self._errorClose()

        elif self.state == ST_OPENSENT:
            if not self.delayOpen:
                # State OpenSent, event 19
                self.delayOpenTimer.cancel()
                self.connectRetryTimer.cancel()
                self.protocol.sendKeepAlive()
                if self.holdTime > 0:
                    self.keepAliveTimer.reset(self.keepAliveTime)
                    self.holdTimer.reset(self.holdTime)
                else:
                    # A zero hold time means neither timer is used. The
                    # hold timer was started with largeHoldTime when the
                    # Open message was sent, so it has to be stopped here
                    # or it would expire and close the session later on.
                    self.keepAliveTimer.cancel()
                    self.holdTimer.cancel()
                self.state = ST_OPENCONFIRM
            else:
                # State OpenSent, event 20
                self.protocol.sendNotification(ERR_FSM, 0)
                self._errorClose()
                raise NotificationSent(self.protocol, ERR_FSM, 0)

        elif self.state == ST_OPENCONFIRM:
            if not self.delayOpen:
                # State OpenConfirm, events 19
                self.log("Running collision detection")

                # Perform collision detection
                self.protocol.collisionDetect()
            else:
                # State OpenConfirm, event 20
                self.protocol.sendNotification(ERR_FSM, 0)
                self._errorClose()
                raise NotificationSent(self.protocol, ERR_FSM, 0)

        elif self.state == ST_ESTABLISHED:
            # State Established, event 19 or 20
            self.protocol.sendNotification(ERR_FSM, 0)
            self._errorClose()
            raise NotificationSent(self.protocol, ERR_FSM, 0)

    def headerError(self, suberror, data=''):
        """
        Handles an invalid BGP message header. (event 21)

        Outside the Idle state a Message Header Error notification with
        the given suberror and data is sent, the connection is closed and
        NotificationSent is raised. In the Idle state nothing happens.
        """

        if self.state == ST_IDLE:
            return

        self.protocol.sendNotification(ERR_MSG_HDR, suberror, data)
        # Note: RFC4271 states that we should send ERR_FSM in the
        # Established state, which contradicts earlier statements.
        self._errorClose()
        raise NotificationSent(self.protocol, ERR_MSG_HDR, suberror, data)

    def openMessageError(self, suberror, data=''):
        """
        Handles an invalid BGP Open message. (event 22)

        Outside the Idle state an Open Message Error notification with the
        given suberror and data is sent, the connection is closed and
        NotificationSent is raised. In the Idle state nothing happens.
        """

        if self.state == ST_IDLE:
            return

        self.protocol.sendNotification(ERR_MSG_OPEN, suberror, data)
        # Note: RFC4271 states that we should send ERR_FSM in the
        # Established state, which contradicts earlier statements.
        self._errorClose()
        raise NotificationSent(self.protocol, ERR_MSG_OPEN, suberror, data)

    def keepAliveReceived(self):
        """
        Should be called when a BGP KeepAlive packet was received
        from the peer. (event 26)

        In OpenConfirm the session becomes Established and the deferred
        of the protocol is fired with the protocol. The hold timer is only
        restarted for a non-zero hold time.

        Raises NotificationSent when the KeepAlive arrives in the OpenSent
        state.
        """

        if self.state == ST_OPENCONFIRM:
            # State OpenConfirm, event 26
            if self.holdTime != 0:
                self.holdTimer.reset(self.holdTime)
            else:
                # With a zero hold time the hold timer is not used.
                # reset(0) would make it expire right away, so make sure
                # it is stopped instead.
                self.holdTimer.cancel()
            self.state = ST_ESTABLISHED
            self.protocol.deferred.callback(self.protocol)
        elif self.state == ST_ESTABLISHED:
            # State Established, event 26
            if self.holdTime != 0:
                self.holdTimer.reset(self.holdTime)
        elif self.state in (ST_CONNECT, ST_ACTIVE):
            # States Connect, Active, event 26
            self._errorClose()
        elif self.state == ST_OPENSENT:
            # State OpenSent, event 26
            self.protocol.sendNotification(ERR_FSM, 0)
            self._errorClose()
            raise NotificationSent(self.protocol, ERR_FSM, 0)

    def versionError(self):
        """
        Handles a BGP Notification Open Version Error message received
        from the peer. (event 24)

        In OpenSent and OpenConfirm the connection is closed and the state
        returns to Idle without touching the connect retry counter. In
        Connect, Active and Established _errorClose() is used instead.
        """

        current = self.state

        if current in (ST_CONNECT, ST_ACTIVE, ST_ESTABLISHED):
            self._errorClose()
        elif current in (ST_OPENSENT, ST_OPENCONFIRM):
            self.connectRetryTimer.cancel()
            if self.bgpPeering:
                self.bgpPeering.releaseResources(self.protocol)
            self._closeConnection()
            self.state = ST_IDLE

    def notificationReceived(self, error, suberror):
        """
        Handles a BGP Notification message received from the peer.
        (events 24, 25)

        An Open Message Error with suberror 1 is passed to versionError().
        Any other notification closes the connection unless the FSM is
        Idle; in OpenSent a FSM error notification is sent first.
        """

        if error == ERR_MSG_OPEN and suberror == 1:
            # Event 24
            self.versionError()
            return

        if self.state == ST_IDLE:
            return

        if self.state == ST_OPENSENT:
            self.protocol.sendNotification(ERR_FSM, 0)
        self._errorClose()

    def updateReceived(self, update):
        """
        Called when a valid BGP Update message was received. (event 27)

        In the Established state the hold timer is restarted (for a
        non-zero hold time) and the update is passed on to the peering,
        if there is one.

        Raises NotificationSent when the update arrives in the OpenSent or
        OpenConfirm state.
        """

        if self.state == ST_ESTABLISHED:
            # State Established, event 27
            if self.holdTime != 0:
                self.holdTimer.reset(self.holdTime)

            # bgpPeering is optional, as everywhere else in this class
            if self.bgpPeering:
                self.bgpPeering.update(update)
        elif self.state in (ST_ACTIVE, ST_CONNECT):
            # States Active, Connect, event 27
            self._errorClose()
        elif self.state in (ST_OPENSENT, ST_OPENCONFIRM):
            # States OpenSent, OpenConfirm, event 27
            self.protocol.sendNotification(ERR_FSM, 0)
            self._errorClose()
            raise NotificationSent(self.protocol, ERR_FSM, 0)

    def updateError(self, suberror, data=''):
        """
        Handles an invalid BGP Update message. (event 28)

        In Connect and Active the connection is simply closed. In the
        other non-Idle states a notification is sent before closing, and
        NotificationSent is raised: an Update Message Error with the given
        suberror and data in Established, a FSM error in OpenSent and
        OpenConfirm.
        """

        current = self.state

        if current in (ST_ACTIVE, ST_CONNECT):
            self._errorClose()
            return

        if current == ST_ESTABLISHED:
            notification = (ERR_MSG_UPDATE, suberror, data)
        elif current in (ST_OPENSENT, ST_OPENCONFIRM):
            notification = (ERR_FSM, 0)
        else:
            return

        self.protocol.sendNotification(*notification)
        self._errorClose()
        raise NotificationSent(self.protocol, *notification)

    def openCollisionDump(self):
        """
        Handles the decision of the collision detection algorithm to dump
        the associated connection. (event 23)

        Nothing happens in the Idle state beyond the log message. In every
        other state the connection is closed and NotificationSent is
        raised; a Cease notification is actually sent only in OpenSent,
        OpenConfirm and Established.
        """

        self.log("Collided, closing")

        current = self.state
        if current == ST_IDLE:
            return

        if current in (ST_OPENSENT, ST_OPENCONFIRM, ST_ESTABLISHED):
            self.protocol.sendNotification(ERR_CEASE, 0)

        self._errorClose()
        raise NotificationSent(self.protocol, ERR_CEASE, 0)

    def delayOpenEvent(self):
        """
        Handles expiry of the DelayOpenTimer. (event 12)

        In Connect and Active an Open message is sent, the hold timer is
        set to largeHoldTime and the state becomes OpenSent. In OpenSent,
        OpenConfirm and Established a FSM error notification is sent, the
        connection is closed and NotificationSent is raised.
        """

        assert (self.delayOpen)

        self.log("Delay Open event")

        current = self.state
        if current in (ST_CONNECT, ST_ACTIVE):
            if current == ST_ACTIVE:
                # Only the Active state stops the timers and completes
                # the initialization before sending Open
                self.connectRetryTimer.cancel()
                self.delayOpenTimer.cancel()
                if self.bgpPeering:
                    self.bgpPeering.completeInit(self.protocol)
            self.protocol.sendOpen()
            self.holdTimer.reset(self.largeHoldTime)
            self.state = ST_OPENSENT
        elif current != ST_IDLE:
            # State OpenSent, OpenConfirm, Established, event 12
            self.protocol.sendNotification(ERR_FSM, 0)
            self._errorClose()
            raise NotificationSent(self.protocol, ERR_FSM, 0)

    def keepAliveEvent(self):
        """
        Handles expiry of the KeepAliveTimer. (event 11)

        In OpenConfirm and Established a KeepAlive is sent and, for a
        positive hold time, the keepalive timer is restarted. In Connect
        and Active the connection is closed. In OpenSent a FSM error
        notification is sent first and NotificationSent is raised.
        """

        current = self.state

        if current in (ST_OPENCONFIRM, ST_ESTABLISHED):
            self.protocol.sendKeepAlive()
            if self.holdTime > 0:
                self.keepAliveTimer.reset(self.keepAliveTime)
            return

        if current in (ST_CONNECT, ST_ACTIVE):
            self._errorClose()
            return

        if current == ST_OPENSENT:
            self.protocol.sendNotification(ERR_FSM, 0)
            self._errorClose()
            raise NotificationSent(self.protocol, ERR_FSM, 0)

    def holdTimeEvent(self):
        """
        Handles expiry of the HoldTimer. (event 10)

        The connection is closed in every state except Idle. In OpenSent,
        OpenConfirm and Established a Hold Timer Expired notification is
        sent first. No exception is raised by this handler.
        """

        current = self.state
        notify_peer = current in (ST_OPENSENT, ST_OPENCONFIRM,
                                  ST_ESTABLISHED)

        if not notify_peer and current not in (ST_CONNECT, ST_ACTIVE):
            return

        if notify_peer:
            self.protocol.sendNotification(ERR_HOLD_TIMER_EXPIRED, 0)
        self._errorClose()
        # TODO: peer osc damping

    def connectRetryTimeEvent(self):
        """
        Handles expiry of the ConnectRetryTimer. (event 9)

        In Connect and Active the timer is restarted and the peering, if
        any, is asked to retry the connection; Active additionally moves
        to Connect. In OpenSent, OpenConfirm and Established a FSM error
        notification is sent, the connection is closed and
        NotificationSent is raised.
        """

        current = self.state

        if current == ST_CONNECT:
            # Drop the current connection before trying again
            self._closeConnection()
            self.connectRetryTimer.reset(self.connectRetryTime)
            self.delayOpenTimer.cancel()
            if self.bgpPeering:
                self.bgpPeering.connectRetryEvent(self.protocol)
        elif current == ST_ACTIVE:
            self.connectRetryTimer.reset(self.connectRetryTime)
            if self.bgpPeering:
                self.bgpPeering.connectRetryEvent(self.protocol)
            self.state = ST_CONNECT
        elif current != ST_IDLE:
            # State OpenSent, OpenConfirm, Established, event 9
            self.protocol.sendNotification(ERR_FSM, 0)
            self._errorClose()
            raise NotificationSent(self.protocol, ERR_FSM, 0)

    def idleHoldTimeEvent(self):
        """
        Handles expiry of the IdleHoldTimer. (event 13)

        In the Idle state the peering, if any, is asked for an automatic
        start without idle hold. In any other state the connection is
        closed; in OpenSent, OpenConfirm and Established a FSM error
        notification is sent first and NotificationSent is raised
        afterwards.
        """

        if self.state == ST_IDLE:
            if self.bgpPeering:
                self.bgpPeering.automaticStart(idleHold=False)
            return

        fsmError = self.state in (ST_OPENSENT, ST_OPENCONFIRM,
                                  ST_ESTABLISHED)
        if fsmError:
            self.protocol.sendNotification(ERR_FSM, 0)
        self._errorClose()
        if fsmError:
            raise NotificationSent(self.protocol, ERR_FSM, 0)

    def updateSent(self):
        """
        Called by the protocol instance right after it sent an Update
        message; restarts the keepalive timer for a positive hold time.
        """

        if not self.holdTime > 0:
            return

        self.keepAliveTimer.reset(self.keepAliveTime)

    def _errorClose(self):
        """
        Internal method that tears a session down after an error.

        Stops the connect retry, delay open, hold and keepalive timers,
        releases the resources held by the peering (if any), closes the
        connection, increments the connect retry counter and returns the
        state to Idle. The idle hold timer is not touched.
        """

        session_timers = (self.connectRetryTimer, self.delayOpenTimer,
                          self.holdTimer, self.keepAliveTimer)
        for running in session_timers:
            running.cancel()

        # Release BGP resources (routes, etc)
        if self.bgpPeering:
            self.bgpPeering.releaseResources(self.protocol)

        self._closeConnection()

        self.connectRetryCounter += 1
        self.state = ST_IDLE

    def _closeConnection(self):
        """
        Internal method that closes the connection of the protocol
        instance, if there is one, and tells the peering, if there is one,
        that the connection was closed.
        """

        active_protocol = self.protocol
        if active_protocol is not None:
            active_protocol.closeConnection()

        # Lets the peering drop the protocol from its connection lists
        if self.bgpPeering:
            self.bgpPeering.connectionClosed(self.protocol)
Ejemplo n.º 8
0
class BGPFailover:
    """Class for maintaining BGP sessions to routers for IP address failover"""

    # Class-level registries, shared by all instances.
    # prefixes:   address family tuple -> set of prefixes, filled by
    #             associateService()
    # peerings:   peer address -> peering object, filled by setup()
    # ipServices: prefix -> list of service state dicts, filled by
    #             associateService()
    prefixes = {}
    peerings = {}
    ipServices = {}

    # The 'enabled' gauge is set to 1 or 0 in __init__()
    metric_keywords = {'namespace': 'pybal', 'subsystem': 'bgp'}
    metrics = {'enabled': Gauge('enabled', 'BGP Enabled', **metric_keywords)}

    def __init__(self, globalConfig):
        """
        Stores the global configuration, publishes whether BGP is enabled
        through the 'enabled' gauge and parses the BGP settings when it is.
        """

        # Kept so that setup() can check whether BGP is enabled
        self.globalConfig = globalConfig

        bgpEnabled = globalConfig.getboolean('bgp', False)
        self.metrics['enabled'].set(1 if bgpEnabled else 0)
        if bgpEnabled:
            self._parseConfig()

    def _parseConfig(self):
        """
        Reads the BGP settings from the global configuration.

        Sets myASN, asPath, defaultMED, peerAddresses and, when they are
        configured, nexthopIPv4 and nexthopIPv6.

        Raises ValueError when a next hop is missing for an address family
        that already has prefixes registered, or when 'bgp-peer-address'
        does not yield a list.
        """

        log.info("parsing BGP config", system="bgp")
        self.myASN = self.globalConfig.getint('bgp-local-asn')
        # The AS path is a whitespace separated list of AS numbers and
        # defaults to just the local ASN
        asPath = self.globalConfig.get('bgp-as-path', str(self.myASN))
        self.asPath = [int(asn) for asn in asPath.split()]

        self.defaultMED = self.globalConfig.getint('bgp-med', 0)

        # A missing next hop is only an error when prefixes of that
        # address family have been registered
        try:
            self.nexthopIPv4 = self.globalConfig['bgp-nexthop-ipv4']
        except KeyError:
            if (bgp.AFI_INET, bgp.SAFI_UNICAST) in BGPFailover.prefixes:
                raise ValueError(
                    "IPv4 BGP NextHop (global configuration variable 'bgp-nexthop-ipv4') not set"
                )

        try:
            self.nexthopIPv6 = self.globalConfig['bgp-nexthop-ipv6']
        except KeyError:
            if (bgp.AFI_INET6, bgp.SAFI_UNICAST) in BGPFailover.prefixes:
                raise ValueError(
                    "IPv6 BGP NextHop (global configuration variable 'bgp-nexthop-ipv6') not set"
                )

        bgpPeerAddress = self.globalConfig.get('bgp-peer-address', '').strip()
        if bgpPeerAddress.startswith('['):
            # NOTE(review): eval() executes whatever expression the
            # configuration contains. Consider ast.literal_eval() if only
            # list literals need to be supported.
            self.peerAddresses = eval(bgpPeerAddress)
        else:
            # A single address needs no evaluation at all
            self.peerAddresses = [bgpPeerAddress]

        if not isinstance(self.peerAddresses, list):
            raise ValueError(
                "BGP peer addresses (global configuration variable "
                "'bgp-peer-address') must be a single address or a list "
                "of addresses")

    def setup(self):
        """
        Starts a BGP peering for every configured peer address and listens
        for incoming BGP connections. Does nothing when BGP is disabled.

        Every peering is started automatically, stored in self.peerings
        and scheduled to be closed by closeSession() before reactor
        shutdown. Any exception is logged as critical and re-raised; that
        includes a failure to listen on the BGP port.
        """

        if not self.globalConfig.getboolean('bgp', False):
            return

        try:
            advertisements = self.buildAdvertisements()

            for peerAddr in self.peerAddresses:
                peering = bgp.NaiveBGPPeering(self.myASN, peerAddr)
                peering.setEnabledAddressFamilies(set(self.prefixes))
                peering.setAdvertisements(advertisements)

                log.info("Starting BGP session with peer {}".format(peerAddr))
                peering.automaticStart()
                self.peerings[peerAddr] = peering
                reactor.addSystemEventTrigger(
                    'before', 'shutdown', self.closeSession, peering)
        except Exception:
            log.critical("Could not set up BGP peering instances.")
            raise

        # Listen on the IPs listed in 'bgp-local-ips'; without that
        # setting a single empty interface string is used.
        # NOTE(review): eval() executes whatever expression the
        # configuration contains.
        listen_ips = eval(self.globalConfig.get('bgp-local-ips', '[""]'))
        listen_port = self.globalConfig.getint('bgp-local-port', bgp.PORT)

        for ip in listen_ips:
            try:
                reactor.listenTCP(
                    listen_port,
                    bgp.BGPServerFactory(self.peerings),
                    interface=ip)
            except CannotListenError as e:
                # Logged and then propagated to the caller
                log.critical("Could not listen for BGP connections: " +
                             str(e))
                raise

    def closeSession(self, peering):
        """
        Withdraws all announcements of the peering and stops it manually.
        Returns whatever peering.manualStop() returns.
        """

        message = "Clearing session to {}".format(peering.peerAddr)
        log.info(message)

        # An empty advertisement set withdraws everything announced so far
        nothing_announced = set()
        peering.setAdvertisements(nothing_announced)
        return peering.manualStop()

    def buildAdvertisements(self):
        """
        Builds the set of advertisements for all registered prefixes.

        Every advertisement carries the origin and AS path attributes, the
        next hop for its address family and a MED: the MED of the first
        service registered for the prefix, or the default MED when that
        one is None.

        Raises ValueError for an address family other than IPv4 or IPv6.
        """

        baseAttrs = attrs.AttributeDict([
            attrs.OriginAttribute(),
            attrs.ASPathAttribute(self.asPath),
        ])

        advertisements = set()
        for af, familyPrefixes in self.prefixes.items():
            afi = af[0]
            afAttrs = bgp.AttributeDict(baseAttrs)

            if afi == bgp.AFI_INET:
                nexthop = attrs.NextHopAttribute(self.nexthopIPv4)
                afAttrs[attrs.NextHopAttribute] = nexthop
            elif afi == bgp.AFI_INET6:
                reach = attrs.MPReachNLRIAttribute(
                    (af[0], af[1], IPv6IP(self.nexthopIPv6), []))
                afAttrs[attrs.MPReachNLRIAttribute] = reach
            else:
                raise ValueError("Unsupported address family {}".format(af))

            for prefix in familyPrefixes:
                attributes = bgp.AttributeDict(afAttrs)

                # The 'med' key always exists, but its value may be None,
                # in which case the default MED applies
                med = self.ipServices[prefix][0]['med']
                if med is None:
                    med = self.defaultMED
                attributes[attrs.MEDAttribute] = attrs.MEDAttribute(med)

                frozen = attrs.FrozenAttributeDict(attributes)
                advertisements.add(bgp.Advertisement(prefix, frozen, af))

        return advertisements

    @classmethod
    def associateService(cls, ip, lvsservice, med):
        """
        Registers lvsservice as a user of the service IP address ip.

        An address containing ':' is treated as IPv6, anything else as
        IPv4. The service is added to cls.ipServices under the prefix and
        the prefix to cls.prefixes under its address family.

        Raises ValueError when med differs from the MED of the first
        service already registered for the same IP.
        """

        if ':' in ip:
            af = (bgp.AFI_INET6, bgp.SAFI_UNICAST)
            prefix = IPv6IP(ip)
        else:
            af = (bgp.AFI_INET, bgp.SAFI_UNICAST)
            prefix = IPv4IP(ip)

        # All services need to agree on the same MED for this IP
        if prefix in cls.ipServices:
            registered_med = cls.ipServices[prefix][0]['med']
            if not med == registered_med:
                raise ValueError(
                    "LVS service {} MED value {} differs from other MED values for IP {}"
                    .format(lvsservice.name, med, ip))

        service_state = {'lvsservice': lvsservice, 'af': af, 'med': med}

        cls.ipServices.setdefault(prefix, []).append(service_state)
        cls.prefixes.setdefault(af, set()).add(prefix)