Ejemplo n.º 1
0
class AbstractWorker(service.BuildbotService, object):

    """This is the master-side representative for a remote buildbot worker.
    There is exactly one for each worker described in the config file (the
    c['workers'] list). When buildbots connect in (.attach), they get a
    reference to this instance. The BotMaster object is stashed as the
    .botmaster attribute. The BotMaster is also our '.parent' Service.

    I represent a worker -- a remote machine capable of
    running builds.  I am instantiated by the configuration file, and can be
    subclassed to add extra functionality."""

    implements(IWorker)

    # reconfig workers after builders
    reconfig_priority = 64

    def checkConfig(self, name, password, max_builds=None,
                    notify_on_missing=None,
                    missing_timeout=10 * 60,   # Ten minutes
                    properties=None, locks=None, keepalive_interval=3600):
        """
        @param name: botname this machine will supply when it connects
        @param password: password this machine will supply when
                         it connects
        @param max_builds: maximum number of simultaneous builds that will
                           be run concurrently on this worker (the
                           default is None for no limit)
        @param properties: properties that will be applied to builds run on
                           this worker
        @type properties: dictionary
        @param locks: A list of locks that must be acquired before this worker
                      can be used
        @type locks: dictionary
        """
        self.name = name = ascii2unicode(name)

        if properties is None:
            properties = {}

        self.password = password

        # protocol registration
        self.registration = None

        # these are set when the service is started
        self.manager = None
        self.workerid = None

        self.worker_status = WorkerStatus(name)
        self.worker_commands = None
        self.workerforbuilders = {}
        self.max_builds = max_builds
        self.access = []
        if locks:
            self.access = locks
        self.lock_subscriptions = []

        self.properties = Properties()
        self.properties.update(properties, "Worker")
        self.properties.setProperty("slavename", name, "Worker (deprecated)")
        self.properties.setProperty("workername", name, "Worker")

        self.lastMessageReceived = 0

        if notify_on_missing is None:
            notify_on_missing = []
        if isinstance(notify_on_missing, str):
            notify_on_missing = [notify_on_missing]
        self.notify_on_missing = notify_on_missing
        for i in notify_on_missing:
            if not isinstance(i, str):
                config.error(
                    'notify_on_missing arg %r is not a string' % (i,))

        self.missing_timeout = missing_timeout
        self.missing_timer = None

        # a protocol connection, if we're currently connected
        self.conn = None

        self._old_builder_list = None

    def __repr__(self):
        return "<%s %r>" % (self.__class__.__name__, self.name)

    @property
    def workername(self):
        # workername is now an alias to twisted.Service's name
        return self.name
    deprecatedWorkerClassProperty(locals(), workername)

    @property
    def botmaster(self):
        if self.master is None:
            return None
        return self.master.botmaster

    def updateLocks(self):
        """Convert the L{LockAccess} objects in C{self.locks} into real lock
        objects, while also maintaining the subscriptions to lock releases."""
        # unsubscribe from any old locks
        for s in self.lock_subscriptions:
            s.unsubscribe()

        # convert locks into their real form
        locks = [(self.botmaster.getLockFromLockAccess(a), a)
                 for a in self.access]
        self.locks = [(l.getLock(self), la) for l, la in locks]
        self.lock_subscriptions = [l.subscribeToReleases(self._lockReleased)
                                   for l, la in self.locks]

    def locksAvailable(self):
        """
        I am called to see if all the locks I depend on are available,
        in which I return True, otherwise I return False
        """
        if not self.locks:
            return True
        for lock, access in self.locks:
            if not lock.isAvailable(self, access):
                return False
        return True

    def acquireLocks(self):
        """
        I am called when a build is preparing to run. I try to claim all
        the locks that are needed for a build to happen. If I can't, then
        my caller should give up the build and try to get another worker
        to look at it.
        """
        log.msg("acquireLocks(worker %s, locks %s)" % (self, self.locks))
        if not self.locksAvailable():
            log.msg("worker %s can't lock, giving up" % (self, ))
            return False
        # all locks are available, claim them all
        for lock, access in self.locks:
            lock.claim(self, access)
        return True

    def releaseLocks(self):
        """
        I am called to release any locks after a build has finished
        """
        log.msg("releaseLocks(%s): %s" % (self, self.locks))
        for lock, access in self.locks:
            lock.release(self, access)

    def _lockReleased(self):
        """One of the locks for this worker was released; try scheduling
        builds."""
        if not self.botmaster:
            return  # oh well..
        self.botmaster.maybeStartBuildsForWorker(self.name)

    def _applyWorkerInfo(self, info):
        if not info:
            return

        self.worker_status.setAdmin(info.get("admin"))
        self.worker_status.setHost(info.get("host"))
        self.worker_status.setAccessURI(info.get("access_uri", None))
        self.worker_status.setVersion(info.get("version", "(unknown)"))

    @defer.inlineCallbacks
    def _getWorkerInfo(self):
        worker = yield self.master.data.get(
            ('workers', self.workerid))
        self._applyWorkerInfo(worker['workerinfo'])

    def setServiceParent(self, parent):
        # botmaster needs to set before setServiceParent which calls
        # startService

        self.manager = parent
        return service.BuildbotService.setServiceParent(self, parent)

    @defer.inlineCallbacks
    def startService(self):
        self.updateLocks()
        self.startMissingTimer()
        self.workerid = yield self.master.data.updates.findWorkerId(
            self.name)

        yield self._getWorkerInfo()
        yield service.BuildbotService.startService(self)

    @defer.inlineCallbacks
    def reconfigService(self, name, password, max_builds=None,
                        notify_on_missing=None, missing_timeout=3600,
                        properties=None, locks=None, keepalive_interval=3600):
        # Given a Worker config arguments, configure this one identically.
        # Because Worker objects are remotely referenced, we can't replace them
        # without disconnecting the worker, yet there's no reason to do that.

        assert self.name == name
        self.password = password

        # adopt new instance's configuration parameters
        self.max_builds = max_builds
        self.access = []
        if locks:
            self.access = locks
        self.notify_on_missing = notify_on_missing

        if self.missing_timeout != missing_timeout:
            running_missing_timer = self.missing_timer
            self.stopMissingTimer()
            self.missing_timeout = missing_timeout
            if running_missing_timer:
                self.startMissingTimer()

        if properties is None:
            properties = {}
        self.properties = Properties()
        self.properties.update(properties, "Worker")
        self.properties.setProperty("slavename", name, "Worker (deprecated)")
        self.properties.setProperty("workername", name, "Worker")

        # update our records with the worker manager
        if not self.registration:
            self.registration = yield self.master.workers.register(self)
        yield self.registration.update(self, self.master.config)

        self.updateLocks()

        bids = [
            b._builderid for b in self.botmaster.getBuildersForWorker(self.name)]
        yield self.master.data.updates.workerConfigured(self.workerid, self.master.masterid, bids)

        # update the attached worker's notion of which builders are attached.
        # This assumes that the relevant builders have already been configured,
        # which is why the reconfig_priority is set low in this class.
        yield self.updateWorker()

    @defer.inlineCallbacks
    def stopService(self):
        if self.registration:
            yield self.registration.unregister()
            self.registration = None
        self.stopMissingTimer()
        # mark this worker as configured for zero builders in this master
        yield self.master.data.updates.workerConfigured(self.workerid, self.master.masterid, [])
        yield service.BuildbotService.stopService(self)

    def startMissingTimer(self):
        if self.notify_on_missing and self.missing_timeout and self.parent:
            self.stopMissingTimer()  # in case it's already running
            self.missing_timer = self.master.reactor.callLater(self.missing_timeout,
                                                               self._missing_timer_fired)

    def stopMissingTimer(self):
        if self.missing_timer:
            self.missing_timer.cancel()
            self.missing_timer = None

    def isConnected(self):
        return self.conn

    def _missing_timer_fired(self):
        self.missing_timer = None
        # notify people, but only if we're still in the config
        if not self.parent:
            return

        buildmaster = self.botmaster.master
        status = buildmaster.getStatus()
        text = "The Buildbot working for '%s'\n" % status.getTitle()
        text += ("has noticed that the worker named %s went away\n" %
                 self.name)
        text += "\n"
        text += ("It last disconnected at %s (buildmaster-local time)\n" %
                 time.ctime(time.time() - self.missing_timeout))  # approx
        text += "\n"
        text += "The admin on record (as reported by WORKER:info/admin)\n"
        text += "was '%s'.\n" % self.worker_status.getAdmin()
        text += "\n"
        text += "Sincerely,\n"
        text += " The Buildbot\n"
        text += " %s\n" % status.getTitleURL()
        text += "\n"
        text += "%s\n" % status.getURLForThing(self.worker_status)
        subject = "Buildbot: worker %s was lost" % (self.name,)
        return self._mail_missing_message(subject, text)

    def updateWorker(self):
        """Called to add or remove builders after the worker has connected.

        @return: a Deferred that indicates when an attached worker has
        accepted the new builders and/or released the old ones."""
        if self.conn:
            return self.sendBuilderList()
        else:
            return defer.succeed(None)

    @defer.inlineCallbacks
    def attached(self, conn):
        """This is called when the worker connects."""

        metrics.MetricCountEvent.log("AbstractWorker.attached_workers", 1)

        # now we go through a sequence of calls, gathering information, then
        # tell the Botmaster that it can finally give this worker to all the
        # Builders that care about it.

        # Reset graceful shutdown status
        self.worker_status.setGraceful(False)
        # We want to know when the graceful shutdown flag changes
        self.worker_status.addGracefulWatcher(self._gracefulChanged)
        self.conn = conn
        self._old_builder_list = None  # clear builder list before proceed
        self.worker_status.addPauseWatcher(self._pauseChanged)

        self.worker_status.setConnected(True)

        self._applyWorkerInfo(conn.info)
        self.worker_commands = conn.info.get("slave_commands", {})
        self.worker_environ = conn.info.get("environ", {})
        self.worker_basedir = conn.info.get("basedir", None)
        self.worker_system = conn.info.get("system", None)

        self.conn.notifyOnDisconnect(self.detached)

        workerinfo = {
            'admin': conn.info.get('admin'),
            'host': conn.info.get('host'),
            'access_uri': conn.info.get('access_uri'),
            'version': conn.info.get('version')
        }

        yield self.master.data.updates.workerConnected(
            workerid=self.workerid,
            masterid=self.master.masterid,
            workerinfo=workerinfo
        )

        if self.worker_system == "nt":
            self.path_module = namedModule("ntpath")
        else:
            # most everything accepts / as separator, so posix should be a
            # reasonable fallback
            self.path_module = namedModule("posixpath")
        log.msg("bot attached")
        self.messageReceivedFromWorker()
        self.stopMissingTimer()
        self.master.status.workerConnected(self.name)
        yield self.updateWorker()
        yield self.botmaster.maybeStartBuildsForWorker(self.name)

    def messageReceivedFromWorker(self):
        now = time.time()
        self.lastMessageReceived = now
        self.worker_status.setLastMessageReceived(now)

    @defer.inlineCallbacks
    def detached(self):
        metrics.MetricCountEvent.log("AbstractWorker.attached_workers", -1)
        self.conn = None
        self._old_builder_list = []
        self.worker_status.removeGracefulWatcher(self._gracefulChanged)
        self.worker_status.removePauseWatcher(self._pauseChanged)
        self.worker_status.setConnected(False)
        log.msg("Worker.detached(%s)" % (self.name,))
        self.master.status.workerDisconnected(self.name)
        self.releaseLocks()
        yield self.master.data.updates.workerDisconnected(
            workerid=self.workerid,
            masterid=self.master.masterid,
        )

    def disconnect(self):
        """Forcibly disconnect the worker.

        This severs the TCP connection and returns a Deferred that will fire
        (with None) when the connection is probably gone.

        If the worker is still alive, they will probably try to reconnect
        again in a moment.

        This is called in two circumstances. The first is when a worker is
        removed from the config file. In this case, when they try to
        reconnect, they will be rejected as an unknown worker. The second is
        when we wind up with two connections for the same worker, in which
        case we disconnect the older connection.
        """

        if self.conn is None:
            return defer.succeed(None)
        log.msg("disconnecting old worker %s now" % (self.name,))
        # When this Deferred fires, we'll be ready to accept the new worker
        return self._disconnect(self.conn)

    def _disconnect(self, conn):
        # all kinds of teardown will happen as a result of
        # loseConnection(), but it happens after a reactor iteration or
        # two. Hook the actual disconnect so we can know when it is safe
        # to connect the new worker. We have to wait one additional
        # iteration (with callLater(0)) to make sure the *other*
        # notifyOnDisconnect handlers have had a chance to run.
        d = defer.Deferred()

        # notifyOnDisconnect runs the callback
        def _disconnected():
            eventually(d.callback, None)
        conn.notifyOnDisconnect(_disconnected)
        conn.loseConnection()
        log.msg("waiting for worker to finish disconnecting")

        return d

    def sendBuilderList(self):
        our_builders = self.botmaster.getBuildersForWorker(self.name)
        blist = [(b.name, b.config.workerbuilddir) for b in our_builders]
        if blist == self._old_builder_list:
            return defer.succeed(None)

        d = self.conn.remoteSetBuilderList(builders=blist)

        @d.addCallback
        def sentBuilderList(ign):
            self._old_builder_list = blist
            return ign
        return d

    def shutdownRequested(self):
        log.msg("worker %s wants to shut down" % (self.name,))
        self.worker_status.setGraceful(True)

    def addWorkerForBuilder(self, wfb):
        self.workerforbuilders[wfb.builder_name] = wfb

    def removeWorkerForBuilder(self, wfb):
        try:
            del self.workerforbuilders[wfb.builder_name]
        except KeyError:
            pass

    def buildFinished(self, wfb):
        """This is called when a build on this worker is finished."""
        self.botmaster.maybeStartBuildsForWorker(self.name)

    def canStartBuild(self):
        """
        I am called when a build is requested to see if this worker
        can start a build.  This function can be used to limit overall
        concurrency on the worker.

        Note for subclassers: if a worker can become willing to start a build
        without any action on that worker (for example, by a resource in use on
        another worker becoming available), then you must arrange for
        L{maybeStartBuildsForWorker} to be called at that time, or builds on
        this worker will not start.
        """

        if self.worker_status.isPaused():
            return False

        # If we're waiting to shutdown gracefully, then we shouldn't
        # accept any new jobs.
        if self.worker_status.getGraceful():
            return False

        if self.max_builds:
            active_builders = [sb for sb in itervalues(self.workerforbuilders)
                               if sb.isBusy()]
            if len(active_builders) >= self.max_builds:
                return False

        if not self.locksAvailable():
            return False

        return True

    def _mail_missing_message(self, subject, text):
        # FIXME: This should be handled properly via the event api
        # we should send a missing message on the mq, and let any reporter
        # handle that

        # first, see if we have a MailNotifier we can use. This gives us a
        # fromaddr and a relayhost.
        buildmaster = self.botmaster.master
        for st in buildmaster.services:
            if isinstance(st, MailNotifier):
                break
        else:
            # if not, they get a default MailNotifier, which always uses SMTP
            # to localhost and uses a dummy fromaddr of "buildbot".
            log.msg("worker-missing msg using default MailNotifier")
            st = MailNotifier("buildbot")
        # now construct the mail

        m = Message()
        m.set_payload(text)
        m['Date'] = formatdate(localtime=True)
        m['Subject'] = subject
        m['From'] = st.fromaddr
        recipients = self.notify_on_missing
        m['To'] = ", ".join(recipients)
        d = st.sendMessage(m, recipients)
        # return the Deferred for testing purposes
        return d

    def _gracefulChanged(self, graceful):
        """This is called when our graceful shutdown setting changes"""
        self.maybeShutdown()

    @defer.inlineCallbacks
    def shutdown(self):
        """Shutdown the worker"""
        if not self.conn:
            log.msg("no remote; worker is already shut down")
            return

        yield self.conn.remoteShutdown()

    def maybeShutdown(self):
        """Shut down this worker if it has been asked to shut down gracefully,
        and has no active builders."""
        if not self.worker_status.getGraceful():
            return
        active_builders = [sb for sb in itervalues(self.workerforbuilders)
                           if sb.isBusy()]
        if active_builders:
            return
        d = self.shutdown()
        d.addErrback(log.err, 'error while shutting down worker')

    def _pauseChanged(self, paused):
        if paused is True:
            self.botmaster.master.status.workerPaused(self.name)
        else:
            self.botmaster.master.status.workerUnpaused(self.name)

    def pause(self):
        """Stop running new builds on the worker."""
        self.worker_status.setPaused(True)

    def unpause(self):
        """Restart running new builds on the worker."""
        self.worker_status.setPaused(False)
        self.botmaster.maybeStartBuildsForWorker(self.name)

    def isPaused(self):
        return self.worker_status.isPaused()