Example #1
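The listing below omits its module prologue. A plausible reconstruction is sketched here; the stdlib and Twisted imports are safe bets for this era of the code, but every buildbot-internal path is an assumption and may differ between releases.

# Reconstructed imports for this listing (buildbot-internal paths are
# assumptions and may vary by release):
import time
from email.Message import Message          # Python 2 email API
from email.Utils import formatdate
from zope.interface import implements
from twisted.python import log
from twisted.internet import defer, reactor
from twisted.application import service
import twisted.spread.pb
from buildbot.pbutil import NewCredPerspective        # assumed path
from buildbot.interfaces import IBuildSlave           # assumed path
from buildbot.status.builder import SlaveStatus       # assumed path
from buildbot.status.mail import MailNotifier         # assumed path
from buildbot.process.properties import Properties    # assumed path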
class AbstractBuildSlave(NewCredPerspective, service.MultiService):
    """This is the master-side representative for a remote buildbot slave.
    There is exactly one for each slave described in the config file (the
    c['slaves'] list). When buildbots connect in (.attach), they get a
    reference to this instance. The BotMaster object is stashed as the
    .botmaster attribute. The BotMaster is also our '.parent' Service.

    I represent a build slave -- a remote machine capable of
    running builds.  I am instantiated by the configuration file, and can be
    subclassed to add extra functionality."""

    implements(IBuildSlave)

    def __init__(self,
                 name,
                 password,
                 max_builds=None,
                 notify_on_missing=[],
                 missing_timeout=3600,
                 properties={}):
        """
        @param name: botname this machine will supply when it connects
        @param password: password this machine will supply when
                         it connects
        @param max_builds: maximum number of simultaneous builds that will
                           be run concurrently on this buildslave (the
                           default is None for no limit)
        @param properties: properties that will be applied to builds run on
                           this slave
        @type properties: dictionary
        """
        service.MultiService.__init__(self)
        self.slavename = name
        self.password = password
        self.botmaster = None  # no buildmaster yet
        self.slave_status = SlaveStatus(name)
        self.slave = None  # a RemoteReference to the Bot, when connected
        self.slave_commands = None
        self.slavebuilders = {}
        self.max_builds = max_builds

        self.properties = Properties()
        self.properties.update(properties, "BuildSlave")
        self.properties.setProperty("slavename", name, "BuildSlave")

        self.lastMessageReceived = 0
        if isinstance(notify_on_missing, str):
            notify_on_missing = [notify_on_missing]
        self.notify_on_missing = notify_on_missing
        for i in notify_on_missing:
            assert isinstance(i, str)
        self.missing_timeout = missing_timeout
        self.missing_timer = None

    def update(self, new):
        """
        Given a new BuildSlave, configure this one identically.  Because
        BuildSlave objects are remotely referenced, we can't replace them
        without disconnecting the slave, yet there's no reason to do that.
        """
        # the reconfiguration logic should guarantee this:
        assert self.slavename == new.slavename
        assert self.password == new.password
        assert self.__class__ == new.__class__
        self.max_builds = new.max_builds

    def __repr__(self):
        if self.botmaster:
            builders = self.botmaster.getBuildersForSlave(self.slavename)
            return "<%s '%s', current builders: %s>" % \
               (self.__class__.__name__, self.slavename,
                ','.join(map(lambda b: b.name, builders)))
        else:
            return "<%s '%s', (no builders yet)>" % \
                (self.__class__.__name__, self.slavename)

    def setBotmaster(self, botmaster):
        assert not self.botmaster, "BuildSlave already has a botmaster"
        self.botmaster = botmaster
        self.startMissingTimer()

    def stopMissingTimer(self):
        if self.missing_timer:
            self.missing_timer.cancel()
            self.missing_timer = None

    def startMissingTimer(self):
        if self.notify_on_missing and self.missing_timeout and self.parent:
            self.stopMissingTimer()  # in case it's already running
            self.missing_timer = reactor.callLater(self.missing_timeout,
                                                   self._missing_timer_fired)

    def _missing_timer_fired(self):
        self.missing_timer = None
        # notify people, but only if we're still in the config
        if not self.parent:
            return

        buildmaster = self.botmaster.parent
        status = buildmaster.getStatus()
        text = "The Buildbot working for '%s'\n" % status.getProjectName()
        text += ("has noticed that the buildslave named %s went away\n" %
                 self.slavename)
        text += "\n"
        text += ("It last disconnected at %s (buildmaster-local time)\n" %
                 time.ctime(time.time() - self.missing_timeout))  # approx
        text += "\n"
        text += "The admin on record (as reported by BUILDSLAVE:info/admin)\n"
        text += "was '%s'.\n" % self.slave_status.getAdmin()
        text += "\n"
        text += "Sincerely,\n"
        text += " The Buildbot\n"
        text += " %s\n" % status.getProjectURL()
        subject = "Buildbot: buildslave %s was lost" % self.slavename
        return self._mail_missing_message(subject, text)

    def updateSlave(self):
        """Called to add or remove builders after the slave has connected.

        @return: a Deferred that indicates when an attached slave has
        accepted the new builders and/or released the old ones."""
        if self.slave:
            return self.sendBuilderList()
        else:
            return defer.succeed(None)

    def updateSlaveStatus(self, buildStarted=None, buildFinished=None):
        if buildStarted:
            self.slave_status.buildStarted(buildStarted)
        if buildFinished:
            self.slave_status.buildFinished(buildFinished)

    def attached(self, bot):
        """This is called when the slave connects.

        @return: a Deferred that fires with a suitable pb.IPerspective to
                 give to the slave (i.e. 'self')"""

        if self.slave:
            # uh-oh, we've got a duplicate slave. The most likely
            # explanation is that the slave is behind a slow link, thinks we
            # went away, and has attempted to reconnect, so we've got two
            # "connections" from the same slave, but the previous one is
            # stale. Give the new one precedence.
            log.msg("duplicate slave %s replacing old one" % self.slavename)

            # just in case we've got two identically-configured slaves,
            # report the IP addresses of both so someone can resolve the
            # squabble
            tport = self.slave.broker.transport
            log.msg("old slave was connected from", tport.getPeer())
            log.msg("new slave is from", bot.broker.transport.getPeer())
            d = self.disconnect()
        else:
            d = defer.succeed(None)
        # now we go through a sequence of calls, gathering information, then
        # tell the Botmaster that it can finally give this slave to all the
        # Builders that care about it.

        # we accumulate slave information in this 'state' dictionary, then
        # set it atomically if we make it far enough through the process
        state = {}

        # Reset graceful shutdown status
        self.slave_status.setGraceful(False)
        # We want to know when the graceful shutdown flag changes
        self.slave_status.addGracefulWatcher(self._gracefulChanged)

        def _log_attachment_on_slave(res):
            d1 = bot.callRemote("print", "attached")
            d1.addErrback(lambda why: None)
            return d1

        d.addCallback(_log_attachment_on_slave)

        def _get_info(res):
            d1 = bot.callRemote("getSlaveInfo")

            def _got_info(info):
                log.msg("Got slaveinfo from '%s'" % self.slavename)
                # TODO: info{} might have other keys
                state["admin"] = info.get("admin")
                state["host"] = info.get("host")
                state["access_uri"] = info.get("access_uri", None)

            def _info_unavailable(why):
                # maybe an old slave, doesn't implement remote_getSlaveInfo
                log.msg("BuildSlave.info_unavailable")
                log.err(why)

            d1.addCallbacks(_got_info, _info_unavailable)
            return d1

        d.addCallback(_get_info)

        def _get_version(res):
            d1 = bot.callRemote("getVersion")

            def _got_version(version):
                state["version"] = version

            def _version_unavailable(why):
                # probably an old slave
                log.msg("BuildSlave.version_unavailable")
                log.err(why)

            d1.addCallbacks(_got_version, _version_unavailable)
            return d1

        d.addCallback(_get_version)

        def _get_commands(res):
            d1 = bot.callRemote("getCommands")

            def _got_commands(commands):
                state["slave_commands"] = commands

            def _commands_unavailable(why):
                # probably an old slave
                log.msg("BuildSlave._commands_unavailable")
                if why.check(AttributeError):
                    return
                log.err(why)

            d1.addCallbacks(_got_commands, _commands_unavailable)
            return d1

        d.addCallback(_get_commands)

        def _accept_slave(res):
            self.slave_status.setAdmin(state.get("admin"))
            self.slave_status.setHost(state.get("host"))
            self.slave_status.setAccessURI(state.get("access_uri"))
            self.slave_status.setVersion(state.get("version"))
            self.slave_status.setConnected(True)
            self.slave_commands = state.get("slave_commands")
            self.slave = bot
            log.msg("bot attached")
            self.messageReceivedFromSlave()
            self.stopMissingTimer()
            self.botmaster.parent.status.slaveConnected(self.slavename)

            return self.updateSlave()

        d.addCallback(_accept_slave)
        d.addCallback(lambda res: self.botmaster.maybeStartAllBuilds())

        # Finally, the slave gets a reference to this BuildSlave. They
        # receive this later, after we've started using them.
        d.addCallback(lambda res: self)
        return d

    def messageReceivedFromSlave(self):
        now = time.time()
        self.lastMessageReceived = now
        self.slave_status.setLastMessageReceived(now)

    def detached(self, mind):
        self.slave = None
        self.slave_status.removeGracefulWatcher(self._gracefulChanged)
        self.slave_status.setConnected(False)
        log.msg("BuildSlave.detached(%s)" % self.slavename)
        self.botmaster.parent.status.slaveDisconnected(self.slavename)

    def disconnect(self):
        """Forcibly disconnect the slave.

        This severs the TCP connection and returns a Deferred that will fire
        (with None) when the connection is probably gone.

        If the slave is still alive, they will probably try to reconnect
        again in a moment.

        This is called in two circumstances. The first is when a slave is
        removed from the config file. In this case, when they try to
        reconnect, they will be rejected as an unknown slave. The second is
        when we wind up with two connections for the same slave, in which
        case we disconnect the older connection.
        """

        if not self.slave:
            return defer.succeed(None)
        log.msg("disconnecting old slave %s now" % self.slavename)
        # When this Deferred fires, we'll be ready to accept the new slave
        return self._disconnect(self.slave)

    def _disconnect(self, slave):
        # all kinds of teardown will happen as a result of
        # loseConnection(), but it happens after a reactor iteration or
        # two. Hook the actual disconnect so we can know when it is safe
        # to connect the new slave. We have to wait one additional
        # iteration (with callLater(0)) to make sure the *other*
        # notifyOnDisconnect handlers have had a chance to run.
        d = defer.Deferred()

        # notifyOnDisconnect runs the callback with one argument, the
        # RemoteReference being disconnected.
        def _disconnected(rref):
            reactor.callLater(0, d.callback, None)

        slave.notifyOnDisconnect(_disconnected)
        tport = slave.broker.transport
        # this is the polite way to request that a socket be closed
        tport.loseConnection()
        try:
            # but really we don't want to wait for the transmit queue to
            # drain. The remote end is unlikely to ACK the data, so we'd
            # probably have to wait for a (20-minute) TCP timeout.
            #tport._closeSocket()
            # however, doing _closeSocket (whether before or after
            # loseConnection) somehow prevents the notifyOnDisconnect
            # handlers from being run. Bummer.
            tport.offset = 0
            tport.dataBuffer = ""
        except Exception:
            # however, these hacks are pretty internal, so don't blow up if
            # they fail or are unavailable
            log.msg("failed to accelerate the shutdown process")
        log.msg("waiting for slave to finish disconnecting")

        return d

    def sendBuilderList(self):
        our_builders = self.botmaster.getBuildersForSlave(self.slavename)
        blist = [(b.name, b.slavebuilddir) for b in our_builders]
        d = self.slave.callRemote("setBuilderList", blist)
        return d

    def perspective_keepalive(self):
        pass

    def addSlaveBuilder(self, sb):
        self.slavebuilders[sb.builder_name] = sb

    def removeSlaveBuilder(self, sb):
        try:
            del self.slavebuilders[sb.builder_name]
        except KeyError:
            pass

    def canStartBuild(self):
        """
        I am called when a build is requested to see if this buildslave
        can start a build.  This function can be used to limit overall
        concurrency on the buildslave.
        """
        # If we're waiting to shutdown gracefully, then we shouldn't
        # accept any new jobs.
        if self.slave_status.getGraceful():
            return False

        if self.max_builds:
            active_builders = [
                sb for sb in self.slavebuilders.values() if sb.isBusy()
            ]
            if len(active_builders) >= self.max_builds:
                return False
        return True

    def _mail_missing_message(self, subject, text):
        # first, see if we have a MailNotifier we can use. This gives us a
        # fromaddr and a relayhost.
        buildmaster = self.botmaster.parent
        for st in buildmaster.statusTargets:
            if isinstance(st, MailNotifier):
                break
        else:
            # if not, they get a default MailNotifier, which always uses SMTP
            # to localhost and uses a dummy fromaddr of "buildbot".
            log.msg("buildslave-missing msg using default MailNotifier")
            st = MailNotifier("buildbot")
        # now construct the mail

        m = Message()
        m.set_payload(text)
        m['Date'] = formatdate(localtime=True)
        m['Subject'] = subject
        m['From'] = st.fromaddr
        recipients = self.notify_on_missing
        m['To'] = ", ".join(recipients)
        d = st.sendMessage(m, recipients)
        # return the Deferred for testing purposes
        return d

    def _gracefulChanged(self, graceful):
        """This is called when our graceful shutdown setting changes"""
        if graceful:
            active_builders = [
                sb for sb in self.slavebuilders.values() if sb.isBusy()
            ]
            if len(active_builders) == 0:
                # Shut down!
                self.shutdown()

    def shutdown(self):
        """Shutdown the slave"""
        # Look for a builder with a remote reference to the client side
        # slave.  If we can find one, then call "shutdown" on the remote
        # builder, which will cause the slave buildbot process to exit.
        d = None
        for b in self.slavebuilders.values():
            if b.remote:
                d = b.remote.callRemote("shutdown")
                break

        if d:
            log.msg("Shutting down slave: %s" % self.slavename)

            # The remote shutdown call will not complete successfully since the
            # buildbot process exits almost immediately after getting the
            # shutdown request.
            # Here we look at the reason why the remote call failed, and if
            # it's because the connection was lost, that means the slave
            # shutdown as expected.
            def _errback(why):
                if why.check(twisted.spread.pb.PBConnectionLost):
                    log.msg("Lost connection to %s" % self.slavename)
                else:
                    log.err("Unexpected error when trying to shutdown %s" %
                            self.slavename)

            d.addErrback(_errback)
            return d
        log.err("Couldn't find remote builder to shut down slave")
        return defer.succeed(None)
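Pulling the Deferred plumbing out of attached() makes its gather-then-apply shape easier to see. The snippet below is a self-contained sketch, not Buildbot code; FakeBot and its canned responses are hypothetical stand-ins for the remote Bot object:

from twisted.internet import defer

class FakeBot:
    """Hypothetical stand-in for the slave-side Bot; the method names
    mirror the strings passed to callRemote() in attached() above."""
    def callRemote(self, name, *args):
        canned = {
            "getSlaveInfo": {"admin": "admin@example.com", "host": "sl1"},
            "getVersion": "0.7.12",
            "getCommands": {"shell": "2.2"},
        }
        return defer.succeed(canned.get(name))

state = {}
bot = FakeBot()
d = defer.succeed(None)
# gather everything into 'state' first, as attached() does ...
d.addCallback(lambda _: bot.callRemote("getSlaveInfo"))
d.addCallback(lambda info: state.update(admin=info.get("admin"),
                                        host=info.get("host")))
d.addCallback(lambda _: bot.callRemote("getVersion"))
d.addCallback(lambda version: state.update(version=version))
d.addCallback(lambda _: bot.callRemote("getCommands"))
d.addCallback(lambda commands: state.update(slave_commands=commands))

# ... then apply it atomically in the final callback (fires synchronously
# here because every Deferred was pre-fired)
def _accept(_):
    assert sorted(state) == ["admin", "host", "slave_commands", "version"]
d.addCallback(_accept)

Because each step returns a Deferred, a slow slave merely delays the chain, and the errbacks in the real code let old slaves that lack a remote method degrade gracefully instead of aborting the attach.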
Example #2
class AbstractBuildSlave(NewCredPerspective, service.MultiService):
    """This is the master-side representative for a remote buildbot slave.
    There is exactly one for each slave described in the config file (the
    c['slaves'] list). When buildbots connect in (.attach), they get a
    reference to this instance. The BotMaster object is stashed as the
    .botmaster attribute. The BotMaster is also our '.parent' Service.

    I represent a build slave -- a remote machine capable of
    running builds.  I am instantiated by the configuration file, and can be
    subclassed to add extra functionality."""

    implements(IBuildSlave)

    def __init__(self, name, password, max_builds=None,
                 notify_on_missing=[], missing_timeout=3600,
                 properties={}):
        """
        @param name: botname this machine will supply when it connects
        @param password: password this machine will supply when
                         it connects
        @param max_builds: maximum number of simultaneous builds that will
                           be run concurrently on this buildslave (the
                           default is None for no limit)
        @param properties: properties that will be applied to builds run on
                           this slave
        @type properties: dictionary
        """
        service.MultiService.__init__(self)
        self.slavename = name
        self.password = password
        self.botmaster = None # no buildmaster yet
        self.slave_status = SlaveStatus(name)
        self.slave = None # a RemoteReference to the Bot, when connected
        self.slave_commands = None
        self.slavebuilders = {}
        self.max_builds = max_builds

        self.properties = Properties()
        self.properties.update(properties, "BuildSlave")
        self.properties.setProperty("slavename", name, "BuildSlave")

        self.lastMessageReceived = 0
        if isinstance(notify_on_missing, str):
            notify_on_missing = [notify_on_missing]
        self.notify_on_missing = notify_on_missing
        for i in notify_on_missing:
            assert isinstance(i, str)
        self.missing_timeout = missing_timeout
        self.missing_timer = None

    def update(self, new):
        """
        Given a new BuildSlave, configure this one identically.  Because
        BuildSlave objects are remotely referenced, we can't replace them
        without disconnecting the slave, yet there's no reason to do that.
        """
        # the reconfiguration logic should guarantee this:
        assert self.slavename == new.slavename
        assert self.password == new.password
        assert self.__class__ == new.__class__
        self.max_builds = new.max_builds

    def __repr__(self):
        if self.botmaster:
            builders = self.botmaster.getBuildersForSlave(self.slavename)
            return "<%s '%s', current builders: %s>" % \
               (self.__class__.__name__, self.slavename,
                ','.join(map(lambda b: b.name, builders)))
        else:
            return "<%s '%s', (no builders yet)>" % \
                (self.__class__.__name__, self.slavename)

    def setBotmaster(self, botmaster):
        assert not self.botmaster, "BuildSlave already has a botmaster"
        self.botmaster = botmaster
        self.startMissingTimer()

    def stopMissingTimer(self):
        if self.missing_timer:
            self.missing_timer.cancel()
            self.missing_timer = None

    def startMissingTimer(self):
        if self.notify_on_missing and self.missing_timeout and self.parent:
            self.stopMissingTimer() # in case it's already running
            self.missing_timer = reactor.callLater(self.missing_timeout,
                                                   self._missing_timer_fired)

    def _missing_timer_fired(self):
        self.missing_timer = None
        # notify people, but only if we're still in the config
        if not self.parent:
            return

        buildmaster = self.botmaster.parent
        status = buildmaster.getStatus()
        text = "The Buildbot working for '%s'\n" % status.getProjectName()
        text += ("has noticed that the buildslave named %s went away\n" %
                 self.slavename)
        text += "\n"
        text += ("It last disconnected at %s (buildmaster-local time)\n" %
                 time.ctime(time.time() - self.missing_timeout)) # approx
        text += "\n"
        text += "The admin on record (as reported by BUILDSLAVE:info/admin)\n"
        text += "was '%s'.\n" % self.slave_status.getAdmin()
        text += "\n"
        text += "Sincerely,\n"
        text += " The Buildbot\n"
        text += " %s\n" % status.getProjectURL()
        subject = "Buildbot: buildslave %s was lost" % self.slavename
        return self._mail_missing_message(subject, text)

    def updateSlave(self):
        """Called to add or remove builders after the slave has connected.

        @return: a Deferred that indicates when an attached slave has
        accepted the new builders and/or released the old ones."""
        if self.slave:
            return self.sendBuilderList()
        else:
            return defer.succeed(None)

    def updateSlaveStatus(self, buildStarted=None, buildFinished=None):
        if buildStarted:
            self.slave_status.buildStarted(buildStarted)
        if buildFinished:
            self.slave_status.buildFinished(buildFinished)

    def attached(self, bot):
        """This is called when the slave connects.

        @return: a Deferred that fires with a suitable pb.IPerspective to
                 give to the slave (i.e. 'self')"""

        if self.slave:
            # uh-oh, we've got a duplicate slave. The most likely
            # explanation is that the slave is behind a slow link, thinks we
            # went away, and has attempted to reconnect, so we've got two
            # "connections" from the same slave, but the previous one is
            # stale. Give the new one precedence.
            log.msg("duplicate slave %s replacing old one" % self.slavename)

            # just in case we've got two identically-configured slaves,
            # report the IP addresses of both so someone can resolve the
            # squabble
            tport = self.slave.broker.transport
            log.msg("old slave was connected from", tport.getPeer())
            log.msg("new slave is from", bot.broker.transport.getPeer())
            d = self.disconnect()
        else:
            d = defer.succeed(None)
        # now we go through a sequence of calls, gathering information, then
        # tell the Botmaster that it can finally give this slave to all the
        # Builders that care about it.

        # we accumulate slave information in this 'state' dictionary, then
        # set it atomically if we make it far enough through the process
        state = {}

        # Reset graceful shutdown status
        self.slave_status.setGraceful(False)
        # We want to know when the graceful shutdown flag changes
        self.slave_status.addGracefulWatcher(self._gracefulChanged)

        def _log_attachment_on_slave(res):
            d1 = bot.callRemote("print", "attached")
            d1.addErrback(lambda why: None)
            return d1
        d.addCallback(_log_attachment_on_slave)

        def _get_info(res):
            d1 = bot.callRemote("getSlaveInfo")
            def _got_info(info):
                log.msg("Got slaveinfo from '%s'" % self.slavename)
                # TODO: info{} might have other keys
                state["admin"] = info.get("admin")
                state["host"] = info.get("host")
            def _info_unavailable(why):
                # maybe an old slave, doesn't implement remote_getSlaveInfo
                log.msg("BuildSlave.info_unavailable")
                log.err(why)
            d1.addCallbacks(_got_info, _info_unavailable)
            return d1
        d.addCallback(_get_info)

        def _get_commands(res):
            d1 = bot.callRemote("getCommands")
            def _got_commands(commands):
                state["slave_commands"] = commands
            def _commands_unavailable(why):
                # probably an old slave
                log.msg("BuildSlave._commands_unavailable")
                if why.check(AttributeError):
                    return
                log.err(why)
            d1.addCallbacks(_got_commands, _commands_unavailable)
            return d1
        d.addCallback(_get_commands)

        def _accept_slave(res):
            self.slave_status.setAdmin(state.get("admin"))
            self.slave_status.setHost(state.get("host"))
            self.slave_status.setConnected(True)
            self.slave_commands = state.get("slave_commands")
            self.slave = bot
            log.msg("bot attached")
            self.messageReceivedFromSlave()
            self.stopMissingTimer()

            return self.updateSlave()
        d.addCallback(_accept_slave)
        d.addCallback(lambda res: self.botmaster.maybeStartAllBuilds())

        # Finally, the slave gets a reference to this BuildSlave. They
        # receive this later, after we've started using them.
        d.addCallback(lambda res: self)
        return d

    def messageReceivedFromSlave(self):
        now = time.time()
        self.lastMessageReceived = now
        self.slave_status.setLastMessageReceived(now)

    def detached(self, mind):
        self.slave = None
        self.slave_status.removeGracefulWatcher(self._gracefulChanged)
        self.slave_status.setConnected(False)
        log.msg("BuildSlave.detached(%s)" % self.slavename)

    def disconnect(self):
        """Forcibly disconnect the slave.

        This severs the TCP connection and returns a Deferred that will fire
        (with None) when the connection is probably gone.

        If the slave is still alive, they will probably try to reconnect
        again in a moment.

        This is called in two circumstances. The first is when a slave is
        removed from the config file. In this case, when they try to
        reconnect, they will be rejected as an unknown slave. The second is
        when we wind up with two connections for the same slave, in which
        case we disconnect the older connection.
        """

        if not self.slave:
            return defer.succeed(None)
        log.msg("disconnecting old slave %s now" % self.slavename)
        # When this Deferred fires, we'll be ready to accept the new slave
        return self._disconnect(self.slave)

    def _disconnect(self, slave):
        # all kinds of teardown will happen as a result of
        # loseConnection(), but it happens after a reactor iteration or
        # two. Hook the actual disconnect so we can know when it is safe
        # to connect the new slave. We have to wait one additional
        # iteration (with callLater(0)) to make sure the *other*
        # notifyOnDisconnect handlers have had a chance to run.
        d = defer.Deferred()

        # notifyOnDisconnect runs the callback with one argument, the
        # RemoteReference being disconnected.
        def _disconnected(rref):
            reactor.callLater(0, d.callback, None)
        slave.notifyOnDisconnect(_disconnected)
        tport = slave.broker.transport
        # this is the polite way to request that a socket be closed
        tport.loseConnection()
        try:
            # but really we don't want to wait for the transmit queue to
            # drain. The remote end is unlikely to ACK the data, so we'd
            # probably have to wait for a (20-minute) TCP timeout.
            #tport._closeSocket()
            # however, doing _closeSocket (whether before or after
            # loseConnection) somehow prevents the notifyOnDisconnect
            # handlers from being run. Bummer.
            tport.offset = 0
            tport.dataBuffer = ""
        except Exception:
            # however, these hacks are pretty internal, so don't blow up if
            # they fail or are unavailable
            log.msg("failed to accelerate the shutdown process")
        log.msg("waiting for slave to finish disconnecting")

        return d

    def sendBuilderList(self):
        our_builders = self.botmaster.getBuildersForSlave(self.slavename)
        blist = [(b.name, b.builddir) for b in our_builders]
        d = self.slave.callRemote("setBuilderList", blist)
        return d

    def perspective_keepalive(self):
        pass

    def addSlaveBuilder(self, sb):
        if sb.builder_name not in self.slavebuilders:
            log.msg("%s adding %s" % (self, sb))
        elif sb is not self.slavebuilders[sb.builder_name]:
            log.msg("%s replacing %s" % (self, sb))
        else:
            return
        self.slavebuilders[sb.builder_name] = sb

    def removeSlaveBuilder(self, sb):
        try:
            del self.slavebuilders[sb.builder_name]
        except KeyError:
            pass
        else:
            log.msg("%s removed %s" % (self, sb))

    def canStartBuild(self):
        """
        I am called when a build is requested to see if this buildslave
        can start a build.  This function can be used to limit overall
        concurrency on the buildslave.
        """
        # If we're waiting to shutdown gracefully, then we shouldn't
        # accept any new jobs.
        if self.slave_status.getGraceful():
            return False

        if self.max_builds:
            active_builders = [sb for sb in self.slavebuilders.values()
                               if sb.isBusy()]
            if len(active_builders) >= self.max_builds:
                return False
        return True

    def _mail_missing_message(self, subject, text):
        # first, see if we have a MailNotifier we can use. This gives us a
        # fromaddr and a relayhost.
        buildmaster = self.botmaster.parent
        for st in buildmaster.statusTargets:
            if isinstance(st, MailNotifier):
                break
        else:
            # if not, they get a default MailNotifier, which always uses SMTP
            # to localhost and uses a dummy fromaddr of "buildbot".
            log.msg("buildslave-missing msg using default MailNotifier")
            st = MailNotifier("buildbot")
        # now construct the mail

        m = Message()
        m.set_payload(text)
        m['Date'] = formatdate(localtime=True)
        m['Subject'] = subject
        m['From'] = st.fromaddr
        recipients = self.notify_on_missing
        m['To'] = ", ".join(recipients)
        d = st.sendMessage(m, recipients)
        # return the Deferred for testing purposes
        return d

    def _gracefulChanged(self, graceful):
        """This is called when our graceful shutdown setting changes"""
        if graceful:
            active_builders = [sb for sb in self.slavebuilders.values()
                               if sb.isBusy()]
            if len(active_builders) == 0:
                # Shut down!
                self.shutdown()

    def shutdown(self):
        """Shutdown the slave"""
        # Look for a builder with a remote reference to the client side
        # slave.  If we can find one, then call "shutdown" on the remote
        # builder, which will cause the slave buildbot process to exit.
        d = None
        for b in self.slavebuilders.values():
            if b.remote:
                d = b.remote.callRemote("shutdown")
                break

        if d:
            log.msg("Shutting down slave: %s" % self.slavename)
            # The remote shutdown call will not complete successfully since the
            # buildbot process exits almost immediately after getting the
            # shutdown request.
            # Here we look at the reason why the remote call failed, and if
            # it's because the connection was lost, that means the slave
            # shutdown as expected.
            def _errback(why):
                if why.check(twisted.spread.pb.PBConnectionLost):
                    log.msg("Lost connection to %s" % self.slavename)
                else:
                    log.err("Unexpected error when trying to shutdown %s" % self.slavename)
            d.addErrback(_errback)
            return d
        log.err("Couldn't find remote builder to shut down slave")
        return defer.succeed(None)
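The admission policy in canStartBuild() boils down to two checks: refuse work while draining for a graceful shutdown, and refuse work at the max_builds cap. The sketch below restates it with plain objects so the arithmetic can be exercised directly; BusyStub is hypothetical, and only its isBusy() matters:

class BusyStub:
    """Hypothetical stand-in for a SlaveBuilder; only isBusy() is used."""
    def __init__(self, busy):
        self._busy = busy
    def isBusy(self):
        return self._busy

def can_start_build(slavebuilders, max_builds, graceful):
    if graceful:          # draining for graceful shutdown: refuse new work
        return False
    if max_builds:
        active = [sb for sb in slavebuilders if sb.isBusy()]
        if len(active) >= max_builds:
            return False  # already at the concurrency cap
    return True

builders = [BusyStub(True), BusyStub(False)]
assert can_start_build(builders, max_builds=2, graceful=False)
assert not can_start_build(builders, max_builds=1, graceful=False)
assert not can_start_build(builders, max_builds=None, graceful=True)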
Example #3
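This revision swaps NewCredPerspective for pb.Avatar and adds locks, keepalives, and path-module detection, so its prologue differs from the earlier examples. Again a reconstruction; the buildbot-internal path is an assumption:

from twisted.spread import pb
from twisted.python.reflect import namedModule
from buildbot.locks import LockAccess    # assumed path
# plus the same time/email/log/defer/reactor/service imports as Example #1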
class AbstractBuildSlave(pb.Avatar, service.MultiService):
    """This is the master-side representative for a remote buildbot slave.
    There is exactly one for each slave described in the config file (the
    c['slaves'] list). When buildbots connect in (.attach), they get a
    reference to this instance. The BotMaster object is stashed as the
    .botmaster attribute. The BotMaster is also our '.parent' Service.

    I represent a build slave -- a remote machine capable of
    running builds.  I am instantiated by the configuration file, and can be
    subclassed to add extra functionality."""

    implements(IBuildSlave)
    keepalive_timer = None
    keepalive_interval = None

    def __init__(self, name, password, max_builds=None,
                 notify_on_missing=[], missing_timeout=3600,
                 properties={}, locks=None, keepalive_interval=3600):
        """
        @param name: botname this machine will supply when it connects
        @param password: password this machine will supply when
                         it connects
        @param max_builds: maximum number of simultaneous builds that will
                           be run concurrently on this buildslave (the
                           default is None for no limit)
        @param properties: properties that will be applied to builds run on
                           this slave
        @type properties: dictionary
        @param locks: A list of locks that must be acquired before this slave
                      can be used
        @type locks: list
        """
        service.MultiService.__init__(self)
        self.slavename = name
        self.password = password
        self.botmaster = None # no buildmaster yet
        self.slave_status = SlaveStatus(name)
        self.slave = None # a RemoteReference to the Bot, when connected
        self.slave_commands = None
        self.slavebuilders = {}
        self.max_builds = max_builds
        self.access = []
        if locks:
            self.access = locks

        self.properties = Properties()
        self.properties.update(properties, "BuildSlave")
        self.properties.setProperty("slavename", name, "BuildSlave")

        self.lastMessageReceived = 0
        if isinstance(notify_on_missing, str):
            notify_on_missing = [notify_on_missing]
        self.notify_on_missing = notify_on_missing
        for i in notify_on_missing:
            assert isinstance(i, str)
        self.missing_timeout = missing_timeout
        self.missing_timer = None
        self.keepalive_interval = keepalive_interval

        self._old_builder_list = None

    def update(self, new):
        """
        Given a new BuildSlave, configure this one identically.  Because
        BuildSlave objects are remotely referenced, we can't replace them
        without disconnecting the slave, yet there's no reason to do that.
        """
        # the reconfiguration logic should guarantee this:
        assert self.slavename == new.slavename
        assert self.password == new.password
        assert self.__class__ == new.__class__
        self.max_builds = new.max_builds
        self.access = new.access
        self.notify_on_missing = new.notify_on_missing
        self.missing_timeout = new.missing_timeout

        self.properties = Properties()
        self.properties.updateFromProperties(new.properties)

        if self.botmaster:
            self.updateLocks()

    def __repr__(self):
        if self.botmaster:
            builders = self.botmaster.getBuildersForSlave(self.slavename)
            return "<%s '%s', current builders: %s>" % \
               (self.__class__.__name__, self.slavename,
                ','.join(map(lambda b: b.name, builders)))
        else:
            return "<%s '%s', (no builders yet)>" % \
                (self.__class__.__name__, self.slavename)

    def updateLocks(self):
        # convert locks into their real form
        locks = []
        for access in self.access:
            if not isinstance(access, LockAccess):
                access = access.defaultAccess()
            lock = self.botmaster.getLockByID(access.lockid)
            locks.append((lock, access))
        self.locks = [(l.getLock(self), la) for l, la in locks]

    def locksAvailable(self):
        """
        I am called to see if all the locks I depend on are available;
        if so I return True, otherwise I return False.
        """
        if not self.locks:
            return True
        for lock, access in self.locks:
            if not lock.isAvailable(access):
                return False
        return True

    def acquireLocks(self):
        """
        I am called when a build is preparing to run. I try to claim all
        the locks that are needed for a build to happen. If I can't, then
        my caller should give up the build and try to get another slave
        to look at it.
        """
        log.msg("acquireLocks(slave %s, locks %s)" % (self, self.locks))
        if not self.locksAvailable():
            log.msg("slave %s can't lock, giving up" % (self, ))
            return False
        # all locks are available, claim them all
        for lock, access in self.locks:
            lock.claim(self, access)
        return True

    def releaseLocks(self):
        """
        I am called to release any locks after a build has finished
        """
        log.msg("releaseLocks(%s): %s" % (self, self.locks))
        for lock, access in self.locks:
            lock.release(self, access)

    def setBotmaster(self, botmaster):
        assert not self.botmaster, "BuildSlave already has a botmaster"
        self.botmaster = botmaster
        self.updateLocks()
        self.startMissingTimer()

    def stopMissingTimer(self):
        if self.missing_timer:
            self.missing_timer.cancel()
            self.missing_timer = None

    def startMissingTimer(self):
        if self.notify_on_missing and self.missing_timeout and self.parent:
            self.stopMissingTimer() # in case it's already running
            self.missing_timer = reactor.callLater(self.missing_timeout,
                                                   self._missing_timer_fired)

    def doKeepalive(self):
        self.keepalive_timer = reactor.callLater(self.keepalive_interval,
                                                 self.doKeepalive)
        if not self.slave:
            return
        d = self.slave.callRemote("print", "Received keepalive from master")
        d.addErrback(log.msg, "Keepalive failed for '%s'" % (self.slavename, ))

    def stopKeepaliveTimer(self):
        if self.keepalive_timer:
            self.keepalive_timer.cancel()
            self.keepalive_timer = None

    def startKeepaliveTimer(self):
        assert self.keepalive_interval
        log.msg("Starting buildslave keepalive timer for '%s'" % \
                                        (self.slavename, ))
        self.doKeepalive()

    def recordConnectTime(self):
        if self.slave_status:
            self.slave_status.recordConnectTime()

    def isConnected(self):
        return self.slave

    def _missing_timer_fired(self):
        self.missing_timer = None
        # notify people, but only if we're still in the config
        if not self.parent:
            return

        buildmaster = self.botmaster.parent
        status = buildmaster.getStatus()
        text = "The Buildbot working for '%s'\n" % status.getProjectName()
        text += ("has noticed that the buildslave named %s went away\n" %
                 self.slavename)
        text += "\n"
        text += ("It last disconnected at %s (buildmaster-local time)\n" %
                 time.ctime(time.time() - self.missing_timeout)) # approx
        text += "\n"
        text += "The admin on record (as reported by BUILDSLAVE:info/admin)\n"
        text += "was '%s'.\n" % self.slave_status.getAdmin()
        text += "\n"
        text += "Sincerely,\n"
        text += " The Buildbot\n"
        text += " %s\n" % status.getProjectURL()
        subject = "Buildbot: buildslave %s was lost" % self.slavename
        return self._mail_missing_message(subject, text)

    def updateSlave(self):
        """Called to add or remove builders after the slave has connected.

        @return: a Deferred that indicates when an attached slave has
        accepted the new builders and/or released the old ones."""
        if self.slave:
            return self.sendBuilderList()
        else:
            return defer.succeed(None)

    def updateSlaveStatus(self, buildStarted=None, buildFinished=None):
        if buildStarted:
            self.slave_status.buildStarted(buildStarted)
        if buildFinished:
            self.slave_status.buildFinished(buildFinished)

    def attached(self, bot):
        """This is called when the slave connects.

        @return: a Deferred that fires when the attachment is complete
        """

        # the botmaster should ensure this.
        assert not self.isConnected()

        # now we go through a sequence of calls, gathering information, then
        # tell the Botmaster that it can finally give this slave to all the
        # Builders that care about it.

        # we accumulate slave information in this 'state' dictionary, then
        # set it atomically if we make it far enough through the process
        state = {}

        # Reset graceful shutdown status
        self.slave_status.setGraceful(False)
        # We want to know when the graceful shutdown flag changes
        self.slave_status.addGracefulWatcher(self._gracefulChanged)

        d = defer.succeed(None)
        def _log_attachment_on_slave(res):
            d1 = bot.callRemote("print", "attached")
            d1.addErrback(lambda why: None)
            return d1
        d.addCallback(_log_attachment_on_slave)

        def _get_info(res):
            d1 = bot.callRemote("getSlaveInfo")
            def _got_info(info):
                log.msg("Got slaveinfo from '%s'" % self.slavename)
                # TODO: info{} might have other keys
                state["admin"] = info.get("admin")
                state["host"] = info.get("host")
                state["access_uri"] = info.get("access_uri", None)
                state["slave_environ"] = info.get("environ", {})
                state["slave_basedir"] = info.get("basedir", None)
                state["slave_system"] = info.get("system", None)
            def _info_unavailable(why):
                why.trap(pb.NoSuchMethod)
                # maybe an old slave, doesn't implement remote_getSlaveInfo
                log.msg("BuildSlave.info_unavailable")
                log.err(why)
            d1.addCallbacks(_got_info, _info_unavailable)
            return d1
        d.addCallback(_get_info)
        self.startKeepaliveTimer()

        def _get_version(res):
            d1 = bot.callRemote("getVersion")
            def _got_version(version):
                state["version"] = version
            def _version_unavailable(why):
                why.trap(pb.NoSuchMethod)
                # probably an old slave
                state["version"] = '(unknown)'
            d1.addCallbacks(_got_version, _version_unavailable)
            return d1
        d.addCallback(_get_version)

        def _get_commands(res):
            d1 = bot.callRemote("getCommands")
            def _got_commands(commands):
                state["slave_commands"] = commands
            def _commands_unavailable(why):
                # probably an old slave
                log.msg("BuildSlave._commands_unavailable")
                if why.check(AttributeError):
                    return
                log.err(why)
            d1.addCallbacks(_got_commands, _commands_unavailable)
            return d1
        d.addCallback(_get_commands)

        def _accept_slave(res):
            self.slave_status.setAdmin(state.get("admin"))
            self.slave_status.setHost(state.get("host"))
            self.slave_status.setAccessURI(state.get("access_uri"))
            self.slave_status.setVersion(state.get("version"))
            self.slave_status.setConnected(True)
            self.slave_commands = state.get("slave_commands")
            self.slave_environ = state.get("slave_environ")
            self.slave_basedir = state.get("slave_basedir")
            self.slave_system = state.get("slave_system")
            self.slave = bot
            if self.slave_system == "win32":
                self.path_module = namedModule("win32path")
            else:
                # most eveything accepts / as separator, so posix should be a
                # reasonable fallback
                self.path_module = namedModule("posixpath")
            log.msg("bot attached")
            self.messageReceivedFromSlave()
            self.stopMissingTimer()
            self.botmaster.parent.status.slaveConnected(self.slavename)

            return self.updateSlave()
        d.addCallback(_accept_slave)
        d.addCallback(lambda res: self.botmaster.triggerNewBuildCheck())

        # Finally, the slave gets a reference to this BuildSlave. They
        # receive this later, after we've started using them.
        d.addCallback(lambda res: self)
        return d

    def messageReceivedFromSlave(self):
        now = time.time()
        self.lastMessageReceived = now
        self.slave_status.setLastMessageReceived(now)

    def detached(self, mind):
        self.slave = None
        self.slave_status.removeGracefulWatcher(self._gracefulChanged)
        self.slave_status.setConnected(False)
        log.msg("BuildSlave.detached(%s)" % self.slavename)
        self.botmaster.parent.status.slaveDisconnected(self.slavename)
        self.stopKeepaliveTimer()

    def disconnect(self):
        """Forcibly disconnect the slave.

        This severs the TCP connection and returns a Deferred that will fire
        (with None) when the connection is probably gone.

        If the slave is still alive, they will probably try to reconnect
        again in a moment.

        This is called in two circumstances. The first is when a slave is
        removed from the config file. In this case, when they try to
        reconnect, they will be rejected as an unknown slave. The second is
        when we wind up with two connections for the same slave, in which
        case we disconnect the older connection.
        """

        if not self.slave:
            return defer.succeed(None)
        log.msg("disconnecting old slave %s now" % self.slavename)
        # When this Deferred fires, we'll be ready to accept the new slave
        return self._disconnect(self.slave)

    def _disconnect(self, slave):
        # all kinds of teardown will happen as a result of
        # loseConnection(), but it happens after a reactor iteration or
        # two. Hook the actual disconnect so we can know when it is safe
        # to connect the new slave. We have to wait one additional
        # iteration (with callLater(0)) to make sure the *other*
        # notifyOnDisconnect handlers have had a chance to run.
        d = defer.Deferred()

        # notifyOnDisconnect runs the callback with one argument, the
        # RemoteReference being disconnected.
        def _disconnected(rref):
            reactor.callLater(0, d.callback, None)
        slave.notifyOnDisconnect(_disconnected)
        tport = slave.broker.transport
        # this is the polite way to request that a socket be closed
        tport.loseConnection()
        try:
            # but really we don't want to wait for the transmit queue to
            # drain. The remote end is unlikely to ACK the data, so we'd
            # probably have to wait for a (20-minute) TCP timeout.
            #tport._closeSocket()
            # however, doing _closeSocket (whether before or after
            # loseConnection) somehow prevents the notifyOnDisconnect
            # handlers from being run. Bummer.
            tport.offset = 0
            tport.dataBuffer = ""
        except Exception:
            # however, these hacks are pretty internal, so don't blow up if
            # they fail or are unavailable
            log.msg("failed to accelerate the shutdown process")
        log.msg("waiting for slave to finish disconnecting")

        return d

    def sendBuilderList(self):
        our_builders = self.botmaster.getBuildersForSlave(self.slavename)
        blist = [(b.name, b.slavebuilddir) for b in our_builders]
        if blist == self._old_builder_list:
            log.msg("Builder list is unchanged; not calling setBuilderList")
            return defer.succeed(None)

        d = self.slave.callRemote("setBuilderList", blist)
        def sentBuilderList(ign):
            self._old_builder_list = blist
            return ign
        d.addCallback(sentBuilderList)
        return d

    def perspective_keepalive(self):
        self.messageReceivedFromSlave()

    def perspective_shutdown(self):
        log.msg("slave %s wants to shut down" % self.slavename)
        self.slave_status.setGraceful(True)

    def addSlaveBuilder(self, sb):
        self.slavebuilders[sb.builder_name] = sb

    def removeSlaveBuilder(self, sb):
        try:
            del self.slavebuilders[sb.builder_name]
        except KeyError:
            pass

    def buildFinished(self, sb):
        """This is called when a build on this slave is finished."""
        raise NotImplementedError

    def canStartBuild(self):
        """
        I am called when a build is requested to see if this buildslave
        can start a build.  This function can be used to limit overall
        concurrency on the buildslave.
        """
        # If we're waiting to shutdown gracefully, then we shouldn't
        # accept any new jobs.
        if self.slave_status.getGraceful():
            return False

        if self.max_builds:
            active_builders = [sb for sb in self.slavebuilders.values()
                               if sb.isBusy()]
            if len(active_builders) >= self.max_builds:
                return False

        if not self.locksAvailable():
            return False

        return True

    def _mail_missing_message(self, subject, text):
        # first, see if we have a MailNotifier we can use. This gives us a
        # fromaddr and a relayhost.
        buildmaster = self.botmaster.parent
        for st in buildmaster.statusTargets:
            if isinstance(st, MailNotifier):
                break
        else:
            # if not, they get a default MailNotifier, which always uses SMTP
            # to localhost and uses a dummy fromaddr of "buildbot".
            log.msg("buildslave-missing msg using default MailNotifier")
            st = MailNotifier("buildbot")
        # now construct the mail

        m = Message()
        m.set_payload(text)
        m['Date'] = formatdate(localtime=True)
        m['Subject'] = subject
        m['From'] = st.fromaddr
        recipients = self.notify_on_missing
        m['To'] = ", ".join(recipients)
        d = st.sendMessage(m, recipients)
        # return the Deferred for testing purposes
        return d

    def _gracefulChanged(self, graceful):
        """This is called when our graceful shutdown setting changes"""
        self.maybeShutdown()

    @defer.deferredGenerator
    def shutdown(self):
        """Shutdown the slave"""
        if not self.slave:
            log.msg("no remote; slave is already shut down")
            return

        # First, try the "new" way - calling our own remote's shutdown
        # method.  The method was only added in 0.8.3, so ignore NoSuchMethod
        # failures.
        def new_way():
            d = self.slave.callRemote('shutdown')
            d.addCallback(lambda _ : True) # successful shutdown request
            def check_nsm(f):
                f.trap(pb.NoSuchMethod)
                return False # fall through to the old way
            d.addErrback(check_nsm)
            def check_connlost(f):
                f.trap(pb.PBConnectionLost)
                return True # the slave is gone, so call it finished
            d.addErrback(check_connlost)
            return d

        wfd = defer.waitForDeferred(new_way())
        yield wfd
        if wfd.getResult():
            return # done!

        # Now, the old way.  Look for a builder with a remote reference to the
        # client side slave.  If we can find one, then call "shutdown" on the
        # remote builder, which will cause the slave buildbot process to exit.
        def old_way():
            d = None
            for b in self.slavebuilders.values():
                if b.remote:
                    d = b.remote.callRemote("shutdown")
                    break

            if d:
                log.msg("Shutting down (old) slave: %s" % self.slavename)
                # The remote shutdown call will not complete successfully since the
                # buildbot process exits almost immediately after getting the
                # shutdown request.
                # Here we look at the reason why the remote call failed, and if
                # it's because the connection was lost, that means the slave
                # shutdown as expected.
                def _errback(why):
                    if why.check(pb.PBConnectionLost):
                        log.msg("Lost connection to %s" % self.slavename)
                    else:
                        log.err("Unexpected error when trying to shutdown %s" % self.slavename)
                d.addErrback(_errback)
                return d
            log.err("Couldn't find remote builder to shut down slave")
            return defer.succeed(None)
        wfd = defer.waitForDeferred(old_way())
        yield wfd
        wfd.getResult()

    def maybeShutdown(self):
        """Shut down this slave if it has been asked to shut down gracefully,
        and has no active builders."""
        if not self.slave_status.getGraceful():
            return
        active_builders = [sb for sb in self.slavebuilders.values()
                           if sb.isBusy()]
        if active_builders:
            return
        d = self.shutdown()
        d.addErrback(log.err, 'error while shutting down slave')
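The all-or-nothing claim in locksAvailable()/acquireLocks() can be illustrated with a toy lock. CountingLock below is hypothetical (Buildbot's real lock objects live in buildbot.locks); the point is the check-everything-then-claim-everything discipline, which avoids leaving a slave holding a partial set of locks:

class CountingLock:
    """Hypothetical lock allowing up to max_count simultaneous owners."""
    def __init__(self, max_count=1):
        self.max_count = max_count
        self.owners = []
    def isAvailable(self, access):
        return len(self.owners) < self.max_count
    def claim(self, owner, access):
        assert self.isAvailable(access)
        self.owners.append(owner)
    def release(self, owner, access):
        self.owners.remove(owner)

def acquire_all(owner, locks):
    # check every lock first; claim nothing unless all are free
    if not all(lock.isAvailable(access) for lock, access in locks):
        return False
    for lock, access in locks:
        lock.claim(owner, access)
    return True

locks = [(CountingLock(), "counting"), (CountingLock(2), "counting")]
assert acquire_all("slave1", locks)
assert not acquire_all("slave2", locks)   # first lock is now exhausted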