class AbstractBuildSlave(NewCredPerspective, service.MultiService): """This is the master-side representative for a remote buildbot slave. There is exactly one for each slave described in the config file (the c['slaves'] list). When buildbots connect in (.attach), they get a reference to this instance. The BotMaster object is stashed as the .botmaster attribute. The BotMaster is also our '.parent' Service. I represent a build slave -- a remote machine capable of running builds. I am instantiated by the configuration file, and can be subclassed to add extra functionality.""" implements(IBuildSlave) def __init__(self, name, password, max_builds=None, notify_on_missing=[], missing_timeout=3600, properties={}): """ @param name: botname this machine will supply when it connects @param password: password this machine will supply when it connects @param max_builds: maximum number of simultaneous builds that will be run concurrently on this buildslave (the default is None for no limit) @param properties: properties that will be applied to builds run on this slave @type properties: dictionary """ service.MultiService.__init__(self) self.slavename = name self.password = password self.botmaster = None # no buildmaster yet self.slave_status = SlaveStatus(name) self.slave = None # a RemoteReference to the Bot, when connected self.slave_commands = None self.slavebuilders = {} self.max_builds = max_builds self.properties = Properties() self.properties.update(properties, "BuildSlave") self.properties.setProperty("slavename", name, "BuildSlave") self.lastMessageReceived = 0 if isinstance(notify_on_missing, str): notify_on_missing = [notify_on_missing] self.notify_on_missing = notify_on_missing for i in notify_on_missing: assert isinstance(i, str) self.missing_timeout = missing_timeout self.missing_timer = None def update(self, new): """ Given a new BuildSlave, configure this one identically. Because BuildSlave objects are remotely referenced, we can't replace them without disconnecting the slave, yet there's no reason to do that. """ # the reconfiguration logic should guarantee this: assert self.slavename == new.slavename assert self.password == new.password assert self.__class__ == new.__class__ self.max_builds = new.max_builds def __repr__(self): if self.botmaster: builders = self.botmaster.getBuildersForSlave(self.slavename) return "<%s '%s', current builders: %s>" % \ (self.__class__.__name__, self.slavename, ','.join(map(lambda b: b.name, builders))) else: return "<%s '%s', (no builders yet)>" % \ (self.__class__.__name__, self.slavename) def setBotmaster(self, botmaster): assert not self.botmaster, "BuildSlave already has a botmaster" self.botmaster = botmaster self.startMissingTimer() def stopMissingTimer(self): if self.missing_timer: self.missing_timer.cancel() self.missing_timer = None def startMissingTimer(self): if self.notify_on_missing and self.missing_timeout and self.parent: self.stopMissingTimer() # in case it's already running self.missing_timer = reactor.callLater(self.missing_timeout, self._missing_timer_fired) def _missing_timer_fired(self): self.missing_timer = None # notify people, but only if we're still in the config if not self.parent: return buildmaster = self.botmaster.parent status = buildmaster.getStatus() text = "The Buildbot working for '%s'\n" % status.getProjectName() text += ("has noticed that the buildslave named %s went away\n" % self.slavename) text += "\n" text += ("It last disconnected at %s (buildmaster-local time)\n" % time.ctime(time.time() - self.missing_timeout)) # approx text += "\n" text += "The admin on record (as reported by BUILDSLAVE:info/admin)\n" text += "was '%s'.\n" % self.slave_status.getAdmin() text += "\n" text += "Sincerely,\n" text += " The Buildbot\n" text += " %s\n" % status.getProjectURL() subject = "Buildbot: buildslave %s was lost" % self.slavename return self._mail_missing_message(subject, text) def updateSlave(self): """Called to add or remove builders after the slave has connected. @return: a Deferred that indicates when an attached slave has accepted the new builders and/or released the old ones.""" if self.slave: return self.sendBuilderList() else: return defer.succeed(None) def updateSlaveStatus(self, buildStarted=None, buildFinished=None): if buildStarted: self.slave_status.buildStarted(buildStarted) if buildFinished: self.slave_status.buildFinished(buildFinished) def attached(self, bot): """This is called when the slave connects. @return: a Deferred that fires with a suitable pb.IPerspective to give to the slave (i.e. 'self')""" if self.slave: # uh-oh, we've got a duplicate slave. The most likely # explanation is that the slave is behind a slow link, thinks we # went away, and has attempted to reconnect, so we've got two # "connections" from the same slave, but the previous one is # stale. Give the new one precedence. log.msg("duplicate slave %s replacing old one" % self.slavename) # just in case we've got two identically-configured slaves, # report the IP addresses of both so someone can resolve the # squabble tport = self.slave.broker.transport log.msg("old slave was connected from", tport.getPeer()) log.msg("new slave is from", bot.broker.transport.getPeer()) d = self.disconnect() else: d = defer.succeed(None) # now we go through a sequence of calls, gathering information, then # tell the Botmaster that it can finally give this slave to all the # Builders that care about it. # we accumulate slave information in this 'state' dictionary, then # set it atomically if we make it far enough through the process state = {} # Reset graceful shutdown status self.slave_status.setGraceful(False) # We want to know when the graceful shutdown flag changes self.slave_status.addGracefulWatcher(self._gracefulChanged) def _log_attachment_on_slave(res): d1 = bot.callRemote("print", "attached") d1.addErrback(lambda why: None) return d1 d.addCallback(_log_attachment_on_slave) def _get_info(res): d1 = bot.callRemote("getSlaveInfo") def _got_info(info): log.msg("Got slaveinfo from '%s'" % self.slavename) # TODO: info{} might have other keys state["admin"] = info.get("admin") state["host"] = info.get("host") state["access_uri"] = info.get("access_uri", None) def _info_unavailable(why): # maybe an old slave, doesn't implement remote_getSlaveInfo log.msg("BuildSlave.info_unavailable") log.err(why) d1.addCallbacks(_got_info, _info_unavailable) return d1 d.addCallback(_get_info) def _get_version(res): d1 = bot.callRemote("getVersion") def _got_version(version): state["version"] = version def _version_unavailable(why): # probably an old slave log.msg("BuildSlave.version_unavailable") log.err(why) d1.addCallbacks(_got_version, _version_unavailable) d.addCallback(_get_version) def _get_commands(res): d1 = bot.callRemote("getCommands") def _got_commands(commands): state["slave_commands"] = commands def _commands_unavailable(why): # probably an old slave log.msg("BuildSlave._commands_unavailable") if why.check(AttributeError): return log.err(why) d1.addCallbacks(_got_commands, _commands_unavailable) return d1 d.addCallback(_get_commands) def _accept_slave(res): self.slave_status.setAdmin(state.get("admin")) self.slave_status.setHost(state.get("host")) self.slave_status.setAccessURI(state.get("access_uri")) self.slave_status.setVersion(state.get("version")) self.slave_status.setConnected(True) self.slave_commands = state.get("slave_commands") self.slave = bot log.msg("bot attached") self.messageReceivedFromSlave() self.stopMissingTimer() self.botmaster.parent.status.slaveConnected(self.slavename) return self.updateSlave() d.addCallback(_accept_slave) d.addCallback(lambda res: self.botmaster.maybeStartAllBuilds()) # Finally, the slave gets a reference to this BuildSlave. They # receive this later, after we've started using them. d.addCallback(lambda res: self) return d def messageReceivedFromSlave(self): now = time.time() self.lastMessageReceived = now self.slave_status.setLastMessageReceived(now) def detached(self, mind): self.slave = None self.slave_status.removeGracefulWatcher(self._gracefulChanged) self.slave_status.setConnected(False) log.msg("BuildSlave.detached(%s)" % self.slavename) self.botmaster.parent.status.slaveDisconnected(self.slavename) def disconnect(self): """Forcibly disconnect the slave. This severs the TCP connection and returns a Deferred that will fire (with None) when the connection is probably gone. If the slave is still alive, they will probably try to reconnect again in a moment. This is called in two circumstances. The first is when a slave is removed from the config file. In this case, when they try to reconnect, they will be rejected as an unknown slave. The second is when we wind up with two connections for the same slave, in which case we disconnect the older connection. """ if not self.slave: return defer.succeed(None) log.msg("disconnecting old slave %s now" % self.slavename) # When this Deferred fires, we'll be ready to accept the new slave return self._disconnect(self.slave) def _disconnect(self, slave): # all kinds of teardown will happen as a result of # loseConnection(), but it happens after a reactor iteration or # two. Hook the actual disconnect so we can know when it is safe # to connect the new slave. We have to wait one additional # iteration (with callLater(0)) to make sure the *other* # notifyOnDisconnect handlers have had a chance to run. d = defer.Deferred() # notifyOnDisconnect runs the callback with one argument, the # RemoteReference being disconnected. def _disconnected(rref): reactor.callLater(0, d.callback, None) slave.notifyOnDisconnect(_disconnected) tport = slave.broker.transport # this is the polite way to request that a socket be closed tport.loseConnection() try: # but really we don't want to wait for the transmit queue to # drain. The remote end is unlikely to ACK the data, so we'd # probably have to wait for a (20-minute) TCP timeout. #tport._closeSocket() # however, doing _closeSocket (whether before or after # loseConnection) somehow prevents the notifyOnDisconnect # handlers from being run. Bummer. tport.offset = 0 tport.dataBuffer = "" except: # however, these hacks are pretty internal, so don't blow up if # they fail or are unavailable log.msg("failed to accelerate the shutdown process") pass log.msg("waiting for slave to finish disconnecting") return d def sendBuilderList(self): our_builders = self.botmaster.getBuildersForSlave(self.slavename) blist = [(b.name, b.slavebuilddir) for b in our_builders] d = self.slave.callRemote("setBuilderList", blist) return d def perspective_keepalive(self): pass def addSlaveBuilder(self, sb): self.slavebuilders[sb.builder_name] = sb def removeSlaveBuilder(self, sb): try: del self.slavebuilders[sb.builder_name] except KeyError: pass def canStartBuild(self): """ I am called when a build is requested to see if this buildslave can start a build. This function can be used to limit overall concurrency on the buildslave. """ # If we're waiting to shutdown gracefully, then we shouldn't # accept any new jobs. if self.slave_status.getGraceful(): return False if self.max_builds: active_builders = [ sb for sb in self.slavebuilders.values() if sb.isBusy() ] if len(active_builders) >= self.max_builds: return False return True def _mail_missing_message(self, subject, text): # first, see if we have a MailNotifier we can use. This gives us a # fromaddr and a relayhost. buildmaster = self.botmaster.parent for st in buildmaster.statusTargets: if isinstance(st, MailNotifier): break else: # if not, they get a default MailNotifier, which always uses SMTP # to localhost and uses a dummy fromaddr of "buildbot". log.msg("buildslave-missing msg using default MailNotifier") st = MailNotifier("buildbot") # now construct the mail m = Message() m.set_payload(text) m['Date'] = formatdate(localtime=True) m['Subject'] = subject m['From'] = st.fromaddr recipients = self.notify_on_missing m['To'] = ", ".join(recipients) d = st.sendMessage(m, recipients) # return the Deferred for testing purposes return d def _gracefulChanged(self, graceful): """This is called when our graceful shutdown setting changes""" if graceful: active_builders = [ sb for sb in self.slavebuilders.values() if sb.isBusy() ] if len(active_builders) == 0: # Shut down! self.shutdown() def shutdown(self): """Shutdown the slave""" # Look for a builder with a remote reference to the client side # slave. If we can find one, then call "shutdown" on the remote # builder, which will cause the slave buildbot process to exit. d = None for b in self.slavebuilders.values(): if b.remote: d = b.remote.callRemote("shutdown") break if d: log.msg("Shutting down slave: %s" % self.slavename) # The remote shutdown call will not complete successfully since the # buildbot process exits almost immediately after getting the # shutdown request. # Here we look at the reason why the remote call failed, and if # it's because the connection was lost, that means the slave # shutdown as expected. def _errback(why): if why.check(twisted.spread.pb.PBConnectionLost): log.msg("Lost connection to %s" % self.slavename) else: log.err("Unexpected error when trying to shutdown %s" % self.slavename) d.addErrback(_errback) return d log.err("Couldn't find remote builder to shut down slave") return defer.succeed(None)
class AbstractBuildSlave(NewCredPerspective, service.MultiService): """This is the master-side representative for a remote buildbot slave. There is exactly one for each slave described in the config file (the c['slaves'] list). When buildbots connect in (.attach), they get a reference to this instance. The BotMaster object is stashed as the .botmaster attribute. The BotMaster is also our '.parent' Service. I represent a build slave -- a remote machine capable of running builds. I am instantiated by the configuration file, and can be subclassed to add extra functionality.""" implements(IBuildSlave) def __init__(self, name, password, max_builds=None, notify_on_missing=[], missing_timeout=3600, properties={}): """ @param name: botname this machine will supply when it connects @param password: password this machine will supply when it connects @param max_builds: maximum number of simultaneous builds that will be run concurrently on this buildslave (the default is None for no limit) @param properties: properties that will be applied to builds run on this slave @type properties: dictionary """ service.MultiService.__init__(self) self.slavename = name self.password = password self.botmaster = None # no buildmaster yet self.slave_status = SlaveStatus(name) self.slave = None # a RemoteReference to the Bot, when connected self.slave_commands = None self.slavebuilders = {} self.max_builds = max_builds self.properties = Properties() self.properties.update(properties, "BuildSlave") self.properties.setProperty("slavename", name, "BuildSlave") self.lastMessageReceived = 0 if isinstance(notify_on_missing, str): notify_on_missing = [notify_on_missing] self.notify_on_missing = notify_on_missing for i in notify_on_missing: assert isinstance(i, str) self.missing_timeout = missing_timeout self.missing_timer = None def update(self, new): """ Given a new BuildSlave, configure this one identically. Because BuildSlave objects are remotely referenced, we can't replace them without disconnecting the slave, yet there's no reason to do that. """ # the reconfiguration logic should guarantee this: assert self.slavename == new.slavename assert self.password == new.password assert self.__class__ == new.__class__ self.max_builds = new.max_builds def __repr__(self): if self.botmaster: builders = self.botmaster.getBuildersForSlave(self.slavename) return "<%s '%s', current builders: %s>" % \ (self.__class__.__name__, self.slavename, ','.join(map(lambda b: b.name, builders))) else: return "<%s '%s', (no builders yet)>" % \ (self.__class__.__name__, self.slavename) def setBotmaster(self, botmaster): assert not self.botmaster, "BuildSlave already has a botmaster" self.botmaster = botmaster self.startMissingTimer() def stopMissingTimer(self): if self.missing_timer: self.missing_timer.cancel() self.missing_timer = None def startMissingTimer(self): if self.notify_on_missing and self.missing_timeout and self.parent: self.stopMissingTimer() # in case it's already running self.missing_timer = reactor.callLater(self.missing_timeout, self._missing_timer_fired) def _missing_timer_fired(self): self.missing_timer = None # notify people, but only if we're still in the config if not self.parent: return buildmaster = self.botmaster.parent status = buildmaster.getStatus() text = "The Buildbot working for '%s'\n" % status.getProjectName() text += ("has noticed that the buildslave named %s went away\n" % self.slavename) text += "\n" text += ("It last disconnected at %s (buildmaster-local time)\n" % time.ctime(time.time() - self.missing_timeout)) # approx text += "\n" text += "The admin on record (as reported by BUILDSLAVE:info/admin)\n" text += "was '%s'.\n" % self.slave_status.getAdmin() text += "\n" text += "Sincerely,\n" text += " The Buildbot\n" text += " %s\n" % status.getProjectURL() subject = "Buildbot: buildslave %s was lost" % self.slavename return self._mail_missing_message(subject, text) def updateSlave(self): """Called to add or remove builders after the slave has connected. @return: a Deferred that indicates when an attached slave has accepted the new builders and/or released the old ones.""" if self.slave: return self.sendBuilderList() else: return defer.succeed(None) def updateSlaveStatus(self, buildStarted=None, buildFinished=None): if buildStarted: self.slave_status.buildStarted(buildStarted) if buildFinished: self.slave_status.buildFinished(buildFinished) def attached(self, bot): """This is called when the slave connects. @return: a Deferred that fires with a suitable pb.IPerspective to give to the slave (i.e. 'self')""" if self.slave: # uh-oh, we've got a duplicate slave. The most likely # explanation is that the slave is behind a slow link, thinks we # went away, and has attempted to reconnect, so we've got two # "connections" from the same slave, but the previous one is # stale. Give the new one precedence. log.msg("duplicate slave %s replacing old one" % self.slavename) # just in case we've got two identically-configured slaves, # report the IP addresses of both so someone can resolve the # squabble tport = self.slave.broker.transport log.msg("old slave was connected from", tport.getPeer()) log.msg("new slave is from", bot.broker.transport.getPeer()) d = self.disconnect() else: d = defer.succeed(None) # now we go through a sequence of calls, gathering information, then # tell the Botmaster that it can finally give this slave to all the # Builders that care about it. # we accumulate slave information in this 'state' dictionary, then # set it atomically if we make it far enough through the process state = {} # Reset graceful shutdown status self.slave_status.setGraceful(False) # We want to know when the graceful shutdown flag changes self.slave_status.addGracefulWatcher(self._gracefulChanged) def _log_attachment_on_slave(res): d1 = bot.callRemote("print", "attached") d1.addErrback(lambda why: None) return d1 d.addCallback(_log_attachment_on_slave) def _get_info(res): d1 = bot.callRemote("getSlaveInfo") def _got_info(info): log.msg("Got slaveinfo from '%s'" % self.slavename) # TODO: info{} might have other keys state["admin"] = info.get("admin") state["host"] = info.get("host") def _info_unavailable(why): # maybe an old slave, doesn't implement remote_getSlaveInfo log.msg("BuildSlave.info_unavailable") log.err(why) d1.addCallbacks(_got_info, _info_unavailable) return d1 d.addCallback(_get_info) def _get_commands(res): d1 = bot.callRemote("getCommands") def _got_commands(commands): state["slave_commands"] = commands def _commands_unavailable(why): # probably an old slave log.msg("BuildSlave._commands_unavailable") if why.check(AttributeError): return log.err(why) d1.addCallbacks(_got_commands, _commands_unavailable) return d1 d.addCallback(_get_commands) def _accept_slave(res): self.slave_status.setAdmin(state.get("admin")) self.slave_status.setHost(state.get("host")) self.slave_status.setConnected(True) self.slave_commands = state.get("slave_commands") self.slave = bot log.msg("bot attached") self.messageReceivedFromSlave() self.stopMissingTimer() return self.updateSlave() d.addCallback(_accept_slave) d.addCallback(lambda res: self.botmaster.maybeStartAllBuilds()) # Finally, the slave gets a reference to this BuildSlave. They # receive this later, after we've started using them. d.addCallback(lambda res: self) return d def messageReceivedFromSlave(self): now = time.time() self.lastMessageReceived = now self.slave_status.setLastMessageReceived(now) def detached(self, mind): self.slave = None self.slave_status.removeGracefulWatcher(self._gracefulChanged) self.slave_status.setConnected(False) log.msg("BuildSlave.detached(%s)" % self.slavename) def disconnect(self): """Forcibly disconnect the slave. This severs the TCP connection and returns a Deferred that will fire (with None) when the connection is probably gone. If the slave is still alive, they will probably try to reconnect again in a moment. This is called in two circumstances. The first is when a slave is removed from the config file. In this case, when they try to reconnect, they will be rejected as an unknown slave. The second is when we wind up with two connections for the same slave, in which case we disconnect the older connection. """ if not self.slave: return defer.succeed(None) log.msg("disconnecting old slave %s now" % self.slavename) # When this Deferred fires, we'll be ready to accept the new slave return self._disconnect(self.slave) def _disconnect(self, slave): # all kinds of teardown will happen as a result of # loseConnection(), but it happens after a reactor iteration or # two. Hook the actual disconnect so we can know when it is safe # to connect the new slave. We have to wait one additional # iteration (with callLater(0)) to make sure the *other* # notifyOnDisconnect handlers have had a chance to run. d = defer.Deferred() # notifyOnDisconnect runs the callback with one argument, the # RemoteReference being disconnected. def _disconnected(rref): reactor.callLater(0, d.callback, None) slave.notifyOnDisconnect(_disconnected) tport = slave.broker.transport # this is the polite way to request that a socket be closed tport.loseConnection() try: # but really we don't want to wait for the transmit queue to # drain. The remote end is unlikely to ACK the data, so we'd # probably have to wait for a (20-minute) TCP timeout. #tport._closeSocket() # however, doing _closeSocket (whether before or after # loseConnection) somehow prevents the notifyOnDisconnect # handlers from being run. Bummer. tport.offset = 0 tport.dataBuffer = "" except: # however, these hacks are pretty internal, so don't blow up if # they fail or are unavailable log.msg("failed to accelerate the shutdown process") pass log.msg("waiting for slave to finish disconnecting") return d def sendBuilderList(self): our_builders = self.botmaster.getBuildersForSlave(self.slavename) blist = [(b.name, b.builddir) for b in our_builders] d = self.slave.callRemote("setBuilderList", blist) return d def perspective_keepalive(self): pass def addSlaveBuilder(self, sb): if sb.builder_name not in self.slavebuilders: log.msg("%s adding %s" % (self, sb)) elif sb is not self.slavebuilders[sb.builder_name]: log.msg("%s replacing %s" % (self, sb)) else: return self.slavebuilders[sb.builder_name] = sb def removeSlaveBuilder(self, sb): try: del self.slavebuilders[sb.builder_name] except KeyError: pass else: log.msg("%s removed %s" % (self, sb)) def canStartBuild(self): """ I am called when a build is requested to see if this buildslave can start a build. This function can be used to limit overall concurrency on the buildslave. """ # If we're waiting to shutdown gracefully, then we shouldn't # accept any new jobs. if self.slave_status.getGraceful(): return False if self.max_builds: active_builders = [sb for sb in self.slavebuilders.values() if sb.isBusy()] if len(active_builders) >= self.max_builds: return False return True def _mail_missing_message(self, subject, text): # first, see if we have a MailNotifier we can use. This gives us a # fromaddr and a relayhost. buildmaster = self.botmaster.parent for st in buildmaster.statusTargets: if isinstance(st, MailNotifier): break else: # if not, they get a default MailNotifier, which always uses SMTP # to localhost and uses a dummy fromaddr of "buildbot". log.msg("buildslave-missing msg using default MailNotifier") st = MailNotifier("buildbot") # now construct the mail m = Message() m.set_payload(text) m['Date'] = formatdate(localtime=True) m['Subject'] = subject m['From'] = st.fromaddr recipients = self.notify_on_missing m['To'] = ", ".join(recipients) d = st.sendMessage(m, recipients) # return the Deferred for testing purposes return d def _gracefulChanged(self, graceful): """This is called when our graceful shutdown setting changes""" if graceful: active_builders = [sb for sb in self.slavebuilders.values() if sb.isBusy()] if len(active_builders) == 0: # Shut down! self.shutdown() def shutdown(self): """Shutdown the slave""" # Look for a builder with a remote reference to the client side # slave. If we can find one, then call "shutdown" on the remote # builder, which will cause the slave buildbot process to exit. d = None for b in self.slavebuilders.values(): if b.remote: d = b.remote.callRemote("shutdown") break if d: log.msg("Shutting down slave: %s" % self.slavename) # The remote shutdown call will not complete successfully since the # buildbot process exits almost immediately after getting the # shutdown request. # Here we look at the reason why the remote call failed, and if # it's because the connection was lost, that means the slave # shutdown as expected. def _errback(why): if why.check(twisted.spread.pb.PBConnectionLost): log.msg("Lost connection to %s" % self.slavename) else: log.err("Unexpected error when trying to shutdown %s" % self.slavename) d.addErrback(_errback) return d log.err("Couldn't find remote builder to shut down slave") return defer.succeed(None)
class AbstractBuildSlave(pb.Avatar, service.MultiService): """This is the master-side representative for a remote buildbot slave. There is exactly one for each slave described in the config file (the c['slaves'] list). When buildbots connect in (.attach), they get a reference to this instance. The BotMaster object is stashed as the .botmaster attribute. The BotMaster is also our '.parent' Service. I represent a build slave -- a remote machine capable of running builds. I am instantiated by the configuration file, and can be subclassed to add extra functionality.""" implements(IBuildSlave) keepalive_timer = None keepalive_interval = None def __init__(self, name, password, max_builds=None, notify_on_missing=[], missing_timeout=3600, properties={}, locks=None, keepalive_interval=3600): """ @param name: botname this machine will supply when it connects @param password: password this machine will supply when it connects @param max_builds: maximum number of simultaneous builds that will be run concurrently on this buildslave (the default is None for no limit) @param properties: properties that will be applied to builds run on this slave @type properties: dictionary @param locks: A list of locks that must be acquired before this slave can be used @type locks: dictionary """ service.MultiService.__init__(self) self.slavename = name self.password = password self.botmaster = None # no buildmaster yet self.slave_status = SlaveStatus(name) self.slave = None # a RemoteReference to the Bot, when connected self.slave_commands = None self.slavebuilders = {} self.max_builds = max_builds self.access = [] if locks: self.access = locks self.properties = Properties() self.properties.update(properties, "BuildSlave") self.properties.setProperty("slavename", name, "BuildSlave") self.lastMessageReceived = 0 if isinstance(notify_on_missing, str): notify_on_missing = [notify_on_missing] self.notify_on_missing = notify_on_missing for i in notify_on_missing: assert isinstance(i, str) self.missing_timeout = missing_timeout self.missing_timer = None self.keepalive_interval = keepalive_interval self._old_builder_list = None def update(self, new): """ Given a new BuildSlave, configure this one identically. Because BuildSlave objects are remotely referenced, we can't replace them without disconnecting the slave, yet there's no reason to do that. """ # the reconfiguration logic should guarantee this: assert self.slavename == new.slavename assert self.password == new.password assert self.__class__ == new.__class__ self.max_builds = new.max_builds self.access = new.access self.notify_on_missing = new.notify_on_missing self.missing_timeout = new.missing_timeout self.properties = Properties() self.properties.updateFromProperties(new.properties) if self.botmaster: self.updateLocks() def __repr__(self): if self.botmaster: builders = self.botmaster.getBuildersForSlave(self.slavename) return "<%s '%s', current builders: %s>" % \ (self.__class__.__name__, self.slavename, ','.join(map(lambda b: b.name, builders))) else: return "<%s '%s', (no builders yet)>" % \ (self.__class__.__name__, self.slavename) def updateLocks(self): # convert locks into their real form locks = [] for access in self.access: if not isinstance(access, LockAccess): access = access.defaultAccess() lock = self.botmaster.getLockByID(access.lockid) locks.append((lock, access)) self.locks = [(l.getLock(self), la) for l, la in locks] def locksAvailable(self): """ I am called to see if all the locks I depend on are available, in which I return True, otherwise I return False """ if not self.locks: return True for lock, access in self.locks: if not lock.isAvailable(access): return False return True def acquireLocks(self): """ I am called when a build is preparing to run. I try to claim all the locks that are needed for a build to happen. If I can't, then my caller should give up the build and try to get another slave to look at it. """ log.msg("acquireLocks(slave %s, locks %s)" % (self, self.locks)) if not self.locksAvailable(): log.msg("slave %s can't lock, giving up" % (self, )) return False # all locks are available, claim them all for lock, access in self.locks: lock.claim(self, access) return True def releaseLocks(self): """ I am called to release any locks after a build has finished """ log.msg("releaseLocks(%s): %s" % (self, self.locks)) for lock, access in self.locks: lock.release(self, access) def setBotmaster(self, botmaster): assert not self.botmaster, "BuildSlave already has a botmaster" self.botmaster = botmaster self.updateLocks() self.startMissingTimer() def stopMissingTimer(self): if self.missing_timer: self.missing_timer.cancel() self.missing_timer = None def startMissingTimer(self): if self.notify_on_missing and self.missing_timeout and self.parent: self.stopMissingTimer() # in case it's already running self.missing_timer = reactor.callLater(self.missing_timeout, self._missing_timer_fired) def doKeepalive(self): self.keepalive_timer = reactor.callLater(self.keepalive_interval, self.doKeepalive) if not self.slave: return d = self.slave.callRemote("print", "Received keepalive from master") d.addErrback(log.msg, "Keepalive failed for '%s'" % (self.slavename, )) def stopKeepaliveTimer(self): if self.keepalive_timer: self.keepalive_timer.cancel() def startKeepaliveTimer(self): assert self.keepalive_interval log.msg("Starting buildslave keepalive timer for '%s'" % \ (self.slavename, )) self.doKeepalive() def recordConnectTime(self): if self.slave_status: self.slave_status.recordConnectTime() def isConnected(self): return self.slave def _missing_timer_fired(self): self.missing_timer = None # notify people, but only if we're still in the config if not self.parent: return buildmaster = self.botmaster.parent status = buildmaster.getStatus() text = "The Buildbot working for '%s'\n" % status.getProjectName() text += ("has noticed that the buildslave named %s went away\n" % self.slavename) text += "\n" text += ("It last disconnected at %s (buildmaster-local time)\n" % time.ctime(time.time() - self.missing_timeout)) # approx text += "\n" text += "The admin on record (as reported by BUILDSLAVE:info/admin)\n" text += "was '%s'.\n" % self.slave_status.getAdmin() text += "\n" text += "Sincerely,\n" text += " The Buildbot\n" text += " %s\n" % status.getProjectURL() subject = "Buildbot: buildslave %s was lost" % self.slavename return self._mail_missing_message(subject, text) def updateSlave(self): """Called to add or remove builders after the slave has connected. @return: a Deferred that indicates when an attached slave has accepted the new builders and/or released the old ones.""" if self.slave: return self.sendBuilderList() else: return defer.succeed(None) def updateSlaveStatus(self, buildStarted=None, buildFinished=None): if buildStarted: self.slave_status.buildStarted(buildStarted) if buildFinished: self.slave_status.buildFinished(buildFinished) def attached(self, bot): """This is called when the slave connects. @return: a Deferred that fires when the attachment is complete """ # the botmaster should ensure this. assert not self.isConnected() # now we go through a sequence of calls, gathering information, then # tell the Botmaster that it can finally give this slave to all the # Builders that care about it. # we accumulate slave information in this 'state' dictionary, then # set it atomically if we make it far enough through the process state = {} # Reset graceful shutdown status self.slave_status.setGraceful(False) # We want to know when the graceful shutdown flag changes self.slave_status.addGracefulWatcher(self._gracefulChanged) d = defer.succeed(None) def _log_attachment_on_slave(res): d1 = bot.callRemote("print", "attached") d1.addErrback(lambda why: None) return d1 d.addCallback(_log_attachment_on_slave) def _get_info(res): d1 = bot.callRemote("getSlaveInfo") def _got_info(info): log.msg("Got slaveinfo from '%s'" % self.slavename) # TODO: info{} might have other keys state["admin"] = info.get("admin") state["host"] = info.get("host") state["access_uri"] = info.get("access_uri", None) state["slave_environ"] = info.get("environ", {}) state["slave_basedir"] = info.get("basedir", None) state["slave_system"] = info.get("system", None) def _info_unavailable(why): why.trap(pb.NoSuchMethod) # maybe an old slave, doesn't implement remote_getSlaveInfo log.msg("BuildSlave.info_unavailable") log.err(why) d1.addCallbacks(_got_info, _info_unavailable) return d1 d.addCallback(_get_info) self.startKeepaliveTimer() def _get_version(res): d = bot.callRemote("getVersion") def _got_version(version): state["version"] = version def _version_unavailable(why): why.trap(pb.NoSuchMethod) # probably an old slave state["version"] = '(unknown)' d.addCallbacks(_got_version, _version_unavailable) return d d.addCallback(_get_version) def _get_commands(res): d1 = bot.callRemote("getCommands") def _got_commands(commands): state["slave_commands"] = commands def _commands_unavailable(why): # probably an old slave log.msg("BuildSlave._commands_unavailable") if why.check(AttributeError): return log.err(why) d1.addCallbacks(_got_commands, _commands_unavailable) return d1 d.addCallback(_get_commands) def _accept_slave(res): self.slave_status.setAdmin(state.get("admin")) self.slave_status.setHost(state.get("host")) self.slave_status.setAccessURI(state.get("access_uri")) self.slave_status.setVersion(state.get("version")) self.slave_status.setConnected(True) self.slave_commands = state.get("slave_commands") self.slave_environ = state.get("slave_environ") self.slave_basedir = state.get("slave_basedir") self.slave_system = state.get("slave_system") self.slave = bot if self.slave_system == "win32": self.path_module = namedModule("win32path") else: # most eveything accepts / as separator, so posix should be a # reasonable fallback self.path_module = namedModule("posixpath") log.msg("bot attached") self.messageReceivedFromSlave() self.stopMissingTimer() self.botmaster.parent.status.slaveConnected(self.slavename) return self.updateSlave() d.addCallback(_accept_slave) d.addCallback(lambda res: self.botmaster.triggerNewBuildCheck()) # Finally, the slave gets a reference to this BuildSlave. They # receive this later, after we've started using them. d.addCallback(lambda res: self) return d def messageReceivedFromSlave(self): now = time.time() self.lastMessageReceived = now self.slave_status.setLastMessageReceived(now) def detached(self, mind): self.slave = None self.slave_status.removeGracefulWatcher(self._gracefulChanged) self.slave_status.setConnected(False) log.msg("BuildSlave.detached(%s)" % self.slavename) self.botmaster.parent.status.slaveDisconnected(self.slavename) self.stopKeepaliveTimer() def disconnect(self): """Forcibly disconnect the slave. This severs the TCP connection and returns a Deferred that will fire (with None) when the connection is probably gone. If the slave is still alive, they will probably try to reconnect again in a moment. This is called in two circumstances. The first is when a slave is removed from the config file. In this case, when they try to reconnect, they will be rejected as an unknown slave. The second is when we wind up with two connections for the same slave, in which case we disconnect the older connection. """ if not self.slave: return defer.succeed(None) log.msg("disconnecting old slave %s now" % self.slavename) # When this Deferred fires, we'll be ready to accept the new slave return self._disconnect(self.slave) def _disconnect(self, slave): # all kinds of teardown will happen as a result of # loseConnection(), but it happens after a reactor iteration or # two. Hook the actual disconnect so we can know when it is safe # to connect the new slave. We have to wait one additional # iteration (with callLater(0)) to make sure the *other* # notifyOnDisconnect handlers have had a chance to run. d = defer.Deferred() # notifyOnDisconnect runs the callback with one argument, the # RemoteReference being disconnected. def _disconnected(rref): reactor.callLater(0, d.callback, None) slave.notifyOnDisconnect(_disconnected) tport = slave.broker.transport # this is the polite way to request that a socket be closed tport.loseConnection() try: # but really we don't want to wait for the transmit queue to # drain. The remote end is unlikely to ACK the data, so we'd # probably have to wait for a (20-minute) TCP timeout. #tport._closeSocket() # however, doing _closeSocket (whether before or after # loseConnection) somehow prevents the notifyOnDisconnect # handlers from being run. Bummer. tport.offset = 0 tport.dataBuffer = "" except: # however, these hacks are pretty internal, so don't blow up if # they fail or are unavailable log.msg("failed to accelerate the shutdown process") log.msg("waiting for slave to finish disconnecting") return d def sendBuilderList(self): our_builders = self.botmaster.getBuildersForSlave(self.slavename) blist = [(b.name, b.slavebuilddir) for b in our_builders] if blist == self._old_builder_list: log.msg("Builder list is unchanged; not calling setBuilderList") return defer.succeed(None) d = self.slave.callRemote("setBuilderList", blist) def sentBuilderList(ign): self._old_builder_list = blist return ign d.addCallback(sentBuilderList) return d def perspective_keepalive(self): self.messageReceivedFromSlave() def perspective_shutdown(self): log.msg("slave %s wants to shut down" % self.slavename) self.slave_status.setGraceful(True) def addSlaveBuilder(self, sb): self.slavebuilders[sb.builder_name] = sb def removeSlaveBuilder(self, sb): try: del self.slavebuilders[sb.builder_name] except KeyError: pass def buildFinished(self, sb): """This is called when a build on this slave is finished.""" raise NotImplementedError def canStartBuild(self): """ I am called when a build is requested to see if this buildslave can start a build. This function can be used to limit overall concurrency on the buildslave. """ # If we're waiting to shutdown gracefully, then we shouldn't # accept any new jobs. if self.slave_status.getGraceful(): return False if self.max_builds: active_builders = [sb for sb in self.slavebuilders.values() if sb.isBusy()] if len(active_builders) >= self.max_builds: return False if not self.locksAvailable(): return False return True def _mail_missing_message(self, subject, text): # first, see if we have a MailNotifier we can use. This gives us a # fromaddr and a relayhost. buildmaster = self.botmaster.parent for st in buildmaster.statusTargets: if isinstance(st, MailNotifier): break else: # if not, they get a default MailNotifier, which always uses SMTP # to localhost and uses a dummy fromaddr of "buildbot". log.msg("buildslave-missing msg using default MailNotifier") st = MailNotifier("buildbot") # now construct the mail m = Message() m.set_payload(text) m['Date'] = formatdate(localtime=True) m['Subject'] = subject m['From'] = st.fromaddr recipients = self.notify_on_missing m['To'] = ", ".join(recipients) d = st.sendMessage(m, recipients) # return the Deferred for testing purposes return d def _gracefulChanged(self, graceful): """This is called when our graceful shutdown setting changes""" self.maybeShutdown() @defer.deferredGenerator def shutdown(self): """Shutdown the slave""" if not self.slave: log.msg("no remote; slave is already shut down") return # First, try the "new" way - calling our own remote's shutdown # method. The method was only added in 0.8.3, so ignore NoSuchMethod # failures. def new_way(): d = self.slave.callRemote('shutdown') d.addCallback(lambda _ : True) # successful shutdown request def check_nsm(f): f.trap(pb.NoSuchMethod) return False # fall through to the old way d.addErrback(check_nsm) def check_connlost(f): f.trap(pb.PBConnectionLost) return True # the slave is gone, so call it finished d.addErrback(check_connlost) return d wfd = defer.waitForDeferred(new_way()) yield wfd if wfd.getResult(): return # done! # Now, the old way. Look for a builder with a remote reference to the # client side slave. If we can find one, then call "shutdown" on the # remote builder, which will cause the slave buildbot process to exit. def old_way(): d = None for b in self.slavebuilders.values(): if b.remote: d = b.remote.callRemote("shutdown") break if d: log.msg("Shutting down (old) slave: %s" % self.slavename) # The remote shutdown call will not complete successfully since the # buildbot process exits almost immediately after getting the # shutdown request. # Here we look at the reason why the remote call failed, and if # it's because the connection was lost, that means the slave # shutdown as expected. def _errback(why): if why.check(pb.PBConnectionLost): log.msg("Lost connection to %s" % self.slavename) else: log.err("Unexpected error when trying to shutdown %s" % self.slavename) d.addErrback(_errback) return d log.err("Couldn't find remote builder to shut down slave") return defer.succeed(None) wfd = defer.waitForDeferred(old_way()) yield wfd wfd.getResult() def maybeShutdown(self): """Shut down this slave if it has been asked to shut down gracefully, and has no active builders.""" if not self.slave_status.getGraceful(): return active_builders = [sb for sb in self.slavebuilders.values() if sb.isBusy()] if active_builders: return d = self.shutdown() d.addErrback(log.err, 'error while shutting down slave')