def test_notify_no_waiters(self):
    """
    `Notifier.notify` does not raise when nobody is waiting.
    """
    notifier = Notifier()
    payload = object()
    notifier.notify(payload)
def reconfigService(self, name, password, build_wait_timeout=60 * 10, **kwargs):
    """
    Reset both notifiers, record ``build_wait_timeout`` and delegate the
    remaining arguments to the parent class.

    Returns whatever the parent's ``reconfigService`` returns.
    """
    self._substantiation_notifier = Notifier()
    self._insubstantiation_notifier = Notifier()
    self.build_wait_timeout = build_wait_timeout
    parent_result = super().reconfigService(name, password, **kwargs)
    return parent_result
def test_nonzero_waiters(self):
    """
    A ``Notifier`` with a pending waiter evaluates as `True`.
    """
    notifier = Notifier()
    notifier.wait()
    self.assertTrue(notifier)
def test_nonzero_cleared_waiters(self):
    """
    Once its waiters have been notified, a ``Notifier`` evaluates as
    `False` again.
    """
    notifier = Notifier()
    notifier.wait()
    payload = object()
    notifier.notify(payload)
    self.assertFalse(notifier)
def test_notify_failure(self):
    """
    Passing a failure to `Notifier.notify` errbacks the waiters.
    """
    notifier = Notifier()
    waiter = notifier.wait()
    error = Failure(TestException())
    notifier.notify(error)
    self.failureResultOf(waiter, TestException)
def test_notify_multiple_waiters(self):
    """
    With several waiters, `Notifier.notify` fires every deferred with the
    same value.
    """
    value = object()
    notifier = Notifier()
    first = notifier.wait()
    second = notifier.wait()
    notifier.notify(value)
    results = [self.successResultOf(first), self.successResultOf(second)]
    self.assertEqual(results, [value, value])
def checkConfig(self, name, password, build_wait_timeout=60 * 10, **kwargs):
    """
    Run the base worker's config check, then initialise the latent-worker
    state: a fresh substantiation notifier, the build wait timeout and an
    empty ``building`` set.
    """
    AbstractWorker.checkConfig(self, name, password, **kwargs)
    self._substantiation_notifier = Notifier()
    self.build_wait_timeout = build_wait_timeout
    self.building = set()
def reconfigService(self, name, build_wait_timeout=0, missing_timeout=DEFAULT_MISSING_TIMEOUT, **kwargs):
    """
    Reconfigure the machine: store both timeouts, verify that every
    attached worker provides ``ILatentWorker`` and reset the lifecycle
    state, notifiers and timers.

    Raises ``Exception`` for the first worker that is not latent.
    """
    super().reconfigService(name, **kwargs)
    self.build_wait_timeout = build_wait_timeout
    self.missing_timeout = missing_timeout

    for worker in self.workers:
        if interfaces.ILatentWorker.providedBy(worker):
            continue
        raise Exception('Worker is not latent {}'.format(worker.name))

    self.state = States.STOPPED
    self._start_notifier = Notifier()
    self._stop_notifier = Notifier()
    self._build_wait_timer = None
    self._missing_timer = None
def test_new_waiters_not_notified(self):
    """
    A waiter registered while a notification is being delivered is only
    fired by the following notification.
    """
    value = object()
    notifier = Notifier()
    late_waiters = []

    def register_late_waiter(_):
        late_waiters.append(notifier.wait())

    first = notifier.wait()
    first.addCallback(register_late_waiter)
    notifier.notify(object())
    late = late_waiters[0]
    self.assertNoResult(late)
    notifier.notify(value)
    self.assertEqual(self.successResultOf(late), value)
class AbstractLatentMachine(Machine):
    """Base class for a machine that is started and stopped on demand.

    Subclasses implement ``start_machine`` and ``stop_machine``.  The
    lifecycle is tracked in ``self.state`` using ``States.STOPPED``,
    ``States.STARTING``, ``States.STARTED`` and ``States.STOPPING``;
    ``_start_notifier`` / ``_stop_notifier`` let concurrent callers wait for
    an in-flight start or stop to finish.
    """
    # NOTE(review): statement nesting in this class was reconstructed from a
    # whitespace-collapsed source -- confirm against the canonical file.

    DEFAULT_MISSING_TIMEOUT = 20 * 60

    def checkConfig(self, name,
                    build_wait_timeout=0,
                    missing_timeout=DEFAULT_MISSING_TIMEOUT, **kwargs):
        """Validate the config via the parent and reset basic state.

        ``build_wait_timeout`` and ``missing_timeout`` are accepted here but
        not stored; ``reconfigService`` is what records them.
        """
        super().checkConfig(name, **kwargs)
        self.state = States.STOPPED
        self.latent_workers = []

    @defer.inlineCallbacks
    def reconfigService(self, name,
                        build_wait_timeout=0,
                        missing_timeout=DEFAULT_MISSING_TIMEOUT, **kwargs):
        """Apply a new configuration.

        Waits for the parent's ``reconfigService``, stores both timeouts,
        then resets state, notifiers and timers.

        Raises ``Exception`` if any worker in ``self.workers`` does not
        provide ``interfaces.ILatentWorker``.
        """
        yield super().reconfigService(name, **kwargs)
        self.build_wait_timeout = build_wait_timeout
        self.missing_timeout = missing_timeout

        for worker in self.workers:
            if not interfaces.ILatentWorker.providedBy(worker):
                raise Exception(f'Worker is not latent {worker.name}')

        self.state = States.STOPPED
        self._start_notifier = Notifier()
        self._stop_notifier = Notifier()
        self._build_wait_timer = None
        self._missing_timer = None

    def start_machine(self):
        """Start the machine; must be overridden by subclasses."""
        # Responsible for starting the machine. The function should return a
        # deferred which should result in True if the startup has been
        # successful, or False otherwise.
        raise NotImplementedError

    def stop_machine(self):
        """Stop the machine; must be overridden by subclasses."""
        # Responsible for shutting down the machine
        raise NotImplementedError

    @defer.inlineCallbacks
    def substantiate(self, starting_worker):
        """Bring the machine up, coalescing concurrent requests.

        ``starting_worker`` is not read by this implementation.

        Returns (via Deferred) the truthiness result of ``start_machine``;
        ``True`` immediately if the machine is already started; ``False``
        if ``start_machine`` raised.
        """
        if self.state == States.STOPPING:
            # wait until stop action finishes
            yield self._stop_notifier.wait()

        if self.state == States.STARTED:
            # may happen if we waited for stop to complete and in the mean
            # time the machine was successfully woken.
            return True

        # wait for already proceeding startup to finish, if any
        if self.state == States.STARTING:
            return (yield self._start_notifier.wait())

        self.state = States.STARTING

        # substantiate all workers that will start if we wake the machine. We
        # do so before waking the machine to guarantee that we're already
        # waiting for worker connection as waking may take time confirming
        # machine came online. We'll call substantiate on the worker that
        # invoked this function again, but that's okay as that function is
        # reentrant. Note that we substantiate without gathering results
        # because the original call to substantiate will get them anyway and
        # we don't want to be slowed down by other workers on the machine.
        for worker in self.workers:
            if worker.starts_without_substantiate:
                worker.substantiate(None, None)

        # Start the machine. We don't need to wait for any workers to actually
        # come online as that's handled in their substantiate() functions.
        try:
            ret = yield self.start_machine()
        except Exception as e:
            # A failed start is logged and reported to callers as False
            # rather than propagated.
            log.err(e, f'while starting latent machine {self.name}')
            ret = False

        if not ret:
            # Roll back: insubstantiate every worker, ignoring their errors.
            yield defer.DeferredList([worker.insubstantiate() for worker in self.workers],
                                     consumeErrors=True)
        else:
            self._setMissingTimer()

        self.state = States.STARTED if ret else States.STOPPED
        # Wake everyone who was waiting on this start attempt.
        self._start_notifier.notify(ret)

        return ret

    @defer.inlineCallbacks
    def _stop(self):
        """Stop the machine unless it is busy or currently starting.

        Does nothing while any worker is building or the state is
        ``STARTING``.  If a stop is already running, waits for it instead of
        starting another.  Always resolves to ``None``.
        """
        if any(worker.building for worker in self.workers) or \
                self.state == States.STARTING:
            return None

        if self.state == States.STOPPING:
            yield self._stop_notifier.wait()
            return None

        self.state = States.STOPPING

        # wait until workers insubstantiate, then stop
        yield defer.DeferredList([worker.insubstantiate() for worker in self.workers],
                                 consumeErrors=True)
        try:
            yield self.stop_machine()
        except Exception as e:
            # Errors are logged; the state still moves to STOPPED below.
            log.err(e, f'while stopping latent machine {self.name}')

        self.state = States.STOPPED
        self._stop_notifier.notify(None)
        return None

    def notifyBuildStarted(self):
        """Cancel the missing timer because a build has started."""
        self._clearMissingTimer()

    def notifyBuildFinished(self):
        """Arm the build-wait timer once no worker is building any more."""
        if any(worker.building for worker in self.workers):
            self._clearBuildWaitTimer()
        else:
            self._setBuildWaitTimer()

    def _clearMissingTimer(self):
        """Cancel (if still active) and forget the missing timer."""
        if self._missing_timer is not None:
            if self._missing_timer.active():
                self._missing_timer.cancel()
            self._missing_timer = None

    def _setMissingTimer(self):
        """(Re)schedule ``_stop`` after ``missing_timeout`` seconds."""
        self._clearMissingTimer()
        self._missing_timer = self.master.reactor.callLater(
            self.missing_timeout, self._stop)

    def _clearBuildWaitTimer(self):
        """Cancel (if still active) and forget the build-wait timer."""
        if self._build_wait_timer is not None:
            if self._build_wait_timer.active():
                self._build_wait_timer.cancel()
            self._build_wait_timer = None

    def _setBuildWaitTimer(self):
        """(Re)schedule ``_stop`` after ``build_wait_timeout`` seconds."""
        self._clearBuildWaitTimer()
        self._build_wait_timer = self.master.reactor.callLater(
            self.build_wait_timeout, self._stop)

    def __repr__(self):
        return f"<AbstractLatentMachine '{self.name}' at {id(self)}>"
class AbstractLatentWorker(AbstractWorker):

    """A worker that will start up a worker instance when needed.

    To use, subclass and implement start_instance and stop_instance.

    See ec2.py for a concrete example.

    Callers asking for the worker to come up wait on
    ``_substantiation_notifier``; a truthy notifier therefore means a
    substantiation is currently in progress.
    """
    # NOTE(review): statement nesting in this class was reconstructed from a
    # whitespace-collapsed source -- confirm against the canonical file.

    # Class-level defaults; instances overwrite them as state changes.
    substantiated = False
    substantiation_build = None
    insubstantiating = False
    build_wait_timer = None

    def checkConfig(self, name, password,
                    build_wait_timeout=60 * 10,
                    **kwargs):
        """Run the base config check and create the substantiation notifier."""
        AbstractWorker.checkConfig(self, name, password, **kwargs)
        self.build_wait_timeout = build_wait_timeout
        self._substantiation_notifier = Notifier()

    def reconfigService(self, name, password,
                        build_wait_timeout=60 * 10,
                        **kwargs):
        """Store ``build_wait_timeout`` and delegate to the base class."""
        self.build_wait_timeout = build_wait_timeout
        return AbstractWorker.reconfigService(self, name, password, **kwargs)

    @property
    def building(self):
        """Set of worker-for-builder objects that currently report busy."""
        # A LatentWorkerForBuilder will only be busy if it is building.
        return {wfb for wfb in itervalues(self.workerforbuilders)
                if wfb.isBusy()}

    def failed_to_start(self, instance_id, instance_state):
        """Log the failure and raise ``LatentWorkerFailedToSubstantiate``."""
        log.msg('%s %s failed to start instance %s (%s)' %
                (self.__class__.__name__, self.workername,
                    instance_id, instance_state))
        raise LatentWorkerFailedToSubstantiate(instance_id, instance_state)

    def start_instance(self, build):
        """Start the backing instance; must be overridden by subclasses."""
        # responsible for starting instance that will try to connect with this
        # master. Should return deferred with either True (instance started)
        # or False (instance not started, so don't run a build here). Problems
        # should use an errback.
        raise NotImplementedError

    def stop_instance(self, fast=False):
        """Stop the backing instance; must be overridden by subclasses."""
        # responsible for shutting down instance.
        raise NotImplementedError

    def substantiate(self, sb, build):
        """Request that the worker be brought up for ``build``.

        ``sb`` is not read by this implementation.

        Returns a Deferred: already fired with ``True`` when the worker is
        substantiated, otherwise one obtained from the substantiation
        notifier (shared with any substantiation already in progress).
        """
        if self.substantiated:
            # Already up: just restart the idle countdown.
            self._clearBuildWaitTimer()
            self._setBuildWaitTimer()
            return defer.succeed(True)
        if not self._substantiation_notifier:
            # No substantiation in flight yet -- start one.
            if self.parent and not self.missing_timer:
                # start timer. if timer times out, fail deferred
                self.missing_timer = self.master.reactor.callLater(
                    self.missing_timeout,
                    self._substantiation_failed, defer.TimeoutError())
            self.substantiation_build = build
            # if substantiate fails synchronously we need to have the deferred ready to be notified
            d = self._substantiation_notifier.wait()
            if self.conn is None:
                self._substantiate(build)
            # else: we're waiting for an old one to detach. the _substantiate
            # will be done in ``detached`` below.
            return d
        return self._substantiation_notifier.wait()

    def _substantiate(self, build):
        """Call ``start_instance`` and wire up result/failure handling.

        Returns the Deferred from ``start_instance`` with the handlers
        attached; failures are consumed by ``clean_up``.
        """
        # register event trigger
        d = self.start_instance(build)

        def start_instance_result(result):
            # If we don't report success, then preparation failed.
            if not result:
                msg = "Worker does not want to substantiate at this time"
                # NOTE(review): the notifier receives a bare exception
                # instance here, not a Failure -- confirm waiters treat this
                # as an error.
                self._substantiation_notifier.notify(LatentWorkerFailedToSubstantiate(self.name, msg))
                return None
            return result

        def clean_up(failure):
            # The parameter shadows the module-level ``failure`` name inside
            # this function.
            if self.missing_timer is not None:
                self.missing_timer.cancel()
                self._substantiation_failed(failure)
            # swallow the failure as it is given to notified
            return None
        d.addCallbacks(start_instance_result, clean_up)
        return d

    @defer.inlineCallbacks
    def attached(self, bot):
        """Handle a worker connection.

        Raises ``RuntimeError`` (after disconnecting ``bot``) when a
        connection arrives while no substantiation is pending and
        ``build_wait_timeout`` is non-negative.  On success marks the worker
        substantiated and notifies waiters with ``True``.
        """
        if not self._substantiation_notifier and self.build_wait_timeout >= 0:
            msg = 'Worker %s received connection while not trying to ' \
                'substantiate. Disconnecting.' % (self.name,)
            log.msg(msg)
            self._disconnect(bot)
            raise RuntimeError(msg)

        try:
            yield AbstractWorker.attached(self, bot)
        except Exception:
            # Capture the active exception as a Failure and treat it as a
            # failed substantiation.
            self._substantiation_failed(failure.Failure())
            return
        log.msg(r"Worker %s substantiated \o/" % (self.name,))

        self.substantiated = True
        if not self._substantiation_notifier:
            log.msg("No substantiation deferred for %s" % (self.name,))
        else:
            log.msg(
                "Firing %s substantiation deferred with success" % (self.name,))
            self.substantiation_build = None
            self._substantiation_notifier.notify(True)

    def attachBuilder(self, builder):
        """Attach this worker to the worker-for-builder of ``builder``."""
        sb = self.workerforbuilders.get(builder.name)
        return sb.attached(self, self.worker_commands)

    def detached(self):
        """Handle disconnection; restart substantiation if one is pending."""
        AbstractWorker.detached(self)
        if self._substantiation_notifier:
            d = self._substantiate(self.substantiation_build)
            d.addErrback(log.err, 'while re-substantiating')

    def _substantiation_failed(self, failure):
        """Report a failed substantiation and tear the instance down.

        Notifies waiters with ``failure`` (only while a substantiation build
        is recorded), starts ``insubstantiate`` and, when the worker is still
        configured and ``notify_on_missing`` is set, sends a mail.
        """
        self.missing_timer = None
        if self.substantiation_build:
            self.substantiation_build = None
            self._substantiation_notifier.notify(failure)
        d = self.insubstantiate()
        d.addErrback(log.err, 'while insubstantiating')
        # notify people, but only if we're still in the config
        if not self.parent or not self.notify_on_missing:
            return

        buildmaster = self.botmaster.master
        status = buildmaster.getStatus()
        text = "The Buildbot working for '%s'\n" % status.getTitle()
        text += ("has noticed that the latent worker named %s \n" %
                 self.name)
        text += "never substantiated after a request\n"
        text += "\n"
        text += ("The request was made at %s (buildmaster-local time)\n" %
                 time.ctime(time.time() - self.missing_timeout))  # approx
        text += "\n"
        text += "Sincerely,\n"
        text += " The Buildbot\n"
        text += " %s\n" % status.getTitleURL()
        subject = "Buildbot: worker %s never substantiated" % (self.name,)
        return self._mail_missing_message(subject, text)

    def canStartBuild(self):
        """Refuse builds while insubstantiating; otherwise ask the base class."""
        if self.insubstantiating:
            return False
        return AbstractWorker.canStartBuild(self)

    def buildStarted(self, sb):
        """Cancel the idle countdown because a build is starting."""
        self._clearBuildWaitTimer()

    def buildFinished(self, sb):
        """After a build, shut down immediately or arm the idle timer."""
        AbstractWorker.buildFinished(self, sb)

        if not self.building:
            if self.build_wait_timeout == 0:
                d = self.insubstantiate()
                # try starting builds for this worker after insubstantiating;
                # this will cause the worker to re-substantiate immediately if
                # there are pending build requests.
                d.addCallback(lambda _:
                              self.botmaster.maybeStartBuildsForWorker(self.workername))
            else:
                self._setBuildWaitTimer()

    def _clearBuildWaitTimer(self):
        """Cancel (if still active) and forget the build-wait timer."""
        if self.build_wait_timer is not None:
            if self.build_wait_timer.active():
                self.build_wait_timer.cancel()
            self.build_wait_timer = None

    def _setBuildWaitTimer(self):
        """Schedule ``_soft_disconnect`` after ``build_wait_timeout`` seconds.

        No timer is set when the timeout is zero or negative.
        """
        self._clearBuildWaitTimer()
        if self.build_wait_timeout <= 0:
            return
        self.build_wait_timer = self.master.reactor.callLater(
            self.build_wait_timeout, self._soft_disconnect)

    @defer.inlineCallbacks
    def insubstantiate(self, fast=False):
        """Stop the instance and fail any pending substantiation waiters."""
        self.insubstantiating = True
        self._clearBuildWaitTimer()
        d = self.stop_instance(fast)
        # Marked unsubstantiated before the stop has completed.
        self.substantiated = False
        yield d
        self.insubstantiating = False
        if self._substantiation_notifier:
            # notify waiters that substantiation was cancelled
            self._substantiation_notifier.notify(failure.Failure(Exception("cancelled")))
        self.botmaster.maybeStartBuildsForWorker(self.name)

    @defer.inlineCallbacks
    def _soft_disconnect(self, fast=False):
        """Disconnect and stop the instance, unless a build is running."""
        if self.building:
            # wait until build finished
            return
        # a negative build_wait_timeout means the worker should never be shut
        # down, so just disconnect.
        if self.build_wait_timeout < 0:
            yield AbstractWorker.disconnect(self)
            return

        if self.missing_timer:
            self.missing_timer.cancel()
            self.missing_timer = None

        # if master is stopping, we will never achieve consistent state, as workermanager
        # wont accept new connection
        if self._substantiation_notifier and self.master.running:
            log.msg("Weird: Got request to stop before started. Allowing "
                    "worker to start cleanly to avoid inconsistent state")
            yield self._substantiation_notifier.wait()
            self.substantiation_build = None
            log.msg("Substantiation complete, immediately terminating.")

        if self.conn is not None:
            # Disconnect and insubstantiate in parallel.
            yield defer.DeferredList([
                AbstractWorker.disconnect(self),
                self.insubstantiate(fast)
            ], consumeErrors=True, fireOnOneErrback=True)
        else:
            yield AbstractWorker.disconnect(self)
            yield self.stop_instance(fast)

    def disconnect(self):
        """Start a soft disconnect and report the worker as lost."""
        # This returns a Deferred but we don't use it
        self._soft_disconnect()
        # this removes the worker from all builders. It won't come back
        # without a restart (or maybe a sighup)
        self.botmaster.workerLost(self)

    @defer.inlineCallbacks
    def stopService(self):
        """Soft-disconnect if connected or substantiating, then stop."""
        if self.conn is not None or self._substantiation_notifier:
            yield self._soft_disconnect()
        res = yield AbstractWorker.stopService(self)
        defer.returnValue(res)

    def updateWorker(self):
        """Called to add or remove builders after the worker has connected.

        Also called after botmaster's builders are initially set.

        @return: a Deferred that indicates when an attached worker has
        accepted the new builders and/or released the old ones."""
        for b in self.botmaster.getBuildersForWorker(self.name):
            if b.name not in self.workerforbuilders:
                b.addLatentWorker(self)
        return AbstractWorker.updateWorker(self)
class AbstractLatentWorker(AbstractWorker):

    """A worker that will start up a worker instance when needed.

    To use, subclass and implement start_instance and stop_instance.

    See ec2.py for a concrete example.

    Callers asking for the worker to come up wait on
    ``_substantiation_notifier``; a truthy notifier therefore means a
    substantiation is currently in progress.  ``substantiated`` is derived
    from whether a connection exists.
    """
    # NOTE(review): statement nesting in this class was reconstructed from a
    # whitespace-collapsed source -- confirm against the canonical file.

    # Class-level defaults; instances overwrite them as state changes.
    substantiation_build = None
    insubstantiating = False
    build_wait_timer = None
    start_missing_on_startup = False

    def checkConfig(self, name, password,
                    build_wait_timeout=60 * 10,
                    **kwargs):
        """Run the base config check.

        ``build_wait_timeout`` is accepted here but only stored by
        ``reconfigService``.
        """
        AbstractWorker.checkConfig(self, name, password, **kwargs)

    def reconfigService(self, name, password,
                        build_wait_timeout=60 * 10,
                        **kwargs):
        """Reset the notifier, store the timeout, delegate to the base class."""
        self._substantiation_notifier = Notifier()
        self.build_wait_timeout = build_wait_timeout
        return AbstractWorker.reconfigService(self, name, password, **kwargs)

    def getRandomPass(self):
        """
        Compute a random 20-character password of ASCII letters and digits.

        There is no point to configure a password for a LatentWorker, as it
        is created by the master.  For supporting backend, a password can be
        generated by this API.
        """
        # The result is a credential, so draw from the OS entropy source
        # rather than the default (predictable) Mersenne Twister generator.
        rng = random.SystemRandom()
        return ''.join(
            rng.choice(string.ascii_letters + string.digits)
            for _ in range(20))

    @property
    def building(self):
        """Set of worker-for-builder objects that currently report busy."""
        # A LatentWorkerForBuilder will only be busy if it is building.
        return {wfb for wfb in itervalues(self.workerforbuilders)
                if wfb.isBusy()}

    def failed_to_start(self, instance_id, instance_state):
        """Log the failure and raise ``LatentWorkerFailedToSubstantiate``."""
        log.msg('%s %s failed to start instance %s (%s)' %
                (self.__class__.__name__, self.workername,
                    instance_id, instance_state))
        raise LatentWorkerFailedToSubstantiate(instance_id, instance_state)

    def start_instance(self, build):
        """Start the backing instance; must be overridden by subclasses."""
        # responsible for starting instance that will try to connect with this
        # master. Should return deferred with either True (instance started)
        # or False (instance not started, so don't run a build here). Problems
        # should use an errback.
        raise NotImplementedError

    def stop_instance(self, fast=False):
        """Stop the backing instance; must be overridden by subclasses."""
        # responsible for shutting down instance.
        raise NotImplementedError

    @property
    def substantiated(self):
        """True while a connection to the worker exists."""
        return self.conn is not None

    def substantiate(self, wfb, build):
        """Request that the worker be brought up for ``build``.

        ``wfb`` is not read by this implementation.

        Returns a Deferred: already fired with ``True`` when connected,
        otherwise one obtained from the substantiation notifier (shared with
        any substantiation already in progress).
        """
        if self.conn is not None:
            # Already up: just restart the idle countdown.
            self._clearBuildWaitTimer()
            self._setBuildWaitTimer()
            return defer.succeed(True)
        if not self._substantiation_notifier:
            # No substantiation in flight yet -- start one.
            self.startMissingTimer()
            self.substantiation_build = build
            # if substantiate fails synchronously we need to have the deferred
            # ready to be notified
            d = self._substantiation_notifier.wait()
            if self.conn is None:
                self._substantiate(build)
            # else: we're waiting for an old one to detach. the _substantiate
            # will be done in ``detached`` below.
            return d
        return self._substantiation_notifier.wait()

    def _substantiate(self, build):
        """Call ``start_instance`` and wire up result/failure handling.

        Returns a Deferred whose failures are consumed by ``clean_up``.
        """
        # register event trigger
        try:
            d = self.start_instance(build)
        except Exception:
            # if start_instance crashes without defer, we still handle the
            # cleanup
            d = defer.fail(failure.Failure())

        def start_instance_result(result):
            # If we don't report success, then preparation failed.
            # we let the errback handle the issue
            if not result:
                # this behaviour is kept as compatibility, but it is better
                # to just errback with a workable reason
                msg = "Worker does not want to substantiate at this time"
                return failure.Failure(LatentWorkerFailedToSubstantiate(self.name, msg))
            return result

        def clean_up(failure):
            # The parameter shadows the module-level ``failure`` name inside
            # this function.
            self.stopMissingTimer()
            self._substantiation_failed(failure)
            # swallow the failure as it is given to notified
            return None

        d.addCallback(start_instance_result)
        d.addErrback(clean_up)
        return d

    @defer.inlineCallbacks
    def attached(self, bot):
        """Handle a worker connection.

        Raises ``RuntimeError`` (after disconnecting ``bot``) when a
        connection arrives while no substantiation is pending and
        ``build_wait_timeout`` is non-negative.  On success notifies waiters
        with ``True``.
        """
        if not self._substantiation_notifier and self.build_wait_timeout >= 0:
            msg = 'Worker %s received connection while not trying to ' \
                'substantiate. Disconnecting.' % (self.name,)
            log.msg(msg)
            self._disconnect(bot)
            raise RuntimeError(msg)

        try:
            yield AbstractWorker.attached(self, bot)
        except Exception:
            # Capture the active exception as a Failure and treat it as a
            # failed substantiation.
            self._substantiation_failed(failure.Failure())
            return
        log.msg(r"Worker %s substantiated \o/" % (self.name,))

        if not self._substantiation_notifier:
            log.msg("No substantiation deferred for %s" % (self.name,))
        else:
            log.msg(
                "Firing %s substantiation deferred with success" % (self.name,))
            self.substantiation_build = None
            self._substantiation_notifier.notify(True)

    def attachBuilder(self, builder):
        """Attach this worker to the worker-for-builder of ``builder``."""
        wfb = self.workerforbuilders.get(builder.name)
        return wfb.attached(self, self.worker_commands)

    def detached(self):
        """Handle disconnection; restart substantiation if one is pending."""
        AbstractWorker.detached(self)
        if self._substantiation_notifier:
            d = self._substantiate(self.substantiation_build)
            d.addErrback(log.err, 'while re-substantiating')

    def _missing_timer_fired(self):
        """Treat expiry of the missing timer as a timed-out substantiation."""
        self.missing_timer = None
        return self._substantiation_failed(defer.TimeoutError())

    def _substantiation_failed(self, failure):
        """Report a failed substantiation and tear the instance down.

        Notifies waiters with ``failure`` (only while a substantiation build
        is recorded), starts ``insubstantiate`` and, when the worker is still
        configured and ``notify_on_missing`` is set, records the worker as
        missing through the data API.
        """
        if self.substantiation_build:
            self.substantiation_build = None
            self._substantiation_notifier.notify(failure)
        d = self.insubstantiate()
        d.addErrback(log.err, 'while insubstantiating')
        # notify people, but only if we're still in the config
        if not self.parent or not self.notify_on_missing:
            return

        return self.master.data.updates.workerMissing(
            workerid=self.workerid,
            masterid=self.master.masterid,
            last_connection="Latent worker never connected",
            notify=self.notify_on_missing
        )

    def canStartBuild(self):
        """Refuse builds while disconnected-but-busy or insubstantiating."""
        # we were disconnected, but all the builds are not yet cleaned up.
        if self.conn is None and self.building:
            return False

        if self.insubstantiating:
            return False
        return AbstractWorker.canStartBuild(self)

    def buildStarted(self, wfb):
        """Cancel the idle countdown because a build is starting."""
        assert wfb.isBusy()
        self._clearBuildWaitTimer()

    def buildFinished(self, wfb):
        """After a build, schedule shutdown or arm the idle timer."""
        assert not wfb.isBusy()
        if not self.building:
            if self.build_wait_timeout == 0:
                # we insubstantiate asynchronously to trigger more bugs with
                # the fake reactor
                self.master.reactor.callLater(0, self._soft_disconnect)
                # insubstantiate will automatically retry to create build for
                # this worker
            else:
                self._setBuildWaitTimer()

        # AbstractWorker.buildFinished() will try to start the next build for
        # that worker
        AbstractWorker.buildFinished(self, wfb)

    def _clearBuildWaitTimer(self):
        """Cancel (if still active) and forget the build-wait timer."""
        if self.build_wait_timer is not None:
            if self.build_wait_timer.active():
                self.build_wait_timer.cancel()
            self.build_wait_timer = None

    def _setBuildWaitTimer(self):
        """Schedule ``_soft_disconnect`` after ``build_wait_timeout`` seconds.

        No timer is set when the timeout is zero or negative.
        """
        self._clearBuildWaitTimer()
        if self.build_wait_timeout <= 0:
            return
        self.build_wait_timer = self.master.reactor.callLater(
            self.build_wait_timeout, self._soft_disconnect)

    @defer.inlineCallbacks
    def insubstantiate(self, fast=False):
        """Stop the instance and fail any pending substantiation waiters.

        Errors from ``stop_instance`` are logged, not propagated.
        """
        self.insubstantiating = True
        self._clearBuildWaitTimer()
        d = self.stop_instance(fast)
        try:
            yield d
        except Exception as e:
            # The case of failure for insubstantiation is bad as we have a left-over costing resource
            # There is not much thing to do here generically, so we must put the problem of stop_instance
            # reliability to the backend driver
            log.err(e, "while insubstantiating")

        self.insubstantiating = False
        if self._substantiation_notifier:
            self._substantiation_notifier.notify(
                failure.Failure(LatentWorkerSubstantiatiationCancelled()))
        self.botmaster.maybeStartBuildsForWorker(self.name)

    @defer.inlineCallbacks
    def _soft_disconnect(self, fast=False):
        """Disconnect and stop the instance, unless a build is running."""
        if self.building:
            # wait until build finished
            return
        # a negative build_wait_timeout means the worker should never be shut
        # down, so just disconnect.
        if self.build_wait_timeout < 0:
            yield AbstractWorker.disconnect(self)
            return

        self.stopMissingTimer()

        # if master is stopping, we will never achieve consistent state, as workermanager
        # wont accept new connection
        if self._substantiation_notifier and self.master.running:
            log.msg("Weird: Got request to stop before started. Allowing "
                    "worker to start cleanly to avoid inconsistent state")
            yield self._substantiation_notifier.wait()
            self.substantiation_build = None
            log.msg("Substantiation complete, immediately terminating.")

        if self.conn is not None:
            # Disconnect and insubstantiate in parallel.
            yield defer.DeferredList([
                AbstractWorker.disconnect(self),
                self.insubstantiate(fast)
            ], consumeErrors=True, fireOnOneErrback=True)
        else:
            yield AbstractWorker.disconnect(self)
            yield self.stop_instance(fast)

    def disconnect(self):
        """Start a soft disconnect and report the worker as lost."""
        # This returns a Deferred but we don't use it
        self._soft_disconnect()
        # this removes the worker from all builders. It won't come back
        # without a restart (or maybe a sighup)
        self.botmaster.workerLost(self)

    @defer.inlineCallbacks
    def stopService(self):
        """Soft-disconnect if needed, clear the idle timer, then stop."""
        if self.conn is not None or self._substantiation_notifier:
            yield self._soft_disconnect()
        self._clearBuildWaitTimer()
        res = yield AbstractWorker.stopService(self)
        defer.returnValue(res)

    def updateWorker(self):
        """Called to add or remove builders after the worker has connected.

        Also called after botmaster's builders are initially set.

        @return: a Deferred that indicates when an attached worker has
        accepted the new builders and/or released the old ones."""
        for b in self.botmaster.getBuildersForWorker(self.name):
            if b.name not in self.workerforbuilders:
                b.addLatentWorker(self)
        return AbstractWorker.updateWorker(self)
def test_nonzero_no_waiters(self):
    """
    A freshly created ``Notifier`` (no waiters) evaluates as `False`.
    """
    notifier = Notifier()
    self.assertFalse(notifier)
def _handle_disconnection_delivery_notifier(self):
    """Expose a notifier for the duration of disconnect-notification delivery.

    Generator: it yields the result of
    ``self.conn.waitForNotifyDisconnectedDelivered()``.
    NOTE(review): presumably driven by ``defer.inlineCallbacks`` applied
    outside the visible source -- confirm.
    """
    # Publish the notifier before waiting so others can wait on it.
    self._pending_disconnection_delivery_notifier = Notifier()
    yield self.conn.waitForNotifyDisconnectedDelivered()
    # Delivery finished: wake the waiters, then clear the attribute.
    # If the wait above raises, neither of the next two lines runs.
    self._pending_disconnection_delivery_notifier.notify(None)
    self._pending_disconnection_delivery_notifier = None
def test_wait(self):
    """
    The deferred returned by `Notifier.wait` has not fired yet.
    """
    notifier = Notifier()
    pending = notifier.wait()
    self.assertNoResult(pending)
class AbstractLatentWorker(AbstractWorker):

    """A worker that will start up a worker instance when needed.

    To use, subclass and implement start_instance and stop_instance.

    See ec2buildslave.py for a concrete example.  Also see the stub example in
    test/test_slaves.py.

    Callers asking for the worker to come up wait on
    ``_substantiation_notifier``; a truthy notifier therefore means a
    substantiation is currently in progress.
    """
    # NOTE(review): statement nesting in this class was reconstructed from a
    # whitespace-collapsed source -- confirm against the canonical file.

    implements(ILatentWorker)

    # Class-level defaults; instances overwrite them as state changes.
    substantiated = False
    substantiation_build = None
    insubstantiating = False
    build_wait_timer = None
    _shutdown_callback_handle = None

    def checkConfig(self, name, password,
                    build_wait_timeout=60 * 10,
                    **kwargs):
        """Run the base config check and initialise latent-worker state."""
        AbstractWorker.checkConfig(self, name, password, **kwargs)
        # Builder names with a build in progress (see buildStarted).
        self.building = set()
        self.build_wait_timeout = build_wait_timeout
        self._substantiation_notifier = Notifier()

    def reconfigService(self, name, password,
                        build_wait_timeout=60 * 10,
                        **kwargs):
        """Store ``build_wait_timeout`` and delegate to the base class."""
        self.build_wait_timeout = build_wait_timeout
        return AbstractWorker.reconfigService(self, name, password, **kwargs)

    def failed_to_start(self, instance_id, instance_state):
        """Log the failure and raise ``LatentWorkerFailedToSubstantiate``."""
        log.msg('%s %s failed to start instance %s (%s)' %
                (self.__class__.__name__, self.workername,
                    instance_id, instance_state))
        raise LatentWorkerFailedToSubstantiate(instance_id, instance_state)

    def start_instance(self, build):
        """Start the backing instance; must be overridden by subclasses."""
        # responsible for starting instance that will try to connect with this
        # master. Should return deferred with either True (instance started)
        # or False (instance not started, so don't run a build here). Problems
        # should use an errback.
        raise NotImplementedError

    def stop_instance(self, fast=False):
        """Stop the backing instance; must be overridden by subclasses."""
        # responsible for shutting down instance.
        raise NotImplementedError

    def substantiate(self, sb, build):
        """Request that the worker be brought up for ``build``.

        ``sb`` is not read by this implementation.

        Returns a Deferred: already fired with ``True`` when the worker is
        substantiated, otherwise one obtained from the substantiation
        notifier.
        """
        if self.substantiated:
            # Already up: just restart the idle countdown.
            self._clearBuildWaitTimer()
            self._setBuildWaitTimer()
            return defer.succeed(True)
        if not self._substantiation_notifier:
            # No substantiation in flight yet -- start one.
            if self.parent and not self.missing_timer:
                # start timer. if timer times out, fail deferred
                self.missing_timer = self.master.reactor.callLater(
                    self.missing_timeout,
                    self._substantiation_failed, defer.TimeoutError())
            self.substantiation_build = build
            if self.conn is None:
                d = self._substantiate(build)  # start up instance
                d.addErrback(log.err, "while substantiating")
            # else: we're waiting for an old one to detach. the _substantiate
            # will be done in ``detached`` below.
        return self._substantiation_notifier.wait()

    def _substantiate(self, build):
        """Call ``start_instance``, register a shutdown trigger and attach
        result/failure handlers.

        Returns the Deferred from ``start_instance``; ``clean_up`` re-returns
        the failure, so errors propagate to the caller's errbacks.
        """
        # register event trigger
        d = self.start_instance(build)
        self._shutdown_callback_handle = self.master.reactor.addSystemEventTrigger(
            'before', 'shutdown', self._soft_disconnect, fast=True)

        def start_instance_result(result):
            # If we don't report success, then preparation failed.
            if not result:
                log.msg(
                    "Worker '%s' does not want to substantiate at this time" %
                    (self.name,))
                self._substantiation_notifier.notify(False)
            return result

        def clean_up(failure):
            if self.missing_timer is not None:
                self.missing_timer.cancel()
                self._substantiation_failed(failure)
            if self._shutdown_callback_handle is not None:
                handle = self._shutdown_callback_handle
                # Deleting the instance attribute makes lookups fall back to
                # the class-level default of None.
                del self._shutdown_callback_handle
                self.master.reactor.removeSystemEventTrigger(handle)
            return failure
        d.addCallbacks(start_instance_result, clean_up)
        return d

    def attached(self, bot):
        """Handle a worker connection.

        Returns a failed Deferred (``RuntimeError``), after disconnecting
        ``bot``, when a connection arrives while no substantiation is pending
        and ``build_wait_timeout`` is non-negative; otherwise delegates to
        the base class.
        """
        if not self._substantiation_notifier and self.build_wait_timeout >= 0:
            msg = 'Worker %s received connection while not trying to ' \
                'substantiate. Disconnecting.' % (self.name,)
            log.msg(msg)
            self._disconnect(bot)
            return defer.fail(RuntimeError(msg))
        return AbstractWorker.attached(self, bot)

    def detached(self):
        """Handle disconnection; restart substantiation if one is pending."""
        AbstractWorker.detached(self)
        if self._substantiation_notifier:
            d = self._substantiate(self.substantiation_build)
            d.addErrback(log.err, 'while re-substantiating')

    def _substantiation_failed(self, failure):
        """Report a failed substantiation and tear the instance down.

        Notifies any waiters with ``failure``, starts ``insubstantiate`` and,
        when the worker is still configured and ``notify_on_missing`` is set,
        sends a mail.
        """
        self.missing_timer = None
        if self._substantiation_notifier:
            self.substantiation_build = None
            self._substantiation_notifier.notify(failure)
        d = self.insubstantiate()
        d.addErrback(log.err, 'while insubstantiating')
        # notify people, but only if we're still in the config
        if not self.parent or not self.notify_on_missing:
            return

        buildmaster = self.botmaster.master
        status = buildmaster.getStatus()
        text = "The Buildbot working for '%s'\n" % status.getTitle()
        text += ("has noticed that the latent worker named %s \n" %
                 self.name)
        text += "never substantiated after a request\n"
        text += "\n"
        text += ("The request was made at %s (buildmaster-local time)\n" %
                 time.ctime(time.time() - self.missing_timeout))  # approx
        text += "\n"
        text += "Sincerely,\n"
        text += " The Buildbot\n"
        text += " %s\n" % status.getTitleURL()
        subject = "Buildbot: worker %s never substantiated" % (self.name,)
        return self._mail_missing_message(subject, text)

    def canStartBuild(self):
        """Refuse builds while insubstantiating; otherwise ask the base class."""
        if self.insubstantiating:
            return False
        return AbstractWorker.canStartBuild(self)

    def buildStarted(self, sb):
        """Record that ``sb``'s builder is building and stop the idle timer."""
        assert self.substantiated
        self._clearBuildWaitTimer()
        self.building.add(sb.builder_name)

    def buildFinished(self, sb):
        """Forget ``sb``'s builder; shut down or arm the idle timer when idle.

        ``set.remove`` raises ``KeyError`` if the builder was not recorded.
        """
        AbstractWorker.buildFinished(self, sb)

        self.building.remove(sb.builder_name)
        if not self.building:
            if self.build_wait_timeout == 0:
                d = self.insubstantiate()
                # try starting builds for this worker after insubstantiating;
                # this will cause the worker to re-substantiate immediately if
                # there are pending build requests.
                d.addCallback(lambda _:
                              self.botmaster.maybeStartBuildsForWorker(self.workername))
            else:
                self._setBuildWaitTimer()

    def _clearBuildWaitTimer(self):
        """Cancel (if still active) and forget the build-wait timer."""
        if self.build_wait_timer is not None:
            if self.build_wait_timer.active():
                self.build_wait_timer.cancel()
            self.build_wait_timer = None

    def _setBuildWaitTimer(self):
        """Schedule ``_soft_disconnect`` after ``build_wait_timeout`` seconds.

        No timer is set when the timeout is zero or negative.
        """
        self._clearBuildWaitTimer()
        if self.build_wait_timeout <= 0:
            return
        self.build_wait_timer = self.master.reactor.callLater(
            self.build_wait_timeout, self._soft_disconnect)

    @defer.inlineCallbacks
    def insubstantiate(self, fast=False):
        """Stop the instance and reset the substantiation bookkeeping."""
        self.insubstantiating = True
        self._clearBuildWaitTimer()
        d = self.stop_instance(fast)
        if self._shutdown_callback_handle is not None:
            handle = self._shutdown_callback_handle
            # Falls back to the class-level None once deleted.
            del self._shutdown_callback_handle
            self.master.reactor.removeSystemEventTrigger(handle)
        # State is reset before the stop has completed.
        self.substantiated = False
        self.building.clear()  # just to be sure
        yield d
        self.insubstantiating = False
        self.botmaster.maybeStartBuildsForWorker(self.name)

    @defer.inlineCallbacks
    def _soft_disconnect(self, fast=False):
        """Disconnect the worker and stop its instance."""
        # a negative build_wait_timeout means the worker should never be shut
        # down, so just disconnect.
        if self.build_wait_timeout < 0:
            yield AbstractWorker.disconnect(self)
            return

        if self.missing_timer:
            self.missing_timer.cancel()
            self.missing_timer = None

        if self._substantiation_notifier:
            log.msg("Weird: Got request to stop before started. Allowing "
                    "worker to start cleanly to avoid inconsistent state")
            yield self._substantiation_notifier.wait()
            self.substantiation_build = None
            log.msg("Substantiation complete, immediately terminating.")

        if self.conn is not None:
            # Disconnect and insubstantiate in parallel.
            yield defer.DeferredList([
                AbstractWorker.disconnect(self),
                self.insubstantiate(fast)
            ], consumeErrors=True, fireOnOneErrback=True)
        else:
            yield AbstractWorker.disconnect(self)
            yield self.stop_instance(fast)

    def disconnect(self):
        """Start a soft disconnect and report the worker as lost."""
        # This returns a Deferred but we don't use it
        self._soft_disconnect()
        # this removes the worker from all builders. It won't come back
        # without a restart (or maybe a sighup)
        self.botmaster.workerLost(self)

    def stopService(self):
        """Stop the service, also soft-disconnecting when connected.

        Returns the base-class result wrapped by ``maybeDeferred``, or a
        ``DeferredList`` of that and the soft disconnect.
        """
        res = defer.maybeDeferred(AbstractWorker.stopService, self)
        if self.conn is not None:
            d = self._soft_disconnect()
            res = defer.DeferredList([res, d])
        return res

    def updateWorker(self):
        """Called to add or remove builders after the worker has connected.

        Also called after botmaster's builders are initially set.

        @return: a Deferred that indicates when an attached worker has
        accepted the new builders and/or released the old ones."""
        for b in self.botmaster.getBuildersForWorker(self.name):
            if b.name not in self.workerforbuilders:
                b.addLatentWorker(self)
        return AbstractWorker.updateWorker(self)

    def sendBuilderList(self):
        """Send the builder list, attach builders and finish substantiation.

        On success marks the worker substantiated and notifies waiters with
        ``True``; on failure notifies waiters with the failure, cancels the
        missing timer and re-returns the failure.
        """
        d = AbstractWorker.sendBuilderList(self)

        def _sent(slist):
            if not slist:
                return
            dl = []
            for name in slist:
                # use get() since we might have changed our mind since then.
                # we're checking on the builder in addition to the
                # workers out of a bit of paranoia.
                b = self.botmaster.builders.get(name)
                sb = self.workerforbuilders.get(name)
                if b and sb:
                    d1 = sb.attached(self, self.worker_commands)
                    dl.append(d1)
            return defer.DeferredList(dl)

        def _set_failed(why):
            log.msg("Worker.sendBuilderList (%s) failed" % self)
            log.err(why)
            # TODO: hang up on them?, without setBuilderList we can't use
            # them
            if self._substantiation_notifier:
                self.substantiation_build = None
                self._substantiation_notifier.notify(why)
            if self.missing_timer:
                self.missing_timer.cancel()
                self.missing_timer = None
            # TODO: maybe log? send an email?
            return why
        d.addCallbacks(_sent, _set_failed)

        # Used as a decorator: registers the function below as a callback
        # on ``d``.
        @d.addCallback
        def _substantiated(res):
            log.msg(r"Worker %s substantiated \o/" % (self.name,))
            self.substantiated = True
            if not self._substantiation_notifier:
                log.msg("No substantiation deferred for %s" % (self.name,))
            else:
                log.msg(
                    "Firing %s substantiation deferred with success" % (self.name,))
                self.substantiation_build = None
                self._substantiation_notifier.notify(True)
            # note that the missing_timer is already handled within
            # ``attached``
            if not self.building:
                self._setBuildWaitTimer()
        return d
def __init__(self, *args, poll_timeout=120, **kwargs):
    """Initialize the parent class, then set up polling state.

    All positional and remaining keyword arguments are forwarded unchanged
    to the parent initializer.

    @param poll_timeout: keyword-only value stored as ``self.poll_timeout``
        (defaults to 120)
    """
    super().__init__(*args, **kwargs)

    # configuration supplied by the caller
    self.poll_timeout = poll_timeout
    # fresh notifier; starts with no waiters
    self._polling_finished_notifier = Notifier()
def __init__(self, name):
    """Create an instance called ``name`` with empty bookkeeping state.

    @param name: stored as ``self.name``
    """
    # tracking of deliveries that have not finished yet
    self._unfinished_notifier = Notifier()
    self._unfinished_deliveries = []

    # nothing is subscribed initially
    self.subscriptions = set()
    self.name = name
class Build(properties.PropertiesMixin):
    """I represent a single build by a single worker. Specialized Builders can
    use subclasses of Build to hold status information unique to those build
    processes.

    I control B{how} the build proceeds. The actual build is broken up into a
    series of steps, saved in the .buildSteps[] array as a list of
    L{buildbot.process.step.BuildStep} objects. Each step is a single remote
    command, possibly a shell command.

    After the build, I go away.

    I can be used by a factory by setting buildClass on
    L{buildbot.process.factory.BuildFactory}

    @ivar requests: the list of L{BuildRequest}s that triggered me
    """

    # names of the properties that redirect a build to a "virtual" builder
    VIRTUAL_BUILDERNAME_PROP = "virtual_builder_name"
    VIRTUAL_BUILDERDESCRIPTION_PROP = "virtual_builder_description"
    VIRTUAL_BUILDERTAGS_PROP = "virtual_builder_tags"

    workdir = "build"
    reason = "changes"
    finished = False
    results = None
    stopped = False
    set_runtime_properties = True
    subs = None

    _sentinel = []  # used as a sentinel to indicate unspecified initial_value

    def __init__(self, requests):
        """
        @param requests: non-empty list of build requests; the first one is
            used to merge the source stamps and reasons of the others
        """
        self.requests = requests
        self.locks = []
        # build a source stamp
        self.sources = requests[0].mergeSourceStampsWith(requests[1:])
        self.reason = requests[0].mergeReasons(requests[1:])

        self.currentStep = None
        self.workerEnvironment = {}
        self.buildid = None
        # fired with the build id once startBuild() has obtained it
        self._buildid_notifier = Notifier()
        self.number = None
        self.executedSteps = []
        self.stepnames = {}

        self.terminate = False

        # (lock, access, deferred) while waiting for a lock, otherwise None
        self._acquiringLock = None
        self._builderid = None
        # overall results, may downgrade after each step
        self.results = SUCCESS
        self.properties = properties.Properties()

        # tracks execution during the build finish phase
        self._locks_released = False
        self._build_finished = False

        # tracks execution during substantiation
        self._is_substantiating = False

        # tracks the config version for locks
        self.config_version = None

    def getProperties(self):
        """Return the Properties object of this build."""
        return self.properties

    def setBuilder(self, builder):
        """
        Set the given builder as our builder.

        Also copies the builder's master and config_version onto this build.

        @type  builder: L{buildbot.process.builder.Builder}
        """
        self.builder = builder
        self.master = builder.master
        self.config_version = builder.config_version

    @defer.inlineCallbacks
    def setLocks(self, lockList):
        """Resolve C{lockList} through the botmaster and store the result in
        C{self.locks}."""
        self.locks = yield self.builder.botmaster.getLockFromLockAccesses(
            lockList, self.config_version)

    def setWorkerEnvironment(self, env):
        # TODO: remove once we don't have anything depending on this method or attribute
        # e.g., old-style steps (ShellMixin pulls the environment out of the
        # builder directly)
        self.workerEnvironment = env

    def getSourceStamp(self, codebase=''):
        """Return the first source stamp whose codebase equals C{codebase},
        or None when there is no such source stamp."""
        for source in self.sources:
            if source.codebase == codebase:
                return source
        return None

    def getAllSourceStamps(self):
        """Return a new list holding all source stamps of this build."""
        return list(self.sources)

    @staticmethod
    def allChangesFromSources(sources):
        """Generate every change of every source stamp in C{sources}."""
        for s in sources:
            yield from s.changes

    def allChanges(self):
        """Generate every change of this build's source stamps."""
        return Build.allChangesFromSources(self.sources)

    def allFiles(self):
        # return a list of all source files that were changed
        return [f for c in self.allChanges() for f in c.files]

    def __repr__(self):
        return "<Build {} number:{} results:{}>".format(
            self.builder.name, repr(self.number),
            statusToString(self.results))

    def blamelist(self):
        """Return the sorted list of change authors (each listed once),
        followed in sort order by the authors of any patches."""
        # Note that this algorithm is also implemented in
        # buildbot.reporters.utils.getResponsibleUsersForBuild, but using the data api.
        # it is important for the UI to have the blamelist easily available.
        # The best way is to make sure the owners property is set to full blamelist
        blamelist = []
        for c in self.allChanges():
            if c.who not in blamelist:
                blamelist.append(c.who)
        for source in self.sources:
            if source.patch:
                # Add patch author to blamelist
                blamelist.append(source.patch_info[0])
        blamelist.sort()
        return blamelist

    def changesText(self):
        """Return the text of all changes, each one preceded by a separator
        line of 60 dashes."""
        # consider sorting these by number
        return "".join(
            "-" * 60 + "\n\n" + c.asText() + "\n" for c in self.allChanges())

    def setStepFactories(self, step_factories):
        """Set a list of 'step factories', which are tuples of (class,
        kwargs), where 'class' is generally a subclass of step.BuildStep .
        These are used to create the Steps themselves when the Build starts
        (as opposed to when it is first created). By creating the steps
        later, their __init__ method will have access to things like
        build.allFiles() ."""
        self.stepFactories = list(step_factories)

    useProgress = True

    def getWorkerCommandVersion(self, command, oldversion=None):
        """Delegate to the workerforbuilder's getWorkerCommandVersion()."""
        return self.workerforbuilder.getWorkerCommandVersion(
            command, oldversion)

    def getWorkerName(self):
        """Return the name of the worker this build was started on."""
        return self.workerforbuilder.worker.workername

    @staticmethod
    def setupPropertiesKnownBeforeBuildStarts(props, requests, builder,
                                              workerforbuilder):
        """Fill C{props} from, in this order: the global configuration, the
        changes of the merged source stamps, the build requests, the builder
        and the worker."""
        # Note that this function does not setup the 'builddir' worker property
        # It's not possible to know it until the actual worker has attached.

        # start with global properties from the configuration
        props.updateFromProperties(builder.master.config.properties)

        # from the SourceStamps, which have properties via Change
        sources = requests[0].mergeSourceStampsWith(requests[1:])
        for change in Build.allChangesFromSources(sources):
            props.updateFromProperties(change.properties)

        # get any properties from requests (this is the path through which
        # schedulers will send us properties)
        for rq in requests:
            props.updateFromProperties(rq.properties)

        # get builder properties
        builder.setupProperties(props)

        # get worker properties
        # navigate our way back to the L{buildbot.worker.Worker}
        # object that came from the config, and get its properties
        workerforbuilder.worker.setupProperties(props)

    def setupOwnProperties(self):
        """Set the 'buildnumber' property and, when there is exactly one
        source stamp, the branch/revision/repository/codebase/project
        properties taken from it."""
        # now set some properties of our own, corresponding to the
        # build itself
        props = self.getProperties()
        props.setProperty("buildnumber", self.number, "Build")

        if self.sources and len(self.sources) == 1:
            # old interface for backwards compatibility
            source = self.sources[0]
            props.setProperty("branch", source.branch, "Build")
            props.setProperty("revision", source.revision, "Build")
            props.setProperty("repository", source.repository, "Build")
            props.setProperty("codebase", source.codebase, "Build")
            props.setProperty("project", source.project, "Build")

    def setupWorkerBuildirProperty(self, workerforbuilder):
        """Set the 'builddir' property by joining the worker's basedir with
        the builder's workerbuilddir; does nothing when the worker has no
        basedir."""
        path_module = workerforbuilder.worker.path_module

        # navigate our way back to the L{buildbot.worker.Worker}
        # object that came from the config, and get its properties
        if workerforbuilder.worker.worker_basedir:
            builddir = path_module.join(
                bytes2unicode(workerforbuilder.worker.worker_basedir),
                bytes2unicode(self.builder.config.workerbuilddir))
            self.setProperty("builddir", builddir, "Worker")

    def setupWorkerForBuilder(self, workerforbuilder):
        """Copy the worker's path_module and workername onto this build."""
        self.path_module = workerforbuilder.worker.path_module
        self.workername = workerforbuilder.worker.workername

    @defer.inlineCallbacks
    def getBuilderId(self):
        """Return (through a Deferred) the builder id of this build.

        The id is resolved on first use and cached. When the virtual builder
        name property is set, the id is looked up by that name and the
        builder info (description and tags) is updated; otherwise the id of
        the real builder is used.
        """
        if self._builderid is None:
            if self.hasProperty(self.VIRTUAL_BUILDERNAME_PROP):
                self._builderid = yield self.builder.getBuilderIdForName(
                    self.getProperty(self.VIRTUAL_BUILDERNAME_PROP))
                description = self.getProperty(
                    self.VIRTUAL_BUILDERDESCRIPTION_PROP,
                    self.builder.config.description)
                tags = self.getProperty(
                    self.VIRTUAL_BUILDERTAGS_PROP,
                    self.builder.config.tags)
                if isinstance(tags, list) and '_virtual_' not in tags:
                    # Build a new list instead of appending in place: ``tags``
                    # may be the very list held by the builder config (the
                    # getProperty default above) and must not be modified.
                    tags = tags + ['_virtual_']

                # the returned Deferred is deliberately not waited for
                self.master.data.updates.updateBuilderInfo(
                    self._builderid, description, tags)
            else:
                self._builderid = yield self.builder.getBuilderId()
        return self._builderid

    @defer.inlineCallbacks
    def startBuild(self, workerforbuilder):
        """This method sets up the build, then starts it by invoking the
        first Step. It returns a Deferred which will fire when the build
        finishes. This Deferred is guaranteed to never errback."""
        self.workerforbuilder = workerforbuilder
        self.conn = None

        worker = workerforbuilder.worker

        log.msg("{}.startBuild".format(self))

        # TODO: this will go away when build collapsing is implemented; until
        # then we just assign the build to the first buildrequest
        brid = self.requests[0].id
        builderid = yield self.getBuilderId()
        self.buildid, self.number = \
            yield self.master.data.updates.addBuild(
                builderid=builderid,
                buildrequestid=brid,
                workerid=worker.workerid)
        # wake up anybody blocked in get_buildid()
        self._buildid_notifier.notify(self.buildid)

        self.stopBuildConsumer = yield self.master.mq.startConsuming(
            self.controlStopBuild,
            ("control", "builds", str(self.buildid), "stop"))

        # the preparation step counts the time needed for preparing the worker and getting the
        # locks.
        # we cannot use a real step as we don't have a worker yet.
        self.preparation_step = buildstep.BuildStep(name="worker_preparation")
        self.preparation_step.setBuild(self)
        yield self.preparation_step.addStep()
        self.setupOwnProperties()

        # then narrow WorkerLocks down to the right worker
        self.locks = [(l.getLockForWorker(workerforbuilder.worker.workername),
                       a)
                      for l, a in self.locks]
        metrics.MetricCountEvent.log('active_builds', 1)

        # make sure properties are available to people listening on 'new'
        # events
        yield self.master.data.updates.setBuildProperties(self.buildid, self)
        yield self.master.data.updates.setBuildStateString(
            self.buildid, 'starting')
        yield self.master.data.updates.generateNewBuildEvent(self.buildid)

        try:
            self.setupBuild()  # create .steps
        except Exception:
            yield self.buildPreparationFailure(Failure(), "setupBuild")
            yield self.buildFinished(['Build.setupBuild', 'failed'],
                                     EXCEPTION)
            return

        # flush properties in the beginning of the build
        yield self.master.data.updates.setBuildProperties(self.buildid, self)
        yield self.master.data.updates.setBuildStateString(
            self.buildid, 'preparing worker')
        try:
            ready_or_failure = False
            if workerforbuilder.worker and \
                    workerforbuilder.worker.acquireLocks():
                self._is_substantiating = True
                ready_or_failure = \
                    yield workerforbuilder.substantiate_if_needed(self)
        except Exception:
            ready_or_failure = Failure()
        finally:
            self._is_substantiating = False

        # If prepare returns True then it is ready and we start a build
        # If it returns failure then we don't start a new build.
        if ready_or_failure is not True:
            yield self.buildPreparationFailure(ready_or_failure,
                                               "worker_prepare")
            if self.stopped:
                yield self.buildFinished(["worker", "cancelled"],
                                         self.results)
            elif isinstance(ready_or_failure, Failure) and \
                    ready_or_failure.check(
                        interfaces.LatentWorkerCannotSubstantiate):
                yield self.buildFinished(["worker", "cannot", "substantiate"],
                                         EXCEPTION)
            else:
                yield self.buildFinished(["worker", "not", "available"],
                                         RETRY)
            return

        # ping the worker to make sure they're still there. If they've
        # fallen off the map (due to a NAT timeout or something), this
        # will fail in a couple of minutes, depending upon the TCP
        # timeout.
        #
        # TODO: This can unnecessarily suspend the starting of a build, in
        # situations where the worker is live but is pushing lots of data to
        # us in a build.
        yield self.master.data.updates.setBuildStateString(
            self.buildid, 'pinging worker')
        log.msg("starting build {}.. pinging the worker {}".format(
            self, workerforbuilder))
        try:
            ping_success_or_failure = yield workerforbuilder.ping()
        except Exception:
            ping_success_or_failure = Failure()

        if ping_success_or_failure is not True:
            yield self.buildPreparationFailure(ping_success_or_failure,
                                               "worker_ping")
            yield self.buildFinished(["worker", "not", "pinged"], RETRY)
            return

        self.conn = workerforbuilder.worker.conn

        # To retrieve the builddir property, the worker must be attached as we
        # depend on its path_module. Latent workers become attached only after
        # preparing them, so we can't setup the builddir property earlier like
        # the rest of properties
        self.setupWorkerBuildirProperty(workerforbuilder)
        self.setupWorkerForBuilder(workerforbuilder)
        self.subs = self.conn.notifyOnDisconnect(self.lostRemote)

        # tell the remote that it's starting a build, too
        try:
            yield self.conn.remoteStartBuild(self.builder.name)
        except Exception:
            yield self.buildPreparationFailure(Failure(), "start_build")
            yield self.buildFinished(["worker", "not", "building"], RETRY)
            return

        yield self.master.data.updates.setBuildStateString(
            self.buildid, 'acquiring locks')
        yield self.acquireLocks()

        readymsg = "worker {} ready".format(self.getWorkerName())
        yield self.master.data.updates.setStepStateString(
            self.preparation_step.stepid, readymsg)
        yield self.master.data.updates.finishStep(
            self.preparation_step.stepid, SUCCESS, False)

        yield self.master.data.updates.setBuildStateString(
            self.buildid, 'building')

        # start the sequence of steps
        self.startNextStep()

    @defer.inlineCallbacks
    def buildPreparationFailure(self, why, state_string):
        """Finish the preparation step after a failed preparation phase.

        When the build was stopped the step is finished as CANCELLED.
        Otherwise the failure is logged, the worker is put in quarantine and
        the step is finished as EXCEPTION.

        @param why: what went wrong; it is attached to the step as a log only
            when it is a Failure
        @param state_string: short name of the phase that failed, used in log
            messages and in the step state string
        """
        if self.stopped:
            # if self.stopped, then this failure is a LatentWorker's failure to substantiate
            # which we triggered on purpose in stopBuild()
            log.msg("worker stopped while " + state_string, why)
            yield self.master.data.updates.finishStep(
                self.preparation_step.stepid, CANCELLED, False)
        else:
            log.err(why, "while " + state_string)
            self.workerforbuilder.worker.putInQuarantine()
            if isinstance(why, failure.Failure):
                yield self.preparation_step.addLogWithFailure(why)
            yield self.master.data.updates.setStepStateString(
                self.preparation_step.stepid, "error while " + state_string)
            yield self.master.data.updates.finishStep(
                self.preparation_step.stepid, EXCEPTION, False)

    @staticmethod
    def _canAcquireLocks(lockList, workerforbuilder):
        """Return True when every lock of C{lockList}, narrowed down to the
        worker of C{workerforbuilder}, is currently available."""
        for lock, access in lockList:
            worker_lock = lock.getLockForWorker(
                workerforbuilder.worker.workername)
            if not worker_lock.isAvailable(None, access):
                return False
        return True

    def acquireLocks(self, res=None):
        """Claim all locks of this build, waiting for them if necessary.

        @param res: ignored; present so that this method can be used as a
            Deferred callback
        @return: a Deferred that fires once the locks have been claimed, or
            immediately when there are no locks or the build was stopped
        """
        self._acquiringLock = None
        if not self.locks:
            return defer.succeed(None)
        if self.stopped:
            return defer.succeed(None)
        log.msg("acquireLocks(build {}, locks {})".format(self, self.locks))
        for lock, access in self.locks:
            if not lock.isAvailable(self, access):
                log.msg("Build {} waiting for lock {}".format(self, lock))
                d = lock.waitUntilMaybeAvailable(self, access)
                # start over from the first lock once this one may be free
                d.addCallback(self.acquireLocks)
                self._acquiringLock = (lock, access, d)
                return d
        # all locks are available, claim them all
        for lock, access in self.locks:
            lock.claim(self, access)
        return defer.succeed(None)

    def setUniqueStepName(self, step):
        # If there are any name collisions, we add a count to the loser
        # until it is unique.
        name = step.name
        if name in self.stepnames:
            count = self.stepnames[name]
            count += 1
            self.stepnames[name] = count
            name = "{}_{}".format(step.name, count)
        else:
            self.stepnames[name] = 0
        step.name = name

    def setupBuildSteps(self, step_factories):
        """Create one step per entry of C{step_factories}, bind each step to
        this build and its worker, and return the steps as a list."""
        steps = []
        for factory in step_factories:
            step = buildstep.create_step_from_step_or_factory(factory)
            step.setBuild(self)
            step.setWorker(self.workerforbuilder.worker)
            steps.append(step)

            if self.useProgress:
                step.setupProgress()
        return steps

    def setupBuild(self):
        """Create the steps of this build and set the 'owners' property."""
        # create the actual BuildSteps.
        self.steps = self.setupBuildSteps(self.stepFactories)

        owners = set(self.blamelist())
        # gather owners from build requests
        owners.update({r.properties['owner'] for r in self.requests
                       if "owner" in r.properties})
        if owners:
            self.setProperty('owners', sorted(owners), 'Build')
        self.text = []  # list of text string lists (text2)

    def addStepsAfterCurrentStep(self, step_factories):
        # Add the new steps after the step that is running.
        # The running step has already been popped from self.steps
        self.steps[0:0] = self.setupBuildSteps(step_factories)

    def addStepsAfterLastStep(self, step_factories):
        # Add the new steps to the end.
        self.steps.extend(self.setupBuildSteps(step_factories))

    def getNextStep(self):
        """This method is called to obtain the next BuildStep for this build.
        When it returns None (or raises a StopIteration exception), the build
        is complete."""
        if not self.steps:
            return None
        if not self.conn:
            return None
        if self.terminate or self.stopped:
            # Run any remaining alwaysRun steps, and skip over the others
            while True:
                s = self.steps.pop(0)
                if s.alwaysRun:
                    return s
                if not self.steps:
                    return None
        else:
            return self.steps.pop(0)

    def startNextStep(self):
        """Start the next step, or finish the build when no step is left.

        @return: the Deferred of allStepsDone() when there is no next step,
            otherwise an already fired Deferred
        """
        try:
            s = self.getNextStep()
        except StopIteration:
            s = None
        if not s:
            return self.allStepsDone()
        self.executedSteps.append(s)
        self.currentStep = s

        # the following function returns a deferred, but we don't wait for it
        self._start_next_step_impl(s)
        return defer.succeed(None)

    @defer.inlineCallbacks
    def _start_next_step_impl(self, step):
        """Run C{step}, merge its results and move on to the next step; any
        exception finishes the build with EXCEPTION."""
        try:
            results = yield step.startStep(self.conn)
            yield self.master.data.updates.setBuildProperties(
                self.buildid, self)

            self.currentStep = None
            if self.finished:
                return  # build was interrupted, don't keep building

            # interpret/merge results
            terminate = yield self.stepDone(results, step)
            if terminate:
                self.terminate = True
            yield self.startNextStep()

        except Exception as e:
            log.msg("{} build got exception when running step {}".format(
                self, step))
            log.err(e)

            yield self.master.data.updates.setBuildProperties(
                self.buildid, self)

            # Note that buildFinished can't throw exception
            yield self.buildFinished(["build", "exception"], EXCEPTION)

    @defer.inlineCallbacks
    def stepDone(self, results, step):
        """This method is called when the BuildStep completes. It is passed a
        status object from the BuildStep and is responsible for merging the
        Step's results into those of the overall Build.

        @return: (through a Deferred) True when the build must terminate
        """
        terminate = False
        text = None
        if isinstance(results, tuple):
            results, text = results
        assert isinstance(results, type(SUCCESS)), "got %r" % (results, )
        summary = yield step.getBuildResultSummary()
        if 'build' in summary:
            text = [summary['build']]
        log.msg(" step '{}' complete: {} ({})".format(
            step.name, statusToString(results), text))
        if text:
            self.text.extend(text)
            # the returned Deferred is not waited for
            self.master.data.updates.setBuildStateString(
                self.buildid, bytes2unicode(" ".join(self.text)))
        self.results, terminate = computeResultAndTermination(
            step, results, self.results)
        if not self.conn:
            # force the results to retry if the connection was lost
            self.results = RETRY
            terminate = True
        return terminate

    def lostRemote(self, conn=None):
        # the worker went away. There are several possible reasons for this,
        # and they aren't necessarily fatal. For now, kill the build, but
        # TODO: see if we can resume the build when it reconnects.
        log.msg("{}.lostRemote".format(self))
        self.conn = None
        self.text = ["lost", "connection"]
        self.results = RETRY
        if self.currentStep and self.currentStep.results is None:
            # this should cause the step to finish.
            log.msg(" stopping currentStep", self.currentStep)
            self.currentStep.interrupt(Failure(error.ConnectionLost()))
        else:
            self.text = ["lost", "connection"]
            self.stopped = True
            if self._acquiringLock:
                lock, access, d = self._acquiringLock
                lock.stopWaitingUntilAvailable(self, access, d)

    def controlStopBuild(self, key, params):
        """Message queue callback: stop the build, passing C{params} as
        keyword arguments to stopBuild()."""
        return self.stopBuild(**params)

    def stopBuild(self, reason="<no reason given>", results=CANCELLED):
        # the idea here is to let the user cancel a build because, e.g.,
        # they realized they committed a bug and they don't want to waste
        # the time building something that they know will fail. Another
        # reason might be to abandon a stuck build. We want to mark the
        # build as failed quickly rather than waiting for the worker's
        # timeout to kill it on its own.

        log.msg(" {}: stopping build: {} {}".format(self, reason, results))
        if self.finished:
            return
        # TODO: include 'reason' in this point event
        self.stopped = True
        if self.currentStep and self.currentStep.results is None:
            self.currentStep.interrupt(reason)

        self.results = results

        if self._acquiringLock:
            lock, access, d = self._acquiringLock
            lock.stopWaitingUntilAvailable(self, access, d)
        elif self._is_substantiating:
            # We're having a latent worker that hasn't been substantiated yet. We need to abort
            # that to not have a latent worker without an associated build
            self.workerforbuilder.insubstantiate_if_needed()

    def allStepsDone(self):
        """Finish the build with a text derived from the overall results,
        followed by the text accumulated from the steps."""
        if self.results == FAILURE:
            text = ["failed"]
        elif self.results == WARNINGS:
            text = ["warnings"]
        elif self.results == EXCEPTION:
            text = ["exception"]
        elif self.results == RETRY:
            text = ["retry"]
        elif self.results == CANCELLED:
            text = ["cancelled"]
        else:
            text = ["build", "successful"]
        text.extend(self.text)
        return self.buildFinished(text, self.results)

    @defer.inlineCallbacks
    def buildFinished(self, text, results):
        """This method must be called when the last Step has completed. It
        marks the Build as complete and returns the Builder to the 'idle'
        state.

        It takes two arguments which describe the overall build status:
        text, results. 'results' is one of the possible results (see
        buildbot.process.results).

        If 'results' is SUCCESS or WARNINGS, we will permit any dependent
        builds to start. If it is 'FAILURE', those builds will be
        abandoned.

        This method never throws."""
        try:
            self.stopBuildConsumer.stopConsuming()
            self.finished = True
            if self.conn:
                self.subs.unsubscribe()
                self.subs = None
                self.conn = None
            log.msg(" {}: build finished".format(self))
            self.results = worst_status(self.results, results)
            eventually(self.releaseLocks)
            metrics.MetricCountEvent.log('active_builds', -1)

            yield self.master.data.updates.setBuildStateString(
                self.buildid, bytes2unicode(" ".join(text)))
            yield self.master.data.updates.finishBuild(self.buildid,
                                                       self.results)

            if self.results == EXCEPTION:
                # When a build has an exception, put the worker in quarantine for a few seconds
                # to make sure we try next build with another worker
                self.workerforbuilder.worker.putInQuarantine()
            elif self.results != RETRY:
                # This worker looks sane if status is neither retry or exception

                # Avoid a race in case the build step reboot the worker
                if self.workerforbuilder.worker is not None:
                    self.workerforbuilder.worker.resetQuarantine()

            # mark the build as finished
            self.workerforbuilder.buildFinished()
            self.builder.buildFinished(self, self.workerforbuilder)

            self._tryScheduleBuildsAfterLockUnlock(build_finished=True)
        except Exception:
            log.err(
                None, 'from finishing a build; this is a '
                'serious error - please file a bug at http://buildbot.net')

    def releaseLocks(self):
        """Release every lock this build owns and record that the locks
        have been released."""
        if self.locks:
            log.msg("releaseLocks({}): {}".format(self, self.locks))

        for lock, access in self.locks:
            if lock.isOwner(self, access):
                lock.release(self, access)

        self._tryScheduleBuildsAfterLockUnlock(locks_released=True)

    def _tryScheduleBuildsAfterLockUnlock(self, locks_released=False,
                                          build_finished=False):
        # we need to inform the botmaster to attempt to schedule any pending
        # build request if we released any locks. This is because buildrequest
        # may be started for a completely unrelated builder and yet depend on
        # a lock released by this build.
        #
        # TODO: the current approach is dumb as we just attempt to schedule
        # all buildrequests. A much better idea would be to record the reason
        # of why a buildrequest was not scheduled in the BuildRequestDistributor
        # and then attempt to schedule only these buildrequests which may have
        # had that reason resolved.

        # this function is complicated by the fact that the botmaster must be
        # informed only when all locks have been released and the actions in
        # buildFinished have concluded. Since releaseLocks is called using
        # eventually this may happen in any order.

        self._locks_released = self._locks_released or locks_released
        self._build_finished = self._build_finished or build_finished

        if not self.locks:
            return

        if self._locks_released and self._build_finished:
            self.builder.botmaster.maybeStartBuildsForAllBuilders()

    def getSummaryStatistic(self, name, summary_fn, initial_value=_sentinel):
        """Combine the statistic C{name} of all executed steps that have it.

        @param summary_fn: two-argument function handed to reduce()
        @param initial_value: optional start value for reduce(); when it is
            omitted and no executed step has the statistic, reduce() raises
            TypeError
        """
        step_stats_list = [
            st.getStatistic(name)
            for st in self.executedSteps
            if st.hasStatistic(name)]
        if initial_value is self._sentinel:
            return reduce(summary_fn, step_stats_list)
        return reduce(summary_fn, step_stats_list, initial_value)

    @defer.inlineCallbacks
    def getUrl(self):
        """Return (through a Deferred) the URL of this build."""
        builder_id = yield self.getBuilderId()
        return getURLForBuild(self.master, builder_id, self.number)

    @defer.inlineCallbacks
    def get_buildid(self):
        """Return (through a Deferred) the build id, waiting until
        startBuild() has obtained it when it is not known yet."""
        if self.buildid is not None:
            return self.buildid
        buildid = yield self._buildid_notifier.wait()
        return buildid

    @defer.inlineCallbacks
    def waitUntilFinished(self):
        """Wait for the 'finished' event of this build on the message queue;
        a callable returning C{self.finished} is passed along as the check."""
        buildid = yield self.get_buildid()
        yield self.master.mq.waitUntilEvent(
            ('builds', str(buildid), 'finished'),
            lambda: self.finished)

    def getWorkerInfo(self):
        """Return the info object of the worker running this build."""
        return self.workerforbuilder.worker.info
def _handle_conn_shutdown_notifier(self, conn):
    """Expose a notifier while ``conn`` is shutting down.

    A fresh Notifier is stored in ``self._pending_conn_shutdown_notifier``,
    then ``conn.waitShutdown()`` is waited for. Afterwards the notifier
    currently stored in the attribute is fired with None and the attribute
    is reset to None.

    NOTE(review): the body uses ``yield``, so this is a generator function;
    presumably it is driven by a decorator such as defer.inlineCallbacks
    that is not visible here -- confirm.
    """
    self._pending_conn_shutdown_notifier = Notifier()
    yield conn.waitShutdown()
    # the attribute is read again on purpose of the original code: whatever
    # notifier is stored at this point is the one that gets fired
    self._pending_conn_shutdown_notifier.notify(None)
    self._pending_conn_shutdown_notifier = None
class AbstractWorker(service.BuildbotService): """This is the master-side representative for a remote buildbot worker. There is exactly one for each worker described in the config file (the c['workers'] list). When buildbots connect in (.attach), they get a reference to this instance. The BotMaster object is stashed as the .botmaster attribute. The BotMaster is also our '.parent' Service. I represent a worker -- a remote machine capable of running builds. I am instantiated by the configuration file, and can be subclassed to add extra functionality.""" # reconfig workers after builders reconfig_priority = 64 quarantine_timer = None quarantine_timeout = quarantine_initial_timeout = 10 quarantine_max_timeout = 60 * 60 start_missing_on_startup = True DEFAULT_MISSING_TIMEOUT = 3600 DEFAULT_KEEPALIVE_INTERVAL = 3600 # override to True if isCompatibleWithBuild may return False builds_may_be_incompatible = False def checkConfig(self, name, password, max_builds=None, notify_on_missing=None, missing_timeout=None, properties=None, defaultProperties=None, locks=None, keepalive_interval=DEFAULT_KEEPALIVE_INTERVAL, machine_name=None): """ @param name: botname this machine will supply when it connects @param password: password this machine will supply when it connects @param max_builds: maximum number of simultaneous builds that will be run concurrently on this worker (the default is None for no limit) @param properties: properties that will be applied to builds run on this worker @type properties: dictionary @param defaultProperties: properties that will be applied to builds run on this worker only if the property has not been set by another source @type defaultProperties: dictionary @param locks: A list of locks that must be acquired before this worker can be used @type locks: dictionary @param machine_name: The name of the machine to associate with the worker. 
""" self.name = name = bytes2unicode(name) self.machine_name = machine_name self.password = password # protocol registration self.registration = None self._graceful = False self._paused = False # these are set when the service is started self.manager = None self.workerid = None self.info = Properties() self.worker_commands = None self.workerforbuilders = {} self.max_builds = max_builds self.access = [] if locks: self.access = locks self.lock_subscriptions = [] self.properties = Properties() self.properties.update(properties or {}, "Worker") self.properties.setProperty("workername", name, "Worker") self.defaultProperties = Properties() self.defaultProperties.update(defaultProperties or {}, "Worker") if self.machine_name is not None: self.properties.setProperty('machine_name', self.machine_name, 'Worker') self.machine = None self.lastMessageReceived = 0 if notify_on_missing is None: notify_on_missing = [] if isinstance(notify_on_missing, str): notify_on_missing = [notify_on_missing] self.notify_on_missing = notify_on_missing for i in notify_on_missing: if not isinstance(i, str): config.error('notify_on_missing arg %r is not a string' % (i, )) self.missing_timeout = missing_timeout self.missing_timer = None # a protocol connection, if we're currently connected self.conn = None # during disconnection self.conn will be set to None before all disconnection notifications # are delivered. During that period _pending_conn_shutdown_notifier will be set to # a notifier and allows interested users to wait until all disconnection notifications are # delivered. 
self._pending_conn_shutdown_notifier = None self._old_builder_list = None self._configured_builderid_list = None def __repr__(self): return "<{} {}>".format(self.__class__.__name__, repr(self.name)) @property def workername(self): # workername is now an alias to twisted.Service's name return self.name @property def botmaster(self): if self.master is None: return None return self.master.botmaster @defer.inlineCallbacks def updateLocks(self): """Convert the L{LockAccess} objects in C{self.locks} into real lock objects, while also maintaining the subscriptions to lock releases.""" # unsubscribe from any old locks for s in self.lock_subscriptions: s.unsubscribe() # convert locks into their real form locks = yield self.botmaster.getLockFromLockAccesses( self.access, self.config_version) self.locks = [(l.getLockForWorker(self.workername), la) for l, la in locks] self.lock_subscriptions = [ l.subscribeToReleases(self._lockReleased) for l, la in self.locks ] def locksAvailable(self): """ I am called to see if all the locks I depend on are available, in which I return True, otherwise I return False """ if not self.locks: return True for lock, access in self.locks: if not lock.isAvailable(self, access): return False return True def acquireLocks(self): """ I am called when a build is preparing to run. I try to claim all the locks that are needed for a build to happen. If I can't, then my caller should give up the build and try to get another worker to look at it. 
""" log.msg("acquireLocks(worker {}, locks {})".format(self, self.locks)) if not self.locksAvailable(): log.msg("worker {} can't lock, giving up".format(self)) return False # all locks are available, claim them all for lock, access in self.locks: lock.claim(self, access) return True def releaseLocks(self): """ I am called to release any locks after a build has finished """ log.msg("releaseLocks({}): {}".format(self, self.locks)) for lock, access in self.locks: lock.release(self, access) def _lockReleased(self): """One of the locks for this worker was released; try scheduling builds.""" if not self.botmaster: return # oh well.. self.botmaster.maybeStartBuildsForWorker(self.name) def _applyWorkerInfo(self, info): if not info: return # set defaults self.info.setProperty("version", "(unknown)", "Worker") # store everything as Properties for k, v in info.items(): if k in ('environ', 'worker_commands'): continue self.info.setProperty(k, v, "Worker") @defer.inlineCallbacks def _getWorkerInfo(self): worker = yield self.master.data.get(('workers', self.workerid)) self._applyWorkerInfo(worker['workerinfo']) def setServiceParent(self, parent): # botmaster needs to set before setServiceParent which calls # startService self.manager = parent return super().setServiceParent(parent) @defer.inlineCallbacks def startService(self): # tracks config version for locks self.config_version = self.master.config_version self.updateLocks() self.workerid = yield self.master.data.updates.findWorkerId(self.name) self.workerActionConsumer = yield self.master.mq.startConsuming( self.controlWorker, ("control", "worker", str(self.workerid), None)) yield self._getWorkerInfo() yield super().startService() # startMissingTimer wants the service to be running to really start if self.start_missing_on_startup: self.startMissingTimer() @defer.inlineCallbacks def reconfigService(self, name, password, max_builds=None, notify_on_missing=None, missing_timeout=DEFAULT_MISSING_TIMEOUT, properties=None, 
defaultProperties=None, locks=None, keepalive_interval=DEFAULT_KEEPALIVE_INTERVAL, machine_name=None): # Given a Worker config arguments, configure this one identically. # Because Worker objects are remotely referenced, we can't replace them # without disconnecting the worker, yet there's no reason to do that. assert self.name == name self.password = password # adopt new instance's configuration parameters self.max_builds = max_builds self.access = [] if locks: self.access = locks if notify_on_missing is None: notify_on_missing = [] if isinstance(notify_on_missing, str): notify_on_missing = [notify_on_missing] self.notify_on_missing = notify_on_missing if self.missing_timeout != missing_timeout: running_missing_timer = self.missing_timer self.stopMissingTimer() self.missing_timeout = missing_timeout if running_missing_timer: self.startMissingTimer() self.properties = Properties() self.properties.update(properties or {}, "Worker") self.properties.setProperty("workername", name, "Worker") self.defaultProperties = Properties() self.defaultProperties.update(defaultProperties or {}, "Worker") # Note that before first reconfig self.machine will always be None and # out of sync with self.machine_name, thus more complex logic is needed. 
if self.machine is not None and self.machine_name != machine_name: self.machine.unregisterWorker(self) self.machine = None self.machine_name = machine_name if self.machine is None and self.machine_name is not None: self.machine = self.master.machine_manager.getMachineByName( self.machine_name) if self.machine is not None: self.machine.registerWorker(self) self.properties.setProperty("machine_name", self.machine_name, "Worker") else: log.err("Unknown machine '{}' for worker '{}'".format( self.machine_name, self.name)) # update our records with the worker manager if not self.registration: self.registration = yield self.master.workers.register(self) yield self.registration.update(self, self.master.config) # tracks config version for locks self.config_version = self.master.config_version self.updateLocks() @defer.inlineCallbacks def reconfigServiceWithSibling(self, sibling): # reconfigServiceWithSibling will only reconfigure the worker when it is configured # differently. # However, the worker configuration depends on which builder it is configured yield super().reconfigServiceWithSibling(sibling) # update the attached worker's notion of which builders are attached. # This assumes that the relevant builders have already been configured, # which is why the reconfig_priority is set low in this class. 
bids = [ b.getBuilderId() for b in self.botmaster.getBuildersForWorker(self.name) ] bids = yield defer.gatherResults(bids, consumeErrors=True) if self._configured_builderid_list != bids: yield self.master.data.updates.workerConfigured( self.workerid, self.master.masterid, bids) yield self.updateWorker() self._configured_builderid_list = bids @defer.inlineCallbacks def stopService(self): if self.registration: yield self.registration.unregister() self.registration = None self.workerActionConsumer.stopConsuming() self.stopMissingTimer() self.stopQuarantineTimer() # mark this worker as configured for zero builders in this master yield self.master.data.updates.workerConfigured( self.workerid, self.master.masterid, []) # during master shutdown we need to wait until the disconnection notification deliveries # are completed, otherwise some of the events may still be firing long after the master # is completely shut down. yield self.disconnect() yield self.waitForCompleteShutdown() yield super().stopService() def isCompatibleWithBuild(self, build_props): # given a build properties object, determines whether the build is # compatible with the currently running worker or not. This is most # often useful for latent workers where it's possible to request # different kinds of workers. 
return defer.succeed(True) def startMissingTimer(self): if self.missing_timeout and self.parent and self.running: self.stopMissingTimer() # in case it's already running self.missing_timer = self.master.reactor.callLater( self.missing_timeout, self._missing_timer_fired) def stopMissingTimer(self): if self.missing_timer: if self.missing_timer.active(): self.missing_timer.cancel() self.missing_timer = None def isConnected(self): return self.conn def _missing_timer_fired(self): self.missing_timer = None # notify people, but only if we're still in the config if not self.parent: return last_connection = time.ctime(time.time() - self.missing_timeout) self.master.data.updates.workerMissing(workerid=self.workerid, masterid=self.master.masterid, last_connection=last_connection, notify=self.notify_on_missing) def updateWorker(self): """Called to add or remove builders after the worker has connected. @return: a Deferred that indicates when an attached worker has accepted the new builders and/or released the old ones.""" if self.conn: return self.sendBuilderList() # else: return defer.succeed(None) @defer.inlineCallbacks def attached(self, conn): """This is called when the worker connects.""" assert self.conn is None metrics.MetricCountEvent.log("AbstractWorker.attached_workers", 1) # now we go through a sequence of calls, gathering information, then # tell the Botmaster that it can finally give this worker to all the # Builders that care about it. # Reset graceful shutdown status self._graceful = False self.conn = conn self._old_builder_list = None # clear builder list before proceed self._applyWorkerInfo(conn.info) self.worker_commands = conn.info.get("worker_commands", {}) self.worker_environ = conn.info.get("environ", {}) self.worker_basedir = conn.info.get("basedir", None) self.worker_system = conn.info.get("system", None) # The _detach_sub member is only ever used from tests. 
self._detached_sub = self.conn.notifyOnDisconnect(self.detached) workerinfo = { 'admin': conn.info.get('admin'), 'host': conn.info.get('host'), 'access_uri': conn.info.get('access_uri'), 'version': conn.info.get('version') } yield self.master.data.updates.workerConnected( workerid=self.workerid, masterid=self.master.masterid, workerinfo=workerinfo) if self.worker_system == "nt": self.path_module = namedModule("ntpath") else: # most everything accepts / as separator, so posix should be a # reasonable fallback self.path_module = namedModule("posixpath") log.msg("bot attached") self.messageReceivedFromWorker() self.stopMissingTimer() yield self.updateWorker() yield self.botmaster.maybeStartBuildsForWorker(self.name) self.updateState() def messageReceivedFromWorker(self): now = time.time() self.lastMessageReceived = now def setupProperties(self, props): for name in self.properties.properties: props.setProperty(name, self.properties.getProperty(name), "Worker") for name in self.defaultProperties.properties: if name not in props: props.setProperty(name, self.defaultProperties.getProperty(name), "Worker") @defer.inlineCallbacks def _handle_conn_shutdown_notifier(self, conn): self._pending_conn_shutdown_notifier = Notifier() yield conn.waitShutdown() self._pending_conn_shutdown_notifier.notify(None) self._pending_conn_shutdown_notifier = None @defer.inlineCallbacks def detached(self): conn = self.conn self.conn = None self._handle_conn_shutdown_notifier(conn) # Note that _pending_conn_shutdown_notifier will not be fired until detached() # is complete. metrics.MetricCountEvent.log("AbstractWorker.attached_workers", -1) self._old_builder_list = [] log.msg("Worker.detached({})".format(self.name)) self.releaseLocks() yield self.master.data.updates.workerDisconnected( workerid=self.workerid, masterid=self.master.masterid, ) def disconnect(self): """Forcibly disconnect the worker. 
This severs the TCP connection and returns a Deferred that will fire (with None) when the connection is probably gone. If the worker is still alive, they will probably try to reconnect again in a moment. This is called in two circumstances. The first is when a worker is removed from the config file. In this case, when they try to reconnect, they will be rejected as an unknown worker. The second is when we wind up with two connections for the same worker, in which case we disconnect the older connection. """ if self.conn is None: return defer.succeed(None) log.msg("disconnecting old worker {} now".format(self.name)) # When this Deferred fires, we'll be ready to accept the new worker return self._disconnect(self.conn) def waitForCompleteShutdown(self): # This function waits until the disconnection to happen and the disconnection # notifications have been delivered and acted upon. return self._waitForCompleteShutdownImpl(self.conn) @defer.inlineCallbacks def _waitForCompleteShutdownImpl(self, conn): if conn: yield conn.wait_shutdown_started() yield conn.waitShutdown() elif self._pending_conn_shutdown_notifier is not None: yield self._pending_conn_shutdown_notifier.wait() @defer.inlineCallbacks def _disconnect(self, conn): # This function waits until the disconnection to happen and the disconnection # notifications have been delivered and acted upon d = self._waitForCompleteShutdownImpl(conn) conn.loseConnection() log.msg("waiting for worker to finish disconnecting") yield d @defer.inlineCallbacks def sendBuilderList(self): our_builders = self.botmaster.getBuildersForWorker(self.name) blist = [(b.name, b.config.workerbuilddir) for b in our_builders] if blist == self._old_builder_list: return slist = yield self.conn.remoteSetBuilderList(builders=blist) self._old_builder_list = blist # Nothing has changed, so don't need to re-attach to everything if not slist: return dl = [] for name in slist: # use get() since we might have changed our mind since then b = 
self.botmaster.builders.get(name) if b: d1 = self.attachBuilder(b) dl.append(d1) yield defer.DeferredList(dl) def attachBuilder(self, builder): return builder.attached(self, self.worker_commands) def controlWorker(self, key, params): log.msg("worker {} wants to {}: {}".format(self.name, key[-1], params)) if key[-1] == "stop": return self.shutdownRequested() if key[-1] == "pause": self.pause() if key[-1] == "unpause": self.unpause() if key[-1] == "kill": self.shutdown() return None def shutdownRequested(self): self._graceful = True self.maybeShutdown() self.updateState() def addWorkerForBuilder(self, wfb): self.workerforbuilders[wfb.builder_name] = wfb def removeWorkerForBuilder(self, wfb): try: del self.workerforbuilders[wfb.builder_name] except KeyError: pass def buildFinished(self, wfb): """This is called when a build on this worker is finished.""" self.botmaster.maybeStartBuildsForWorker(self.name) def canStartBuild(self): """ I am called when a build is requested to see if this worker can start a build. This function can be used to limit overall concurrency on the worker. Note for subclassers: if a worker can become willing to start a build without any action on that worker (for example, by a resource in use on another worker becoming available), then you must arrange for L{maybeStartBuildsForWorker} to be called at that time, or builds on this worker will not start. """ # If we're waiting to shutdown gracefully or paused, then we shouldn't # accept any new jobs. 
if self._graceful or self._paused: return False if self.max_builds: active_builders = [ wfb for wfb in self.workerforbuilders.values() if wfb.isBusy() ] if len(active_builders) >= self.max_builds: return False if not self.locksAvailable(): return False return True @defer.inlineCallbacks def shutdown(self): """Shutdown the worker""" if not self.conn: log.msg("no remote; worker is already shut down") return yield self.conn.remoteShutdown() def maybeShutdown(self): """Shut down this worker if it has been asked to shut down gracefully, and has no active builders.""" if not self._graceful: return active_builders = [ wfb for wfb in self.workerforbuilders.values() if wfb.isBusy() ] if active_builders: return d = self.shutdown() d.addErrback(log.err, 'error while shutting down worker') def updateState(self): self.master.data.updates.setWorkerState(self.workerid, self._paused, self._graceful) def pause(self): """Stop running new builds on the worker.""" self._paused = True self.updateState() def unpause(self): """Restart running new builds on the worker.""" self._paused = False self.botmaster.maybeStartBuildsForWorker(self.name) self.updateState() def isPaused(self): return self._paused def resetQuarantine(self): self.quarantine_timeout = self.quarantine_initial_timeout def putInQuarantine(self): if self.quarantine_timer: # already in quarantine return self.pause() self.quarantine_timer = self.master.reactor.callLater( self.quarantine_timeout, self.exitQuarantine) log.msg("{} has been put in quarantine for {}s".format( self.name, self.quarantine_timeout)) # next we will wait twice as long self.quarantine_timeout *= 2 if self.quarantine_timeout > self.quarantine_max_timeout: # unless we hit the max timeout self.quarantine_timeout = self.quarantine_max_timeout def exitQuarantine(self): self.quarantine_timer = None self.unpause() def stopQuarantineTimer(self): if self.quarantine_timer is not None: self.quarantine_timer.cancel() self.quarantine_timer = None self.unpause()
class AbstractLatentWorker(AbstractWorker):

    """A worker that will start up a worker instance when needed.

    To use, subclass and implement start_instance and stop_instance.

    Additionally, if the instances render any kind of data affecting instance
    type from the build properties, set the class variable
    builds_may_be_incompatible to True and override isCompatibleWithBuild
    method.

    See ec2.py for a concrete example.
    """

    # Build that triggered the current substantiation request, if any.
    substantiation_build = None
    # Delayed call that soft-disconnects the worker after build_wait_timeout.
    build_wait_timer = None
    start_missing_on_startup = False

    # Caveats: The handling of latent workers is much more complex than it
    # might seem. The code must handle at least the following conditions:
    #
    #   - non-silent disconnection by the worker at any time which generated
    #   TCP resets and in the end resulted in detached() being called
    #
    #   - silent disconnection by worker at any time by silent TCP connection
    #   failure which did not generate TCP resets, but on the other hand no
    #   response may be received. self.conn is not None in that case.
    #
    #   - no disconnection by worker during substantiation when
    #   build_wait_timeout param is negative.
    #
    # The above means that the connection state of the worker (self.conn) must
    # be tracked separately from the intended state of the worker (self.state).

    state = States.NOT_SUBSTANTIATED

    # state transitions:
    #
    # substantiate(): either of
    # NOT_SUBSTANTIATED -> SUBSTANTIATING
    # INSUBSTANTIATING -> INSUBSTANTIATING_SUBSTANTIATING
    #
    # attached():
    # SUBSTANTIATING -> SUBSTANTIATED
    # self.conn -> not None
    #
    # detached():
    # self.conn -> None
    #
    # errors in any of above will call insubstantiate()
    #
    # insubstantiate():
    # SUBSTANTIATED -> INSUBSTANTIATING
    # < other state transitions may happen during this time >
    # INSUBSTANTIATING_SUBSTANTIATING -> SUBSTANTIATING
    # INSUBSTANTIATING -> NOT_SUBSTANTIATED

    def checkConfig(self, name, password,
                    build_wait_timeout=60 * 10,
                    **kwargs):
        """Forward name, password and kwargs to the parent checkConfig.

        build_wait_timeout is accepted here but not used by this method; it is
        stored by reconfigService().
        """
        super().checkConfig(name, password, **kwargs)

    def reconfigService(self, name, password,
                        build_wait_timeout=60 * 10,
                        **kwargs):
        """Create fresh notifiers, store build_wait_timeout and reconfigure.

        @return: whatever the parent reconfigService() returns.
        """
        self._substantiation_notifier = Notifier()
        self._insubstantiation_notifier = Notifier()
        self.build_wait_timeout = build_wait_timeout
        return super().reconfigService(name, password, **kwargs)

    def getRandomPass(self):
        """
        compute a random password
        There is no point to configure a password for a LatentWorker, as it is created by the master.
        For supporting backend, a password can be generated by this API

        @return: a 20-character string of ASCII letters and digits.
        """
        return ''.join(
            random.choice(string.ascii_letters + string.digits)
            for _ in range(20))

    @property
    def building(self):
        """Set of the worker-for-builder objects that currently report busy."""
        # A LatentWorkerForBuilder will only be busy if it is building.
        return {wfb for wfb in self.workerforbuilders.values()
                if wfb.isBusy()}

    def failed_to_start(self, instance_id, instance_state):
        """Log a failed instance start and raise.

        @raise LatentWorkerFailedToSubstantiate: always, carrying instance_id
            and instance_state.
        """
        log.msg('%s %s failed to start instance %s (%s)' %
                (self.__class__.__name__, self.workername,
                    instance_id, instance_state))
        raise LatentWorkerFailedToSubstantiate(instance_id, instance_state)

    def start_instance(self, build):
        """Start the instance; must be implemented by subclasses."""
        # responsible for starting instance that will try to connect with this
        # master.  Should return deferred with either True (instance started)
        # or False (instance not started, so don't run a build here).  Problems
        # should use an errback.
        raise NotImplementedError

    def stop_instance(self, fast=False):
        """Stop the instance; must be implemented by subclasses."""
        # responsible for shutting down instance.
        raise NotImplementedError

    @property
    def substantiated(self):
        """True only when state is SUBSTANTIATED and a connection is present."""
        return self.state == States.SUBSTANTIATED and self.conn is not None

    def substantiate(self, wfb, build):
        """Request that the worker be substantiated for the given build.

        @return: a Deferred. It is already fired with True when the worker is
            substantiated and connected; otherwise it is a waiter on the
            substantiation notifier.
        """
        log.msg("substantiating worker %s" % (wfb,))

        # Already up and connected: just re-arm the idle timer.
        if self.state == States.SUBSTANTIATED and self.conn is not None:
            self._setBuildWaitTimer()
            return defer.succeed(True)

        # A substantiation is already in flight or queued: join its waiters.
        if self.state in [States.SUBSTANTIATING,
                          States.INSUBSTANTIATING_SUBSTANTIATING]:
            return self._substantiation_notifier.wait()

        self.startMissingTimer()
        self.substantiation_build = build

        # if anything of the following fails synchronously we need to have a
        # deferred ready to be notified
        d = self._substantiation_notifier.wait()

        if self.state == States.SUBSTANTIATED and self.conn is None:
            # connection dropped while we were substantiated.
            # insubstantiate to clean up and then substantiate normally.
            d_ins = self.insubstantiate(_force_substantiation=True)
            d_ins.addErrback(log.err, 'while insubstantiating')
            return d

        assert self.state in [States.NOT_SUBSTANTIATED,
                              States.INSUBSTANTIATING]

        if self.state == States.NOT_SUBSTANTIATED:
            self.state = States.SUBSTANTIATING
            self._substantiate(build)
        else:
            # insubstantiate() starts the substantiation once it finishes.
            self.state = States.INSUBSTANTIATING_SUBSTANTIATING
        return d

    @defer.inlineCallbacks
    def _substantiate(self, build):
        """Call start_instance() and route any error to _substantiation_failed().

        Exceptions are not propagated to the caller; they are reported through
        the substantiation notifier instead.
        """
        # register event trigger
        try:
            # if build_wait_timeout is negative we don't ever disconnect the
            # worker ourselves, so we don't need to wait for it to attach
            # to declare it as substantiated.
            dont_wait_to_attach = \
                self.build_wait_timeout < 0 and self.conn is not None

            start_success = yield self.start_instance(build)

            if not start_success:
                # this behaviour is kept as compatibility, but it is better
                # to just errback with a workable reason
                msg = "Worker does not want to substantiate at this time"
                raise LatentWorkerFailedToSubstantiate(self.name, msg)

            # State and connection are re-checked because they may have
            # changed while start_instance() was running.
            if dont_wait_to_attach and \
                    self.state == States.SUBSTANTIATING and \
                    self.conn is not None:
                log.msg(r"Worker %s substantiated (already attached)" %
                        (self.name,))
                self.state = States.SUBSTANTIATED
                self._fireSubstantiationNotifier(True)

        except Exception as e:
            self.stopMissingTimer()
            self._substantiation_failed(failure.Failure(e))
            # swallow the failure as it is notified

    def _fireSubstantiationNotifier(self, result):
        """Notify substantiation waiters with result and clear the stored build.

        Does nothing except log when the notifier evaluates as false.
        """
        if not self._substantiation_notifier:
            log.msg("No substantiation deferred for %s" % (self.name,))
            return

        # Anything other than the literal True is logged as a failure.
        result_msg = 'success' if result is True else 'failure'
        log.msg("Firing {} substantiation deferred with {}".format(
            self.name, result_msg))

        self.substantiation_build = None
        self._substantiation_notifier.notify(result)

    @defer.inlineCallbacks
    def attached(self, bot):
        """Handle a connection from the worker instance.

        @raise RuntimeError: when the state is not SUBSTANTIATING and
            build_wait_timeout is not negative; the connection is asked to
            disconnect first.
        """
        if self.state != States.SUBSTANTIATING and \
                self.build_wait_timeout >= 0:
            msg = 'Worker %s received connection while not trying to ' \
                'substantiate. Disconnecting.' % (self.name,)
            log.msg(msg)
            self._disconnect(bot)
            raise RuntimeError(msg)

        try:
            yield super().attached(bot)
        except Exception:
            # failure.Failure() with no arguments captures the exception
            # currently being handled.
            self._substantiation_failed(failure.Failure())
            return
        log.msg(r"Worker %s substantiated \o/" % (self.name,))

        # only change state when we are actually substantiating. We could
        # end up at this point in different state than SUBSTANTIATING if
        # build_wait_timeout is negative. When build_wait_timeout is not
        # negative, we throw an error (see above)
        if self.state == States.SUBSTANTIATING:
            self.state = States.SUBSTANTIATED
        self._fireSubstantiationNotifier(True)

    def attachBuilder(self, builder):
        """Attach this worker through its worker-for-builder for builder.name."""
        # NOTE(review): .get() returns None for an unknown builder name, which
        # would fail on the next line -- confirm callers guarantee the entry.
        wfb = self.workerforbuilders.get(builder.name)
        return wfb.attached(self, self.worker_commands)

    def _missing_timer_fired(self):
        """Treat expiry of the missing timer as a failed substantiation."""
        self.missing_timer = None
        # NOTE(review): a bare TimeoutError instance (not a Failure) is passed
        # on to the notifier here -- confirm waiters expect a non-Failure
        # result in this case.
        return self._substantiation_failed(defer.TimeoutError())

    def _substantiation_failed(self, failure):
        """Report a failed substantiation and start insubstantiating.

        @return: the result of workerMissing() when the worker still has a
            parent and notify_on_missing is set, otherwise None.
        """
        if self.state == States.SUBSTANTIATING:
            self.substantiation_build = None
            self._fireSubstantiationNotifier(failure)

        # Errors from insubstantiate() are only logged, not propagated.
        d = self.insubstantiate()
        d.addErrback(log.err, 'while insubstantiating')

        # notify people, but only if we're still in the config
        if not self.parent or not self.notify_on_missing:
            return

        return self.master.data.updates.workerMissing(
            workerid=self.workerid,
            masterid=self.master.masterid,
            last_connection="Latent worker never connected",
            notify=self.notify_on_missing
        )

    def canStartBuild(self):
        """Refuse builds while disconnected with builds still marked busy."""
        # we were disconnected, but all the builds are not yet cleaned up.
        if self.conn is None and self.building:
            return False
        return super().canStartBuild()

    def buildStarted(self, wfb):
        """Cancel the idle timer once a build is running on wfb."""
        assert wfb.isBusy()
        self._clearBuildWaitTimer()

    def buildFinished(self, wfb):
        """Schedule a disconnect or re-arm the idle timer when nothing builds."""
        assert not wfb.isBusy()
        if not self.building:
            if self.build_wait_timeout == 0:
                # we insubstantiate asynchronously to trigger more bugs with
                # the fake reactor
                self.master.reactor.callLater(0, self._soft_disconnect)
                # insubstantiate will automatically retry to create build for
                # this worker
            else:
                self._setBuildWaitTimer()

        # AbstractWorker.buildFinished() will try to start the next build for
        # that worker
        super().buildFinished(wfb)

    def _clearBuildWaitTimer(self):
        """Cancel the idle timer if it is still active, then forget it."""
        if self.build_wait_timer is not None:
            if self.build_wait_timer.active():
                self.build_wait_timer.cancel()
            self.build_wait_timer = None

    def _setBuildWaitTimer(self):
        """Restart the idle timer; no timer is set for a timeout <= 0."""
        self._clearBuildWaitTimer()
        if self.build_wait_timeout <= 0:
            return
        self.build_wait_timer = self.master.reactor.callLater(
            self.build_wait_timeout, self._soft_disconnect)

    @defer.inlineCallbacks
    def insubstantiate(self, fast=False, _force_substantiation=False):
        """Stop the instance and move the state machine accordingly.

        @param fast: passed through to stop_instance().
        @param _force_substantiation: when true, substantiation is restarted
            with the stored substantiation_build after the instance stopped.
        """
        # _force_substantiation=True means we'll try to substantiate a build
        # with stored substantiation_build at the end of substantiation

        log.msg("insubstantiating worker {}".format(self))

        if self.state == States.NOT_SUBSTANTIATED:
            return

        # Someone else is already insubstantiating: wait for them to finish.
        if self.state == States.INSUBSTANTIATING:
            yield self._insubstantiation_notifier.wait()
            return

        # Remember whether a pending substantiation is being cancelled; the
        # state is overwritten just below.
        notify_cancel = self.state == States.SUBSTANTIATING

        if _force_substantiation:
            self.state = States.INSUBSTANTIATING_SUBSTANTIATING
        else:
            self.state = States.INSUBSTANTIATING
        self._clearBuildWaitTimer()
        d = self.stop_instance(fast)
        try:
            yield d
        except Exception as e:
            # The case of failure for insubstantiation is bad as we have a
            # left-over costing resource There is not much thing to do here
            # generically, so we must put the problem of stop_instance
            # reliability to the backend driver
            log.err(e, "while insubstantiating")

        assert self.state in [States.INSUBSTANTIATING,
                              States.INSUBSTANTIATING_SUBSTANTIATING]

        if notify_cancel:
            self._fireSubstantiationNotifier(
                failure.Failure(LatentWorkerSubstantiatiationCancelled()))

        if self.state == States.INSUBSTANTIATING_SUBSTANTIATING:
            # A substantiation was requested meanwhile: start it now.
            self.state = States.SUBSTANTIATING
            if self._insubstantiation_notifier:
                self._insubstantiation_notifier.notify(True)
            self._substantiate(self.substantiation_build)
        elif self.state == States.INSUBSTANTIATING:
            self.state = States.NOT_SUBSTANTIATED
            if self._insubstantiation_notifier:
                self._insubstantiation_notifier.notify(True)
        else:
            pass
        self.botmaster.maybeStartBuildsForWorker(self.name)

    @defer.inlineCallbacks
    def _soft_disconnect(self, fast=False):
        """Disconnect and insubstantiate unless a build is still running.

        @param fast: passed through to insubstantiate().
        """
        if self.building:
            # wait until build finished
            # TODO: remove this behavior as AbstractWorker disconnects forcibly
            return
        # a negative build_wait_timeout means the worker should never be shut
        # down, so just disconnect.
        if self.build_wait_timeout < 0:
            yield super().disconnect()
            return

        self.stopMissingTimer()

        # if master is stopping, we will never achieve consistent state, as workermanager
        # won't accept new connection
        if self._substantiation_notifier and self.master.running:
            log.msg("Weird: Got request to stop before started. Allowing "
                    "worker to start cleanly to avoid inconsistent state")
            yield self._substantiation_notifier.wait()
            self.substantiation_build = None
            log.msg("Substantiation complete, immediately terminating.")

        # Both calls are made before waiting on either of them.
        yield defer.DeferredList([
            super().disconnect(),
            self.insubstantiate(fast)
        ], consumeErrors=True, fireOnOneErrback=True)

    def disconnect(self):
        """Start a soft disconnect and report the worker as lost."""
        # This returns a Deferred but we don't use it
        self._soft_disconnect()
        # this removes the worker from all builders.  It won't come back
        # without a restart (or maybe a sighup)
        self.botmaster.workerLost(self)

    @defer.inlineCallbacks
    def stopService(self):
        """Wait for pending insubstantiation, disconnect, then stop the service.

        @return: a Deferred firing with the parent stopService() result.
        """
        # the worker might be insubstantiating from buildWaitTimeout
        if self.state in [States.INSUBSTANTIATING,
                          States.INSUBSTANTIATING_SUBSTANTIATING]:
            yield self._insubstantiation_notifier.wait()

        if self.conn is not None or self.state in [States.SUBSTANTIATING,
                                                   States.SUBSTANTIATED]:
            yield self._soft_disconnect()
        self._clearBuildWaitTimer()
        res = yield super().stopService()
        return res

    def updateWorker(self):
        """Called to add or remove builders after the worker has connected.

        Also called after botmaster's builders are initially set.

        @return: a Deferred that indicates when an attached worker has
        accepted the new builders and/or released the old ones."""
        # Register with every builder that does not know this worker yet.
        for b in self.botmaster.getBuildersForWorker(self.name):
            if b.name not in self.workerforbuilders:
                b.addLatentWorker(self)
        return super().updateWorker()
def checkConfig(self, name, password, build_wait_timeout=60 * 10, **kwargs):
    """Run the AbstractWorker config check, then record latent-worker state.

    name, password and any extra keyword arguments are handed to
    AbstractWorker.checkConfig unchanged. Afterwards build_wait_timeout is
    stored on the instance and a new substantiation Notifier is created.
    """
    # The base class is called explicitly by name, not through super().
    AbstractWorker.checkConfig(self, name, password, **kwargs)

    self.build_wait_timeout = build_wait_timeout

    fresh_notifier = Notifier()
    self._substantiation_notifier = fresh_notifier
class AbstractLatentMachine(Machine):
    """A machine that is started on demand and stopped when it falls idle.

    Subclasses implement start_machine() and stop_machine(). The current
    lifecycle position is kept in self.state as one of the States values
    STOPPED, STARTING, STARTED or STOPPING.
    """

    DEFAULT_MISSING_TIMEOUT = 20 * 60

    def checkConfig(self, name,
                    build_wait_timeout=0,
                    missing_timeout=DEFAULT_MISSING_TIMEOUT, **kwargs):
        """Check the parent config and initialise state and worker list.

        build_wait_timeout and missing_timeout are accepted but not used here.
        """
        super().checkConfig(name, **kwargs)
        self.state = States.STOPPED
        self.latent_workers = []

    def reconfigService(self, name,
                        build_wait_timeout=0,
                        missing_timeout=DEFAULT_MISSING_TIMEOUT, **kwargs):
        """Store the timeouts, verify the workers and reset runtime state.

        @raise Exception: if any worker does not provide ILatentWorker.
        """
        super().reconfigService(name, **kwargs)
        self.build_wait_timeout = build_wait_timeout
        self.missing_timeout = missing_timeout

        # Only latent workers may be attached to a latent machine.
        for candidate in self.workers:
            if interfaces.ILatentWorker.providedBy(candidate):
                continue
            raise Exception('Worker is not latent {}'.format(
                candidate.name))

        self.state = States.STOPPED
        self._start_notifier = Notifier()
        self._stop_notifier = Notifier()
        self._build_wait_timer = None
        self._missing_timer = None

    def start_machine(self):
        """Start the machine; must be implemented by subclasses.

        The function should return a deferred which should result in True if
        the startup has been successful, or False otherwise.
        """
        raise NotImplementedError

    def stop_machine(self):
        """Shut down the machine; must be implemented by subclasses."""
        raise NotImplementedError

    @defer.inlineCallbacks
    def substantiate(self, starting_worker):
        """Start the machine unless it is already started or starting.

        @return: a Deferred firing with True when the machine is already
            started, with the notified result when a start is already in
            progress, or with the outcome of start_machine() (False when it
            raised).
        """
        if self.state == States.STOPPING:
            # wait until stop action finishes
            yield self._stop_notifier.wait()

        if self.state == States.STARTED:
            # may happen if we waited for stop to complete and in the mean
            # time the machine was successfully woken.
            return True

        # wait for already proceeding startup to finish, if any
        if self.state == States.STARTING:
            pending_result = yield self._start_notifier.wait()
            return pending_result

        self.state = States.STARTING

        # Start the machine. start_machine may substantiate additional workers
        # depending on the implementation.
        try:
            started = yield self.start_machine()
        except Exception as err:
            log.err(err, 'while starting latent machine {0}'.format(self.name))
            started = False

        if started:
            self._setMissingTimer()
            self.state = States.STARTED
        else:
            # Startup failed: insubstantiate every worker before settling
            # back into the stopped state.
            yield defer.DeferredList(
                [member.insubstantiate() for member in self.workers],
                consumeErrors=True)
            self.state = States.STOPPED

        self._start_notifier.notify(started)
        return started

    @defer.inlineCallbacks
    def _stop(self):
        """Insubstantiate all workers and stop the machine if it is idle.

        Does nothing while any worker is building or the machine is starting;
        when a stop is already in progress it only waits for that stop.
        """
        if any(member.building for member in self.workers):
            return None
        if self.state == States.STARTING:
            return None

        if self.state == States.STOPPING:
            yield self._stop_notifier.wait()
            return None

        self.state = States.STOPPING

        # wait until workers insubstantiate, then stop
        pending = [member.insubstantiate() for member in self.workers]
        yield defer.DeferredList(pending, consumeErrors=True)

        try:
            yield self.stop_machine()
        except Exception as err:
            # A failed stop is logged; the machine is still marked stopped.
            log.err(err, 'while stopping latent machine {0}'.format(
                self.name))

        self.state = States.STOPPED
        self._stop_notifier.notify(None)

    def notifyBuildStarted(self):
        """Cancel the missing timer because a build has started."""
        self._clearMissingTimer()

    def notifyBuildFinished(self):
        """Arm the build-wait timer once no worker is building any more."""
        if not any(member.building for member in self.workers):
            self._setBuildWaitTimer()
            return
        self._clearBuildWaitTimer()

    def _clearMissingTimer(self):
        """Cancel the missing timer if it is still active, then forget it."""
        timer = self._missing_timer
        if timer is None:
            return
        if timer.active():
            timer.cancel()
        self._missing_timer = None

    def _setMissingTimer(self):
        """Restart the missing timer so _stop runs after missing_timeout."""
        self._clearMissingTimer()
        reactor = self.master.reactor
        self._missing_timer = reactor.callLater(
            self.missing_timeout, self._stop)

    def _clearBuildWaitTimer(self):
        """Cancel the build-wait timer if it is still active, then forget it."""
        timer = self._build_wait_timer
        if timer is None:
            return
        if timer.active():
            timer.cancel()
        self._build_wait_timer = None

    def _setBuildWaitTimer(self):
        """Restart the build-wait timer so _stop runs after build_wait_timeout."""
        self._clearBuildWaitTimer()
        reactor = self.master.reactor
        self._build_wait_timer = reactor.callLater(
            self.build_wait_timeout, self._stop)

    def __repr__(self):
        template = "<AbstractLatentMachine '{}' at {}>"
        return template.format(self.name, id(self))
def reconfigService(self, name, password, build_wait_timeout=60 * 10, **kwargs):
    """Reset latent-worker state, then reconfigure via AbstractWorker.

    A new substantiation Notifier is created and build_wait_timeout is stored
    before name, password and the remaining keyword arguments are handed to
    AbstractWorker.reconfigService.

    @return: whatever AbstractWorker.reconfigService returns.
    """
    self._substantiation_notifier = Notifier()
    self.build_wait_timeout = build_wait_timeout

    # The base class is called explicitly by name, not through super().
    base_result = AbstractWorker.reconfigService(
        self, name, password, **kwargs)
    return base_result
class AbstractLatentWorker(AbstractWorker):
    """A worker that will start up a worker instance when needed.

    To use, subclass and implement start_instance and stop_instance.

    Additionally, if the instances render any kind of data affecting instance
    type from the build properties, set the class variable
    builds_may_be_incompatible to True and override isCompatibleWithBuild
    method.

    See ec2.py for a concrete example.
    """

    # build stored while an insubstantiation is in progress; it is handed to
    # _substantiate() once that insubstantiation completes
    substantiation_build = None
    # handle returned by reactor.callLater() for the idle (build wait) timeout
    build_wait_timer = None
    start_missing_on_startup = False

    # override if the latent worker may connect without substantiate. Most
    # often this will be used in workers whose lifetime is managed by
    # latent machines.
    starts_without_substantiate = False

    # Caveats: The handling of latent workers is much more complex than it
    # might seem. The code must handle at least the following conditions:
    #
    #   - non-silent disconnection by the worker at any time which generated
    #   TCP resets and in the end resulted in detached() being called
    #
    #   - silent disconnection by worker at any time by silent TCP connection
    #   failure which did not generate TCP resets, but on the other hand no
    #   response may be received. self.conn is not None in that case.
    #
    #   - no disconnection by worker during substantiation when
    #   build_wait_timeout param is negative.
    #
    #   - worker attaching before start_instance returned.
    #
    # The above means that the following parts of the state must be tracked separately and can
    # result in various state combinations:
    #   - connection state of the worker (self.conn)
    #   - intended state of the worker (self.state)
    #   - whether start_instance() has been called and has not yet finished.

    state = States.NOT_SUBSTANTIATED

    '''
    state transitions:

    substantiate(): either of
        NOT_SUBSTANTIATED -> SUBSTANTIATING
        INSUBSTANTIATING -> INSUBSTANTIATING_SUBSTANTIATING

    _substantiate():
        either of:
        SUBSTANTIATING -> SUBSTANTIATING_STARTING
        SUBSTANTIATING -> SUBSTANTIATING_STARTING -> SUBSTANTIATED

    attached():
        either of:
        SUBSTANTIATING -> SUBSTANTIATED
        SUBSTANTIATING_STARTING -> SUBSTANTIATED
        then:
        self.conn -> not None

    detached():
        self.conn -> None

    errors in any of above will call insubstantiate()

    insubstantiate():
        either of:
            SUBSTANTIATED -> INSUBSTANTIATING
            INSUBSTANTIATING_SUBSTANTIATING -> INSUBSTANTIATING (cancels substantiation request)
            SUBSTANTIATING -> INSUBSTANTIATING
            SUBSTANTIATING -> INSUBSTANTIATING_SUBSTANTIATING
            SUBSTANTIATING_STARTING -> INSUBSTANTIATING
            SUBSTANTIATING_STARTING -> INSUBSTANTIATING_SUBSTANTIATING

        then:
            < other state transitions may happen during this time >

        then either of:
            INSUBSTANTIATING_SUBSTANTIATING -> SUBSTANTIATING
            INSUBSTANTIATING -> NOT_SUBSTANTIATED

    stopService():
        NOT_SUBSTANTIATED -> SHUT_DOWN
    '''

    def checkConfig(self, name, password,
                    build_wait_timeout=60 * 10,
                    **kwargs):
        """Validate the configuration by delegating to the parent class.

        ``build_wait_timeout`` is accepted here only so that it is not
        forwarded to the parent; it is stored in reconfigService().
        """
        super().checkConfig(name, password, **kwargs)

    def reconfigService(self, name, password,
                        build_wait_timeout=60 * 10,
                        **kwargs):
        """Reset the per-configuration helpers and store the timeout.

        Creates a new substantiation notifier, start/stop lock and
        DeferWaiter, records ``build_wait_timeout`` and returns the result of
        the parent's reconfigService().
        """
        self._substantiation_notifier = Notifier()
        self._start_stop_lock = defer.DeferredLock()
        self._deferwaiter = deferwaiter.DeferWaiter()
        self.build_wait_timeout = build_wait_timeout
        return super().reconfigService(name, password, **kwargs)

    def getRandomPass(self):
        """
        compute a random password
        There is no point to configure a password for a LatentWorker, as it is created by the master.
        For supporting backend, a password can be generated by this API
        """
        return ''.join(
            random.choice(string.ascii_letters + string.digits)
            for _ in range(20))

    @property
    def building(self):
        """Set of the worker-for-builder objects that are currently busy."""
        # A LatentWorkerForBuilder will only be busy if it is building.
        return {wfb for wfb in self.workerforbuilders.values()
                if wfb.isBusy()}

    def failed_to_start(self, instance_id, instance_state):
        """Log the failed start and raise LatentWorkerFailedToSubstantiate."""
        log.msg('{} {} failed to start instance {} ({})'.format(self.__class__.__name__,
                                                                self.workername, instance_id,
                                                                instance_state))
        raise LatentWorkerFailedToSubstantiate(instance_id, instance_state)

    def _log_start_stop_locked(self, action_str):
        """Log that ``action_str`` has to wait if the start/stop lock is held."""
        if self._start_stop_lock.locked:
            log.msg(('while {} worker {}: waiting until previous ' +
                     'start_instance/stop_instance finishes').format(action_str, self))

    def start_instance(self, build):
        # responsible for starting instance that will try to connect with this
        # master. Should return deferred with either True (instance started)
        # or False (instance not started, so don't run a build here). Problems
        # should use an errback.
        raise NotImplementedError

    def stop_instance(self, fast=False):
        # responsible for shutting down instance.
        raise NotImplementedError

    @property
    def substantiated(self):
        """True only when the state is SUBSTANTIATED and a connection exists."""
        return self.state == States.SUBSTANTIATED and self.conn is not None

    def substantiate(self, wfb, build):
        """Request that the worker be substantiated for ``build``.

        Returns a Deferred. It fires with False when the worker is shut down
        and with True when it is already substantiated and connected;
        otherwise it is a waiter on the substantiation notifier, which is
        fired by _fireSubstantiationNotifier().
        """
        log.msg("substantiating worker {}".format(wfb))

        if self.state == States.SHUT_DOWN:
            return defer.succeed(False)

        if self.state == States.SUBSTANTIATED and self.conn is not None:
            self._setBuildWaitTimer()
            return defer.succeed(True)

        # a substantiation is already under way (or queued): just wait for it
        if self.state in [States.SUBSTANTIATING,
                          States.SUBSTANTIATING_STARTING,
                          States.INSUBSTANTIATING_SUBSTANTIATING]:
            return self._substantiation_notifier.wait()

        self.startMissingTimer()

        # if anything of the following fails synchronously we need to have a
        # deferred ready to be notified
        d = self._substantiation_notifier.wait()

        if self.state == States.SUBSTANTIATED and self.conn is None:
            # connection dropped while we were substantiated.
            # insubstantiate to clean up and then substantiate normally.
            d_ins = self.insubstantiate(force_substantiation_build=build)
            d_ins.addErrback(log.err, 'while insubstantiating')
            return d

        assert self.state in [States.NOT_SUBSTANTIATED,
                              States.INSUBSTANTIATING]

        if self.state == States.NOT_SUBSTANTIATED:
            self.state = States.SUBSTANTIATING
            self._substantiate(build)
        else:
            # an insubstantiation is running; it will pick up the stored
            # build and call _substantiate() when it finishes
            self.state = States.INSUBSTANTIATING_SUBSTANTIATING
            self.substantiation_build = build
        return d

    @defer.inlineCallbacks
    def _substantiate(self, build):
        """Start the machine (if any) and the instance for ``build``.

        Must be called in the SUBSTANTIATING state. Any exception is routed
        to _substantiation_failed() and not propagated to the caller.
        """
        assert self.state == States.SUBSTANTIATING
        try:
            # if build_wait_timeout is negative we don't ever disconnect the
            # worker ourselves, so we don't need to wait for it to attach
            # to declare it as substantiated.
            dont_wait_to_attach = \
                self.build_wait_timeout < 0 and self.conn is not None

            start_success = True
            if ILatentMachine.providedBy(self.machine):
                start_success = yield self.machine.substantiate(self)

            try:
                self._log_start_stop_locked('substantiating')
                yield self._start_stop_lock.acquire()

                if start_success:
                    self.state = States.SUBSTANTIATING_STARTING
                    start_success = yield self.start_instance(build)
            finally:
                self._start_stop_lock.release()

            if not start_success:
                # this behaviour is kept as compatibility, but it is better
                # to just errback with a workable reason
                msg = "Worker does not want to substantiate at this time"
                raise LatentWorkerFailedToSubstantiate(self.name, msg)

            if dont_wait_to_attach and \
                    self.state == States.SUBSTANTIATING_STARTING and \
                    self.conn is not None:
                log.msg(r"Worker {} substantiated (already attached)".format(self.name))
                self.state = States.SUBSTANTIATED
                self._fireSubstantiationNotifier(True)

        except Exception as e:
            self.stopMissingTimer()
            self._substantiation_failed(failure.Failure(e))
            # swallow the failure as it is notified

    def _fireSubstantiationNotifier(self, result):
        """Notify all substantiation waiters with ``result``, if there are any."""
        if not self._substantiation_notifier:
            log.msg("No substantiation deferred for {}".format(self.name))
            return

        result_msg = 'success' if result is True else 'failure'
        log.msg("Firing {} substantiation deferred with {}".format(self.name, result_msg))

        self._substantiation_notifier.notify(result)

    @defer.inlineCallbacks
    def attached(self, bot):
        """Handle a new connection from the worker.

        Raises RuntimeError (after scheduling a disconnect of ``bot``) when
        the connection arrives outside SUBSTANTIATING_STARTING and
        build_wait_timeout is not negative.
        """
        if self.state != States.SUBSTANTIATING_STARTING and \
                self.build_wait_timeout >= 0:
            # the two literals are concatenated by the parser, so the
            # separating space has to be part of one of them
            msg = ('Worker {} received connection while not trying to substantiate. '
                   'Disconnecting.').format(self.name)
            log.msg(msg)
            self._deferwaiter.add(self._disconnect(bot))
            raise RuntimeError(msg)

        try:
            yield super().attached(bot)
        except Exception:
            self._substantiation_failed(failure.Failure())
            return
        log.msg(r"Worker {} substantiated \o/".format(self.name))

        # only change state when we are actually substantiating. We could
        # end up at this point in different state than SUBSTANTIATING_STARTING
        # if build_wait_timeout is negative. In that case, the worker is never
        # shut down, but it may reconnect if the connection drops on its side
        # without master seeing this condition.
        #
        # When build_wait_timeout is not negative, we throw an error (see above)
        if self.state in [States.SUBSTANTIATING,
                          States.SUBSTANTIATING_STARTING]:
            self.state = States.SUBSTANTIATED
        self._fireSubstantiationNotifier(True)

    def attachBuilder(self, builder):
        """Attach this worker to the worker-for-builder of ``builder``."""
        wfb = self.workerforbuilders.get(builder.name)
        return wfb.attached(self, self.worker_commands)

    def _missing_timer_fired(self):
        """Treat expiry of the missing timer as a substantiation failure."""
        self.missing_timer = None
        return self._substantiation_failed(defer.TimeoutError())

    def _substantiation_failed(self, failure):
        """Notify waiters of the failure and start insubstantiating.

        The insubstantiation Deferred is tracked by the DeferWaiter and its
        errors are logged. Returns the result of the ``workerMissing`` data
        update, or None when the worker has no parent or notify_on_missing is
        empty.
        """
        if self.state in [States.SUBSTANTIATING,
                          States.SUBSTANTIATING_STARTING]:
            self._fireSubstantiationNotifier(failure)

        d = self.insubstantiate()
        d.addErrback(log.err, 'while insubstantiating')
        self._deferwaiter.add(d)

        # notify people, but only if we're still in the config
        if not self.parent or not self.notify_on_missing:
            return

        return self.master.data.updates.workerMissing(
            workerid=self.workerid,
            masterid=self.master.masterid,
            last_connection="Latent worker never connected",
            notify=self.notify_on_missing
        )

    def canStartBuild(self):
        """Refuse builds while disconnected with builds still registered."""
        # we were disconnected, but all the builds are not yet cleaned up.
        if self.conn is None and self.building:
            return False
        return super().canStartBuild()

    def buildStarted(self, wfb):
        """Cancel the idle timer and inform the latent machine, if any."""
        assert wfb.isBusy()
        self._clearBuildWaitTimer()

        if ILatentMachine.providedBy(self.machine):
            self.machine.notifyBuildStarted()

    def buildFinished(self, wfb):
        """React to a finished build: disconnect or arm the idle timer."""
        assert not wfb.isBusy()
        if not self.building:
            if self.build_wait_timeout == 0:
                # we insubstantiate asynchronously to trigger more bugs with
                # the fake reactor
                self.master.reactor.callLater(0, self._soft_disconnect)
                # insubstantiate will automatically retry to create build for
                # this worker
            else:
                self._setBuildWaitTimer()

        # AbstractWorker.buildFinished() will try to start the next build for
        # that worker
        super().buildFinished(wfb)

        if ILatentMachine.providedBy(self.machine):
            self.machine.notifyBuildFinished()

    def _clearBuildWaitTimer(self):
        """Cancel the idle timer if it is still pending and forget it."""
        if self.build_wait_timer is not None:
            if self.build_wait_timer.active():
                self.build_wait_timer.cancel()
            self.build_wait_timer = None

    def _setBuildWaitTimer(self):
        """(Re)arm the idle timer; a timeout <= 0 leaves no timer set."""
        self._clearBuildWaitTimer()
        if self.build_wait_timeout <= 0:
            return
        self.build_wait_timer = self.master.reactor.callLater(
            self.build_wait_timeout, self._soft_disconnect)

    @defer.inlineCallbacks
    def insubstantiate(self, fast=False, force_substantiation_build=None):
        """Stop the instance, serialized through the start/stop lock.

        ``fast`` is forwarded to stop_instance(). Failures of stop_instance()
        are logged, not propagated.
        """
        # If force_substantiation_build is not None, we'll try to substantiate the given build
        # after insubstantiation concludes. This parameter allows to go directly to the
        # SUBSTANTIATING state without going through NOT_SUBSTANTIATED state.

        log.msg("insubstantiating worker {}".format(self))

        if self.state == States.INSUBSTANTIATING_SUBSTANTIATING:
            # there's another insubstantiation ongoing. We'll wait for it to finish by waiting
            # on self._start_stop_lock
            self.state = States.INSUBSTANTIATING
            self.substantiation_build = None
            self._fireSubstantiationNotifier(
                failure.Failure(LatentWorkerSubstantiatiationCancelled()))

        try:
            self._log_start_stop_locked('insubstantiating')
            yield self._start_stop_lock.acquire()

            assert self.state not in [States.INSUBSTANTIATING,
                                      States.INSUBSTANTIATING_SUBSTANTIATING]

            if self.state in [States.NOT_SUBSTANTIATED, States.SHUT_DOWN]:
                return

            prev_state = self.state

            if force_substantiation_build is not None:
                self.state = States.INSUBSTANTIATING_SUBSTANTIATING
                self.substantiation_build = force_substantiation_build
            else:
                self.state = States.INSUBSTANTIATING

            if prev_state in [States.SUBSTANTIATING, States.SUBSTANTIATING_STARTING]:
                self._fireSubstantiationNotifier(
                    failure.Failure(LatentWorkerSubstantiatiationCancelled()))

            self._clearBuildWaitTimer()

            # only these previous states need stop_instance() to be called
            if prev_state in [States.SUBSTANTIATING_STARTING, States.SUBSTANTIATED]:
                try:
                    yield self.stop_instance(fast)
                except Exception as e:
                    # The case of failure for insubstantiation is bad as we have a
                    # left-over costing resource There is not much thing to do here
                    # generically, so we must put the problem of stop_instance
                    # reliability to the backend driver
                    log.err(e, "while insubstantiating")

            assert self.state in [States.INSUBSTANTIATING,
                                  States.INSUBSTANTIATING_SUBSTANTIATING]

            if self.state == States.INSUBSTANTIATING_SUBSTANTIATING:
                build, self.substantiation_build = self.substantiation_build, None
                self.state = States.SUBSTANTIATING
                self._substantiate(build)
            else:  # self.state == States.INSUBSTANTIATING:
                self.state = States.NOT_SUBSTANTIATED

        finally:
            self._start_stop_lock.release()

        self.botmaster.maybeStartBuildsForWorker(self.name)

    @defer.inlineCallbacks
    def _soft_disconnect(self, fast=False, stopping_service=False):
        """Disconnect the worker and, unless configured otherwise, stop it."""
        # a negative build_wait_timeout means the worker should never be shut
        # down, so just disconnect.
        if not stopping_service and self.build_wait_timeout < 0:
            yield super().disconnect()
            return

        self.stopMissingTimer()

        # we add the Deferreds to DeferWaiter because we don't wait for a Deferred if
        # the other Deferred errbacks
        yield defer.DeferredList([
            self._deferwaiter.add(super().disconnect()),
            self._deferwaiter.add(self.insubstantiate(fast))
        ], consumeErrors=True, fireOnOneErrback=True)

    def disconnect(self):
        """Start a soft disconnect and remove the worker from the botmaster."""
        self._deferwaiter.add(self._soft_disconnect())
        # this removes the worker from all builders.  It won't come back
        # without a restart (or maybe a sighup)
        self.botmaster.workerLost(self)

    @defer.inlineCallbacks
    def stopService(self):
        """Stop the service and return the parent's stopService() result."""
        # stops the service. Waits for any pending substantiations, insubstantiations or builds
        # that are running or about to start to complete.
        while self.state not in [States.NOT_SUBSTANTIATED,
                                 States.SHUT_DOWN]:
            if self.state in [States.INSUBSTANTIATING,
                              States.INSUBSTANTIATING_SUBSTANTIATING,
                              States.SUBSTANTIATING,
                              States.SUBSTANTIATING_STARTING]:
                # acquiring and immediately releasing the lock waits for the
                # current holder to finish
                self._log_start_stop_locked('stopService')
                yield self._start_stop_lock.acquire()
                self._start_stop_lock.release()

            if self.conn is not None or self.state in [States.SUBSTANTIATED,
                                                       States.SUBSTANTIATING_STARTING]:
                yield self._soft_disconnect(stopping_service=True)

            yield self._deferwaiter.wait()

        # prevent any race conditions with any future builds that are in the process of
        # being started.
        if self.state == States.NOT_SUBSTANTIATED:
            self.state = States.SHUT_DOWN

        self._clearBuildWaitTimer()
        res = yield super().stopService()
        return res

    def updateWorker(self):
        """Called to add or remove builders after the worker has connected.

        Also called after botmaster's builders are initially set.

        @return: a Deferred that indicates when an attached worker has
        accepted the new builders and/or released the old ones."""
        for b in self.botmaster.getBuildersForWorker(self.name):
            if b.name not in self.workerforbuilders:
                b.addLatentWorker(self)
        return super().updateWorker()
class AbstractLatentWorker(AbstractWorker):
    """A worker that will start up a worker instance when needed.

    To use, subclass and implement start_instance and stop_instance.

    Additionally, if the instances render any kind of data affecting instance
    type from the build properties, set the class variable
    builds_may_be_incompatible to True and override isCompatibleWithBuild
    method.

    See ec2.py for a concrete example.
    """

    # build whose substantiation is pending; cleared on success or failure
    substantiation_build = None
    # True while insubstantiate() is running; blocks canStartBuild()
    insubstantiating = False
    # handle returned by reactor.callLater() for the idle (build wait) timeout
    build_wait_timer = None
    start_missing_on_startup = False

    def checkConfig(self, name, password,
                    build_wait_timeout=60 * 10,
                    **kwargs):
        """Validate the configuration by delegating to AbstractWorker.

        ``build_wait_timeout`` is accepted here only so that it is not
        forwarded; it is stored in reconfigService().
        """
        AbstractWorker.checkConfig(self, name, password, **kwargs)

    def reconfigService(self, name, password,
                        build_wait_timeout=60 * 10,
                        **kwargs):
        """Install a new substantiation notifier and store the timeout."""
        self._substantiation_notifier = Notifier()
        self.build_wait_timeout = build_wait_timeout
        return AbstractWorker.reconfigService(self, name, password, **kwargs)

    def getRandomPass(self):
        """
        compute a random password
        There is no point to configure a password for a LatentWorker, as it is created by the master.
        For supporting backend, a password can be generated by this API
        """
        return ''.join(
            random.choice(string.ascii_letters + string.digits)
            for _ in range(20))

    @property
    def building(self):
        """Set of the worker-for-builder objects that are currently busy."""
        # A LatentWorkerForBuilder will only be busy if it is building.
        return {
            wfb
            for wfb in itervalues(self.workerforbuilders) if wfb.isBusy()
        }

    def failed_to_start(self, instance_id, instance_state):
        """Log the failed start and raise LatentWorkerFailedToSubstantiate."""
        log.msg('%s %s failed to start instance %s (%s)' %
                (self.__class__.__name__, self.workername, instance_id,
                 instance_state))
        raise LatentWorkerFailedToSubstantiate(instance_id, instance_state)

    def start_instance(self, build):
        # responsible for starting instance that will try to connect with this
        # master. Should return deferred with either True (instance started)
        # or False (instance not started, so don't run a build here). Problems
        # should use an errback.
        raise NotImplementedError

    def stop_instance(self, fast=False):
        # responsible for shutting down instance.
        raise NotImplementedError

    @property
    def substantiated(self):
        """True when the worker currently has a connection."""
        return self.conn is not None

    def substantiate(self, wfb, build):
        """Request that the worker be substantiated for ``build``.

        Returns a Deferred: already fired with True when a connection exists,
        otherwise a waiter on the substantiation notifier.
        """
        log.msg("substantiating worker %s" % (wfb, ))

        if self.conn is not None:
            self._clearBuildWaitTimer()
            self._setBuildWaitTimer()
            return defer.succeed(True)
        if not self._substantiation_notifier:
            # nobody is waiting yet, so this call starts the substantiation
            self.startMissingTimer()
            self.substantiation_build = build
            # if substantiate fails synchronously we need to have the deferred
            # ready to be notified
            d = self._substantiation_notifier.wait()
            if self.conn is None:
                self._substantiate(build)
            # else: we're waiting for an old one to detach.  the _substantiate
            # will be done in ``detached`` below.
            return d
        return self._substantiation_notifier.wait()

    def _substantiate(self, build):
        """Call start_instance() and route any failure to the clean-up path.

        Returns a Deferred whose failures have been consumed by ``clean_up``.
        """
        # register event trigger
        try:
            d = self.start_instance(build)
        except Exception:
            # if start_instance crashes without defer, we still handle the
            # cleanup
            d = defer.fail(failure.Failure())

        def start_instance_result(result):
            # If we don't report success, then preparation failed.
            # we let the errback handle the issue
            if not result:
                # this behaviour is kept as compatibility, but it is better
                # to just errback with a workable reason
                msg = "Worker does not want to substantiate at this time"
                return failure.Failure(
                    LatentWorkerFailedToSubstantiate(self.name, msg))
            return result

        def clean_up(failure):
            self.stopMissingTimer()
            self._substantiation_failed(failure)
            # swallow the failure as it is given to notified
            return None

        d.addCallback(start_instance_result)
        d.addErrback(clean_up)
        return d

    @defer.inlineCallbacks
    def attached(self, bot):
        """Handle a new connection from the worker.

        Raises RuntimeError (after disconnecting ``bot``) when nobody is
        waiting for a substantiation and build_wait_timeout is not negative.
        """
        if not self._substantiation_notifier and self.build_wait_timeout >= 0:
            msg = 'Worker %s received connection while not trying to ' \
                'substantiate. Disconnecting.' % (self.name,)
            log.msg(msg)
            self._disconnect(bot)
            raise RuntimeError(msg)

        try:
            yield AbstractWorker.attached(self, bot)
        except Exception:
            self._substantiation_failed(failure.Failure())
            return
        log.msg(r"Worker %s substantiated \o/" % (self.name, ))

        if not self._substantiation_notifier:
            log.msg("No substantiation deferred for %s" % (self.name, ))
        else:
            log.msg("Firing %s substantiation deferred with success" %
                    (self.name, ))
            self.substantiation_build = None
            self._substantiation_notifier.notify(True)

    def attachBuilder(self, builder):
        """Attach this worker to the worker-for-builder of ``builder``."""
        wfb = self.workerforbuilders.get(builder.name)
        return wfb.attached(self, self.worker_commands)

    def detached(self):
        """Handle loss of connection; restart a still-awaited substantiation."""
        AbstractWorker.detached(self)

        if self._substantiation_notifier:
            d = self._substantiate(self.substantiation_build)
            d.addErrback(log.err, 'while re-substantiating')

    def _missing_timer_fired(self):
        """Treat expiry of the missing timer as a substantiation failure."""
        self.missing_timer = None
        return self._substantiation_failed(defer.TimeoutError())

    def _substantiation_failed(self, failure):
        """Notify waiters of the failure and start insubstantiating.

        Returns the result of the ``workerMissing`` data update, or None when
        the worker has no parent or notify_on_missing is empty.
        """
        if self.substantiation_build:
            self.substantiation_build = None
            self._substantiation_notifier.notify(failure)
        d = self.insubstantiate()
        d.addErrback(log.err, 'while insubstantiating')
        # notify people, but only if we're still in the config
        if not self.parent or not self.notify_on_missing:
            return

        return self.master.data.updates.workerMissing(
            workerid=self.workerid,
            masterid=self.master.masterid,
            last_connection="Latent worker never connected",
            notify=self.notify_on_missing)

    def canStartBuild(self):
        """Refuse builds while disconnected-with-builds or insubstantiating."""
        # we were disconnected, but all the builds are not yet cleaned up.
        if self.conn is None and self.building:
            return False
        if self.insubstantiating:
            return False
        return AbstractWorker.canStartBuild(self)

    def buildStarted(self, wfb):
        """Cancel the idle timer when a build starts."""
        assert wfb.isBusy()
        self._clearBuildWaitTimer()

    def buildFinished(self, wfb):
        """React to a finished build: disconnect or arm the idle timer."""
        assert not wfb.isBusy()
        if not self.building:
            if self.build_wait_timeout == 0:
                # we insubstantiate asynchronously to trigger more bugs with
                # the fake reactor
                self.master.reactor.callLater(0, self._soft_disconnect)
                # insubstantiate will automatically retry to create build for
                # this worker
            else:
                self._setBuildWaitTimer()

        # AbstractWorker.buildFinished() will try to start the next build for
        # that worker
        AbstractWorker.buildFinished(self, wfb)

    def _clearBuildWaitTimer(self):
        """Cancel the idle timer if it is still pending and forget it."""
        if self.build_wait_timer is not None:
            if self.build_wait_timer.active():
                self.build_wait_timer.cancel()
            self.build_wait_timer = None

    def _setBuildWaitTimer(self):
        """(Re)arm the idle timer; a timeout <= 0 leaves no timer set."""
        self._clearBuildWaitTimer()
        if self.build_wait_timeout <= 0:
            return
        self.build_wait_timer = self.master.reactor.callLater(
            self.build_wait_timeout, self._soft_disconnect)

    @defer.inlineCallbacks
    def insubstantiate(self, fast=False):
        """Stop the instance and cancel any pending substantiation request.

        ``fast`` is forwarded to stop_instance(). Failures of stop_instance()
        are logged, not propagated, so ``self.insubstantiating`` is always
        reset before this method finishes.
        """
        log.msg("insubstantiating worker %s" % (self, ))
        self.insubstantiating = True
        self._clearBuildWaitTimer()
        try:
            # stop_instance is called inside the try block so that a backend
            # raising synchronously (instead of returning a failed Deferred)
            # is logged like any other failure and cannot leave
            # self.insubstantiating stuck at True.
            yield self.stop_instance(fast)
        except Exception as e:
            # The case of failure for insubstantiation is bad as we have a left-over costing resource
            # There is not much thing to do here generically, so we must put the problem of stop_instance
            # reliability to the backend driver
            log.err(e, "while insubstantiating")

        self.insubstantiating = False
        if self._substantiation_notifier:
            self._substantiation_notifier.notify(
                failure.Failure(LatentWorkerSubstantiatiationCancelled()))
        self.botmaster.maybeStartBuildsForWorker(self.name)

    @defer.inlineCallbacks
    def _soft_disconnect(self, fast=False):
        """Disconnect the worker and, unless configured otherwise, stop it."""
        if self.building:
            # wait until build finished
            return
        # a negative build_wait_timeout means the worker should never be shut
        # down, so just disconnect.
        if self.build_wait_timeout < 0:
            yield AbstractWorker.disconnect(self)
            return

        self.stopMissingTimer()

        # if master is stopping, we will never achieve consistent state, as workermanager
        # won't accept new connection
        if self._substantiation_notifier and self.master.running:
            log.msg("Weird: Got request to stop before started. Allowing "
                    "worker to start cleanly to avoid inconsistent state")
            yield self._substantiation_notifier.wait()
            self.substantiation_build = None
            log.msg("Substantiation complete, immediately terminating.")

        if self.conn is not None:
            yield defer.DeferredList(
                [AbstractWorker.disconnect(self),
                 self.insubstantiate(fast)],
                consumeErrors=True,
                fireOnOneErrback=True)
        else:
            yield AbstractWorker.disconnect(self)
            yield self.stop_instance(fast)

    def disconnect(self):
        """Start a soft disconnect and remove the worker from the botmaster."""
        # This returns a Deferred but we don't use it
        self._soft_disconnect()
        # this removes the worker from all builders.  It won't come back
        # without a restart (or maybe a sighup)
        self.botmaster.workerLost(self)

    @defer.inlineCallbacks
    def stopService(self):
        """Stop the service and fire with AbstractWorker.stopService()'s result."""
        # the worker might be insubstantiating from buildWaitTimeout
        while self.insubstantiating:
            yield asyncSleep(0.1)

        if self.conn is not None or self._substantiation_notifier:
            yield self._soft_disconnect()
        self._clearBuildWaitTimer()
        res = yield AbstractWorker.stopService(self)
        defer.returnValue(res)

    def updateWorker(self):
        """Called to add or remove builders after the worker has connected.

        Also called after botmaster's builders are initially set.

        @return: a Deferred that indicates when an attached worker has
        accepted the new builders and/or released the old ones."""
        for b in self.botmaster.getBuildersForWorker(self.name):
            if b.name not in self.workerforbuilders:
                b.addLatentWorker(self)
        return AbstractWorker.updateWorker(self)
class AbstractLatentWorker(AbstractWorker):
    """A worker that will start up a worker instance when needed.

    To use, subclass and implement start_instance and stop_instance.

    See ec2buildslave.py for a concrete example.  Also see the stub example in
    test/test_slaves.py.
    """

    implements(ILatentWorker)

    # set to True once the builder list has been sent successfully
    substantiated = False
    # build whose substantiation is pending; cleared on success or failure
    substantiation_build = None
    # True while insubstantiate() is running; blocks canStartBuild()
    insubstantiating = False
    # handle returned by reactor.callLater() for the idle (build wait) timeout
    build_wait_timer = None
    # handle of the 'before shutdown' system event trigger, when registered
    _shutdown_callback_handle = None

    def checkConfig(self, name, password,
                    build_wait_timeout=60 * 10,
                    **kwargs):
        """Validate the configuration and create the substantiation notifier."""
        AbstractWorker.checkConfig(self, name, password, **kwargs)
        self.build_wait_timeout = build_wait_timeout
        self._substantiation_notifier = Notifier()

    def reconfigService(self, name, password,
                        build_wait_timeout=60 * 10,
                        **kwargs):
        """Store the new timeout and delegate to AbstractWorker."""
        self.build_wait_timeout = build_wait_timeout
        return AbstractWorker.reconfigService(self, name, password, **kwargs)

    @property
    def building(self):
        """Set of the worker-for-builder objects that are currently busy."""
        # A LatentWorkerForBuilder will only be busy if it is building.
        return {wfb for wfb in itervalues(self.workerforbuilders)
                if wfb.isBusy()}

    def failed_to_start(self, instance_id, instance_state):
        """Log the failed start and raise LatentWorkerFailedToSubstantiate."""
        log.msg('%s %s failed to start instance %s (%s)' %
                (self.__class__.__name__, self.workername,
                 instance_id, instance_state))
        raise LatentWorkerFailedToSubstantiate(instance_id, instance_state)

    def start_instance(self, build):
        # responsible for starting instance that will try to connect with this
        # master. Should return deferred with either True (instance started)
        # or False (instance not started, so don't run a build here). Problems
        # should use an errback.
        raise NotImplementedError

    def stop_instance(self, fast=False):
        # responsible for shutting down instance.
        raise NotImplementedError

    def substantiate(self, sb, build):
        """Request that the worker be substantiated for ``build``.

        Returns a Deferred: already fired with True when the worker is
        substantiated, otherwise a waiter on the substantiation notifier.
        """
        if self.substantiated:
            self._clearBuildWaitTimer()
            self._setBuildWaitTimer()
            return defer.succeed(True)
        if not self._substantiation_notifier:
            if self.parent and not self.missing_timer:
                # start timer.  if timer times out, fail deferred
                self.missing_timer = self.master.reactor.callLater(
                    self.missing_timeout,
                    self._substantiation_failed, defer.TimeoutError())
            self.substantiation_build = build
            if self.conn is None:
                d = self._substantiate(build)  # start up instance
                d.addErrback(log.err, "while substantiating")
            # else: we're waiting for an old one to detach.  the _substantiate
            # will be done in ``detached`` below.
        return self._substantiation_notifier.wait()

    def _substantiate(self, build):
        """Call start_instance() and register the shutdown trigger.

        Returns the Deferred from start_instance() with the result and
        clean-up callbacks attached; failures are passed through.
        """
        # register event trigger
        d = self.start_instance(build)
        self._shutdown_callback_handle = self.master.reactor.addSystemEventTrigger(
            'before', 'shutdown', self._soft_disconnect, fast=True)

        def start_instance_result(result):
            # If we don't report success, then preparation failed.
            if not result:
                log.msg(
                    "Worker '%s' does not want to substantiate at this time" %
                    (self.name,))
                self._substantiation_notifier.notify(False)
            return result

        def clean_up(failure):
            if self.missing_timer is not None:
                self.missing_timer.cancel()
                self._substantiation_failed(failure)
            if self._shutdown_callback_handle is not None:
                handle = self._shutdown_callback_handle
                # deleting the instance attribute falls back to the class
                # default of None
                del self._shutdown_callback_handle
                self.master.reactor.removeSystemEventTrigger(handle)
            return failure
        d.addCallbacks(start_instance_result, clean_up)
        return d

    def attached(self, bot):
        """Handle a new connection from the worker.

        Returns a Deferred failed with RuntimeError (after disconnecting
        ``bot``) when nobody is waiting for a substantiation and
        build_wait_timeout is not negative; otherwise delegates to
        AbstractWorker.attached().
        """
        if not self._substantiation_notifier and self.build_wait_timeout >= 0:
            msg = 'Worker %s received connection while not trying to ' \
                'substantiate. Disconnecting.' % (self.name,)
            log.msg(msg)
            self._disconnect(bot)
            return defer.fail(RuntimeError(msg))
        return AbstractWorker.attached(self, bot)

    def detached(self):
        """Handle loss of connection; restart a still-awaited substantiation."""
        AbstractWorker.detached(self)
        if self._substantiation_notifier:
            d = self._substantiate(self.substantiation_build)
            d.addErrback(log.err, 'while re-substantiating')

    def _substantiation_failed(self, failure):
        """Notify waiters, start insubstantiating and mail a notification.

        Returns the result of _mail_missing_message(), or None when the
        worker has no parent or notify_on_missing is empty.
        """
        self.missing_timer = None
        if self._substantiation_notifier:
            self.substantiation_build = None
            self._substantiation_notifier.notify(failure)
        d = self.insubstantiate()
        d.addErrback(log.err, 'while insubstantiating')
        # notify people, but only if we're still in the config
        if not self.parent or not self.notify_on_missing:
            return

        buildmaster = self.botmaster.master
        status = buildmaster.getStatus()
        text = "The Buildbot working for '%s'\n" % status.getTitle()
        text += ("has noticed that the latent worker named %s \n" %
                 self.name)
        text += "never substantiated after a request\n"
        text += "\n"
        text += ("The request was made at %s (buildmaster-local time)\n" %
                 time.ctime(time.time() - self.missing_timeout))  # approx
        text += "\n"
        text += "Sincerely,\n"
        text += " The Buildbot\n"
        text += " %s\n" % status.getTitleURL()
        subject = "Buildbot: worker %s never substantiated" % (self.name,)
        return self._mail_missing_message(subject, text)

    def canStartBuild(self):
        """Refuse builds while an insubstantiation is in progress."""
        if self.insubstantiating:
            return False
        return AbstractWorker.canStartBuild(self)

    def buildStarted(self, sb):
        """Cancel the idle timer when a build starts."""
        self._clearBuildWaitTimer()

    def buildFinished(self, sb):
        """React to a finished build: insubstantiate or arm the idle timer."""
        AbstractWorker.buildFinished(self, sb)

        if not self.building:
            if self.build_wait_timeout == 0:
                d = self.insubstantiate()
                # try starting builds for this worker after insubstantiating;
                # this will cause the worker to re-substantiate immediately if
                # there are pending build requests.
                d.addCallback(lambda _:
                              self.botmaster.maybeStartBuildsForWorker(self.workername))
            else:
                self._setBuildWaitTimer()

    def _clearBuildWaitTimer(self):
        """Cancel the idle timer if it is still pending and forget it."""
        if self.build_wait_timer is not None:
            if self.build_wait_timer.active():
                self.build_wait_timer.cancel()
            self.build_wait_timer = None

    def _setBuildWaitTimer(self):
        """(Re)arm the idle timer; a timeout <= 0 leaves no timer set."""
        self._clearBuildWaitTimer()
        if self.build_wait_timeout <= 0:
            return
        self.build_wait_timer = self.master.reactor.callLater(
            self.build_wait_timeout, self._soft_disconnect)

    @defer.inlineCallbacks
    def insubstantiate(self, fast=False):
        """Stop the instance and remove the shutdown trigger.

        ``fast`` is forwarded to stop_instance(). A failure of stop_instance()
        still propagates through the returned Deferred, but
        ``self.insubstantiating`` is reset in every case.
        """
        self.insubstantiating = True
        try:
            self._clearBuildWaitTimer()
            d = self.stop_instance(fast)
            if self._shutdown_callback_handle is not None:
                handle = self._shutdown_callback_handle
                del self._shutdown_callback_handle
                self.master.reactor.removeSystemEventTrigger(handle)
            self.substantiated = False
            yield d
        finally:
            # reset the flag even when stop_instance() fails; otherwise
            # canStartBuild() would return False for this worker forever
            self.insubstantiating = False
        self.botmaster.maybeStartBuildsForWorker(self.name)

    @defer.inlineCallbacks
    def _soft_disconnect(self, fast=False):
        """Disconnect the worker and, unless configured otherwise, stop it."""
        # a negative build_wait_timeout means the worker should never be shut
        # down, so just disconnect.
        if self.build_wait_timeout < 0:
            yield AbstractWorker.disconnect(self)
            return

        if self.missing_timer:
            self.missing_timer.cancel()
            self.missing_timer = None

        if self._substantiation_notifier:
            log.msg("Weird: Got request to stop before started. Allowing "
                    "worker to start cleanly to avoid inconsistent state")
            yield self._substantiation_notifier.wait()
            self.substantiation_build = None
            log.msg("Substantiation complete, immediately terminating.")

        if self.conn is not None:
            yield defer.DeferredList([
                AbstractWorker.disconnect(self),
                self.insubstantiate(fast)
            ], consumeErrors=True, fireOnOneErrback=True)
        else:
            yield AbstractWorker.disconnect(self)
            yield self.stop_instance(fast)

    def disconnect(self):
        """Start a soft disconnect and remove the worker from the botmaster."""
        # This returns a Deferred but we don't use it
        self._soft_disconnect()
        # this removes the worker from all builders.  It won't come back
        # without a restart (or maybe a sighup)
        self.botmaster.workerLost(self)

    def stopService(self):
        """Stop the service, also soft-disconnecting a connected worker.

        Returns the Deferred of AbstractWorker.stopService(), or a
        DeferredList of it and the soft disconnect when a connection exists.
        """
        res = defer.maybeDeferred(AbstractWorker.stopService, self)
        if self.conn is not None:
            d = self._soft_disconnect()
            res = defer.DeferredList([res, d])
        return res

    def updateWorker(self):
        """Called to add or remove builders after the worker has connected.

        Also called after botmaster's builders are initially set.

        @return: a Deferred that indicates when an attached worker has
        accepted the new builders and/or released the old ones."""
        for b in self.botmaster.getBuildersForWorker(self.name):
            if b.name not in self.workerforbuilders:
                b.addLatentWorker(self)
        return AbstractWorker.updateWorker(self)

    def sendBuilderList(self):
        """Send the builder list and mark the worker as substantiated.

        On success the worker-for-builder objects are attached, waiters are
        notified with True and the idle timer is armed when nothing is
        building. On failure waiters are notified with the failure, the
        missing timer is cancelled and the failure is passed on.
        """
        d = AbstractWorker.sendBuilderList(self)

        def _sent(slist):
            if not slist:
                return
            dl = []
            for name in slist:
                # use get() since we might have changed our mind since then.
                # we're checking on the builder in addition to the
                # workers out of a bit of paranoia.
                b = self.botmaster.builders.get(name)
                sb = self.workerforbuilders.get(name)
                if b and sb:
                    d1 = sb.attached(self, self.worker_commands)
                    dl.append(d1)
            return defer.DeferredList(dl)

        def _set_failed(why):
            log.msg("Worker.sendBuilderList (%s) failed" % self)
            log.err(why)
            # TODO: hang up on them?, without setBuilderList we can't use
            # them
            if self._substantiation_notifier:
                self.substantiation_build = None
                self._substantiation_notifier.notify(why)
            if self.missing_timer:
                self.missing_timer.cancel()
                self.missing_timer = None
            # TODO: maybe log?  send an email?
            return why
        d.addCallbacks(_sent, _set_failed)

        @d.addCallback
        def _substantiated(res):
            log.msg(r"Worker %s substantiated \o/" % (self.name,))
            self.substantiated = True
            if not self._substantiation_notifier:
                log.msg("No substantiation deferred for %s" % (self.name,))
            else:
                log.msg(
                    "Firing %s substantiation deferred with success" % (self.name,))
                self.substantiation_build = None
                self._substantiation_notifier.notify(True)
            # note that the missing_timer is already handled within
            # ``attached``
            if not self.building:
                self._setBuildWaitTimer()
        return d
def __init__(self):
    """Create the empty bookkeeping state of a new instance.

    ``_finish_notifier`` is a fresh ``Notifier`` and ``_waited`` starts out
    as an empty set.
    """
    # NOTE(review): presumably the notifier is fired once ``_waited`` drains;
    # the enclosing class is not visible here -- confirm against its methods.
    self._finish_notifier = Notifier()
    self._waited = set()
def __init__(self, *args, **kwargs):
    """Initialise the base class, then create the per-instance helpers.

    All positional and keyword arguments are forwarded unchanged to the
    parent ``__init__``.
    """
    super().__init__(*args, **kwargs)

    # helper objects owned by this instance; each is independent of the others
    self._deferwaiter = deferwaiter.DeferWaiter()
    self._start_stop_lock = defer.DeferredLock()
    self._substantiation_notifier = Notifier()
class AbstractLatentWorker(AbstractWorker):

    """A worker that will start up a worker instance when needed.

    To use, subclass and implement start_instance and stop_instance.

    Additionally, if the instances render any kind of data affecting instance
    type from the build properties, set the class variable
    builds_may_be_incompatible to True and override isCompatibleWithBuild
    method.

    See ec2.py for a concrete example.
    """

    substantiation_build = None
    build_wait_timer = None
    start_missing_on_startup = False

    # Caveats: The handling of latent workers is much more complex than it
    # might seem. The code must handle at least the following conditions:
    #
    #   - non-silent disconnection by the worker at any time which generated
    #   TCP resets and in the end resulted in detached() being called
    #
    #   - silent disconnection by worker at any time by silent TCP connection
    #   failure which did not generate TCP resets, but on the other hand no
    #   response may be received. self.conn is not None in that case.
    #
    #   - no disconnection by worker during substantiation when
    #   build_wait_timeout param is negative.
    #
    # The above means that the connection state of the worker (self.conn) must
    # be tracked separately from the intended state of the worker (self.state).

    state = States.NOT_SUBSTANTIATED

    # state transitions:
    #
    # substantiate(): either of
    # NOT_SUBSTANTIATED -> SUBSTANTIATING
    # INSUBSTANTIATING -> INSUBSTANTIATING_SUBSTANTIATING
    #
    # attached():
    # SUBSTANTIATING -> SUBSTANTIATED
    # self.conn -> not None
    #
    # detached():
    # self.conn -> None
    #
    # errors in any of above will call insubstantiate()
    #
    # insubstantiate():
    # SUBSTANTIATED -> INSUBSTANTIATING
    # < other state transitions may happen during this time >
    # INSUBSTANTIATING_SUBSTANTIATING -> SUBSTANTIATING
    # INSUBSTANTIATING -> NOT_SUBSTANTIATED

    def checkConfig(self, name, password,
                    build_wait_timeout=60 * 10,
                    **kwargs):
        """Validate the configuration by delegating to the base class.

        ``build_wait_timeout`` is accepted here only so that it is not
        forwarded to the base class; it is stored in ``reconfigService``.
        """
        super().checkConfig(name, password, **kwargs)

    def reconfigService(self, name, password,
                        build_wait_timeout=60 * 10,
                        **kwargs):
        """Create fresh notifiers, store ``build_wait_timeout`` and delegate.

        @return: whatever the base-class ``reconfigService`` returns.
        """
        self._substantiation_notifier = Notifier()
        self._insubstantiation_notifier = Notifier()
        self.build_wait_timeout = build_wait_timeout
        return super().reconfigService(name, password, **kwargs)

    def getRandomPass(self):
        """
        compute a random password
        There is no point to configure a password for a LatentWorker, as it is created by the master.
        For supporting backend, a password can be generated by this API

        @return: a 20 character string of ASCII letters and digits.
        """
        # A password is a secret: draw it from the operating system's CSPRNG
        # instead of the default, predictable Mersenne Twister generator.
        rng = random.SystemRandom()
        alphabet = string.ascii_letters + string.digits
        return ''.join(rng.choice(alphabet) for _ in range(20))

    @property
    def building(self):
        """The set of workerforbuilders of this worker that are busy."""
        # A LatentWorkerForBuilder will only be busy if it is building.
        return {wfb for wfb in self.workerforbuilders.values()
                if wfb.isBusy()}

    def failed_to_start(self, instance_id, instance_state):
        """Log a failed instance start and raise.

        @raise LatentWorkerFailedToSubstantiate: always.
        """
        log.msg('%s %s failed to start instance %s (%s)' %
                (self.__class__.__name__, self.workername,
                 instance_id, instance_state))
        raise LatentWorkerFailedToSubstantiate(instance_id, instance_state)

    def start_instance(self, build):
        """Start the instance; must be implemented by subclasses."""
        # responsible for starting instance that will try to connect with this
        # master. Should return deferred with either True (instance started)
        # or False (instance not started, so don't run a build here). Problems
        # should use an errback.
        raise NotImplementedError

    def stop_instance(self, fast=False):
        """Shut the instance down; must be implemented by subclasses."""
        # responsible for shutting down instance.
        raise NotImplementedError

    @property
    def substantiated(self):
        """True only when the state is SUBSTANTIATED and a connection exists."""
        return self.state == States.SUBSTANTIATED and self.conn is not None

    def substantiate(self, wfb, build):
        """Request that the worker is brought up for ``build``.

        @return: a Deferred; already fired with ``True`` when the worker is
            substantiated and connected, otherwise one obtained from the
            substantiation notifier.
        """
        log.msg("substantiating worker %s" % (wfb, ))

        if self.state == States.SUBSTANTIATED and self.conn is not None:
            self._setBuildWaitTimer()
            return defer.succeed(True)

        # a substantiation is already in progress (or scheduled): just wait
        if self.state in [States.SUBSTANTIATING,
                          States.INSUBSTANTIATING_SUBSTANTIATING]:
            return self._substantiation_notifier.wait()

        self.startMissingTimer()
        self.substantiation_build = build

        # if anything of the following fails synchronously we need to have a
        # deferred ready to be notified
        d = self._substantiation_notifier.wait()

        if self.state == States.SUBSTANTIATED and self.conn is None:
            # connection dropped while we were substantiated.
            # insubstantiate to clean up and then substantiate normally.
            d_ins = self.insubstantiate(_force_substantiation=True)
            d_ins.addErrback(log.err, 'while insubstantiating')
            return d

        assert self.state in [States.NOT_SUBSTANTIATED,
                              States.INSUBSTANTIATING]

        if self.state == States.NOT_SUBSTANTIATED:
            self.state = States.SUBSTANTIATING
            self._substantiate(build)
        else:
            # insubstantiate() starts the substantiation once it finishes
            self.state = States.INSUBSTANTIATING_SUBSTANTIATING
        return d

    @defer.inlineCallbacks
    def _substantiate(self, build):
        """Start the instance; any error is routed to ``_substantiation_failed``."""
        # register event trigger
        try:
            # if build_wait_timeout is negative we don't ever disconnect the
            # worker ourselves, so we don't need to wait for it to attach
            # to declare it as substantiated.
            dont_wait_to_attach = \
                self.build_wait_timeout < 0 and self.conn is not None

            start_success = yield self.start_instance(build)

            if not start_success:
                # this behaviour is kept as compatibility, but it is better
                # to just errback with a workable reason
                msg = "Worker does not want to substantiate at this time"
                raise LatentWorkerFailedToSubstantiate(self.name, msg)

            # state and connection are re-checked because they may have
            # changed while start_instance() was running
            if dont_wait_to_attach and \
                    self.state == States.SUBSTANTIATING and \
                    self.conn is not None:
                log.msg(r"Worker %s substantiated (already attached)" %
                        (self.name, ))
                self.state = States.SUBSTANTIATED
                self._fireSubstantiationNotifier(True)

        except Exception as e:
            self.stopMissingTimer()
            self._substantiation_failed(failure.Failure(e))
            # swallow the failure as it is notified

    def _fireSubstantiationNotifier(self, result):
        """Notify substantiation waiters with ``result``, if there are any."""
        if not self._substantiation_notifier:
            log.msg("No substantiation deferred for %s" % (self.name, ))
            return

        result_msg = 'success' if result is True else 'failure'
        log.msg("Firing {} substantiation deferred with {}".format(
            self.name, result_msg))

        self.substantiation_build = None
        self._substantiation_notifier.notify(result)

    @defer.inlineCallbacks
    def attached(self, bot):
        """Handle a new connection from the worker.

        @raise RuntimeError: when the connection arrives while not
            substantiating and ``build_wait_timeout`` is not negative.
        """
        if self.state != States.SUBSTANTIATING and \
                self.build_wait_timeout >= 0:
            msg = 'Worker %s received connection while not trying to ' \
                'substantiate. Disconnecting.' % (self.name,)
            log.msg(msg)
            self._disconnect(bot)
            raise RuntimeError(msg)

        try:
            yield super().attached(bot)
        except Exception:
            # Failure() without arguments captures the active exception
            self._substantiation_failed(failure.Failure())
            return
        log.msg(r"Worker %s substantiated \o/" % (self.name, ))

        # only change state when we are actually substantiating. We could
        # end up at this point in different state than SUBSTANTIATING if
        # build_wait_timeout is negative. When build_wait_timeout is not
        # negative, we throw an error (see above)
        if self.state == States.SUBSTANTIATING:
            self.state = States.SUBSTANTIATED
        self._fireSubstantiationNotifier(True)

    def attachBuilder(self, builder):
        """Attach this worker to the workerforbuilder of ``builder``."""
        wfb = self.workerforbuilders.get(builder.name)
        return wfb.attached(self, self.worker_commands)

    def _missing_timer_fired(self):
        """Treat expiry of the missing timer as a substantiation timeout."""
        self.missing_timer = None
        return self._substantiation_failed(defer.TimeoutError())

    def _substantiation_failed(self, failure):
        """Notify waiters about ``failure``, insubstantiate and report.

        @return: the result of ``workerMissing`` when the worker still has a
            parent and ``notify_on_missing`` is set, otherwise ``None``.
        """
        if self.state == States.SUBSTANTIATING:
            self.substantiation_build = None
            self._fireSubstantiationNotifier(failure)

        d = self.insubstantiate()
        d.addErrback(log.err, 'while insubstantiating')

        # notify people, but only if we're still in the config
        if not self.parent or not self.notify_on_missing:
            return

        return self.master.data.updates.workerMissing(
            workerid=self.workerid,
            masterid=self.master.masterid,
            last_connection="Latent worker never connected",
            notify=self.notify_on_missing)

    def canStartBuild(self):
        """Refuse new builds while disconnected with builds still registered."""
        # we were disconnected, but all the builds are not yet cleaned up.
        if self.conn is None and self.building:
            return False
        return super().canStartBuild()

    def buildStarted(self, wfb):
        """Cancel the build-wait timer now that a build is running."""
        assert wfb.isBusy()
        self._clearBuildWaitTimer()

    def buildFinished(self, wfb):
        """Schedule shutdown or re-arm the build-wait timer once idle."""
        assert not wfb.isBusy()
        if not self.building:
            if self.build_wait_timeout == 0:
                # we insubstantiate asynchronously to trigger more bugs with
                # the fake reactor
                self.master.reactor.callLater(0, self._soft_disconnect)
                # insubstantiate will automatically retry to create build for
                # this worker
            else:
                self._setBuildWaitTimer()

        # AbstractWorker.buildFinished() will try to start the next build for
        # that worker
        super().buildFinished(wfb)

    def _clearBuildWaitTimer(self):
        """Cancel (if still active) and forget the build-wait timer."""
        if self.build_wait_timer is not None:
            if self.build_wait_timer.active():
                self.build_wait_timer.cancel()
            self.build_wait_timer = None

    def _setBuildWaitTimer(self):
        """(Re)start the build-wait timer; no timer for a timeout <= 0."""
        self._clearBuildWaitTimer()
        if self.build_wait_timeout <= 0:
            return
        self.build_wait_timer = self.master.reactor.callLater(
            self.build_wait_timeout, self._soft_disconnect)

    @defer.inlineCallbacks
    def insubstantiate(self, fast=False, _force_substantiation=False):
        """Stop the instance and move the state machine accordingly.

        Errors raised by ``stop_instance`` are logged, not propagated.
        """
        # _force_substantiation=True means we'll try to substantiate a build
        # with stored substantiation_build at the end of substantiation

        log.msg("insubstantiating worker {}".format(self))

        if self.state == States.NOT_SUBSTANTIATED:
            return

        # an insubstantiation is already running: wait for it to finish
        if self.state in [States.INSUBSTANTIATING,
                          States.INSUBSTANTIATING_SUBSTANTIATING]:
            yield self._insubstantiation_notifier.wait()
            return

        # waiters of an interrupted substantiation must be told about it
        notify_cancel = self.state == States.SUBSTANTIATING

        if _force_substantiation:
            self.state = States.INSUBSTANTIATING_SUBSTANTIATING
        else:
            self.state = States.INSUBSTANTIATING
        self._clearBuildWaitTimer()
        d = self.stop_instance(fast)
        try:
            yield d
        except Exception as e:
            # The case of failure for insubstantiation is bad as we have a
            # left-over costing resource There is not much thing to do here
            # generically, so we must put the problem of stop_instance
            # reliability to the backend driver
            log.err(e, "while insubstantiating")

        assert self.state in [States.INSUBSTANTIATING,
                              States.INSUBSTANTIATING_SUBSTANTIATING]

        if notify_cancel:
            self._fireSubstantiationNotifier(
                failure.Failure(LatentWorkerSubstantiatiationCancelled()))

        if self.state == States.INSUBSTANTIATING_SUBSTANTIATING:
            self.state = States.SUBSTANTIATING
            if self._insubstantiation_notifier:
                self._insubstantiation_notifier.notify(True)
            self._substantiate(self.substantiation_build)
        elif self.state == States.INSUBSTANTIATING:
            self.state = States.NOT_SUBSTANTIATED
            if self._insubstantiation_notifier:
                self._insubstantiation_notifier.notify(True)
        else:
            # excluded by the assertion above
            pass
        self.botmaster.maybeStartBuildsForWorker(self.name)

    @defer.inlineCallbacks
    def _soft_disconnect(self, fast=False):
        """Disconnect and insubstantiate, unless builds are still running."""
        if self.building:
            # wait until build finished
            # TODO: remove this behavior as AbstractWorker disconnects forcibly
            return
        # a negative build_wait_timeout means the worker should never be shut
        # down, so just disconnect.
        if self.build_wait_timeout < 0:
            yield super().disconnect()
            return

        self.stopMissingTimer()

        # if master is stopping, we will never achieve consistent state, as workermanager
        # won't accept new connection
        if self._substantiation_notifier and self.master.running:
            log.msg("Weird: Got request to stop before started. Allowing "
                    "worker to start cleanly to avoid inconsistent state")
            yield self._substantiation_notifier.wait()
            self.substantiation_build = None
            log.msg("Substantiation complete, immediately terminating.")

        yield defer.DeferredList([super().disconnect(),
                                  self.insubstantiate(fast)],
                                 consumeErrors=True, fireOnOneErrback=True)

    def disconnect(self):
        """Start a soft disconnect and tell the botmaster the worker is lost."""
        # This returns a Deferred but we don't use it
        self._soft_disconnect()
        # this removes the worker from all builders.  It won't come back
        # without a restart (or maybe a sighup)
        self.botmaster.workerLost(self)

    @defer.inlineCallbacks
    def stopService(self):
        """Wait for pending insubstantiation, disconnect, then stop.

        @return: (via Deferred) the result of the base-class ``stopService``.
        """
        # the worker might be insubstantiating from buildWaitTimeout
        if self.state in [States.INSUBSTANTIATING,
                          States.INSUBSTANTIATING_SUBSTANTIATING]:
            yield self._insubstantiation_notifier.wait()

        if self.conn is not None or self.state in [States.SUBSTANTIATING,
                                                   States.SUBSTANTIATED]:
            yield self._soft_disconnect()
        self._clearBuildWaitTimer()
        res = yield super().stopService()
        return res

    def updateWorker(self):
        """Called to add or remove builders after the worker has connected.

        Also called after botmaster's builders are initially set.

        @return: a Deferred that indicates when an attached worker has
        accepted the new builders and/or released the old ones."""
        for b in self.botmaster.getBuildersForWorker(self.name):
            if b.name not in self.workerforbuilders:
                b.addLatentWorker(self)
        return super().updateWorker()
class AbstractLatentMachine(Machine):
    """Base class for machines that are started and stopped on demand.

    Subclasses implement ``start_machine`` and ``stop_machine``.  The
    lifecycle is tracked in ``self.state`` (``States.STOPPED``, ``STARTING``,
    ``STARTED``, ``STOPPING``).
    """

    DEFAULT_MISSING_TIMEOUT = 20 * 60

    def checkConfig(self, name,
                    build_wait_timeout=0,
                    missing_timeout=DEFAULT_MISSING_TIMEOUT, **kwargs):
        """Check the base configuration and reset state and worker list."""
        super().checkConfig(name, **kwargs)
        self.latent_workers = []
        self.state = States.STOPPED

    def reconfigService(self, name,
                        build_wait_timeout=0,
                        missing_timeout=DEFAULT_MISSING_TIMEOUT, **kwargs):
        """Apply the configuration and reset the runtime state.

        Raises ``Exception`` for the first attached worker that does not
        provide ``ILatentWorker``.
        """
        super().reconfigService(name, **kwargs)
        self.build_wait_timeout = build_wait_timeout
        self.missing_timeout = missing_timeout

        for candidate in self.workers:
            if interfaces.ILatentWorker.providedBy(candidate):
                continue
            raise Exception('Worker is not latent {}'.format(candidate.name))

        self.state = States.STOPPED
        self._start_notifier = Notifier()
        self._stop_notifier = Notifier()
        self._build_wait_timer = None
        self._missing_timer = None

    def start_machine(self):
        """Start the machine; to be provided by subclasses."""
        # Responsible for starting the machine. The function should return a
        # deferred which should result in True if the startup has been
        # successful, or False otherwise.
        raise NotImplementedError

    def stop_machine(self):
        """Shut the machine down; to be provided by subclasses."""
        # Responsible for shutting down the machine
        raise NotImplementedError

    @defer.inlineCallbacks
    def substantiate(self, starting_worker):
        """Make sure the machine is running.

        Fires with ``True`` when the machine is already started, otherwise
        with the outcome of the (possibly already running) start attempt.
        """
        if self.state == States.STOPPING:
            # a stop is in flight: let it complete first
            yield self._stop_notifier.wait()

        if self.state == States.STARTED:
            # may happen if we waited for stop to complete and in the mean
            # time the machine was successfully woken.
            return True

        if self.state == States.STARTING:
            # somebody else is starting the machine: share their outcome
            outcome = yield self._start_notifier.wait()
            return outcome

        self.state = States.STARTING

        # Start the machine. start_machine may substantiate additional workers
        # depending on the implementation.
        try:
            started = yield self.start_machine()
        except Exception as exc:
            log.err(exc, 'while starting latent machine {0}'.format(self.name))
            started = False

        if started:
            self._setMissingTimer()
            self.state = States.STARTED
        else:
            # startup failed: bring every worker down before settling
            yield defer.DeferredList(
                [worker.insubstantiate() for worker in self.workers],
                consumeErrors=True)
            self.state = States.STOPPED

        self._start_notifier.notify(started)
        return started

    @defer.inlineCallbacks
    def _stop(self):
        """Stop the machine unless it is busy, starting, or already stopping."""
        has_running_builds = any(worker.building for worker in self.workers)
        if has_running_builds or self.state == States.STARTING:
            return None

        if self.state == States.STOPPING:
            yield self._stop_notifier.wait()
            return None

        self.state = States.STOPPING

        # wait until workers insubstantiate, then stop
        pending = [worker.insubstantiate() for worker in self.workers]
        yield defer.DeferredList(pending, consumeErrors=True)

        try:
            yield self.stop_machine()
        except Exception as exc:
            # a failing stop is logged; the state still becomes STOPPED below
            log.err(exc, 'while stopping latent machine {0}'.format(self.name))

        self.state = States.STOPPED
        self._stop_notifier.notify(None)

    def notifyBuildStarted(self):
        """Cancel the missing timer when a build starts."""
        self._clearMissingTimer()

    def notifyBuildFinished(self):
        """Arm the build-wait timer once no worker is building any more."""
        still_building = any(worker.building for worker in self.workers)
        if not still_building:
            self._setBuildWaitTimer()
        else:
            self._clearBuildWaitTimer()

    def _clearMissingTimer(self):
        """Cancel (if still active) and forget the missing timer."""
        timer = self._missing_timer
        if timer is None:
            return
        if timer.active():
            timer.cancel()
        self._missing_timer = None

    def _setMissingTimer(self):
        """Restart the missing timer; ``_stop`` runs when it expires."""
        self._clearMissingTimer()
        reactor = self.master.reactor
        self._missing_timer = reactor.callLater(self.missing_timeout,
                                                self._stop)

    def _clearBuildWaitTimer(self):
        """Cancel (if still active) and forget the build-wait timer."""
        timer = self._build_wait_timer
        if timer is None:
            return
        if timer.active():
            timer.cancel()
        self._build_wait_timer = None

    def _setBuildWaitTimer(self):
        """Restart the build-wait timer; ``_stop`` runs when it expires."""
        self._clearBuildWaitTimer()
        reactor = self.master.reactor
        self._build_wait_timer = reactor.callLater(self.build_wait_timeout,
                                                   self._stop)

    def __repr__(self):
        template = "<AbstractLatentMachine '{}' at {}>"
        return template.format(self.name, id(self))