class Builder(config.ReconfigurableServiceMixin,
              pb.Referenceable,
              service.MultiService):

    # reconfigure builders before slaves
    reconfig_priority = 196

    def __init__(self, name, _addServices=True):
        service.MultiService.__init__(self)
        self.name = name

        # this is created the first time we get a good build
        self.expectations = None

        # build/wannabuild slots: Build objects move along this sequence
        self.building = []
        # old_building holds active builds that were stolen from a predecessor
        self.old_building = weakref.WeakKeyDictionary()

        # buildslaves which have connected but which are not yet available.
        # These are always in the ATTACHING state.
        self.attaching_slaves = []

        # buildslaves at our disposal. Each SlaveBuilder instance has a
        # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a
        # Build is about to start, to make sure that they're still alive.
        self.slaves = []

        self.config = None
        self.builder_status = None

        if _addServices:
            self.reclaim_svc = internet.TimerService(10*60,
                                                     self.reclaimAllBuilds)
            self.reclaim_svc.setServiceParent(self)

            # update big status every 30 minutes, working around #1980
            self.updateStatusService = internet.TimerService(30*60,
                                                             self.updateBigStatus)
            self.updateStatusService.setServiceParent(self)

    def reconfigService(self, new_config):
        # find this builder in the config
        for builder_config in new_config.builders:
            if builder_config.name == self.name:
                break
        else:
            assert 0, "no config found for builder '%s'" % self.name

        # set up a builder status object on the first reconfig
        if not self.builder_status:
            self.builder_status = self.master.status.builderAdded(
                builder_config.name,
                builder_config.builddir,
                builder_config.category,
                builder_config.description)

        self.config = builder_config

        self.builder_status.setDescription(builder_config.description)
        self.builder_status.setCategory(builder_config.category)
        self.builder_status.setSlavenames(self.config.slavenames)
        self.builder_status.setCacheSize(new_config.caches['Builds'])

        return defer.succeed(None)

    def stopService(self):
        d = defer.maybeDeferred(lambda:
                                service.MultiService.stopService(self))

        def flushMaybeStartBuilds(_):
            # at this point, self.running = False, so another maybeStartBuild
            # invocation won't hurt anything, but it also will not complete
            # until any currently-running invocations are done, so we know
            # that the builder is quiescent at that time.
            return self.maybeStartBuild()
        d.addCallback(flushMaybeStartBuilds)
        return d

    def __repr__(self):
        return "<Builder '%r' at %d>" % (self.name, id(self))

    @defer.inlineCallbacks
    def getOldestRequestTime(self):
        """Returns the submitted_at of the oldest unclaimed build request
        for this builder, or None if there are no build requests.
        @returns: datetime instance or None, via Deferred
        """
        unclaimed = yield self.master.db.buildrequests.getBuildRequests(
                buildername=self.name, claimed=False)

        if unclaimed:
            unclaimed = [brd['submitted_at'] for brd in unclaimed]
            unclaimed.sort()
            defer.returnValue(unclaimed[0])
        else:
            defer.returnValue(None)

    def reclaimAllBuilds(self):
        brids = set()
        for b in self.building:
            brids.update([br.id for br in b.requests])
        for b in self.old_building:
            brids.update([br.id for br in b.requests])

        if not brids:
            return defer.succeed(None)

        d = self.master.db.buildrequests.reclaimBuildRequests(brids)
        d.addErrback(log.err, 'while re-claiming running BuildRequests')
        return d

    def getBuild(self, number):
        for b in self.building:
            if b.build_status and b.build_status.number == number:
                return b
        for b in self.old_building.keys():
            if b.build_status and b.build_status.number == number:
                return b
        return None

    def addLatentSlave(self, slave):
        assert interfaces.ILatentBuildSlave.providedBy(slave)
        for s in self.slaves:
            if s == slave:
                break
        else:
            sb = slavebuilder.LatentSlaveBuilder(slave, self)
            self.builder_status.addPointEvent(
                ['added', 'latent', slave.slavename])
            self.slaves.append(sb)
            self.botmaster.maybeStartBuildsForBuilder(self.name)

    def attached(self, slave, remote, commands):
        """This is invoked by the BuildSlave when the self.slavename bot
        registers their builder.

        @type  slave: L{buildbot.buildslave.BuildSlave}
        @param slave: the BuildSlave that represents the buildslave as a
                      whole
        @type  remote: L{twisted.spread.pb.RemoteReference}
        @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder}
        @type  commands: dict: string -> string, or None
        @param commands: provides the slave's version of each RemoteCommand

        @rtype:  L{twisted.internet.defer.Deferred}
        @return: a Deferred that fires (with 'self') when the slave-side
                 builder is fully attached and ready to accept commands.
        """
        for s in self.attaching_slaves + self.slaves:
            if s.slave == slave:
                # already attached to them. This is fairly common, since
                # attached() gets called each time we receive the builder
                # list from the slave, and we ask for it each time we add or
                # remove a builder. So if the slave is hosting builders
                # A,B,C, and the config file changes A, we'll remove A and
                # re-add it, triggering two builder-list requests, getting
                # two redundant calls to attached() for B, and another two
                # for C.
                #
                # Therefore, when we see that we're already attached, we can
                # just ignore it.
                return defer.succeed(self)

        sb = slavebuilder.SlaveBuilder()
        sb.setBuilder(self)
        self.attaching_slaves.append(sb)
        d = sb.attached(slave, remote, commands)
        d.addCallback(self._attached)
        d.addErrback(self._not_attached, slave)
        return d

    def _attached(self, sb):
        self.builder_status.addPointEvent(['connect', sb.slave.slavename])
        self.attaching_slaves.remove(sb)
        self.slaves.append(sb)

        self.updateBigStatus()

        return self

    def _not_attached(self, why, slave):
        # already log.err'ed by SlaveBuilder._attachFailure
        # TODO: remove from self.slaves (except that detached() should get
        #       run first, right?)
        log.err(why, 'slave failed to attach')
        self.builder_status.addPointEvent(['failed', 'connect',
                                           slave.slavename])
        # TODO: add an HTMLLogFile of the exception

    def detached(self, slave):
        """This is called when the connection to the bot is lost."""
        for sb in self.attaching_slaves + self.slaves:
            if sb.slave == slave:
                break
        else:
            log.msg("WEIRD: Builder.detached(%s) (%s)"
                    " not in attaching_slaves(%s)"
                    " or slaves(%s)" % (slave, slave.slavename,
                                        self.attaching_slaves,
                                        self.slaves))
            return
        if sb.state == BUILDING:
            # the Build's .lostRemote method (invoked by a notifyOnDisconnect
            # handler) will cause the Build to be stopped, probably right
            # after the notifyOnDisconnect that invoked us finishes running.
            pass

        if sb in self.attaching_slaves:
            self.attaching_slaves.remove(sb)
        if sb in self.slaves:
            self.slaves.remove(sb)

        self.builder_status.addPointEvent(['disconnect', slave.slavename])
        sb.detached()  # inform the SlaveBuilder that their slave went away
        self.updateBigStatus()

    def updateBigStatus(self):
        if not self.builder_status:
            return
        if not self.slaves:
            self.builder_status.setBigState("offline")
        elif self.building or self.old_building:
            self.builder_status.setBigState("building")
        else:
            self.builder_status.setBigState("idle")

    @defer.inlineCallbacks
    def _startBuildFor(self, slavebuilder, buildrequests):
        """Start a build on the given slave.

        @param slavebuilder: the L{SlaveBuilder} which will host this build
        @param buildrequests: the build requests to build

        @return: (via Deferred) boolean indicating that the build was
                 successfully started.
        """

        # as of the Python versions supported now, try/finally can't be used
        # with a generator expression. So instead, we push cleanup functions
        # into a list so that, at any point, we can abort this operation.
        cleanups = []

        def run_cleanups():
            try:
                while cleanups:
                    fn = cleanups.pop()
                    fn()
            except:
                log.err(failure.Failure(),
                        "while running %r" % (run_cleanups,))

        # the last cleanup we want to perform is to update the big
        # status based on any other cleanup
        cleanups.append(lambda: self.updateBigStatus())

        build = self.config.factory.newBuild(buildrequests)
        build.setBuilder(self)
        log.msg("starting build %s using slave %s" % (build, slavebuilder))

        # set up locks
        build.setLocks(self.config.locks)
        cleanups.append(lambda: slavebuilder.slave.releaseLocks())

        if len(self.config.env) > 0:
            build.setSlaveEnvironment(self.config.env)

        # append the build to self.building
        self.building.append(build)
        cleanups.append(lambda: self.building.remove(build))

        # update the big status accordingly
        self.updateBigStatus()

        try:
            ready = yield slavebuilder.prepare(self.builder_status, build)
        except:
            log.err(failure.Failure(), 'while preparing slavebuilder:')
            ready = False

        # If prepare returns True then it is ready and we start a build
        # If it returns false then we don't start a new build.
        if not ready:
            log.msg("slave %s can't build %s after all; re-queueing the "
                    "request" % (slavebuilder, build))
            run_cleanups()
            defer.returnValue(False)
            return

        # ping the slave to make sure they're still there. If they've
        # fallen off the map (due to a NAT timeout or something), this
        # will fail in a couple of minutes, depending upon the TCP
        # timeout.
        #
        # TODO: This can unnecessarily suspend the starting of a build, in
        # situations where the slave is live but is pushing lots of data to
        # us in a build.
        log.msg("starting build %s.. pinging the slave %s"
                % (build, slavebuilder))
        try:
            ping_success = yield slavebuilder.ping()
        except:
            log.err(failure.Failure(), 'while pinging slave before build:')
            ping_success = False

        if not ping_success:
            log.msg("slave ping failed; re-queueing the request")
            run_cleanups()
            defer.returnValue(False)
            return

        # The buildslave is ready to go. slavebuilder.buildStarted() sets its
        # state to BUILDING (so we won't try to use it for any other builds).
        # This gets set back to IDLE by the Build itself when it finishes.
        slavebuilder.buildStarted()
        cleanups.append(lambda: slavebuilder.buildFinished())

        # tell the remote that it's starting a build, too
        try:
            yield slavebuilder.remote.callRemote("startBuild")
        except:
            log.err(failure.Failure(), 'while calling remote startBuild:')
            run_cleanups()
            defer.returnValue(False)
            return

        # create the BuildStatus object that goes with the Build
        bs = self.builder_status.newBuild()

        # record the build in the db - one row per buildrequest
        try:
            bids = []
            for req in build.requests:
                bid = yield self.master.db.builds.addBuild(req.id, bs.number)
                bids.append(bid)
        except:
            log.err(failure.Failure(), 'while adding rows to build table:')
            run_cleanups()
            defer.returnValue(False)
            return

        # let status know
        self.master.status.build_started(req.id, self.name, bs)

        # start the build. This will first set up the steps, then tell the
        # BuildStatus that it has started, which will announce it to the
        # world (through our BuilderStatus object, which is its parent).
        # Finally it will start the actual build process. This is done with
        # a fresh Deferred since _startBuildFor should not wait until the
        # build is finished.
        d = build.startBuild(bs, self.expectations, slavebuilder)
        d.addCallback(self.buildFinished, slavebuilder, bids)
        # this shouldn't happen. if it does, the slave will be wedged
        d.addErrback(log.err)

        # make sure the builder's status is represented correctly
        self.updateBigStatus()

        defer.returnValue(True)

    def setupProperties(self, props):
        props.setProperty("buildername", self.name, "Builder")
        if len(self.config.properties) > 0:
            for propertyname in self.config.properties:
                props.setProperty(propertyname,
                                  self.config.properties[propertyname],
                                  "Builder")

    def buildFinished(self, build, sb, bids):
        """This is called when the Build has finished (either success or
        failure). Any exceptions during the build are reported with
        results=FAILURE, not with an errback."""

        # by the time we get here, the Build has already released the slave,
        # which will trigger a check for any now-possible build requests
        # (maybeStartBuilds)

        # mark the builds as finished, although since nothing ever reads this
        # table, it's not too important that it complete successfully
        d = self.master.db.builds.finishBuilds(bids)
        d.addErrback(log.err, 'while marking builds as finished (ignored)')

        results = build.build_status.getResults()
        self.building.remove(build)
        if results == RETRY:
            self._resubmit_buildreqs(build).addErrback(log.err)
        else:
            brids = [br.id for br in build.requests]
            db = self.master.db
            d = db.buildrequests.completeBuildRequests(brids, results)
            d.addCallback(
                lambda _: self._maybeBuildsetsComplete(build.requests))
            # nothing in particular to do with this deferred, so just log it
            # if it fails..
            d.addErrback(log.err, 'while marking build requests as completed')

        if sb.slave:
            sb.slave.releaseLocks()

        self.updateBigStatus()

    @defer.inlineCallbacks
    def _maybeBuildsetsComplete(self, requests):
        # inform the master that we may have completed a number of buildsets
        for br in requests:
            yield self.master.maybeBuildsetComplete(br.bsid)

    def _resubmit_buildreqs(self, build):
        brids = [br.id for br in build.requests]
        return self.master.db.buildrequests.unclaimBuildRequests(brids)

    def setExpectations(self, progress):
        """Mark the build as successful and update expectations for the next
        build. Only call this when the build did not fail in any way that
        would invalidate the time expectations generated by it. (if the
        compile failed and thus terminated early, we can't use the last
        build to predict how long the next one will take).
        """
        if self.expectations:
            self.expectations.update(progress)
        else:
            # the first time we get a good build, create our Expectations
            # based upon its results
            self.expectations = Expectations(progress)
        log.msg("new expectations: %s seconds" %
                self.expectations.expectedBuildTime())

    # Build Creation

    @defer.inlineCallbacks
    def maybeStartBuild(self):
        # This method is called by the botmaster whenever this builder should
        # check for and potentially start new builds. Do not call this method
        # directly - use master.botmaster.maybeStartBuildsForBuilder, or one
        # of the other similar methods if more appropriate

        # first, if we're not running, then don't start builds; stopService
        # uses this to ensure that any ongoing maybeStartBuild invocations
        # are complete before it stops.
        if not self.running:
            return

        # Check for available slaves. If there are no available slaves, then
        # there is no sense continuing
        available_slavebuilders = [sb for sb in self.slaves
                                   if sb.isAvailable()]
        if not available_slavebuilders:
            self.updateBigStatus()
            return

        # now, get the available build requests
        unclaimed_requests = \
            yield self.master.db.buildrequests.getBuildRequests(
                buildername=self.name, claimed=False)

        if not unclaimed_requests:
            self.updateBigStatus()
            return

        # sort by submitted_at, so the first is the oldest
        unclaimed_requests.sort(key=lambda brd: brd['submitted_at'])

        # get the mergeRequests function for later
        mergeRequests_fn = self._getMergeRequestsFn()

        # match them up until we're out of options
        while available_slavebuilders and unclaimed_requests:
            # first, choose a slave (using nextSlave)
            slavebuilder = yield self._chooseSlave(available_slavebuilders)
            if not slavebuilder:
                break

            if slavebuilder not in available_slavebuilders:
                log.msg(("nextSlave chose a nonexistent slave for builder "
                         "'%s'; cannot start build") % self.name)
                break

            # then choose a request (using nextBuild)
            brdict = yield self._chooseBuild(unclaimed_requests)
            if not brdict:
                break

            if brdict not in unclaimed_requests:
                log.msg(("nextBuild chose a nonexistent request for builder "
                         "'%s'; cannot start build") % self.name)
                break

            # merge the chosen request with any compatible requests in the
            # queue
            brdicts = yield self._mergeRequests(brdict, unclaimed_requests,
                                                mergeRequests_fn)

            # try to claim the build requests
            brids = [brdict['brid'] for brdict in brdicts]
            try:
                yield self.master.db.buildrequests.claimBuildRequests(brids)
            except buildrequests.AlreadyClaimedError:
                # one or more of the build requests was already claimed;
                # re-fetch the now-partially-claimed build requests and keep
                # trying to match them
                self._breakBrdictRefloops(unclaimed_requests)
                unclaimed_requests = \
                    yield self.master.db.buildrequests.getBuildRequests(
                        buildername=self.name,
                        claimed=False)

                # go around the loop again
                continue

            # claim was successful, so initiate a build for this set of
            # requests.  Note that if the build fails from here on out (e.g.,
            # because a slave has failed), it will be handled outside of this
            # loop.  TODO: test that!

            # _startBuildFor expects BuildRequest objects, so cook some up
            breqs = yield defer.gatherResults(
                [self._brdictToBuildRequest(brdict) for brdict in brdicts])

            build_started = yield self._startBuildFor(slavebuilder, breqs)

            if not build_started:
                # build was not started, so unclaim the build requests
                yield self.master.db.buildrequests.unclaimBuildRequests(brids)

                # and try starting builds again.  If we still have a working
                # slave, then this may re-claim the same buildrequests
                self.botmaster.maybeStartBuildsForBuilder(self.name)

            # finally, remove the buildrequests and slavebuilder from the
            # respective queues
            self._breakBrdictRefloops(brdicts)
            for brdict in brdicts:
                unclaimed_requests.remove(brdict)
            available_slavebuilders.remove(slavebuilder)

        self._breakBrdictRefloops(unclaimed_requests)
        self.updateBigStatus()
        return

    # a few utility functions to make the maybeStartBuild a bit shorter and
    # easier to read

    def _chooseSlave(self, available_slavebuilders):
        """
        Choose the next slave, using the C{nextSlave} configuration if
        available, and falling back to C{random.choice} otherwise.

        @param available_slavebuilders: list of slavebuilders to choose from
        @returns: SlaveBuilder or None via Deferred
        """
        if self.config.nextSlave:
            return defer.maybeDeferred(
                lambda: self.config.nextSlave(self, available_slavebuilders))
        else:
            return defer.succeed(random.choice(available_slavebuilders))

    def _chooseBuild(self, buildrequests):
        """
        Choose the next build from the given set of build requests
        (represented as dictionaries).  Defaults to returning the first
        request (earliest submitted).
        @param buildrequests: sorted list of build request dictionaries
        @returns: a build request dictionary or None via Deferred
        """
        if self.config.nextBuild:
            # nextBuild expects BuildRequest objects, so instantiate them
            # here and cache them in the dictionaries
            d = defer.gatherResults([self._brdictToBuildRequest(brdict)
                                     for brdict in buildrequests])
            d.addCallback(lambda requestobjects:
                          self.config.nextBuild(self, requestobjects))

            def to_brdict(brobj):
                # get the brdict for this object back
                return brobj.brdict
            d.addCallback(to_brdict)
            return d
        else:
            return defer.succeed(buildrequests[0])

    def _getMergeRequestsFn(self):
        """Helper function to determine which mergeRequests function to use
        from L{_mergeRequests}, or None for no merging"""
        # first, seek through builder, global, and the default
        mergeRequests_fn = self.config.mergeRequests
        if mergeRequests_fn is None:
            mergeRequests_fn = self.master.config.mergeRequests
        if mergeRequests_fn is None:
            mergeRequests_fn = True

        # then translate False and True properly
        if mergeRequests_fn is False:
            mergeRequests_fn = None
        elif mergeRequests_fn is True:
            mergeRequests_fn = Builder._defaultMergeRequestFn

        return mergeRequests_fn

    def _defaultMergeRequestFn(self, req1, req2):
        return req1.canBeMergedWith(req2)

    @defer.inlineCallbacks
    def _mergeRequests(self, breq, unclaimed_requests, mergeRequests_fn):
        """Use C{mergeRequests_fn} to merge C{breq} against
        C{unclaimed_requests}, where both are build request dictionaries"""
        # short circuit if there is no merging to do
        if not mergeRequests_fn or len(unclaimed_requests) == 1:
            defer.returnValue([breq])
            return

        # we'll need BuildRequest objects, so get those first
        unclaimed_request_objects = yield defer.gatherResults(
            [self._brdictToBuildRequest(brdict)
             for brdict in unclaimed_requests])
        breq_object = unclaimed_request_objects[
            unclaimed_requests.index(breq)]

        # gather the mergeable requests
        merged_request_objects = []
        for other_breq_object in unclaimed_request_objects:
            if (yield defer.maybeDeferred(
                    lambda: mergeRequests_fn(self, breq_object,
                                             other_breq_object))):
                merged_request_objects.append(other_breq_object)

        # convert them back to brdicts and return
        merged_requests = [br.brdict for br in merged_request_objects]
        defer.returnValue(merged_requests)

    def _brdictToBuildRequest(self, brdict):
        """
        Convert a build request dictionary to a L{buildrequest.BuildRequest}
        object, caching the result in the dictionary itself.  The resulting
        buildrequest will have a C{brdict} attribute pointing back to this
        dictionary.

        Note that this does not perform any locking - be careful that it is
        only called once at a time for each build request dictionary.

        @param brdict: dictionary to convert

        @returns: L{buildrequest.BuildRequest} via Deferred
        """
        if 'brobj' in brdict:
            return defer.succeed(brdict['brobj'])
        d = buildrequest.BuildRequest.fromBrdict(self.master, brdict)

        def keep(buildrequest):
            brdict['brobj'] = buildrequest
            buildrequest.brdict = brdict
            return buildrequest
        d.addCallback(keep)
        return d

    def _breakBrdictRefloops(self, requests):
        """Break the reference loops created by L{_brdictToBuildRequest}"""
        for brdict in requests:
            try:
                del brdict['brobj'].brdict
            except KeyError:
                pass
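
# ----------------------------------------------------------------------
# The _chooseSlave/_chooseBuild helpers above call user-supplied
# nextSlave(builder, available_slavebuilders) and
# nextBuild(builder, requestobjects) hooks when they are configured.
# Below is a minimal, hypothetical sketch of such hooks; the function
# names and selection policies are illustrative, not part of this
# module, and would be wired in via the builder's nextSlave/nextBuild
# configuration.

def pick_first_slave(builder, available_slavebuilders):
    # a deterministic stand-in for the random.choice default
    return available_slavebuilders[0]

def pick_oldest_request(builder, requestobjects):
    # the request objects derive from a queue already sorted by
    # submitted_at, so the default policy amounts to taking the first
    return requestobjects[0]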
class Builder(pb.Referenceable, service.MultiService):
    """I manage all Builds of a given type.

    Each Builder is created by an entry in the config file (the
    c['builders'] list), with a number of parameters.

    One of these parameters is the L{buildbot.process.factory.BuildFactory}
    object that is associated with this Builder. The factory is responsible
    for creating new L{Build<buildbot.process.build.Build>} objects. Each
    Build object defines when and how the build is performed, so a new
    Factory or Builder should be defined to control this behavior.

    The Builder holds on to a number of L{BuildRequest} objects in a list
    named C{.buildable}. Incoming BuildRequest objects will be added to this
    list, or (if possible) merged into an existing request. When a slave
    becomes available, I will use my C{BuildFactory} to turn the request
    into a new C{Build} object. The C{BuildRequest} is forgotten, the
    C{Build} goes into C{.building} while it runs. Once the build finishes,
    I will discard it.

    I maintain a list of available SlaveBuilders, one for each connected
    slave that the C{slavenames} parameter says we can use. Some of these
    will be idle, some of them will be busy running builds for me. If there
    are multiple slaves, I can run multiple builds at once.

    I also manage forced builds, progress expectation (ETA) management, and
    some status delivery chores.

    @type buildable: list of L{buildbot.process.buildrequest.BuildRequest}
    @ivar buildable: BuildRequests that are ready to build, but which are
                     waiting for a buildslave to be available.

    @type building: list of L{buildbot.process.build.Build}
    @ivar building: Builds that are actively running

    @type slaves: list of L{buildbot.buildslave.BuildSlave} objects
    @ivar slaves: the slaves currently available for building
    """

    expectations = None  # this is created the first time we get a good build
    CHOOSE_SLAVES_RANDOMLY = True  # disabled for determinism during tests

    def __init__(self, setup, builder_status):
        """
        @type  setup: dict
        @param setup: builder setup data, as stored in
                      BuildmasterConfig['builders'].  Contains name,
                      slavename(s), builddir, slavebuilddir, factory, locks.
        @type  builder_status: L{buildbot.status.builder.BuilderStatus}
        """
        service.MultiService.__init__(self)
        self.name = setup['name']
        self.slavenames = []
        if setup.has_key('slavename'):
            self.slavenames.append(setup['slavename'])
        if setup.has_key('slavenames'):
            self.slavenames.extend(setup['slavenames'])
        self.builddir = setup['builddir']
        self.slavebuilddir = setup['slavebuilddir']
        self.buildFactory = setup['factory']
        self.nextSlave = setup.get('nextSlave')
        if self.nextSlave is not None and not callable(self.nextSlave):
            raise ValueError("nextSlave must be callable")
        self.locks = setup.get("locks", [])
        self.env = setup.get('env', {})
        assert isinstance(self.env, dict)
        if setup.has_key('periodicBuildTime'):
            raise ValueError("periodicBuildTime can no longer be defined as"
                             " part of the Builder: use scheduler.Periodic"
                             " instead")
        self.nextBuild = setup.get('nextBuild')
        if self.nextBuild is not None and not callable(self.nextBuild):
            raise ValueError("nextBuild must be callable")
        self.buildHorizon = setup.get('buildHorizon')
        self.logHorizon = setup.get('logHorizon')
        self.eventHorizon = setup.get('eventHorizon')
        self.mergeRequests = setup.get('mergeRequests', True)
        self.properties = setup.get('properties', {})
        self.category = setup.get('category', None)

        # build/wannabuild slots: Build objects move along this sequence
        self.building = []
        # old_building holds active builds that were stolen from a predecessor
        self.old_building = weakref.WeakKeyDictionary()

        # buildslaves which have connected but which are not yet available.
        # These are always in the ATTACHING state.
        self.attaching_slaves = []

        # buildslaves at our disposal. Each SlaveBuilder instance has a
        # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a
        # Build is about to start, to make sure that they're still alive.
        self.slaves = []

        self.builder_status = builder_status
        self.builder_status.setSlavenames(self.slavenames)
        self.builder_status.buildHorizon = self.buildHorizon
        self.builder_status.logHorizon = self.logHorizon
        self.builder_status.eventHorizon = self.eventHorizon
        t = internet.TimerService(10*60, self.reclaimAllBuilds)
        t.setServiceParent(self)

        # for testing, to help synchronize tests
        self.watchers = {'attach': [], 'detach': [], 'detach_all': [],
                         'idle': []}
        self.run_count = 0

    def setBotmaster(self, botmaster):
        self.botmaster = botmaster
        self.db = botmaster.db
        self.master_name = botmaster.master_name
        self.master_incarnation = botmaster.master_incarnation

    def compareToSetup(self, setup):
        diffs = []
        setup_slavenames = []
        if setup.has_key('slavename'):
            setup_slavenames.append(setup['slavename'])
        setup_slavenames.extend(setup.get('slavenames', []))
        if setup_slavenames != self.slavenames:
            diffs.append('slavenames changed from %s to %s'
                         % (self.slavenames, setup_slavenames))
        if setup['builddir'] != self.builddir:
            diffs.append('builddir changed from %s to %s'
                         % (self.builddir, setup['builddir']))
        if setup['slavebuilddir'] != self.slavebuilddir:
            diffs.append('slavebuilddir changed from %s to %s'
                         % (self.slavebuilddir, setup['slavebuilddir']))
        if setup['factory'] != self.buildFactory:  # compare objects
            diffs.append('factory changed')
        if setup.get('locks', []) != self.locks:
            diffs.append('locks changed from %s to %s'
                         % (self.locks, setup.get('locks')))
        if setup.get('env', {}) != self.env:
            diffs.append('env changed from %s to %s'
                         % (self.env, setup.get('env', {})))
        if setup.get('nextSlave') != self.nextSlave:
            diffs.append('nextSlave changed from %s to %s'
                         % (self.nextSlave, setup.get('nextSlave')))
        if setup.get('nextBuild') != self.nextBuild:
            diffs.append('nextBuild changed from %s to %s'
                         % (self.nextBuild, setup.get('nextBuild')))
        if setup.get('buildHorizon', None) != self.buildHorizon:
            diffs.append('buildHorizon changed from %s to %s'
                         % (self.buildHorizon, setup['buildHorizon']))
        if setup.get('logHorizon', None) != self.logHorizon:
            diffs.append('logHorizon changed from %s to %s'
                         % (self.logHorizon, setup['logHorizon']))
        if setup.get('eventHorizon', None) != self.eventHorizon:
            diffs.append('eventHorizon changed from %s to %s'
                         % (self.eventHorizon, setup['eventHorizon']))
        if setup.get('category', None) != self.category:
            diffs.append('category changed from %r to %r'
                         % (self.category, setup.get('category', None)))
        return diffs

    def __repr__(self):
        return "<Builder '%r' at %d>" % (self.name, id(self))

    def triggerNewBuildCheck(self):
        self.botmaster.triggerNewBuildCheck()

    def run(self):
        """Check for work to be done. This should be called any time I might
        be able to start a job:

         - when the Builder is first created
         - when a new job has been added to the [buildrequests] DB table
         - when a slave has connected

        If I have both an available slave and the database contains a
        BuildRequest that I can handle, I will claim the BuildRequest and
        start the build. When the build finishes, I will retire the
        BuildRequest.
        """
        # overall plan:
        #  move .expectations to DB

        # if we're not running, we may still be called from leftovers from
        # a run of the loop, so just ignore the call.
        if not self.running:
            return

        self.run_count += 1

        available_slaves = [sb for sb in self.slaves if sb.isAvailable()]
        if not available_slaves:
            self.updateBigStatus()
            return
        d = self.db.runInteraction(self._claim_buildreqs, available_slaves)
        d.addCallback(self._start_builds)
        return d

    # slave-managers must refresh their claim on a build at least once an
    # hour, less any inter-manager clock skew
    RECLAIM_INTERVAL = 1*3600

    def _claim_buildreqs(self, t, available_slaves):
        # return a dict mapping slave -> (brid,ssid)
        now = util.now()
        old = now - self.RECLAIM_INTERVAL
        requests = self.db.get_unclaimed_buildrequests(self.name, old,
                                                       self.master_name,
                                                       self.master_incarnation,
                                                       t)

        assignments = {}
        while requests and available_slaves:
            sb = self._choose_slave(available_slaves)
            if not sb:
                log.msg("%s: want to start build, but we don't have a remote"
                        % self)
                break
            available_slaves.remove(sb)
            breq = self._choose_build(requests)
            if not breq:
                log.msg("%s: went to start build, but nextBuild said not to"
                        % self)
                break
            requests.remove(breq)
            merged_requests = [breq]
            for other_breq in requests[:]:
                if (self.mergeRequests and
                        self.botmaster.shouldMergeRequests(self, breq,
                                                           other_breq)):
                    requests.remove(other_breq)
                    merged_requests.append(other_breq)
            assignments[sb] = merged_requests
            brids = [br.id for br in merged_requests]
            self.db.claim_buildrequests(now, self.master_name,
                                        self.master_incarnation, brids, t)
        return assignments

    def _choose_slave(self, available_slaves):
        # note: this might return None if the nextSlave() function decided
        # to not give us anything
        if self.nextSlave:
            try:
                return self.nextSlave(self, available_slaves)
            except:
                log.msg("Exception choosing next slave")
                log.err(Failure())
                return None
        if self.CHOOSE_SLAVES_RANDOMLY:
            return random.choice(available_slaves)
        return available_slaves[0]

    def _choose_build(self, buildable):
        if self.nextBuild:
            try:
                return self.nextBuild(self, buildable)
            except:
                log.msg("Exception choosing next build")
                log.err(Failure())
                return None
        return buildable[0]

    def _start_builds(self, assignments):
        # because _claim_buildreqs runs in a separate thread, we might have
        # lost a slave by this point. We treat that case the same as if we
        # lose the slave right after the build starts: the initial ping
        # fails.
        for (sb, requests) in assignments.items():
            build = self.buildFactory.newBuild(requests)
            build.setBuilder(self)
            build.setLocks(self.locks)
            if len(self.env) > 0:
                build.setSlaveEnvironment(self.env)
            self.startBuild(build, sb)
        self.updateBigStatus()

    def getBuildable(self, limit=None):
        return self.db.runInteractionNow(self._getBuildable, limit)

    def _getBuildable(self, t, limit):
        now = util.now()
        old = now - self.RECLAIM_INTERVAL
        return self.db.get_unclaimed_buildrequests(self.name, old,
                                                   self.master_name,
                                                   self.master_incarnation,
                                                   t, limit)

    def getOldestRequestTime(self):
        """Returns the timestamp of the oldest build request for this
        builder. If there are no build requests, None is returned."""
        buildable = self.getBuildable(1)
        if buildable:
            # TODO: this is sorted by priority first, not strictly reqtime
            return buildable[0].getSubmitTime()
        return None

    def cancelBuildRequest(self, brid):
        return self.db.cancel_buildrequests([brid])

    def consumeTheSoulOfYourPredecessor(self, old):
        """Suck the brain out of an old Builder.

        This takes all the runtime state from an existing Builder and moves
        it into ourselves. This is used when a Builder is changed in the
        master.cfg file: the new Builder has a different factory, but we
        want all the builds that were queued for the old one to get
        processed by the new one.
        Any builds which are already running will keep running. The new
        Builder will get as many of the old SlaveBuilder objects as it
        wants."""

        log.msg("consumeTheSoulOfYourPredecessor: %s feeding upon %s" %
                (self, old))

        # all pending builds are stored in the DB, so we don't have to do
        # anything to claim them. The old builder will be stopService'd,
        # which should make sure they don't start any new work

        # this is kind of silly, but the builder status doesn't get updated
        # when the config changes, yet it stores the category.  So:
        self.builder_status.category = self.category

        # old.building (i.e. builds which are still running) is not migrated
        # directly: it keeps track of builds which were in progress in the
        # old Builder. When those builds finish, the old Builder will be
        # notified, not us. However, since the old SlaveBuilder will point to
        # us, it is our maybeStartBuild() that will be triggered.
        if old.building:
            self.builder_status.setBigState("building")
        # however, we do grab a weakref to the active builds, so that our
        # BuilderControl can see them and stop them. We use a weakref because
        # we aren't the one to get notified, so there isn't a convenient
        # place to remove it from self.building .
        for b in old.building:
            self.old_building[b] = None
        for b in old.old_building:
            self.old_building[b] = None

        # Our set of slavenames may be different. Steal any of the old
        # buildslaves that we want to keep using.
        for sb in old.slaves[:]:
            if sb.slave.slavename in self.slavenames:
                log.msg(" stealing buildslave %s" % sb)
                self.slaves.append(sb)
                old.slaves.remove(sb)
                sb.setBuilder(self)

        # old.attaching_slaves:
        #  these SlaveBuilders are waiting on a sequence of calls:
        #  remote.setMaster and remote.print . When these two complete,
        #  old._attached will be fired, which will add a 'connect' event to
        #  the builder_status and try to start a build. However, we've pulled
        #  everything out of the old builder's queue, so it will have no work
        #  to do. The outstanding remote.setMaster/print call will be holding
        #  the last reference to the old builder, so it will disappear just
        #  after that response comes back.
        #
        #  The BotMaster will ask the slave to re-set their list of Builders
        #  shortly after this function returns, which will cause our
        #  attached() method to be fired with a bunch of references to remote
        #  SlaveBuilders, some of which we already have (by stealing them
        #  from the old Builder), some of which will be new. The new ones
        #  will be re-attached.
        #  Therefore, we don't need to do anything about
        #  old.attaching_slaves

        return  # all done

    def reclaimAllBuilds(self):
        try:
            now = util.now()
            brids = set()
            for b in self.building:
                brids.update([br.id for br in b.requests])
            for b in self.old_building:
                brids.update([br.id for br in b.requests])
            self.db.claim_buildrequests(now, self.master_name,
                                        self.master_incarnation, brids)
        except:
            log.msg("Error in reclaimAllBuilds")
            log.err()

    def getBuild(self, number):
        for b in self.building:
            if b.build_status and b.build_status.number == number:
                return b
        for b in self.old_building.keys():
            if b.build_status and b.build_status.number == number:
                return b
        return None

    def fireTestEvent(self, name, fire_with=None):
        if fire_with is None:
            fire_with = self
        watchers = self.watchers[name]
        self.watchers[name] = []
        for w in watchers:
            eventually(w.callback, fire_with)

    def addLatentSlave(self, slave):
        assert interfaces.ILatentBuildSlave.providedBy(slave)
        for s in self.slaves:
            if s == slave:
                break
        else:
            sb = slavebuilder.LatentSlaveBuilder(slave, self)
            self.builder_status.addPointEvent(
                ['added', 'latent', slave.slavename])
            self.slaves.append(sb)
            self.triggerNewBuildCheck()

    def attached(self, slave, remote, commands):
        """This is invoked by the BuildSlave when the self.slavename bot
        registers their builder.

        @type  slave: L{buildbot.buildslave.BuildSlave}
        @param slave: the BuildSlave that represents the buildslave as a
                      whole
        @type  remote: L{twisted.spread.pb.RemoteReference}
        @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder}
        @type  commands: dict: string -> string, or None
        @param commands: provides the slave's version of each RemoteCommand

        @rtype:  L{twisted.internet.defer.Deferred}
        @return: a Deferred that fires (with 'self') when the slave-side
                 builder is fully attached and ready to accept commands.
        """
        for s in self.attaching_slaves + self.slaves:
            if s.slave == slave:
                # already attached to them. This is fairly common, since
                # attached() gets called each time we receive the builder
                # list from the slave, and we ask for it each time we add or
                # remove a builder. So if the slave is hosting builders
                # A,B,C, and the config file changes A, we'll remove A and
                # re-add it, triggering two builder-list requests, getting
                # two redundant calls to attached() for B, and another two
                # for C.
                #
                # Therefore, when we see that we're already attached, we can
                # just ignore it. TODO: build a diagram of the state
                # transitions here, I'm concerned about sb.attached() failing
                # and leaving sb.state stuck at 'ATTACHING', and about the
                # detached() message arriving while there's some transition
                # pending such that the response to the transition
                # re-vivifies sb
                return defer.succeed(self)

        sb = slavebuilder.SlaveBuilder()
        sb.setBuilder(self)
        self.attaching_slaves.append(sb)
        d = sb.attached(slave, remote, commands)
        d.addCallback(self._attached)
        d.addErrback(self._not_attached, slave)
        return d

    def _attached(self, sb):
        # TODO: make this .addSlaveEvent(slave.slavename, ['connect']) ?
        self.builder_status.addPointEvent(['connect', sb.slave.slavename])
        self.attaching_slaves.remove(sb)
        self.slaves.append(sb)

        self.fireTestEvent('attach')
        return self

    def _not_attached(self, why, slave):
        # already log.err'ed by SlaveBuilder._attachFailure
        # TODO: make this .addSlaveEvent?
        # TODO: remove from self.slaves (except that detached() should get
        #       run first, right?)
        print why
        self.builder_status.addPointEvent(['failed', 'connect',
                                           slave.slavename])
        # TODO: add an HTMLLogFile of the exception
        self.fireTestEvent('attach', why)

    def detached(self, slave):
        """This is called when the connection to the bot is lost."""
        for sb in self.attaching_slaves + self.slaves:
            if sb.slave == slave:
                break
        else:
            log.msg("WEIRD: Builder.detached(%s) (%s)"
                    " not in attaching_slaves(%s)"
                    " or slaves(%s)" % (slave, slave.slavename,
                                        self.attaching_slaves,
                                        self.slaves))
            return
        if sb.state == BUILDING:
            # the Build's .lostRemote method (invoked by a notifyOnDisconnect
            # handler) will cause the Build to be stopped, probably right
            # after the notifyOnDisconnect that invoked us finishes running.
            # TODO: should failover to a new Build
            # self.retryBuild(sb.build)
            pass

        if sb in self.attaching_slaves:
            self.attaching_slaves.remove(sb)
        if sb in self.slaves:
            self.slaves.remove(sb)

        # TODO: make this .addSlaveEvent?
        self.builder_status.addPointEvent(['disconnect', slave.slavename])
        sb.detached()  # inform the SlaveBuilder that their slave went away
        self.updateBigStatus()
        self.fireTestEvent('detach')
        if not self.slaves:
            self.fireTestEvent('detach_all')

    def updateBigStatus(self):
        if not self.slaves:
            self.builder_status.setBigState("offline")
        elif self.building:
            self.builder_status.setBigState("building")
        else:
            self.builder_status.setBigState("idle")
            self.fireTestEvent('idle')

    def startBuild(self, build, sb):
        """Start a build on the given slave.
        @param build: the L{base.Build} to start
        @param sb: the L{SlaveBuilder} which will host this build

        @return: a Deferred which fires with a
                 L{buildbot.interfaces.IBuildControl} that can be used to
                 stop the Build, or to access a
                 L{buildbot.interfaces.IBuildStatus} which will watch the
                 Build as it runs.
        """
        self.building.append(build)
        self.updateBigStatus()
        log.msg("starting build %s using slave %s" % (build, sb))
        d = sb.prepare(self.builder_status)

        def _prepared(ready):
            # If prepare returns True then it is ready and we start a build
            # If it returns false then we don't start a new build.
            d = defer.succeed(ready)

            if not ready:
                # FIXME: We should perhaps trigger a check to see if there
                #        is any other way to schedule the work
                log.msg("slave %s can't build %s after all" % (sb, build))

                # release the slave. This will queue a call to
                # maybeStartBuild, which will fire after other
                # notifyOnDisconnect handlers have marked the slave as
                # disconnected (so we don't try to use it again).
                # sb.buildFinished()

                log.msg("re-queueing the BuildRequest %s" % build)
                self.building.remove(build)
                self._resubmit_buildreqs(build).addErrback(log.err)

                sb.slave.releaseLocks()
                self.triggerNewBuildCheck()

                return d

            def _ping(ign):
                # ping the slave to make sure they're still there. If
                # they've fallen off the map (due to a NAT timeout or
                # something), this will fail in a couple of minutes,
                # depending upon the TCP timeout.
                #
                # TODO: This can unnecessarily suspend the starting of a
                # build, in situations where the slave is live but is
                # pushing lots of data to us in a build.
                log.msg("starting build %s.. pinging the slave %s"
                        % (build, sb))
                return sb.ping()
            d.addCallback(_ping)
            d.addCallback(self._startBuild_1, build, sb)
            return d
        d.addCallback(_prepared)
        return d

    def _startBuild_1(self, res, build, sb):
        if not res:
            return self._startBuildFailed("slave ping failed", build, sb)
        # The buildslave is ready to go. sb.buildStarted() sets its state to
        # BUILDING (so we won't try to use it for any other builds). This
        # gets set back to IDLE by the Build itself when it finishes.
        sb.buildStarted()
        d = sb.remote.callRemote("startBuild")
        d.addCallbacks(self._startBuild_2, self._startBuildFailed,
                       callbackArgs=(build, sb), errbackArgs=(build, sb))
        return d

    def _startBuild_2(self, res, build, sb):
        # create the BuildStatus object that goes with the Build
        bs = self.builder_status.newBuild()

        # start the build. This will first set up the steps, then tell the
        # BuildStatus that it has started, which will announce it to the
        # world (through our BuilderStatus object, which is its parent).
        # Finally it will start the actual build process.
        bids = [self.db.build_started(req.id, bs.number)
                for req in build.requests]
        d = build.startBuild(bs, self.expectations, sb)
        d.addCallback(self.buildFinished, sb, bids)
        # this shouldn't happen. if it does, the slave will be wedged
        d.addErrback(log.err)

        return build  # this is the IBuildControl

    def _startBuildFailed(self, why, build, sb):
        # put the build back on the buildable list
        log.msg("I tried to tell the slave that the build %s started, but "
                "remote_startBuild failed: %s" % (build, why))
        # release the slave. This will queue a call to maybeStartBuild, which
        # will fire after other notifyOnDisconnect handlers have marked the
        # slave as disconnected (so we don't try to use it again).
        sb.buildFinished()

        log.msg("re-queueing the BuildRequest")
        self.building.remove(build)
        self._resubmit_buildreqs(build).addErrback(log.err)

    def setupProperties(self, props):
        props.setProperty("buildername", self.name, "Builder")
        if len(self.properties) > 0:
            for propertyname in self.properties:
                props.setProperty(propertyname,
                                  self.properties[propertyname], "Builder")

    def buildFinished(self, build, sb, bids):
        """This is called when the Build has finished (either success or
        failure). Any exceptions during the build are reported with
        results=FAILURE, not with an errback."""

        # by the time we get here, the Build has already released the slave
        # (which queues a call to maybeStartBuild)

        self.db.builds_finished(bids)

        results = build.build_status.getResults()
        self.building.remove(build)
        if results == RETRY:
            self._resubmit_buildreqs(build).addErrback(log.err)
        else:
            brids = [br.id for br in build.requests]
            self.db.retire_buildrequests(brids, results)

        if sb.slave:
            sb.slave.releaseLocks()

        self.triggerNewBuildCheck()

    def _resubmit_buildreqs(self, build):
        brids = [br.id for br in build.requests]
        return self.db.resubmit_buildrequests(brids)

    def setExpectations(self, progress):
        """Mark the build as successful and update expectations for the next
        build. Only call this when the build did not fail in any way that
        would invalidate the time expectations generated by it. (if the
        compile failed and thus terminated early, we can't use the last
        build to predict how long the next one will take).
        """
        if self.expectations:
            self.expectations.update(progress)
        else:
            # the first time we get a good build, create our Expectations
            # based upon its results
            self.expectations = Expectations(progress)
        log.msg("new expectations: %s seconds" %
                self.expectations.expectedBuildTime())

    def shutdownSlave(self):
        if self.remote:
            self.remote.callRemote("shutdown")
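
# ----------------------------------------------------------------------
# A hypothetical example of the `setup` dict this Builder's __init__
# consumes, assembled from the keys read above (name, slavename(s),
# builddir, slavebuilddir, and factory are required; the rest are
# optional). The concrete values here are illustrative only.

from buildbot.process.factory import BuildFactory

example_setup = {
    'name': 'full-linux',
    'slavenames': ['slave1', 'slave2'],
    'builddir': 'full-linux',
    'slavebuilddir': 'full-linux',
    'factory': BuildFactory(),   # normally populated with build steps
    'locks': [],
    'env': {'CFLAGS': '-O2'},
    'mergeRequests': True,
    'properties': {'tree': 'trunk'},
    'category': 'linux',
}
# builder = Builder(example_setup, builder_status)  # builder_status being
# the matching BuilderStatus instance, per the __init__ docstring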
class Builder(config.ReconfigurableServiceMixin,
              pb.Referenceable,
              service.MultiService):

    # reconfigure builders before slaves
    reconfig_priority = 196

    def __init__(self, name, _addServices=True):
        service.MultiService.__init__(self)
        self.name = name

        # this is filled on demand by getBuilderId; don't access it directly
        self._builderid = None

        # this is created the first time we get a good build
        self.expectations = None

        # build/wannabuild slots: Build objects move along this sequence
        self.building = []
        # old_building holds active builds that were stolen from a predecessor
        self.old_building = weakref.WeakKeyDictionary()

        # buildslaves which have connected but which are not yet available.
        # These are always in the ATTACHING state.
        self.attaching_slaves = []

        # buildslaves at our disposal. Each SlaveBuilder instance has a
        # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a
        # Build is about to start, to make sure that they're still alive.
        self.slaves = []

        self.config = None
        self.builder_status = None

        if _addServices:
            self.reclaim_svc = internet.TimerService(10 * 60,
                                                     self.reclaimAllBuilds)
            self.reclaim_svc.setServiceParent(self)

            # update big status every 30 minutes, working around #1980
            self.updateStatusService = internet.TimerService(
                30 * 60, self.updateBigStatus)
            self.updateStatusService.setServiceParent(self)

    @defer.inlineCallbacks
    def reconfigService(self, new_config):
        # find this builder in the config
        found_config = False
        for builder_config in new_config.builders:
            if builder_config.name == self.name:
                found_config = True
                break
        assert found_config, "no config found for builder '%s'" % self.name

        # set up a builder status object on the first reconfig
        if not self.builder_status:
            self.builder_status = self.master.status.builderAdded(
                name=builder_config.name,
                basedir=builder_config.builddir,
                tags=builder_config.tags,
                description=builder_config.description)

        self.config = builder_config

        # allocate builderid now, so that the builder is visible in the web
        # UI; without this, the builder wouldn't appear until it performed a
        # build.
        yield self.getBuilderId()

        self.builder_status.setDescription(builder_config.description)
        self.builder_status.setTags(builder_config.tags)
        self.builder_status.setSlavenames(self.config.slavenames)
        self.builder_status.setCacheSize(new_config.caches['Builds'])

        # if we have any slavebuilders attached which are no longer
        # configured, drop them.
        new_slavenames = set(builder_config.slavenames)
        self.slaves = [s for s in self.slaves
                       if s.slave.slavename in new_slavenames]

    def __repr__(self):
        return "<Builder '%r' at %d>" % (self.name, id(self))

    def getBuilderId(self):
        # since findBuilderId is idempotent, there's no reason to add
        # additional locking around this function.
        if self._builderid:
            return defer.succeed(self._builderid)
        # buildbot.config should ensure this is already unicode, but it
        # doesn't hurt to check again
        name = ascii2unicode(self.name)
        d = self.master.data.updates.findBuilderId(name)

        @d.addCallback
        def keep(builderid):
            self._builderid = builderid
            return builderid
        return d

    @defer.inlineCallbacks
    def getOldestRequestTime(self):
        """Returns the submitted_at of the oldest unclaimed build request
        for this builder, or None if there are no build requests.
        @returns: datetime instance or None, via Deferred
        """
        unclaimed = yield self.master.data.get(
            ('builders', ascii2unicode(self.name), 'buildrequests'),
            [resultspec.Filter('claimed', 'eq', [False])])
        if unclaimed:
            unclaimed = sorted([brd['submitted_at'] for brd in unclaimed])
            defer.returnValue(unclaimed[0])
        else:
            defer.returnValue(None)

    def reclaimAllBuilds(self):
        brids = set()
        for b in self.building:
            brids.update([br.id for br in b.requests])
        for b in self.old_building:
            brids.update([br.id for br in b.requests])

        if not brids:
            return defer.succeed(None)

        d = self.master.data.updates.reclaimBuildRequests(list(brids))
        d.addErrback(log.err, 'while re-claiming running BuildRequests')
        return d

    def getBuild(self, number):
        for b in self.building:
            if b.build_status and b.build_status.number == number:
                return b
        for b in self.old_building.keys():
            if b.build_status and b.build_status.number == number:
                return b
        return None

    def addLatentSlave(self, slave):
        assert interfaces.ILatentBuildSlave.providedBy(slave)
        for s in self.slaves:
            if s == slave:
                break
        else:
            sb = slavebuilder.LatentSlaveBuilder(slave, self)
            self.builder_status.addPointEvent(
                ['added', 'latent', slave.slavename])
            self.slaves.append(sb)
            self.botmaster.maybeStartBuildsForBuilder(self.name)

    def attached(self, slave, commands):
        """This is invoked by the BuildSlave when the self.slavename bot
        registers their builder.

        @type  slave: L{buildbot.buildslave.BuildSlave}
        @param slave: the BuildSlave that represents the buildslave as a
                      whole
        @type  commands: dict: string -> string, or None
        @param commands: provides the slave's version of each RemoteCommand

        @rtype:  L{twisted.internet.defer.Deferred}
        @return: a Deferred that fires (with 'self') when the slave-side
                 builder is fully attached and ready to accept commands.
        """
        for s in self.attaching_slaves + self.slaves:
            if s.slave == slave:
                # already attached to them. This is fairly common, since
                # attached() gets called each time we receive the builder
                # list from the slave, and we ask for it each time we add or
                # remove a builder. So if the slave is hosting builders
                # A,B,C, and the config file changes A, we'll remove A and
                # re-add it, triggering two builder-list requests, getting
                # two redundant calls to attached() for B, and another two
                # for C.
                #
                # Therefore, when we see that we're already attached, we can
                # just ignore it.
                return defer.succeed(self)

        sb = slavebuilder.SlaveBuilder()
        sb.setBuilder(self)
        self.attaching_slaves.append(sb)
        d = sb.attached(slave, commands)
        d.addCallback(self._attached)
        d.addErrback(self._not_attached, slave)
        return d

    def _attached(self, sb):
        self.builder_status.addPointEvent(['connect', sb.slave.slavename])
        self.attaching_slaves.remove(sb)
        self.slaves.append(sb)

        self.updateBigStatus()

        return self

    def _not_attached(self, why, slave):
        # already log.err'ed by SlaveBuilder._attachFailure
        # TODO: remove from self.slaves (except that detached() should get
        #       run first, right?)
        log.err(why, 'slave failed to attach')
        self.builder_status.addPointEvent(['failed', 'connect',
                                           slave.slavename])
        # TODO: add an HTMLLogFile of the exception

    def detached(self, slave):
        """This is called when the connection to the bot is lost."""
        for sb in self.attaching_slaves + self.slaves:
            if sb.slave == slave:
                break
        else:
            log.msg("WEIRD: Builder.detached(%s) (%s)"
                    " not in attaching_slaves(%s)"
                    " or slaves(%s)" % (slave, slave.slavename,
                                        self.attaching_slaves,
                                        self.slaves))
            return
        if sb.state == BUILDING:
            # the Build's .lostRemote method (invoked by a notifyOnDisconnect
            # handler) will cause the Build to be stopped, probably right
            # after the notifyOnDisconnect that invoked us finishes running.
            pass

        if sb in self.attaching_slaves:
            self.attaching_slaves.remove(sb)
        if sb in self.slaves:
            self.slaves.remove(sb)

        self.builder_status.addPointEvent(['disconnect', slave.slavename])
        sb.detached()  # inform the SlaveBuilder that their slave went away
        self.updateBigStatus()

    def updateBigStatus(self):
        try:
            # Catch exceptions here, since this is called in a LoopingCall.
            if not self.builder_status:
                return
            if not self.slaves:
                self.builder_status.setBigState("offline")
            elif self.building or self.old_building:
                self.builder_status.setBigState("building")
            else:
                self.builder_status.setBigState("idle")
        except Exception:
            log.err(None, "while trying to update status of builder '%s'"
                          % (self.name,))

    def getAvailableSlaves(self):
        return [sb for sb in self.slaves if sb.isAvailable()]

    def canStartWithSlavebuilder(self, slavebuilder):
        locks = [(self.botmaster.getLockFromLockAccess(access), access)
                 for access in self.config.locks]
        return Build.canStartWithSlavebuilder(locks, slavebuilder)

    def canStartBuild(self, slavebuilder, breq):
        if callable(self.config.canStartBuild):
            return defer.maybeDeferred(self.config.canStartBuild, self,
                                       slavebuilder, breq)
        return defer.succeed(True)

    @defer.inlineCallbacks
    def _startBuildFor(self, slavebuilder, buildrequests):
        # Build a stack of cleanup functions so that, at any point, we can
        # abort this operation and unwind the commitments made so far.
        cleanups = []

        def run_cleanups():
            try:
                while cleanups:
                    fn = cleanups.pop()
                    fn()
            except:
                log.err(failure.Failure(),
                        "while running %r" % (run_cleanups,))

        # the last cleanup we want to perform is to update the big
        # status based on any other cleanup
        cleanups.append(lambda: self.updateBigStatus())

        build = self.config.factory.newBuild(buildrequests)
        build.setBuilder(self)
        log.msg("starting build %s using slave %s" % (build, slavebuilder))

        # set up locks
        build.setLocks(self.config.locks)
        cleanups.append(lambda: slavebuilder.slave.releaseLocks())

        if len(self.config.env) > 0:
            build.setSlaveEnvironment(self.config.env)

        # append the build to self.building
        self.building.append(build)
        cleanups.append(lambda: self.building.remove(build))

        # update the big status accordingly
        self.updateBigStatus()

        try:
            ready = yield slavebuilder.prepare(self.builder_status, build)
        except:
            log.err(failure.Failure(), 'while preparing slavebuilder:')
            ready = False

        # If prepare returns True then it is ready and we start a build
        # If it returns false then we don't start a new build.
        if not ready:
            log.msg("slave %s can't build %s after all; re-queueing the "
                    "request" % (slavebuilder, build))
            run_cleanups()
            defer.returnValue(False)
            return

        # ping the slave to make sure they're still there. If they've
        # fallen off the map (due to a NAT timeout or something), this
        # will fail in a couple of minutes, depending upon the TCP
        # timeout.
        #
        # TODO: This can unnecessarily suspend the starting of a build, in
        # situations where the slave is live but is pushing lots of data to
        # us in a build.
        log.msg("starting build %s.. pinging the slave %s"
                % (build, slavebuilder))
        try:
            ping_success = yield slavebuilder.ping()
        except:
            log.err(failure.Failure(), 'while pinging slave before build:')
            ping_success = False

        if not ping_success:
            log.msg("slave ping failed; re-queueing the request")
            run_cleanups()
            defer.returnValue(False)
            return

        # The buildslave is ready to go. slavebuilder.buildStarted() sets its
        # state to BUILDING (so we won't try to use it for any other builds).
        # This gets set back to IDLE by the Build itself when it finishes.
        slavebuilder.buildStarted()
        cleanups.append(lambda: slavebuilder.buildFinished())

        # tell the remote that it's starting a build, too
        try:
            yield slavebuilder.slave.conn.remoteStartBuild(build.builder.name)
        except:
            log.err(failure.Failure(), 'while calling remote startBuild:')
            run_cleanups()
            defer.returnValue(False)
            return

        # create the BuildStatus object that goes with the Build
        bs = self.builder_status.newBuild()

        # IMPORTANT: no yielding is allowed from here to the startBuild call!

        # it's possible that we lost the slave remote between the ping above
        # and now.  If so, bail out.  The build.startBuild call below
        # transfers responsibility for monitoring this connection to the
        # Build instance, so this check ensures we hand off a working
        # connection.
        if not slavebuilder.slave.conn:  # TODO: replace with isConnected()
            log.msg("slave disappeared before build could start")
            run_cleanups()
            defer.returnValue(False)
            return

        # let status know
        self.master.status.build_started(buildrequests[0].id, self.name, bs)

        # start the build. This will first set up the steps, then tell the
        # BuildStatus that it has started, which will announce it to the
        # world (through our BuilderStatus object, which is its parent).
        # Finally it will start the actual build process.  This is done with
        # a fresh Deferred since _startBuildFor should not wait until the
        # build is finished.  This uses `maybeDeferred` to ensure that any
        # exceptions raised by startBuild are treated as deferred errbacks
        # (see http://trac.buildbot.net/ticket/2428).
        d = defer.maybeDeferred(build.startBuild,
                                bs, self.expectations, slavebuilder)
        d.addCallback(lambda _: self.buildFinished(build, slavebuilder))
        # this shouldn't happen. if it does, the slave will be wedged
        d.addErrback(log.err, 'from a running build; this is a '
                     'serious error - please file a bug at http://buildbot.net')

        # make sure the builder's status is represented correctly
        self.updateBigStatus()

        defer.returnValue(True)

    def setupProperties(self, props):
        props.setProperty("buildername", self.name, "Builder")
        if len(self.config.properties) > 0:
            for propertyname in self.config.properties:
                props.setProperty(propertyname,
                                  self.config.properties[propertyname],
                                  "Builder")

    def buildFinished(self, build, sb):
        """This is called when the Build has finished (either success or
        failure).
Any exceptions during the build are reported with results=FAILURE, not with an errback.""" # by the time we get here, the Build has already released the slave, # which will trigger a check for any now-possible build requests # (maybeStartBuilds) results = build.build_status.getResults() self.building.remove(build) if results == RETRY: d = self._resubmit_buildreqs(build) d.addErrback(log.err, 'while resubmitting a build request') else: complete_at_epoch = reactor.seconds() complete_at = epoch2datetime(complete_at_epoch) brids = [br.id for br in build.requests] d = self.master.data.updates.completeBuildRequests(brids, results, complete_at=complete_at) d.addCallback(lambda _: self._notify_completions(build.requests, results, complete_at_epoch)) # nothing in particular to do with this deferred, so just log it if # it fails.. d.addErrback(log.err, 'while marking build requests as completed') if sb.slave: sb.slave.releaseLocks() self.updateBigStatus() @defer.inlineCallbacks def _notify_completions(self, requests, results, complete_at_epoch): updates = self.master.data.updates # send a message for each request for br in requests: updates.completeBuildRequests([br.id], results, epoch2datetime(complete_at_epoch)) # check for completed buildsets -- one call for each build request with # a unique bsid seen_bsids = set() for br in requests: if br.bsid in seen_bsids: continue seen_bsids.add(br.bsid) yield updates.maybeBuildsetComplete(br.bsid) def _resubmit_buildreqs(self, build): brids = [br.id for br in build.requests] d = self.master.data.updates.unclaimBuildRequests(brids) @d.addCallback def notify(_): pass # XXX method does not exist # self._msg_buildrequests_unclaimed(build.requests) return d def setExpectations(self, progress): """Mark the build as successful and update expectations for the next build. Only call this when the build did not fail in any way that would invalidate the time expectations generated by it. (if the compile failed and thus terminated early, we can't use the last build to predict how long the next one will take). """ if self.expectations: self.expectations.update(progress) else: # the first time we get a good build, create our Expectations # based upon its results self.expectations = Expectations(progress) log.msg("new expectations: %s seconds" % self.expectations.expectedBuildTime()) # Build Creation @defer.inlineCallbacks def maybeStartBuild(self, slavebuilder, breqs, _reactor=reactor): # This method is called by the botmaster whenever this builder should # start a set of buildrequests on a slave. Do not call this method # directly - use master.botmaster.maybeStartBuildsForBuilder, or one of # the other similar methods if more appropriate # first, if we're not running, then don't start builds; stopService # uses this to ensure that any ongoing maybeStartBuild invocations # are complete before it stops. if not self.running: defer.returnValue(False) return # If the build fails from here on out (e.g., because a slave has failed), # it will be handled outside of this function. TODO: test that! 
build_started = yield self._startBuildFor(slavebuilder, breqs) defer.returnValue(build_started) # a few utility functions to make the maybeStartBuild a bit shorter and # easier to read def getMergeRequestsFn(self): """Helper function to determine which mergeRequests function to use from L{_mergeRequests}, or None for no merging""" # first, seek through builder, global, and the default mergeRequests_fn = self.config.mergeRequests if mergeRequests_fn is None: mergeRequests_fn = self.master.config.mergeRequests if mergeRequests_fn is None: mergeRequests_fn = True # then translate False and True properly if mergeRequests_fn is False: mergeRequests_fn = None elif mergeRequests_fn is True: mergeRequests_fn = Builder._defaultMergeRequestFn return mergeRequests_fn def _defaultMergeRequestFn(self, req1, req2): return req1.canBeMergedWith(req2)
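# A hypothetical sketch of the mergeRequests contract that
# getMergeRequestsFn resolves above: a custom callable takes
# (builder, req1, req2) and returns a boolean. The function name and the
# .source.branch attribute access below are illustrative assumptions, not
# part of this module:
#
#   def mergeSameBranchOnly(builder, req1, req2):
#       # merge only requests that are mergeable at all *and* that are
#       # for the same branch
#       return (req1.canBeMergedWith(req2)
#               and req1.source.branch == req2.source.branch)
#
# Such a callable could then be set globally (c['mergeRequests'] in
# master.cfg) or per-builder; getMergeRequestsFn checks the builder's
# setting first, falls back to the global setting, and maps True/False
# onto the default merge function or None.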
class Builder(pb.Referenceable): """I manage all Builds of a given type. Each Builder is created by an entry in the config file (the c['builders'] list), with a number of parameters. One of these parameters is the L{buildbot.process.factory.BuildFactory} object that is associated with this Builder. The factory is responsible for creating new L{Build<buildbot.process.base.Build>} objects. Each Build object defines when and how the build is performed, so a new Factory or Builder should be defined to control this behavior. The Builder holds on to a number of L{base.BuildRequest} objects in a list named C{.buildable}. Incoming BuildRequest objects will be added to this list, or (if possible) merged into an existing request. When a slave becomes available, I will use my C{BuildFactory} to turn the request into a new C{Build} object. The C{BuildRequest} is forgotten, the C{Build} goes into C{.building} while it runs. Once the build finishes, I will discard it. I maintain a list of available SlaveBuilders, one for each connected slave that the C{slavenames} parameter says we can use. Some of these will be idle, some of them will be busy running builds for me. If there are multiple slaves, I can run multiple builds at once. I also manage forced builds, progress expectation (ETA) management, and some status delivery chores. @type buildable: list of L{buildbot.process.base.BuildRequest} @ivar buildable: BuildRequests that are ready to build, but which are waiting for a buildslave to be available. @type building: list of L{buildbot.process.base.Build} @ivar building: Builds that are actively running @type slaves: list of L{buildbot.buildslave.BuildSlave} objects @ivar slaves: the slaves currently available for building """ expectations = None # this is created the first time we get a good build START_BUILD_TIMEOUT = 10 CHOOSE_SLAVES_RANDOMLY = True # disabled for determinism during tests def __init__(self, setup, builder_status): """ @type setup: dict @param setup: builder setup data, as stored in BuildmasterConfig['builders']. Contains name, slavename(s), builddir, slavebuilddir, factory, locks. @type builder_status: L{buildbot.status.builder.BuilderStatus} """ self.name = setup['name'] self.slavenames = [] if setup.has_key('slavename'): self.slavenames.append(setup['slavename']) if setup.has_key('slavenames'): self.slavenames.extend(setup['slavenames']) self.builddir = setup['builddir'] self.slavebuilddir = setup['slavebuilddir'] self.buildFactory = setup['factory'] self.nextSlave = setup.get('nextSlave') if self.nextSlave is not None and not callable(self.nextSlave): raise ValueError("nextSlave must be callable") self.locks = setup.get("locks", []) self.env = setup.get('env', {}) assert isinstance(self.env, dict) if setup.has_key('periodicBuildTime'): raise ValueError("periodicBuildTime can no longer be defined as" " part of the Builder: use scheduler.Periodic" " instead") self.nextBuild = setup.get('nextBuild') if self.nextBuild is not None and not callable(self.nextBuild): raise ValueError("nextBuild must be callable") # build/wannabuild slots: Build objects move along this sequence self.buildable = [] self.building = [] # old_building holds active builds that were stolen from a predecessor self.old_building = weakref.WeakKeyDictionary() # buildslaves which have connected but which are not yet available. # These are always in the ATTACHING state. self.attaching_slaves = [] # buildslaves at our disposal. Each SlaveBuilder instance has a # .state that is IDLE, PINGING, or BUILDING. 
"PINGING" is used when a # Build is about to start, to make sure that they're still alive. self.slaves = [] self.builder_status = builder_status self.builder_status.setSlavenames(self.slavenames) # for testing, to help synchronize tests self.watchers = {'attach': [], 'detach': [], 'detach_all': [], 'idle': []} def setBotmaster(self, botmaster): self.botmaster = botmaster def compareToSetup(self, setup): diffs = [] setup_slavenames = [] if setup.has_key('slavename'): setup_slavenames.append(setup['slavename']) setup_slavenames.extend(setup.get('slavenames', [])) if setup_slavenames != self.slavenames: diffs.append('slavenames changed from %s to %s' \ % (self.slavenames, setup_slavenames)) if setup['builddir'] != self.builddir: diffs.append('builddir changed from %s to %s' \ % (self.builddir, setup['builddir'])) if setup['slavebuilddir'] != self.slavebuilddir: diffs.append('slavebuilddir changed from %s to %s' \ % (self.slavebuilddir, setup['slavebuilddir'])) if setup['factory'] != self.buildFactory: # compare objects diffs.append('factory changed') oldlocks = [(lock.__class__, lock.name) for lock in self.locks] newlocks = [(lock.__class__, lock.name) for lock in setup.get('locks',[])] if oldlocks != newlocks: diffs.append('locks changed from %s to %s' % (oldlocks, newlocks)) if setup.get('nextSlave') != self.nextSlave: diffs.append('nextSlave changed from %s to %s' % (self.nextSlave, setup['nextSlave'])) if setup.get('nextBuild') != self.nextBuild: diffs.append('nextBuild changed from %s to %s' % (self.nextBuild, setup['nextBuild'])) return diffs def __repr__(self): return "<Builder '%s' at %d>" % (self.name, id(self)) def getOldestRequestTime(self): """Returns the timestamp of the oldest build request for this builder. If there are no build requests, None is returned.""" if self.buildable: return self.buildable[0].getSubmitTime() else: return None def submitBuildRequest(self, req): req.setSubmitTime(now()) self.buildable.append(req) req.requestSubmitted(self) self.builder_status.addBuildRequest(req.status) self.botmaster.maybeStartAllBuilds() def cancelBuildRequest(self, req): if req in self.buildable: self.buildable.remove(req) self.builder_status.removeBuildRequest(req.status, cancelled=True) return True return False def consumeTheSoulOfYourPredecessor(self, old): """Suck the brain out of an old Builder. This takes all the runtime state from an existing Builder and moves it into ourselves. This is used when a Builder is changed in the master.cfg file: the new Builder has a different factory, but we want all the builds that were queued for the old one to get processed by the new one. Any builds which are already running will keep running. The new Builder will get as many of the old SlaveBuilder objects as it wants.""" log.msg("consumeTheSoulOfYourPredecessor: %s feeding upon %s" % (self, old)) # we claim all the pending builds, removing them from the old # Builder's queue. This insures that the old Builder will not start # any new work. log.msg(" stealing %s buildrequests" % len(old.buildable)) self.buildable.extend(old.buildable) old.buildable = [] # old.building (i.e. builds which are still running) is not migrated # directly: it keeps track of builds which were in progress in the # old Builder. When those builds finish, the old Builder will be # notified, not us. However, since the old SlaveBuilder will point to # us, it is our maybeStartBuild() that will be triggered. 
if old.building: self.builder_status.setBigState("building") # however, we do grab a weakref to the active builds, so that our # BuilderControl can see them and stop them. We use a weakref because # we aren't the one to get notified, so there isn't a convenient # place to remove it from self.building . for b in old.building: self.old_building[b] = None for b in old.old_building: self.old_building[b] = None # Our set of slavenames may be different. Steal any of the old # buildslaves that we want to keep using. for sb in old.slaves[:]: if sb.slave.slavename in self.slavenames: log.msg(" stealing buildslave %s" % sb) self.slaves.append(sb) old.slaves.remove(sb) sb.setBuilder(self) # old.attaching_slaves: # these SlaveBuilders are waiting on a sequence of calls: # remote.setMaster and remote.print . When these two complete, # old._attached will be fired, which will add a 'connect' event to # the builder_status and try to start a build. However, we've pulled # everything out of the old builder's queue, so it will have no work # to do. The outstanding remote.setMaster/print call will be holding # the last reference to the old builder, so it will disappear just # after that response comes back. # # The BotMaster will ask the slave to re-set their list of Builders # shortly after this function returns, which will cause our # attached() method to be fired with a bunch of references to remote # SlaveBuilders, some of which we already have (by stealing them # from the old Builder), some of which will be new. The new ones # will be re-attached. # Therefore, we don't need to do anything about old.attaching_slaves return # all done def getBuild(self, number): for b in self.building: if b.build_status.number == number: return b for b in self.old_building.keys(): if b.build_status.number == number: return b return None def fireTestEvent(self, name, fire_with=None): if fire_with is None: fire_with = self watchers = self.watchers[name] self.watchers[name] = [] for w in watchers: reactor.callLater(0, w.callback, fire_with) def addLatentSlave(self, slave): assert interfaces.ILatentBuildSlave.providedBy(slave) for s in self.slaves: if s == slave: break else: sb = LatentSlaveBuilder(slave, self) self.builder_status.addPointEvent( ['added', 'latent', slave.slavename]) self.slaves.append(sb) reactor.callLater(0, self.botmaster.maybeStartAllBuilds) def attached(self, slave, remote, commands): """This is invoked by the BuildSlave when the self.slavename bot registers their builder. @type slave: L{buildbot.buildslave.BuildSlave} @param slave: the BuildSlave that represents the buildslave as a whole @type remote: L{twisted.spread.pb.RemoteReference} @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder} @type commands: dict: string -> string, or None @param commands: provides the slave's version of each RemoteCommand @rtype: L{twisted.internet.defer.Deferred} @return: a Deferred that fires (with 'self') when the slave-side builder is fully attached and ready to accept commands. """ for s in self.attaching_slaves + self.slaves: if s.slave == slave: # already attached to them. This is fairly common, since # attached() gets called each time we receive the builder # list from the slave, and we ask for it each time we add or # remove a builder. So if the slave is hosting builders # A,B,C, and the config file changes A, we'll remove A and # re-add it, triggering two builder-list requests, getting # two redundant calls to attached() for B, and another two # for C. 
# # Therefore, when we see that we're already attached, we can # just ignore it. TODO: build a diagram of the state # transitions here, I'm concerned about sb.attached() failing # and leaving sb.state stuck at 'ATTACHING', and about # the detached() message arriving while there's some # transition pending such that the response to the transition # re-vivifies sb return defer.succeed(self) sb = SlaveBuilder() sb.setBuilder(self) self.attaching_slaves.append(sb) d = sb.attached(slave, remote, commands) d.addCallback(self._attached) d.addErrback(self._not_attached, slave) return d def _attached(self, sb): # TODO: make this .addSlaveEvent(slave.slavename, ['connect']) ? self.builder_status.addPointEvent(['connect', sb.slave.slavename]) self.attaching_slaves.remove(sb) self.slaves.append(sb) self.fireTestEvent('attach') return self def _not_attached(self, why, slave): # already log.err'ed by SlaveBuilder._attachFailure # TODO: make this .addSlaveEvent? # TODO: remove from self.slaves (except that detached() should get # run first, right?) self.builder_status.addPointEvent(['failed', 'connect', slave.slave.slavename]) # TODO: add an HTMLLogFile of the exception self.fireTestEvent('attach', why) def detached(self, slave): """This is called when the connection to the bot is lost.""" log.msg("%s.detached" % self, slave.slavename) for sb in self.attaching_slaves + self.slaves: if sb.slave == slave: break else: log.msg("WEIRD: Builder.detached(%s) (%s)" " not in attaching_slaves(%s)" " or slaves(%s)" % (slave, slave.slavename, self.attaching_slaves, self.slaves)) return if sb.state == BUILDING: # the Build's .lostRemote method (invoked by a notifyOnDisconnect # handler) will cause the Build to be stopped, probably right # after the notifyOnDisconnect that invoked us finishes running. # TODO: should failover to a new Build #self.retryBuild(sb.build) pass if sb in self.attaching_slaves: self.attaching_slaves.remove(sb) if sb in self.slaves: self.slaves.remove(sb) # TODO: make this .addSlaveEvent? self.builder_status.addPointEvent(['disconnect', slave.slavename]) sb.detached() # inform the SlaveBuilder that their slave went away self.updateBigStatus() self.fireTestEvent('detach') if not self.slaves: self.fireTestEvent('detach_all') def updateBigStatus(self): if not self.slaves: self.builder_status.setBigState("offline") elif self.building: self.builder_status.setBigState("building") else: self.builder_status.setBigState("idle") self.fireTestEvent('idle') def maybeStartBuild(self): log.msg("maybeStartBuild %s: %s %s" % (self, self.buildable, self.slaves)) if not self.buildable: self.updateBigStatus() return # nothing to do # pick an idle slave available_slaves = [sb for sb in self.slaves if sb.isAvailable()] if not available_slaves: log.msg("%s: want to start build, but we don't have a remote" % self) self.updateBigStatus() return if self.nextSlave: sb = None try: sb = self.nextSlave(self, available_slaves) except: log.msg("Exception choosing next slave") log.err(Failure()) if not sb: log.msg("%s: want to start build, but we don't have a remote" % self) self.updateBigStatus() return elif self.CHOOSE_SLAVES_RANDOMLY: sb = random.choice(available_slaves) else: sb = available_slaves[0] # there is something to build, and there is a slave on which to build # it. Grab the oldest request, see if we can merge it with anything # else. 
if not self.nextBuild: req = self.buildable.pop(0) else: try: req = self.nextBuild(self, self.buildable) if not req: # Nothing to do self.updateBigStatus() return self.buildable.remove(req) except: log.msg("Exception choosing next build") log.err(Failure()) self.updateBigStatus() return self.builder_status.removeBuildRequest(req.status) mergers = [] botmaster = self.botmaster for br in self.buildable[:]: if botmaster.shouldMergeRequests(self, req, br): self.buildable.remove(br) self.builder_status.removeBuildRequest(br.status) mergers.append(br) requests = [req] + mergers # Create a new build from our build factory and set ourself as the # builder. build = self.buildFactory.newBuild(requests) build.setBuilder(self) build.setLocks(self.locks) if len(self.env) > 0: build.setSlaveEnvironment(self.env) # start it self.startBuild(build, sb) def startBuild(self, build, sb): """Start a build on the given slave. @param build: the L{base.Build} to start @param sb: the L{SlaveBuilder} which will host this build @return: a Deferred which fires with a L{buildbot.interfaces.IBuildControl} that can be used to stop the Build, or to access a L{buildbot.interfaces.IBuildStatus} which will watch the Build as it runs. """ self.building.append(build) self.updateBigStatus() log.msg("starting build %s using slave %s" % (build, sb)) d = sb.prepare(self.builder_status) def _ping(ign): # ping the slave to make sure they're still there. If they've # fallen off the map (due to a NAT timeout or something), this # will fail in a couple of minutes, depending upon the TCP # timeout. TODO: consider making this time out faster, or at # least characterize the likely duration. log.msg("starting build %s.. pinging the slave %s" % (build, sb)) return sb.ping(self.START_BUILD_TIMEOUT) d.addCallback(_ping) d.addCallback(self._startBuild_1, build, sb) return d def _startBuild_1(self, res, build, sb): if not res: return self._startBuildFailed("slave ping failed", build, sb) # The buildslave is ready to go. sb.buildStarted() sets its state to # BUILDING (so we won't try to use it for any other builds). This # gets set back to IDLE by the Build itself when it finishes. sb.buildStarted() d = sb.remote.callRemote("startBuild") d.addCallbacks(self._startBuild_2, self._startBuildFailed, callbackArgs=(build, sb), errbackArgs=(build, sb)) return d def _startBuild_2(self, res, build, sb): # create the BuildStatus object that goes with the Build bs = self.builder_status.newBuild() # start the build. This will first set up the steps, then tell the # BuildStatus that it has started, which will announce it to the # world (through our BuilderStatus object, which is its parent). # Finally it will start the actual build process. d = build.startBuild(bs, self.expectations, sb) d.addCallback(self.buildFinished, sb) d.addErrback(log.err) # this shouldn't happen. if it does, the slave # will be wedged for req in build.requests: req.buildStarted(build, bs) return build # this is the IBuildControl def _startBuildFailed(self, why, build, sb): # put the build back on the buildable list log.msg("I tried to tell the slave that the build %s started, but " "remote_startBuild failed: %s" % (build, why)) # release the slave. This will queue a call to maybeStartBuild, which # will fire after other notifyOnDisconnect handlers have marked the # slave as disconnected (so we don't try to use it again).
sb.buildFinished() log.msg("re-queueing the BuildRequest") self.building.remove(build) for req in build.requests: self.buildable.insert(0, req) # the interrupted build gets first # priority self.builder_status.addBuildRequest(req.status) def buildFinished(self, build, sb): """This is called when the Build has finished (either success or failure). Any exceptions during the build are reported with results=FAILURE, not with an errback.""" # by the time we get here, the Build has already released the slave # (which queues a call to maybeStartBuild) self.building.remove(build) for req in build.requests: req.finished(build.build_status) def setExpectations(self, progress): """Mark the build as successful and update expectations for the next build. Only call this when the build did not fail in any way that would invalidate the time expectations generated by it. (if the compile failed and thus terminated early, we can't use the last build to predict how long the next one will take). """ if self.expectations: self.expectations.update(progress) else: # the first time we get a good build, create our Expectations # based upon its results self.expectations = Expectations(progress) log.msg("new expectations: %s seconds" % \ self.expectations.expectedBuildTime()) def shutdownSlave(self): if self.remote: self.remote.callRemote("shutdown")
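# A hypothetical sketch of the nextSlave/nextBuild hooks consumed by
# maybeStartBuild above. Both are plain callables supplied in the builder
# setup dict; the function names here are illustrative only:
#
#   def pickFirstSlave(builder, available_slaves):
#       # called with this Builder and its idle SlaveBuilders; returning
#       # None tells maybeStartBuild not to start anything right now
#       return available_slaves[0] if available_slaves else None
#
#   def pickOldestRequest(builder, requests):
#       # called with this Builder and its buildable BuildRequests
#       return requests[0] if requests else None
#
#   setup = {'name': 'runtests', 'slavename': 'bot1',
#            'builddir': 'runtests', 'slavebuilddir': 'runtests',
#            'factory': f,  # f: a BuildFactory, assumed defined elsewhere
#            'nextSlave': pickFirstSlave, 'nextBuild': pickOldestRequest}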
class Builder(pb.Referenceable): """I manage all Builds of a given type. Each Builder is created by an entry in the config file (the c['builders'] list), with a number of parameters. One of these parameters is the L{buildbot.process.factory.BuildFactory} object that is associated with this Builder. The factory is responsible for creating new L{Build<buildbot.process.base.Build>} objects. Each Build object defines when and how the build is performed, so a new Factory or Builder should be defined to control this behavior. The Builder holds on to a number of L{base.BuildRequest} objects in a list named C{.buildable}. Incoming BuildRequest objects will be added to this list, or (if possible) merged into an existing request. When a slave becomes available, I will use my C{BuildFactory} to turn the request into a new C{Build} object. The C{BuildRequest} is forgotten, the C{Build} goes into C{.building} while it runs. Once the build finishes, I will discard it. I maintain a list of available SlaveBuilders, one for each connected slave that the C{slavenames} parameter says we can use. Some of these will be idle, some of them will be busy running builds for me. If there are multiple slaves, I can run multiple builds at once. I also manage forced builds, progress expectation (ETA) management, and some status delivery chores. I am persisted in C{BASEDIR/BUILDERNAME/builder}, so I can remember how long a build usually takes to run (in my C{expectations} attribute). This pickle also includes the L{buildbot.status.builder.BuilderStatus} object, which remembers the set of historic builds. @type buildable: list of L{buildbot.process.base.BuildRequest} @ivar buildable: BuildRequests that are ready to build, but which are waiting for a buildslave to be available. @type building: list of L{buildbot.process.base.Build} @ivar building: Builds that are actively running @type slaves: list of L{buildbot.buildslave.BuildSlave} objects @ivar slaves: the slaves currently available for building """ expectations = None # this is created the first time we get a good build START_BUILD_TIMEOUT = 10 CHOOSE_SLAVES_RANDOMLY = True # disabled for determinism during tests def __init__(self, setup, builder_status): """ @type setup: dict @param setup: builder setup data, as stored in BuildmasterConfig['builders']. Contains name, slavename(s), builddir, factory, locks. @type builder_status: L{buildbot.status.builder.BuilderStatus} """ self.name = setup['name'] self.slavenames = [] if setup.has_key('slavename'): self.slavenames.append(setup['slavename']) if setup.has_key('slavenames'): self.slavenames.extend(setup['slavenames']) self.builddir = setup['builddir'] self.buildFactory = setup['factory'] self.locks = setup.get("locks", []) if setup.has_key('periodicBuildTime'): raise ValueError("periodicBuildTime can no longer be defined as" " part of the Builder: use scheduler.Periodic" " instead") # build/wannabuild slots: Build objects move along this sequence self.buildable = [] self.building = [] # old_building holds active builds that were stolen from a predecessor self.old_building = weakref.WeakKeyDictionary() # buildslaves which have connected but which are not yet available. # These are always in the ATTACHING state. self.attaching_slaves = [] # buildslaves at our disposal. Each SlaveBuilder instance has a # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a # Build is about to start, to make sure that they're still alive. 
self.slaves = [] self.builder_status = builder_status self.builder_status.setSlavenames(self.slavenames) # for testing, to help synchronize tests self.watchers = { 'attach': [], 'detach': [], 'detach_all': [], 'idle': [] } def setBotmaster(self, botmaster): self.botmaster = botmaster def compareToSetup(self, setup): diffs = [] setup_slavenames = [] if setup.has_key('slavename'): setup_slavenames.append(setup['slavename']) setup_slavenames.extend(setup.get('slavenames', [])) if setup_slavenames != self.slavenames: diffs.append('slavenames changed from %s to %s' \ % (self.slavenames, setup_slavenames)) if setup['builddir'] != self.builddir: diffs.append('builddir changed from %s to %s' \ % (self.builddir, setup['builddir'])) if setup['factory'] != self.buildFactory: # compare objects diffs.append('factory changed') oldlocks = [(lock.__class__, lock.name) for lock in self.locks] newlocks = [(lock.__class__, lock.name) for lock in setup.get('locks', [])] if oldlocks != newlocks: diffs.append('locks changed from %s to %s' % (oldlocks, newlocks)) return diffs def __repr__(self): return "<Builder '%s' at %d>" % (self.name, id(self)) def getOldestRequestTime(self): """Returns the timestamp of the oldest build request for this builder. If there are no build requests, None is returned.""" if self.buildable: return self.buildable[0].submittedAt else: return None def submitBuildRequest(self, req): req.submittedAt = now() self.buildable.append(req) req.requestSubmitted(self) self.builder_status.addBuildRequest(req.status) self.maybeStartBuild() def cancelBuildRequest(self, req): if req in self.buildable: self.buildable.remove(req) self.builder_status.removeBuildRequest(req.status) return True return False def __getstate__(self): d = self.__dict__.copy() # TODO: note that d['buildable'] can contain Deferreds del d['building'] # TODO: move these back to .buildable? del d['slaves'] return d def __setstate__(self, d): self.__dict__ = d self.building = [] self.slaves = [] def consumeTheSoulOfYourPredecessor(self, old): """Suck the brain out of an old Builder. This takes all the runtime state from an existing Builder and moves it into ourselves. This is used when a Builder is changed in the master.cfg file: the new Builder has a different factory, but we want all the builds that were queued for the old one to get processed by the new one. Any builds which are already running will keep running. The new Builder will get as many of the old SlaveBuilder objects as it wants.""" log.msg("consumeTheSoulOfYourPredecessor: %s feeding upon %s" % (self, old)) # we claim all the pending builds, removing them from the old # Builder's queue. This ensures that the old Builder will not start # any new work. log.msg(" stealing %s buildrequests" % len(old.buildable)) self.buildable.extend(old.buildable) old.buildable = [] # old.building (i.e. builds which are still running) is not migrated # directly: it keeps track of builds which were in progress in the # old Builder. When those builds finish, the old Builder will be # notified, not us. However, since the old SlaveBuilder will point to # us, it is our maybeStartBuild() that will be triggered. if old.building: self.builder_status.setBigState("building") # however, we do grab a weakref to the active builds, so that our # BuilderControl can see them and stop them. We use a weakref because # we aren't the one to get notified, so there isn't a convenient # place to remove it from self.building .
for b in old.building: self.old_building[b] = None for b in old.old_building: self.old_building[b] = None # Our set of slavenames may be different. Steal any of the old # buildslaves that we want to keep using. for sb in old.slaves[:]: if sb.slave.slavename in self.slavenames: log.msg(" stealing buildslave %s" % sb) self.slaves.append(sb) old.slaves.remove(sb) sb.setBuilder(self) # old.attaching_slaves: # these SlaveBuilders are waiting on a sequence of calls: # remote.setMaster and remote.print . When these two complete, # old._attached will be fired, which will add a 'connect' event to # the builder_status and try to start a build. However, we've pulled # everything out of the old builder's queue, so it will have no work # to do. The outstanding remote.setMaster/print call will be holding # the last reference to the old builder, so it will disappear just # after that response comes back. # # The BotMaster will ask the slave to re-set their list of Builders # shortly after this function returns, which will cause our # attached() method to be fired with a bunch of references to remote # SlaveBuilders, some of which we already have (by stealing them # from the old Builder), some of which will be new. The new ones # will be re-attached. # Therefore, we don't need to do anything about old.attaching_slaves return # all done def getBuild(self, number): for b in self.building: if b.build_status.number == number: return b for b in self.old_building.keys(): if b.build_status.number == number: return b return None def fireTestEvent(self, name, fire_with=None): if fire_with is None: fire_with = self watchers = self.watchers[name] self.watchers[name] = [] for w in watchers: reactor.callLater(0, w.callback, fire_with) def attached(self, slave, remote, commands): """This is invoked by the BuildSlave when the self.slavename bot registers their builder. @type slave: L{buildbot.buildslave.BuildSlave} @param slave: the BuildSlave that represents the buildslave as a whole @type remote: L{twisted.spread.pb.RemoteReference} @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder} @type commands: dict: string -> string, or None @param commands: provides the slave's version of each RemoteCommand @rtype: L{twisted.internet.defer.Deferred} @return: a Deferred that fires (with 'self') when the slave-side builder is fully attached and ready to accept commands. """ for s in self.attaching_slaves + self.slaves: if s.slave == slave: # already attached to them. This is fairly common, since # attached() gets called each time we receive the builder # list from the slave, and we ask for it each time we add or # remove a builder. So if the slave is hosting builders # A,B,C, and the config file changes A, we'll remove A and # re-add it, triggering two builder-list requests, getting # two redundant calls to attached() for B, and another two # for C. # # Therefore, when we see that we're already attached, we can # just ignore it. TODO: build a diagram of the state # transitions here, I'm concerned about sb.attached() failing # and leaving sb.state stuck at 'ATTACHING', and about # the detached() message arriving while there's some # transition pending such that the response to the transition # re-vivifies sb return defer.succeed(self) sb = SlaveBuilder() sb.setBuilder(self) self.attaching_slaves.append(sb) d = sb.attached(slave, remote, commands) d.addCallback(self._attached) d.addErrback(self._not_attached, slave) return d def _attached(self, sb): # TODO: make this .addSlaveEvent(slave.slavename, ['connect']) ? 
self.builder_status.addPointEvent(['connect', sb.slave.slavename]) self.attaching_slaves.remove(sb) self.slaves.append(sb) reactor.callLater(0, self.maybeStartBuild) self.fireTestEvent('attach') return self def _not_attached(self, why, slave): # already log.err'ed by SlaveBuilder._attachFailure # TODO: make this .addSlaveEvent? # TODO: remove from self.slaves (except that detached() should get # run first, right?) self.builder_status.addPointEvent( ['failed', 'connect', slave.slave.slavename]) # TODO: add an HTMLLogFile of the exception self.fireTestEvent('attach', why) def detached(self, slave): """This is called when the connection to the bot is lost.""" log.msg("%s.detached" % self, slave.slavename) for sb in self.attaching_slaves + self.slaves: if sb.slave == slave: break else: log.msg( "WEIRD: Builder.detached(%s) (%s)" " not in attaching_slaves(%s)" " or slaves(%s)" % (slave, slave.slavename, self.attaching_slaves, self.slaves)) return if sb.state == BUILDING: # the Build's .lostRemote method (invoked by a notifyOnDisconnect # handler) will cause the Build to be stopped, probably right # after the notifyOnDisconnect that invoked us finishes running. # TODO: should failover to a new Build #self.retryBuild(sb.build) pass if sb in self.attaching_slaves: self.attaching_slaves.remove(sb) if sb in self.slaves: self.slaves.remove(sb) # TODO: make this .addSlaveEvent? self.builder_status.addPointEvent(['disconnect', slave.slavename]) sb.detached() # inform the SlaveBuilder that their slave went away self.updateBigStatus() self.fireTestEvent('detach') if not self.slaves: self.fireTestEvent('detach_all') def updateBigStatus(self): if not self.slaves: self.builder_status.setBigState("offline") elif self.building: self.builder_status.setBigState("building") else: self.builder_status.setBigState("idle") self.fireTestEvent('idle') def maybeStartBuild(self): log.msg("maybeStartBuild %s: %s %s" % (self, self.buildable, self.slaves)) if not self.buildable: self.updateBigStatus() return # nothing to do # pick an idle slave available_slaves = [sb for sb in self.slaves if sb.isAvailable()] if not available_slaves: log.msg("%s: want to start build, but we don't have a remote" % self) self.updateBigStatus() return if self.CHOOSE_SLAVES_RANDOMLY: sb = random.choice(available_slaves) else: sb = available_slaves[0] # there is something to build, and there is a slave on which to build # it. Grab the oldest request, see if we can merge it with anything # else. req = self.buildable.pop(0) self.builder_status.removeBuildRequest(req.status) mergers = [] for br in self.buildable[:]: if req.canBeMergedWith(br): self.buildable.remove(br) self.builder_status.removeBuildRequest(br.status) mergers.append(br) requests = [req] + mergers # Create a new build from our build factory and set ourself as the # builder. build = self.buildFactory.newBuild(requests) build.setBuilder(self) build.setLocks(self.locks) # start it self.startBuild(build, sb) def startBuild(self, build, sb): """Start a build on the given slave. @param build: the L{base.Build} to start @param sb: the L{SlaveBuilder} which will host this build @return: a Deferred which fires with a L{buildbot.interfaces.IBuildControl} that can be used to stop the Build, or to access a L{buildbot.interfaces.IBuildStatus} which will watch the Build as it runs. """ self.building.append(build) self.updateBigStatus() log.msg("starting build %s.. pinging the slave %s" % (build, sb)) # ping the slave to make sure they're still there. 
If they've fallen # off the map (due to a NAT timeout or something), this will fail in # a couple of minutes, depending upon the TCP timeout. TODO: consider # making this time out faster, or at least characterize the likely # duration. d = sb.ping(self.START_BUILD_TIMEOUT) d.addCallback(self._startBuild_1, build, sb) return d def _startBuild_1(self, res, build, sb): if not res: return self._startBuildFailed("slave ping failed", build, sb) # The buildslave is ready to go. sb.buildStarted() sets its state to # BUILDING (so we won't try to use it for any other builds). This # gets set back to IDLE by the Build itself when it finishes. sb.buildStarted() d = sb.remote.callRemote("startBuild") d.addCallbacks(self._startBuild_2, self._startBuildFailed, callbackArgs=(build, sb), errbackArgs=(build, sb)) return d def _startBuild_2(self, res, build, sb): # create the BuildStatus object that goes with the Build bs = self.builder_status.newBuild() # start the build. This will first set up the steps, then tell the # BuildStatus that it has started, which will announce it to the # world (through our BuilderStatus object, which is its parent). # Finally it will start the actual build process. d = build.startBuild(bs, self.expectations, sb) d.addCallback(self.buildFinished, sb) d.addErrback(log.err) # this shouldn't happen. if it does, the slave # will be wedged for req in build.requests: req.buildStarted(build, bs) return build # this is the IBuildControl def _startBuildFailed(self, why, build, sb): # put the build back on the buildable list log.msg("I tried to tell the slave that the build %s started, but " "remote_startBuild failed: %s" % (build, why)) # release the slave. This will queue a call to maybeStartBuild, which # will fire after other notifyOnDisconnect handlers have marked the # slave as disconnected (so we don't try to use it again). sb.buildFinished() log.msg("re-queueing the BuildRequest") self.building.remove(build) for req in build.requests: self.buildable.insert(0, req) # the interrupted build gets first # priority self.builder_status.addBuildRequest(req.status) def buildFinished(self, build, sb): """This is called when the Build has finished (either success or failure). Any exceptions during the build are reported with results=FAILURE, not with an errback.""" # by the time we get here, the Build has already released the slave # (which queues a call to maybeStartBuild) self.building.remove(build) for req in build.requests: req.finished(build.build_status) def setExpectations(self, progress): """Mark the build as successful and update expectations for the next build. Only call this when the build did not fail in any way that would invalidate the time expectations generated by it. (if the compile failed and thus terminated early, we can't use the last build to predict how long the next one will take). """ if self.expectations: self.expectations.update(progress) else: # the first time we get a good build, create our Expectations # based upon its results self.expectations = Expectations(progress) log.msg("new expectations: %s seconds" % \ self.expectations.expectedBuildTime()) def shutdownSlave(self): if self.remote: self.remote.callRemote("shutdown")
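# Two small illustrations for the class above (sketches, not part of the
# original module). Tests that need deterministic slave selection can
# disable the random choice made in maybeStartBuild:
#
#   Builder.CHOOSE_SLAVES_RANDOMLY = False  # always use available_slaves[0]
#
# And the __getstate__/__setstate__ pair means a pickled Builder keeps its
# queue and expectations but forgets its live state:
#
#   import cPickle
#   revived = cPickle.loads(cPickle.dumps(builder))  # 'builder' is a
#   # placeholder instance; .building and .slaves come back empty and are
#   # re-established as slaves reconnect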
class Builder(pb.Referenceable, service.MultiService): """I manage all Builds of a given type. Each Builder is created by an entry in the config file (the c['builders'] list), with a number of parameters. One of these parameters is the L{buildbot.process.factory.BuildFactory} object that is associated with this Builder. The factory is responsible for creating new L{Build<buildbot.process.build.Build>} objects. Each Build object defines when and how the build is performed, so a new Factory or Builder should be defined to control this behavior. The Builder holds on to a number of L{BuildRequest} objects in a list named C{.buildable}. Incoming BuildRequest objects will be added to this list, or (if possible) merged into an existing request. When a slave becomes available, I will use my C{BuildFactory} to turn the request into a new C{Build} object. The C{BuildRequest} is forgotten, the C{Build} goes into C{.building} while it runs. Once the build finishes, I will discard it. I maintain a list of available SlaveBuilders, one for each connected slave that the C{slavenames} parameter says we can use. Some of these will be idle, some of them will be busy running builds for me. If there are multiple slaves, I can run multiple builds at once. I also manage forced builds, progress expectation (ETA) management, and some status delivery chores. @type buildable: list of L{buildbot.process.buildrequest.BuildRequest} @ivar buildable: BuildRequests that are ready to build, but which are waiting for a buildslave to be available. @type building: list of L{buildbot.process.build.Build} @ivar building: Builds that are actively running @type slaves: list of L{buildbot.buildslave.BuildSlave} objects @ivar slaves: the slaves currently available for building """ expectations = None # this is created the first time we get a good build def __init__(self, setup, builder_status): """ @type setup: dict @param setup: builder setup data, as stored in BuildmasterConfig['builders']. Contains name, slavename(s), builddir, slavebuilddir, factory, locks. 
@type builder_status: L{buildbot.status.builder.BuilderStatus} """ service.MultiService.__init__(self) self.name = setup['name'] self.slavenames = [] if setup.has_key('slavename'): self.slavenames.append(setup['slavename']) if setup.has_key('slavenames'): self.slavenames.extend(setup['slavenames']) self.builddir = setup['builddir'] self.slavebuilddir = setup['slavebuilddir'] self.buildFactory = setup['factory'] self.nextSlave = setup.get('nextSlave') if self.nextSlave is not None and not callable(self.nextSlave): raise ValueError("nextSlave must be callable") self.locks = setup.get("locks", []) self.env = setup.get('env', {}) assert isinstance(self.env, dict) if setup.has_key('periodicBuildTime'): raise ValueError("periodicBuildTime can no longer be defined as" " part of the Builder: use scheduler.Periodic" " instead") self.nextBuild = setup.get('nextBuild') if self.nextBuild is not None and not callable(self.nextBuild): raise ValueError("nextBuild must be callable") self.buildHorizon = setup.get('buildHorizon') self.logHorizon = setup.get('logHorizon') self.eventHorizon = setup.get('eventHorizon') self.mergeRequests = setup.get('mergeRequests', True) self.properties = setup.get('properties', {}) self.category = setup.get('category', None) # build/wannabuild slots: Build objects move along this sequence self.building = [] # old_building holds active builds that were stolen from a predecessor self.old_building = weakref.WeakKeyDictionary() # buildslaves which have connected but which are not yet available. # These are always in the ATTACHING state. self.attaching_slaves = [] # buildslaves at our disposal. Each SlaveBuilder instance has a # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a # Build is about to start, to make sure that they're still alive. self.slaves = [] self.builder_status = builder_status self.builder_status.setSlavenames(self.slavenames) self.builder_status.buildHorizon = self.buildHorizon self.builder_status.logHorizon = self.logHorizon self.builder_status.eventHorizon = self.eventHorizon self.reclaim_svc = internet.TimerService(10 * 60, self.reclaimAllBuilds) self.reclaim_svc.setServiceParent(self) # for testing, to help synchronize tests self.watchers = { 'attach': [], 'detach': [], 'detach_all': [], 'idle': [] } self.run_count = 0 # add serialized-invocation behavior to maybeStartBuild self.maybeStartBuild = util.SerializedInvocation( self.doMaybeStartBuild) def stopService(self): d = defer.maybeDeferred(lambda: service.MultiService.stopService(self)) def flushMaybeStartBuilds(_): # at this point, self.running = False, so another maybeStartBuilds # invocation won't hurt anything, but it also will not complete # until any currently-running invocations are done. 
return self.maybeStartBuild() d.addCallback(flushMaybeStartBuilds) return d def setBotmaster(self, botmaster): self.botmaster = botmaster self.db = botmaster.db self.master_name = botmaster.master_name self.master_incarnation = botmaster.master_incarnation def compareToSetup(self, setup): diffs = [] setup_slavenames = [] if setup.has_key('slavename'): setup_slavenames.append(setup['slavename']) setup_slavenames.extend(setup.get('slavenames', [])) if setup_slavenames != self.slavenames: diffs.append('slavenames changed from %s to %s' \ % (self.slavenames, setup_slavenames)) if setup['builddir'] != self.builddir: diffs.append('builddir changed from %s to %s' \ % (self.builddir, setup['builddir'])) if setup['slavebuilddir'] != self.slavebuilddir: diffs.append('slavebuilddir changed from %s to %s' \ % (self.slavebuilddir, setup['slavebuilddir'])) if setup['factory'] != self.buildFactory: # compare objects diffs.append('factory changed') if setup.get('locks', []) != self.locks: diffs.append('locks changed from %s to %s' % (self.locks, setup.get('locks'))) if setup.get('env', {}) != self.env: diffs.append('env changed from %s to %s' % (self.env, setup.get('env', {}))) if setup.get('nextSlave') != self.nextSlave: diffs.append('nextSlave changed from %s to %s' % (self.nextSlave, setup.get('nextSlave'))) if setup.get('nextBuild') != self.nextBuild: diffs.append('nextBuild changed from %s to %s' % (self.nextBuild, setup.get('nextBuild'))) if setup.get('buildHorizon', None) != self.buildHorizon: diffs.append('buildHorizon changed from %s to %s' % (self.buildHorizon, setup.get('buildHorizon', None))) if setup.get('logHorizon', None) != self.logHorizon: diffs.append('logHorizon changed from %s to %s' % (self.logHorizon, setup.get('logHorizon', None))) if setup.get('eventHorizon', None) != self.eventHorizon: diffs.append('eventHorizon changed from %s to %s' % (self.eventHorizon, setup.get('eventHorizon', None))) if setup.get('category', None) != self.category: diffs.append('category changed from %r to %r' % (self.category, setup.get('category', None))) return diffs def __repr__(self): return "<Builder %r at %d>" % (self.name, id(self)) def triggerNewBuildCheck(self): self.botmaster.triggerNewBuildCheck() def run(self): """Check for work to be done. This should be called any time I might be able to start a job: - when the Builder is first created - when a new job has been added to the [buildrequests] DB table - when a slave has connected If I have both an available slave and the database contains a BuildRequest that I can handle, I will claim the BuildRequest and start the build. When the build finishes, I will retire the BuildRequest. """ # overall plan: # move .expectations to DB # if we're not running, we may still be called from leftovers from # a run of the loop, so just ignore the call.
if not self.running: return self.run_count += 1 available_slaves = [sb for sb in self.slaves if sb.isAvailable()] if not available_slaves: self.updateBigStatus() return d = self.db.runInteraction(self._claim_buildreqs, available_slaves) d.addCallback(self._start_builds) return d # slave-managers must refresh their claim on a build at least once an # hour, less any inter-manager clock skew RECLAIM_INTERVAL = 1 * 3600 def _claim_buildreqs(self, t, available_slaves): # return a dict mapping slave -> (brid,ssid) now = util.now() old = now - self.RECLAIM_INTERVAL requests = self.db.get_unclaimed_buildrequests(self.name, old, self.master_name, self.master_incarnation, t) assignments = {} while requests and available_slaves: sb = self._choose_slave(available_slaves) if not sb: log.msg("%s: want to start build, but we don't have a remote" % self) break available_slaves.remove(sb) breq = self._choose_build(requests) if not breq: log.msg("%s: went to start build, but nextBuild said not to" % self) break requests.remove(breq) merged_requests = [breq] for other_breq in requests[:]: if (self.mergeRequests and self.botmaster.shouldMergeRequests( self, breq, other_breq)): requests.remove(other_breq) merged_requests.append(other_breq) assignments[sb] = merged_requests brids = [br.id for br in merged_requests] self.db.claim_buildrequests(now, self.master_name, self.master_incarnation, brids, t) return assignments def _choose_slave(self, available_slaves): # note: this might return None if the nextSlave() function decided to # not give us anything if self.nextSlave: try: return self.nextSlave(self, available_slaves) except: log.msg("Exception choosing next slave") log.err(Failure()) return None return random.choice(available_slaves) def _choose_build(self, buildable): if self.nextBuild: try: return self.nextBuild(self, buildable) except: log.msg("Exception choosing next build") log.err(Failure()) return None return buildable[0] def _start_builds(self, assignments): # because _claim_buildreqs runs in a separate thread, we might have # lost a slave by this point. We treat that case the same as if we # lose the slave right after the build starts: the initial ping # fails. for (sb, requests) in assignments.items(): build = self.buildFactory.newBuild(requests) build.setBuilder(self) build.setLocks(self.locks) if len(self.env) > 0: build.setSlaveEnvironment(self.env) self.startBuild(build, sb) self.updateBigStatus() def getBuildable(self, limit=None): return self.db.runInteractionNow(self._getBuildable, limit) def _getBuildable(self, t, limit): now = util.now() old = now - self.RECLAIM_INTERVAL return self.db.get_unclaimed_buildrequests(self.name, old, self.master_name, self.master_incarnation, t, limit) def getOldestRequestTime(self): """Returns the timestamp of the oldest build request for this builder. If there are no build requests, None is returned.""" buildable = self.getBuildable(1) if buildable: # TODO: this is sorted by priority first, not strictly reqtime return buildable[0].getSubmitTime() return None def cancelBuildRequest(self, brid): return self.db.cancel_buildrequests([brid]) def consumeTheSoulOfYourPredecessor(self, old): """Suck the brain out of an old Builder. This takes all the runtime state from an existing Builder and moves it into ourselves. This is used when a Builder is changed in the master.cfg file: the new Builder has a different factory, but we want all the builds that were queued for the old one to get processed by the new one. Any builds which are already running will keep running. 
The new Builder will get as many of the old SlaveBuilder objects as it wants.""" log.msg("consumeTheSoulOfYourPredecessor: %s feeding upon %s" % (self, old)) # all pending builds are stored in the DB, so we don't have to do # anything to claim them. The old builder will be stopService'd, # which should make sure they don't start any new work # this is kind of silly, but the builder status doesn't get updated # when the config changes, yet it stores the category. So: self.builder_status.category = self.category # old.building (i.e. builds which are still running) is not migrated # directly: it keeps track of builds which were in progress in the # old Builder. When those builds finish, the old Builder will be # notified, not us. However, since the old SlaveBuilder will point to # us, it is our maybeStartBuild() that will be triggered. if old.building: self.builder_status.setBigState("building") # however, we do grab a weakref to the active builds, so that our # BuilderControl can see them and stop them. We use a weakref because # we aren't the one to get notified, so there isn't a convenient # place to remove it from self.building . for b in old.building: self.old_building[b] = None for b in old.old_building: self.old_building[b] = None # Our set of slavenames may be different. Steal any of the old # buildslaves that we want to keep using. for sb in old.slaves[:]: if sb.slave.slavename in self.slavenames: log.msg(" stealing buildslave %s" % sb) self.slaves.append(sb) old.slaves.remove(sb) sb.setBuilder(self) # old.attaching_slaves: # these SlaveBuilders are waiting on a sequence of calls: # remote.setMaster and remote.print . When these two complete, # old._attached will be fired, which will add a 'connect' event to # the builder_status and try to start a build. However, we've pulled # everything out of the old builder's queue, so it will have no work # to do. The outstanding remote.setMaster/print call will be holding # the last reference to the old builder, so it will disappear just # after that response comes back. # # The BotMaster will ask the slave to re-set their list of Builders # shortly after this function returns, which will cause our # attached() method to be fired with a bunch of references to remote # SlaveBuilders, some of which we already have (by stealing them # from the old Builder), some of which will be new. The new ones # will be re-attached. 
# Therefore, we don't need to do anything about old.attaching_slaves return # all done def reclaimAllBuilds(self): try: now = util.now() brids = set() for b in self.building: brids.update([br.id for br in b.requests]) for b in self.old_building: brids.update([br.id for br in b.requests]) self.db.claim_buildrequests(now, self.master_name, self.master_incarnation, brids) except: log.msg("Error in reclaimAllBuilds") log.err() def getBuild(self, number): for b in self.building: if b.build_status and b.build_status.number == number: return b for b in self.old_building.keys(): if b.build_status and b.build_status.number == number: return b return None def fireTestEvent(self, name, fire_with=None): if fire_with is None: fire_with = self watchers = self.watchers[name] self.watchers[name] = [] for w in watchers: eventually(w.callback, fire_with) def addLatentSlave(self, slave): assert interfaces.ILatentBuildSlave.providedBy(slave) for s in self.slaves: if s == slave: break else: sb = slavebuilder.LatentSlaveBuilder(slave, self) self.builder_status.addPointEvent( ['added', 'latent', slave.slavename]) self.slaves.append(sb) self.triggerNewBuildCheck() def attached(self, slave, remote, commands): """This is invoked by the BuildSlave when the self.slavename bot registers their builder. @type slave: L{buildbot.buildslave.BuildSlave} @param slave: the BuildSlave that represents the buildslave as a whole @type remote: L{twisted.spread.pb.RemoteReference} @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder} @type commands: dict: string -> string, or None @param commands: provides the slave's version of each RemoteCommand @rtype: L{twisted.internet.defer.Deferred} @return: a Deferred that fires (with 'self') when the slave-side builder is fully attached and ready to accept commands. """ for s in self.attaching_slaves + self.slaves: if s.slave == slave: # already attached to them. This is fairly common, since # attached() gets called each time we receive the builder # list from the slave, and we ask for it each time we add or # remove a builder. So if the slave is hosting builders # A,B,C, and the config file changes A, we'll remove A and # re-add it, triggering two builder-list requests, getting # two redundant calls to attached() for B, and another two # for C. # # Therefore, when we see that we're already attached, we can # just ignore it. return defer.succeed(self) sb = slavebuilder.SlaveBuilder() sb.setBuilder(self) self.attaching_slaves.append(sb) d = sb.attached(slave, remote, commands) d.addCallback(self._attached) d.addErrback(self._not_attached, slave) return d def _attached(self, sb): self.builder_status.addPointEvent(['connect', sb.slave.slavename]) self.attaching_slaves.remove(sb) self.slaves.append(sb) self.fireTestEvent('attach') return self def _not_attached(self, why, slave): # already log.err'ed by SlaveBuilder._attachFailure # TODO: remove from self.slaves (except that detached() should get # run first, right?) 
log.err(why, 'slave failed to attach') self.builder_status.addPointEvent( ['failed', 'connect', slave.slavename]) # TODO: add an HTMLLogFile of the exception self.fireTestEvent('attach', why) def detached(self, slave): """This is called when the connection to the bot is lost.""" for sb in self.attaching_slaves + self.slaves: if sb.slave == slave: break else: log.msg( "WEIRD: Builder.detached(%s) (%s)" " not in attaching_slaves(%s)" " or slaves(%s)" % (slave, slave.slavename, self.attaching_slaves, self.slaves)) return if sb.state == BUILDING: # the Build's .lostRemote method (invoked by a notifyOnDisconnect # handler) will cause the Build to be stopped, probably right # after the notifyOnDisconnect that invoked us finishes running. pass if sb in self.attaching_slaves: self.attaching_slaves.remove(sb) if sb in self.slaves: self.slaves.remove(sb) self.builder_status.addPointEvent(['disconnect', slave.slavename]) sb.detached() # inform the SlaveBuilder that their slave went away self.updateBigStatus() self.fireTestEvent('detach') if not self.slaves: self.fireTestEvent('detach_all') def updateBigStatus(self): if not self.slaves: self.builder_status.setBigState("offline") elif self.building: self.builder_status.setBigState("building") else: self.builder_status.setBigState("idle") self.fireTestEvent('idle') def startBuild(self, build, sb): """Start a build on the given slave. @param build: the L{base.Build} to start @param sb: the L{SlaveBuilder} which will host this build @return: a Deferred which fires with a L{buildbot.interfaces.IBuildControl} that can be used to stop the Build, or to access a L{buildbot.interfaces.IBuildStatus} which will watch the Build as it runs. """ self.building.append(build) self.updateBigStatus() log.msg("starting build %s using slave %s" % (build, sb)) d = sb.prepare(self.builder_status, build) def _prepared(ready): # If prepare returns True then it is ready and we start a build # If it returns False then we don't start a new build. d = defer.succeed(ready) if not ready: # FIXME: We should perhaps trigger a check to see if there is # any other way to schedule the work log.msg("slave %s can't build %s after all" % (sb, build)) # release the slave. This will queue a call to maybeStartBuild, which # will fire after other notifyOnDisconnect handlers have marked the # slave as disconnected (so we don't try to use it again). # sb.buildFinished() log.msg("re-queueing the BuildRequest %s" % build) self.building.remove(build) self._resubmit_buildreqs(build).addErrback(log.err) sb.slave.releaseLocks() self.triggerNewBuildCheck() return d def _ping(ign): # ping the slave to make sure they're still there. If they've # fallen off the map (due to a NAT timeout or something), this # will fail in a couple of minutes, depending upon the TCP # timeout. # # TODO: This can unnecessarily suspend the starting of a build, in # situations where the slave is live but is pushing lots of data to # us in a build. log.msg("starting build %s.. pinging the slave %s" % (build, sb)) return sb.ping() d.addCallback(_ping) d.addCallback(self._startBuild_1, build, sb) return d d.addCallback(_prepared) return d def _startBuild_1(self, res, build, sb): if not res: return self._startBuildFailed("slave ping failed", build, sb) # The buildslave is ready to go. sb.buildStarted() sets its state to # BUILDING (so we won't try to use it for any other builds). This # gets set back to IDLE by the Build itself when it finishes.
sb.buildStarted() d = sb.remote.callRemote("startBuild") d.addCallbacks(self._startBuild_2, self._startBuildFailed, callbackArgs=(build, sb), errbackArgs=(build, sb)) return d def _startBuild_2(self, res, build, sb): # create the BuildStatus object that goes with the Build bs = self.builder_status.newBuild() # start the build. This will first set up the steps, then tell the # BuildStatus that it has started, which will announce it to the # world (through our BuilderStatus object, which is its parent). # Finally it will start the actual build process. bids = [ self.db.build_started(req.id, bs.number) for req in build.requests ] d = build.startBuild(bs, self.expectations, sb) d.addCallback(self.buildFinished, sb, bids) # this shouldn't happen. if it does, the slave will be wedged d.addErrback(log.err) return build # this is the IBuildControl def _startBuildFailed(self, why, build, sb): # put the build back on the buildable list log.msg("I tried to tell the slave that the build %s started, but " "remote_startBuild failed: %s" % (build, why)) # release the slave. This will queue a call to maybeStartBuild, which # will fire after other notifyOnDisconnect handlers have marked the # slave as disconnected (so we don't try to use it again). sb.buildFinished() log.msg("re-queueing the BuildRequest") self.building.remove(build) self._resubmit_buildreqs(build).addErrback(log.err) def setupProperties(self, props): props.setProperty("buildername", self.name, "Builder") if len(self.properties) > 0: for propertyname in self.properties: props.setProperty(propertyname, self.properties[propertyname], "Builder") def buildFinished(self, build, sb, bids): """This is called when the Build has finished (either success or failure). Any exceptions during the build are reported with results=FAILURE, not with an errback.""" # by the time we get here, the Build has already released the slave # (which queues a call to maybeStartBuild) self.db.builds_finished(bids) results = build.build_status.getResults() self.building.remove(build) if results == RETRY: self._resubmit_buildreqs(build).addErrback( log.err) # returns Deferred else: brids = [br.id for br in build.requests] self.db.retire_buildrequests(brids, results) if sb.slave: sb.slave.releaseLocks() self.triggerNewBuildCheck() def _resubmit_buildreqs(self, build): brids = [br.id for br in build.requests] return self.db.resubmit_buildrequests(brids) def setExpectations(self, progress): """Mark the build as successful and update expectations for the next build. Only call this when the build did not fail in any way that would invalidate the time expectations generated by it. (if the compile failed and thus terminated early, we can't use the last build to predict how long the next one will take). """ if self.expectations: self.expectations.update(progress) else: # the first time we get a good build, create our Expectations # based upon its results self.expectations = Expectations(progress) log.msg("new expectations: %s seconds" % \ self.expectations.expectedBuildTime()) # Build Creation # maybeStartBuild is called by the botmaster whenever this builder should # check for and potentially start new builds. As an optimization, # invocations of this function are collapsed as much as possible while # maintaining the invariant that at least one execution of the entire # algorithm will occur between the invocation of the method and the firing # of its Deferred. This is done with util.SerializedInvocation; see # Builder.__init__, above. 
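    # Editorial sketch (not part of the original source).  The collapsing
    # behaviour described in the comment above can be pictured as a small
    # wrapper around the method: while one run is in flight, remember the
    # callers that arrive during it and start exactly one more run for them
    # afterwards.  This is an illustrative re-implementation under assumed
    # semantics -- buildbot's real util.SerializedInvocation may differ in
    # detail, and the class name here is hypothetical.  (`defer` is the
    # twisted.internet.defer module already imported by this file.)
    class _CollapsedInvocationSketch(object):

        def __init__(self, method):
            self.method = method        # a callable returning a Deferred
            self.running = False
            self.pending = []

        def __call__(self):
            d = defer.Deferred()
            self.pending.append(d)
            if not self.running:
                self._start()
            return d

        def _start(self):
            self.running = True
            # claim the callers waiting right now; anyone arriving during
            # the run is served by the *next* run, which preserves the
            # invariant described above
            claimed, self.pending = self.pending, []
            run = defer.maybeDeferred(self.method)

            def done(result):
                # a real implementation would log a Failure here rather
                # than silently swallowing it
                self.running = False
                for d in claimed:
                    d.callback(None)
                if self.pending:
                    self._start()
            run.addBoth(done)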
@defer.deferredGenerator def doMaybeStartBuild(self): # first, if we're not running, then don't start builds; stopService # uses this to ensure that any ongoing doMaybeStartBuild invocations # are complete before it stops. if not self.running: return # Check for available slaves. If there are no available slaves, then # there is no sense continuing available_slavebuilders = [ sb for sb in self.slaves if sb.isAvailable() ] if not available_slavebuilders: self.updateBigStatus() return # now, get the available build requests wfd = defer.waitForDeferred( self.master.db.buildrequests.getBuildRequests( buildername=self.name, claimed=False)) yield wfd unclaimed_requests = wfd.getResult() # sort by submitted_at, so the first is the oldest unclaimed_requests.sort(key=lambda brd: brd['submitted_at']) # get the mergeRequests function for later mergeRequests_fn = self._getMergeRequestsFn() # match them up until we're out of options while available_slavebuilders and unclaimed_requests: # first, choose a slave (using nextSlave) wfd = defer.waitForDeferred( self._chooseSlave(available_slavebuilders)) yield wfd slavebuilder = wfd.getResult() if not slavebuilder: break if slavebuilder not in available_slavebuilders: log.msg(("nextSlave chose a nonexistent slave for builder " "'%s'; cannot start build") % self.name) break # then choose a request (using nextBuild) wfd = defer.waitForDeferred(self._chooseBuild(unclaimed_requests)) yield wfd breq = wfd.getResult() if not breq: break if breq not in unclaimed_requests: log.msg(("nextBuild chose a nonexistent request for builder " "'%s'; cannot start build") % self.name) break # merge the chosen request with any compatible requests in the # queue wfd = defer.waitForDeferred( self._mergeRequests(breq, unclaimed_requests, mergeRequests_fn)) yield wfd breqs = wfd.getResult() # try to claim the build requests try: wfd = defer.waitForDeferred( self.master.db.buildrequests.claimBuildRequests( [brdict['brid'] for brdict in breqs])) yield wfd wfd.getResult() except buildrequests.AlreadyClaimedError: # one or more of the build requests was already claimed; # re-fetch the now-partially-claimed build requests and keep # trying to match them self._breakBrdictRefloops(unclaimed_requests) wfd = defer.waitForDeferred( self.master.db.buildrequests.getBuildRequests( buildername=self.name, claimed=False)) yield wfd unclaimed_requests = wfd.getResult() # go around the loop again continue # claim was successful, so initiate a build for this set of # requests. Note that if the build fails from here on out (e.g., # because a slave has failed), it will be handled outside of this # loop. TODO: test that! wfd = defer.waitForDeferred( self._startBuildFor(slavebuilder, breqs)) yield wfd wfd.getResult() # and finally remove the buildrequests and slavebuilder from the # respective queues self._breakBrdictRefloops(breqs) for breq in breqs: unclaimed_requests.remove(breq) available_slavebuilders.remove(slavebuilder) self._breakBrdictRefloops(unclaimed_requests) self.updateBigStatus() return # a few utility functions to make the maybeStartBuild a bit shorter and # easier to read def _chooseSlave(self, available_slavebuilders): """ Choose the next slave, using the C{nextSlave} configuration if available, and falling back to C{random.choice} otherwise. 
@param available_slavebuilders: list of slavebuilders to choose from @returns: SlaveBuilder or None via Deferred """ if self.nextSlave: return defer.maybeDeferred( lambda: self.nextSlave(self, available_slavebuilders)) else: return defer.succeed(random.choice(available_slavebuilders)) def _chooseBuild(self, buildrequests): """ Choose the next build from the given set of build requests (represented as dictionaries). Defaults to returning the first request (earliest submitted). @param buildrequests: sorted list of build request dictionaries @returns: a build request dictionary or None via Deferred """ if self.nextBuild: # nextBuild expects BuildRequest objects, so instantiate them here # and cache them in the dictionaries d = defer.gatherResults([ self._brdictToBuildRequest(brdict) for brdict in buildrequests ]) d.addCallback( lambda requestobjects: self.nextBuild(self, requestobjects)) def to_brdict(brobj): # get the brdict for this object back return brobj.brdict d.addCallback(to_brdict) return d else: return defer.succeed(buildrequests[0]) def _getMergeRequestsFn(self): """Helper function to determine which mergeRequests function to use from L{_mergeRequests}, or None for no merging""" # first, seek through builder, global, and the default mergeRequests_fn = self.mergeRequests if mergeRequests_fn is None: mergeRequests_fn = self.master.mergeRequests if mergeRequests_fn is None: mergeRequests_fn = True # then translate False and True properly if mergeRequests_fn is False: mergeRequests_fn = None elif mergeRequests_fn is True: mergeRequests_fn = buildrequest.BuildRequest.canBeMergedWith return mergeRequests_fn @defer.deferredGenerator def _mergeRequests(self, breq, unclaimed_requests, mergeRequests_fn): """Use C{mergeRequests_fn} to merge C{breq} against C{unclaimed_requests}, where both are build request dictionaries""" # short circuit if there is no merging to do if not mergeRequests_fn or len(unclaimed_requests) == 1: yield [breq] return # we'll need BuildRequest objects, so get those first wfd = defer.waitForDeferred( defer.gatherResults([ self._brdictToBuildRequest(brdict) for brdict in unclaimed_requests ])) yield wfd unclaimed_request_objects = wfd.getResult() breq_object = unclaimed_request_objects.pop( unclaimed_requests.index(breq)) # gather the mergeable requests merged_request_objects = [breq_object] for other_breq_object in unclaimed_request_objects: wfd = defer.waitForDeferred( defer.maybeDeferred( lambda: mergeRequests_fn(breq_object, other_breq_object))) yield wfd if wfd.getResult(): merged_request_objects.append(other_breq_object) # convert them back to brdicts and return merged_requests = [br.brdict for br in merged_request_objects] yield merged_requests def _brdictToBuildRequest(self, brdict): """ Convert a build request dictionary to a L{buildrequest.BuildRequest} object, caching the result in the dictionary itself. The resulting buildrequest will have a C{brdict} attribute pointing back to this dictionary. Note that this does not perform any locking - be careful that it is only called once at a time for each build request dictionary. 
@param brdict: dictionary to convert @returns: L{buildrequest.BuildRequest} via Deferred """ if 'brobj' in brdict: return defer.succeed(brdict['brobj']) d = buildrequest.BuildRequest.fromBrdict(self.master, brdict) def keep(buildrequest): brdict['brobj'] = buildrequest buildrequest.brdict = brdict return buildrequest d.addCallback(keep) return d def _breakBrdictRefloops(self, requests): """Break the reference loops created by L{_brdictToBuildRequest}""" for brdict in requests: try: del brdict['brobj'].brdict except KeyError: pass
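# Editorial sketch (not part of the original source): _brdictToBuildRequest
# above creates a deliberate reference cycle -- brdict['brobj'] points at
# the request object and brobj.brdict points back at the dictionary -- so
# that both directions of the lookup are O(1) while the request sits in the
# queue, and _breakBrdictRefloops deletes one edge of that cycle when the
# dictionaries are discarded.  A toy, standalone model of the same caching
# pattern (all names here are hypothetical):


class _FakeRequest(object):

    def __init__(self, brdict):
        self.brdict = brdict            # back-reference: object -> dict


def _to_request(brdict):
    # cache the constructed object in the dictionary, as the code above does
    if 'brobj' not in brdict:
        brdict['brobj'] = _FakeRequest(brdict)  # forward ref: dict -> object
    return brdict['brobj']


def _break_refloops(brdicts):
    # mirror _breakBrdictRefloops: remove the object -> dict edge
    for brdict in brdicts:
        try:
            del brdict['brobj'].brdict
        except KeyError:
            pass


if __name__ == '__main__':
    queue = [{'brid': 1}, {'brid': 2}]
    assert _to_request(queue[0]) is _to_request(queue[0])  # cached instance
    _break_refloops(queue)                                 # cycle now broken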
class Builder(config.ReconfigurableServiceMixin, pb.Referenceable, service.MultiService): # reconfigure builders before slaves reconfig_priority = 196 def __init__(self, name): service.MultiService.__init__(self) self.name = name # this is created the first time we get a good build self.expectations = None # build/wannabuild slots: Build objects move along this sequence self.building = [] # old_building holds active builds that were stolen from a predecessor self.old_building = weakref.WeakKeyDictionary() # buildslaves which have connected but which are not yet available. # These are always in the ATTACHING state. self.attaching_slaves = [] # buildslaves at our disposal. Each SlaveBuilder instance has a # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a # Build is about to start, to make sure that they're still alive. self.slaves = [] self.config = None self.builder_status = None self.reclaim_svc = internet.TimerService(10 * 60, self.reclaimAllBuilds) self.reclaim_svc.setServiceParent(self) def reconfigService(self, new_config): # find this builder in the config for builder_config in new_config.builders: if builder_config.name == self.name: break else: assert 0, "no config found for builder '%s'" % self.name # set up a builder status object on the first reconfig if not self.builder_status: self.builder_status = self.master.status.builderAdded( builder_config.name, builder_config.builddir, builder_config.category) self.config = builder_config self.builder_status.setSlavenames(self.config.slavenames) return defer.succeed(None) def stopService(self): d = defer.maybeDeferred(lambda: service.MultiService.stopService(self)) def flushMaybeStartBuilds(_): # at this point, self.running = False, so another maybeStartBuild # invocation won't hurt anything, but it also will not complete # until any currently-running invocations are done, so we know that # the builder is quiescent at that time. return self.maybeStartBuild() d.addCallback(flushMaybeStartBuilds) return d def __repr__(self): return "<Builder '%r' at %d>" % (self.name, id(self)) @defer.deferredGenerator def getOldestRequestTime(self): """Returns the submitted_at of the oldest unclaimed build request for this builder, or None if there are no build requests. 
@returns: datetime instance or None, via Deferred """ wfd = defer.waitForDeferred( self.master.db.buildrequests.getBuildRequests( buildername=self.name, claimed=False)) yield wfd unclaimed = wfd.getResult() if unclaimed: unclaimed = [brd['submitted_at'] for brd in unclaimed] unclaimed.sort() yield unclaimed[0] else: yield None def reclaimAllBuilds(self): brids = set() for b in self.building: brids.update([br.id for br in b.requests]) for b in self.old_building: brids.update([br.id for br in b.requests]) if not brids: return defer.succeed(None) d = self.master.db.buildrequests.reclaimBuildRequests(brids) d.addErrback(log.err, 'while re-claiming running BuildRequests') return d def getBuild(self, number): for b in self.building: if b.build_status and b.build_status.number == number: return b for b in self.old_building.keys(): if b.build_status and b.build_status.number == number: return b return None def addLatentSlave(self, slave): assert interfaces.ILatentBuildSlave.providedBy(slave) for s in self.slaves: if s == slave: break else: sb = slavebuilder.LatentSlaveBuilder(slave, self) self.builder_status.addPointEvent( ['added', 'latent', slave.slavename]) self.slaves.append(sb) self.botmaster.maybeStartBuildsForBuilder(self.name) def attached(self, slave, remote, commands): """This is invoked by the BuildSlave when the self.slavename bot registers their builder. @type slave: L{buildbot.buildslave.BuildSlave} @param slave: the BuildSlave that represents the buildslave as a whole @type remote: L{twisted.spread.pb.RemoteReference} @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder} @type commands: dict: string -> string, or None @param commands: provides the slave's version of each RemoteCommand @rtype: L{twisted.internet.defer.Deferred} @return: a Deferred that fires (with 'self') when the slave-side builder is fully attached and ready to accept commands. """ for s in self.attaching_slaves + self.slaves: if s.slave == slave: # already attached to them. This is fairly common, since # attached() gets called each time we receive the builder # list from the slave, and we ask for it each time we add or # remove a builder. So if the slave is hosting builders # A,B,C, and the config file changes A, we'll remove A and # re-add it, triggering two builder-list requests, getting # two redundant calls to attached() for B, and another two # for C. # # Therefore, when we see that we're already attached, we can # just ignore it. return defer.succeed(self) sb = slavebuilder.SlaveBuilder() sb.setBuilder(self) self.attaching_slaves.append(sb) d = sb.attached(slave, remote, commands) d.addCallback(self._attached) d.addErrback(self._not_attached, slave) return d def _attached(self, sb): self.builder_status.addPointEvent(['connect', sb.slave.slavename]) self.attaching_slaves.remove(sb) self.slaves.append(sb) self.updateBigStatus() return self def _not_attached(self, why, slave): # already log.err'ed by SlaveBuilder._attachFailure # TODO: remove from self.slaves (except that detached() should get # run first, right?) 
log.err(why, 'slave failed to attach') self.builder_status.addPointEvent( ['failed', 'connect', slave.slavename]) # TODO: add an HTMLLogFile of the exception def detached(self, slave): """This is called when the connection to the bot is lost.""" for sb in self.attaching_slaves + self.slaves: if sb.slave == slave: break else: log.msg( "WEIRD: Builder.detached(%s) (%s)" " not in attaching_slaves(%s)" " or slaves(%s)" % (slave, slave.slavename, self.attaching_slaves, self.slaves)) return if sb.state == BUILDING: # the Build's .lostRemote method (invoked by a notifyOnDisconnect # handler) will cause the Build to be stopped, probably right # after the notifyOnDisconnect that invoked us finishes running. pass if sb in self.attaching_slaves: self.attaching_slaves.remove(sb) if sb in self.slaves: self.slaves.remove(sb) self.builder_status.addPointEvent(['disconnect', slave.slavename]) sb.detached() # inform the SlaveBuilder that their slave went away self.updateBigStatus() def updateBigStatus(self): if not self.slaves: self.builder_status.setBigState("offline") elif self.building or self.old_building: self.builder_status.setBigState("building") else: self.builder_status.setBigState("idle") @defer.deferredGenerator def _startBuildFor(self, slavebuilder, buildrequests): """Start a build on the given slave. @param build: the L{base.Build} to start @param sb: the L{SlaveBuilder} which will host this build @return: (via Deferred) boolean indicating that the build was succesfully started. """ # as of the Python versions supported now, try/finally can't be used # with a generator expression. So instead, we push cleanup functions # into a list so that, at any point, we can abort this operation. cleanups = [] def run_cleanups(): while cleanups: fn = cleanups.pop() fn() # the last cleanup we want to perform is to update the big # status based on any other cleanup cleanups.append(lambda: self.updateBigStatus()) build = self.config.factory.newBuild(buildrequests) build.setBuilder(self) log.msg("starting build %s using slave %s" % (build, slavebuilder)) # set up locks build.setLocks(self.config.locks) cleanups.append(lambda: slavebuilder.slave.releaseLocks()) if len(self.config.env) > 0: build.setSlaveEnvironment(self.config.env) # append the build to self.building self.building.append(build) cleanups.append(lambda: self.building.remove(build)) # update the big status accordingly self.updateBigStatus() try: wfd = defer.waitForDeferred( slavebuilder.prepare(self.builder_status, build)) yield wfd ready = wfd.getResult() except: log.err(failure.Failure(), 'while preparing slavebuilder:') ready = False # If prepare returns True then it is ready and we start a build # If it returns false then we don't start a new build. if not ready: log.msg("slave %s can't build %s after all; re-queueing the " "request" % (build, slavebuilder)) run_cleanups() yield False return # ping the slave to make sure they're still there. If they've # fallen off the map (due to a NAT timeout or something), this # will fail in a couple of minutes, depending upon the TCP # timeout. # # TODO: This can unnecessarily suspend the starting of a build, in # situations where the slave is live but is pushing lots of data to # us in a build. log.msg("starting build %s.. 
pinging the slave %s" % (build, slavebuilder)) try: wfd = defer.waitForDeferred(slavebuilder.ping()) yield wfd ping_success = wfd.getResult() except: log.err(failure.Failure(), 'while pinging slave before build:') ping_success = False if not ping_success: log.msg("slave ping failed; re-queueing the request") run_cleanups() yield False return # The buildslave is ready to go. slavebuilder.buildStarted() sets its # state to BUILDING (so we won't try to use it for any other builds). # This gets set back to IDLE by the Build itself when it finishes. slavebuilder.buildStarted() cleanups.append(lambda: slavebuilder.buildFinished()) # tell the remote that it's starting a build, too try: wfd = defer.waitForDeferred( slavebuilder.remote.callRemote("startBuild")) yield wfd wfd.getResult() except: log.err(failure.Failure(), 'while calling remote startBuild:') run_cleanups() yield False return # create the BuildStatus object that goes with the Build bs = self.builder_status.newBuild() # record the build in the db - one row per buildrequest try: bids = [] for req in build.requests: wfd = defer.waitForDeferred( self.master.db.builds.addBuild(req.id, bs.number)) yield wfd bids.append(wfd.getResult()) except: log.err(failure.Failure(), 'while adding rows to build table:') run_cleanups() yield False return # let status know self.master.status.build_started(req.id, self.name, bs) # start the build. This will first set up the steps, then tell the # BuildStatus that it has started, which will announce it to the world # (through our BuilderStatus object, which is its parent). Finally it # will start the actual build process. This is done with a fresh # Deferred since _startBuildFor should not wait until the build is # finished. d = build.startBuild(bs, self.expectations, slavebuilder) d.addCallback(self.buildFinished, slavebuilder, bids) # this shouldn't happen. if it does, the slave will be wedged d.addErrback(log.err) # make sure the builder's status is represented correctly self.updateBigStatus() yield True def setupProperties(self, props): props.setProperty("buildername", self.name, "Builder") if len(self.config.properties) > 0: for propertyname in self.config.properties: props.setProperty(propertyname, self.config.properties[propertyname], "Builder") def buildFinished(self, build, sb, bids): """This is called when the Build has finished (either success or failure). Any exceptions during the build are reported with results=FAILURE, not with an errback.""" # by the time we get here, the Build has already released the slave, # which will trigger a check for any now-possible build requests # (maybeStartBuilds) # mark the builds as finished, although since nothing ever reads this # table, it's not too important that it complete successfully d = self.master.db.builds.finishBuilds(bids) d.addErrback(log.err, 'while marking builds as finished (ignored)') results = build.build_status.getResults() self.building.remove(build) if results == RETRY: self._resubmit_buildreqs(build).addErrback(log.err) else: brids = [br.id for br in build.requests] db = self.master.db d = db.buildrequests.completeBuildRequests(brids, results) d.addCallback( lambda _: self._maybeBuildsetsComplete(build.requests)) # nothing in particular to do with this deferred, so just log it if # it fails.. 
d.addErrback(log.err, 'while marking build requests as completed') if sb.slave: sb.slave.releaseLocks() self.updateBigStatus() @defer.deferredGenerator def _maybeBuildsetsComplete(self, requests): # inform the master that we may have completed a number of buildsets for br in requests: wfd = defer.waitForDeferred( self.master.maybeBuildsetComplete(br.bsid)) yield wfd wfd.getResult() def _resubmit_buildreqs(self, build): brids = [br.id for br in build.requests] return self.master.db.buildrequests.unclaimBuildRequests(brids) def setExpectations(self, progress): """Mark the build as successful and update expectations for the next build. Only call this when the build did not fail in any way that would invalidate the time expectations generated by it. (if the compile failed and thus terminated early, we can't use the last build to predict how long the next one will take). """ if self.expectations: self.expectations.update(progress) else: # the first time we get a good build, create our Expectations # based upon its results self.expectations = Expectations(progress) log.msg("new expectations: %s seconds" % \ self.expectations.expectedBuildTime()) # Build Creation @defer.deferredGenerator def maybeStartBuild(self): # This method is called by the botmaster whenever this builder should # check for and potentially start new builds. Do not call this method # directly - use master.botmaster.maybeStartBuildsForBuilder, or one # of the other similar methods if more appropriate # first, if we're not running, then don't start builds; stopService # uses this to ensure that any ongoing maybeStartBuild invocations # are complete before it stops. if not self.running: return # Check for available slaves. If there are no available slaves, then # there is no sense continuing available_slavebuilders = [ sb for sb in self.slaves if sb.isAvailable() ] if not available_slavebuilders: self.updateBigStatus() return # now, get the available build requests wfd = defer.waitForDeferred( self.master.db.buildrequests.getBuildRequests( buildername=self.name, claimed=False)) yield wfd unclaimed_requests = wfd.getResult() if not unclaimed_requests: self.updateBigStatus() return # sort by submitted_at, so the first is the oldest unclaimed_requests.sort(key=lambda brd: brd['submitted_at']) # get the mergeRequests function for later mergeRequests_fn = self._getMergeRequestsFn() # match them up until we're out of options while available_slavebuilders and unclaimed_requests: # first, choose a slave (using nextSlave) wfd = defer.waitForDeferred( self._chooseSlave(available_slavebuilders)) yield wfd slavebuilder = wfd.getResult() if not slavebuilder: break if slavebuilder not in available_slavebuilders: log.msg(("nextSlave chose a nonexistent slave for builder " "'%s'; cannot start build") % self.name) break # then choose a request (using nextBuild) wfd = defer.waitForDeferred(self._chooseBuild(unclaimed_requests)) yield wfd brdict = wfd.getResult() if not brdict: break if brdict not in unclaimed_requests: log.msg(("nextBuild chose a nonexistent request for builder " "'%s'; cannot start build") % self.name) break # merge the chosen request with any compatible requests in the # queue wfd = defer.waitForDeferred( self._mergeRequests(brdict, unclaimed_requests, mergeRequests_fn)) yield wfd brdicts = wfd.getResult() # try to claim the build requests brids = [brdict['brid'] for brdict in brdicts] try: wfd = defer.waitForDeferred( self.master.db.buildrequests.claimBuildRequests(brids)) yield wfd wfd.getResult() except 
buildrequests.AlreadyClaimedError: # one or more of the build requests was already claimed; # re-fetch the now-partially-claimed build requests and keep # trying to match them self._breakBrdictRefloops(unclaimed_requests) wfd = defer.waitForDeferred( self.master.db.buildrequests.getBuildRequests( buildername=self.name, claimed=False)) yield wfd unclaimed_requests = wfd.getResult() # go around the loop again continue # claim was successful, so initiate a build for this set of # requests. Note that if the build fails from here on out (e.g., # because a slave has failed), it will be handled outside of this # loop. TODO: test that! # _startBuildFor expects BuildRequest objects, so cook some up wfd = defer.waitForDeferred( defer.gatherResults([ self._brdictToBuildRequest(brdict) for brdict in brdicts ])) yield wfd breqs = wfd.getResult() wfd = defer.waitForDeferred( self._startBuildFor(slavebuilder, breqs)) yield wfd build_started = wfd.getResult() if not build_started: # build was not started, so unclaim the build requests wfd = defer.waitForDeferred( self.master.db.buildrequests.unclaimBuildRequests(brids)) yield wfd wfd.getResult() # and try starting builds again. If we still have a working slave, # then this may re-claim the same buildrequests self.botmaster.maybeStartBuildsForBuilder(self.name) # finally, remove the buildrequests and slavebuilder from the # respective queues self._breakBrdictRefloops(brdicts) for brdict in brdicts: unclaimed_requests.remove(brdict) available_slavebuilders.remove(slavebuilder) self._breakBrdictRefloops(unclaimed_requests) self.updateBigStatus() return # a few utility functions to make the maybeStartBuild a bit shorter and # easier to read def _chooseSlave(self, available_slavebuilders): """ Choose the next slave, using the C{nextSlave} configuration if available, and falling back to C{random.choice} otherwise. @param available_slavebuilders: list of slavebuilders to choose from @returns: SlaveBuilder or None via Deferred """ if self.config.nextSlave: return defer.maybeDeferred( lambda: self.config.nextSlave(self, available_slavebuilders)) else: return defer.succeed(random.choice(available_slavebuilders)) def _chooseBuild(self, buildrequests): """ Choose the next build from the given set of build requests (represented as dictionaries). Defaults to returning the first request (earliest submitted). 
@param buildrequests: sorted list of build request dictionaries @returns: a build request dictionary or None via Deferred """ if self.config.nextBuild: # nextBuild expects BuildRequest objects, so instantiate them here # and cache them in the dictionaries d = defer.gatherResults([ self._brdictToBuildRequest(brdict) for brdict in buildrequests ]) d.addCallback(lambda requestobjects: self.config.nextBuild( self, requestobjects)) def to_brdict(brobj): # get the brdict for this object back return brobj.brdict d.addCallback(to_brdict) return d else: return defer.succeed(buildrequests[0]) def _getMergeRequestsFn(self): """Helper function to determine which mergeRequests function to use from L{_mergeRequests}, or None for no merging""" # first, seek through builder, global, and the default mergeRequests_fn = self.config.mergeRequests if mergeRequests_fn is None: mergeRequests_fn = self.master.config.mergeRequests if mergeRequests_fn is None: mergeRequests_fn = True # then translate False and True properly if mergeRequests_fn is False: mergeRequests_fn = None elif mergeRequests_fn is True: mergeRequests_fn = Builder._defaultMergeRequestFn return mergeRequests_fn def _defaultMergeRequestFn(self, req1, req2): return req1.canBeMergedWith(req2) @defer.deferredGenerator def _mergeRequests(self, breq, unclaimed_requests, mergeRequests_fn): """Use C{mergeRequests_fn} to merge C{breq} against C{unclaimed_requests}, where both are build request dictionaries""" # short circuit if there is no merging to do if not mergeRequests_fn or len(unclaimed_requests) == 1: yield [breq] return # we'll need BuildRequest objects, so get those first wfd = defer.waitForDeferred( defer.gatherResults([ self._brdictToBuildRequest(brdict) for brdict in unclaimed_requests ])) yield wfd unclaimed_request_objects = wfd.getResult() breq_object = unclaimed_request_objects.pop( unclaimed_requests.index(breq)) # gather the mergeable requests merged_request_objects = [breq_object] for other_breq_object in unclaimed_request_objects: wfd = defer.waitForDeferred( defer.maybeDeferred(lambda: mergeRequests_fn( self, breq_object, other_breq_object))) yield wfd if wfd.getResult(): merged_request_objects.append(other_breq_object) # convert them back to brdicts and return merged_requests = [br.brdict for br in merged_request_objects] yield merged_requests def _brdictToBuildRequest(self, brdict): """ Convert a build request dictionary to a L{buildrequest.BuildRequest} object, caching the result in the dictionary itself. The resulting buildrequest will have a C{brdict} attribute pointing back to this dictionary. Note that this does not perform any locking - be careful that it is only called once at a time for each build request dictionary. @param brdict: dictionary to convert @returns: L{buildrequest.BuildRequest} via Deferred """ if 'brobj' in brdict: return defer.succeed(brdict['brobj']) d = buildrequest.BuildRequest.fromBrdict(self.master, brdict) def keep(buildrequest): brdict['brobj'] = buildrequest buildrequest.brdict = brdict return buildrequest d.addCallback(keep) return d def _breakBrdictRefloops(self, requests): """Break the reference loops created by L{_brdictToBuildRequest}""" for brdict in requests: try: del brdict['brobj'].brdict except KeyError: pass
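# Editorial sketch (not part of the original source): stripped of the
# Deferred plumbing, the merge step in _mergeRequests above is just
# "partition the unclaimed queue by compatibility with the chosen request".
# The helper and the sample can_merge function below are hypothetical names
# used only for illustration:


def _select_mergeable(chosen, unclaimed, can_merge):
    """Return `chosen` plus every other request in `unclaimed` that
    can_merge(chosen, other) accepts, preserving queue order."""
    if can_merge is None or len(unclaimed) == 1:
        return [chosen]
    merged = [chosen]
    for other in unclaimed:
        if other is not chosen and can_merge(chosen, other):
            merged.append(other)
    return merged


if __name__ == '__main__':
    # e.g. merging only requests for the same branch:
    same_branch = lambda a, b: a['branch'] == b['branch']
    queue = [{'brid': 1, 'branch': 'trunk'},
             {'brid': 2, 'branch': 'feature'},
             {'brid': 3, 'branch': 'trunk'}]
    picked = _select_mergeable(queue[0], queue, same_branch)
    assert [r['brid'] for r in picked] == [1, 3]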
class Builder(config.ReconfigurableServiceMixin,
              pb.Referenceable,
              service.MultiService):

    # reconfigure builders before slaves
    reconfig_priority = 196

    def __init__(self, name, _addServices=True):
        service.MultiService.__init__(self)
        self.name = name

        # this is filled on demand by getBuilderId; don't access it directly
        self._builderid = None

        # this is created the first time we get a good build
        self.expectations = None

        # build/wannabuild slots: Build objects move along this sequence
        self.building = []
        # old_building holds active builds that were stolen from a predecessor
        self.old_building = weakref.WeakKeyDictionary()

        # buildslaves which have connected but which are not yet available.
        # These are always in the ATTACHING state.
        self.attaching_slaves = []

        # buildslaves at our disposal. Each SlaveBuilder instance has a
        # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a
        # Build is about to start, to make sure that they're still alive.
        self.slaves = []

        self.config = None
        self.builder_status = None

        if _addServices:
            self.reclaim_svc = internet.TimerService(10 * 60,
                                                     self.reclaimAllBuilds)
            self.reclaim_svc.setServiceParent(self)

            # update big status every 30 minutes, working around #1980
            self.updateStatusService = internet.TimerService(
                30 * 60, self.updateBigStatus)
            self.updateStatusService.setServiceParent(self)

    @defer.inlineCallbacks
    def reconfigService(self, new_config):
        # find this builder in the config
        found_config = False
        for builder_config in new_config.builders:
            if builder_config.name == self.name:
                found_config = True
                break
        assert found_config, "no config found for builder '%s'" % self.name

        # set up a builder status object on the first reconfig
        if not self.builder_status:
            self.builder_status = self.master.status.builderAdded(
                name=builder_config.name,
                basedir=builder_config.builddir,
                tags=builder_config.tags,
                description=builder_config.description)

        self.config = builder_config

        # allocate builderid now, so that the builder is visible in the web
        # UI; without this, the builder wouldn't appear until it performed a
        # build.
        yield self.getBuilderId()

        self.builder_status.setDescription(builder_config.description)
        self.builder_status.setTags(builder_config.tags)
        self.builder_status.setSlavenames(self.config.slavenames)
        self.builder_status.setCacheSize(new_config.caches['Builds'])

        # if we have any slavebuilders attached which are no longer
        # configured, drop them.
        new_slavenames = set(builder_config.slavenames)
        self.slaves = [s for s in self.slaves
                       if s.slave.slavename in new_slavenames]

    def __repr__(self):
        return "<Builder '%r' at %d>" % (self.name, id(self))

    def getBuilderId(self):
        # since findBuilderId is idempotent, there's no reason to add
        # additional locking around this function.
        if self._builderid:
            return defer.succeed(self._builderid)
        # buildbot.config should ensure this is already unicode, but it
        # doesn't hurt to check again
        name = ascii2unicode(self.name)
        d = self.master.data.updates.findBuilderId(name)

        @d.addCallback
        def keep(builderid):
            self._builderid = builderid
            return builderid
        return d

    @defer.inlineCallbacks
    def getOldestRequestTime(self):
        """Returns the submitted_at of the oldest unclaimed build request for
        this builder, or None if there are no build requests.
@returns: datetime instance or None, via Deferred """ unclaimed = yield self.master.data.get( ('builders', ascii2unicode(self.name), 'buildrequests'), [resultspec.Filter('claimed', 'eq', [False])]) if unclaimed: unclaimed = sorted([brd['submitted_at'] for brd in unclaimed]) defer.returnValue(unclaimed[0]) else: defer.returnValue(None) def reclaimAllBuilds(self): brids = set() for b in self.building: brids.update([br.id for br in b.requests]) for b in self.old_building: brids.update([br.id for br in b.requests]) if not brids: return defer.succeed(None) d = self.master.data.updates.reclaimBuildRequests(list(brids)) d.addErrback(log.err, 'while re-claiming running BuildRequests') return d def getBuild(self, number): for b in self.building: if b.build_status and b.build_status.number == number: return b for b in self.old_building.keys(): if b.build_status and b.build_status.number == number: return b return None def addLatentSlave(self, slave): assert interfaces.ILatentBuildSlave.providedBy(slave) for s in self.slaves: if s == slave: break else: sb = slavebuilder.LatentSlaveBuilder(slave, self) self.builder_status.addPointEvent( ['added', 'latent', slave.slavename]) self.slaves.append(sb) self.botmaster.maybeStartBuildsForBuilder(self.name) def attached(self, slave, commands): """This is invoked by the BuildSlave when the self.slavename bot registers their builder. @type slave: L{buildbot.buildslave.BuildSlave} @param slave: the BuildSlave that represents the buildslave as a whole @type remote: L{twisted.spread.pb.RemoteReference} @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder} @type commands: dict: string -> string, or None @param commands: provides the slave's version of each RemoteCommand @rtype: L{twisted.internet.defer.Deferred} @return: a Deferred that fires (with 'self') when the slave-side builder is fully attached and ready to accept commands. """ for s in self.attaching_slaves + self.slaves: if s.slave == slave: # already attached to them. This is fairly common, since # attached() gets called each time we receive the builder # list from the slave, and we ask for it each time we add or # remove a builder. So if the slave is hosting builders # A,B,C, and the config file changes A, we'll remove A and # re-add it, triggering two builder-list requests, getting # two redundant calls to attached() for B, and another two # for C. # # Therefore, when we see that we're already attached, we can # just ignore it. return defer.succeed(self) sb = slavebuilder.SlaveBuilder() sb.setBuilder(self) self.attaching_slaves.append(sb) d = sb.attached(slave, commands) d.addCallback(self._attached) d.addErrback(self._not_attached, slave) return d def _attached(self, sb): self.builder_status.addPointEvent(['connect', sb.slave.slavename]) self.attaching_slaves.remove(sb) self.slaves.append(sb) self.updateBigStatus() return self def _not_attached(self, why, slave): # already log.err'ed by SlaveBuilder._attachFailure # TODO: remove from self.slaves (except that detached() should get # run first, right?) 
log.err(why, 'slave failed to attach') self.builder_status.addPointEvent( ['failed', 'connect', slave.slavename]) # TODO: add an HTMLLogFile of the exception def detached(self, slave): """This is called when the connection to the bot is lost.""" for sb in self.attaching_slaves + self.slaves: if sb.slave == slave: break else: log.msg( "WEIRD: Builder.detached(%s) (%s)" " not in attaching_slaves(%s)" " or slaves(%s)" % (slave, slave.slavename, self.attaching_slaves, self.slaves)) return if sb.state == BUILDING: # the Build's .lostRemote method (invoked by a notifyOnDisconnect # handler) will cause the Build to be stopped, probably right # after the notifyOnDisconnect that invoked us finishes running. pass if sb in self.attaching_slaves: self.attaching_slaves.remove(sb) if sb in self.slaves: self.slaves.remove(sb) self.builder_status.addPointEvent(['disconnect', slave.slavename]) sb.detached() # inform the SlaveBuilder that their slave went away self.updateBigStatus() def updateBigStatus(self): try: # Catch exceptions here, since this is called in a LoopingCall. if not self.builder_status: return if not self.slaves: self.builder_status.setBigState("offline") elif self.building or self.old_building: self.builder_status.setBigState("building") else: self.builder_status.setBigState("idle") except Exception: log.err( None, "while trying to update status of builder '%s'" % (self.name, )) def getAvailableSlaves(self): return [sb for sb in self.slaves if sb.isAvailable()] def canStartWithSlavebuilder(self, slavebuilder): locks = [(self.botmaster.getLockFromLockAccess(access), access) for access in self.config.locks] return Build.canStartWithSlavebuilder(locks, slavebuilder) def canStartBuild(self, slavebuilder, breq): if callable(self.config.canStartBuild): return defer.maybeDeferred(self.config.canStartBuild, self, slavebuilder, breq) return defer.succeed(True) @defer.inlineCallbacks def _startBuildFor(self, slavebuilder, buildrequests): # Build a stack of cleanup functions so that, at any point, we can # abort this operation and unwind the commitments made so far. cleanups = [] def run_cleanups(): try: while cleanups: fn = cleanups.pop() fn() except: log.err(failure.Failure(), "while running %r" % (run_cleanups, )) # the last cleanup we want to perform is to update the big # status based on any other cleanup cleanups.append(lambda: self.updateBigStatus()) build = self.config.factory.newBuild(buildrequests) build.setBuilder(self) log.msg("starting build %s using slave %s" % (build, slavebuilder)) # set up locks build.setLocks(self.config.locks) cleanups.append(lambda: slavebuilder.slave.releaseLocks()) if len(self.config.env) > 0: build.setSlaveEnvironment(self.config.env) # append the build to self.building self.building.append(build) cleanups.append(lambda: self.building.remove(build)) # update the big status accordingly self.updateBigStatus() try: ready = yield slavebuilder.prepare(self.builder_status, build) except: log.err(failure.Failure(), 'while preparing slavebuilder:') ready = False # If prepare returns True then it is ready and we start a build # If it returns false then we don't start a new build. if not ready: log.msg("slave %s can't build %s after all; re-queueing the " "request" % (build, slavebuilder)) run_cleanups() defer.returnValue(False) return # ping the slave to make sure they're still there. If they've # fallen off the map (due to a NAT timeout or something), this # will fail in a couple of minutes, depending upon the TCP # timeout. 
# # TODO: This can unnecessarily suspend the starting of a build, in # situations where the slave is live but is pushing lots of data to # us in a build. log.msg("starting build %s.. pinging the slave %s" % (build, slavebuilder)) try: ping_success = yield slavebuilder.ping() except: log.err(failure.Failure(), 'while pinging slave before build:') ping_success = False if not ping_success: log.msg("slave ping failed; re-queueing the request") run_cleanups() defer.returnValue(False) return # The buildslave is ready to go. slavebuilder.buildStarted() sets its # state to BUILDING (so we won't try to use it for any other builds). # This gets set back to IDLE by the Build itself when it finishes. slavebuilder.buildStarted() cleanups.append(lambda: slavebuilder.buildFinished()) # tell the remote that it's starting a build, too try: yield slavebuilder.slave.conn.remoteStartBuild(build.builder.name) except: log.err(failure.Failure(), 'while calling remote startBuild:') run_cleanups() defer.returnValue(False) return # create the BuildStatus object that goes with the Build bs = self.builder_status.newBuild() # IMPORTANT: no yielding is allowed from here to the startBuild call! # it's possible that we lost the slave remote between the ping above # and now. If so, bail out. The build.startBuild call below transfers # responsibility for monitoring this connection to the Build instance, # so this check ensures we hand off a working connection. if not slavebuilder.slave.conn: # TODO: replace with isConnected() log.msg("slave disappeared before build could start") run_cleanups() defer.returnValue(False) return # let status know self.master.status.build_started(buildrequests[0].id, self.name, bs) # start the build. This will first set up the steps, then tell the # BuildStatus that it has started, which will announce it to the world # (through our BuilderStatus object, which is its parent). Finally it # will start the actual build process. This is done with a fresh # Deferred since _startBuildFor should not wait until the build is # finished. This uses `maybeDeferred` to ensure that any exceptions # raised by startBuild are treated as deferred errbacks (see # http://trac.buildbot.net/ticket/2428). d = defer.maybeDeferred(build.startBuild, bs, self.expectations, slavebuilder) d.addCallback(lambda _: self.buildFinished(build, slavebuilder)) # this shouldn't happen. if it does, the slave will be wedged d.addErrback( log.err, 'from a running build; this is a ' 'serious error - please file a bug at http://buildbot.net') # make sure the builder's status is represented correctly self.updateBigStatus() defer.returnValue(True) def setupProperties(self, props): props.setProperty("buildername", self.name, "Builder") if len(self.config.properties) > 0: for propertyname in self.config.properties: props.setProperty(propertyname, self.config.properties[propertyname], "Builder") def buildFinished(self, build, sb): """This is called when the Build has finished (either success or failure). 
Any exceptions during the build are reported with results=FAILURE, not with an errback.""" # by the time we get here, the Build has already released the slave, # which will trigger a check for any now-possible build requests # (maybeStartBuilds) results = build.build_status.getResults() self.building.remove(build) if results == RETRY: d = self._resubmit_buildreqs(build) d.addErrback(log.err, 'while resubmitting a build request') else: complete_at_epoch = reactor.seconds() complete_at = epoch2datetime(complete_at_epoch) brids = [br.id for br in build.requests] d = self.master.data.updates.completeBuildRequests( brids, results, complete_at=complete_at) d.addCallback(lambda _: self._notify_completions( build.requests, results, complete_at_epoch)) # nothing in particular to do with this deferred, so just log it if # it fails.. d.addErrback(log.err, 'while marking build requests as completed') if sb.slave: sb.slave.releaseLocks() self.updateBigStatus() @defer.inlineCallbacks def _notify_completions(self, requests, results, complete_at_epoch): updates = self.master.data.updates # send a message for each request for br in requests: updates.completeBuildRequests([br.id], results, epoch2datetime(complete_at_epoch)) # check for completed buildsets -- one call for each build request with # a unique bsid seen_bsids = set() for br in requests: if br.bsid in seen_bsids: continue seen_bsids.add(br.bsid) yield updates.maybeBuildsetComplete(br.bsid) def _resubmit_buildreqs(self, build): brids = [br.id for br in build.requests] d = self.master.data.updates.unclaimBuildRequests(brids) @d.addCallback def notify(_): pass # XXX method does not exist # self._msg_buildrequests_unclaimed(build.requests) return d def setExpectations(self, progress): """Mark the build as successful and update expectations for the next build. Only call this when the build did not fail in any way that would invalidate the time expectations generated by it. (if the compile failed and thus terminated early, we can't use the last build to predict how long the next one will take). """ if self.expectations: self.expectations.update(progress) else: # the first time we get a good build, create our Expectations # based upon its results self.expectations = Expectations(progress) log.msg("new expectations: %s seconds" % self.expectations.expectedBuildTime()) # Build Creation @defer.inlineCallbacks def maybeStartBuild(self, slavebuilder, breqs, _reactor=reactor): # This method is called by the botmaster whenever this builder should # start a set of buildrequests on a slave. Do not call this method # directly - use master.botmaster.maybeStartBuildsForBuilder, or one of # the other similar methods if more appropriate # first, if we're not running, then don't start builds; stopService # uses this to ensure that any ongoing maybeStartBuild invocations # are complete before it stops. if not self.running: defer.returnValue(False) return # If the build fails from here on out (e.g., because a slave has failed), # it will be handled outside of this function. TODO: test that! 
        build_started = yield self._startBuildFor(slavebuilder, breqs)
        defer.returnValue(build_started)

    # a few utility functions to make the maybeStartBuild a bit shorter and
    # easier to read

    def getMergeRequestsFn(self):
        """Helper function to determine which mergeRequests function to use,
        if any, or None for no merging"""
        # first, seek through builder, global, and the default
        mergeRequests_fn = self.config.mergeRequests
        if mergeRequests_fn is None:
            mergeRequests_fn = self.master.config.mergeRequests
        if mergeRequests_fn is None:
            mergeRequests_fn = True

        # then translate False and True properly
        if mergeRequests_fn is False:
            mergeRequests_fn = None
        elif mergeRequests_fn is True:
            mergeRequests_fn = Builder._defaultMergeRequestFn

        return mergeRequests_fn

    def _defaultMergeRequestFn(self, req1, req2):
        return req1.canBeMergedWith(req2)
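# Editorial sketch (not part of the original source): the three-level
# fallback in getMergeRequestsFn above -- builder config, then master
# config, then the built-in default -- normalizes a setting that may be
# None (inherit), False (disable merging), True (use the default), or a
# callable.  Modeled as a plain function with hypothetical names:


def _resolve_merge_fn(builder_setting, master_setting, default_fn):
    fn = builder_setting
    if fn is None:
        fn = master_setting
    if fn is None:
        fn = True
    if fn is False:
        return None                # merging disabled entirely
    if fn is True:
        return default_fn          # built-in canBeMergedWith behaviour
    return fn                      # user-supplied callable


if __name__ == '__main__':
    default = lambda req1, req2: req1.canBeMergedWith(req2)
    custom = lambda req1, req2: False
    assert _resolve_merge_fn(None, None, default) is default   # inherit twice
    assert _resolve_merge_fn(False, None, default) is None     # builder disables
    assert _resolve_merge_fn(None, custom, default) is custom  # master overrides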
class Builder(pb.Referenceable, service.MultiService): """I manage all Builds of a given type. Each Builder is created by an entry in the config file (the c['builders'] list), with a number of parameters. One of these parameters is the L{buildbot.process.factory.BuildFactory} object that is associated with this Builder. The factory is responsible for creating new L{Build<buildbot.process.build.Build>} objects. Each Build object defines when and how the build is performed, so a new Factory or Builder should be defined to control this behavior. The Builder holds on to a number of L{BuildRequest} objects in a list named C{.buildable}. Incoming BuildRequest objects will be added to this list, or (if possible) merged into an existing request. When a slave becomes available, I will use my C{BuildFactory} to turn the request into a new C{Build} object. The C{BuildRequest} is forgotten, the C{Build} goes into C{.building} while it runs. Once the build finishes, I will discard it. I maintain a list of available SlaveBuilders, one for each connected slave that the C{slavenames} parameter says we can use. Some of these will be idle, some of them will be busy running builds for me. If there are multiple slaves, I can run multiple builds at once. I also manage forced builds, progress expectation (ETA) management, and some status delivery chores. @type buildable: list of L{buildbot.process.buildrequest.BuildRequest} @ivar buildable: BuildRequests that are ready to build, but which are waiting for a buildslave to be available. @type building: list of L{buildbot.process.build.Build} @ivar building: Builds that are actively running @type slaves: list of L{buildbot.buildslave.BuildSlave} objects @ivar slaves: the slaves currently available for building """ expectations = None # this is created the first time we get a good build def __init__(self, setup, builder_status): """ @type setup: dict @param setup: builder setup data, as stored in BuildmasterConfig['builders']. Contains name, slavename(s), builddir, slavebuilddir, factory, locks. 
@type builder_status: L{buildbot.status.builder.BuilderStatus} """ service.MultiService.__init__(self) self.name = setup['name'] self.slavenames = [] if setup.has_key('slavename'): self.slavenames.append(setup['slavename']) if setup.has_key('slavenames'): self.slavenames.extend(setup['slavenames']) self.builddir = setup['builddir'] self.slavebuilddir = setup['slavebuilddir'] self.buildFactory = setup['factory'] self.nextSlave = setup.get('nextSlave') if self.nextSlave is not None and not callable(self.nextSlave): raise ValueError("nextSlave must be callable") self.locks = setup.get("locks", []) self.env = setup.get('env', {}) assert isinstance(self.env, dict) if setup.has_key('periodicBuildTime'): raise ValueError("periodicBuildTime can no longer be defined as" " part of the Builder: use scheduler.Periodic" " instead") self.nextBuild = setup.get('nextBuild') if self.nextBuild is not None and not callable(self.nextBuild): raise ValueError("nextBuild must be callable") self.buildHorizon = setup.get('buildHorizon') self.logHorizon = setup.get('logHorizon') self.eventHorizon = setup.get('eventHorizon') self.mergeRequests = setup.get('mergeRequests', True) self.properties = setup.get('properties', {}) self.category = setup.get('category', None) # build/wannabuild slots: Build objects move along this sequence self.building = [] # old_building holds active builds that were stolen from a predecessor self.old_building = weakref.WeakKeyDictionary() # buildslaves which have connected but which are not yet available. # These are always in the ATTACHING state. self.attaching_slaves = [] # buildslaves at our disposal. Each SlaveBuilder instance has a # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a # Build is about to start, to make sure that they're still alive. self.slaves = [] self.builder_status = builder_status self.builder_status.setSlavenames(self.slavenames) self.builder_status.buildHorizon = self.buildHorizon self.builder_status.logHorizon = self.logHorizon self.builder_status.eventHorizon = self.eventHorizon self.reclaim_svc = internet.TimerService(10*60, self.reclaimAllBuilds) self.reclaim_svc.setServiceParent(self) # for testing, to help synchronize tests self.run_count = 0 def stopService(self): d = defer.maybeDeferred(lambda : service.MultiService.stopService(self)) def flushMaybeStartBuilds(_): # at this point, self.running = False, so another maybeStartBuilds # invocation won't hurt anything, but it also will not complete # until any currently-running invocations are done. 
return self.maybeStartBuild() d.addCallback(flushMaybeStartBuilds) return d def setBotmaster(self, botmaster): self.botmaster = botmaster self.master = botmaster.master self.db = self.master.db self.master_name = self.master.master_name self.master_incarnation = self.master.master_incarnation def compareToSetup(self, setup): diffs = [] setup_slavenames = [] if setup.has_key('slavename'): setup_slavenames.append(setup['slavename']) setup_slavenames.extend(setup.get('slavenames', [])) if setup_slavenames != self.slavenames: diffs.append('slavenames changed from %s to %s' \ % (self.slavenames, setup_slavenames)) if setup['builddir'] != self.builddir: diffs.append('builddir changed from %s to %s' \ % (self.builddir, setup['builddir'])) if setup['slavebuilddir'] != self.slavebuilddir: diffs.append('slavebuilddir changed from %s to %s' \ % (self.slavebuilddir, setup['slavebuilddir'])) if setup['factory'] != self.buildFactory: # compare objects diffs.append('factory changed') if setup.get('locks', []) != self.locks: diffs.append('locks changed from %s to %s' % (self.locks, setup.get('locks'))) if setup.get('env', {}) != self.env: diffs.append('env changed from %s to %s' % (self.env, setup.get('env', {}))) if setup.get('nextSlave') != self.nextSlave: diffs.append('nextSlave changed from %s to %s' % (self.nextSlave, setup.get('nextSlave'))) if setup.get('nextBuild') != self.nextBuild: diffs.append('nextBuild changed from %s to %s' % (self.nextBuild, setup.get('nextBuild'))) if setup.get('buildHorizon', None) != self.buildHorizon: diffs.append('buildHorizon changed from %s to %s' % (self.buildHorizon, setup['buildHorizon'])) if setup.get('logHorizon', None) != self.logHorizon: diffs.append('logHorizon changed from %s to %s' % (self.logHorizon, setup['logHorizon'])) if setup.get('eventHorizon', None) != self.eventHorizon: diffs.append('eventHorizon changed from %s to %s' % (self.eventHorizon, setup['eventHorizon'])) if setup.get('category', None) != self.category: diffs.append('category changed from %r to %r' % (self.category, setup.get('category', None))) return diffs def __repr__(self): return "<Builder '%r' at %d>" % (self.name, id(self)) @defer.deferredGenerator def getOldestRequestTime(self): """Returns the submitted_at of the oldest unclaimed build request for this builder, or None if there are no build requests. @returns: datetime instance or None, via Deferred """ wfd = defer.waitForDeferred( self.master.db.buildrequests.getBuildRequests( buildername=self.name, claimed=False)) yield wfd unclaimed = wfd.getResult() if unclaimed: unclaimed = [ brd['submitted_at'] for brd in unclaimed ] unclaimed.sort() yield unclaimed[0] else: yield None def consumeTheSoulOfYourPredecessor(self, old): """Suck the brain out of an old Builder. This takes all the runtime state from an existing Builder and moves it into ourselves. This is used when a Builder is changed in the master.cfg file: the new Builder has a different factory, but we want all the builds that were queued for the old one to get processed by the new one. Any builds which are already running will keep running. The new Builder will get as many of the old SlaveBuilder objects as it wants.""" log.msg("consumeTheSoulOfYourPredecessor: %s feeding upon %s" % (self, old)) # all pending builds are stored in the DB, so we don't have to do # anything to claim them. 
The old builder will be stopService'd, # which should make sure they don't start any new work # this is kind of silly, but the builder status doesn't get updated # when the config changes, yet it stores the category. So: self.builder_status.category = self.category # old.building (i.e. builds which are still running) is not migrated # directly: it keeps track of builds which were in progress in the # old Builder. When those builds finish, the old Builder will be # notified, not us. However, since the old SlaveBuilder will point to # us, it is our maybeStartBuild() that will be triggered. if old.building: self.builder_status.setBigState("building") # however, we do grab a weakref to the active builds, so that our # BuilderControl can see them and stop them. We use a weakref because # we aren't the one to get notified, so there isn't a convenient # place to remove it from self.building . for b in old.building: self.old_building[b] = None for b in old.old_building: self.old_building[b] = None # Our set of slavenames may be different. Steal any of the old # buildslaves that we want to keep using. for sb in old.slaves[:]: if sb.slave.slavename in self.slavenames: log.msg(" stealing buildslave %s" % sb) self.slaves.append(sb) old.slaves.remove(sb) sb.setBuilder(self) # old.attaching_slaves: # these SlaveBuilders are waiting on a sequence of calls: # remote.setMaster and remote.print . When these two complete, # old._attached will be fired, which will add a 'connect' event to # the builder_status and try to start a build. However, we've pulled # everything out of the old builder's queue, so it will have no work # to do. The outstanding remote.setMaster/print call will be holding # the last reference to the old builder, so it will disappear just # after that response comes back. # # The BotMaster will ask the slave to re-set their list of Builders # shortly after this function returns, which will cause our # attached() method to be fired with a bunch of references to remote # SlaveBuilders, some of which we already have (by stealing them # from the old Builder), some of which will be new. The new ones # will be re-attached. # Therefore, we don't need to do anything about old.attaching_slaves return # all done def reclaimAllBuilds(self): brids = set() for b in self.building: brids.update([br.id for br in b.requests]) for b in self.old_building: brids.update([br.id for br in b.requests]) if not brids: return defer.succeed(None) d = self.master.db.buildrequests.claimBuildRequests(brids) d.addErrback(log.err, 'while re-claiming running BuildRequests') return d def getBuild(self, number): for b in self.building: if b.build_status and b.build_status.number == number: return b for b in self.old_building.keys(): if b.build_status and b.build_status.number == number: return b return None def addLatentSlave(self, slave): assert interfaces.ILatentBuildSlave.providedBy(slave) for s in self.slaves: if s == slave: break else: sb = slavebuilder.LatentSlaveBuilder(slave, self) self.builder_status.addPointEvent( ['added', 'latent', slave.slavename]) self.slaves.append(sb) self.botmaster.maybeStartBuildsForBuilder(self.name) def attached(self, slave, remote, commands): """This is invoked by the BuildSlave when the self.slavename bot registers their builder. 
@type slave: L{buildbot.buildslave.BuildSlave} @param slave: the BuildSlave that represents the buildslave as a whole @type remote: L{twisted.spread.pb.RemoteReference} @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder} @type commands: dict: string -> string, or None @param commands: provides the slave's version of each RemoteCommand @rtype: L{twisted.internet.defer.Deferred} @return: a Deferred that fires (with 'self') when the slave-side builder is fully attached and ready to accept commands. """ for s in self.attaching_slaves + self.slaves: if s.slave == slave: # already attached to them. This is fairly common, since # attached() gets called each time we receive the builder # list from the slave, and we ask for it each time we add or # remove a builder. So if the slave is hosting builders # A,B,C, and the config file changes A, we'll remove A and # re-add it, triggering two builder-list requests, getting # two redundant calls to attached() for B, and another two # for C. # # Therefore, when we see that we're already attached, we can # just ignore it. return defer.succeed(self) sb = slavebuilder.SlaveBuilder() sb.setBuilder(self) self.attaching_slaves.append(sb) d = sb.attached(slave, remote, commands) d.addCallback(self._attached) d.addErrback(self._not_attached, slave) return d def _attached(self, sb): self.builder_status.addPointEvent(['connect', sb.slave.slavename]) self.attaching_slaves.remove(sb) self.slaves.append(sb) return self def _not_attached(self, why, slave): # already log.err'ed by SlaveBuilder._attachFailure # TODO: remove from self.slaves (except that detached() should get # run first, right?) log.err(why, 'slave failed to attach') self.builder_status.addPointEvent(['failed', 'connect', slave.slavename]) # TODO: add an HTMLLogFile of the exception def detached(self, slave): """This is called when the connection to the bot is lost.""" for sb in self.attaching_slaves + self.slaves: if sb.slave == slave: break else: log.msg("WEIRD: Builder.detached(%s) (%s)" " not in attaching_slaves(%s)" " or slaves(%s)" % (slave, slave.slavename, self.attaching_slaves, self.slaves)) return if sb.state == BUILDING: # the Build's .lostRemote method (invoked by a notifyOnDisconnect # handler) will cause the Build to be stopped, probably right # after the notifyOnDisconnect that invoked us finishes running. pass if sb in self.attaching_slaves: self.attaching_slaves.remove(sb) if sb in self.slaves: self.slaves.remove(sb) self.builder_status.addPointEvent(['disconnect', slave.slavename]) sb.detached() # inform the SlaveBuilder that their slave went away self.updateBigStatus() def updateBigStatus(self): if not self.slaves: self.builder_status.setBigState("offline") elif self.building: self.builder_status.setBigState("building") else: self.builder_status.setBigState("idle") @defer.deferredGenerator def _startBuildFor(self, slavebuilder, buildrequests): """Start a build on the given slave. @param build: the L{base.Build} to start @param sb: the L{SlaveBuilder} which will host this build @return: a Deferred which fires with a L{buildbot.interfaces.IBuildControl} that can be used to stop the Build, or to access a L{buildbot.interfaces.IBuildStatus} which will watch the Build as it runs. 
""" build = self.buildFactory.newBuild(buildrequests) build.setBuilder(self) build.setLocks(self.locks) if len(self.env) > 0: build.setSlaveEnvironment(self.env) self.building.append(build) self.updateBigStatus() log.msg("starting build %s using slave %s" % (build, slavebuilder)) wfd = defer.waitForDeferred( slavebuilder.prepare(self.builder_status, build)) yield wfd ready = wfd.getResult() # If prepare returns True then it is ready and we start a build # If it returns false then we don't start a new build. if not ready: log.msg("slave %s can't build %s after all; re-queueing the " "request" % (build, slavebuilder)) self.building.remove(build) slavebuilder.slave.releaseLocks() # release the buildrequest claims wfd = defer.waitForDeferred( self._resubmit_buildreqs(build)) yield wfd wfd.getResult() # and try starting builds again. If we still have a working slave, # then this may re-claim the same buildrequests self.botmaster.maybeStartBuildsForBuilder(self.name) return # ping the slave to make sure they're still there. If they've # fallen off the map (due to a NAT timeout or something), this # will fail in a couple of minutes, depending upon the TCP # timeout. # # TODO: This can unnecessarily suspend the starting of a build, in # situations where the slave is live but is pushing lots of data to # us in a build. log.msg("starting build %s.. pinging the slave %s" % (build, slavebuilder)) wfd = defer.waitForDeferred( slavebuilder.ping()) yield wfd ping_success = wfd.getResult() if not ping_success: self._startBuildFailed("slave ping failed", build, slavebuilder) return # The buildslave is ready to go. slavebuilder.buildStarted() sets its # state to BUILDING (so we won't try to use it for any other builds). # This gets set back to IDLE by the Build itself when it finishes. slavebuilder.buildStarted() try: wfd = defer.waitForDeferred( slavebuilder.remote.callRemote("startBuild")) yield wfd wfd.getResult() except: self._startBuildFailed(failure.Failure(), build, slavebuilder) return # create the BuildStatus object that goes with the Build bs = self.builder_status.newBuild() # record in the db - one per buildrequest bids = [] for req in build.requests: wfd = defer.waitForDeferred( self.master.db.builds.addBuild(req.id, bs.number)) yield wfd bids.append(wfd.getResult()) # let status know self.master.status.build_started(req.id, self.name, bs.number) # start the build. This will first set up the steps, then tell the # BuildStatus that it has started, which will announce it to the world # (through our BuilderStatus object, which is its parent). Finally it # will start the actual build process. This is done with a fresh # Deferred since _startBuildFor should not wait until the build is # finished. d = build.startBuild(bs, self.expectations, slavebuilder) d.addCallback(self.buildFinished, slavebuilder, bids) # this shouldn't happen. if it does, the slave will be wedged d.addErrback(log.err) # make sure the builder's status is represented correctly self.updateBigStatus() # yield the IBuildControl, in case anyone needs it yield build def _startBuildFailed(self, why, build, slavebuilder): # put the build back on the buildable list log.msg("I tried to tell the slave that the build %s started, but " "remote_startBuild failed: %s" % (build, why)) # release the slave. This will queue a call to maybeStartBuild, which # will fire after other notifyOnDisconnect handlers have marked the # slave as disconnected (so we don't try to use it again). 
slavebuilder.buildFinished() log.msg("re-queueing the BuildRequest") self.building.remove(build) self._resubmit_buildreqs(build).addErrback(log.err) def setupProperties(self, props): props.setProperty("buildername", self.name, "Builder") if len(self.properties) > 0: for propertyname in self.properties: props.setProperty(propertyname, self.properties[propertyname], "Builder") def buildFinished(self, build, sb, bids): """This is called when the Build has finished (either success or failure). Any exceptions during the build are reported with results=FAILURE, not with an errback.""" # by the time we get here, the Build has already released the slave, # which will trigger a check for any now-possible build requests # (maybeStartBuilds) # mark the builds as finished, although since nothing ever reads this # table, it's not too important that it complete successfully d = self.db.builds.finishBuilds(bids) d.addErrback(log.err, 'while marking builds as finished (ignored)') results = build.build_status.getResults() self.building.remove(build) if results == RETRY: self._resubmit_buildreqs(build).addErrback(log.err) else: brids = [br.id for br in build.requests] db = self.master.db d = db.buildrequests.completeBuildRequests(brids, results) d.addCallback( lambda _ : self._maybeBuildsetsComplete(build.requests)) # nothing in particular to do with this deferred, so just log it if # it fails.. d.addErrback(log.err, 'while marking build requests as completed') if sb.slave: sb.slave.releaseLocks() @defer.deferredGenerator def _maybeBuildsetsComplete(self, requests): # inform the master that we may have completed a number of buildsets for br in requests: wfd = defer.waitForDeferred( self.master.maybeBuildsetComplete(br.bsid)) yield wfd wfd.getResult() def _resubmit_buildreqs(self, build): brids = [br.id for br in build.requests] return self.db.buildrequests.unclaimBuildRequests(brids) def setExpectations(self, progress): """Mark the build as successful and update expectations for the next build. Only call this when the build did not fail in any way that would invalidate the time expectations generated by it. (if the compile failed and thus terminated early, we can't use the last build to predict how long the next one will take). """ if self.expectations: self.expectations.update(progress) else: # the first time we get a good build, create our Expectations # based upon its results self.expectations = Expectations(progress) log.msg("new expectations: %s seconds" % \ self.expectations.expectedBuildTime()) # Build Creation @defer.deferredGenerator def maybeStartBuild(self): # This method is called by the botmaster whenever this builder should # check for and potentially start new builds. Do not call this method # directly - use master.botmaster.maybeStartBuildsForBuilder, or one # of the other similar methods if more appropriate # first, if we're not running, then don't start builds; stopService # uses this to ensure that any ongoing maybeStartBuild invocations # are complete before it stops. if not self.running: return # Check for available slaves.
If there are no available slaves, then # there is no sense continuing available_slavebuilders = [ sb for sb in self.slaves if sb.isAvailable() ] if not available_slavebuilders: self.updateBigStatus() return # now, get the available build requests wfd = defer.waitForDeferred( self.master.db.buildrequests.getBuildRequests( buildername=self.name, claimed=False)) yield wfd unclaimed_requests = wfd.getResult() # sort by submitted_at, so the first is the oldest unclaimed_requests.sort(key=lambda brd : brd['submitted_at']) # get the mergeRequests function for later mergeRequests_fn = self._getMergeRequestsFn() # match them up until we're out of options while available_slavebuilders and unclaimed_requests: # first, choose a slave (using nextSlave) wfd = defer.waitForDeferred( self._chooseSlave(available_slavebuilders)) yield wfd slavebuilder = wfd.getResult() if not slavebuilder: break if slavebuilder not in available_slavebuilders: log.msg(("nextSlave chose a nonexistent slave for builder " "'%s'; cannot start build") % self.name) break # then choose a request (using nextBuild) wfd = defer.waitForDeferred( self._chooseBuild(unclaimed_requests)) yield wfd brdict = wfd.getResult() if not brdict: break if brdict not in unclaimed_requests: log.msg(("nextBuild chose a nonexistent request for builder " "'%s'; cannot start build") % self.name) break # merge the chosen request with any compatible requests in the # queue wfd = defer.waitForDeferred( self._mergeRequests(brdict, unclaimed_requests, mergeRequests_fn)) yield wfd brdicts = wfd.getResult() # try to claim the build requests try: wfd = defer.waitForDeferred( self.master.db.buildrequests.claimBuildRequests( [ brdict['brid'] for brdict in brdicts ])) yield wfd wfd.getResult() except buildrequests.AlreadyClaimedError: # one or more of the build requests was already claimed; # re-fetch the now-partially-claimed build requests and keep # trying to match them self._breakBrdictRefloops(unclaimed_requests) wfd = defer.waitForDeferred( self.master.db.buildrequests.getBuildRequests( buildername=self.name, claimed=False)) yield wfd unclaimed_requests = wfd.getResult() # go around the loop again continue # claim was successful, so initiate a build for this set of # requests. Note that if the build fails from here on out (e.g., # because a slave has failed), it will be handled outside of this # loop. TODO: test that! # _startBuildFor expects BuildRequest objects, so cook some up wfd = defer.waitForDeferred( defer.gatherResults([ self._brdictToBuildRequest(brdict) for brdict in brdicts ])) yield wfd breqs = wfd.getResult() self._startBuildFor(slavebuilder, breqs) # and finally remove the buildrequests and slavebuilder from the # respective queues self._breakBrdictRefloops(brdicts) for brdict in brdicts: unclaimed_requests.remove(brdict) available_slavebuilders.remove(slavebuilder) self._breakBrdictRefloops(unclaimed_requests) self.updateBigStatus() return # a few utility functions to make the maybeStartBuild a bit shorter and # easier to read def _chooseSlave(self, available_slavebuilders): """ Choose the next slave, using the C{nextSlave} configuration if available, and falling back to C{random.choice} otherwise. 
@param available_slavebuilders: list of slavebuilders to choose from @returns: SlaveBuilder or None via Deferred """ if self.nextSlave: return defer.maybeDeferred(lambda : self.nextSlave(self, available_slavebuilders)) else: return defer.succeed(random.choice(available_slavebuilders)) def _chooseBuild(self, buildrequests): """ Choose the next build from the given set of build requests (represented as dictionaries). Defaults to returning the first request (earliest submitted). @param buildrequests: sorted list of build request dictionaries @returns: a build request dictionary or None via Deferred """ if self.nextBuild: # nextBuild expects BuildRequest objects, so instantiate them here # and cache them in the dictionaries d = defer.gatherResults([ self._brdictToBuildRequest(brdict) for brdict in buildrequests ]) d.addCallback(lambda requestobjects : self.nextBuild(self, requestobjects)) def to_brdict(brobj): # get the brdict for this object back return brobj.brdict d.addCallback(to_brdict) return d else: return defer.succeed(buildrequests[0]) def _getMergeRequestsFn(self): """Helper function to determine which mergeRequests function to use from L{_mergeRequests}, or None for no merging""" # first, seek through builder, global, and the default mergeRequests_fn = self.mergeRequests if mergeRequests_fn is None: mergeRequests_fn = self.master.mergeRequests if mergeRequests_fn is None: mergeRequests_fn = True # then translate False and True properly if mergeRequests_fn is False: mergeRequests_fn = None elif mergeRequests_fn is True: mergeRequests_fn = buildrequest.BuildRequest.canBeMergedWith return mergeRequests_fn @defer.deferredGenerator def _mergeRequests(self, breq, unclaimed_requests, mergeRequests_fn): """Use C{mergeRequests_fn} to merge C{breq} against C{unclaimed_requests}, where both are build request dictionaries""" # short circuit if there is no merging to do if not mergeRequests_fn or len(unclaimed_requests) == 1: yield [ breq ] return # we'll need BuildRequest objects, so get those first wfd = defer.waitForDeferred( defer.gatherResults( [ self._brdictToBuildRequest(brdict) for brdict in unclaimed_requests ])) yield wfd unclaimed_request_objects = wfd.getResult() breq_object = unclaimed_request_objects.pop( unclaimed_requests.index(breq)) # gather the mergeable requests merged_request_objects = [breq_object] for other_breq_object in unclaimed_request_objects: wfd = defer.waitForDeferred( defer.maybeDeferred(lambda : mergeRequests_fn(breq_object, other_breq_object))) yield wfd if wfd.getResult(): merged_request_objects.append(other_breq_object) # convert them back to brdicts and return merged_requests = [ br.brdict for br in merged_request_objects ] yield merged_requests def _brdictToBuildRequest(self, brdict): """ Convert a build request dictionary to a L{buildrequest.BuildRequest} object, caching the result in the dictionary itself. The resulting buildrequest will have a C{brdict} attribute pointing back to this dictionary. Note that this does not perform any locking - be careful that it is only called once at a time for each build request dictionary. 
@param brdict: dictionary to convert @returns: L{buildrequest.BuildRequest} via Deferred """ if 'brobj' in brdict: return defer.succeed(brdict['brobj']) d = buildrequest.BuildRequest.fromBrdict(self.master, brdict) def keep(buildrequest): brdict['brobj'] = buildrequest buildrequest.brdict = brdict return buildrequest d.addCallback(keep) return d def _breakBrdictRefloops(self, requests): """Break the reference loops created by L{_brdictToBuildRequest}""" for brdict in requests: try: del brdict['brobj'].brdict except KeyError: pass
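# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): _chooseSlave() and
# _chooseBuild() above fall back to random.choice and FIFO order, but both
# defer to user-supplied `nextSlave` / `nextBuild` callables from the builder
# configuration, wrapped in defer.maybeDeferred(). The helpers below only
# demonstrate the expected signatures; the selection rules (and the '-fast'
# naming convention) are invented for this example.

def example_nextSlave(builder, available_slavebuilders):
    # prefer slaves whose name marks them as fast; otherwise take the first.
    # returning a plain value (rather than a Deferred) is fine, because the
    # caller wraps this function in defer.maybeDeferred().
    for sb in available_slavebuilders:
        if sb.slave.slavename.endswith('-fast'):
            return sb
    return available_slavebuilders[0]

def example_nextBuild(builder, requests):
    # the requests arrive sorted oldest-first, so returning the last one
    # turns the queue into LIFO (newest request wins)
    return requests[-1]
# ---------------------------------------------------------------------------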
class Builder(config.ReconfigurableServiceMixin, pb.Referenceable, service.MultiService): # reconfigure builders before slaves reconfig_priority = 196 def __init__(self, name, _addServices=True): service.MultiService.__init__(self) self.name = name # this is created the first time we get a good build self.expectations = None # build/wannabuild slots: Build objects move along this sequence self.building = [] # old_building holds active builds that were stolen from a predecessor self.old_building = weakref.WeakKeyDictionary() # buildslaves which have connected but which are not yet available. # These are always in the ATTACHING state. self.attaching_slaves = [] # buildslaves at our disposal. Each SlaveBuilder instance has a # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a # Build is about to start, to make sure that they're still alive. self.slaves = [] self.startSlaves = [] self.config = None self.builder_status = None if _addServices: self.reclaim_svc = internet.TimerService(10 * 60, self.reclaimAllBuilds) self.reclaim_svc.setServiceParent(self) # update big status every 30 minutes, working around #1980 self.updateStatusService = internet.TimerService( 30 * 60, self.updateBigStatus) self.updateStatusService.setServiceParent(self) def reconfigService(self, new_config): # find this builder in the config for builder_config in new_config.builders: if builder_config.name == self.name: break else: assert 0, "no config found for builder '%s'" % self.name # set up a builder status object on the first reconfig if not self.builder_status: self.builder_status = self.master.status.builderAdded( builder_config.name, builder_config.builddir, builder_config.category, builder_config.friendly_name, builder_config.description, project=builder_config.project) self.config = builder_config self.builder_status.setDescription(builder_config.description) self.builder_status.setCategory(builder_config.category) self.builder_status.setSlavenames(self.config.slavenames) self.builder_status.setStartSlavenames(self.config.startSlavenames) self.builder_status.setCacheSize(new_config.caches) self.builder_status.setProject(builder_config.project) self.builder_status.setFriendlyName(builder_config.friendly_name) self.builder_status.setTags(builder_config.tags) return defer.succeed(None) def stopService(self): d = defer.maybeDeferred(lambda: service.MultiService.stopService(self)) if self.building: for b in self.building: d.addCallback(self._resubmit_buildreqs, b.requests) d.addErrback(log.err) return d def __repr__(self): return "<Builder '%r' at %d>" % (self.name, id(self)) @defer.inlineCallbacks def getOldestRequestTime(self): """Returns the submitted_at of the oldest unclaimed build request for this builder, or None if there are no build requests. 
@returns: datetime instance or None, via Deferred """ unclaimed = yield self.master.db.buildrequests.getBuildRequests( buildername=self.name, claimed=False) if unclaimed: unclaimed = [brd['submitted_at'] for brd in unclaimed] unclaimed.sort() defer.returnValue(unclaimed[0]) else: defer.returnValue(None) def getSlaveBuilder(self, slavename): for sb in self.getAllSlaves(): if sb.slave.slave_status.getName() == slavename: return sb def slaveIsAvailable(self, slavename): slave_builder = self.getSlaveBuilder(slavename=slavename) return slave_builder.isAvailable() if slave_builder else False def reclaimAllBuilds(self): brids = set() for b in self.building: brids.update([br.id for br in b.requests]) for b in self.old_building: brids.update([br.id for br in b.requests]) if not brids: return defer.succeed(None) d = self.master.db.buildrequests.reclaimBuildRequests(brids) d.addErrback(log.err, 'while re-claiming running BuildRequests') return d def getBuild(self, number): for b in self.building: if b.build_status and b.build_status.number == number: return b for b in self.old_building.keys(): if b.build_status and b.build_status.number == number: return b return None def isStartSlave(self, sb): return self.config.startSlavenames and sb.slave.slavename in self.config.startSlavenames def removeSlaveBuilder(self, sb): if sb in self.startSlaves: self.startSlaves.remove(sb) if sb in self.slaves: self.slaves.remove(sb) def addSlaveBuilder(self, sb): if self.isStartSlave(sb): self.startSlaves.append(sb) else: self.slaves.append(sb) def addLatentSlave(self, slave): assert interfaces.ILatentBuildSlave.providedBy(slave) for s in self.slaves: if s == slave: break else: sb = slavebuilder.LatentSlaveBuilder(slave, self) self.builder_status.addPointEvent( ['added', 'latent', slave.slavename]) self.addSlaveBuilder(sb) self.botmaster.maybeStartBuildsForBuilder(self.name) def getAllSlaves(self): if self.startSlaves: return self.slaves + self.startSlaves return self.slaves def shouldUseSelectedSlave(self): return not self.config.startSlavenames def attached(self, slave, remote, commands): """This is invoked by the BuildSlave when the self.slavename bot registers their builder. @type slave: L{buildbot.buildslave.BuildSlave} @param slave: the BuildSlave that represents the buildslave as a whole @type remote: L{twisted.spread.pb.RemoteReference} @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder} @type commands: dict: string -> string, or None @param commands: provides the slave's version of each RemoteCommand @rtype: L{twisted.internet.defer.Deferred} @return: a Deferred that fires (with 'self') when the slave-side builder is fully attached and ready to accept commands. """ for s in self.attaching_slaves + self.getAllSlaves(): if s.slave == slave: # already attached to them. This is fairly common, since # attached() gets called each time we receive the builder # list from the slave, and we ask for it each time we add or # remove a builder. So if the slave is hosting builders # A,B,C, and the config file changes A, we'll remove A and # re-add it, triggering two builder-list requests, getting # two redundant calls to attached() for B, and another two # for C. # # Therefore, when we see that we're already attached, we can # just ignore it. 
return defer.succeed(self) sb = slavebuilder.SlaveBuilder() sb.setBuilder(self) self.attaching_slaves.append(sb) d = sb.attached(slave, remote, commands) d.addCallback(self._attached) d.addErrback(self._not_attached, slave) return d def _attached(self, sb): self.builder_status.addPointEvent(['connect', sb.slave.slavename]) self.attaching_slaves.remove(sb) self.addSlaveBuilder(sb) self.updateBigStatus() return self def _not_attached(self, why, slave): # already log.err'ed by SlaveBuilder._attachFailure # TODO: remove from self.slaves (except that detached() should get # run first, right?) log.err(why, 'slave failed to attach') self.builder_status.addPointEvent( ['failed', 'connect', slave.slavename]) # TODO: add an HTMLLogFile of the exception def detached(self, slave): """This is called when the connection to the bot is lost.""" for sb in self.attaching_slaves + self.getAllSlaves(): if sb.slave == slave: break else: log.msg("WEIRD: Builder.detached(%s) (%s)" " not in attaching_slaves(%s)" " or slaves(%s)" % (slave, slave.slavename, self.attaching_slaves, self.getAllSlaves())) return if sb.state == BUILDING: # the Build's .lostRemote method (invoked by a notifyOnDisconnect # handler) will cause the Build to be stopped, probably right # after the notifyOnDisconnect that invoked us finishes running. pass if sb in self.attaching_slaves: self.attaching_slaves.remove(sb) self.removeSlaveBuilder(sb) self.builder_status.addPointEvent(['disconnect', slave.slavename]) sb.detached() # inform the SlaveBuilder that their slave went away self.updateBigStatus() def updateBigStatus(self): try: # Catch exceptions here, since this is called in a LoopingCall. if not self.builder_status: return if not self.slaves: self.builder_status.setBigState("offline") elif self.building or self.old_building: self.builder_status.setBigState("building") else: self.builder_status.setBigState("idle") except Exception: log.err( None, "while trying to update status of builder '%s'" % (self.name, )) def getAvailableSlaves(self): if self.config.startSlavenames: return [sb for sb in self.startSlaves if sb.isAvailable()] return [sb for sb in self.slaves if sb.isAvailable()] def getAvailableSlavesToProcessBuildRequests(self, slavepool): slavelist = self.startSlaves if (self.config.startSlavenames and slavepool == Slavepool.startSlavenames) \ else self.slaves return [sb for sb in slavelist if sb.isAvailable()] def canStartWithSlavebuilder(self, slavebuilder): locks = [(self.botmaster.getLockFromLockAccess(access), access) for access in self.config.locks] return Build.canStartWithSlavebuilder(locks, slavebuilder) def canStartBuild(self, slavebuilder, breq): if callable(self.config.canStartBuild): return defer.maybeDeferred(self.config.canStartBuild, self, slavebuilder, breq) return defer.succeed(True) @defer.inlineCallbacks def maybeUpdateMergedBuilds(self, brid, buildnumber, brids): build_status = yield self.builder_status.deferToThread(buildnumber) if build_status is not None: build_status.updateBuildRequestIDs(brids) buildnumbers = yield self.master.db.builds.getBuildNumbersForRequests( brids=brids) buildnumbers = [num for num in buildnumbers if num != buildnumber] if buildnumbers: url = yield self.master.status.getURLForBuildRequest( brid, builder_name=self.name, build_number=buildnumber, builder_friendly_name=self.config.friendly_name) for number in buildnumbers: build_status = yield self.builder_status.deferToThread(number) if build_status is not None: yield build_status.buildMerged(url) @defer.inlineCallbacks def 
maybeResumeBuild(self, slavebuilder, buildnumber, breqs): build_status = None if self.builder_status: build_status = yield self.builder_status.deferToThread(buildnumber) if build_status: build_status.finished = None if not self.running: defer.returnValue(False) build_started = yield self._startBuildFor(slavebuilder, breqs, build_status) if build_started and len(breqs) > 1: yield self.maybeUpdateMergedBuilds( brid=breqs[0].id, buildnumber=buildnumber, brids=[br.id for br in breqs[1:]]) defer.returnValue(build_started) @defer.inlineCallbacks def _startBuildFor(self, slavebuilder, buildrequests, build_status=None): """Start a build on the given slave. @param build: the L{base.Build} to start @param sb: the L{SlaveBuilder} which will host this build @return: (via Deferred) boolean indicating that the build was successfully started. """ # as of the Python versions supported now, try/finally can't be used # with a generator expression. So instead, we push cleanup functions # into a list so that, at any point, we can abort this operation. cleanups = [] def run_cleanups(): try: while cleanups: fn = cleanups.pop() fn() except: log.err(failure.Failure(), "while running %r" % (run_cleanups, )) # the last cleanup we want to perform is to update the big # status based on any other cleanup cleanups.append(lambda: self.updateBigStatus()) build = self.config.factory.newBuild(buildrequests) build.setBuilder(self) log.msg("starting build %s using slave %s" % (build, slavebuilder)) # set up locks build.setLocks(self.config.locks) cleanups.append(lambda: slavebuilder.slave.releaseLocks() if slavebuilder.slave else None) if len(self.config.env) > 0: build.setSlaveEnvironment(self.config.env) # append the build to self.building self.building.append(build) cleanups.append(lambda: self.building.remove(build)) # update the big status accordingly self.updateBigStatus() # ping the slave to make sure they're still there. If they've # fallen off the map (due to a NAT timeout or something), this # will fail in a couple of minutes, depending upon the TCP # timeout. # # TODO: This can unnecessarily suspend the starting of a build, in # situations where the slave is live but is pushing lots of data to # us in a build. log.msg("starting build %s.. pinging the slave %s" % (build, slavebuilder)) try: ping_success = yield slavebuilder.ping( timeout=self.master.config.remoteCallTimeout) except: log.err(failure.Failure(), 'while pinging slave before build:') raise if not ping_success: log.msg("build %s slave %s ping failed; re-queueing the request" % (build, slavebuilder)) run_cleanups() raise Exception("Ping failed") # check slave is still available ready = slavebuilder.isAvailable() if ready: try: ready = yield slavebuilder.prepare(self.builder_status, build) except: log.err(failure.Failure(), 'while preparing slavebuilder:') raise # If prepare returns True then it is ready and we start a build # If it returns False then we don't start a new build. if not ready: log.msg("slave %s can't build %s after all; re-queueing the " "request" % (slavebuilder, build)) run_cleanups() raise Exception("Unknown") # The buildslave is ready to go. slavebuilder.buildStarted() sets its # state to BUILDING (so we won't try to use it for any other builds). # This gets set back to IDLE by the Build itself when it finishes.
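# note that, unlike the older _startBuildFor variant earlier in this file,
# the return value of buildStarted() is checked here: a slavebuilder that
# refuses to start is treated like any other failed precondition - the
# cleanups run and the caller sees the raised exception.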
if slavebuilder.buildStarted(): cleanups.append(lambda: slavebuilder.buildFinished()) else: log.msg("slave %s can't build %s after all; re-queueing the " "request" % (slavebuilder, build)) run_cleanups() raise Exception("Unknown") # create the BuildStatus object that goes with the Build if build_status is None: bs = self.builder_status.newBuild() else: bs = build_status bs.builder = self.builder_status bs.slavename = slavebuilder.slave.slavename bs.waitUntilFinished().addCallback( self.builder_status._buildFinished) # update the steps to use finished steps # record the build in the db - one row per buildrequest try: bids = [] if len(build.requests) > 0: main_br = build.requests[0] bid = yield self.master.db.builds.addBuild( main_br.id, bs.number, slavebuilder.slave.slavename) bids.append(bid) # add build information to merged br for req in build.requests[1:]: bid = yield self.master.db.builds.addBuild( req.id, bs.number) self.master.status.build_started(req.id, self.name, bs) bids.append(bid) except: log.err(failure.Failure(), 'while adding rows to build table:') run_cleanups() raise # IMPORTANT: no yielding is allowed from here to the startBuild call! # it's possible that we lost the slave remote between the ping above # and now. If so, bail out. The build.startBuild call below transfers # responsibility for monitoring this connection to the Build instance, # so this check ensures we hand off a working connection. if not slavebuilder.remote: log.msg("slave disappeared before build could start") run_cleanups() raise Exception("Slave seems to have disappeared") # let status know self.master.status.build_started(main_br.id, self.name, bs) # start the build. This will first set up the steps, then tell the # BuildStatus that it has started, which will announce it to the world # (through our BuilderStatus object, which is its parent). Finally it # will start the actual build process. This is done with a fresh # Deferred since _startBuildFor should not wait until the build is # finished. This uses `maybeDeferred` to ensure that any exceptions # raised by startBuild are treated as deferred errbacks (see # http://trac.buildbot.net/ticket/2428). d = defer.maybeDeferred(build.startBuild, bs, self.expectations, slavebuilder) d.addCallback(self.buildFinished, slavebuilder, bids) # this shouldn't happen. if it does, the slave will be wedged d.addErrback( log.err, 'from a running build; this is a ' 'serious error - please file a bug at http://buildbot.net') # make sure the builder's status is represented correctly self.updateBigStatus() defer.returnValue(True) def setupProperties(self, props): props.setProperty("buildername", self.name, "Builder") if len(self.config.properties) > 0: for propertyname in self.config.properties: props.setProperty(propertyname, self.config.properties[propertyname], "Builder") @defer.inlineCallbacks def buildFinished(self, build, sb, bids): """ This is called when the Build has finished (either success or failure). Any exceptions during the build are reported with results=FAILURE, not with an errback.
By the time we get here, the Build has already released the slave, which will trigger a check for any now-possible build requests (maybeStartBuilds) """ start = time.time() buildFinishedLog = { 'name': 'buildFinished', 'description': 'Called when a Build has finished', 'number': build.build_status.number, } # List all known build requests tied to this `build` breqs = {br.id: br for br in build.requests} # Prevent new merged builds from coming in while we are finishing lock_keys = [int(brid) for brid in sorted(breqs.keys())] build_merging_locks = self.master.buildrequest_merger.getMergingLocks( lock_keys) for lock in build_merging_locks: yield lock.acquire() locks_acquired_start = time.time() buildFinishedLog['elapsed_acquiring_locks'] = time.time() - start try: # Look for additional build requests that might have been merged into # these known build requests getBuildRequestsStart = time.time() otherBrdicts = yield self.master.db.buildrequests.getBuildRequests( mergebrids=list(breqs.keys())) otherBreqs = [] buildFinishedLog['elapsed_getBuildRequests'] = time.time( ) - getBuildRequestsStart for brdict in otherBrdicts: breq = yield BuildRequest.fromBrdict(self.master, brdict) otherBreqs.append(breq) # Include the missing ones for br in otherBreqs: breqs.setdefault(br.id, br) buildFinishedLog['brids'] = sorted(breqs.keys()) d = yield self.finishBuildRequests( brids=list(breqs.keys()), requests=list(breqs.values()), build=build, bids=bids, ) finally: for lock in build_merging_locks: yield lock.release() buildFinishedLog['elapsed_using_locks'] = time.time( ) - locks_acquired_start log.msg(json.dumps(buildFinishedLog)) self.building.remove(build) if sb.slave: sb.slave.releaseLocks() self.updateBigStatus() defer.returnValue(d) @defer.inlineCallbacks def finishBuildRequestsFailed(self, failure, msg, brids): log.err(failure, msg) log.msg("Katana will retry buildrequests with ids %s" % brids) yield self.master.db.buildrequests.unclaimBuildRequests( brids, results=BEGINNING) def finishBuildRequests(self, brids, requests, build, bids=None, mergedbrids=None): d = self.master.db.builds.finishBuilds( bids) if bids else defer.succeed(None) mergedbrids = brids if mergedbrids is None else mergedbrids # TODO: we should probably do better error handling d.addCallback(lambda _: self.master.db.builds.finishedMergedBuilds( mergedbrids, build.build_status.number)) d.addErrback(log.err, 'while marking builds as finished (ignored)') d.addCallback(lambda _: self.master.db.buildrequests. maybeUpdateMergedBrids(mergedbrids)) results = build.build_status.getResults() if results == RETRY: d.addCallback( lambda _: self._resubmit_buildreqs(requests=requests)) d.addErrback(log.err, 'while resubmitting build requests') else: db = self.master.db if results == RESUME: d.addCallback(lambda _: db.buildrequests.updateBuildRequests( brids, results=results, slavepool=build.build_status.resumeSlavepool)) else: d.addCallback(lambda _: db.buildrequests.completeBuildRequests( brids, results)) d.addCallback(lambda _: self._maybeBuildsetsComplete( requests, results=results)) # if anything in this chain fails, the errback below logs it and # unclaims the build requests so that they can be retried
d.addErrback(self.finishBuildRequestsFailed, 'while marking build requests as completed', brids) return d @defer.inlineCallbacks def _maybeBuildsetsComplete(self, requests, results=None): # inform the master that we may have completed a number of buildsets for br in requests: yield self.master.maybeBuildsetComplete(br.bsid) if results and results == RESUME: self.master.buildRequestAdded(br.bsid, br.id, self.name) @defer.inlineCallbacks def _resubmit_buildreqs(self, out=None, requests=None): brids = [br.id for br in requests] yield self.master.db.buildrequests.unclaimBuildRequests( brids, results=BEGINNING) defer.returnValue(out) def setExpectations(self, progress): """Mark the build as successful and update expectations for the next build. Only call this when the build did not fail in any way that would invalidate the time expectations generated by it. (if the compile failed and thus terminated early, we can't use the last build to predict how long the next one will take). """ if self.expectations: self.expectations.update(progress) else: # the first time we get a good build, create our Expectations # based upon its results self.expectations = Expectations(progress) log.msg("new expectations: %s seconds" % self.expectations.expectedBuildTime()) # Build Creation @defer.inlineCallbacks def maybeStartBuild(self, slavebuilder, breqs): # This method is called by the botmaster whenever this builder should # start a set of buildrequests on a slave. Do not call this method # directly - use master.botmaster.maybeStartBuildsForBuilder, or one of # the other similar methods if more appropriate # first, if we're not running, then don't start builds; stopService # uses this to ensure that any ongoing maybeStartBuild invocations # are complete before it stops. if not self.running: defer.returnValue(False) return # If the build fails from here on out (e.g., because a slave has failed), # it will be handled outside of this function. TODO: test that! build_started = yield self._startBuildFor(slavebuilder, breqs) defer.returnValue(build_started) def getConfiguredMergeRequestsFn(self): mergeRequests_fn = self.config.mergeRequests if mergeRequests_fn is None: mergeRequests_fn = self.master.config.mergeRequests if mergeRequests_fn is None: mergeRequests_fn = True return mergeRequests_fn def getMergeRequestsFn(self): """Helper function to determine which mergeRequests function to use from L{_mergeRequests}, or None for no merging""" # first, seek through builder, global, and the default mergeRequests_fn = self.getConfiguredMergeRequestsFn() # then translate False and True properly if mergeRequests_fn is False: mergeRequests_fn = Builder._skipMergeRequestFn elif mergeRequests_fn is True: mergeRequests_fn = Builder._defaultMergeRequestFn return mergeRequests_fn def getBoolProperty(self, req1, name): property = req1.properties.getProperty(name, False) if type(property) != bool: property = (property.lower() == "true") return property def propertiesMatch(self, req1, req2): #If the instances are the same then they match! 
if req1.bsid == req2.bsid: return True if req1.properties.has_key( 'selected_slave') or req2.properties.has_key('selected_slave'): return False if not req1.isMergingWithPrevious: if self.getBoolProperty(req1, "force_rebuild") != self.getBoolProperty( req2, "force_rebuild"): return False if self.getBoolProperty( req1, "force_chain_rebuild") != self.getBoolProperty( req2, "force_chain_rebuild"): return False return True def _defaultMergeRequestFn(self, req1, req2): if self.propertiesMatch(req1, req2): return req1.canBeMergedWith(req2) return False def _skipMergeRequestFn(self, req1, req2): return False
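# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): getMergeRequestsFn()
# above resolves the builder/global mergeRequests setting down to a callable,
# so a configuration can substitute its own policy for _defaultMergeRequestFn.
# The branch-based rule below is invented for this example, and it assumes
# the BuildRequest objects expose a `source.branch` attribute; only
# canBeMergedWith() is taken from the code above.

def example_mergeRequests(builder, req1, req2):
    # never merge requests from different branches, even when the standard
    # compatibility check would allow it
    if req1.source.branch != req2.source.branch:
        return False
    # otherwise fall back to the usual source-stamp compatibility test
    return req1.canBeMergedWith(req2)
# ---------------------------------------------------------------------------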
class Builder(pb.Referenceable, service.MultiService): """I manage all Builds of a given type. Each Builder is created by an entry in the config file (the c['builders'] list), with a number of parameters. One of these parameters is the L{buildbot.process.factory.BuildFactory} object that is associated with this Builder. The factory is responsible for creating new L{Build<buildbot.process.base.Build>} objects. Each Build object defines when and how the build is performed, so a new Factory or Builder should be defined to control this behavior. The Builder holds on to a number of L{base.BuildRequest} objects in a list named C{.buildable}. Incoming BuildRequest objects will be added to this list, or (if possible) merged into an existing request. When a slave becomes available, I will use my C{BuildFactory} to turn the request into a new C{Build} object. The C{BuildRequest} is forgotten, the C{Build} goes into C{.building} while it runs. Once the build finishes, I will discard it. I maintain a list of available SlaveBuilders, one for each connected slave that the C{slavenames} parameter says we can use. Some of these will be idle, some of them will be busy running builds for me. If there are multiple slaves, I can run multiple builds at once. I also manage forced builds, progress expectation (ETA) management, and some status delivery chores. @type buildable: list of L{buildbot.process.base.BuildRequest} @ivar buildable: BuildRequests that are ready to build, but which are waiting for a buildslave to be available. @type building: list of L{buildbot.process.base.Build} @ivar building: Builds that are actively running @type slaves: list of L{buildbot.buildslave.BuildSlave} objects @ivar slaves: the slaves currently available for building """ expectations = None # this is created the first time we get a good build CHOOSE_SLAVES_RANDOMLY = True # disabled for determinism during tests def __init__(self, setup, builder_status): """ @type setup: dict @param setup: builder setup data, as stored in BuildmasterConfig['builders']. Contains name, slavename(s), builddir, slavebuilddir, factory, locks. 
@type builder_status: L{buildbot.status.builder.BuilderStatus} """ service.MultiService.__init__(self) self.name = setup['name'] self.slavenames = [] if setup.has_key('slavename'): self.slavenames.append(setup['slavename']) if setup.has_key('slavenames'): self.slavenames.extend(setup['slavenames']) self.builddir = setup['builddir'] self.slavebuilddir = setup['slavebuilddir'] self.buildFactory = setup['factory'] self.nextSlave = setup.get('nextSlave') if self.nextSlave is not None and not callable(self.nextSlave): raise ValueError("nextSlave must be callable") self.locks = setup.get("locks", []) self.env = setup.get('env', {}) assert isinstance(self.env, dict) if setup.has_key('periodicBuildTime'): raise ValueError("periodicBuildTime can no longer be defined as" " part of the Builder: use scheduler.Periodic" " instead") self.nextBuild = setup.get('nextBuild') if self.nextBuild is not None and not callable(self.nextBuild): raise ValueError("nextBuild must be callable") self.buildHorizon = setup.get('buildHorizon') self.logHorizon = setup.get('logHorizon') self.eventHorizon = setup.get('eventHorizon') self.mergeRequests = setup.get('mergeRequests', True) self.properties = setup.get('properties', {}) # build/wannabuild slots: Build objects move along this sequence self.building = [] # old_building holds active builds that were stolen from a predecessor self.old_building = weakref.WeakKeyDictionary() # buildslaves which have connected but which are not yet available. # These are always in the ATTACHING state. self.attaching_slaves = [] # buildslaves at our disposal. Each SlaveBuilder instance has a # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a # Build is about to start, to make sure that they're still alive. self.slaves = [] self.builder_status = builder_status self.builder_status.setSlavenames(self.slavenames) self.builder_status.buildHorizon = self.buildHorizon self.builder_status.logHorizon = self.logHorizon self.builder_status.eventHorizon = self.eventHorizon t = internet.TimerService(10 * 60, self.reclaimAllBuilds) t.setServiceParent(self) # for testing, to help synchronize tests self.watchers = { 'attach': [], 'detach': [], 'detach_all': [], 'idle': [] } self.run_count = 0 def setBotmaster(self, botmaster): self.botmaster = botmaster self.db = botmaster.db self.master_name = botmaster.master_name self.master_incarnation = botmaster.master_incarnation def compareToSetup(self, setup): diffs = [] setup_slavenames = [] if setup.has_key('slavename'): setup_slavenames.append(setup['slavename']) setup_slavenames.extend(setup.get('slavenames', [])) if setup_slavenames != self.slavenames: diffs.append('slavenames changed from %s to %s' \ % (self.slavenames, setup_slavenames)) if setup['builddir'] != self.builddir: diffs.append('builddir changed from %s to %s' \ % (self.builddir, setup['builddir'])) if setup['slavebuilddir'] != self.slavebuilddir: diffs.append('slavebuilddir changed from %s to %s' \ % (self.slavebuilddir, setup['slavebuilddir'])) if setup['factory'] != self.buildFactory: # compare objects diffs.append('factory changed') if setup.get('locks', []) != self.locks: diffs.append('locks changed from %s to %s' % (self.locks, setup.get('locks'))) if setup.get('nextSlave') != self.nextSlave: diffs.append('nextSlave changed from %s to %s' % (self.nextSlave, setup.get('nextSlave'))) if setup.get('nextBuild') != self.nextBuild: diffs.append('nextBuild changed from %s to %s' % (self.nextBuild, setup.get('nextBuild'))) if setup['buildHorizon'] != self.buildHorizon: 
diffs.append('buildHorizon changed from %s to %s' % (self.buildHorizon, setup['buildHorizon'])) if setup['logHorizon'] != self.logHorizon: diffs.append('logHorizon changed from %s to %s' % (self.logHorizon, setup['logHorizon'])) if setup['eventHorizon'] != self.eventHorizon: diffs.append('eventHorizon changed from %s to %s' % (self.eventHorizon, setup['eventHorizon'])) return diffs def __repr__(self): return "<Builder '%r' at %d>" % (self.name, id(self)) def triggerNewBuildCheck(self): self.botmaster.triggerNewBuildCheck() def run(self): """Check for work to be done. This should be called any time I might be able to start a job: - when the Builder is first created - when a new job has been added to the [buildrequests] DB table - when a slave has connected If I have both an available slave and the database contains a BuildRequest that I can handle, I will claim the BuildRequest and start the build. When the build finishes, I will retire the BuildRequest. """ # overall plan: # move .expectations to DB assert self.running log.msg("Builder.run %s: %s" % (self, self.slaves)) self.run_count += 1 available_slaves = [sb for sb in self.slaves if sb.isAvailable()] if not available_slaves: self.updateBigStatus() return d = self.db.runInteraction(self._claim_buildreqs, available_slaves) d.addCallback(self._start_builds) return d # slave-managers must refresh their claim on a build at least once an # hour, less any inter-manager clock skew RECLAIM_INTERVAL = 1 * 3600 def _claim_buildreqs(self, t, available_slaves): # return a dict mapping slave -> (brid,ssid) now = util.now() old = now - self.RECLAIM_INTERVAL requests = self.db.get_unclaimed_buildrequests(self.name, old, self.master_name, self.master_incarnation, t) assignments = {} while requests and available_slaves: sb = self._choose_slave(available_slaves) if not sb: log.msg("%s: want to start build, but we don't have a remote" % self) break available_slaves.remove(sb) breq = self._choose_build(requests) if not breq: log.msg("%s: went to start build, but nextBuild said not to" % self) break requests.remove(breq) merged_requests = [breq] for other_breq in requests[:]: if (self.mergeRequests and self.botmaster.shouldMergeRequests( self, breq, other_breq)): requests.remove(other_breq) merged_requests.append(other_breq) assignments[sb] = merged_requests brids = [br.id for br in merged_requests] self.db.claim_buildrequests(now, self.master_name, self.master_incarnation, brids, t) return assignments def _choose_slave(self, available_slaves): # note: this might return None if the nextSlave() function decided to # not give us anything if self.nextSlave: try: return self.nextSlave(self, available_slaves) except: log.msg("Exception choosing next slave") log.err(Failure()) return None if self.CHOOSE_SLAVES_RANDOMLY: return random.choice(available_slaves) return available_slaves[0] def _choose_build(self, buildable): if self.nextBuild: try: return self.nextBuild(self, buildable) except: log.msg("Exception choosing next build") log.err(Failure()) return None return buildable[0] def _start_builds(self, assignments): # because _claim_buildreqs runs in a separate thread, we might have # lost a slave by this point. We treat that case the same as if we # lose the slave right after the build starts: the initial ping # fails. 
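# each assignment maps a SlaveBuilder to the list of BuildRequests that
# _claim_buildreqs claimed (and possibly merged) for it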
for (sb, requests) in assignments.items(): build = self.buildFactory.newBuild(requests) build.setBuilder(self) build.setLocks(self.locks) if len(self.env) > 0: build.setSlaveEnvironment(self.env) self.startBuild(build, sb) self.updateBigStatus() def getBuildable(self): return self.db.runInteractionNow(self._getBuildable) def _getBuildable(self, t): now = util.now() old = now - self.RECLAIM_INTERVAL return self.db.get_unclaimed_buildrequests(self.name, old, self.master_name, self.master_incarnation, t) def getOldestRequestTime(self): """Returns the timestamp of the oldest build request for this builder. If there are no build requests, None is returned.""" buildable = self.getBuildable() if buildable: # TODO: this is sorted by priority first, not strictly reqtime return buildable[0].getSubmitTime() return None def cancelBuildRequest(self, brid): return self.db.cancel_buildrequests([brid]) def consumeTheSoulOfYourPredecessor(self, old): """Suck the brain out of an old Builder. This takes all the runtime state from an existing Builder and moves it into ourselves. This is used when a Builder is changed in the master.cfg file: the new Builder has a different factory, but we want all the builds that were queued for the old one to get processed by the new one. Any builds which are already running will keep running. The new Builder will get as many of the old SlaveBuilder objects as it wants.""" log.msg("consumeTheSoulOfYourPredecessor: %s feeding upon %s" % (self, old)) # all pending builds are stored in the DB, so we don't have to do # anything to claim them. The old builder will be stopService'd, # which should make sure they don't start any new work # old.building (i.e. builds which are still running) is not migrated # directly: it keeps track of builds which were in progress in the # old Builder. When those builds finish, the old Builder will be # notified, not us. However, since the old SlaveBuilder will point to # us, it is our maybeStartBuild() that will be triggered. if old.building: self.builder_status.setBigState("building") # however, we do grab a weakref to the active builds, so that our # BuilderControl can see them and stop them. We use a weakref because # we aren't the one to get notified, so there isn't a convenient # place to remove it from self.building . for b in old.building: self.old_building[b] = None for b in old.old_building: self.old_building[b] = None # Our set of slavenames may be different. Steal any of the old # buildslaves that we want to keep using. for sb in old.slaves[:]: if sb.slave.slavename in self.slavenames: log.msg(" stealing buildslave %s" % sb) self.slaves.append(sb) old.slaves.remove(sb) sb.setBuilder(self) # old.attaching_slaves: # these SlaveBuilders are waiting on a sequence of calls: # remote.setMaster and remote.print . When these two complete, # old._attached will be fired, which will add a 'connect' event to # the builder_status and try to start a build. However, we've pulled # everything out of the old builder's queue, so it will have no work # to do. The outstanding remote.setMaster/print call will be holding # the last reference to the old builder, so it will disappear just # after that response comes back. # # The BotMaster will ask the slave to re-set their list of Builders # shortly after this function returns, which will cause our # attached() method to be fired with a bunch of references to remote # SlaveBuilders, some of which we already have (by stealing them # from the old Builder), some of which will be new. 
The new ones # will be re-attached. # Therefore, we don't need to do anything about old.attaching_slaves return # all done def reclaimAllBuilds(self): now = util.now() brids = set() for b in self.building: brids.update([br.id for br in b.requests]) for b in self.old_building: brids.update([br.id for br in b.requests]) self.db.claim_buildrequests(now, self.master_name, self.master_incarnation, brids) def getBuild(self, number): for b in self.building: if b.build_status and b.build_status.number == number: return b for b in self.old_building.keys(): if b.build_status and b.build_status.number == number: return b return None def fireTestEvent(self, name, fire_with=None): if fire_with is None: fire_with = self watchers = self.watchers[name] self.watchers[name] = [] for w in watchers: eventually(w.callback, fire_with) def addLatentSlave(self, slave): assert interfaces.ILatentBuildSlave.providedBy(slave) for s in self.slaves: if s == slave: break else: sb = LatentSlaveBuilder(slave, self) self.builder_status.addPointEvent( ['added', 'latent', slave.slavename]) self.slaves.append(sb) self.triggerNewBuildCheck() def attached(self, slave, remote, commands): """This is invoked by the BuildSlave when the self.slavename bot registers their builder. @type slave: L{buildbot.buildslave.BuildSlave} @param slave: the BuildSlave that represents the buildslave as a whole @type remote: L{twisted.spread.pb.RemoteReference} @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder} @type commands: dict: string -> string, or None @param commands: provides the slave's version of each RemoteCommand @rtype: L{twisted.internet.defer.Deferred} @return: a Deferred that fires (with 'self') when the slave-side builder is fully attached and ready to accept commands. """ for s in self.attaching_slaves + self.slaves: if s.slave == slave: # already attached to them. This is fairly common, since # attached() gets called each time we receive the builder # list from the slave, and we ask for it each time we add or # remove a builder. So if the slave is hosting builders # A,B,C, and the config file changes A, we'll remove A and # re-add it, triggering two builder-list requests, getting # two redundant calls to attached() for B, and another two # for C. # # Therefore, when we see that we're already attached, we can # just ignore it. TODO: build a diagram of the state # transitions here, I'm concerned about sb.attached() failing # and leaving sb.state stuck at 'ATTACHING', and about # the detached() message arriving while there's some # transition pending such that the response to the transition # re-vivifies sb return defer.succeed(self) sb = SlaveBuilder() sb.setBuilder(self) self.attaching_slaves.append(sb) d = sb.attached(slave, remote, commands) d.addCallback(self._attached) d.addErrback(self._not_attached, slave) return d def _attached(self, sb): # TODO: make this .addSlaveEvent(slave.slavename, ['connect']) ? self.builder_status.addPointEvent(['connect', sb.slave.slavename]) self.attaching_slaves.remove(sb) self.slaves.append(sb) self.fireTestEvent('attach') return self def _not_attached(self, why, slave): # already log.err'ed by SlaveBuilder._attachFailure # TODO: make this .addSlaveEvent? # TODO: remove from self.slaves (except that detached() should get # run first, right?) 
self.builder_status.addPointEvent( ['failed', 'connect', slave.slavename]) # TODO: add an HTMLLogFile of the exception self.fireTestEvent('attach', why) def detached(self, slave): """This is called when the connection to the bot is lost.""" for sb in self.attaching_slaves + self.slaves: if sb.slave == slave: break else: log.msg( "WEIRD: Builder.detached(%s) (%s)" " not in attaching_slaves(%s)" " or slaves(%s)" % (slave, slave.slavename, self.attaching_slaves, self.slaves)) return if sb.state == BUILDING: # the Build's .lostRemote method (invoked by a notifyOnDisconnect # handler) will cause the Build to be stopped, probably right # after the notifyOnDisconnect that invoked us finishes running. # TODO: should failover to a new Build #self.retryBuild(sb.build) pass if sb in self.attaching_slaves: self.attaching_slaves.remove(sb) if sb in self.slaves: self.slaves.remove(sb) # TODO: make this .addSlaveEvent? self.builder_status.addPointEvent(['disconnect', slave.slavename]) sb.detached() # inform the SlaveBuilder that their slave went away self.updateBigStatus() self.fireTestEvent('detach') if not self.slaves: self.fireTestEvent('detach_all') def updateBigStatus(self): if not self.slaves: self.builder_status.setBigState("offline") elif self.building: self.builder_status.setBigState("building") else: self.builder_status.setBigState("idle") self.fireTestEvent('idle') def startBuild(self, build, sb): """Start a build on the given slave. @param build: the L{base.Build} to start @param sb: the L{SlaveBuilder} which will host this build @return: a Deferred which fires with a L{buildbot.interfaces.IBuildControl} that can be used to stop the Build, or to access a L{buildbot.interfaces.IBuildStatus} which will watch the Build as it runs. """ self.building.append(build) self.updateBigStatus() log.msg("starting build %s using slave %s" % (build, sb)) d = sb.prepare(self.builder_status) def _ping(ign): # ping the slave to make sure they're still there. If they've # fallen off the map (due to a NAT timeout or something), this # will fail in a couple of minutes, depending upon the TCP # timeout. # # TODO: This can unnecessarily suspend the starting of a build, in # situations where the slave is live but is pushing lots of data to # us in a build. log.msg("starting build %s.. pinging the slave %s" % (build, sb)) return sb.ping() d.addCallback(_ping) d.addCallback(self._startBuild_1, build, sb) return d def _startBuild_1(self, res, build, sb): if not res: return self._startBuildFailed("slave ping failed", build, sb) # The buildslave is ready to go. sb.buildStarted() sets its state to # BUILDING (so we won't try to use it for any other builds). This # gets set back to IDLE by the Build itself when it finishes. sb.buildStarted() d = sb.remote.callRemote("startBuild") d.addCallbacks(self._startBuild_2, self._startBuildFailed, callbackArgs=(build, sb), errbackArgs=(build, sb)) return d def _startBuild_2(self, res, build, sb): # create the BuildStatus object that goes with the Build bs = self.builder_status.newBuild() # start the build. This will first set up the steps, then tell the # BuildStatus that it has started, which will announce it to the # world (through our BuilderStatus object, which is its parent). # Finally it will start the actual build process. bids = [ self.db.build_started(req.id, bs.number) for req in build.requests ] d = build.startBuild(bs, self.expectations, sb) d.addCallback(self.buildFinished, sb, bids) # this shouldn't happen.
if it does, the slave will be wedged d.addErrback(log.err) return build # this is the IBuildControl def _startBuildFailed(self, why, build, sb): # put the build back on the buildable list log.msg("I tried to tell the slave that the build %s started, but " "remote_startBuild failed: %s" % (build, why)) # release the slave. This will queue a call to maybeStartBuild, which # will fire after other notifyOnDisconnect handlers have marked the # slave as disconnected (so we don't try to use it again). sb.buildFinished() log.msg("re-queueing the BuildRequest") self.building.remove(build) self._resubmit_buildreqs(build).addErrback(log.err) def setupProperties(self, props): props.setProperty("buildername", self.name, "Builder") if len(self.properties) > 0: for propertyname in self.properties: props.setProperty(propertyname, self.properties[propertyname], "Builder") def buildFinished(self, build, sb, bids): """This is called when the Build has finished (either success or failure). Any exceptions during the build are reported with results=FAILURE, not with an errback.""" # by the time we get here, the Build has already released the slave # (which queues a call to maybeStartBuild) self.db.builds_finished(bids) results = build.build_status.getResults() self.building.remove(build) if results == RETRY: self._resubmit_buildreqs(build).addErrback( log.err) # returns Deferred else: brids = [br.id for br in build.requests] self.db.retire_buildrequests(brids, results) self.triggerNewBuildCheck() def _resubmit_buildreqs(self, build): brids = [br.id for br in build.requests] return self.db.resubmit_buildrequests(brids) def setExpectations(self, progress): """Mark the build as successful and update expectations for the next build. Only call this when the build did not fail in any way that would invalidate the time expectations generated by it. (if the compile failed and thus terminated early, we can't use the last build to predict how long the next one will take). """ if self.expectations: self.expectations.update(progress) else: # the first time we get a good build, create our Expectations # based upon its results self.expectations = Expectations(progress) log.msg("new expectations: %s seconds" % \ self.expectations.expectedBuildTime()) def shutdownSlave(self): if self.remote: self.remote.callRemote("shutdown")
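# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): compareToSetup()
# above returns a list of human-readable differences, and an empty list when
# the existing Builder still matches its config entry. A reconfig routine
# could use it to decide which builders need to be rebuilt; the surrounding
# names (`current_builders`, `new_setups`) are invented for this example.

def example_find_changed_builders(current_builders, new_setups):
    # current_builders: dict mapping builder name -> existing Builder
    # new_setups: list of builder setup dicts from BuildmasterConfig
    changed = []
    for setup in new_setups:
        old = current_builders.get(setup['name'])
        if old is None:
            continue  # brand-new builder; nothing to compare against
        diffs = old.compareToSetup(setup)
        if diffs:
            for diff in diffs:
                log.msg("builder %s: %s" % (setup['name'], diff))
            changed.append(setup['name'])
    return changed
# ---------------------------------------------------------------------------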