Code example #1
 def setExpectations(self, progress):
     """Mark the build as successful and update expectations for the next
     build. Only call this when the build did not fail in any way that
     would invalidate the time expectations generated by it. (if the
     compile failed and thus terminated early, we can't use the last
     build to predict how long the next one will take).
     """
     if self.expectations:
         self.expectations.update(progress)
     else:
         # the first time we get a good build, create our Expectations
         # based upon its results
         self.expectations = Expectations(progress)
     log.msg("new expectations: %s seconds" % \
             self.expectations.expectedBuildTime())
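The method above lazily creates an Expectations object on the first good build and updates it on later ones. Below is a minimal, hypothetical stand-in (not buildbot's real Expectations, which receives a build-progress object rather than a plain number) sketching that create-then-update flow.

# Hypothetical stand-in for buildbot's Expectations, shown only to illustrate
# the flow of setExpectations() above; the real code passes a progress object.
class Expectations(object):
    def __init__(self, progress):
        # seed the estimate from the first good build
        self.estimate = float(progress)

    def update(self, progress):
        # fold the latest good build into the running estimate
        self.estimate = (self.estimate + float(progress)) / 2.0

    def expectedBuildTime(self):
        return self.estimate

# usage sketch: no expectations until the first good build, then keep updating
expectations = None
for seconds in (120.0, 90.0, 100.0):
    if expectations:
        expectations.update(seconds)
    else:
        expectations = Expectations(seconds)
    print("new expectations: %s seconds" % expectations.expectedBuildTime())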
Code example #2
File: builder.py Project: hortont424/buildbot
 def setExpectations(self, progress):
     """Mark the build as successful and update expectations for the next
     build. Only call this when the build did not fail in any way that
     would invalidate the time expectations generated by it. (if the
     compile failed and thus terminated early, we can't use the last
     build to predict how long the next one will take).
     """
     if self.expectations:
         self.expectations.update(progress)
     else:
         # the first time we get a good build, create our Expectations
         # based upon its results
         self.expectations = Expectations(progress)
     log.msg("new expectations: %s seconds" % self.expectations.expectedBuildTime())
Code example #3
class Builder(config.ReconfigurableServiceMixin,
              pb.Referenceable,
              service.MultiService):

    # reconfigure builders before slaves
    reconfig_priority = 196

    def __init__(self, name, _addServices=True):
        service.MultiService.__init__(self)
        self.name = name

        # this is created the first time we get a good build
        self.expectations = None

        # build/wannabuild slots: Build objects move along this sequence
        self.building = []
        # old_building holds active builds that were stolen from a predecessor
        self.old_building = weakref.WeakKeyDictionary()

        # buildslaves which have connected but which are not yet available.
        # These are always in the ATTACHING state.
        self.attaching_slaves = []

        # buildslaves at our disposal. Each SlaveBuilder instance has a
        # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a
        # Build is about to start, to make sure that they're still alive.
        self.slaves = []

        self.config = None
        self.builder_status = None

        if _addServices:
            self.reclaim_svc = internet.TimerService(10*60,
                                            self.reclaimAllBuilds)
            self.reclaim_svc.setServiceParent(self)

            # update big status every 30 minutes, working around #1980
            self.updateStatusService = internet.TimerService(30*60,
                                            self.updateBigStatus)
            self.updateStatusService.setServiceParent(self)

    def reconfigService(self, new_config):
        # find this builder in the config
        for builder_config in new_config.builders:
            if builder_config.name == self.name:
                break
        else:
            assert 0, "no config found for builder '%s'" % self.name

        # set up a builder status object on the first reconfig
        if not self.builder_status:
            self.builder_status = self.master.status.builderAdded(
                    builder_config.name,
                    builder_config.builddir,
                    builder_config.category,
                    builder_config.description)

        self.config = builder_config

        self.builder_status.setDescription(builder_config.description)
        self.builder_status.setCategory(builder_config.category)
        self.builder_status.setSlavenames(self.config.slavenames)
        self.builder_status.setCacheSize(new_config.caches['Builds'])

        return defer.succeed(None)

    def stopService(self):
        d = defer.maybeDeferred(lambda :
                service.MultiService.stopService(self))
        def flushMaybeStartBuilds(_):
            # at this point, self.running = False, so another maybeStartBuild
            # invocation won't hurt anything, but it also will not complete
            # until any currently-running invocations are done, so we know that
            # the builder is quiescent at that time.
            return self.maybeStartBuild()
        d.addCallback(flushMaybeStartBuilds)
        return d

    def __repr__(self):
        return "<Builder '%r' at %d>" % (self.name, id(self))

    @defer.inlineCallbacks
    def getOldestRequestTime(self):

        """Returns the submitted_at of the oldest unclaimed build request for
        this builder, or None if there are no build requests.

        @returns: datetime instance or None, via Deferred
        """
        unclaimed = yield self.master.db.buildrequests.getBuildRequests(
                        buildername=self.name, claimed=False)

        if unclaimed:
            unclaimed = [ brd['submitted_at'] for brd in unclaimed ]
            unclaimed.sort()
            defer.returnValue(unclaimed[0])
        else:
            defer.returnValue(None)

    def reclaimAllBuilds(self):
        brids = set()
        for b in self.building:
            brids.update([br.id for br in b.requests])
        for b in self.old_building:
            brids.update([br.id for br in b.requests])

        if not brids:
            return defer.succeed(None)

        d = self.master.db.buildrequests.reclaimBuildRequests(brids)
        d.addErrback(log.err, 'while re-claiming running BuildRequests')
        return d

    def getBuild(self, number):
        for b in self.building:
            if b.build_status and b.build_status.number == number:
                return b
        for b in self.old_building.keys():
            if b.build_status and b.build_status.number == number:
                return b
        return None

    def addLatentSlave(self, slave):
        assert interfaces.ILatentBuildSlave.providedBy(slave)
        for s in self.slaves:
            if s == slave:
                break
        else:
            sb = slavebuilder.LatentSlaveBuilder(slave, self)
            self.builder_status.addPointEvent(
                ['added', 'latent', slave.slavename])
            self.slaves.append(sb)
            self.botmaster.maybeStartBuildsForBuilder(self.name)

    def attached(self, slave, remote, commands):
        """This is invoked by the BuildSlave when the self.slavename bot
        registers their builder.

        @type  slave: L{buildbot.buildslave.BuildSlave}
        @param slave: the BuildSlave that represents the buildslave as a whole
        @type  remote: L{twisted.spread.pb.RemoteReference}
        @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder}
        @type  commands: dict: string -> string, or None
        @param commands: provides the slave's version of each RemoteCommand

        @rtype:  L{twisted.internet.defer.Deferred}
        @return: a Deferred that fires (with 'self') when the slave-side
                 builder is fully attached and ready to accept commands.
        """
        for s in self.attaching_slaves + self.slaves:
            if s.slave == slave:
                # already attached to them. This is fairly common, since
                # attached() gets called each time we receive the builder
                # list from the slave, and we ask for it each time we add or
                # remove a builder. So if the slave is hosting builders
                # A,B,C, and the config file changes A, we'll remove A and
                # re-add it, triggering two builder-list requests, getting
                # two redundant calls to attached() for B, and another two
                # for C.
                #
                # Therefore, when we see that we're already attached, we can
                # just ignore it.
                return defer.succeed(self)

        sb = slavebuilder.SlaveBuilder()
        sb.setBuilder(self)
        self.attaching_slaves.append(sb)
        d = sb.attached(slave, remote, commands)
        d.addCallback(self._attached)
        d.addErrback(self._not_attached, slave)
        return d

    def _attached(self, sb):
        self.builder_status.addPointEvent(['connect', sb.slave.slavename])
        self.attaching_slaves.remove(sb)
        self.slaves.append(sb)

        self.updateBigStatus()

        return self

    def _not_attached(self, why, slave):
        # already log.err'ed by SlaveBuilder._attachFailure
        # TODO: remove from self.slaves (except that detached() should get
        #       run first, right?)
        log.err(why, 'slave failed to attach')
        self.builder_status.addPointEvent(['failed', 'connect',
                                           slave.slavename])
        # TODO: add an HTMLLogFile of the exception

    def detached(self, slave):
        """This is called when the connection to the bot is lost."""
        for sb in self.attaching_slaves + self.slaves:
            if sb.slave == slave:
                break
        else:
            log.msg("WEIRD: Builder.detached(%s) (%s)"
                    " not in attaching_slaves(%s)"
                    " or slaves(%s)" % (slave, slave.slavename,
                                        self.attaching_slaves,
                                        self.slaves))
            return
        if sb.state == BUILDING:
            # the Build's .lostRemote method (invoked by a notifyOnDisconnect
            # handler) will cause the Build to be stopped, probably right
            # after the notifyOnDisconnect that invoked us finishes running.
            pass

        if sb in self.attaching_slaves:
            self.attaching_slaves.remove(sb)
        if sb in self.slaves:
            self.slaves.remove(sb)

        self.builder_status.addPointEvent(['disconnect', slave.slavename])
        sb.detached() # inform the SlaveBuilder that their slave went away
        self.updateBigStatus()

    def updateBigStatus(self):
        if not self.builder_status:
            return
        if not self.slaves:
            self.builder_status.setBigState("offline")
        elif self.building or self.old_building:
            self.builder_status.setBigState("building")
        else:
            self.builder_status.setBigState("idle")

    @defer.inlineCallbacks
    def _startBuildFor(self, slavebuilder, buildrequests):
        """Start a build on the given slave.
        @param build: the L{base.Build} to start
        @param sb: the L{SlaveBuilder} which will host this build

        @return: (via Deferred) boolean indicating that the build was
        successfully started.
        """

        # as of the Python versions supported now, try/finally can't be used
        # with a generator expression.  So instead, we push cleanup functions
        # into a list so that, at any point, we can abort this operation.
        cleanups = []
        def run_cleanups():
            try:
                while cleanups:
                    fn = cleanups.pop()
                    fn()
            except:
                log.err(failure.Failure(), "while running %r" % (run_cleanups,))

        # the last cleanup we want to perform is to update the big
        # status based on any other cleanup
        cleanups.append(lambda : self.updateBigStatus())

        build = self.config.factory.newBuild(buildrequests)
        build.setBuilder(self)
        log.msg("starting build %s using slave %s" % (build, slavebuilder))

        # set up locks
        build.setLocks(self.config.locks)
        cleanups.append(lambda : slavebuilder.slave.releaseLocks())

        if len(self.config.env) > 0:
            build.setSlaveEnvironment(self.config.env)

        # append the build to self.building
        self.building.append(build)
        cleanups.append(lambda : self.building.remove(build))

        # update the big status accordingly
        self.updateBigStatus()

        try:
            ready = yield slavebuilder.prepare(self.builder_status, build)
        except:
            log.err(failure.Failure(), 'while preparing slavebuilder:')
            ready = False

        # If prepare returns True then it is ready and we start a build
        # If it returns false then we don't start a new build.
        if not ready:
            log.msg("slave %s can't build %s after all; re-queueing the "
                    "request" % (build, slavebuilder))
            run_cleanups()
            defer.returnValue(False)
            return

        # ping the slave to make sure they're still there. If they've
        # fallen off the map (due to a NAT timeout or something), this
        # will fail in a couple of minutes, depending upon the TCP
        # timeout.
        #
        # TODO: This can unnecessarily suspend the starting of a build, in
        # situations where the slave is live but is pushing lots of data to
        # us in a build.
        log.msg("starting build %s.. pinging the slave %s"
                % (build, slavebuilder))
        try:
            ping_success = yield slavebuilder.ping()
        except:
            log.err(failure.Failure(), 'while pinging slave before build:')
            ping_success = False

        if not ping_success:
            log.msg("slave ping failed; re-queueing the request")
            run_cleanups()
            defer.returnValue(False)
            return

        # The buildslave is ready to go. slavebuilder.buildStarted() sets its
        # state to BUILDING (so we won't try to use it for any other builds).
        # This gets set back to IDLE by the Build itself when it finishes.
        slavebuilder.buildStarted()
        cleanups.append(lambda : slavebuilder.buildFinished())

        # tell the remote that it's starting a build, too
        try:
            yield slavebuilder.remote.callRemote("startBuild")
        except:
            log.err(failure.Failure(), 'while calling remote startBuild:')
            run_cleanups()
            defer.returnValue(False)
            return

        # create the BuildStatus object that goes with the Build
        bs = self.builder_status.newBuild()

        # record the build in the db - one row per buildrequest
        try:
            bids = []
            for req in build.requests:
                bid = yield self.master.db.builds.addBuild(req.id, bs.number)
                bids.append(bid)
        except:
            log.err(failure.Failure(), 'while adding rows to build table:')
            run_cleanups()
            defer.returnValue(False)
            return

        # let status know
        self.master.status.build_started(req.id, self.name, bs)

        # start the build. This will first set up the steps, then tell the
        # BuildStatus that it has started, which will announce it to the world
        # (through our BuilderStatus object, which is its parent).  Finally it
        # will start the actual build process.  This is done with a fresh
        # Deferred since _startBuildFor should not wait until the build is
        # finished.
        d = build.startBuild(bs, self.expectations, slavebuilder)
        d.addCallback(self.buildFinished, slavebuilder, bids)
        # this shouldn't happen. if it does, the slave will be wedged
        d.addErrback(log.err)

        # make sure the builder's status is represented correctly
        self.updateBigStatus()

        defer.returnValue(True)

    def setupProperties(self, props):
        props.setProperty("buildername", self.name, "Builder")
        if len(self.config.properties) > 0:
            for propertyname in self.config.properties:
                props.setProperty(propertyname,
                        self.config.properties[propertyname],
                        "Builder")

    def buildFinished(self, build, sb, bids):
        """This is called when the Build has finished (either success or
        failure). Any exceptions during the build are reported with
        results=FAILURE, not with an errback."""

        # by the time we get here, the Build has already released the slave,
        # which will trigger a check for any now-possible build requests
        # (maybeStartBuilds)

        # mark the builds as finished, although since nothing ever reads this
        # table, it's not too important that it complete successfully
        d = self.master.db.builds.finishBuilds(bids)
        d.addErrback(log.err, 'while marking builds as finished (ignored)')

        results = build.build_status.getResults()
        self.building.remove(build)
        if results == RETRY:
            self._resubmit_buildreqs(build).addErrback(log.err)
        else:
            brids = [br.id for br in build.requests]
            db = self.master.db
            d = db.buildrequests.completeBuildRequests(brids, results)
            d.addCallback(
                lambda _ : self._maybeBuildsetsComplete(build.requests))
            # nothing in particular to do with this deferred, so just log it if
            # it fails..
            d.addErrback(log.err, 'while marking build requests as completed')

        if sb.slave:
            sb.slave.releaseLocks()

        self.updateBigStatus()

    @defer.inlineCallbacks
    def _maybeBuildsetsComplete(self, requests):
        # inform the master that we may have completed a number of buildsets
        for br in requests:
            yield self.master.maybeBuildsetComplete(br.bsid)

    def _resubmit_buildreqs(self, build):
        brids = [br.id for br in build.requests]
        return self.master.db.buildrequests.unclaimBuildRequests(brids)

    def setExpectations(self, progress):
        """Mark the build as successful and update expectations for the next
        build. Only call this when the build did not fail in any way that
        would invalidate the time expectations generated by it. (if the
        compile failed and thus terminated early, we can't use the last
        build to predict how long the next one will take).
        """
        if self.expectations:
            self.expectations.update(progress)
        else:
            # the first time we get a good build, create our Expectations
            # based upon its results
            self.expectations = Expectations(progress)
        log.msg("new expectations: %s seconds" % \
                self.expectations.expectedBuildTime())

    # Build Creation

    @defer.inlineCallbacks
    def maybeStartBuild(self):
        # This method is called by the botmaster whenever this builder should
        # check for and potentially start new builds.  Do not call this method
        # directly - use master.botmaster.maybeStartBuildsForBuilder, or one
        # of the other similar methods if more appropriate

        # first, if we're not running, then don't start builds; stopService
        # uses this to ensure that any ongoing maybeStartBuild invocations
        # are complete before it stops.
        if not self.running:
            return

        # Check for available slaves.  If there are no available slaves, then
        # there is no sense continuing
        available_slavebuilders = [ sb for sb in self.slaves
                                    if sb.isAvailable() ]
        if not available_slavebuilders:
            self.updateBigStatus()
            return

        # now, get the available build requests
        unclaimed_requests = \
            yield self.master.db.buildrequests.getBuildRequests(
                    buildername=self.name, claimed=False)

        if not unclaimed_requests:
            self.updateBigStatus()
            return

        # sort by submitted_at, so the first is the oldest
        unclaimed_requests.sort(key=lambda brd : brd['submitted_at'])

        # get the mergeRequests function for later
        mergeRequests_fn = self._getMergeRequestsFn()

        # match them up until we're out of options
        while available_slavebuilders and unclaimed_requests:
            # first, choose a slave (using nextSlave)
            slavebuilder = yield self._chooseSlave(available_slavebuilders)

            if not slavebuilder:
                break

            if slavebuilder not in available_slavebuilders:
                log.msg(("nextSlave chose a nonexistent slave for builder "
                         "'%s'; cannot start build") % self.name)
                break

            # then choose a request (using nextBuild)
            brdict = yield self._chooseBuild(unclaimed_requests)

            if not brdict:
                break

            if brdict not in unclaimed_requests:
                log.msg(("nextBuild chose a nonexistent request for builder "
                         "'%s'; cannot start build") % self.name)
                break

            # merge the chosen request with any compatible requests in the
            # queue
            brdicts = yield self._mergeRequests(brdict, unclaimed_requests,
                                    mergeRequests_fn)

            # try to claim the build requests
            brids = [ brdict['brid'] for brdict in brdicts ]
            try:
                yield self.master.db.buildrequests.claimBuildRequests(brids)
            except buildrequests.AlreadyClaimedError:
                # one or more of the build requests was already claimed;
                # re-fetch the now-partially-claimed build requests and keep
                # trying to match them
                self._breakBrdictRefloops(unclaimed_requests)
                unclaimed_requests = \
                    yield self.master.db.buildrequests.getBuildRequests(
                            buildername=self.name, claimed=False)

                # go around the loop again
                continue

            # claim was successful, so initiate a build for this set of
            # requests.  Note that if the build fails from here on out (e.g.,
            # because a slave has failed), it will be handled outside of this
            # loop. TODO: test that!

            # _startBuildFor expects BuildRequest objects, so cook some up
            breqs = yield defer.gatherResults(
                    [ self._brdictToBuildRequest(brdict)
                      for brdict in brdicts ])

            build_started = yield self._startBuildFor(slavebuilder, breqs)

            if not build_started:
                # build was not started, so unclaim the build requests
                yield self.master.db.buildrequests.unclaimBuildRequests(brids)

                # and try starting builds again.  If we still have a working slave,
                # then this may re-claim the same buildrequests
                self.botmaster.maybeStartBuildsForBuilder(self.name)

            # finally, remove the buildrequests and slavebuilder from the
            # respective queues
            self._breakBrdictRefloops(brdicts)
            for brdict in brdicts:
                unclaimed_requests.remove(brdict)
            available_slavebuilders.remove(slavebuilder)

        self._breakBrdictRefloops(unclaimed_requests)
        self.updateBigStatus()
        return

    # a few utility functions to make the maybeStartBuild a bit shorter and
    # easier to read

    def _chooseSlave(self, available_slavebuilders):
        """
        Choose the next slave, using the C{nextSlave} configuration if
        available, and falling back to C{random.choice} otherwise.

        @param available_slavebuilders: list of slavebuilders to choose from
        @returns: SlaveBuilder or None via Deferred
        """
        if self.config.nextSlave:
            return defer.maybeDeferred(lambda :
                    self.config.nextSlave(self, available_slavebuilders))
        else:
            return defer.succeed(random.choice(available_slavebuilders))

    def _chooseBuild(self, buildrequests):
        """
        Choose the next build from the given set of build requests (represented
        as dictionaries).  Defaults to returning the first request (earliest
        submitted).

        @param buildrequests: sorted list of build request dictionaries
        @returns: a build request dictionary or None via Deferred
        """
        if self.config.nextBuild:
            # nextBuild expects BuildRequest objects, so instantiate them here
            # and cache them in the dictionaries
            d = defer.gatherResults([ self._brdictToBuildRequest(brdict)
                                      for brdict in buildrequests ])
            d.addCallback(lambda requestobjects :
                    self.config.nextBuild(self, requestobjects))
            def to_brdict(brobj):
                # get the brdict for this object back
                return brobj.brdict
            d.addCallback(to_brdict)
            return d
        else:
            return defer.succeed(buildrequests[0])

    def _getMergeRequestsFn(self):
        """Helper function to determine which mergeRequests function to use
        from L{_mergeRequests}, or None for no merging"""
        # first, seek through builder, global, and the default
        mergeRequests_fn = self.config.mergeRequests
        if mergeRequests_fn is None:
            mergeRequests_fn = self.master.config.mergeRequests
        if mergeRequests_fn is None:
            mergeRequests_fn = True

        # then translate False and True properly
        if mergeRequests_fn is False:
            mergeRequests_fn = None
        elif mergeRequests_fn is True:
            mergeRequests_fn = Builder._defaultMergeRequestFn

        return mergeRequests_fn

    def _defaultMergeRequestFn(self, req1, req2):
        return req1.canBeMergedWith(req2)

    @defer.inlineCallbacks
    def _mergeRequests(self, breq, unclaimed_requests, mergeRequests_fn):
        """Use C{mergeRequests_fn} to merge C{breq} against
        C{unclaimed_requests}, where both are build request dictionaries"""
        # short circuit if there is no merging to do
        if not mergeRequests_fn or len(unclaimed_requests) == 1:
            defer.returnValue([ breq ])
            return

        # we'll need BuildRequest objects, so get those first
        unclaimed_request_objects = yield defer.gatherResults(
                [ self._brdictToBuildRequest(brdict)
                  for brdict in unclaimed_requests ])

        breq_object = unclaimed_request_objects[unclaimed_requests.index(breq)]

        # gather the mergeable requests
        merged_request_objects = []
        for other_breq_object in unclaimed_request_objects:
            if (yield defer.maybeDeferred(
                        lambda : mergeRequests_fn(self, breq_object,
                                                  other_breq_object))):
                merged_request_objects.append(other_breq_object)

        # convert them back to brdicts and return
        merged_requests = [ br.brdict for br in merged_request_objects ]
        defer.returnValue(merged_requests)

    def _brdictToBuildRequest(self, brdict):
        """
        Convert a build request dictionary to a L{buildrequest.BuildRequest}
        object, caching the result in the dictionary itself.  The resulting
        buildrequest will have a C{brdict} attribute pointing back to this
        dictionary.

        Note that this does not perform any locking - be careful that it is
        only called once at a time for each build request dictionary.

        @param brdict: dictionary to convert

        @returns: L{buildrequest.BuildRequest} via Deferred
        """
        if 'brobj' in brdict:
            return defer.succeed(brdict['brobj'])
        d = buildrequest.BuildRequest.fromBrdict(self.master, brdict)
        def keep(buildrequest):
            brdict['brobj'] = buildrequest
            buildrequest.brdict = brdict
            return buildrequest
        d.addCallback(keep)
        return d

    def _breakBrdictRefloops(self, requests):
        """Break the reference loops created by L{_brdictToBuildRequest}"""
        for brdict in requests:
            try:
                del brdict['brobj'].brdict
            except KeyError:
                pass
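In example #3, maybeStartBuild delegates slave and request selection to the optional nextSlave/nextBuild hooks through _chooseSlave and _chooseBuild. The sketch below shows how such hooks might be supplied from master.cfg; the BuilderConfig call is an assumption about the contemporary buildbot configuration API and is left commented out.

# Hedged sketch (not part of the excerpt) of nextSlave/nextBuild hooks.
def pick_first_idle(builder, available_slavebuilders):
    # deterministic variant of the default random.choice fallback in _chooseSlave
    return available_slavebuilders[0] if available_slavebuilders else None

def pick_oldest_request(builder, requests):
    # maybeStartBuild sorts requests by submitted_at, so the first is the oldest
    return requests[0] if requests else None

# c['builders'].append(
#     BuilderConfig(name='runtests', slavenames=['slave1', 'slave2'],
#                   factory=build_factory,
#                   nextSlave=pick_first_idle,
#                   nextBuild=pick_oldest_request))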
Code example #4
File: builder.py Project: ahussein/buildbot
class Builder(pb.Referenceable, service.MultiService):
    """I manage all Builds of a given type.

    Each Builder is created by an entry in the config file (the c['builders']
    list), with a number of parameters.

    One of these parameters is the L{buildbot.process.factory.BuildFactory}
    object that is associated with this Builder. The factory is responsible
    for creating new L{Build<buildbot.process.build.Build>} objects. Each
    Build object defines when and how the build is performed, so a new
    Factory or Builder should be defined to control this behavior.

    The Builder holds on to a number of L{BuildRequest} objects in a
    list named C{.buildable}. Incoming BuildRequest objects will be added to
    this list, or (if possible) merged into an existing request. When a slave
    becomes available, I will use my C{BuildFactory} to turn the request into
    a new C{Build} object. The C{BuildRequest} is forgotten, the C{Build}
    goes into C{.building} while it runs. Once the build finishes, I will
    discard it.

    I maintain a list of available SlaveBuilders, one for each connected
    slave that the C{slavenames} parameter says we can use. Some of these
    will be idle, some of them will be busy running builds for me. If there
    are multiple slaves, I can run multiple builds at once.

    I also manage forced builds, progress expectation (ETA) management, and
    some status delivery chores.

    @type buildable: list of L{buildbot.process.buildrequest.BuildRequest}
    @ivar buildable: BuildRequests that are ready to build, but which are
                     waiting for a buildslave to be available.

    @type building: list of L{buildbot.process.build.Build}
    @ivar building: Builds that are actively running

    @type slaves: list of L{buildbot.buildslave.BuildSlave} objects
    @ivar slaves: the slaves currently available for building
    """

    expectations = None # this is created the first time we get a good build
    CHOOSE_SLAVES_RANDOMLY = True # disabled for determinism during tests

    def __init__(self, setup, builder_status):
        """
        @type  setup: dict
        @param setup: builder setup data, as stored in
                      BuildmasterConfig['builders'].  Contains name,
                      slavename(s), builddir, slavebuilddir, factory, locks.
        @type  builder_status: L{buildbot.status.builder.BuilderStatus}
        """
        service.MultiService.__init__(self)
        self.name = setup['name']
        self.slavenames = []
        if setup.has_key('slavename'):
            self.slavenames.append(setup['slavename'])
        if setup.has_key('slavenames'):
            self.slavenames.extend(setup['slavenames'])
        self.builddir = setup['builddir']
        self.slavebuilddir = setup['slavebuilddir']
        self.buildFactory = setup['factory']
        self.nextSlave = setup.get('nextSlave')
        if self.nextSlave is not None and not callable(self.nextSlave):
            raise ValueError("nextSlave must be callable")
        self.locks = setup.get("locks", [])
        self.env = setup.get('env', {})
        assert isinstance(self.env, dict)
        if setup.has_key('periodicBuildTime'):
            raise ValueError("periodicBuildTime can no longer be defined as"
                             " part of the Builder: use scheduler.Periodic"
                             " instead")
        self.nextBuild = setup.get('nextBuild')
        if self.nextBuild is not None and not callable(self.nextBuild):
            raise ValueError("nextBuild must be callable")
        self.buildHorizon = setup.get('buildHorizon')
        self.logHorizon = setup.get('logHorizon')
        self.eventHorizon = setup.get('eventHorizon')
        self.mergeRequests = setup.get('mergeRequests', True)
        self.properties = setup.get('properties', {})
        self.category = setup.get('category', None)

        # build/wannabuild slots: Build objects move along this sequence
        self.building = []
        # old_building holds active builds that were stolen from a predecessor
        self.old_building = weakref.WeakKeyDictionary()

        # buildslaves which have connected but which are not yet available.
        # These are always in the ATTACHING state.
        self.attaching_slaves = []

        # buildslaves at our disposal. Each SlaveBuilder instance has a
        # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a
        # Build is about to start, to make sure that they're still alive.
        self.slaves = []

        self.builder_status = builder_status
        self.builder_status.setSlavenames(self.slavenames)
        self.builder_status.buildHorizon = self.buildHorizon
        self.builder_status.logHorizon = self.logHorizon
        self.builder_status.eventHorizon = self.eventHorizon
        t = internet.TimerService(10*60, self.reclaimAllBuilds)
        t.setServiceParent(self)

        # for testing, to help synchronize tests
        self.watchers = {'attach': [], 'detach': [], 'detach_all': [],
                         'idle': []}
        self.run_count = 0

    def setBotmaster(self, botmaster):
        self.botmaster = botmaster
        self.db = botmaster.db
        self.master_name = botmaster.master_name
        self.master_incarnation = botmaster.master_incarnation

    def compareToSetup(self, setup):
        diffs = []
        setup_slavenames = []
        if setup.has_key('slavename'):
            setup_slavenames.append(setup['slavename'])
        setup_slavenames.extend(setup.get('slavenames', []))
        if setup_slavenames != self.slavenames:
            diffs.append('slavenames changed from %s to %s' \
                         % (self.slavenames, setup_slavenames))
        if setup['builddir'] != self.builddir:
            diffs.append('builddir changed from %s to %s' \
                         % (self.builddir, setup['builddir']))
        if setup['slavebuilddir'] != self.slavebuilddir:
            diffs.append('slavebuilddir changed from %s to %s' \
                         % (self.slavebuilddir, setup['slavebuilddir']))
        if setup['factory'] != self.buildFactory: # compare objects
            diffs.append('factory changed')
        if setup.get('locks', []) != self.locks:
            diffs.append('locks changed from %s to %s' % (self.locks, setup.get('locks')))
        if setup.get('env', {}) != self.env:
            diffs.append('env changed from %s to %s' % (self.env, setup.get('env', {})))
        if setup.get('nextSlave') != self.nextSlave:
            diffs.append('nextSlave changed from %s to %s' % (self.nextSlave, setup.get('nextSlave')))
        if setup.get('nextBuild') != self.nextBuild:
            diffs.append('nextBuild changed from %s to %s' % (self.nextBuild, setup.get('nextBuild')))
        if setup.get('buildHorizon', None) != self.buildHorizon:
            diffs.append('buildHorizon changed from %s to %s' % (self.buildHorizon, setup['buildHorizon']))
        if setup.get('logHorizon', None) != self.logHorizon:
            diffs.append('logHorizon changed from %s to %s' % (self.logHorizon, setup['logHorizon']))
        if setup.get('eventHorizon', None) != self.eventHorizon:
            diffs.append('eventHorizon changed from %s to %s' % (self.eventHorizon, setup['eventHorizon']))
        if setup.get('category', None) != self.category:
            diffs.append('category changed from %r to %r' % (self.category, setup.get('category', None)))

        return diffs

    def __repr__(self):
        return "<Builder '%r' at %d>" % (self.name, id(self))

    def triggerNewBuildCheck(self):
        self.botmaster.triggerNewBuildCheck()

    def run(self):
        """Check for work to be done. This should be called any time I might
        be able to start a job:

         - when the Builder is first created
         - when a new job has been added to the [buildrequests] DB table
         - when a slave has connected

        If I have both an available slave and the database contains a
        BuildRequest that I can handle, I will claim the BuildRequest and
        start the build. When the build finishes, I will retire the
        BuildRequest.
        """
        # overall plan:
        #  move .expectations to DB

        # if we're not running, we may still be called from leftovers from
        # a run of the loop, so just ignore the call.
        if not self.running:
            return

        self.run_count += 1

        available_slaves = [sb for sb in self.slaves if sb.isAvailable()]
        if not available_slaves:
            self.updateBigStatus()
            return
        d = self.db.runInteraction(self._claim_buildreqs, available_slaves)
        d.addCallback(self._start_builds)
        return d

    # slave-managers must refresh their claim on a build at least once an
    # hour, less any inter-manager clock skew
    RECLAIM_INTERVAL = 1*3600

    def _claim_buildreqs(self, t, available_slaves):
        # return a dict mapping slave -> (brid,ssid)
        now = util.now()
        old = now - self.RECLAIM_INTERVAL
        requests = self.db.get_unclaimed_buildrequests(self.name, old,
                                                       self.master_name,
                                                       self.master_incarnation,
                                                       t)

        assignments = {}
        while requests and available_slaves:
            sb = self._choose_slave(available_slaves)
            if not sb:
                log.msg("%s: want to start build, but we don't have a remote"
                        % self)
                break
            available_slaves.remove(sb)
            breq = self._choose_build(requests)
            if not breq:
                log.msg("%s: went to start build, but nextBuild said not to"
                        % self)
                break
            requests.remove(breq)
            merged_requests = [breq]
            for other_breq in requests[:]:
                if (self.mergeRequests and
                    self.botmaster.shouldMergeRequests(self, breq, other_breq)
                    ):
                    requests.remove(other_breq)
                    merged_requests.append(other_breq)
            assignments[sb] = merged_requests
            brids = [br.id for br in merged_requests]
            self.db.claim_buildrequests(now, self.master_name,
                                        self.master_incarnation, brids, t)
        return assignments

    def _choose_slave(self, available_slaves):
        # note: this might return None if the nextSlave() function decided to
        # not give us anything
        if self.nextSlave:
            try:
                return self.nextSlave(self, available_slaves)
            except:
                log.msg("Exception choosing next slave")
                log.err(Failure())
            return None
        if self.CHOOSE_SLAVES_RANDOMLY:
            return random.choice(available_slaves)
        return available_slaves[0]

    def _choose_build(self, buildable):
        if self.nextBuild:
            try:
                return self.nextBuild(self, buildable)
            except:
                log.msg("Exception choosing next build")
                log.err(Failure())
            return None
        return buildable[0]

    def _start_builds(self, assignments):
        # because _claim_buildreqs runs in a separate thread, we might have
        # lost a slave by this point. We treat that case the same as if we
        # lose the slave right after the build starts: the initial ping
        # fails.
        for (sb, requests) in assignments.items():
            build = self.buildFactory.newBuild(requests)
            build.setBuilder(self)
            build.setLocks(self.locks)
            if len(self.env) > 0:
                build.setSlaveEnvironment(self.env)
            self.startBuild(build, sb)
        self.updateBigStatus()


    def getBuildable(self, limit=None):
        return self.db.runInteractionNow(self._getBuildable, limit)
    def _getBuildable(self, t, limit):
        now = util.now()
        old = now - self.RECLAIM_INTERVAL
        return self.db.get_unclaimed_buildrequests(self.name, old,
                                                   self.master_name,
                                                   self.master_incarnation,
                                                   t,
                                                   limit)

    def getOldestRequestTime(self):
        """Returns the timestamp of the oldest build request for this builder.

        If there are no build requests, None is returned."""
        buildable = self.getBuildable(1)
        if buildable:
            # TODO: this is sorted by priority first, not strictly reqtime
            return buildable[0].getSubmitTime()
        return None

    def cancelBuildRequest(self, brid):
        return self.db.cancel_buildrequests([brid])

    def consumeTheSoulOfYourPredecessor(self, old):
        """Suck the brain out of an old Builder.

        This takes all the runtime state from an existing Builder and moves
        it into ourselves. This is used when a Builder is changed in the
        master.cfg file: the new Builder has a different factory, but we want
        all the builds that were queued for the old one to get processed by
        the new one. Any builds which are already running will keep running.
        The new Builder will get as many of the old SlaveBuilder objects as
        it wants."""

        log.msg("consumeTheSoulOfYourPredecessor: %s feeding upon %s" %
                (self, old))
        # all pending builds are stored in the DB, so we don't have to do
        # anything to claim them. The old builder will be stopService'd,
        # which should make sure they don't start any new work

        # this is kind of silly, but the builder status doesn't get updated
        # when the config changes, yet it stores the category.  So:
        self.builder_status.category = self.category

        # old.building (i.e. builds which are still running) is not migrated
        # directly: it keeps track of builds which were in progress in the
        # old Builder. When those builds finish, the old Builder will be
        # notified, not us. However, since the old SlaveBuilder will point to
        # us, it is our maybeStartBuild() that will be triggered.
        if old.building:
            self.builder_status.setBigState("building")
        # however, we do grab a weakref to the active builds, so that our
        # BuilderControl can see them and stop them. We use a weakref because
        # we aren't the one to get notified, so there isn't a convenient
        # place to remove it from self.building .
        for b in old.building:
            self.old_building[b] = None
        for b in old.old_building:
            self.old_building[b] = None

        # Our set of slavenames may be different. Steal any of the old
        # buildslaves that we want to keep using.
        for sb in old.slaves[:]:
            if sb.slave.slavename in self.slavenames:
                log.msg(" stealing buildslave %s" % sb)
                self.slaves.append(sb)
                old.slaves.remove(sb)
                sb.setBuilder(self)

        # old.attaching_slaves:
        #  these SlaveBuilders are waiting on a sequence of calls:
        #  remote.setMaster and remote.print . When these two complete,
        #  old._attached will be fired, which will add a 'connect' event to
        #  the builder_status and try to start a build. However, we've pulled
        #  everything out of the old builder's queue, so it will have no work
        #  to do. The outstanding remote.setMaster/print call will be holding
        #  the last reference to the old builder, so it will disappear just
        #  after that response comes back.
        #
        #  The BotMaster will ask the slave to re-set their list of Builders
        #  shortly after this function returns, which will cause our
        #  attached() method to be fired with a bunch of references to remote
        #  SlaveBuilders, some of which we already have (by stealing them
        #  from the old Builder), some of which will be new. The new ones
        #  will be re-attached.

        #  Therefore, we don't need to do anything about old.attaching_slaves

        return # all done

    def reclaimAllBuilds(self):
        try:
            now = util.now()
            brids = set()
            for b in self.building:
                brids.update([br.id for br in b.requests])
            for b in self.old_building:
                brids.update([br.id for br in b.requests])
            self.db.claim_buildrequests(now, self.master_name,
                                        self.master_incarnation, brids)
        except:
            log.msg("Error in reclaimAllBuilds")
            log.err()

    def getBuild(self, number):
        for b in self.building:
            if b.build_status and b.build_status.number == number:
                return b
        for b in self.old_building.keys():
            if b.build_status and b.build_status.number == number:
                return b
        return None

    def fireTestEvent(self, name, fire_with=None):
        if fire_with is None:
            fire_with = self
        watchers = self.watchers[name]
        self.watchers[name] = []
        for w in watchers:
            eventually(w.callback, fire_with)

    def addLatentSlave(self, slave):
        assert interfaces.ILatentBuildSlave.providedBy(slave)
        for s in self.slaves:
            if s == slave:
                break
        else:
            sb = slavebuilder.LatentSlaveBuilder(slave, self)
            self.builder_status.addPointEvent(
                ['added', 'latent', slave.slavename])
            self.slaves.append(sb)
            self.triggerNewBuildCheck()

    def attached(self, slave, remote, commands):
        """This is invoked by the BuildSlave when the self.slavename bot
        registers their builder.

        @type  slave: L{buildbot.buildslave.BuildSlave}
        @param slave: the BuildSlave that represents the buildslave as a whole
        @type  remote: L{twisted.spread.pb.RemoteReference}
        @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder}
        @type  commands: dict: string -> string, or None
        @param commands: provides the slave's version of each RemoteCommand

        @rtype:  L{twisted.internet.defer.Deferred}
        @return: a Deferred that fires (with 'self') when the slave-side
                 builder is fully attached and ready to accept commands.
        """
        for s in self.attaching_slaves + self.slaves:
            if s.slave == slave:
                # already attached to them. This is fairly common, since
                # attached() gets called each time we receive the builder
                # list from the slave, and we ask for it each time we add or
                # remove a builder. So if the slave is hosting builders
                # A,B,C, and the config file changes A, we'll remove A and
                # re-add it, triggering two builder-list requests, getting
                # two redundant calls to attached() for B, and another two
                # for C.
                #
                # Therefore, when we see that we're already attached, we can
                # just ignore it. TODO: build a diagram of the state
                # transitions here, I'm concerned about sb.attached() failing
                # and leaving sb.state stuck at 'ATTACHING', and about
                # the detached() message arriving while there's some
                # transition pending such that the response to the transition
                # re-vivifies sb
                return defer.succeed(self)

        sb = slavebuilder.SlaveBuilder()
        sb.setBuilder(self)
        self.attaching_slaves.append(sb)
        d = sb.attached(slave, remote, commands)
        d.addCallback(self._attached)
        d.addErrback(self._not_attached, slave)
        return d

    def _attached(self, sb):
        # TODO: make this .addSlaveEvent(slave.slavename, ['connect']) ?
        self.builder_status.addPointEvent(['connect', sb.slave.slavename])
        self.attaching_slaves.remove(sb)
        self.slaves.append(sb)

        self.fireTestEvent('attach')
        return self

    def _not_attached(self, why, slave):
        # already log.err'ed by SlaveBuilder._attachFailure
        # TODO: make this .addSlaveEvent?
        # TODO: remove from self.slaves (except that detached() should get
        #       run first, right?)
        print why
        self.builder_status.addPointEvent(['failed', 'connect',
                                           slave.slavename])
        # TODO: add an HTMLLogFile of the exception
        self.fireTestEvent('attach', why)

    def detached(self, slave):
        """This is called when the connection to the bot is lost."""
        for sb in self.attaching_slaves + self.slaves:
            if sb.slave == slave:
                break
        else:
            log.msg("WEIRD: Builder.detached(%s) (%s)"
                    " not in attaching_slaves(%s)"
                    " or slaves(%s)" % (slave, slave.slavename,
                                        self.attaching_slaves,
                                        self.slaves))
            return
        if sb.state == BUILDING:
            # the Build's .lostRemote method (invoked by a notifyOnDisconnect
            # handler) will cause the Build to be stopped, probably right
            # after the notifyOnDisconnect that invoked us finishes running.

            # TODO: should failover to a new Build
            #self.retryBuild(sb.build)
            pass

        if sb in self.attaching_slaves:
            self.attaching_slaves.remove(sb)
        if sb in self.slaves:
            self.slaves.remove(sb)

        # TODO: make this .addSlaveEvent?
        self.builder_status.addPointEvent(['disconnect', slave.slavename])
        sb.detached() # inform the SlaveBuilder that their slave went away
        self.updateBigStatus()
        self.fireTestEvent('detach')
        if not self.slaves:
            self.fireTestEvent('detach_all')

    def updateBigStatus(self):
        if not self.slaves:
            self.builder_status.setBigState("offline")
        elif self.building:
            self.builder_status.setBigState("building")
        else:
            self.builder_status.setBigState("idle")
            self.fireTestEvent('idle')

    def startBuild(self, build, sb):
        """Start a build on the given slave.
        @param build: the L{base.Build} to start
        @param sb: the L{SlaveBuilder} which will host this build

        @return: a Deferred which fires with a
        L{buildbot.interfaces.IBuildControl} that can be used to stop the
        Build, or to access a L{buildbot.interfaces.IBuildStatus} which will
        watch the Build as it runs. """

        self.building.append(build)
        self.updateBigStatus()
        log.msg("starting build %s using slave %s" % (build, sb))
        d = sb.prepare(self.builder_status)

        def _prepared(ready):
            # If prepare returns True then it is ready and we start a build
            # If it returns false then we don't start a new build.
            d = defer.succeed(ready)

            if not ready:
                #FIXME: We should perhaps trigger a check to see if there is
                # any other way to schedule the work
                log.msg("slave %s can't build %s after all" % (build, sb))

                # release the slave. This will queue a call to maybeStartBuild, which
                # will fire after other notifyOnDisconnect handlers have marked the
                # slave as disconnected (so we don't try to use it again).
                # sb.buildFinished()

                log.msg("re-queueing the BuildRequest %s" % build)
                self.building.remove(build)
                self._resubmit_buildreqs(build).addErrback(log.err)

                sb.slave.releaseLocks()
                self.triggerNewBuildCheck()

                return d

            def _ping(ign):
                # ping the slave to make sure they're still there. If they've
                # fallen off the map (due to a NAT timeout or something), this
                # will fail in a couple of minutes, depending upon the TCP
                # timeout.
                #
                # TODO: This can unnecessarily suspend the starting of a build, in
                # situations where the slave is live but is pushing lots of data to
                # us in a build.
                log.msg("starting build %s.. pinging the slave %s" % (build, sb))
                return sb.ping()
            d.addCallback(_ping)
            d.addCallback(self._startBuild_1, build, sb)

            return d

        d.addCallback(_prepared)
        return d

    def _startBuild_1(self, res, build, sb):
        if not res:
            return self._startBuildFailed("slave ping failed", build, sb)
        # The buildslave is ready to go. sb.buildStarted() sets its state to
        # BUILDING (so we won't try to use it for any other builds). This
        # gets set back to IDLE by the Build itself when it finishes.
        sb.buildStarted()
        d = sb.remote.callRemote("startBuild")
        d.addCallbacks(self._startBuild_2, self._startBuildFailed,
                       callbackArgs=(build,sb), errbackArgs=(build,sb))
        return d

    def _startBuild_2(self, res, build, sb):
        # create the BuildStatus object that goes with the Build
        bs = self.builder_status.newBuild()

        # start the build. This will first set up the steps, then tell the
        # BuildStatus that it has started, which will announce it to the
        # world (through our BuilderStatus object, which is its parent).
        # Finally it will start the actual build process.
        bids = [self.db.build_started(req.id, bs.number) for req in build.requests]
        d = build.startBuild(bs, self.expectations, sb)
        d.addCallback(self.buildFinished, sb, bids)
        # this shouldn't happen. if it does, the slave will be wedged
        d.addErrback(log.err)
        return build # this is the IBuildControl

    def _startBuildFailed(self, why, build, sb):
        # put the build back on the buildable list
        log.msg("I tried to tell the slave that the build %s started, but "
                "remote_startBuild failed: %s" % (build, why))
        # release the slave. This will queue a call to maybeStartBuild, which
        # will fire after other notifyOnDisconnect handlers have marked the
        # slave as disconnected (so we don't try to use it again).
        sb.buildFinished()

        log.msg("re-queueing the BuildRequest")
        self.building.remove(build)
        self._resubmit_buildreqs(build).addErrback(log.err)

    def setupProperties(self, props):
        props.setProperty("buildername", self.name, "Builder")
        if len(self.properties) > 0:
            for propertyname in self.properties:
                props.setProperty(propertyname, self.properties[propertyname], "Builder")

    def buildFinished(self, build, sb, bids):
        """This is called when the Build has finished (either success or
        failure). Any exceptions during the build are reported with
        results=FAILURE, not with an errback."""

        # by the time we get here, the Build has already released the slave
        # (which queues a call to maybeStartBuild)

        self.db.builds_finished(bids)

        results = build.build_status.getResults()
        self.building.remove(build)
        if results == RETRY:
            self._resubmit_buildreqs(build).addErrback(log.err) # returns Deferred
        else:
            brids = [br.id for br in build.requests]
            self.db.retire_buildrequests(brids, results)

        if sb.slave:
            sb.slave.releaseLocks()

        self.triggerNewBuildCheck()

    def _resubmit_buildreqs(self, build):
        brids = [br.id for br in build.requests]
        return self.db.resubmit_buildrequests(brids)

    def setExpectations(self, progress):
        """Mark the build as successful and update expectations for the next
        build. Only call this when the build did not fail in any way that
        would invalidate the time expectations generated by it. (if the
        compile failed and thus terminated early, we can't use the last
        build to predict how long the next one will take).
        """
        if self.expectations:
            self.expectations.update(progress)
        else:
            # the first time we get a good build, create our Expectations
            # based upon its results
            self.expectations = Expectations(progress)
        log.msg("new expectations: %s seconds" % \
                self.expectations.expectedBuildTime())

    def shutdownSlave(self):
        if self.remote:
            self.remote.callRemote("shutdown")
Code example #5
0
File: builder.py Project: JinsongBian/buildbot
class Builder(config.ReconfigurableServiceMixin,
              pb.Referenceable,
              service.MultiService):

    # reconfigure builders before slaves
    reconfig_priority = 196

    def __init__(self, name, _addServices=True):
        service.MultiService.__init__(self)
        self.name = name

        # this is filled on demand by getBuilderId; don't access it directly
        self._builderid = None

        # this is created the first time we get a good build
        self.expectations = None

        # build/wannabuild slots: Build objects move along this sequence
        self.building = []
        # old_building holds active builds that were stolen from a predecessor
        self.old_building = weakref.WeakKeyDictionary()

        # buildslaves which have connected but which are not yet available.
        # These are always in the ATTACHING state.
        self.attaching_slaves = []

        # buildslaves at our disposal. Each SlaveBuilder instance has a
        # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a
        # Build is about to start, to make sure that they're still alive.
        self.slaves = []

        self.config = None
        self.builder_status = None

        if _addServices:
            self.reclaim_svc = internet.TimerService(10 * 60,
                                                     self.reclaimAllBuilds)
            self.reclaim_svc.setServiceParent(self)

            # update big status every 30 minutes, working around #1980
            self.updateStatusService = internet.TimerService(30 * 60,
                                                             self.updateBigStatus)
            self.updateStatusService.setServiceParent(self)

    @defer.inlineCallbacks
    def reconfigService(self, new_config):
        # find this builder in the config
        found_config = False
        for builder_config in new_config.builders:
            if builder_config.name == self.name:
                found_config = True
                break
        assert found_config, "no config found for builder '%s'" % self.name

        # set up a builder status object on the first reconfig
        if not self.builder_status:
            self.builder_status = self.master.status.builderAdded(
                name=builder_config.name,
                basedir=builder_config.builddir,
                tags=builder_config.tags,
                description=builder_config.description)

        self.config = builder_config

        # allocate the builderid now, so that the builder is visible in the
        # web UI; without this, the builder wouldn't appear until it performed
        # a build.
        yield self.getBuilderId()

        self.builder_status.setDescription(builder_config.description)
        self.builder_status.setTags(builder_config.tags)
        self.builder_status.setSlavenames(self.config.slavenames)
        self.builder_status.setCacheSize(new_config.caches['Builds'])

        # if we have any slavebuilders attached which are no longer configured,
        # drop them.
        new_slavenames = set(builder_config.slavenames)
        self.slaves = [s for s in self.slaves
                       if s.slave.slavename in new_slavenames]

    def __repr__(self):
        return "<Builder '%r' at %d>" % (self.name, id(self))

    def getBuilderId(self):
        # since findBuilderId is idempotent, there's no reason to add
        # additional locking around this function.
        if self._builderid:
            return defer.succeed(self._builderid)
        # buildbot.config should ensure this is already unicode, but it doesn't
        # hurt to check again
        name = ascii2unicode(self.name)
        d = self.master.data.updates.findBuilderId(name)

        @d.addCallback
        def keep(builderid):
            self._builderid = builderid
            return builderid
        return d

    @defer.inlineCallbacks
    def getOldestRequestTime(self):
        """Returns the submitted_at of the oldest unclaimed build request for
        this builder, or None if there are no build requests.

        @returns: datetime instance or None, via Deferred
        """
        unclaimed = yield self.master.data.get(('builders', ascii2unicode(self.name), 'buildrequests'),
                                               [resultspec.Filter('claimed', 'eq', [False])])
        if unclaimed:
            unclaimed = sorted([brd['submitted_at'] for brd in unclaimed])
            defer.returnValue(unclaimed[0])
        else:
            defer.returnValue(None)

    def reclaimAllBuilds(self):
        brids = set()
        for b in self.building:
            brids.update([br.id for br in b.requests])
        for b in self.old_building:
            brids.update([br.id for br in b.requests])

        if not brids:
            return defer.succeed(None)

        d = self.master.data.updates.reclaimBuildRequests(list(brids))
        d.addErrback(log.err, 'while re-claiming running BuildRequests')
        return d

    def getBuild(self, number):
        for b in self.building:
            if b.build_status and b.build_status.number == number:
                return b
        for b in self.old_building.keys():
            if b.build_status and b.build_status.number == number:
                return b
        return None

    def addLatentSlave(self, slave):
        assert interfaces.ILatentBuildSlave.providedBy(slave)
        for s in self.slaves:
            if s == slave:
                break
        else:
            sb = slavebuilder.LatentSlaveBuilder(slave, self)
            self.builder_status.addPointEvent(
                ['added', 'latent', slave.slavename])
            self.slaves.append(sb)
            self.botmaster.maybeStartBuildsForBuilder(self.name)

    def attached(self, slave, commands):
        """This is invoked by the BuildSlave when the self.slavename bot
        registers their builder.

        @type  slave: L{buildbot.buildslave.BuildSlave}
        @param slave: the BuildSlave that represents the buildslave as a whole
        @type  remote: L{twisted.spread.pb.RemoteReference}
        @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder}
        @type  commands: dict: string -> string, or None
        @param commands: provides the slave's version of each RemoteCommand

        @rtype:  L{twisted.internet.defer.Deferred}
        @return: a Deferred that fires (with 'self') when the slave-side
                 builder is fully attached and ready to accept commands.
        """
        for s in self.attaching_slaves + self.slaves:
            if s.slave == slave:
                # already attached to them. This is fairly common, since
                # attached() gets called each time we receive the builder
                # list from the slave, and we ask for it each time we add or
                # remove a builder. So if the slave is hosting builders
                # A,B,C, and the config file changes A, we'll remove A and
                # re-add it, triggering two builder-list requests, getting
                # two redundant calls to attached() for B, and another two
                # for C.
                #
                # Therefore, when we see that we're already attached, we can
                # just ignore it.
                return defer.succeed(self)

        sb = slavebuilder.SlaveBuilder()
        sb.setBuilder(self)
        self.attaching_slaves.append(sb)
        d = sb.attached(slave, commands)
        d.addCallback(self._attached)
        d.addErrback(self._not_attached, slave)
        return d

    def _attached(self, sb):
        self.builder_status.addPointEvent(['connect', sb.slave.slavename])
        self.attaching_slaves.remove(sb)
        self.slaves.append(sb)

        self.updateBigStatus()

        return self

    def _not_attached(self, why, slave):
        # already log.err'ed by SlaveBuilder._attachFailure
        # TODO: remove from self.slaves (except that detached() should get
        #       run first, right?)
        log.err(why, 'slave failed to attach')
        self.builder_status.addPointEvent(['failed', 'connect',
                                           slave.slavename])
        # TODO: add an HTMLLogFile of the exception

    def detached(self, slave):
        """This is called when the connection to the bot is lost."""
        for sb in self.attaching_slaves + self.slaves:
            if sb.slave == slave:
                break
        else:
            log.msg("WEIRD: Builder.detached(%s) (%s)"
                    " not in attaching_slaves(%s)"
                    " or slaves(%s)" % (slave, slave.slavename,
                                        self.attaching_slaves,
                                        self.slaves))
            return
        if sb.state == BUILDING:
            # the Build's .lostRemote method (invoked by a notifyOnDisconnect
            # handler) will cause the Build to be stopped, probably right
            # after the notifyOnDisconnect that invoked us finishes running.
            pass

        if sb in self.attaching_slaves:
            self.attaching_slaves.remove(sb)
        if sb in self.slaves:
            self.slaves.remove(sb)

        self.builder_status.addPointEvent(['disconnect', slave.slavename])
        sb.detached()  # inform the SlaveBuilder that their slave went away
        self.updateBigStatus()

    def updateBigStatus(self):
        try:
            # Catch exceptions here, since this is called in a LoopingCall.
            if not self.builder_status:
                return
            if not self.slaves:
                self.builder_status.setBigState("offline")
            elif self.building or self.old_building:
                self.builder_status.setBigState("building")
            else:
                self.builder_status.setBigState("idle")
        except Exception:
            log.err(None, "while trying to update status of builder '%s'" % (self.name,))

    def getAvailableSlaves(self):
        return [sb for sb in self.slaves if sb.isAvailable()]

    def canStartWithSlavebuilder(self, slavebuilder):
        locks = [(self.botmaster.getLockFromLockAccess(access), access)
                 for access in self.config.locks]
        return Build.canStartWithSlavebuilder(locks, slavebuilder)

    def canStartBuild(self, slavebuilder, breq):
        if callable(self.config.canStartBuild):
            return defer.maybeDeferred(self.config.canStartBuild, self, slavebuilder, breq)
        return defer.succeed(True)

    @defer.inlineCallbacks
    def _startBuildFor(self, slavebuilder, buildrequests):
        # Build a stack of cleanup functions so that, at any point, we can
        # abort this operation and unwind the commitments made so far.
        cleanups = []

        def run_cleanups():
            try:
                while cleanups:
                    fn = cleanups.pop()
                    fn()
            except:
                log.err(failure.Failure(), "while running %r" % (run_cleanups,))

        # the last cleanup we want to perform is to update the big
        # status based on any other cleanup
        cleanups.append(lambda: self.updateBigStatus())

        build = self.config.factory.newBuild(buildrequests)
        build.setBuilder(self)
        log.msg("starting build %s using slave %s" % (build, slavebuilder))

        # set up locks
        build.setLocks(self.config.locks)
        cleanups.append(lambda: slavebuilder.slave.releaseLocks())

        if len(self.config.env) > 0:
            build.setSlaveEnvironment(self.config.env)

        # append the build to self.building
        self.building.append(build)
        cleanups.append(lambda: self.building.remove(build))

        # update the big status accordingly
        self.updateBigStatus()

        try:
            ready = yield slavebuilder.prepare(self.builder_status, build)
        except:
            log.err(failure.Failure(), 'while preparing slavebuilder:')
            ready = False

        # If prepare returns True then it is ready and we start a build
        # If it returns false then we don't start a new build.
        if not ready:
            log.msg("slave %s can't build %s after all; re-queueing the "
                    "request" % (build, slavebuilder))
            run_cleanups()
            defer.returnValue(False)
            return

        # ping the slave to make sure they're still there. If they've
        # fallen off the map (due to a NAT timeout or something), this
        # will fail in a couple of minutes, depending upon the TCP
        # timeout.
        #
        # TODO: This can unnecessarily suspend the starting of a build, in
        # situations where the slave is live but is pushing lots of data to
        # us in a build.
        log.msg("starting build %s.. pinging the slave %s"
                % (build, slavebuilder))
        try:
            ping_success = yield slavebuilder.ping()
        except:
            log.err(failure.Failure(), 'while pinging slave before build:')
            ping_success = False

        if not ping_success:
            log.msg("slave ping failed; re-queueing the request")
            run_cleanups()
            defer.returnValue(False)
            return

        # The buildslave is ready to go. slavebuilder.buildStarted() sets its
        # state to BUILDING (so we won't try to use it for any other builds).
        # This gets set back to IDLE by the Build itself when it finishes.
        slavebuilder.buildStarted()
        cleanups.append(lambda: slavebuilder.buildFinished())

        # tell the remote that it's starting a build, too
        try:
            yield slavebuilder.slave.conn.remoteStartBuild(build.builder.name)
        except:
            log.err(failure.Failure(), 'while calling remote startBuild:')
            run_cleanups()
            defer.returnValue(False)
            return

        # create the BuildStatus object that goes with the Build
        bs = self.builder_status.newBuild()

        # IMPORTANT: no yielding is allowed from here to the startBuild call!

        # it's possible that we lost the slave remote between the ping above
        # and now.  If so, bail out.  The build.startBuild call below transfers
        # responsibility for monitoring this connection to the Build instance,
        # so this check ensures we hand off a working connection.
        if not slavebuilder.slave.conn:  # TODO: replace with isConnected()
            log.msg("slave disappeared before build could start")
            run_cleanups()
            defer.returnValue(False)
            return

        # let status know
        self.master.status.build_started(buildrequests[0].id, self.name, bs)

        # start the build. This will first set up the steps, then tell the
        # BuildStatus that it has started, which will announce it to the world
        # (through our BuilderStatus object, which is its parent).  Finally it
        # will start the actual build process.  This is done with a fresh
        # Deferred since _startBuildFor should not wait until the build is
        # finished.  This uses `maybeDeferred` to ensure that any exceptions
        # raised by startBuild are treated as deferred errbacks (see
        # http://trac.buildbot.net/ticket/2428).
        d = defer.maybeDeferred(build.startBuild,
                                bs, self.expectations, slavebuilder)
        d.addCallback(lambda _: self.buildFinished(build, slavebuilder))
        # this shouldn't happen. if it does, the slave will be wedged
        d.addErrback(log.err, 'from a running build; this is a '
                     'serious error - please file a bug at http://buildbot.net')

        # make sure the builder's status is represented correctly
        self.updateBigStatus()

        defer.returnValue(True)

    def setupProperties(self, props):
        props.setProperty("buildername", self.name, "Builder")
        if len(self.config.properties) > 0:
            for propertyname in self.config.properties:
                props.setProperty(propertyname,
                                  self.config.properties[propertyname],
                                  "Builder")

    def buildFinished(self, build, sb):
        """This is called when the Build has finished (either success or
        failure). Any exceptions during the build are reported with
        results=FAILURE, not with an errback."""

        # by the time we get here, the Build has already released the slave,
        # which will trigger a check for any now-possible build requests
        # (maybeStartBuilds)

        results = build.build_status.getResults()

        self.building.remove(build)
        if results == RETRY:
            d = self._resubmit_buildreqs(build)
            d.addErrback(log.err, 'while resubmitting a build request')
        else:
            complete_at_epoch = reactor.seconds()
            complete_at = epoch2datetime(complete_at_epoch)
            brids = [br.id for br in build.requests]

            d = self.master.data.updates.completeBuildRequests(brids, results, complete_at=complete_at)
            d.addCallback(lambda _:
                          self._notify_completions(build.requests, results,
                                                   complete_at_epoch))
            # nothing in particular to do with this deferred, so just log it if
            # it fails..
            d.addErrback(log.err, 'while marking build requests as completed')

        if sb.slave:
            sb.slave.releaseLocks()

        self.updateBigStatus()

    @defer.inlineCallbacks
    def _notify_completions(self, requests, results, complete_at_epoch):
        updates = self.master.data.updates

        # send a message for each request
        for br in requests:
            updates.completeBuildRequests([br.id], results,
                                          epoch2datetime(complete_at_epoch))

        # check for completed buildsets -- one call for each build request with
        # a unique bsid
        seen_bsids = set()
        for br in requests:
            if br.bsid in seen_bsids:
                continue
            seen_bsids.add(br.bsid)
            yield updates.maybeBuildsetComplete(br.bsid)

    def _resubmit_buildreqs(self, build):
        brids = [br.id for br in build.requests]
        d = self.master.data.updates.unclaimBuildRequests(brids)

        @d.addCallback
        def notify(_):
            pass  # XXX method does not exist
            # self._msg_buildrequests_unclaimed(build.requests)
        return d

    def setExpectations(self, progress):
        """Mark the build as successful and update expectations for the next
        build. Only call this when the build did not fail in any way that
        would invalidate the time expectations generated by it. (if the
        compile failed and thus terminated early, we can't use the last
        build to predict how long the next one will take).
        """
        if self.expectations:
            self.expectations.update(progress)
        else:
            # the first time we get a good build, create our Expectations
            # based upon its results
            self.expectations = Expectations(progress)
        log.msg("new expectations: %s seconds" %
                self.expectations.expectedBuildTime())

    # Build Creation

    @defer.inlineCallbacks
    def maybeStartBuild(self, slavebuilder, breqs, _reactor=reactor):
        # This method is called by the botmaster whenever this builder should
        # start a set of buildrequests on a slave. Do not call this method
        # directly - use master.botmaster.maybeStartBuildsForBuilder, or one of
        # the other similar methods if more appropriate

        # first, if we're not running, then don't start builds; stopService
        # uses this to ensure that any ongoing maybeStartBuild invocations
        # are complete before it stops.
        if not self.running:
            defer.returnValue(False)
            return

        # If the build fails from here on out (e.g., because a slave has failed),
        # it will be handled outside of this function. TODO: test that!

        build_started = yield self._startBuildFor(slavebuilder, breqs)
        defer.returnValue(build_started)

    # a few utility functions to make the maybeStartBuild a bit shorter and
    # easier to read

    def getMergeRequestsFn(self):
        """Helper function to determine which mergeRequests function to use
        from L{_mergeRequests}, or None for no merging"""
        # first, seek through builder, global, and the default
        mergeRequests_fn = self.config.mergeRequests
        if mergeRequests_fn is None:
            mergeRequests_fn = self.master.config.mergeRequests
        if mergeRequests_fn is None:
            mergeRequests_fn = True

        # then translate False and True properly
        if mergeRequests_fn is False:
            mergeRequests_fn = None
        elif mergeRequests_fn is True:
            mergeRequests_fn = Builder._defaultMergeRequestFn

        return mergeRequests_fn

    def _defaultMergeRequestFn(self, req1, req2):
        return req1.canBeMergedWith(req2)
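
getMergeRequestsFn() above resolves the merge policy in order: the builder's own mergeRequests setting, then the master-wide one, and finally the built-in default, which simply calls req1.canBeMergedWith(req2). A custom policy is just a callable with the same shape as _defaultMergeRequestFn, i.e. it receives the builder and two build requests. Below is a hedged sketch of such a policy; the same-branch restriction and the source-stamp attribute access are assumptions for illustration, not something the listed code guarantees.

# Illustrative sketch: a mergeRequests callable with the same
# (builder, req1, req2) shape as Builder._defaultMergeRequestFn above.
# The same-branch restriction is an example policy only.
def mergeOnlySameBranch(builder, req1, req2):
    # start from the default compatibility test used by the built-in policy
    if not req1.canBeMergedWith(req2):
        return False
    # additionally require that both requests refer to the same branch
    # (the .source.branch attributes assume the usual source-stamp layout)
    return req1.source.branch == req2.source.branch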
Code example #6
0
File: builder.py Project: knielsen/buildbot
class Builder(pb.Referenceable):
    """I manage all Builds of a given type.

    Each Builder is created by an entry in the config file (the c['builders']
    list), with a number of parameters.

    One of these parameters is the L{buildbot.process.factory.BuildFactory}
    object that is associated with this Builder. The factory is responsible
    for creating new L{Build<buildbot.process.base.Build>} objects. Each
    Build object defines when and how the build is performed, so a new
    Factory or Builder should be defined to control this behavior.

    The Builder holds on to a number of L{base.BuildRequest} objects in a
    list named C{.buildable}. Incoming BuildRequest objects will be added to
    this list, or (if possible) merged into an existing request. When a slave
    becomes available, I will use my C{BuildFactory} to turn the request into
    a new C{Build} object. The C{BuildRequest} is forgotten, the C{Build}
    goes into C{.building} while it runs. Once the build finishes, I will
    discard it.

    I maintain a list of available SlaveBuilders, one for each connected
    slave that the C{slavenames} parameter says we can use. Some of these
    will be idle, some of them will be busy running builds for me. If there
    are multiple slaves, I can run multiple builds at once.

    I also manage forced builds, progress expectation (ETA) management, and
    some status delivery chores.

    @type buildable: list of L{buildbot.process.base.BuildRequest}
    @ivar buildable: BuildRequests that are ready to build, but which are
                     waiting for a buildslave to be available.

    @type building: list of L{buildbot.process.base.Build}
    @ivar building: Builds that are actively running

    @type slaves: list of L{buildbot.buildslave.BuildSlave} objects
    @ivar slaves: the slaves currently available for building
    """

    expectations = None # this is created the first time we get a good build
    START_BUILD_TIMEOUT = 10
    CHOOSE_SLAVES_RANDOMLY = True # disabled for determinism during tests

    def __init__(self, setup, builder_status):
        """
        @type  setup: dict
        @param setup: builder setup data, as stored in
                      BuildmasterConfig['builders'].  Contains name,
                      slavename(s), builddir, slavebuilddir, factory, locks.
        @type  builder_status: L{buildbot.status.builder.BuilderStatus}
        """
        self.name = setup['name']
        self.slavenames = []
        if setup.has_key('slavename'):
            self.slavenames.append(setup['slavename'])
        if setup.has_key('slavenames'):
            self.slavenames.extend(setup['slavenames'])
        self.builddir = setup['builddir']
        self.slavebuilddir = setup['slavebuilddir']
        self.buildFactory = setup['factory']
        self.nextSlave = setup.get('nextSlave')
        if self.nextSlave is not None and not callable(self.nextSlave):
            raise ValueError("nextSlave must be callable")
        self.locks = setup.get("locks", [])
        self.env = setup.get('env', {})
        assert isinstance(self.env, dict)
        if setup.has_key('periodicBuildTime'):
            raise ValueError("periodicBuildTime can no longer be defined as"
                             " part of the Builder: use scheduler.Periodic"
                             " instead")
        self.nextBuild = setup.get('nextBuild')
        if self.nextBuild is not None and not callable(self.nextBuild):
            raise ValueError("nextBuild must be callable")

        # build/wannabuild slots: Build objects move along this sequence
        self.buildable = []
        self.building = []
        # old_building holds active builds that were stolen from a predecessor
        self.old_building = weakref.WeakKeyDictionary()

        # buildslaves which have connected but which are not yet available.
        # These are always in the ATTACHING state.
        self.attaching_slaves = []

        # buildslaves at our disposal. Each SlaveBuilder instance has a
        # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a
        # Build is about to start, to make sure that they're still alive.
        self.slaves = []

        self.builder_status = builder_status
        self.builder_status.setSlavenames(self.slavenames)

        # for testing, to help synchronize tests
        self.watchers = {'attach': [], 'detach': [], 'detach_all': [],
                         'idle': []}

    def setBotmaster(self, botmaster):
        self.botmaster = botmaster

    def compareToSetup(self, setup):
        diffs = []
        setup_slavenames = []
        if setup.has_key('slavename'):
            setup_slavenames.append(setup['slavename'])
        setup_slavenames.extend(setup.get('slavenames', []))
        if setup_slavenames != self.slavenames:
            diffs.append('slavenames changed from %s to %s' \
                         % (self.slavenames, setup_slavenames))
        if setup['builddir'] != self.builddir:
            diffs.append('builddir changed from %s to %s' \
                         % (self.builddir, setup['builddir']))
        if setup['slavebuilddir'] != self.slavebuilddir:
            diffs.append('slavebuilddir changed from %s to %s' \
                         % (self.slavebuilddir, setup['slavebuilddir']))
        if setup['factory'] != self.buildFactory: # compare objects
            diffs.append('factory changed')
        oldlocks = [(lock.__class__, lock.name)
                    for lock in self.locks]
        newlocks = [(lock.__class__, lock.name)
                    for lock in setup.get('locks',[])]
        if oldlocks != newlocks:
            diffs.append('locks changed from %s to %s' % (oldlocks, newlocks))
        if setup.get('nextSlave') != self.nextSlave:
            diffs.append('nextSlave changed from %s to %s' % (self.nextSlave, setup.get('nextSlave')))
        if setup.get('nextBuild') != self.nextBuild:
            diffs.append('nextBuild changed from %s to %s' % (self.nextBuild, setup.get('nextBuild')))
        return diffs

    def __repr__(self):
        return "<Builder '%s' at %d>" % (self.name, id(self))

    def getOldestRequestTime(self):
        """Returns the timestamp of the oldest build request for this builder.

        If there are no build requests, None is returned."""
        if self.buildable:
            return self.buildable[0].getSubmitTime()
        else:
            return None

    def submitBuildRequest(self, req):
        req.setSubmitTime(now())
        self.buildable.append(req)
        req.requestSubmitted(self)
        self.builder_status.addBuildRequest(req.status)
        self.botmaster.maybeStartAllBuilds()

    def cancelBuildRequest(self, req):
        if req in self.buildable:
            self.buildable.remove(req)
            self.builder_status.removeBuildRequest(req.status, cancelled=True)
            return True
        return False

    def consumeTheSoulOfYourPredecessor(self, old):
        """Suck the brain out of an old Builder.

        This takes all the runtime state from an existing Builder and moves
        it into ourselves. This is used when a Builder is changed in the
        master.cfg file: the new Builder has a different factory, but we want
        all the builds that were queued for the old one to get processed by
        the new one. Any builds which are already running will keep running.
        The new Builder will get as many of the old SlaveBuilder objects as
        it wants."""

        log.msg("consumeTheSoulOfYourPredecessor: %s feeding upon %s" %
                (self, old))
        # we claim all the pending builds, removing them from the old
        # Builder's queue. This insures that the old Builder will not start
        # any new work.
        log.msg(" stealing %s buildrequests" % len(old.buildable))
        self.buildable.extend(old.buildable)
        old.buildable = []

        # old.building (i.e. builds which are still running) is not migrated
        # directly: it keeps track of builds which were in progress in the
        # old Builder. When those builds finish, the old Builder will be
        # notified, not us. However, since the old SlaveBuilder will point to
        # us, it is our maybeStartBuild() that will be triggered.
        if old.building:
            self.builder_status.setBigState("building")
        # however, we do grab a weakref to the active builds, so that our
        # BuilderControl can see them and stop them. We use a weakref because
        # we aren't the one to get notified, so there isn't a convenient
        # place to remove it from self.building .
        for b in old.building:
            self.old_building[b] = None
        for b in old.old_building:
            self.old_building[b] = None

        # Our set of slavenames may be different. Steal any of the old
        # buildslaves that we want to keep using.
        for sb in old.slaves[:]:
            if sb.slave.slavename in self.slavenames:
                log.msg(" stealing buildslave %s" % sb)
                self.slaves.append(sb)
                old.slaves.remove(sb)
                sb.setBuilder(self)

        # old.attaching_slaves:
        #  these SlaveBuilders are waiting on a sequence of calls:
        #  remote.setMaster and remote.print . When these two complete,
        #  old._attached will be fired, which will add a 'connect' event to
        #  the builder_status and try to start a build. However, we've pulled
        #  everything out of the old builder's queue, so it will have no work
        #  to do. The outstanding remote.setMaster/print call will be holding
        #  the last reference to the old builder, so it will disappear just
        #  after that response comes back.
        #
        #  The BotMaster will ask the slave to re-set their list of Builders
        #  shortly after this function returns, which will cause our
        #  attached() method to be fired with a bunch of references to remote
        #  SlaveBuilders, some of which we already have (by stealing them
        #  from the old Builder), some of which will be new. The new ones
        #  will be re-attached.

        #  Therefore, we don't need to do anything about old.attaching_slaves

        return # all done

    def getBuild(self, number):
        for b in self.building:
            if b.build_status.number == number:
                return b
        for b in self.old_building.keys():
            if b.build_status.number == number:
                return b
        return None

    def fireTestEvent(self, name, fire_with=None):
        if fire_with is None:
            fire_with = self
        watchers = self.watchers[name]
        self.watchers[name] = []
        for w in watchers:
            reactor.callLater(0, w.callback, fire_with)

    def addLatentSlave(self, slave):
        assert interfaces.ILatentBuildSlave.providedBy(slave)
        for s in self.slaves:
            if s == slave:
                break
        else:
            sb = LatentSlaveBuilder(slave, self)
            self.builder_status.addPointEvent(
                ['added', 'latent', slave.slavename])
            self.slaves.append(sb)
            reactor.callLater(0, self.botmaster.maybeStartAllBuilds)

    def attached(self, slave, remote, commands):
        """This is invoked by the BuildSlave when the self.slavename bot
        registers their builder.

        @type  slave: L{buildbot.buildslave.BuildSlave}
        @param slave: the BuildSlave that represents the buildslave as a whole
        @type  remote: L{twisted.spread.pb.RemoteReference}
        @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder}
        @type  commands: dict: string -> string, or None
        @param commands: provides the slave's version of each RemoteCommand

        @rtype:  L{twisted.internet.defer.Deferred}
        @return: a Deferred that fires (with 'self') when the slave-side
                 builder is fully attached and ready to accept commands.
        """
        for s in self.attaching_slaves + self.slaves:
            if s.slave == slave:
                # already attached to them. This is fairly common, since
                # attached() gets called each time we receive the builder
                # list from the slave, and we ask for it each time we add or
                # remove a builder. So if the slave is hosting builders
                # A,B,C, and the config file changes A, we'll remove A and
                # re-add it, triggering two builder-list requests, getting
                # two redundant calls to attached() for B, and another two
                # for C.
                #
                # Therefore, when we see that we're already attached, we can
                # just ignore it. TODO: build a diagram of the state
                # transitions here, I'm concerned about sb.attached() failing
                # and leaving sb.state stuck at 'ATTACHING', and about
                # the detached() message arriving while there's some
                # transition pending such that the response to the transition
                # re-vivifies sb
                return defer.succeed(self)

        sb = SlaveBuilder()
        sb.setBuilder(self)
        self.attaching_slaves.append(sb)
        d = sb.attached(slave, remote, commands)
        d.addCallback(self._attached)
        d.addErrback(self._not_attached, slave)
        return d

    def _attached(self, sb):
        # TODO: make this .addSlaveEvent(slave.slavename, ['connect']) ?
        self.builder_status.addPointEvent(['connect', sb.slave.slavename])
        self.attaching_slaves.remove(sb)
        self.slaves.append(sb)

        self.fireTestEvent('attach')
        return self

    def _not_attached(self, why, slave):
        # already log.err'ed by SlaveBuilder._attachFailure
        # TODO: make this .addSlaveEvent?
        # TODO: remove from self.slaves (except that detached() should get
        #       run first, right?)
        self.builder_status.addPointEvent(['failed', 'connect',
                                           slave.slave.slavename])
        # TODO: add an HTMLLogFile of the exception
        self.fireTestEvent('attach', why)

    def detached(self, slave):
        """This is called when the connection to the bot is lost."""
        log.msg("%s.detached" % self, slave.slavename)
        for sb in self.attaching_slaves + self.slaves:
            if sb.slave == slave:
                break
        else:
            log.msg("WEIRD: Builder.detached(%s) (%s)"
                    " not in attaching_slaves(%s)"
                    " or slaves(%s)" % (slave, slave.slavename,
                                        self.attaching_slaves,
                                        self.slaves))
            return
        if sb.state == BUILDING:
            # the Build's .lostRemote method (invoked by a notifyOnDisconnect
            # handler) will cause the Build to be stopped, probably right
            # after the notifyOnDisconnect that invoked us finishes running.

            # TODO: should failover to a new Build
            #self.retryBuild(sb.build)
            pass

        if sb in self.attaching_slaves:
            self.attaching_slaves.remove(sb)
        if sb in self.slaves:
            self.slaves.remove(sb)

        # TODO: make this .addSlaveEvent?
        self.builder_status.addPointEvent(['disconnect', slave.slavename])
        sb.detached() # inform the SlaveBuilder that their slave went away
        self.updateBigStatus()
        self.fireTestEvent('detach')
        if not self.slaves:
            self.fireTestEvent('detach_all')

    def updateBigStatus(self):
        if not self.slaves:
            self.builder_status.setBigState("offline")
        elif self.building:
            self.builder_status.setBigState("building")
        else:
            self.builder_status.setBigState("idle")
            self.fireTestEvent('idle')

    def maybeStartBuild(self):
        log.msg("maybeStartBuild %s: %s %s" %
                (self, self.buildable, self.slaves))
        if not self.buildable:
            self.updateBigStatus()
            return # nothing to do

        # pick an idle slave
        available_slaves = [sb for sb in self.slaves if sb.isAvailable()]
        if not available_slaves:
            log.msg("%s: want to start build, but we don't have a remote"
                    % self)
            self.updateBigStatus()
            return
        if self.nextSlave:
            sb = None
            try:
                sb = self.nextSlave(self, available_slaves)
            except:
                log.msg("Exception choosing next slave")
                log.err(Failure())

            if not sb:
                log.msg("%s: want to start build, but we don't have a remote"
                        % self)
                self.updateBigStatus()
                return
        elif self.CHOOSE_SLAVES_RANDOMLY:
            sb = random.choice(available_slaves)
        else:
            sb = available_slaves[0]

        # there is something to build, and there is a slave on which to build
        # it. Grab the oldest request, see if we can merge it with anything
        # else.
        if not self.nextBuild:
            req = self.buildable.pop(0)
        else:
            try:
                req = self.nextBuild(self, self.buildable)
                if not req:
                    # Nothing to do
                    self.updateBigStatus()
                    return
                self.buildable.remove(req)
            except:
                log.msg("Exception choosing next build")
                log.err(Failure())
                self.updateBigStatus()
                return
        self.builder_status.removeBuildRequest(req.status)
        mergers = []
        botmaster = self.botmaster
        for br in self.buildable[:]:
            if botmaster.shouldMergeRequests(self, req, br):
                self.buildable.remove(br)
                self.builder_status.removeBuildRequest(br.status)
                mergers.append(br)
        requests = [req] + mergers

        # Create a new build from our build factory and set ourself as the
        # builder.
        build = self.buildFactory.newBuild(requests)
        build.setBuilder(self)
        build.setLocks(self.locks)
        if len(self.env) > 0:
            build.setSlaveEnvironment(self.env)

        # start it
        self.startBuild(build, sb)

    def startBuild(self, build, sb):
        """Start a build on the given slave.
        @param build: the L{base.Build} to start
        @param sb: the L{SlaveBuilder} which will host this build

        @return: a Deferred which fires with a
        L{buildbot.interfaces.IBuildControl} that can be used to stop the
        Build, or to access a L{buildbot.interfaces.IBuildStatus} which will
        watch the Build as it runs. """

        self.building.append(build)
        self.updateBigStatus()
        log.msg("starting build %s using slave %s" % (build, sb))
        d = sb.prepare(self.builder_status)
        def _ping(ign):
            # ping the slave to make sure they're still there. If they're
            # fallen off the map (due to a NAT timeout or something), this
            # will fail in a couple of minutes, depending upon the TCP
            # timeout. TODO: consider making this time out faster, or at
            # least characterize the likely duration.
            log.msg("starting build %s.. pinging the slave %s" % (build, sb))
            return sb.ping(self.START_BUILD_TIMEOUT)
        d.addCallback(_ping)
        d.addCallback(self._startBuild_1, build, sb)
        return d

    def _startBuild_1(self, res, build, sb):
        if not res:
            return self._startBuildFailed("slave ping failed", build, sb)
        # The buildslave is ready to go. sb.buildStarted() sets its state to
        # BUILDING (so we won't try to use it for any other builds). This
        # gets set back to IDLE by the Build itself when it finishes.
        sb.buildStarted()
        d = sb.remote.callRemote("startBuild")
        d.addCallbacks(self._startBuild_2, self._startBuildFailed,
                       callbackArgs=(build,sb), errbackArgs=(build,sb))
        return d

    def _startBuild_2(self, res, build, sb):
        # create the BuildStatus object that goes with the Build
        bs = self.builder_status.newBuild()

        # start the build. This will first set up the steps, then tell the
        # BuildStatus that it has started, which will announce it to the
        # world (through our BuilderStatus object, which is its parent).
        # Finally it will start the actual build process.
        d = build.startBuild(bs, self.expectations, sb)
        d.addCallback(self.buildFinished, sb)
        d.addErrback(log.err) # this shouldn't happen. if it does, the slave
                              # will be wedged
        for req in build.requests:
            req.buildStarted(build, bs)
        return build # this is the IBuildControl

    def _startBuildFailed(self, why, build, sb):
        # put the build back on the buildable list
        log.msg("I tried to tell the slave that the build %s started, but "
                "remote_startBuild failed: %s" % (build, why))
        # release the slave. This will queue a call to maybeStartBuild, which
        # will fire after other notifyOnDisconnect handlers have marked the
        # slave as disconnected (so we don't try to use it again).
        sb.buildFinished()

        log.msg("re-queueing the BuildRequest")
        self.building.remove(build)
        for req in build.requests:
            self.buildable.insert(0, req) # the interrupted build gets first
                                          # priority
            self.builder_status.addBuildRequest(req.status)


    def buildFinished(self, build, sb):
        """This is called when the Build has finished (either success or
        failure). Any exceptions during the build are reported with
        results=FAILURE, not with an errback."""

        # by the time we get here, the Build has already released the slave
        # (which queues a call to maybeStartBuild)

        self.building.remove(build)
        for req in build.requests:
            req.finished(build.build_status)

    def setExpectations(self, progress):
        """Mark the build as successful and update expectations for the next
        build. Only call this when the build did not fail in any way that
        would invalidate the time expectations generated by it. (if the
        compile failed and thus terminated early, we can't use the last
        build to predict how long the next one will take).
        """
        if self.expectations:
            self.expectations.update(progress)
        else:
            # the first time we get a good build, create our Expectations
            # based upon its results
            self.expectations = Expectations(progress)
        log.msg("new expectations: %s seconds" % \
                self.expectations.expectedBuildTime())

    def shutdownSlave(self):
        if self.remote:
            self.remote.callRemote("shutdown")
Code example #7
0
class Builder(pb.Referenceable):
    """I manage all Builds of a given type.

    Each Builder is created by an entry in the config file (the c['builders']
    list), with a number of parameters.

    One of these parameters is the L{buildbot.process.factory.BuildFactory}
    object that is associated with this Builder. The factory is responsible
    for creating new L{Build<buildbot.process.base.Build>} objects. Each
    Build object defines when and how the build is performed, so a new
    Factory or Builder should be defined to control this behavior.

    The Builder holds on to a number of L{base.BuildRequest} objects in a
    list named C{.buildable}. Incoming BuildRequest objects will be added to
    this list, or (if possible) merged into an existing request. When a slave
    becomes available, I will use my C{BuildFactory} to turn the request into
    a new C{Build} object. The C{BuildRequest} is forgotten, the C{Build}
    goes into C{.building} while it runs. Once the build finishes, I will
    discard it.

    I maintain a list of available SlaveBuilders, one for each connected
    slave that the C{slavenames} parameter says we can use. Some of these
    will be idle, some of them will be busy running builds for me. If there
    are multiple slaves, I can run multiple builds at once.

    I also manage forced builds, progress expectation (ETA) management, and
    some status delivery chores.

    I am persisted in C{BASEDIR/BUILDERNAME/builder}, so I can remember how
    long a build usually takes to run (in my C{expectations} attribute). This
    pickle also includes the L{buildbot.status.builder.BuilderStatus} object,
    which remembers the set of historic builds.

    @type buildable: list of L{buildbot.process.base.BuildRequest}
    @ivar buildable: BuildRequests that are ready to build, but which are
                     waiting for a buildslave to be available.

    @type building: list of L{buildbot.process.base.Build}
    @ivar building: Builds that are actively running

    @type slaves: list of L{buildbot.buildslave.BuildSlave} objects
    @ivar slaves: the slaves currently available for building
    """

    expectations = None  # this is created the first time we get a good build
    START_BUILD_TIMEOUT = 10
    CHOOSE_SLAVES_RANDOMLY = True  # disabled for determinism during tests

    def __init__(self, setup, builder_status):
        """
        @type  setup: dict
        @param setup: builder setup data, as stored in
                      BuildmasterConfig['builders'].  Contains name,
                      slavename(s), builddir, factory, locks.
        @type  builder_status: L{buildbot.status.builder.BuilderStatus}
        """
        self.name = setup['name']
        self.slavenames = []
        if setup.has_key('slavename'):
            self.slavenames.append(setup['slavename'])
        if setup.has_key('slavenames'):
            self.slavenames.extend(setup['slavenames'])
        self.builddir = setup['builddir']
        self.buildFactory = setup['factory']
        self.locks = setup.get("locks", [])
        if setup.has_key('periodicBuildTime'):
            raise ValueError("periodicBuildTime can no longer be defined as"
                             " part of the Builder: use scheduler.Periodic"
                             " instead")

        # build/wannabuild slots: Build objects move along this sequence
        self.buildable = []
        self.building = []
        # old_building holds active builds that were stolen from a predecessor
        self.old_building = weakref.WeakKeyDictionary()

        # buildslaves which have connected but which are not yet available.
        # These are always in the ATTACHING state.
        self.attaching_slaves = []

        # buildslaves at our disposal. Each SlaveBuilder instance has a
        # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a
        # Build is about to start, to make sure that they're still alive.
        self.slaves = []

        self.builder_status = builder_status
        self.builder_status.setSlavenames(self.slavenames)

        # for testing, to help synchronize tests
        self.watchers = {
            'attach': [],
            'detach': [],
            'detach_all': [],
            'idle': []
        }

    def setBotmaster(self, botmaster):
        self.botmaster = botmaster

    def compareToSetup(self, setup):
        diffs = []
        setup_slavenames = []
        if setup.has_key('slavename'):
            setup_slavenames.append(setup['slavename'])
        setup_slavenames.extend(setup.get('slavenames', []))
        if setup_slavenames != self.slavenames:
            diffs.append('slavenames changed from %s to %s' \
                         % (self.slavenames, setup_slavenames))
        if setup['builddir'] != self.builddir:
            diffs.append('builddir changed from %s to %s' \
                         % (self.builddir, setup['builddir']))
        if setup['factory'] != self.buildFactory:  # compare objects
            diffs.append('factory changed')
        oldlocks = [(lock.__class__, lock.name) for lock in self.locks]
        newlocks = [(lock.__class__, lock.name)
                    for lock in setup.get('locks', [])]
        if oldlocks != newlocks:
            diffs.append('locks changed from %s to %s' % (oldlocks, newlocks))
        return diffs

    def __repr__(self):
        return "<Builder '%s' at %d>" % (self.name, id(self))

    def getOldestRequestTime(self):
        """Returns the timestamp of the oldest build request for this builder.

        If there are no build requests, None is returned."""
        if self.buildable:
            return self.buildable[0].submittedAt
        else:
            return None

    def submitBuildRequest(self, req):
        req.submittedAt = now()
        self.buildable.append(req)
        req.requestSubmitted(self)
        self.builder_status.addBuildRequest(req.status)
        self.maybeStartBuild()

    def cancelBuildRequest(self, req):
        if req in self.buildable:
            self.buildable.remove(req)
            self.builder_status.removeBuildRequest(req.status)
            return True
        return False

    def __getstate__(self):
        d = self.__dict__.copy()
        # TODO: note that d['buildable'] can contain Deferreds
        del d['building']  # TODO: move these back to .buildable?
        del d['slaves']
        return d

    def __setstate__(self, d):
        self.__dict__ = d
        self.building = []
        self.slaves = []

    def consumeTheSoulOfYourPredecessor(self, old):
        """Suck the brain out of an old Builder.

        This takes all the runtime state from an existing Builder and moves
        it into ourselves. This is used when a Builder is changed in the
        master.cfg file: the new Builder has a different factory, but we want
        all the builds that were queued for the old one to get processed by
        the new one. Any builds which are already running will keep running.
        The new Builder will get as many of the old SlaveBuilder objects as
        it wants."""

        log.msg("consumeTheSoulOfYourPredecessor: %s feeding upon %s" %
                (self, old))
        # we claim all the pending builds, removing them from the old
        # Builder's queue. This insures that the old Builder will not start
        # any new work.
        log.msg(" stealing %s buildrequests" % len(old.buildable))
        self.buildable.extend(old.buildable)
        old.buildable = []

        # old.building (i.e. builds which are still running) is not migrated
        # directly: it keeps track of builds which were in progress in the
        # old Builder. When those builds finish, the old Builder will be
        # notified, not us. However, since the old SlaveBuilder will point to
        # us, it is our maybeStartBuild() that will be triggered.
        if old.building:
            self.builder_status.setBigState("building")
        # however, we do grab a weakref to the active builds, so that our
        # BuilderControl can see them and stop them. We use a weakref because
        # we aren't the one to get notified, so there isn't a convenient
        # place to remove it from self.building .
        for b in old.building:
            self.old_building[b] = None
        for b in old.old_building:
            self.old_building[b] = None

        # Our set of slavenames may be different. Steal any of the old
        # buildslaves that we want to keep using.
        for sb in old.slaves[:]:
            if sb.slave.slavename in self.slavenames:
                log.msg(" stealing buildslave %s" % sb)
                self.slaves.append(sb)
                old.slaves.remove(sb)
                sb.setBuilder(self)

        # old.attaching_slaves:
        #  these SlaveBuilders are waiting on a sequence of calls:
        #  remote.setMaster and remote.print . When these two complete,
        #  old._attached will be fired, which will add a 'connect' event to
        #  the builder_status and try to start a build. However, we've pulled
        #  everything out of the old builder's queue, so it will have no work
        #  to do. The outstanding remote.setMaster/print call will be holding
        #  the last reference to the old builder, so it will disappear just
        #  after that response comes back.
        #
        #  The BotMaster will ask the slave to re-set their list of Builders
        #  shortly after this function returns, which will cause our
        #  attached() method to be fired with a bunch of references to remote
        #  SlaveBuilders, some of which we already have (by stealing them
        #  from the old Builder), some of which will be new. The new ones
        #  will be re-attached.

        #  Therefore, we don't need to do anything about old.attaching_slaves

        return  # all done

    def getBuild(self, number):
        for b in self.building:
            if b.build_status.number == number:
                return b
        for b in self.old_building.keys():
            if b.build_status.number == number:
                return b
        return None

    def fireTestEvent(self, name, fire_with=None):
        if fire_with is None:
            fire_with = self
        watchers = self.watchers[name]
        self.watchers[name] = []
        for w in watchers:
            reactor.callLater(0, w.callback, fire_with)

    def attached(self, slave, remote, commands):
        """This is invoked by the BuildSlave when the self.slavename bot
        registers their builder.

        @type  slave: L{buildbot.buildslave.BuildSlave}
        @param slave: the BuildSlave that represents the buildslave as a whole
        @type  remote: L{twisted.spread.pb.RemoteReference}
        @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder}
        @type  commands: dict: string -> string, or None
        @param commands: provides the slave's version of each RemoteCommand

        @rtype:  L{twisted.internet.defer.Deferred}
        @return: a Deferred that fires (with 'self') when the slave-side
                 builder is fully attached and ready to accept commands.
        """
        for s in self.attaching_slaves + self.slaves:
            if s.slave == slave:
                # already attached to them. This is fairly common, since
                # attached() gets called each time we receive the builder
                # list from the slave, and we ask for it each time we add or
                # remove a builder. So if the slave is hosting builders
                # A,B,C, and the config file changes A, we'll remove A and
                # re-add it, triggering two builder-list requests, getting
                # two redundant calls to attached() for B, and another two
                # for C.
                #
                # Therefore, when we see that we're already attached, we can
                # just ignore it. TODO: build a diagram of the state
                # transitions here, I'm concerned about sb.attached() failing
                # and leaving sb.state stuck at 'ATTACHING', and about
                # the detached() message arriving while there's some
                # transition pending such that the response to the transition
                # re-vivifies sb
                return defer.succeed(self)

        sb = SlaveBuilder()
        sb.setBuilder(self)
        self.attaching_slaves.append(sb)
        d = sb.attached(slave, remote, commands)
        d.addCallback(self._attached)
        d.addErrback(self._not_attached, slave)
        return d

    def _attached(self, sb):
        # TODO: make this .addSlaveEvent(slave.slavename, ['connect']) ?
        self.builder_status.addPointEvent(['connect', sb.slave.slavename])
        self.attaching_slaves.remove(sb)
        self.slaves.append(sb)
        reactor.callLater(0, self.maybeStartBuild)

        self.fireTestEvent('attach')
        return self

    def _not_attached(self, why, slave):
        # already log.err'ed by SlaveBuilder._attachFailure
        # TODO: make this .addSlaveEvent?
        # TODO: remove from self.slaves (except that detached() should get
        #       run first, right?)
        self.builder_status.addPointEvent(
            ['failed', 'connect', slave.slavename])
        # TODO: add an HTMLLogFile of the exception
        self.fireTestEvent('attach', why)

    def detached(self, slave):
        """This is called when the connection to the bot is lost."""
        log.msg("%s.detached" % self, slave.slavename)
        for sb in self.attaching_slaves + self.slaves:
            if sb.slave == slave:
                break
        else:
            log.msg(
                "WEIRD: Builder.detached(%s) (%s)"
                " not in attaching_slaves(%s)"
                " or slaves(%s)" %
                (slave, slave.slavename, self.attaching_slaves, self.slaves))
            return
        if sb.state == BUILDING:
            # the Build's .lostRemote method (invoked by a notifyOnDisconnect
            # handler) will cause the Build to be stopped, probably right
            # after the notifyOnDisconnect that invoked us finishes running.

            # TODO: should failover to a new Build
            #self.retryBuild(sb.build)
            pass

        if sb in self.attaching_slaves:
            self.attaching_slaves.remove(sb)
        if sb in self.slaves:
            self.slaves.remove(sb)

        # TODO: make this .addSlaveEvent?
        self.builder_status.addPointEvent(['disconnect', slave.slavename])
        sb.detached()  # inform the SlaveBuilder that their slave went away
        self.updateBigStatus()
        self.fireTestEvent('detach')
        if not self.slaves:
            self.fireTestEvent('detach_all')

    def updateBigStatus(self):
        if not self.slaves:
            self.builder_status.setBigState("offline")
        elif self.building:
            self.builder_status.setBigState("building")
        else:
            self.builder_status.setBigState("idle")
            self.fireTestEvent('idle')

    def maybeStartBuild(self):
        log.msg("maybeStartBuild %s: %s %s" %
                (self, self.buildable, self.slaves))
        if not self.buildable:
            self.updateBigStatus()
            return  # nothing to do

        # pick an idle slave
        available_slaves = [sb for sb in self.slaves if sb.isAvailable()]
        if not available_slaves:
            log.msg("%s: want to start build, but we don't have a remote" %
                    self)
            self.updateBigStatus()
            return
        if self.CHOOSE_SLAVES_RANDOMLY:
            sb = random.choice(available_slaves)
        else:
            sb = available_slaves[0]

        # there is something to build, and there is a slave on which to build
        # it. Grab the oldest request, see if we can merge it with anything
        # else.
        req = self.buildable.pop(0)
        self.builder_status.removeBuildRequest(req.status)
        mergers = []
        for br in self.buildable[:]:
            if req.canBeMergedWith(br):
                self.buildable.remove(br)
                self.builder_status.removeBuildRequest(br.status)
                mergers.append(br)
        requests = [req] + mergers

        # Create a new build from our build factory and set ourself as the
        # builder.
        build = self.buildFactory.newBuild(requests)
        build.setBuilder(self)
        build.setLocks(self.locks)

        # start it
        self.startBuild(build, sb)

    def startBuild(self, build, sb):
        """Start a build on the given slave.
        @param build: the L{base.Build} to start
        @param sb: the L{SlaveBuilder} which will host this build

        @return: a Deferred which fires with a
        L{buildbot.interfaces.IBuildControl} that can be used to stop the
        Build, or to access a L{buildbot.interfaces.IBuildStatus} which will
        watch the Build as it runs. """

        self.building.append(build)
        self.updateBigStatus()

        log.msg("starting build %s.. pinging the slave %s" % (build, sb))
        # ping the slave to make sure they're still there. If they've fallen
        # off the map (due to a NAT timeout or something), this will fail in
        # a couple of minutes, depending upon the TCP timeout. TODO: consider
        # making this time out faster, or at least characterize the likely
        # duration.
        d = sb.ping(self.START_BUILD_TIMEOUT)
        d.addCallback(self._startBuild_1, build, sb)
        return d

    def _startBuild_1(self, res, build, sb):
        if not res:
            return self._startBuildFailed("slave ping failed", build, sb)
        # The buildslave is ready to go. sb.buildStarted() sets its state to
        # BUILDING (so we won't try to use it for any other builds). This
        # gets set back to IDLE by the Build itself when it finishes.
        sb.buildStarted()
        d = sb.remote.callRemote("startBuild")
        d.addCallbacks(self._startBuild_2,
                       self._startBuildFailed,
                       callbackArgs=(build, sb),
                       errbackArgs=(build, sb))
        return d

    def _startBuild_2(self, res, build, sb):
        # create the BuildStatus object that goes with the Build
        bs = self.builder_status.newBuild()

        # start the build. This will first set up the steps, then tell the
        # BuildStatus that it has started, which will announce it to the
        # world (through our BuilderStatus object, which is its parent).
        # Finally it will start the actual build process.
        d = build.startBuild(bs, self.expectations, sb)
        d.addCallback(self.buildFinished, sb)
        # this shouldn't happen. if it does, the slave will be wedged
        d.addErrback(log.err)
        for req in build.requests:
            req.buildStarted(build, bs)
        return build  # this is the IBuildControl

    def _startBuildFailed(self, why, build, sb):
        # put the build back on the buildable list
        log.msg("I tried to tell the slave that the build %s started, but "
                "remote_startBuild failed: %s" % (build, why))
        # release the slave. This will queue a call to maybeStartBuild, which
        # will fire after other notifyOnDisconnect handlers have marked the
        # slave as disconnected (so we don't try to use it again).
        sb.buildFinished()

        log.msg("re-queueing the BuildRequest")
        self.building.remove(build)
        for req in build.requests:
            # the interrupted build gets first priority
            self.buildable.insert(0, req)
            self.builder_status.addBuildRequest(req.status)

    def buildFinished(self, build, sb):
        """This is called when the Build has finished (either success or
        failure). Any exceptions during the build are reported with
        results=FAILURE, not with an errback."""

        # by the time we get here, the Build has already released the slave
        # (which queues a call to maybeStartBuild)

        self.building.remove(build)
        for req in build.requests:
            req.finished(build.build_status)

    def setExpectations(self, progress):
        """Mark the build as successful and update expectations for the next
        build. Only call this when the build did not fail in any way that
        would invalidate the time expectations generated by it. (if the
        compile failed and thus terminated early, we can't use the last
        build to predict how long the next one will take).
        """
        if self.expectations:
            self.expectations.update(progress)
        else:
            # the first time we get a good build, create our Expectations
            # based upon its results
            self.expectations = Expectations(progress)
        log.msg("new expectations: %s seconds" % \
                self.expectations.expectedBuildTime())

    def shutdownSlave(self):
        if self.remote:
            self.remote.callRemote("shutdown")
コード例 #8
0
class Builder(pb.Referenceable, service.MultiService):
    """I manage all Builds of a given type.

    Each Builder is created by an entry in the config file (the c['builders']
    list), with a number of parameters.

    One of these parameters is the L{buildbot.process.factory.BuildFactory}
    object that is associated with this Builder. The factory is responsible
    for creating new L{Build<buildbot.process.build.Build>} objects. Each
    Build object defines when and how the build is performed, so a new
    Factory or Builder should be defined to control this behavior.

    The Builder holds on to a number of L{BuildRequest} objects in a
    list named C{.buildable}. Incoming BuildRequest objects will be added to
    this list, or (if possible) merged into an existing request. When a slave
    becomes available, I will use my C{BuildFactory} to turn the request into
    a new C{Build} object. The C{BuildRequest} is forgotten; the C{Build}
    goes into C{.building} while it runs. Once the build finishes, I will
    discard it.

    I maintain a list of available SlaveBuilders, one for each connected
    slave that the C{slavenames} parameter says we can use. Some of these
    will be idle, some of them will be busy running builds for me. If there
    are multiple slaves, I can run multiple builds at once.

    I also manage forced builds, progress expectation (ETA) management, and
    some status delivery chores.

    @type buildable: list of L{buildbot.process.buildrequest.BuildRequest}
    @ivar buildable: BuildRequests that are ready to build, but which are
                     waiting for a buildslave to be available.

    @type building: list of L{buildbot.process.build.Build}
    @ivar building: Builds that are actively running

    @type slaves: list of L{buildbot.buildslave.BuildSlave} objects
    @ivar slaves: the slaves currently available for building
    """

    expectations = None  # this is created the first time we get a good build

    def __init__(self, setup, builder_status):
        """
        @type  setup: dict
        @param setup: builder setup data, as stored in
                      BuildmasterConfig['builders'].  Contains name,
                      slavename(s), builddir, slavebuilddir, factory, locks.
        @type  builder_status: L{buildbot.status.builder.BuilderStatus}
        """
        service.MultiService.__init__(self)
        self.name = setup['name']
        self.slavenames = []
        if 'slavename' in setup:
            self.slavenames.append(setup['slavename'])
        if 'slavenames' in setup:
            self.slavenames.extend(setup['slavenames'])
        self.builddir = setup['builddir']
        self.slavebuilddir = setup['slavebuilddir']
        self.buildFactory = setup['factory']
        self.nextSlave = setup.get('nextSlave')
        if self.nextSlave is not None and not callable(self.nextSlave):
            raise ValueError("nextSlave must be callable")
        self.locks = setup.get("locks", [])
        self.env = setup.get('env', {})
        assert isinstance(self.env, dict)
        if 'periodicBuildTime' in setup:
            raise ValueError("periodicBuildTime can no longer be defined as"
                             " part of the Builder: use scheduler.Periodic"
                             " instead")
        self.nextBuild = setup.get('nextBuild')
        if self.nextBuild is not None and not callable(self.nextBuild):
            raise ValueError("nextBuild must be callable")
        self.buildHorizon = setup.get('buildHorizon')
        self.logHorizon = setup.get('logHorizon')
        self.eventHorizon = setup.get('eventHorizon')
        self.mergeRequests = setup.get('mergeRequests', True)
        self.properties = setup.get('properties', {})
        self.category = setup.get('category', None)

        # build/wannabuild slots: Build objects move along this sequence
        self.building = []
        # old_building holds active builds that were stolen from a predecessor
        self.old_building = weakref.WeakKeyDictionary()

        # buildslaves which have connected but which are not yet available.
        # These are always in the ATTACHING state.
        self.attaching_slaves = []

        # buildslaves at our disposal. Each SlaveBuilder instance has a
        # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a
        # Build is about to start, to make sure that they're still alive.
        self.slaves = []

        self.builder_status = builder_status
        self.builder_status.setSlavenames(self.slavenames)
        self.builder_status.buildHorizon = self.buildHorizon
        self.builder_status.logHorizon = self.logHorizon
        self.builder_status.eventHorizon = self.eventHorizon

        self.reclaim_svc = internet.TimerService(10 * 60,
                                                 self.reclaimAllBuilds)
        self.reclaim_svc.setServiceParent(self)

        # for testing, to help synchronize tests
        self.watchers = {
            'attach': [],
            'detach': [],
            'detach_all': [],
            'idle': []
        }
        self.run_count = 0

        # add serialized-invocation behavior to maybeStartBuild
        self.maybeStartBuild = util.SerializedInvocation(
            self.doMaybeStartBuild)

    def stopService(self):
        d = defer.maybeDeferred(lambda: service.MultiService.stopService(self))

        def flushMaybeStartBuilds(_):
            # at this point, self.running = False, so another maybeStartBuild
            # invocation won't hurt anything, but it also will not complete
            # until any currently-running invocations are done.
            return self.maybeStartBuild()

        d.addCallback(flushMaybeStartBuilds)
        return d

    def setBotmaster(self, botmaster):
        self.botmaster = botmaster
        self.db = botmaster.db
        self.master_name = botmaster.master_name
        self.master_incarnation = botmaster.master_incarnation

    def compareToSetup(self, setup):
        diffs = []
        setup_slavenames = []
        if 'slavename' in setup:
            setup_slavenames.append(setup['slavename'])
        setup_slavenames.extend(setup.get('slavenames', []))
        if setup_slavenames != self.slavenames:
            diffs.append('slavenames changed from %s to %s' \
                         % (self.slavenames, setup_slavenames))
        if setup['builddir'] != self.builddir:
            diffs.append('builddir changed from %s to %s' \
                         % (self.builddir, setup['builddir']))
        if setup['slavebuilddir'] != self.slavebuilddir:
            diffs.append('slavebuilddir changed from %s to %s' \
                         % (self.slavebuilddir, setup['slavebuilddir']))
        if setup['factory'] != self.buildFactory:  # compare objects
            diffs.append('factory changed')
        if setup.get('locks', []) != self.locks:
            diffs.append('locks changed from %s to %s' %
                         (self.locks, setup.get('locks')))
        if setup.get('env', {}) != self.env:
            diffs.append('env changed from %s to %s' %
                         (self.env, setup.get('env', {})))
        if setup.get('nextSlave') != self.nextSlave:
            diffs.append('nextSlave changed from %s to %s' %
                         (self.nextSlave, setup.get('nextSlave')))
        if setup.get('nextBuild') != self.nextBuild:
            diffs.append('nextBuild changed from %s to %s' %
                         (self.nextBuild, setup.get('nextBuild')))
        if setup.get('buildHorizon', None) != self.buildHorizon:
            diffs.append('buildHorizon changed from %s to %s' %
                         (self.buildHorizon, setup.get('buildHorizon')))
        if setup.get('logHorizon', None) != self.logHorizon:
            diffs.append('logHorizon changed from %s to %s' %
                         (self.logHorizon, setup.get('logHorizon')))
        if setup.get('eventHorizon', None) != self.eventHorizon:
            diffs.append('eventHorizon changed from %s to %s' %
                         (self.eventHorizon, setup.get('eventHorizon')))
        if setup.get('category', None) != self.category:
            diffs.append('category changed from %r to %r' %
                         (self.category, setup.get('category', None)))

        return diffs

    def __repr__(self):
        return "<Builder '%r' at %d>" % (self.name, id(self))

    def triggerNewBuildCheck(self):
        self.botmaster.triggerNewBuildCheck()

    def run(self):
        """Check for work to be done. This should be called any time I might
        be able to start a job:

         - when the Builder is first created
         - when a new job has been added to the [buildrequests] DB table
         - when a slave has connected

        If I have both an available slave and the database contains a
        BuildRequest that I can handle, I will claim the BuildRequest and
        start the build. When the build finishes, I will retire the
        BuildRequest.
        """
        # overall plan:
        #  move .expectations to DB

        # if we're not running, we may still be called by leftovers from a
        # previous run of the loop, so just ignore the call.
        if not self.running:
            return

        self.run_count += 1

        available_slaves = [sb for sb in self.slaves if sb.isAvailable()]
        if not available_slaves:
            self.updateBigStatus()
            return
        d = self.db.runInteraction(self._claim_buildreqs, available_slaves)
        d.addCallback(self._start_builds)
        return d

    # slave-managers must refresh their claim on a build at least once an
    # hour, less any inter-manager clock skew
    RECLAIM_INTERVAL = 1 * 3600

    def _claim_buildreqs(self, t, available_slaves):
        # return a dict mapping slave -> (brid,ssid)
        now = util.now()
        old = now - self.RECLAIM_INTERVAL
        requests = self.db.get_unclaimed_buildrequests(self.name, old,
                                                       self.master_name,
                                                       self.master_incarnation,
                                                       t)

        assignments = {}
        while requests and available_slaves:
            sb = self._choose_slave(available_slaves)
            if not sb:
                log.msg("%s: want to start build, but we don't have a remote" %
                        self)
                break
            available_slaves.remove(sb)
            breq = self._choose_build(requests)
            if not breq:
                log.msg("%s: went to start build, but nextBuild said not to" %
                        self)
                break
            requests.remove(breq)
            merged_requests = [breq]
            for other_breq in requests[:]:
                if (self.mergeRequests and self.botmaster.shouldMergeRequests(
                        self, breq, other_breq)):
                    requests.remove(other_breq)
                    merged_requests.append(other_breq)
            assignments[sb] = merged_requests
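            # record our claim on these requests in the database so that other
            # masters (and later scheduling passes) do not start them as well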
            brids = [br.id for br in merged_requests]
            self.db.claim_buildrequests(now, self.master_name,
                                        self.master_incarnation, brids, t)
        return assignments

    def _choose_slave(self, available_slaves):
        # note: this might return None if the nextSlave() function decided to
        # not give us anything
        if self.nextSlave:
            try:
                return self.nextSlave(self, available_slaves)
            except:
                log.msg("Exception choosing next slave")
                log.err(Failure())
            return None
        return random.choice(available_slaves)

    def _choose_build(self, buildable):
        if self.nextBuild:
            try:
                return self.nextBuild(self, buildable)
            except:
                log.msg("Exception choosing next build")
                log.err(Failure())
            return None
        return buildable[0]

    def _start_builds(self, assignments):
        # because _claim_buildreqs runs in a separate thread, we might have
        # lost a slave by this point. We treat that case the same as if we
        # lose the slave right after the build starts: the initial ping
        # fails.
        for (sb, requests) in assignments.items():
            build = self.buildFactory.newBuild(requests)
            build.setBuilder(self)
            build.setLocks(self.locks)
            if len(self.env) > 0:
                build.setSlaveEnvironment(self.env)
            self.startBuild(build, sb)
        self.updateBigStatus()

    def getBuildable(self, limit=None):
        return self.db.runInteractionNow(self._getBuildable, limit)

    def _getBuildable(self, t, limit):
        now = util.now()
        old = now - self.RECLAIM_INTERVAL
        return self.db.get_unclaimed_buildrequests(self.name, old,
                                                   self.master_name,
                                                   self.master_incarnation, t,
                                                   limit)

    def getOldestRequestTime(self):
        """Returns the timestamp of the oldest build request for this builder.

        If there are no build requests, None is returned."""
        buildable = self.getBuildable(1)
        if buildable:
            # TODO: this is sorted by priority first, not strictly reqtime
            return buildable[0].getSubmitTime()
        return None

    def cancelBuildRequest(self, brid):
        return self.db.cancel_buildrequests([brid])

    def consumeTheSoulOfYourPredecessor(self, old):
        """Suck the brain out of an old Builder.

        This takes all the runtime state from an existing Builder and moves
        it into ourselves. This is used when a Builder is changed in the
        master.cfg file: the new Builder has a different factory, but we want
        all the builds that were queued for the old one to get processed by
        the new one. Any builds which are already running will keep running.
        The new Builder will get as many of the old SlaveBuilder objects as
        it wants."""

        log.msg("consumeTheSoulOfYourPredecessor: %s feeding upon %s" %
                (self, old))
        # all pending builds are stored in the DB, so we don't have to do
        # anything to claim them. The old builder will be stopService'd,
        # which should make sure they don't start any new work

        # this is kind of silly, but the builder status doesn't get updated
        # when the config changes, yet it stores the category.  So:
        self.builder_status.category = self.category

        # old.building (i.e. builds which are still running) is not migrated
        # directly: it keeps track of builds which were in progress in the
        # old Builder. When those builds finish, the old Builder will be
        # notified, not us. However, since the old SlaveBuilder will point to
        # us, it is our maybeStartBuild() that will be triggered.
        if old.building:
            self.builder_status.setBigState("building")
        # however, we do grab a weakref to the active builds, so that our
        # BuilderControl can see them and stop them. We use a weakref because
        # we aren't the one to get notified, so there isn't a convenient
        # place to remove it from self.building .
        for b in old.building:
            self.old_building[b] = None
        for b in old.old_building:
            self.old_building[b] = None

        # Our set of slavenames may be different. Steal any of the old
        # buildslaves that we want to keep using.
        for sb in old.slaves[:]:
            if sb.slave.slavename in self.slavenames:
                log.msg(" stealing buildslave %s" % sb)
                self.slaves.append(sb)
                old.slaves.remove(sb)
                sb.setBuilder(self)

        # old.attaching_slaves:
        #  these SlaveBuilders are waiting on a sequence of calls:
        #  remote.setMaster and remote.print . When these two complete,
        #  old._attached will be fired, which will add a 'connect' event to
        #  the builder_status and try to start a build. However, we've pulled
        #  everything out of the old builder's queue, so it will have no work
        #  to do. The outstanding remote.setMaster/print call will be holding
        #  the last reference to the old builder, so it will disappear just
        #  after that response comes back.
        #
        #  The BotMaster will ask the slave to re-set their list of Builders
        #  shortly after this function returns, which will cause our
        #  attached() method to be fired with a bunch of references to remote
        #  SlaveBuilders, some of which we already have (by stealing them
        #  from the old Builder), some of which will be new. The new ones
        #  will be re-attached.

        #  Therefore, we don't need to do anything about old.attaching_slaves

        return  # all done

    def reclaimAllBuilds(self):
        try:
            now = util.now()
            brids = set()
            for b in self.building:
                brids.update([br.id for br in b.requests])
            for b in self.old_building:
                brids.update([br.id for br in b.requests])
            self.db.claim_buildrequests(now, self.master_name,
                                        self.master_incarnation, brids)
        except:
            log.msg("Error in reclaimAllBuilds")
            log.err()

    def getBuild(self, number):
        for b in self.building:
            if b.build_status and b.build_status.number == number:
                return b
        for b in self.old_building.keys():
            if b.build_status and b.build_status.number == number:
                return b
        return None

    def fireTestEvent(self, name, fire_with=None):
        if fire_with is None:
            fire_with = self
        watchers = self.watchers[name]
        self.watchers[name] = []
        for w in watchers:
            eventually(w.callback, fire_with)

    def addLatentSlave(self, slave):
        assert interfaces.ILatentBuildSlave.providedBy(slave)
        for s in self.slaves:
            if s == slave:
                break
        else:
            sb = slavebuilder.LatentSlaveBuilder(slave, self)
            self.builder_status.addPointEvent(
                ['added', 'latent', slave.slavename])
            self.slaves.append(sb)
            self.triggerNewBuildCheck()

    def attached(self, slave, remote, commands):
        """This is invoked by the BuildSlave when the self.slavename bot
        registers their builder.

        @type  slave: L{buildbot.buildslave.BuildSlave}
        @param slave: the BuildSlave that represents the buildslave as a whole
        @type  remote: L{twisted.spread.pb.RemoteReference}
        @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder}
        @type  commands: dict: string -> string, or None
        @param commands: provides the slave's version of each RemoteCommand

        @rtype:  L{twisted.internet.defer.Deferred}
        @return: a Deferred that fires (with 'self') when the slave-side
                 builder is fully attached and ready to accept commands.
        """
        for s in self.attaching_slaves + self.slaves:
            if s.slave == slave:
                # already attached to them. This is fairly common, since
                # attached() gets called each time we receive the builder
                # list from the slave, and we ask for it each time we add or
                # remove a builder. So if the slave is hosting builders
                # A,B,C, and the config file changes A, we'll remove A and
                # re-add it, triggering two builder-list requests, getting
                # two redundant calls to attached() for B, and another two
                # for C.
                #
                # Therefore, when we see that we're already attached, we can
                # just ignore it.
                return defer.succeed(self)

        sb = slavebuilder.SlaveBuilder()
        sb.setBuilder(self)
        self.attaching_slaves.append(sb)
        d = sb.attached(slave, remote, commands)
        d.addCallback(self._attached)
        d.addErrback(self._not_attached, slave)
        return d

    def _attached(self, sb):
        self.builder_status.addPointEvent(['connect', sb.slave.slavename])
        self.attaching_slaves.remove(sb)
        self.slaves.append(sb)

        self.fireTestEvent('attach')
        return self

    def _not_attached(self, why, slave):
        # already log.err'ed by SlaveBuilder._attachFailure
        # TODO: remove from self.slaves (except that detached() should get
        #       run first, right?)
        log.err(why, 'slave failed to attach')
        self.builder_status.addPointEvent(
            ['failed', 'connect', slave.slavename])
        # TODO: add an HTMLLogFile of the exception
        self.fireTestEvent('attach', why)

    def detached(self, slave):
        """This is called when the connection to the bot is lost."""
        for sb in self.attaching_slaves + self.slaves:
            if sb.slave == slave:
                break
        else:
            log.msg(
                "WEIRD: Builder.detached(%s) (%s)"
                " not in attaching_slaves(%s)"
                " or slaves(%s)" %
                (slave, slave.slavename, self.attaching_slaves, self.slaves))
            return
        if sb.state == BUILDING:
            # the Build's .lostRemote method (invoked by a notifyOnDisconnect
            # handler) will cause the Build to be stopped, probably right
            # after the notifyOnDisconnect that invoked us finishes running.
            pass

        if sb in self.attaching_slaves:
            self.attaching_slaves.remove(sb)
        if sb in self.slaves:
            self.slaves.remove(sb)

        self.builder_status.addPointEvent(['disconnect', slave.slavename])
        sb.detached()  # inform the SlaveBuilder that their slave went away
        self.updateBigStatus()
        self.fireTestEvent('detach')
        if not self.slaves:
            self.fireTestEvent('detach_all')

    def updateBigStatus(self):
        if not self.slaves:
            self.builder_status.setBigState("offline")
        elif self.building:
            self.builder_status.setBigState("building")
        else:
            self.builder_status.setBigState("idle")
            self.fireTestEvent('idle')

    def startBuild(self, build, sb):
        """Start a build on the given slave.
        @param build: the L{base.Build} to start
        @param sb: the L{SlaveBuilder} which will host this build

        @return: a Deferred which fires with a
        L{buildbot.interfaces.IBuildControl} that can be used to stop the
        Build, or to access a L{buildbot.interfaces.IBuildStatus} which will
        watch the Build as it runs. """

        self.building.append(build)
        self.updateBigStatus()
        log.msg("starting build %s using slave %s" % (build, sb))
        d = sb.prepare(self.builder_status, build)

        def _prepared(ready):
            # If prepare returns True, the slave is ready and we start a build.
            # If it returns False, we don't start a new build.
            d = defer.succeed(ready)

            if not ready:
                #FIXME: We should perhaps trigger a check to see if there is
                # any other way to schedule the work
                log.msg("slave %s can't build %s after all" % (build, sb))

                # release the slave. This will queue a call to maybeStartBuild, which
                # will fire after other notifyOnDisconnect handlers have marked the
                # slave as disconnected (so we don't try to use it again).
                # sb.buildFinished()

                log.msg("re-queueing the BuildRequest %s" % build)
                self.building.remove(build)
                self._resubmit_buildreqs(build).addErrback(log.err)

                sb.slave.releaseLocks()
                self.triggerNewBuildCheck()

                return d

            def _ping(ign):
                # ping the slave to make sure they're still there. If they've
                # fallen off the map (due to a NAT timeout or something), this
                # will fail in a couple of minutes, depending upon the TCP
                # timeout.
                #
                # TODO: This can unnecessarily suspend the starting of a build, in
                # situations where the slave is live but is pushing lots of data to
                # us in a build.
                log.msg("starting build %s.. pinging the slave %s" %
                        (build, sb))
                return sb.ping()

            d.addCallback(_ping)
            d.addCallback(self._startBuild_1, build, sb)

            return d

        d.addCallback(_prepared)
        return d

    def _startBuild_1(self, res, build, sb):
        if not res:
            return self._startBuildFailed("slave ping failed", build, sb)
        # The buildslave is ready to go. sb.buildStarted() sets its state to
        # BUILDING (so we won't try to use it for any other builds). This
        # gets set back to IDLE by the Build itself when it finishes.
        sb.buildStarted()
        d = sb.remote.callRemote("startBuild")
        d.addCallbacks(self._startBuild_2,
                       self._startBuildFailed,
                       callbackArgs=(build, sb),
                       errbackArgs=(build, sb))
        return d

    def _startBuild_2(self, res, build, sb):
        # create the BuildStatus object that goes with the Build
        bs = self.builder_status.newBuild()

        # start the build. This will first set up the steps, then tell the
        # BuildStatus that it has started, which will announce it to the
        # world (through our BuilderStatus object, which is its parent).
        # Finally it will start the actual build process.
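        # record in the database that each merged request has started this
        # build number; the resulting build ids are closed out in buildFinished()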
        bids = [
            self.db.build_started(req.id, bs.number) for req in build.requests
        ]
        d = build.startBuild(bs, self.expectations, sb)
        d.addCallback(self.buildFinished, sb, bids)
        # this shouldn't happen. if it does, the slave will be wedged
        d.addErrback(log.err)
        return build  # this is the IBuildControl

    def _startBuildFailed(self, why, build, sb):
        # put the build back on the buildable list
        log.msg("I tried to tell the slave that the build %s started, but "
                "remote_startBuild failed: %s" % (build, why))
        # release the slave. This will queue a call to maybeStartBuild, which
        # will fire after other notifyOnDisconnect handlers have marked the
        # slave as disconnected (so we don't try to use it again).
        sb.buildFinished()

        log.msg("re-queueing the BuildRequest")
        self.building.remove(build)
        self._resubmit_buildreqs(build).addErrback(log.err)

    def setupProperties(self, props):
        props.setProperty("buildername", self.name, "Builder")
        if len(self.properties) > 0:
            for propertyname in self.properties:
                props.setProperty(propertyname, self.properties[propertyname],
                                  "Builder")

    def buildFinished(self, build, sb, bids):
        """This is called when the Build has finished (either success or
        failure). Any exceptions during the build are reported with
        results=FAILURE, not with an errback."""

        # by the time we get here, the Build has already released the slave
        # (which queues a call to maybeStartBuild)

        self.db.builds_finished(bids)

        results = build.build_status.getResults()
        self.building.remove(build)
        if results == RETRY:
            self._resubmit_buildreqs(build).addErrback(
                log.err)  # returns Deferred
        else:
            brids = [br.id for br in build.requests]
            self.db.retire_buildrequests(brids, results)

        if sb.slave:
            sb.slave.releaseLocks()

        self.triggerNewBuildCheck()

    def _resubmit_buildreqs(self, build):
        brids = [br.id for br in build.requests]
        return self.db.resubmit_buildrequests(brids)

    def setExpectations(self, progress):
        """Mark the build as successful and update expectations for the next
        build. Only call this when the build did not fail in any way that
        would invalidate the time expectations generated by it. (if the
        compile failed and thus terminated early, we can't use the last
        build to predict how long the next one will take).
        """
        if self.expectations:
            self.expectations.update(progress)
        else:
            # the first time we get a good build, create our Expectations
            # based upon its results
            self.expectations = Expectations(progress)
        log.msg("new expectations: %s seconds" % \
                self.expectations.expectedBuildTime())

    # Build Creation

    # maybeStartBuild is called by the botmaster whenever this builder should
    # check for and potentially start new builds.  As an optimization,
    # invocations of this function are collapsed as much as possible while
    # maintaining the invariant that at least one execution of the entire
    # algorithm will occur between the invocation of the method and the firing
    # of its Deferred.  This is done with util.SerializedInvocation; see
    # Builder.__init__, above.
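
    # A rough, hypothetical sketch of that collapsing behaviour (not the actual
    # util.SerializedInvocation implementation) could look like this: the
    # wrapper runs the method at most once at a time, and callers that arrive
    # while it is running share a single trailing re-run.
    #
    #   class _SerializedInvocation(object):
    #       def __init__(self, method):
    #           self.method = method
    #           self.running = False
    #           self.waiters = []            # Deferreds for calls not yet honored
    #
    #       def __call__(self):
    #           d = defer.Deferred()
    #           self.waiters.append(d)
    #           if not self.running:
    #               self._start()
    #           return d
    #
    #       def _start(self):
    #           self.running = True
    #           waiters, self.waiters = self.waiters, []
    #           dm = defer.maybeDeferred(self.method)
    #           def done(_):
    #               if self.waiters:         # calls arrived mid-run: run again
    #                   self._start()
    #               else:
    #                   self.running = False
    #               for w in waiters:
    #                   w.callback(None)
    #           dm.addBoth(done)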

    @defer.deferredGenerator
    def doMaybeStartBuild(self):
        # first, if we're not running, then don't start builds; stopService
        # uses this to ensure that any ongoing doMaybeStartBuild invocations
        # are complete before it stops.
        if not self.running:
            return

        # Check for available slaves.  If there are no available slaves, then
        # there is no sense continuing
        available_slavebuilders = [
            sb for sb in self.slaves if sb.isAvailable()
        ]
        if not available_slavebuilders:
            self.updateBigStatus()
            return

        # now, get the available build requests
        wfd = defer.waitForDeferred(
            self.master.db.buildrequests.getBuildRequests(
                buildername=self.name, claimed=False))
        yield wfd
        unclaimed_requests = wfd.getResult()

        # sort by submitted_at, so the first is the oldest
        unclaimed_requests.sort(key=lambda brd: brd['submitted_at'])

        # get the mergeRequests function for later
        mergeRequests_fn = self._getMergeRequestsFn()

        # match them up until we're out of options
        while available_slavebuilders and unclaimed_requests:
            # first, choose a slave (using nextSlave)
            wfd = defer.waitForDeferred(
                self._chooseSlave(available_slavebuilders))
            yield wfd
            slavebuilder = wfd.getResult()

            if not slavebuilder:
                break

            if slavebuilder not in available_slavebuilders:
                log.msg(("nextSlave chose a nonexistent slave for builder "
                         "'%s'; cannot start build") % self.name)
                break

            # then choose a request (using nextBuild)
            wfd = defer.waitForDeferred(self._chooseBuild(unclaimed_requests))
            yield wfd
            breq = wfd.getResult()

            if not breq:
                break

            if breq not in unclaimed_requests:
                log.msg(("nextBuild chose a nonexistent request for builder "
                         "'%s'; cannot start build") % self.name)
                break

            # merge the chosen request with any compatible requests in the
            # queue
            wfd = defer.waitForDeferred(
                self._mergeRequests(breq, unclaimed_requests,
                                    mergeRequests_fn))
            yield wfd
            breqs = wfd.getResult()

            # try to claim the build requests
            try:
                wfd = defer.waitForDeferred(
                    self.master.db.buildrequests.claimBuildRequests(
                        [brdict['brid'] for brdict in breqs]))
                yield wfd
                wfd.getResult()
            except buildrequests.AlreadyClaimedError:
                # one or more of the build requests was already claimed;
                # re-fetch the now-partially-claimed build requests and keep
                # trying to match them
                self._breakBrdictRefloops(unclaimed_requests)
                wfd = defer.waitForDeferred(
                    self.master.db.buildrequests.getBuildRequests(
                        buildername=self.name, claimed=False))
                yield wfd
                unclaimed_requests = wfd.getResult()

                # go around the loop again
                continue

            # claim was successful, so initiate a build for this set of
            # requests.  Note that if the build fails from here on out (e.g.,
            # because a slave has failed), it will be handled outside of this
            # loop. TODO: test that!
            wfd = defer.waitForDeferred(
                self._startBuildFor(slavebuilder, breqs))
            yield wfd
            wfd.getResult()

            # and finally remove the buildrequests and slavebuilder from the
            # respective queues
            self._breakBrdictRefloops(breqs)
            for breq in breqs:
                unclaimed_requests.remove(breq)
            available_slavebuilders.remove(slavebuilder)

        self._breakBrdictRefloops(unclaimed_requests)
        self.updateBigStatus()
        return

    # a few utility functions to make the maybeStartBuild a bit shorter and
    # easier to read

    def _chooseSlave(self, available_slavebuilders):
        """
        Choose the next slave, using the C{nextSlave} configuration if
        available, and falling back to C{random.choice} otherwise.

        @param available_slavebuilders: list of slavebuilders to choose from
        @returns: SlaveBuilder or None via Deferred
        """
        if self.nextSlave:
            return defer.maybeDeferred(
                lambda: self.nextSlave(self, available_slavebuilders))
        else:
            return defer.succeed(random.choice(available_slavebuilders))

    def _chooseBuild(self, buildrequests):
        """
        Choose the next build from the given set of build requests (represented
        as dictionaries).  Defaults to returning the first request (earliest
        submitted).

        @param buildrequests: sorted list of build request dictionaries
        @returns: a build request dictionary or None via Deferred
        """
        if self.nextBuild:
            # nextBuild expects BuildRequest objects, so instantiate them here
            # and cache them in the dictionaries
            d = defer.gatherResults([
                self._brdictToBuildRequest(brdict) for brdict in buildrequests
            ])
            d.addCallback(
                lambda requestobjects: self.nextBuild(self, requestobjects))

            def to_brdict(brobj):
                # get the brdict for this object back
                return brobj.brdict

            d.addCallback(to_brdict)
            return d
        else:
            return defer.succeed(buildrequests[0])

    def _getMergeRequestsFn(self):
        """Helper function to determine which mergeRequests function to use
        from L{_mergeRequests}, or None for no merging"""
        # first, seek through builder, global, and the default
        mergeRequests_fn = self.mergeRequests
        if mergeRequests_fn is None:
            mergeRequests_fn = self.master.mergeRequests
        if mergeRequests_fn is None:
            mergeRequests_fn = True

        # then translate False and True properly
        if mergeRequests_fn is False:
            mergeRequests_fn = None
        elif mergeRequests_fn is True:
            mergeRequests_fn = buildrequest.BuildRequest.canBeMergedWith

        return mergeRequests_fn

    @defer.deferredGenerator
    def _mergeRequests(self, breq, unclaimed_requests, mergeRequests_fn):
        """Use C{mergeRequests_fn} to merge C{breq} against
        C{unclaimed_requests}, where both are build request dictionaries"""
        # short circuit if there is no merging to do
        if not mergeRequests_fn or len(unclaimed_requests) == 1:
            yield [breq]
            return

        # we'll need BuildRequest objects, so get those first
        wfd = defer.waitForDeferred(
            defer.gatherResults([
                self._brdictToBuildRequest(brdict)
                for brdict in unclaimed_requests
            ]))
        yield wfd
        unclaimed_request_objects = wfd.getResult()
        breq_object = unclaimed_request_objects.pop(
            unclaimed_requests.index(breq))

        # gather the mergeable requests
        merged_request_objects = [breq_object]
        for other_breq_object in unclaimed_request_objects:
            wfd = defer.waitForDeferred(
                defer.maybeDeferred(
                    lambda: mergeRequests_fn(breq_object, other_breq_object)))
            yield wfd
            if wfd.getResult():
                merged_request_objects.append(other_breq_object)

        # convert them back to brdicts and return
        merged_requests = [br.brdict for br in merged_request_objects]
        yield merged_requests

    def _brdictToBuildRequest(self, brdict):
        """
        Convert a build request dictionary to a L{buildrequest.BuildRequest}
        object, caching the result in the dictionary itself.  The resulting
        buildrequest will have a C{brdict} attribute pointing back to this
        dictionary.

        Note that this does not perform any locking - be careful that it is
        only called once at a time for each build request dictionary.

        @param brdict: dictionary to convert

        @returns: L{buildrequest.BuildRequest} via Deferred
        """
        if 'brobj' in brdict:
            return defer.succeed(brdict['brobj'])
        d = buildrequest.BuildRequest.fromBrdict(self.master, brdict)

        def keep(buildrequest):
            brdict['brobj'] = buildrequest
            buildrequest.brdict = brdict
            return buildrequest

        d.addCallback(keep)
        return d

    def _breakBrdictRefloops(self, requests):
        """Break the reference loops created by L{_brdictToBuildRequest}"""
        for brdict in requests:
            try:
                del brdict['brobj'].brdict
            except KeyError:
                pass
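
The nextSlave and nextBuild hooks consulted by _choose_slave()/_chooseSlave() and _choose_build()/_chooseBuild() above are plain callables supplied in the builder configuration: each receives the Builder and a list to choose from, and returning None declines to pick anything. The sketch below only illustrates that contract; the function names and the 'fast-' naming convention are made up for this example, and getSubmitTime() is taken from the older listing above, so it may not exist on every BuildRequest flavor.

import random

def pick_fast_slave(builder, available_slavebuilders):
    """Prefer slaves whose name starts with 'fast-'; otherwise pick at random.
    Returning None tells the builder not to start a build right now."""
    if not available_slavebuilders:
        return None
    fast = [sb for sb in available_slavebuilders
            if sb.slave.slavename.startswith('fast-')]
    return random.choice(fast or available_slavebuilders)

def pick_oldest_request(builder, requests):
    """Pick the request that was submitted first."""
    if not requests:
        return None
    return min(requests, key=lambda req: req.getSubmitTime())

These callables would be handed to the Builder through the 'nextSlave' and 'nextBuild' entries of its setup dictionary (or the corresponding keyword arguments in newer configuration styles), which is what the callable-validation in __init__ above checks for.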
コード例 #9
0
class Builder(config.ReconfigurableServiceMixin, pb.Referenceable,
              service.MultiService):

    # reconfigure builders before slaves
    reconfig_priority = 196

    def __init__(self, name):
        service.MultiService.__init__(self)
        self.name = name

        # this is created the first time we get a good build
        self.expectations = None

        # build/wannabuild slots: Build objects move along this sequence
        self.building = []
        # old_building holds active builds that were stolen from a predecessor
        self.old_building = weakref.WeakKeyDictionary()

        # buildslaves which have connected but which are not yet available.
        # These are always in the ATTACHING state.
        self.attaching_slaves = []

        # buildslaves at our disposal. Each SlaveBuilder instance has a
        # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a
        # Build is about to start, to make sure that they're still alive.
        self.slaves = []

        self.config = None
        self.builder_status = None

        self.reclaim_svc = internet.TimerService(10 * 60,
                                                 self.reclaimAllBuilds)
        self.reclaim_svc.setServiceParent(self)

    def reconfigService(self, new_config):
        # find this builder in the config
        for builder_config in new_config.builders:
            if builder_config.name == self.name:
                break
        else:
            assert 0, "no config found for builder '%s'" % self.name

        # set up a builder status object on the first reconfig
        if not self.builder_status:
            self.builder_status = self.master.status.builderAdded(
                builder_config.name, builder_config.builddir,
                builder_config.category)

        self.config = builder_config

        self.builder_status.setSlavenames(self.config.slavenames)

        return defer.succeed(None)

    def stopService(self):
        d = defer.maybeDeferred(lambda: service.MultiService.stopService(self))

        def flushMaybeStartBuilds(_):
            # at this point, self.running = False, so another maybeStartBuild
            # invocation won't hurt anything, but it also will not complete
            # until any currently-running invocations are done, so we know that
            # the builder is quiescent at that time.
            return self.maybeStartBuild()

        d.addCallback(flushMaybeStartBuilds)
        return d

    def __repr__(self):
        return "<Builder '%r' at %d>" % (self.name, id(self))

    @defer.deferredGenerator
    def getOldestRequestTime(self):
        """Returns the submitted_at of the oldest unclaimed build request for
        this builder, or None if there are no build requests.

        @returns: datetime instance or None, via Deferred
        """
        wfd = defer.waitForDeferred(
            self.master.db.buildrequests.getBuildRequests(
                buildername=self.name, claimed=False))
        yield wfd
        unclaimed = wfd.getResult()

        if unclaimed:
            unclaimed = [brd['submitted_at'] for brd in unclaimed]
            unclaimed.sort()
            yield unclaimed[0]
        else:
            yield None

    def reclaimAllBuilds(self):
        brids = set()
        for b in self.building:
            brids.update([br.id for br in b.requests])
        for b in self.old_building:
            brids.update([br.id for br in b.requests])

        if not brids:
            return defer.succeed(None)

        d = self.master.db.buildrequests.reclaimBuildRequests(brids)
        d.addErrback(log.err, 'while re-claiming running BuildRequests')
        return d

    def getBuild(self, number):
        for b in self.building:
            if b.build_status and b.build_status.number == number:
                return b
        for b in self.old_building.keys():
            if b.build_status and b.build_status.number == number:
                return b
        return None

    def addLatentSlave(self, slave):
        assert interfaces.ILatentBuildSlave.providedBy(slave)
        for s in self.slaves:
            if s == slave:
                break
        else:
            sb = slavebuilder.LatentSlaveBuilder(slave, self)
            self.builder_status.addPointEvent(
                ['added', 'latent', slave.slavename])
            self.slaves.append(sb)
            self.botmaster.maybeStartBuildsForBuilder(self.name)

    def attached(self, slave, remote, commands):
        """This is invoked by the BuildSlave when the self.slavename bot
        registers their builder.

        @type  slave: L{buildbot.buildslave.BuildSlave}
        @param slave: the BuildSlave that represents the buildslave as a whole
        @type  remote: L{twisted.spread.pb.RemoteReference}
        @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder}
        @type  commands: dict: string -> string, or None
        @param commands: provides the slave's version of each RemoteCommand

        @rtype:  L{twisted.internet.defer.Deferred}
        @return: a Deferred that fires (with 'self') when the slave-side
                 builder is fully attached and ready to accept commands.
        """
        for s in self.attaching_slaves + self.slaves:
            if s.slave == slave:
                # already attached to them. This is fairly common, since
                # attached() gets called each time we receive the builder
                # list from the slave, and we ask for it each time we add or
                # remove a builder. So if the slave is hosting builders
                # A,B,C, and the config file changes A, we'll remove A and
                # re-add it, triggering two builder-list requests, getting
                # two redundant calls to attached() for B, and another two
                # for C.
                #
                # Therefore, when we see that we're already attached, we can
                # just ignore it.
                return defer.succeed(self)

        sb = slavebuilder.SlaveBuilder()
        sb.setBuilder(self)
        self.attaching_slaves.append(sb)
        d = sb.attached(slave, remote, commands)
        d.addCallback(self._attached)
        d.addErrback(self._not_attached, slave)
        return d

    def _attached(self, sb):
        self.builder_status.addPointEvent(['connect', sb.slave.slavename])
        self.attaching_slaves.remove(sb)
        self.slaves.append(sb)

        self.updateBigStatus()

        return self

    def _not_attached(self, why, slave):
        # already log.err'ed by SlaveBuilder._attachFailure
        # TODO: remove from self.slaves (except that detached() should get
        #       run first, right?)
        log.err(why, 'slave failed to attach')
        self.builder_status.addPointEvent(
            ['failed', 'connect', slave.slavename])
        # TODO: add an HTMLLogFile of the exception

    def detached(self, slave):
        """This is called when the connection to the bot is lost."""
        for sb in self.attaching_slaves + self.slaves:
            if sb.slave == slave:
                break
        else:
            log.msg(
                "WEIRD: Builder.detached(%s) (%s)"
                " not in attaching_slaves(%s)"
                " or slaves(%s)" %
                (slave, slave.slavename, self.attaching_slaves, self.slaves))
            return
        if sb.state == BUILDING:
            # the Build's .lostRemote method (invoked by a notifyOnDisconnect
            # handler) will cause the Build to be stopped, probably right
            # after the notifyOnDisconnect that invoked us finishes running.
            pass

        if sb in self.attaching_slaves:
            self.attaching_slaves.remove(sb)
        if sb in self.slaves:
            self.slaves.remove(sb)

        self.builder_status.addPointEvent(['disconnect', slave.slavename])
        sb.detached()  # inform the SlaveBuilder that their slave went away
        self.updateBigStatus()

    def updateBigStatus(self):
        if not self.slaves:
            self.builder_status.setBigState("offline")
        elif self.building or self.old_building:
            self.builder_status.setBigState("building")
        else:
            self.builder_status.setBigState("idle")

    @defer.deferredGenerator
    def _startBuildFor(self, slavebuilder, buildrequests):
        """Start a build on the given slave.
        @param slavebuilder: the L{SlaveBuilder} which will host this build
        @param buildrequests: the L{BuildRequest}s to build

        @return: (via Deferred) boolean indicating that the build was
        successfully started.
        """

        # as of the Python versions supported now, try/finally can't be used
        # around a yield inside a generator.  So instead, we push cleanup
        # functions onto a list so that, at any point, we can abort this
        # operation.
        cleanups = []

        def run_cleanups():
            while cleanups:
                fn = cleanups.pop()
                fn()

        # the last cleanup we want to perform is to update the big
        # status based on any other cleanup
        cleanups.append(lambda: self.updateBigStatus())

        build = self.config.factory.newBuild(buildrequests)
        build.setBuilder(self)
        log.msg("starting build %s using slave %s" % (build, slavebuilder))

        # set up locks
        build.setLocks(self.config.locks)
        cleanups.append(lambda: slavebuilder.slave.releaseLocks())

        if len(self.config.env) > 0:
            build.setSlaveEnvironment(self.config.env)

        # append the build to self.building
        self.building.append(build)
        cleanups.append(lambda: self.building.remove(build))

        # update the big status accordingly
        self.updateBigStatus()

        try:
            wfd = defer.waitForDeferred(
                slavebuilder.prepare(self.builder_status, build))
            yield wfd
            ready = wfd.getResult()
        except:
            log.err(failure.Failure(), 'while preparing slavebuilder:')
            ready = False

        # If prepare returns True then it is ready and we start a build.
        # If it returns False then we don't start a new build.
        if not ready:
            log.msg("slave %s can't build %s after all; re-queueing the "
                    "request" % (build, slavebuilder))
            run_cleanups()
            yield False
            return

        # ping the slave to make sure they're still there. If they've
        # fallen off the map (due to a NAT timeout or something), this
        # will fail in a couple of minutes, depending upon the TCP
        # timeout.
        #
        # TODO: This can unnecessarily suspend the starting of a build, in
        # situations where the slave is live but is pushing lots of data to
        # us in a build.
        log.msg("starting build %s.. pinging the slave %s" %
                (build, slavebuilder))
        try:
            wfd = defer.waitForDeferred(slavebuilder.ping())
            yield wfd
            ping_success = wfd.getResult()
        except:
            log.err(failure.Failure(), 'while pinging slave before build:')
            ping_success = False

        if not ping_success:
            log.msg("slave ping failed; re-queueing the request")
            run_cleanups()
            yield False
            return

        # The buildslave is ready to go. slavebuilder.buildStarted() sets its
        # state to BUILDING (so we won't try to use it for any other builds).
        # This gets set back to IDLE by the Build itself when it finishes.
        slavebuilder.buildStarted()
        cleanups.append(lambda: slavebuilder.buildFinished())

        # tell the remote that it's starting a build, too
        try:
            wfd = defer.waitForDeferred(
                slavebuilder.remote.callRemote("startBuild"))
            yield wfd
            wfd.getResult()
        except:
            log.err(failure.Failure(), 'while calling remote startBuild:')
            run_cleanups()
            yield False
            return

        # create the BuildStatus object that goes with the Build
        bs = self.builder_status.newBuild()

        # record the build in the db - one row per buildrequest
        try:
            bids = []
            for req in build.requests:
                wfd = defer.waitForDeferred(
                    self.master.db.builds.addBuild(req.id, bs.number))
                yield wfd
                bids.append(wfd.getResult())
        except:
            log.err(failure.Failure(), 'while adding rows to build table:')
            run_cleanups()
            yield False
            return

        # let status know (req here is the last request from the loop above)
        self.master.status.build_started(req.id, self.name, bs)

        # start the build. This will first set up the steps, then tell the
        # BuildStatus that it has started, which will announce it to the world
        # (through our BuilderStatus object, which is its parent).  Finally it
        # will start the actual build process.  This is done with a fresh
        # Deferred since _startBuildFor should not wait until the build is
        # finished.
        d = build.startBuild(bs, self.expectations, slavebuilder)
        d.addCallback(self.buildFinished, slavebuilder, bids)
        # this shouldn't happen. if it does, the slave will be wedged
        d.addErrback(log.err)

        # make sure the builder's status is represented correctly
        self.updateBigStatus()

        yield True

    def setupProperties(self, props):
        props.setProperty("buildername", self.name, "Builder")
        if len(self.config.properties) > 0:
            for propertyname in self.config.properties:
                props.setProperty(propertyname,
                                  self.config.properties[propertyname],
                                  "Builder")

    def buildFinished(self, build, sb, bids):
        """This is called when the Build has finished (either success or
        failure). Any exceptions during the build are reported with
        results=FAILURE, not with an errback."""

        # by the time we get here, the Build has already released the slave,
        # which will trigger a check for any now-possible build requests
        # (maybeStartBuilds)

        # mark the builds as finished, although since nothing ever reads this
        # table, it's not too important that it complete successfully
        d = self.master.db.builds.finishBuilds(bids)
        d.addErrback(log.err, 'while marking builds as finished (ignored)')

        results = build.build_status.getResults()
        self.building.remove(build)
        if results == RETRY:
            self._resubmit_buildreqs(build).addErrback(log.err)
        else:
            brids = [br.id for br in build.requests]
            db = self.master.db
            d = db.buildrequests.completeBuildRequests(brids, results)
            d.addCallback(
                lambda _: self._maybeBuildsetsComplete(build.requests))
            # nothing in particular to do with this deferred, so just log it if
            # it fails..
            d.addErrback(log.err, 'while marking build requests as completed')

        if sb.slave:
            sb.slave.releaseLocks()

        self.updateBigStatus()

    @defer.deferredGenerator
    def _maybeBuildsetsComplete(self, requests):
        # inform the master that we may have completed a number of buildsets
        for br in requests:
            wfd = defer.waitForDeferred(
                self.master.maybeBuildsetComplete(br.bsid))
            yield wfd
            wfd.getResult()

    def _resubmit_buildreqs(self, build):
        brids = [br.id for br in build.requests]
        return self.master.db.buildrequests.unclaimBuildRequests(brids)

    def setExpectations(self, progress):
        """Mark the build as successful and update expectations for the next
        build. Only call this when the build did not fail in any way that
        would invalidate the time expectations generated by it. (if the
        compile failed and thus terminated early, we can't use the last
        build to predict how long the next one will take).
        """
        if self.expectations:
            self.expectations.update(progress)
        else:
            # the first time we get a good build, create our Expectations
            # based upon its results
            self.expectations = Expectations(progress)
        log.msg("new expectations: %s seconds" % \
                self.expectations.expectedBuildTime())

    # Build Creation

    @defer.deferredGenerator
    def maybeStartBuild(self):
        # This method is called by the botmaster whenever this builder should
        # check for and potentially start new builds.  Do not call this method
        # directly - use master.botmaster.maybeStartBuildsForBuilder, or one
        # of the other similar methods if more appropriate

        # first, if we're not running, then don't start builds; stopService
        # uses this to ensure that any ongoing maybeStartBuild invocations
        # are complete before it stops.
        if not self.running:
            return

        # Check for available slaves.  If there are no available slaves, then
        # there is no sense continuing
        available_slavebuilders = [
            sb for sb in self.slaves if sb.isAvailable()
        ]
        if not available_slavebuilders:
            self.updateBigStatus()
            return

        # now, get the available build requests
        wfd = defer.waitForDeferred(
            self.master.db.buildrequests.getBuildRequests(
                buildername=self.name, claimed=False))
        yield wfd
        unclaimed_requests = wfd.getResult()

        if not unclaimed_requests:
            self.updateBigStatus()
            return

        # sort by submitted_at, so the first is the oldest
        unclaimed_requests.sort(key=lambda brd: brd['submitted_at'])

        # get the mergeRequests function for later
        mergeRequests_fn = self._getMergeRequestsFn()

        # match them up until we're out of options
        while available_slavebuilders and unclaimed_requests:
            # first, choose a slave (using nextSlave)
            wfd = defer.waitForDeferred(
                self._chooseSlave(available_slavebuilders))
            yield wfd
            slavebuilder = wfd.getResult()

            if not slavebuilder:
                break

            if slavebuilder not in available_slavebuilders:
                log.msg(("nextSlave chose a nonexistent slave for builder "
                         "'%s'; cannot start build") % self.name)
                break

            # then choose a request (using nextBuild)
            wfd = defer.waitForDeferred(self._chooseBuild(unclaimed_requests))
            yield wfd
            brdict = wfd.getResult()

            if not brdict:
                break

            if brdict not in unclaimed_requests:
                log.msg(("nextBuild chose a nonexistent request for builder "
                         "'%s'; cannot start build") % self.name)
                break

            # merge the chosen request with any compatible requests in the
            # queue
            wfd = defer.waitForDeferred(
                self._mergeRequests(brdict, unclaimed_requests,
                                    mergeRequests_fn))
            yield wfd
            brdicts = wfd.getResult()

            # try to claim the build requests
            brids = [brdict['brid'] for brdict in brdicts]
            try:
                wfd = defer.waitForDeferred(
                    self.master.db.buildrequests.claimBuildRequests(brids))
                yield wfd
                wfd.getResult()
            except buildrequests.AlreadyClaimedError:
                # one or more of the build requests was already claimed;
                # re-fetch the now-partially-claimed build requests and keep
                # trying to match them
                self._breakBrdictRefloops(unclaimed_requests)
                wfd = defer.waitForDeferred(
                    self.master.db.buildrequests.getBuildRequests(
                        buildername=self.name, claimed=False))
                yield wfd
                unclaimed_requests = wfd.getResult()

                # go around the loop again
                continue

            # claim was successful, so initiate a build for this set of
            # requests.  Note that if the build fails from here on out (e.g.,
            # because a slave has failed), it will be handled outside of this
            # loop. TODO: test that!

            # _startBuildFor expects BuildRequest objects, so cook some up
            wfd = defer.waitForDeferred(
                defer.gatherResults([
                    self._brdictToBuildRequest(brdict) for brdict in brdicts
                ]))
            yield wfd
            breqs = wfd.getResult()

            wfd = defer.waitForDeferred(
                self._startBuildFor(slavebuilder, breqs))
            yield wfd
            build_started = wfd.getResult()

            if not build_started:
                # build was not started, so unclaim the build requests
                wfd = defer.waitForDeferred(
                    self.master.db.buildrequests.unclaimBuildRequests(brids))
                yield wfd
                wfd.getResult()

                # and try starting builds again.  If we still have a working slave,
                # then this may re-claim the same buildrequests
                self.botmaster.maybeStartBuildsForBuilder(self.name)

            # finally, remove the buildrequests and slavebuilder from the
            # respective queues
            self._breakBrdictRefloops(brdicts)
            for brdict in brdicts:
                unclaimed_requests.remove(brdict)
            available_slavebuilders.remove(slavebuilder)

        self._breakBrdictRefloops(unclaimed_requests)
        self.updateBigStatus()
        return

    # a few utility functions to make the maybeStartBuild a bit shorter and
    # easier to read

    def _chooseSlave(self, available_slavebuilders):
        """
        Choose the next slave, using the C{nextSlave} configuration if
        available, and falling back to C{random.choice} otherwise.

        @param available_slavebuilders: list of slavebuilders to choose from
        @returns: SlaveBuilder or None via Deferred
        """
        if self.config.nextSlave:
            return defer.maybeDeferred(
                lambda: self.config.nextSlave(self, available_slavebuilders))
        else:
            return defer.succeed(random.choice(available_slavebuilders))

    def _chooseBuild(self, buildrequests):
        """
        Choose the next build from the given set of build requests (represented
        as dictionaries).  Defaults to returning the first request (earliest
        submitted).

        @param buildrequests: sorted list of build request dictionaries
        @returns: a build request dictionary or None via Deferred
        """
        if self.config.nextBuild:
            # nextBuild expects BuildRequest objects, so instantiate them here
            # and cache them in the dictionaries
            d = defer.gatherResults([
                self._brdictToBuildRequest(brdict) for brdict in buildrequests
            ])
            d.addCallback(lambda requestobjects: self.config.nextBuild(
                self, requestobjects))

            def to_brdict(brobj):
                # get the brdict for this object back
                return brobj.brdict

            d.addCallback(to_brdict)
            return d
        else:
            return defer.succeed(buildrequests[0])
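    # A hedged master.cfg sketch (illustrative names) of the nextSlave and
    # nextBuild hooks consumed by _chooseSlave() and _chooseBuild() above;
    # each hook may return its choice directly or via a Deferred.
    #
    #   def pick_first_available(builder, available_slavebuilders):
    #       return available_slavebuilders[0]
    #
    #   def pick_newest_request(builder, requests):
    #       return requests[-1]        # newest instead of the default oldest
    #
    #   c['builders'] = [
    #       BuilderConfig(name='runtests', slavenames=['slave1', 'slave2'],
    #                     factory=f, nextSlave=pick_first_available,
    #                     nextBuild=pick_newest_request),
    #   ]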

    def _getMergeRequestsFn(self):
        """Helper function to determine which mergeRequests function to use
        from L{_mergeRequests}, or None for no merging"""
        # first, seek through builder, global, and the default
        mergeRequests_fn = self.config.mergeRequests
        if mergeRequests_fn is None:
            mergeRequests_fn = self.master.config.mergeRequests
        if mergeRequests_fn is None:
            mergeRequests_fn = True

        # then translate False and True properly
        if mergeRequests_fn is False:
            mergeRequests_fn = None
        elif mergeRequests_fn is True:
            mergeRequests_fn = Builder._defaultMergeRequestFn

        return mergeRequests_fn

    def _defaultMergeRequestFn(self, req1, req2):
        return req1.canBeMergedWith(req2)
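    # A hedged sketch (illustrative logic) of a custom mergeRequests callable
    # as resolved by _getMergeRequestsFn() above; returning True allows the
    # two requests to be built together, while configuring False disables
    # merging entirely.
    #
    #   def merge_same_branch_only(builder, req1, req2):
    #       return (req1.source.branch == req2.source.branch
    #               and req1.canBeMergedWith(req2))
    #
    #   c['mergeRequests'] = merge_same_branch_only          # global default
    #   # or per builder: BuilderConfig(..., mergeRequests=merge_same_branch_only)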

    @defer.deferredGenerator
    def _mergeRequests(self, breq, unclaimed_requests, mergeRequests_fn):
        """Use C{mergeRequests_fn} to merge C{breq} against
        C{unclaimed_requests}, where both are build request dictionaries"""
        # short circuit if there is no merging to do
        if not mergeRequests_fn or len(unclaimed_requests) == 1:
            yield [breq]
            return

        # we'll need BuildRequest objects, so get those first
        wfd = defer.waitForDeferred(
            defer.gatherResults([
                self._brdictToBuildRequest(brdict)
                for brdict in unclaimed_requests
            ]))
        yield wfd
        unclaimed_request_objects = wfd.getResult()
        breq_object = unclaimed_request_objects.pop(
            unclaimed_requests.index(breq))

        # gather the mergeable requests
        merged_request_objects = [breq_object]
        for other_breq_object in unclaimed_request_objects:
            wfd = defer.waitForDeferred(
                defer.maybeDeferred(lambda: mergeRequests_fn(
                    self, breq_object, other_breq_object)))
            yield wfd
            if wfd.getResult():
                merged_request_objects.append(other_breq_object)

        # convert them back to brdicts and return
        merged_requests = [br.brdict for br in merged_request_objects]
        yield merged_requests

    def _brdictToBuildRequest(self, brdict):
        """
        Convert a build request dictionary to a L{buildrequest.BuildRequest}
        object, caching the result in the dictionary itself.  The resulting
        buildrequest will have a C{brdict} attribute pointing back to this
        dictionary.

        Note that this does not perform any locking - be careful that it is
        only called once at a time for each build request dictionary.

        @param brdict: dictionary to convert

        @returns: L{buildrequest.BuildRequest} via Deferred
        """
        if 'brobj' in brdict:
            return defer.succeed(brdict['brobj'])
        d = buildrequest.BuildRequest.fromBrdict(self.master, brdict)

        def keep(buildrequest):
            brdict['brobj'] = buildrequest
            buildrequest.brdict = brdict
            return buildrequest

        d.addCallback(keep)
        return d
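    # Note the deliberate reference loop created by the caching above; it is
    # why _breakBrdictRefloops() below must be called once the dictionaries
    # are no longer needed:
    #
    #   brdict['brobj'] is buildrequest     # dict   -> object
    #   buildrequest.brdict is brdict       # object -> dict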

    def _breakBrdictRefloops(self, requests):
        """Break the reference loops created by L{_brdictToBuildRequest}"""
        for brdict in requests:
            try:
                del brdict['brobj'].brdict
            except KeyError:
                pass
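The example above uses Twisted's older defer.deferredGenerator / waitForDeferred idiom, while Code Example #10 below uses defer.inlineCallbacks. A minimal, self-contained sketch (not Buildbot code) of how the two spellings correspond:

from twisted.internet import defer

@defer.deferredGenerator
def double_old_style(fetch):
    # wrap the Deferred, yield the wrapper, then read its result
    wfd = defer.waitForDeferred(fetch())
    yield wfd
    value = wfd.getResult()
    # a deferredGenerator "returns" by yielding a plain (non-waitForDeferred) value
    yield value * 2

@defer.inlineCallbacks
def double_new_style(fetch):
    value = yield fetch()          # yield the Deferred directly
    defer.returnValue(value * 2)

Both functions return a Deferred that fires with twice whatever fetch()'s Deferred fired with.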
Code Example #10
0
File: builder.py  Project: zozo123/buildbot
class Builder(config.ReconfigurableServiceMixin, pb.Referenceable,
              service.MultiService):

    # reconfigure builders before slaves
    reconfig_priority = 196

    def __init__(self, name, _addServices=True):
        service.MultiService.__init__(self)
        self.name = name

        # this is filled on demand by getBuilderId; don't access it directly
        self._builderid = None

        # this is created the first time we get a good build
        self.expectations = None

        # build/wannabuild slots: Build objects move along this sequence
        self.building = []
        # old_building holds active builds that were stolen from a predecessor
        self.old_building = weakref.WeakKeyDictionary()

        # buildslaves which have connected but which are not yet available.
        # These are always in the ATTACHING state.
        self.attaching_slaves = []

        # buildslaves at our disposal. Each SlaveBuilder instance has a
        # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a
        # Build is about to start, to make sure that they're still alive.
        self.slaves = []

        self.config = None
        self.builder_status = None

        if _addServices:
            self.reclaim_svc = internet.TimerService(10 * 60,
                                                     self.reclaimAllBuilds)
            self.reclaim_svc.setServiceParent(self)

            # update big status every 30 minutes, working around #1980
            self.updateStatusService = internet.TimerService(
                30 * 60, self.updateBigStatus)
            self.updateStatusService.setServiceParent(self)

    @defer.inlineCallbacks
    def reconfigService(self, new_config):
        # find this builder in the config
        found_config = False
        for builder_config in new_config.builders:
            if builder_config.name == self.name:
                found_config = True
                break
        assert found_config, "no config found for builder '%s'" % self.name

        # set up a builder status object on the first reconfig
        if not self.builder_status:
            self.builder_status = self.master.status.builderAdded(
                name=builder_config.name,
                basedir=builder_config.builddir,
                tags=builder_config.tags,
                description=builder_config.description)

        self.config = builder_config

        # allocate the builderid now, so that the builder is visible in the web
        # UI; without this, the builder wouldn't appear until it performed a
        # build.
        yield self.getBuilderId()

        self.builder_status.setDescription(builder_config.description)
        self.builder_status.setTags(builder_config.tags)
        self.builder_status.setSlavenames(self.config.slavenames)
        self.builder_status.setCacheSize(new_config.caches['Builds'])

        # if we have any slavebuilders attached which are no longer configured,
        # drop them.
        new_slavenames = set(builder_config.slavenames)
        self.slaves = [
            s for s in self.slaves if s.slave.slavename in new_slavenames
        ]

    def __repr__(self):
        return "<Builder '%r' at %d>" % (self.name, id(self))

    def getBuilderId(self):
        # since findBuilderId is idempotent, there's no reason to add
        # additional locking around this function.
        if self._builderid:
            return defer.succeed(self._builderid)
        # buildbot.config should ensure this is already unicode, but it doesn't
        # hurt to check again
        name = ascii2unicode(self.name)
        d = self.master.data.updates.findBuilderId(name)

        @d.addCallback
        def keep(builderid):
            self._builderid = builderid
            return builderid

        return d

    @defer.inlineCallbacks
    def getOldestRequestTime(self):
        """Returns the submitted_at of the oldest unclaimed build request for
        this builder, or None if there are no build requests.

        @returns: datetime instance or None, via Deferred
        """
        unclaimed = yield self.master.data.get(
            ('builders', ascii2unicode(self.name), 'buildrequests'),
            [resultspec.Filter('claimed', 'eq', [False])])
        if unclaimed:
            unclaimed = sorted([brd['submitted_at'] for brd in unclaimed])
            defer.returnValue(unclaimed[0])
        else:
            defer.returnValue(None)

    def reclaimAllBuilds(self):
        brids = set()
        for b in self.building:
            brids.update([br.id for br in b.requests])
        for b in self.old_building:
            brids.update([br.id for br in b.requests])

        if not brids:
            return defer.succeed(None)

        d = self.master.data.updates.reclaimBuildRequests(list(brids))
        d.addErrback(log.err, 'while re-claiming running BuildRequests')
        return d

    def getBuild(self, number):
        for b in self.building:
            if b.build_status and b.build_status.number == number:
                return b
        for b in self.old_building.keys():
            if b.build_status and b.build_status.number == number:
                return b
        return None

    def addLatentSlave(self, slave):
        assert interfaces.ILatentBuildSlave.providedBy(slave)
        for s in self.slaves:
            if s == slave:
                break
        else:
            sb = slavebuilder.LatentSlaveBuilder(slave, self)
            self.builder_status.addPointEvent(
                ['added', 'latent', slave.slavename])
            self.slaves.append(sb)
            self.botmaster.maybeStartBuildsForBuilder(self.name)

    def attached(self, slave, commands):
        """This is invoked by the BuildSlave when the self.slavename bot
        registers their builder.

        @type  slave: L{buildbot.buildslave.BuildSlave}
        @param slave: the BuildSlave that represents the buildslave as a whole
        @type  commands: dict: string -> string, or None
        @param commands: provides the slave's version of each RemoteCommand

        @rtype:  L{twisted.internet.defer.Deferred}
        @return: a Deferred that fires (with 'self') when the slave-side
                 builder is fully attached and ready to accept commands.
        """
        for s in self.attaching_slaves + self.slaves:
            if s.slave == slave:
                # already attached to them. This is fairly common, since
                # attached() gets called each time we receive the builder
                # list from the slave, and we ask for it each time we add or
                # remove a builder. So if the slave is hosting builders
                # A,B,C, and the config file changes A, we'll remove A and
                # re-add it, triggering two builder-list requests, getting
                # two redundant calls to attached() for B, and another two
                # for C.
                #
                # Therefore, when we see that we're already attached, we can
                # just ignore it.
                return defer.succeed(self)

        sb = slavebuilder.SlaveBuilder()
        sb.setBuilder(self)
        self.attaching_slaves.append(sb)
        d = sb.attached(slave, commands)
        d.addCallback(self._attached)
        d.addErrback(self._not_attached, slave)
        return d

    def _attached(self, sb):
        self.builder_status.addPointEvent(['connect', sb.slave.slavename])
        self.attaching_slaves.remove(sb)
        self.slaves.append(sb)

        self.updateBigStatus()

        return self

    def _not_attached(self, why, slave):
        # already log.err'ed by SlaveBuilder._attachFailure
        # TODO: remove from self.slaves (except that detached() should get
        #       run first, right?)
        log.err(why, 'slave failed to attach')
        self.builder_status.addPointEvent(
            ['failed', 'connect', slave.slavename])
        # TODO: add an HTMLLogFile of the exception

    def detached(self, slave):
        """This is called when the connection to the bot is lost."""
        for sb in self.attaching_slaves + self.slaves:
            if sb.slave == slave:
                break
        else:
            log.msg(
                "WEIRD: Builder.detached(%s) (%s)"
                " not in attaching_slaves(%s)"
                " or slaves(%s)" %
                (slave, slave.slavename, self.attaching_slaves, self.slaves))
            return
        if sb.state == BUILDING:
            # the Build's .lostRemote method (invoked by a notifyOnDisconnect
            # handler) will cause the Build to be stopped, probably right
            # after the notifyOnDisconnect that invoked us finishes running.
            pass

        if sb in self.attaching_slaves:
            self.attaching_slaves.remove(sb)
        if sb in self.slaves:
            self.slaves.remove(sb)

        self.builder_status.addPointEvent(['disconnect', slave.slavename])
        sb.detached()  # inform the SlaveBuilder that their slave went away
        self.updateBigStatus()

    def updateBigStatus(self):
        try:
            # Catch exceptions here, since this is called in a LoopingCall.
            if not self.builder_status:
                return
            if not self.slaves:
                self.builder_status.setBigState("offline")
            elif self.building or self.old_building:
                self.builder_status.setBigState("building")
            else:
                self.builder_status.setBigState("idle")
        except Exception:
            log.err(
                None, "while trying to update status of builder '%s'" %
                (self.name, ))

    def getAvailableSlaves(self):
        return [sb for sb in self.slaves if sb.isAvailable()]

    def canStartWithSlavebuilder(self, slavebuilder):
        locks = [(self.botmaster.getLockFromLockAccess(access), access)
                 for access in self.config.locks]
        return Build.canStartWithSlavebuilder(locks, slavebuilder)

    def canStartBuild(self, slavebuilder, breq):
        if callable(self.config.canStartBuild):
            return defer.maybeDeferred(self.config.canStartBuild, self,
                                       slavebuilder, breq)
        return defer.succeed(True)
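    # A hedged master.cfg sketch (illustrative names) of the canStartBuild
    # hook checked above; it may return a bool or a Deferred firing a bool,
    # and returning False leaves the request queued for another slavebuilder.
    #
    #   def only_on_fast_slaves(builder, slavebuilder, breq):
    #       return slavebuilder.slave.slavename.startswith('fast-')
    #
    #   c['builders'] = [
    #       BuilderConfig(name='runtests', slavenames=['fast-1', 'slow-1'],
    #                     factory=f, canStartBuild=only_on_fast_slaves),
    #   ]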

    @defer.inlineCallbacks
    def _startBuildFor(self, slavebuilder, buildrequests):
        # Build a stack of cleanup functions so that, at any point, we can
        # abort this operation and unwind the commitments made so far.
        cleanups = []

        def run_cleanups():
            try:
                while cleanups:
                    fn = cleanups.pop()
                    fn()
            except:
                log.err(failure.Failure(),
                        "while running %r" % (run_cleanups, ))

        # the last cleanup we want to perform is to update the big
        # status based on any other cleanup
        cleanups.append(lambda: self.updateBigStatus())

        build = self.config.factory.newBuild(buildrequests)
        build.setBuilder(self)
        log.msg("starting build %s using slave %s" % (build, slavebuilder))

        # set up locks
        build.setLocks(self.config.locks)
        cleanups.append(lambda: slavebuilder.slave.releaseLocks())

        if len(self.config.env) > 0:
            build.setSlaveEnvironment(self.config.env)

        # append the build to self.building
        self.building.append(build)
        cleanups.append(lambda: self.building.remove(build))

        # update the big status accordingly
        self.updateBigStatus()

        try:
            ready = yield slavebuilder.prepare(self.builder_status, build)
        except:
            log.err(failure.Failure(), 'while preparing slavebuilder:')
            ready = False

        # If prepare returns True then it is ready and we start a build.
        # If it returns False then we don't start a new build.
        if not ready:
            log.msg("slave %s can't build %s after all; re-queueing the "
                    "request" % (build, slavebuilder))
            run_cleanups()
            defer.returnValue(False)
            return

        # ping the slave to make sure they're still there. If they've
        # fallen off the map (due to a NAT timeout or something), this
        # will fail in a couple of minutes, depending upon the TCP
        # timeout.
        #
        # TODO: This can unnecessarily suspend the starting of a build, in
        # situations where the slave is live but is pushing lots of data to
        # us in a build.
        log.msg("starting build %s.. pinging the slave %s" %
                (build, slavebuilder))
        try:
            ping_success = yield slavebuilder.ping()
        except:
            log.err(failure.Failure(), 'while pinging slave before build:')
            ping_success = False

        if not ping_success:
            log.msg("slave ping failed; re-queueing the request")
            run_cleanups()
            defer.returnValue(False)
            return

        # The buildslave is ready to go. slavebuilder.buildStarted() sets its
        # state to BUILDING (so we won't try to use it for any other builds).
        # This gets set back to IDLE by the Build itself when it finishes.
        slavebuilder.buildStarted()
        cleanups.append(lambda: slavebuilder.buildFinished())

        # tell the remote that it's starting a build, too
        try:
            yield slavebuilder.slave.conn.remoteStartBuild(build.builder.name)
        except:
            log.err(failure.Failure(), 'while calling remote startBuild:')
            run_cleanups()
            defer.returnValue(False)
            return

        # create the BuildStatus object that goes with the Build
        bs = self.builder_status.newBuild()

        # IMPORTANT: no yielding is allowed from here to the startBuild call!

        # it's possible that we lost the slave remote between the ping above
        # and now.  If so, bail out.  The build.startBuild call below transfers
        # responsibility for monitoring this connection to the Build instance,
        # so this check ensures we hand off a working connection.
        if not slavebuilder.slave.conn:  # TODO: replace with isConnected()
            log.msg("slave disappeared before build could start")
            run_cleanups()
            defer.returnValue(False)
            return

        # let status know
        self.master.status.build_started(buildrequests[0].id, self.name, bs)

        # start the build. This will first set up the steps, then tell the
        # BuildStatus that it has started, which will announce it to the world
        # (through our BuilderStatus object, which is its parent).  Finally it
        # will start the actual build process.  This is done with a fresh
        # Deferred since _startBuildFor should not wait until the build is
        # finished.  This uses `maybeDeferred` to ensure that any exceptions
        # raised by startBuild are treated as deferred errbacks (see
        # http://trac.buildbot.net/ticket/2428).
        d = defer.maybeDeferred(build.startBuild, bs, self.expectations,
                                slavebuilder)
        d.addCallback(lambda _: self.buildFinished(build, slavebuilder))
        # this shouldn't happen. if it does, the slave will be wedged
        d.addErrback(
            log.err, 'from a running build; this is a '
            'serious error - please file a bug at http://buildbot.net')

        # make sure the builder's status is represented correctly
        self.updateBigStatus()

        defer.returnValue(True)

    def setupProperties(self, props):
        props.setProperty("buildername", self.name, "Builder")
        if len(self.config.properties) > 0:
            for propertyname in self.config.properties:
                props.setProperty(propertyname,
                                  self.config.properties[propertyname],
                                  "Builder")

    def buildFinished(self, build, sb):
        """This is called when the Build has finished (either success or
        failure). Any exceptions during the build are reported with
        results=FAILURE, not with an errback."""

        # by the time we get here, the Build has already released the slave,
        # which will trigger a check for any now-possible build requests
        # (maybeStartBuilds)

        results = build.build_status.getResults()

        self.building.remove(build)
        if results == RETRY:
            d = self._resubmit_buildreqs(build)
            d.addErrback(log.err, 'while resubmitting a build request')
        else:
            complete_at_epoch = reactor.seconds()
            complete_at = epoch2datetime(complete_at_epoch)
            brids = [br.id for br in build.requests]

            d = self.master.data.updates.completeBuildRequests(
                brids, results, complete_at=complete_at)
            d.addCallback(lambda _: self._notify_completions(
                build.requests, results, complete_at_epoch))
            # nothing in particular to do with this deferred, so just log it if
            # it fails..
            d.addErrback(log.err, 'while marking build requests as completed')

        if sb.slave:
            sb.slave.releaseLocks()

        self.updateBigStatus()

    @defer.inlineCallbacks
    def _notify_completions(self, requests, results, complete_at_epoch):
        updates = self.master.data.updates

        # send a message for each request
        for br in requests:
            updates.completeBuildRequests([br.id], results,
                                          epoch2datetime(complete_at_epoch))

        # check for completed buildsets -- one call for each build request with
        # a unique bsid
        seen_bsids = set()
        for br in requests:
            if br.bsid in seen_bsids:
                continue
            seen_bsids.add(br.bsid)
            yield updates.maybeBuildsetComplete(br.bsid)

    def _resubmit_buildreqs(self, build):
        brids = [br.id for br in build.requests]
        d = self.master.data.updates.unclaimBuildRequests(brids)

        @d.addCallback
        def notify(_):
            pass  # XXX method does not exist
            # self._msg_buildrequests_unclaimed(build.requests)

        return d

    def setExpectations(self, progress):
        """Mark the build as successful and update expectations for the next
        build. Only call this when the build did not fail in any way that
        would invalidate the time expectations generated by it. (if the
        compile failed and thus terminated early, we can't use the last
        build to predict how long the next one will take).
        """
        if self.expectations:
            self.expectations.update(progress)
        else:
            # the first time we get a good build, create our Expectations
            # based upon its results
            self.expectations = Expectations(progress)
        log.msg("new expectations: %s seconds" %
                self.expectations.expectedBuildTime())

    # Build Creation

    @defer.inlineCallbacks
    def maybeStartBuild(self, slavebuilder, breqs, _reactor=reactor):
        # This method is called by the botmaster whenever this builder should
        # start a set of buildrequests on a slave. Do not call this method
        # directly - use master.botmaster.maybeStartBuildsForBuilder, or one of
        # the other similar methods if more appropriate

        # first, if we're not running, then don't start builds; stopService
        # uses this to ensure that any ongoing maybeStartBuild invocations
        # are complete before it stops.
        if not self.running:
            defer.returnValue(False)
            return

        # If the build fails from here on out (e.g., because a slave has failed),
        # it will be handled outside of this function. TODO: test that!

        build_started = yield self._startBuildFor(slavebuilder, breqs)
        defer.returnValue(build_started)

    # a few utility functions to make the maybeStartBuild a bit shorter and
    # easier to read

    def getMergeRequestsFn(self):
        """Helper function to determine which mergeRequests function to use
        from L{_mergeRequests}, or None for no merging"""
        # first, seek through builder, global, and the default
        mergeRequests_fn = self.config.mergeRequests
        if mergeRequests_fn is None:
            mergeRequests_fn = self.master.config.mergeRequests
        if mergeRequests_fn is None:
            mergeRequests_fn = True

        # then translate False and True properly
        if mergeRequests_fn is False:
            mergeRequests_fn = None
        elif mergeRequests_fn is True:
            mergeRequests_fn = Builder._defaultMergeRequestFn

        return mergeRequests_fn

    def _defaultMergeRequestFn(self, req1, req2):
        return req1.canBeMergedWith(req2)
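Code Example #10 above also uses addCallback as a decorator (in getBuilderId and _resubmit_buildreqs). A small self-contained sketch (not Buildbot code) of that idiom; note that Deferred.addCallback returns the Deferred itself, so the decorated name ends up bound to the Deferred rather than to the function:

from twisted.internet import defer

d = defer.succeed(41)

@d.addCallback
def incremented(value):
    # registered as an ordinary callback; 'incremented' is now bound to d
    return value + 1

results = []
d.addCallback(results.append)
assert results == [42]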
Code Example #11
0
File: builder.py  Project: hef/buildbot
class Builder(pb.Referenceable, service.MultiService):
    """I manage all Builds of a given type.

    Each Builder is created by an entry in the config file (the c['builders']
    list), with a number of parameters.

    One of these parameters is the L{buildbot.process.factory.BuildFactory}
    object that is associated with this Builder. The factory is responsible
    for creating new L{Build<buildbot.process.build.Build>} objects. Each
    Build object defines when and how the build is performed, so a new
    Factory or Builder should be defined to control this behavior.

    The Builder holds on to a number of L{BuildRequest} objects in a
    list named C{.buildable}. Incoming BuildRequest objects will be added to
    this list, or (if possible) merged into an existing request. When a slave
    becomes available, I will use my C{BuildFactory} to turn the request into
    a new C{Build} object. The C{BuildRequest} is forgotten, the C{Build}
    goes into C{.building} while it runs. Once the build finishes, I will
    discard it.

    I maintain a list of available SlaveBuilders, one for each connected
    slave that the C{slavenames} parameter says we can use. Some of these
    will be idle, some of them will be busy running builds for me. If there
    are multiple slaves, I can run multiple builds at once.

    I also manage forced builds, progress expectation (ETA) management, and
    some status delivery chores.

    @type buildable: list of L{buildbot.process.buildrequest.BuildRequest}
    @ivar buildable: BuildRequests that are ready to build, but which are
                     waiting for a buildslave to be available.

    @type building: list of L{buildbot.process.build.Build}
    @ivar building: Builds that are actively running

    @type slaves: list of L{buildbot.buildslave.BuildSlave} objects
    @ivar slaves: the slaves currently available for building
    """

    expectations = None # this is created the first time we get a good build

    def __init__(self, setup, builder_status):
        """
        @type  setup: dict
        @param setup: builder setup data, as stored in
                      BuildmasterConfig['builders'].  Contains name,
                      slavename(s), builddir, slavebuilddir, factory, locks.
        @type  builder_status: L{buildbot.status.builder.BuilderStatus}
        """
        service.MultiService.__init__(self)
        self.name = setup['name']
        self.slavenames = []
        if setup.has_key('slavename'):
            self.slavenames.append(setup['slavename'])
        if setup.has_key('slavenames'):
            self.slavenames.extend(setup['slavenames'])
        self.builddir = setup['builddir']
        self.slavebuilddir = setup['slavebuilddir']
        self.buildFactory = setup['factory']
        self.nextSlave = setup.get('nextSlave')
        if self.nextSlave is not None and not callable(self.nextSlave):
            raise ValueError("nextSlave must be callable")
        self.locks = setup.get("locks", [])
        self.env = setup.get('env', {})
        assert isinstance(self.env, dict)
        if setup.has_key('periodicBuildTime'):
            raise ValueError("periodicBuildTime can no longer be defined as"
                             " part of the Builder: use scheduler.Periodic"
                             " instead")
        self.nextBuild = setup.get('nextBuild')
        if self.nextBuild is not None and not callable(self.nextBuild):
            raise ValueError("nextBuild must be callable")
        self.buildHorizon = setup.get('buildHorizon')
        self.logHorizon = setup.get('logHorizon')
        self.eventHorizon = setup.get('eventHorizon')
        self.mergeRequests = setup.get('mergeRequests', True)
        self.properties = setup.get('properties', {})
        self.category = setup.get('category', None)

        # build/wannabuild slots: Build objects move along this sequence
        self.building = []
        # old_building holds active builds that were stolen from a predecessor
        self.old_building = weakref.WeakKeyDictionary()

        # buildslaves which have connected but which are not yet available.
        # These are always in the ATTACHING state.
        self.attaching_slaves = []

        # buildslaves at our disposal. Each SlaveBuilder instance has a
        # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a
        # Build is about to start, to make sure that they're still alive.
        self.slaves = []

        self.builder_status = builder_status
        self.builder_status.setSlavenames(self.slavenames)
        self.builder_status.buildHorizon = self.buildHorizon
        self.builder_status.logHorizon = self.logHorizon
        self.builder_status.eventHorizon = self.eventHorizon

        self.reclaim_svc = internet.TimerService(10*60, self.reclaimAllBuilds)
        self.reclaim_svc.setServiceParent(self)

        # for testing, to help synchronize tests
        self.run_count = 0
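    # A hedged sketch (illustrative values) of the setup dict this older-style
    # constructor expects; the keys mirror those read above, and
    # builder_status is supplied separately by the status tracker.
    #
    #   setup = {
    #       'name': 'runtests',
    #       'slavenames': ['slave1', 'slave2'],
    #       'builddir': 'runtests',
    #       'slavebuilddir': 'runtests',
    #       'factory': f,                # a BuildFactory instance
    #       'locks': [],
    #       'env': {'CFLAGS': '-O2'},
    #       'mergeRequests': True,
    #       'properties': {},
    #       'category': None,
    #   }
    #   builder = Builder(setup, builder_status)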

    def stopService(self):
        d = defer.maybeDeferred(lambda :
                service.MultiService.stopService(self))
        def flushMaybeStartBuilds(_):
            # at this point, self.running = False, so another maybeStartBuilds
            # invocation won't hurt anything, but it also will not complete
            # until any currently-running invocations are done.
            return self.maybeStartBuild()
        d.addCallback(flushMaybeStartBuilds)
        return d

    def setBotmaster(self, botmaster):
        self.botmaster = botmaster
        self.master = botmaster.master
        self.db = self.master.db
        self.master_name = self.master.master_name
        self.master_incarnation = self.master.master_incarnation

    def compareToSetup(self, setup):
        diffs = []
        setup_slavenames = []
        if setup.has_key('slavename'):
            setup_slavenames.append(setup['slavename'])
        setup_slavenames.extend(setup.get('slavenames', []))
        if setup_slavenames != self.slavenames:
            diffs.append('slavenames changed from %s to %s' \
                         % (self.slavenames, setup_slavenames))
        if setup['builddir'] != self.builddir:
            diffs.append('builddir changed from %s to %s' \
                         % (self.builddir, setup['builddir']))
        if setup['slavebuilddir'] != self.slavebuilddir:
            diffs.append('slavebuilddir changed from %s to %s' \
                         % (self.slavebuilddir, setup['slavebuilddir']))
        if setup['factory'] != self.buildFactory: # compare objects
            diffs.append('factory changed')
        if setup.get('locks', []) != self.locks:
            diffs.append('locks changed from %s to %s' % (self.locks, setup.get('locks')))
        if setup.get('env', {}) != self.env:
            diffs.append('env changed from %s to %s' % (self.env, setup.get('env', {})))
        if setup.get('nextSlave') != self.nextSlave:
            diffs.append('nextSlave changed from %s to %s' % (self.nextSlave, setup.get('nextSlave')))
        if setup.get('nextBuild') != self.nextBuild:
            diffs.append('nextBuild changed from %s to %s' % (self.nextBuild, setup.get('nextBuild')))
        if setup.get('buildHorizon', None) != self.buildHorizon:
            diffs.append('buildHorizon changed from %s to %s' % (self.buildHorizon, setup['buildHorizon']))
        if setup.get('logHorizon', None) != self.logHorizon:
            diffs.append('logHorizon changed from %s to %s' % (self.logHorizon, setup['logHorizon']))
        if setup.get('eventHorizon', None) != self.eventHorizon:
            diffs.append('eventHorizon changed from %s to %s' % (self.eventHorizon, setup['eventHorizon']))
        if setup.get('category', None) != self.category:
            diffs.append('category changed from %r to %r' % (self.category, setup.get('category', None)))

        return diffs

    def __repr__(self):
        return "<Builder '%r' at %d>" % (self.name, id(self))

    @defer.deferredGenerator
    def getOldestRequestTime(self):
        """Returns the submitted_at of the oldest unclaimed build request for
        this builder, or None if there are no build requests.

        @returns: datetime instance or None, via Deferred
        """
        wfd = defer.waitForDeferred(
            self.master.db.buildrequests.getBuildRequests(
                        buildername=self.name, claimed=False))
        yield wfd
        unclaimed = wfd.getResult()

        if unclaimed:
            unclaimed = [ brd['submitted_at'] for brd in unclaimed ]
            unclaimed.sort()
            yield unclaimed[0]
        else:
            yield None

    def consumeTheSoulOfYourPredecessor(self, old):
        """Suck the brain out of an old Builder.

        This takes all the runtime state from an existing Builder and moves
        it into ourselves. This is used when a Builder is changed in the
        master.cfg file: the new Builder has a different factory, but we want
        all the builds that were queued for the old one to get processed by
        the new one. Any builds which are already running will keep running.
        The new Builder will get as many of the old SlaveBuilder objects as
        it wants."""

        log.msg("consumeTheSoulOfYourPredecessor: %s feeding upon %s" %
                (self, old))
        # all pending builds are stored in the DB, so we don't have to do
        # anything to claim them. The old builder will be stopService'd,
        # which should make sure they don't start any new work

        # this is kind of silly, but the builder status doesn't get updated
        # when the config changes, yet it stores the category.  So:
        self.builder_status.category = self.category

        # old.building (i.e. builds which are still running) is not migrated
        # directly: it keeps track of builds which were in progress in the
        # old Builder. When those builds finish, the old Builder will be
        # notified, not us. However, since the old SlaveBuilder will point to
        # us, it is our maybeStartBuild() that will be triggered.
        if old.building:
            self.builder_status.setBigState("building")
        # however, we do grab a weakref to the active builds, so that our
        # BuilderControl can see them and stop them. We use a weakref because
        # we aren't the one to get notified, so there isn't a convenient
        # place to remove it from self.building .
        for b in old.building:
            self.old_building[b] = None
        for b in old.old_building:
            self.old_building[b] = None

        # Our set of slavenames may be different. Steal any of the old
        # buildslaves that we want to keep using.
        for sb in old.slaves[:]:
            if sb.slave.slavename in self.slavenames:
                log.msg(" stealing buildslave %s" % sb)
                self.slaves.append(sb)
                old.slaves.remove(sb)
                sb.setBuilder(self)

        # old.attaching_slaves:
        #  these SlaveBuilders are waiting on a sequence of calls:
        #  remote.setMaster and remote.print . When these two complete,
        #  old._attached will be fired, which will add a 'connect' event to
        #  the builder_status and try to start a build. However, we've pulled
        #  everything out of the old builder's queue, so it will have no work
        #  to do. The outstanding remote.setMaster/print call will be holding
        #  the last reference to the old builder, so it will disappear just
        #  after that response comes back.
        #
        #  The BotMaster will ask the slave to re-set their list of Builders
        #  shortly after this function returns, which will cause our
        #  attached() method to be fired with a bunch of references to remote
        #  SlaveBuilders, some of which we already have (by stealing them
        #  from the old Builder), some of which will be new. The new ones
        #  will be re-attached.

        #  Therefore, we don't need to do anything about old.attaching_slaves

        return # all done

    def reclaimAllBuilds(self):
        brids = set()
        for b in self.building:
            brids.update([br.id for br in b.requests])
        for b in self.old_building:
            brids.update([br.id for br in b.requests])

        if not brids:
            return defer.succeed(None)

        d = self.master.db.buildrequests.claimBuildRequests(brids)
        d.addErrback(log.err, 'while re-claiming running BuildRequests')
        return d

    def getBuild(self, number):
        for b in self.building:
            if b.build_status and b.build_status.number == number:
                return b
        for b in self.old_building.keys():
            if b.build_status and b.build_status.number == number:
                return b
        return None

    def addLatentSlave(self, slave):
        assert interfaces.ILatentBuildSlave.providedBy(slave)
        for s in self.slaves:
            if s == slave:
                break
        else:
            sb = slavebuilder.LatentSlaveBuilder(slave, self)
            self.builder_status.addPointEvent(
                ['added', 'latent', slave.slavename])
            self.slaves.append(sb)
            self.botmaster.maybeStartBuildsForBuilder(self.name)

    def attached(self, slave, remote, commands):
        """This is invoked by the BuildSlave when the self.slavename bot
        registers their builder.

        @type  slave: L{buildbot.buildslave.BuildSlave}
        @param slave: the BuildSlave that represents the buildslave as a whole
        @type  remote: L{twisted.spread.pb.RemoteReference}
        @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder}
        @type  commands: dict: string -> string, or None
        @param commands: provides the slave's version of each RemoteCommand

        @rtype:  L{twisted.internet.defer.Deferred}
        @return: a Deferred that fires (with 'self') when the slave-side
                 builder is fully attached and ready to accept commands.
        """
        for s in self.attaching_slaves + self.slaves:
            if s.slave == slave:
                # already attached to them. This is fairly common, since
                # attached() gets called each time we receive the builder
                # list from the slave, and we ask for it each time we add or
                # remove a builder. So if the slave is hosting builders
                # A,B,C, and the config file changes A, we'll remove A and
                # re-add it, triggering two builder-list requests, getting
                # two redundant calls to attached() for B, and another two
                # for C.
                #
                # Therefore, when we see that we're already attached, we can
                # just ignore it.
                return defer.succeed(self)

        sb = slavebuilder.SlaveBuilder()
        sb.setBuilder(self)
        self.attaching_slaves.append(sb)
        d = sb.attached(slave, remote, commands)
        d.addCallback(self._attached)
        d.addErrback(self._not_attached, slave)
        return d

    def _attached(self, sb):
        self.builder_status.addPointEvent(['connect', sb.slave.slavename])
        self.attaching_slaves.remove(sb)
        self.slaves.append(sb)

        return self

    def _not_attached(self, why, slave):
        # already log.err'ed by SlaveBuilder._attachFailure
        # TODO: remove from self.slaves (except that detached() should get
        #       run first, right?)
        log.err(why, 'slave failed to attach')
        self.builder_status.addPointEvent(['failed', 'connect',
                                           slave.slavename])
        # TODO: add an HTMLLogFile of the exception

    def detached(self, slave):
        """This is called when the connection to the bot is lost."""
        for sb in self.attaching_slaves + self.slaves:
            if sb.slave == slave:
                break
        else:
            log.msg("WEIRD: Builder.detached(%s) (%s)"
                    " not in attaching_slaves(%s)"
                    " or slaves(%s)" % (slave, slave.slavename,
                                        self.attaching_slaves,
                                        self.slaves))
            return
        if sb.state == BUILDING:
            # the Build's .lostRemote method (invoked by a notifyOnDisconnect
            # handler) will cause the Build to be stopped, probably right
            # after the notifyOnDisconnect that invoked us finishes running.
            pass

        if sb in self.attaching_slaves:
            self.attaching_slaves.remove(sb)
        if sb in self.slaves:
            self.slaves.remove(sb)

        self.builder_status.addPointEvent(['disconnect', slave.slavename])
        sb.detached() # inform the SlaveBuilder that their slave went away
        self.updateBigStatus()

    def updateBigStatus(self):
        if not self.slaves:
            self.builder_status.setBigState("offline")
        elif self.building:
            self.builder_status.setBigState("building")
        else:
            self.builder_status.setBigState("idle")

    @defer.deferredGenerator
    def _startBuildFor(self, slavebuilder, buildrequests):
        """Start a build on the given slave.
        @param slavebuilder: the L{SlaveBuilder} which will host this build
        @param buildrequests: the list of L{buildrequest.BuildRequest}s to build

        @return: a Deferred which fires with a
        L{buildbot.interfaces.IBuildControl} that can be used to stop the
        Build, or to access a L{buildbot.interfaces.IBuildStatus} which will
        watch the Build as it runs. """

        build = self.buildFactory.newBuild(buildrequests)
        build.setBuilder(self)
        build.setLocks(self.locks)
        if len(self.env) > 0:
            build.setSlaveEnvironment(self.env)

        self.building.append(build)
        self.updateBigStatus()
        log.msg("starting build %s using slave %s" % (build, slavebuilder))

        wfd = defer.waitForDeferred(
                slavebuilder.prepare(self.builder_status, build))
        yield wfd
        ready = wfd.getResult()

        # If prepare returns True then it is ready and we start a build
        # If it returns false then we don't start a new build.
        if not ready:
            log.msg("slave %s can't build %s after all; re-queueing the "
                    "request" % (build, slavebuilder))

            self.building.remove(build)
            slavebuilder.slave.releaseLocks()

            # release the buildrequest claims
            wfd = defer.waitForDeferred(
                self._resubmit_buildreqs(build))
            yield wfd
            wfd.getResult()

            # and try starting builds again.  If we still have a working slave,
            # then this may re-claim the same buildrequests
            self.botmaster.maybeStartBuildsForBuilder(self.name)

            return

        # ping the slave to make sure they're still there. If they've
        # fallen off the map (due to a NAT timeout or something), this
        # will fail in a couple of minutes, depending upon the TCP
        # timeout.
        #
        # TODO: This can unnecessarily suspend the starting of a build, in
        # situations where the slave is live but is pushing lots of data to
        # us in a build.
        log.msg("starting build %s.. pinging the slave %s"
                % (build, slavebuilder))
        wfd = defer.waitForDeferred(
                slavebuilder.ping())
        yield wfd
        ping_success = wfd.getResult()

        if not ping_success:
            self._startBuildFailed("slave ping failed", build, slavebuilder)
            return

        # The buildslave is ready to go. slavebuilder.buildStarted() sets its
        # state to BUILDING (so we won't try to use it for any other builds).
        # This gets set back to IDLE by the Build itself when it finishes.
        slavebuilder.buildStarted()
        try:
            wfd = defer.waitForDeferred(
                    slavebuilder.remote.callRemote("startBuild"))
            yield wfd
            wfd.getResult()
        except:
            self._startBuildFailed(failure.Failure(), build, slavebuilder)
            return

        # create the BuildStatus object that goes with the Build
        bs = self.builder_status.newBuild()

        # record in the db - one per buildrequest
        bids = []
        for req in build.requests:
            wfd = defer.waitForDeferred(
                self.master.db.builds.addBuild(req.id, bs.number))
            yield wfd
            bids.append(wfd.getResult())

        # let status know
        self.master.status.build_started(req.id, self.name, bs.number)

        # start the build. This will first set up the steps, then tell the
        # BuildStatus that it has started, which will announce it to the world
        # (through our BuilderStatus object, which is its parent).  Finally it
        # will start the actual build process.  This is done with a fresh
        # Deferred since _startBuildFor should not wait until the build is
        # finished.
        d = build.startBuild(bs, self.expectations, slavebuilder)
        d.addCallback(self.buildFinished, slavebuilder, bids)
        # this shouldn't happen. if it does, the slave will be wedged
        d.addErrback(log.err)

        # make sure the builder's status is represented correctly
        self.updateBigStatus()

        # yield the IBuildControl, in case anyone needs it
        yield build
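
    # Editor's note (illustrative, not part of the original listing): this
    # method uses Twisted's older generator style.  Each asynchronous call is
    # written as
    #
    #     wfd = defer.waitForDeferred(some_deferred)
    #     yield wfd
    #     result = wfd.getResult()
    #
    # which is equivalent to the @defer.inlineCallbacks form used in a later
    # example:
    #
    #     result = yield some_deferred
    #
    # and the final bare "yield build" is how a deferredGenerator hands a
    # result back to its caller's Deferred.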

    def _startBuildFailed(self, why, build, slavebuilder):
        # put the build back on the buildable list
        log.msg("I tried to tell the slave that the build %s started, but "
                "remote_startBuild failed: %s" % (build, why))
        # release the slave. This will queue a call to maybeStartBuild, which
        # will fire after other notifyOnDisconnect handlers have marked the
        # slave as disconnected (so we don't try to use it again).
        slavebuilder.buildFinished()

        log.msg("re-queueing the BuildRequest")
        self.building.remove(build)
        self._resubmit_buildreqs(build).addErrback(log.err)

    def setupProperties(self, props):
        props.setProperty("buildername", self.name, "Builder")
        if len(self.properties) > 0:
            for propertyname in self.properties:
                props.setProperty(propertyname, self.properties[propertyname],
                                  "Builder")

    def buildFinished(self, build, sb, bids):
        """This is called when the Build has finished (either success or
        failure). Any exceptions during the build are reported with
        results=FAILURE, not with an errback."""

        # by the time we get here, the Build has already released the slave,
        # which will trigger a check for any now-possible build requests
        # (maybeStartBuilds)

        # mark the builds as finished, although since nothing ever reads this
        # table, it's not too important that it complete successfully
        d = self.db.builds.finishBuilds(bids)
        d.addErrback(log.err, 'while marking builds as finished (ignored)')

        results = build.build_status.getResults()
        self.building.remove(build)
        if results == RETRY:
            self._resubmit_buildreqs(build).addErrback(log.err)
        else:
            brids = [br.id for br in build.requests]
            db = self.master.db
            d = db.buildrequests.completeBuildRequests(brids, results)
            d.addCallback(
                lambda _ : self._maybeBuildsetsComplete(build.requests))
            # nothing in particular to do with this deferred, so just log it if
            # it fails..
            d.addErrback(log.err, 'while marking build requests as completed')

        if sb.slave:
            sb.slave.releaseLocks()

    @defer.deferredGenerator
    def _maybeBuildsetsComplete(self, requests):
        # inform the master that we may have completed a number of buildsets
        for br in requests:
            wfd = defer.waitForDeferred(
                self.master.maybeBuildsetComplete(br.bsid))
            yield wfd
            wfd.getResult()

    def _resubmit_buildreqs(self, build):
        brids = [br.id for br in build.requests]
        return self.db.buildrequests.unclaimBuildRequests(brids)

    def setExpectations(self, progress):
        """Mark the build as successful and update expectations for the next
        build. Only call this when the build did not fail in any way that
        would invalidate the time expectations generated by it. (if the
        compile failed and thus terminated early, we can't use the last
        build to predict how long the next one will take).
        """
        if self.expectations:
            self.expectations.update(progress)
        else:
            # the first time we get a good build, create our Expectations
            # based upon its results
            self.expectations = Expectations(progress)
        log.msg("new expectations: %s seconds" % \
                self.expectations.expectedBuildTime())
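
    # Editor's note (illustrative sketch, inferred only from the calls above):
    # the Expectations object needs to support roughly this interface --
    #
    #     class Expectations:
    #         def __init__(self, progress): ...   # seeded from one good build
    #         def update(self, progress): ...     # refined by later builds
    #         def expectedBuildTime(self): ...    # -> estimated seconds
    #
    # where `progress` is presumably the per-build progress tracker handed
    # back by a successful build, and the estimate feeds the ETA shown for
    # pending builds.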

    # Build Creation

    @defer.deferredGenerator
    def maybeStartBuild(self):
        # This method is called by the botmaster whenever this builder should
        # check for and potentially start new builds.  Do not call this method
        # directly - use master.botmaster.maybeStartBuildsForBuilder, or one
        # of the other similar methods if more appropriate

        # first, if we're not running, then don't start builds; stopService
        # uses this to ensure that any ongoing maybeStartBuild invocations
        # are complete before it stops.
        if not self.running:
            return

        # Check for available slaves.  If there are no available slaves, then
        # there is no sense continuing
        available_slavebuilders = [ sb for sb in self.slaves
                                    if sb.isAvailable() ]
        if not available_slavebuilders:
            self.updateBigStatus()
            return

        # now, get the available build requests
        wfd = defer.waitForDeferred(
                self.master.db.buildrequests.getBuildRequests(
                        buildername=self.name, claimed=False))
        yield wfd
        unclaimed_requests = wfd.getResult()

        # sort by submitted_at, so the first is the oldest
        unclaimed_requests.sort(key=lambda brd : brd['submitted_at'])

        # get the mergeRequests function for later
        mergeRequests_fn = self._getMergeRequestsFn()

        # match them up until we're out of options
        while available_slavebuilders and unclaimed_requests:
            # first, choose a slave (using nextSlave)
            wfd = defer.waitForDeferred(
                self._chooseSlave(available_slavebuilders))
            yield wfd
            slavebuilder = wfd.getResult()

            if not slavebuilder:
                break

            if slavebuilder not in available_slavebuilders:
                log.msg(("nextSlave chose a nonexistent slave for builder "
                         "'%s'; cannot start build") % self.name)
                break

            # then choose a request (using nextBuild)
            wfd = defer.waitForDeferred(
                self._chooseBuild(unclaimed_requests))
            yield wfd
            brdict = wfd.getResult()

            if not brdict:
                break

            if brdict not in unclaimed_requests:
                log.msg(("nextBuild chose a nonexistent request for builder "
                         "'%s'; cannot start build") % self.name)
                break

            # merge the chosen request with any compatible requests in the
            # queue
            wfd = defer.waitForDeferred(
                self._mergeRequests(brdict, unclaimed_requests,
                                    mergeRequests_fn))
            yield wfd
            brdicts = wfd.getResult()

            # try to claim the build requests
            try:
                wfd = defer.waitForDeferred(
                        self.master.db.buildrequests.claimBuildRequests(
                            [ brdict['brid'] for brdict in brdicts ]))
                yield wfd
                wfd.getResult()
            except buildrequests.AlreadyClaimedError:
                # one or more of the build requests was already claimed;
                # re-fetch the now-partially-claimed build requests and keep
                # trying to match them
                self._breakBrdictRefloops(unclaimed_requests)
                wfd = defer.waitForDeferred(
                        self.master.db.buildrequests.getBuildRequests(
                                buildername=self.name, claimed=False))
                yield wfd
                unclaimed_requests = wfd.getResult()

                # go around the loop again
                continue

            # claim was successful, so initiate a build for this set of
            # requests.  Note that if the build fails from here on out (e.g.,
            # because a slave has failed), it will be handled outside of this
            # loop. TODO: test that!

            # _startBuildFor expects BuildRequest objects, so cook some up
            wfd = defer.waitForDeferred(
                    defer.gatherResults([ self._brdictToBuildRequest(brdict)
                                          for brdict in brdicts ]))
            yield wfd
            breqs = wfd.getResult()
            self._startBuildFor(slavebuilder, breqs)

            # and finally remove the buildrequests and slavebuilder from the
            # respective queues
            self._breakBrdictRefloops(brdicts)
            for brdict in brdicts:
                unclaimed_requests.remove(brdict)
            available_slavebuilders.remove(slavebuilder)

        self._breakBrdictRefloops(unclaimed_requests)
        self.updateBigStatus()
        return

    # a few utility functions to make the maybeStartBuild a bit shorter and
    # easier to read

    def _chooseSlave(self, available_slavebuilders):
        """
        Choose the next slave, using the C{nextSlave} configuration if
        available, and falling back to C{random.choice} otherwise.

        @param available_slavebuilders: list of slavebuilders to choose from
        @returns: SlaveBuilder or None via Deferred
        """
        if self.nextSlave:
            return defer.maybeDeferred(lambda :
                    self.nextSlave(self, available_slavebuilders))
        else:
            return defer.succeed(random.choice(available_slavebuilders))

    def _chooseBuild(self, buildrequests):
        """
        Choose the next build from the given set of build requests (represented
        as dictionaries).  Defaults to returning the first request (earliest
        submitted).

        @param buildrequests: sorted list of build request dictionaries
        @returns: a build request dictionary or None via Deferred
        """
        if self.nextBuild:
            # nextBuild expects BuildRequest objects, so instantiate them here
            # and cache them in the dictionaries
            d = defer.gatherResults([ self._brdictToBuildRequest(brdict)
                                      for brdict in buildrequests ])
            d.addCallback(lambda requestobjects :
                    self.nextBuild(self, requestobjects))
            def to_brdict(brobj):
                # get the brdict for this object back
                return brobj.brdict
            d.addCallback(to_brdict)
            return d
        else:
            return defer.succeed(buildrequests[0])
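
    # Editor's note (hedged sketch): given how _chooseSlave and _chooseBuild
    # invoke them, builder-supplied hooks would look roughly like
    #
    #     def nextSlave(builder, available_slavebuilders):
    #         return available_slavebuilders[0]     # or None to start nothing
    #
    #     def nextBuild(builder, requests):
    #         return requests[-1]                   # e.g. newest request first
    #
    # Either hook may also return a Deferred; returning None makes the loop
    # above stop without starting a build this time around.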

    def _getMergeRequestsFn(self):
        """Helper function to determine which mergeRequests function to use
        from L{_mergeRequests}, or None for no merging"""
        # first, seek through builder, global, and the default
        mergeRequests_fn = self.mergeRequests
        if mergeRequests_fn is None:
            mergeRequests_fn = self.master.mergeRequests
        if mergeRequests_fn is None:
            mergeRequests_fn = True

        # then translate False and True properly
        if mergeRequests_fn is False:
            mergeRequests_fn = None
        elif mergeRequests_fn is True:
            mergeRequests_fn = buildrequest.BuildRequest.canBeMergedWith

        return mergeRequests_fn

    @defer.deferredGenerator
    def _mergeRequests(self, breq, unclaimed_requests, mergeRequests_fn):
        """Use C{mergeRequests_fn} to merge C{breq} against
        C{unclaimed_requests}, where both are build request dictionaries"""
        # short circuit if there is no merging to do
        if not mergeRequests_fn or len(unclaimed_requests) == 1:
            yield [ breq ]
            return

        # we'll need BuildRequest objects, so get those first
        wfd = defer.waitForDeferred(
            defer.gatherResults(
                [ self._brdictToBuildRequest(brdict)
                  for brdict in unclaimed_requests ]))
        yield wfd
        unclaimed_request_objects = wfd.getResult()
        breq_object = unclaimed_request_objects.pop(
                unclaimed_requests.index(breq))

        # gather the mergeable requests
        merged_request_objects = [breq_object]
        for other_breq_object in unclaimed_request_objects:
            wfd = defer.waitForDeferred(
                defer.maybeDeferred(lambda :
                    mergeRequests_fn(breq_object, other_breq_object)))
            yield wfd
            if wfd.getResult():
                merged_request_objects.append(other_breq_object)

        # convert them back to brdicts and return
        merged_requests = [ br.brdict for br in merged_request_objects ]
        yield merged_requests

    def _brdictToBuildRequest(self, brdict):
        """
        Convert a build request dictionary to a L{buildrequest.BuildRequest}
        object, caching the result in the dictionary itself.  The resulting
        buildrequest will have a C{brdict} attribute pointing back to this
        dictionary.

        Note that this does not perform any locking - be careful that it is
        only called once at a time for each build request dictionary.

        @param brdict: dictionary to convert

        @returns: L{buildrequest.BuildRequest} via Deferred
        """
        if 'brobj' in brdict:
            return defer.succeed(brdict['brobj'])
        d = buildrequest.BuildRequest.fromBrdict(self.master, brdict)
        def keep(buildrequest):
            brdict['brobj'] = buildrequest
            buildrequest.brdict = brdict
            return buildrequest
        d.addCallback(keep)
        return d

    def _breakBrdictRefloops(self, requests):
        """Break the reference loops created by L{_brdictToBuildRequest}"""
        for brdict in requests:
            try:
                del brdict['brobj'].brdict
            except KeyError:
                pass
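
The merge helpers above boil down to a single callable that receives two
BuildRequest objects (see _mergeRequests, which falls back to
BuildRequest.canBeMergedWith).  Below is a minimal sketch of a custom policy
with that same shape, wired in through the mergeRequests setting that
_getMergeRequestsFn reads here; the function name and the 'no_merge' property
are made up for illustration only.

def mergeUnlessOptedOut(req1, req2):
    # Hypothetical policy: requests carrying a (made-up) 'no_merge' property
    # are never merged; everything else falls back to the stock compatibility
    # check this Builder already uses by default.
    if req1.properties.getProperty('no_merge', False) or \
            req2.properties.getProperty('no_merge', False):
        return False
    return req1.canBeMergedWith(req2)
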
Code example #12
0
File: builder.py Project: thuanbk2010/katana
class Builder(config.ReconfigurableServiceMixin, pb.Referenceable,
              service.MultiService):

    # reconfigure builders before slaves
    reconfig_priority = 196

    def __init__(self, name, _addServices=True):
        service.MultiService.__init__(self)
        self.name = name

        # this is created the first time we get a good build
        self.expectations = None

        # build/wannabuild slots: Build objects move along this sequence
        self.building = []
        # old_building holds active builds that were stolen from a predecessor
        self.old_building = weakref.WeakKeyDictionary()

        # buildslaves which have connected but which are not yet available.
        # These are always in the ATTACHING state.
        self.attaching_slaves = []

        # buildslaves at our disposal. Each SlaveBuilder instance has a
        # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a
        # Build is about to start, to make sure that they're still alive.
        self.slaves = []
        self.startSlaves = []

        self.config = None
        self.builder_status = None

        if _addServices:
            self.reclaim_svc = internet.TimerService(10 * 60,
                                                     self.reclaimAllBuilds)
            self.reclaim_svc.setServiceParent(self)

            # update big status every 30 minutes, working around #1980
            self.updateStatusService = internet.TimerService(
                30 * 60, self.updateBigStatus)
            self.updateStatusService.setServiceParent(self)

    def reconfigService(self, new_config):
        # find this builder in the config
        for builder_config in new_config.builders:
            if builder_config.name == self.name:
                break
        else:
            assert 0, "no config found for builder '%s'" % self.name

        # set up a builder status object on the first reconfig
        if not self.builder_status:
            self.builder_status = self.master.status.builderAdded(
                builder_config.name,
                builder_config.builddir,
                builder_config.category,
                builder_config.friendly_name,
                builder_config.description,
                project=builder_config.project)

        self.config = builder_config

        self.builder_status.setDescription(builder_config.description)
        self.builder_status.setCategory(builder_config.category)
        self.builder_status.setSlavenames(self.config.slavenames)
        self.builder_status.setStartSlavenames(self.config.startSlavenames)
        self.builder_status.setCacheSize(new_config.caches)
        self.builder_status.setProject(builder_config.project)
        self.builder_status.setFriendlyName(builder_config.friendly_name)
        self.builder_status.setTags(builder_config.tags)

        return defer.succeed(None)

    def stopService(self):

        d = defer.maybeDeferred(lambda: service.MultiService.stopService(self))

        if self.building:
            for b in self.building:
                d.addCallback(self._resubmit_buildreqs, b.requests)
                d.addErrback(log.err)
        return d

    def __repr__(self):
        return "<Builder '%r' at %d>" % (self.name, id(self))

    @defer.inlineCallbacks
    def getOldestRequestTime(self):
        """Returns the submitted_at of the oldest unclaimed build request for
        this builder, or None if there are no build requests.

        @returns: datetime instance or None, via Deferred
        """
        unclaimed = yield self.master.db.buildrequests.getBuildRequests(
            buildername=self.name, claimed=False)

        if unclaimed:
            unclaimed = [brd['submitted_at'] for brd in unclaimed]
            unclaimed.sort()
            defer.returnValue(unclaimed[0])
        else:
            defer.returnValue(None)

    def getSlaveBuilder(self, slavename):
        for sb in self.getAllSlaves():
            if sb.slave.slave_status.getName() == slavename:
                return sb

    def slaveIsAvailable(self, slavename):
        slave_builder = self.getSlaveBuilder(slavename=slavename)
        return slave_builder.isAvailable() if slave_builder else False

    def reclaimAllBuilds(self):
        brids = set()
        for b in self.building:
            brids.update([br.id for br in b.requests])
        for b in self.old_building:
            brids.update([br.id for br in b.requests])

        if not brids:
            return defer.succeed(None)

        d = self.master.db.buildrequests.reclaimBuildRequests(brids)
        d.addErrback(log.err, 'while re-claiming running BuildRequests')
        return d

    def getBuild(self, number):
        for b in self.building:
            if b.build_status and b.build_status.number == number:
                return b
        for b in self.old_building.keys():
            if b.build_status and b.build_status.number == number:
                return b
        return None

    def isStartSlave(self, sb):
        return self.config.startSlavenames and sb.slave.slavename in self.config.startSlavenames

    def removeSlaveBuilder(self, sb):
        if sb in self.startSlaves:
            self.startSlaves.remove(sb)

        if sb in self.slaves:
            self.slaves.remove(sb)

    def addSlaveBuilder(self, sb):
        if self.isStartSlave(sb):
            self.startSlaves.append(sb)
        else:
            self.slaves.append(sb)

    def addLatentSlave(self, slave):
        assert interfaces.ILatentBuildSlave.providedBy(slave)
        for s in self.slaves:
            if s == slave:
                break
        else:
            sb = slavebuilder.LatentSlaveBuilder(slave, self)
            self.builder_status.addPointEvent(
                ['added', 'latent', slave.slavename])
            self.addSlaveBuilder(sb)
            self.botmaster.maybeStartBuildsForBuilder(self.name)

    def getAllSlaves(self):
        if self.startSlaves:
            return self.slaves + self.startSlaves
        return self.slaves

    def shouldUseSelectedSlave(self):
        return not self.config.startSlavenames

    def attached(self, slave, remote, commands):
        """This is invoked by the BuildSlave when the self.slavename bot
        registers their builder.

        @type  slave: L{buildbot.buildslave.BuildSlave}
        @param slave: the BuildSlave that represents the buildslave as a whole
        @type  remote: L{twisted.spread.pb.RemoteReference}
        @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder}
        @type  commands: dict: string -> string, or None
        @param commands: provides the slave's version of each RemoteCommand

        @rtype:  L{twisted.internet.defer.Deferred}
        @return: a Deferred that fires (with 'self') when the slave-side
                 builder is fully attached and ready to accept commands.
        """
        for s in self.attaching_slaves + self.getAllSlaves():
            if s.slave == slave:
                # already attached to them. This is fairly common, since
                # attached() gets called each time we receive the builder
                # list from the slave, and we ask for it each time we add or
                # remove a builder. So if the slave is hosting builders
                # A,B,C, and the config file changes A, we'll remove A and
                # re-add it, triggering two builder-list requests, getting
                # two redundant calls to attached() for B, and another two
                # for C.
                #
                # Therefore, when we see that we're already attached, we can
                # just ignore it.
                return defer.succeed(self)

        sb = slavebuilder.SlaveBuilder()
        sb.setBuilder(self)
        self.attaching_slaves.append(sb)
        d = sb.attached(slave, remote, commands)
        d.addCallback(self._attached)
        d.addErrback(self._not_attached, slave)
        return d

    def _attached(self, sb):
        self.builder_status.addPointEvent(['connect', sb.slave.slavename])
        self.attaching_slaves.remove(sb)
        self.addSlaveBuilder(sb)

        self.updateBigStatus()

        return self

    def _not_attached(self, why, slave):
        # already log.err'ed by SlaveBuilder._attachFailure
        # TODO: remove from self.slaves (except that detached() should get
        #       run first, right?)
        log.err(why, 'slave failed to attach')
        self.builder_status.addPointEvent(
            ['failed', 'connect', slave.slavename])
        # TODO: add an HTMLLogFile of the exception

    def detached(self, slave):
        """This is called when the connection to the bot is lost."""
        for sb in self.attaching_slaves + self.getAllSlaves():
            if sb.slave == slave:
                break
        else:
            log.msg("WEIRD: Builder.detached(%s) (%s)"
                    " not in attaching_slaves(%s)"
                    " or slaves(%s)" %
                    (slave, slave.slavename, self.attaching_slaves,
                     self.getAllSlaves()))
            return
        if sb.state == BUILDING:
            # the Build's .lostRemote method (invoked by a notifyOnDisconnect
            # handler) will cause the Build to be stopped, probably right
            # after the notifyOnDisconnect that invoked us finishes running.
            pass

        if sb in self.attaching_slaves:
            self.attaching_slaves.remove(sb)

        self.removeSlaveBuilder(sb)

        self.builder_status.addPointEvent(['disconnect', slave.slavename])
        sb.detached()  # inform the SlaveBuilder that their slave went away
        self.updateBigStatus()

    def updateBigStatus(self):
        try:
            # Catch exceptions here, since this is called in a LoopingCall.
            if not self.builder_status:
                return
            if not self.slaves:
                self.builder_status.setBigState("offline")
            elif self.building or self.old_building:
                self.builder_status.setBigState("building")
            else:
                self.builder_status.setBigState("idle")
        except Exception:
            log.err(
                None, "while trying to update status of builder '%s'" %
                (self.name, ))

    def getAvailableSlaves(self):
        if self.config.startSlavenames:
            return [sb for sb in self.startSlaves if sb.isAvailable()]

        return [sb for sb in self.slaves if sb.isAvailable()]

    def getAvailableSlavesToProcessBuildRequests(self, slavepool):
        slavelist = self.startSlaves if (self.config.startSlavenames and slavepool == Slavepool.startSlavenames) \
            else self.slaves

        return [sb for sb in slavelist if sb.isAvailable()]

    def canStartWithSlavebuilder(self, slavebuilder):
        locks = [(self.botmaster.getLockFromLockAccess(access), access)
                 for access in self.config.locks]
        return Build.canStartWithSlavebuilder(locks, slavebuilder)

    def canStartBuild(self, slavebuilder, breq):
        if callable(self.config.canStartBuild):
            return defer.maybeDeferred(self.config.canStartBuild, self,
                                       slavebuilder, breq)
        return defer.succeed(True)
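
    # Editor's note (illustrative): config.canStartBuild, when provided, is
    # called as canStartBuild(builder, slavebuilder, breq) and may return a
    # bool or a Deferred firing one, e.g.
    #
    #     def canStartBuild(builder, slavebuilder, breq):
    #         # hypothetical gate: keep large builds off ordinary slaves
    #         return not breq.properties.getProperty('needs_big_slave', False)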

    @defer.inlineCallbacks
    def maybeUpdateMergedBuilds(self, brid, buildnumber, brids):
        build_status = yield self.builder_status.deferToThread(buildnumber)
        if build_status is not None:
            build_status.updateBuildRequestIDs(brids)
        buildnumbers = yield self.master.db.builds.getBuildNumbersForRequests(
            brids=brids)
        buildnumbers = [num for num in buildnumbers if num != buildnumber]

        if buildnumbers:
            url = yield self.master.status.getURLForBuildRequest(
                brid,
                builder_name=self.name,
                build_number=buildnumber,
                builder_friendly_name=self.config.friendly_name)
            for number in buildnumbers:
                build_status = yield self.builder_status.deferToThread(number)
                if build_status is not None:
                    yield build_status.buildMerged(url)

    @defer.inlineCallbacks
    def maybeResumeBuild(self, slavebuilder, buildnumber, breqs):
        build_status = None

        if self.builder_status:
            build_status = yield self.builder_status.deferToThread(buildnumber)
            if build_status:
                build_status.finished = None

        if not self.running:
            defer.returnValue(False)

        build_started = yield self._startBuildFor(slavebuilder, breqs,
                                                  build_status)

        if build_started and len(breqs) > 1:
            yield self.maybeUpdateMergedBuilds(
                brid=breqs[0].id,
                buildnumber=buildnumber,
                brids=[br.id for br in breqs[1:]])

        defer.returnValue(build_started)

    @defer.inlineCallbacks
    def _startBuildFor(self, slavebuilder, buildrequests, build_status=None):
        """Start a build on the given slave.
        @param slavebuilder: the L{SlaveBuilder} which will host this build
        @param buildrequests: the list of L{BuildRequest}s to build
        @param build_status: an existing build status to reuse when resuming a
            build, or None to create a fresh one

        @return: (via Deferred) boolean indicating that the build was
        successfully started.
        """

        # as of the Python versions supported now, try/finally can't be used
        # inside a generator.  So instead, we push cleanup functions
        # into a list so that, at any point, we can abort this operation.
        cleanups = []

        def run_cleanups():
            try:
                while cleanups:
                    fn = cleanups.pop()
                    fn()
            except:
                log.err(failure.Failure(),
                        "while running %r" % (run_cleanups, ))

        # the last cleanup we want to perform is to update the big
        # status based on any other cleanup
        cleanups.append(lambda: self.updateBigStatus())

        build = self.config.factory.newBuild(buildrequests)
        build.setBuilder(self)
        log.msg("starting build %s using slave %s" % (build, slavebuilder))

        # set up locks
        build.setLocks(self.config.locks)
        cleanups.append(lambda: slavebuilder.slave.releaseLocks()
                        if slavebuilder.slave else None)

        if len(self.config.env) > 0:
            build.setSlaveEnvironment(self.config.env)

        # append the build to self.building
        self.building.append(build)
        cleanups.append(lambda: self.building.remove(build))

        # update the big status accordingly
        self.updateBigStatus()

        # ping the slave to make sure they're still there. If they've
        # fallen off the map (due to a NAT timeout or something), this
        # will fail in a couple of minutes, depending upon the TCP
        # timeout.
        #
        # TODO: This can unnecessarily suspend the starting of a build, in
        # situations where the slave is live but is pushing lots of data to
        # us in a build.
        log.msg("starting build %s.. pinging the slave %s" %
                (build, slavebuilder))
        try:
            ping_success = yield slavebuilder.ping(
                timeout=self.master.config.remoteCallTimeout)
        except:
            log.err(failure.Failure(), 'while pinging slave before build:')
            raise

        if not ping_success:
            log.msg("build %s slave %s ping failed; re-queueing the request" %
                    (build, slavebuilder))
            run_cleanups()
            raise Exception("Ping failed")

        # check that the slave is still available
        ready = slavebuilder.isAvailable()
        if ready:
            try:
                ready = yield slavebuilder.prepare(self.builder_status, build)
            except:
                log.err(failure.Failure(), 'while preparing slavebuilder:')
                raise

        # If prepare returns True then it is ready and we start a build
        # If it returns false then we don't start a new build.
        if not ready:
            log.msg("slave %s can't build %s after all; re-queueing the "
                    "request" % (build, slavebuilder))
            run_cleanups()
            raise Exception("Unknown")

        # The buildslave is ready to go. slavebuilder.buildStarted() sets its
        # state to BUILDING (so we won't try to use it for any other builds).
        # This gets set back to IDLE by the Build itself when it finishes.
        if slavebuilder.buildStarted():
            cleanups.append(lambda: slavebuilder.buildFinished())
        else:
            log.msg("slave %s can't build %s after all; re-queueing the "
                    "request" % (build, slavebuilder))
            run_cleanups()
            raise Exception("Unknown")

        # create the BuildStatus object that goes with the Build
        if build_status is None:
            bs = self.builder_status.newBuild()
        else:
            bs = build_status
            bs.builder = self.builder_status
            bs.slavename = slavebuilder.slave.slavename
            bs.waitUntilFinished().addCallback(
                self.builder_status._buildFinished)
            # update the steps to use finished steps

        # record the build in the db - one row per buildrequest
        try:
            bids = []

            if len(build.requests) > 0:
                main_br = build.requests[0]
                bid = yield self.master.db.builds.addBuild(
                    main_br.id, bs.number, slavebuilder.slave.slavename)
                bids.append(bid)
                # add build information to merged br
                for req in build.requests[1:]:
                    bid = yield self.master.db.builds.addBuild(
                        req.id, bs.number)
                    self.master.status.build_started(req.id, self.name, bs)
                    bids.append(bid)
        except:
            log.err(failure.Failure(), 'while adding rows to build table:')
            run_cleanups()
            raise

        # IMPORTANT: no yielding is allowed from here to the startBuild call!

        # it's possible that we lost the slave remote between the ping above
        # and now.  If so, bail out.  The build.startBuild call below transfers
        # responsibility for monitoring this connection to the Build instance,
        # so this check ensures we hand off a working connection.
        if not slavebuilder.remote:
            log.msg("slave disappeared before build could start")
            run_cleanups()
            raise Exception("Slave seems to have disappered")

        # let status know
        self.master.status.build_started(main_br.id, self.name, bs)

        # start the build. This will first set up the steps, then tell the
        # BuildStatus that it has started, which will announce it to the world
        # (through our BuilderStatus object, which is its parent).  Finally it
        # will start the actual build process.  This is done with a fresh
        # Deferred since _startBuildFor should not wait until the build is
        # finished.  This uses `maybeDeferred` to ensure that any exceptions
        # raised by startBuild are treated as deferred errbacks (see
        # http://trac.buildbot.net/ticket/2428).
        d = defer.maybeDeferred(build.startBuild, bs, self.expectations,
                                slavebuilder)
        d.addCallback(self.buildFinished, slavebuilder, bids)
        # this shouldn't happen. if it does, the slave will be wedged
        d.addErrback(
            log.err, 'from a running build; this is a '
            'serious error - please file a bug at http://buildbot.net')

        # make sure the builder's status is represented correctly
        self.updateBigStatus()
        defer.returnValue(True)

    def setupProperties(self, props):
        props.setProperty("buildername", self.name, "Builder")
        if len(self.config.properties) > 0:
            for propertyname in self.config.properties:
                props.setProperty(propertyname,
                                  self.config.properties[propertyname],
                                  "Builder")

    @defer.inlineCallbacks
    def buildFinished(self, build, sb, bids):
        """
        This is called when the Build has finished (either success or
        failure). Any exceptions during the build are reported with
        results=FAILURE, not with an errback.

        By the time we get here, the Build has already released the slave,
        which will trigger a check for any now-possible build requests
        (maybeStartBuilds)
        """
        start = time.time()
        buildFinishedLog = {
            'name': 'buildFinished',
            'description': 'Called when a Build has finished',
            'number': build.build_status.number,
        }

        # List all known build requests tied to this `build`
        breqs = {br.id: br for br in build.requests}

        # Prevent new merged builds from coming in while we are finishing
        lock_keys = [int(brid) for brid in sorted(breqs.keys())]
        build_merging_locks = self.master.buildrequest_merger.getMergingLocks(
            lock_keys)
        for lock in build_merging_locks:
            yield lock.acquire()
        locks_acquired_start = time.time()
        buildFinishedLog['elapsed_acquiring_locks'] = time.time() - start

        try:
            # Look for additional build requests that might have been merged into
            # these known build requests
            getBuildRequestsStart = time.time()
            otherBrdicts = yield self.master.db.buildrequests.getBuildRequests(
                mergebrids=list(breqs.keys()))
            otherBreqs = []
            buildFinishedLog['elapsed_getBuildRequests'] = time.time(
            ) - getBuildRequestsStart

            for brdict in otherBrdicts:
                breq = yield BuildRequest.fromBrdict(self.master, brdict)
                otherBreqs.append(breq)

            # Include the missing ones
            for br in otherBreqs:
                breqs.setdefault(br.id, br)

            buildFinishedLog['brids'] = sorted(breqs.keys())

            d = yield self.finishBuildRequests(
                brids=list(breqs.keys()),
                requests=list(breqs.values()),
                build=build,
                bids=bids,
            )
        finally:
            for lock in build_merging_locks:
                yield lock.release()
                buildFinishedLog['elapsed_using_locks'] = time.time(
                ) - locks_acquired_start
            log.msg(json.dumps(buildFinishedLog))

        self.building.remove(build)

        if sb.slave:
            sb.slave.releaseLocks()

        self.updateBigStatus()

        defer.returnValue(d)

    @defer.inlineCallbacks
    def finishBuildRequestsFailed(self, failure, msg, brids):
        log.err(failure, msg)
        log.msg("Katana will retry buildrequests with ids %s" % brids)
        yield self.master.db.buildrequests.unclaimBuildRequests(
            brids, results=BEGINNING)

    def finishBuildRequests(self,
                            brids,
                            requests,
                            build,
                            bids=None,
                            mergedbrids=None):

        d = self.master.db.builds.finishBuilds(
            bids) if bids else defer.succeed(None)

        mergedbrids = brids if mergedbrids is None else mergedbrids

        # TODO: we should probably do better error handling
        d.addCallback(lambda _: self.master.db.builds.finishedMergedBuilds(
            mergedbrids, build.build_status.number))
        d.addErrback(log.err, 'while marking builds as finished (ignored)')
        d.addCallback(lambda _: self.master.db.buildrequests.
                      maybeUpdateMergedBrids(mergedbrids))

        results = build.build_status.getResults()
        if results == RETRY:
            d.addCallback(
                lambda _: self._resubmit_buildreqs(requests=requests))
            d.addErrback(log.err, 'while resubmitting build requests')
        else:
            db = self.master.db
            if results == RESUME:
                d.addCallback(lambda _: db.buildrequests.updateBuildRequests(
                    brids,
                    results=results,
                    slavepool=build.build_status.resumeSlavepool))
            else:
                d.addCallback(lambda _: db.buildrequests.completeBuildRequests(
                    brids, results))

            d.addCallback(lambda _: self._maybeBuildsetsComplete(
                requests, results=results))
            # nothing in particular to do with this deferred, so just log it if
            # it fails..
            d.addErrback(self.finishBuildRequestsFailed,
                         'while marking build requests as completed', brids)
        return d

    @defer.inlineCallbacks
    def _maybeBuildsetsComplete(self, requests, results=None):
        # inform the master that we may have completed a number of buildsets
        for br in requests:
            yield self.master.maybeBuildsetComplete(br.bsid)

            if results and results == RESUME:
                self.master.buildRequestAdded(br.bsid, br.id, self.name)

    @defer.inlineCallbacks
    def _resubmit_buildreqs(self, out=None, requests=None):
        brids = [br.id for br in requests]
        yield self.master.db.buildrequests.unclaimBuildRequests(
            brids, results=BEGINNING)
        defer.returnValue(out)

    def setExpectations(self, progress):
        """Mark the build as successful and update expectations for the next
        build. Only call this when the build did not fail in any way that
        would invalidate the time expectations generated by it. (if the
        compile failed and thus terminated early, we can't use the last
        build to predict how long the next one will take).
        """
        if self.expectations:
            self.expectations.update(progress)
        else:
            # the first time we get a good build, create our Expectations
            # based upon its results
            self.expectations = Expectations(progress)
        log.msg("new expectations: %s seconds" %
                self.expectations.expectedBuildTime())

    # Build Creation
    @defer.inlineCallbacks
    def maybeStartBuild(self, slavebuilder, breqs):
        # This method is called by the botmaster whenever this builder should
        # start a set of buildrequests on a slave. Do not call this method
        # directly - use master.botmaster.maybeStartBuildsForBuilder, or one of
        # the other similar methods if more appropriate

        # first, if we're not running, then don't start builds; stopService
        # uses this to ensure that any ongoing maybeStartBuild invocations
        # are complete before it stops.
        if not self.running:
            defer.returnValue(False)
            return

        # If the build fails from here on out (e.g., because a slave has failed),
        # it will be handled outside of this function. TODO: test that!

        build_started = yield self._startBuildFor(slavebuilder, breqs)
        defer.returnValue(build_started)

    def getConfiguredMergeRequestsFn(self):
        mergeRequests_fn = self.config.mergeRequests
        if mergeRequests_fn is None:
            mergeRequests_fn = self.master.config.mergeRequests
        if mergeRequests_fn is None:
            mergeRequests_fn = True
        return mergeRequests_fn

    def getMergeRequestsFn(self):
        """Helper function to determine which mergeRequests function to use
        from L{_mergeRequests}, or None for no merging"""
        # first, seek through builder, global, and the default
        mergeRequests_fn = self.getConfiguredMergeRequestsFn()

        # then translate False and True properly
        if mergeRequests_fn is False:
            mergeRequests_fn = Builder._skipMergeRequestFn
        elif mergeRequests_fn is True:
            mergeRequests_fn = Builder._defaultMergeRequestFn

        return mergeRequests_fn

    def getBoolProperty(self, req1, name):
        property = req1.properties.getProperty(name, False)
        if type(property) != bool:
            property = (property.lower() == "true")
        return property

    def propertiesMatch(self, req1, req2):
        # If the instances are the same then they match!
        if req1.bsid == req2.bsid:
            return True
        if req1.properties.has_key(
                'selected_slave') or req2.properties.has_key('selected_slave'):
            return False
        if not req1.isMergingWithPrevious:
            if self.getBoolProperty(req1,
                                    "force_rebuild") != self.getBoolProperty(
                                        req2, "force_rebuild"):
                return False
            if self.getBoolProperty(
                    req1, "force_chain_rebuild") != self.getBoolProperty(
                        req2, "force_chain_rebuild"):
                return False
        return True

    def _defaultMergeRequestFn(self, req1, req2):
        if self.propertiesMatch(req1, req2):
            return req1.canBeMergedWith(req2)
        return False

    def _skipMergeRequestFn(self, req1, req2):
        return False
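
The Katana variant above only merges requests whose force_rebuild and
force_chain_rebuild flags agree and which do not pin a selected_slave.  The
standalone sketch below restates that gate with stub request objects that
exist purely for illustration (the real code also coerces the string "true"
via getBoolProperty, which the sketch simplifies to bool()):

class _StubProperties(object):
    def __init__(self, values):
        self._values = values
    def getProperty(self, name, default=None):
        return self._values.get(name, default)
    def has_key(self, name):
        return name in self._values

class _StubRequest(object):
    def __init__(self, bsid, **props):
        self.bsid = bsid
        self.isMergingWithPrevious = False
        self.properties = _StubProperties(props)

def properties_allow_merge(req1, req2):
    # Same gate as Builder.propertiesMatch above, restated for illustration.
    if req1.bsid == req2.bsid:
        return True
    if req1.properties.has_key('selected_slave') or \
            req2.properties.has_key('selected_slave'):
        return False
    if not req1.isMergingWithPrevious:
        for flag in ('force_rebuild', 'force_chain_rebuild'):
            if bool(req1.properties.getProperty(flag, False)) != \
                    bool(req2.properties.getProperty(flag, False)):
                return False
    return True

# e.g. a forced rebuild does not merge with an ordinary request:
#   properties_allow_merge(_StubRequest(1, force_rebuild=True), _StubRequest(2))
#   -> False
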
Code example #13
0
class Builder(pb.Referenceable, service.MultiService):
    """I manage all Builds of a given type.

    Each Builder is created by an entry in the config file (the c['builders']
    list), with a number of parameters.

    One of these parameters is the L{buildbot.process.factory.BuildFactory}
    object that is associated with this Builder. The factory is responsible
    for creating new L{Build<buildbot.process.base.Build>} objects. Each
    Build object defines when and how the build is performed, so a new
    Factory or Builder should be defined to control this behavior.

    The Builder holds on to a number of L{base.BuildRequest} objects in a
    list named C{.buildable}. Incoming BuildRequest objects will be added to
    this list, or (if possible) merged into an existing request. When a slave
    becomes available, I will use my C{BuildFactory} to turn the request into
    a new C{Build} object. The C{BuildRequest} is forgotten, the C{Build}
    goes into C{.building} while it runs. Once the build finishes, I will
    discard it.

    I maintain a list of available SlaveBuilders, one for each connected
    slave that the C{slavenames} parameter says we can use. Some of these
    will be idle, some of them will be busy running builds for me. If there
    are multiple slaves, I can run multiple builds at once.

    I also manage forced builds, progress expectation (ETA) management, and
    some status delivery chores.

    @type buildable: list of L{buildbot.process.base.BuildRequest}
    @ivar buildable: BuildRequests that are ready to build, but which are
                     waiting for a buildslave to be available.

    @type building: list of L{buildbot.process.base.Build}
    @ivar building: Builds that are actively running

    @type slaves: list of L{buildbot.buildslave.BuildSlave} objects
    @ivar slaves: the slaves currently available for building
    """

    expectations = None  # this is created the first time we get a good build
    CHOOSE_SLAVES_RANDOMLY = True  # disabled for determinism during tests

    def __init__(self, setup, builder_status):
        """
        @type  setup: dict
        @param setup: builder setup data, as stored in
                      BuildmasterConfig['builders'].  Contains name,
                      slavename(s), builddir, slavebuilddir, factory, locks.
        @type  builder_status: L{buildbot.status.builder.BuilderStatus}
        """
        service.MultiService.__init__(self)
        self.name = setup['name']
        self.slavenames = []
        if setup.has_key('slavename'):
            self.slavenames.append(setup['slavename'])
        if setup.has_key('slavenames'):
            self.slavenames.extend(setup['slavenames'])
        self.builddir = setup['builddir']
        self.slavebuilddir = setup['slavebuilddir']
        self.buildFactory = setup['factory']
        self.nextSlave = setup.get('nextSlave')
        if self.nextSlave is not None and not callable(self.nextSlave):
            raise ValueError("nextSlave must be callable")
        self.locks = setup.get("locks", [])
        self.env = setup.get('env', {})
        assert isinstance(self.env, dict)
        if setup.has_key('periodicBuildTime'):
            raise ValueError("periodicBuildTime can no longer be defined as"
                             " part of the Builder: use scheduler.Periodic"
                             " instead")
        self.nextBuild = setup.get('nextBuild')
        if self.nextBuild is not None and not callable(self.nextBuild):
            raise ValueError("nextBuild must be callable")
        self.buildHorizon = setup.get('buildHorizon')
        self.logHorizon = setup.get('logHorizon')
        self.eventHorizon = setup.get('eventHorizon')
        self.mergeRequests = setup.get('mergeRequests', True)
        self.properties = setup.get('properties', {})

        # build/wannabuild slots: Build objects move along this sequence
        self.building = []
        # old_building holds active builds that were stolen from a predecessor
        self.old_building = weakref.WeakKeyDictionary()

        # buildslaves which have connected but which are not yet available.
        # These are always in the ATTACHING state.
        self.attaching_slaves = []

        # buildslaves at our disposal. Each SlaveBuilder instance has a
        # .state that is IDLE, PINGING, or BUILDING. "PINGING" is used when a
        # Build is about to start, to make sure that they're still alive.
        self.slaves = []

        self.builder_status = builder_status
        self.builder_status.setSlavenames(self.slavenames)
        self.builder_status.buildHorizon = self.buildHorizon
        self.builder_status.logHorizon = self.logHorizon
        self.builder_status.eventHorizon = self.eventHorizon
        t = internet.TimerService(10 * 60, self.reclaimAllBuilds)
        t.setServiceParent(self)

        # for testing, to help synchronize tests
        self.watchers = {
            'attach': [],
            'detach': [],
            'detach_all': [],
            'idle': []
        }
        self.run_count = 0

    def setBotmaster(self, botmaster):
        self.botmaster = botmaster
        self.db = botmaster.db
        self.master_name = botmaster.master_name
        self.master_incarnation = botmaster.master_incarnation

    def compareToSetup(self, setup):
        diffs = []
        setup_slavenames = []
        if setup.has_key('slavename'):
            setup_slavenames.append(setup['slavename'])
        setup_slavenames.extend(setup.get('slavenames', []))
        if setup_slavenames != self.slavenames:
            diffs.append('slavenames changed from %s to %s' \
                         % (self.slavenames, setup_slavenames))
        if setup['builddir'] != self.builddir:
            diffs.append('builddir changed from %s to %s' \
                         % (self.builddir, setup['builddir']))
        if setup['slavebuilddir'] != self.slavebuilddir:
            diffs.append('slavebuilddir changed from %s to %s' \
                         % (self.slavebuilddir, setup['slavebuilddir']))
        if setup['factory'] != self.buildFactory:  # compare objects
            diffs.append('factory changed')
        if setup.get('locks', []) != self.locks:
            diffs.append('locks changed from %s to %s' %
                         (self.locks, setup.get('locks')))
        if setup.get('nextSlave') != self.nextSlave:
            diffs.append('nextSlave changed from %s to %s' %
                         (self.nextSlave, setup.get('nextSlave')))
        if setup.get('nextBuild') != self.nextBuild:
            diffs.append('nextBuild changed from %s to %s' %
                         (self.nextBuild, setup.get('nextBuild')))
        if setup['buildHorizon'] != self.buildHorizon:
            diffs.append('buildHorizon changed from %s to %s' %
                         (self.buildHorizon, setup['buildHorizon']))
        if setup['logHorizon'] != self.logHorizon:
            diffs.append('logHorizon changed from %s to %s' %
                         (self.logHorizon, setup['logHorizon']))
        if setup['eventHorizon'] != self.eventHorizon:
            diffs.append('eventHorizon changed from %s to %s' %
                         (self.eventHorizon, setup['eventHorizon']))
        return diffs

    def __repr__(self):
        return "<Builder '%r' at %d>" % (self.name, id(self))

    def triggerNewBuildCheck(self):
        self.botmaster.triggerNewBuildCheck()

    def run(self):
        """Check for work to be done. This should be called any time I might
        be able to start a job:

         - when the Builder is first created
         - when a new job has been added to the [buildrequests] DB table
         - when a slave has connected

        If I have both an available slave and the database contains a
        BuildRequest that I can handle, I will claim the BuildRequest and
        start the build. When the build finishes, I will retire the
        BuildRequest.
        """
        # overall plan:
        #  move .expectations to DB

        assert self.running
        log.msg("Builder.run %s: %s" % (self, self.slaves))
        self.run_count += 1

        available_slaves = [sb for sb in self.slaves if sb.isAvailable()]
        if not available_slaves:
            self.updateBigStatus()
            return
        d = self.db.runInteraction(self._claim_buildreqs, available_slaves)
        d.addCallback(self._start_builds)
        return d

    # slave-managers must refresh their claim on a build at least once an
    # hour, less any inter-manager clock skew
    RECLAIM_INTERVAL = 1 * 3600

    def _claim_buildreqs(self, t, available_slaves):
        # return a dict mapping slave -> (brid,ssid)
        now = util.now()
        old = now - self.RECLAIM_INTERVAL
        requests = self.db.get_unclaimed_buildrequests(self.name, old,
                                                       self.master_name,
                                                       self.master_incarnation,
                                                       t)

        assignments = {}
        while requests and available_slaves:
            sb = self._choose_slave(available_slaves)
            if not sb:
                log.msg("%s: want to start build, but we don't have a remote" %
                        self)
                break
            available_slaves.remove(sb)
            breq = self._choose_build(requests)
            if not breq:
                log.msg("%s: went to start build, but nextBuild said not to" %
                        self)
                break
            requests.remove(breq)
            merged_requests = [breq]
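            # fold any other pending requests that the merge policy approves
            # into this build, so a single build can satisfy several requests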
            for other_breq in requests[:]:
                if (self.mergeRequests and self.botmaster.shouldMergeRequests(
                        self, breq, other_breq)):
                    requests.remove(other_breq)
                    merged_requests.append(other_breq)
            assignments[sb] = merged_requests
            brids = [br.id for br in merged_requests]
            self.db.claim_buildrequests(now, self.master_name,
                                        self.master_incarnation, brids, t)
        return assignments

    def _choose_slave(self, available_slaves):
        # note: this might return None if the nextSlave() function decided to
        # not give us anything
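        #
        # A minimal sketch of what a nextSlave hook might look like (this
        # helper is hypothetical, not part of this module); it receives this
        # Builder and the available SlaveBuilder list and returns one of
        # them, or None to postpone the build:
        #
        #   def pick_first_available(builder, available_slaves):
        #       return available_slaves[0] if available_slaves else None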
        if self.nextSlave:
            try:
                return self.nextSlave(self, available_slaves)
            except:
                log.msg("Exception choosing next slave")
                log.err(Failure())
            return None
        if self.CHOOSE_SLAVES_RANDOMLY:
            return random.choice(available_slaves)
        return available_slaves[0]

    def _choose_build(self, buildable):
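        # note: like _choose_slave, this might return None if the nextBuild()
        # function decided not to start anything right now.
        #
        # A minimal sketch of a nextBuild hook (hypothetical example): it is
        # given this Builder and the list of claimable BuildRequests and
        # returns the request to start next, or None to start nothing:
        #
        #   def oldest_request_first(builder, buildable):
        #       if not buildable:
        #           return None
        #       return min(buildable, key=lambda br: br.getSubmitTime())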
        if self.nextBuild:
            try:
                return self.nextBuild(self, buildable)
            except:
                log.msg("Exception choosing next build")
                log.err(Failure())
            return None
        return buildable[0]

    def _start_builds(self, assignments):
        # because _claim_buildreqs runs in a separate thread, we might have
        # lost a slave by this point. We treat that case the same as if we
        # lose the slave right after the build starts: the initial ping
        # fails.
        for (sb, requests) in assignments.items():
            build = self.buildFactory.newBuild(requests)
            build.setBuilder(self)
            build.setLocks(self.locks)
            if len(self.env) > 0:
                build.setSlaveEnvironment(self.env)
            self.startBuild(build, sb)
        self.updateBigStatus()

    def getBuildable(self):
        return self.db.runInteractionNow(self._getBuildable)

    def _getBuildable(self, t):
        now = util.now()
        old = now - self.RECLAIM_INTERVAL
        return self.db.get_unclaimed_buildrequests(self.name, old,
                                                   self.master_name,
                                                   self.master_incarnation, t)

    def getOldestRequestTime(self):
        """Returns the timestamp of the oldest build request for this builder.

        If there are no build requests, None is returned."""
        buildable = self.getBuildable()
        if buildable:
            # TODO: this is sorted by priority first, not strictly reqtime
            return buildable[0].getSubmitTime()
        return None

    def cancelBuildRequest(self, brid):
        return self.db.cancel_buildrequests([brid])

    def consumeTheSoulOfYourPredecessor(self, old):
        """Suck the brain out of an old Builder.

        This takes all the runtime state from an existing Builder and moves
        it into ourselves. This is used when a Builder is changed in the
        master.cfg file: the new Builder has a different factory, but we want
        all the builds that were queued for the old one to get processed by
        the new one. Any builds which are already running will keep running.
        The new Builder will get as many of the old SlaveBuilder objects as
        it wants."""

        log.msg("consumeTheSoulOfYourPredecessor: %s feeding upon %s" %
                (self, old))
        # all pending builds are stored in the DB, so we don't have to do
        # anything to claim them. The old builder will be stopService'd,
        # which should make sure they don't start any new work

        # old.building (i.e. builds which are still running) is not migrated
        # directly: it keeps track of builds which were in progress in the
        # old Builder. When those builds finish, the old Builder will be
        # notified, not us. However, since the old SlaveBuilder will point to
        # us, it is our maybeStartBuild() that will be triggered.
        if old.building:
            self.builder_status.setBigState("building")
        # however, we do grab a weakref to the active builds, so that our
        # BuilderControl can see them and stop them. We use a weakref because
        # we aren't the one to get notified, so there isn't a convenient
        # place to remove it from self.building .
        for b in old.building:
            self.old_building[b] = None
        for b in old.old_building:
            self.old_building[b] = None

        # Our set of slavenames may be different. Steal any of the old
        # buildslaves that we want to keep using.
        for sb in old.slaves[:]:
            if sb.slave.slavename in self.slavenames:
                log.msg(" stealing buildslave %s" % sb)
                self.slaves.append(sb)
                old.slaves.remove(sb)
                sb.setBuilder(self)

        # old.attaching_slaves:
        #  these SlaveBuilders are waiting on a sequence of calls:
        #  remote.setMaster and remote.print . When these two complete,
        #  old._attached will be fired, which will add a 'connect' event to
        #  the builder_status and try to start a build. However, we've pulled
        #  everything out of the old builder's queue, so it will have no work
        #  to do. The outstanding remote.setMaster/print call will be holding
        #  the last reference to the old builder, so it will disappear just
        #  after that response comes back.
        #
        #  The BotMaster will ask the slave to re-set their list of Builders
        #  shortly after this function returns, which will cause our
        #  attached() method to be fired with a bunch of references to remote
        #  SlaveBuilders, some of which we already have (by stealing them
        #  from the old Builder), some of which will be new. The new ones
        #  will be re-attached.

        #  Therefore, we don't need to do anything about old.attaching_slaves

        return  # all done

    def reclaimAllBuilds(self):
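        # periodically renew this master's claim on the requests behind every
        # build that is still in progress (including ones inherited from a
        # predecessor Builder), so they are not treated as stale and handed
        # out again once RECLAIM_INTERVAL has elapsed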
        now = util.now()
        brids = set()
        for b in self.building:
            brids.update([br.id for br in b.requests])
        for b in self.old_building:
            brids.update([br.id for br in b.requests])
        self.db.claim_buildrequests(now, self.master_name,
                                    self.master_incarnation, brids)

    def getBuild(self, number):
        for b in self.building:
            if b.build_status and b.build_status.number == number:
                return b
        for b in self.old_building.keys():
            if b.build_status and b.build_status.number == number:
                return b
        return None

    def fireTestEvent(self, name, fire_with=None):
        if fire_with is None:
            fire_with = self
        watchers = self.watchers[name]
        self.watchers[name] = []
        for w in watchers:
            eventually(w.callback, fire_with)

    def addLatentSlave(self, slave):
        assert interfaces.ILatentBuildSlave.providedBy(slave)
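        # only add a new LatentSlaveBuilder if we are not already tracking
        # this slave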
        for s in self.slaves:
            if s.slave == slave:
                break
        else:
            sb = LatentSlaveBuilder(slave, self)
            self.builder_status.addPointEvent(
                ['added', 'latent', slave.slavename])
            self.slaves.append(sb)
            self.triggerNewBuildCheck()

    def attached(self, slave, remote, commands):
        """This is invoked by the BuildSlave when the self.slavename bot
        registers their builder.

        @type  slave: L{buildbot.buildslave.BuildSlave}
        @param slave: the BuildSlave that represents the buildslave as a whole
        @type  remote: L{twisted.spread.pb.RemoteReference}
        @param remote: a reference to the L{buildbot.slave.bot.SlaveBuilder}
        @type  commands: dict: string -> string, or None
        @param commands: provides the slave's version of each RemoteCommand

        @rtype:  L{twisted.internet.defer.Deferred}
        @return: a Deferred that fires (with 'self') when the slave-side
                 builder is fully attached and ready to accept commands.
        """
        for s in self.attaching_slaves + self.slaves:
            if s.slave == slave:
                # already attached to them. This is fairly common, since
                # attached() gets called each time we receive the builder
                # list from the slave, and we ask for it each time we add or
                # remove a builder. So if the slave is hosting builders
                # A,B,C, and the config file changes A, we'll remove A and
                # re-add it, triggering two builder-list requests, getting
                # two redundant calls to attached() for B, and another two
                # for C.
                #
                # Therefore, when we see that we're already attached, we can
                # just ignore it. TODO: build a diagram of the state
                # transitions here, I'm concerned about sb.attached() failing
                # and leaving sb.state stuck at 'ATTACHING', and about
                # the detached() message arriving while there's some
                # transition pending such that the response to the transition
                # re-vivifies sb
                return defer.succeed(self)

        sb = SlaveBuilder()
        sb.setBuilder(self)
        self.attaching_slaves.append(sb)
        d = sb.attached(slave, remote, commands)
        d.addCallback(self._attached)
        d.addErrback(self._not_attached, slave)
        return d

    def _attached(self, sb):
        # TODO: make this .addSlaveEvent(slave.slavename, ['connect']) ?
        self.builder_status.addPointEvent(['connect', sb.slave.slavename])
        self.attaching_slaves.remove(sb)
        self.slaves.append(sb)

        self.fireTestEvent('attach')
        return self

    def _not_attached(self, why, slave):
        # already log.err'ed by SlaveBuilder._attachFailure
        # TODO: make this .addSlaveEvent?
        # TODO: remove from self.slaves (except that detached() should get
        #       run first, right?)
        self.builder_status.addPointEvent(
            ['failed', 'connect', slave.slave.slavename])
        # TODO: add an HTMLLogFile of the exception
        self.fireTestEvent('attach', why)

    def detached(self, slave):
        """This is called when the connection to the bot is lost."""
        for sb in self.attaching_slaves + self.slaves:
            if sb.slave == slave:
                break
        else:
            log.msg(
                "WEIRD: Builder.detached(%s) (%s)"
                " not in attaching_slaves(%s)"
                " or slaves(%s)" %
                (slave, slave.slavename, self.attaching_slaves, self.slaves))
            return
        if sb.state == BUILDING:
            # the Build's .lostRemote method (invoked by a notifyOnDisconnect
            # handler) will cause the Build to be stopped, probably right
            # after the notifyOnDisconnect that invoked us finishes running.

            # TODO: should failover to a new Build
            #self.retryBuild(sb.build)
            pass

        if sb in self.attaching_slaves:
            self.attaching_slaves.remove(sb)
        if sb in self.slaves:
            self.slaves.remove(sb)

        # TODO: make this .addSlaveEvent?
        self.builder_status.addPointEvent(['disconnect', slave.slavename])
        sb.detached()  # inform the SlaveBuilder that their slave went away
        self.updateBigStatus()
        self.fireTestEvent('detach')
        if not self.slaves:
            self.fireTestEvent('detach_all')

    def updateBigStatus(self):
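        # reflect our current activity in the BuilderStatus "big state":
        # offline (no slaves), building, or idle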
        if not self.slaves:
            self.builder_status.setBigState("offline")
        elif self.building:
            self.builder_status.setBigState("building")
        else:
            self.builder_status.setBigState("idle")
            self.fireTestEvent('idle')

    def startBuild(self, build, sb):
        """Start a build on the given slave.
        @param build: the L{base.Build} to start
        @param sb: the L{SlaveBuilder} which will host this build

        @return: a Deferred which fires with a
        L{buildbot.interfaces.IBuildControl} that can be used to stop the
        Build, or to access a L{buildbot.interfaces.IBuildStatus} which will
        watch the Build as it runs. """

        self.building.append(build)
        self.updateBigStatus()
        log.msg("starting build %s using slave %s" % (build, sb))
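        # let the SlaveBuilder get ready first; for a latent slave this
        # typically means substantiating (starting up) the actual instance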
        d = sb.prepare(self.builder_status)

        def _ping(ign):
            # ping the slave to make sure they're still there. If they've
            # fallen off the map (due to a NAT timeout or something), this
            # will fail in a couple of minutes, depending upon the TCP
            # timeout.
            #
            # TODO: This can unnecessarily suspend the starting of a build, in
            # situations where the slave is live but is pushing lots of data to
            # us in a build.
            log.msg("starting build %s... pinging the slave %s" % (build, sb))
            return sb.ping()

        d.addCallback(_ping)
        d.addCallback(self._startBuild_1, build, sb)
        return d

    def _startBuild_1(self, res, build, sb):
        if not res:
            return self._startBuildFailed("slave ping failed", build, sb)
        # The buildslave is ready to go. sb.buildStarted() sets its state to
        # BUILDING (so we won't try to use it for any other builds). This
        # gets set back to IDLE by the Build itself when it finishes.
        sb.buildStarted()
        d = sb.remote.callRemote("startBuild")
        d.addCallbacks(self._startBuild_2,
                       self._startBuildFailed,
                       callbackArgs=(build, sb),
                       errbackArgs=(build, sb))
        return d

    def _startBuild_2(self, res, build, sb):
        # create the BuildStatus object that goes with the Build
        bs = self.builder_status.newBuild()

        # start the build. This will first set up the steps, then tell the
        # BuildStatus that it has started, which will announce it to the
        # world (through our BuilderStatus object, which is its parent).
        # Finally it will start the actual build process.
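        # record the started builds in the database and keep the resulting
        # build ids so buildFinished() can mark them finished later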
        bids = [
            self.db.build_started(req.id, bs.number) for req in build.requests
        ]
        d = build.startBuild(bs, self.expectations, sb)
        d.addCallback(self.buildFinished, sb, bids)
        # this shouldn't happen. if it does, the slave will be wedged
        d.addErrback(log.err)
        return build  # this is the IBuildControl

    def _startBuildFailed(self, why, build, sb):
        # put the build back on the buildable list
        log.msg("I tried to tell the slave that the build %s started, but "
                "remote_startBuild failed: %s" % (build, why))
        # release the slave. This will queue a call to maybeStartBuild, which
        # will fire after other notifyOnDisconnect handlers have marked the
        # slave as disconnected (so we don't try to use it again).
        sb.buildFinished()

        log.msg("re-queueing the BuildRequest")
        self.building.remove(build)
        self._resubmit_buildreqs(build).addErrback(log.err)

    def setupProperties(self, props):
        props.setProperty("buildername", self.name, "Builder")
        for propertyname, value in self.properties.items():
            props.setProperty(propertyname, value, "Builder")

    def buildFinished(self, build, sb, bids):
        """This is called when the Build has finished (either success or
        failure). Any exceptions during the build are reported with
        results=FAILURE, not with an errback."""

        # by the time we get here, the Build has already released the slave
        # (which queues a call to maybeStartBuild)

        self.db.builds_finished(bids)

        results = build.build_status.getResults()
        self.building.remove(build)
        if results == RETRY:
            # _resubmit_buildreqs returns a Deferred
            self._resubmit_buildreqs(build).addErrback(log.err)
        else:
            brids = [br.id for br in build.requests]
            self.db.retire_buildrequests(brids, results)
        self.triggerNewBuildCheck()

    def _resubmit_buildreqs(self, build):
        brids = [br.id for br in build.requests]
        return self.db.resubmit_buildrequests(brids)

    def setExpectations(self, progress):
        """Mark the build as successful and update expectations for the next
        build. Only call this when the build did not fail in any way that
        would invalidate the time expectations generated by it. (if the
        compile failed and thus terminated early, we can't use the last
        build to predict how long the next one will take).
        """
        if self.expectations:
            self.expectations.update(progress)
        else:
            # the first time we get a good build, create our Expectations
            # based upon its results
            self.expectations = Expectations(progress)
        log.msg("new expectations: %s seconds" % \
                self.expectations.expectedBuildTime())

    def shutdownSlave(self):
        if self.remote:
            self.remote.callRemote("shutdown")