Example #1
0
def splitJobList(jobList, src, targetSet, hidden=False, callback=None):
    """Split a conflicting job list by package name and mirror each chunk.

    Called when a single changeset request raised a changeset-key
    conflict: the jobs are regrouped by their base trove name
    (component suffixes such as ':runtime' stripped) and each group is
    fetched from ``src`` and committed to every target separately.

    @param jobList: sequence of jobs; each job's first element is the
        trove name
    @param src: repository to build changeset files from
    @param targetSet: iterable of mirror targets to commit to
    @param hidden: passed through to commitChangeSetFile
    @param callback: progress callback; a fresh ChangesetCallback is
        created when omitted (avoids the shared mutable-default pitfall
        of ``callback=ChangesetCallback()``, which is evaluated only
        once at function definition time)
    """
    if callback is None:
        callback = ChangesetCallback()
    log.debug("Changeset Key conflict detected; splitting job further...")
    jobs = {}
    for job in jobList:
        name = job[0]
        if ':' in name:
            # keep components grouped with their containing package
            name = name.split(':')[0]
        jobs.setdefault(name, []).append(job)
    for i, smallJobList in enumerate(jobs.itervalues()):
        (outFd, tmpName) = util.mkstemp()
        os.close(outFd)
        log.debug(
            "jobsplit %d of %d %s" %
            (i + 1, len(jobs), displayBundle([(0, x) for x in smallJobList])))
        try:
            src.createChangeSetFile(smallJobList,
                                    tmpName,
                                    recurse=False,
                                    callback=callback,
                                    mirrorMode=True)
            for target in targetSet:
                target.commitChangeSetFile(tmpName,
                                           hidden=hidden,
                                           callback=callback)
        finally:
            # don't leak the temp file if fetching/committing raises
            os.unlink(tmpName)
        callback.done()
    return
Example #2
0
def splitJobList(jobList, src, targetSet, hidden = False, callback = None):
    """Mirror a conflicting job list in smaller per-package chunks.

    Used as the fallback when one combined changeset request triggered a
    changeset-key conflict: jobs are bucketed by base trove name (the
    ':component' suffix is dropped) and each bucket is created from
    ``src`` and committed to every target on its own.

    @param callback: progress callback; defaults to a freshly created
        ChangesetCallback per call. (The old ``callback =
        ChangesetCallback()`` default was a mutable default argument,
        evaluated once at definition time and shared between calls.)
    """
    if callback is None:
        callback = ChangesetCallback()
    log.debug("Changeset Key conflict detected; splitting job further...")
    jobs = {}
    for job in jobList:
        name = job[0]
        if ':' in name:
            # strip the component part so components travel with the package
            name = name.split(':')[0]
        jobs.setdefault(name, []).append(job)
    for i, smallJobList in enumerate(jobs.itervalues()):
        (outFd, tmpName) = util.mkstemp()
        os.close(outFd)
        log.debug("jobsplit %d of %d %s" % (
            i + 1, len(jobs), displayBundle([(0,x) for x in smallJobList])))
        try:
            src.createChangeSetFile(smallJobList, tmpName, recurse = False,
                                    callback = callback, mirrorMode = True)
            for target in targetSet:
                target.commitChangeSetFile(tmpName, hidden = hidden,
                                           callback = callback)
        finally:
            # ensure the temp changeset file is removed even on error
            os.unlink(tmpName)
        callback.done()
    return
Example #3
0
def mirrorRepository(sourceRepos, targetRepos, cfg,
                     test = False, sync = False, syncSigs = False,
                     callback = None,
                     fastSync = False,
                     referenceRepos=None,
                     ):
    """Run one pass of mirroring from the source into the configured targets.

    @param sourceRepos: repository changesets are fetched from
    @param targetRepos: target repository (or targets, via _makeTargets)
    @param cfg: mirror configuration object
    @param test: when True, log what would be mirrored but commit nothing
    @param sync: when True, restart mirroring from the beginning (mark -1)
    @param syncSigs: passed through to mirrorTroveInfo
    @param callback: progress callback; a fresh ChangesetCallback is made
        per call when omitted (the old ``callback = ChangesetCallback()``
        default was a mutable default argument shared across calls)
    @param fastSync: skip the trove-info record sync step
    @param referenceRepos: repository used for metadata queries; defaults
        to sourceRepos
    @return: -1 if the caller should call again (more work pending),
        0 if there was nothing to mirror, otherwise the number of
        updates performed in this pass
    """
    if callback is None:
        callback = ChangesetCallback()
    if referenceRepos is None:
        referenceRepos = sourceRepos
    checkConfig(cfg)
    targets = _makeTargets(cfg, targetRepos, test)
    log.debug("-" * 20 + " start loop " + "-" * 20)

    hidden = len(targets) > 1 or cfg.useHiddenCommits
    if hidden:
        log.debug("will use hidden commits to synchronize target mirrors")

    if sync:
        currentMark = -1
    else:
        marks = [ t.getMirrorMark() for t in targets ]
        # we use the oldest mark as a starting point (since we have to
        # get stuff from source for that oldest one anyway)
        currentMark = min(marks)
    log.debug("using common mirror mark %s", currentMark)
    # reset mirror mark to the lowest common denominator
    for t in targets:
        if t.getMirrorMark() != currentMark:
            t.setMirrorMark(currentMark)
    # mirror gpg signatures from the src into the targets
    for t in targets:
        t.mirrorGPG(referenceRepos, cfg.host)
    # mirror changed trove information for troves already mirrored
    if fastSync:
        updateCount = 0
        log.debug("skip trove info records sync because of fast-sync")
    else:
        updateCount = mirrorTroveInfo(referenceRepos, targets, currentMark,
                cfg, syncSigs)
    newMark, troveList = getTroveList(referenceRepos, cfg, currentMark)
    if not troveList:
        if newMark > currentMark: # something was returned, but filtered out
            for t in targets:
                t.setMirrorMark(newMark)
            return -1 # call again
        return 0
    # prepare a new max mark to be used when we need to break out of a loop
    crtMaxMark = max(long(x[0]) for x in troveList)
    if currentMark > 0 and crtMaxMark == currentMark:
        # if we're hung on the current max then we need to
        # forcibly advance the mark in case we're stuck
        crtMaxMark += 1 # only used if we filter out all troves below
    initTLlen = len(troveList)

    # removed troves are a special blend - we keep them separate
    removedSet  = set([ x[1] for x in troveList if x[2] == trove.TROVE_TYPE_REMOVED ])
    troveList = [ (x[0], x[1]) for x in troveList if x[2] != trove.TROVE_TYPE_REMOVED ]

    # figure out if we need to recurse the group-troves
    if cfg.recurseGroups:
        # avoid adding duplicates
        troveSetList = set([x[1] for x in troveList])
        for mark, (name, version, flavor) in troveList:
            if trove.troveIsGroup(name):
                recTroves = recurseTrove(referenceRepos, name,
                        version, flavor, callback=callback)

                # add sources here:
                if cfg.includeSources:
                    troveInfo = referenceRepos.getTroveInfo(
                        trove._TROVEINFO_TAG_SOURCENAME, recTroves)
                    sourceComps = set()
                    for nvf, source in itertools.izip(recTroves, troveInfo):
                        sourceComps.add((source(), nvf[1].getSourceVersion(),
                                         parseFlavor('')))
                    recTroves.extend(sourceComps)

                # add the results at the end with the current mark
                for (n, v, f) in recTroves:
                    if (n, v, f) not in troveSetList:
                        troveList.append((mark, (n, v, f)))
                        troveSetList.add((n, v, f))
        log.debug("after group recursion %d troves are needed", len(troveList))
        # we need to make sure we mirror the GPG keys of any newly added troves
        newHosts = set([x[1].getHost() for x in troveSetList.union(removedSet)])
        for host in newHosts.difference(set([cfg.host])):
            for t in targets:
                t.mirrorGPG(referenceRepos, host)

    # we check which troves from the troveList are needed on each
    # target and we split the troveList into separate lists depending
    # on how many targets require each
    byTarget = {}
    targetSetList = []
    if len(troveList):
        byTrove = {}
        for i, target in enumerate(targets):
            for t in target.addTroveList(troveList):
                bt = byTrove.setdefault(t, set())
                bt.add(i)
        # invert the dict by target now
        for trv, ts in byTrove.iteritems():
            targetSet = [ targets[i] for i in ts ]
            try:
                targetIdx = targetSetList.index(targetSet)
            except ValueError:
                targetSetList.append(targetSet)
                targetIdx = len(targetSetList)-1
            bt = byTarget.setdefault(targetIdx, [])
            bt.append(trv)
        del byTrove
    # if we were returned troves, but we filtered them all out, advance the
    # mark and signal "try again"
    if len(byTarget) == 0 and len(removedSet) == 0 and initTLlen:
        # we had troves and now we don't
        log.debug("no troves found for our label %s" % cfg.labels)
        for t in targets:
            t.setMirrorMark(crtMaxMark)
        # try again
        return -1

    # now we get each section of the troveList for each targetSet. We
    # start off mirroring by those required by fewer targets, using
    # the assumption that those troves are what is required for the
    # targets to catch up to a common set
    if len(byTarget) > 1:
        log.debug("split %d troves into %d chunks by target", len(troveList), len(byTarget))
    # sort the targetSets by length
    targetSets = list(enumerate(targetSetList))
    targetSets.sort(lambda a,b: cmp(len(a[1]), len(b[1])))
    bundlesMark = 0
    for idx, targetSet in targetSets:
        troveList = byTarget[idx]
        if not troveList: # XXX: should not happen...
            continue
        log.debug("mirroring %d troves into %d targets", len(troveList), len(targetSet))
        # since these troves are required for all targets, we can use
        # the "first" one to build the relative changeset requests
        target = list(targetSet)[0]
        bundles = buildBundles(sourceRepos, target, troveList, cfg.absoluteChangesets)
        for i, bundle in enumerate(bundles):
            jobList = [ x[1] for x in bundle ]
            # XXX it's a shame we can't give a hint as to what server to use
            # to avoid having to open the changeset and read in bits of it
            if test:
                log.debug("test mode: not mirroring (%d of %d) %s" % (i + 1, len(bundles), jobList))
                updateCount += len(bundle)
                continue
            (outFd, tmpName) = util.mkstemp()
            os.close(outFd)
            log.debug("getting (%d of %d) %s" % (i + 1, len(bundles), displayBundle(bundle)))
            try:
                sourceRepos.createChangeSetFile(jobList, tmpName, recurse = False,
                                                callback = callback, mirrorMode = True)
            except changeset.ChangeSetKeyConflictError:
                # key conflict: fall back to mirroring this bundle in
                # smaller per-package chunks
                splitJobList(jobList, sourceRepos, targetSet, hidden=hidden,
                             callback=callback)
            else:
                for target in targetSet:
                    target.commitChangeSetFile(tmpName, hidden=hidden, callback=callback)
            try:
                os.unlink(tmpName)
            except OSError:
                pass
            callback.done()
        # NOTE(review): this sits outside the bundle loop, so in non-test
        # mode only the *last* bundle's length is added to updateCount
        # (test mode counts every bundle inside the loop) — confirm this
        # asymmetry is intended.
        updateCount += len(bundle)
        # compute the max mark of the bundles we comitted
        mark = max([min([x[0] for x in bundle]) for bundle in bundles])
        if mark > bundlesMark:
            bundlesMark = mark
    else: # only when we're all done looping advance mark to the new max
        # (for/else with no break above: this always runs once the loop
        # finishes normally)
        if bundlesMark == 0 or bundlesMark <= currentMark:
            bundlesMark = crtMaxMark # avoid repeating the same query...
        for target in targets:
            if hidden: # if we've hidden the last commits, show them now
                target.presentHiddenTroves()
            target.setMirrorMark(bundlesMark)
    # mirroring removed troves requires one by one processing
    for target in targets:
        copySet = removedSet.copy()
        updateCount += mirrorRemoved(referenceRepos, target.repo, copySet,
                                     test=test, callback=callback)
    # if this was a noop because the removed troves were already mirrored
    # we need to keep going
    if updateCount == 0 and len(removedSet):
        for target in targets:
            target.setMirrorMark(crtMaxMark)
        return -1
    return updateCount
Example #4
0
def mirrorRepository(
        sourceRepos,
        targetRepos,
        cfg,
        test=False,
        sync=False,
        syncSigs=False,
        callback=None,
        fastSync=False,
        referenceRepos=None,
):
    """Run one pass of mirroring from the source into the configured targets.

    @param sourceRepos: repository changesets are fetched from
    @param targetRepos: target repository (or targets, via _makeTargets)
    @param cfg: mirror configuration object
    @param test: when True, log what would be mirrored but commit nothing
    @param sync: when True, restart mirroring from the beginning (mark -1)
    @param syncSigs: passed through to mirrorTroveInfo
    @param callback: progress callback; a fresh ChangesetCallback is made
        per call when omitted (the old ``callback=ChangesetCallback()``
        default was a mutable default argument shared across calls)
    @param fastSync: skip the trove-info record sync step
    @param referenceRepos: repository used for metadata queries; defaults
        to sourceRepos
    @return: -1 if the caller should call again (more work pending),
        0 if there was nothing to mirror, otherwise the number of
        updates performed in this pass
    """
    if callback is None:
        callback = ChangesetCallback()
    if referenceRepos is None:
        referenceRepos = sourceRepos
    checkConfig(cfg)
    targets = _makeTargets(cfg, targetRepos, test)
    log.debug("-" * 20 + " start loop " + "-" * 20)

    hidden = len(targets) > 1 or cfg.useHiddenCommits
    if hidden:
        log.debug("will use hidden commits to synchronize target mirrors")

    if sync:
        currentMark = -1
    else:
        marks = [t.getMirrorMark() for t in targets]
        # we use the oldest mark as a starting point (since we have to
        # get stuff from source for that oldest one anyway)
        currentMark = min(marks)
    log.debug("using common mirror mark %s", currentMark)
    # reset mirror mark to the lowest common denominator
    for t in targets:
        if t.getMirrorMark() != currentMark:
            t.setMirrorMark(currentMark)
    # mirror gpg signatures from the src into the targets
    for t in targets:
        t.mirrorGPG(referenceRepos, cfg.host)
    # mirror changed trove information for troves already mirrored
    if fastSync:
        updateCount = 0
        log.debug("skip trove info records sync because of fast-sync")
    else:
        updateCount = mirrorTroveInfo(referenceRepos, targets, currentMark,
                                      cfg, syncSigs)
    newMark, troveList = getTroveList(referenceRepos, cfg, currentMark)
    if not troveList:
        if newMark > currentMark:  # something was returned, but filtered out
            for t in targets:
                t.setMirrorMark(newMark)
            return -1  # call again
        return 0
    # prepare a new max mark to be used when we need to break out of a loop
    crtMaxMark = max(long(x[0]) for x in troveList)
    if currentMark > 0 and crtMaxMark == currentMark:
        # if we're hung on the current max then we need to
        # forcibly advance the mark in case we're stuck
        crtMaxMark += 1  # only used if we filter out all troves below
    initTLlen = len(troveList)

    # removed troves are a special blend - we keep them separate
    removedSet = set(
        [x[1] for x in troveList if x[2] == trove.TROVE_TYPE_REMOVED])
    troveList = [(x[0], x[1]) for x in troveList
                 if x[2] != trove.TROVE_TYPE_REMOVED]

    # figure out if we need to recurse the group-troves
    if cfg.recurseGroups:
        # avoid adding duplicates
        troveSetList = set([x[1] for x in troveList])
        for mark, (name, version, flavor) in troveList:
            if trove.troveIsGroup(name):
                recTroves = recurseTrove(referenceRepos,
                                         name,
                                         version,
                                         flavor,
                                         callback=callback)

                # add sources here:
                if cfg.includeSources:
                    troveInfo = referenceRepos.getTroveInfo(
                        trove._TROVEINFO_TAG_SOURCENAME, recTroves)
                    sourceComps = set()
                    for nvf, source in itertools.izip(recTroves, troveInfo):
                        sourceComps.add((source(), nvf[1].getSourceVersion(),
                                         parseFlavor('')))
                    recTroves.extend(sourceComps)

                # add the results at the end with the current mark
                for (n, v, f) in recTroves:
                    if (n, v, f) not in troveSetList:
                        troveList.append((mark, (n, v, f)))
                        troveSetList.add((n, v, f))
        log.debug("after group recursion %d troves are needed", len(troveList))
        # we need to make sure we mirror the GPG keys of any newly added troves
        newHosts = set(
            [x[1].getHost() for x in troveSetList.union(removedSet)])
        for host in newHosts.difference(set([cfg.host])):
            for t in targets:
                t.mirrorGPG(referenceRepos, host)

    # we check which troves from the troveList are needed on each
    # target and we split the troveList into separate lists depending
    # on how many targets require each
    byTarget = {}
    targetSetList = []
    if len(troveList):
        byTrove = {}
        for i, target in enumerate(targets):
            for t in target.addTroveList(troveList):
                bt = byTrove.setdefault(t, set())
                bt.add(i)
        # invert the dict by target now
        for trv, ts in byTrove.iteritems():
            targetSet = [targets[i] for i in ts]
            try:
                targetIdx = targetSetList.index(targetSet)
            except ValueError:
                targetSetList.append(targetSet)
                targetIdx = len(targetSetList) - 1
            bt = byTarget.setdefault(targetIdx, [])
            bt.append(trv)
        del byTrove
    # if we were returned troves, but we filtered them all out, advance the
    # mark and signal "try again"
    if len(byTarget) == 0 and len(removedSet) == 0 and initTLlen:
        # we had troves and now we don't
        log.debug("no troves found for our label %s" % cfg.labels)
        for t in targets:
            t.setMirrorMark(crtMaxMark)
        # try again
        return -1

    # now we get each section of the troveList for each targetSet. We
    # start off mirroring by those required by fewer targets, using
    # the assumption that those troves are what is required for the
    # targets to catch up to a common set
    if len(byTarget) > 1:
        log.debug("split %d troves into %d chunks by target", len(troveList),
                  len(byTarget))
    # sort the targetSets by length
    targetSets = list(enumerate(targetSetList))
    targetSets.sort(lambda a, b: cmp(len(a[1]), len(b[1])))
    bundlesMark = 0
    for idx, targetSet in targetSets:
        troveList = byTarget[idx]
        if not troveList:  # XXX: should not happen...
            continue
        log.debug("mirroring %d troves into %d targets", len(troveList),
                  len(targetSet))
        # since these troves are required for all targets, we can use
        # the "first" one to build the relative changeset requests
        target = list(targetSet)[0]
        bundles = buildBundles(sourceRepos, target, troveList,
                               cfg.absoluteChangesets)
        for i, bundle in enumerate(bundles):
            jobList = [x[1] for x in bundle]
            # XXX it's a shame we can't give a hint as to what server to use
            # to avoid having to open the changeset and read in bits of it
            if test:
                log.debug("test mode: not mirroring (%d of %d) %s" %
                          (i + 1, len(bundles), jobList))
                updateCount += len(bundle)
                continue
            (outFd, tmpName) = util.mkstemp()
            os.close(outFd)
            log.debug("getting (%d of %d) %s" %
                      (i + 1, len(bundles), displayBundle(bundle)))
            try:
                sourceRepos.createChangeSetFile(jobList,
                                                tmpName,
                                                recurse=False,
                                                callback=callback,
                                                mirrorMode=True)
            except changeset.ChangeSetKeyConflictError:
                # key conflict: fall back to mirroring this bundle in
                # smaller per-package chunks
                splitJobList(jobList,
                             sourceRepos,
                             targetSet,
                             hidden=hidden,
                             callback=callback)
            else:
                for target in targetSet:
                    target.commitChangeSetFile(tmpName,
                                               hidden=hidden,
                                               callback=callback)
            try:
                os.unlink(tmpName)
            except OSError:
                pass
            callback.done()
        # NOTE(review): this sits outside the bundle loop, so in non-test
        # mode only the *last* bundle's length is added to updateCount
        # (test mode counts every bundle inside the loop) — confirm this
        # asymmetry is intended.
        updateCount += len(bundle)
        # compute the max mark of the bundles we comitted
        mark = max([min([x[0] for x in bundle]) for bundle in bundles])
        if mark > bundlesMark:
            bundlesMark = mark
    else:  # only when we're all done looping advance mark to the new max
        # (for/else with no break above: this always runs once the loop
        # finishes normally)
        if bundlesMark == 0 or bundlesMark <= currentMark:
            bundlesMark = crtMaxMark  # avoid repeating the same query...
        for target in targets:
            if hidden:  # if we've hidden the last commits, show them now
                target.presentHiddenTroves()
            target.setMirrorMark(bundlesMark)
    # mirroring removed troves requires one by one processing
    for target in targets:
        copySet = removedSet.copy()
        updateCount += mirrorRemoved(referenceRepos,
                                     target.repo,
                                     copySet,
                                     test=test,
                                     callback=callback)
    # if this was a noop because the removed troves were already mirrored
    # we need to keep going
    if updateCount == 0 and len(removedSet):
        for target in targets:
            target.setMirrorMark(crtMaxMark)
        return -1
    return updateCount