Esempio n. 1
0
    def run(self):
        # XXX: This is not thread-safe, but okay for now,
        #      as we only use this for very blunt throttling
        global lock_fails
        global git_fails
        while True:
            (gitdir, fingerprint, modified) = self.in_queue.get()
            # Do we still need to update it, or has another process
            # already done this for us?
            todo = True
            my_fingerprint = grokmirror.get_repo_fingerprint(self.toplevel,
                    gitdir, force=True)
            if fingerprint is not None:
                if fingerprint == my_fingerprint:
                    logger.debug('Fingeprints match, not pulling %s' % gitdir)
                    todo = False
            else:
                ts = grokmirror.get_repo_timestamp(self.toplevel, gitdir)
                if ts >= modified:
                    logger.debug('TS same or newer, not pulling %s' % gitdir)
                    todo = False

            if not todo:
                logger.debug('Looks like %s already latest, skipping' % gitdir)
                # Update the fingerprint stored in-repo
                grokmirror.set_repo_fingerprint(self.toplevel, gitdir,
                        fingerprint=my_fingerprint)
                self.out_queue.put((gitdir, my_fingerprint, True))
                self.in_queue.task_done()
                continue

            fullpath = os.path.join(self.toplevel, gitdir.lstrip('/'))
            success  = False

            try:
                grokmirror.lock_repo(fullpath, nonblocking=True)

                logger.info('[Thread-%s] Updating %s' % (self.myname, gitdir))
                success = pull_repo(self.toplevel, gitdir)
                logger.debug('[Thread-%s] done pulling %s' %
                             (self.myname, gitdir))

                if success:
                    set_agefile(self.toplevel, gitdir, modified)
                    run_post_update_hook(self.hookscript, self.toplevel, gitdir)
                else:
                    logger.warning('Pulling unsuccessful, not setting agefile')
                    git_fails.append(gitdir)

                # Record our current fingerprint and return it
                my_fingerprint = grokmirror.set_repo_fingerprint(self.toplevel,
                        gitdir)

                grokmirror.unlock_repo(fullpath)
            except IOError, ex:
                logger.info('Could not lock %s, skipping' % gitdir)
                lock_fails.append(gitdir)

            self.out_queue.put((gitdir, my_fingerprint, success))
            self.in_queue.task_done()
Esempio n. 2
0
def update_manifest(manifest, toplevel, gitdir, usenow):
    path = gitdir.replace(toplevel, '', 1)

    # Try to open git dir
    logger.debug('Examining %s', gitdir)
    try:
        repo = Repo(gitdir)
        assert repo.bare is True
    except:
        logger.critical('Error opening %s.', gitdir)
        logger.critical('Make sure it is a bare git repository.')
        sys.exit(1)

    # Ignore it if it's an empty git repository
    try:
        if len(repo.heads) == 0:
            logger.info('%s has no heads, ignoring', gitdir)
            return
    except:
        # Errors when listing heads usually means repository is no good
        logger.info('Error listing heads in %s, ignoring', gitdir)
        return

    try:
        description = repo.description
    except:
        description = 'Unnamed repository'

    try:
        rcr = repo.config_reader()
        owner = rcr.get('gitweb', 'owner')
    except:
        owner = None

    modified = 0

    if not usenow:
        # noinspection PyTypeChecker
        for branch in repo.branches:
            try:
                if branch.commit.committed_date > modified:
                    modified = branch.commit.committed_date
                    # Older versions of GitPython returned time.struct_time
                    if type(modified) == time.struct_time:
                        modified = int(time.mktime(modified))

            except:
                pass

    if modified == 0:
        modified = int(time.time())

    reference = None

    if len(repo.alternates) == 1:
        # use this to hint which repo to use as reference when cloning
        alternate = repo.alternates[0]
        if alternate.find(toplevel) == 0:
            reference = alternate.replace(toplevel, '').replace('/objects', '')

    if path not in manifest.keys():
        logger.info('Adding %s to manifest', path)
        manifest[path] = {}
    else:
        logger.info('Updating %s in the manifest', path)

    # we need a way to quickly compare whether mirrored repositories match
    # what is in the master manifest. To this end, we calculate a so-called
    # "state fingerprint" -- basically the output of "git show-ref | sha1sum".
    # git show-ref output is deterministic and should accurately list all refs
    # and their relation to heads/tags/etc.
    fingerprint = grokmirror.get_repo_fingerprint(toplevel, path, force=True)
    # Record it in the repo for other use
    grokmirror.set_repo_fingerprint(toplevel, path, fingerprint)

    manifest[path]['owner'] = owner
    manifest[path]['description'] = description
    manifest[path]['reference'] = reference
    manifest[path]['modified'] = modified
    manifest[path]['fingerprint'] = fingerprint
Esempio n. 3
0
    def run(self):
        # XXX: This is not thread-safe, but okay for now,
        #      as we only use this for very blunt throttling
        global lock_fails
        global git_fails
        while True:
            (gitdir, fingerprint, modified) = self.in_queue.get()
            # Do we still need to update it, or has another process
            # already done this for us?
            todo = True
            success = False
            logger.debug('[Thread-%s] gitdir=%s, figerprint=%s, modified=%s'
                         % (self.myname, gitdir, fingerprint, modified))

            fullpath = os.path.join(self.toplevel, gitdir.lstrip('/'))

            try:
                grokmirror.lock_repo(fullpath, nonblocking=True)
                # First, get fingerprint as reported in grokmirror.fingerprint
                my_fingerprint = grokmirror.get_repo_fingerprint(
                    self.toplevel, gitdir, force=False)

                # We never rely on timestamps if fingerprints are in play
                if fingerprint is None:
                    ts = grokmirror.get_repo_timestamp(self.toplevel, gitdir)
                    if ts >= modified:
                        logger.debug('[Thread-%s] TS same or newer, not pulling %s'
                                     % (self.myname, gitdir))
                        todo = False
                else:
                    # Recheck the real fingerprint to make sure there is no
                    # divergence between grokmirror.fingerprint and real repo
                    logger.debug('[Thread-%s] Rechecking fingerprint in %s' %
                                 (self.myname, gitdir))
                    my_fingerprint = grokmirror.get_repo_fingerprint(
                        self.toplevel, gitdir, force=True)

                    # Update the fingerprint stored in-repo
                    grokmirror.set_repo_fingerprint(
                        self.toplevel, gitdir, fingerprint=my_fingerprint)

                    if fingerprint == my_fingerprint:
                        logger.debug('[Thread-%s] FP match, not pulling %s'
                                     % (self.myname, gitdir))
                        todo = False

                if not todo:
                    logger.debug('[Thread-%s] %s already latest, skipping'
                                 % (self.myname, gitdir))
                    set_agefile(self.toplevel, gitdir, modified)
                    grokmirror.unlock_repo(fullpath)
                    self.out_queue.put((gitdir, my_fingerprint, True))
                    self.in_queue.task_done()
                    continue

                logger.info('[Thread-%s] updating %s' % (self.myname, gitdir))
                success = pull_repo(self.toplevel, gitdir, threadid=self.myname)
                logger.debug('[Thread-%s] done pulling %s' %
                             (self.myname, gitdir))

                if success:
                    set_agefile(self.toplevel, gitdir, modified)
                    run_post_update_hook(self.hookscript, self.toplevel, gitdir,
                                         threadid=self.myname)
                else:
                    logger.warning('[Thread-%s] pulling %s unsuccessful'
                                   % (self.myname, gitdir))
                    git_fails.append(gitdir)

                # Record our current fingerprint and return it
                my_fingerprint = grokmirror.set_repo_fingerprint(
                    self.toplevel, gitdir)

                grokmirror.unlock_repo(fullpath)
            except IOError:
                my_fingerprint = fingerprint
                logger.info('[Thread-%s] Could not lock %s, skipping'
                            % (self.myname, gitdir))
                lock_fails.append(gitdir)

            self.out_queue.put((gitdir, my_fingerprint, success))
            self.in_queue.task_done()
Esempio n. 4
0
            # check dir to make sure cloning succeeded and then add to existing
            if os.path.exists(fullpath) and success:
                logger.debug('Cloning of %s succeeded, adding to existing'
                             % gitdir)
                existing.append(gitdir)

                desc = culled[gitdir].get('description')
                owner = culled[gitdir].get('owner')
                ref = culled[gitdir].get('reference')

                if owner is None:
                    owner = config['default_owner']
                set_repo_params(toplevel, gitdir, owner, desc, ref)
                set_agefile(toplevel, gitdir, culled[gitdir]['modified'])
                my_fingerprint = grokmirror.set_repo_fingerprint(toplevel,
                                                                 gitdir)
                culled[gitdir]['fingerprint'] = my_fingerprint
                run_post_update_hook(hookscript, toplevel, gitdir)
            else:
                logger.critical('Was not able to clone %s' % gitdir)
                # Remove it from our manifest so we can try re-cloning
                # next time grok-pull runs
                del culled[gitdir]
                git_fails.append(gitdir)

            grokmirror.unlock_repo(fullpath)

    # loop through all entries and find any symlinks we need to set
    # We also collect all symlinks to do purging correctly
    symlinks = []
    for gitdir in culled.keys():
Esempio n. 5
0
def pull_mirror(name, config, verbose=False, force=False, nomtime=False,
                verify=False, verify_subpath='*', noreuse=False,
                purge=False, pretty=False, forcepurge=False):
    global logger
    global lock_fails

    # noinspection PyTypeChecker
    em = enlighten.get_manager(series=' -=#')

    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    if 'log' in config.keys():
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO

        if 'loglevel' in config.keys():
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG

        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)
        em.enabled = False

    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    logger.info('Checking [%s]', name)
    mymanifest = config['mymanifest']

    if verify:
        logger.info('Verifying mirror against %s', config['manifest'])
        nomtime = True

    if config['manifest'].find('file:///') == 0:
        manifile = config['manifest'].replace('file://', '')
        if not os.path.exists(manifile):
            logger.critical('Remote manifest not found in %s! Quitting!',
                            config['manifest'])
            return 1

        fstat = os.stat(manifile)
        last_modified = fstat[8]
        logger.debug('mtime on %s is: %s', manifile, fstat[8])

        if os.path.exists(config['mymanifest']):
            fstat = os.stat(config['mymanifest'])
            my_last_modified = fstat[8]
            logger.debug('Our last-modified is: %s', my_last_modified)
            if not (force or nomtime) and last_modified <= my_last_modified:
                logger.info('Manifest file unchanged. Quitting.')
                return 0

        logger.info('Reading new manifest from %s', manifile)
        manifest = grokmirror.read_manifest(manifile)
        # Don't accept empty manifests -- that indicates something is wrong
        if not len(manifest.keys()):
            logger.warning('Remote manifest empty or unparseable! Quitting.')
            return 1

    else:
        # Load it from remote host using http and header magic
        logger.info('Fetching remote manifest from %s', config['manifest'])

        # Do we have username:[email protected] in the URL?
        chunks = urlparse(config['manifest'])
        if chunks.netloc.find('@') > 0:
            logger.debug('Taking username/password from the URL for basic auth')
            (upass, netloc) = chunks.netloc.split('@')
            if upass.find(':') > 0:
                (username, password) = upass.split(':')
            else:
                username = upass
                password = ''

            manifesturl = config['manifest'].replace(chunks.netloc, netloc)
            logger.debug('manifesturl=%s', manifesturl)
            request = urllib_request.Request(manifesturl)

            password_mgr = urllib_request.HTTPPasswordMgrWithDefaultRealm()
            password_mgr.add_password(None, manifesturl, username, password)
            auth_handler = urllib_request.HTTPBasicAuthHandler(password_mgr)
            opener = urllib_request.build_opener(auth_handler)

        else:
            request = urllib_request.Request(config['manifest'])
            opener = urllib_request.build_opener()

        # Find out if we need to run at all first
        if not (force or nomtime) and os.path.exists(mymanifest):
            fstat = os.stat(mymanifest)
            mtime = fstat[8]
            logger.debug('mtime on %s is: %s', mymanifest, mtime)
            my_last_modified = time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                             time.gmtime(mtime))
            logger.debug('Our last-modified is: %s', my_last_modified)
            request.add_header('If-Modified-Since', my_last_modified)

        try:
            ufh = opener.open(request, timeout=30)
        except HTTPError as ex:
            if ex.code == 304:
                logger.info('Server says we have the latest manifest. '
                            'Quitting.')
                return 0
            logger.warning('Could not fetch %s', config['manifest'])
            logger.warning('Server returned: %s', ex)
            return 1
        except (URLError, ssl.SSLError, ssl.CertificateError) as ex:
            logger.warning('Could not fetch %s', config['manifest'])
            logger.warning('Error was: %s', ex)
            return 1

        last_modified = ufh.headers.get('Last-Modified')
        last_modified = time.strptime(last_modified, '%a, %d %b %Y %H:%M:%S %Z')
        last_modified = calendar.timegm(last_modified)

        # We don't use read_manifest for the remote manifest, as it can be
        # anything, really. For now, blindly open it with gzipfile if it ends
        # with .gz. XXX: some http servers will auto-deflate such files.
        try:
            if config['manifest'].find('.gz') > 0:
                fh = gzip.GzipFile(fileobj=BytesIO(ufh.read()))
            else:
                fh = ufh

            jdata = fh.read().decode('utf-8')
            fh.close()

            manifest = anyjson.deserialize(jdata)

        except Exception as ex:
            logger.warning('Failed to parse %s', config['manifest'])
            logger.warning('Error was: %s', ex)
            return 1

    mymanifest = grokmirror.read_manifest(mymanifest)

    culled = cull_manifest(manifest, config)

    to_clone = []
    to_pull = []
    existing = []

    toplevel = config['toplevel']
    if not os.access(toplevel, os.W_OK):
        logger.critical('Toplevel %s does not exist or is not writable',
                        toplevel)
        sys.exit(1)

    if 'pull_threads' in config.keys():
        pull_threads = int(config['pull_threads'])
        if pull_threads < 1:
            logger.info('pull_threads is less than 1, forcing to 1')
            pull_threads = 1
    else:
        # be conservative
        logger.info('pull_threads is not set, consider setting it')
        pull_threads = 5

    # noinspection PyTypeChecker
    e_cmp = em.counter(total=len(culled), desc='Comparing:', unit='repos', leave=False)

    for gitdir in list(culled):
        fullpath = os.path.join(toplevel, gitdir.lstrip('/'))
        e_cmp.update()

        # fingerprints were added in later versions, so deal if the upstream
        # manifest doesn't have a fingerprint
        if 'fingerprint' not in culled[gitdir]:
            culled[gitdir]['fingerprint'] = None

        # Attempt to lock the repo
        try:
            grokmirror.lock_repo(fullpath, nonblocking=True)
        except IOError:
            logger.info('Could not lock %s, skipping', gitdir)
            lock_fails.append(gitdir)
            # Force the fingerprint to what we have in mymanifest,
            # if we have it.
            culled[gitdir]['fingerprint'] = None
            if gitdir in mymanifest and 'fingerprint' in mymanifest[gitdir]:
                culled[gitdir]['fingerprint'] = mymanifest[gitdir][
                    'fingerprint']
            if len(lock_fails) >= pull_threads:
                logger.info('Too many repositories locked (%s). Exiting.',
                            len(lock_fails))
                return 0
            continue

        if verify:
            if culled[gitdir]['fingerprint'] is None:
                logger.debug('No fingerprint for %s, not verifying', gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            if not fnmatch.fnmatch(gitdir, verify_subpath):
                grokmirror.unlock_repo(fullpath)
                continue

            logger.debug('Verifying %s', gitdir)
            if not os.path.exists(fullpath):
                verify_fails.append(gitdir)
                logger.info('Verify: %s ABSENT', gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            my_fingerprint = grokmirror.get_repo_fingerprint(
                toplevel, gitdir, force=force)

            if my_fingerprint == culled[gitdir]['fingerprint']:
                logger.info('Verify: %s OK', gitdir)
            else:
                logger.critical('Verify: %s FAILED', gitdir)
                verify_fails.append(gitdir)

            grokmirror.unlock_repo(fullpath)
            continue

        # Is the directory in place?
        if os.path.exists(fullpath):
            # Did grok-fsck request to reclone it?
            rfile = os.path.join(fullpath, 'grokmirror.reclone')
            if os.path.exists(rfile):
                logger.info('Reclone requested for %s:', gitdir)
                with open(rfile, 'r') as rfh:
                    reason = rfh.read()
                    logger.info('  %s', reason)

                to_clone.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            # Fix owner and description, if necessary
            if gitdir in mymanifest.keys():
                # This code is hurky and needs to be cleaned up
                desc = culled[gitdir].get('description')
                owner = culled[gitdir].get('owner')
                ref = None
                if config['ignore_repo_references'] != 'yes':
                    ref = culled[gitdir].get('reference')

                # dirty hack to force on-disk owner/description checks
                # when we're called with -n, in case our manifest
                # differs from what is on disk for owner/description/alternates
                myref = None
                if nomtime:
                    mydesc = None
                    myowner = None
                else:
                    mydesc = mymanifest[gitdir].get('description')
                    myowner = mymanifest[gitdir].get('owner')

                    if config['ignore_repo_references'] != 'yes':
                        myref = mymanifest[gitdir].get('reference')

                    if myowner is None:
                        myowner = config['default_owner']

                if owner is None:
                    owner = config['default_owner']

                if desc != mydesc or owner != myowner or ref != myref:
                    # we can do this right away without waiting
                    set_repo_params(toplevel, gitdir, owner, desc, ref)

            else:
                # It exists on disk, but not in my manifest?
                if noreuse:
                    logger.critical('Found existing git repo in %s', fullpath)
                    logger.critical('But you asked NOT to reuse repos')
                    logger.critical('Skipping %s', gitdir)
                    grokmirror.unlock_repo(fullpath)
                    continue

                logger.info('Setting new origin for %s', gitdir)
                fix_remotes(gitdir, toplevel, config['site'])
                to_pull.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            # fingerprints were added late, so if we don't have them
            # in the remote manifest, fall back on using timestamps
            changed = False
            if culled[gitdir]['fingerprint'] is not None:
                logger.debug('Will use fingerprints to compare %s', gitdir)
                my_fingerprint = grokmirror.get_repo_fingerprint(toplevel,
                                                                 gitdir,
                                                                 force=force)

                if my_fingerprint != culled[gitdir]['fingerprint']:
                    logger.debug('No fingerprint match, will pull %s', gitdir)
                    changed = True
                else:
                    logger.debug('Fingerprints match, skipping %s', gitdir)
            else:
                logger.debug('Will use timestamps to compare %s', gitdir)
                if force:
                    logger.debug('Will force-pull %s', gitdir)
                    changed = True
                    # set timestamp to 0 as well
                    grokmirror.set_repo_timestamp(toplevel, gitdir, 0)
                else:
                    ts = grokmirror.get_repo_timestamp(toplevel, gitdir)
                    if ts < culled[gitdir]['modified']:
                        changed = True

            if changed:
                to_pull.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue
            else:
                logger.debug('Repo %s unchanged', gitdir)
                # if we don't have a fingerprint for it, add it now
                if culled[gitdir]['fingerprint'] is None:
                    fpr = grokmirror.get_repo_fingerprint(toplevel, gitdir)
                    culled[gitdir]['fingerprint'] = fpr
                existing.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

        else:
            # Newly incoming repo
            to_clone.append(gitdir)
            grokmirror.unlock_repo(fullpath)
            continue

        # If we got here, something is odd.
        # noinspection PyUnreachableCode
        logger.critical('Could not figure out what to do with %s', gitdir)
        grokmirror.unlock_repo(fullpath)

    logger.info('Compared new manifest against %s repositories in %0.2fs', len(culled), e_cmp.elapsed)
    e_cmp.close()

    if verify:
        if len(verify_fails):
            logger.critical('%s repos failed to verify', len(verify_fails))
            return 1
        else:
            logger.info('Verification successful')
            return 0

    hookscript = config['post_update_hook']

    if len(to_pull):

        if len(lock_fails) > 0:
            pull_threads -= len(lock_fails)

        # Don't spin up more threads than we need
        if pull_threads > len(to_pull):
            pull_threads = len(to_pull)

        # exit if we're ever at 0 pull_threads. Shouldn't happen, but some extra
        # precaution doesn't hurt
        if pull_threads <= 0:
            logger.info('Too many repositories locked. Exiting.')
            return 0

        logger.info('Will use %d threads to pull repos', pull_threads)

        # noinspection PyTypeChecker
        e_pull = em.counter(total=len(to_pull), desc='Updating :', unit='repos', leave=False)
        logger.info('Updating %s repos from %s', len(to_pull), config['site'])
        in_queue = Queue()
        out_queue = Queue()

        for gitdir in to_pull:
            in_queue.put((gitdir, culled[gitdir]['fingerprint'],
                          culled[gitdir]['modified']))

        for i in range(pull_threads):
            logger.debug('Spun up thread %s', i)
            t = PullerThread(in_queue, out_queue, config, i, e_pull)
            t.setDaemon(True)
            t.start()

        # wait till it's all done
        in_queue.join()
        logger.info('All threads finished.')

        while not out_queue.empty():
            # see if any of it failed
            (gitdir, my_fingerprint, status) = out_queue.get()
            # We always record our fingerprint in our manifest
            culled[gitdir]['fingerprint'] = my_fingerprint
            if not status:
                # To make sure we check this again during next run,
                # fudge the manifest accordingly.
                logger.debug('Will recheck %s during next run', gitdir)
                culled[gitdir] = mymanifest[gitdir]
                # this is rather hackish, but effective
                last_modified -= 1

        logger.info('Updates completed in %0.2fs', e_pull.elapsed)
        e_pull.close()
    else:
        logger.info('No repositories need updating')

    # how many lockfiles have we seen?
    # If there are more lock_fails than there are
    # pull_threads configured, we skip cloning out of caution
    if len(to_clone) and len(lock_fails) > pull_threads:
        logger.info('Too many repositories locked. Skipping cloning new repos.')
        to_clone = []

    if len(to_clone):
        # noinspection PyTypeChecker
        e_clone = em.counter(total=len(to_clone), desc='Cloning  :', unit='repos', leave=False)
        logger.info('Cloning %s repos from %s', len(to_clone), config['site'])
        # we use "existing" to track which repos can be used as references
        existing.extend(to_pull)

        to_clone_sorted = []
        clone_order(to_clone, manifest, to_clone_sorted, existing)

        for gitdir in to_clone_sorted:
            e_clone.refresh()

            fullpath = os.path.join(toplevel, gitdir.lstrip('/'))

            # Did grok-fsck request to reclone it?
            rfile = os.path.join(fullpath, 'grokmirror.reclone')
            if os.path.exists(rfile):
                logger.debug('Removing %s for reclone', gitdir)
                shutil.move(fullpath, '%s.reclone' % fullpath)
                shutil.rmtree('%s.reclone' % fullpath)

            # Do we still need to clone it, or has another process
            # already done this for us?
            ts = grokmirror.get_repo_timestamp(toplevel, gitdir)

            if ts > 0:
                logger.debug('Looks like %s already cloned, skipping', gitdir)
                continue

            try:
                grokmirror.lock_repo(fullpath, nonblocking=True)
            except IOError:
                logger.info('Could not lock %s, skipping', gitdir)
                lock_fails.append(gitdir)
                e_clone.update()
                continue

            reference = None
            if config['ignore_repo_references'] != 'yes':
                reference = culled[gitdir]['reference']

            if reference is not None and reference in existing:
                # Make sure we can lock the reference repo
                refrepo = os.path.join(toplevel, reference.lstrip('/'))
                try:
                    grokmirror.lock_repo(refrepo, nonblocking=True)
                    success = clone_repo(toplevel, gitdir, config['site'],
                                         reference=reference)
                    grokmirror.unlock_repo(refrepo)
                except IOError:
                    logger.info('Cannot lock reference repo %s, skipping %s',
                                reference, gitdir)
                    if reference not in lock_fails:
                        lock_fails.append(reference)

                    grokmirror.unlock_repo(fullpath)
                    e_clone.update()
                    continue
            else:
                success = clone_repo(toplevel, gitdir, config['site'])

            # check dir to make sure cloning succeeded and then add to existing
            if os.path.exists(fullpath) and success:
                logger.debug('Cloning of %s succeeded, adding to existing',
                             gitdir)
                existing.append(gitdir)

                desc = culled[gitdir].get('description')
                owner = culled[gitdir].get('owner')
                ref = culled[gitdir].get('reference')

                if owner is None:
                    owner = config['default_owner']
                set_repo_params(toplevel, gitdir, owner, desc, ref)
                set_agefile(toplevel, gitdir, culled[gitdir]['modified'])
                my_fingerprint = grokmirror.set_repo_fingerprint(toplevel,
                                                                 gitdir)
                culled[gitdir]['fingerprint'] = my_fingerprint
                run_post_update_hook(hookscript, toplevel, gitdir)
            else:
                logger.warning('Was not able to clone %s', gitdir)
                # Remove it from our manifest so we can try re-cloning
                # next time grok-pull runs
                del culled[gitdir]
                git_fails.append(gitdir)

            grokmirror.unlock_repo(fullpath)
            e_clone.update()

        logger.info('Clones completed in %0.2fs' % e_clone.elapsed)
        e_clone.close()

    else:
        logger.info('No repositories need cloning')

    # loop through all entries and find any symlinks we need to set
    # We also collect all symlinks to do purging correctly
    symlinks = []
    for gitdir in culled.keys():
        if 'symlinks' in culled[gitdir].keys():
            source = os.path.join(config['toplevel'], gitdir.lstrip('/'))
            for symlink in culled[gitdir]['symlinks']:
                if symlink not in symlinks:
                    symlinks.append(symlink)
                target = os.path.join(config['toplevel'], symlink.lstrip('/'))

                if os.path.exists(source):
                    if os.path.islink(target):
                        # are you pointing to where we need you?
                        if os.path.realpath(target) != source:
                            # Remove symlink and recreate below
                            logger.debug('Removed existing wrong symlink %s',
                                         target)
                            os.unlink(target)
                    elif os.path.exists(target):
                        logger.warning('Deleted repo %s, because it is now'
                                       ' a symlink to %s' % (target, source))
                        shutil.rmtree(target)

                    # Here we re-check if we still need to do anything
                    if not os.path.exists(target):
                        logger.info('Symlinking %s -> %s', target, source)
                        # Make sure the leading dirs are in place
                        if not os.path.exists(os.path.dirname(target)):
                            os.makedirs(os.path.dirname(target))
                        os.symlink(source, target)

    manifile = config['mymanifest']
    grokmirror.manifest_lock(manifile)

    # Is the local manifest newer than last_modified? That would indicate
    # that another process has run and "culled" is no longer the latest info
    if os.path.exists(manifile):
        fstat = os.stat(manifile)
        if fstat[8] > last_modified:
            logger.info('Local manifest is newer, not saving.')
            grokmirror.manifest_unlock(manifile)
            return 0

    if purge:
        to_purge = []
        found_repos = 0
        for founddir in grokmirror.find_all_gitdirs(config['toplevel']):
            gitdir = founddir.replace(config['toplevel'], '')
            found_repos += 1

            if gitdir not in culled.keys() and gitdir not in symlinks:
                to_purge.append(founddir)

        if len(to_purge):
            # Purge-protection engage
            try:
                purge_limit = int(config['purgeprotect'])
                assert 1 <= purge_limit <= 99
            except (ValueError, AssertionError):
                logger.critical('Warning: "%s" is not valid for purgeprotect.',
                                config['purgeprotect'])
                logger.critical('Please set to a number between 1 and 99.')
                logger.critical('Defaulting to purgeprotect=5.')
                purge_limit = 5

            purge_pc = len(to_purge) * 100 / found_repos
            logger.debug('purgeprotect=%s', purge_limit)
            logger.debug('purge prercentage=%s', purge_pc)

            if not forcepurge and purge_pc >= purge_limit:
                logger.critical('Refusing to purge %s repos (%s%%)',
                                len(to_purge), purge_pc)
                logger.critical('Set purgeprotect to a higher percentage, or'
                                ' override with --force-purge.')
                logger.info('Not saving local manifest')
                return 1
            else:
                # noinspection PyTypeChecker
                e_purge = em.counter(total=len(to_purge), desc='Purging  :', unit='repos', leave=False)
                for founddir in to_purge:
                    e_purge.refresh()
                    if os.path.islink(founddir):
                        logger.info('Removing unreferenced symlink %s', gitdir)
                        os.unlink(founddir)
                    else:
                        # is anything using us for alternates?
                        gitdir = '/' + os.path.relpath(founddir, toplevel).lstrip('/')
                        if grokmirror.is_alt_repo(toplevel, gitdir):
                            logger.info('Not purging %s because it is used by '
                                        'other repos via alternates', founddir)
                        else:
                            try:
                                logger.info('Purging %s', founddir)
                                grokmirror.lock_repo(founddir, nonblocking=True)
                                shutil.rmtree(founddir)
                            except IOError:
                                lock_fails.append(gitdir)
                                logger.info('%s is locked, not purging',
                                            gitdir)
                    e_purge.update()

                logger.info('Purging completed in %0.2fs', e_purge.elapsed)
                e_purge.close()

        else:
            logger.info('No repositories need purging')

    # Done with progress bars
    em.stop()

    # Go through all repos in culled and get the latest local timestamps.
    for gitdir in culled:
        ts = grokmirror.get_repo_timestamp(toplevel, gitdir)
        culled[gitdir]['modified'] = ts

    # If there were any lock failures, we fudge last_modified to always
    # be older than the server, which will force the next grokmirror run.
    if len(lock_fails):
        logger.info('%s repos could not be locked. Forcing next run.',
                    len(lock_fails))
        last_modified -= 1
    elif len(git_fails):
        logger.info('%s repos failed. Forcing next run.', len(git_fails))
        last_modified -= 1

    # Once we're done, save culled as our new manifest
    grokmirror.write_manifest(manifile, culled, mtime=last_modified,
                              pretty=pretty)

    grokmirror.manifest_unlock(manifile)

    # write out projects.list, if asked to
    write_projects_list(culled, config)

    return 127