Example #1
0
def check_reclone_error(fullpath, config, errors):
    reclone = None
    for line in errors:
        for estring in config['reclone_on_errors']:
            if line.find(estring) != -1:
                # is this repo used for alternates?
                gitdir = '/' + os.path.relpath(fullpath, config['toplevel']).lstrip('/')
                if grokmirror.is_alt_repo(config['toplevel'], gitdir):
                    logger.critical('\tused for alternates, not requesting auto-reclone')
                    return
                else:
                    reclone = line
                    logger.critical('\trequested auto-reclone')
                break
            if reclone is not None:
                break
    if reclone is None:
        return

    rfile = os.path.join(fullpath, 'grokmirror.reclone')
    # Have we already requested a reclone?
    if os.path.exists(rfile):
        logger.debug('Already requested repo reclone for %s', fullpath)
        return

    with open(rfile, 'w') as rfh:
        rfh.write('Requested by grok-fsck due to error: %s' % reclone)
Example #2
0
def check_reclone_error(fullpath, config, errors):
    reclone = None
    for line in errors:
        for estring in config['reclone_on_errors']:
            if line.find(estring) != -1:
                # is this repo used for alternates?
                gitdir = '/' + os.path.relpath(fullpath,
                                               config['toplevel']).lstrip('/')
                if grokmirror.is_alt_repo(config['toplevel'], gitdir):
                    logger.critical(
                        '\tused for alternates, not requesting auto-reclone')
                    return
                else:
                    reclone = line
                    logger.critical('\trequested auto-reclone')
                break
            if reclone is not None:
                break
    if reclone is None:
        return

    rfile = os.path.join(fullpath, 'grokmirror.reclone')
    # Have we already requested a reclone?
    if os.path.exists(rfile):
        logger.debug('Already requested repo reclone for %s', fullpath)
        return

    with open(rfile, 'w') as rfh:
        rfh.write('Requested by grok-fsck due to error: %s' % reclone)
Example #3
0
def run_git_prune(fullpath, config):
    prune_ok = True
    if 'prune' not in config or config['prune'] != 'yes':
        return prune_ok

    # Are any other repos using us in their objects/info/alternates?
    gitdir = '/' + os.path.relpath(fullpath, config['toplevel']).lstrip('/')
    if grokmirror.is_alt_repo(config['toplevel'], gitdir):
        logger.info('  prune : skipped, is alternate to other repos')
        return prune_ok

    # We set expire to yesterday in order to avoid race conditions
    # in repositories that are actively being accessed at the time of
    # running the prune job.
    args = ['prune', '--expire=yesterday']
    logger.info('  prune : pruning')
    retcode, output, error = grokmirror.run_git_command(fullpath, args)

    if error:
        # Put things we recognize as fairly benign into debug
        debug = []
        warn = []
        for line in error.split('\n'):
            ignored = False
            for estring in config['ignore_errors']:
                if line.find(estring) != -1:
                    ignored = True
                    debug.append(line)
                    break
            if not ignored:
                warn.append(line)

        if debug:
            logger.debug('Stderr: %s', '\n'.join(debug))
        if warn:
            logger.critical('Pruning %s returned critical errors:', fullpath)
            prune_ok = False
            for entry in warn:
                logger.critical("\t%s", entry)
            check_reclone_error(fullpath, config, warn)

    return prune_ok
Example #4
0
def run_git_prune(fullpath, config):
    prune_ok = True
    if 'prune' not in config.keys() or config['prune'] != 'yes':
        return prune_ok

    # Are any other repos using us in their objects/info/alternates?
    gitdir = '/' + os.path.relpath(fullpath, config['toplevel']).lstrip('/')
    if grokmirror.is_alt_repo(config['toplevel'], gitdir):
        logger.info('  prune : skipped, is alternate to other repos')
        return prune_ok

    # We set expire to yesterday in order to avoid race conditions
    # in repositories that are actively being accessed at the time of
    # running the prune job.
    args = ['prune', '--expire=yesterday']
    logger.info('  prune : pruning')
    retcode, output, error = grokmirror.run_git_command(fullpath, args)

    if error:
        # Put things we recognize as fairly benign into debug
        debug = []
        warn = []
        for line in error.split('\n'):
            ignored = False
            for estring in config['ignore_errors']:
                if line.find(estring) != -1:
                    ignored = True
                    debug.append(line)
                    break
            if not ignored:
                warn.append(line)

        if debug:
            logger.debug('Stderr: %s', '\n'.join(debug))
        if warn:
            logger.critical('Pruning %s returned critical errors:', fullpath)
            prune_ok = False
            for entry in warn:
                logger.critical("\t%s", entry)
            check_reclone_error(fullpath, config, warn)

    return prune_ok
Example #5
0
def run_git_repack(fullpath, config, level=1):
    # Returns false if we hit any errors on the way
    repack_ok = True

    is_precious = False
    set_precious = False
    gen_commitgraph = True

    # Figure out what our repack flags should be.
    repack_flags = list()
    if 'extra_repack_flags' in config and len(config['extra_repack_flags']):
        repack_flags += config['extra_repack_flags'].split()

    full_repack_flags = ['-f', '-b', '--pack-kept-objects']
    if 'extra_repack_flags_full' in config and len(
            config['extra_repack_flags_full']):
        full_repack_flags += config['extra_repack_flags_full'].split()

    # Are any other repos using us in their objects/info/alternates?
    gitdir = '/' + os.path.relpath(fullpath, config['toplevel']).lstrip('/')
    if grokmirror.is_alt_repo(config['toplevel'], gitdir):
        # we are a "mother repo"
        # Force preciousObjects if precious is "always"
        if config['precious'] == 'always':
            is_precious = True
            set_precious_objects(fullpath, enabled=True)
        else:
            # Turn precious off during repacks
            set_precious_objects(fullpath, enabled=False)
            # Turn it back on after we're done
            set_precious = True

        # are we using alternates ourselves? Multiple levels of alternates are
        # a bad idea in general due high possibility of corruption.
        if os.path.exists(
                os.path.join(fullpath, 'objects', 'info', 'alternates')):
            logger.warning(
                'warning : has alternates and is used by others for alternates'
            )
            logger.warning('        : this can cause grandchild corruption')
            repack_flags.append('-A')
            repack_flags.append('-l')
        else:
            repack_flags.append('-a')
            if not is_precious:
                repack_flags.append('-k')

            if level > 1:
                logger.info(
                    ' repack : performing a full repack for optimal deltas')
                repack_flags += full_repack_flags

    elif os.path.exists(os.path.join(fullpath, 'objects', 'info',
                                     'alternates')):
        # we are a "child repo"
        gen_commitgraph = False
        repack_flags.append('-l')
        if level > 1:
            repack_flags.append('-A')

    else:
        # we have no relationships with other repos
        if level > 1:
            logger.info(
                ' repack : performing a full repack for optimal deltas')
            repack_flags.append('-a')
            if not is_precious:
                repack_flags.append('-k')
            repack_flags += full_repack_flags

    if not is_precious:
        repack_flags.append('-d')

    args = ['repack'] + repack_flags
    logger.info(' repack : repacking with "%s"', ' '.join(repack_flags))

    # We always tack on -q
    repack_flags.append('-q')

    retcode, output, error = grokmirror.run_git_command(fullpath, args)

    if set_precious:
        set_precious_objects(fullpath, enabled=True)

    # With newer versions of git, repack may return warnings that are safe to ignore
    # so use the same strategy to weed out things we aren't interested in seeing
    if error:
        # Put things we recognize as fairly benign into debug
        debug = []
        warn = []
        for line in error.split('\n'):
            ignored = False
            for estring in config['ignore_errors']:
                if line.find(estring) != -1:
                    ignored = True
                    debug.append(line)
                    break
            if not ignored:
                warn.append(line)

        if debug:
            logger.debug('Stderr: %s', '\n'.join(debug))
        if warn:
            logger.critical('Repacking %s returned critical errors:', fullpath)
            repack_ok = False
            for entry in warn:
                logger.critical("\t%s", entry)
            check_reclone_error(fullpath, config, warn)

    if not repack_ok:
        # No need to repack refs if repo is broken
        return False

    if gen_commitgraph and config['commitgraph'] == 'yes':
        run_git_commit_graph(fullpath)

    # only repack refs on full repacks
    if level < 2:
        # do we still have loose objects after repacking?
        obj_info = get_repo_obj_info(fullpath)
        if obj_info['count'] != '0':
            return run_git_prune(fullpath, config)
        return repack_ok

    # repacking refs requires a separate command, so run it now
    args = ['pack-refs', '--all']
    logger.info(' repack : repacking refs')
    retcode, output, error = grokmirror.run_git_command(fullpath, args)

    # pack-refs shouldn't return anything, but use the same ignore_errors block
    # to weed out any future potential benign warnings
    if error:
        # Put things we recognize as fairly benign into debug
        debug = []
        warn = []
        for line in error.split('\n'):
            ignored = False
            for estring in config['ignore_errors']:
                if line.find(estring) != -1:
                    ignored = True
                    debug.append(line)
                    break
            if not ignored:
                warn.append(line)

        if debug:
            logger.debug('Stderr: %s', '\n'.join(debug))
        if warn:
            logger.critical('Repacking refs %s returned critical errors:',
                            fullpath)
            repack_ok = False
            for entry in warn:
                logger.critical("\t%s", entry)

            check_reclone_error(fullpath, config, warn)

    if repack_ok and 'prune' in config and config['prune'] == 'yes':
        # run prune now
        return run_git_prune(fullpath, config)

    return repack_ok
Example #6
0
def pull_mirror(name,
                config,
                verbose=False,
                force=False,
                nomtime=False,
                verify=False,
                verify_subpath='*',
                noreuse=False,
                purge=False,
                pretty=False,
                forcepurge=False):
    global logger
    global lock_fails

    # noinspection PyTypeChecker
    em = enlighten.get_manager(series=' -=#')

    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    if 'log' in config.keys():
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO

        if 'loglevel' in config.keys():
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG

        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)
        em.enabled = False

    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    logger.info('Checking [%s]', name)
    mymanifest = config['mymanifest']

    if verify:
        logger.info('Verifying mirror against %s', config['manifest'])
        nomtime = True

    if config['manifest'].find('file:///') == 0:
        manifile = config['manifest'].replace('file://', '')
        if not os.path.exists(manifile):
            logger.critical('Remote manifest not found in %s! Quitting!',
                            config['manifest'])
            return 1

        fstat = os.stat(manifile)
        last_modified = fstat[8]
        logger.debug('mtime on %s is: %s', manifile, fstat[8])

        if os.path.exists(config['mymanifest']):
            fstat = os.stat(config['mymanifest'])
            my_last_modified = fstat[8]
            logger.debug('Our last-modified is: %s', my_last_modified)
            if not (force or nomtime) and last_modified <= my_last_modified:
                logger.info('Manifest file unchanged. Quitting.')
                return 0

        logger.info('Reading new manifest from %s', manifile)
        manifest = grokmirror.read_manifest(manifile)
        # Don't accept empty manifests -- that indicates something is wrong
        if not len(manifest.keys()):
            logger.warning('Remote manifest empty or unparseable! Quitting.')
            return 1

    else:
        # Load it from remote host using http and header magic
        logger.info('Fetching remote manifest from %s', config['manifest'])

        # Do we have username:password@ in the URL?
        chunks = urlparse(config['manifest'])
        if chunks.netloc.find('@') > 0:
            logger.debug(
                'Taking username/password from the URL for basic auth')
            (upass, netloc) = chunks.netloc.split('@')
            if upass.find(':') > 0:
                (username, password) = upass.split(':')
            else:
                username = upass
                password = ''

            manifesturl = config['manifest'].replace(chunks.netloc, netloc)
            logger.debug('manifesturl=%s', manifesturl)
            request = urllib_request.Request(manifesturl)

            password_mgr = urllib_request.HTTPPasswordMgrWithDefaultRealm()
            password_mgr.add_password(None, manifesturl, username, password)
            auth_handler = urllib_request.HTTPBasicAuthHandler(password_mgr)
            opener = urllib_request.build_opener(auth_handler)

        else:
            request = urllib_request.Request(config['manifest'])
            opener = urllib_request.build_opener()

        # Find out if we need to run at all first
        if not (force or nomtime) and os.path.exists(mymanifest):
            fstat = os.stat(mymanifest)
            mtime = fstat[8]
            logger.debug('mtime on %s is: %s', mymanifest, mtime)
            my_last_modified = time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                             time.gmtime(mtime))
            logger.debug('Our last-modified is: %s', my_last_modified)
            request.add_header('If-Modified-Since', my_last_modified)

        try:
            ufh = opener.open(request, timeout=30)
        except HTTPError as ex:
            if ex.code == 304:
                logger.info('Server says we have the latest manifest. '
                            'Quitting.')
                return 0
            logger.warning('Could not fetch %s', config['manifest'])
            logger.warning('Server returned: %s', ex)
            return 1
        except (URLError, ssl.SSLError, ssl.CertificateError) as ex:
            logger.warning('Could not fetch %s', config['manifest'])
            logger.warning('Error was: %s', ex)
            return 1

        last_modified = ufh.headers.get('Last-Modified')
        last_modified = time.strptime(last_modified,
                                      '%a, %d %b %Y %H:%M:%S %Z')
        last_modified = calendar.timegm(last_modified)

        # We don't use read_manifest for the remote manifest, as it can be
        # anything, really. For now, blindly open it with gzipfile if it ends
        # with .gz. XXX: some http servers will auto-deflate such files.
        try:
            if config['manifest'].find('.gz') > 0:
                fh = gzip.GzipFile(fileobj=BytesIO(ufh.read()))
            else:
                fh = ufh

            jdata = fh.read().decode('utf-8')
            fh.close()

            manifest = anyjson.deserialize(jdata)

        except Exception as ex:
            logger.warning('Failed to parse %s', config['manifest'])
            logger.warning('Error was: %s', ex)
            return 1

    mymanifest = grokmirror.read_manifest(mymanifest)

    culled = cull_manifest(manifest, config)

    to_clone = []
    to_pull = []
    existing = []

    toplevel = config['toplevel']
    if not os.access(toplevel, os.W_OK):
        logger.critical('Toplevel %s does not exist or is not writable',
                        toplevel)
        sys.exit(1)

    if 'pull_threads' in config.keys():
        pull_threads = int(config['pull_threads'])
        if pull_threads < 1:
            logger.info('pull_threads is less than 1, forcing to 1')
            pull_threads = 1
    else:
        # be conservative
        logger.info('pull_threads is not set, consider setting it')
        pull_threads = 5

    # noinspection PyTypeChecker
    e_cmp = em.counter(total=len(culled),
                       desc='Comparing:',
                       unit='repos',
                       leave=False)

    for gitdir in list(culled):
        fullpath = os.path.join(toplevel, gitdir.lstrip('/'))
        e_cmp.update()

        # fingerprints were added in later versions, so deal if the upstream
        # manifest doesn't have a fingerprint
        if 'fingerprint' not in culled[gitdir]:
            culled[gitdir]['fingerprint'] = None

        # Attempt to lock the repo
        try:
            grokmirror.lock_repo(fullpath, nonblocking=True)
        except IOError:
            logger.info('Could not lock %s, skipping', gitdir)
            lock_fails.append(gitdir)
            # Force the fingerprint to what we have in mymanifest,
            # if we have it.
            culled[gitdir]['fingerprint'] = None
            if gitdir in mymanifest and 'fingerprint' in mymanifest[gitdir]:
                culled[gitdir]['fingerprint'] = mymanifest[gitdir][
                    'fingerprint']
            if len(lock_fails) >= pull_threads:
                logger.info('Too many repositories locked (%s). Exiting.',
                            len(lock_fails))
                return 0
            continue

        if verify:
            if culled[gitdir]['fingerprint'] is None:
                logger.debug('No fingerprint for %s, not verifying', gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            if not fnmatch.fnmatch(gitdir, verify_subpath):
                grokmirror.unlock_repo(fullpath)
                continue

            logger.debug('Verifying %s', gitdir)
            if not os.path.exists(fullpath):
                verify_fails.append(gitdir)
                logger.info('Verify: %s ABSENT', gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            my_fingerprint = grokmirror.get_repo_fingerprint(toplevel,
                                                             gitdir,
                                                             force=force)

            if my_fingerprint == culled[gitdir]['fingerprint']:
                logger.info('Verify: %s OK', gitdir)
            else:
                logger.critical('Verify: %s FAILED', gitdir)
                verify_fails.append(gitdir)

            grokmirror.unlock_repo(fullpath)
            continue

        # Is the directory in place?
        if os.path.exists(fullpath):
            # Did grok-fsck request to reclone it?
            rfile = os.path.join(fullpath, 'grokmirror.reclone')
            if os.path.exists(rfile):
                logger.info('Reclone requested for %s:', gitdir)
                with open(rfile, 'r') as rfh:
                    reason = rfh.read()
                    logger.info('  %s', reason)

                to_clone.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            # Fix owner and description, if necessary
            if gitdir in mymanifest.keys():
                # This code is hurky and needs to be cleaned up
                desc = culled[gitdir].get('description')
                owner = culled[gitdir].get('owner')
                ref = None
                if config['ignore_repo_references'] != 'yes':
                    ref = culled[gitdir].get('reference')

                # dirty hack to force on-disk owner/description checks
                # when we're called with -n, in case our manifest
                # differs from what is on disk for owner/description/alternates
                myref = None
                if nomtime:
                    mydesc = None
                    myowner = None
                else:
                    mydesc = mymanifest[gitdir].get('description')
                    myowner = mymanifest[gitdir].get('owner')

                    if config['ignore_repo_references'] != 'yes':
                        myref = mymanifest[gitdir].get('reference')

                    if myowner is None:
                        myowner = config['default_owner']

                if owner is None:
                    owner = config['default_owner']

                if desc != mydesc or owner != myowner or ref != myref:
                    # we can do this right away without waiting
                    set_repo_params(toplevel, gitdir, owner, desc, ref)

            else:
                # It exists on disk, but not in my manifest?
                if noreuse:
                    logger.critical('Found existing git repo in %s', fullpath)
                    logger.critical('But you asked NOT to reuse repos')
                    logger.critical('Skipping %s', gitdir)
                    grokmirror.unlock_repo(fullpath)
                    continue

                logger.info('Setting new origin for %s', gitdir)
                fix_remotes(gitdir, toplevel, config['site'])
                to_pull.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            # fingerprints were added late, so if we don't have them
            # in the remote manifest, fall back on using timestamps
            changed = False
            if culled[gitdir]['fingerprint'] is not None:
                logger.debug('Will use fingerprints to compare %s', gitdir)
                my_fingerprint = grokmirror.get_repo_fingerprint(toplevel,
                                                                 gitdir,
                                                                 force=force)

                if my_fingerprint != culled[gitdir]['fingerprint']:
                    logger.debug('No fingerprint match, will pull %s', gitdir)
                    changed = True
                else:
                    logger.debug('Fingerprints match, skipping %s', gitdir)
            else:
                logger.debug('Will use timestamps to compare %s', gitdir)
                if force:
                    logger.debug('Will force-pull %s', gitdir)
                    changed = True
                    # set timestamp to 0 as well
                    grokmirror.set_repo_timestamp(toplevel, gitdir, 0)
                else:
                    ts = grokmirror.get_repo_timestamp(toplevel, gitdir)
                    if ts < culled[gitdir]['modified']:
                        changed = True

            if changed:
                to_pull.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue
            else:
                logger.debug('Repo %s unchanged', gitdir)
                # if we don't have a fingerprint for it, add it now
                if culled[gitdir]['fingerprint'] is None:
                    fpr = grokmirror.get_repo_fingerprint(toplevel, gitdir)
                    culled[gitdir]['fingerprint'] = fpr
                existing.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

        else:
            # Newly incoming repo
            to_clone.append(gitdir)
            grokmirror.unlock_repo(fullpath)
            continue

        # If we got here, something is odd.
        # noinspection PyUnreachableCode
        logger.critical('Could not figure out what to do with %s', gitdir)
        grokmirror.unlock_repo(fullpath)

    logger.info('Compared new manifest against %s repositories in %0.2fs',
                len(culled), e_cmp.elapsed)
    e_cmp.close()

    if verify:
        if len(verify_fails):
            logger.critical('%s repos failed to verify', len(verify_fails))
            return 1
        else:
            logger.info('Verification successful')
            return 0

    hookscript = config['post_update_hook']

    if len(to_pull):

        if len(lock_fails) > 0:
            pull_threads -= len(lock_fails)

        # Don't spin up more threads than we need
        if pull_threads > len(to_pull):
            pull_threads = len(to_pull)

        # exit if we're ever at 0 pull_threads. Shouldn't happen, but some extra
        # precaution doesn't hurt
        if pull_threads <= 0:
            logger.info('Too many repositories locked. Exiting.')
            return 0

        logger.info('Will use %d threads to pull repos', pull_threads)

        # noinspection PyTypeChecker
        e_pull = em.counter(total=len(to_pull),
                            desc='Updating :',
                            unit='repos',
                            leave=False)
        logger.info('Updating %s repos from %s', len(to_pull), config['site'])
        in_queue = Queue()
        out_queue = Queue()

        for gitdir in to_pull:
            in_queue.put((gitdir, culled[gitdir]['fingerprint'],
                          culled[gitdir]['modified']))

        for i in range(pull_threads):
            logger.debug('Spun up thread %s', i)
            t = PullerThread(in_queue, out_queue, config, i, e_pull)
            t.setDaemon(True)
            t.start()

        # wait till it's all done
        in_queue.join()
        logger.info('All threads finished.')

        while not out_queue.empty():
            # see if any of it failed
            (gitdir, my_fingerprint, status) = out_queue.get()
            # We always record our fingerprint in our manifest
            culled[gitdir]['fingerprint'] = my_fingerprint
            if not status:
                # To make sure we check this again during next run,
                # fudge the manifest accordingly.
                logger.debug('Will recheck %s during next run', gitdir)
                culled[gitdir] = mymanifest[gitdir]
                # this is rather hackish, but effective
                last_modified -= 1

        logger.info('Updates completed in %0.2fs', e_pull.elapsed)
        e_pull.close()
    else:
        logger.info('No repositories need updating')

    # how many lockfiles have we seen?
    # If there are more lock_fails than there are
    # pull_threads configured, we skip cloning out of caution
    if len(to_clone) and len(lock_fails) > pull_threads:
        logger.info(
            'Too many repositories locked. Skipping cloning new repos.')
        to_clone = []

    if len(to_clone):
        # noinspection PyTypeChecker
        e_clone = em.counter(total=len(to_clone),
                             desc='Cloning  :',
                             unit='repos',
                             leave=False)
        logger.info('Cloning %s repos from %s', len(to_clone), config['site'])
        # we use "existing" to track which repos can be used as references
        existing.extend(to_pull)

        to_clone_sorted = []
        clone_order(to_clone, manifest, to_clone_sorted, existing)

        for gitdir in to_clone_sorted:
            e_clone.refresh()

            fullpath = os.path.join(toplevel, gitdir.lstrip('/'))

            # Did grok-fsck request to reclone it?
            rfile = os.path.join(fullpath, 'grokmirror.reclone')
            if os.path.exists(rfile):
                logger.debug('Removing %s for reclone', gitdir)
                shutil.move(fullpath, '%s.reclone' % fullpath)
                shutil.rmtree('%s.reclone' % fullpath)

            # Do we still need to clone it, or has another process
            # already done this for us?
            ts = grokmirror.get_repo_timestamp(toplevel, gitdir)

            if ts > 0:
                logger.debug('Looks like %s already cloned, skipping', gitdir)
                e_clone.update()
                continue

            try:
                grokmirror.lock_repo(fullpath, nonblocking=True)
            except IOError:
                logger.info('Could not lock %s, skipping', gitdir)
                lock_fails.append(gitdir)
                e_clone.update()
                continue

            reference = None
            if config['ignore_repo_references'] != 'yes':
                reference = culled[gitdir]['reference']

            if reference is not None and reference in existing:
                # Make sure we can lock the reference repo
                refrepo = os.path.join(toplevel, reference.lstrip('/'))
                try:
                    grokmirror.lock_repo(refrepo, nonblocking=True)
                    success = clone_repo(toplevel,
                                         gitdir,
                                         config['site'],
                                         reference=reference)
                    grokmirror.unlock_repo(refrepo)
                except IOError:
                    logger.info('Cannot lock reference repo %s, skipping %s',
                                reference, gitdir)
                    if reference not in lock_fails:
                        lock_fails.append(reference)

                    grokmirror.unlock_repo(fullpath)
                    e_clone.update()
                    continue
            else:
                success = clone_repo(toplevel, gitdir, config['site'])

            # check dir to make sure cloning succeeded and then add to existing
            if os.path.exists(fullpath) and success:
                logger.debug('Cloning of %s succeeded, adding to existing',
                             gitdir)
                existing.append(gitdir)

                desc = culled[gitdir].get('description')
                owner = culled[gitdir].get('owner')
                ref = culled[gitdir].get('reference')

                if owner is None:
                    owner = config['default_owner']
                set_repo_params(toplevel, gitdir, owner, desc, ref)
                set_agefile(toplevel, gitdir, culled[gitdir]['modified'])
                my_fingerprint = grokmirror.set_repo_fingerprint(
                    toplevel, gitdir)
                culled[gitdir]['fingerprint'] = my_fingerprint
                run_post_update_hook(hookscript, toplevel, gitdir)
            else:
                logger.warning('Was not able to clone %s', gitdir)
                # Remove it from our manifest so we can try re-cloning
                # next time grok-pull runs
                del culled[gitdir]
                git_fails.append(gitdir)

            grokmirror.unlock_repo(fullpath)
            e_clone.update()

        logger.info('Clones completed in %0.2fs' % e_clone.elapsed)
        e_clone.close()

    else:
        logger.info('No repositories need cloning')

    # loop through all entries and find any symlinks we need to set
    # We also collect all symlinks to do purging correctly
    symlinks = []
    for gitdir in culled.keys():
        if 'symlinks' in culled[gitdir].keys():
            source = os.path.join(config['toplevel'], gitdir.lstrip('/'))
            for symlink in culled[gitdir]['symlinks']:
                if symlink not in symlinks:
                    symlinks.append(symlink)
                target = os.path.join(config['toplevel'], symlink.lstrip('/'))

                if os.path.exists(source):
                    if os.path.islink(target):
                        # are you pointing to where we need you?
                        if os.path.realpath(target) != source:
                            # Remove symlink and recreate below
                            logger.debug('Removed existing wrong symlink %s',
                                         target)
                            os.unlink(target)
                    elif os.path.exists(target):
                        logger.warning('Deleted repo %s, because it is now'
                                       ' a symlink to %s' % (target, source))
                        shutil.rmtree(target)

                    # Here we re-check if we still need to do anything
                    if not os.path.exists(target):
                        logger.info('Symlinking %s -> %s', target, source)
                        # Make sure the leading dirs are in place
                        if not os.path.exists(os.path.dirname(target)):
                            os.makedirs(os.path.dirname(target))
                        os.symlink(source, target)

    manifile = config['mymanifest']
    grokmirror.manifest_lock(manifile)

    # Is the local manifest newer than last_modified? That would indicate
    # that another process has run and "culled" is no longer the latest info
    if os.path.exists(manifile):
        fstat = os.stat(manifile)
        if fstat[8] > last_modified:
            logger.info('Local manifest is newer, not saving.')
            grokmirror.manifest_unlock(manifile)
            return 0

    if purge:
        to_purge = []
        found_repos = 0
        for founddir in grokmirror.find_all_gitdirs(config['toplevel']):
            gitdir = founddir.replace(config['toplevel'], '')
            found_repos += 1

            if gitdir not in culled.keys() and gitdir not in symlinks:
                to_purge.append(founddir)

        if len(to_purge):
            # Purge-protection engage
            try:
                purge_limit = int(config['purgeprotect'])
                assert 1 <= purge_limit <= 99
            except (ValueError, AssertionError):
                logger.critical('Warning: "%s" is not valid for purgeprotect.',
                                config['purgeprotect'])
                logger.critical('Please set to a number between 1 and 99.')
                logger.critical('Defaulting to purgeprotect=5.')
                purge_limit = 5

            purge_pc = len(to_purge) * 100 / found_repos
            logger.debug('purgeprotect=%s', purge_limit)
            logger.debug('purge prercentage=%s', purge_pc)

            if not forcepurge and purge_pc >= purge_limit:
                logger.critical('Refusing to purge %s repos (%s%%)',
                                len(to_purge), purge_pc)
                logger.critical('Set purgeprotect to a higher percentage, or'
                                ' override with --force-purge.')
                logger.info('Not saving local manifest')
                return 1
            else:
                # noinspection PyTypeChecker
                e_purge = em.counter(total=len(to_purge),
                                     desc='Purging  :',
                                     unit='repos',
                                     leave=False)
                for founddir in to_purge:
                    e_purge.refresh()
                    if os.path.islink(founddir):
                        logger.info('Removing unreferenced symlink %s', gitdir)
                        os.unlink(founddir)
                    else:
                        # is anything using us for alternates?
                        gitdir = '/' + os.path.relpath(founddir,
                                                       toplevel).lstrip('/')
                        if grokmirror.is_alt_repo(toplevel, gitdir):
                            logger.info(
                                'Not purging %s because it is used by '
                                'other repos via alternates', founddir)
                        else:
                            try:
                                logger.info('Purging %s', founddir)
                                grokmirror.lock_repo(founddir,
                                                     nonblocking=True)
                                shutil.rmtree(founddir)
                            except IOError:
                                lock_fails.append(gitdir)
                                logger.info('%s is locked, not purging',
                                            gitdir)
                    e_purge.update()

                logger.info('Purging completed in %0.2fs', e_purge.elapsed)
                e_purge.close()

        else:
            logger.info('No repositories need purging')

    # Done with progress bars
    em.stop()

    # Go through all repos in culled and get the latest local timestamps.
    for gitdir in culled:
        ts = grokmirror.get_repo_timestamp(toplevel, gitdir)
        culled[gitdir]['modified'] = ts

    # If there were any lock failures, we fudge last_modified to always
    # be older than the server, which will force the next grokmirror run.
    if len(lock_fails):
        logger.info('%s repos could not be locked. Forcing next run.',
                    len(lock_fails))
        last_modified -= 1
    elif len(git_fails):
        logger.info('%s repos failed. Forcing next run.', len(git_fails))
        last_modified -= 1

    # Once we're done, save culled as our new manifest
    grokmirror.write_manifest(manifile,
                              culled,
                              mtime=last_modified,
                              pretty=pretty)

    grokmirror.manifest_unlock(manifile)

    # write out projects.list, if asked to
    write_projects_list(culled, config)

    return 127
Example #7
0
def run_git_repack(fullpath, config, level=1):
    # Returns false if we hit any errors on the way
    repack_ok = True

    is_precious = check_precious_objects(fullpath)

    # Figure out what our repack flags should be.
    repack_flags = list()
    if 'extra_repack_flags' in config and len(config['extra_repack_flags']):
        repack_flags += config['extra_repack_flags'].split()

    full_repack_flags = ['-f', '-b', '--pack-kept-objects']
    if 'extra_repack_flags_full' in config and len(config['extra_repack_flags_full']):
        full_repack_flags += config['extra_repack_flags_full'].split()

    # Are any other repos using us in their objects/info/alternates?
    gitdir = '/' + os.path.relpath(fullpath, config['toplevel']).lstrip('/')
    if grokmirror.is_alt_repo(config['toplevel'], gitdir):
        # we are a "mother repo"
        if not is_precious and ('precious' in config and config['precious'] == 'yes'):
            is_precious = True
            set_precious_objects(fullpath)

        # are we using alternates ourselves? Multiple levels of alternates are
        # a bad idea in general due high possibility of corruption.
        if os.path.exists(os.path.join(fullpath, 'objects', 'info', 'alternates')):
            logger.warning('warning : has alternates and is used by others for alternates')
            logger.warning('        : this can cause grandchild corruption')
            repack_flags.append('-A')
            repack_flags.append('-l')
        else:
            repack_flags.append('-a')
            if not is_precious:
                repack_flags.append('-k')

            if level > 1:
                logger.info(' repack : performing a full repack for optimal deltas')
                repack_flags += full_repack_flags

    elif os.path.exists(os.path.join(fullpath, 'objects', 'info', 'alternates')):
        # we are a "child repo"
        repack_flags.append('-l')
        if level > 1:
            repack_flags.append('-A')

    else:
        # we have no relationships with other repos
        if level > 1:
            logger.info(' repack : performing a full repack for optimal deltas')
            repack_flags.append('-a')
            if not is_precious:
                repack_flags.append('-k')
            repack_flags += full_repack_flags

    if not is_precious:
        repack_flags.append('-d')

    args = ['repack'] + repack_flags
    logger.info(' repack : repacking with "%s"', ' '.join(repack_flags))

    # We always tack on -q
    repack_flags.append('-q')

    retcode, output, error = grokmirror.run_git_command(fullpath, args)

    # With newer versions of git, repack may return warnings that are safe to ignore
    # so use the same strategy to weed out things we aren't interested in seeing
    if error:
        # Put things we recognize as fairly benign into debug
        debug = []
        warn = []
        for line in error.split('\n'):
            ignored = False
            for estring in config['ignore_errors']:
                if line.find(estring) != -1:
                    ignored = True
                    debug.append(line)
                    break
            if not ignored:
                warn.append(line)

        if debug:
            logger.debug('Stderr: %s', '\n'.join(debug))
        if warn:
            logger.critical('Repacking %s returned critical errors:', fullpath)
            repack_ok = False
            for entry in warn:
                logger.critical("\t%s", entry)
            check_reclone_error(fullpath, config, warn)

    if not repack_ok:
        # No need to repack refs if repo is broken
        return False

    # only repack refs on full repacks
    if level < 2:
        # do we still have loose objects after repacking?
        obj_info = get_repo_obj_info(fullpath)
        if obj_info['count'] != '0':
            return run_git_prune(fullpath, config)
        return repack_ok

    # repacking refs requires a separate command, so run it now
    args = ['pack-refs', '--all']
    logger.info(' repack : repacking refs')
    retcode, output, error = grokmirror.run_git_command(fullpath, args)

    # pack-refs shouldn't return anything, but use the same ignore_errors block
    # to weed out any future potential benign warnings
    if error:
        # Put things we recognize as fairly benign into debug
        debug = []
        warn = []
        for line in error.split('\n'):
            ignored = False
            for estring in config['ignore_errors']:
                if line.find(estring) != -1:
                    ignored = True
                    debug.append(line)
                    break
            if not ignored:
                warn.append(line)

        if debug:
            logger.debug('Stderr: %s', '\n'.join(debug))
        if warn:
            logger.critical('Repacking refs %s returned critical errors:',
                            fullpath)
            repack_ok = False
            for entry in warn:
                logger.critical("\t%s", entry)

            check_reclone_error(fullpath, config, warn)

    if repack_ok and 'prune' in config and config['prune'] == 'yes':
        # run prune now
        return run_git_prune(fullpath, config)

    return repack_ok
Example #8
0
def pull_mirror(name, config, verbose=False, force=False, nomtime=False,
                verify=False, verify_subpath='*', noreuse=False,
                purge=False, pretty=False, forcepurge=False):
    global logger
    global lock_fails

    # noinspection PyTypeChecker
    em = enlighten.get_manager(series=' -=#')

    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    if 'log' in config.keys():
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO

        if 'loglevel' in config.keys():
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG

        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)
        em.enabled = False

    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    logger.info('Checking [%s]', name)
    mymanifest = config['mymanifest']

    if verify:
        logger.info('Verifying mirror against %s', config['manifest'])
        nomtime = True

    if config['manifest'].find('file:///') == 0:
        manifile = config['manifest'].replace('file://', '')
        if not os.path.exists(manifile):
            logger.critical('Remote manifest not found in %s! Quitting!',
                            config['manifest'])
            return 1

        fstat = os.stat(manifile)
        last_modified = fstat[8]
        logger.debug('mtime on %s is: %s', manifile, fstat[8])

        if os.path.exists(config['mymanifest']):
            fstat = os.stat(config['mymanifest'])
            my_last_modified = fstat[8]
            logger.debug('Our last-modified is: %s', my_last_modified)
            if not (force or nomtime) and last_modified <= my_last_modified:
                logger.info('Manifest file unchanged. Quitting.')
                return 0

        logger.info('Reading new manifest from %s', manifile)
        manifest = grokmirror.read_manifest(manifile)
        # Don't accept empty manifests -- that indicates something is wrong
        if not len(manifest.keys()):
            logger.warning('Remote manifest empty or unparseable! Quitting.')
            return 1

    else:
        # Load it from remote host using http and header magic
        logger.info('Fetching remote manifest from %s', config['manifest'])

        # Do we have username:password@ in the URL?
        chunks = urlparse(config['manifest'])
        if chunks.netloc.find('@') > 0:
            logger.debug('Taking username/password from the URL for basic auth')
            (upass, netloc) = chunks.netloc.split('@')
            if upass.find(':') > 0:
                (username, password) = upass.split(':')
            else:
                username = upass
                password = ''

            manifesturl = config['manifest'].replace(chunks.netloc, netloc)
            logger.debug('manifesturl=%s', manifesturl)
            request = urllib_request.Request(manifesturl)

            password_mgr = urllib_request.HTTPPasswordMgrWithDefaultRealm()
            password_mgr.add_password(None, manifesturl, username, password)
            auth_handler = urllib_request.HTTPBasicAuthHandler(password_mgr)
            opener = urllib_request.build_opener(auth_handler)

        else:
            request = urllib_request.Request(config['manifest'])
            opener = urllib_request.build_opener()

        # Find out if we need to run at all first
        if not (force or nomtime) and os.path.exists(mymanifest):
            fstat = os.stat(mymanifest)
            mtime = fstat[8]
            logger.debug('mtime on %s is: %s', mymanifest, mtime)
            my_last_modified = time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                             time.gmtime(mtime))
            logger.debug('Our last-modified is: %s', my_last_modified)
            request.add_header('If-Modified-Since', my_last_modified)

        try:
            ufh = opener.open(request, timeout=30)
        except HTTPError as ex:
            if ex.code == 304:
                logger.info('Server says we have the latest manifest. '
                            'Quitting.')
                return 0
            logger.warning('Could not fetch %s', config['manifest'])
            logger.warning('Server returned: %s', ex)
            return 1
        except (URLError, ssl.SSLError, ssl.CertificateError) as ex:
            logger.warning('Could not fetch %s', config['manifest'])
            logger.warning('Error was: %s', ex)
            return 1

        last_modified = ufh.headers.get('Last-Modified')
        last_modified = time.strptime(last_modified, '%a, %d %b %Y %H:%M:%S %Z')
        last_modified = calendar.timegm(last_modified)

        # We don't use read_manifest for the remote manifest, as it can be
        # anything, really. For now, blindly open it with gzipfile if it ends
        # with .gz. XXX: some http servers will auto-deflate such files.
        try:
            if config['manifest'].find('.gz') > 0:
                fh = gzip.GzipFile(fileobj=BytesIO(ufh.read()))
            else:
                fh = ufh

            jdata = fh.read().decode('utf-8')
            fh.close()

            manifest = anyjson.deserialize(jdata)

        except Exception as ex:
            logger.warning('Failed to parse %s', config['manifest'])
            logger.warning('Error was: %s', ex)
            return 1

    mymanifest = grokmirror.read_manifest(mymanifest)

    culled = cull_manifest(manifest, config)

    to_clone = []
    to_pull = []
    existing = []

    toplevel = config['toplevel']
    if not os.access(toplevel, os.W_OK):
        logger.critical('Toplevel %s does not exist or is not writable',
                        toplevel)
        sys.exit(1)

    if 'pull_threads' in config.keys():
        pull_threads = int(config['pull_threads'])
        if pull_threads < 1:
            logger.info('pull_threads is less than 1, forcing to 1')
            pull_threads = 1
    else:
        # be conservative
        logger.info('pull_threads is not set, consider setting it')
        pull_threads = 5

    # noinspection PyTypeChecker
    e_cmp = em.counter(total=len(culled), desc='Comparing:', unit='repos', leave=False)

    for gitdir in list(culled):
        fullpath = os.path.join(toplevel, gitdir.lstrip('/'))
        e_cmp.update()

        # fingerprints were added in later versions, so deal if the upstream
        # manifest doesn't have a fingerprint
        if 'fingerprint' not in culled[gitdir]:
            culled[gitdir]['fingerprint'] = None

        # Attempt to lock the repo
        try:
            grokmirror.lock_repo(fullpath, nonblocking=True)
        except IOError:
            logger.info('Could not lock %s, skipping', gitdir)
            lock_fails.append(gitdir)
            # Force the fingerprint to what we have in mymanifest,
            # if we have it.
            culled[gitdir]['fingerprint'] = None
            if gitdir in mymanifest and 'fingerprint' in mymanifest[gitdir]:
                culled[gitdir]['fingerprint'] = mymanifest[gitdir][
                    'fingerprint']
            if len(lock_fails) >= pull_threads:
                logger.info('Too many repositories locked (%s). Exiting.',
                            len(lock_fails))
                return 0
            continue

        if verify:
            if culled[gitdir]['fingerprint'] is None:
                logger.debug('No fingerprint for %s, not verifying', gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            if not fnmatch.fnmatch(gitdir, verify_subpath):
                grokmirror.unlock_repo(fullpath)
                continue

            logger.debug('Verifying %s', gitdir)
            if not os.path.exists(fullpath):
                verify_fails.append(gitdir)
                logger.info('Verify: %s ABSENT', gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            my_fingerprint = grokmirror.get_repo_fingerprint(
                toplevel, gitdir, force=force)

            if my_fingerprint == culled[gitdir]['fingerprint']:
                logger.info('Verify: %s OK', gitdir)
            else:
                logger.critical('Verify: %s FAILED', gitdir)
                verify_fails.append(gitdir)

            grokmirror.unlock_repo(fullpath)
            continue

        # Is the directory in place?
        if os.path.exists(fullpath):
            # Did grok-fsck request to reclone it?
            rfile = os.path.join(fullpath, 'grokmirror.reclone')
            if os.path.exists(rfile):
                logger.info('Reclone requested for %s:', gitdir)
                with open(rfile, 'r') as rfh:
                    reason = rfh.read()
                    logger.info('  %s', reason)

                to_clone.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            # Fix owner and description, if necessary
            if gitdir in mymanifest.keys():
                # This code is hurky and needs to be cleaned up
                desc = culled[gitdir].get('description')
                owner = culled[gitdir].get('owner')
                ref = None
                if config['ignore_repo_references'] != 'yes':
                    ref = culled[gitdir].get('reference')

                # dirty hack to force on-disk owner/description checks
                # when we're called with -n, in case our manifest
                # differs from what is on disk for owner/description/alternates
                myref = None
                if nomtime:
                    mydesc = None
                    myowner = None
                else:
                    mydesc = mymanifest[gitdir].get('description')
                    myowner = mymanifest[gitdir].get('owner')

                    if config['ignore_repo_references'] != 'yes':
                        myref = mymanifest[gitdir].get('reference')

                    if myowner is None:
                        myowner = config['default_owner']

                if owner is None:
                    owner = config['default_owner']

                if desc != mydesc or owner != myowner or ref != myref:
                    # we can do this right away without waiting
                    set_repo_params(toplevel, gitdir, owner, desc, ref)

            else:
                # It exists on disk, but not in my manifest?
                if noreuse:
                    logger.critical('Found existing git repo in %s', fullpath)
                    logger.critical('But you asked NOT to reuse repos')
                    logger.critical('Skipping %s', gitdir)
                    grokmirror.unlock_repo(fullpath)
                    continue

                logger.info('Setting new origin for %s', gitdir)
                fix_remotes(gitdir, toplevel, config['site'])
                to_pull.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            # fingerprints were added late, so if we don't have them
            # in the remote manifest, fall back on using timestamps
            changed = False
            if culled[gitdir]['fingerprint'] is not None:
                logger.debug('Will use fingerprints to compare %s', gitdir)
                my_fingerprint = grokmirror.get_repo_fingerprint(toplevel,
                                                                 gitdir,
                                                                 force=force)

                if my_fingerprint != culled[gitdir]['fingerprint']:
                    logger.debug('No fingerprint match, will pull %s', gitdir)
                    changed = True
                else:
                    logger.debug('Fingerprints match, skipping %s', gitdir)
            else:
                logger.debug('Will use timestamps to compare %s', gitdir)
                if force:
                    logger.debug('Will force-pull %s', gitdir)
                    changed = True
                    # set timestamp to 0 as well
                    grokmirror.set_repo_timestamp(toplevel, gitdir, 0)
                else:
                    ts = grokmirror.get_repo_timestamp(toplevel, gitdir)
                    if ts < culled[gitdir]['modified']:
                        changed = True

            if changed:
                to_pull.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue
            else:
                logger.debug('Repo %s unchanged', gitdir)
                # if we don't have a fingerprint for it, add it now
                if culled[gitdir]['fingerprint'] is None:
                    fpr = grokmirror.get_repo_fingerprint(toplevel, gitdir)
                    culled[gitdir]['fingerprint'] = fpr
                existing.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

        else:
            # Newly incoming repo
            to_clone.append(gitdir)
            grokmirror.unlock_repo(fullpath)
            continue

        # If we got here, something is odd.
        # noinspection PyUnreachableCode
        logger.critical('Could not figure out what to do with %s', gitdir)
        grokmirror.unlock_repo(fullpath)

    logger.info('Compared new manifest against %s repositories in %0.2fs', len(culled), e_cmp.elapsed)
    e_cmp.close()

    if verify:
        if len(verify_fails):
            logger.critical('%s repos failed to verify', len(verify_fails))
            return 1
        else:
            logger.info('Verification successful')
            return 0

    hookscript = config['post_update_hook']

    if len(to_pull):

        if len(lock_fails) > 0:
            pull_threads -= len(lock_fails)

        # Don't spin up more threads than we need
        if pull_threads > len(to_pull):
            pull_threads = len(to_pull)

        # exit if we're ever at 0 pull_threads. Shouldn't happen, but some extra
        # precaution doesn't hurt
        if pull_threads <= 0:
            logger.info('Too many repositories locked. Exiting.')
            return 0

        logger.info('Will use %d threads to pull repos', pull_threads)

        # noinspection PyTypeChecker
        e_pull = em.counter(total=len(to_pull), desc='Updating :', unit='repos', leave=False)
        logger.info('Updating %s repos from %s', len(to_pull), config['site'])
        in_queue = Queue()
        out_queue = Queue()

        for gitdir in to_pull:
            in_queue.put((gitdir, culled[gitdir]['fingerprint'],
                          culled[gitdir]['modified']))

        for i in range(pull_threads):
            logger.debug('Spun up thread %s', i)
            t = PullerThread(in_queue, out_queue, config, i, e_pull)
            t.setDaemon(True)
            t.start()

        # wait till it's all done
        in_queue.join()
        logger.info('All threads finished.')

        while not out_queue.empty():
            # see if any of it failed
            (gitdir, my_fingerprint, status) = out_queue.get()
            # We always record our fingerprint in our manifest
            culled[gitdir]['fingerprint'] = my_fingerprint
            if not status:
                # To make sure we check this again during next run,
                # fudge the manifest accordingly.
                logger.debug('Will recheck %s during next run', gitdir)
                culled[gitdir] = mymanifest[gitdir]
                # this is rather hackish, but effective
                last_modified -= 1

        logger.info('Updates completed in %0.2fs', e_pull.elapsed)
        e_pull.close()
    else:
        logger.info('No repositories need updating')

    # how many lockfiles have we seen?
    # If there are more lock_fails than there are
    # pull_threads configured, we skip cloning out of caution
    if len(to_clone) and len(lock_fails) > pull_threads:
        logger.info('Too many repositories locked. Skipping cloning new repos.')
        to_clone = []

    if len(to_clone):
        # noinspection PyTypeChecker
        e_clone = em.counter(total=len(to_clone), desc='Cloning  :', unit='repos', leave=False)
        logger.info('Cloning %s repos from %s', len(to_clone), config['site'])
        # we use "existing" to track which repos can be used as references
        existing.extend(to_pull)

        to_clone_sorted = []
        clone_order(to_clone, manifest, to_clone_sorted, existing)

        for gitdir in to_clone_sorted:
            e_clone.refresh()

            fullpath = os.path.join(toplevel, gitdir.lstrip('/'))

            # Did grok-fsck request to reclone it?
            rfile = os.path.join(fullpath, 'grokmirror.reclone')
            if os.path.exists(rfile):
                logger.debug('Removing %s for reclone', gitdir)
                shutil.move(fullpath, '%s.reclone' % fullpath)
                shutil.rmtree('%s.reclone' % fullpath)

            # Do we still need to clone it, or has another process
            # already done this for us?
            ts = grokmirror.get_repo_timestamp(toplevel, gitdir)

            if ts > 0:
                logger.debug('Looks like %s already cloned, skipping', gitdir)
                continue

            try:
                grokmirror.lock_repo(fullpath, nonblocking=True)
            except IOError:
                logger.info('Could not lock %s, skipping', gitdir)
                lock_fails.append(gitdir)
                e_clone.update()
                continue

            reference = None
            if config['ignore_repo_references'] != 'yes':
                reference = culled[gitdir]['reference']

            if reference is not None and reference in existing:
                # Make sure we can lock the reference repo
                refrepo = os.path.join(toplevel, reference.lstrip('/'))
                try:
                    grokmirror.lock_repo(refrepo, nonblocking=True)
                    success = clone_repo(toplevel, gitdir, config['site'],
                                         reference=reference)
                    grokmirror.unlock_repo(refrepo)
                except IOError:
                    logger.info('Cannot lock reference repo %s, skipping %s',
                                reference, gitdir)
                    if reference not in lock_fails:
                        lock_fails.append(reference)

                    grokmirror.unlock_repo(fullpath)
                    e_clone.update()
                    continue
            else:
                success = clone_repo(toplevel, gitdir, config['site'])

            # check dir to make sure cloning succeeded and then add to existing
            if os.path.exists(fullpath) and success:
                logger.debug('Cloning of %s succeeded, adding to existing',
                             gitdir)
                existing.append(gitdir)

                desc = culled[gitdir].get('description')
                owner = culled[gitdir].get('owner')
                ref = culled[gitdir].get('reference')

                if owner is None:
                    owner = config['default_owner']
                set_repo_params(toplevel, gitdir, owner, desc, ref)
                set_agefile(toplevel, gitdir, culled[gitdir]['modified'])
                my_fingerprint = grokmirror.set_repo_fingerprint(toplevel,
                                                                 gitdir)
                culled[gitdir]['fingerprint'] = my_fingerprint
                run_post_update_hook(hookscript, toplevel, gitdir)
            else:
                logger.warning('Was not able to clone %s', gitdir)
                # Remove it from our manifest so we can try re-cloning
                # next time grok-pull runs
                del culled[gitdir]
                git_fails.append(gitdir)

            grokmirror.unlock_repo(fullpath)
            e_clone.update()

        logger.info('Clones completed in %0.2fs' % e_clone.elapsed)
        e_clone.close()

    else:
        logger.info('No repositories need cloning')

    # loop through all entries and find any symlinks we need to set
    # We also collect all symlinks to do purging correctly
    symlinks = []
    for gitdir in culled.keys():
        if 'symlinks' in culled[gitdir].keys():
            source = os.path.join(config['toplevel'], gitdir.lstrip('/'))
            for symlink in culled[gitdir]['symlinks']:
                if symlink not in symlinks:
                    symlinks.append(symlink)
                target = os.path.join(config['toplevel'], symlink.lstrip('/'))

                if os.path.exists(source):
                    if os.path.islink(target):
                        # are you pointing to where we need you?
                        if os.path.realpath(target) != source:
                            # Remove symlink and recreate below
                            logger.debug('Removed existing wrong symlink %s',
                                         target)
                            os.unlink(target)
                    elif os.path.exists(target):
                        logger.warning('Deleted repo %s, because it is now'
                                       ' a symlink to %s' % (target, source))
                        shutil.rmtree(target)

                    # Here we re-check if we still need to do anything
                    if not os.path.exists(target):
                        logger.info('Symlinking %s -> %s', target, source)
                        # Make sure the leading dirs are in place
                        if not os.path.exists(os.path.dirname(target)):
                            os.makedirs(os.path.dirname(target))
                        os.symlink(source, target)

    manifile = config['mymanifest']
    grokmirror.manifest_lock(manifile)

    # Is the local manifest newer than last_modified? That would indicate
    # that another process has run and "culled" is no longer the latest info
    if os.path.exists(manifile):
        fstat = os.stat(manifile)
        if fstat[8] > last_modified:
            logger.info('Local manifest is newer, not saving.')
            grokmirror.manifest_unlock(manifile)
            return 0

    if purge:
        to_purge = []
        found_repos = 0
        for founddir in grokmirror.find_all_gitdirs(config['toplevel']):
            gitdir = founddir.replace(config['toplevel'], '')
            found_repos += 1

            if gitdir not in culled.keys() and gitdir not in symlinks:
                to_purge.append(founddir)

        if len(to_purge):
            # Purge-protection engage
            try:
                purge_limit = int(config['purgeprotect'])
                assert 1 <= purge_limit <= 99
            except (ValueError, AssertionError):
                logger.critical('Warning: "%s" is not valid for purgeprotect.',
                                config['purgeprotect'])
                logger.critical('Please set to a number between 1 and 99.')
                logger.critical('Defaulting to purgeprotect=5.')
                purge_limit = 5

            purge_pc = len(to_purge) * 100 / found_repos
            logger.debug('purgeprotect=%s', purge_limit)
            logger.debug('purge prercentage=%s', purge_pc)

            if not forcepurge and purge_pc >= purge_limit:
                logger.critical('Refusing to purge %s repos (%s%%)',
                                len(to_purge), purge_pc)
                logger.critical('Set purgeprotect to a higher percentage, or'
                                ' override with --force-purge.')
                logger.info('Not saving local manifest')
                return 1
            else:
                # noinspection PyTypeChecker
                e_purge = em.counter(total=len(to_purge), desc='Purging  :', unit='repos', leave=False)
                for founddir in to_purge:
                    e_purge.refresh()
                    if os.path.islink(founddir):
                        logger.info('Removing unreferenced symlink %s', gitdir)
                        os.unlink(founddir)
                    else:
                        # is anything using us for alternates?
                        gitdir = '/' + os.path.relpath(founddir, toplevel).lstrip('/')
                        if grokmirror.is_alt_repo(toplevel, gitdir):
                            logger.info('Not purging %s because it is used by '
                                        'other repos via alternates', founddir)
                        else:
                            try:
                                logger.info('Purging %s', founddir)
                                grokmirror.lock_repo(founddir, nonblocking=True)
                                shutil.rmtree(founddir)
                            except IOError:
                                lock_fails.append(gitdir)
                                logger.info('%s is locked, not purging',
                                            gitdir)
                    e_purge.update()

                logger.info('Purging completed in %0.2fs', e_purge.elapsed)
                e_purge.close()

        else:
            logger.info('No repositories need purging')

    # Done with progress bars
    em.stop()

    # Go through all repos in culled and get the latest local timestamps.
    for gitdir in culled:
        ts = grokmirror.get_repo_timestamp(toplevel, gitdir)
        culled[gitdir]['modified'] = ts

    # If there were any lock failures, we fudge last_modified to always
    # be older than the server, which will force the next grokmirror run.
    if len(lock_fails):
        logger.info('%s repos could not be locked. Forcing next run.',
                    len(lock_fails))
        last_modified -= 1
    elif len(git_fails):
        logger.info('%s repos failed. Forcing next run.', len(git_fails))
        last_modified -= 1

    # Once we're done, save culled as our new manifest
    grokmirror.write_manifest(manifile, culled, mtime=last_modified,
                              pretty=pretty)

    grokmirror.manifest_unlock(manifile)

    # write out projects.list, if asked to
    write_projects_list(culled, config)

    return 127