def check_reclone_error(fullpath, config, errors):
    """Request an auto-reclone of a repo if its errors look fatal.

    Scans the fsck/repack error lines for any substring listed in
    config['reclone_on_errors'].  On a match, drops a
    'grokmirror.reclone' marker file into the repository so the next
    grok-pull run reclones it -- unless the repo is used by other repos
    via alternates, in which case recloning would corrupt the children
    and we only log.

    :param fullpath: absolute path to the git repository
    :param config: fsck config dict (uses 'reclone_on_errors', 'toplevel')
    :param errors: list of stderr lines from the failed git command
    :returns: None
    """
    reclone = None
    for line in errors:
        for estring in config['reclone_on_errors']:
            if estring in line:
                # is this repo used for alternates?
                gitdir = '/' + os.path.relpath(fullpath, config['toplevel']).lstrip('/')
                if grokmirror.is_alt_repo(config['toplevel'], gitdir):
                    logger.critical('\tused for alternates, not requesting auto-reclone')
                    return
                reclone = line
                logger.critical('\trequested auto-reclone')
                break
        if reclone is not None:
            break
    if reclone is None:
        return

    rfile = os.path.join(fullpath, 'grokmirror.reclone')
    # Have we already requested a reclone?
    if os.path.exists(rfile):
        logger.debug('Already requested repo reclone for %s', fullpath)
        return

    with open(rfile, 'w') as rfh:
        rfh.write('Requested by grok-fsck due to error: %s' % reclone)
def check_reclone_error(fullpath, config, errors):
    """Flag a repository for recloning when known-fatal errors appear.

    Looks for the first error line containing any of the substrings in
    config['reclone_on_errors'].  If one is found and the repository is
    not a parent to others via alternates, a 'grokmirror.reclone'
    marker file is written so the next grok-pull run reclones it.
    """
    culprit = None
    for errline in errors:
        if not any(snippet in errline for snippet in config['reclone_on_errors']):
            continue
        # This repo may feed objects to children via alternates; recloning
        # it out from under them could corrupt those children, so skip.
        gitdir = '/' + os.path.relpath(fullpath, config['toplevel']).lstrip('/')
        if grokmirror.is_alt_repo(config['toplevel'], gitdir):
            logger.critical('\tused for alternates, not requesting auto-reclone')
            return
        culprit = errline
        logger.critical('\trequested auto-reclone')
        break

    if culprit is None:
        return

    marker = os.path.join(fullpath, 'grokmirror.reclone')
    # A previous run may have already left the marker behind.
    if os.path.exists(marker):
        logger.debug('Already requested repo reclone for %s', fullpath)
        return

    with open(marker, 'w') as rfh:
        rfh.write('Requested by grok-fsck due to error: %s' % culprit)
def run_git_prune(fullpath, config):
    """Prune unreachable loose objects in the repository at fullpath.

    Does nothing unless config['prune'] is 'yes'.  Also skipped when the
    repo is used by other repos via objects/info/alternates, because
    pruning a "mother" repo can corrupt the repos borrowing from it.

    :param fullpath: absolute path to the git repository
    :param config: fsck config dict (uses 'prune', 'toplevel', 'ignore_errors')
    :returns: True on success or skip, False if git reported errors we do
              not recognize as benign
    """
    prune_ok = True
    if config.get('prune') != 'yes':
        return prune_ok

    # Are any other repos using us in their objects/info/alternates?
    gitdir = '/' + os.path.relpath(fullpath, config['toplevel']).lstrip('/')
    if grokmirror.is_alt_repo(config['toplevel'], gitdir):
        logger.info(' prune : skipped, is alternate to other repos')
        return prune_ok

    # We set expire to yesterday in order to avoid race conditions
    # in repositories that are actively being accessed at the time of
    # running the prune job.
    args = ['prune', '--expire=yesterday']
    logger.info(' prune : pruning')
    retcode, output, error = grokmirror.run_git_command(fullpath, args)

    if error:
        # Put things we recognize as fairly benign into debug
        debug = []
        warn = []
        for line in error.split('\n'):
            if any(estring in line for estring in config['ignore_errors']):
                debug.append(line)
            else:
                warn.append(line)
        if debug:
            logger.debug('Stderr: %s', '\n'.join(debug))
        if warn:
            logger.critical('Pruning %s returned critical errors:', fullpath)
            prune_ok = False
            for entry in warn:
                logger.critical("\t%s", entry)
            check_reclone_error(fullpath, config, warn)

    return prune_ok
def run_git_prune(fullpath, config):
    """Prune unreachable loose objects in the repository at fullpath.

    No-op unless config['prune'] is 'yes', and skipped for repos that
    other repos borrow objects from via objects/info/alternates (pruning
    a "mother" repo can corrupt its children).

    :param fullpath: absolute path to the git repository
    :param config: fsck config dict (uses 'prune', 'toplevel', 'ignore_errors')
    :returns: True on success or skip, False on unrecognized git errors
    """
    prune_ok = True
    # config.get() replaces the old "'prune' not in config.keys()" anti-idiom
    if config.get('prune') != 'yes':
        return prune_ok

    # Are any other repos using us in their objects/info/alternates?
    gitdir = '/' + os.path.relpath(fullpath, config['toplevel']).lstrip('/')
    if grokmirror.is_alt_repo(config['toplevel'], gitdir):
        logger.info(' prune : skipped, is alternate to other repos')
        return prune_ok

    # We set expire to yesterday in order to avoid race conditions
    # in repositories that are actively being accessed at the time of
    # running the prune job.
    args = ['prune', '--expire=yesterday']
    logger.info(' prune : pruning')
    retcode, output, error = grokmirror.run_git_command(fullpath, args)

    if error:
        # Put things we recognize as fairly benign into debug
        debug = []
        warn = []
        for line in error.split('\n'):
            if any(estring in line for estring in config['ignore_errors']):
                debug.append(line)
            else:
                warn.append(line)
        if debug:
            logger.debug('Stderr: %s', '\n'.join(debug))
        if warn:
            logger.critical('Pruning %s returned critical errors:', fullpath)
            prune_ok = False
            for entry in warn:
                logger.critical("\t%s", entry)
            check_reclone_error(fullpath, config, warn)

    return prune_ok
def run_git_repack(fullpath, config, level=1):
    """Repack the git repository at fullpath, tuning flags to its role.

    Repos that other repos borrow from via alternates ("mother" repos)
    are repacked conservatively (-A, or -a -k), repos that borrow
    objects themselves ("child" repos) repack local objects only (-l),
    and standalone repos get a plain repack plus full-repack flags when
    level > 1.  On a level-1 repack leftover loose objects trigger a
    prune; on a full repack refs are packed and prune runs if enabled.

    :param fullpath: absolute path to the git repository
    :param config: fsck config dict (extra_repack_flags, precious,
                   ignore_errors, commitgraph, prune, toplevel)
    :param level: 1 for a regular repack, 2+ for a full repack
    :returns: False if git reported unrecognized errors, otherwise the
              result of any follow-up prune, otherwise True
    """
    repack_ok = True
    is_precious = False
    set_precious = False
    gen_commitgraph = True

    # Figure out what our repack flags should be.
    repack_flags = list()
    if 'extra_repack_flags' in config and len(config['extra_repack_flags']):
        repack_flags += config['extra_repack_flags'].split()

    full_repack_flags = ['-f', '-b', '--pack-kept-objects']
    if 'extra_repack_flags_full' in config and len(config['extra_repack_flags_full']):
        full_repack_flags += config['extra_repack_flags_full'].split()

    # Are any other repos using us in their objects/info/alternates?
    gitdir = '/' + os.path.relpath(fullpath, config['toplevel']).lstrip('/')
    if grokmirror.is_alt_repo(config['toplevel'], gitdir):
        # we are a "mother repo"
        # Force preciousObjects if precious is "always"
        # (.get() so a config without 'precious' doesn't raise KeyError)
        if config.get('precious') == 'always':
            is_precious = True
            set_precious_objects(fullpath, enabled=True)
        else:
            # Turn precious off during repacks
            set_precious_objects(fullpath, enabled=False)
            # Turn it back on after we're done
            set_precious = True

        # are we using alternates ourselves? Multiple levels of alternates are
        # a bad idea in general due high possibility of corruption.
        if os.path.exists(os.path.join(fullpath, 'objects', 'info', 'alternates')):
            logger.warning('warning : has alternates and is used by others for alternates')
            logger.warning(' : this can cause grandchild corruption')
            repack_flags.append('-A')
            repack_flags.append('-l')
        else:
            repack_flags.append('-a')
            if not is_precious:
                repack_flags.append('-k')
        if level > 1:
            logger.info(' repack : performing a full repack for optimal deltas')
            repack_flags += full_repack_flags

    elif os.path.exists(os.path.join(fullpath, 'objects', 'info', 'alternates')):
        # we are a "child repo": repack only local objects, and skip the
        # commit-graph since our object store is incomplete on its own
        gen_commitgraph = False
        repack_flags.append('-l')
        if level > 1:
            repack_flags.append('-A')

    else:
        # we have no relationships with other repos
        if level > 1:
            logger.info(' repack : performing a full repack for optimal deltas')
            repack_flags.append('-a')
            if not is_precious:
                repack_flags.append('-k')
            repack_flags += full_repack_flags

    if not is_precious:
        repack_flags.append('-d')

    args = ['repack'] + repack_flags
    logger.info(' repack : repacking with "%s"', ' '.join(repack_flags))

    # We always tack on -q
    # BUGFIX: this used to do repack_flags.append('-q') *after* args was
    # already built from repack_flags, so -q never reached the git command.
    args.append('-q')

    retcode, output, error = grokmirror.run_git_command(fullpath, args)

    if set_precious:
        set_precious_objects(fullpath, enabled=True)

    # With newer versions of git, repack may return warnings that are safe to
    # ignore, so use the same strategy to weed out things we aren't
    # interested in seeing
    if error:
        # Put things we recognize as fairly benign into debug
        debug = []
        warn = []
        for line in error.split('\n'):
            if any(estring in line for estring in config['ignore_errors']):
                debug.append(line)
            else:
                warn.append(line)
        if debug:
            logger.debug('Stderr: %s', '\n'.join(debug))
        if warn:
            logger.critical('Repacking %s returned critical errors:', fullpath)
            repack_ok = False
            for entry in warn:
                logger.critical("\t%s", entry)
            check_reclone_error(fullpath, config, warn)

    if not repack_ok:
        # No need to repack refs if repo is broken
        return False

    if gen_commitgraph and config['commitgraph'] == 'yes':
        run_git_commit_graph(fullpath)

    # only repack refs on full repacks
    if level < 2:
        # do we still have loose objects after repacking?
        obj_info = get_repo_obj_info(fullpath)
        if obj_info['count'] != '0':
            return run_git_prune(fullpath, config)
        return repack_ok

    # repacking refs requires a separate command, so run it now
    args = ['pack-refs', '--all']
    logger.info(' repack : repacking refs')
    retcode, output, error = grokmirror.run_git_command(fullpath, args)

    # pack-refs shouldn't return anything, but use the same ignore_errors
    # block to weed out any future potential benign warnings
    if error:
        debug = []
        warn = []
        for line in error.split('\n'):
            if any(estring in line for estring in config['ignore_errors']):
                debug.append(line)
            else:
                warn.append(line)
        if debug:
            logger.debug('Stderr: %s', '\n'.join(debug))
        if warn:
            logger.critical('Repacking refs %s returned critical errors:', fullpath)
            repack_ok = False
            for entry in warn:
                logger.critical("\t%s", entry)
            check_reclone_error(fullpath, config, warn)

    if repack_ok and config.get('prune') == 'yes':
        # run prune now
        return run_git_prune(fullpath, config)

    return repack_ok
def pull_mirror(name, config, verbose=False, force=False, nomtime=False,
                verify=False, verify_subpath='*', noreuse=False,
                purge=False, pretty=False, forcepurge=False):
    """Synchronize the local mirror against the remote manifest.

    Loads the remote manifest (from a file:/// path, or over HTTP with
    If-Modified-Since / basic-auth support), compares it with the local
    manifest, pulls changed repos with a pool of PullerThread workers,
    clones new repos (honoring alternates/reference ordering), maintains
    symlinks, optionally purges repos no longer in the manifest, and
    finally writes out the updated local manifest and projects.list.

    :param name: mirror section name, also used as the logger name
    :param config: dict of mirror settings (manifest, mymanifest,
                   toplevel, site, pull_threads, ...)
    :param verbose: log INFO to the console instead of only CRITICAL
    :param force: pull/verify regardless of timestamps or fingerprints
    :param nomtime: ignore manifest mtime checks when deciding to run
    :param verify: only verify fingerprints against the manifest
    :param verify_subpath: fnmatch pattern limiting what gets verified
    :param noreuse: do not adopt existing on-disk repos missing from the
                    local manifest
    :param purge: delete on-disk repos not present in the manifest
    :param pretty: pretty-print the written manifest
    :param forcepurge: purge even past the purgeprotect threshold
    :returns: 0 when there is nothing to do (or too many repos are
              locked), 1 on errors, 127 on the run-to-completion path.
              NOTE(review): 127 on success looks deliberate (presumably a
              "work was done" sentinel for the caller) -- confirm with
              callers before changing.
    """
    global logger
    global lock_fails

    # noinspection PyTypeChecker
    em = enlighten.get_manager(series=' -=#')

    # Per-mirror logger: optional file handler at INFO/DEBUG, plus a
    # console handler that stays quiet unless verbose is set.
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    if 'log' in config.keys():
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter("[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO
        if 'loglevel' in config.keys():
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG
        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)
    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)
        # no progress bars when we are not talking to a human console
        em.enabled = False
    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    logger.info('Checking [%s]', name)
    mymanifest = config['mymanifest']

    if verify:
        logger.info('Verifying mirror against %s', config['manifest'])
        # verification must always re-read the manifest
        nomtime = True

    if config['manifest'].find('file:///') == 0:
        # Local manifest file: compare mtimes to see if we need to run.
        manifile = config['manifest'].replace('file://', '')
        if not os.path.exists(manifile):
            logger.critical('Remote manifest not found in %s! Quitting!', config['manifest'])
            return 1

        fstat = os.stat(manifile)
        last_modified = fstat[8]
        logger.debug('mtime on %s is: %s', manifile, fstat[8])

        if os.path.exists(config['mymanifest']):
            fstat = os.stat(config['mymanifest'])
            my_last_modified = fstat[8]
            logger.debug('Our last-modified is: %s', my_last_modified)
            if not (force or nomtime) and last_modified <= my_last_modified:
                logger.info('Manifest file unchanged. Quitting.')
                return 0

        logger.info('Reading new manifest from %s', manifile)
        manifest = grokmirror.read_manifest(manifile)
        # Don't accept empty manifests -- that indicates something is wrong
        if not len(manifest.keys()):
            logger.warning('Remote manifest empty or unparseable! Quitting.')
            return 1

    else:
        # Load it from remote host using http and header magic
        logger.info('Fetching remote manifest from %s', config['manifest'])

        # Do we have username:password@ in the URL?
        chunks = urlparse(config['manifest'])
        if chunks.netloc.find('@') > 0:
            logger.debug('Taking username/password from the URL for basic auth')
            (upass, netloc) = chunks.netloc.split('@')
            if upass.find(':') > 0:
                (username, password) = upass.split(':')
            else:
                username = upass
                password = ''

            # Rebuild the URL without the credentials and wire up an
            # HTTP basic-auth opener instead.
            manifesturl = config['manifest'].replace(chunks.netloc, netloc)
            logger.debug('manifesturl=%s', manifesturl)
            request = urllib_request.Request(manifesturl)

            password_mgr = urllib_request.HTTPPasswordMgrWithDefaultRealm()
            password_mgr.add_password(None, manifesturl, username, password)
            auth_handler = urllib_request.HTTPBasicAuthHandler(password_mgr)
            opener = urllib_request.build_opener(auth_handler)
        else:
            request = urllib_request.Request(config['manifest'])
            opener = urllib_request.build_opener()

        # Find out if we need to run at all first
        if not (force or nomtime) and os.path.exists(mymanifest):
            fstat = os.stat(mymanifest)
            mtime = fstat[8]
            logger.debug('mtime on %s is: %s', mymanifest, mtime)
            my_last_modified = time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(mtime))
            logger.debug('Our last-modified is: %s', my_last_modified)
            request.add_header('If-Modified-Since', my_last_modified)

        try:
            ufh = opener.open(request, timeout=30)
        except HTTPError as ex:
            # 304 is the expected "nothing changed" answer to
            # If-Modified-Since, not an error.
            if ex.code == 304:
                logger.info('Server says we have the latest manifest. ' 'Quitting.')
                return 0
            logger.warning('Could not fetch %s', config['manifest'])
            logger.warning('Server returned: %s', ex)
            return 1
        except (URLError, ssl.SSLError, ssl.CertificateError) as ex:
            logger.warning('Could not fetch %s', config['manifest'])
            logger.warning('Error was: %s', ex)
            return 1

        last_modified = ufh.headers.get('Last-Modified')
        last_modified = time.strptime(last_modified, '%a, %d %b %Y %H:%M:%S %Z')
        last_modified = calendar.timegm(last_modified)

        # We don't use read_manifest for the remote manifest, as it can be
        # anything, really. For now, blindly open it with gzipfile if it ends
        # with .gz. XXX: some http servers will auto-deflate such files.
        try:
            if config['manifest'].find('.gz') > 0:
                fh = gzip.GzipFile(fileobj=BytesIO(ufh.read()))
            else:
                fh = ufh

            jdata = fh.read().decode('utf-8')
            fh.close()
            manifest = anyjson.deserialize(jdata)
        except Exception as ex:
            logger.warning('Failed to parse %s', config['manifest'])
            logger.warning('Error was: %s', ex)
            return 1

    mymanifest = grokmirror.read_manifest(mymanifest)

    culled = cull_manifest(manifest, config)

    to_clone = []
    to_pull = []
    existing = []

    toplevel = config['toplevel']
    if not os.access(toplevel, os.W_OK):
        logger.critical('Toplevel %s does not exist or is not writable', toplevel)
        sys.exit(1)

    if 'pull_threads' in config.keys():
        pull_threads = int(config['pull_threads'])
        if pull_threads < 1:
            logger.info('pull_threads is less than 1, forcing to 1')
            pull_threads = 1
    else:
        # be conservative
        logger.info('pull_threads is not set, consider setting it')
        pull_threads = 5

    # noinspection PyTypeChecker
    e_cmp = em.counter(total=len(culled), desc='Comparing:', unit='repos', leave=False)

    # First pass: classify every repo in the culled manifest as
    # to-pull, to-clone, existing/unchanged, or verify it.
    for gitdir in list(culled):
        fullpath = os.path.join(toplevel, gitdir.lstrip('/'))
        e_cmp.update()

        # fingerprints were added in later versions, so deal if the upstream
        # manifest doesn't have a fingerprint
        if 'fingerprint' not in culled[gitdir]:
            culled[gitdir]['fingerprint'] = None

        # Attempt to lock the repo
        try:
            grokmirror.lock_repo(fullpath, nonblocking=True)
        except IOError:
            logger.info('Could not lock %s, skipping', gitdir)
            lock_fails.append(gitdir)
            # Force the fingerprint to what we have in mymanifest,
            # if we have it.
            culled[gitdir]['fingerprint'] = None
            if gitdir in mymanifest and 'fingerprint' in mymanifest[gitdir]:
                culled[gitdir]['fingerprint'] = mymanifest[gitdir]['fingerprint']
            if len(lock_fails) >= pull_threads:
                logger.info('Too many repositories locked (%s). Exiting.', len(lock_fails))
                return 0
            continue

        if verify:
            # Verify-only mode: compare fingerprints, never modify repos.
            if culled[gitdir]['fingerprint'] is None:
                logger.debug('No fingerprint for %s, not verifying', gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            if not fnmatch.fnmatch(gitdir, verify_subpath):
                grokmirror.unlock_repo(fullpath)
                continue

            logger.debug('Verifying %s', gitdir)
            if not os.path.exists(fullpath):
                verify_fails.append(gitdir)
                logger.info('Verify: %s ABSENT', gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            my_fingerprint = grokmirror.get_repo_fingerprint(toplevel, gitdir, force=force)

            if my_fingerprint == culled[gitdir]['fingerprint']:
                logger.info('Verify: %s OK', gitdir)
            else:
                logger.critical('Verify: %s FAILED', gitdir)
                verify_fails.append(gitdir)

            grokmirror.unlock_repo(fullpath)
            continue

        # Is the directory in place?
        if os.path.exists(fullpath):
            # Did grok-fsck request to reclone it?
            rfile = os.path.join(fullpath, 'grokmirror.reclone')
            if os.path.exists(rfile):
                logger.info('Reclone requested for %s:', gitdir)
                with open(rfile, 'r') as rfh:
                    reason = rfh.read()
                    logger.info(' %s', reason)
                to_clone.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            # Fix owner and description, if necessary
            if gitdir in mymanifest.keys():
                # This code is hurky and needs to be cleaned up
                desc = culled[gitdir].get('description')
                owner = culled[gitdir].get('owner')
                ref = None
                if config['ignore_repo_references'] != 'yes':
                    ref = culled[gitdir].get('reference')

                # dirty hack to force on-disk owner/description checks
                # when we're called with -n, in case our manifest
                # differs from what is on disk for owner/description/alternates
                myref = None
                if nomtime:
                    mydesc = None
                    myowner = None
                else:
                    mydesc = mymanifest[gitdir].get('description')
                    myowner = mymanifest[gitdir].get('owner')
                    if config['ignore_repo_references'] != 'yes':
                        myref = mymanifest[gitdir].get('reference')
                    if myowner is None:
                        myowner = config['default_owner']

                if owner is None:
                    owner = config['default_owner']

                if desc != mydesc or owner != myowner or ref != myref:
                    # we can do this right away without waiting
                    set_repo_params(toplevel, gitdir, owner, desc, ref)

            else:
                # It exists on disk, but not in my manifest?
                if noreuse:
                    logger.critical('Found existing git repo in %s', fullpath)
                    logger.critical('But you asked NOT to reuse repos')
                    logger.critical('Skipping %s', gitdir)
                    grokmirror.unlock_repo(fullpath)
                    continue

                logger.info('Setting new origin for %s', gitdir)
                fix_remotes(gitdir, toplevel, config['site'])
                to_pull.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            # fingerprints were added late, so if we don't have them
            # in the remote manifest, fall back on using timestamps
            changed = False
            if culled[gitdir]['fingerprint'] is not None:
                logger.debug('Will use fingerprints to compare %s', gitdir)
                my_fingerprint = grokmirror.get_repo_fingerprint(toplevel, gitdir, force=force)

                if my_fingerprint != culled[gitdir]['fingerprint']:
                    logger.debug('No fingerprint match, will pull %s', gitdir)
                    changed = True
                else:
                    logger.debug('Fingerprints match, skipping %s', gitdir)
            else:
                logger.debug('Will use timestamps to compare %s', gitdir)
                if force:
                    logger.debug('Will force-pull %s', gitdir)
                    changed = True
                    # set timestamp to 0 as well
                    grokmirror.set_repo_timestamp(toplevel, gitdir, 0)
                else:
                    ts = grokmirror.get_repo_timestamp(toplevel, gitdir)
                    if ts < culled[gitdir]['modified']:
                        changed = True

            if changed:
                to_pull.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue
            else:
                logger.debug('Repo %s unchanged', gitdir)
                # if we don't have a fingerprint for it, add it now
                if culled[gitdir]['fingerprint'] is None:
                    fpr = grokmirror.get_repo_fingerprint(toplevel, gitdir)
                    culled[gitdir]['fingerprint'] = fpr
                existing.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

        else:
            # Newly incoming repo
            to_clone.append(gitdir)
            grokmirror.unlock_repo(fullpath)
            continue

        # If we got here, something is odd.
        # noinspection PyUnreachableCode
        logger.critical('Could not figure out what to do with %s', gitdir)
        grokmirror.unlock_repo(fullpath)

    logger.info('Compared new manifest against %s repositories in %0.2fs', len(culled), e_cmp.elapsed)
    e_cmp.close()

    if verify:
        if len(verify_fails):
            logger.critical('%s repos failed to verify', len(verify_fails))
            return 1
        else:
            logger.info('Verification successful')
            return 0

    hookscript = config['post_update_hook']

    if len(to_pull):
        # Repos that we failed to lock above eat into the thread budget,
        # on the assumption another process is working on them.
        if len(lock_fails) > 0:
            pull_threads -= len(lock_fails)

        # Don't spin up more threads than we need
        if pull_threads > len(to_pull):
            pull_threads = len(to_pull)

        # exit if we're ever at 0 pull_threads. Shouldn't happen, but some
        # extra precaution doesn't hurt
        if pull_threads <= 0:
            logger.info('Too many repositories locked. Exiting.')
            return 0

        logger.info('Will use %d threads to pull repos', pull_threads)

        # noinspection PyTypeChecker
        e_pull = em.counter(total=len(to_pull), desc='Updating :', unit='repos', leave=False)
        logger.info('Updating %s repos from %s', len(to_pull), config['site'])
        in_queue = Queue()
        out_queue = Queue()

        for gitdir in to_pull:
            in_queue.put((gitdir, culled[gitdir]['fingerprint'], culled[gitdir]['modified']))

        for i in range(pull_threads):
            logger.debug('Spun up thread %s', i)
            t = PullerThread(in_queue, out_queue, config, i, e_pull)
            t.setDaemon(True)
            t.start()

        # wait till it's all done
        in_queue.join()
        logger.info('All threads finished.')

        while not out_queue.empty():
            # see if any of it failed
            (gitdir, my_fingerprint, status) = out_queue.get()
            # We always record our fingerprint in our manifest
            culled[gitdir]['fingerprint'] = my_fingerprint
            if not status:
                # To make sure we check this again during next run,
                # fudge the manifest accordingly.
                logger.debug('Will recheck %s during next run', gitdir)
                culled[gitdir] = mymanifest[gitdir]
                # this is rather hackish, but effective
                last_modified -= 1

        logger.info('Updates completed in %0.2fs', e_pull.elapsed)
        e_pull.close()
    else:
        logger.info('No repositories need updating')

    # how many lockfiles have we seen?
    # If there are more lock_fails than there are
    # pull_threads configured, we skip cloning out of caution
    if len(to_clone) and len(lock_fails) > pull_threads:
        logger.info('Too many repositories locked. Skipping cloning new repos.')
        to_clone = []

    if len(to_clone):
        # noinspection PyTypeChecker
        e_clone = em.counter(total=len(to_clone), desc='Cloning :', unit='repos', leave=False)
        logger.info('Cloning %s repos from %s', len(to_clone), config['site'])
        # we use "existing" to track which repos can be used as references
        existing.extend(to_pull)

        # Order clones so that reference ("mother") repos come first.
        to_clone_sorted = []
        clone_order(to_clone, manifest, to_clone_sorted, existing)

        for gitdir in to_clone_sorted:
            e_clone.refresh()
            fullpath = os.path.join(toplevel, gitdir.lstrip('/'))

            # Did grok-fsck request to reclone it?
            rfile = os.path.join(fullpath, 'grokmirror.reclone')
            if os.path.exists(rfile):
                logger.debug('Removing %s for reclone', gitdir)
                shutil.move(fullpath, '%s.reclone' % fullpath)
                shutil.rmtree('%s.reclone' % fullpath)

            # Do we still need to clone it, or has another process
            # already done this for us?
            ts = grokmirror.get_repo_timestamp(toplevel, gitdir)

            if ts > 0:
                logger.debug('Looks like %s already cloned, skipping', gitdir)
                e_clone.update()
                continue

            try:
                grokmirror.lock_repo(fullpath, nonblocking=True)
            except IOError:
                logger.info('Could not lock %s, skipping', gitdir)
                lock_fails.append(gitdir)
                e_clone.update()
                continue

            reference = None
            if config['ignore_repo_references'] != 'yes':
                reference = culled[gitdir]['reference']

            if reference is not None and reference in existing:
                # Make sure we can lock the reference repo
                refrepo = os.path.join(toplevel, reference.lstrip('/'))
                try:
                    grokmirror.lock_repo(refrepo, nonblocking=True)
                    success = clone_repo(toplevel, gitdir, config['site'], reference=reference)
                    grokmirror.unlock_repo(refrepo)
                except IOError:
                    logger.info('Cannot lock reference repo %s, skipping %s', reference, gitdir)
                    if reference not in lock_fails:
                        lock_fails.append(reference)

                    grokmirror.unlock_repo(fullpath)
                    e_clone.update()
                    continue
            else:
                success = clone_repo(toplevel, gitdir, config['site'])

            # check dir to make sure cloning succeeded and then add to existing
            if os.path.exists(fullpath) and success:
                logger.debug('Cloning of %s succeeded, adding to existing', gitdir)
                existing.append(gitdir)

                desc = culled[gitdir].get('description')
                owner = culled[gitdir].get('owner')
                ref = culled[gitdir].get('reference')

                if owner is None:
                    owner = config['default_owner']
                set_repo_params(toplevel, gitdir, owner, desc, ref)
                set_agefile(toplevel, gitdir, culled[gitdir]['modified'])
                my_fingerprint = grokmirror.set_repo_fingerprint(toplevel, gitdir)
                culled[gitdir]['fingerprint'] = my_fingerprint
                run_post_update_hook(hookscript, toplevel, gitdir)
            else:
                logger.warning('Was not able to clone %s', gitdir)
                # Remove it from our manifest so we can try re-cloning
                # next time grok-pull runs
                del culled[gitdir]
                git_fails.append(gitdir)

            grokmirror.unlock_repo(fullpath)
            e_clone.update()

        # NOTE(review): eager %-formatting here; the rest of the file uses
        # lazy logger args (logger.info('...', e_clone.elapsed)).
        logger.info('Clones completed in %0.2fs' % e_clone.elapsed)
        e_clone.close()

    else:
        logger.info('No repositories need cloning')

    # loop through all entries and find any symlinks we need to set
    # We also collect all symlinks to do purging correctly
    symlinks = []
    for gitdir in culled.keys():
        if 'symlinks' in culled[gitdir].keys():
            source = os.path.join(config['toplevel'], gitdir.lstrip('/'))
            for symlink in culled[gitdir]['symlinks']:
                if symlink not in symlinks:
                    symlinks.append(symlink)
                target = os.path.join(config['toplevel'], symlink.lstrip('/'))

                if os.path.exists(source):
                    if os.path.islink(target):
                        # are you pointing to where we need you?
                        if os.path.realpath(target) != source:
                            # Remove symlink and recreate below
                            logger.debug('Removed existing wrong symlink %s', target)
                            os.unlink(target)
                    elif os.path.exists(target):
                        logger.warning('Deleted repo %s, because it is now'
                                       ' a symlink to %s' % (target, source))
                        shutil.rmtree(target)

                    # Here we re-check if we still need to do anything
                    if not os.path.exists(target):
                        logger.info('Symlinking %s -> %s', target, source)
                        # Make sure the leading dirs are in place
                        if not os.path.exists(os.path.dirname(target)):
                            os.makedirs(os.path.dirname(target))
                        os.symlink(source, target)

    manifile = config['mymanifest']
    grokmirror.manifest_lock(manifile)

    # Is the local manifest newer than last_modified? That would indicate
    # that another process has run and "culled" is no longer the latest info
    if os.path.exists(manifile):
        fstat = os.stat(manifile)
        if fstat[8] > last_modified:
            logger.info('Local manifest is newer, not saving.')
            grokmirror.manifest_unlock(manifile)
            return 0

    if purge:
        to_purge = []
        found_repos = 0
        for founddir in grokmirror.find_all_gitdirs(config['toplevel']):
            gitdir = founddir.replace(config['toplevel'], '')
            found_repos += 1

            if gitdir not in culled.keys() and gitdir not in symlinks:
                to_purge.append(founddir)

        if len(to_purge):
            # Purge-protection engage
            try:
                purge_limit = int(config['purgeprotect'])
                assert 1 <= purge_limit <= 99
            except (ValueError, AssertionError):
                logger.critical('Warning: "%s" is not valid for purgeprotect.', config['purgeprotect'])
                logger.critical('Please set to a number between 1 and 99.')
                logger.critical('Defaulting to purgeprotect=5.')
                purge_limit = 5

            purge_pc = len(to_purge) * 100 / found_repos
            logger.debug('purgeprotect=%s', purge_limit)
            logger.debug('purge prercentage=%s', purge_pc)

            if not forcepurge and purge_pc >= purge_limit:
                logger.critical('Refusing to purge %s repos (%s%%)', len(to_purge), purge_pc)
                logger.critical('Set purgeprotect to a higher percentage, or'
                                ' override with --force-purge.')
                logger.info('Not saving local manifest')
                return 1
            else:
                # noinspection PyTypeChecker
                e_purge = em.counter(total=len(to_purge), desc='Purging :', unit='repos', leave=False)
                for founddir in to_purge:
                    e_purge.refresh()
                    if os.path.islink(founddir):
                        # NOTE(review): this logs the stale `gitdir` left over
                        # from the find_all_gitdirs loop above -- probably
                        # meant `founddir`; confirm before changing.
                        logger.info('Removing unreferenced symlink %s', gitdir)
                        os.unlink(founddir)
                    else:
                        # is anything using us for alternates?
                        gitdir = '/' + os.path.relpath(founddir, toplevel).lstrip('/')
                        if grokmirror.is_alt_repo(toplevel, gitdir):
                            logger.info('Not purging %s because it is used by '
                                        'other repos via alternates', founddir)
                        else:
                            try:
                                logger.info('Purging %s', founddir)
                                grokmirror.lock_repo(founddir, nonblocking=True)
                                shutil.rmtree(founddir)
                            except IOError:
                                lock_fails.append(gitdir)
                                logger.info('%s is locked, not purging', gitdir)
                    e_purge.update()

                logger.info('Purging completed in %0.2fs', e_purge.elapsed)
                e_purge.close()

        else:
            logger.info('No repositories need purging')

    # Done with progress bars
    em.stop()

    # Go through all repos in culled and get the latest local timestamps.
    for gitdir in culled:
        ts = grokmirror.get_repo_timestamp(toplevel, gitdir)
        culled[gitdir]['modified'] = ts

    # If there were any lock failures, we fudge last_modified to always
    # be older than the server, which will force the next grokmirror run.
    if len(lock_fails):
        logger.info('%s repos could not be locked. Forcing next run.', len(lock_fails))
        last_modified -= 1
    elif len(git_fails):
        logger.info('%s repos failed. Forcing next run.', len(git_fails))
        last_modified -= 1

    # Once we're done, save culled as our new manifest
    grokmirror.write_manifest(manifile, culled, mtime=last_modified, pretty=pretty)

    grokmirror.manifest_unlock(manifile)

    # write out projects.list, if asked to
    write_projects_list(culled, config)

    return 127
def run_git_repack(fullpath, config, level=1):
    """Repack the git repository at fullpath, tuning flags to its role.

    "Mother" repos (used by others via alternates) are repacked
    conservatively and may have preciousObjects forced on; "child"
    repos (using alternates themselves) repack local objects only; and
    standalone repos get a plain repack, with full-repack flags when
    level > 1.  On a level-1 repack leftover loose objects trigger a
    prune; on a full repack refs are packed and prune runs if enabled.

    :param fullpath: absolute path to the git repository
    :param config: fsck config dict (extra_repack_flags, precious,
                   ignore_errors, prune, toplevel)
    :param level: 1 for a regular repack, 2+ for a full repack
    :returns: False if git reported unrecognized errors, otherwise the
              result of any follow-up prune, otherwise True
    """
    repack_ok = True
    is_precious = check_precious_objects(fullpath)

    # Figure out what our repack flags should be.
    repack_flags = list()
    if 'extra_repack_flags' in config and len(config['extra_repack_flags']):
        repack_flags += config['extra_repack_flags'].split()

    full_repack_flags = ['-f', '-b', '--pack-kept-objects']
    if 'extra_repack_flags_full' in config and len(config['extra_repack_flags_full']):
        full_repack_flags += config['extra_repack_flags_full'].split()

    # Are any other repos using us in their objects/info/alternates?
    gitdir = '/' + os.path.relpath(fullpath, config['toplevel']).lstrip('/')
    if grokmirror.is_alt_repo(config['toplevel'], gitdir):
        # we are a "mother repo"
        if not is_precious and ('precious' in config and config['precious'] == 'yes'):
            is_precious = True
            set_precious_objects(fullpath)

        # are we using alternates ourselves? Multiple levels of alternates are
        # a bad idea in general due high possibility of corruption.
        if os.path.exists(os.path.join(fullpath, 'objects', 'info', 'alternates')):
            logger.warning('warning : has alternates and is used by others for alternates')
            logger.warning(' : this can cause grandchild corruption')
            repack_flags.append('-A')
            repack_flags.append('-l')
        else:
            repack_flags.append('-a')
            if not is_precious:
                repack_flags.append('-k')
        if level > 1:
            logger.info(' repack : performing a full repack for optimal deltas')
            repack_flags += full_repack_flags

    elif os.path.exists(os.path.join(fullpath, 'objects', 'info', 'alternates')):
        # we are a "child repo": only repack objects we own
        repack_flags.append('-l')
        if level > 1:
            repack_flags.append('-A')

    else:
        # we have no relationships with other repos
        if level > 1:
            logger.info(' repack : performing a full repack for optimal deltas')
            repack_flags.append('-a')
            if not is_precious:
                repack_flags.append('-k')
            repack_flags += full_repack_flags

    if not is_precious:
        repack_flags.append('-d')

    args = ['repack'] + repack_flags
    logger.info(' repack : repacking with "%s"', ' '.join(repack_flags))

    # We always tack on -q
    # BUGFIX: previously this appended '-q' to repack_flags *after* args was
    # built from it, so -q never reached the actual git invocation.
    args.append('-q')

    retcode, output, error = grokmirror.run_git_command(fullpath, args)

    # With newer versions of git, repack may return warnings that are safe to
    # ignore, so use the same strategy to weed out things we aren't
    # interested in seeing
    if error:
        # Put things we recognize as fairly benign into debug
        debug = []
        warn = []
        for line in error.split('\n'):
            if any(estring in line for estring in config['ignore_errors']):
                debug.append(line)
            else:
                warn.append(line)
        if debug:
            logger.debug('Stderr: %s', '\n'.join(debug))
        if warn:
            logger.critical('Repacking %s returned critical errors:', fullpath)
            repack_ok = False
            for entry in warn:
                logger.critical("\t%s", entry)
            check_reclone_error(fullpath, config, warn)

    if not repack_ok:
        # No need to repack refs if repo is broken
        return False

    # only repack refs on full repacks
    if level < 2:
        # do we still have loose objects after repacking?
        obj_info = get_repo_obj_info(fullpath)
        if obj_info['count'] != '0':
            return run_git_prune(fullpath, config)
        return repack_ok

    # repacking refs requires a separate command, so run it now
    args = ['pack-refs', '--all']
    logger.info(' repack : repacking refs')
    retcode, output, error = grokmirror.run_git_command(fullpath, args)

    # pack-refs shouldn't return anything, but use the same ignore_errors
    # block to weed out any future potential benign warnings
    if error:
        debug = []
        warn = []
        for line in error.split('\n'):
            if any(estring in line for estring in config['ignore_errors']):
                debug.append(line)
            else:
                warn.append(line)
        if debug:
            logger.debug('Stderr: %s', '\n'.join(debug))
        if warn:
            logger.critical('Repacking refs %s returned critical errors:', fullpath)
            repack_ok = False
            for entry in warn:
                logger.critical("\t%s", entry)
            check_reclone_error(fullpath, config, warn)

    if repack_ok and config.get('prune') == 'yes':
        # run prune now
        return run_git_prune(fullpath, config)

    return repack_ok
def pull_mirror(name, config, verbose=False, force=False, nomtime=False,
                verify=False, verify_subpath='*', noreuse=False,
                purge=False, pretty=False, forcepurge=False):
    """Synchronize the local mirror against the remote manifest.

    Fetches the remote manifest (local file:// path or HTTP[S], with
    optional basic auth taken from the URL), compares it against the
    local manifest and on-disk repositories, then pulls changed repos
    (threaded), clones new ones, fixes up symlinks, optionally purges
    repos no longer in the manifest, and writes out the updated local
    manifest and projects.list.

    :param name: mirror section name (also used as the logger name)
    :param config: dict of config values for this mirror
    :param verbose: log INFO to the console instead of only CRITICAL
    :param force: ignore mtime/fingerprint checks and force updates
    :param nomtime: ignore manifest mtime checks (implied by verify)
    :param verify: only verify fingerprints, making no changes
    :param verify_subpath: fnmatch pattern limiting which repos to verify
    :param noreuse: do not reuse existing on-disk repos not in our manifest
    :param purge: remove on-disk repos that are not in the remote manifest
    :param pretty: write the local manifest pretty-printed
    :param forcepurge: purge even when purgeprotect limits are exceeded
    :returns: 0 when there was nothing to do (or verify succeeded),
              1 on errors (or verify failures), and 127 after a run that
              actually performed work (apparent "changes made" sentinel,
              matching grok-dumb-pull's exit-code convention)
    """
    global logger
    global lock_fails

    # noinspection PyTypeChecker
    em = enlighten.get_manager(series=' -=#')

    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    if 'log' in config.keys():
        ch = logging.FileHandler(config['log'])
        formatter = logging.Formatter(
            "[%(process)d] %(asctime)s - %(levelname)s - %(message)s")
        ch.setFormatter(formatter)
        loglevel = logging.INFO

        if 'loglevel' in config.keys():
            if config['loglevel'] == 'debug':
                loglevel = logging.DEBUG

        ch.setLevel(loglevel)
        logger.addHandler(ch)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if verbose:
        ch.setLevel(logging.INFO)
    else:
        ch.setLevel(logging.CRITICAL)
        # no point in progress bars if we're not chatty
        em.enabled = False

    logger.addHandler(ch)

    # push it into grokmirror to override the default logger
    grokmirror.logger = logger

    logger.info('Checking [%s]', name)
    mymanifest = config['mymanifest']

    if verify:
        logger.info('Verifying mirror against %s', config['manifest'])
        nomtime = True

    if config['manifest'].find('file:///') == 0:
        # Local manifest file -- compare mtimes directly
        manifile = config['manifest'].replace('file://', '')
        if not os.path.exists(manifile):
            logger.critical('Remote manifest not found in %s! Quitting!',
                            config['manifest'])
            return 1

        fstat = os.stat(manifile)
        last_modified = fstat[8]
        logger.debug('mtime on %s is: %s', manifile, fstat[8])

        if os.path.exists(config['mymanifest']):
            fstat = os.stat(config['mymanifest'])
            my_last_modified = fstat[8]
            logger.debug('Our last-modified is: %s', my_last_modified)
            if not (force or nomtime) and last_modified <= my_last_modified:
                logger.info('Manifest file unchanged. Quitting.')
                return 0

        logger.info('Reading new manifest from %s', manifile)
        manifest = grokmirror.read_manifest(manifile)
        # Don't accept empty manifests -- that indicates something is wrong
        if not len(manifest.keys()):
            logger.warning('Remote manifest empty or unparseable! Quitting.')
            return 1

    else:
        # Load it from remote host using http and header magic
        logger.info('Fetching remote manifest from %s', config['manifest'])

        # Do we have username:password@ in the URL?
        chunks = urlparse(config['manifest'])
        if chunks.netloc.find('@') > 0:
            logger.debug('Taking username/password from the URL for basic auth')
            (upass, netloc) = chunks.netloc.split('@')
            if upass.find(':') > 0:
                (username, password) = upass.split(':')
            else:
                username = upass
                password = ''

            # Strip the credentials out of the URL we actually request
            manifesturl = config['manifest'].replace(chunks.netloc, netloc)
            logger.debug('manifesturl=%s', manifesturl)
            request = urllib_request.Request(manifesturl)

            password_mgr = urllib_request.HTTPPasswordMgrWithDefaultRealm()
            password_mgr.add_password(None, manifesturl, username, password)
            auth_handler = urllib_request.HTTPBasicAuthHandler(password_mgr)
            opener = urllib_request.build_opener(auth_handler)

        else:
            request = urllib_request.Request(config['manifest'])
            opener = urllib_request.build_opener()

        # Find out if we need to run at all first
        if not (force or nomtime) and os.path.exists(mymanifest):
            fstat = os.stat(mymanifest)
            mtime = fstat[8]
            logger.debug('mtime on %s is: %s', mymanifest, mtime)
            my_last_modified = time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                             time.gmtime(mtime))
            logger.debug('Our last-modified is: %s', my_last_modified)
            # Let the server tell us if anything changed (304 means no)
            request.add_header('If-Modified-Since', my_last_modified)

        try:
            ufh = opener.open(request, timeout=30)
        except HTTPError as ex:
            if ex.code == 304:
                logger.info('Server says we have the latest manifest. '
                            'Quitting.')
                return 0
            logger.warning('Could not fetch %s', config['manifest'])
            logger.warning('Server returned: %s', ex)
            return 1
        except (URLError, ssl.SSLError, ssl.CertificateError) as ex:
            logger.warning('Could not fetch %s', config['manifest'])
            logger.warning('Error was: %s', ex)
            return 1

        last_modified = ufh.headers.get('Last-Modified')
        last_modified = time.strptime(last_modified,
                                      '%a, %d %b %Y %H:%M:%S %Z')
        last_modified = calendar.timegm(last_modified)

        # We don't use read_manifest for the remote manifest, as it can be
        # anything, really. For now, blindly open it with gzipfile if it ends
        # with .gz. XXX: some http servers will auto-deflate such files.
        try:
            if config['manifest'].find('.gz') > 0:
                fh = gzip.GzipFile(fileobj=BytesIO(ufh.read()))
            else:
                fh = ufh

            jdata = fh.read().decode('utf-8')
            fh.close()

            manifest = anyjson.deserialize(jdata)

        except Exception as ex:
            logger.warning('Failed to parse %s', config['manifest'])
            logger.warning('Error was: %s', ex)
            return 1

    mymanifest = grokmirror.read_manifest(mymanifest)

    culled = cull_manifest(manifest, config)

    to_clone = []
    to_pull = []
    existing = []

    toplevel = config['toplevel']
    if not os.access(toplevel, os.W_OK):
        logger.critical('Toplevel %s does not exist or is not writable',
                        toplevel)
        sys.exit(1)

    if 'pull_threads' in config.keys():
        pull_threads = int(config['pull_threads'])
        if pull_threads < 1:
            logger.info('pull_threads is less than 1, forcing to 1')
            pull_threads = 1
    else:
        # be conservative
        logger.info('pull_threads is not set, consider setting it')
        pull_threads = 5

    # noinspection PyTypeChecker
    e_cmp = em.counter(total=len(culled), desc='Comparing:', unit='repos',
                       leave=False)

    # Phase 1: compare the remote manifest against what we have on disk
    # and sort every repo into to_pull / to_clone / existing.
    for gitdir in list(culled):
        fullpath = os.path.join(toplevel, gitdir.lstrip('/'))
        e_cmp.update()

        # fingerprints were added in later versions, so deal if the upstream
        # manifest doesn't have a fingerprint
        if 'fingerprint' not in culled[gitdir]:
            culled[gitdir]['fingerprint'] = None

        # Attempt to lock the repo
        try:
            grokmirror.lock_repo(fullpath, nonblocking=True)
        except IOError:
            logger.info('Could not lock %s, skipping', gitdir)
            lock_fails.append(gitdir)
            # Force the fingerprint to what we have in mymanifest,
            # if we have it.
            culled[gitdir]['fingerprint'] = None
            if gitdir in mymanifest and 'fingerprint' in mymanifest[gitdir]:
                culled[gitdir]['fingerprint'] = mymanifest[gitdir][
                    'fingerprint']
            if len(lock_fails) >= pull_threads:
                logger.info('Too many repositories locked (%s). Exiting.',
                            len(lock_fails))
                return 0
            continue

        if verify:
            # Verification mode: compare fingerprints only, change nothing
            if culled[gitdir]['fingerprint'] is None:
                logger.debug('No fingerprint for %s, not verifying', gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            if not fnmatch.fnmatch(gitdir, verify_subpath):
                grokmirror.unlock_repo(fullpath)
                continue

            logger.debug('Verifying %s', gitdir)
            if not os.path.exists(fullpath):
                verify_fails.append(gitdir)
                logger.info('Verify: %s ABSENT', gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            my_fingerprint = grokmirror.get_repo_fingerprint(
                toplevel, gitdir, force=force)

            if my_fingerprint == culled[gitdir]['fingerprint']:
                logger.info('Verify: %s OK', gitdir)
            else:
                logger.critical('Verify: %s FAILED', gitdir)
                verify_fails.append(gitdir)

            grokmirror.unlock_repo(fullpath)
            continue

        # Is the directory in place?
        if os.path.exists(fullpath):
            # Did grok-fsck request to reclone it?
            rfile = os.path.join(fullpath, 'grokmirror.reclone')
            if os.path.exists(rfile):
                logger.info('Reclone requested for %s:', gitdir)
                with open(rfile, 'r') as rfh:
                    reason = rfh.read()
                    logger.info(' %s', reason)
                to_clone.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            # Fix owner and description, if necessary
            if gitdir in mymanifest.keys():
                # This code is hurky and needs to be cleaned up
                desc = culled[gitdir].get('description')
                owner = culled[gitdir].get('owner')
                ref = None
                if config['ignore_repo_references'] != 'yes':
                    ref = culled[gitdir].get('reference')

                # dirty hack to force on-disk owner/description checks
                # when we're called with -n, in case our manifest
                # differs from what is on disk for owner/description/alternates
                myref = None
                if nomtime:
                    mydesc = None
                    myowner = None
                else:
                    mydesc = mymanifest[gitdir].get('description')
                    myowner = mymanifest[gitdir].get('owner')

                    if config['ignore_repo_references'] != 'yes':
                        myref = mymanifest[gitdir].get('reference')

                    if myowner is None:
                        myowner = config['default_owner']

                if owner is None:
                    owner = config['default_owner']

                if desc != mydesc or owner != myowner or ref != myref:
                    # we can do this right away without waiting
                    set_repo_params(toplevel, gitdir, owner, desc, ref)

            else:
                # It exists on disk, but not in my manifest?
                if noreuse:
                    logger.critical('Found existing git repo in %s', fullpath)
                    logger.critical('But you asked NOT to reuse repos')
                    logger.critical('Skipping %s', gitdir)
                    grokmirror.unlock_repo(fullpath)
                    continue

                logger.info('Setting new origin for %s', gitdir)
                fix_remotes(gitdir, toplevel, config['site'])
                to_pull.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

            # fingerprints were added late, so if we don't have them
            # in the remote manifest, fall back on using timestamps
            changed = False
            if culled[gitdir]['fingerprint'] is not None:
                logger.debug('Will use fingerprints to compare %s', gitdir)
                my_fingerprint = grokmirror.get_repo_fingerprint(toplevel,
                                                                 gitdir,
                                                                 force=force)

                if my_fingerprint != culled[gitdir]['fingerprint']:
                    logger.debug('No fingerprint match, will pull %s', gitdir)
                    changed = True
                else:
                    logger.debug('Fingerprints match, skipping %s', gitdir)
            else:
                logger.debug('Will use timestamps to compare %s', gitdir)
                if force:
                    logger.debug('Will force-pull %s', gitdir)
                    changed = True
                    # set timestamp to 0 as well
                    grokmirror.set_repo_timestamp(toplevel, gitdir, 0)
                else:
                    ts = grokmirror.get_repo_timestamp(toplevel, gitdir)
                    if ts < culled[gitdir]['modified']:
                        changed = True

            if changed:
                to_pull.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue
            else:
                logger.debug('Repo %s unchanged', gitdir)
                # if we don't have a fingerprint for it, add it now
                if culled[gitdir]['fingerprint'] is None:
                    fpr = grokmirror.get_repo_fingerprint(toplevel, gitdir)
                    culled[gitdir]['fingerprint'] = fpr
                existing.append(gitdir)
                grokmirror.unlock_repo(fullpath)
                continue

        else:
            # Newly incoming repo
            to_clone.append(gitdir)
            grokmirror.unlock_repo(fullpath)
            continue

        # If we got here, something is odd.
        # noinspection PyUnreachableCode
        logger.critical('Could not figure out what to do with %s', gitdir)
        grokmirror.unlock_repo(fullpath)

    logger.info('Compared new manifest against %s repositories in %0.2fs',
                len(culled), e_cmp.elapsed)
    e_cmp.close()

    if verify:
        if len(verify_fails):
            logger.critical('%s repos failed to verify', len(verify_fails))
            return 1
        else:
            logger.info('Verification successful')
            return 0

    hookscript = config['post_update_hook']

    # Phase 2: pull all changed repos using a thread pool
    if len(to_pull):
        if len(lock_fails) > 0:
            # Reduce the thread count by how many repos we couldn't lock
            pull_threads -= len(lock_fails)

        # Don't spin up more threads than we need
        if pull_threads > len(to_pull):
            pull_threads = len(to_pull)

        # exit if we're ever at 0 pull_threads. Shouldn't happen, but some extra
        # precaution doesn't hurt
        if pull_threads <= 0:
            logger.info('Too many repositories locked. Exiting.')
            return 0

        logger.info('Will use %d threads to pull repos', pull_threads)

        # noinspection PyTypeChecker
        e_pull = em.counter(total=len(to_pull), desc='Updating :',
                            unit='repos', leave=False)
        logger.info('Updating %s repos from %s', len(to_pull), config['site'])
        in_queue = Queue()
        out_queue = Queue()

        for gitdir in to_pull:
            in_queue.put((gitdir, culled[gitdir]['fingerprint'],
                          culled[gitdir]['modified']))

        for i in range(pull_threads):
            logger.debug('Spun up thread %s', i)
            t = PullerThread(in_queue, out_queue, config, i, e_pull)
            t.setDaemon(True)
            t.start()

        # wait till it's all done
        in_queue.join()
        logger.info('All threads finished.')

        while not out_queue.empty():
            # see if any of it failed
            (gitdir, my_fingerprint, status) = out_queue.get()
            # We always record our fingerprint in our manifest
            culled[gitdir]['fingerprint'] = my_fingerprint
            if not status:
                # To make sure we check this again during next run,
                # fudge the manifest accordingly.
                logger.debug('Will recheck %s during next run', gitdir)
                culled[gitdir] = mymanifest[gitdir]
                # this is rather hackish, but effective
                last_modified -= 1

        logger.info('Updates completed in %0.2fs', e_pull.elapsed)
        e_pull.close()
    else:
        logger.info('No repositories need updating')

    # how many lockfiles have we seen?
    # If there are more lock_fails than there are
    # pull_threads configured, we skip cloning out of caution
    if len(to_clone) and len(lock_fails) > pull_threads:
        logger.info('Too many repositories locked. Skipping cloning new repos.')
        to_clone = []

    # Phase 3: clone any new repos, in an order that lets us reuse
    # already-present repos as alternates references
    if len(to_clone):
        # noinspection PyTypeChecker
        e_clone = em.counter(total=len(to_clone), desc='Cloning :',
                             unit='repos', leave=False)
        logger.info('Cloning %s repos from %s', len(to_clone), config['site'])
        # we use "existing" to track which repos can be used as references
        existing.extend(to_pull)

        to_clone_sorted = []
        clone_order(to_clone, manifest, to_clone_sorted, existing)

        for gitdir in to_clone_sorted:
            e_clone.refresh()

            fullpath = os.path.join(toplevel, gitdir.lstrip('/'))

            # Did grok-fsck request to reclone it?
            rfile = os.path.join(fullpath, 'grokmirror.reclone')
            if os.path.exists(rfile):
                logger.debug('Removing %s for reclone', gitdir)
                shutil.move(fullpath, '%s.reclone' % fullpath)
                shutil.rmtree('%s.reclone' % fullpath)

            # Do we still need to clone it, or has another process
            # already done this for us?
            ts = grokmirror.get_repo_timestamp(toplevel, gitdir)

            if ts > 0:
                logger.debug('Looks like %s already cloned, skipping', gitdir)
                continue

            try:
                grokmirror.lock_repo(fullpath, nonblocking=True)
            except IOError:
                logger.info('Could not lock %s, skipping', gitdir)
                lock_fails.append(gitdir)
                e_clone.update()
                continue

            reference = None
            if config['ignore_repo_references'] != 'yes':
                reference = culled[gitdir]['reference']

            if reference is not None and reference in existing:
                # Make sure we can lock the reference repo
                refrepo = os.path.join(toplevel, reference.lstrip('/'))
                try:
                    grokmirror.lock_repo(refrepo, nonblocking=True)
                    success = clone_repo(toplevel, gitdir, config['site'],
                                         reference=reference)
                    grokmirror.unlock_repo(refrepo)
                except IOError:
                    logger.info('Cannot lock reference repo %s, skipping %s',
                                reference, gitdir)
                    if reference not in lock_fails:
                        lock_fails.append(reference)

                    grokmirror.unlock_repo(fullpath)
                    e_clone.update()
                    continue
            else:
                success = clone_repo(toplevel, gitdir, config['site'])

            # check dir to make sure cloning succeeded and then add to existing
            if os.path.exists(fullpath) and success:
                logger.debug('Cloning of %s succeeded, adding to existing',
                             gitdir)
                existing.append(gitdir)

                desc = culled[gitdir].get('description')
                owner = culled[gitdir].get('owner')
                ref = culled[gitdir].get('reference')

                if owner is None:
                    owner = config['default_owner']
                set_repo_params(toplevel, gitdir, owner, desc, ref)
                set_agefile(toplevel, gitdir, culled[gitdir]['modified'])
                my_fingerprint = grokmirror.set_repo_fingerprint(toplevel,
                                                                 gitdir)
                culled[gitdir]['fingerprint'] = my_fingerprint
                run_post_update_hook(hookscript, toplevel, gitdir)
            else:
                logger.warning('Was not able to clone %s', gitdir)
                # Remove it from our manifest so we can try re-cloning
                # next time grok-pull runs
                del culled[gitdir]
                git_fails.append(gitdir)

            grokmirror.unlock_repo(fullpath)
            e_clone.update()

        logger.info('Clones completed in %0.2fs' % e_clone.elapsed)
        e_clone.close()

    else:
        logger.info('No repositories need cloning')

    # loop through all entries and find any symlinks we need to set
    # We also collect all symlinks to do purging correctly
    symlinks = []

    for gitdir in culled.keys():
        if 'symlinks' in culled[gitdir].keys():
            source = os.path.join(config['toplevel'], gitdir.lstrip('/'))
            for symlink in culled[gitdir]['symlinks']:
                if symlink not in symlinks:
                    symlinks.append(symlink)
                target = os.path.join(config['toplevel'], symlink.lstrip('/'))

                if os.path.exists(source):
                    if os.path.islink(target):
                        # are you pointing to where we need you?
                        if os.path.realpath(target) != source:
                            # Remove symlink and recreate below
                            logger.debug('Removed existing wrong symlink %s',
                                         target)
                            os.unlink(target)
                    elif os.path.exists(target):
                        logger.warning('Deleted repo %s, because it is now'
                                       ' a symlink to %s' % (target, source))
                        shutil.rmtree(target)

                    # Here we re-check if we still need to do anything
                    if not os.path.exists(target):
                        logger.info('Symlinking %s -> %s', target, source)
                        # Make sure the leading dirs are in place
                        if not os.path.exists(os.path.dirname(target)):
                            os.makedirs(os.path.dirname(target))
                        os.symlink(source, target)

    manifile = config['mymanifest']
    grokmirror.manifest_lock(manifile)

    # Is the local manifest newer than last_modified? That would indicate
    # that another process has run and "culled" is no longer the latest info
    if os.path.exists(manifile):
        fstat = os.stat(manifile)
        if fstat[8] > last_modified:
            logger.info('Local manifest is newer, not saving.')
            grokmirror.manifest_unlock(manifile)
            return 0

    # Phase 4 (optional): purge on-disk repos that are no longer in the
    # remote manifest and are not needed as symlinks or alternates
    if purge:
        to_purge = []
        found_repos = 0
        for founddir in grokmirror.find_all_gitdirs(config['toplevel']):
            gitdir = founddir.replace(config['toplevel'], '')
            found_repos += 1

            if gitdir not in culled.keys() and gitdir not in symlinks:
                to_purge.append(founddir)

        if len(to_purge):
            # Purge-protection engage
            try:
                purge_limit = int(config['purgeprotect'])
                assert 1 <= purge_limit <= 99
            except (ValueError, AssertionError):
                logger.critical('Warning: "%s" is not valid for purgeprotect.',
                                config['purgeprotect'])
                logger.critical('Please set to a number between 1 and 99.')
                logger.critical('Defaulting to purgeprotect=5.')
                purge_limit = 5

            # found_repos cannot be 0 here, since to_purge is non-empty
            purge_pc = len(to_purge) * 100 / found_repos
            logger.debug('purgeprotect=%s', purge_limit)
            # BUGFIX: was 'purge prercentage=%s' (typo in the log message)
            logger.debug('purge percentage=%s', purge_pc)

            if not forcepurge and purge_pc >= purge_limit:
                logger.critical('Refusing to purge %s repos (%s%%)',
                                len(to_purge), purge_pc)
                logger.critical('Set purgeprotect to a higher percentage, or'
                                ' override with --force-purge.')
                logger.info('Not saving local manifest')
                return 1
            else:
                # noinspection PyTypeChecker
                e_purge = em.counter(total=len(to_purge), desc='Purging :',
                                     unit='repos', leave=False)
                for founddir in to_purge:
                    e_purge.refresh()
                    if os.path.islink(founddir):
                        # BUGFIX: was logging stale 'gitdir' left over from
                        # the find_all_gitdirs loop above; log the symlink
                        # we are actually removing
                        logger.info('Removing unreferenced symlink %s',
                                    founddir)
                        os.unlink(founddir)
                    else:
                        # is anything using us for alternates?
                        gitdir = '/' + os.path.relpath(
                            founddir, toplevel).lstrip('/')
                        if grokmirror.is_alt_repo(toplevel, gitdir):
                            logger.info('Not purging %s because it is used by '
                                        'other repos via alternates', founddir)
                        else:
                            try:
                                logger.info('Purging %s', founddir)
                                grokmirror.lock_repo(founddir,
                                                     nonblocking=True)
                                shutil.rmtree(founddir)
                            except IOError:
                                lock_fails.append(gitdir)
                                logger.info('%s is locked, not purging',
                                            gitdir)
                    e_purge.update()

                logger.info('Purging completed in %0.2fs', e_purge.elapsed)
                e_purge.close()

        else:
            logger.info('No repositories need purging')

    # Done with progress bars
    em.stop()

    # Go through all repos in culled and get the latest local timestamps.
    for gitdir in culled:
        ts = grokmirror.get_repo_timestamp(toplevel, gitdir)
        culled[gitdir]['modified'] = ts

    # If there were any lock failures, we fudge last_modified to always
    # be older than the server, which will force the next grokmirror run.
    if len(lock_fails):
        logger.info('%s repos could not be locked. Forcing next run.',
                    len(lock_fails))
        last_modified -= 1
    elif len(git_fails):
        logger.info('%s repos failed. Forcing next run.', len(git_fails))
        last_modified -= 1

    # Once we're done, save culled as our new manifest
    grokmirror.write_manifest(manifile, culled, mtime=last_modified,
                              pretty=pretty)

    grokmirror.manifest_unlock(manifile)

    # write out projects.list, if asked to
    write_projects_list(culled, config)

    # NOTE(review): 127 appears to be a deliberate "work was performed"
    # sentinel (the "nothing to do" paths above return 0), mirroring the
    # grok-dumb-pull exit-code convention -- confirm against callers
    return 127