def main(*args):
    """CLI entry point: dispatch to the `mirror` or `deduplicate` subcommand.

    Accepts argv-style positional arguments. Prints usage and exits with a
    non-zero status when the subcommand is missing or unrecognized.
    """
    if len(args) >= 1:
        subcmd, *args = args
        if subcmd == 'mirror':
            return mirror(*args)
        if subcmd == 'deduplicate' and not args:
            return deduplicate(log.Sublogger('deduplicate'))
    log.error('usage: ')
    log.error(' fingertip saviour mirror <config-file> [<what-to-mirror>]')
    log.error(' fingertip saviour deduplicate')
    # BUGFIX: was `raise SystemExit()`, which exits with status 0 —
    # a usage error must be reflected in the exit code.
    raise SystemExit(1)
def _load_from_path(data_dir_path):
    """Unpickle a machine stored under *data_dir_path* and run its load hooks.

    Returns the machine in the 'loaded' state.
    """
    log.debug(f'load from {data_dir_path}')
    pickle_file = os.path.join(data_dir_path, 'machine.clpickle')
    with open(pickle_file, 'rb') as pf:
        machine = cloudpickle.load(pf)
    # A pickled machine must have been captured mid-save.
    assert machine._state == 'saving'
    machine._state = 'loading'
    machine.log = log.Sublogger('<unknown>')
    # Sanity-check that the machine was pickled from this very location.
    assert machine.path == data_dir_path
    parent = os.path.realpath(os.path.dirname(data_dir_path))
    assert machine._parent_path == parent
    machine.hooks.load()
    machine._state = 'loaded'
    return machine
def clone_and_load(from_path, name_hint=None):
    """Clone the machine saved at *from_path* into a temp dir and load it.

    Returns the freshly-loaded clone (via ``_load_from_path``).
    """
    log.debug(f'clone {from_path}')
    clone_path = temp.disappearing_dir(from_path, hint=name_hint)
    log.debug(f'temp = {clone_path}')
    os.makedirs(clone_path, exist_ok=True)
    # Read the pickled machine from the source directory...
    with open(os.path.join(from_path, 'machine.clpickle'), 'rb') as src:
        machine = cloudpickle.load(src)
    machine.log = log.Sublogger('<cloning>')
    machine.hooks.clone(clone_path)
    machine._parent_path = os.path.realpath(from_path)
    machine.path = clone_path
    # ...and re-pickle it into the clone's directory before loading it.
    with open(os.path.join(machine.path, 'machine.clpickle'), 'wb') as dst:
        cloudpickle.dump(machine, dst)
    return _load_from_path(clone_path)
def build(first_step, *args, fingertip_last_step=False, **kwargs):
    """Build (or reuse a cached) machine for *first_step*.

    Resolves the step to a function and cache tag, rebuilds if the cached
    result is missing or stale, and returns either a loaded machine clone
    or (when ``fingertip_last_step`` is set) a path to the log file.
    Transient results are built, their log captured if requested, and
    discarded instead of being cached.
    """
    func, tag = step_loader.func_and_autotag(first_step, *args, **kwargs)

    # Could there already be a cached result?
    mpath = path.machines(tag)
    lock_path = path.machines('.' + tag + '-lock')
    log.info(f'acquiring lock for {tag}...')

    # A step may declare itself transient; the hint may itself be a callable
    # that inspects the actual arguments.
    transient_hint = func.transient if hasattr(func, 'transient') else None
    if callable(transient_hint):
        transient_hint = supply_last_step_if_requested(transient_hint,
                                                       fingertip_last_step)
        transient_hint = transient_hint(*args, **kwargs)

    transient = (transient_hint in ('always', True) or
                 transient_hint == 'last' and fingertip_last_step)

    # Transient builds are never cached, so they need no lock.
    with lock.Lock(lock_path) if not transient else lock.NoLock():
        if not os.path.exists(mpath) or needs_a_rebuild(mpath):
            log.info(f'building {tag}...')
            func = supply_last_step_if_requested(func, fingertip_last_step)
            first = func(*args, **kwargs)

            if first is None:
                assert transient, 'first step returned None'
                return

            if transient:
                # BUGFIX: message said 'succesfully' (typo), inconsistent
                # with the 'successfully' wording used elsewhere in the file.
                log.info(f'successfully built and discarded {tag}')
                first._finalize()  # discard (not fast-dropped though)
                if transient_hint == 'last' and fingertip_last_step:
                    # Preserve only the log contents of the discarded machine.
                    fname = f'{datetime.datetime.utcnow().isoformat()}.txt'
                    t = path.logs(fname, makedirs=True)
                    with open(t, 'w') as f:
                        f.write(first.log_contents)
                    return t
            else:
                log.info(f'successfully built and saved {tag}')
                first._finalize(link_as=mpath, name_hint=tag)

    if fingertip_last_step:
        return os.path.join(mpath, 'log.txt')

    m = clone_and_load(mpath)
    m.log = log.Sublogger('fingertip.<just built>',
                          os.path.join(m.path, 'log.txt'))
    return m
def __init__(self, backend_name, sealed=True, expire_in='7d'):
    """Create a fresh machine backed by *backend_name*.

    The machine lives in a disappearing directory under path.MACHINES,
    starts in the 'spun_down' state, and logs into <path>/log.txt.
    `expire_in` is passed to expiration.Expiration (e.g. '7d').
    """
    self.hooks = hooks.HookManager()
    os.makedirs(path.MACHINES, exist_ok=True)
    # Auto-cleaned working directory; its parent is the machines root.
    self.path = temp.disappearing_dir(path.MACHINES)
    self._parent_path = path.MACHINES
    # States: loaded -> spun_up -> spun_down -> saved/dropped
    self._state = 'spun_down'
    self._transient = False
    # Counts nested spin-ups (balanced up/down).
    self._up_counter = 0
    self.sealed = sealed
    self.expiration = expiration.Expiration(expire_in)
    self.time_desync = time_desync.TimeDesync(self)
    self.backend = backend_name
    self.log = log.Sublogger(f'plugins.backend.{backend_name}',
                             os.path.join(self.path, 'log.txt'))
    self.log.debug(f'created {backend_name}')
    # When this machine is cloned, carry the log file over to the clone.
    self.hooks.clone.append(lambda to: reflink.auto(
        os.path.join(self.path, 'log.txt'),
        os.path.join(to, 'log.txt')))
def mirror(config, *what_to_mirror, deduplicate=None):
    """Mirror the resources described in the YAML *config* file.

    `what_to_mirror` selects resources by fnmatch pattern and/or explicit
    `name=how/suffix` overrides; with no selectors, everything is mirrored.
    For each resource: publish a consistent snapshot while fetching fresh
    data, try each source in turn (optionally validating), re-publish the
    fresh data, then deduplicate unless disabled (`deduplicate=False` or a
    falsy per-how 'deduplicate' setting).

    Raises FailureToMirrorError if any resource fails on all its sources.
    """
    total_failures = []
    failures = collections.defaultdict(list)

    with open(config) as f:
        config = ruamel.yaml.YAML(typ='safe').load(f)
    # A top-level `mirror: false` in config is a global kill switch.
    if 'mirror' in config and not config['mirror']:
        log.warning('mirroring is disabled in config')
        return

    hows, whats = config['how'], config['what']
    if not what_to_mirror:
        what_to_mirror = whats.keys()
    else:
        # fnmatch-filtered known names, plus any explicit `name=...` overrides
        what_to_mirror = ([k for k in whats.keys()
                           if any(fnmatch.fnmatch(k, req)
                                  for req in what_to_mirror)] +
                          [k for k in what_to_mirror if '=' in k])
    if not what_to_mirror:
        log.error('nothing to mirror')
        return

    for resource in what_to_mirror:
        log.debug(f'processing {resource}...')

        if '=' not in resource:  # example: alpine-3.13
            resource_name, tail = resource, ''
            s = whats[resource_name]
        else:  # example: alpine-3.13=alpine/v3.13/main/x86
            resource_name, s = resource.split('=', 1)
            # FIXME UGLY: config overrides are stronger that = (more syntax?)
            # TODO: whats shouldn't be a dict, I think, just a list of strings
            if resource_name in whats:
                s = whats[resource_name]

        # `s` is `how_name[/suffix]`; a bare resource name maps to itself.
        if s is None:
            s = resource_name
        if '/' in s:
            how_name, suffix = s.split('/', 1)
            suffix = '/' + suffix
        else:
            how_name, suffix = s, ''

        try:
            how = hows[how_name]
        except KeyError:
            log.error(f'missing how section on {how_name}')
            raise SystemExit()

        url = how['url'] + suffix
        method = how['method']
        sources = (how['sources'] if 'sources' in how else [how['url']])
        sources = [s + suffix for s in sources]
        # Everything else in the how-section is forwarded to the method.
        extra_args = {k: v for k, v in how.items()
                      if k not in ('url', 'sources', 'method',
                                   'validate', 'deduplicate')}

        # Mirroring methods are looked up by naming convention.
        if f'method_{method}' not in globals():
            log.error(f'unsupported method {method}')
            raise SystemExit()
        meth = globals()[f'method_{method}']
        symlink = path.saviour(url.rstrip('/'))
        # usually symlink points to data, but while we're working on it,
        # it temporarily points to a consistent snapshot of it named `snap`
        data = os.path.realpath(path.saviour('_', resource_name, 'data'))
        snap = os.path.realpath(path.saviour('_', resource_name, 'snap'))
        temp = os.path.realpath(path.saviour('_', resource_name, 'temp'))
        lockfile = path.saviour('_', resource_name) + '-lock'
        # Guard against escaping the saviour root via a crafted name.
        assert data.startswith(os.path.realpath(path.SAVIOUR))
        assert snap.startswith(os.path.realpath(path.SAVIOUR))
        assert temp.startswith(os.path.realpath(path.SAVIOUR))

        sublog = log.Sublogger(f'{method} {resource_name}')
        sublog.info('locking...')
        with lock.Lock(lockfile):
            os.makedirs(os.path.dirname(snap), exist_ok=True)

            if os.path.exists(temp):
                sublog.info('removing stale temp...')
                _remove(temp)
            if os.path.exists(symlink):  # it's already published
                if os.path.exists(data) and not os.path.exists(snap):
                    # `data` is present and is the best we have to publish
                    sublog.info('snapshotting...')
                    reflink.always(data, temp, preserve=True)
                    os.rename(temp, snap)
                if os.path.exists(snap):
                    # link to a consistent snapshot while we work on `data`
                    _symlink(snap, symlink)

            # First source that fetches (and validates, if configured) wins.
            for source in sources:
                sublog.info(f'trying {source}...')
                try:
                    meth(sublog, source, snap, data, **extra_args)
                    assert os.path.exists(data)
                    if 'validate' in how:
                        sublog.info(f'validating with {how["validate"]}...')
                        validator = globals()[f'validate_{how["validate"]}']
                        validator(sublog, source, data)
                        sublog.info('validated')
                    break
                except Exception as _:
                    traceback.print_exc()
                    failures[resource_name].append(source)
                    fingertip.util.log.warning(f'failed to mirror {source}')

            if len(failures[resource_name]) == len(sources):
                sublog.error(f'failed to mirror '
                             f'from all {len(sources)} sources')
                total_failures.append(resource_name)
                continue

            # Publish the fresh data and drop the now-obsolete snapshot.
            _symlink(data, symlink)
            if os.path.exists(snap):
                os.rename(snap, temp)  # move it out the way asap
                sublog.info('removing now obsolete snapshot...')
                _remove(temp)

            # A string 'deduplicate' value names an alternate dedup db.
            how_deduplicate = how.get('deduplicate', True)
            db_name = how_deduplicate if how_deduplicate is not True else how_name
            if how_deduplicate and deduplicate is not False:
                try:
                    # best-effort: skip if another process holds the db
                    _deduplicate(sublog, db_name, resource_name, timeout=1)
                except lock.LockTimeout:
                    log.warning(f'skipped deduplication of {resource_name}, '
                                f'db {db_name} was locked')
    if total_failures:
        fingertip.util.log.error(f'failed: {", ".join(total_failures)}')
        raise FailureToMirrorError(", ".join(total_failures))
    log.info('saviour has completed mirroring')
def _cache_aware_apply(self, step, tag, func, args, kwargs, last_step):
    """Apply *func* (a step) to this machine, reusing a cached result if any.

    Returns a freshly cloned machine for the step's result, or, when
    `last_step` is true, a path to the resulting log file instead.
    The machine must be in the 'loaded' state.
    """
    assert self._state == 'loaded'

    # Resolve the step's transiency hint; it may be a callable that
    # inspects the machine and the actual arguments.
    transient_hint = func.transient if hasattr(func, 'transient') else None
    if callable(transient_hint):
        transient_hint = supply_last_step_if_requested(
            transient_hint, last_step)
        transient_hint = transient_hint(self, *args, **kwargs)

    return_as_transient = self._transient
    exec_as_transient = (transient_hint in ('always', True) or
                         transient_hint == 'last' and last_step)
    log.debug(f'transient: {transient_hint}')
    log.debug(f'exec_as_transient: {exec_as_transient}')
    log.debug(f'return_as_transient: {return_as_transient}')
    self._transient = exec_as_transient

    # Could there already be a cached result?
    log.debug(f'PATH {self.path} {tag}')
    new_mpath = os.path.join(self._parent_path, tag)
    lock_path = os.path.join(self._parent_path, '.' + tag + '-lock')
    # Transient executions are never cached, so they need no lock.
    do_lock = not self._transient
    if do_lock:
        log.info(f'acquiring lock for {tag}...')
    prev_log_name = self.log.name
    self.log.finalize()
    with lock.Lock(lock_path) if do_lock else lock.NoLock():
        if (os.path.exists(new_mpath) and not needs_a_rebuild(new_mpath) and
                not exec_as_transient):
            # sweet, scratch this instance, fast-forward to cached result
            log.info(f'reusing {step} @ {new_mpath}')
            self._finalize()
            clone_from_path = new_mpath
        else:
            # loaded, not spun up, step not cached: perform step, cache
            log.info(f'applying (and, possibly, caching) {tag}')
            self.log = log.Sublogger('plugins.' + tag.split(':', 1)[0],
                                     os.path.join(self.path, 'log.txt'))
            func = supply_last_step_if_requested(func, last_step)
            m = func(self, *args, **kwargs)
            if m:
                if m._transient and transient_hint == 'last' and last_step:
                    assert m._state == 'dropped'
                    # transient-when-last step returned m
                    # just in case it's not the last, but it was.
                    # m is dropped already, only log contents is preserved.
                    fname = f'{datetime.datetime.utcnow().isoformat()}.txt'
                    t = path.logs(fname, makedirs=True)
                    with open(t, 'w') as f:
                        f.write(m.log_contents)
                    return t
                assert not m._transient, 'transient step returned a value'
                m._finalize(link_as=new_mpath, name_hint=tag)
                clone_from_path = new_mpath
                log.info(f'successfully applied and saved {tag}')
            else:  # transient step, either had hints or just returned None
                clone_from_path = self._parent_path
                log.info(f'successfully applied and dropped {tag}')
    if last_step:
        return os.path.join(clone_from_path, 'log.txt')
    m = clone_and_load(clone_from_path)
    # Restore the pre-step logger name on the fresh clone.
    m.log = log.Sublogger(prev_log_name, os.path.join(m.path, 'log.txt'))
    m._transient = return_as_transient
    return m
def mirror(config, *what_to_mirror):
    """Mirror the resources described in the YAML *config* file.

    `what_to_mirror` filters the configured resource names by fnmatch
    pattern; with no patterns, everything in the config is mirrored.
    For each resource: publish a consistent snapshot while fetching fresh
    data, try each source in turn, re-publish the fresh data, then
    deduplicate (best-effort).

    Raises SystemExit if any resource fails on all its sources.
    """
    total_failures = []
    failures = collections.defaultdict(list)

    with open(config) as f:
        config = ruamel.yaml.YAML(typ='safe').load(f)
    hows, whats = config['how'], config['what']
    if not what_to_mirror:
        what_to_mirror = whats.keys()
    else:
        what_to_mirror = [k for k in whats.keys()
                          if any(fnmatch.fnmatch(k, req)
                                 for req in what_to_mirror)]
    # BUGFIX: this used to be `for ... in what_to_mirror or whats.keys():`,
    # so patterns that matched nothing silently fell back to mirroring
    # *everything*. Make an empty selection an explicit error instead
    # (consistent with the newer mirror() variant in this file).
    if not what_to_mirror:
        log.error('nothing to mirror')
        return

    for resource_name in what_to_mirror:
        s = whats[resource_name]
        log.debug(f'processing {resource_name}...')

        # `s` is `how_name[/suffix]`; a bare resource name maps to itself.
        if s is None:
            how, suffix = resource_name, ''
        elif '/' in s:
            how, suffix = s.split('/', 1)
            suffix = '/' + suffix
        else:
            how, suffix = s, ''

        try:
            how = hows[how]
        except KeyError:
            log.error(f'missing how section on {how}')
            raise SystemExit()

        url = how['url'] + suffix
        method = how['method']
        sources = (how['sources'] if 'sources' in how else [how['url']])
        sources = [s + suffix for s in sources]
        # Everything else in the how-section is forwarded to the method.
        extra_args = {k: v for k, v in how.items()
                      if k not in ('url', 'sources', 'method')}

        # Mirroring methods are looked up by naming convention.
        if f'method_{method}' not in globals():
            log.error(f'unsupported method {method}')
            raise SystemExit()
        meth = globals()[f'method_{method}']
        symlink = path.saviour(url.rstrip('/'))
        # usually symlink points to data, but while we're working on it,
        # it temporarily points to a consistent snapshot of it named `snap`
        data = path.saviour('_', resource_name, 'data')
        snap = path.saviour('_', resource_name, 'snap')
        temp = path.saviour('_', resource_name, 'temp')
        lockfile = path.saviour('_', resource_name) + '-lock'
        # Guard against escaping the saviour root via a crafted name.
        assert data.startswith(path.SAVIOUR)
        assert snap.startswith(path.SAVIOUR)
        assert temp.startswith(path.SAVIOUR)

        sublog = log.Sublogger(f'{method} {resource_name}')
        sublog.info('locking...')
        with lock.Lock(lockfile):
            os.makedirs(os.path.dirname(snap), exist_ok=True)

            if os.path.exists(temp):
                sublog.info('removing stale temp...')
                _remove(temp)
            if os.path.exists(symlink):  # it's already published
                if os.path.exists(data) and not os.path.exists(snap):
                    # `data` is present and is the best we have to publish
                    sublog.info('snapshotting...')
                    reflink.always(data, temp, preserve=True)
                    os.rename(temp, snap)
                if os.path.exists(snap):
                    # link to a consistent snapshot while we work on `data`
                    _symlink(snap, symlink)

            # First source that fetches successfully wins.
            for source in sources:
                sublog.info(f'trying {source}...')
                try:
                    meth(sublog, source, snap, data, **extra_args)
                    assert os.path.exists(data)
                    break
                except Exception:
                    traceback.print_exc()
                    failures[resource_name].append(source)
                    fingertip.util.log.warning(f'failed to mirror {source}')

            if len(failures[resource_name]) == len(sources):
                sublog.error(f'failed to mirror '
                             f'from all {len(sources)} sources')
                total_failures.append(resource_name)
                continue

            # Publish the fresh data and drop the now-obsolete snapshot.
            _symlink(data, symlink)
            if os.path.exists(snap):
                os.rename(snap, temp)  # move it out the way asap
                sublog.info('removing now obsolete snapshot...')
                _remove(temp)

            try:
                # best-effort: skip if another process holds the db
                deduplicate(sublog, resource_name, timeout=1)
            except lock.LockTimeout:
                log.warning('skipped deduplication, db was locked')
    if total_failures:
        fingertip.util.log.error(f'failed: {", ".join(total_failures)}')
        raise SystemExit()
    log.info('saviour has completed mirroring')