def main(*args):
    if len(args) >= 1:
        subcmd, *args = args
        if subcmd == 'mirror':
            return mirror(*args)
    log.error('usage: ')
    log.error(' fingertip saviour mirror <config-file> [<what-to-mirror>]')
    raise SystemExit()

def is_supported(dirpath):
    tmp = temp.disappearing_file(dstdir=dirpath)
    r = subprocess.Popen(['cp', '--reflink=always', tmp, tmp + '-reflink'],
                         stderr=subprocess.PIPE)
    _, err = r.communicate()
    r.wait()
    temp.remove(tmp, tmp + '-reflink')
    sure_not = b'failed to clone' in err and b'Operation not supported' in err
    if r.returncode and not sure_not:
        log.error('reflink support detection inconclusive, cache dir problems')
    return r.returncode == 0

def c_r_offline(self, request):
    cache_url = self.cache_url(request.url)
    log.debug(f'looking up {cache_url} in the cache')
    cache_data = self.cache.get(cache_url)
    if cache_data is None:
        log.error(f'{cache_url} not in cache and fingertip is offline')
        return False
    resp = self.serializer.loads(request, cache_data)
    if not resp:
        log.error(f'{cache_url} cache entry deserialization failed, ignored')
        return False
    log.warning(f'Using {cache_url} from offline cache')
    return resp

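# A hedged sketch of where a method like c_r_offline() could plug in:
# CacheControl's CacheController exposes cache_url(), self.cache and
# self.serializer, so an offline fallback can be bolted onto a controller
# subclass roughly like this (names and wiring are illustrative assumptions,
# not necessarily how this codebase does it).
from cachecontrol import CacheControlAdapter
from cachecontrol.controller import CacheController
import requests


class OfflineFallbackController(CacheController):  # hypothetical name
    def cached_request(self, request):
        resp = super().cached_request(request)  # normal freshness logic first
        if resp:
            return resp
        # fall back to any cached entry at all when offline
        return c_r_offline(self, request)


session = requests.Session()
session.mount('http://', CacheControlAdapter(
    controller_class=OfflineFallbackController))
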
def main(what=None):
    if what in ('setup', 'unmount', 'cleanup'):
        return globals()[what]()
    log.error('usage: ')
    log.error(' fingertip filesystem setup')
    log.error(' fingertip filesystem unmount')
    log.error(' fingertip filesystem cleanup')
    raise SystemExit()

def storage_setup_wizard():
    assert SETUP in ('auto', 'suggest', 'never')
    if SETUP == 'never':
        return
    size = SIZE
    os.makedirs(path.MACHINES, exist_ok=True)
    if not is_supported(path.MACHINES):
        log.warning(f'images directory {path.MACHINES} lacks reflink support')
        log.warning('without it, fingertip will thrash and fill up your SSD '
                    'in no time')
        backing_file = os.path.join(path.CACHE, 'for-machines.xfs')
        if not os.path.exists(backing_file):
            if SETUP == 'suggest':
                log.info(f'would you like to allow fingertip '
                         f'to allocate {size} at {backing_file} '
                         'for a reflink-enabled XFS loop mount?')
                log.info('(set FINGERTIP_SETUP="auto" environment variable'
                         ' to do it automatically)')
                i = input(f'[{size}]/different size/cancel/ignore> ').strip()
                if i == 'cancel':
                    log.error('cancelled')
                    sys.exit(1)
                elif i == 'ignore':
                    return
                size = i or size
            tmp = temp.disappearing_file(path.CACHE)
            create_supported_fs(tmp, size)
            os.rename(tmp, backing_file)

        log.info(f'fingertip will now mount the XFS image at {backing_file}')
        if SETUP == 'suggest':
            i = input(f'[ok]/skip/cancel> ').strip()
            if i == 'skip':
                log.warning('skipping; '
                            'fingertip will have no reflink superpowers')
                log.warning('tell your SSD I\'m sorry')
                return
            elif i and i != 'ok':
                log.error('cancelled')
                sys.exit(1)
        mount_supported_fs(backing_file, path.MACHINES)

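# create_supported_fs() and mount_supported_fs() are not shown above; a rough
# sketch of what such helpers could look like (command and option choices
# below are assumptions for illustration, not the actual implementation).
import getpass
import subprocess


def create_supported_fs_sketch(backing_file, size):
    # allocate a sparse file of the requested size and format it as XFS;
    # reflink=1 is explicit here, recent xfsprogs enable it by default
    subprocess.run(['truncate', '-s', size, backing_file], check=True)
    subprocess.run(['mkfs.xfs', '-q', '-m', 'reflink=1', backing_file],
                   check=True)


def mount_supported_fs_sketch(backing_file, mountpoint):
    # loop-mount the image over the machines directory
    # and hand it back to the invoking user
    subprocess.run(['sudo', 'mount', '-o', 'loop', backing_file, mountpoint],
                   check=True)
    subprocess.run(['sudo', 'chown', getpass.getuser(), mountpoint],
                   check=True)
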
def main(*args):
    if len(args) >= 1:
        subcmd, *args = args
        if subcmd == 'mirror':
            return mirror(*args)
        if subcmd == 'deduplicate' and not args:
            return deduplicate(log.Sublogger('deduplicate'))
    log.error('usage: ')
    log.error(' fingertip saviour mirror <config-file> [<what-to-mirror>]')
    log.error(' fingertip saviour deduplicate')
    raise SystemExit()

def main(what=None, older_than=0):
    if OFFLINE:
        log.error('FINGERTIP_OFFLINE set to 1, no cleanup')
        return
    if what == 'everything':
        return everything()
    if what == 'periodic':
        return periodic()
    elif what in ('downloads', 'logs', 'machines'):
        return globals()[what](older_than)
    log.error('usage: ')
    log.error(' fingertip cleanup downloads [<older-than>]')
    log.error(' fingertip cleanup logs [<older-than>]')
    log.error(' fingertip cleanup machines [<expired-for>|all]')
    log.error(' fingertip cleanup everything')
    log.error(' fingertip cleanup periodic')
    raise SystemExit()

def main(what=None, *args, **kwargs):
    if what == 'everything':
        return everything()
    if what == 'periodic':
        return periodic()
    elif what in ('downloads', 'logs', 'machines', 'tempfiles'):
        return globals()[what](*args, **kwargs)
    log.error('usage: ')
    log.error(' fingertip cleanup downloads [<older-than>]')
    log.error(' fingertip cleanup logs [<older-than>]')
    log.error(' fingertip cleanup machines [<expired-for>|all]')
    log.error(' fingertip cleanup tempfiles [<older-than> [<location>]]')
    log.error(' fingertip cleanup everything')
    log.error(' fingertip cleanup periodic')
    raise SystemExit()

def mirror(config, *what_to_mirror, deduplicate=None):
    total_failures = []
    failures = collections.defaultdict(list)

    with open(config) as f:
        config = ruamel.yaml.YAML(typ='safe').load(f)
    if 'mirror' in config and not config['mirror']:
        log.warning('mirroring is disabled in config')
        return

    hows, whats = config['how'], config['what']
    if not what_to_mirror:
        what_to_mirror = whats.keys()
    else:
        what_to_mirror = ([k for k in whats.keys()
                           if any(fnmatch.fnmatch(k, req)
                                  for req in what_to_mirror)] +
                          [k for k in what_to_mirror if '=' in k])
    if not what_to_mirror:
        log.error('nothing to mirror')
        return

    for resource in what_to_mirror:
        log.debug(f'processing {resource}...')

        if '=' not in resource:  # example: alpine-3.13
            resource_name, tail = resource, ''
            s = whats[resource_name]
        else:  # example: alpine-3.13=alpine/v3.13/main/x86
            resource_name, s = resource.split('=', 1)
            # FIXME UGLY: config overrides are stronger than '=' (more syntax?)
            # TODO: whats shouldn't be a dict, I think, just a list of strings
            if resource_name in whats:
                s = whats[resource_name]

        if s is None:
            s = resource_name
        if '/' in s:
            how_name, suffix = s.split('/', 1)
            suffix = '/' + suffix
        else:
            how_name, suffix = s, ''

        try:
            how = hows[how_name]
        except KeyError:
            log.error(f'missing how section on {how_name}')
            raise SystemExit()

        url = how['url'] + suffix
        method = how['method']
        sources = (how['sources'] if 'sources' in how else [how['url']])
        sources = [s + suffix for s in sources]
        extra_args = {k: v for k, v in how.items()
                      if k not in ('url', 'sources', 'method',
                                   'validate', 'deduplicate')}

        if f'method_{method}' not in globals():
            log.error(f'unsupported method {method}')
            raise SystemExit()
        meth = globals()[f'method_{method}']
        symlink = path.saviour(url.rstrip('/'))
        # usually symlink points to data, but while we're working on it,
        # it temporarily points to a consistent snapshot of it named `snap`
        data = os.path.realpath(path.saviour('_', resource_name, 'data'))
        snap = os.path.realpath(path.saviour('_', resource_name, 'snap'))
        temp = os.path.realpath(path.saviour('_', resource_name, 'temp'))
        lockfile = path.saviour('_', resource_name) + '-lock'
        assert data.startswith(os.path.realpath(path.SAVIOUR))
        assert snap.startswith(os.path.realpath(path.SAVIOUR))
        assert temp.startswith(os.path.realpath(path.SAVIOUR))

        sublog = log.Sublogger(f'{method} {resource_name}')
        sublog.info('locking...')
        with lock.Lock(lockfile):
            os.makedirs(os.path.dirname(snap), exist_ok=True)

            if os.path.exists(temp):
                sublog.info('removing stale temp...')
                _remove(temp)

            if os.path.exists(symlink):  # it's already published
                if os.path.exists(data) and not os.path.exists(snap):
                    # `data` is present and is the best we have to publish
                    sublog.info('snapshotting...')
                    reflink.always(data, temp, preserve=True)
                    os.rename(temp, snap)
                if os.path.exists(snap):
                    # link to a consistent snapshot while we work on `data`
                    _symlink(snap, symlink)

            for source in sources:
                sublog.info(f'trying {source}...')
                try:
                    meth(sublog, source, snap, data, **extra_args)
                    assert os.path.exists(data)
                    if 'validate' in how:
                        sublog.info(f'validating with {how["validate"]}...')
                        validator = globals()[f'validate_{how["validate"]}']
                        validator(sublog, source, data)
                        sublog.info('validated')
                    break
                except Exception as _:
                    traceback.print_exc()
                    failures[resource_name].append(source)
                    fingertip.util.log.warning(f'failed to mirror {source}')

            if len(failures[resource_name]) == len(sources):
                sublog.error(f'failed to mirror '
                             f'from all {len(sources)} sources')
                total_failures.append(resource_name)
                continue

            _symlink(data, symlink)
            if os.path.exists(snap):
                os.rename(snap, temp)  # move it out the way asap
                sublog.info('removing now obsolete snapshot...')
                _remove(temp)

        how_deduplicate = how.get('deduplicate', True)
        db_name = how_deduplicate if how_deduplicate is not True else how_name
        if how_deduplicate and deduplicate is not False:
            try:
                _deduplicate(sublog, db_name, resource_name, timeout=1)
            except lock.LockTimeout:
                log.warning(f'skipped deduplication of {resource_name}, '
                            f'db {db_name} was locked')

    if total_failures:
        fingertip.util.log.error(f'failed: {", ".join(total_failures)}')
        raise FailureToMirrorError(", ".join(total_failures))
    log.info('saviour has completed mirroring')

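# For illustration, a config that mirror() above would accept could look like
# the YAML below. The 'alpine' how-section, its URL and the rsync method are
# assumptions (valid methods are whichever method_* handlers the module
# defines); only the <name>=<how>/<suffix> resource syntax and the
# alpine-3.13=alpine/v3.13/main/x86 example come from the code above.
import tempfile
import textwrap

example_config = textwrap.dedent('''\
    how:
      alpine:                              # a "how" section
        url: https://dl-cdn.alpinelinux.org/alpine
        sources:                           # mirrors to try, in order
          - https://dl-cdn.alpinelinux.org/alpine
        method: rsync                      # assumed method_rsync handler
    what:
      alpine-3.13: alpine/v3.13/main/x86   # <how-name>/<suffix>
''')

with tempfile.NamedTemporaryFile('w', suffix='.yml', delete=False) as f:
    f.write(example_config)

mirror(f.name)                 # mirror everything listed under `what`
mirror(f.name, 'alpine-*')     # or only resources matching a glob
mirror(f.name, 'alpine-3.14=alpine/v3.14/main/x86')  # ad-hoc extra resource
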
def __init__(self, url, *path_components, enough_to_have=None):
    # judging from the lock.Lock.__init__ and git.Repo.__init__ calls below,
    # this is the constructor of a class mixing both in (class not shown here)
    if not path_components:
        path_components = [url.replace('/', '::')]
    self.url = url
    cache_path = path.downloads('git', *path_components, makedirs=True)
    self.path = temp.disappearing_dir(os.path.dirname(cache_path),
                                      path_components[-1])
    lock_working_copy_path = self.path + '-lock'
    lock_cache_path = cache_path + '-lock'
    lock.Lock.__init__(self, lock_working_copy_path)
    sources = saviour_sources()
    self.self_destruct = False
    with lock.Lock(lock_cache_path), lock.Lock(lock_working_copy_path):
        cache_is_enough = False
        if os.path.exists(cache_path):
            try:
                cr = git.Repo(cache_path)
                cache_is_enough = (enough_to_have and
                                   _has_rev(cr, enough_to_have))
            except git.GitError as e:
                log.error(f'something wrong with git cache {cache_path}')
                log.error(str(e))
                _remove(self.path)

        for i, (source, cache) in enumerate(sources):
            last_source = i == len(sources) - 1

            if cache and cache_is_enough:
                log.info(f'not re-fetching {url} from {source} '
                         f'because {enough_to_have} '
                         'is already present in cache')
                git.Repo.clone_from(cache_path, self.path, mirror=True)
                break

            if source == 'local':
                surl = path.saviour(url).replace('//', '/')  # workaround
                if not os.path.exists(surl) and not last_source:
                    continue
                log.info(f'cloning {url} from local saviour mirror')
                git.Repo.clone_from(surl, self.path, mirror=True)
                break
            elif source == 'direct':
                surl = url
            else:
                surl = source + '/' + url
                surl = 'http://' + surl if '://' not in source else surl

            log.info(f'cloning {url} from {source} '
                     f'cache_exists={os.path.exists(cache_path)}...')
            try:
                # TODO: bare clone
                # no harm in referencing cache, even w/o cached+
                git.Repo.clone_from(surl, self.path, mirror=True,
                                    dissociate=True,
                                    reference_if_able=cache_path)
            except git.GitError:
                log.warning(f'could not clone {url} from {source}')
                if last_source:
                    raise
                continue
            break

        _remove(cache_path)
        reflink.auto(self.path, cache_path)
    git.Repo.__init__(self, self.path)
    self.remotes[0].set_url(url)
    self.self_destruct = True

def _serve_http(self, uri, headers, meth='GET',
                cache=True, retries=RETRIES_MAX):
    sess = http_cache._get_requests_session(direct=not cache)
    sess_dir = http_cache._get_requests_session(direct=True)
    basename = os.path.basename(uri)

    headers = {k: v for k, v in headers.items()
               if not (k in STRIP_HEADERS or k.startswith('Proxy-'))}
    headers['Accept-Encoding'] = 'identity'
    log.debug(f'{meth} {basename} ({uri})')
    for k, v in headers.items():
        log.debug(f'{k}: {v}')

    error = None
    try:
        if meth == 'GET':
            # direct streaming or trickery might be required...
            preview = sess.head(uri, headers=headers, allow_redirects=False)
            if (300 <= preview.status_code < 400 and
                    'Location' in preview.headers):
                nu = preview.headers['Location']
                if nu.startswith('https://'):
                    # no point in serving that, we have to pretend
                    # that never happened
                    log.debug(f'suppressing HTTPS redirect {nu}')
                    return self._serve_http(nu, headers, meth=meth,
                                            cache=cache, retries=retries)
            direct = []
            if not cache:
                direct.append('caching disabled for this source')
            if int(preview.headers.get('Content-Length', 0)) > BIG:
                direct.append(f'file bigger than {BIG}')
            if 'Range' in headers:
                # There seems to be a bug in CacheControl
                # that serves contents in full if a range request
                # hits a non-ranged cached entry.
                direct.append('ranged request, playing safe')
            if direct:
                # Don't cache, don't reencode, stream it as is
                log.debug(f'streaming {basename} directly '
                          f'from {uri} ({", ".join(direct)})')
                r = sess_dir.get(uri, headers=headers, stream=True)
                self._status_and_headers(r.status_code, r.headers)
                shutil.copyfileobj(r.raw, self.wfile)
                return

        # fetch with caching
        m_func = getattr(sess, meth.lower())
        r = m_func(uri if '://' in uri else 'http://self' + uri,
                   headers=headers, allow_redirects=False)
        data = r.content
        if 'Content-Length' in r.headers:
            length = int(r.headers['Content-Length'])
            if len(data) != length:
                data = hack_around_unpacking(uri, headers, data)
            assert len(data) == length
    except BrokenPipeError:
        error = f'Upwards broken pipe for {meth} {uri}'
    except ConnectionResetError:
        error = f'Upwards connection reset for {meth} {uri}'
    except requests.exceptions.ConnectionError:
        error = f'Upwards connection error for {meth} {uri}'
    if error:
        # delay a re-request
        if retries:
            log.warning(f'{error} (will retry x{retries})')
            t = (RETRIES_MAX - retries) / RETRIES_MAX * COOLDOWN
            time.sleep(t)
            return self._serve_http(uri, headers, meth=meth,
                                    cache=cache, retries=retries - 1)
        else:
            log.error(f'{error} (out of retries)')
            self.send_error(http.HTTPStatus.SERVICE_UNAVAILABLE)
            return

    log.debug(f'{meth} {basename} fetched {r.status_code} ({uri})')
    try:
        self._status_and_headers(r.status_code, r.headers)
        if meth == 'GET':
            self.wfile.write(data)
    except BrokenPipeError:
        log.warning(f'Downwards broken pipe for {meth} {uri}')
    except ConnectionResetError:
        log.warning(f'Downwards connection reset for {meth} {uri}')
    except requests.exceptions.ConnectionError:
        log.warning(f'Downwards connection error for {meth} {uri}')
    log.debug(f'{meth} {basename} served ({uri})')

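# The retry path above backs off linearly: each failed attempt sleeps a larger
# fraction of COOLDOWN. A tiny standalone illustration (the constants here are
# made up; the real RETRIES_MAX and COOLDOWN are defined elsewhere).
RETRIES_MAX, COOLDOWN = 5, 30  # assumed values, for illustration only

for retries in range(RETRIES_MAX, 0, -1):
    t = (RETRIES_MAX - retries) / RETRIES_MAX * COOLDOWN
    print(f'{retries} retries left -> sleep {t:.0f}s before re-requesting')
# prints 0s, 6s, 12s, 18s and 24s: the delay grows as retries run out
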
def mirror(config, *what_to_mirror):
    total_failures = []
    failures = collections.defaultdict(list)

    with open(config) as f:
        config = ruamel.yaml.YAML(typ='safe').load(f)
    hows, whats = config['how'], config['what']
    if not what_to_mirror:
        what_to_mirror = whats.keys()
    else:
        what_to_mirror = [k for k in whats.keys()
                          if any(fnmatch.fnmatch(k, req)
                                 for req in what_to_mirror)]

    for resource_name in what_to_mirror or whats.keys():
        s = whats[resource_name]
        log.debug(f'processing {resource_name}...')

        if s is None:
            how, suffix = resource_name, ''
        elif '/' in s:
            how, suffix = s.split('/', 1)
            suffix = '/' + suffix
        else:
            how, suffix = s, ''

        try:
            how = hows[how]
        except KeyError:
            log.error(f'missing how section on {how}')
            raise SystemExit()

        url = how['url'] + suffix
        method = how['method']
        sources = (how['sources'] if 'sources' in how else [how['url']])
        sources = [s + suffix for s in sources]
        extra_args = {k: v for k, v in how.items()
                      if k not in ('url', 'sources', 'method')}

        if f'method_{method}' not in globals():
            log.error(f'unsupported method {method}')
            raise SystemExit()
        meth = globals()[f'method_{method}']
        symlink = path.saviour(url.rstrip('/'))
        # usually symlink points to data, but while we're working on it,
        # it temporarily points to a consistent snapshot of it named `snap`
        data = path.saviour('_', resource_name, 'data')
        snap = path.saviour('_', resource_name, 'snap')
        temp = path.saviour('_', resource_name, 'temp')
        lockfile = path.saviour('_', resource_name) + '-lock'
        assert data.startswith(path.SAVIOUR)
        assert snap.startswith(path.SAVIOUR)
        assert temp.startswith(path.SAVIOUR)

        sublog = log.Sublogger(f'{method} {resource_name}')
        sublog.info('locking...')
        with lock.Lock(lockfile):
            os.makedirs(os.path.dirname(snap), exist_ok=True)

            if os.path.exists(temp):
                sublog.info('removing stale temp...')
                _remove(temp)

            if os.path.exists(symlink):  # it's already published
                if os.path.exists(data) and not os.path.exists(snap):
                    # `data` is present and is the best we have to publish
                    sublog.info('snapshotting...')
                    reflink.always(data, temp, preserve=True)
                    os.rename(temp, snap)
                if os.path.exists(snap):
                    # link to a consistent snapshot while we work on `data`
                    _symlink(snap, symlink)

            for source in sources:
                sublog.info(f'trying {source}...')
                try:
                    meth(sublog, source, snap, data, **extra_args)
                    assert os.path.exists(data)
                    break
                except Exception as _:
                    traceback.print_exc()
                    failures[resource_name].append(source)
                    fingertip.util.log.warning(f'failed to mirror {source}')

            if len(failures[resource_name]) == len(sources):
                sublog.error(f'failed to mirror '
                             f'from all {len(sources)} sources')
                total_failures.append(resource_name)
                continue

            _symlink(data, symlink)
            if os.path.exists(snap):
                os.rename(snap, temp)  # move it out the way asap
                sublog.info('removing now obsolete snapshot...')
                _remove(temp)

        try:
            deduplicate(sublog, resource_name, timeout=1)
        except lock.LockTimeout:
            log.warning('skipped deduplication, db was locked')

    if total_failures:
        fingertip.util.log.error(f'failed: {", ".join(total_failures)}')
        raise SystemExit()
    log.info('saviour has completed mirroring')