Code Example #1
 def _finalize(self, link_as=None, name_hint=None):
     log.debug(f'finalize hint={name_hint} link_as={link_as} {self._state}')
     self.log.disable_hint()
     if link_as and self._state == 'spun_down':
         self.hooks.save.in_reverse()
         temp_path = self.path
         self.path = temp.unique_dir(self._parent_path, hint=name_hint)
         log.debug(f'saving to temp {temp_path}')
         self._state = 'saving'
         self.expiration.depend_on_loaded_python_modules()
         del self.log
         with open(os.path.join(temp_path, 'machine.clpickle'), 'wb') as f:
             cloudpickle.dump(self, f)
         log.debug(f'moving {temp_path} to {self.path}')
         os.rename(temp_path, self.path)
         self._state = 'saved'
         link_this = self.path
     else:
         assert self._state in ('spun_down', 'loaded', 'dropped')
         log.info(f'discarding {self.path}')
         temp.remove(self.path)
         link_this = self._parent_path
         self._state = 'dropped'
     if (link_this and link_as and
             os.path.realpath(link_as) != os.path.realpath(link_this)):
         log.debug(f'linking {link_this} to {link_as}')
         if os.path.lexists(link_as):
             if os.path.exists(link_as) and not needs_a_rebuild(link_as):
                 log.critical(f'Refusing to overwrite fresh {link_as}')
                 raise RuntimeError(f'Not overriding fresh {link_as}')
             os.unlink(link_as)
         os.symlink(link_this, link_as)
         return link_as
Code Example #2
def schedule():
    # Do this only if fingertip is in PATH
    if not shutil.which("fingertip"):
        log.debug('No `fingertip` found in PATH. Not scheduling '
                  'automatic cleanup.')
        return

    # Skip if systemd is not available
    if not shutil.which('systemd-run') or not shutil.which('systemctl'):
        log.warning('It looks like systemd is not available. '
                    'No cleanup is scheduled! If you are running out of disk '
                    'space, run `fingertip cleanup periodic` manually.')
        return

    # If the timer is already installed, skip installation too
    p = subprocess.run([
        'systemctl', '--user', 'is-active', '--quiet',
        'fingertip-cleanup.timer'
    ])
    if p.returncode == 0:
        log.debug('The systemd timer handling cleanup is already installed '
                  'and running.')
        return

    # Run every two hours
    log.info('Scheduling cleanup to run every two hours')
    subprocess.run([
        'systemd-run', '--unit=fingertip-cleanup', '--user',
        '--on-calendar=0/2:00:00', 'fingertip', 'cleanup', 'periodic'
    ])
Code Example #3
File: cleanup.py Project: t184256/fingertip
def machines(expired_for=0):
    if expired_for != 'all':
        adjusted_time = time.time() - units.parse_time_interval(expired_for)
    else:
        adjusted_time = None
    for root, dirs, files in os.walk(path.MACHINES, topdown=False):
        for d in (os.path.join(root, x) for x in dirs):
            lock_path = os.path.join(root, '.' + os.path.basename(d) + '-lock')
            lock = fasteners.process_lock.InterProcessLock(lock_path)
            lock.acquire()
            try:
                remove = fingertip.machine.needs_a_rebuild(d, by=adjusted_time)
            except Exception as ex:
                log.warning(f'while processing {d}: {ex}')
                remove = True
            if (expired_for == 'all' or remove):
                assert os.path.realpath(d).startswith(
                    os.path.realpath(path.MACHINES)
                )
                log.info(f'removing {os.path.realpath(d)}')
                if not os.path.islink(d):
                    shutil.rmtree(d)
                else:
                    os.unlink(d)
            else:
                log.debug(f'keeping {os.path.realpath(d)}')
            os.unlink(lock_path)
            lock.release()
Code Example #4
def machines(expired_for=0):
    if expired_for != 'all':
        adjusted_time = time.time() - fingertip.expiration._parse(expired_for)
    for root, dirs, files in os.walk(path.MACHINES, topdown=False):
        for d in (os.path.join(root, x) for x in dirs):
            lock_path = os.path.join(root, '.' + os.path.basename(d) + '-lock')
            lock = fasteners.process_lock.InterProcessLock(lock_path)
            lock.acquire()
            try:
                remove = fingertip.machine.needs_a_rebuild(d, by=adjusted_time)
            except (FileNotFoundError, EOFError, UnboundLocalError):
                remove = True
            if (expired_for == 'all' or remove):
                assert os.path.realpath(d).startswith(
                    os.path.realpath(path.MACHINES)
                )
                log.info(f'removing {os.path.realpath(d)}')
                if not os.path.islink(d):
                    shutil.rmtree(d)
                else:
                    os.unlink(d)
            else:
                log.debug(f'keeping {os.path.realpath(d)}')
            os.unlink(lock_path)
            lock.release()
Code Example #5
 def translate_path(self, http_path):  # directly serve local files
     if http_path in http_cache._local_files_to_serve:
         local_path = http_cache._local_files_to_serve[http_path]
     else:
         local_path = super().translate_path(http_path)
     log.info(f'serving {os.path.basename(http_path)} '
              f'directly from {local_path}')
     return local_path
Code Example #6
def deduplicate(log, *subpath, timeout=None):
    log.info('locking the deduplication db...')
    with lock.Lock(path.saviour('.duperemove.hashfile-lock'), timeout=timeout):
        log.info('deduplicating...')
        run = log.pipe_powered(subprocess.run,
                               stdout=logging.INFO, stderr=logging.WARNING)
        r = run(['duperemove',
                 '--hashfile', path.saviour('.duperemove.hashfile'),
                 '-hdr', path.saviour('_', *subpath)])
        assert r.returncode in (0, 22)  # nothing to deduplicate
Code Example #7
def mount_supported_fs(backing_file, tgt):
    log.info('mounting a reflink-supported filesystem for image storage...')
    tgt_uid, tgt_gid = os.stat(tgt).st_uid, os.stat(tgt).st_gid
    subprocess.run(['sudo', 'mount', '-o', 'loop', backing_file, tgt],
                   check=True)
    mount_uid, mount_gid = os.stat(tgt).st_uid, os.stat(tgt).st_gid
    if (tgt_uid, tgt_gid) != (mount_uid, mount_gid):
        log.debug(f'fixing owner:group ({tgt_uid}:{tgt_gid})')
        subprocess.run(['sudo', 'chown', f'{tgt_uid}:{tgt_gid}', tgt],
                       check=True)
Code Example #8
def method_command(log, src, base, dst, command='false', reuse=True):
    if not reuse:
        fingertip.util.log.info(f'removing {dst}...')
        _remove(dst)
    env = os.environ.copy()
    env['SRC'], env['BASE'], env['DST'] = src, base, dst
    run = log.pipe_powered(subprocess.run,
                           stdout=logging.INFO, stderr=logging.WARNING)
    log.info(command.replace('$SRC', src).replace('$BASE', base)
                    .replace('$DST', dst))
    run(command, shell=True, cwd=os.path.dirname(dst), env=env,
        check=True)
Code Example #9
File: cleanup.py Project: shigaraki0/fingertip
def _cleanup_dir(dirpath, preserve_func):
    for root, dirs, files in os.walk(dirpath, topdown=False):
        for f in (os.path.join(root, x) for x in files):
            assert os.path.realpath(f).startswith(os.path.realpath(dirpath))
            if not preserve_func(f):
                log.info(f'removing {os.path.realpath(f)}')
                os.unlink(f)
        for d in (os.path.join(root, x) for x in dirs):
            assert os.path.realpath(d).startswith(os.path.realpath(dirpath))
            try:
                os.rmdir(d)
            except OSError:  # directory not empty => ignore
                pass
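
A minimal usage sketch for the helper above; the directory path and the one-week cutoff are assumptions, not taken from fingertip:

import os
import time

week_ago = time.time() - 7 * 24 * 3600
# Keep files modified within the last week; everything else, plus any
# directories emptied as a result, is removed by _cleanup_dir().
_cleanup_dir('/tmp/fingertip-downloads',
             lambda f: os.path.getmtime(f) > week_ago)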
Code Example #10
File: git_cache.py Project: sahanaprasad07/fingertip
 def __init__(self, url, *path_components, enough_to_have=None):
     self.url = url
     self.path = os.path.join(DIR, *path_components)
     lock_path = self.path + '-lock'
     self.lock = fasteners.process_lock.InterProcessLock(lock_path)
     self.lock.acquire()
     if not os.path.exists(self.path):
         log.info(f'cloning {url}...')
         git.Repo.clone_from(url, self.path, mirror=True)  # TODO: use bare
         super().__init__(self.path)
     else:
         super().__init__(self.path)
         update_not_needed = enough_to_have and (
             enough_to_have in (t.name for t in self.tags)
             or enough_to_have in (h.name for h in self.heads)
             or enough_to_have in (c.hexsha for c in self.iter_commits())
             # that's not all commits, but best-effort should be fine here
         )
         if update_not_needed:
             log.info(f'not re-fetching {url} '
                      f'because {enough_to_have} is already present')
         if OFFLINE:
             log.info(f'not re-fetching {url} because of offline mode')
         if not OFFLINE and not update_not_needed:
             log.info(f'updating {url}...')
             self.remote().fetch(tags=True)
     self.lock.release()
Code Example #11
File: http_cache.py Project: thrix/fingertip
            def _serve(self, uri, headers, meth='GET'):
                sess = http_cache._get_requests_session()

                headers = {k: v for k, v in headers.items() if
                           not (k in STRIP_HEADERS or k.startswith('Proxy-'))}
                log.debug(f'{meth} {uri}')
                for k, v in headers.items():
                    log.debug(f'{k}: {v}')

                try:
                    if meth == 'GET' and not OFFLINE:
                        # direct streaming might be required...
                        preview = sess.head(uri, headers=headers,
                                            allow_redirects=False)
                        direct = None
                        if int(preview.headers.get('Content-Length', 0)) > BIG:
                            direct = f'file bigger than {BIG}'
                        if 'Range' in headers:
                            # There seems to be a bug in CacheControl
                            # that serves contents in full if a range request
                            # hits a non-ranged cached entry.
                            direct = 'ranged request, playing safe'
                        if direct:
                            # Don't cache, don't reencode, stream it as is
                            log.warning(f'streaming {uri} directly ({direct})')
                            r = requests.get(uri, headers=headers, stream=True)
                            self._status_and_headers(r.status_code, r.headers)
                            self.copyfile(r.raw, self.wfile)
                            return

                    # fetch with caching
                    m_func = getattr(sess, meth.lower())
                    r = m_func(uri if '://' in uri else 'http://self' + uri,
                               headers=headers, allow_redirects=False)
                    data = r.content
                    length = int(r.headers.get('Content-Length', 0))
                    if len(data) != length:
                        data = hack_around_unpacking(uri, headers, data)
                    assert len(data) == length
                    self._status_and_headers(r.status_code, r.headers)
                    if meth == 'GET':
                        self.wfile.write(data)
                    log.info(f'{meth} {uri} served {length}')
                except BrokenPipeError:
                    log.warning(f'Broken pipe for {meth} {uri}')
                except ConnectionResetError:
                    log.warning(f'Connection reset for {meth} {uri}')
                except requests.exceptions.ConnectionError:
                    log.warning(f'Connection error for {meth} {uri}')
Code Example #12
def _cleanup_dir(dirpath, older_than, time_func):
    cutoff_time = time.time() - fingertip.expiration._parse(older_than)
    for root, dirs, files in os.walk(dirpath, topdown=False):
        for f in (os.path.join(root, x) for x in files):
            assert os.path.realpath(f).startswith(os.path.realpath(dirpath))
            if time_func(f) <= cutoff_time:
                log.info(f'removing {os.path.realpath(f)}')
                os.unlink(f)
        for d in (os.path.join(root, x) for x in dirs):
            assert os.path.realpath(d).startswith(os.path.realpath(dirpath))
            try:
                log.info(f'removing {os.path.realpath(d)}')
                os.rmdir(d)
            except OSError:  # directory not empty => ignore
                pass
Code Example #13
def autotag(something, *args, **kwargs):
    log.info(f'autotag in: {something} {args} {kwargs}')
    if isinstance(something, str):
        name = something if not something.startswith('.') else '_' + something
    else:
        name = something.__module__ + '.' + something.__qualname__
        assert name.startswith('fingertip.plugins.')
        name = name[len('fingertip.plugins.'):]
        if name.endswith('.__main__'):
            name = name[:-len('.__main__')]
    args_str = ':'.join([f'{a}' for a in args] +
                        [f'{k}={v}' for k, v in sorted(kwargs.items())])
    if args_str and (' ' in args_str or '/' in args_str or len(args_str) > 20):
        args_str = '::' + weak_hash.of_string(args_str)
    tag = f'{name}:{args_str}' if args_str else name
    return tag
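
To illustrate the tag format this produces (the plugin name below is an assumed example, not taken from the code above):

# Short argument strings are joined into the tag verbatim...
assert autotag('os.fedora') == 'os.fedora'
assert autotag('os.fedora', 33) == 'os.fedora:33'
assert autotag('os.fedora', arch='x86_64') == 'os.fedora:arch=x86_64'
# ...while argument strings containing spaces or slashes, or longer than
# 20 characters, collapse to '::' + weak_hash.of_string(args_str).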
Code Example #14
def mount_supported_fs(backing_file, tgt):
    log.info('mounting a reflink-supported filesystem for image storage...')
    tgt_uid, tgt_gid = os.stat(tgt).st_uid, os.stat(tgt).st_gid
    subprocess.run(['sudo', 'mount', '-o', 'loop', backing_file, tgt],
                   check=True)
    mount_uid, mount_gid = os.stat(tgt).st_uid, os.stat(tgt).st_gid
    if (tgt_uid, tgt_gid) != (mount_uid, mount_gid):
        log.debug(f'fixing owner:group ({tgt_uid}:{tgt_gid})')
        subprocess.run(['sudo', 'chown', f'{tgt_uid}:{tgt_gid}', tgt],
                       check=True)
        if tgt.startswith('/home'):
            subprocess.run(['sudo', 'semanage', 'fcontext', '-a', '-t',
                            'user_home_dir_t', tgt + '(/.*)?'], check=False)
            subprocess.run(['sudo', 'restorecon', '-v', tgt], check=False)
Code Example #15
def method_reposync(log,
                    src,
                    base,
                    dst,
                    arches=['noarch', 'x86_64'],
                    source='auto',
                    metadata='download',
                    options=[],
                    excludes=[]):
    if source == 'auto':
        source = '/source' in src or '/SRPM' in src
    excludes = [e[:-4] if e.endswith('.rpm') else e for e in excludes]
    repo_desc_for_mirroring = textwrap.dedent(f'''
        [repo]
        baseurl = {src}
        name = repo
        enabled = 1
        gpgcheck = 0
    ''')
    repodir = temp.disappearing_dir()
    with open(os.path.join(repodir, f'whatever.repo'), 'w') as f:
        f.write(repo_desc_for_mirroring)
    run = log.pipe_powered(subprocess.run,
                           stdout=logging.INFO,
                           stderr=logging.WARNING)
    run([
        'dnf', f'--setopt=reposdir={repodir}', 'reposync', '--norepopath',
        f'--download-path={dst}', '--repoid=repo', '--delete', '--remote-time'
    ] + [f'--arch={arch}' for arch in arches] +
        (['--download-metadata'] if metadata != 'generate' else []) +
        (['--source'] if source else []) +
        (['--exclude=' + ','.join(excludes)] if excludes else []) + options,
        check=True)
    run = log.pipe_powered(
        subprocess.run,  # either too silent or too noisy =/
        stdout=logging.INFO,
        stderr=logging.INFO)
    createrepo_c_options = ['-v', '--error-exit-val', '--ignore-lock']
    if metadata == 'regenerate':
        log.info('regenerating metadata...')
        run(['createrepo_c'] + createrepo_c_options + ['--update', dst],
            check=True)
    elif metadata == 'generate':
        log.info('generating metadata from scratch...')
        run(['createrepo_c'] + createrepo_c_options + [dst], check=True)
Code Example #16
def _deduplicate(log, db_name, resource_name, timeout=None):
    log.info(f'locking the deduplication db {db_name}...')
    hashfilesdir = path.saviour('.duperemove', 'hashfiles')
    if not os.path.exists(hashfilesdir):
        os.makedirs(hashfilesdir)
        os.system(f'chattr +C {hashfilesdir} || true')
    db_file = path.saviour('.duperemove', 'hashfiles', db_name)
    db_lock = path.saviour('.duperemove', 'locks', db_name, makedirs=True)
    with lock.Lock(db_lock, timeout=timeout):
        log.info(f'deduplicating {resource_name} ({db_name})...')
        run = log.pipe_powered(subprocess.run,
                               stdout=logging.INFO,
                               stderr=logging.WARNING)
        r = run([
            'duperemove', '--dedupe-options=nofiemap', '--io-threads=2',
            '--cpu-threads=2', '--hashfile', db_file, '-hdr',
            path.saviour('_', resource_name, 'data')
        ])
        assert r.returncode in (0, 22)  # nothing to deduplicate
Code Example #17
File: git_cache.py Project: sahanaprasad07/fingertip
def cached_clone(m, url, path_in_m, rev=None, rev_is_enough=True):
    assert hasattr(m, 'ssh')
    with m:
        kwa = {} if not rev_is_enough else {'enough_to_have': rev}
        with Repo(url, url.replace('/', '::'), **kwa) as repo:
            tar = temp.disappearing_file()
            tar_in_m = f'/tmp/{os.path.basename(tar)}'
            extracted_in_m = f'/tmp/{os.path.basename(tar)}-extracted'
            log.info(f'packing {url} checkout...')
            with tarfile.open(tar, 'w') as tf:
                tf.add(repo.path, arcname=extracted_in_m)
            log.info(f'uploading {url} checkout...')
            m.ssh.upload(tar, tar_in_m)
        log.info(f'performing {url} checkout...')
        m(f'''
            set -uex
            tar xmvf {tar_in_m} -C /
            mkdir -p {path_in_m}
            git clone -n {extracted_in_m} {path_in_m}
            cd {path_in_m}
            git remote set-url origin {url}
            git checkout {f'{rev}' if rev else 'origin/HEAD'}
            rm -rf {extracted_in_m}
            rm -f {tar_in_m}
        ''')
    return m
Code Example #18
    def _cache_aware_apply(self, step, tag, func, args, kwargs):
        assert self._state == 'loaded'

        # Could there already be a cached result?
        log.debug(f'PATH {self.path} {tag}')
        new_mpath = os.path.join(self._parent_path, tag)
        end_goal = self._link_as

        lock_path = os.path.join(self._parent_path, '.' + tag + '-lock')
        do_lock = not hasattr(func, 'transient')
        if do_lock:
            log.info(f'acquiring lock for {tag}...')
        with lock.MaybeLock(lock_path, lock=do_lock):
            prev_log = self.log
            if os.path.exists(new_mpath) and not needs_a_rebuild(new_mpath):
                # sweet, scratch this instance, fast-forward to cached result
                log.info(f'reusing {step} @ {new_mpath}')
                self._finalize()
                clone_from_path = new_mpath
            else:
                # loaded, not spun up, step not cached: perform step, cache
                log.info(f'applying (and, possibly, caching) {tag}')
                prev_log.disable_hint()
                self.log = log.sublogger('plugins.' + tag.split(':', 1)[0],
                                         os.path.join(self.path, 'log.txt'))
                m = func(self, *args, **kwargs)
                prev_log.enable_hint()
                if m:
                    assert not m._transient
                    m._finalize(link_as=new_mpath, name_hint=tag)
                    clone_from_path = new_mpath
                    log.info(f'successfully applied and saved {tag}')
                else:  # transient step
                    clone_from_path = self._parent_path
                    log.info(f'successfully applied and dropped {tag}')
        m = clone_and_load(clone_from_path, link_as=end_goal)
        m.log = prev_log
        return m
Code Example #19
File: machine.py Project: t184256/fingertip
def build(first_step, *args, fingertip_last_step=False, **kwargs):
    func, tag = step_loader.func_and_autotag(first_step, *args, **kwargs)

    # Could there already be a cached result?
    mpath = path.machines(tag)
    lock_path = path.machines('.' + tag + '-lock')
    log.info(f'acquiring lock for {tag}...')

    transient_hint = func.transient if hasattr(func, 'transient') else None
    if callable(transient_hint):
        transient_hint = supply_last_step_if_requested(transient_hint,
                                                       fingertip_last_step)
        transient_hint = transient_hint(*args, **kwargs)
    transient = (transient_hint in ('always', True)
                 or transient_hint == 'last' and fingertip_last_step)

    with lock.Lock(lock_path) if not transient else lock.NoLock():
        if not os.path.exists(mpath) or needs_a_rebuild(mpath):
            log.info(f'building {tag}...')
            func = supply_last_step_if_requested(func, fingertip_last_step)
            first = func(*args, **kwargs)

            if first is None:
                assert transient, 'first step returned None'
                return

            if transient:
                log.info(f'successfully built and discarded {tag}')
                first._finalize()  # discard (not fast-dropped though)

                if transient_hint == 'last' and fingertip_last_step:
                    fname = f'{datetime.datetime.utcnow().isoformat()}.txt'
                    t = path.logs(fname, makedirs=True)
                    with open(t, 'w') as f:
                        f.write(first.log_contents)
                    return t
            else:
                log.info(f'successfully built and saved {tag}')
                first._finalize(link_as=mpath, name_hint=tag)

    if fingertip_last_step:
        return os.path.join(mpath, 'log.txt')
    m = clone_and_load(mpath)
    m.log = log.Sublogger('fingertip.<just built>',
                          os.path.join(m.path, 'log.txt'))
    return m
Code Example #20
def build(first_step, *args, **kwargs):
    func, tag = step_loader.func_and_autotag(first_step, *args, **kwargs)

    # Could there already be a cached result?
    mpath = path.machines(tag)
    lock_path = path.machines('.' + tag + '-lock')
    log.info(f'acquiring lock for {tag}...')
    do_lock = not hasattr(func, 'transient')
    with lock.MaybeLock(lock_path, lock=do_lock):
        if not os.path.exists(mpath) or needs_a_rebuild(mpath):
            log.info(f'building {tag}...')
            first = func(*args, **kwargs)
            if first is None:
                return
            first._finalize(link_as=mpath, name_hint=tag)
            log.info(f'successfully built {tag}')
    m = clone_and_load(mpath)
    return m
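
A hedged usage sketch of this builder; 'os.fedora' stands in for any installed first-step plugin and is an assumption, not taken from the code above:

# The first call builds the machine and saves it under path.machines(tag);
# a second call with the same step and arguments finds the cached result
# (unless needs_a_rebuild() says otherwise) and just clone_and_load()s it.
m1 = build('os.fedora')
m2 = build('os.fedora')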
Code Example #21
File: git_cache.py Project: t184256/fingertip
def upload_contents(m, url, path_in_m, rev=None, rev_is_enough=True):
    assert hasattr(m, 'ssh')
    with m:
        kwa = {} if not rev_is_enough else {'enough_to_have': rev}
        with Repo(url, url.replace('/', '::'), **kwa) as repo:
            tar = temp.disappearing_file()
            log.info(f'packing {url} contents at rev {rev}...')
            tar_in_m = f'/.tmp-{os.path.basename(tar)}'
            with open(tar, 'wb') as tf:
                repo.archive(tf, treeish=rev, prefix=path_in_m + '/')
            log.info(f'uploading {url} contents at rev {rev}...')
            m.ssh.upload(tar, tar_in_m)
        log.info(f'unpacking {url} contents at rev {rev}...')
        m(f'''
            set -uex
            tar xmf {tar_in_m} -C /
            rm -f {tar_in_m}
        ''')
    return m
Code Example #22
def storage_setup_wizard():
    assert SETUP in ('auto', 'suggest', 'never')
    if SETUP == 'never':
        return
    size = SIZE
    os.makedirs(path.MACHINES, exist_ok=True)
    if not is_supported(path.MACHINES):
        log.warning(f'images directory {path.MACHINES} lacks reflink support')
        log.warning('without it, fingertip will thrash and fill up your SSD '
                    'in no time')
        backing_file = os.path.join(path.CACHE, 'for-machines.xfs')
        if not os.path.exists(backing_file):
            if SETUP == 'suggest':
                log.info(f'would you like to allow fingertip '
                         f'to allocate {size} at {backing_file} '
                         'for a reflink-enabled XFS loop mount?')
                log.info('(set FINGERTIP_SETUP="auto" environment variable'
                         ' to do it automatically)')
                i = input(f'[{size}]/different size/cancel/ignore> ').strip()
                if i == 'cancel':
                    log.error('cancelled')
                    sys.exit(1)
                elif i == 'ignore':
                    return
                size = i or size
            tmp = temp.disappearing_file(path.CACHE)
            create_supported_fs(tmp, size)
            os.rename(tmp, backing_file)

        log.info(f'fingertip will now mount the XFS image at {backing_file}')
        if SETUP == 'suggest':
            i = input(f'[ok]/skip/cancel> ').strip()
            if i == 'skip':
                log.warning('skipping; '
                            'fingertip will have no reflink superpowers')
                log.warning('tell your SSD I\'m sorry')
                return
            elif i and i != 'ok':
                log.error('cancelled')
                sys.exit(1)

        mount_supported_fs(backing_file, path.MACHINES)
Code Example #23
            def _serve_http(self,
                            uri,
                            headers,
                            meth='GET',
                            cache=True,
                            retries=RETRIES_MAX):
                sess = http_cache._get_requests_session(direct=not cache)
                sess_dir = http_cache._get_requests_session(direct=True)
                basename = os.path.basename(uri)

                headers = {
                    k: v
                    for k, v in headers.items()
                    if not (k in STRIP_HEADERS or k.startswith('Proxy-'))
                }
                headers['Accept-Encoding'] = 'identity'
                log.debug(f'{meth} {basename} ({uri})')
                for k, v in headers.items():
                    log.debug(f'{k}: {v}')

                error = None
                try:
                    if meth == 'GET':
                        # direct streaming or trickery might be required...
                        preview = sess.head(uri,
                                            headers=headers,
                                            allow_redirects=False)
                        if (300 <= preview.status_code < 400
                                and 'Location' in preview.headers):
                            nu = preview.headers['Location']
                            if nu.startswith('https://'):
                                # no point in serving that, we have to pretend
                                # that never happened
                                log.info(f'suppressing HTTPS redirect {nu}')
                                return self._serve_http(nu,
                                                        headers,
                                                        meth=meth,
                                                        cache=cache,
                                                        retries=retries)
                        direct = []
                        if not cache:
                            direct.append('caching disabled for this source')
                        if int(preview.headers.get('Content-Length', 0)) > BIG:
                            direct.append(f'file bigger than {BIG}')
                        if 'Range' in headers:
                            # There seems to be a bug in CacheControl
                            # that serves contents in full if a range request
                            # hits a non-ranged cached entry.
                            direct.append('ranged request, playing safe')
                        if direct:
                            # Don't cache, don't reencode, stream it as is
                            log.info(f'streaming {basename} directly '
                                     f'from {uri} ({", ".join(direct)})')
                            r = sess_dir.get(uri, headers=headers, stream=True)
                            self._status_and_headers(r.status_code, r.headers)
                            shutil.copyfileobj(r.raw, self.wfile)
                            return

                    # fetch with caching
                    m_func = getattr(sess, meth.lower())
                    r = m_func(uri if '://' in uri else 'http://self' + uri,
                               headers=headers,
                               allow_redirects=False)
                    data = r.content
                    if 'Content-Length' in r.headers:
                        length = int(r.headers['Content-Length'])
                        if len(data) != length:
                            data = hack_around_unpacking(uri, headers, data)
                        assert len(data) == length
                except BrokenPipeError:
                    error = f'Upwards broken pipe for {meth} {uri}'
                except ConnectionResetError:
                    error = f'Upwards connection reset for {meth} {uri}'
                except requests.exceptions.ConnectionError:
                    error = f'Upwards connection error for {meth} {uri}'
                if error:
                    # delay a re-request
                    if retries:
                        log.warning(f'{error} (will retry x{retries})')
                        t = (RETRIES_MAX - retries) / RETRIES_MAX * COOLDOWN
                        time.sleep(t)
                        return self._serve_http(uri,
                                                headers,
                                                meth=meth,
                                                cache=cache,
                                                retries=retries - 1)
                    else:
                        log.error(f'{error} (out of retries)')
                        self.send_error(http.HTTPStatus.SERVICE_UNAVAILABLE)
                        return
                log.debug(f'{meth} {basename} fetched {r.status_code} ({uri})')
                try:
                    self._status_and_headers(r.status_code, r.headers)
                    if meth == 'GET':
                        self.wfile.write(data)
                except BrokenPipeError:
                    log.warning(f'Downwards broken pipe for {meth} {uri}')
                except ConnectionResetError:
                    log.warning(f'Downwards connection reset for {meth} {uri}')
                except requests.exceptions.ConnectionError:
                    log.warning(f'Downwards connection error for {meth} {uri}')
                log.info(f'{meth} {basename} served ({uri})')
Code Example #24
    def __init__(self, url, *path_components, enough_to_have=None):
        assert path_components
        self.url = url
        cache_path = path.downloads('git', *path_components, makedirs=True)
        cache_exists = os.path.exists(cache_path)
        self.path = temp.disappearing_dir(os.path.dirname(cache_path),
                                          path_components[-1])
        lock_working_copy_path = self.path + '-lock'
        lock_cache_path = cache_path + '-lock'
        lock.Lock.__init__(self, lock_working_copy_path)
        update_not_needed = None
        sources = saviour_sources()
        self.self_destruct = False
        with lock.Lock(lock_cache_path), lock.Lock(lock_working_copy_path):
            _remove(self.path)

            for i, (source, cache) in enumerate(sources):
                last_source = i == len(sources) - 1

                if cache and cache_exists and update_not_needed is None:
                    cr = git.Repo(cache_path)
                    update_not_needed = enough_to_have and (
                        enough_to_have in (t.name for t in cr.tags) or
                        enough_to_have in (h.name for h in cr.heads) or
                        enough_to_have in (c.hexsha for c in cr.iter_commits())
                        # that's not all revspecs, but best-effort is fine
                    )
                    if update_not_needed:
                        log.info(f'not re-fetching {url} from {source} '
                                 f'because {enough_to_have} '
                                 'is already present in cache')
                        git.Repo.clone_from(cache_path, self.path, mirror=True)
                        break

                if source == 'local':
                    surl = path.saviour(url).replace('//', '/')  # workaround
                    if not os.path.exists(surl) and not last_source:
                        continue
                    log.info(f'cloning {url} from local saviour mirror')
                    git.Repo.clone_from(surl, self.path, mirror=True)
                    break
                elif source == 'direct':
                    surl = url
                else:
                    surl = source + '/' + url
                    surl = 'http://' + surl if '://' not in source else surl

                log.info(f'cloning {url} from {source} '
                         f'cache_exists={cache_exists}...')
                try:
                    # TODO: bare clone
                    # no harm in referencing cache, even w/o cached+
                    git.Repo.clone_from(surl, self.path, mirror=True,
                                        dissociate=True,
                                        reference_if_able=cache_path)
                except git.GitError:
                    log.warning(f'could not clone {url} from {source}')
                    if last_source:
                        raise
                    continue
                break

            _remove(cache_path)
            reflink.auto(self.path, cache_path)
            git.Repo.__init__(self, self.path)
            self.remotes[0].set_url(url)
        self.self_destruct = True
Code Example #25
def storage_unmount():
    log.plain()
    log.info(f'unmounting {path.CACHE} ...')
    subprocess.run(['sudo', 'umount', '-l', path.CACHE])
    log.nicer()
Code Example #26
def mirror(config, *what_to_mirror):
    total_failures = []
    failures = collections.defaultdict(list)

    with open(config) as f:
        config = ruamel.yaml.YAML(typ='safe').load(f)
    hows, whats = config['how'], config['what']
    if not what_to_mirror:
        what_to_mirror = whats.keys()
    else:
        what_to_mirror = [k for k in whats.keys()
                          if any((fnmatch.fnmatch(k, req)
                                  for req in what_to_mirror))]

    for resource_name in what_to_mirror or whats.keys():
        s = whats[resource_name]
        log.debug(f'processing {resource_name}...')

        if s is None:
            how, suffix = resource_name, ''
        elif '/' in s:
            how, suffix = s.split('/', 1)
            suffix = '/' + suffix
        else:
            how, suffix = s, ''

        try:
            how = hows[how]
        except KeyError:
            log.error(f'missing how section on {how}')
            raise SystemExit()

        url = how['url'] + suffix
        method = how['method']
        sources = (how['sources'] if 'sources' in how else [how['url']])
        sources = [s + suffix for s in sources]
        extra_args = {k: v for k, v in how.items()
                      if k not in ('url', 'sources', 'method')}

        if f'method_{method}' not in globals():
            log.error(f'unsupported method {method}')
            raise SystemExit()

        meth = globals()[f'method_{method}']
        symlink = path.saviour(url.rstrip('/'))
        # usually symlink points to data, but while we're working on it,
        # it temporarily points to a consistent snapshot of it named `snap`
        data = path.saviour('_', resource_name, 'data')
        snap = path.saviour('_', resource_name, 'snap')
        temp = path.saviour('_', resource_name, 'temp')
        lockfile = path.saviour('_', resource_name) + '-lock'
        assert data.startswith(path.SAVIOUR)
        assert snap.startswith(path.SAVIOUR)
        assert temp.startswith(path.SAVIOUR)

        sublog = log.Sublogger(f'{method} {resource_name}')
        sublog.info('locking...')
        with lock.Lock(lockfile):
            os.makedirs(os.path.dirname(snap), exist_ok=True)

            if os.path.exists(temp):
                sublog.info('removing stale temp...')
                _remove(temp)
            if os.path.exists(symlink):  # it's already published
                if os.path.exists(data) and not os.path.exists(snap):
                    # `data` is present and is the best we have to publish
                    sublog.info('snapshotting...')
                    reflink.always(data, temp, preserve=True)
                    os.rename(temp, snap)
                if os.path.exists(snap):
                    # link to a consistent snapshot while we work on `data`
                    _symlink(snap, symlink)

            for source in sources:
                sublog.info(f'trying {source}...')
                try:
                    meth(sublog, source, snap, data, **extra_args)
                    assert os.path.exists(data)
                    break
                except Exception as _:
                    traceback.print_exc()
                    failures[resource_name].append(source)
                    fingertip.util.log.warning(f'failed to mirror {source}')

            if len(failures[resource_name]) == len(sources):
                sublog.error(f'failed to mirror '
                             f'from all {len(sources)} sources')
                total_failures.append(resource_name)
                continue

            _symlink(data, symlink)
            if os.path.exists(snap):
                os.rename(snap, temp)  # move it out of the way asap
                sublog.info('removing now obsolete snapshot...')
                _remove(temp)

            try:
                deduplicate(sublog, resource_name, timeout=1)
            except lock.LockTimeout:
                log.warning('skipped deduplication, db was locked')
    if total_failures:
        fingertip.util.log.error(f'failed: {", ".join(total_failures)}')
        raise SystemExit()
    log.info('saviour has completed mirroring')
Code Example #27
File: machine.py Project: sahanaprasad07/fingertip
    def _cache_aware_apply(self, step, tag, func, args, kwargs, last_step):
        assert self._state == 'loaded'

        transient_hint = func.transient if hasattr(func, 'transient') else None
        if callable(transient_hint):
            transient_hint = supply_last_step_if_requested(
                transient_hint, last_step)
            transient_hint = transient_hint(self, *args, **kwargs)

        return_as_transient = self._transient
        exec_as_transient = (transient_hint in ('always', True)
                             or transient_hint == 'last' and last_step)
        log.debug(f'transient: {transient_hint}')
        log.debug(f'exec_as_transient: {exec_as_transient}')
        log.debug(f'return_as_transient: {return_as_transient}')
        self._transient = exec_as_transient

        # Could there already be a cached result?
        log.debug(f'PATH {self.path} {tag}')
        new_mpath = os.path.join(self._parent_path, tag)

        lock_path = os.path.join(self._parent_path, '.' + tag + '-lock')
        do_lock = not self._transient
        if do_lock:
            log.info(f'acquiring lock for {tag}...')
        prev_log_name = self.log.name
        self.log.finalize()
        with lock.Lock(lock_path) if do_lock else lock.NoLock():
            if (os.path.exists(new_mpath) and not needs_a_rebuild(new_mpath)
                    and not exec_as_transient):
                # sweet, scratch this instance, fast-forward to cached result
                log.info(f'reusing {step} @ {new_mpath}')
                self._finalize()
                clone_from_path = new_mpath
            else:
                # loaded, not spun up, step not cached: perform step, cache
                log.info(f'applying (and, possibly, caching) {tag}')
                self.log = log.Sublogger('plugins.' + tag.split(':', 1)[0],
                                         os.path.join(self.path, 'log.txt'))
                func = supply_last_step_if_requested(func, last_step)
                m = func(self, *args, **kwargs)
                if m:
                    if m._transient and transient_hint == 'last' and last_step:
                        assert m._state == 'dropped'
                        # transient-when-last step returned m
                        # just in case it's not the last, but it was.
                        # m is dropped already, only log contents is preserved.
                        fname = f'{datetime.datetime.utcnow().isoformat()}.txt'
                        t = path.logs(fname, makedirs=True)
                        with open(t, 'w') as f:
                            f.write(m.log_contents)
                        return t
                    assert not m._transient, 'transient step returned a value'
                    m._finalize(link_as=new_mpath, name_hint=tag)
                    clone_from_path = new_mpath
                    log.info(f'successfully applied and saved {tag}')
                else:  # transient step, either had hints or just returned None
                    clone_from_path = self._parent_path
                    log.info(f'successfully applied and dropped {tag}')
        if last_step:
            return os.path.join(clone_from_path, 'log.txt')
        m = clone_and_load(clone_from_path)
        m.log = log.Sublogger(prev_log_name, os.path.join(m.path, 'log.txt'))
        m._transient = return_as_transient
        return m
Code Example #28
File: git_cache.py Project: t184256/fingertip
    def __init__(self, url, *path_components, enough_to_have=None):
        if not path_components:
            path_components = [url.replace('/', '::')]
        self.url = url
        cache_path = path.downloads('git', *path_components, makedirs=True)
        self.path = temp.disappearing_dir(os.path.dirname(cache_path),
                                          path_components[-1])
        lock_working_copy_path = self.path + '-lock'
        lock_cache_path = cache_path + '-lock'
        lock.Lock.__init__(self, lock_working_copy_path)
        sources = saviour_sources()
        self.self_destruct = False
        with lock.Lock(lock_cache_path), lock.Lock(lock_working_copy_path):
            cache_is_enough = False
            if os.path.exists(cache_path):
                try:
                    cr = git.Repo(cache_path)
                    cache_is_enough = (enough_to_have
                                       and _has_rev(cr, enough_to_have))
                except git.GitError as e:
                    log.error(f'something wrong with git cache {cache_path}')
                    log.error(str(e))
                _remove(self.path)

            for i, (source, cache) in enumerate(sources):
                last_source = i == len(sources) - 1

                if cache and cache_is_enough:
                    log.info(f'not re-fetching {url} from {source} '
                             f'because {enough_to_have} '
                             'is already present in cache')
                    git.Repo.clone_from(cache_path, self.path, mirror=True)
                    break

                if source == 'local':
                    surl = path.saviour(url).replace('//', '/')  # workaround
                    if not os.path.exists(surl) and not last_source:
                        continue
                    log.info(f'cloning {url} from local saviour mirror')
                    git.Repo.clone_from(surl, self.path, mirror=True)
                    break
                elif source == 'direct':
                    surl = url
                else:
                    surl = source + '/' + url
                    surl = 'http://' + surl if '://' not in source else surl

                log.info(f'cloning {url} from {source} '
                         f'cache_exists={os.path.exists(cache_path)}...')
                try:
                    # TODO: bare clone
                    # no harm in referencing cache, even w/o cached+
                    git.Repo.clone_from(surl,
                                        self.path,
                                        mirror=True,
                                        dissociate=True,
                                        reference_if_able=cache_path)
                except git.GitError:
                    log.warning(f'could not clone {url} from {source}')
                    if last_source:
                        raise
                    continue
                break

            _remove(cache_path)
            reflink.auto(self.path, cache_path)
            git.Repo.__init__(self, self.path)
            self.remotes[0].set_url(url)
        self.self_destruct = True
Code Example #29
def mirror(config, *what_to_mirror, deduplicate=None):
    total_failures = []
    failures = collections.defaultdict(list)

    with open(config) as f:
        config = ruamel.yaml.YAML(typ='safe').load(f)
    if 'mirror' in config and not config['mirror']:
        log.warning('mirroring is disabled in config')
        return

    hows, whats = config['how'], config['what']
    if not what_to_mirror:
        what_to_mirror = whats.keys()
    else:
        what_to_mirror = ([
            k for k in whats.keys() if any(
                fnmatch.fnmatch(k, req) for req in what_to_mirror)
        ] + [k for k in what_to_mirror if '=' in k])

    if not what_to_mirror:
        log.error('nothing to mirror')
        return

    for resource in what_to_mirror:
        log.debug(f'processing {resource}...')

        if '=' not in resource:  # example: alpine-3.13
            resource_name, tail = resource, ''
            s = whats[resource_name]
        else:  # example: alpine-3.13=alpine/v3.13/main/x86
            resource_name, s = resource.split('=', 1)
            # FIXME UGLY: config overrides are stronger than = (more syntax?)
            # TODO: whats shouldn't be a dict, I think, just a list of strings
            if resource_name in whats:
                s = whats[resource_name]

        if s is None:
            s = resource_name
        if '/' in s:
            how_name, suffix = s.split('/', 1)
            suffix = '/' + suffix
        else:
            how_name, suffix = s, ''

        try:
            how = hows[how_name]
        except KeyError:
            log.error(f'missing how section on {how_name}')
            raise SystemExit()

        url = how['url'] + suffix
        method = how['method']
        sources = (how['sources'] if 'sources' in how else [how['url']])
        sources = [s + suffix for s in sources]
        extra_args = {
            k: v
            for k, v in how.items()
            if k not in ('url', 'sources', 'method', 'validate', 'deduplicate')
        }

        if f'method_{method}' not in globals():
            log.error(f'unsupported method {method}')
            raise SystemExit()

        meth = globals()[f'method_{method}']
        symlink = path.saviour(url.rstrip('/'))
        # usually symlink points to data, but while we're working on it,
        # it temporarily points to a consistent snapshot of it named `snap`
        data = os.path.realpath(path.saviour('_', resource_name, 'data'))
        snap = os.path.realpath(path.saviour('_', resource_name, 'snap'))
        temp = os.path.realpath(path.saviour('_', resource_name, 'temp'))
        lockfile = path.saviour('_', resource_name) + '-lock'
        assert data.startswith(os.path.realpath(path.SAVIOUR))
        assert snap.startswith(os.path.realpath(path.SAVIOUR))
        assert temp.startswith(os.path.realpath(path.SAVIOUR))

        sublog = log.Sublogger(f'{method} {resource_name}')
        sublog.info('locking...')
        with lock.Lock(lockfile):
            os.makedirs(os.path.dirname(snap), exist_ok=True)

            if os.path.exists(temp):
                sublog.info('removing stale temp...')
                _remove(temp)
            if os.path.exists(symlink):  # it's already published
                if os.path.exists(data) and not os.path.exists(snap):
                    # `data` is present and is the best we have to publish
                    sublog.info('snapshotting...')
                    reflink.always(data, temp, preserve=True)
                    os.rename(temp, snap)
                if os.path.exists(snap):
                    # link to a consistent snapshot while we work on `data`
                    _symlink(snap, symlink)

            for source in sources:
                sublog.info(f'trying {source}...')
                try:
                    meth(sublog, source, snap, data, **extra_args)
                    assert os.path.exists(data)
                    if 'validate' in how:
                        sublog.info(f'validating with {how["validate"]}...')
                        validator = globals()[f'validate_{how["validate"]}']
                        validator(sublog, source, data)
                        sublog.info('validated')
                    break
                except Exception as _:
                    traceback.print_exc()
                    failures[resource_name].append(source)
                    fingertip.util.log.warning(f'failed to mirror {source}')

            if len(failures[resource_name]) == len(sources):
                sublog.error(f'failed to mirror '
                             f'from all {len(sources)} sources')
                total_failures.append(resource_name)
                continue

            _symlink(data, symlink)
            if os.path.exists(snap):
                os.rename(snap, temp)  # move it out of the way asap
                sublog.info('removing now obsolete snapshot...')
                _remove(temp)

        how_deduplicate = how.get('deduplicate', True)
        db_name = how_deduplicate if how_deduplicate is not True else how_name
        if how_deduplicate and deduplicate is not False:
            try:
                _deduplicate(sublog, db_name, resource_name, timeout=1)
            except lock.LockTimeout:
                log.warning(f'skipped deduplication of {resource_name}, '
                            f'db {db_name} was locked')
    if total_failures:
        fingertip.util.log.error(f'failed: {", ".join(total_failures)}')
        raise FailureToMirrorError(", ".join(total_failures))
    log.info('saviour has completed mirroring')
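
For orientation, the config file that mirror() loads has a 'how' section describing mirroring methods and a 'what' section naming resources. A made-up example of the parsed structure (the URLs and resource names are assumptions; only the key names follow the lookups in the code above):

config = {
    'how': {
        'fedora': {
            'method': 'reposync',  # dispatched to method_reposync()
            'url': 'https://dl.fedoraproject.org/pub/fedora/linux',
            'sources': ['https://mirror.example.com/fedora/linux'],
            'deduplicate': True,   # optional; True by default, or a string
                                   # naming the duperemove hashfile db
        },
    },
    'what': {
        # resource name -> '<how name>/<suffix>', or None to reuse the name
        'fedora-releases': 'fedora/releases/33/Everything/x86_64/os',
    },
}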