Example #1
def main(*args):
    if len(args) >= 1:
        subcmd, *args = args
        if subcmd == 'mirror':
            return mirror(*args)
        if subcmd == 'deduplicate' and not args:
            return deduplicate(log.Sublogger('deduplicate'))
    log.error('usage: ')
    log.error('    fingertip saviour mirror <config-file> [<what-to-mirror>]')
    log.error('    fingertip saviour deduplicate')
    raise SystemExit()
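
A hedged sketch of how this dispatcher might be invoked; the argument values are hypothetical and would normally come from the command line:

# Illustrative calls only; the config filename and glob are made-up values.
main('mirror', 'saviour.yml')             # mirror everything listed in the config
main('mirror', 'saviour.yml', 'alpine*')  # mirror only resources matching a pattern
main('deduplicate')                       # deduplicate; takes no further arguments
main()                                    # no subcommand: print usage, raise SystemExit
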
Example #2
def _load_from_path(data_dir_path):
    log.debug(f'load from {data_dir_path}')
    with open(os.path.join(data_dir_path, 'machine.clpickle'), 'rb') as f:
        m = cloudpickle.load(f)
    assert m._state == 'saving'
    m._state = 'loading'
    m.log = log.Sublogger('<unknown>')
    assert m.path == data_dir_path
    assert m._parent_path == os.path.realpath(os.path.dirname(data_dir_path))
    m.hooks.load()
    m._state = 'loaded'
    return m
Example #3
def clone_and_load(from_path, name_hint=None):
    log.debug(f'clone {from_path}')
    temp_path = temp.disappearing_dir(from_path, hint=name_hint)
    log.debug(f'temp = {temp_path}')
    os.makedirs(temp_path, exist_ok=True)
    with open(os.path.join(from_path, 'machine.clpickle'), 'rb') as f:
        m = cloudpickle.load(f)
    m.log = log.Sublogger('<cloning>')
    m.hooks.clone(temp_path)
    m._parent_path = os.path.realpath(from_path)
    m.path = temp_path
    with open(os.path.join(m.path, 'machine.clpickle'), 'wb') as f:
        cloudpickle.dump(m, f)
    return _load_from_path(temp_path)
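
A hedged usage sketch tying this to Example #4 below, where mpath comes from path.machines(tag); the tag and hint values here are illustrative only:

# Hypothetical tag value; clone_and_load() works on a temporary copy, not on mpath itself.
mpath = path.machines('some.step:some-tag')
m = clone_and_load(mpath, name_hint='some-tag')
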
Example #4
def build(first_step, *args, fingertip_last_step=False, **kwargs):
    func, tag = step_loader.func_and_autotag(first_step, *args, **kwargs)

    # Could there already be a cached result?
    mpath = path.machines(tag)
    lock_path = path.machines('.' + tag + '-lock')
    log.info(f'acquiring lock for {tag}...')

    transient_hint = func.transient if hasattr(func, 'transient') else None
    if callable(transient_hint):
        transient_hint = supply_last_step_if_requested(transient_hint,
                                                       fingertip_last_step)
        transient_hint = transient_hint(*args, **kwargs)
    transient = (transient_hint in ('always', True)
                 or transient_hint == 'last' and fingertip_last_step)

    with lock.Lock(lock_path) if not transient else lock.NoLock():
        if not os.path.exists(mpath) or needs_a_rebuild(mpath):
            log.info(f'building {tag}...')
            func = supply_last_step_if_requested(func, fingertip_last_step)
            first = func(*args, **kwargs)

            if first is None:
                assert transient, 'first step returned None'
                return

            if transient:
                log.info(f'successfully built and discarded {tag}')
                first._finalize()  # discard (not fast-dropped though)

                if transient_hint == 'last' and fingertip_last_step:
                    fname = f'{datetime.datetime.utcnow().isoformat()}.txt'
                    t = path.logs(fname, makedirs=True)
                    with open(t, 'w') as f:
                        f.write(first.log_contents)
                    return t
            else:
                log.info(f'successfully built and saved {tag}')
                first._finalize(link_as=mpath, name_hint=tag)

    if fingertip_last_step:
        return os.path.join(mpath, 'log.txt')
    m = clone_and_load(mpath)
    m.log = log.Sublogger('fingertip.<just built>',
                          os.path.join(m.path, 'log.txt'))
    return m
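
The transient decision above is packed into one expression; a hedged restatement as a standalone helper (the name _is_transient is made up) spells out that 'and' binds tighter than 'or':

def _is_transient(transient_hint, fingertip_last_step):
    # 'always' or True: the built machine is always discarded;
    # 'last': it is discarded only when this is the last step.
    return (transient_hint in ('always', True)
            or (transient_hint == 'last' and fingertip_last_step))
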
Example #5
    def __init__(self, backend_name, sealed=True, expire_in='7d'):
        self.hooks = hooks.HookManager()
        os.makedirs(path.MACHINES, exist_ok=True)
        self.path = temp.disappearing_dir(path.MACHINES)
        self._parent_path = path.MACHINES
        # States: loaded -> spun_up -> spun_down -> saved/dropped
        self._state = 'spun_down'
        self._transient = False
        self._up_counter = 0
        self.sealed = sealed
        self.expiration = expiration.Expiration(expire_in)
        self.time_desync = time_desync.TimeDesync(self)
        self.backend = backend_name
        self.log = log.Sublogger(f'plugins.backend.{backend_name}',
                                 os.path.join(self.path, 'log.txt'))
        self.log.debug(f'created {backend_name}')
        self.hooks.clone.append(lambda to: reflink.auto(
            os.path.join(self.path, 'log.txt'), os.path.join(to, 'log.txt')))
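
The clone hook appended on the last two lines above is what Example #3 triggers via m.hooks.clone(temp_path). A hedged sketch of that interaction, assuming the enclosing class is named Machine and using made-up values:

# Illustrative only: backend name and destination path are hypothetical.
m = Machine('qemu')                # registers the reflink clone hook shown above
m.hooks.clone('/tmp/new-machine')  # each clone hook receives the destination path;
                                   # this one reflinks log.txt into /tmp/new-machine/
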
Example #6
def mirror(config, *what_to_mirror, deduplicate=None):
    total_failures = []
    failures = collections.defaultdict(list)

    with open(config) as f:
        config = ruamel.yaml.YAML(typ='safe').load(f)
    if 'mirror' in config and not config['mirror']:
        log.warning('mirroring is disabled in config')
        return

    hows, whats = config['how'], config['what']
    if not what_to_mirror:
        what_to_mirror = whats.keys()
    else:
        what_to_mirror = ([
            k for k in whats.keys() if any(
                fnmatch.fnmatch(k, req) for req in what_to_mirror)
        ] + [k for k in what_to_mirror if '=' in k])

    if not what_to_mirror:
        log.error('nothing to mirror')
        return

    for resource in what_to_mirror:
        log.debug(f'processing {resource}...')

        if '=' not in resource:  # example: alpine-3.13
            resource_name, tail = resource, ''
            s = whats[resource_name]
        else:  # example: alpine-3.13=alpine/v3.13/main/x86
            resource_name, s = resource.split('=', 1)
            # FIXME UGLY: config overrides are stronger than = (more syntax?)
            # TODO: whats shouldn't be a dict, I think, just a list of strings
            if resource_name in whats:
                s = whats[resource_name]

        if s is None:
            s = resource_name
        if '/' in s:
            how_name, suffix = s.split('/', 1)
            suffix = '/' + suffix
        else:
            how_name, suffix = s, ''

        try:
            how = hows[how_name]
        except KeyError:
            log.error(f'missing how section on {how_name}')
            raise SystemExit()

        url = how['url'] + suffix
        method = how['method']
        sources = (how['sources'] if 'sources' in how else [how['url']])
        sources = [s + suffix for s in sources]
        extra_args = {
            k: v
            for k, v in how.items()
            if k not in ('url', 'sources', 'method', 'validate', 'deduplicate')
        }

        if f'method_{method}' not in globals():
            log.error(f'unsupported method {method}')
            raise SystemExit()

        meth = globals()[f'method_{method}']
        symlink = path.saviour(url.rstrip('/'))
        # usually symlink points to data, but while we're working on it,
        # it temporarily points to a consistent snapshot of it named `snap`
        data = os.path.realpath(path.saviour('_', resource_name, 'data'))
        snap = os.path.realpath(path.saviour('_', resource_name, 'snap'))
        temp = os.path.realpath(path.saviour('_', resource_name, 'temp'))
        lockfile = path.saviour('_', resource_name) + '-lock'
        assert data.startswith(os.path.realpath(path.SAVIOUR))
        assert snap.startswith(os.path.realpath(path.SAVIOUR))
        assert temp.startswith(os.path.realpath(path.SAVIOUR))

        sublog = log.Sublogger(f'{method} {resource_name}')
        sublog.info('locking...')
        with lock.Lock(lockfile):
            os.makedirs(os.path.dirname(snap), exist_ok=True)

            if os.path.exists(temp):
                sublog.info('removing stale temp...')
                _remove(temp)
            if os.path.exists(symlink):  # it's already published
                if os.path.exists(data) and not os.path.exists(snap):
                    # `data` is present and is the best we have to publish
                    sublog.info('snapshotting...')
                    reflink.always(data, temp, preserve=True)
                    os.rename(temp, snap)
                if os.path.exists(snap):
                    # link to a consistent snapshot while we work on `data`
                    _symlink(snap, symlink)

            for source in sources:
                sublog.info(f'trying {source}...')
                try:
                    meth(sublog, source, snap, data, **extra_args)
                    assert os.path.exists(data)
                    if 'validate' in how:
                        sublog.info(f'validating with {how["validate"]}...')
                        validator = globals()[f'validate_{how["validate"]}']
                        validator(sublog, source, data)
                        sublog.info('validated')
                    break
                except Exception as _:
                    traceback.print_exc()
                    failures[resource_name].append(source)
                    fingertip.util.log.warning(f'failed to mirror {source}')

            if len(failures[resource_name]) == len(sources):
                sublog.error(f'failed to mirror '
                             f'from all {len(sources)} sources')
                total_failures.append(resource_name)
                continue

            _symlink(data, symlink)
            if os.path.exists(snap):
                os.rename(snap, temp)  # move it out of the way asap
                sublog.info('removing now obsolete snapshot...')
                _remove(temp)

        how_deduplicate = how.get('deduplicate', True)
        db_name = how_deduplicate if how_deduplicate is not True else how_name
        if how_deduplicate and deduplicate is not False:
            try:
                _deduplicate(sublog, db_name, resource_name, timeout=1)
            except lock.LockTimeout:
                log.warning(f'skipped deduplication of {resource_name}, '
                            f'db {db_name} was locked')
    if total_failures:
        fingertip.util.log.error(f'failed: {", ".join(total_failures)}')
        raise FailureToMirrorError(", ".join(total_failures))
    log.info('saviour has completed mirroring')
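
For orientation, a hedged sketch of the configuration shape this function expects after the YAML load. The key names (mirror, how, what, url, method, sources, validate, deduplicate) are the ones the code above reads; every concrete value below is made up:

# Illustrative parsed config; values are hypothetical, keys match what mirror() reads.
config = {
    'mirror': True,                 # optional kill switch; a falsy value disables mirroring
    'how': {
        'alpine': {
            'url': 'https://example.org/alpine',
            'method': 'rsync',      # resolved to method_rsync() in globals(); name is illustrative
            'sources': ['rsync://mirror.example.org/alpine'],
            # optional: 'validate': '<name>' resolves to validate_<name>();
            # 'deduplicate': False skips dedup, a string names the dedup db;
            # any other keys are passed to the method as extra arguments
        },
    },
    'what': {
        'alpine-3.13': 'alpine/v3.13/main/x86',  # <how-name>/<suffix>
        'self-named': None,                      # None: the resource name doubles as the how name
    },
}
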
Example #7
    def _cache_aware_apply(self, step, tag, func, args, kwargs, last_step):
        assert self._state == 'loaded'

        transient_hint = func.transient if hasattr(func, 'transient') else None
        if callable(transient_hint):
            transient_hint = supply_last_step_if_requested(
                transient_hint, last_step)
            transient_hint = transient_hint(self, *args, **kwargs)

        return_as_transient = self._transient
        exec_as_transient = (transient_hint in ('always', True)
                             or transient_hint == 'last' and last_step)
        log.debug(f'transient: {transient_hint}')
        log.debug(f'exec_as_transient: {exec_as_transient}')
        log.debug(f'return_as_transient: {return_as_transient}')
        self._transient = exec_as_transient

        # Could there already be a cached result?
        log.debug(f'PATH {self.path} {tag}')
        new_mpath = os.path.join(self._parent_path, tag)

        lock_path = os.path.join(self._parent_path, '.' + tag + '-lock')
        do_lock = not self._transient
        if do_lock:
            log.info(f'acquiring lock for {tag}...')
        prev_log_name = self.log.name
        self.log.finalize()
        with lock.Lock(lock_path) if do_lock else lock.NoLock():
            if (os.path.exists(new_mpath) and not needs_a_rebuild(new_mpath)
                    and not exec_as_transient):
                # sweet, scratch this instance, fast-forward to cached result
                log.info(f'reusing {step} @ {new_mpath}')
                self._finalize()
                clone_from_path = new_mpath
            else:
                # loaded, not spun up, step not cached: perform step, cache
                log.info(f'applying (and, possibly, caching) {tag}')
                self.log = log.Sublogger('plugins.' + tag.split(':', 1)[0],
                                         os.path.join(self.path, 'log.txt'))
                func = supply_last_step_if_requested(func, last_step)
                m = func(self, *args, **kwargs)
                if m:
                    if m._transient and transient_hint == 'last' and last_step:
                        assert m._state == 'dropped'
                        # a transient-when-last step returned m in case this
                        # wasn't the last step, but it turned out to be;
                        # m is dropped already, only the log contents are preserved
                        fname = f'{datetime.datetime.utcnow().isoformat()}.txt'
                        t = path.logs(fname, makedirs=True)
                        with open(t, 'w') as f:
                            f.write(m.log_contents)
                        return t
                    assert not m._transient, 'transient step returned a value'
                    m._finalize(link_as=new_mpath, name_hint=tag)
                    clone_from_path = new_mpath
                    log.info(f'successfully applied and saved {tag}')
                else:  # transient step, either had hints or just returned None
                    clone_from_path = self._parent_path
                    log.info(f'successfully applied and dropped {tag}')
        if last_step:
            return os.path.join(clone_from_path, 'log.txt')
        m = clone_and_load(clone_from_path)
        m.log = log.Sublogger(prev_log_name, os.path.join(m.path, 'log.txt'))
        m._transient = return_as_transient
        return m
Example #8
def mirror(config, *what_to_mirror):
    total_failures = []
    failures = collections.defaultdict(list)

    with open(config) as f:
        config = ruamel.yaml.YAML(typ='safe').load(f)
    hows, whats = config['how'], config['what']
    if not what_to_mirror:
        what_to_mirror = whats.keys()
    else:
        what_to_mirror = [k for k in whats.keys()
                          if any((fnmatch.fnmatch(k, req)
                                  for req in what_to_mirror))]

    for resource_name in what_to_mirror or whats.keys():
        s = whats[resource_name]
        log.debug(f'processing {resource_name}...')

        if s is None:
            how, suffix = resource_name, ''
        elif '/' in s:
            how, suffix = s.split('/', 1)
            suffix = '/' + suffix
        else:
            how, suffix = s, ''

        try:
            how = hows[how]
        except KeyError:
            log.error(f'missing how section on {how}')
            raise SystemExit()

        url = how['url'] + suffix
        method = how['method']
        sources = (how['sources'] if 'sources' in how else [how['url']])
        sources = [s + suffix for s in sources]
        extra_args = {k: v for k, v in how.items()
                      if k not in ('url', 'sources', 'method')}

        if f'method_{method}' not in globals():
            log.error(f'unsupported method {method}')
            raise SystemExit()

        meth = globals()[f'method_{method}']
        symlink = path.saviour(url.rstrip('/'))
        # usually symlink points to data, but while we're working on it,
        # it temporarily points to a consistent snapshot of it named `snap`
        data = path.saviour('_', resource_name, 'data')
        snap = path.saviour('_', resource_name, 'snap')
        temp = path.saviour('_', resource_name, 'temp')
        lockfile = path.saviour('_', resource_name) + '-lock'
        assert data.startswith(path.SAVIOUR)
        assert snap.startswith(path.SAVIOUR)
        assert temp.startswith(path.SAVIOUR)

        sublog = log.Sublogger(f'{method} {resource_name}')
        sublog.info('locking...')
        with lock.Lock(lockfile):
            os.makedirs(os.path.dirname(snap), exist_ok=True)

            if os.path.exists(temp):
                sublog.info('removing stale temp...')
                _remove(temp)
            if os.path.exists(symlink):  # it's already published
                if os.path.exists(data) and not os.path.exists(snap):
                    # `data` is present and is the best we have to publish
                    sublog.info('snapshotting...')
                    reflink.always(data, temp, preserve=True)
                    os.rename(temp, snap)
                if os.path.exists(snap):
                    # link to a consistent snapshot while we work on `data`
                    _symlink(snap, symlink)

            for source in sources:
                sublog.info(f'trying {source}...')
                try:
                    meth(sublog, source, snap, data, **extra_args)
                    assert os.path.exists(data)
                    break
                except Exception as _:
                    traceback.print_exc()
                    failures[resource_name].append(source)
                    fingertip.util.log.warning(f'failed to mirror {source}')

            if len(failures[resource_name]) == len(sources):
                sublog.error(f'failed to mirror '
                             f'from all {len(sources)} sources')
                total_failures.append(resource_name)
                continue

            _symlink(data, symlink)
            if os.path.exists(snap):
                os.rename(snap, temp)  # move it out of the way asap
                sublog.info('removing now obsolete snapshot...')
                _remove(temp)

            try:
                deduplicate(sublog, resource_name, timeout=1)
            except lock.LockTimeout:
                log.warning('skipped deduplication, db was locked')
    if total_failures:
        fingertip.util.log.error(f'failed: {", ".join(total_failures)}')
        raise SystemExit()
    log.info('saviour has completed mirroring')