Example #1
0
    def fetch(self, size_limit_mb=2):
        """ Pull the file contents using only the locally stored metadata.

        Directories are refused with ``NotImplementedError``. When the
        stored metadata has no ``file_id`` a forced ``refresh`` with
        ``update_data=True`` is run instead and nothing else happens.
        Otherwise the data is assigned to ``self.local.data`` when
        ``size_limit_mb`` is ``None`` or the recorded size is strictly
        below it; a recorded size strictly above the limit only produces
        a warning.

        NOTE(review): a size exactly equal to the limit, or an unknown
        size combined with a limit, neither fetches nor warns. """
        meta = self.meta
        if self.is_dir():
            raise NotImplementedError(
                'not going to fetch all data in a dir at the moment')

        if meta.file_id is None:
            # the file name could be different so refresh does all the
            # work and there is nothing left to do here
            self.refresh(update_data=True, force=True)
            return

        unlimited = size_limit_mb is None
        size_known = not unlimited and meta.size is not None
        under_limit = size_known and meta.size.mb < size_limit_mb
        over_limit = size_known and meta.size.mb > size_limit_mb

        # FIXME should we force fetch here by default if the file exists?
        if unlimited or under_limit:
            if self.is_broken_symlink():
                # FIXME touch a temporary file and set the meta first!
                # replace the broken symlink with a freshly touched path
                # and put the metadata back on it
                self.unlink()
                self.touch()
                self._meta_setter(meta)

            log.info(f'Fetching remote via cache id {self.id} -> {self.local}')
            # note that this should trigger storage to .ops/objects
            self.local.data = self.data

        elif over_limit:
            log.warning(
                f'File is over the size limit {meta.size.mb} > {size_limit_mb}'
            )
Example #2
0
    def _meta_updater(self, pathmeta, fetch=True):
        original = self.meta
        file_is_different, updated = self._update_meta(original, pathmeta)
        # FIXME missing checksum is one source of problems here
        must_fetch = file_is_different and self.is_file() and self.exists(
        ) and fetch

        if must_fetch:
            try:
                # FIXME performance, and pathmeta.checksum is None case
                if self.local.content_different(
                ) and self.local.meta.checksum != pathmeta.checksum:
                    raise exc.LocalChangesError(f'not fetching {self}')

            except exc.NoRemoteFileWithThatIdError as e:
                log.warning(
                    'cant fetch remote file there may be untracked local changes for\n{self}'
                )

            log.info(f'crumpling to preserve existing metadata\n{self}')
            trashed = self.crumple()

        try:
            self._meta_setter(updated)
            if must_fetch:
                self.fetch(size_limit_mb=None)

        except BaseException as e:
            log.error(e)
            if must_fetch:
                trashed.rename(self)
            raise e

        return file_is_different
Example #3
0
    def refresh(self, update_data=False, size_limit_mb=2, force=False):
        """ Refresh this cache from its remote, trashing it if the remote is gone.

        ``update_data``, ``size_limit_mb`` and ``force`` are forwarded to
        ``self.remote.refresh`` together with ``update_cache=True`` and
        ``update_data_on_cache`` set to whether this path is an existing
        file.

        Returns whatever ``self.remote.refresh`` returns when that is not
        ``None``. When it is ``None`` the remote is treated as deleted and
        this path is renamed into ``self.trash`` (created on demand); in
        that case ``None`` is returned.

        Raises ``exc.NoCachedMetadataError`` when there is no metadata to
        refresh from, and ``FileNotFoundError`` when the move to trash
        fails even though the trash directory exists. """
        if self.meta is None:
            raise exc.NoCachedMetadataError(
                f'{self} has no cached metadata, cannot refresh')

        # NOTE size_limit_mb is forwarded as given; it is not widened for
        # files whose recorded size already exceeds it
        new = self.remote.refresh(update_cache=True,
                                  update_data=update_data,
                                  update_data_on_cache=(self.is_file()
                                                        and self.exists()),
                                  size_limit_mb=size_limit_mb,
                                  force=force)
        if new is not None:
            return new

        log.info(f'Remote for {self} has been deleted. Moving to trash.')
        target = self.trash / f'{self.parent.id}-{self.id}-{self.name}'
        try:
            self.rename(target)
        except FileNotFoundError:
            if self.trash.exists():
                # the trash is there so something else is missing
                raise

            self.trash.mkdir()
            log.info(f'created {self.trash}')
            # the first attempt failed only because the trash was
            # missing, so the move has to be attempted again
            self.rename(target)
Example #4
0
    def meta(self):
        """ Look up the metadata for this path, trying each cache in turn.

        For an existing path the parent class ``meta`` is used when it is
        truthy. For a non-existent symlink the ``_not_exists_cache`` (when
        configured) is consulted. If neither produced a result the
        ``_backup_cache`` (when configured) is tried, and a truthy result
        from it is written back through ``_meta_setter`` before being
        returned. ``exc.NoCachedMetadataError`` from either fallback cache
        is logged as a warning. Returns ``None`` when nothing was found. """
        if self.exists():
            # TODO this still gets hit a lot in threes
            primary = super().meta
            if primary:
                return primary
            # a falsy primary result fails over to the backup cache

        elif self._not_exists_cache and self.is_symlink():
            try:
                return self._not_exists_cache(self).meta
            except exc.NoCachedMetadataError as e:
                log.warning(e)

        if not self._backup_cache:
            return None

        try:
            restored = self._backup_cache(self).meta
            if restored:
                log.info(f'restoring from backup {restored}')
                # repopulate primary cache from backup
                self._meta_setter(restored)
                return restored

        except exc.NoCachedMetadataError as e:
            log.warning(e)

        return None
Example #5
0
    def dedupe(self, other, pretend=False):
        """ Decide which of two caches with the same id is the newer one.

        The choice is made by comparing, in order: the cached ``updated``
        values, the local ``updated`` values (a missing local value loses
        to a present one, ``self`` wins when both are missing), and
        finally whether ``self`` has a cached size at all. The metadata of
        the older and newer cache are then merged with ``_update_meta``
        only to log whether the file differs.

        ``pretend`` is accepted but currently has no effect because
        nothing is removed here.

        Returns the cache considered newer. Raises ``ValueError`` when the
        ids of ``self`` and ``other`` differ.

        NOTE(review): the cached ``updated`` values are compared directly,
        so both are presumably always set -- confirm against callers. """
        # FIXME blackfynn doesn't set update when a folder name changes ??!
        if self.id != other.id:
            raise ValueError(
                f'Can only dedupe when ids match, {self.id} != {other.id}')

        su, ou = self.meta.updated, other.meta.updated
        lsu, lou = self.local.meta.updated, other.local.meta.updated
        if su < ou:
            old, new = self, other

        elif su > ou:
            new, old = self, other

        elif lsu is None and lou is None:
            new, old = self, other

        elif lsu is None:
            old, new = self, other

        elif lou is None:
            new, old = self, other

        elif lsu < lou:
            old, new = self, other

        elif lsu > lou:
            new, old = self, other

        elif self.meta.size is None:
            # every timestamp ties and self has no size, prefer other
            old, new = self, other

        else:
            # every timestamp ties and self has a size, prefer self
            new, old = self, other

        file_is_different, meta = self._update_meta(old.meta, new.meta)
        if file_is_different:
            log.info(f'{self}\n!=\n{other}\n{meta}')

        # TODO when not pretend the old cache should be removed,
        # e.g. old.rename('/dev/null')  # hah

        return new
Example #6
0
    def _update_meta(old, new):
        if not old:
            return False, new  # if there is no file it is both different and not different

        if not new:
            return False, old

        file_is_different = False

        kwargs = {k: v for k, v in old.items()}
        if old.id != new.id:
            kwargs['old_id'] = old.id

        for k, vnew in new.items():
            vold = kwargs[k]

            if vnew is None or hasattr(vnew, '__iter__') and not vnew:
                # don't update with None or empty iterables
                continue

            if vold is not None and vold != vnew:
                log.info(f'{old.id} field {k} changed from {vold} -> {vnew}')
                if k in ('created', 'updated', 'size', 'checksum', 'file_id'):
                    file_is_different = True

            kwargs[k] = vnew

        if file_is_different:
            # strip fields missing from new in the case where
            # we aren't merging metadata from two different sources

            for k, vnew in new.items():
                if k == 'old_id':
                    continue

                if vnew is None:
                    log.debug(kwargs.pop(k))

        #old.updated == new.updated
        #old.updated < new.updated
        #old.updated > new.updated

        #old.created == new.created
        #old.created < new.created
        #old.created > new.created

        return file_is_different, PathMeta(**kwargs)
Example #7
0
    def _bootstrap(
            self,
            meta,
            *,
            parents=False,
            fetch_data=False,
            size_limit_mb=2,
            recursive=False,
            only=tuple(),
            skip=tuple(),
            sparse=tuple(),
    ):
        """ The actual bootstrap implementation """

        # figure out if we are actually bootstrapping this class or skipping it
        if not meta or meta.id is None:
            raise exc.BootstrappingError(
                f'PathMeta to bootstrap from has no id! {meta}')

        if only or skip or sparse:
            if self._meta_is_root(meta):
                # since we only go one organization at a time right now
                # we never want to skip the top level id
                log.info(f'Bootstrapping {meta.id} -> {self.local!r}')
            elif meta.id in skip:
                log.info(f'Skipped       {meta.id} since it is in skip')
                return
            elif only and meta.id not in only:
                log.info(f'Skipped       {meta.id} since it is not in only')
                return
            else:
                if sparse and meta.id in sparse:
                    log.info(f'Sparse strap {meta.id} -> {self.local!r}')
                    self._is_sparse_root = True
                    sparse = True
                else:
                    # if you pass the only mask so do your children
                    log.info(f'Bootstrapping {meta.id} -> {self.local!r}')

                only = tuple()

        if self.meta is not None and not recursive:
            msg = f'{self} already has meta!\n{self.meta.as_pretty()}'
            raise exc.BootstrappingError(msg)

        if self.exists() and self.meta and self.meta.id == meta.id:
            self._meta_updater(meta)

        else:
            # set single use bootstrapping id
            self._bootstrapping_id = meta.id

            # directory, file, or fake file as symlink?
            is_file_and_fetch_data = self._bootstrap_prepare_filesystem(
                parents,
                fetch_data,
                size_limit_mb,
                sparse,
            )

            is_file_and_fetch_data = False  # XXX NOTE _bootstrap_prepare_filesystem always returns None
            # remove this static assignment to False if there is a use case for bootstrapping the data
            self._bootstrap_data(is_file_and_fetch_data)

        if recursive:  # ah the irony of using loops to do this
            yield from self._bootstrap_recursive(only, skip, sparse)

        yield self
Example #8
0
    def meta(self, pathmeta):
        """ Store ``pathmeta`` for a path that does not exist yet.

        The metadata is written by pointing ``self.local`` at a symlink
        built from ``self.local.name`` and ``pathmeta.as_symlink()``.

        When this path is already a symlink its current metadata is read
        first and the update is skipped (plain ``return``) when

        * the two records are equal,
        * the ids differ and the timestamp comparison below does not
          favor ``pathmeta``, or
        * the current record has a size and ``pathmeta`` does not.

        Otherwise the existing symlink is renamed into the trash of
        ``self.local.cache`` before the new symlink is created.

        Raises ``exc.PathExistsError`` when the path exists, and
        ``exc.MetadataIdMismatchError`` when the ids differ, the
        ``updated`` values tie, and only ``pathmeta`` has ``created``. """
        if not self.exists():
            # if the path does not exist write even temporary to disk
            if self.is_symlink():
                meta = self.meta
                if meta == pathmeta:
                    log.debug(
                        f'Metadata unchanged for {meta.id}. Not updating.')
                    return

                if meta.id != pathmeta.id:
                    msg = ('Existing cache id does not match new id!\n'
                           f'{self!r}\n'
                           f'{meta.id} != {pathmeta.id}\n'
                           f'{meta.as_pretty()}\n'
                           f'{pathmeta.as_pretty()}')
                    log.critical(msg)
                    meta_newer = 'Meta newer. Not updating.'
                    pathmeta_newer = 'Other meta newer.'
                    # msg is reused from here on as a bare format template
                    msg = '{}'  # apparently I was out of my mind when I wrote this originally ...
                    if meta.updated is None and pathmeta.updated is None:
                        log.warning(
                            'no change since either has an updated value (wat)'
                        )
                        return  #FIXME

                    # NOTE(review): only the both-None case is handled
                    # above; presumably exactly one missing updated value
                    # never happens, otherwise these comparisons would
                    # fail -- confirm
                    if meta.updated > pathmeta.updated:
                        log.info(msg.format(meta_newer))
                        return  # this is the right thing to do for a sane filesystem
                    elif meta.updated < pathmeta.updated:
                        log.info(msg.format(pathmeta_newer))
                        # THIS IS EXPLICITLY ALLOWED
                    else:  # they are equal
                        # updated ties, so created is the tie breaker
                        extra = 'Both updated at the same time '
                        if meta.created is not None and pathmeta.created is not None:
                            if meta.created > pathmeta.created:
                                log.info(msg.format(extra + meta_newer))
                                return
                            elif meta.created < pathmeta.created:
                                log.info(msg.format(extra + pathmeta_newer))
                                # THIS IS EXPLICITLY ALLOWED
                            else:  # same created
                                log.info(
                                    msg.format(
                                        'Identical timestamps. Not updating.'))
                                return
                        elif meta.created is not None:
                            log.info(
                                msg.format(
                                    extra +
                                    'Meta has datetime other does not. Not updating.'
                                ))
                            return
                        elif pathmeta.created is not None:
                            # the only branch of the id mismatch handling
                            # that raises instead of returning or continuing
                            msg = msg.format(
                                extra + 'Meta has no datetime other does.')
                            log.info(msg)
                            raise exc.MetadataIdMismatchError(msg)
                        else:  # both none
                            log.info(
                                msg.format(extra + (
                                    'Identical update time both missing created time. '
                                    'Not updating.')))
                            return
                    # equality
                # id mismatch all cases above should return or raise except for other metadata newer

                if meta.size is not None and pathmeta.size is None:
                    log.error('new meta has no size so will not overwrite')
                    return

                # FIXME do the timestamp dance above here
                # NOTE(review): this message is also logged when the ids
                # differ and the other metadata was judged newer
                log.debug('Metadata exists, but ids match so will update')

                # trash old versions instead of just unlinking
                pc = self.local.cache
                trash = pc.trash
                self.rename(trash / f'{pc.parent.id}-{meta.id}-{self.name}')
                #self.unlink()

            # FIXME if an id starts with / then the local name is overwritten due to pathlib logic
            # we need to error if that happens
            #symlink = pathlib.PurePosixPath(self.local.name, pathmeta.as_symlink().as_posix().strip('/'))
            symlink = pathlib.PurePosixPath(
                self.local.name) / pathmeta.as_symlink()
            self.local.symlink_to(symlink)

        else:
            raise exc.PathExistsError(f'Path exists {self}')