Example #1
    def validate_file(self):
        meta = self.meta
        if meta.etag:
            local_checksum, local_count = self.local.etag(meta.chunksize)
            cache_checksum, cache_count = meta.etag
            if local_checksum != cache_checksum or local_count != cache_count:
                msg = (f'etags do not match!\n(!='
                       f'\n{local_checksum}-{local_count}'
                       f'\n{cache_checksum}-{cache_count}\n)')
                log.critical(msg)

        elif meta.checksum:
            lc = self.local.meta.checksum
            cc = self.meta.checksum
            if lc != cc:
                msg = f'Checksums do not match!\n(!=\n{lc}\n{cc}\n)'
                # haven't figured out how to compute the bf checksums yet
                log.critical(msg)
                #raise exc.ChecksumError(msg)
        elif meta.size is not None:
            log.warning(f'No checksum! Your data is at risk!\n'
                        f'{self.remote!r} -> {self.local!r}! ')
            ls = self.local.meta.size
            cs = self.meta.size
            if ls != cs:
                raise exc.SizeError(f'Sizes do not match!\n(!=\n{ls}\n{cs}\n)')
        else:
            log.warning(f'No checksum and no size! Your data is at risk!\n'
                        f'{self.remote!r} -> {self.local!r}! ')
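The etag branch compares a locally computed (checksum, chunk count) pair against the cached one. As a rough illustration of what such a chunked etag could look like, here is a standalone sketch that hashes per-chunk MD5 digests S3-style; it is an assumption about the scheme, not the actual implementation behind self.local.etag:

import hashlib

def multipart_etag(path, chunksize):
    """Hypothetical helper: S3-style etag returned as (digest, chunk_count)."""
    chunk_digests = []
    with open(path, 'rb') as f:
        while True:
            chunk = f.read(chunksize)
            if not chunk:
                break
            chunk_digests.append(hashlib.md5(chunk).digest())

    # the final digest is the md5 of the concatenated per-chunk digests
    return hashlib.md5(b''.join(chunk_digests)).digest(), len(chunk_digests)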
Example #2
    def fetch(self, size_limit_mb=2):
        """ bypass remote to fetch directly based on stored meta """
        meta = self.meta
        if self.is_dir():
            raise NotImplementedError(
                'not going to fetch all data in a dir at the moment')
        if meta.file_id is None:
            self.refresh(update_data=True, force=True)
            # the file name could be different so we have to return here
            return

        size_ok = (size_limit_mb is not None and meta.size is not None
                   and meta.size.mb < size_limit_mb)
        size_not_ok = (size_limit_mb is not None and meta.size is not None
                       and meta.size.mb > size_limit_mb)

        if size_ok or size_limit_mb is None:  # FIXME should we force fetch here by default if the file exists?
            if self.is_broken_symlink():
                # FIXME touch a temporary file and set the meta first!
                self.unlink()
                self.touch()
                self._meta_setter(meta)

            log.info(f'Fetching remote via cache id {self.id} -> {self.local}')
            self.local.data = self.data  # note that this should trigger storage to .ops/objects

        if size_not_ok:
            log.warning(
                f'File is over the size limit {meta.size.mb} > {size_limit_mb}'
            )
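The size gate relies on meta.size.mb, i.e. the size is wrapped in something with unit-conversion properties (the FileSize(size) seen in the constructor example below). A minimal sketch of such a wrapper, assuming mebibyte semantics; the real class may well differ:

class FileSize(int):
    """Hypothetical sketch of a size wrapper exposing the .mb property used above."""

    @property
    def mb(self):
        # assumption: megabytes here means mebibytes (1024 ** 2 bytes)
        return self / 1024 ** 2


assert FileSize(3 * 1024 ** 2).mb == 3.0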
Example #3
    def _meta_updater(self, pathmeta, fetch=True):
        original = self.meta
        file_is_different, updated = self._update_meta(original, pathmeta)
        # FIXME missing checksum is one source of problems here
        must_fetch = (file_is_different and self.is_file()
                      and self.exists() and fetch)

        if must_fetch:
            try:
                # FIXME performance, and pathmeta.checksum is None case
                if (self.local.content_different()
                        and self.local.meta.checksum != pathmeta.checksum):
                    raise exc.LocalChangesError(f'not fetching {self}')

            except exc.NoRemoteFileWithThatIdError as e:
                log.warning(
                    f'cannot fetch remote file; there may be untracked local changes for\n{self}'
                )

            log.info(f'crumpling to preserve existing metadata\n{self}')
            trashed = self.crumple()

        try:
            self._meta_setter(updated)
            if must_fetch:
                self.fetch(size_limit_mb=None)

        except BaseException as e:
            log.error(e)
            if must_fetch:
                trashed.rename(self)
            raise e

        return file_is_different
Example #4
    def _meta_setter(self, pathmeta, memory_only=False):
        """ we need memory_only for bootstrap I think """
        if not pathmeta:
            log.warning(f'Trying to set empty pathmeta on {self}')
            return

        # if a file already exists just follow instructions
        if self.exists_not_symlink():
            super()._meta_setter(pathmeta)

        else:
            if not hasattr(self, '_remote') or self._remote is None:
                self._bootstrapping_id = pathmeta.id

            # need to run this to create directories
            self._bootstrap_prepare_filesystem(parents=False,
                                               fetch_data=False,
                                               size_limit_mb=0)

            if self.exists():  # we are a directory now
                super()._meta_setter(pathmeta)

            elif self._not_exists_cache:
                cache = self._not_exists_cache(self, meta=pathmeta)

        if self._backup_cache:
            cache = self._backup_cache(self, meta=pathmeta)

        if hasattr(self, '_meta'):
            delattr(self, '_meta')

        if hasattr(self, '_id'):
            delattr(self, '_id')
Example #5
    def meta(self):
        #if hasattr(self, '_in_bootstrap'):
        #if hasattr(self, '_meta'):  # if we have in memory we are bootstrapping so don't fiddle about
        #return self._meta

        exists = self.exists()
        if exists:
            #log.debug(self)  # TODO this still gets hit a lot in threes
            meta = super().meta
            if meta:  # implicit else failover to backup cache
                return meta

        elif not exists and self._not_exists_cache and self.is_symlink():
            try:
                cache = self._not_exists_cache(self)
                return cache.meta
            except exc.NoCachedMetadataError as e:
                log.warning(e)

        if self._backup_cache:
            try:
                cache = self._backup_cache(self)
                meta = cache.meta
                if meta:
                    log.info(f'restoring from backup {meta}')
                    # repopulate primary cache from backup
                    self._meta_setter(meta)
                    return meta

            except exc.NoCachedMetadataError as e:
                log.warning(e)
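The property is effectively a fallback chain: on-disk metadata first, then the not-exists cache for symlinked placeholders, then the backup cache (which also repopulates the primary). The same shape in isolation, with hypothetical provider callables standing in for the caches:

def first_available_meta(*providers):
    """Hypothetical sketch: return the first truthy result from a chain of providers."""
    for provider in providers:
        try:
            meta = provider()
        except LookupError:  # stand-in for exc.NoCachedMetadataError
            continue
        if meta:
            return meta


# e.g. first_available_meta(primary_lookup, not_exists_lookup, backup_lookup)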
Example #6
    def __init__(
            self,
            size=None,
            created=None,
            updated=None,
            checksum=None,
            etag=None,
            chunksize=None,  # used for proper checksumming?
            id=None,
            file_id=None,
            old_id=None,
            gid=None,  # needed to determine local writability
            user_id=None,
            mode=None,
            errors=tuple(),
            **kwargs):

        if not file_id and file_id is not None and file_id != 0:
            raise TypeError('wat')
        if created is not None and not isinstance(created, (int, datetime)):
            _created = created
        else:
            self._created_ok = created
            _created = None

        if updated is not None and not isinstance(updated, (int, datetime)):
            _updated = updated
        else:
            self._updated_ok = updated
            _updated = None

        if id is not None and not isinstance(id, str):
            # no implicit type mutation, the system providing the ids
            # is where the information about how to do the conversion lives
            # we don't handle it here
            raise TypeError(f'id must be a string! {id!r}')

        self.size = size if size is None else FileSize(size)
        self._created = _created
        self._updated = _updated
        self.checksum = checksum
        self.etag = etag
        self.chunksize = chunksize
        self.id = id
        self.file_id = file_id
        self.old_id = old_id
        self.gid = gid
        self.user_id = user_id
        self.mode = mode
        self.errors = tuple(errors) if errors else tuple()
        if kwargs:
            log.warning(f'Unexpected meta values! {kwargs}')
            self.__kwargs = kwargs  # roundtrip values we don't explicitly handle
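The created/updated handling splits incoming values into "already usable" (None, an int epoch, or a datetime) and "raw, still needs parsing" (anything else, typically an iso8601 string). That normalization rule in isolation, as a standalone sketch with a hypothetical helper name:

from datetime import datetime

def split_timestamp(value):
    """Hypothetical helper mirroring the constructor: returns (raw, parsed), exactly one set."""
    if value is not None and not isinstance(value, (int, datetime)):
        return value, None   # raw value, deferred for later parsing
    return None, value       # already usable as-is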
Example #7
    def _datetime(self, value):
        if not isinstance(value, datetime):
            raise TypeError(f'{type(value)} is not a datetime for {value}')

        has_tz = (value.tzinfo is not None
                  and value.tzinfo.utcoffset(value) is not None)
        value = isoformat(value)
        if not has_tz:
            log.warning('why do you have a timestamp without a timezone ;_;')

        return value
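The tz check is the standard "aware datetime" test from the datetime docs. A standalone version, assuming the isoformat helper behaves roughly like datetime.isoformat:

from datetime import datetime, timezone

def is_aware(value):
    # aware iff tzinfo is set and actually yields a utc offset
    return value.tzinfo is not None and value.tzinfo.utcoffset(value) is not None

assert is_aware(datetime(2020, 1, 1, tzinfo=timezone.utc))
assert not is_aware(datetime(2020, 1, 1))
print(datetime(2020, 1, 1, tzinfo=timezone.utc).isoformat())  # 2020-01-01T00:00:00+00:00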
Example #8
    def popd(N=0, n=False):
        """ see popd --help """
        # note that python lists append in the opposite direction
        # so we invert the N index
        reversed_index = - (N + 1)
        if AugmentedPath._stack:
            path = AugmentedPath._stack.pop(reversed_index)
            path.chdir()
            print(*reversed(AugmentedPath._stack), AugmentedPath.cwd())
            return path
        else:
            log.warning('popd: directory stack empty')
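The directory stack is a plain Python list, so popd's N (counted from the top of the stack) has to be mapped onto a negative list index. A minimal standalone sketch of the same pushd/popd pairing using only the standard library; the names are illustrative, not the AugmentedPath API:

import os

_stack = []  # pushed directories, most recent last

def pushd(path):
    _stack.append(os.getcwd())
    os.chdir(path)

def popd(N=0):
    if not _stack:
        print('popd: directory stack empty')
        return None
    path = _stack.pop(-(N + 1))  # N counts down from the top of the stack
    os.chdir(path)
    return path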
Example #9
    def encode(self, field, value):
        if field == 'errors':
            return list(value)

        if field == 'checksum':
            if isinstance(value, bytes):
                value = value.hex()

        try:
            return _str_encode(field, value)
        except exc.UnhandledTypeError:
            log.warning(f'conversion not implemented for field {field}')

        return value
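For the checksum field the raw digest bytes are hex-encoded on the way out; the decode example further down reverses that with bytes.fromhex. A quick standard-library round trip:

import hashlib

digest = hashlib.md5(b'hello').digest()  # raw bytes, as kept on the metadata object
text = digest.hex()                      # what encode() emits for 'checksum'
assert bytes.fromhex(text) == digest     # lossless round trip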
Example #10
    def encode(self, field, value):
        #if field in ('created', 'updated') and not isinstance(value, datetime):
        #field.replace(cls.path_field_sep, ',')  # FIXME hack around iso8601
        # turns out 8601 isn't actually standard >_< with . instead of , sigh

        empty_iterable = hasattr(value, '__iter__') and not value
        if value is None or empty_iterable:
            raise TypeError('cannot encode an empty value')

        try:
            return _bytes_encode(field, value)
        except exc.UnhandledTypeError:
            log.warning(f'conversion not implemented for field {field}')

        raise exc.UnhandledTypeError(f'dont know what to do with {value!r}')
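The guard hasattr(value, '__iter__') and not value rejects empty containers (and empty strings or bytes) while letting falsy non-iterables such as 0 through to the encoder. A quick illustration of which values trip it:

def is_empty_iterable(value):
    # mirrors the guard above: iterable but falsy
    return hasattr(value, '__iter__') and not value

assert is_empty_iterable('') and is_empty_iterable(b'') and is_empty_iterable(())
assert not is_empty_iterable('x') and not is_empty_iterable(0)  # 0 is falsy but not iterable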
Example #11
    def decode(self, field, value):
        if field in ('created', 'updated'):  # FIXME human readable vs integer
            try:
                # needed for legacy cases
                value, = struct.unpack('d', value)
                return datetime.fromtimestamp(value)
            except struct.error:
                pass
            vd = value.decode()
            # FIXME with timezone vs without ...
            setattr(self, '_' + field, vd)
            return vd

        elif field == 'checksum':
            return value

        elif field == 'etag':
            # struct pack this sucker so the count can fit as well?
            value = value.decode()  # FIXME
            checksum, strcount = value.rsplit('-', 1)
            count = int(strcount)
            return bytes.fromhex(checksum), count

        elif field == 'errors':
            value = value.decode()
            return tuple(_ for _ in value.split(';') if _)

        elif field == 'user_id':
            try:
                return int(value)
            except ValueError:  # FIXME :/ uid vs owner_id etc ...
                return value.decode()

        elif field in ('id', 'mode', 'old_id'):
            return value.decode()

        elif field not in self.fields:
            log.warning(f'Unhandled field {field}')
            return value

        else:
            try:
                return int(value)
            except ValueError as e:
                log.exception(f'{field} {value}')
                raise e
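Two of the decodings are easier to see in isolation: the legacy timestamp is a single packed double of epoch seconds, and an etag is stored as '<hex digest>-<chunk count>'. A standalone sketch of both round trips:

import struct
from datetime import datetime, timezone

# legacy timestamp: one packed double of epoch seconds
packed = struct.pack('d', 1577836800.0)
epoch, = struct.unpack('d', packed)
assert datetime.fromtimestamp(epoch, tz=timezone.utc) == datetime(2020, 1, 1, tzinfo=timezone.utc)

# etag: hex digest and chunk count joined by '-'
raw = b'd41d8cd98f00b204e9800998ecf8427e-3'
checksum_hex, strcount = raw.decode().rsplit('-', 1)
etag = (bytes.fromhex(checksum_hex), int(strcount))
assert etag[1] == 3 and len(etag[0]) == 16  # md5 digests are 16 bytes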
Example #12
    def move(self, *, remote=None, target=None, meta=None):
        """ instantiate a new cache and cleanup self because we are moving """
        # FIXME what to do if we have data
        if remote is None and (target is None or meta is None):
            raise TypeError(
                'either remote or meta and target are required arguments')

        # deal with moving to a different directory that might not even exist yet
        if target is None:
            if not isinstance(self.anchor, self.__class__):
                raise TypeError(
                    f'mismatched anchor types {self!r} {self.anchor!r}')

            target = self.anchor / remote  # FIXME why does this not try to instantiate the caches? or does it?

        if target.absolute() == self.absolute():
            log.warning(f'trying to move a file onto itself {self.absolute()}')
            return target

        common = self.commonpath(target).absolute()
        target_parent = target.parent.absolute()
        parent = self.parent.absolute()

        assert target.name != self.name or target_parent != parent

        if target_parent != parent:
            _id = remote.id if remote else meta.id
            log.warning('A parent of the current file has changed location!\n'
                        f'{common}\n{self.relative_to(common)}\n'
                        f'{target.relative_to(common)}\n{_id}')

        if not target_parent.exists():
            if remote is None:  # we have to have a remote to pull parent structure
                remote = self._remote_class(meta)

            target_parent.mkdir_cache(remote)

        do_cast = not isinstance(target, self.__class__)
        if do_cast:
            target = self.__class__(target, meta=meta)

        if target.exists() or target.is_broken_symlink():
            if target.id == self.id:  #(remote.id if remote else meta.id):
                if self.is_broken_symlink():
                    # we may be a package with extra metadata that needs to
                    # be merged with the target before we are unlinked
                    file_is_different = target._meta_updater(self.meta)
                    # FIXME ... if file is different then this causes staleness
                    # and we need to fetch
                    if file_is_different:
                        log.critical('DO SOMETHING ABOUT THIS STALE DATA'
                                     f'\n{target}\n{target.meta.as_pretty()}')

                elif do_cast:
                    # the target meta was just put there, if the ids match it should be ok
                    # however since arbitrary meta can be passed in, best to double check
                    file_is_different = target._meta_updater(self.meta)
                    if file_is_different:
                        log.critical('Something has gone wrong'
                                     f'\n{target}\n{target.meta.as_pretty()}')
                else:
                    # directory moves that are resolved during pull
                    log.warning(f'what is this!?\n{target}\n{self}')
            elif target.is_broken_symlink():
                remote._cache = self  # restore the mapping for remote -> self
                raise exc.WhyDidntThisGetMovedBeforeError(
                    f'\n{target}\n{self}')
            else:
                raise exc.PathExistsError(f'Target {target} already exists!')

        if self.exists():
            safe_unlink = target.local.parent / f'.unlink-{target.name}'
            try:
                if target.is_broken_symlink():
                    target.rename(safe_unlink)

                # if target is_dir then this will fail, which is ok
                self.rename(target)
            except BaseException as e:
                log.exception(e)
                if safe_unlink.is_broken_symlink():
                    safe_unlink.rename(target)
            finally:
                if safe_unlink.is_broken_symlink():
                    safe_unlink.unlink()

        elif self.is_broken_symlink():
            # we don't move to trash here because this was just a file rename
            # don't move the meta since it will break the naming insurance measure
            self.unlink()

        return target
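The final rename is wrapped in a small "rename aside, restore on failure, clean up on success" dance so an existing broken symlink at the target is never lost if the move blows up. The same pattern in isolation, with hypothetical names and plain pathlib:

import pathlib

def replace_with(source: pathlib.Path, target: pathlib.Path):
    """Hypothetical sketch of the rename-aside / restore-on-failure pattern."""
    aside = target.parent / f'.unlink-{target.name}'
    if target.is_symlink() and not target.exists():  # broken symlink in the way
        target.rename(aside)
    try:
        source.rename(target)
    except OSError:
        if aside.is_symlink():
            aside.rename(target)  # put the original link back
        raise
    finally:
        if aside.is_symlink() and not aside.exists():
            aside.unlink()  # success: the saved link is no longer needed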
Example #13
    def meta(self, pathmeta):
        if not self.exists():
            # if the path does not exist, write the metadata to disk even if only temporarily
            if self.is_symlink():
                meta = self.meta
                if meta == pathmeta:
                    log.debug(
                        f'Metadata unchanged for {meta.id}. Not updating.')
                    return

                if meta.id != pathmeta.id:
                    msg = ('Existing cache id does not match new id!\n'
                           f'{self!r}\n'
                           f'{meta.id} != {pathmeta.id}\n'
                           f'{meta.as_pretty()}\n'
                           f'{pathmeta.as_pretty()}')
                    log.critical(msg)
                    meta_newer = 'Meta newer. Not updating.'
                    pathmeta_newer = 'Other meta newer.'
                    msg = '{}'  # apparently I was out of my mind when I wrote this originally ...
                    if meta.updated is None and pathmeta.updated is None:
                        log.warning(
                            'no change since neither has an updated value (wat)'
                        )
                        return  #FIXME

                    if meta.updated > pathmeta.updated:
                        log.info(msg.format(meta_newer))
                        return  # this is the right thing to do for a sane filesystem
                    elif meta.updated < pathmeta.updated:
                        log.info(msg.format(pathmeta_newer))
                        # THIS IS EXPLICITLY ALLOWED
                    else:  # they are equal
                        extra = 'Both updated at the same time '
                        if meta.created is not None and pathmeta.created is not None:
                            if meta.created > pathmeta.created:
                                log.info(msg.format(extra + meta_newer))
                                return
                            elif meta.created < pathmeta.created:
                                log.info(msg.format(extra + pathmeta_newer))
                                # THIS IS EXPLICITLY ALLOWED
                            else:  # same created
                                log.info(
                                    msg.format(
                                        'Identical timestamps. Not updating.'))
                                return
                        elif meta.created is not None:
                            log.info(
                                msg.format(
                                    extra +
                                    'Meta has datetime other does not. Not updating.'
                                ))
                            return
                        elif pathmeta.created is not None:
                            msg = msg.format(
                                extra + 'Meta has no datetime other does.')
                            log.info(msg)
                            raise exc.MetadataIdMismatchError(msg)
                        else:  # both none
                            log.info(
                                msg.format(extra + (
                                    'Identical update time both missing created time. '
                                    'Not updating.')))
                            return
                    # equality
                # id mismatch all cases above should return or raise except for other metadata newer

                if meta.size is not None and pathmeta.size is None:
                    log.error('new meta has no size so will not overwrite')
                    return

                # FIXME do the timestamp dance above here
                log.debug('Metadata exists, but ids match so will update')

                # trash old versions instead of just unlinking
                pc = self.local.cache
                trash = pc.trash
                self.rename(trash / f'{pc.parent.id}-{meta.id}-{self.name}')
                #self.unlink()

            # FIXME if an id starts with / then the local name is overwritten due to pathlib logic
            # we need to error if that happens
            #symlink = pathlib.PurePosixPath(self.local.name, pathmeta.as_symlink().as_posix().strip('/'))
            symlink = pathlib.PurePosixPath(
                self.local.name) / pathmeta.as_symlink()
            self.local.symlink_to(symlink)

        else:
            raise exc.PathExistsError(f'Path exists {self}')
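When the path does not exist, the metadata is persisted by encoding it into the target of a dangling symlink (pathmeta.as_symlink()), prefixed with the local file name. A minimal standalone sketch of the general trick using a made-up key=value encoding; the library's actual symlink format is not shown here:

import os
import pathlib

def write_symlink_meta(path, meta):
    """Hypothetical sketch: stash metadata in the target of a dangling symlink."""
    encoded = ','.join(f'{k}={v}' for k, v in meta.items())
    # keep the local name as the first path component, as the setter above does
    path.symlink_to(pathlib.PurePosixPath(path.name) / encoded)

def read_symlink_meta(path):
    target = pathlib.PurePosixPath(os.readlink(path))
    return dict(kv.split('=', 1) for kv in target.name.split(','))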