예제 #1
0
    def checksum(self):
        """ Return the checksum of the remote path as bytes.

            Runs ``{cypher_command} {rpath}`` through ``self._ssh`` and pipes
            the result through awk so that only the first column of the
            output is kept. That column is decoded with ``self.encoding``
            and interpreted as a hexadecimal string. """
        pipeline = ' | '.join((
            f'{self.cypher_command} {self.rpath}',
            "awk '{ print $1 }';",
        ))
        raw = self._ssh(pipeline)
        hexdigest = raw.decode(self.encoding)
        log.debug(hexdigest)
        return bytes.fromhex(hexdigest)
예제 #2
0
    def data(self, generator):
        """ Write the chunks produced by generator to this path.

            generator: an iterator yielding bytes chunks. The first chunk is
            pulled before the file is opened, so an error raised while
            producing it does not open (and thereby truncate) the file.
            An exhausted iterator produces an empty file; previously the
            StopIteration from next() escaped to the caller.

            If self.cache is not None its metadata is read before writing
            and written back afterwards when it is found to be missing.

            Raises AssertionError if the cache metadata is still falsy
            after the write. """
        cache = self.cache
        if cache is not None:
            # remember the metadata so it can be restored after the write
            cmeta = cache.meta
        else:
            assert self.cache is None

        # FIXME do we touch a file, write the meta
        # and then write the data?
        # do we touch a temporary file, write the meta
        # unlink the symlink, and move the temp file in, and then write the data?
        # the order that we do this in is very important for robustness to failure
        # especially when updating a file ...
        # storing history in the symlink cache also an option?
        log.debug(f'writing to {self}')
        # if an error occurs don't open the file; the b'' default means
        # that an empty stream is written as an empty file
        chunk1 = next(generator, b'')
        with open(self, 'wb') as f:
            f.write(chunk1)
            for chunk in generator:
                f.write(chunk)

        if cache is not None:  # FIXME cache
            if not cache.meta:
                cache.meta = cmeta  # glories of persisting xattrs :/
            # yep sometimes the xattrs get  blasted >_<
            assert cache.meta
            assert self.cache.meta
예제 #3
0
 def _data_setter(self, generator):
     """ a data setter that can be used in a chain of generators """
     log.debug(f'writing to {self}')
     chunk1 = next(generator)  # if an error occurs don't open the file
     with open(self, 'wb') as f:
         f.write(chunk1)
         yield chunk1
         for chunk in generator:
             #log.debug(chunk)
             f.write(chunk)
             yield chunk
예제 #4
0
    def copy_to(self, target, force=False, copy_cache_meta=False):
        """ Copy the data of the current path object to a target path.

            target: destination; converted with ``self.__class__(target)``
            when its type is not exactly the type of self.
            force: when true, write even though the target already exists
            or is a symlink.
            copy_cache_meta: when true, also initialize the target cache
            from ``self.cache.meta``.

            Raises exc.PathExistsError when the target exists or is a
            symlink and force is false. """
        if type(target) != type(self):
            target = self.__class__(target)

        occupied = target.exists() or target.is_symlink()
        if occupied and not force:
            raise exc.PathExistsError(f'{target}')

        target.data = self.data

        if not copy_cache_meta:
            return

        log.debug(f'copying cache meta {self.cache.meta}')
        target.cache_init(self.cache.meta)
예제 #5
0
    def update_cache(self, cache=None, fetch=True):
        """ Update a cache object using the metadata attached to this remote.

            This differs from _cache_setter in that it runs the cache's
            _meta_updater by default, handles more edge cases, and checks
            for consistency.

            cache: the cache to update; only accepted while self.cache is
            None, otherwise self.cache is used.
            fetch: forwarded to ``cache._meta_updater``.

            Returns whatever ``cache._meta_updater`` returns.

            Raises TypeError when a cache is passed although self.cache is
            set, and ValueError when a passed cache differs from this
            remote in name or parent. """
        if cache is None:
            cache = self.cache
        elif self.cache is not None:
            # TODO see if there are any exceptions to this behavior
            raise TypeError(
                'cannot accept cache kwarg when self.cache not None')

        parent_changed = self._parent_changed(cache)

        if self.cache is None:
            # HACK self.cache is consulted again here, the local cache
            # name may have been rebound above
            hint = ('same time.\nAre you sure you have passed the right '
                    'cache object?\n')
            if cache.name != self.name:
                raise ValueError(
                    'Cannot update the name and content of a file at the '
                    + hint + f'{cache.name} != {self.name}')

            if parent_changed:
                raise ValueError(
                    'Cannot update the parent and content of a file at the '
                    + hint + f'{cache.parent.id} != {self.parent_id}')

        log.debug(f'maybe updating cache for {self.name}')
        # update the cache first  # FIXME this may be out of order ...
        # then move to the new name if relevant
        # prevents moving partial metadata onto existing files
        file_is_different = cache._meta_updater(self.meta, fetch=fetch)

        # this is locally correct, the issue is that move is now smarter
        # and will detect if a parent path has changed
        needs_move = cache.name != self.name or parent_changed
        if needs_move:
            try:
                cache.move(remote=self)
            except exc.WhyDidntThisGetMovedBeforeError as e:
                # deal with the sadness that is non-unique filenames
                # users almost certainly do not expect this behavior ...
                log.error(e)
                self._on_cache_move_error(e, cache)

        return file_is_different
예제 #6
0
    def _update_meta(old, new):
        if not old:
            return False, new  # if there is no file it is both different and not different

        if not new:
            return False, old

        file_is_different = False

        kwargs = {k: v for k, v in old.items()}
        if old.id != new.id:
            kwargs['old_id'] = old.id

        for k, vnew in new.items():
            vold = kwargs[k]

            if vnew is None or hasattr(vnew, '__iter__') and not vnew:
                # don't update with None or empty iterables
                continue

            if vold is not None and vold != vnew:
                log.info(f'{old.id} field {k} changed from {vold} -> {vnew}')
                if k in ('created', 'updated', 'size', 'checksum', 'file_id'):
                    file_is_different = True

            kwargs[k] = vnew

        if file_is_different:
            # strip fields missing from new in the case where
            # we aren't merging metadata from two different sources

            for k, vnew in new.items():
                if k == 'old_id':
                    continue

                if vnew is None:
                    log.debug(kwargs.pop(k))

        #old.updated == new.updated
        #old.updated < new.updated
        #old.updated > new.updated

        #old.created == new.created
        #old.created < new.created
        #old.created > new.created

        return file_is_different, PathMeta(**kwargs)
예제 #7
0
        def inner(child):
            """ Match child against self by name and stem.

                An exact name match copies child.meta onto self.meta. For
                files, any looser name/stem match adds child to candidates.
                Children whose type does not agree with isd/isf are
                ignored. """
            if child.is_dir() and isd:
                if child.name == self.name:
                    self.meta = child.meta

                return

            if not (child.is_file() and isf):
                # file type mismatch, nothing to do
                return

            log.debug(f'{child.name} {child.stem}, {child.suffix!r}')
            log.debug(f'{self.name} {self.stem}, {self.suffix!r}')
            if child.name == self.name:
                self.meta = child.meta
            elif (child.name == self.stem
                  or child.stem == self.name
                  or child.stem == self.stem):  # stem == stem is the worst case
                candidates.append(child)
예제 #8
0
    def setxattr(self, key, value, namespace=XATTR_DEFAULT_NS):
        """ Store value under key by writing it to a named stream.

            key: str or bytes; bytes are decoded before use.
            value: must already be bytes (e.g. checksums).
            namespace: passed to ``self._key_convention`` together with key
            to build the stream name.

            Raises TypeError when value is not bytes. """
        if not isinstance(value, bytes):  # checksums
            raise TypeError(f'setxattr only accepts values already encoded to bytes!\n{value!r}')

        if isinstance(key, bytes):
            key = key.decode()

        name = self._key_convention(key, namespace)
        stream = self._stream(name)
        for item in (name, stream, value):
            log.debug(item)

        with open(stream, 'wb') as f:
            f.write(value)
예제 #9
0
    def _bootstrap_recursive(self, only=tuple(), skip=tuple(), sparse=False):
        # TODO if rchildren looks like it could be bad
        # go back up to dataset level?
        #sname = lambda gen: sorted(gen, key=lambda c: c.name)  # c.name doesn't work for remotes
        #rcs = sname(self.remote._rchildren(create_cache=False, sparse=sparse))
        rcs = self.remote._rchildren(create_cache=False, sparse=sparse)

        local_paths = list(self.local.rchildren)
        local_files = set(p for p in local_paths
                          if p.is_file() or p.is_broken_symlink())
        file_index = {f.cache_id: f
                      for f in local_files}  # FIXME WARNING can get big
        # FIXME have to compute file_index here because for some reason
        # computing local_dirs will remove folders entirely !??
        local_dirs = set(
            p.relative_to(self.anchor) for p in local_paths if p.is_dir())
        if local_dirs:
            rcs = list(rcs)  # sigh
            remote_dirs = set(c for c in rcs if c.is_dir())

            rd = set(
                d.as_path()
                for d in remote_dirs)  # FIXME as_path => lots of network calls
            old_local = local_dirs - rd
            while old_local:
                thisl = sorted(old_local, key=lambda d: len(d.as_posix()))
                for d in thisl:
                    ad = self.anchor.local / d
                    if ad.cache is None:
                        log.critical(
                            f'would you fix the nullability already? {d}')
                        continue
                    new = ad.cache.refresh()
                    #log.info(f'{new}')
                    local_dirs = set(
                        ld for ld in local_dirs
                        if not ld.as_posix().startswith(d.as_posix()))
                    old_local = local_dirs - rd

        if sparse:
            #if local_dirs:
            #gen = (c for c in _local_remotes if c.is_dir() or (c.is_file() and c._sparse_include()))
            #else:
            gen = (c for c in rcs
                   if c.is_dir() or (c.is_file() and c._sparse_include()))
            # FIXME rcs still takes too long, though using the generator
            # does get some useful work done first
        else:
            # FIXME horrid performance on remotes with loads of files
            gen = sorted(rcs, key=lambda c: len(c.as_path().as_posix()))

        for child in gen:
            # use the remote's recursive implementation
            # not the local implementation, since the
            # remote may have additional requirements
            #child.bootstrap(only=only, skip=skip)
            # because of how remote works now we don't even have to
            # bootstrap this
            cc = child.cache

            if cc is None:
                if child.is_file() and child.id in file_index:
                    _cache = file_index[child.id].cache
                    cmeta = _cache.meta
                    rmeta = child.meta
                    file_is_different, nmeta = self._update_meta(cmeta, rmeta)
                    if file_is_different:
                        log.critical(f'WAT {_cache}')
                    else:
                        yield _cache
                        # yield the old cache if it exists
                        # otherwise consumers of bootstrap will
                        # think the file may have been deleted
                        continue

                cc = child.cache_init()
                log.debug(cc)

            yield cc
예제 #10
0
    def meta(self, pathmeta):
        """ Store pathmeta for this path as a symlink.

            Only allowed while ``self.exists()`` is false. If the path is
            already a symlink the metadata currently stored there is
            compared with pathmeta first:

            * equal metadata: nothing is written
            * different ids: the updated and created values decide; the
              write only proceeds when pathmeta is the newer of the two
            * the stored metadata has a size but pathmeta does not:
              nothing is written

            When the write proceeds the old symlink is renamed into the
            trash of the local cache, and a new symlink built from
            ``pathmeta.as_symlink()`` is created at ``self.local``.

            Raises exc.PathExistsError when the path exists, and
            exc.MetadataIdMismatchError when the ids differ, the updated
            values are equal, and only pathmeta has a created value. """
        if not self.exists():
            # if the path does not exist write even temporary to disk
            if self.is_symlink():
                # presumably a dangling symlink that encodes the metadata
                meta = self.meta
                if meta == pathmeta:
                    log.debug(
                        f'Metadata unchanged for {meta.id}. Not updating.')
                    return

                if meta.id != pathmeta.id:
                    msg = ('Existing cache id does not match new id!\n'
                           f'{self!r}\n'
                           f'{meta.id} != {pathmeta.id}\n'
                           f'{meta.as_pretty()}\n'
                           f'{pathmeta.as_pretty()}')
                    log.critical(msg)
                    meta_newer = 'Meta newer. Not updating.'
                    pathmeta_newer = 'Other meta newer.'
                    # from here on msg is only a pass-through format template
                    msg = '{}'  # apparently I was out of my mind when I wrote this originally ...
                    if meta.updated is None and pathmeta.updated is None:
                        # the message below presumably means "neither"
                        log.warning(
                            'no change since either has an updated value (wat)'
                        )
                        return  #FIXME

                    # NOTE(review): only the both-None case is handled above;
                    # if exactly one updated value is None the comparisons
                    # below would presumably raise TypeError -- confirm
                    if meta.updated > pathmeta.updated:
                        log.info(msg.format(meta_newer))
                        return  # this is the right thing to do for a sane filesystem
                    elif meta.updated < pathmeta.updated:
                        log.info(msg.format(pathmeta_newer))
                        # THIS IS EXPLICITLY ALLOWED
                    else:  # they are equal
                        # fall back to the created values as a tie breaker
                        extra = 'Both updated at the same time '
                        if meta.created is not None and pathmeta.created is not None:
                            if meta.created > pathmeta.created:
                                log.info(msg.format(extra + meta_newer))
                                return
                            elif meta.created < pathmeta.created:
                                log.info(msg.format(extra + pathmeta_newer))
                                # THIS IS EXPLICITLY ALLOWED
                            else:  # same created
                                log.info(
                                    msg.format(
                                        'Identical timestamps. Not updating.'))
                                return
                        elif meta.created is not None:
                            log.info(
                                msg.format(
                                    extra +
                                    'Meta has datetime other does not. Not updating.'
                                ))
                            return
                        elif pathmeta.created is not None:
                            # the only branch of the id mismatch handling that raises
                            msg = msg.format(
                                extra + 'Meta has no datetime other does.')
                            log.info(msg)
                            raise exc.MetadataIdMismatchError(msg)
                        else:  # both none
                            log.info(
                                msg.format(extra + (
                                    'Identical update time both missing created time. '
                                    'Not updating.')))
                            return
                    # equality
                # id mismatch all cases above should return or raise except for other metadata newer

                if meta.size is not None and pathmeta.size is None:
                    log.error('new meta has no size so will not overwrite')
                    return

                # FIXME do the timestamp dance above here
                # NOTE this message is also logged when the ids differ
                # and pathmeta was found to be newer
                log.debug('Metadata exists, but ids match so will update')

                # trash old versions instead of just unlinking
                pc = self.local.cache
                trash = pc.trash
                self.rename(trash / f'{pc.parent.id}-{meta.id}-{self.name}')
                #self.unlink()

            # FIXME if an id starts with / then the local name is overwritten due to pathlib logic
            # we need to error if that happens
            #symlink = pathlib.PurePosixPath(self.local.name, pathmeta.as_symlink().as_posix().strip('/'))
            # the link target is the local name followed by the encoded metadata
            symlink = pathlib.PurePosixPath(
                self.local.name) / pathmeta.as_symlink()
            self.local.symlink_to(symlink)

        else:
            raise exc.PathExistsError(f'Path exists {self}')