Example #1
    def test_save_sets_mod(self, tmp_path):
        path = tmp_path / "doc.json"
        doc = FileMappedDocument(path, mode=0o600)
        doc.save()

        mode = path.stat().st_mode & 0o777
        assert oct(mode) == oct(0o600)
Example #2
 def infer_from_path(path):
     "Infer as much information as possible from the file"
     doc = Document.parse_path(path)
     if not doc.date:
         doc.date = datetime.datetime.fromtimestamp(
             path.stat().st_ctime).strftime("%Y-%m-%d")
     return doc
Example #3
    def add(self, path, derived=None, commit=True):
        # type: (EntryType, Optional[Dict[str, Any]], bool) -> None

        stats = path.stat()
        self._add_file(fspath(path), stats.st_size, stats.st_mtime_ns, derived)
        if commit:
            self.commit()
Example #4
def filesize(path):
    if hasattr(path, 'filesize') and path.filesize is not None:
        return path.filesize
    if path.fp or path.is_url():
        return 0
    st = path.stat()  # vstat(path)
    return st and st.st_size
Example #5
def tree(path, dirs_only=False, max_depth=0, _depth=0):
    path = Path(path)
    lst = path.lstat()
    is_symlink = stat.S_ISLNK(lst.st_mode)
    st = lst if is_symlink else path.stat()
    is_dir = stat.S_ISDIR(st.st_mode)
    if is_symlink:
        size = 0
    elif is_dir:
        size = functools.reduce(operator.add, [
            tree(p, dirs_only=dirs_only,
                 max_depth=max_depth,
                 _depth=_depth+1)
            for p in sorted(path.iterdir())
        ], 0)
    else:
        size = lst.st_size
    if (is_dir or not dirs_only) and \
       (not max_depth or _depth <= max_depth):
        p = str(path)
        if is_dir:
            p += os.path.sep
        if is_symlink:
            p += ' -> ' + os.readlink(str(path))
        print('%10s  %s' % (format_size(size), p))
    return size
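A minimal driver sketch for the tree() example above (not part of the original snippet): format_size is not shown there, so a trivial stand-in is defined here, and the imports tree() relies on are repeated so the sketch is self-contained.

import functools
import operator
import os
import stat
from pathlib import Path

def format_size(n):
    # Stand-in only; the original project presumably formats sizes human-readably.
    return f"{n} B"

# Print directories up to two levels deep and report the total size.
total = tree(Path("."), dirs_only=True, max_depth=2)
print("total:", format_size(total))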
Example #6
 def add_candidates(self):
     if not self.new_target_list():
         return ()
     dirsize = 0
     start = datetime.datetime.now()
     logger.info(f"Walking target list: {self.state.dirlist}")
     gis = ImageSignature()
     for top in self.state.dirlist:
         message = f"Traversing tree at {top} and adding to queue."
         logger.info(message)
         self.status(message)
         top_path = Path(top)
         for path in top_path.rglob("**/*"):
             ext = path.suffix.lower()
             if ext in cfg.settings.image_filetypes:
                 size = path.stat().st_size
                 dirsize += size
                 photo_b = self.get_bytes(path)
                 md5sum = hashlib.md5(photo_b).hexdigest()
                 # if not MD5sum already in database:
                 im = Image.open(io.BytesIO(photo_b))
                 tags = {
                     "cameraMake": im.info['parsed_exif'].get(0x010f, ""),
                     "cameraModel": im.info['parsed_exif'].get(0x0110, ""),
                     "creationTime": im.info['parsed_exif'].get(0x9003, ""),
                     "width": im.width,
                     "height": im.height,
                 }
                 image_md5 = hashlib.md5(im.tobytes()).hexdigest()
                 signature = gis.generate_signature(
                     photo_b, bytestream=True
                 ).tolist()
                 record = {
                     "src_path": str(path),
                     "size": size,
                     "md5sum": md5sum,
                     "image_md5": image_md5,
                     "signature": signature,
                     "mediaMetadata": tags,
                 }
                 photos.add(record)
                 logger.info(f"Added: {path}")
             else:
                 ext = ext.replace(
                     ".", ""
                 )  # Database can't handle keys starting with dot
                 excluded = self.state.excluded_ext_dict
                 if ext in excluded:
                     excluded[ext] += 1
                 else:
                     excluded[ext] = 1
                 self.state.update(excluded_ext_dict=excluded)
     self.state.save()
     elapsed = datetime.datetime.now() - start
     self.state.modify(
         dirsize=self.state.dirsize + dirsize,
         dirtime=elapsed.seconds + elapsed.microseconds / 1e6,
     )
     return
Example #7
    def __init__(self, str_dir_name):
        dir_name = "."  # dirname(filePath)
        dir_name_new = str_dir_name + '_lg_files'
        dir_name_new2 = str_dir_name + '_lg_files_out'
        if path.exists(dir_name_new):
            shutil.rmtree(dir_name_new)
        if path.exists(dir_name_new2):
            shutil.rmtree(dir_name_new2)
        try:
            stat(dir_name_new)
            stat(dir_name_new2)
        except OSError:
            mkdir(dir_name_new)
            mkdir(dir_name_new2)
Example #8
def delete_empty_file(local_filename):
    path = Path(local_filename)
    try:
        size = path.stat().st_size
    except FileNotFoundError:
        return
    if size == 0:
        path.unlink()
Example #9
    def __init__(self, str_dir_name):
        dir_name = "."  # dirname(filePath)
        dir_name_new = str_dir_name + '_lg_files'
        dir_name_new2 = str_dir_name + '_lg_files_out'
        if path.exists(dir_name_new):
            shutil.rmtree(dir_name_new)
        if path.exists(dir_name_new2):
            shutil.rmtree(dir_name_new2)
        try:
            stat(dir_name_new)
            stat(dir_name_new2)
        except OSError:
            mkdir(dir_name_new)
            mkdir(dir_name_new2)
Example #10
def file_id(path: pl.Path) -> bytes:
    stat = path.stat()
    stat_data = f"{stat.st_ino}_{stat.st_size}_{stat.st_mtime}"

    id_sum = hashlib.new('sha1')
    id_sum.update(stat_data.encode('ascii'))
    with path.open(mode="rb") as fh:
        id_sum.update(fh.read())
    return id_sum.digest()
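A minimal usage sketch for file_id() above (not part of the original example); it assumes the function and its imports are already in scope.

import pathlib as pl
import tempfile

# Create a throwaway file and fingerprint it.
with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as fh:
    fh.write(b"hello world")
tmp = pl.Path(fh.name)
print(file_id(tmp).hex())  # SHA-1 digest; changes if inode, size, mtime, or contents change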
Example #11
def set_path_readonly(path: Path) -> None:
    if path.is_dir():
        # Need to add the execute bit so the directory stays traversable
        right = stat.S_IXUSR | stat.S_IRGRP | stat.S_IXGRP | stat.S_IRUSR
    else:
        # Already in read only
        right = stat.S_IRGRP | stat.S_IRUSR

    if path.stat().st_mode & ~right != 0:
        path.chmod(right)
Example #12
def set_path_readonly(path: Path) -> None:
    if path.is_dir():
        # Need to add the execute bit so the directory stays traversable
        right = stat.S_IXUSR | stat.S_IRGRP | stat.S_IXGRP | stat.S_IRUSR
    else:
        # Already in read only
        right = stat.S_IRGRP | stat.S_IRUSR

    if path.stat().st_mode & ~right != 0:
        path.chmod(right)
Example #13
	def get(self, path, only=frozenset(), no=frozenset()):
		# type: (EntryType, FrozenSet[str], FrozenSet[str]) -> tuple

		""" Retrieves latest row based on mandatory information
			which is solely based on the `path`.
			Use `only`/`no` to include/exclude returned fields.
		"""

		stats = path.stat()
		return self.get_latest(fspath(path), stats.st_size, stats.st_mtime_ns, ignore_null=True, only=only, no=no)
Example #14
    def path_to_info_hash(self, path):
        # type: (Path, ) -> str

        name = path.name
        size = path.stat().st_size

        try:
            return self.map[(name, size)]
        except KeyError:
            raise NotFound(
                f"Could not find infohash for name={name}, size={size}")
Example #15
 def calc_state_size(self):
     # Should work even for pre-7.4 versions, counting only files and folders related to that version
     result = 0
     for filename in STATE_FILES_TO_COPY:
         path = self.directory / filename
         if path.exists():
             result += path.stat().st_size
     for dirname in STATE_DIRS_TO_COPY:
         path = self.directory / dirname
         for f in path.glob('**/*'):
             result += f.stat().st_size
     return result
Example #16
def unset_path_readonly(path: Path) -> None:
    if path.is_dir():
        right = (stat.S_IXUSR
                 | stat.S_IRGRP
                 | stat.S_IXGRP
                 | stat.S_IRUSR
                 | stat.S_IWGRP
                 | stat.S_IWUSR)
    else:
        right = stat.S_IRGRP | stat.S_IRUSR | stat.S_IWGRP | stat.S_IWUSR

    if path.stat().st_mode & right != right:
        path.chmod(right)
Example #17
def get_file_stat(path: Path) -> Tuple[int, Optional[datetime]]:
    """Get size of file in bytes and last modified time stamp."""
    try:
        stats = path.stat()
    except IOError as exc:
        raise ValueError(
            f"Could not retrieve file stat of {path}: {exc}") from exc

    try:
        update_time = datetime.fromtimestamp(stats.st_mtime, tzlocal())
    except (ValueError, OSError, OverflowError):
        update_time = None

    return stats.st_size, update_time
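A quick usage sketch for get_file_stat() above (not part of the original); it assumes the function and its imports (datetime, tzlocal, e.g. from dateutil.tz, and the typing names) are already in scope.

from pathlib import Path

# Stat this script itself: size in bytes plus a timezone-aware mtime (or None).
size, modified = get_file_stat(Path(__file__))
print(f"{size} bytes, last modified {modified}")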
Example #18
def unset_path_readonly(path: Path) -> None:
    if path.is_dir():
        right = (
            stat.S_IXUSR
            | stat.S_IRGRP
            | stat.S_IXGRP
            | stat.S_IRUSR
            | stat.S_IWGRP
            | stat.S_IWUSR
        )
    else:
        right = stat.S_IRGRP | stat.S_IRUSR | stat.S_IWGRP | stat.S_IWUSR

    if path.stat().st_mode & right != right:
        path.chmod(right)
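A hypothetical round-trip sketch for the set_path_readonly()/unset_path_readonly() helpers above (POSIX permission semantics; Windows treats these mode bits differently). It assumes both functions and their stat/Path imports are in scope.

import tempfile
from pathlib import Path

p = Path(tempfile.mkdtemp()) / "example.txt"
p.write_text("data")
set_path_readonly(p)
print(oct(p.stat().st_mode & 0o777))  # expected 0o440: read-only for user and group
unset_path_readonly(p)
print(oct(p.stat().st_mode & 0o777))  # expected 0o660: read/write restored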
Example #19
def mdatetime(path, aslocal=False):
    # type: (PathType, bool) -> datetime
    """Returns the last modified date of `path`
    as a timezone aware datetime object.

    If `aslocal=True` it will be formatted as local time,
    and UTC otherwise (the default).
    """

    if isinstance(path, (Path, DirEntry)):
        mtime = path.stat().st_mtime
    else:
        mtime = os.stat(path).st_mtime

    return datetime_from_utc_timestamp(mtime, aslocal)
Example #20
def __file_is_empty(path: Path) -> bool:
    """
    Check whether the input file path is an empty file

    Parameters
    ----------
    path
          Path object containing the input file path

    Returns
    -------
    bool
          whether the input file path is an empty file
    """
    return path.stat().st_size == 0
Example #21
def delete_files(directory, filenames, files_to_keep=()):
    ensure_overwritable(*[directory.joinpath(f) for f in filenames])
    # We implement the "files to keep" logic using inodes rather than names so
    # we can safely handle case-insensitive filesystems
    inodes_to_keep = set()
    for filename in files_to_keep:
        try:
            stat = directory.joinpath(filename).stat()
            inodes_to_keep.add((stat.st_dev, stat.st_ino))
        except FileNotFoundError:
            pass
    for filename in filenames:
        path = directory / filename
        try:
            stat = path.stat()
        except FileNotFoundError:
            continue
        inode = (stat.st_dev, stat.st_ino)
        if inode not in inodes_to_keep:
            path.unlink()
Example #22
    def add_candidates(self):
        self.state.reload()
        if self.state.target == self.state.old_target:
            return
        self.state.modify(old_target=self.state.target)
        message = 'Walking target directories...'
        logger.info(message)
        self.status(message)
        dirsize = 0

        start = datetime.datetime.now()
        self.state.modify(dirlist=list(glob.iglob(self.state.target)))
        logger.info(f"Target list: {self.state.dirlist}")
        for top in self.state.dirlist:
            message = f'Traversing tree at {top} and adding to queue.'
            logger.info(message)
            self.status(message)
            top_path = Path(top)
            for path in top_path.rglob("**/*"):
                ext = path.suffix.lower()
                if ext in cfg.local.image_filetypes:
                    size = path.stat().st_size
                    dirsize += size
                    Queue(src_path=str(path), size=size).save()
                else:
                    ext = ext.replace(
                        ".", ""
                    )  # Database can't handle keys starting with dot
                    excluded = self.state.excluded_ext_dict
                    if ext in excluded:
                        excluded[ext] += 1
                    else:
                        excluded[ext] = 1
                    self.state.update(excluded_ext_dict=excluded)
        self.state.save()
        elapsed = datetime.datetime.now() - start
        self.state.modify(
            dirsize=self.state.dirsize + dirsize,
            dirtime=elapsed.seconds + elapsed.microseconds / 1e6,
        )
        return
Example #23
def preview(hwd, dir, order='latest'):
    glob = Path(dir).glob('*.jpg')
    if order == 'latest':
        # find latest image in the folder
        image_path = next(iter(glob))
        for path in glob:
            if path.stat().st_mtime > image_path.stat().st_mtime:
                image_path = path
    elif order == 'random':
        # choose random image
        p = list(glob)
        try:
            image_path = str(random.choice(p))
        except IndexError as e:
            print(e)
            return
    else:
        raise NotImplementedError

    # Delay preview, let image be fully written to disk
    QtCore.QTimer.singleShot(1000, lambda: set_image(hwd, image_path))
Example #24
def create_file_path(path):
    for base_folder, folder, files in os.walk(path):
        # check for files
        for file in files:
            # create file_path
            file_path = os.path.join(base_folder, file)

            # getting file extension
            file_extension = os.path.splitext(file_path)[1]

            # compare file extension with log_extension
            if log_extension == file_extension:
                # check file properties for set condition
                # get date file was last modified
                timestamp = date.fromtimestamp(os.stat(file_path).st_ctime)

                if date.today() == timestamp:
                    try:
                        os.remove(file_path)
                        success_alert()
                        print(f'{file_path} removed successfully')
                    except OSError:
                        print(f'Unable to delete {file_path}')
            else:
                print(f'{file_path} is not a log file')
Example #25
    def process_path(path, relpath):
        """

        Parameters
        ----------
        path: Path
          Non Pure (OS specific) Path
        relpath:
          For location on server.  Will be cast to PurePosixPath

        Yields
        ------
        dict
          Records for pyout
        """
        # Ensure consistent types
        path = Path(path)
        relpath = PurePosixPath(relpath)
        try:
            try:
                path_stat = path.stat()
                yield {"size": path_stat.st_size}
            except FileNotFoundError:
                yield skip_file("ERROR: File not found")
                return
            except Exception as exc:
                # without limiting [:50] it might cause some pyout indigestion
                yield skip_file("ERROR: %s" % str(exc)[:50])
                return

            #
            # Compute checksums and possible other digests (e.g. for s3, ipfs - TODO)
            #
            yield {"status": "digesting"}
            try:
                # TODO: in theory we could also cache the result, but since it is
                # critical to get correct checksums, safer to just do it all the time.
                # Should typically be faster than upload itself ;-)
                digester = Digester(["sha256"])
                sha256_digest = digester(path)["sha256"]
            except Exception as exc:
                yield skip_file("failed to compute digests: %s" % str(exc))
                return

            extant = client.get_asset_bypath(ds_identifier, "draft", relpath)
            if extant is not None and extant["sha256"] == sha256_digest:
                if existing == "error":
                    # as promised -- not gentle at all!
                    raise FileExistsError("file exists")
                if existing == "skip":
                    yield skip_file("file exists")
                    return
                # Logic below only for overwrite and reupload
                if existing == "overwrite":
                    if extant["sha256"] == sha256_digest:
                        yield skip_file("file exists")
                        return
                elif existing == "refresh":
                    pass
                elif existing == "force":
                    pass
                else:
                    raise ValueError("existing")

            #
            # Validate first, so we do not bother server at all if not kosher
            #
            # TODO: enable back validation of dandiset.yaml
            if path.name != dandiset_metadata_file and validation != "skip":
                yield {"status": "validating"}
                validation_errors = validate_file(path)
                yield {"errors": len(validation_errors)}
                # TODO: split for dandi, pynwb errors
                if validation_errors:
                    if validation == "require":
                        yield skip_file("failed validation")
                        return
                else:
                    yield {"status": "validated"}
            else:
                # yielding empty causes pyout to get stuck or crash
                # https://github.com/pyout/pyout/issues/91
                # yield {"errors": '',}
                pass

            #
            # Special handling for dandiset.yaml
            # Yarik hates it but that is life for now. TODO
            #
            if path.name == dandiset_metadata_file:
                # TODO This is a temporary measure to avoid breaking web UI
                # dandiset metadata schema assumptions.  All edits should happen
                # online.
                if upload_dandiset_metadata:
                    yield {"status": "updating metadata"}
                    client.set_dandiset_metadata(dandiset.identifier,
                                                 metadata=dandiset.metadata)
                    yield {"status": "updated metadata"}
                else:
                    yield skip_file("should be edited online")
                return

            #
            # Extract metadata - delayed since takes time, but is done before
            # actual upload, so we could skip if this fails
            #
            # Extract metadata before actual upload and skip if fails
            # TODO: allow non-nwb files to skip this step
            # ad-hoc for dandiset.yaml for now
            yield {"status": "extracting metadata"}
            try:
                asset_metadata = nwb2asset(path,
                                           digest=sha256_digest,
                                           digest_type="SHA256")
            except Exception as exc:
                if allow_any_path:
                    yield {"status": "failed to extract metadata"}
                    metadata = {
                        "contentSize": os.path.getsize(path),
                        "digest": sha256_digest,
                        "digest_type": "SHA256",
                        # "encodingFormat": # TODO
                    }
                else:
                    yield skip_file("failed to extract metadata: %s" %
                                    str(exc))
                    return
            else:
                # We need to convert to a `dict` this way instead of with
                # `.dict()` so that enums will be converted to strings.
                metadata = json.loads(
                    asset_metadata.json(exclude_unset=True, exclude_none=True))

            #
            # Upload file
            #
            yield {"status": "uploading"}
            for r in client.iter_upload(ds_identifier, "draft", str(relpath),
                                        metadata, str(path)):
                if r["status"] == "uploading":
                    uploaded_paths[str(path)]["size"] = r["current"]
                yield r
            yield {"status": "done"}

        except Exception as exc:
            if devel_debug:
                raise
            # Custom formatting for some exceptions we know to extract
            # user-meaningful message
            message = str(exc)
            uploaded_paths[str(path)]["errors"].append(message)
            yield {"status": "ERROR", "message": message}
        finally:
            process_paths.remove(str(path))
Example #26
    def process_path(path, relpath):
        """

        Parameters
        ----------
        path: Path
          Non Pure (OS specific) Path
        relpath:
          For location on server.  Will be cast to PurePosixPath

        Yields
        ------
        dict
          Records for pyout
        """
        # Ensure consistent types
        path = Path(path)
        relpath = PurePosixPath(relpath)
        try:
            try:
                path_stat = path.stat()
                yield {"size": path_stat.st_size}
            except FileNotFoundError:
                yield skip_file("ERROR: File not found")
                return
            except Exception as exc:
                # without limiting [:50] it might cause some pyout indigestion
                yield skip_file("ERROR: %s" % str(exc)[:50])
                return

            #
            # Validate first, so we do not bother server at all if not kosher
            #
            # TODO: enable back validation of dandiset.yaml
            if path.name != dandiset_metadata_file and validation != "skip":
                yield {"status": "pre-validating"}
                validation_errors = validate_file(path)
                yield {"errors": len(validation_errors)}
                # TODO: split for dandi, pynwb errors
                if validation_errors:
                    if validation == "require":
                        yield skip_file("failed validation")
                        return
                else:
                    yield {"status": "validated"}
            else:
                # yielding empty causes pyout to get stuck or crash
                # https://github.com/pyout/pyout/issues/91
                # yield {"errors": '',}
                pass

            #
            # Special handling for dandiset.yaml
            # Yarik hates it but that is life for now. TODO
            #
            if path.name == dandiset_metadata_file:
                # TODO This is a temporary measure to avoid breaking web UI
                # dandiset metadata schema assumptions.  All edits should happen
                # online.
                if upload_dandiset_metadata:
                    yield {"status": "updating metadata"}
                    client.set_dandiset_metadata(
                        dandiset.identifier, metadata=dandiset.metadata
                    )
                    yield {"status": "updated metadata"}
                else:
                    yield skip_file("should be edited online")
                return

            #
            # Compute checksums
            #
            yield {"status": "digesting"}
            try:
                file_etag = get_digest(path, digest="dandi-etag")
            except Exception as exc:
                yield skip_file("failed to compute digest: %s" % str(exc))
                return

            extant = client.get_asset_bypath(ds_identifier, "draft", str(relpath))
            if extant is not None:
                # The endpoint used to search by paths doesn't include asset
                # metadata, so we need to make another API call:
                metadata = client.get_asset(ds_identifier, "draft", extant["asset_id"])
                local_mtime = ensure_datetime(path_stat.st_mtime)
                remote_mtime_str = metadata.get("blobDateModified")
                d = metadata.get("digest", {})
                if "dandi:dandi-etag" in d:
                    extant_etag = d["dandi:dandi-etag"]
                else:
                    # TODO: Should this error instead?
                    extant_etag = None
                if remote_mtime_str is not None:
                    remote_mtime = ensure_datetime(remote_mtime_str)
                    remote_file_status = (
                        "same"
                        if extant_etag == file_etag and remote_mtime == local_mtime
                        else (
                            "newer"
                            if remote_mtime > local_mtime
                            else ("older" if remote_mtime < local_mtime else "diff")
                        )
                    )
                else:
                    remote_mtime = None
                    remote_file_status = "no mtime"

                exists_msg = f"exists ({remote_file_status})"

                if existing == "error":
                    # as promised -- not gentle at all!
                    raise FileExistsError(exists_msg)
                if existing == "skip":
                    yield skip_file(exists_msg)
                    return
                # Logic below only for overwrite and reupload
                if existing == "overwrite":
                    if extant_etag == file_etag:
                        yield skip_file(exists_msg)
                        return
                elif existing == "refresh":
                    if extant_etag == file_etag:
                        yield skip_file("file exists")
                        return
                    elif remote_mtime is not None and remote_mtime >= local_mtime:
                        yield skip_file(exists_msg)
                        return
                elif existing == "force":
                    pass
                else:
                    raise ValueError(f"invalid value for 'existing': {existing!r}")

                yield {"message": f"{exists_msg} - reuploading"}

            #
            # Extract metadata - delayed since takes time, but is done before
            # actual upload, so we could skip if this fails
            #
            # Extract metadata before actual upload and skip if fails
            # TODO: allow non-nwb files to skip this step
            # ad-hoc for dandiset.yaml for now
            yield {"status": "extracting metadata"}
            try:
                asset_metadata = nwb2asset(
                    path, digest=file_etag, digest_type="dandi_etag"
                )
            except Exception as exc:
                lgr.exception("Failed to extract metadata from %s", path)
                if allow_any_path:
                    yield {"status": "failed to extract metadata"}
                    asset_metadata = get_default_metadata(
                        path, digest=file_etag, digest_type="dandi_etag"
                    )
                else:
                    yield skip_file("failed to extract metadata: %s" % str(exc))
                    return
            metadata = asset_metadata.json_dict()
            metadata["path"] = str(relpath)

            #
            # Upload file
            #
            yield {"status": "uploading"}
            validating = False
            for r in client.iter_upload(
                ds_identifier, "draft", metadata, str(path), jobs=jobs_per_file
            ):
                if r["status"] == "uploading":
                    uploaded_paths[str(path)]["size"] = r.pop("current")
                    yield r
                elif r["status"] == "post-validating":
                    # Only yield the first "post-validating" status
                    if not validating:
                        yield r
                        validating = True
                else:
                    yield r
            yield {"status": "done"}

        except Exception as exc:
            if devel_debug:
                raise
            # Custom formatting for some exceptions we know to extract
            # user-meaningful message
            message = str(exc)
            uploaded_paths[str(path)]["errors"].append(message)
            yield {"status": "ERROR", "message": message}
        finally:
            process_paths.remove(str(path))
Example #27
def modtime(path):
    st = path.stat()
    return st and st.st_mtime
Example #28
    def revert(self,
               path_or_version=None,
               snapshot=False,
               *,
               log_path=None,
               make_backup=True,
               override=False,
               reply=print):
        """Revert to a different version of Minecraft and restore a pre-update backup.

        Optional arguments:
        path_or_version -- If given, a pathlib.Path pointing at the backup file to be restored, or the Minecraft version to which to restore. By default, the newest available pre-update backup is restored.
        snapshot -- If true, single-letter Minecraft versions will be expanded to include the current year and week number. Defaults to False.

        Keyword-only arguments:
        log_path -- This is passed to the stop function if the server is stopped before the revert.
        make_backup -- Whether to back up the world before reverting. Defaults to True.
        override -- If this is True and the server jar for the target version already exists, it will be deleted and redownloaded. Defaults to False.
        reply -- This function is called several times with a string argument representing revert progress. Defaults to the built-in print function.
        """
        # determine version and backup path
        if path_or_version is None:
            path = sorted((self.backup_path / 'pre-update').iterdir(),
                          key=lambda path: path.stat().st_mtime,
                          reverse=True)[0]  # latest pre-update backup
            version = path.name.split('_')[3]
        elif isinstance(path_or_version, pathlib.Path):
            path = path_or_version
            version = path.name.split('_')[3]
        else:
            version = path_or_version
            if snapshot and len(version) == 1:
                version = datetime.datetime.utcnow().strftime(
                    '%yw%V') + version
            path = next(
                path
                for path in sorted((self.backup_path / 'pre-update').iterdir(),
                                   key=lambda path: path.stat().st_mtime,
                                   reverse=True)
                if path.name.split('_')[3] == version)
        # start iter_update
        update_iterator = self.iter_update(version,
                                           log_path=log_path,
                                           make_backup=False,
                                           override=override,
                                           reply=reply)
        version_dict = next(update_iterator)
        reply('Downloading ' + version_dict['version_text'])
        # make a backup to backup/<world>/reverted
        if make_backup:
            old_version = self.version()
            backup_path = self.backup_path / 'reverted' / '{}_{:%Y-%m-%d_%Hh%M}_{}_{}'.format(
                self.name, datetime.datetime.utcnow(), old_version, version)
            self.backup(reply=reply, path=backup_path, copy_to_latest=False)
        # stop the server
        was_running = self.status()
        if was_running:
            self.say('Server will be reverting to ' +
                     version_dict["version_text"] + ' and therefore restart')
            time.sleep(5)
            self.stop(reply=reply, log_path=log_path)
        reply('Server stopped. Restoring backup...')
        # revert Minecraft version
        for message in update_iterator:
            reply(message)
        # restore backup
        world_path = self.world_path
        if world_path.exists():
            shutil.rmtree(str(world_path))
        subprocess.call(
            ['tar', '-C',
             str(self.path), '-xzf',
             str(path), world_path.name])  # untar the world backup
        # restart server
        if was_running:
            self.start(reply=reply,
                       start_message='Server reverted. Restarting...',
                       log_path=log_path)
        return version_dict['version'], version_dict[
            'is_snapshot'], version_dict['version_text']
Example #29
    def revert(self, path_or_version=None, snapshot=False, *, log_path=None, make_backup=True, override=False, reply=print):
        """Revert to a different version of Minecraft and restore a pre-update backup.

        Optional arguments:
        path_or_version -- If given, a pathlib.Path pointing at the backup file to be restored, or the Minecraft version to which to restore. By default, the newest available pre-update backup is restored.
        snapshot -- If true, single-letter Minecraft versions will be expanded to include the current year and week number. Defaults to False.

        Keyword-only arguments:
        log_path -- This is passed to the stop function if the server is stopped before the revert.
        make_backup -- Whether to back up the world before reverting. Defaults to True.
        override -- If this is True and the server jar for the target version already exists, it will be deleted and redownloaded. Defaults to False.
        reply -- This function is called several times with a string argument representing revert progress. Defaults to the built-in print function.
        """
        # determine version and backup path
        if path_or_version is None:
            path = sorted((self.backup_path / 'pre-update').iterdir(), key=lambda path: path.stat().st_mtime, reverse=True)[0] # latest pre-update backup
            version = path.name.split('_')[3]
        elif isinstance(path_or_version, pathlib.Path):
            path = path_or_version
            version = path.name.split('_')[3]
        else:
            version = path_or_version
            if snapshot and len(version) == 1:
                version = datetime.utcnow().strftime('%yw%V') + version
            path = next(path for path in sorted((self.backup_path / 'pre-update').iterdir(), key=lambda path: path.stat().st_mtime, reverse=True) if path.name.split('_')[3] == version)
        # start iter_update
        update_iterator = self.iter_update(version, log_path=log_path, make_backup=False, override=override, reply=reply)
        version_dict = next(update_iterator)
        reply('Downloading ' + version_dict['version_text'])
        # make a backup to backup/<world>/reverted
        if make_backup:
            old_version = self.version()
            backup_path = self.backup_path / 'reverted' / '{}_{:%Y-%m-%d_%Hh%M}_{}_{}'.format(self.name, datetime.utcnow(), old_version, version)
            self.backup(reply=reply, path=backup_path, copy_to_latest=False)
        # stop the server
        was_running = self.status()
        if was_running:
            self.say('Server will be reverting to ' + version_dict['version_text'] + ' and therefore restart')
            time.sleep(5)
            self.stop(reply=reply, log_path=log_path)
        reply('Server stopped. Restoring backup...')
        # revert Minecraft version
        for message in update_iterator:
            reply(message)
        # restore backup
        world_path = self.world_path
        if world_path.exists():
            shutil.rmtree(str(world_path))
        subprocess.call(['tar', '-C', str(self.path), '-xzf', str(path), world_path.name]) # untar the world backup
        # restart server
        if was_running:
            self.start(reply=reply, start_message='Server reverted. Restarting...', log_path=log_path)
        return version_dict['version'], version_dict['is_snapshot'], version_dict['version_text']
Example #30
    def process_path(path, relpath):
        """

        Parameters
        ----------
        path: Path
          Non Pure (OS specific) Path
        relpath:
          For location on Girder.  Will be cast to PurePosixPath

        Yields
        ------
        dict
          Records for pyout
        """
        # Ensure consistent types
        path = Path(path)
        relpath = PurePosixPath(relpath)
        try:
            try:
                path_stat = path.stat()
                yield {"size": path_stat.st_size}
            except FileNotFoundError:
                yield skip_file("ERROR: File not found")
                return
            except Exception as exc:
                # without limiting [:50] it might cause some pyout indigestion
                yield skip_file("ERROR: %s" % str(exc)[:50])
                return

            yield {"status": "checking girder"}

            girder_folder = girder_top_folder / relpath.parent

            # we will add some fields which would help us with deciding to
            # reupload or not
            file_metadata_ = {
                "uploaded_size": path_stat.st_size,
                "uploaded_mtime": ensure_strtime(path_stat.st_mtime),
                # "uploaded_date": None,  # to be filled out upon upload completion
            }

            # A girder delete API target to .delete before uploading a file
            # (e.g. if decided to reupload)
            delete_before_upload = None

            def ensure_item():
                """This function might need to be called twice, e.g. if we
                are to reupload the entire item.

                ATM new versions of the files would create new items since
                the policy is one File per Item
                """
                try:
                    lock.acquire(timeout=60)
                    # TODO: we need to make this all thread safe all the way
                    #       until uploading the file since multiple threads would
                    #       create multiple
                    # ATM it even fails with  No such folder: 5e33658d6eb14e0bf49e97d5",
                    # so will first upload one file and then the rest... not sure why
                    # locking doesn't work
                    folder_rec = girder.ensure_folder(client, collection_rec,
                                                      girder_collection,
                                                      girder_folder)

                    # Get (if already exists) or create an item
                    item_rec = client.createItem(folder_rec["_id"],
                                                 name=relpath.name,
                                                 reuseExisting=True)
                finally:
                    lock.release()
                return item_rec

            def ensure_folder():
                try:
                    lock.acquire(timeout=60)
                    folder_rec = girder.ensure_folder(client, collection_rec,
                                                      girder_collection,
                                                      girder_folder)
                finally:
                    lock.release()
                return folder_rec

            #
            # 1. Validate first, so we do not bother girder at all if not kosher
            #
            # TODO: enable back validation of dandiset.yaml
            if path.name != dandiset_metadata_file and validation != "skip":
                yield {"status": "validating"}
                validation_errors = validate_file(path)
                yield {"errors": len(validation_errors)}
                # TODO: split for dandi, pynwb errors
                if validation_errors:
                    if validation == "require":
                        yield skip_file("failed validation")
                        return
                else:
                    yield {"status": "validated"}
            else:
                # yielding empty causes pyout to get stuck or crash
                # https://github.com/pyout/pyout/issues/91
                # yield {"errors": '',}
                pass

            #
            # Special handling for dandiset.yaml
            # Yarik hates it but that is life for now. TODO
            #
            if path.name == dandiset_metadata_file:
                # TODO This is a temporary measure to avoid breaking web UI
                # dandiset metadata schema assumptions.  All edits should happen
                # online.
                yield skip_file("should be edited online")
                return
                # We need to upload its content as metadata for the entire
                # folder.
                folder_rec = ensure_folder()
                remote_metadata = folder_rec["meta"]
                if remote_metadata.get("dandiset", {}) == dandiset.metadata:
                    yield skip_file("exists (same)")
                else:
                    remote_metadata["dandiset"] = dandiset.metadata
                    yield {"status": "uploading dandiset metadata"}
                    client.addMetadataToFolder(folder_rec["_id"],
                                               remote_metadata)
                    yield {"status": "done"}
                # Interrupt -- no file to upload
                return

            #
            # 2. Ensure having an item
            #
            item_rec = ensure_item()

            #
            # 3. Analyze possibly present on the remote files in the item
            #
            file_recs = list(client.listFile(item_rec["_id"]))

            # get metadata and if we have all indications that it is
            # probably the same -- we just skip
            stat_fields = [
                # Care only about mtime, ignore ctime which could change
                "uploaded_mtime",
                "uploaded_size",
            ]
            assert sorted(file_metadata_) == stat_fields
            item_file_metadata_ = {
                k: item_rec.get("meta", {}).get(k, None)
                for k in stat_fields
            }
            lgr.debug(
                "Files meta: local file: %s  remote file: %s",
                file_metadata_,
                item_file_metadata_,
            )

            if item_file_metadata_["uploaded_mtime"]:
                local_mtime = ensure_datetime(file_metadata_["uploaded_mtime"])
                remote_mtime = ensure_datetime(
                    item_file_metadata_.get("uploaded_mtime"))
                remote_file_status = (
                    "same" if (file_metadata_ == item_file_metadata_) else
                    ("newer" if remote_mtime > local_mtime else
                     ("older" if remote_mtime < local_mtime else "diff")))
            else:
                remote_file_status = "no mtime"
            exists_msg = f"exists ({remote_file_status})"

            if file_recs:  # there is a file already
                if len(file_recs) > 1:
                    lgr.debug(
                        f"Item {item_rec} contains multiple files: {file_recs}"
                    )
                if existing == "error":
                    # as promised -- not gentle at all!
                    raise FileExistsError(exists_msg)
                if existing == "skip":
                    yield skip_file(exists_msg)
                    return
                # Logic below only for overwrite and reupload
                if existing == "overwrite":
                    if remote_file_status == "same":
                        yield skip_file(exists_msg)
                        return
                elif existing == "refresh":
                    if not remote_file_status == "older":
                        yield skip_file(exists_msg)
                        return
                elif existing == "force":
                    pass
                else:
                    raise ValueError("existing")

                delete_before_upload = f'/item/{item_rec["_id"]}'

                yield {"message": exists_msg + " - reuploading"}

            #
            # 4. Extract metadata - delayed since takes time, but is done
            #    before actual upload, so we could skip if this fails
            #
            # Extract metadata before actual upload and skip if fails
            # TODO: allow non-nwb files to skip this step
            # ad-hoc for dandiset.yaml for now
            if path.name != dandiset_metadata_file:
                yield {"status": "extracting metadata"}
                try:
                    metadata = get_metadata(path)
                except Exception as exc:
                    if allow_any_path:
                        yield {"status": "failed to extract metadata"}
                        metadata = {}
                    else:
                        yield skip_file("failed to extract metadata: %s" %
                                        str(exc))
                        if not file_recs:
                            # remove empty item
                            yield {"status": "deleting empty item"}
                            client.delete(f'/item/{item_rec["_id"]}')
                            yield {"status": "deleted empty item"}
                        return

            #
            # ?. Compute checksums and possible other digests (e.g. for s3, ipfs - TODO)
            #
            yield {"status": "digesting"}
            try:
                # TODO: in theory we could also cache the result, but since it is
                # critical to get correct checksums, safer to just do it all the time.
                # Should typically be faster than upload itself ;-)
                digester = Digester(metadata_digests)
                file_metadata_.update(digester(path))
            except Exception as exc:
                yield skip_file("failed to compute digests: %s" % str(exc))
                return

            #
            # 5. Upload file
            #
            # TODO: we could potentially keep new item "hidden" until we are
            #  done with upload, and only then remove old one and replace with
            #  a new one (rename from "hidden" name).
            if delete_before_upload:
                yield {"status": "deleting old"}
                client.delete(delete_before_upload)
                yield {"status": "old deleted"}
                # create a new item
                item_rec = ensure_item()

            yield {"status": "uploading"}
            # Upload file to an item
            # XXX TODO progress reporting back to pyout is actually tricky
            #     if possible to implement via callback since
            #     callback would need to yield somehow from the context here.
            #     yoh doesn't see how that could be done yet. In the worst
            #     case we would copy uploadFileToItem and _uploadContents
            #     and make them into generators to relay progress instead of
            #     via callback
            # https://stackoverflow.com/questions/9968592/turn-functions-with-a-callback-into-python-generators
            # has some solutions but all IMHO are a bit too complex

            for r in generator_from_callback(lambda c: client.uploadFileToItem(
                    item_rec["_id"], str(path), progressCallback=c)):
                upload_perc = 100 * (
                    (r["current"] / r["total"]) if r["total"] else 1.0)
                if girder._DANDI_LOG_GIRDER:
                    girder.lgr.debug(
                        "PROGRESS[%s]: done=%d %%done=%s",
                        str(path),
                        r["current"],
                        upload_perc,
                    )
                uploaded_paths[str(path)]["size"] = r["current"]
                yield {"upload": upload_perc}

            # Get uploaded file id
            file_id, current = client.isFileCurrent(item_rec["_id"], path.name,
                                                    path.absolute())
            if not current:
                yield skip_file("File on server was unexpectedly changed")
                return

            # Compare file size against what download headers report
            # S3 doesn't seem to allow HEAD requests, so we need to instead do
            # a GET with a streaming response and not read the body.
            with client.sendRestRequest("GET",
                                        f"file/{file_id}/download",
                                        jsonResp=False,
                                        stream=True) as r:
                if int(r.headers["Content-Length"]) != path.stat().st_size:
                    yield skip_file(
                        "File size on server does not match local file")
                    return

            #
            # 6. Upload metadata
            #
            metadata_ = {}
            for k, v in metadata.items():
                if v in ("", None):
                    continue  # degenerate, why bother
                # XXX TODO: remove this -- it is only temporary, search should handle
                if isinstance(v, str):
                    metadata_[k] = v.lower()
                elif isinstance(v, datetime):
                    metadata_[k] = ensure_strtime(v)
            # we will add some fields which would help us with deciding to
            # reupload or not
            # .isoformat() would give an ISO 8601 representation, but in girder
            # I already see
            # session_start_time   1971-01-01 12:00:00+00:00
            # decided to go for .isoformat for internal consistency -- let's see
            file_metadata_["uploaded_datetime"] = ensure_strtime(time.time())
            metadata_.update(file_metadata_)
            metadata_["uploaded_size"] = path_stat.st_size
            metadata_["uploaded_mtime"] = ensure_strtime(path_stat.st_mtime)
            metadata_["uploaded_by"] = "dandi %s" % __version__
            # Also store object_id for the file to help identify changes/moves
            try:
                metadata_["uploaded_nwb_object_id"] = get_object_id(str(path))
            except Exception as exc:
                (lgr.debug if allow_any_path else lgr.warning)(
                    "Failed to read object_id: %s", exc)

            # #
            # # 7. Also set remote file ctime to match local mtime
            # #   since for type "file", Resource has no "updated" field.
            # #   and this could us help to identify changes being done
            # #   to the remote file -- if metadata["uploaded_mtime"]
            # #   differs
            # yield {"status": "setting remote file timestamp"}
            # try:
            #     client.setResourceTimestamp(
            #         file_id, type="file", created=metadata_["uploaded_mtime"]
            #     )
            # except girder.gcl.HttpError as exc:
            #     if devel_debug:
            #         raise
            #     response = girder.get_HttpError_response(exc)
            #     message = response.get("message", str(exc))
            #     yield {"status": "WARNING", "message": message}

            # 7. Upload metadata
            yield {"status": "uploading metadata"}
            client.addMetadataToItem(item_rec["_id"], metadata_)
            yield {"status": "done"}

        except Exception as exc:
            if devel_debug:
                raise
            # Custom formatting for some exceptions we know to extract
            # user-meaningful message
            message = str(exc)
            if isinstance(exc, girder.gcl.HttpError):
                response = girder.get_HttpError_response(exc)
                if "message" in response:
                    message = response["message"]
            uploaded_paths[str(path)]["errors"].append(message)
            yield {"status": "ERROR", "message": message}
        finally:
            process_paths.remove(str(path))