def __init__(self, path: Path, *, location: Point5D = Point5D.zero(), filesystem: FS):
    try:
        raw_data = skimage.io.imread(filesystem.openbin(path.as_posix()))
    except ValueError:
        raise UnsupportedUrlException(path)
    axiskeys = "yxc"[: len(raw_data.shape)]
    super().__init__(
        url=filesystem.desc(path.as_posix()),
        data=raw_data,
        axiskeys=axiskeys,
        location=location,
    )
def _merge_primaries(changes_dict: ChangesDict, src_fs: FS, dst_fs: FS) -> None:
    # TODO Not sure that this hits all cases, including removal of
    # files and directories.  Think about it.
    for dirpath in src_fs.walk.dirs(search="depth"):
        if _is_component_path(dirpath):
            lid = dirpath_to_lid(dirpath)
            if changes_dict.changed(lid):
                if not dst_fs.isdir(dirpath):
                    dst_fs.makedirs(dirpath)
                src_sub_fs = SubFS(src_fs, dirpath)
                dst_sub_fs = SubFS(dst_fs, dirpath)
                # delete directories in dst that don't exist in src
                for subdirpath in dst_sub_fs.walk.dirs(search="depth"):
                    if not src_sub_fs.isdir(subdirpath):
                        dst_sub_fs.removetree(subdirpath)
                # delete the files in the destination (if any)
                for filepath in component_files(dst_fs, dirpath):
                    dst_sub_fs.remove(filepath)
                # copy the new files across
                for filepath in component_files(src_fs, dirpath):
                    fs.copy.copy_file(src_sub_fs, filepath, dst_sub_fs, filepath)
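# A minimal, runnable sketch of the SubFS mirroring pattern used above,
# with MemoryFS standing in for the real stores and none of the
# ChangesDict/LID machinery assumed:
from fs.memoryfs import MemoryFS
from fs.subfs import SubFS
import fs.copy

src, dst = MemoryFS(), MemoryFS()
src.makedirs("bundle/collection")
src.writetext("bundle/collection/a.txt", "new")
dst.makedirs("bundle/collection")
dst.writetext("bundle/collection/stale.txt", "old")

src_sub = SubFS(src, "bundle/collection")
dst_sub = SubFS(dst, "bundle/collection")
for name in dst_sub.listdir("/"):  # clear stale destination files
    dst_sub.remove(name)
for name in src_sub.listdir("/"):  # copy the new files across
    fs.copy.copy_file(src_sub, name, dst_sub, name)
assert dst.listdir("bundle/collection") == ["a.txt"]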
def openDataset(cls, path: Path, filesystem: FS) -> Tuple[h5py.Dataset, Path, Path]:
    outer_path = path
    dataset_path_components: List[str] = []
    while True:
        try:
            info = filesystem.getinfo(outer_path.as_posix())
            if not info.is_file:
                raise UnsupportedUrlException(path.as_posix())
            break
        except ResourceNotFound:
            # Walk up one level: the leading components name a file on
            # the filesystem, the trailing ones a dataset inside it.
            dataset_path_components.insert(0, outer_path.name)
            parent = outer_path.parent
            if parent == outer_path:
                raise UnsupportedUrlException(path.as_posix())
            outer_path = parent
    try:
        binfile = filesystem.openbin(outer_path.as_posix())
        f = h5py.File(binfile, "r")
    except OSError as e:
        raise UnsupportedUrlException(path) from e
    try:
        inner_path = "/".join(dataset_path_components)
        dataset = f[inner_path]
        if not isinstance(dataset, h5py.Dataset):
            raise ValueError(f"{inner_path} is not a h5py.Dataset")
    except Exception:
        f.close()
        raise
    return dataset, outer_path, Path(inner_path)
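# Usage sketch for the walk-up logic above: the leading path components
# name an HDF5 file on the filesystem, the trailing ones a dataset inside
# it.  DataSourceClass is a hypothetical stand-in for the class that owns
# openDataset; the rest uses only h5py/OSFS calls.
import tempfile
import h5py
import numpy as np
from pathlib import Path
from fs.osfs import OSFS

tmp_dir = tempfile.mkdtemp()
with h5py.File(Path(tmp_dir) / "data.h5", "w") as f:
    f.create_dataset("volume/raw", data=np.zeros((4, 4)))

dataset, outer, inner = DataSourceClass.openDataset(
    Path("data.h5/volume/raw"), filesystem=OSFS(tmp_dir))
assert outer == Path("data.h5") and str(inner) == "volume/raw"
dataset.file.close()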
def copy_fs(version_view: FS, deliverable: FS) -> None:
    # TODO The note below is probably obsolete.  It was written when
    # we were using the DeliverableFS to make the deliverable.  We've
    # now switched to using the DeliverableView.  (The obsolete
    # DeliverableFS has been deleted from the repo.)  In any case,
    # this doesn't seem to hurt anything, but it can probably all be
    # replaced by fs.copy.copy_file()--see the last line.  Try it and
    # see.

    # TODO I could (and used to) just do a fs.copy.copy_fs() from the
    # version_view to a DeliverableFS.  I removed it to debug issues
    # with the validation tool.  Now I find this hack is just as easy
    # (though I wonder about efficiency).  It bothers me that this
    # visits hack parallels a visits hack in
    # plain_lidvid_to_visits_dirpath().  I should figure this out and
    # make it clean.  For now, though, this works.

    # Uses dollar-terminated paths
    for path, dirs, files in version_view.walk():
        parts = fs.path.parts(path)
        if len(parts) == 4:
            if len(parts[3]) == 10:
                visit = "visit_" + parts[3][4:6].lower() + "$"
                parts[3] = visit
        new_path = fs.path.join(*parts)
        if not deliverable.isdir(new_path):
            deliverable.makedir(new_path)
        for file in files:
            old_filepath = fs.path.join(path, file.name)
            new_filepath = fs.path.join(new_path, file.name)
            fs.copy.copy_file(version_view, old_filepath, deliverable, new_filepath)
def write_to_bento(self, bento_fs: FS, build_ctx: str):
    docker_folder = fs.path.join("env", "docker")
    bento_fs.makedirs(docker_folder, recreate=True)
    dockerfile_path = fs.path.join(docker_folder, "Dockerfile")
    template_file = os.path.join(
        os.path.dirname(__file__), "docker", "Dockerfile.template")
    with open(template_file, "r", encoding="utf-8") as f:
        dockerfile_template = f.read()
    with bento_fs.open(dockerfile_path, "w") as dockerfile:
        dockerfile.write(
            dockerfile_template.format(base_image=self.get_base_image_tag()))
    for filename in ["init.sh", "entrypoint.sh"]:
        copy_file_to_fs_folder(
            os.path.join(os.path.dirname(__file__), "docker", filename),
            bento_fs,
            docker_folder,
        )
    if self.setup_script:
        try:
            setup_script = resolve_user_filepath(self.setup_script, build_ctx)
        except FileNotFoundError as e:
            raise InvalidArgument(f"Invalid setup_script file: {e}") from e
        copy_file_to_fs_folder(setup_script, bento_fs, docker_folder, "setup_script")
def __init__(self, host, user, passwd, port=22, compress=False,
             timeout=10, keepalive=10):
    FS.__init__(self)
    self._user = user
    self._host = host
    self._port = port
    self._client = client = paramiko.SSHClient()
    self._locale = None
    try:
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        client.connect(
            socket.gethostbyname(host), port, user, passwd,
            compress=compress, timeout=timeout,
            allow_agent=False, look_for_keys=False,
        )
        if keepalive > 0:
            client.get_transport().set_keepalive(keepalive)
        self._sftp = client.open_sftp()
        self._platform = None
    except (paramiko.ssh_exception.SSHException,             # protocol errors
            paramiko.ssh_exception.NoValidConnectionsError,  # connection errors
            socket.gaierror, socket.timeout) as e:           # TCP errors
        message = "Unable to create filesystem: {}".format(e)
        raise errors.CreateFailed(message) from e
def compile_fragment_files(
        self,
        write_fs: FS,
        found_fragments: Iterable[FoundFragment]) -> List[str]:
    """
    Compile fragment files into ``write_fs``.
    """
    outputs = []
    for version_fs, filename in found_fragments:
        try:
            fragment = self.load_fragment(version_fs.readtext(filename))
            fragment_type = fragment.get('type')
            showcontent = self.config.fragment_types.get(
                fragment_type, {}).get('showcontent', True)
            section = fragment.get('section') or None
            rendered_content = render_fragment(
                fragment, showcontent, self.config.changelog_output_type)
            if rendered_content.strip():
                filename_stem = splitext(basename(filename))[0]
                output_path = join(*filter(None, [
                    section,
                    '{}.{}'.format(filename_stem, fragment_type),
                ]))
                log.info('Compiling {} -> {}'.format(
                    version_fs.getsyspath(filename),
                    write_fs.getsyspath(output_path)))
                parent_dir = dirname(output_path)
                if parent_dir:
                    write_fs.makedirs(parent_dir, recreate=True)
                write_fs.writetext(output_path, rendered_content)
                outputs.append(output_path)
        except Exception as e:
            raise FragmentCompilationError(filename) from e
    return outputs
def write_to_bento(self, bento_fs: FS, build_ctx: str):
    conda_folder = fs.path.join("env", "conda")
    bento_fs.makedirs(conda_folder, recreate=True)

    if self.environment_yml is not None:
        environment_yml_file = resolve_user_filepath(
            self.environment_yml, build_ctx)
        copy_file_to_fs_folder(
            environment_yml_file,
            bento_fs,
            conda_folder,
            dst_filename="environment_yml",
        )
        return

    deps_list = [] if self.dependencies is None else self.dependencies
    if self.pip is not None:
        deps_list.append(dict(pip=self.pip))  # type: ignore
    if not deps_list:
        return

    yaml_content = dict(dependencies=deps_list)
    yaml_content["channels"] = (
        ["defaults"] if self.channels is None else self.channels)
    with bento_fs.open(fs.path.join(conda_folder, "environment_yml"), "w") as f:
        yaml.dump(yaml_content, f)
def cleantree(tgt_fs: FS, name: str):
    method = tgt_fs.remove if tgt_fs.isfile(name) else tgt_fs.removetree
    method(name)
    remaining_parts = Path(name).parts
    if len(remaining_parts) > 1:
        # if dst_path included more than one directory, we are left with
        # the (now empty) top-level parent; remove it too
        tgt_fs.removedir(remaining_parts[0])
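# Usage sketch: removing a nested tree also prunes the emptied
# top-level parent that the nested path left behind.
from fs.memoryfs import MemoryFS

mem = MemoryFS()
mem.makedirs("top/nested")
mem.writetext("top/nested/file.txt", "x")
cleantree(mem, "top/nested")
assert not mem.exists("top")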
def __init__(self, path: Path, *, location: Point5D = Point5D.zero(), filesystem: FS):
    url = filesystem.geturl(path.as_posix())
    match = re.search(r"[^/]+\.n5/.*$", url, re.IGNORECASE)
    if not match:
        raise UnsupportedUrlException(url)
    name = match.group(0)
    self.filesystem = filesystem.opendir(path.as_posix())

    with self.filesystem.openbin("attributes.json", "r") as f:
        attributes_json_bytes = f.read()
    attributes = json.loads(attributes_json_bytes.decode("utf8"))

    dimensions = attributes["dimensions"][::-1]
    blockSize = attributes["blockSize"][::-1]
    axiskeys = (
        "".join(attributes["axes"]).lower()[::-1]
        if "axes" in attributes
        else guess_axiskeys(dimensions)
    )

    super().__init__(
        url=url,
        name=name,
        tile_shape=Shape5D.create(raw_shape=blockSize, axiskeys=axiskeys),
        shape=Shape5D.create(raw_shape=dimensions, axiskeys=axiskeys),
        dtype=np.dtype(attributes["dataType"]).newbyteorder(">"),
        location=location,
        axiskeys=axiskeys,
    )

    self.compression_type = attributes["compression"]["type"]
    if self.compression_type not in N5Block.DECOMPRESSORS:
        raise NotImplementedError(
            f"Don't know how to decompress from {self.compression_type}")
def create_cowfs(base_fs: FS, read_write_layer: FS, recreate: bool = False) -> "COWFS":
    additions_fs = read_write_layer.makedir("/additions", recreate=recreate)
    deletions_fs = read_write_layer.makedir("/deletions", recreate=recreate)
    return COWFS(base_fs, additions_fs, deletions_fs)
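# Usage sketch: layer a copy-on-write view over a read-only base.
# COWFS semantics (writes land in the additions layer, the base is
# untouched) are assumed from this codebase; MemoryFS stands in for
# the real stores.
from fs.memoryfs import MemoryFS

base = MemoryFS()
base.writetext("/file.txt", "original")
cow = create_cowfs(base, MemoryFS(), recreate=True)
cow.writetext("/file.txt", "modified")
assert base.readtext("/file.txt") == "original"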
def move(self, src, dst, **kwds):
    if self.getmeta("atomic.rename", False):
        if kwds.get("overwrite", False) or not self.exists(dst):
            try:
                self.rename(src, dst)
                return
            except FSError:
                pass
    FS.move(self, src, dst, **kwds)
def copydir(self, src, dst, overwrite=False, ignore_errors=False, chunk_size=16384):
    if self.getmeta("read_only"):
        raise errors.UnsupportedError('read only filesystem')
    # FIXME: this is out of date; how to do native tahoe copy?
    # FIXME: Workaround because isfile() does not exist on _TahoeLAFS
    FS.copydir(self, src, dst, overwrite, ignore_errors, chunk_size)
def write_subdir_versions_to_directory(fs: FS, dir: str, d: Dict[str, str]) -> None:
    """
    Given the path to a directory, un-parse and write the contents of
    the given subdir-versions dictionary into a subdir-versions file
    in the directory.
    """
    subdir_versions_filepath = join(dir, SUBDIR_VERSIONS_FILENAME)
    fs.writetext(subdir_versions_filepath,
                 unparse_subdir_versions(d),
                 encoding="ascii")
def write_dictionary_to_fs(fs: FS, dir_path: str, d: Dict[Any, Any]) -> None:
    for k, v in d.items():
        assert isinstance(k, str)
        sub_path = join(dir_path, str(k))
        if isinstance(v, dict):
            fs.makedir(sub_path)
            write_dictionary_to_fs(fs, sub_path, v)
        elif isinstance(v, str):
            fs.writetext(sub_path, v)
        else:
            assert False, f"unexpected type {type(v)} at {sub_path}"
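# Usage sketch: mirror a nested dictionary into an in-memory
# filesystem, one directory per nested dict and one file per string.
from fs.memoryfs import MemoryFS

mem = MemoryFS()
write_dictionary_to_fs(mem, "/", {"a": {"b": "hello"}, "c": "world"})
assert mem.readtext("/a/b") == "hello"
assert mem.readtext("/c") == "world"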
def __init__(self, base_fs: VersionView,
             synth_files: Optional[Dict[str, bytes]] = None) -> None:
    FS.__init__(self)
    self.base_fs = base_fs
    self.path_dict: Dict[str, _Entry] = {"/": _DirInfo()}
    self._populate_path_dict_from_base_fs()
    # Insert the synthetic files
    synth_files = dict() if synth_files is None else synth_files
    self._populate_path_dict_from_synth_files(synth_files)
def movefile(src_fs, src_path, dst_fs, dst_path, overwrite=True, chunk_size=64 * 1024):
    """Move a file from one filesystem to another. Will use system copyfile,
    if both files have a syspath. Otherwise file will be copied a chunk at a time.

    :param src_fs: Source filesystem object
    :param src_path: Source path
    :param dst_fs: Destination filesystem object
    :param dst_path: Destination path
    :param overwrite: If True, overwrite an existing destination file
    :param chunk_size: Size of chunks to move if system copyfile is not available
        (default 64K)

    """
    src_syspath = src_fs.getsyspath(src_path, allow_none=True)
    dst_syspath = dst_fs.getsyspath(dst_path, allow_none=True)

    if not overwrite and dst_fs.exists(dst_path):
        raise DestinationExistsError(dst_path)

    if src_fs is dst_fs:
        src_fs.move(src_path, dst_path, overwrite=overwrite)
        return

    # System copy if there are two sys paths
    if src_syspath is not None and dst_syspath is not None:
        FS._shutil_movefile(src_syspath, dst_syspath)
        return

    src_lock = getattr(src_fs, '_lock', None)
    if src_lock is not None:
        src_lock.acquire()

    try:
        src = None
        try:
            # Chunk copy; remove the source only if the copy succeeded
            src = src_fs.open(src_path, 'rb')
            dst_fs.setcontents(dst_path, src, chunk_size=chunk_size)
            src_fs.remove(src_path)
        finally:
            if src is not None:
                src.close()
    finally:
        if src_lock is not None:
            src_lock.release()
def get_module_version(module_name: str, directory: FS) -> Optional[str]:
    """
    Get the version of the module in the given directory.

    :param module_name: name of the module
    :param directory: FS object pointing to the parent directory of the module
    :return: version of the module, or None if it is not present in the directory
    """
    version = None
    if module_name in directory.listdir("."):
        manifest = directory.readtext(join(module_name, "__manifest__.py"))
        version = ast.literal_eval(manifest)["version"]
    return version
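# Usage sketch with a fabricated Odoo-style module layout; the
# manifest content below is illustrative only.
from fs.memoryfs import MemoryFS

mem = MemoryFS()
mem.makedirs("my_module")
mem.writetext("my_module/__manifest__.py",
              "{'name': 'My Module', 'version': '14.0.1.0.0'}")
assert get_module_version("my_module", mem) == "14.0.1.0.0"
assert get_module_version("missing_module", mem) is None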
def package_json(cwd_fs: FS):
    """
    Try to guess a version from ``package.json``.
    """
    log.debug('Looking for package.json')
    if cwd_fs.exists('package.json'):
        log.debug('Guessing version with package.json')
        try:
            with cwd_fs.open('package.json', 'r') as fd:
                return json.load(fd).get('version')
        except json.JSONDecodeError:
            pass
    return None
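# Usage sketch: the version comes straight from the "version" key,
# and malformed JSON falls through to None.
from fs.memoryfs import MemoryFS

mem = MemoryFS()
mem.writetext("package.json", '{"version": "1.2.3"}')
assert package_json(mem) == "1.2.3"
mem.writetext("package.json", "not json")
assert package_json(mem) is None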
def __init__(self, fs_prims: FSPrimitives) -> None:
    FS.__init__(self)
    self.prims = fs_prims
    self._meta = {
        "case_insensitive": os.path.normcase("Aa") != "aa",
        "network": False,
        "read_only": False,
        "supports_rename": False,
        "thread_safe": True,
        "unicode_paths": False,
        "virtual": False,
        "invalid_path_chars": "\0",
    }
def file_reader(*, fs: FS, resource: Union[str, Path], mimetype: Optional[str],
                proxy: bool = False, mmap_mode: Union[str, None] = None,
                **kwargs) -> Any:
    if isinstance(resource, Path):
        resource = str(resource)
    if mimetype is None and resource.endswith(".npy"):
        mimetype = "application/octet-stream"
    elif mimetype is None:
        mimetype, _ = guess_type(resource)

    # Defer fs.open() so only the selected mimetype opens the file.
    mapping = {
        "application/octet-stream": (
            np.load, "file",
            lambda: {
                "file": fs.open(resource, mode="rb"),
                "mmap_mode": mmap_mode,
                "allow_pickle": False,
            },
        ),
        "application/json": (
            json.load, "fp",
            lambda: {"fp": fs.open(resource, encoding="utf-8")},
        ),
        "text/csv": (
            pd.read_csv, "filepath_or_buffer",
            lambda: {"filepath_or_buffer": fs.open(resource)},
        ),
    }
    try:
        func, label, make_kwargs = mapping[mimetype]
    except KeyError:
        raise InvalidMimetype("Mimetype '{}' not understood".format(mimetype))
    kwargs = make_kwargs()
    if proxy:
        return Proxy(func, label, kwargs)
    else:
        return func(**kwargs)
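# Usage sketch: with mimetype=None the type is guessed from the
# extension, so a .csv resource is routed to pd.read_csv.
from fs.memoryfs import MemoryFS

mem = MemoryFS()
mem.writetext("data.csv", "a,b\n1,2\n")
df = file_reader(fs=mem, resource="data.csv", mimetype=None)
assert list(df.columns) == ["a", "b"]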
def __init__(
    self,
    contains_lidvids: bool,
    subcomponents: Set[S],
    fs: FS,
    filepaths: Set[str],
) -> None:
    """
    Create a 'VersionContents' object.  DO NOT USE this constructor.
    Instead use the static methods
    VersionContents.create_from_lids() and
    VersionContents.create_from_lidvids().
    """
    if not _data_consistent(contains_lidvids, subcomponents):
        raise TypeError(
            "Subcomponents are not consistent; they should be all "
            "lidvids or all lids."
        )
    self.contains_lidvids = contains_lidvids
    self.subcomponents: Set[S] = subcomponents
    for filepath in filepaths:
        if not isabs(filepath):
            raise ValueError(f"{filepath} is not an absolute path.")
        if not fs.isfile(filepath):
            raise ValueError(f"{filepath} is not a file.")
    self.fs = fs
    self.filepaths = filepaths
def __init__(self, fs: FSBase):
    self._fs = fs
    try:
        self._iterations: List[str] = json.loads(
            fs.readtext(_CHECKPOINT_STATE_FILE))
    except Exception:
        self._iterations = []
def run(
    self,
    trial: Trial,
    judge: TrialJudge,
    checkpoint_basedir_fs: FSBase,
) -> None:
    checkpoint = Checkpoint(
        checkpoint_basedir_fs.makedir(trial.trial_id, recreate=True))
    if not judge.can_accept(trial):
        return
    self._current_trial = trial
    self.initialize()
    try:
        if len(checkpoint) > 0:
            # Resume from the rung recorded in the latest checkpoint.
            self._rung = int(checkpoint.latest.readtext("__RUNG__")) + 1
            self.load_checkpoint(checkpoint.latest)
        budget = judge.get_budget(trial, self.rung)
        while budget > 0:
            report = self.run_single_rung(budget)
            report = report.with_rung(self.rung).with_sort_metric(
                self.generate_sort_metric(report.metric))
            decision = judge.judge(report)
            if decision.should_checkpoint:
                with checkpoint.create() as fs:
                    fs.writetext("__RUNG__", str(self.rung))
                    self.save_checkpoint(fs)
            budget = decision.budget
            self._rung += 1
    finally:
        self.finalize()
def __init__(self, path: Path, *, location: Point5D = Point5D.zero(), filesystem: FS):
    self._dataset: Optional[h5py.Dataset] = None
    try:
        self._dataset, outer_path, inner_path = self.openDataset(
            path, filesystem=filesystem)
        axiskeys = self.getAxisKeys(self._dataset)
        tile_shape = Shape5D.create(
            raw_shape=self._dataset.chunks or self._dataset.shape,
            axiskeys=axiskeys)
        super().__init__(
            url=filesystem.desc(outer_path.as_posix()) + "/" + inner_path.as_posix(),
            tile_shape=tile_shape,
            shape=Shape5D.create(raw_shape=self._dataset.shape, axiskeys=axiskeys),
            dtype=self._dataset.dtype,
            name=self._dataset.file.filename.split("/")[-1] + self._dataset.name,
            location=location,
            axiskeys=axiskeys,
        )
    except Exception:
        if self._dataset:
            self._dataset.file.close()
        raise
def scan_vfs_dir(
    fs: FS, dir: str, namespaces: Optional[Tuple] = None
) -> Tuple[List[Info], List[Info], List[Info], List[Info]]:
    """
    Returns a 4-tuple of (ordinary-file infos, ordinary-directory
    infos, subdir-versions-file infos, version-directory infos).
    This lets us separate "real" files and dirs from the
    "bookkeeping" ones.
    """
    infos = list(fs.scandir(dir, namespaces=namespaces))
    file_infos = [info for info in infos if info.is_file]
    dir_infos = [info for info in infos if info.is_dir]

    ordinary_file_infos = [
        info for info in file_infos if info.name != SUBDIR_VERSIONS_FILENAME
    ]
    subdir_versions_file_infos = [
        info for info in file_infos if info.name == SUBDIR_VERSIONS_FILENAME
    ]
    ordinary_dir_infos = [
        info for info in dir_infos if info.name[0:2] != _VERSION_DIR_PREFIX
    ]
    version_dir_infos = [
        info for info in dir_infos if info.name[0:2] == _VERSION_DIR_PREFIX
    ]
    return (
        ordinary_file_infos,
        ordinary_dir_infos,
        subdir_versions_file_infos,
        version_dir_infos,
    )
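# Usage sketch, assuming the module-level constants used above, e.g.
# SUBDIR_VERSIONS_FILENAME == "subdir$versions.txt" and
# _VERSION_DIR_PREFIX == "v$" (both assumed from this codebase's
# versioned-filesystem conventions):
from fs.memoryfs import MemoryFS

mem = MemoryFS()
mem.writetext("data.txt", "x")
mem.makedirs("v$1.0")
files, dirs, sv_files, v_dirs = scan_vfs_dir(mem, "/")
assert [i.name for i in files] == ["data.txt"]
assert [i.name for i in v_dirs] == ["v$1.0"]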
def copyfile(src_fs, src_path, dst_fs, dst_path, overwrite=True,
             update=False, chunk_size=64 * 1024):
    """Copy a file from one filesystem to another. Will use system copyfile,
    if both files have a syspath. Otherwise file will be copied a chunk at a time.

    In the case where ``overwrite`` is False and ``update`` is True, the
    behaviour of ``update`` takes precedence.

    :param src_fs: Source filesystem object
    :param src_path: Source path
    :param dst_fs: Destination filesystem object
    :param dst_path: Destination path
    :param overwrite: Write to destination files even if they already exist
    :param update: Write to destination files only if the source is newer
    :param chunk_size: Size of chunks to move if system copyfile is not available
        (default 64K)

    """
    assert_write(src_fs, src_path, dst_fs, dst_path, overwrite, update)

    # If the src and dst fs objects are the same, then use a direct copy
    if src_fs is dst_fs:
        src_fs.copy(src_path, dst_path, overwrite=overwrite)
        return

    src_syspath = src_fs.getsyspath(src_path, allow_none=True)
    dst_syspath = dst_fs.getsyspath(dst_path, allow_none=True)

    # System copy if there are two sys paths
    if src_syspath is not None and dst_syspath is not None:
        FS._shutil_copyfile(src_syspath, dst_syspath)
        return

    src_lock = getattr(src_fs, '_lock', None)
    if src_lock is not None:
        src_lock.acquire()

    try:
        src = None
        try:
            src = src_fs.open(src_path, 'rb')
            dst_fs.setcontents(dst_path, src, chunk_size=chunk_size)
        finally:
            if src is not None:
                src.close()
    finally:
        if src_lock is not None:
            src_lock.release()
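# Usage sketch against the legacy PyFilesystem 0.x API these helpers
# target (setcontents/getcontents were renamed in pyfilesystem2);
# MemoryFS here is the 0.x class:
from fs.memoryfs import MemoryFS

src, dst = MemoryFS(), MemoryFS()
src.setcontents("a.txt", b"payload")
copyfile(src, "a.txt", dst, "a.txt")
assert dst.getcontents("a.txt") == b"payload"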
def file_writer(*, data: Any, fs: FS, resource: str, mimetype: str, **kwargs) -> None:
    if isinstance(resource, Path):
        resource = str(resource)
    if mimetype == "application/octet-stream":
        with fs.open(resource, mode="wb") as f:
            np.save(f, data, allow_pickle=False)
    elif mimetype == "application/json":
        with fs.open(resource, mode="w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
    elif mimetype == "text/csv":
        assert isinstance(data, pd.DataFrame)
        with fs.open(resource, mode="w", encoding="utf-8") as f:
            data.to_csv(f)
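# Usage sketch: route a dict to the JSON branch and read it back.
import json
from fs.memoryfs import MemoryFS

mem = MemoryFS()
file_writer(data={"key": 1}, fs=mem, resource="out.json",
            mimetype="application/json")
assert json.loads(mem.readtext("out.json")) == {"key": 1}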
def update_from_single_version(self, is_new: IS_NEW_TEST,
                               single_version_fs: FS) -> bool:
    # TODO This import is circular; that's why I have it here inside
    # the function.  But there must be a better way to structure this.
    from pdart.fs.multiversioned.version_view import (
        is_segment,
        strip_segment,
        vv_lid_path,
    )

    # TODO Note that this makes assumptions about the source
    # filesystem format.  Document them.

    def update_from_lid(lid: LID) -> LIDVID:
        # Find the path corresponding to this LID.
        path = vv_lid_path(lid)

        # First, update all the children recursively.  Get their LIDs
        # by extending this LID with the names of the subdirectories
        # of path.  That handles directories.
        child_lidvids: Set[LIDVID] = {
            update_from_lid(lid.extend_lid(strip_segment(name)))
            for name in single_version_fs.listdir(path)
            if is_segment(name)
        }

        # Now look at files.  We create a VersionContents object from
        # the set of new LIDVIDs and all the files contained in the
        # component's directory.
        contents = VersionContents.create_from_lidvids_and_dirpath(
            child_lidvids, single_version_fs, path)

        # Now we ask the Multiversioned to insert these contents as a
        # new version if needed.  It returns the new LIDVID if a new
        # LIDVID is needed; otherwise it returns the old one.
        return self.add_contents_if(is_new, lid, contents, False)

    bundle_segs = [
        strip_segment(name)
        for name in single_version_fs.listdir("/")
        if is_segment(name)
    ]

    # TODO I can't see any reason why there wouldn't be exactly a
    # single segment, but I'm throwing in a check to let me know if
    # I'm wrong.
    if len(bundle_segs) != 1:
        raise ValueError(f"expected exactly one bundle segment, got {bundle_segs}")

    changed = False
    for bundle_seg in bundle_segs:
        lid = LID.create_from_parts([str(bundle_seg)])
        orig_lidvid: Optional[LIDVID] = self.latest_lidvid(lid)
        new_lidvid: LIDVID = update_from_lid(lid)
        changed = changed or new_lidvid != orig_lidvid
    return changed
def categorize_filesystem(fs: FS) -> str:
    top_level_listing = fs.listdir("/")
    if not top_level_listing:
        return EMPTY_FS_TYPE
    elif any(name[-1] == "$" for name in top_level_listing):
        return SINGLE_VERSIONED_FS_TYPE
    elif any(is_version_dir(dir) for dir in fs.walk.dirs()):
        return MULTIVERSIONED_FS_TYPE
    else:
        return UNKNOWN_FS_TYPE
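# Usage sketch, assuming the *_FS_TYPE constants defined alongside
# this function; a dollar-terminated top-level name marks a
# single-versioned filesystem.
from fs.memoryfs import MemoryFS

mem = MemoryFS()
assert categorize_filesystem(mem) == EMPTY_FS_TYPE
mem.makedirs("b$")
assert categorize_filesystem(mem) == SINGLE_VERSIONED_FS_TYPE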
def makedirs(
    self,
    path: str,
    permissions: Optional[Permissions] = None,
    recreate: bool = False,
) -> SubFS[FS]:
    return FS.makedirs(self, path, permissions=permissions, recreate=recreate)
def copydir(self, src, dst, **kwds):
    FS.copydir(self, src, dst, **kwds)
def movedir(self, src, dst, **kwds):
    FS.movedir(self, src, dst, **kwds)
def move(self, src, dst, **kwds):
    FS.move(self, src, dst, **kwds)
    path = relpath(normpath(src))
    with self._size_lock:
        self._file_sizes.pop(path, None)
def close(self):
    self._olefile.close()
    FS.close(self)
def __init__(self):
    FS.__init__(self, thread_synchronize=True)
    self.fs_sequence = []
    self.fs_lookup = {}