def dirs_match(dirpath: str) -> bool:
    primary_dirs = filter_to_primary_dirs(
        dirpath,
        (
            relpath(dir)
            for dir in SubFS(primary_fs, dirpath).walk.dirs()
            if "$" in dir
        ),
    )
    latest_dirs = filter_to_primary_dirs(
        dirpath,
        (
            relpath(dir)
            for dir in SubFS(latest_version_fs, dirpath).walk.dirs()
            if "$" in dir
        ),
    )
    PDS_LOGGER.open("Directory changes detected")
    if primary_dirs == latest_dirs:
        for dir in primary_dirs:
            full_dirpath = join(dirpath, relpath(dir))
            lid = dirpath_to_lid(full_dirpath)
            if lid not in result.changes_dict:
                raise KeyError(f"{lid} not in changes_dict.")
            if result.changed(lid):
                PDS_LOGGER.log(
                    "info", f"CHANGE DETECTED in {dirpath}: {lid} changed"
                )
                PDS_LOGGER.close()
                return False
        PDS_LOGGER.close()
        return True
    else:
        # list of dirs does not match
        added = primary_dirs - latest_dirs
        removed = latest_dirs - primary_dirs
        if added and removed:
            PDS_LOGGER.log(
                "info",
                f"CHANGE DETECTED IN {dirpath}: added {added}; removed {removed}",
            )
        elif added:
            PDS_LOGGER.log("info", f"CHANGE DETECTED IN {dirpath}: added {added}")
        else:
            # removed
            PDS_LOGGER.log(
                "info", f"CHANGE DETECTED IN {dirpath}: removed {removed}"
            )
        PDS_LOGGER.close()
        return False
def movedir(self, src, dst, *args, **kwargs):
    """Move a directory from one place to another."""
    # first, move the backups
    rel_src = relpath(src)
    rel_dst = relpath(dst)
    for path in self.fs.walkfiles(rel_src):
        if self.has_snapshot(path):
            new_path = path.replace(rel_src, rel_dst)
            old_abs_path = self.snapshot_snap_path(path)
            new_abs_path = self.snapshot_snap_path(new_path)
            os.rename(old_abs_path, new_abs_path)
    super(VersioningFS, self).movedir(src, dst, *args, **kwargs)
def _decode(self, path):
    path = relpath(normpath(path))
    path = path.replace("__colon__", ":")
    if not self.allow_autorun:
        if path.lower().startswith("autorun."):
            path = "_" + path
    return path
def _path_to_key(self, path):
    """Converts an fs path to an S3 key."""
    _path = relpath(normpath(path))
    _key = (
        "{}/{}".format(self._prefix, _path)
        .lstrip("/")
        .replace("/", self.delimiter)
    )
    return _key
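# Hedged illustration (added, not part of the original source): a standalone
# sketch of the prefix + delimiter mapping that _path_to_key performs, using
# the fs.path helpers directly.  The prefix and paths below are invented for
# the example.
from fs.path import normpath, relpath

def path_to_key_example(path, prefix="my/root", delimiter="/"):
    # normalise the fs path, make it relative, then glue it onto the prefix
    _path = relpath(normpath(path))
    return "{}/{}".format(prefix, _path).lstrip("/").replace("/", delimiter)

# path_to_key_example("/foo/bar.txt") gives "my/root/foo/bar.txt";
# with prefix="" (bucket root) the same call gives "foo/bar.txt".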
def __init__(self, bucket_name, dir_path='/',
             aws_access_key_id=None, aws_secret_access_key=None,
             aws_session_token=None, endpoint_url=None, region=None,
             delimiter='/', strict=True):
    _creds = (aws_access_key_id, aws_secret_access_key)
    if any(_creds) and not all(_creds):
        raise ValueError(
            'aws_access_key_id and aws_secret_access_key '
            'must be set together if specified'
        )
    self._bucket_name = bucket_name
    self.dir_path = dir_path
    self._prefix = relpath(normpath(dir_path)).rstrip('/')
    self.aws_access_key_id = aws_access_key_id
    self.aws_secret_access_key = aws_secret_access_key
    self.aws_session_token = aws_session_token
    self.endpoint_url = endpoint_url
    self.region = region
    self.delimiter = delimiter
    self.strict = strict
    self._tlocal = threading.local()
    super(S3FS, self).__init__()
def removedir(self, path, recursive=False, force=False):
    if self.fs.isdirempty(path) or force:
        rel_path = relpath(path)
        for filename in self.fs.walkfiles(rel_path):
            self.__delete_snapshot(filename)
    super(VersioningFS, self).removedir(path, recursive, force)
def listdirinfo(self, path="/", wildcard=None, full=False, absolute=False,
                dirs_only=False, files_only=False):
    self.wrapped_fs._log(DEBUG, "Listing directory (listdirinfo) %s" % path)
    _fixpath = self.wrapped_fs._fixpath
    _path = _fixpath(path)

    if dirs_only and files_only:
        # fs.errors has no ValueError; raise the builtin
        raise ValueError("dirs_only and files_only can not both be True")

    result = []
    for item in self.wrapped_fs.tahoeutil.list(self.dircap, _path):
        if dirs_only and item['type'] == 'filenode':
            continue
        elif files_only and item['type'] == 'dirnode':
            continue

        if wildcard is not None and \
                not fnmatch.fnmatch(item['name'], wildcard):
            continue

        if full:
            item_path = relpath(pathjoin(_path, item['name']))
        elif absolute:
            item_path = abspath(pathjoin(_path, item['name']))
        else:
            item_path = item['name']

        cache_name = self.wrapped_fs._fixpath(u"%s/%s" %
                                              (path, item['name']))
        self._cache_set(cache_name, 'getinfo', (), {}, (True, item))
        result.append((item_path, item))

    return result
def _path_to_dir_key(self, path):
    """Converts an fs path to an S3 directory key."""
    _path = relpath(normpath(path))
    _key = (
        forcedir("{}/{}".format(self._prefix, _path))
        .lstrip('/')
        .replace('/', self.delimiter)
    )
    return _key
def ilistdirinfo(self, path="/", wildcard=None, full=False, absolute=False,
                 dirs_only=False, files_only=False):
    self._log(DEBUG, "Listing directory (listdirinfo) %s" % path)

    if dirs_only and files_only:
        raise ValueError("dirs_only and files_only can not both be True")

    for item in self.tahoeutil.list(self.dircap, path):
        if dirs_only and item['type'] == 'filenode':
            continue
        elif files_only and item['type'] == 'dirnode':
            continue

        if wildcard is not None:
            if isinstance(wildcard, basestring):
                if not fnmatch.fnmatch(item['name'], wildcard):
                    continue
            else:
                if not wildcard(item['name']):
                    continue

        if full:
            item_path = relpath(pathjoin(path, item['name']))
        elif absolute:
            item_path = abspath(pathjoin(path, item['name']))
        else:
            item_path = item['name']

        yield (item_path, item)
def _encode(self, path):
    path = relpath(normpath(path))
    path = path.replace(":", "__colon__")
    if not self.allow_autorun:
        if path.lower().startswith("_autorun."):
            path = path[1:]
    return path
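# Hedged illustration (added, not from the original source): the core of the
# _encode/_decode pair above is a reversible substitution of ":" with the
# sentinel "__colon__".  A minimal standalone round trip, ignoring the autorun
# handling, which needs the owning object's allow_autorun flag:
from fs.path import normpath, relpath

def encode_colons(path):
    return relpath(normpath(path)).replace(":", "__colon__")

def decode_colons(path):
    return relpath(normpath(path)).replace("__colon__", ":")

assert decode_colons(encode_colons("/c:/autoexec.bat")) == "c:/autoexec.bat"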
def files_match(dirpath: str) -> bool:
    # All files in subcomponents will have a "$" in their path (it
    # comes after the name of the subcomponent), so by filtering
    # them out, we get only the files for this component.  PDS4
    # *does* allow directories in a component (that aren't part of
    # a subcomponent), so we use walk instead of listdir() to get
    # *all* the files, not just the top-level ones.
    primary_files = filter_to_primary_files(
        dirpath,
        (
            relpath(filepath)
            for filepath in SubFS(primary_fs, dirpath).walk.files()
            if "$" not in filepath
        ),
    )
    latest_files = filter_to_primary_files(
        dirpath,
        (
            relpath(filepath)
            for filepath in SubFS(latest_version_fs, dirpath).walk.files()
            if "$" not in filepath
        ),
    )
    try:
        PDS_LOGGER.open("File changes detected")
        if primary_files != latest_files:
            PDS_LOGGER.log(
                "info",
                f"CHANGE DETECTED IN {dirpath}: {primary_files} != {latest_files}",
            )
            PDS_LOGGER.close()
            return False
        for filename in primary_files:
            filepath = join(dirpath, relpath(filename))
            if primary_fs.getbytes(filepath) != latest_version_fs.getbytes(
                filepath
            ):
                PDS_LOGGER.log(
                    "info", f"CHANGE DETECTED IN {filepath}; DIRPATH = {dirpath}"
                )
                PDS_LOGGER.close()
                return False
    except Exception as e:
        PDS_LOGGER.exception(e)
    finally:
        PDS_LOGGER.close()
    return True
def _s3path(self, path):
    """Get the absolute path to a file stored in S3."""
    path = relpath(normpath(path))
    path = self._separator.join(iteratepath(path))
    s3path = self._prefix + path
    if s3path and s3path[-1] == self._separator:
        s3path = s3path[:-1]
    return s3path
def snapshot_snap_path(self, path):
    """Returns the dir containing the snapshots for a given path."""
    path = relpath(path)
    dest_hash = hash_path(path)

    backup_dir = self.backup.getsyspath('/')
    save_snap_dir = os.path.join(backup_dir, dest_hash)
    return save_snap_dir
def compile_fs_template(fs, template_text, data=None, path=None):
    """Compile an fs template structure into a filesystem object"""
    if data is None:
        data = {}
    template = Template(template_text)
    template.re_special = re.compile(
        r'\{\{\%((?:\".*?\"|\'.*?\'|.|\s)*?)\%\}\}|(\{\{\#)|(\#\}\})')
    context = Context({"data": data}, re_sub=r'\$\{\{(.*?)\}\}')
    with context.frame("data"):
        fs_template = template.render(context)

    out_type = None
    out_filename = None
    file_lines = []

    def write_file(filename, file_type):
        if filename:
            if file_type.lower() == "text":
                with fs.open(filename, 'wt') as f:
                    f.write('\n'.join(file_lines) + '\n')
            elif file_type.lower() == "wraptext":
                import textwrap
                with fs.open(filename, 'wt') as f:
                    for line in file_lines:
                        f.write('\n'.join(textwrap.wrap(line, 79)) + '\n')
            elif file_type.lower() == "bin":
                with fs.open(filename, 'wb') as f:
                    for line in file_lines:
                        chunk = b''.join(
                            chr(int(a + b, 16))
                            for a, b in zip(line[::2], line[1::2]))
                        f.write(chunk)
        del file_lines[:]

    for line in fs_template.splitlines():
        line = line.rstrip()
        if line.startswith('@'):
            write_file(out_filename, out_type)
            out_filename = None
            out_type, path_spec = line[1:].split(' ', 1)
            if path:
                path_spec = join(path, relpath(path_spec))
            if path_spec.endswith('/'):
                fs.makedir(path_spec, allow_recreate=True, recursive=True)
                out_filename = None
            else:
                fs.makedir(dirname(path_spec), allow_recreate=True,
                           recursive=True)
                out_filename = path_spec
            continue
        if out_filename:
            file_lines.append(line)

    if out_filename:
        write_file(out_filename, out_type)
def snapshot_info_path(self, path):
    """Returns the snapshot info file path for a given path."""
    path = relpath(path)

    # find where the snapshot info file should be
    dest_hash = hash_path(path)
    info_filename = "%s.info" % (dest_hash)
    info_path = os.path.join(self.__tmp.getsyspath('/'), info_filename)
    return info_path
def compile_fs_template(fs, template_text, data=None, path=None):
    """Compile an fs template structure into a filesystem object"""
    if data is None:
        data = {}
    template = Template(template_text)
    template.re_special = re.compile(
        r"\{\{\%((?:\".*?\"|\'.*?\'|.|\s)*?)\%\}\}|(\{\{\#)|(\#\}\})")
    context = Context(re_sub=r"\$\{\{(.*?)\}\}")
    # with context.frame("data"):
    fs_template = template.render(data, context=context)

    out_type = None
    out_filename = None
    file_lines = []

    def write_file(filename, file_type):
        if filename:
            if file_type.lower() == "text":
                with fs.open(filename, "wt") as f:
                    f.write("\n".join(file_lines) + "\n")
            elif file_type.lower() == "wraptext":
                import textwrap
                with fs.open(filename, "wt") as f:
                    for line in file_lines:
                        f.write("\n".join(textwrap.wrap(line, 79)) + "\n")
            elif file_type.lower() == "bin":
                with fs.open(filename, "wb") as f:
                    for line in file_lines:
                        # each pair of hex digits is one byte; use bytes()
                        # rather than b"".join(chr(...)), which fails on
                        # Python 3
                        chunk = bytes(
                            int(a + b, 16)
                            for a, b in zip(line[::2], line[1::2]))
                        f.write(chunk)
        del file_lines[:]

    for line in fs_template.splitlines():
        line = line.rstrip()
        if line.startswith("@"):
            write_file(out_filename, out_type)
            out_filename = None
            out_type, path_spec = line[1:].split(" ", 1)
            if path:
                path_spec = join(path, relpath(path_spec))
            if path_spec.endswith("/"):
                fs.makedirs(path_spec, recreate=True)
                out_filename = None
            else:
                fs.makedirs(dirname(path_spec), recreate=True)
                out_filename = path_spec
            continue
        if out_filename:
            file_lines.append(line)

    if out_filename:
        write_file(out_filename, out_type)
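# Hedged illustration (added, not from the original source): the input format
# the two compile_fs_template variants above expect.  Lines beginning with "@"
# name an output ("<type> <path>", where a trailing "/" means "just create the
# directory"); every following line is buffered until the next "@" and then
# written by write_file.  The paths and contents here are invented.
EXAMPLE_TEMPLATE = """\
@text readme.txt
Hello from a template.
@text docs/
@bin logo.bin
48656c6c6f
"""
# compile_fs_template(some_fs, EXAMPLE_TEMPLATE) would create readme.txt, the
# empty docs/ directory, and logo.bin containing b"Hello" (each pair of hex
# digits decodes to one byte).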
def __init__(
    self,
    bucket_name,
    dir_path="/",
    oss_access_key_id=None,
    oss_secret_access_key=None,
    oss_session_token=None,
    endpoint_url=None,
    region=None,
    delimiter="/",
    strict=True,
    cache_control=None,
    acl=None,
    upload_args=None,
    download_args=None,
):
    # boto3 client and resource support a config argument, so we needn't use awscli
    # self._init_aliyun()
    _creds = (oss_access_key_id, oss_secret_access_key)
    if any(_creds) and not all(_creds):
        raise ValueError(
            "oss_access_key_id and oss_secret_access_key "
            "must be set together if specified"
        )
    self._bucket_name = bucket_name
    self.dir_path = dir_path
    self._prefix = relpath(normpath(dir_path)).rstrip("/")
    self.aws_access_key_id = oss_access_key_id
    self.aws_secret_access_key = oss_secret_access_key
    self.aws_session_token = oss_session_token
    if region and endpoint_url is None:
        self.endpoint_url = "https://" + region + ".aliyuncs.com"
    else:
        self.endpoint_url = endpoint_url
    self.region = region
    self.delimiter = delimiter
    self.strict = strict
    self._tlocal = threading.local()
    if cache_control or acl:
        upload_args = upload_args or {}
        if cache_control:
            upload_args["CacheControl"] = cache_control
        if acl:
            upload_args["ACL"] = acl
    self.upload_args = upload_args
    self.download_args = download_args
    super(OSSFS, self).__init__()
def _index(self):
    '''
    Iterate through the local mirror and build a list of all files
    that can be efficiently searched.  These files are stored as
    relative paths because other functions use `os.path.join()`,
    which has specific behavior around absolute paths.
    '''
    segments = {}
    for segment, path in self.segments.items():
        results = []
        for file in self.fs.walk.files(path=path):
            results.append(relpath(file))
        segments.update({segment: results})
    self.index = segments
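# Hedged illustration (added, not in the original): why _index stores relative
# paths.  os.path.join discards everything before an absolute component, so a
# leading "/" from the mirror walk would defeat a later join with the local
# root.  The paths below are invented:
import os.path

assert os.path.join("/mirror", "pkg/foo.tar.gz") == "/mirror/pkg/foo.tar.gz"
assert os.path.join("/mirror", "/pkg/foo.tar.gz") == "/pkg/foo.tar.gz"  # root lost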
def __init__(self,
             bucket_name: str,
             root_path: str = None,
             create: bool = False,
             client: Client = None,
             retry: int = 5,
             strict: bool = True):
    super().__init__()
    self._bucket_name = bucket_name
    if not root_path:
        root_path = ""
    self.root_path = root_path
    self._prefix = relpath(normpath(root_path)).rstrip(self.DELIMITER)
    self.strict = strict

    self.client = client
    if self.client is None:
        self.client = Client()

    if retry:
        # urllib3: "method_whitelist" was deprecated in favour of
        # "allowed_methods" in version 1.26.0.  Ensure compatibility with
        # versions < 1.26.0 while at the same time avoiding the
        # `DeprecationWarning` for versions >= 1.26.0.
        key = ("allowed_methods"
               if version.parse(urllib3.__version__) >= version.parse("1.26.0")
               else "method_whitelist")
        kwargs = {key: False}  # retry on any HTTP method
        max_retries = Retry(total=retry,
                            status_forcelist=[429, 502, 503, 504],
                            backoff_factor=0.5,
                            **kwargs)
        adapter = HTTPAdapter(max_retries=max_retries)
        self.client._http.mount("https://", adapter)

    self.bucket = self.client.bucket(self._bucket_name)

    if self._prefix != "":
        if create:
            root_marker = self._get_blob(self._prefix + GCSFS.DELIMITER)
            if root_marker is None:
                blob = self.bucket.blob(self._prefix + GCSFS.DELIMITER)
                blob.upload_from_string(b"")
        elif strict and self._get_blob(self._prefix + GCSFS.DELIMITER) is None:
            raise errors.CreateFailed(
                "Root path \"{}\" does not exist".format(root_path))
def read(cls, fs, path, master=None):
    visited = []
    if not isinstance(path, string_types):
        for p in path:
            if fs.isfile(p):
                path = p
                break
        else:
            raise errors.SettingsError(
                """settings file not found (looked for {} in {})""".format(
                    textual_list(path, join_word="and"), fs))
    settings_stack = []

    while 1:
        path = relpath(normpath(path))
        if path in visited:
            raise errors.SettingsError(
                """recursive extends detected, "{}" has already been extended"""
                .format(path))
        with fs.open(path, "rt") as settings_file:
            s = iniparse.parse(
                settings_file,
                SettingsContainer(),
                section_class=SettingsSectionContainer,
            )
        visited.append(path)
        settings_stack.append(s)
        if "extends" in s[""]:
            # path = s['']['extends']
            path = join(dirname(path), s[""]["extends"])
        else:
            break

    settings_stack = settings_stack[::-1]
    settings = settings_stack[0]

    s = cls.__class__(settings_stack[0])
    for s in settings_stack[1:]:
        for section_name, section in s.items():
            if section_name in settings:
                settings[section_name].update(section)
            else:
                settings[section_name] = section

    if master is not None:
        cls.apply_master(master, settings)

    return settings
def __init__(
    self,
    bucket_name=None,
    dir_path="/",
    aws_access_key_id=None,
    aws_secret_access_key=None,
    aws_session_token=None,
    endpoint_url=None,
    region=None,
    delimiter="/",
    strict=True,
    cache_control=None,
    acl=None,
    upload_args=None,
    download_args=None,
):
    _creds = (aws_access_key_id, aws_secret_access_key)
    if any(_creds) and not all(_creds):
        raise ValueError(
            "aws_access_key_id and aws_secret_access_key "
            "must be set together if specified"
        )
    self._bucket_name = bucket_name
    self.dir_path = dir_path
    self._prefix = relpath(normpath(dir_path)).rstrip("/")
    self.aws_access_key_id = aws_access_key_id
    self.aws_secret_access_key = aws_secret_access_key
    self.aws_session_token = aws_session_token
    self.endpoint_url = endpoint_url
    self.region = region
    self.delimiter = delimiter
    self.strict = strict
    self._tlocal = threading.local()
    if cache_control or acl:
        upload_args = upload_args or {}
        if cache_control:
            upload_args["CacheControl"] = cache_control
        if acl:
            upload_args["ACL"] = acl
    self.upload_args = upload_args
    self.download_args = download_args
    super(S3FS, self).__init__()
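# Hedged usage sketch (added, not from the original source): constructing the
# S3FS above.  The bucket name and prefix are invented; with dir_path set,
# every fs path is resolved under that prefix by _path_to_key /
# _path_to_dir_key before it reaches S3.
s3fs = S3FS(
    "my-bucket",                      # hypothetical bucket
    dir_path="/datasets/2024",        # becomes the "datasets/2024" key prefix
    region="us-east-1",
    cache_control="max-age=2628000",  # forwarded to uploads as CacheControl
)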
def __init__(self,
             bucket_name: str,
             root_path: str = None,
             create: bool = False,
             client: Client = None,
             strict: bool = True):
    super().__init__()
    self._bucket_name = bucket_name
    if not root_path:
        root_path = ""
    self.root_path = root_path
    self._prefix = relpath(normpath(root_path)).rstrip(self.DELIMITER)
    self.strict = strict

    self.client = client
    if self.client is None:
        self.client = Client()

    try:
        self.bucket = self.client.get_bucket(self._bucket_name)
    except google.api_core.exceptions.NotFound as err:
        raise CreateFailed(
            "The bucket \"{}\" does not seem to exist".format(
                self._bucket_name)) from err
    except google.api_core.exceptions.Forbidden as err:
        raise CreateFailed(
            "You don't have access to the bucket \"{}\"".format(
                self._bucket_name)) from err

    if self._prefix != "":
        if create:
            root_marker = self._get_blob(self._prefix + GCSFS.DELIMITER)
            if root_marker is None:
                blob = self.bucket.blob(self._prefix + GCSFS.DELIMITER)
                blob.upload_from_string(b"")
        elif (strict
              and self._get_blob(self._prefix + GCSFS.DELIMITER) is None
              and not self._is_dir(self._prefix)):
            raise errors.CreateFailed(
                "Root path \"{}\" does not exist".format(root_path))
def __init__(self, dir_path="/", client_id=None, client_secret=None,
             tenant_id=None, username=None, password=None, store=None):
    self._prefix = relpath(normpath(dir_path)).rstrip("/")
    self._tlocal = threading.local()
    self.tenant_id = tenant_id
    if self.tenant_id:
        self.username = client_id
        self.password = client_secret
    else:
        self.username = username
        self.password = password
    self.store_name = store
    super(DLKFS, self).__init__()
def read(self, fs, path):
    visited = []
    if not isinstance(path, string_types):
        for p in path:
            if fs.isfile(p):
                path = p
                break
        else:
            raise errors.SettingsError(
                '''settings file not found (looked for {} in {})'''.format(
                    textual_list(path, join_word='and'), fs))
    settings_stack = []

    while 1:
        path = relpath(normpath(path))
        if path in visited:
            raise errors.SettingsError(
                '''recursive extends detected, "{}" has already been extended'''
                .format(path))
        with fs.open(path, 'rt') as settings_file:
            s = iniparse.parse(settings_file,
                               SettingsContainer(),
                               section_class=SettingsSectionContainer)
        visited.append(path)
        settings_stack.append(s)
        if "extends" in s['']:
            path = s['']['extends']
        else:
            break

    settings_stack = settings_stack[::-1]
    settings = settings_stack[0]

    s = self.__class__(settings_stack[0])
    for s in settings_stack[1:]:
        for section_name, section in s.items():
            if section_name in settings:
                settings[section_name].update(section)
            else:
                settings[section_name] = section

    return settings
def read(cls, fs, path, master=None):
    visited = []
    if not isinstance(path, string_types):
        for p in path:
            if fs.isfile(p):
                path = p
                break
        else:
            raise errors.SettingsError(
                '''settings file not found (looked for {} in {})'''.format(
                    textual_list(path, join_word='and'), fs))
    settings_stack = []

    while 1:
        path = relpath(normpath(path))
        if path in visited:
            raise errors.SettingsError(
                '''recursive extends detected, "{}" has already been extended'''
                .format(path))
        with fs.open(path, 'rt') as settings_file:
            s = iniparse.parse(settings_file,
                               SettingsContainer(),
                               section_class=SettingsSectionContainer)
        visited.append(path)
        settings_stack.append(s)
        if "extends" in s['']:
            path = s['']['extends']
        else:
            break

    settings_stack = settings_stack[::-1]
    settings = settings_stack[0]

    s = cls.__class__(settings_stack[0])
    for s in settings_stack[1:]:
        for section_name, section in s.items():
            if section_name in settings:
                settings[section_name].update(section)
            else:
                settings[section_name] = section

    if master is not None:
        cls.apply_master(master, settings)

    return settings
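# Hedged illustration (added, not in the original): the "extends" mechanism the
# read() variants above implement.  An INI file names its base via an
# "extends" key before the first section; sections from the extending file are
# merged over the base, so its values win.  File names and keys are invented.
#
#   base.ini                      site.ini
#   --------                      --------
#   [server]                      extends = base.ini
#   host = 0.0.0.0                [server]
#   port = 8000                   port = 9000
#
# read(fs, "site.ini") would yield a [server] section with host = 0.0.0.0 and
# port = 9000.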
def open(self, path, mode='r', buffering=-1, encoding=None, errors=None,
         newline=None, line_buffering=False, version=None,
         take_snapshot=True, **kwargs):
    """
    Returns a file-object.  The file-object is wrapped with VersionedFile,
    which will notify VersioningFS to make a snapshot whenever the file is
    changed and closed.

    Parameters
      path (str): A file name relative to the user directory.
      mode (str): The mode for opening the file.
      version (int) (optional): Specifies which version of the file to get.
        If version is set to None, the most recent copy of the file will be
        returned.
      take_snapshot (bool): Set to False to avoid taking a snapshot.
        Defaults to True.
    """
    path = relpath(path)

    if version is None:
        instance = super(VersioningFS, self)
        file_object = instance.open(path=path, mode=mode,
                                    buffering=buffering, errors=errors,
                                    newline=newline,
                                    line_buffering=line_buffering, **kwargs)
        return VersionedFile(fs=self, file_object=file_object, mode=mode,
                             path=path, take_snapshot=take_snapshot)
    else:
        if version < 1:
            raise ResourceNotFoundError("Version %s not found" % (version))
        if version == self.version(path):
            instance = super(VersioningFS, self)
            file_object = instance.open(path=path, mode=mode,
                                        buffering=buffering, errors=errors,
                                        newline=newline,
                                        line_buffering=line_buffering,
                                        **kwargs)
            return VersionedFile(fs=self, file_object=file_object, mode=mode,
                                 temp_file=False, path=path,
                                 take_snapshot=take_snapshot)

        snap_dir = self.snapshot_snap_path(path)

        sorted_versions = self.list_versions(path)
        if version > len(sorted_versions):
            raise ResourceNotFoundError("Version %s not found" % (version))
        requested_version = sorted_versions[version - 1]

        if mode == "r" or mode == "rb":
            temp_name = '%020x' % random.randrange(16 ** 30)
            dest_path = os.path.join(self.tmp.getsyspath('/'), temp_name)
            command = ['rdiff-backup', '--restore-as-of', requested_version,
                       snap_dir, dest_path]
            process = Popen(command, stdout=PIPE, stderr=PIPE)
            process.communicate()

            file_path = os.path.join(temp_name, 'datafile')
            open_file = self.tmp.open(file_path, mode=mode)
            return VersionedFile(fs=self, file_object=open_file, mode=mode,
                                 temp_file=True, path=file_path,
                                 remove=dest_path)
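# Hedged usage sketch (added, not from the original source): driving the
# versioned open() above.  The VersioningFS constructor arguments are
# hypothetical; only the open() keywords shown come from the method signature
# (version numbering starts at 1 for the oldest snapshot, per list_versions).
vfs = VersioningFS(user_fs, backup_fs, tmp_fs)   # hypothetical wiring
with vfs.open("notes.txt", "wb") as f:           # snapshot taken on close
    f.write(b"first draft")
with vfs.open("notes.txt", "wb") as f:
    f.write(b"second draft")
old = vfs.open("notes.txt", "rb", version=1)     # restored via rdiff-backup
print(old.read())                                # expected: b"first draft"
old.close()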
def _path_to_key(self, path: str) -> str:
    """Converts an fs path to a GCS key."""
    _path = relpath(normpath(path))
    return self.DELIMITER.join([self._prefix, _path]).lstrip(
        self.DELIMITER).rstrip(self.DELIMITER)
def __str__(self) -> str:
    return "<gcsfs '{}'>".format(
        join(self._bucket_name, relpath(self.root_path)))
def hash_path(path):
    """Returns a hash of a given path."""
    safe_path = relpath(path).encode('ascii', 'ignore')
    dest_hash = hasher(safe_path).hexdigest()
    return dest_hash
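# Hedged illustration (added, not in the original): hash_path above hashes the
# normalised relative path, so equivalent spellings of the same path collapse
# to one snapshot directory name.  `hasher` is whatever hash constructor the
# module aliases; hashlib.sha256 is assumed here purely for demonstration.
import hashlib
from fs.path import relpath

hasher = hashlib.sha256
assert (hasher(relpath("/docs/a.txt").encode("ascii", "ignore")).hexdigest()
        == hasher(relpath("docs/a.txt").encode("ascii", "ignore")).hexdigest())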
def assert_all_file_versions_equal(self, version):
    for path in self.fs.walkfiles('/'):
        if 'abcdefg' not in path and 'tmp' not in path:
            path = relpath(path)
            file_version = self.fs.version(path)
            self.assertEqual(file_version, version)
def assert_all_files_have_snapshot_info(self, should_exist=True):
    for path in self.fs.walkfiles('/'):
        if 'abcdefg' not in path and 'tmp' not in path:
            path = relpath(path)
            snapshot_info_exists = self.fs.has_snapshot(path)
            self.assertEqual(snapshot_info_exists, should_exist)
def _get_real_path(self, path):
    _path = os.path.join(self.fs.root_path, relpath(path))
    return _path
def __str__(self):
    return "<s3fs '{}'>".format(
        join(self._bucket_name, relpath(self.dir_path))
    )
def get_relative_url(self, **kwargs):
    '''
    Get a specific path or query result in a URL form from the
    perspective of the HTTP server, which is the path at which the
    file is accessible from the outside world.
    '''
    return relpath(self.urlize(self.get(**kwargs)))