def open(self, path, mode='r', **kwargs):
    """Fetch ``path`` over HTTP and return its body as a file-like object.

    The GET request is attempted up to three times; every failed attempt is
    logged as a warning before the next one is made.

    :param path: Path of the resource; turned into a URL by
        ``self._build_url``.
    :param mode: File mode. Only read modes are accepted.
    :param kwargs: Accepted for signature compatibility; not used.
    :raise UnsupportedError: If ``mode`` contains ``w``, ``a`` or ``+``.
    :raise RemoteConnectionError: If all attempts fail, or the server
        answers with a status other than 200 or 404.
    :raise ResourceNotFoundError: If the server answers with status 404.
    :return: ``StringIO`` wrapping the response content.
    """
    if 'w' in mode or '+' in mode or 'a' in mode:
        logging.error('cannot use httpfs.open() in write mode: %s', path)
        raise UnsupportedError('open', path=path)

    url = self._build_url(path)
    for attempt in (1, 2, 3):
        try:
            response = requests.get(url)
        except requests.RequestException as error:
            logging.warning('open attempt %d: %s %s', attempt, url, error)
        else:
            break
    else:
        # All attempts failed. Report the operation that actually failed
        # ('open'); this used to be mislabelled as 'getinfo'.
        raise RemoteConnectionError('open', path)

    if response.status_code == 200:
        return StringIO(response.content)
    if response.status_code == 404:
        raise ResourceNotFoundError(path)

    # Any other status is treated like a connection problem.
    logging.warning(
        'open status %d for %s assumed as connection error.',
        response.status_code, url)
    raise RemoteConnectionError('open', path)
class HTTPFS(FS):
    """Can barely be called a filesystem, because HTTP servers generally
    don't support typical filesystem functionality. This class exists to
    allow the :doc:`opener` system to read files over HTTP.

    If you do need filesystem like functionality over HTTP, see
    :mod:`fs.contrib.davfs`.

    """

    def __init__(self, url):
        """
        :param url: The base URL

        """
        self.root_url = url

    def _make_url(self, path):
        """Join the normalised ``path`` onto the base URL with one slash."""
        path = normpath(path)
        url = '%s/%s' % (self.root_url.rstrip('/'), path.lstrip('/'))
        return url

    def open(self, path, mode="r"):
        """Open ``path`` for reading via ``urlopen``.

        :param path: Path relative to the base URL.
        :param mode: File mode; write, append and update modes are rejected.
        :raise UnsupportedError: If ``mode`` contains ``+``, ``w`` or ``a``.
        :raise ResourceNotFoundError: If ``urlopen`` raises ``URLError`` or
            ``OSError``; the original error is passed as ``details``.
        :return: The object returned by ``urlopen``.
        """
        if '+' in mode or 'w' in mode or 'a' in mode:
            raise UnsupportedError('write')

        url = self._make_url(path)
        try:
            f = urlopen(url)
        except (URLError, OSError) as e:
            # ``except X as e`` is valid on Python 2.6+ and Python 3; the
            # old comma form is a syntax error on Python 3.
            raise ResourceNotFoundError(path, details=e)
        # Hand the opened response back; previously it was dropped and the
        # method returned None.
        return f
def file_create_folder(self, parent_id, title):
    """Create a folder under ``parent_id`` and add it to the cache.

    :param parent_id: Id of the parent folder.
    :param title: Title of the new folder.
    :raise ResourceInvalidError: If the service answers with HTTP 405.
    :raise ResourceNotFoundError: If the service answers with HTTP 404.
    :raise OperationFailedError: For any other HTTP error.
    :return: Metadata returned by the service for the new folder.
    """
    body = {
        "title": title,
        "parents": [{"id": parent_id}],
        "mimeType": "application/vnd.google-apps.folder",
    }

    try:
        metadata = self.service.files().insert(body=body).execute()
    except errors.HttpError as e:
        if e.resp.status == 405:
            raise ResourceInvalidError(parent_id)
        if e.resp.status == 404:
            raise ResourceNotFoundError(parent_id)
        raise OperationFailedError(
            opname='file_create_folder',
            msg="%s, the reasons could be: parent "
                "doesn't exist or is a file" % (e.resp.reason, ))
    except Exception:
        # Retry on unexpected failures only. A bare ``except`` would also
        # retry on KeyboardInterrupt / SystemExit.
        return self._retry_operation(self.file_create_folder,
                                     parent_id, title)

    self.cache.set(metadata["id"], metadata, parents=[parent_id])
    return metadata
def open(self, path, mode='r', **kwargs):
    """Open ``path`` as a ``PartedFile``.

    In read modes every existing part is opened, in sorted order. In write
    or append modes only the first part (``self._encode(path)``) is opened,
    so at least part 0 is always set.

    :param path: Path of the parted file.
    :param mode: File mode, passed on to the wrapped filesystem.
    :param kwargs: Extra arguments passed on to the wrapped ``open``.
    :raise ResourceInvalidError: If ``path`` is a directory.
    :raise ResourceNotFoundError: If ``path`` does not exist and the mode
        contains neither ``w`` nor ``a``.
    :return: ``PartedFile`` instance.
    """
    def _open_part(part_path):
        handle = self.wrapped_fs.open(part_path, mode, **kwargs)
        return FilePart(handle)

    if self.isdir(path):
        raise ResourceInvalidError(path)

    creates = "w" in mode or "a" in mode
    if not creates:
        if not self.exists(path):
            raise ResourceNotFoundError(path)
        existing_parts = [
            _open_part(part) for part in sorted(self.listparts(path))
        ]
        return PartedFile(fs=self.wrapped_fs, path=path, mode=mode,
                          max_part_size=self.max_part_size,
                          parts=existing_parts)

    # Plain 'w' (without '+') starts from scratch: drop what is there.
    if 'w' in mode and '+' not in mode and self.exists(path):
        self.remove(path)

    return PartedFile(fs=self.wrapped_fs, path=path, mode=mode,
                      max_part_size=self.max_part_size,
                      parts=[_open_part(self._encode(path))])
def open(self, path, mode="rb", **kwargs):
    """Open the named file in the given mode.

    The file contents are downloaded into a local spooled temporary file,
    which is handed to a ``RemoteFileBuffer``.

    :param path: Path to the file to be opened
    :param mode: In which mode to open the file
    :param kwargs: Accepted for signature compatibility; not used.
    :raise ResourceNotFoundError: If ``'w'`` is not in mode and the
        contents of ``path`` cannot be fetched
    :return: RemoteFileBuffer object

    """
    path = abspath(normpath(path))
    spooled_file = SpooledTemporaryFile(mode=mode, bufsize=MAX_BUFFER)

    if "w" in mode:
        # Truncate the remote file if requested.
        self.client.put_file(path, "", True)
    else:
        # Pre-load the current contents. 'w' is never in mode here, so any
        # failure means the file could not be read (the former "create the
        # file" branch was unreachable and has been removed).
        try:
            spooled_file.write(self.client.get_file(path).read())
            spooled_file.seek(0, 0)
        except Exception:
            # ``Exception`` instead of a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not turned into
            # "not found".
            raise ResourceNotFoundError(path)

    return RemoteFileBuffer(self, path, mode, spooled_file)
def copy(self, src, dst, overwrite=False, chunk_size=65536):
    """Copy the file ``src`` to ``dst`` through the remote files API.

    :param src: Source file path.
    :param dst: Destination file path.
    :param overwrite: If True, an existing destination file is removed
        first.
    :param chunk_size: Not used by this implementation.
    :raise ResourceInvalidError: If ``src`` or ``dst`` is a directory.
    :raise ParentDirectoryMissingError: If the parent directory of a
        missing ``src`` or ``dst`` does not exist.
    :raise ResourceNotFoundError: If ``src`` does not exist.
    :raise DestinationExistsError: If ``dst`` exists and ``overwrite`` is
        False.
    """
    # Validate the source.
    if self.isdir(src):
        raise ResourceInvalidError(src)
    if not self.isfile(src):
        if not self.isdir(dirname(src)):
            raise ParentDirectoryMissingError(src)
        raise ResourceNotFoundError(src)

    # Validate (and possibly clear) the destination.
    if self.isdir(dst):
        raise ResourceInvalidError(dst)
    if self.isfile(dst):
        if not overwrite:
            raise DestinationExistsError(dst)
        self.remove(dst)
    elif not self.isdir(dirname(dst)):
        raise ParentDirectoryMissingError(dst)

    parent_id = self._ids[dirname(dst)]
    request_body = {'title': basename(dst), 'parents': [{'id': parent_id}]}
    files_api = self.client.auth.service.files()
    copied = files_api.copy(fileId=self._ids[src], body=request_body).execute()
    # Remember the id of the copy under its new path.
    self._ids[dst] = copied['id']
def copy(self, *args, **kwargs):
    """Copy via the parent class, reporting a missing source correctly.

    The inherited ``copy`` raises ``ResourceInvalidError`` when the source
    file does not exist; that error is re-raised here as
    ``ResourceNotFoundError`` carrying the same path.
    """
    inherited_copy = super(S3FS, self).copy
    try:
        return inherited_copy(*args, **kwargs)
    except ResourceInvalidError as invalid:
        raise ResourceNotFoundError(invalid.path)
def createfile(self, path, wipe=True, **kwargs):
    """
    Create an empty file by uploading empty content through the client.

    @param path: path to the new file. Recognised forms:
        - parent_id/file_title.ext
        - file_title.ext or /file_title.ext
          - in these cases the root directory is the parent
    @param wipe: passed on to ``client.put_file`` together with the empty
        content.
    @raise ResourceNotFoundError: If the given parent doesn't exist.
    @attention: Root directory is ``self._root`` of this filesystem
        instance.
    @return: whatever ``client.put_file`` returns
    """
    segments = path.split("/")
    head = segments[0]

    if head == "":
        # Leading slash: the file lives directly under the root.
        parent_id, title = self._root, segments[1]
    elif len(segments) == 2:
        parent_id, title = segments
        if not self.exists(parent_id):
            raise ResourceNotFoundError("parent doesn't exist")
    else:
        parent_id, title = self._root, head

    return self.client.put_file(parent_id, title, "", wipe)
def removedir(self, path, recursive=False, force=False):
    """Remove the directory at ``path``.

    :param path: Directory to remove.
    :param recursive: If True, also try to remove the parent directories,
        stopping at the first one that is not empty.
    :param force: If True, remove the directory's contents first; errors
        while removing individual entries are ignored.
    :raise ResourceNotFoundError: If ``path`` does not exist.
    :raise ResourceInvalidError: If ``path`` is a file.
    :raise DirectoryNotEmptyError: If the directory has entries and
        ``force`` is False.
    """
    path = self._prepare_abspath(path)
    if not self.exists(path):
        raise ResourceNotFoundError(path)
    if self.isfile(path):
        raise ResourceInvalidError(path)

    children = self.listdir(path, full=True)
    if children and not force:
        raise DirectoryNotEmptyError(path)

    for child in children:
        # Best effort: a failure on one entry must not stop the others.
        try:
            if self.isfile(child):
                self.remove(child)
            elif self.isdir(child):
                self.removedir(child, force=force)
        except FSError:
            pass

    self.conn.rmdir(self.smb_path(path))

    if recursive:
        try:
            self.removedir(dirname(path), recursive=True)
        except DirectoryNotEmptyError:
            pass
def _listPath(self, path, list_contents=False):
    """List a path with SMB errors converted by ``_conv_smb_errors``.

    :param path: Path to list.
    :param list_contents: If True, return the listing of the directory at
        ``path``. Otherwise return the single entry describing ``path``,
        found by listing its containing directory.
    :raise ResourceInvalidError: If a content listing was requested but
        ``path`` is a file.
    :raise ResourceNotFoundError: If ``path`` cannot be found.
    """
    if list_contents:
        # The conversion is applied explicitly so the converted error can
        # be caught right here.
        try:
            list_converted = _conv_smb_errors(self.conn.listPath)
            return list_converted(self.share, normpath(path))
        except ResourceNotFoundError:
            if self.isfile(path):
                raise ResourceInvalidError(path)
            raise

    # Look for one specific entry inside the containing directory.
    parent_dir = dirname(path)
    wanted = basename(path)
    entries = _conv_smb_errors(self.conn.listPath)(self.share, parent_dir)
    for entry in entries:
        name = entry.filename
        if name == '..':
            continue
        # An empty basename is matched against the '.' entry.
        if name == wanted or (name == '.' and wanted == ''):
            return entry
    raise ResourceNotFoundError(path)
def copy(self, src, dst, overwrite=False, chunk_size=1024 * 1024):
    """Copy the file ``src`` to ``dst`` in chunks of ``chunk_size``.

    :param src: Source file path.
    :param dst: Destination file path.
    :param overwrite: If False, an existing destination is an error.
    :param chunk_size: Size passed to each ``read`` call on the source.
    :raise ResourceInvalidError: If ``src`` is a directory.
    :raise ResourceNotFoundError: If ``src`` does not exist.
    :raise DestinationExistsError: If ``dst`` exists and ``overwrite`` is
        False.
    """
    src = normpath(src)
    if not self.isfile(src):
        if self.isdir(src):
            raise ResourceInvalidError(
                src, msg="Source is not a file: %(path)s")
        raise ResourceNotFoundError(src)

    dst = normpath(dst)
    if not overwrite and self.exists(dst):
        raise DestinationExistsError(dst)

    # Nested try/finally so that each handle is closed even when closing
    # the other one raises.
    src_file = self.open(src, 'r')
    try:
        dst_file = self.open(dst, 'w')
        try:
            while True:
                copy_buffer = src_file.read(chunk_size)
                if not copy_buffer:
                    break
                dst_file.write(copy_buffer)
        finally:
            dst_file.close()
    finally:
        src_file.close()
def remove(self, path):
    """Delete the file at ``path`` through the SMB connection.

    :param path: Path of the file to delete.
    :raise ResourceNotFoundError: If ``path`` does not exist.
    :raise ResourceInvalidError: If ``path`` exists but is not a file.
    """
    if not self.exists(path):
        raise ResourceNotFoundError(path)
    elif not self.isfile(path):
        raise ResourceInvalidError(path)

    target = self.smb_path(path)
    self.conn.unlink(target)
def getsize(self, path):
    """Return the ``ST_SIZE`` field of the stat result for ``path``.

    :param path: Path to inspect; it is normalised first.
    :raise ResourceNotFoundError: If the SMB layer reports no such entry.
    """
    path = normpath(path)
    try:
        stat_result = self.conn.stat(self.smb_path(path))
    except smbc.NoEntryError:
        raise ResourceNotFoundError(path)
    return stat_result[stat.ST_SIZE]
def listdir(self, path="./", wildcard=None, full=False, absolute=False,
            dirs_only=False, files_only=False):
    """List the names of the children of the folder at ``path``.

    :param path: Folder to list.
    :param wildcard: Not used by this implementation.
    :param full: If True, each name is joined onto ``path``.
    :param absolute: Not used by this implementation.
    :param dirs_only: If True, only folders are returned.
    :param files_only: If True, only files are returned.
    :raise PathError: If ``path`` is empty.
    :raise ResourceNotFoundError: If no item exists at ``path``.
    :raise ResourceInvalidError: If the item at ``path`` is not a folder.
    :return: List of child names (or joined paths when ``full`` is True).
    """
    if not path:
        raise PathError(path)

    path = normpath(path)
    folder = self._get_item_by_path(path)
    if not folder:
        raise ResourceNotFoundError(path)
    if folder['type'] != _ITEM_TYPE_FOLDER:
        raise ResourceInvalidError(path)

    def _selected(kind):
        if dirs_only and kind != _ITEM_TYPE_FOLDER:
            return False
        if files_only and kind != _ITEM_TYPE_FILE:
            return False
        return True

    children = self._get_children_items(folder['id']).values()
    return [
        pathjoin(path, entry['name']) if full else entry['name']
        for entry in children
        if _selected(entry['type'])
    ]
def getinfo(self, path):
    """
    Assemble the info of a parted file from the info of its parts.

    For a file, the most recent created/modified/accessed timestamps among
    the parts are used as the values of the file. Anything that is not a
    file is delegated to the wrapped filesystem.

    :param path: Path to inspect.
    :raise ResourceNotFoundError: If ``path`` does not exist.
    :return: Info dictionary.
    """
    if not self.exists(path):
        raise ResourceNotFoundError(path)
    if not self.isfile(path):
        return self.wrapped_fs.getinfo(path)

    info = {'st_mode': 0o666 | stat.S_IFREG}
    part_infos = [
        self.wrapped_fs.getinfo(part) for part in self.listparts(path)
    ]
    if part_infos:
        info["parts"] = part_infos
        info["size"] = self.getsize(path)
        for key in ("created_time", "modified_time", "accessed_time"):
            info[key] = max(part.get(key) for part in part_infos)
    return info
def open(self, path, mode='r', buffering=-1, encoding=None, errors=None,
         newline=None, line_buffering=False, **kwargs):
    """Open ``path`` and return a ``RemoteFileBuffer`` for it.

    When the mode contains ``r`` the current contents are loaded through
    ``getcontents``; otherwise the buffer starts out empty.

    :param path: Path of the file to open.
    :param mode: File mode.
    :param encoding: Passed on to ``getcontents``.
    :param errors: Passed on to ``getcontents``.
    :param newline: Passed on to ``getcontents``.
    :raise ResourceInvalidError: If ``path`` is a directory.
    :raise ParentDirectoryMissingError: If the parent directory is
        missing.
    :raise ResourceNotFoundError: If the mode contains ``r`` and ``path``
        is not a file.
    :return: ``RemoteFileBuffer`` instance.
    """
    reading = 'r' in mode
    writing = 'w' in mode

    # The order of these checks decides which error wins; keep it.
    if self.isdir(path):
        raise ResourceInvalidError(path)
    if writing and not self.isdir(dirname(path)):
        raise ParentDirectoryMissingError(path)
    if reading and not self.isfile(path):
        raise ResourceNotFoundError(path)
    if not self.isdir(dirname(path)):
        raise ParentDirectoryMissingError(path)

    # Plain 'w' (without '+') replaces an existing file.
    if writing and '+' not in mode and self.isfile(path):
        self.remove(path)

    if reading:
        data = self.getcontents(path, mode=mode, encoding=encoding,
                                errors=errors, newline=newline)
    else:
        data = ''

    rfile = StringIO(data=data, mode=mode)
    return RemoteFileBuffer(self, path, mode=mode, rfile=rfile)
def _open(self): self.pos = 0 self.file_size = None self.closed = False self.smb_uri_path = self.smbfs.smb_path(self.path) open_mode = None if 'r' in self.mode and '+' in self.mode: open_mode = os.O_RDWR elif 'r' in self.mode: open_mode = os.O_RDONLY elif 'w' in self.mode and '+' in self.mode: open_mode = os.O_RDWR | os.O_TRUNC | os.O_CREAT elif 'w' in self.mode: open_mode = os.O_WRONLY | os.O_TRUNC | os.O_CREAT elif 'a' in self.mode and '+' in self.mode: open_mode = os.O_RDWR | os.O_CREAT elif 'a' in self.mode: open_mode = os.O_WRONLY | os.O_CREAT if open_mode is None: raise Exception('Undefined mode') try: self.file = self.smbfs.conn.open(self.smb_uri_path, open_mode) fstat = self.file.fstat() self.file_size = fstat[stat.ST_SIZE] if 'a' in self.mode: self.seek(0, fs.SEEK_END) except smbc.NoEntryError: raise ResourceNotFoundError(self.path)
def open(self, path, mode='r', buffering=-1, encoding=None, errors=None,
         newline=None, line_buffering=False, **kwargs):
    """Open the named file in the given mode.

    The file contents are downloaded into a local spooled temporary file,
    which is handed to a ``RemoteFileBuffer``.

    :param path: Id of the file to be opened
    :param mode: In which mode to open the file
    :raises ResourceNotFoundError: If the contents cannot be fetched and
        neither 'w' nor 'a' is in mode
    :return: RemoteFileBuffer object

    """
    path = self._normpath(path)
    spooled_file = SpooledTemporaryFile(mode=mode, bufsize=MAX_BUFFER)

    if "w" in mode:
        # Truncate the file if requested; create it if that fails.
        try:
            self._update(path, "")
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            path = self.createfile(path, True)
    else:
        try:
            spooled_file.write(self.client.get_file(path))
            spooled_file.seek(0, 0)
        except Exception as e:
            # 'w' is never in mode on this branch, so only append mode is
            # allowed to create the missing file.
            if "a" not in mode:
                raise ResourceNotFoundError("%r" % e)
            path = self.createfile(path, True)

    return RemoteFileBuffer(self, path, mode, spooled_file)
def getsyspath(self, path, allow_none=False):
    """Return the system path of ``path`` from the filesystem holding it.

    :param path: Path to resolve.
    :param allow_none: Passed on to the delegate. If no filesystem is
        found for ``path`` and this is True, the write filesystem (when
        set) is asked instead.
    :raise ResourceNotFoundError: If no filesystem can answer.
    """
    delegate = self._delegate_search(path)
    if delegate is None:
        if not allow_none or self.writefs is None:
            raise ResourceNotFoundError(path)
        delegate = self.writefs
    return delegate.getsyspath(path, allow_none=allow_none)
def listdir(self, path="/", wildcard=None, full=False, absolute=False,
            dirs_only=False, files_only=False):
    """List the entries of the directory at ``path``.

    The path-to-id mapping (``self._ids``) is rebuilt before the names are
    read from it. All filtering and formatting options are applied by
    ``self._listdir_helper``.

    :param path: Directory to list.
    :param wildcard: Passed on to ``_listdir_helper``.
    :param full: Passed on to ``_listdir_helper``.
    :param absolute: Passed on to ``_listdir_helper``.
    :param dirs_only: Passed on to ``_listdir_helper``.
    :param files_only: Passed on to ``_listdir_helper``.
    :raise ResourceInvalidError: If ``path`` is a file.
    :raise ParentDirectoryMissingError: If neither ``path`` nor its parent
        is a directory.
    :raise ResourceNotFoundError: If ``path`` does not exist.
    :return: The result of ``_listdir_helper``.
    """
    if self.isfile(path):
        raise ResourceInvalidError(path)
    if not self.isdir(path):
        if not self.isdir(dirname(path)):
            raise ParentDirectoryMissingError(path)
        raise ResourceNotFoundError(path)

    # A remote query string used to be assembled here but was never sent;
    # it has been removed together with its needless ``self._ids`` lookup.
    self._ids = self._map_ids_to_paths()
    entries = self._ids.names(path)

    # dirs_only / files_only are not applied here; the helper does it.
    return self._listdir_helper(path, entries, wildcard=wildcard,
                                full=full, absolute=absolute,
                                dirs_only=dirs_only,
                                files_only=files_only)
def copy(self, src, dst, overwrite=False):
    """Copy a file from source to destination.

    :param src: Source path.
    :type src: str
    :param dst: Destination path.
    :type dst: str
    :param overwrite: If True, an existing destination directory is
        removed first and the client copy is forced.
    :type overwrite: bool
    :raise ResourceInvalidError: If ``src`` is a directory.
    :raise ResourceNotFoundError: If ``src`` does not exist.
    :return: True when the client reports success; a failing status is
        handed to ``self._raise_status``.
    """
    src = self._p(src)
    dst = self._p(dst)

    if not self.isfile(src):
        if not self.isdir(src):
            raise ResourceNotFoundError(src)
        raise ResourceInvalidError(
            src, msg="Source is not a file: %(path)s")

    # A directory in the way of the copy is cleared when overwriting.
    if overwrite and self.exists(dst) and self.isdir(dst):
        self.removedir(dst, force=True)

    status, _unused = self._client.copy(src, dst, force=overwrite)
    if not status.ok:
        self._raise_status(dst, status)
    return True
def movedir(self, src, dst, overwrite=False, **kwargs):
    """Move a directory from one location to another.

    :param src: Source directory path.
    :type src: str
    :param dst: Destination directory path.
    :type dst: str
    :param overwrite: Passed on to ``self._move``.
    :type overwrite: bool
    :raise: `fs.errors.ResourceNotFoundError` if source was not found.
    :raise: `fs.errors.ResourceInvalidError` if source is not a directory.
    :return: The result of ``self._move``.
    """
    source = self._p(src)
    target = self._p(dst)

    if not self.exists(source):
        raise ResourceNotFoundError(source)
    elif not self.isdir(source):
        raise ResourceInvalidError(
            source, msg="Source is not a directory: %(path)s")

    return self._move(source, target, overwrite=overwrite)
def move(self, src, dst, overwrite=False, **kwargs):
    """Move a file from one location to another.

    :param src: Source path.
    :type src: str
    :param dst: Destination path.
    :type dst: str
    :param overwrite: Passed on to ``self._move``.
    :type overwrite: bool
    :raise: `fs.errors.ResourceNotFoundError` if source was not found.
    :raise: `fs.errors.ResourceInvalidError` if source is not a file.
    :return: The result of ``self._move``.
    """
    source = self._p(src)
    target = self._p(dst)

    # Check existence first, then that the source really is a file.
    if not self.exists(source):
        raise ResourceNotFoundError(source)
    elif not self.isfile(source):
        raise ResourceInvalidError(source,
                                   msg="Source is not a file: %(path)s")

    return self._move(source, target, overwrite=overwrite)
def desc(self, path):
    """Describe ``path`` together with the filesystem that holds it.

    :param path: Path to describe.
    :raise ResourceNotFoundError: If ``path`` does not exist.
    :return: Empty string when ``self.which`` reports no name, otherwise
        ``"<description>, on <name> (<filesystem>)"``.
    """
    if not self.exists(path):
        raise ResourceNotFoundError(path)

    name, owner = self.which(path)
    if name is not None:
        return "%s, on %s (%s)" % (owner.desc(path), name, owner)
    return ""
def _check_path(self, path):
    """Check that ``path`` is listed in its parent directory.

    :param path: Path to check; it is normalised first.
    :raise ResourceNotFoundError: If the final path component is non-empty
        and missing from the parent's listing.
    :return: Tuple of (parent directory listing, final path component).
    """
    normalized = normpath(path)
    parent, leaf = pathsplit(abspath(normalized))
    listing = self._readdir(parent)
    # An empty leaf means the path has no final component to look up.
    if leaf and leaf not in listing:
        raise ResourceNotFoundError(normalized)
    return listing, leaf
def copydir(self, src, dst, overwrite=False, parallel=True):
    """Copy a directory from source to destination.

    The source directory structure is recreated at the destination. Files
    are either copied one by one with ``self.copy`` or, when ``parallel``
    is True, queued on a ``CopyProcess`` that is run once at the end.

    :param src: Source directory path.
    :type src: str
    :param dst: Destination directory path.
    :type dst: str
    :param overwrite: If True, an existing destination is removed first.
    :type overwrite: bool
    :param parallel: If True (default), queue the file copies on a
        ``CopyProcess``.
    :type parallel: bool
    :raise ResourceInvalidError: If ``src`` is a file.
    :raise ResourceNotFoundError: If ``src`` does not exist.
    :raise DestinationExistsError: If ``dst`` exists and ``overwrite`` is
        False.
    :return: True
    """
    if not self.isdir(src):
        if not self.isfile(src):
            raise ResourceNotFoundError(src)
        raise ResourceInvalidError(
            src, msg="Source is not a directory: %(path)s")

    if self.exists(dst):
        if not overwrite:
            raise DestinationExistsError(dst)
        if self.isdir(dst):
            self.removedir(dst, force=True)
        elif self.isfile(dst):
            self.remove(dst)

    if parallel:
        process = CopyProcess()

        def copyfile(source_file, target_file, overwrite=False):
            # Only queue the job; ``overwrite`` is accepted so the call
            # matches ``self.copy`` but is not forwarded.
            process.add_job(source_file, target_file)
    else:
        copyfile = self.copy

    self.makedir(dst, allow_recreate=True)

    for current_dir, filenames in self.walk(src):
        target_dir = pathcombine(dst, frombase(src, current_dir))
        self.makedir(target_dir, allow_recreate=True, recursive=True)
        for filename in filenames:
            copyfile(pathjoin(current_dir, filename),
                     pathjoin(target_dir, filename),
                     overwrite=overwrite)

    if parallel:
        process.prepare()
        process.run()

    return True
def _raise_status(self, path, status, source=None): """Raise error based on status.""" if status.errno == 3011: raise ResourceNotFoundError(path) else: if source: errstr = "XRootD error {0}file: {1}".format( source + ' ', status.message) raise IOError(errstr)
def copy(self, src, dst, **kwds):
    """
    Copy a parted file from ``src`` to ``dst``.

    Every part of the source, taken in sorted order, is copied on the
    wrapped filesystem to the matching part name of the destination.

    :param src: Source path.
    :param dst: Destination path.
    :param kwds: Passed on to the wrapped filesystem's ``copy``.
    :raise ResourceNotFoundError: If ``src`` does not exist.
    """
    if not self.exists(src):
        raise ResourceNotFoundError(src)

    source_parts = sorted(self.listparts(src))
    for index, source_part in enumerate(source_parts):
        target_part = self._encode(dst, index)
        self.wrapped_fs.copy(source_part, target_part, **kwds)
def open(self, path, mode="r"):
    """Open ``path`` for reading via ``urlopen``.

    :param path: Path, turned into a URL by ``self._make_url``.
    :param mode: File mode; write, append and update modes are rejected.
    :raise UnsupportedError: If ``mode`` contains ``+``, ``w`` or ``a``.
    :raise ResourceNotFoundError: If ``urlopen`` raises ``URLError``; the
        original error is passed as ``details``.
    :return: The object returned by ``urlopen``.
    """
    if '+' in mode or 'w' in mode or 'a' in mode:
        raise UnsupportedError('write')

    url = self._make_url(path)
    try:
        f = urlopen(url)
    except URLError as e:
        # ``except X as e`` is valid on Python 2.6+ and Python 3; the old
        # comma form is a syntax error on Python 3.
        raise ResourceNotFoundError(path, details=e)
    # Hand the opened response back; previously it was dropped and the
    # method returned None.
    return f
def metadata(self, path):
    """Get metadata of a given path.

    A cached entry is used when it exists, carries metadata and is not
    expired; otherwise the metadata is fetched from the service and cached.

    :param path: Id passed to the service as ``fileId``.
    :raise ResourceNotFoundError: If the service answers with HTTP 404 or
        the fetched metadata is marked as trashed.
    :raise OperationFailedError: For any other HTTP error.
    :return: A copy of the metadata dictionary.
    """
    item = self.cache.get(path)
    if not item or item.metadata is None or item.expired:
        try:
            fetched = self.service.files().get(fileId=path).execute()
        except errors.HttpError as e:
            if e.resp.status == 404:
                raise ResourceNotFoundError(path)
            raise OperationFailedError(opname='metadata', path=path,
                                       msg=e.resp.reason)
        except Exception:
            # Retry on unexpected failures only. A bare ``except`` would
            # also retry on KeyboardInterrupt / SystemExit.
            return self._retry_operation(self.metadata, path)

        if fetched.get('trashed', False):
            raise ResourceNotFoundError(path)
        item = self.cache[path] = CacheItem(fetched)

    # Copy the info so the caller cannot affect our cache.
    return dict(item.metadata.items())