def __exit__(self, ty, ex, tb):  # pylint: disable=too-many-branches
    self.__box._mutex.__exit__(ty, ex, tb)
    if ex:
        try:
            raise ex
        except (TimeoutError,):
            self.__box.disconnect()
            raise CloudDisconnectedError("disconnected on timeout")
        except BoxOAuthException as e:
            self.__box.disconnect()
            raise CloudTokenError("oauth fail %s" % e)
        except BoxNetworkException as e:
            self.__box.disconnect()
            raise CloudDisconnectedError("disconnected %s" % e)
        except BoxValueError:
            raise CloudFileNotFoundError()
        except BoxAPIException as e:
            if e.status == 400 and e.code == 'folder_not_empty':
                raise CloudFileExistsError()
            if e.status == 404 and e.code == 'not_found':
                raise CloudFileNotFoundError()
            if e.status == 404 and e.code == 'trashed':
                raise CloudFileNotFoundError()
            if e.status == 405 and e.code == 'method_not_allowed':
                raise PermissionError()
            if e.status == 409 and e.code == 'item_name_in_use':
                raise CloudFileExistsError()
            if e.status == 400 and e.code == 'invalid_grant':
                raise CloudTokenError()
            log.exception("unknown box exception: \n%s", e)
        except CloudException:
            raise
        except Exception:
            pass  # this will not swallow the exception, because this is in a context manager

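# Illustrative usage note (an assumption, not part of the original source): the guard above
# appears to be what self._api() returns elsewhere in this provider, so boxsdk exceptions
# raised inside the block reach the caller already translated into the provider-agnostic
# Cloud* errors, e.g.
#
#     with self._api() as client:                 # __enter__ takes the mutex, yields a boxsdk Client
#         folder = client.folder(folder_id=oid)
#     # a BoxAPIException(status=404, code='not_found') raised inside the block
#     # surfaces to the caller as CloudFileNotFoundError
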
def delete(self, oid):
    info = self._info_oid(oid)
    if not info:
        log.debug("deleted non-existing oid %s", debug_sig(oid))
        return  # file doesn't exist already...
    if info.otype == DIRECTORY:
        try:
            next(self.listdir(oid))
            raise CloudFileExistsError("Cannot delete non-empty folder %s:%s" % (oid, info.name))
        except StopIteration:
            pass  # Folder is empty, delete it no problem
    if oid == self._root_id:
        raise CloudFileExistsError("Cannot delete root folder")
    try:
        self._api('files', 'delete', fileId=oid)
    except CloudFileNotFoundError:
        log.debug("deleted non-existing oid %s", debug_sig(oid))
    except PermissionError:
        try:
            log.debug("permission denied deleting %s:%s, unfile instead", debug_sig(oid), info.name)
            remove_str = ",".join(info.pids)
            self._api('files', 'update', fileId=oid, removeParents=remove_str, fields='id')
        except PermissionError:
            log.warning("Unable to delete oid %s.", debug_sig(oid))
    path = self._path_oid(oid, info=info)
    self._uncache(path)

def mkdir(self, path, metadata=None) -> str:  # pylint: disable=arguments-differ
    info = self.info_path(path)
    if info:
        if info.otype == FILE:
            raise CloudFileExistsError(path)
        log.debug("Skipped creating already existing folder: %s", path)
        return info.oid
    # Cache is accurate, just refreshed from info_path call
    pid = self._get_parent_id(path, use_cache=True)
    _, name = self.split(path)
    appProperties = self._prep_app_properties(pid)
    file_metadata = {
        'name': name,
        'parents': [pid],
        'mimeType': self._folder_mime_type,
        'appProperties': appProperties
    }
    if metadata:
        file_metadata.update(metadata)
    res = self._api('files', 'create', body=file_metadata, fields='id')
    fileid = res.get('id')
    self._ids[path] = fileid
    return fileid

def create(self, path, file_like, metadata=None) -> 'OInfo':
    if not metadata:
        metadata = {}
    if self.exists_path(path):
        raise CloudFileExistsError()
    ul, size = self._media_io(file_like)

    fields = 'id, md5Checksum, size, modifiedTime'

    # Cache is accurate, just refreshed from exists_path() call
    parent_oid = self._get_parent_id(path, use_cache=True)
    metadata['appProperties'] = self._prep_app_properties(parent_oid)
    gdrive_info = self._prep_upload(path, metadata)
    gdrive_info['parents'] = [parent_oid]

    try:
        def api_call():
            return self._api('files', 'create',
                             body=gdrive_info,
                             media_body=ul,
                             fields=fields)
        if self._client:
            with patch.object(self._client._http.http, "follow_redirects", False):  # pylint: disable=protected-access
                res = api_call()
        else:
            res = api_call()
    except OSError as e:
        self.disconnect()
        raise CloudDisconnectedError("OSError in file create: %s" % repr(e))

    log.debug("response from create %s : %s", path, res)
    if not res:
        raise CloudTemporaryError("unknown response from drive on upload")
    self._ids[path] = res['id']
    log.debug("path cache %s", self._ids)

    size = int(res.get("size", 0))
    mtime = res.get('modifiedTime')
    mtime = mtime and self._parse_time(mtime)

    cache_ent = self.get_quota.get()  # pylint: disable=no-member
    if cache_ent:
        cache_ent["used"] += size

    return OInfo(otype=FILE, oid=res['id'], hash=res['md5Checksum'], path=path, size=size, mtime=mtime)

def _unsafe_get_box_object_from_oid(self, client: Client, oid: str, object_type: OType, strict: bool) \
        -> Optional[BoxItem]:
    assert isinstance(client, Client)
    assert object_type in (FILE, DIRECTORY)
    box_object = None
    try:
        with self._api():
            if object_type == FILE:
                box_object = client.file(file_id=oid)
            if object_type == DIRECTORY:
                box_object = client.folder(folder_id=oid)
            if box_object:
                box_object = self._unsafe_box_object_populate(client, box_object)
            return box_object
    except CloudFileNotFoundError:
        pass
    except (CloudFileExistsError, PermissionError):
        raise
    except Exception as e:
        log.exception(e)
        raise

    # try again with the other type
    log.debug("Trying again")
    if object_type == FILE:
        box_object = client.folder(folder_id=oid)
    if object_type == DIRECTORY:
        box_object = client.file(file_id=oid)
    box_object = self._unsafe_box_object_populate(client, box_object)  # should raise FNF if the object doesn't exist

    if strict:
        # if we are here, then the object exists and retval does not comply with "strict"
        raise CloudFileExistsError()
    return box_object

def _attempt_rename_folder_over_empty_folder(self, info: OInfo, path: str) -> None:
    if info.otype != DIRECTORY:
        raise CloudFileExistsError(path)
    possible_conflict = self.info_path(path)
    if possible_conflict.otype == DIRECTORY:
        try:
            next(self._listdir(possible_conflict.oid, recursive=False))
            raise CloudFileExistsError("Cannot rename over non-empty folder %s" % path)
        except StopIteration:
            pass  # Folder is empty, rename over it no problem
        self.delete(possible_conflict.oid)
        self._api('files_move_v2', info.oid, path)
        return
    else:  # conflict is a file, and we already know that the rename is on a folder
        raise CloudFileExistsError(path)

def __look_for_name_in_collection_entries(self, client: Client, name, collection_entries, object_type, strict):
    assert isinstance(client, Client)
    for entry in collection_entries:
        if entry.name == name:
            found_type = DIRECTORY if entry.object_type == 'folder' else FILE
            if object_type is not OType.NOTKNOWN and found_type != object_type and strict:
                raise CloudFileExistsError()
            return self._get_box_object(client, oid=entry.object_id, object_type=found_type, strict=strict), found_type
    return None, None

def create(self, path: str, file_like, metadata=None) -> OInfo:
    self._verify_parent_folder_exists(path)
    if self.exists_path(path):
        raise CloudFileExistsError(path)
    ret = self._upload(path, file_like, metadata)
    cache_ent = self.__memoize_quota.get()  # pylint: disable=no-member
    if cache_ent:
        cache_ent["used"] += ret.size
    return ret

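# A minimal caller-side sketch (hypothetical helper, not part of the provider): because
# create() above refuses to overwrite an existing path and upload() requires an existing
# oid, an "upsert" is a small composition of the two.  Assumes `provider` is an instance
# of this class and that CloudFileExistsError is importable where this helper lives.
import io

def upsert(provider, path: str, data: bytes) -> OInfo:
    """Create `path` with `data`, or overwrite it in place if it already exists."""
    try:
        return provider.create(path, io.BytesIO(data))
    except CloudFileExistsError:
        existing = provider.info_path(path)          # look up the oid of the conflicting entry
        return provider.upload(existing.oid, io.BytesIO(data))
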
def upload(self, oid, file_like, metadata=None) -> OInfo:
    with self._api() as client:
        box_object: BoxItem = self._get_box_object(client, oid=oid, object_type=FILE, strict=False)
        if box_object is None:
            raise CloudFileNotFoundError()
        if box_object.object_type != 'file':
            raise CloudFileExistsError()
        new_object = box_object.update_contents_with_stream(file_like)
        retval = self._box_get_oinfo(client, new_object)
        return retval

def upload(self, oid, file_like, metadata=None) -> 'OInfo':
    if not metadata:
        metadata = {}
    gdrive_info = self._prep_upload(None, metadata)
    ul, size = self._media_io(file_like)

    fields = 'id, md5Checksum, modifiedTime'

    try:
        def api_call():
            return self._api('files', 'update',
                             body=gdrive_info,
                             fileId=oid,
                             media_body=ul,
                             fields=fields)
        if self._client:
            with patch.object(self._client._http.http, "follow_redirects", False):  # pylint: disable=protected-access
                res = api_call()
        else:
            res = api_call()
    except OSError as e:
        self.disconnect()
        raise CloudDisconnectedError("OSError in file upload: %s" % repr(e))

    log.debug("response from upload %s", res)
    if not res:
        raise CloudTemporaryError("unknown response from drive on upload")

    mtime = res.get('modifiedTime')
    mtime = mtime and self._parse_time(mtime)

    md5 = res.get('md5Checksum', None)  # can be none if the user tries to upload to a folder
    if md5 is None:
        possible_conflict = self._info_oid(oid)
        if possible_conflict and possible_conflict.otype == DIRECTORY:
            raise CloudFileExistsError("Can only upload to a file: %s" % possible_conflict.path)

    return OInfo(otype=FILE, oid=res['id'], hash=md5, path=None, size=size, mtime=mtime)

def mkdir(self, path, metadata=None) -> str:  # pylint: disable=arguments-differ, unused-argument
    # TODO: check if a regular filesystem lets you mkdir over a non-empty folder...
    self._verify_parent_folder_exists(path)
    if self.exists_path(path):
        info = self.info_path(path)
        if info.otype == FILE:
            raise CloudFileExistsError()
        log.debug("Skipped creating already existing folder: %s", path)
        return info.oid
    res = self._api('files_create_folder_v2', path)
    log.debug("dbx mkdir %s", res)
    res = res.metadata
    return res.id

def _unsafe_get_box_object_from_path(self, client: Client,  # pylint: disable=too-many-locals
                                     path: str,
                                     object_type: OType,
                                     strict: bool,
                                     use_cache: bool) -> Optional[BoxItem]:
    assert isinstance(client, Client)
    assert object_type in (FILE, DIRECTORY)
    if path in ('/', ''):  # pragma: no cover
        # no cover because the tests always use a test root
        root: BoxItem = client.root_folder()
        root = self._unsafe_box_object_populate(client, root)
        return root
    if use_cache:
        cached_oid = self.__cache.get_oid(path)
        if cached_oid:
            cached_type = self.__cache.get_type(path=path) or NOTKNOWN
            return self._get_box_object(client, oid=cached_oid, object_type=cached_type, strict=strict,
                                        use_cache=use_cache)
    parent, base = self.split(path)
    cached_parent_oid = None
    if use_cache:
        cached_parent_oid = self.__cache.get_oid(parent)
    parent_object: Optional[BoxFolder]
    if cached_parent_oid is not None:
        parent_object = self._get_box_object(client, oid=cached_parent_oid, object_type=DIRECTORY, strict=strict)
    else:
        parent_object = self._get_box_object(client, path=parent, object_type=DIRECTORY, strict=strict)
        if parent_object:
            self.__cache.set_oid(parent, parent_object.object_id, DIRECTORY)
    if not parent_object:
        return None
    if parent_object.object_type != 'folder':
        raise CloudFileExistsError
    collection = parent_object.item_collection
    collection_entries = list(collection['entries'])
    entry, found_type = self.__look_for_name_in_collection_entries(client, base, collection_entries,
                                                                   object_type, strict)
    if not entry:
        start = time.monotonic()
        # the next line is very slow for big folders.
        # limit=5000 speeds it up because it lowers the number of pages
        # Is there a way to confirm the non-existence of a file that doesn't involve
        # getting every item in the parent's folder? maybe limiting the fields would speed this up...
        entries = self._box_get_items(client, parent_object, parent)
        log.debug("done getting %s, %s", parent, time.monotonic() - start)
        entry, found_type = self.__look_for_name_in_collection_entries(client, base, entries,
                                                                       object_type, strict)
    if not entry:
        raise CloudFileNotFoundError()
    if strict and found_type != object_type:
        raise CloudFileExistsError()
    return self._get_box_object(client, oid=entry.object_id, object_type=found_type, strict=strict)

def _upload(self, oid, file_like, metadata=None) -> OInfo:
    res = None
    metadata = metadata or {}

    file_like.seek(0, io.SEEK_END)
    size = file_like.tell()
    file_like.seek(0)

    if size < self.large_file_size:
        res = self._api('files_upload', file_like.read(), oid, mode=files.WriteMode('overwrite'))
    else:
        cursor = None
        while True:
            data = file_like.read(self.upload_block_size)
            if not data:
                if cursor:
                    local_mtime = arrow.get(metadata.get('mtime', time.time())).datetime
                    commit = files.CommitInfo(path=oid,
                                              mode=files.WriteMode.overwrite,
                                              autorename=False,
                                              client_modified=local_mtime,
                                              mute=True)
                    res = self._api('files_upload_session_finish', data, cursor, commit)
                break
            if not cursor:
                res = self._api('files_upload_session_start', data)
                cursor = files.UploadSessionCursor(res.session_id, len(data))
            else:
                self._api('files_upload_session_append_v2', data, cursor)
                cursor.offset += len(data)

    if res is None:
        raise CloudFileExistsError()

    ret = OInfo(otype=FILE, oid=res.id, hash=res.content_hash, path=res.path_display, size=size,
                mtime=self._mtime_from_metadata(res))
    log.debug('upload result is %s', ret)
    return ret

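# Note on the helper above (illustrative, an assumption rather than documented behaviour):
# _upload() seeks the stream to measure its size, so callers need a seekable file-like
# object; wrapping raw bytes in io.BytesIO is enough, e.g.
#
#     info = self._upload('/reports/2020.csv', io.BytesIO(report_bytes))   # path is illustrative
#
# Payloads of at least self.large_file_size go through the upload-session path
# (files_upload_session_start / _append_v2 / _finish); anything smaller is sent with a
# single files_upload call.
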
def delete(self, oid):
    info = self.info_oid(oid)
    if not info:
        return  # file doesn't exist already...
    if info.otype == DIRECTORY:
        try:
            next(self._listdir(oid, recursive=False))
            raise CloudFileExistsError("Cannot delete non-empty folder %s:%s" % (oid, info.path))
        except StopIteration:
            pass  # Folder is empty, delete it no problem
    try:
        self._api('files_delete_v2', oid)
    except CloudFileNotFoundError:  # shouldn't happen because we are checking above...
        return

def rename(self, oid, path) -> str:  # pylint: disable=too-many-branches
    self.__cache.delete(path=path)
    try:
        with self._api() as client:
            box_object: BoxItem = self._get_box_object(client, oid=oid, object_type=NOTKNOWN,
                                                        strict=False)  # todo: get object_type from cache
            if box_object is None:
                self.__cache.delete(oid=oid)
                raise CloudFileNotFoundError()
            info = self._box_get_oinfo(client, box_object)
            if info.path:
                old_path = info.path
            else:
                old_path = self._box_get_path(client, box_object)
            old_parent, _ignored_old_base = self.split(old_path)
            new_parent, new_base = self.split(path)
            if new_parent == old_parent:
                try:
                    with self._api():
                        retval = box_object.rename(new_base)
                except CloudFileExistsError:
                    if box_object.object_type == 'file':
                        raise
                    # are we renaming a folder over another empty folder?
                    box_conflict = self._get_box_object(client, path=path, object_type=NOTKNOWN,
                                                        strict=False)  # todo: get type from cache
                    # shouldn't happen... we just got a FEx error, and we're not moving
                    if box_conflict is None:  # pragma: no cover
                        raise
                    items = self._box_get_items(client, box_conflict, new_parent)
                    if box_conflict.object_type == 'folder' and len(items) == 0:
                        box_conflict.delete()
                    else:
                        raise
                    return self.rename(oid, path)
            else:
                new_parent_object = self._get_box_object(client, path=new_parent, object_type=DIRECTORY,
                                                         strict=False)
                if new_parent_object is None:
                    raise CloudFileNotFoundError()
                if new_parent_object.object_type != 'folder':
                    raise CloudFileExistsError()
                retval = box_object.move(parent_folder=new_parent_object, name=new_base)
            self.__cache.rename(old_path, path)
            return retval.id
    except Exception:
        self.__cache.delete(oid=oid)
        raise

def _api(self, resource, method, *args, **kwargs):  # pylint: disable=arguments-differ, too-many-branches, too-many-statements
    if not self._client:
        raise CloudDisconnectedError("currently disconnected")

    with self._mutex:
        try:
            if resource == 'media':
                res = args[0]
                args = args[1:]
            else:
                res = getattr(self._client, resource)()

            meth = getattr(res, method)(*args, **kwargs)

            if resource == 'media' or (resource == 'files' and method == 'get_media'):
                ret = meth
            else:
                ret = meth.execute()
            log.debug("api: %s (%s) -> %s", method, debug_args(args, kwargs), ret)
            return ret
        except SSLError as e:
            if "WRONG_VERSION" in str(e):
                # httplib2 used by google's api gives this weird error for no discernable reason
                raise CloudTemporaryError(str(e))
            raise
        except google.auth.exceptions.RefreshError:
            self.disconnect()
            raise CloudTokenError("refresh error")
        except HttpError as e:
            log.debug("api: %s (%s) -> %s", method, debug_args(args, kwargs), e.resp.status)
            if str(e.resp.status) == '416':
                raise GDriveFileDoneError()

            if str(e.resp.status) == '413':
                raise CloudOutOfSpaceError('Payload too large')

            if str(e.resp.status) == '409':
                raise CloudFileExistsError('Another user is modifying')

            if str(e.resp.status) == '404':
                raise CloudFileNotFoundError('File not found when executing %s.%s(%s)' %
                                             debug_args(resource, method, kwargs))

            reason = self._get_reason_from_http_error(e)

            if str(e.resp.status) == '403' and str(reason) == 'storageQuotaExceeded':
                raise CloudOutOfSpaceError("Storage storageQuotaExceeded")

            if str(e.resp.status) == '401':
                self.disconnect()
                raise CloudTokenError("Unauthorized %s" % reason)

            if str(e.resp.status) == '403' and str(reason) == 'parentNotAFolder':
                raise CloudFileExistsError("Parent Not A Folder")

            if str(e.resp.status) == '403' and str(reason) == 'insufficientFilePermissions':
                raise PermissionError("PermissionError")

            if (str(e.resp.status) == '403' and reason in (
                    'userRateLimitExceeded', 'rateLimitExceeded', 'dailyLimitExceeded')) \
                    or str(e.resp.status) == '429':
                raise CloudTemporaryError("rate limit hit")

            # At this point, _should_retry_response() returns true for error codes >=500, 429, and 403 with
            # the reason 'userRateLimitExceeded' or 'rateLimitExceeded'. 403 without content, or any other
            # response is not retried. We have already taken care of some of those cases above, but we call this
            # below to catch the rest, and in case they improve their library with more conditions. If we called
            # meth.execute() above with a num_retries argument, all this retrying would happen in the google api
            # library, and we wouldn't have to think about retries.
            should_retry = _should_retry_response(e.resp.status, e.content)
            if should_retry:
                raise CloudTemporaryError("unknown error %s" % e)
            log.error("Unhandled %s error %s", e.resp.status, reason)
            raise
        except (TimeoutError, HttpLib2Error):
            self.disconnect()
            raise CloudDisconnectedError("disconnected on timeout")
        except ConnectionResetError:
            raise CloudTemporaryError("An existing connection was forcibly closed by the remote host")

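# Illustrative caller-side sketch (an assumption, not in the original source): _api() above
# maps rate limits (403/429) and other retryable responses to CloudTemporaryError, so a
# caller that wants automatic retries can wrap calls in a small backoff loop.  The helper
# name and defaults below are made up for illustration.
import time  # assumed available; used only for the illustrative backoff below

def _retry_temporary(call, attempts=3, delay=1.0):
    """Retry `call()` a few times when the provider reports a transient failure."""
    for attempt in range(attempts):
        try:
            return call()
        except CloudTemporaryError:
            if attempt == attempts - 1:
                raise
            time.sleep(delay * (attempt + 1))  # linear backoff, purely illustrative

# e.g. _retry_temporary(lambda: self._api('files', 'delete', fileId=oid))
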
def rename(self, oid, path):  # pylint: disable=too-many-locals, too-many-branches
    # Use cache to get parent id, no need to hit info_path twice
    possible_conflict = self.info_path(path)
    pid = self._get_parent_id(path, use_cache=True)

    add_pids = [pid]
    if pid == 'root':  # pragma: no cover
        # cant ever get hit from the tests due to test root
        add_pids = [self._root_id]

    info = self._info_oid(oid)
    if info is None:
        log.debug("can't rename, oid doesn't exist %s", debug_sig(oid))
        raise CloudFileNotFoundError(oid)
    remove_pids = info.pids
    old_path = info.path

    _, name = self.split(path)
    appProperties = self._prep_app_properties(pid)
    body = {'name': name, 'appProperties': appProperties}

    if possible_conflict:
        if FILE in (info.otype, possible_conflict.otype):
            if possible_conflict.oid != oid:  # it's OK to rename a file over itself, frex, to change case
                raise CloudFileExistsError(path)
        else:
            if possible_conflict.oid != oid:
                try:
                    next(self.listdir(possible_conflict.oid))
                    raise CloudFileExistsError("Cannot rename over non-empty folder %s" % path)
                except StopIteration:
                    # Folder is empty, rename over it no problem
                    if possible_conflict.oid != oid:  # delete the target if we're not just changing case
                        self.delete(possible_conflict.oid)

    if not old_path:
        for cpath, coid in list(self._ids.items()):
            if coid == oid:
                old_path = cpath

    if add_pids == remove_pids:
        add_pids_str = ""
        remove_pids_str = ""
    else:
        add_pids_str = ",".join(add_pids)
        remove_pids_str = ",".join(remove_pids)

    self._api('files', 'update', body=body, fileId=oid, addParents=add_pids_str,
              removeParents=remove_pids_str, fields='id')

    if old_path:
        # TODO: this will break if the kids are cached but not the parent folder, I'm not convinced that can
        # actually be the case at this point in the code, so, no need to fix until that can be established
        for cpath, coid in list(self._ids.items()):
            relative = self.is_subpath(old_path, cpath)
            if relative:
                new_cpath = self.join(path, relative)
                self._ids.pop(cpath)
                self._ids[new_cpath] = coid

    log.debug("renamed %s -> %s", debug_sig(oid), body)

    return oid

def _real_api(self, client, mutex, method, *args, **kwargs):  # pylint: disable=too-many-branches, too-many-statements
    log.debug("_api: %s (%s)", method, debug_args(args, kwargs))

    with mutex:
        if not client:
            raise CloudDisconnectedError("currently disconnected")
        try:
            return getattr(client, method)(*args, **kwargs)
        except exceptions.AuthError:
            self.disconnect()
            raise CloudTokenError()
        except exceptions.ApiError as e:
            inside_error: Union[files.LookupError, files.WriteError]

            if isinstance(e.error, (files.ListFolderError, files.GetMetadataError, files.ListRevisionsError)):
                if e.error.is_path() and isinstance(e.error.get_path(), files.LookupError):
                    inside_error = e.error.get_path()
                    if inside_error.is_malformed_path():
                        log.debug('Malformed path when executing %s(%s %s) : %s',
                                  *debug_args(method, args, kwargs, e))
                        raise CloudFileNotFoundError('Malformed path when executing %s(%s)' %
                                                     debug_args(method, kwargs))
                    if inside_error.is_not_found():
                        log.debug('File not found %s(%s %s) : %s', *debug_args(method, args, kwargs, e))
                        raise CloudFileNotFoundError('File not found when executing %s(%s)' %
                                                     debug_args(method, kwargs))
                    if inside_error.is_not_folder():
                        log.debug('Expected folder is actually a file when executing %s(%s %s) : %s',
                                  *debug_args(method, args, kwargs, e))
                        raise CloudFileExistsError('Expected folder is actually a file when executing %s(%s %s)' %
                                                   debug_args(method, args, kwargs))

            if isinstance(e.error, sharing.SharedFolderAccessError):
                raise CloudFileNotFoundError(str(e))

            if isinstance(e.error, files.UploadError):
                if e.error.is_path() and isinstance(e.error.get_path(), files.UploadWriteFailed):
                    inside_error = e.error.get_path()
                    write_error = inside_error.reason
                    if write_error.is_insufficient_space():
                        log.debug('out of space %s(%s %s) : %s', *debug_args(method, args, kwargs, e))
                        raise CloudOutOfSpaceError('Out of space when executing %s(%s)' %
                                                   debug_args(method, kwargs))
                    if write_error.is_conflict():
                        raise CloudFileExistsError('Conflict when executing %s(%s)' %
                                                   debug_args(method, kwargs))

            if isinstance(e.error, files.DownloadError):
                if e.error.is_path() and isinstance(e.error.get_path(), files.LookupError):
                    inside_error = e.error.get_path()
                    if inside_error.is_not_found():
                        raise CloudFileNotFoundError("Not found when executing %s(%s)" %
                                                     debug_args(method, kwargs))

            if isinstance(e.error, files.DeleteError):
                if e.error.is_path_lookup():
                    inside_error = e.error.get_path_lookup()
                    if inside_error.is_not_found():
                        log.debug('file not found %s(%s %s) : %s', *debug_args(method, args, kwargs, e))
                        raise CloudFileNotFoundError('File not found when executing %s(%s)' %
                                                     debug_args(method, kwargs))

            if isinstance(e.error, files.RelocationError):
                if e.error.is_from_lookup():
                    inside_error = e.error.get_from_lookup()
                    if inside_error.is_not_found():
                        log.debug('file not found %s(%s %s) : %s', *debug_args(method, args, kwargs, e))
                        raise CloudFileNotFoundError('File not found when executing %s(%s,%s)' %
                                                     debug_args(method, args, kwargs))
                if e.error.is_to():
                    inside_error = e.error.get_to()
                    if inside_error.is_conflict():
                        raise CloudFileExistsError('File already exists when executing %s(%s)' %
                                                   debug_args(method, kwargs))
                if e.error.is_duplicated_or_nested_paths():
                    raise CloudFileExistsError('Duplicated or nested path %s(%s)' %
                                               debug_args(method, kwargs))

            if isinstance(e.error, files.CreateFolderError):
                if e.error.is_path() and isinstance(e.error.get_path(), files.WriteError):
                    inside_error = e.error.get_path()
                    if inside_error.is_conflict():
                        raise CloudFileExistsError('File already exists when executing %s(%s)' %
                                                   debug_args(method, kwargs))

            if isinstance(e.error, files.ListFolderContinueError):
                # all list-folder-continue errors should cause a cursor reset
                # these include the actual "is_reset" and, of course a is_path (fnf)
                # and also is_other which can secretly contain a reset as well
                raise CloudCursorError("Cursor reset request")

            if isinstance(e.error, files.ListRevisionsError):
                if e.error.is_path():
                    inside_error = e.error.get_path()
                    if inside_error.is_not_file():
                        raise NotAFileError(str(e))

            if isinstance(e.error, files.ListFolderLongpollError):
                raise CloudCursorError("cursor invalidated during longpoll")

            raise CloudException("Unknown exception when executing %s(%s,%s): %s" %
                                 debug_args(method, args, kwargs, e))
        except (exceptions.InternalServerError, exceptions.RateLimitError, requests.exceptions.ReadTimeout):
            raise CloudTemporaryError()
        except dropbox.stone_validators.ValidationError as e:
            log.debug("f*ed up api error: %s", e)
            if "never created" in str(e):
                raise CloudFileNotFoundError()
            if "did not match" in str(e):
                log.warning("oid error %s", e)
                raise CloudFileNotFoundError()
            raise
        except requests.exceptions.ConnectionError as e:
            log.error('api error handled exception %s:%s', "dropbox", e.__class__.__name__)
            self.disconnect()
            raise CloudDisconnectedError()