class S3ContentsManager(GenericContentsManager):

    access_key_id = Unicode(
        help="S3/AWS access key ID", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_ACCESS_KEY_ID")
    secret_access_key = Unicode(
        help="S3/AWS secret access key", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_SECRET_ACCESS_KEY")
    endpoint_url = Unicode(
        "https://s3.amazonaws.com", help="S3 endpoint URL").tag(
            config=True, env="JPYNB_S3_ENDPOINT_URL")
    region_name = Unicode(
        "us-east-1", help="Region name").tag(
            config=True, env="JPYNB_S3_REGION_NAME")
    bucket = Unicode(
        "notebooks", help="Bucket name to store notebooks").tag(
            config=True, env="JPYNB_S3_BUCKET")
    prefix = Unicode(
        "", help="Prefix path inside the specified bucket").tag(config=True)
    signature_version = Unicode(help="").tag(config=True)
    delimiter = Unicode("/", help="Path delimiter").tag(config=True)
    sse = Unicode(help="Type of server-side encryption to use").tag(config=True)
    session_token = Unicode(
        help="S3/AWS session token", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_SESSION_TOKEN")

    def __init__(self, *args, **kwargs):
        super(S3ContentsManager, self).__init__(*args, **kwargs)
        self._fs = S3FS(
            log=self.log,
            access_key_id=self.access_key_id,
            secret_access_key=self.secret_access_key,
            endpoint_url=self.endpoint_url,
            region_name=self.region_name,
            bucket=self.bucket,
            prefix=self.prefix,
            session_token=self.session_token,
            signature_version=self.signature_version,
            delimiter=self.delimiter,
            sse=self.sse)

    def _save_notebook(self, model, path):
        nb_contents = from_dict(model['content'])
        self.check_and_sign(nb_contents, path)
        file_contents = json.dumps(model["content"])
        self._fs.writenotebook(path, file_contents)
        self.validate_notebook_model(model)
        return model.get("message")
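# Usage sketch (not part of the class above): wiring this manager into Jupyter
# from jupyter_notebook_config.py. Bucket and credential values below are
# placeholders; any trait tagged config=True above can be set the same way.
c = get_config()
c.NotebookApp.contents_manager_class = S3ContentsManager
c.S3ContentsManager.bucket = "my-notebooks-bucket"
c.S3ContentsManager.access_key_id = "<AWS_ACCESS_KEY_ID>"
c.S3ContentsManager.secret_access_key = "<AWS_SECRET_ACCESS_KEY>"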
class S3ContentsManager(GenericContentsManager):

    access_key_id = Unicode(
        help="S3/AWS access key ID", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_ACCESS_KEY_ID")
    secret_access_key = Unicode(
        help="S3/AWS secret access key", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_SECRET_ACCESS_KEY")
    endpoint_url = Unicode(
        "https://s3.amazonaws.com", help="S3 endpoint URL").tag(
            config=True, env="JPYNB_S3_ENDPOINT_URL")
    region_name = Unicode(
        "us-east-1", help="Region name").tag(
            config=True, env="JPYNB_S3_REGION_NAME")
    bucket = Unicode(
        "notebooks", help="Bucket name to store notebooks").tag(
            config=True, env="JPYNB_S3_BUCKET")
    prefix = Unicode(
        "", help="Prefix path inside the specified bucket").tag(config=True)
    signature_version = Unicode(help="").tag(config=True)
    delimiter = Unicode("/", help="Path delimiter").tag(config=True)

    def __init__(self, *args, **kwargs):
        super(S3ContentsManager, self).__init__(*args, **kwargs)
        self._fs = S3FS(
            log=self.log,
            access_key_id=self.access_key_id,
            secret_access_key=self.secret_access_key,
            endpoint_url=self.endpoint_url,
            region_name=self.region_name,
            bucket=self.bucket,
            prefix=self.prefix,
            signature_version=self.signature_version,
            delimiter=self.delimiter)
class GFContentsManager(GenericContentsManager):

    project = Unicode(
        help="GCP Project", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_GCS_PROJECT")
    prefix = Unicode(
        "", help="Prefix path inside the specified bucket").tag(config=True)
    separator = Unicode("/", help="Path separator").tag(config=True)

    def __init__(self, *args, **kwargs):
        super(GFContentsManager, self).__init__(*args, **kwargs)
        self._fs = GFFS(
            log=self.log,
            project=self.project,
            prefix=self.prefix,
            separator=self.separator)

    @default('checkpoints_class')
    def _checkpoints_class_default(self):
        return RemoteFileCheckpoints
class GCSContentsManager(GenericContentsManager):

    project = Unicode(
        help="GCP Project", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_GCS_PROJECT")
    token = Unicode(
        help="Path to the GCP token", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_GCS_TOKEN_PATH")
    region_name = Unicode(
        "us-east-1", help="Region name").tag(
            config=True, env="JPYNB_GCS_REGION_NAME")
    bucket = Unicode(
        "notebooks", help="Bucket name to store notebooks").tag(
            config=True, env="JPYNB_GCS_BUCKET")
    prefix = Unicode(
        "", help="Prefix path inside the specified bucket").tag(config=True)
    separator = Unicode("/", help="Path separator").tag(config=True)

    def __init__(self, *args, **kwargs):
        super(GCSContentsManager, self).__init__(*args, **kwargs)
        self._fs = GCSFS(
            log=self.log,
            project=self.project,
            token=self.token,
            bucket=self.bucket,
            prefix=self.prefix,
            separator=self.separator)
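# Usage sketch (assumes the GCSFS backend above is importable): pointing
# Jupyter at the GCS-backed manager. Project, token path, and bucket values
# are placeholders.
c = get_config()
c.NotebookApp.contents_manager_class = GCSContentsManager
c.GCSContentsManager.project = "my-gcp-project"
c.GCSContentsManager.token = "~/.config/gcloud/application_default_credentials.json"
c.GCSContentsManager.bucket = "my-notebooks-bucket"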
class S3ContentsManager(GenericContentsManager):

    access_key_id = Unicode(
        help="S3/AWS access key ID", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_ACCESS_KEY_ID")
    secret_access_key = Unicode(
        help="S3/AWS secret access key", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_SECRET_ACCESS_KEY")
    endpoint_url = Unicode(
        "s3-us-gov-east-1.amazonaws.com", help="S3 endpoint URL").tag(
            config=True, env="JPYNB_S3_ENDPOINT_URL")
    region_name = Unicode(
        "us-gov-east-1", help="Region name").tag(
            config=True, env="JPYNB_S3_REGION_NAME")
    bucket = Unicode(
        "notebooks", help="Bucket name to store notebooks").tag(
            config=True, env="JPYNB_S3_BUCKET")
    prefix = Unicode(
        "", help="Prefix path inside the specified bucket").tag(config=True)
    signature_version = Unicode(help="").tag(config=True)
    delimiter = Unicode("/", help="Path delimiter").tag(config=True)
    sse = Unicode(help="Type of server-side encryption to use").tag(config=True)
    kms_key_id = Unicode(help="KMS ID to use to encrypt workbooks").tag(config=True)
    session_token = Unicode(
        help="S3/AWS session token", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_SESSION_TOKEN")
    boto3_session = Any(
        help="Place to store custom boto3 session (passed to S3_FS) - could be set by init_s3_hook")
    init_s3_hook = Any(help="optional hook for init'ing s3").tag(config=True)
    s3fs_additional_kwargs = Any(
        help="optional dictionary to be appended to s3fs additional kwargs").tag(config=True)

    def __init__(self, *args, **kwargs):
        super(S3ContentsManager, self).__init__(*args, **kwargs)
        self.run_init_s3_hook()
        self.bucket = _validate_bucket(self.bucket, self.log)
        self._fs = S3FS(
            log=self.log,
            access_key_id=self.access_key_id,
            secret_access_key=self.secret_access_key,
            endpoint_url=self.endpoint_url,
            region_name=self.region_name,
            bucket=self.bucket,
            prefix=self.prefix,
            session_token=self.session_token,
            signature_version=self.signature_version,
            delimiter=self.delimiter,
            sse=self.sse,
            kms_key_id=self.kms_key_id,
            boto3_session=self.boto3_session,
            s3fs_additional_kwargs=self.s3fs_additional_kwargs)

    def run_init_s3_hook(self):
        if self.init_s3_hook is not None:
            self.init_s3_hook(self)

    def _save_notebook(self, model, path):
        nb_contents = from_dict(model["content"])
        self.check_and_sign(nb_contents, path)
        file_contents = json.dumps(model["content"])
        self._fs.writenotebook(path, file_contents)
        self.validate_notebook_model(model)
        return model.get("message")
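# A minimal sketch of an init_s3_hook: it runs before S3FS is constructed, so
# it can inject a custom boto3 session via the boto3_session trait declared
# above. The function name and profile name are illustrative.
import boto3

def use_profile_session(manager):
    # `manager` is the S3ContentsManager instance; boto3_session is forwarded
    # to S3FS when the filesystem is built.
    manager.boto3_session = boto3.Session(profile_name="notebooks")

# In jupyter_notebook_config.py:
# c.S3ContentsManager.init_s3_hook = use_profile_session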
class S3FS(GenericFS):

    access_key_id = Unicode(
        help="S3/AWS access key ID", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_ACCESS_KEY_ID")
    secret_access_key = Unicode(
        help="S3/AWS secret access key", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_SECRET_ACCESS_KEY")
    endpoint_url = Unicode(
        "s3.amazonaws.com", help="S3 endpoint URL").tag(
            config=True, env="JPYNB_S3_ENDPOINT_URL")
    region_name = Unicode(
        "us-east-1", help="Region name").tag(
            config=True, env="JPYNB_S3_REGION_NAME")
    bucket = Unicode(
        "notebooks", help="Bucket name to store notebooks").tag(
            config=True, env="JPYNB_S3_BUCKET")
    signature_version = Unicode(help="").tag(config=True)
    sse = Unicode(help="Type of server-side encryption to use").tag(config=True)
    prefix = Unicode(
        "", help="Prefix path inside the specified bucket").tag(config=True)
    delimiter = Unicode("/", help="Path delimiter").tag(config=True)
    dir_keep_file = Unicode(
        ".s3keep",
        help="Empty file to create when creating directories").tag(config=True)

    def __init__(self, log, **kwargs):
        super(S3FS, self).__init__(**kwargs)
        self.log = log

        client_kwargs = {
            "endpoint_url": self.endpoint_url,
            "region_name": self.region_name,
        }
        config_kwargs = {}
        if self.signature_version:
            config_kwargs["signature_version"] = self.signature_version
        s3_additional_kwargs = {}
        if self.sse:
            s3_additional_kwargs["ServerSideEncryption"] = self.sse

        self.fs = s3fs.S3FileSystem(
            key=self.access_key_id,
            secret=self.secret_access_key,
            client_kwargs=client_kwargs,
            config_kwargs=config_kwargs,
            s3_additional_kwargs=s3_additional_kwargs)

        # Note: threading.Timer is one-shot, so this invalidates the s3fs
        # listing cache a single time, 60 seconds after startup.
        self._invalidator = threading.Timer(
            interval=60, function=self.fs.invalidate_cache)
        self._invalidator.setDaemon(True)
        self._invalidator.start()

        self.init()

    def __del__(self):
        self._invalidator.cancel()

    def init(self):
        self.mkdir("")
        self.ls("")
        self.isdir("")

    # GenericFS methods ------------------------------------------------------

    def ls(self, path=""):
        path_ = self.path(path)
        self.log.debug("S3contents.S3FS: Listing directory: `%s`", path_)
        files = self.fs.ls(path_)
        return self.unprefix(files)

    def isfile(self, path):
        path_ = self.path(path)
        exists = self.fs.exists(path_)
        if not exists:
            is_file = False
        else:
            is_file = path_ in set(self.fs.ls(path_))
        self.log.debug("S3contents.S3FS: `%s` is a file: %s", path_, is_file)
        return is_file

    def isdir(self, path):
        path_ = self.path(path)
        exists = self.fs.exists(path_)
        if not exists:
            is_dir = False
        else:
            is_dir = path_ not in set(self.fs.ls(path_))
        return is_dir

    def mv(self, old_path, new_path):
        self.log.debug("S3contents.S3FS: Move file `%s` to `%s`", old_path, new_path)
        self.cp(old_path, new_path)
        self.rm(old_path)

    def cp(self, old_path, new_path):
        old_path_, new_path_ = self.path(old_path), self.path(new_path)
        self.log.debug("S3contents.S3FS: Copying `%s` to `%s`", old_path_, new_path_)
        if self.isdir(old_path):
            old_dir_path, new_dir_path = old_path, new_path
            for obj in self.ls(old_dir_path):
                old_item_path = obj
                new_item_path = old_item_path.replace(old_dir_path, new_dir_path, 1)
                self.cp(old_item_path, new_item_path)
        elif self.isfile(old_path):
            self.fs.copy(old_path_, new_path_)
            self.fs.invalidate_cache(new_path_)

    def rm(self, path):
        path_ = self.path(path)
        self.log.debug("S3contents.S3FS: Removing: `%s`", path_)
        if self.isfile(path):
            self.log.debug("S3contents.S3FS: Removing file: `%s`", path_)
            self.fs.rm(path_)
        elif self.isdir(path):
            self.log.debug("S3contents.S3FS: Removing directory: `%s`", path_)
            self.fs.rm(path_ + self.delimiter, recursive=True)
        self.fs.invalidate_cache(path_)

    def mkdir(self, path):
        path_ = self.path(path, self.dir_keep_file)
        self.log.debug("S3contents.S3FS: Making dir: `%s`", path_)
        self.fs.touch(path_)
        parent = path_.rsplit('/', 2)[0]
        self.log.info("S3contents.S3FS: Invalidating: `%s`", parent)
        self.fs.invalidate_cache(parent)

    def read(self, path):
        path_ = self.path(path)
        if not self.isfile(path):
            raise NoSuchFile(path_)
        with self.fs.open(path_, mode='rb') as f:
            content = f.read().decode("utf-8")
        return content

    def lstat(self, path):
        path_ = self.path(path)
        if self.isdir(path):
            # Use the modification timestamps of immediate children to
            # determine this path's mtime.
            try:
                modification_dates = filter(
                    None,
                    (e.get('LastModified')
                     for e in self.fs.ls(path_, detail=True)))
                ret = {
                    "ST_MTIME": max(modification_dates, default=None),
                    "ST_SIZE": None,
                }
            except FileNotFoundError:
                ret = {}
        else:
            info = self.fs.info(path_)
            ret = {
                "ST_MTIME": info["LastModified"],
                "ST_SIZE": info["Size"],
            }
        return ret

    def write(self, path, content, format):
        path_ = self.path(self.unprefix(path))
        self.log.debug("S3contents.S3FS: Writing file: `%s`", path_)
        if format not in {'text', 'base64'}:
            raise HTTPError(
                400,
                "Must specify format of file contents as 'text' or 'base64'",
            )
        try:
            if format == 'text':
                content_ = content.encode('utf8')
            else:
                b64_bytes = content.encode('ascii')
                content_ = base64.b64decode(b64_bytes)
        except Exception as e:
            raise HTTPError(400, u'Encoding error saving %s: %s' % (path_, e))
        with self.fs.open(path_, mode='wb') as f:
            f.write(content_)

    def writenotebook(self, path, content):
        path_ = self.path(self.unprefix(path))
        self.log.debug("S3contents.S3FS: Writing notebook: `%s`", path_)
        with self.fs.open(path_, mode='wb') as f:
            f.write(content.encode("utf-8"))

    # Utilities ---------------------------------------------------------------

    def get_prefix(self):
        """Full prefix: bucket + optional prefix"""
        prefix = self.bucket
        if self.prefix:
            prefix += self.delimiter + self.prefix
        return prefix

    prefix_ = property(get_prefix)

    def unprefix(self, path):
        """Remove the self.prefix_ (if present) from a path or list of paths"""
        if isinstance(path, six.string_types):
            path = path[len(self.prefix_):] if path.startswith(self.prefix_) else path
            path = path[1:] if path.startswith(self.delimiter) else path
            return path
        if isinstance(path, (list, tuple)):
            path = [
                p[len(self.prefix_):] if p.startswith(self.prefix_) else p
                for p in path
            ]
            path = [p[1:] if p.startswith(self.delimiter) else p for p in path]
            return path

    def path(self, *path):
        """Utility to join paths including the bucket and prefix"""
        path = list(filter(None, path))
        path = self.unprefix(path)
        items = [self.prefix_] + path
        return self.delimiter.join(items)
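# Illustrative round trip through the path utilities above, assuming
# bucket="notebooks", prefix="team", and the default "/" delimiter (values
# are hypothetical; these helpers are pure string operations, no S3 calls):
#
#   fs.prefix_                                   -> "notebooks/team"
#   fs.path("proj", "a.ipynb")                   -> "notebooks/team/proj/a.ipynb"
#   fs.unprefix("notebooks/team/proj/a.ipynb")   -> "proj/a.ipynb"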
class GenericContentsManager(ContentsManager, HasTraits):

    # This makes the checkpoints get saved on this directory
    root_dir = Unicode("./", config=True)

    def __init__(self, *args, **kwargs):
        super(GenericContentsManager, self).__init__(*args, **kwargs)
        self._fs = None

    def get_fs(self):
        return self._fs

    fs = property(get_fs)

    def _checkpoints_class_default(self):
        return GenericFileCheckpoints

    def do_error(self, msg, code=500):
        raise HTTPError(code, msg)

    def no_such_entity(self, path):
        self.do_error("No such entity: [{path}]".format(path=path), 404)

    def already_exists(self, path):
        thing = "File" if self.file_exists(path) else "Directory"
        self.do_error(
            u"{thing} already exists: [{path}]".format(thing=thing, path=path), 409)

    def guess_type(self, path, allow_directory=True):
        """
        Guess the type of a file.

        If allow_directory is False, don't consider the possibility that the
        file is a directory.

        Parameters
        ----------
        obj: s3.Object or string
        """
        if path.endswith(".ipynb"):
            return "notebook"
        elif allow_directory and self.dir_exists(path):
            return "directory"
        else:
            return "file"

    def file_exists(self, path):
        # Does a file exist at the given path?
        self.log.debug("S3contents.GenericManager.file_exists: ('%s')", path)
        return self.fs.isfile(path)

    def dir_exists(self, path):
        # Does a directory exist at the given path?
        self.log.debug("S3contents.GenericManager.dir_exists: path('%s')", path)
        return self.fs.isdir(path)

    def get(self, path, content=True, type=None, format=None):
        # Get a file or directory model.
        self.log.debug(
            "S3contents.GenericManager.get: path('%s') type(%s) format(%s)",
            path, type, format)
        path = path.strip('/')

        if type is None:
            type = self.guess_type(path)
        try:
            func = {
                "directory": self._get_directory,
                "notebook": self._get_notebook,
                "file": self._get_file,
            }[type]
        except KeyError:
            raise ValueError("Unknown type passed: '{}'".format(type))
        return func(path=path, content=content, format=format)

    def _get_directory(self, path, content=True, format=None):
        self.log.debug(
            "S3contents.GenericManager._get_directory: path('%s') content(%s) format(%s)",
            path, content, format)
        return self._directory_model_from_path(path, content=content)

    def _get_notebook(self, path, content=True, format=None):
        self.log.debug(
            "S3contents.GenericManager._get_notebook: path('%s') content(%s) format(%s)",
            path, content, format)
        return self._notebook_model_from_path(path, content=content, format=format)

    def _get_file(self, path, content=True, format=None):
        self.log.debug(
            "S3contents.GenericManager._get_file: path('%s') content(%s) format(%s)",
            path, content, format)
        return self._file_model_from_path(path, content=content, format=format)

    def _directory_model_from_path(self, path, content=False):
        self.log.debug(
            "S3contents.GenericManager._directory_model_from_path: path('%s') content(%s)",
            path, content)
        model = base_directory_model(path)
        if content:
            if not self.dir_exists(path):
                self.no_such_entity(path)
            model["format"] = "json"
            dir_content = self.fs.ls(path=path)
            model["content"] = self._convert_file_records(dir_content)
        return model

    def _notebook_model_from_path(self, path, content=False, format=None):
        """Build a notebook model from database record."""
        model = base_model(path)
        model["type"] = "notebook"
        if self.fs.isfile(path):
            model["last_modified"] = model["created"] = self.fs.lstat(path)["ST_MTIME"]
        else:
            model["last_modified"] = model["created"] = DUMMY_CREATED_DATE
        if content:
            if not self.fs.isfile(path):
                self.no_such_entity(path)
            file_content = self.fs.read(path)
            nb_content = reads(file_content, as_version=NBFORMAT_VERSION)
            self.mark_trusted_cells(nb_content, path)
            model["format"] = "json"
            model["content"] = nb_content
            self.validate_notebook_model(model)
        return model

    def _file_model_from_path(self, path, content=False, format=None):
        """Build a file model from database record."""
        model = base_model(path)
        model["type"] = "file"
        if self.fs.isfile(path):
            model["last_modified"] = model["created"] = self.fs.lstat(path)["ST_MTIME"]
        else:
            model["last_modified"] = model["created"] = DUMMY_CREATED_DATE
        if content:
            try:
                content = self.fs.read(path)
            except NoSuchFile as e:
                self.no_such_entity(e.path)
            except GenericFSError as e:
                self.do_error(str(e), 500)
            model["format"] = format or "text"
            model["content"] = content
            model["mimetype"] = mimetypes.guess_type(path)[0] or "text/plain"
            if format == "base64":
                model["format"] = format or "base64"
                from base64 import b64decode
                model["content"] = b64decode(content)
        return model

    def _convert_file_records(self, paths):
        """
        Applies _notebook_model_from_s3_path or _file_model_from_s3_path to each
        entry of `paths`, depending on the result of `guess_type`.
        """
        ret = []
        for path in paths:
            # path = self.fs.remove_prefix(path, self.prefix)  # Remove bucket prefix from paths
            if os.path.basename(path) == self.fs.dir_keep_file:
                continue
            type_ = self.guess_type(path, allow_directory=True)
            if type_ == "notebook":
                ret.append(self._notebook_model_from_path(path, False))
            elif type_ == "file":
                ret.append(self._file_model_from_path(path, False, None))
            elif type_ == "directory":
                ret.append(self._directory_model_from_path(path, False))
            else:
                self.do_error(
                    "Unknown file type %s for file '%s'" % (type_, path), 500)
        return ret

    def save(self, model, path):
        """Save a file or directory model to path."""
        self.log.debug("S3contents.GenericManager: save %s: '%s'", model, path)

        if "type" not in model:
            self.do_error("No model type provided", 400)
        if "content" not in model and model["type"] != "directory":
            self.do_error("No file content provided", 400)
        if model["type"] not in ("file", "directory", "notebook"):
            self.do_error("Unhandled contents type: %s" % model["type"], 400)
        try:
            if model["type"] == "notebook":
                validation_message = self._save_notebook(model, path)
            elif model["type"] == "file":
                validation_message = self._save_file(model, path)
            else:
                validation_message = self._save_directory(path)
        except Exception as e:
            self.log.error("Error while saving file: %s %s", path, e, exc_info=True)
            self.do_error(
                "Unexpected error while saving file: %s %s" % (path, e), 500)

        model = self.get(path, type=model["type"], content=False)
        if validation_message is not None:
            model["message"] = validation_message
        return model

    def _save_notebook(self, model, path):
        nb_contents = from_dict(model['content'])
        self.check_and_sign(nb_contents, path)
        file_contents = json.dumps(model["content"])
        self.fs.write(path, file_contents)
        self.validate_notebook_model(model)
        return model.get("message")

    def _save_file(self, model, path):
        file_contents = model["content"]
        file_format = model.get('format')
        self.fs.write(path, file_contents, file_format)

    def _save_directory(self, path):
        self.fs.mkdir(path)

    def rename_file(self, old_path, new_path):
        """Rename a file or directory.

        NOTE: This method is unfortunately named on the base class. It
        actually moves a file or a directory.
        """
        self.log.debug(
            "S3contents.GenericManager: Init rename of '%s' to '%s'",
            old_path, new_path)
        if self.file_exists(new_path) or self.dir_exists(new_path):
            self.already_exists(new_path)
        elif self.file_exists(old_path) or self.dir_exists(old_path):
            self.log.debug(
                "S3contents.GenericManager: Actually renaming '%s' to '%s'",
                old_path, new_path)
            self.fs.mv(old_path, new_path)
        else:
            self.no_such_entity(old_path)

    def delete_file(self, path):
        """Delete the file or directory at path."""
        self.log.debug("S3contents.GenericManager: delete_file '%s'", path)
        if self.file_exists(path) or self.dir_exists(path):
            self.fs.rm(path)
        else:
            self.no_such_entity(path)

    def is_hidden(self, path):
        """Is path a hidden directory or file?"""
        self.log.debug("S3contents.GenericManager: is_hidden '%s'", path)
        return False
class S3ContentsManager(ContentsManager, HasTraits):

    access_key_id = Unicode(
        help="S3/AWS access key ID", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_ACCESS_KEY_ID")
    secret_access_key = Unicode(
        help="S3/AWS secret access key", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_SECRET_ACCESS_KEY")
    endpoint_url = Unicode(
        "s3.amazonaws.com", help="S3 endpoint URL").tag(
            config=True, env="JPYNB_S3_ENDPOINT_URL")
    region_name = Unicode(
        "us-east-1", help="Region Name").tag(
            config=True, env="JPYNB_S3_REGION_NAME")
    bucket_name = Unicode(
        "notebooks", help="Bucket name to store notebooks").tag(
            config=True, env="JPYNB_S3_BUCKET_NAME")
    prefix = Unicode(
        "", help="Prefix path inside the specified bucket").tag(config=True)
    signature_version = Unicode(help="").tag(config=True)
    delimiter = Unicode("/", help="Path delimiter").tag(config=True)

    def __init__(self, *args, **kwargs):
        super(S3ContentsManager, self).__init__(*args, **kwargs)
        self.s3fs = S3FS(
            log=self.log,
            access_key_id=self.access_key_id,
            secret_access_key=self.secret_access_key,
            endpoint_url=self.endpoint_url,
            region_name=self.region_name,
            bucket_name=self.bucket_name,
            prefix=self.prefix,
            signature_version=self.signature_version,
            delimiter=self.delimiter)

    def _checkpoints_class_default(self):
        return GenericFileCheckpoints

    def do_error(self, msg, code=500):
        raise HTTPError(code, msg)

    def no_such_entity(self, path):
        self.do_error("No such entity: [{path}]".format(path=path), 404)

    def already_exists(self, path):
        thing = "File" if self.file_exists(path) else "Directory"
        self.do_error(
            u"{thing} already exists: [{path}]".format(thing=thing, path=path), 409)

    def guess_type(self, path, allow_directory=True):
        """
        Guess the type of a file.

        If allow_directory is False, don't consider the possibility that the
        file is a directory.

        Parameters
        ----------
        obj: s3.Object or string
        """
        if path.endswith(".ipynb"):
            return "notebook"
        elif allow_directory and self.dir_exists(path):
            return "directory"
        else:
            return "file"

    def file_exists(self, path):
        # Does a file exist at the given path?
        self.log.debug("S3contents[S3manager]: file_exists '%s'", path)
        return self.s3fs.isfile(path)

    def dir_exists(self, path):
        # Does a directory exist at the given path?
        self.log.debug("S3contents[S3manager]: dir_exists '%s'", path)
        return self.s3fs.isdir(path)

    def get(self, path, content=True, type=None, format=None):
        # Get a file or directory model.
        self.log.debug("S3contents[S3manager]: get '%s' %s %s", path, type, format)
        path = path.strip('/')

        if type is None:
            type = self.guess_type(path)
        try:
            fn = {
                "directory": self._get_directory,
                "notebook": self._get_notebook,
                "file": self._get_file,
            }[type]
        except KeyError:
            raise ValueError("Unknown type passed: '{}'".format(type))
        return fn(path=path, content=content, format=format)

    def _get_directory(self, path, content=True, format=None):
        self.log.debug("S3contents[S3manager]: get_directory '%s' %s %s",
                       path, content, format)
        return self._directory_model_from_path(path, content=content)

    def _get_notebook(self, path, content=True, format=None):
        self.log.debug("S3contents[S3manager]: get_notebook '%s' %s %s",
                       path, content, format)
        return self._notebook_model_from_path(path, content=content, format=format)

    def _get_file(self, path, content=True, format=None):
        self.log.debug("S3contents[S3manager]: get_file '%s' %s %s",
                       path, content, format)
        return self._file_model_from_path(path, content=content, format=format)

    def _directory_model_from_path(self, path, content=False):
        self.log.debug("S3contents[S3manager]: _directory_model_from_path '%s' %s",
                       path, content)
        model = base_directory_model(path)
        if content:
            if not self.dir_exists(path):
                self.no_such_entity(path)
            model["format"] = "json"
            dir_content = self.s3fs.listdir(path=path, with_prefix=True)
            model["content"] = self._convert_file_records(dir_content)
        return model

    def _notebook_model_from_path(self, path, content=False, format=None):
        """
        Build a notebook model from database record.
        """
        # path = to_api_path(record['parent_name'] + record['name'])
        model = base_model(path)
        model['type'] = 'notebook'
        # model['last_modified'] = model['created'] = record['created_at']
        model['last_modified'] = model['created'] = DUMMY_CREATED_DATE
        if content:
            if not self.s3fs.isfile(path):
                self.no_such_entity(path)
            file_content = self.s3fs.read(path)
            nb_content = reads(file_content, as_version=NBFORMAT_VERSION)
            self.mark_trusted_cells(nb_content, path)
            model["format"] = "json"
            model["content"] = nb_content
            self.validate_notebook_model(model)
        return model

    def _file_model_from_path(self, path, content=False, format=None):
        """
        Build a file model from database record.
        """
        model = base_model(path)
        model['type'] = 'file'
        model['last_modified'] = model['created'] = DUMMY_CREATED_DATE
        if content:
            try:
                content = self.s3fs.read(path)
            except NoSuchFile as e:
                self.no_such_entity(e.path)
            except S3FSError as e:
                self.do_error(str(e), 500)
            model["format"] = format or "text"
            model["content"] = content
            model["mimetype"] = mimetypes.guess_type(path)[0] or "text/plain"
            if format == "base64":
                model["format"] = format or "base64"
                from base64 import b64decode
                model["content"] = b64decode(content)
        return model

    def _convert_file_records(self, paths):
        """
        Applies _notebook_model_from_s3_path or _file_model_from_s3_path to each
        entry of `paths`, depending on the result of `guess_type`.
        """
        ret = []
        for path in paths:
            path = self.s3fs.remove_prefix(path, self.prefix)  # Remove bucket prefix from paths
            if os.path.basename(path) == self.s3fs.dir_keep_file:
                continue
            type_ = self.guess_type(path, allow_directory=True)
            if type_ == "notebook":
                ret.append(self._notebook_model_from_path(path, False))
            elif type_ == "file":
                ret.append(self._file_model_from_path(path, False, None))
            elif type_ == "directory":
                ret.append(self._directory_model_from_path(path, False))
            else:
                self.do_error(
                    "Unknown file type %s for file '%s'" % (type_, path), 500)
        return ret

    def save(self, model, path):
        """Save a file or directory model to path."""
        self.log.debug("S3contents[S3manager]: save %s: '%s'", model, path)

        if "type" not in model:
            self.do_error("No model type provided", 400)
        if "content" not in model and model["type"] != "directory":
            self.do_error("No file content provided", 400)
        if model["type"] not in ("file", "directory", "notebook"):
            self.do_error("Unhandled contents type: %s" % model["type"], 400)
        try:
            if model["type"] == "notebook":
                validation_message = self._save_notebook(model, path)
            elif model["type"] == "file":
                validation_message = self._save_file(model, path)
            else:
                validation_message = self._save_directory(path)
        except Exception as e:
            self.log.error("Error while saving file: %s %s", path, e, exc_info=True)
            self.do_error(
                "Unexpected error while saving file: %s %s" % (path, e), 500)

        model = self.get(path, type=model["type"], content=False)
        if validation_message is not None:
            model["message"] = validation_message
        return model

    def _save_notebook(self, model, path):
        nb_contents = from_dict(model['content'])
        self.check_and_sign(nb_contents, path)
        file_contents = json.dumps(model["content"])
        self.s3fs.write(path, file_contents)
        self.validate_notebook_model(model)
        return model.get("message")

    def _save_file(self, model, path):
        file_contents = model["content"]
        self.s3fs.write(path, file_contents)

    def _save_directory(self, path):
        self.s3fs.mkdir(path)

    def rename_file(self, old_path, new_path):
        """Rename a file or directory.

        NOTE: This method is unfortunately named on the base class. It
        actually moves a file or a directory.
        """
        self.log.debug("S3contents[S3manager]: Init rename of '%s' to '%s'",
                       old_path, new_path)
        if self.file_exists(new_path) or self.dir_exists(new_path):
            self.already_exists(new_path)
        elif self.file_exists(old_path) or self.dir_exists(old_path):
            self.log.debug("S3contents[S3manager]: Actually renaming '%s' to '%s'",
                           old_path, new_path)
            self.s3fs.mv(old_path, new_path)
        else:
            self.no_such_entity(old_path)

    def delete_file(self, path):
        """Delete the file or directory at path."""
        self.log.debug("S3contents[S3manager]: delete_file '%s'", path)
        if self.file_exists(path) or self.dir_exists(path):
            self.s3fs.rm(path)
        else:
            self.no_such_entity(path)

    def is_hidden(self, path):
        """Is path a hidden directory or file?"""
        self.log.debug("S3contents[S3manager]: is_hidden '%s'", path)
        return False
class S3FS(HasTraits):

    access_key_id = Unicode(
        help="S3/AWS access key ID", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_ACCESS_KEY_ID")
    secret_access_key = Unicode(
        help="S3/AWS secret access key", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_SECRET_ACCESS_KEY")
    endpoint_url = Unicode(
        "s3.amazonaws.com", help="S3 endpoint URL").tag(
            config=True, env="JPYNB_S3_ENDPOINT_URL")
    region_name = Unicode(
        "us-east-1", help="Region Name").tag(
            config=True, env="JPYNB_S3_REGION_NAME")
    bucket_name = Unicode(
        "notebooks", help="Bucket name to store notebooks").tag(
            config=True, env="JPYNB_S3_BUCKET_NAME")
    prefix = Unicode(
        "", help="Prefix path inside the specified bucket").tag(config=True)
    signature_version = Unicode(help="").tag(config=True)
    delimiter = Unicode("/", help="Path delimiter").tag(config=True)
    dir_keep_file = Unicode(
        ".s3keep",
        help="Empty file to create when creating directories").tag(config=True)

    def __init__(self, log, **kwargs):
        super(S3FS, self).__init__(**kwargs)
        self.log = log

        config = None
        if self.signature_version:
            config = Config(signature_version=self.signature_version)

        self.client = boto3.client(
            "s3",
            aws_access_key_id=self.access_key_id,
            aws_secret_access_key=self.secret_access_key,
            endpoint_url=self.endpoint_url,
            region_name=self.region_name,
            config=config)
        self.resource = boto3.resource(
            "s3",
            aws_access_key_id=self.access_key_id,
            aws_secret_access_key=self.secret_access_key,
            endpoint_url=self.endpoint_url,
            region_name=self.region_name,
            config=config)
        self.bucket = self.resource.Bucket(self.bucket_name)

        self.delimiter = "/"

        if self.prefix:
            self.mkdir("")

    def get_keys(self, prefix=""):
        ret = []
        for obj in self.bucket.objects.filter(Prefix=prefix):
            ret.append(obj.key)
        return ret

    def listdir(self, path="", with_prefix=False):
        self.log.debug("S3contents[S3FS] Listing directory: `%s`", path)
        prefix = self.as_key(path)
        fnames = self.get_keys(prefix=prefix)
        fnames_no_prefix = [
            self.remove_prefix(fname, prefix=prefix) for fname in fnames
        ]
        fnames_no_prefix = [
            fname.lstrip(self.delimiter) for fname in fnames_no_prefix
        ]
        files = set(
            fname.split(self.delimiter)[0] for fname in fnames_no_prefix)
        if with_prefix:
            files = [
                self.join(prefix.strip(self.delimiter), f).strip(self.delimiter)
                for f in files
            ]
        else:
            files = list(files)
        return map(self.as_path, files)

    def isfile(self, path):
        self.log.debug("S3contents[S3FS] Checking if `%s` is a file", path)
        key = self.as_key(path)
        is_file = None
        if key == "":
            is_file = False
        try:
            self.client.head_object(Bucket=self.bucket_name, Key=key)
            is_file = True
        except Exception:
            is_file = False
        self.log.debug("S3contents[S3FS] `%s` is a file: %s", path, is_file)
        return is_file

    def isdir(self, path):
        self.log.debug("S3contents[S3FS] Checking if `%s` is a directory", path)
        key = self.as_key(path)
        if key == "":
            return True
        if not key.endswith(self.delimiter):
            key = key + self.delimiter
        objs = list(self.bucket.objects.filter(Prefix=key))
        is_dir = len(objs) > 0
        self.log.debug("S3contents[S3FS] `%s` is a directory: %s", path, is_dir)
        return is_dir

    def mv(self, old_path, new_path):
        self.cp(old_path, new_path)
        self.rm(old_path)

    def cp(self, old_path, new_path):
        self.log.debug("S3contents[S3FS] Copy `%s` to `%s`", old_path, new_path)
        if self.isdir(old_path):
            old_dir_path, new_dir_path = old_path, new_path
            old_dir_key = self.as_key(old_dir_path)
            for obj in self.bucket.objects.filter(Prefix=old_dir_key):
                old_item_path = self.as_path(obj.key)
                new_item_path = old_item_path.replace(old_dir_path, new_dir_path, 1)
                self.cp(old_item_path, new_item_path)
        elif self.isfile(old_path):
            old_key = self.as_key(old_path)
            new_key = self.as_key(new_path)
            source = "{bucket_name}/{old_key}".format(
                bucket_name=self.bucket_name, old_key=old_key)
            self.client.copy_object(
                Bucket=self.bucket_name, CopySource=source, Key=new_key)

    def rm(self, path):
        self.log.debug("S3contents[S3FS] Deleting: `%s`", path)
        if self.isfile(path):
            key = self.as_key(path)
            self.client.delete_object(Bucket=self.bucket_name, Key=key)
        elif self.isdir(path):
            key = self.as_key(path)
            key = key + "/"
            objects_to_delete = []
            for obj in self.bucket.objects.filter(Prefix=key):
                objects_to_delete.append({"Key": obj.key})
            self.bucket.delete_objects(Delete={"Objects": objects_to_delete})

    def mkdir(self, path):
        self.log.debug("S3contents[S3FS] Making dir: `%s`", path)
        if self.isfile(path):
            self.log.debug(
                "S3contents[S3FS] File `%s` already exists, not creating anything",
                path)
        elif self.isdir(path):
            self.log.debug(
                "S3contents[S3FS] Directory `%s` already exists, not creating anything",
                path)
        else:
            obj_path = self.join(path, self.dir_keep_file)
            self.write(obj_path, "")

    def read(self, path):
        key = self.as_key(path)
        if not self.isfile(path):
            raise NoSuchFile(self.as_path(key))
        obj = self.resource.Object(self.bucket_name, key)
        text = obj.get()["Body"].read().decode("utf-8")
        return text

    def write(self, path, content):
        key = self.as_key(path)
        self.client.put_object(Bucket=self.bucket_name, Key=key, Body=content)

    def as_key(self, path):
        """Utility: Make a path a S3 key"""
        path_ = self.abspath(path)
        self.log.debug("S3contents[S3FS] Understanding `%s` as `%s`", path, path_)
        if isinstance(path_, six.string_types):
            return path_.strip(self.delimiter)
        if isinstance(path_, list):
            return [self.as_key(item) for item in path_]

    def as_path(self, key):
        """Utility: Make a S3 key a path"""
        key_ = self.remove_prefix(key)
        if isinstance(key_, six.string_types):
            return key_.strip(self.delimiter)

    def remove_prefix(self, text, prefix=None):
        """Utility: remove a prefix from a string"""
        if prefix is None:
            prefix = self.prefix
        if text.startswith(prefix):
            return text[len(prefix):].strip("/")
        return text.strip("/")

    def join(self, *args):
        """Utility: join using the delimiter"""
        return self.delimiter.join(args)

    def abspath(self, path):
        """Utility: Return a normalized absolutized version of the pathname path

        Basically prepends the path with the prefix.
        """
        path = path.strip("/")
        if self.prefix:
            path = self.join(self.prefix, path)
        return path.strip("/")
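# Illustrative behavior of the key/path helpers above, assuming
# prefix="team/alice" (hypothetical values; these are pure string operations,
# no AWS calls involved):
#
#   fs.abspath("reports/a.ipynb")                    -> "team/alice/reports/a.ipynb"
#   fs.as_key("reports/a.ipynb")                     -> "team/alice/reports/a.ipynb"
#   fs.remove_prefix("team/alice/reports/a.ipynb")   -> "reports/a.ipynb"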
class GenericContentsManager(ContentsManager, HasTraits):

    # This makes the checkpoints get saved on this directory
    root_dir = Unicode("./", config=True)

    post_save_hook = Any(
        None,
        config=True,
        allow_none=True,
        help="""Python callable or importstring thereof to be called on the
        path of a file just saved.

        This can be used to process the file on disk, such as converting the
        notebook to a script or HTML via nbconvert.

        It will be called as (all arguments passed by keyword)::

            hook(s3_path=s3_path, model=model, contents_manager=instance)

        - s3_path: the S3 path to the file just written (sans bucket/prefix)
        - model: the model representing the file
        - contents_manager: this ContentsManager instance
        """,
    )

    def __init__(self, *args, **kwargs):
        super(GenericContentsManager, self).__init__(*args, **kwargs)
        self._fs = None

    def get_fs(self):
        return self._fs

    fs = property(get_fs)

    def _checkpoints_class_default(self):
        return GenericFileCheckpoints

    def do_error(self, msg, code=500):
        raise HTTPError(code, msg)

    def no_such_entity(self, path):
        self.do_error("No such entity: [{path}]".format(path=path), 404)

    def already_exists(self, path):
        thing = "File" if self.file_exists(path) else "Directory"
        self.do_error(
            "{thing} already exists: [{path}]".format(thing=thing, path=path), 409)

    def guess_type(self, path, allow_directory=True):
        """
        Guess the type of a file.

        If allow_directory is False, don't consider the possibility that the
        file is a directory.

        Parameters
        ----------
        obj: s3.Object or string
        """
        if path.endswith(".ipynb"):
            return "notebook"
        elif allow_directory and self.dir_exists(path):
            return "directory"
        else:
            return "file"

    def file_exists(self, path):
        # Does a file exist at the given path?
        self.log.debug("S3contents.GenericManager.file_exists: ('%s')", path)
        return self.fs.isfile(path)

    def dir_exists(self, path):
        # Does a directory exist at the given path?
        self.log.debug("S3contents.GenericManager.dir_exists: path('%s')", path)
        return self.fs.isdir(path)

    def get(self, path, content=True, type=None, format=None):
        # Get a file or directory model.
        self.log.debug(
            "S3contents.GenericManager.get: path('%s') type(%s) format(%s)",
            path, type, format,
        )
        path = path.strip("/")

        if type is None:
            type = self.guess_type(path)
        try:
            func = {
                "directory": self._get_directory,
                "notebook": self._get_notebook,
                "file": self._get_file,
            }[type]
        except KeyError:
            raise ValueError("Unknown type passed: '{}'".format(type))
        return func(path=path, content=content, format=format)

    def _get_directory(self, path, content=True, format=None):
        self.log.debug(
            "S3contents.GenericManager._get_directory: path('%s') content(%s) format(%s)",
            path, content, format,
        )
        return self._directory_model_from_path(path, content=content)

    def _get_notebook(self, path, content=True, format=None):
        self.log.debug(
            "S3contents.GenericManager._get_notebook: path('%s') content(%s) format(%s)",
            path, content, format,
        )
        return self._notebook_model_from_path(path, content=content, format=format)

    def _get_file(self, path, content=True, format=None):
        self.log.debug(
            "S3contents.GenericManager._get_file: path('%s') content(%s) format(%s)",
            path, content, format,
        )
        return self._file_model_from_path(path, content=content, format=format)

    def _directory_model_from_path(self, path, content=False):
        self.log.debug(
            "S3contents.GenericManager._directory_model_from_path: path('%s') content(%s)",
            path, content,
        )
        model = base_directory_model(path)
        if self.fs.isdir(path):
            lstat = self.fs.lstat(path)
            if "ST_MTIME" in lstat and lstat["ST_MTIME"]:
                model["last_modified"] = model["created"] = lstat["ST_MTIME"]
        if content:
            if not self.dir_exists(path):
                self.no_such_entity(path)
            model["format"] = "json"
            dir_content = self.fs.ls(path=path)
            model["content"] = self._convert_file_records(dir_content)
        return model

    def _notebook_model_from_path(self, path, content=False, format=None):
        """Build a notebook model from database record."""
        model = base_model(path)
        model["type"] = "notebook"
        if self.fs.isfile(path):
            model["last_modified"] = model["created"] = self.fs.lstat(path)["ST_MTIME"]
        else:
            model["last_modified"] = model["created"] = DUMMY_CREATED_DATE
        if content:
            if not self.fs.isfile(path):
                self.no_such_entity(path)
            file_content, _ = self.fs.read(path, format)
            nb_content = reads(file_content, as_version=NBFORMAT_VERSION)
            self.mark_trusted_cells(nb_content, path)
            model["format"] = "json"
            model["content"] = nb_content
            self.validate_notebook_model(model)
        return model

    def _file_model_from_path(self, path, content=False, format=None):
        """Build a file model from database record."""
        model = base_model(path)
        model["type"] = "file"
        if self.fs.isfile(path):
            model["last_modified"] = model["created"] = self.fs.lstat(path)["ST_MTIME"]
        else:
            model["last_modified"] = model["created"] = DUMMY_CREATED_DATE
        if content:
            try:
                # Get updated format from fs.read()
                content, format_ = self.fs.read(path, format)
            except NoSuchFile as e:
                self.no_such_entity(e.path)
            except GenericFSError as e:
                self.do_error(str(e), 500)
            model["format"] = format_
            model["content"] = content
            model["mimetype"] = mimetypes.guess_type(path)[0] or "text/plain"
        return model

    def _convert_file_records(self, paths):
        """
        Applies _notebook_model_from_s3_path or _file_model_from_s3_path to each
        entry of `paths`, depending on the result of `guess_type`.
        """
        ret = []
        for path in paths:
            # path = self.fs.remove_prefix(path, self.prefix)  # Remove bucket prefix from paths
            if os.path.basename(path) == self.fs.dir_keep_file:
                continue
            type_ = self.guess_type(path, allow_directory=True)
            if type_ == "notebook":
                ret.append(self._notebook_model_from_path(path, False))
            elif type_ == "file":
                ret.append(self._file_model_from_path(path, False, None))
            elif type_ == "directory":
                ret.append(self._directory_model_from_path(path, False))
            else:
                self.do_error(
                    "Unknown file type %s for file '%s'" % (type_, path), 500)
        return ret

    def save(self, model, path):
        """Save a file or directory model to path."""
        self.log.debug("S3contents.GenericManager.save %s: '%s'", model, path)

        if "type" not in model:
            self.do_error("No model type provided", 400)
        if "content" not in model and model["type"] != "directory":
            self.do_error("No file content provided", 400)
        if model["type"] not in ("file", "directory", "notebook"):
            self.do_error("Unhandled contents type: %s" % model["type"], 400)
        self.run_pre_save_hook(model=model, path=path)
        try:
            if model["type"] == "notebook":
                validation_message = self._save_notebook(model, path)
            elif model["type"] == "file":
                validation_message = self._save_file(model, path)
            else:
                validation_message = self._save_directory(path)
        except Exception as e:
            self.log.error("Error while saving file: %s %s", path, e, exc_info=True)
            self.do_error(
                "Unexpected error while saving file: %s %s" % (path, e), 500)

        model = self.get(path, type=model["type"], content=False)
        self.run_post_save_hook(model=model, s3_path=model["path"])
        if validation_message is not None:
            model["message"] = validation_message
        return model

    def _save_notebook(self, model, path):
        nb_contents = from_dict(model["content"])
        self.check_and_sign(nb_contents, path)
        file_contents = json.dumps(model["content"])
        self.fs.write(path, file_contents)
        self.validate_notebook_model(model)
        return model.get("message")

    def _save_file(self, model, path):
        file_contents = model["content"]
        file_format = model.get("format")
        self.fs.write(path, file_contents, file_format)

    def _save_directory(self, path):
        self.fs.mkdir(path)

    def rename_file(self, old_path, new_path):
        """Rename a file or directory.

        NOTE: This method is unfortunately named on the base class. It
        actually moves a file or a directory.
        """
        self.log.debug(
            "S3contents.GenericManager.rename_file: Init rename of '%s' to '%s'",
            old_path, new_path,
        )
        if self.file_exists(new_path) or self.dir_exists(new_path):
            self.already_exists(new_path)
        elif self.file_exists(old_path) or self.dir_exists(old_path):
            self.log.debug(
                "S3contents.GenericManager: Actually renaming '%s' to '%s'",
                old_path, new_path,
            )
            self.fs.mv(old_path, new_path)
        else:
            self.no_such_entity(old_path)

    def delete_file(self, path):
        """Delete the file or directory at path."""
        self.log.debug("S3contents.GenericManager.delete_file '%s'", path)
        if self.file_exists(path) or self.dir_exists(path):
            self.fs.rm(path)
        else:
            self.no_such_entity(path)

    def is_hidden(self, path):
        """Is path a hidden directory or file?"""
        self.log.debug("S3contents.GenericManager.is_hidden '%s'", path)
        return False

    @validate("post_save_hook")
    def _validate_post_save_hook(self, proposal):
        value = proposal["value"]
        if isinstance(value, string_types):
            value = import_item(value)
        if not callable(value):
            raise TraitError("post_save_hook must be callable")
        return value

    def run_post_save_hook(self, model, s3_path):
        """Run the post-save hook if defined, and log errors"""
        if self.post_save_hook:
            try:
                self.log.debug("Running post-save hook on %s", s3_path)
                self.post_save_hook(
                    s3_path=s3_path, model=model, contents_manager=self)
            except Exception as e:
                self.log.error("Post-save hook failed on %s", s3_path, exc_info=True)
                raise HTTPError(
                    500, "Unexpected error while running post-save hook: %s" % e) from e
class S3FS(GenericFS):

    access_key_id = Unicode(
        help="S3/AWS access key ID", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_ACCESS_KEY_ID")
    secret_access_key = Unicode(
        help="S3/AWS secret access key", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_SECRET_ACCESS_KEY")
    endpoint_url = Unicode(
        "s3.amazonaws.com", help="S3 endpoint URL").tag(
            config=True, env="JPYNB_S3_ENDPOINT_URL")
    region_name = Unicode(
        "us-east-1", help="Region name").tag(
            config=True, env="JPYNB_S3_REGION_NAME")
    bucket = Unicode(
        "notebooks", help="Bucket name to store notebooks").tag(
            config=True, env="JPYNB_S3_BUCKET")
    signature_version = Unicode(help="").tag(config=True)
    sse = Unicode(help="Type of server-side encryption to use").tag(config=True)
    prefix = Unicode(
        "", help="Prefix path inside the specified bucket").tag(config=True)
    delimiter = Unicode("/", help="Path delimiter").tag(config=True)
    dir_keep_file = Unicode(
        ".s3keep",
        help="Empty file to create when creating directories").tag(config=True)

    def __init__(self, log, **kwargs):
        super(S3FS, self).__init__(**kwargs)
        self.log = log

        client_kwargs = {
            "endpoint_url": self.endpoint_url,
            "region_name": self.region_name,
        }
        config_kwargs = {}
        if self.signature_version:
            config_kwargs["signature_version"] = self.signature_version
        s3_additional_kwargs = {}
        if self.sse:
            s3_additional_kwargs["ServerSideEncryption"] = self.sse

        self.fs = s3fs.S3FileSystem(
            key=self.access_key_id,
            secret=self.secret_access_key,
            client_kwargs=client_kwargs,
            config_kwargs=config_kwargs,
            s3_additional_kwargs=s3_additional_kwargs)

        self.init()

    def init(self):
        self.mkdir("")
        self.ls("")
        self.isdir("")

    # GenericFS methods ------------------------------------------------------

    def ls(self, path=""):
        path_ = self.path(path)
        self.log.debug("S3contents.S3FS: Listing directory: `%s`", path_)
        files = self.fs.ls(path_, refresh=True)
        return self.unprefix(files)

    def isfile(self, path):
        path_ = self.path(path)
        is_file = False

        exists = self.fs.exists(path_)
        if not exists:
            is_file = False
        else:
            try:
                # Info will fail if path is a dir
                self.fs.info(path_, refresh=True)
                is_file = True
            except FileNotFoundError:
                pass

        self.log.debug("S3contents.S3FS: `%s` is a file: %s", path_, is_file)
        return is_file

    def isdir(self, path):
        path_ = self.path(path)
        is_dir = False

        exists = self.fs.exists(path_)
        if not exists:
            is_dir = False
        else:
            try:
                # Info will fail if path is a dir
                self.fs.info(path_, refresh=True)
                is_dir = False
            except FileNotFoundError:
                is_dir = True

        self.log.debug("S3contents.S3FS: `%s` is a directory: %s", path_, is_dir)
        return is_dir

    def mv(self, old_path, new_path):
        self.log.debug("S3contents.S3FS: Move file `%s` to `%s`", old_path, new_path)
        self.cp(old_path, new_path)
        self.rm(old_path)

    def cp(self, old_path, new_path):
        old_path_, new_path_ = self.path(old_path), self.path(new_path)
        self.log.debug("S3contents.S3FS: Copying `%s` to `%s`", old_path_, new_path_)
        if self.isdir(old_path):
            old_dir_path, new_dir_path = old_path, new_path
            for obj in self.ls(old_dir_path):
                old_item_path = obj
                new_item_path = old_item_path.replace(old_dir_path, new_dir_path, 1)
                self.cp(old_item_path, new_item_path)
        elif self.isfile(old_path):
            self.fs.copy(old_path_, new_path_)

    def rm(self, path):
        path_ = self.path(path)
        self.log.debug("S3contents.S3FS: Removing: `%s`", path_)
        if self.isfile(path):
            self.log.debug("S3contents.S3FS: Removing file: `%s`", path_)
            self.fs.rm(path_)
        elif self.isdir(path):
            self.log.debug("S3contents.S3FS: Removing directory: `%s`", path_)
            self.fs.rm(path_ + self.delimiter, recursive=True)
            # self.fs.rmdir(path_ + self.delimiter, recursive=True)

    def mkdir(self, path):
        path_ = self.path(path, self.dir_keep_file)
        self.log.debug("S3contents.S3FS: Making dir: `%s`", path_)
        self.fs.touch(path_)

    def read(self, path):
        path_ = self.path(path)
        if not self.isfile(path):
            raise NoSuchFile(path_)
        with self.fs.open(path_, mode='rb') as f:
            content = f.read().decode("utf-8")
        return content

    def lstat(self, path):
        path_ = self.path(path)
        info = self.fs.info(path_, refresh=True)
        ret = {}
        ret["ST_MTIME"] = info["LastModified"]
        return ret

    def write(self, path, content):
        path_ = self.path(self.unprefix(path))
        content_ = base64.b64decode(content)
        self.log.debug("S3contents.S3FS: Writing file: `%s`", path_)
        with self.fs.open(path_, mode='wb') as f:
            f.write(content_)

    def writenotebook(self, path, content):
        path_ = self.path(self.unprefix(path))
        self.log.debug("S3contents.S3FS: Writing notebook: `%s`", path_)
        with self.fs.open(path_, mode='wb') as f:
            f.write(content.encode("utf-8"))

    # Utilities ---------------------------------------------------------------

    def get_prefix(self):
        """Full prefix: bucket + optional prefix"""
        prefix = self.bucket
        if self.prefix:
            prefix += self.delimiter + self.prefix
        return prefix

    prefix_ = property(get_prefix)

    def unprefix(self, path):
        """Remove the self.prefix_ (if present) from a path or list of paths"""
        if isinstance(path, six.string_types):
            path = path[len(self.prefix_):] if path.startswith(self.prefix_) else path
            path = path[1:] if path.startswith(self.delimiter) else path
            return path
        if isinstance(path, (list, tuple)):
            path = [
                p[len(self.prefix_):] if p.startswith(self.prefix_) else p
                for p in path
            ]
            path = [p[1:] if p.startswith(self.delimiter) else p for p in path]
            return path

    def path(self, *path):
        """Utility to join paths including the bucket and prefix"""
        path = list(filter(None, path))
        path = self.unprefix(path)
        items = [self.prefix_] + path
        return self.delimiter.join(items)
class S3FS(GenericFS):

    access_key_id = Unicode(
        help="S3/AWS access key ID", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_ACCESS_KEY_ID")
    secret_access_key = Unicode(
        help="S3/AWS secret access key", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_SECRET_ACCESS_KEY")
    endpoint_url = Unicode(
        "s3.amazonaws.com", help="S3 endpoint URL").tag(
            config=True, env="JPYNB_S3_ENDPOINT_URL")
    region_name = Unicode(
        "us-east-1", help="Region name").tag(
            config=True, env="JPYNB_S3_REGION_NAME")
    bucket = Unicode(
        "notebooks", help="Bucket name to store notebooks").tag(
            config=True, env="JPYNB_S3_BUCKET")
    signature_version = Unicode(help="").tag(config=True)
    sse = Unicode(help="Type of server-side encryption to use").tag(config=True)
    kms_key_id = Unicode(help="KMS ID to use to encrypt workbooks").tag(config=True)
    prefix = Unicode(
        "", help="Prefix path inside the specified bucket").tag(config=True)
    delimiter = Unicode("/", help="Path delimiter").tag(config=True)
    dir_keep_file = Unicode(
        ".s3keep",
        help="Empty file to create when creating directories").tag(config=True)
    session_token = Unicode(
        help="S3/AWS session token", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_SESSION_TOKEN")

    def refresh_fs_connection(self):
        with open("/secrets/config.env") as fp:
            for line in fp:
                # print(line)
                key, value = line.replace('"', '').replace(
                    'export ', '', 1).strip().split('=', 1)
                # print(key + " " + value)  # debug only: would echo credentials
                if key == "AWS_ACCESS_KEY_ID":
                    self.access_key_id = value
                elif key == "AWS_SECRET_ACCESS_KEY":
                    self.secret_access_key = value
                elif key == "AWS_SESSION_TOKEN":
                    self.session_token = value
                elif key == "AWS_S3_BUCKET":
                    self.bucket = value
                elif key == "AWS_REGION":
                    self.region_name = value
                elif key == "AWS_S3_KMS_KEY_ARN":
                    self.kms_key_id = value

    def __init__(self, log, **kwargs):
        super(S3FS, self).__init__(**kwargs)
        self.log = log
        self.refresh_fs_connection()

        client_kwargs = {
            "endpoint_url": self.endpoint_url,
            "region_name": self.region_name,
        }
        config_kwargs = {}
        if self.signature_version:
            config_kwargs["signature_version"] = self.signature_version
        s3_additional_kwargs = {}
        if self.sse:
            s3_additional_kwargs["ServerSideEncryption"] = self.sse
        if self.kms_key_id:
            s3_additional_kwargs["SSEKMSKeyId"] = self.kms_key_id

        self.fs = s3fs.S3FileSystem(
            key=self.access_key_id,
            secret=self.secret_access_key,
            token=self.session_token,
            client_kwargs=client_kwargs,
            config_kwargs=config_kwargs,
            s3_additional_kwargs=s3_additional_kwargs)

        self.init()

    def init(self):
        try:
            self.mkdir("")
            self.ls("")
            self.isdir("")
        except ClientError as ex:
            if "AccessDenied" in str(ex):
                policy = SAMPLE_ACCESS_POLICY.format(
                    bucket=os.path.join(self.bucket, self.prefix))
                self.log.error(
                    "AccessDenied error while creating initial S3 objects. "
                    "Create an IAM policy like:\n{policy}".format(policy=policy))
                sys.exit(1)
            else:
                raise ex

    # GenericFS methods ------------------------------------------------------

    def ls(self, path=""):
        path_ = self.path(path)
        self.log.debug("S3contents.S3FS Auto Reload ls: Started")
        self.refresh_fs_connection()
        self.log.debug("S3contents.S3FS Auto Reload ls: Completed")
        self.log.debug("S3contents.S3FS: Listing directory: `%s`", path_)
        files = self.fs.ls(path_, refresh=True)
        return self.unprefix(files)

    def isfile(self, path):
        path_ = self.path(path)
        is_file = False

        exists = self.fs.exists(path_)
        if not exists:
            is_file = False
        else:
            try:
                # Info will fail if path is a dir
                self.fs.info(path_)
                is_file = True
            except FileNotFoundError:
                pass

        self.log.debug("S3contents.S3FS: `%s` is a file: %s", path_, is_file)
        return is_file

    def isdir(self, path):
        path_ = self.path(path)
        is_dir = False

        exists = self.fs.exists(path_)
        if not exists:
            is_dir = False
        else:
            try:
                # Info will fail if path is a dir
                self.fs.info(path_)
                is_dir = False
            except FileNotFoundError:
                is_dir = True

        self.log.debug("S3contents.S3FS: `%s` is a directory: %s", path_, is_dir)
        return is_dir

    def mv(self, old_path, new_path):
        self.log.debug("S3contents.S3FS: Move file `%s` to `%s`", old_path, new_path)
        self.cp(old_path, new_path)
        self.rm(old_path)

    def cp(self, old_path, new_path):
        old_path_, new_path_ = self.path(old_path), self.path(new_path)
        self.log.debug("S3contents.S3FS: Copying `%s` to `%s`", old_path_, new_path_)
        if self.isdir(old_path):
            old_dir_path, new_dir_path = old_path, new_path
            for obj in self.ls(old_dir_path):
                old_item_path = obj
                new_item_path = old_item_path.replace(old_dir_path, new_dir_path, 1)
                self.cp(old_item_path, new_item_path)
        elif self.isfile(old_path):
            self.fs.copy(old_path_, new_path_)

    def rm(self, path):
        path_ = self.path(path)
        self.log.debug("S3contents.S3FS: Removing: `%s`", path_)
        if self.isfile(path):
            self.log.debug("S3contents.S3FS: Removing file: `%s`", path_)
            self.fs.rm(path_)
        elif self.isdir(path):
            self.log.debug("S3contents.S3FS: Removing directory: `%s`", path_)
            self.fs.rm(path_ + self.delimiter, recursive=True)
            # self.fs.rmdir(path_ + self.delimiter, recursive=True)

    def mkdir(self, path):
        path_ = self.path(path, self.dir_keep_file)
        self.log.debug("S3contents.S3FS: Making dir: `%s`", path_)
        self.fs.touch(path_, acl='private')

    def read(self, path):
        path_ = self.path(path)
        if not self.isfile(path):
            raise NoSuchFile(path_)
        with self.fs.open(path_, mode='rb', acl='private') as f:
            content = f.read().decode("utf-8")
        return content

    def lstat(self, path):
        path_ = self.path(path)
        info = self.fs.info(path_)
        ret = {}
        ret["ST_MTIME"] = info["LastModified"]
        return ret

    def write(self, path, content, format):
        path_ = self.path(self.unprefix(path))
        self.log.debug("S3contents.S3FS: Writing file: `%s`", path_)
        if format not in {'text', 'base64'}:
            raise HTTPError(
                400,
                "Must specify format of file contents as 'text' or 'base64'",
            )
        try:
            if format == 'text':
                content_ = content.encode('utf8')
            else:
                b64_bytes = content.encode('ascii')
                content_ = base64.b64decode(b64_bytes)
        except Exception as e:
            raise HTTPError(400, u'Encoding error saving %s: %s' % (path_, e))
        with self.fs.open(path_, mode='wb', acl='private') as f:
            f.write(content_)

    def writenotebook(self, path, content):
        path_ = self.path(self.unprefix(path))
        self.log.debug("S3contents.S3FS: Writing notebook: `%s`", path_)
        with self.fs.open(path_, mode='wb', acl='private') as f:
            f.write(content.encode("utf-8"))

    # Utilities ---------------------------------------------------------------

    def get_prefix(self):
        """Full prefix: bucket + optional prefix"""
        prefix = self.bucket
        if self.prefix:
            prefix += self.delimiter + self.prefix
        return prefix

    prefix_ = property(get_prefix)

    def unprefix(self, path):
        """Remove the self.prefix_ (if present) from a path or list of paths"""
        if isinstance(path, six.string_types):
            path = path[len(self.prefix_):] if path.startswith(self.prefix_) else path
            path = path[1:] if path.startswith(self.delimiter) else path
            return path
        if isinstance(path, (list, tuple)):
            path = [
                p[len(self.prefix_):] if p.startswith(self.prefix_) else p
                for p in path
            ]
            path = [p[1:] if p.startswith(self.delimiter) else p for p in path]
            return path

    def path(self, *path):
        """Utility to join paths including the bucket and prefix"""
        path = list(filter(None, path))
        path = self.unprefix(path)
        items = [self.prefix_] + path
        return self.delimiter.join(items)
class S3ContentsManager(GenericContentsManager):

    access_key_id = Unicode(help="S3/AWS access key ID", allow_none=True,
                            default_value=None).tag(config=True, env="JPYNB_S3_ACCESS_KEY_ID")
    secret_access_key = Unicode(help="S3/AWS secret access key", allow_none=True,
                                default_value=None).tag(config=True, env="JPYNB_S3_SECRET_ACCESS_KEY")
    endpoint_url = Unicode("https://s3.amazonaws.com", help="S3 endpoint URL").tag(
        config=True, env="JPYNB_S3_ENDPOINT_URL")
    region_name = Unicode("us-east-1", help="Region name").tag(
        config=True, env="JPYNB_S3_REGION_NAME")
    bucket = Unicode("notebooks", help="Bucket name to store notebooks").tag(
        config=True, env="JPYNB_S3_BUCKET")
    prefix = Unicode("", help="Prefix path inside the specified bucket").tag(config=True)
    signature_version = Unicode(help="").tag(config=True)
    delimiter = Unicode("/", help="Path delimiter").tag(config=True)
    sse = Unicode(help="Type of server-side encryption to use").tag(config=True)
    kms_key_id = Unicode(help="KMS key ID to use to encrypt notebooks").tag(config=True)
    session_token = Unicode(help="S3/AWS session token", allow_none=True,
                            default_value=None).tag(config=True, env="JPYNB_S3_SESSION_TOKEN")
    boto3_session = Any(
        help="Place to store a custom boto3 session (passed to S3FS) - could be set by init_s3_hook")
    init_s3_hook = Any(help="Optional hook for initializing S3").tag(config=True)

    def __init__(self, *args, **kwargs):
        super(S3ContentsManager, self).__init__(*args, **kwargs)
        self.run_init_s3_hook()
        self._fs = S3FS(log=self.log,
                        access_key_id=self.access_key_id,
                        secret_access_key=self.secret_access_key,
                        endpoint_url=self.endpoint_url,
                        region_name=self.region_name,
                        bucket=self.bucket,
                        prefix=self.prefix,
                        session_token=self.session_token,
                        signature_version=self.signature_version,
                        delimiter=self.delimiter,
                        sse=self.sse,
                        kms_key_id=self.kms_key_id,
                        boto3_session=self.boto3_session)

    def run_init_s3_hook(self):
        if self.init_s3_hook is not None:
            self.init_s3_hook(self)

    def _save_notebook(self, model, path):
        def save_model():
            nb_contents = from_dict(model['content'])
            self.check_and_sign(nb_contents, path)
            file_contents = json.dumps(model["content"])
            self._fs.writenotebook(path, file_contents)
            self.validate_notebook_model(model)
            return model.get("message")

        m = model['content']['metadata']
        has_versioning = ('s3_requested_version' in m) and \
                         ('s3_current_version' in m) and \
                         ('s3_latest_version' in m)
        if not has_versioning:
            return save_model()

        version_changed = m['s3_requested_version'] != m['s3_current_version']
        version_latest = m['s3_current_version'] == m['s3_latest_version']
        version_requested = m['s3_latest_version'] is not None

        version_changed_not_latest = version_changed and (not version_latest) and version_requested
        version_changed_latest = version_changed and version_latest and version_requested
        content_changed_not_latest = ((not version_changed) or (not version_requested)) and (not version_latest)
        content_changed_latest = ((not version_changed) or (not version_requested)) and version_latest

        if version_changed_not_latest:
            self._fs.requested_version_id_lookup[path] = m['s3_requested_version']
        if content_changed_not_latest:
            raise Exception('Cannot overwrite older versions')
        if content_changed_latest or version_changed_latest:
            if 's3_create_release' in m:
                if 's3_latest_release_tag' in m:
                    m['s3_latest_release_tag'] = int(m['s3_latest_release_tag']) + 1
                else:
                    m['s3_latest_release_tag'] = 0
                self._fs.create_release_tag(path, m['s3_latest_release_tag'],
                                            m['s3_create_release'])
                del m['s3_create_release']
            if version_changed_latest:
                self._fs.requested_version_id_lookup[path] = m['s3_requested_version']
            else:
                self._fs.requested_version_id_lookup[path] = None
        return save_model()
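# A minimal sketch (illustrative, not part of the library) of configuring the
# manager above from jupyter_notebook_config.py. The bucket name and the hook
# body are made-up examples; init_s3_hook receives the manager instance before
# S3FS is constructed, so it can install a custom boto3 session.
import boto3

def my_init_s3_hook(manager):
    # Assumption: the default boto3 credential chain suffices here; swap in
    # an STS/assume-role session as needed.
    manager.boto3_session = boto3.session.Session(region_name=manager.region_name)

c = get_config()
c.NotebookApp.contents_manager_class = "s3contents.S3ContentsManager"
c.S3ContentsManager.bucket = "example-notebooks-bucket"
c.S3ContentsManager.init_s3_hook = my_init_s3_hook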
class GFFS(GenericFS):

    project = Unicode(help="GFile Project", allow_none=True,
                      default_value=None).tag(config=True, env="JPYNB_GCS_PROJECT")
    region_name = Unicode("us-east-1", help="Region name").tag(
        config=True, env="JPYNB_GCS_REGION_NAME")
    prefix = Unicode("", help="Prefix path inside the specified bucket").tag(config=True)
    separator = Unicode("/", help="Path separator").tag(config=True)
    dir_keep_file = Unicode(
        "", help="Empty file to create when creating directories").tag(config=True)

    def __init__(self, log, **kwargs):
        super(GFFS, self).__init__(**kwargs)
        self.log = log
        self.fs = tf.io.gfile
        self.dstat = OrderedDictCache()
        self.init()

    def init(self):
        self.mkdir("")
        self.ls("")
        assert self.isdir(""), "The root directory should exist :)"

    # GenericFS methods -----------------------------------------------------------------------------------------------

    def ls(self, path, contain_hidden=False):
        path_ = self.path(path)
        self.log.debug("S3contents.GFFS: Listing directory: `%s`", path_)
        files = [
            path + self.separator + f for f in self.fs.listdir(path_)
            if contain_hidden or not is_file_hidden(f)
        ]
        return self.unprefix(files)

    def isfile(self, path):
        st = self.lstat(path)
        return st['type'] == 'file'

    def isdir(self, path):
        st = self.lstat(path)
        return st['type'] == 'directory'

    def mv(self, old_path, new_path):
        self.log.debug("S3contents.GFFS: Move file `%s` to `%s`", old_path, new_path)
        self.cp(old_path, new_path)
        self.rm(old_path)

    def cp(self, old_path, new_path):
        old_path_, new_path_ = self.path(old_path), self.path(new_path)
        self.log.debug("S3contents.GFFS: Copying `%s` to `%s`", old_path_, new_path_)
        if self.isdir(old_path):
            old_dir_path, new_dir_path = old_path, new_path
            subdirs = self.ls(old_dir_path, True)
            if subdirs:
                for obj in subdirs:
                    old_item_path = obj
                    new_item_path = old_item_path.replace(old_dir_path, new_dir_path, 1)
                    self.cp(old_item_path, new_item_path)
            else:
                self.fs.mkdir(new_path_)  # empty dir
        elif self.isfile(old_path):
            self.fs.copy(old_path_, new_path_)

    def rm(self, path):
        path_ = self.path(path)
        self.log.debug("S3contents.GFFS: Removing: `%s`", path_)
        if self.isfile(path):
            self.log.debug("S3contents.GFFS: Removing file: `%s`", path_)
            self.fs.remove(path_)
        elif self.isdir(path):
            self.log.debug("S3contents.GFFS: Removing directory: `%s`", path_)
            self.fs.rmtree(path_)

    def mkdir(self, path):
        path_ = self.path(path)  # , self.dir_keep_file)
        self.log.debug("S3contents.GFFS: Making dir: `%s`", path_)
        self.fs.makedirs(path_)

    def read(self, path, format=None):
        path_ = self.path(path)
        if not self.isfile(path):
            raise NoSuchFile(path_)
        with self.fs.GFile(path_, mode='rb') as f:
            if f.size() > LARGEFSIZE:
                def downchunk():
                    while True:
                        buf = f.read(n=1048576)
                        if not buf:
                            break
                        yield buf
                return downchunk(), 'base64'
            bcontent = f.read()
        if format is None or format == 'text':
            # Try to interpret as unicode if format is unknown or if unicode
            # was explicitly requested.
            try:
                self.log.debug("S3contents.GFFS: read: `%s`", path_)
                return bcontent.decode('utf8'), 'text'
            except UnicodeError:
                if format == 'text':
                    raise HTTPError(
                        400,
                        "%s is not UTF-8 encoded" % path_,
                        reason='bad format',
                    )
        return encodebytes(bcontent).decode('ascii'), 'base64'

    def lstat(self, path):
        calltime = time.time()
        if path in self.dstat:
            st = self.dstat[path]
            if calltime - st["calltime"] < 5:
                return st
        path_ = self.path(path)
        self.log.debug("S3contents.GFFS: lstat file: `%s` `%s`", path, path_)
        try:
            info = self.fs.stat(path_)
            self.dstat[path] = {
                "calltime": calltime,
                "ST_MTIME": info.mtime_nsec // 1000000,
                "size": info.length,
                "type": "directory" if info.is_directory else "file",
            }
        except tf.errors.NotFoundError:
            self.dstat[path] = {
                "calltime": calltime,
                "ST_MTIME": 0,
                "type": None,
            }
        return self.dstat[path]

    def write(self, path, content, format=None, mode='wb'):
        path_ = self.path(self.unprefix(path))
        self.log.debug("S3contents.GFFS: Writing file: `%s`", path_)
        with self.fs.GFile(path_, mode=mode) as f:
            if format == 'base64':
                b64_bytes = content.encode('ascii')
                f.write(decodebytes(b64_bytes))
            else:
                f.write(content.encode("utf-8"))

    # Utilities -------------------------------------------------------------------------------------------------------

    def strip(self, path):
        if isinstance(path, six.string_types):
            return path.strip(self.separator)
        if isinstance(path, (list, tuple)):
            return list(map(self.strip, path))

    def join(self, *paths):
        paths = self.strip(paths)
        return self.separator.join(paths)

    def get_prefix(self):
        return self.prefix

    prefix_ = property(get_prefix)

    def unprefix(self, path):
        """Remove the self.prefix_ (if present) from a path or list of paths"""
        path = self.strip(path)
        if isinstance(path, six.string_types):
            path = path[len(self.prefix_):] if path.startswith(self.prefix_) else path
            path = path[1:] if path.startswith(self.separator) else path
            return path
        if isinstance(path, (list, tuple)):
            path = [p[len(self.prefix_):] if p.startswith(self.prefix_) else p
                    for p in path]
            path = [p[1:] if p.startswith(self.separator) else p for p in path]
            return path

    def path(self, *path):
        """Utility to join paths including the bucket and prefix"""
        path = list(filter(None, path))
        path = self.unprefix(path)
        items = [self.prefix_] + path
        return self.join(*items)
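# A minimal sketch (illustrative, not part of the library) of consuming
# GFFS.read() above: small files come back as (str, 'text') or a base64 str,
# while files larger than LARGEFSIZE come back as a generator of byte chunks
# tagged 'base64'. `fs` is an assumed, already-constructed GFFS instance and
# the paths are made up.
import base64

def download(fs, remote_path, local_path):
    content, fmt = fs.read(remote_path)
    with open(local_path, "wb") as out:
        if fmt == 'text':
            out.write(content.encode("utf-8"))
        elif isinstance(content, str):
            out.write(base64.b64decode(content))  # small binary file
        else:
            for chunk in content:                 # large file: chunk generator
                out.write(chunk)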
class S3FS(GenericFS):

    access_key_id = Unicode(help="S3/AWS access key ID", allow_none=True,
                            default_value=None).tag(config=True, env="JPYNB_S3_ACCESS_KEY_ID")
    secret_access_key = Unicode(help="S3/AWS secret access key", allow_none=True,
                                default_value=None).tag(config=True, env="JPYNB_S3_SECRET_ACCESS_KEY")
    endpoint_url = Unicode("s3-us-gov-east-1.amazonaws.com", help="S3 endpoint URL").tag(
        config=True, env="JPYNB_S3_ENDPOINT_URL")
    region_name = Unicode("us-gov-east-1", help="Region name").tag(
        config=True, env="JPYNB_S3_REGION_NAME")
    bucket = Unicode("notebooks", help="Bucket name to store notebooks").tag(
        config=True, env="JPYNB_S3_BUCKET")
    signature_version = Unicode(help="").tag(config=True)
    sse = Unicode(help="Type of server-side encryption to use").tag(config=True)
    kms_key_id = Unicode(help="KMS key ID to use to encrypt notebooks").tag(config=True)
    prefix = Unicode("", help="Prefix path inside the specified bucket").tag(config=True)
    delimiter = Unicode("/", help="Path delimiter").tag(config=True)
    dir_keep_file = Unicode(
        ".s3keep", help="Empty file to create when creating directories").tag(config=True)
    session_token = Unicode(help="S3/AWS session token", allow_none=True,
                            default_value=None).tag(config=True, env="JPYNB_S3_SESSION_TOKEN")
    boto3_session = Any(help="Place to store a custom boto3 session instance - likely passed in")
    s3fs_additional_kwargs = Any(
        help="Optional dictionary appended to s3fs additional kwargs").tag(config=True)

    def __init__(self, log, **kwargs):
        super(S3FS, self).__init__(**kwargs)
        self.log = log

        client_kwargs = {
            "endpoint_url": self.endpoint_url,
            "region_name": self.region_name,
        }
        config_kwargs = {}
        if self.signature_version:
            config_kwargs["signature_version"] = self.signature_version

        if self.s3fs_additional_kwargs:
            self.must_be_dictionary(self.s3fs_additional_kwargs)
            s3_additional_kwargs = self.s3fs_additional_kwargs
        else:
            s3_additional_kwargs = {}
        if self.sse:
            s3_additional_kwargs["ServerSideEncryption"] = self.sse
        if self.kms_key_id:
            s3_additional_kwargs["SSEKMSKeyId"] = self.kms_key_id

        self.fs = s3fs.S3FileSystem(
            key=self.access_key_id,
            secret=self.secret_access_key,
            token=self.session_token,
            client_kwargs=client_kwargs,
            config_kwargs=config_kwargs,
            s3_additional_kwargs=s3_additional_kwargs,
            session=self.boto3_session,
        )
        self.init()

    def init(self):
        try:
            self.mkdir("")
            self.ls("")
            self.isdir("")
        except ClientError as ex:
            if "AccessDenied" in str(ex):
                policy = SAMPLE_ACCESS_POLICY.format(
                    bucket=os.path.join(self.bucket, self.prefix))
                self.log.error(
                    "AccessDenied error while creating initial S3 objects. "
                    "Create an IAM policy like:\n{policy}".format(policy=policy))
                sys.exit(1)
            else:
                raise ex

    # GenericFS methods -----------------------------------------------------------------------------------------------

    def ls(self, path=""):
        path_ = self.path(path)
        self.log.debug("S3contents.S3FS.ls: Listing directory: `%s`", path_)
        files = self.fs.ls(path_, refresh=True)
        return self.unprefix(files)

    def isfile(self, path):
        path_ = self.path(path)
        # FileNotFoundError handled by s3fs
        is_file = self.fs.isfile(path_)
        self.log.debug("S3contents.S3FS: `%s` is a file: %s", path_, is_file)
        return is_file

    def isdir(self, path):
        path_ = self.path(path)
        # FileNotFoundError handled by s3fs
        is_dir = self.fs.isdir(path_)
        self.log.debug("S3contents.S3FS: `%s` is a directory: %s", path_, is_dir)
        return is_dir

    def mv(self, old_path, new_path):
        self.log.debug("S3contents.S3FS: Move file `%s` to `%s`", old_path, new_path)
        self.cp(old_path, new_path)
        self.rm(old_path)

    def cp(self, old_path, new_path):
        old_path_, new_path_ = self.path(old_path), self.path(new_path)
        self.log.debug("S3contents.S3FS: Copying `%s` to `%s`", old_path_, new_path_)
        if self.isdir(old_path):
            old_dir_path, new_dir_path = old_path, new_path
            for obj in self.ls(old_dir_path):
                old_item_path = obj
                new_item_path = old_item_path.replace(old_dir_path, new_dir_path, 1)
                self.cp(old_item_path, new_item_path)
            self.mkdir(new_path)  # Touch with dir_keep_file
        elif self.isfile(old_path):
            self.fs.copy(old_path_, new_path_)

    def rm(self, path):
        path_ = self.path(path)
        self.log.debug("S3contents.S3FS: Removing: `%s`", path_)
        if self.isfile(path):
            self.log.debug("S3contents.S3FS: Removing file: `%s`", path_)
            self.fs.rm(path_)
        elif self.isdir(path):
            self.log.debug("S3contents.S3FS: Removing directory: `%s`", path_)
            self.fs.rm(path_ + self.delimiter, recursive=True)
            # self.fs.rmdir(path_ + self.delimiter, recursive=True)

    def mkdir(self, path):
        path_ = self.path(path, self.dir_keep_file)
        self.log.debug("S3contents.S3FS: Making dir: `%s`", path_)
        self.fs.touch(path_)

    def read(self, path, format):
        path_ = self.path(path)
        if not self.isfile(path):
            raise NoSuchFile(path_)
        with self.fs.open(path_, mode="rb") as f:
            content = f.read()
        if format is None or format == "text":
            # Try to interpret as unicode if format is unknown or if unicode
            # was explicitly requested.
            try:
                return content.decode("utf-8"), "text"
            except UnicodeError:
                if format == "text":
                    err = "{} is not UTF-8 encoded".format(path_)
                    self.log.error(err)
                    raise HTTPError(400, err, reason="bad format")
        return base64.b64encode(content).decode("ascii"), "base64"

    def lstat(self, path):
        path_ = self.path(path)
        if self.fs.isdir(path_):
            # Try to get the status of the dir_keep_file
            path_ = self.path(path, self.dir_keep_file)
        try:
            self.fs.invalidate_cache(path_)
            info = self.fs.info(path_)
        except FileNotFoundError:
            return {"ST_MTIME": None}
        ret = {}
        ret["ST_MTIME"] = info["LastModified"]
        return ret

    def write(self, path, content, format):
        path_ = self.path(self.unprefix(path))
        self.log.debug("S3contents.S3FS: Writing file: `%s`", path_)
        if format not in {"text", "base64"}:
            raise HTTPError(
                400,
                "Must specify format of file contents as 'text' or 'base64'",
            )
        try:
            if format == "text":
                content_ = content.encode("utf8")
            else:
                b64_bytes = content.encode("ascii")
                content_ = base64.b64decode(b64_bytes)
        except Exception as e:
            raise HTTPError(400, "Encoding error saving %s: %s" % (path_, e))
        with self.fs.open(path_, mode="wb") as f:
            f.write(content_)

    def writenotebook(self, path, content):
        path_ = self.path(self.unprefix(path))
        self.log.debug("S3contents.S3FS: Writing notebook: `%s`", path_)
        with self.fs.open(path_, mode="wb") as f:
            f.write(content.encode("utf-8"))

    # Utilities -------------------------------------------------------------------------------------------------------

    def get_prefix(self):
        """Full prefix: bucket + optional prefix"""
        prefix = self.bucket
        if prefix.startswith("s3://"):
            prefix = prefix[5:]
        if self.prefix:
            prefix += self.delimiter + self.prefix
        return prefix

    prefix_ = property(get_prefix)

    def unprefix(self, path):
        """Remove the self.prefix_ (if present) from a path or list of paths"""
        self.log.debug(f"S3FS.unprefix: self.prefix_: {self.prefix_} path: {path}")
        if isinstance(path, str):
            path = path[len(self.prefix_):] if path.startswith(self.prefix_) else path
            path = path[1:] if path.startswith(self.delimiter) else path
            return path
        if isinstance(path, (list, tuple)):
            path = [p[len(self.prefix_):] if p.startswith(self.prefix_) else p
                    for p in path]
            path = [p[1:] if p.startswith(self.delimiter) else p for p in path]
            return path

    def path(self, *path):
        """Utility to join paths including the bucket and prefix"""
        path = list(filter(None, path))
        path = self.unprefix(path)
        items = [self.prefix_] + path
        return self.delimiter.join(items)

    @staticmethod
    def must_be_dictionary(dictionary):
        if not isinstance(dictionary, dict):
            raise ValueError(
                "s3fs_additional_kwargs must be a dictionary or None, its default value.")
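# A minimal sketch (illustrative, not part of the library) of enabling SSE-KMS
# through the traits above, again from jupyter_notebook_config.py; the bucket
# and key ID are placeholders. sse/kms_key_id flow into s3fs's
# s3_additional_kwargs as ServerSideEncryption/SSEKMSKeyId on every write.
c = get_config()
c.S3ContentsManager.bucket = "example-secure-notebooks"
c.S3ContentsManager.sse = "aws:kms"
c.S3ContentsManager.kms_key_id = "example-kms-key-id"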
class GCSFS(GenericFS):

    project = Unicode(help="GCP Project", allow_none=True,
                      default_value=None).tag(config=True, env="JPYNB_GCS_PROJECT")
    token = Unicode(help="Path to the GCP token", allow_none=True,
                    default_value=None).tag(config=True, env="JPYNB_GCS_TOKEN_PATH")
    region_name = Unicode("us-east-1", help="Region name").tag(
        config=True, env="JPYNB_GCS_REGION_NAME")
    bucket = Unicode("notebooks", help="Bucket name to store notebooks").tag(
        config=True, env="JPYNB_GCS_BUCKET")
    prefix = Unicode("", help="Prefix path inside the specified bucket").tag(config=True)
    separator = Unicode("/", help="Path separator").tag(config=True)
    dir_keep_file = Unicode(
        ".gcskeep", help="Empty file to create when creating directories").tag(config=True)

    def __init__(self, log, **kwargs):
        super(GCSFS, self).__init__(**kwargs)
        self.log = log
        token = os.path.expanduser(self.token)
        self.fs = gcsfs.GCSFileSystem(project=self.project, token=token)
        self.init()

    def init(self):
        self.mkdir("")
        self.ls("")
        assert self.isdir(""), "The root directory should exist :)"

    # GenericFS methods -----------------------------------------------------------------------------------------------

    def ls(self, path):
        path_ = self.path(path)
        self.log.debug("S3contents.GCSFS: Listing directory: `%s`", path_)
        files = self.fs.ls(path_)
        return self.unprefix(files)

    def isfile(self, path):
        path_ = self.path(path)
        is_file = False
        if self.fs.exists(path_):
            try:
                # info() raises FileNotFoundError when the path is a directory
                self.fs.info(path_)
                is_file = True
            except FileNotFoundError:
                pass
        self.log.debug("S3contents.GCSFS: `%s` is a file: %s", path_, is_file)
        return is_file

    def isdir(self, path):
        # GCSFS doesn't return exists=True for a directory with no files, so
        # check whether the dir_keep_file exists instead
        is_dir = self.isfile(path + self.separator + self.dir_keep_file)
        path_ = self.path(path)
        self.log.debug("S3contents.GCSFS: `%s` is a directory: %s", path_, is_dir)
        return is_dir

    def mv(self, old_path, new_path):
        self.log.debug("S3contents.GCSFS: Move file `%s` to `%s`", old_path, new_path)
        self.cp(old_path, new_path)
        self.rm(old_path)

    def cp(self, old_path, new_path):
        old_path_, new_path_ = self.path(old_path), self.path(new_path)
        self.log.debug("S3contents.GCSFS: Copying `%s` to `%s`", old_path_, new_path_)
        if self.isdir(old_path):
            old_dir_path, new_dir_path = old_path, new_path
            for obj in self.ls(old_dir_path):
                old_item_path = obj
                new_item_path = old_item_path.replace(old_dir_path, new_dir_path, 1)
                self.cp(old_item_path, new_item_path)
        elif self.isfile(old_path):
            self.fs.copy(old_path_, new_path_)

    def rm(self, path):
        path_ = self.path(path)
        self.log.debug("S3contents.GCSFS: Removing: `%s`", path_)
        if self.isfile(path):
            self.log.debug("S3contents.GCSFS: Removing file: `%s`", path_)
            self.fs.rm(path_)
        elif self.isdir(path):
            self.log.debug("S3contents.GCSFS: Removing directory: `%s`", path_)
            files = self.fs.walk(path_)
            for f in files:
                self.fs.rm(f)

    def mkdir(self, path):
        path_ = self.path(path, self.dir_keep_file)
        self.log.debug("S3contents.GCSFS: Making dir (touch): `%s`", path_)
        self.fs.touch(path_)

    def read(self, path):
        path_ = self.path(path)
        if not self.isfile(path):
            raise NoSuchFile(path_)
        with self.fs.open(path_, mode='rb') as f:
            content = f.read().decode("utf-8")
        return content

    def lstat(self, path):
        path_ = self.path(path)
        info = self.fs.info(path_)
        ret = {}
        ret["ST_MTIME"] = info["updated"]
        return ret

    def write(self, path, content, format):
        path_ = self.path(self.unprefix(path))
        self.log.debug("S3contents.GCSFS: Writing file: `%s`", path_)
        with self.fs.open(path_, mode='wb') as f:
            f.write(content.encode("utf-8"))

    # Utilities -------------------------------------------------------------------------------------------------------

    def strip(self, path):
        if isinstance(path, six.string_types):
            return path.strip(self.separator)
        if isinstance(path, (list, tuple)):
            return list(map(self.strip, path))

    def join(self, *paths):
        paths = self.strip(paths)
        return self.separator.join(paths)

    def get_prefix(self):
        """Full prefix: bucket + optional prefix"""
        prefix = self.bucket
        if self.prefix:
            prefix += self.separator + self.prefix
        return prefix

    prefix_ = property(get_prefix)

    def unprefix(self, path):
        """Remove the self.prefix_ (if present) from a path or list of paths"""
        path = self.strip(path)
        if isinstance(path, six.string_types):
            path = path[len(self.prefix_):] if path.startswith(self.prefix_) else path
            path = path[1:] if path.startswith(self.separator) else path
            return path
        if isinstance(path, (list, tuple)):
            path = [p[len(self.prefix_):] if p.startswith(self.prefix_) else p
                    for p in path]
            path = [p[1:] if p.startswith(self.separator) else p for p in path]
            return path

    def path(self, *path):
        """Utility to join paths including the bucket and prefix"""
        path = list(filter(None, path))
        path = self.unprefix(path)
        items = [self.prefix_] + path
        return self.join(*items)
class GenericContentsManager(ContentsManager, HasTraits):

    # This makes the checkpoints get saved on this directory
    root_dir = Unicode("./", config=True)

    post_save_hook = Any(
        None,
        config=True,
        allow_none=True,
        help="""Python callable or importstring thereof
        to be called on the path of a file just saved.

        This can be used to process the file on disk, such as converting the
        notebook to a script or HTML via nbconvert.

        It will be called as (all arguments passed by keyword)::

            hook(s3_path=s3_path, model=model, contents_manager=instance)

        - s3_path: the S3 path to the file just written (sans bucket/prefix)
        - model: the model representing the file
        - contents_manager: this ContentsManager instance
        """,
    )

    def __init__(self, *args, **kwargs):
        super(GenericContentsManager, self).__init__(*args, **kwargs)
        self._fs = None

    def get_fs(self):
        return self._fs

    fs = property(get_fs)

    def _checkpoints_class_default(self):
        return GenericFileCheckpoints

    def do_error(self, msg, code=500):
        raise HTTPError(code, msg)

    def no_such_entity(self, path):
        self.do_error("No such entity: [{path}]".format(path=path), 404)

    def already_exists(self, path):
        thing = "File" if self.file_exists(path) else "Directory"
        self.do_error("{thing} already exists: [{path}]".format(thing=thing, path=path), 409)

    def guess_type(self, path, allow_directory=True):
        """
        Guess the type of a file.

        If allow_directory is False, don't consider the possibility that the
        file is a directory.

        Parameters
        ----------
        path: string
        """
        if path.endswith(".ipynb"):
            return "notebook"
        elif allow_directory and self.dir_exists(path):
            return "directory"
        else:
            return "file"

    def file_exists(self, path):
        # Does a file exist at the given path?
        self.log.debug("S3contents.GenericManager.file_exists: ('%s')", path)
        return self.fs.isfile(path)

    def dir_exists(self, path):
        # Does a directory exist at the given path?
        self.log.debug("S3contents.GenericManager.dir_exists: path('%s')", path)
        return self.fs.isdir(path)

    def get(self, path, content=True, type=None, format=None):
        # Get a file or directory model.
        self.log.debug(
            "S3contents.GenericManager.get: path('%s') type(%s) format(%s)",
            path, type, format,
        )
        path = path.strip("/")

        if type is None:
            type = self.guess_type(path)
        try:
            func = {
                "directory": self._get_directory,
                "notebook": self._get_notebook,
                "file": self._get_file,
            }[type]
        except KeyError:
            raise ValueError("Unknown type passed: '{}'".format(type))
        return func(path=path, content=content, format=format)

    def _get_directory(self, path, content=True, format=None):
        self.log.debug(
            "S3contents.GenericManager._get_directory: path('%s') content(%s) format(%s)",
            path, content, format,
        )
        return self._directory_model_from_path(path, content=content)

    def _get_notebook(self, path, content=True, format=None):
        self.log.debug(
            "S3contents.GenericManager._get_notebook: path('%s') content(%s) format(%s)",
            path, content, format,
        )
        return self._notebook_model_from_path(path, content=content, format=format)

    def _get_file(self, path, content=True, format=None):
        self.log.debug(
            "S3contents.GenericManager._get_file: path('%s') content(%s) format(%s)",
            path, content, format,
        )
        return self._file_model_from_path(path, content=content, format=format)

    def _directory_model_from_path(self, path, content=False):
        def s3_detail_to_model(s3_detail):
            model_path = s3_detail["Key"]
            model = base_model(self.fs.unprefix(model_path))

            if s3_detail["StorageClass"] == 'DIRECTORY':
                model["created"] = model["last_modified"] = DUMMY_CREATED_DATE
                model["type"] = "directory"
                lstat = self.fs.lstat(model_path)
                if "ST_MTIME" in lstat and lstat["ST_MTIME"]:
                    model["last_modified"] = model["created"] = lstat["ST_MTIME"]
            else:
                model["last_modified"] = s3_detail.get("LastModified").replace(
                    microsecond=0, tzinfo=tzutc())
                model["created"] = model["last_modified"]
                # model["size"] = s3_detail.get("Size")
                model["type"] = "notebook" if model_path.endswith(".ipynb") else "file"
            return model

        self.log.debug(
            "S3contents.GenericManager._directory_model_from_path: path('%s') content(%s)",
            path, content,
        )
        model = base_directory_model(path)
        if self.fs.isdir(path):
            lstat = self.fs.lstat(path)
            if "ST_MTIME" in lstat and lstat["ST_MTIME"]:
                model["last_modified"] = model["created"] = lstat["ST_MTIME"]
        if content:
            if not self.dir_exists(path):
                self.no_such_entity(path)
            model["format"] = "json"
            prefixed_path = self.fs.path(path)
            files_s3_detail = sync(self.fs.fs.loop, self.fs.fs._lsdir, prefixed_path)
            filtered_files_s3_detail = list(
                filter(
                    lambda detail: os.path.basename(detail['Key']) != self.fs.dir_keep_file,
                    files_s3_detail))
            model["content"] = list(map(s3_detail_to_model, filtered_files_s3_detail))
        return model

    def _notebook_model_from_path(self, path, content=False, format=None):
        """Build a notebook model from database record."""
        model = base_model(path)
        model["type"] = "notebook"
        if self.fs.isfile(path):
            model["last_modified"] = model["created"] = self.fs.lstat(path)["ST_MTIME"]
        else:
            model["last_modified"] = model["created"] = DUMMY_CREATED_DATE
        if content:
            if not self.fs.isfile(path):
                self.no_such_entity(path)
            file_content, _ = self.fs.read(path, format)
            nb_content = reads(file_content, as_version=NBFORMAT_VERSION)
            self.mark_trusted_cells(nb_content, path)
            model["format"] = "json"
            model["content"] = nb_content
            self.validate_notebook_model(model)
        return model

    def _file_model_from_path(self, path, content=False, format=None):
        """Build a file model from database record."""
        model = base_model(path)
        model["type"] = "file"
        if self.fs.isfile(path):
            model["last_modified"] = model["created"] = self.fs.lstat(path)["ST_MTIME"]
        else:
            model["last_modified"] = model["created"] = DUMMY_CREATED_DATE
        if content:
            try:
                # Get updated format from fs.read()
                content, format_ = self.fs.read(path, format)
            except NoSuchFile as e:
                self.no_such_entity(e.path)
            except GenericFSError as e:
                self.do_error(str(e), 500)
            model["format"] = format_
            model["content"] = content
            model["mimetype"] = mimetypes.guess_type(path)[0] or "text/plain"
        return model

    def save(self, model, path):
        """Save a file or directory model to path."""
        # Chunked uploads
        # See https://jupyter-notebook.readthedocs.io/en/stable/extending/contents.html#chunked-saving
        chunk = model.get("chunk", None)
        if chunk is not None:
            return self._save_large_file(chunk, model, path, model.get("format"))

        self.log.debug("S3contents.GenericManager.save %s: '%s'", model, path)
        if "type" not in model:
            self.do_error("No model type provided", 400)
        if "content" not in model and model["type"] != "directory":
            self.do_error("No file content provided", 400)
        if model["type"] not in ("file", "directory", "notebook"):
            self.do_error("Unhandled contents type: %s" % model["type"], 400)
        self.run_pre_save_hook(model=model, path=path)
        try:
            if model["type"] == "notebook":
                validation_message = self._save_notebook(model, path)
            elif model["type"] == "file":
                validation_message = self._save_file(model, path)
            else:
                validation_message = self._save_directory(path)
        except Exception as e:
            self.log.error("Error while saving file: %s %s", path, e, exc_info=True)
            self.do_error("Unexpected error while saving file: %s %s" % (path, e), 500)
        model = self.get(path, type=model["type"], content=False)
        self.run_post_save_hook(model=model, s3_path=model["path"])
        if validation_message is not None:
            model["message"] = validation_message
        return model

    def _save_large_file(self, chunk, model, path, format):
        if "type" not in model:
            self.do_error("No file type provided", 400)
        if model["type"] != "file":
            self.do_error(
                'File type "{}" is not supported for large file transfer'.format(model["type"]),
                400,
            )
        if "content" not in model and model["type"] != "directory":
            self.do_error("No file content provided", 400)
        if format not in {"text", "base64"}:
            self.do_error("Must specify format of file contents as 'text' or 'base64'", 400)

        prune_stale_chunks()

        self.log.debug("S3contents.GenericManager.save (chunk %s) %s: '%s'",
                       chunk, model, path)
        try:
            if chunk == 1:
                self.run_pre_save_hook(model=model, path=path)
            # Store the chunk in our registry
            store_content_chunk(path, model["content"])
        except Exception as e:
            self.log.error(
                "S3contents.GenericManager._save_large_file: error while saving file: %s %s",
                path, e, exc_info=True,
            )
            self.do_error(f"Unexpected error while saving file: {path} {e}")

        if chunk == -1:
            # Last chunk: combine the chunks in the registry to compose the full file content
            model["content"] = assemble_chunks(path)
            delete_chunks(path)
            self._save_file(model, path)

        return self.get(path, content=False)

    def _save_notebook(self, model, path):
        nb_contents = from_dict(model["content"])
        self.check_and_sign(nb_contents, path)
        file_contents = json.dumps(model["content"])
        self.fs.write(path, file_contents)
        self.validate_notebook_model(model)
        return model.get("message")

    def _save_file(self, model, path):
        file_contents = model["content"]
        file_format = model.get("format")
        self.fs.write(path, file_contents, file_format)

    def _save_directory(self, path):
        self.fs.mkdir(path)

    def rename_file(self, old_path, new_path):
        """Rename a file or directory.

        NOTE: This method is unfortunately named on the base class. It
        actually moves a file or a directory.
        """
        self.log.debug(
            "S3contents.GenericManager.rename_file: Init rename of '%s' to '%s'",
            old_path, new_path,
        )
        if self.file_exists(new_path) or self.dir_exists(new_path):
            self.already_exists(new_path)
        elif self.file_exists(old_path) or self.dir_exists(old_path):
            self.log.debug(
                "S3contents.GenericManager: Actually renaming '%s' to '%s'",
                old_path, new_path,
            )
            self.fs.mv(old_path, new_path)
        else:
            self.no_such_entity(old_path)

    def delete_file(self, path):
        """Delete the file or directory at path."""
        self.log.debug("S3contents.GenericManager.delete_file '%s'", path)
        if self.file_exists(path) or self.dir_exists(path):
            self.fs.rm(path)
        else:
            self.no_such_entity(path)

    def is_hidden(self, path):
        """Is path a hidden directory or file?"""
        self.log.debug("S3contents.GenericManager.is_hidden '%s'", path)
        return False

    @validate("post_save_hook")
    def _validate_post_save_hook(self, proposal):
        value = proposal["value"]
        if isinstance(value, string_types):
            value = import_item(value)
        if not callable(value):
            raise TraitError("post_save_hook must be callable")
        return value

    def run_post_save_hook(self, model, s3_path):
        """Run the post-save hook if defined, and log errors"""
        if self.post_save_hook:
            try:
                self.log.debug("Running post-save hook on %s", s3_path)
                self.post_save_hook(s3_path=s3_path, model=model, contents_manager=self)
            except Exception as e:
                self.log.error("Post-save hook failed on %s", s3_path, exc_info=True)
                raise HTTPError(
                    500, "Unexpected error while running post-save hook: %s" % e) from e
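# A minimal sketch (illustrative, not part of the library) of a post_save_hook
# honoring the keyword-only contract documented on the trait above; the body
# is a made-up example that only logs notebook saves.
def log_notebook_saves(s3_path, model, contents_manager):
    if model["type"] == "notebook":
        contents_manager.log.info("Notebook saved to S3 path: %s", s3_path)

c = get_config()
c.S3ContentsManager.post_save_hook = log_notebook_saves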