Example #1
class S3ContentsManager(GenericContentsManager):

    access_key_id = Unicode(help="S3/AWS access key ID",
                            allow_none=True,
                            default_value=None).tag(
                                config=True, env="JPYNB_S3_ACCESS_KEY_ID")
    secret_access_key = Unicode(help="S3/AWS secret access key",
                                allow_none=True,
                                default_value=None).tag(
                                    config=True,
                                    env="JPYNB_S3_SECRET_ACCESS_KEY")

    endpoint_url = Unicode("https://s3.amazonaws.com",
                           help="S3 endpoint URL").tag(
                               config=True, env="JPYNB_S3_ENDPOINT_URL")
    region_name = Unicode("us-east-1",
                          help="Region name").tag(config=True,
                                                  env="JPYNB_S3_REGION_NAME")
    bucket = Unicode("notebooks", help="Bucket name to store notebooks").tag(
        config=True, env="JPYNB_S3_BUCKET")
    prefix = Unicode(
        "", help="Prefix path inside the specified bucket").tag(config=True)
    signature_version = Unicode(help="").tag(config=True)
    delimiter = Unicode("/", help="Path delimiter").tag(config=True)
    sse = Unicode(help="Type of server-side encryption to use").tag(
        config=True)

    session_token = Unicode(help="S3/AWS session token",
                            allow_none=True,
                            default_value=None).tag(
                                config=True, env="JPYNB_S3_SESSION_TOKEN")

    def __init__(self, *args, **kwargs):
        super(S3ContentsManager, self).__init__(*args, **kwargs)

        self._fs = S3FS(log=self.log,
                        access_key_id=self.access_key_id,
                        secret_access_key=self.secret_access_key,
                        endpoint_url=self.endpoint_url,
                        region_name=self.region_name,
                        bucket=self.bucket,
                        prefix=self.prefix,
                        session_token=self.session_token,
                        signature_version=self.signature_version,
                        delimiter=self.delimiter,
                        sse=self.sse)

    def _save_notebook(self, model, path):
        nb_contents = from_dict(model['content'])
        self.check_and_sign(nb_contents, path)
        file_contents = json.dumps(model["content"])
        self._fs.writenotebook(path, file_contents)
        self.validate_notebook_model(model)
        return model.get("message")
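
A manager like the one above is typically wired into Jupyter through jupyter_notebook_config.py. A minimal sketch, assuming the standard s3contents entry point (the bucket, prefix, and credential values are placeholders):

from s3contents import S3ContentsManager

c = get_config()  # provided by Jupyter when it loads the config file

c.NotebookApp.contents_manager_class = S3ContentsManager
c.S3ContentsManager.bucket = "my-notebook-bucket"  # placeholder
c.S3ContentsManager.prefix = "notebooks"  # placeholder
# Credentials can also come from the JPYNB_S3_* environment variables
# declared on each trait via .tag(env=...).
c.S3ContentsManager.access_key_id = "<access-key-id>"
c.S3ContentsManager.secret_access_key = "<secret-access-key>"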
Example #2
class S3ContentsManager(GenericContentsManager):

    access_key_id = Unicode(
        help="S3/AWS access key ID", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_ACCESS_KEY_ID")
    secret_access_key = Unicode(
        help="S3/AWS secret access key", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_SECRET_ACCESS_KEY")

    endpoint_url = Unicode(
        "https://s3.amazonaws.com", help="S3 endpoint URL").tag(
            config=True, env="JPYNB_S3_ENDPOINT_URL")
    region_name = Unicode(
        "us-east-1", help="Region name").tag(
            config=True, env="JPYNB_S3_REGION_NAME")
    bucket = Unicode(
        "notebooks", help="Bucket name to store notebooks").tag(
            config=True, env="JPYNB_S3_BUCKET")
    prefix = Unicode("", help="Prefix path inside the specified bucket").tag(config=True)
    signature_version = Unicode(help="").tag(config=True)
    delimiter = Unicode("/", help="Path delimiter").tag(config=True)

    def __init__(self, *args, **kwargs):
        super(S3ContentsManager, self).__init__(*args, **kwargs)

        self._fs = S3FS(
            log=self.log,
            access_key_id=self.access_key_id,
            secret_access_key=self.secret_access_key,
            endpoint_url=self.endpoint_url,
            region_name=self.region_name,
            bucket=self.bucket,
            prefix=self.prefix,
            signature_version=self.signature_version,
            delimiter=self.delimiter)
Example #3
class GFContentsManager(GenericContentsManager):
    project = Unicode(help="GCP Project", allow_none=True,
                      default_value=None).tag(config=True,
                                              env="JPYNB_GCS_PROJECT")
    prefix = Unicode(
        "", help="Prefix path inside the specified bucket").tag(config=True)
    separator = Unicode("/", help="Path separator").tag(config=True)

    def __init__(self, *args, **kwargs):
        super(GFContentsManager, self).__init__(*args, **kwargs)
        self._fs = GFFS(log=self.log,
                        project=self.project,
                        prefix=self.prefix,
                        separator=self.separator)

    @default('checkpoints_class')
    def _checkpoints_class_default(self):
        return RemoteFileCheckpoints
Example #4
class GCSContentsManager(GenericContentsManager):

    project = Unicode(help="GCP Project", allow_none=True,
                      default_value=None).tag(config=True,
                                              env="JPYNB_GCS_PROJECT")
    token = Unicode(help="Path to the GCP token",
                    allow_none=True,
                    default_value=None).tag(config=True,
                                            env="JPYNB_GCS_TOKEN_PATH")

    region_name = Unicode("us-east-1",
                          help="Region name").tag(config=True,
                                                  env="JPYNB_GCS_REGION_NAME")
    bucket = Unicode("notebooks", help="Bucket name to store notebooks").tag(
        config=True, env="JPYNB_GCS_BUCKET")

    prefix = Unicode(
        "", help="Prefix path inside the specified bucket").tag(config=True)
    separator = Unicode("/", help="Path separator").tag(config=True)

    def __init__(self, *args, **kwargs):
        super(GCSContentsManager, self).__init__(*args, **kwargs)

        self._fs = GCSFS(log=self.log,
                         project=self.project,
                         token=self.token,
                         bucket=self.bucket,
                         prefix=self.prefix,
                         separator=self.separator)
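
A corresponding jupyter_notebook_config.py sketch for this GCS-backed manager; the project, bucket, and token path are placeholder values:

from s3contents import GCSContentsManager

c = get_config()  # provided by Jupyter when it loads the config file

c.NotebookApp.contents_manager_class = GCSContentsManager
c.GCSContentsManager.project = "my-gcp-project"  # placeholder
c.GCSContentsManager.bucket = "my-notebook-bucket"  # placeholder
c.GCSContentsManager.token = "~/.config/gcloud/application_default_credentials.json"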
Example #5
class S3ContentsManager(GenericContentsManager):

    access_key_id = Unicode(help="S3/AWS access key ID",
                            allow_none=True,
                            default_value=None).tag(
                                config=True, env="JPYNB_S3_ACCESS_KEY_ID")
    secret_access_key = Unicode(help="S3/AWS secret access key",
                                allow_none=True,
                                default_value=None).tag(
                                    config=True,
                                    env="JPYNB_S3_SECRET_ACCESS_KEY")

    endpoint_url = Unicode("s3-us-gov-east-1.amazonaws.com",
                           help="S3 endpoint URL").tag(
                               config=True, env="JPYNB_S3_ENDPOINT_URL")
    region_name = Unicode("us-gov-east-1",
                          help="Region name").tag(config=True,
                                                  env="JPYNB_S3_REGION_NAME")
    bucket = Unicode("notebooks", help="Bucket name to store notebooks").tag(
        config=True, env="JPYNB_S3_BUCKET")
    prefix = Unicode(
        "", help="Prefix path inside the specified bucket").tag(config=True)
    signature_version = Unicode(help="").tag(config=True)
    delimiter = Unicode("/", help="Path delimiter").tag(config=True)
    sse = Unicode(help="Type of server-side encryption to use").tag(
        config=True)

    kms_key_id = Unicode(help="KMS ID to use to encrypt workbooks").tag(
        config=True)

    session_token = Unicode(help="S3/AWS session token",
                            allow_none=True,
                            default_value=None).tag(
                                config=True, env="JPYNB_S3_SESSION_TOKEN")

    boto3_session = Any(
        help="Place to store a custom boto3 session (passed to S3FS); "
        "could be set by init_s3_hook")
    init_s3_hook = Any(help="Optional hook for initializing S3").tag(config=True)

    s3fs_additional_kwargs = Any(
        help="Optional dictionary of additional kwargs to pass to s3fs"
    ).tag(config=True)

    def __init__(self, *args, **kwargs):
        super(S3ContentsManager, self).__init__(*args, **kwargs)

        self.run_init_s3_hook()
        self.bucket = _validate_bucket(self.bucket, self.log)
        self._fs = S3FS(log=self.log,
                        access_key_id=self.access_key_id,
                        secret_access_key=self.secret_access_key,
                        endpoint_url=self.endpoint_url,
                        region_name=self.region_name,
                        bucket=self.bucket,
                        prefix=self.prefix,
                        session_token=self.session_token,
                        signature_version=self.signature_version,
                        delimiter=self.delimiter,
                        sse=self.sse,
                        kms_key_id=self.kms_key_id,
                        boto3_session=self.boto3_session,
                        s3fs_additional_kwargs=self.s3fs_additional_kwargs)

    def run_init_s3_hook(self):
        if self.init_s3_hook is not None:
            self.init_s3_hook(self)

    def _save_notebook(self, model, path):
        nb_contents = from_dict(model["content"])
        self.check_and_sign(nb_contents, path)
        file_contents = json.dumps(model["content"])
        self._fs.writenotebook(path, file_contents)
        self.validate_notebook_model(model)
        return model.get("message")
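
run_init_s3_hook calls init_s3_hook with the manager instance before S3FS is constructed, so a hook can prepare state such as boto3_session. An illustrative sketch (assume_role_hook and the role ARN are hypothetical):

import boto3

def assume_role_hook(manager):
    # Fetch temporary credentials for a role and stash a session on the
    # manager; __init__ above then passes it through to S3FS.
    creds = boto3.client("sts").assume_role(
        RoleArn="arn:aws:iam::123456789012:role/notebook-role",  # placeholder
        RoleSessionName="jupyter",
    )["Credentials"]
    manager.boto3_session = boto3.Session(
        aws_access_key_id=creds["AccessKeyId"],
        aws_secret_access_key=creds["SecretAccessKey"],
        aws_session_token=creds["SessionToken"],
    )

# In jupyter config: c.S3ContentsManager.init_s3_hook = assume_role_hook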
Example #6
class S3FS(GenericFS):

    access_key_id = Unicode(help="S3/AWS access key ID",
                            allow_none=True,
                            default_value=None).tag(
                                config=True, env="JPYNB_S3_ACCESS_KEY_ID")
    secret_access_key = Unicode(help="S3/AWS secret access key",
                                allow_none=True,
                                default_value=None).tag(
                                    config=True,
                                    env="JPYNB_S3_SECRET_ACCESS_KEY")

    endpoint_url = Unicode("s3.amazonaws.com", help="S3 endpoint URL").tag(
        config=True, env="JPYNB_S3_ENDPOINT_URL")
    region_name = Unicode("us-east-1",
                          help="Region name").tag(config=True,
                                                  env="JPYNB_S3_REGION_NAME")
    bucket = Unicode("notebooks", help="Bucket name to store notebooks").tag(
        config=True, env="JPYNB_S3_BUCKET")
    signature_version = Unicode(help="").tag(config=True)
    sse = Unicode(help="Type of server-side encryption to use").tag(
        config=True)

    prefix = Unicode(
        "", help="Prefix path inside the specified bucket").tag(config=True)
    delimiter = Unicode("/", help="Path delimiter").tag(config=True)

    dir_keep_file = Unicode(
        ".s3keep",
        help="Empty file to create when creating directories").tag(config=True)

    def __init__(self, log, **kwargs):
        super(S3FS, self).__init__(**kwargs)
        self.log = log

        client_kwargs = {
            "endpoint_url": self.endpoint_url,
            "region_name": self.region_name,
        }
        config_kwargs = {}
        if self.signature_version:
            config_kwargs["signature_version"] = self.signature_version
        s3_additional_kwargs = {}
        if self.sse:
            s3_additional_kwargs["ServerSideEncryption"] = self.sse

        self.fs = s3fs.S3FileSystem(key=self.access_key_id,
                                    secret=self.secret_access_key,
                                    client_kwargs=client_kwargs,
                                    config_kwargs=config_kwargs,
                                    s3_additional_kwargs=s3_additional_kwargs)

        self._invalidator = threading.Timer(interval=60,
                                            function=self.fs.invalidate_cache)
        self._invalidator.daemon = True
        self._invalidator.start()
        self.init()

    def __del__(self):
        self._invalidator.cancel()

    def init(self):
        self.mkdir("")
        self.ls("")
        self.isdir("")

    #  GenericFS methods -----------------------------------------------------------------------------------------------

    def ls(self, path=""):
        path_ = self.path(path)
        self.log.debug("S3contents.S3FS: Listing directory: `%s`", path_)
        files = self.fs.ls(path_)
        return self.unprefix(files)

    def isfile(self, path):
        path_ = self.path(path)

        exists = self.fs.exists(path_)
        if not exists:
            is_file = False
        else:
            is_file = path_ in set(self.fs.ls(path_))

        self.log.debug("S3contents.S3FS: `%s` is a file: %s", path_, is_file)
        return is_file

    def isdir(self, path):
        path_ = self.path(path)

        exists = self.fs.exists(path_)
        if not exists:
            is_dir = False
        else:
            is_dir = path_ not in set(self.fs.ls(path_))
        return is_dir

    def mv(self, old_path, new_path):
        self.log.debug("S3contents.S3FS: Move file `%s` to `%s`", old_path,
                       new_path)
        self.cp(old_path, new_path)
        self.rm(old_path)

    def cp(self, old_path, new_path):
        old_path_, new_path_ = self.path(old_path), self.path(new_path)
        self.log.debug("S3contents.S3FS: Coping `%s` to `%s`", old_path_,
                       new_path_)

        if self.isdir(old_path):
            old_dir_path, new_dir_path = old_path, new_path
            for obj in self.ls(old_dir_path):
                old_item_path = obj
                new_item_path = old_item_path.replace(old_dir_path,
                                                      new_dir_path, 1)
                self.cp(old_item_path, new_item_path)
        elif self.isfile(old_path):
            self.fs.copy(old_path_, new_path_)
        self.fs.invalidate_cache(new_path_)

    def rm(self, path):
        path_ = self.path(path)
        self.log.debug("S3contents.S3FS: Removing: `%s`", path_)
        if self.isfile(path):
            self.log.debug("S3contents.S3FS: Removing file: `%s`", path_)
            self.fs.rm(path_)
        elif self.isdir(path):
            self.log.debug("S3contents.S3FS: Removing directory: `%s`", path_)
            self.fs.rm(path_ + self.delimiter, recursive=True)
        self.fs.invalidate_cache(path_)

    def mkdir(self, path):
        path_ = self.path(path, self.dir_keep_file)
        self.log.debug("S3contents.S3FS: Making dir: `%s`", path_)
        self.fs.touch(path_)

        parent = path_.rsplit('/', 2)[0]
        self.log.info("S3contents.S3FS: Invalidaing: `%s`", parent)
        self.fs.invalidate_cache(parent)

    def read(self, path):
        path_ = self.path(path)
        if not self.isfile(path):
            raise NoSuchFile(path_)
        with self.fs.open(path_, mode='rb') as f:
            content = f.read().decode("utf-8")
        return content

    def lstat(self, path):
        path_ = self.path(path)
        if self.isdir(path):
            # use the modification timestamps of immediate children to determine our path's mtime
            try:
                modification_dates = filter(
                    None, (e.get('LastModified')
                           for e in self.fs.ls(path_, detail=True)))
                ret = {
                    "ST_MTIME": max(modification_dates, default=None),
                    "ST_SIZE": None,
                }
            except FileNotFoundError:
                ret = {}
        else:
            info = self.fs.info(path_)
            ret = {
                "ST_MTIME": info["LastModified"],
                "ST_SIZE": info["Size"],
            }
        return ret

    def write(self, path, content, format):
        path_ = self.path(self.unprefix(path))
        self.log.debug("S3contents.S3FS: Writing file: `%s`", path_)
        if format not in {'text', 'base64'}:
            raise HTTPError(
                400,
                "Must specify format of file contents as 'text' or 'base64'",
            )
        try:
            if format == 'text':
                content_ = content.encode('utf8')
            else:
                b64_bytes = content.encode('ascii')
                content_ = base64.b64decode(b64_bytes)
        except Exception as e:
            raise HTTPError(400, u'Encoding error saving %s: %s' % (path_, e))
        with self.fs.open(path_, mode='wb') as f:
            f.write(content_)

    def writenotebook(self, path, content):
        path_ = self.path(self.unprefix(path))
        self.log.debug("S3contents.S3FS: Writing notebook: `%s`", path_)
        with self.fs.open(path_, mode='wb') as f:
            f.write(content.encode("utf-8"))

    #  Utilities -------------------------------------------------------------------------------------------------------

    def get_prefix(self):
        """Full prefix: bucket + optional prefix"""
        prefix = self.bucket
        if self.prefix:
            prefix += self.delimiter + self.prefix
        return prefix

    prefix_ = property(get_prefix)

    def unprefix(self, path):
        """Remove the self.prefix_ (if present) from a path or list of paths"""
        if isinstance(path, six.string_types):
            path = path[len(self.prefix_):] if path.startswith(
                self.prefix_) else path
            path = path[1:] if path.startswith(self.delimiter) else path
            return path
        if isinstance(path, (list, tuple)):
            path = [
                p[len(self.prefix_):] if p.startswith(self.prefix_) else p
                for p in path
            ]
            path = [p[1:] if p.startswith(self.delimiter) else p for p in path]
            return path

    def path(self, *path):
        """Utility to join paths including the bucket and prefix"""
        path = list(filter(None, path))
        path = self.unprefix(path)
        items = [self.prefix_] + path
        return self.delimiter.join(items)
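
For reference, the prefix helpers above compose as follows, assuming a hypothetical S3FS configured with bucket="notebooks", prefix="team-a" and the default "/" delimiter:

# fs.prefix_                                    -> "notebooks/team-a"
# fs.path("proj", "a.ipynb")                    -> "notebooks/team-a/proj/a.ipynb"
# fs.unprefix("notebooks/team-a/proj/a.ipynb")  -> "proj/a.ipynb"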
Example #7
class GenericContentsManager(ContentsManager, HasTraits):

    # This makes the checkpoints get saved on this directory
    root_dir = Unicode("./", config=True)

    def __init__(self, *args, **kwargs):
        super(GenericContentsManager, self).__init__(*args, **kwargs)
        self._fs = None

    def get_fs(self):
        return self._fs

    fs = property(get_fs)

    def _checkpoints_class_default(self):
        return GenericFileCheckpoints

    def do_error(self, msg, code=500):
        raise HTTPError(code, msg)

    def no_such_entity(self, path):
        self.do_error("No such entity: [{path}]".format(path=path), 404)

    def already_exists(self, path):
        thing = "File" if self.file_exists(path) else "Directory"
        self.do_error(
            u"{thing} already exists: [{path}]".format(thing=thing, path=path),
            409)

    def guess_type(self, path, allow_directory=True):
        """
        Guess the type of a file.
        If allow_directory is False, don't consider the possibility that the
        file is a directory.

        Parameters
        ----------
            path: string
        """
        if path.endswith(".ipynb"):
            return "notebook"
        elif allow_directory and self.dir_exists(path):
            return "directory"
        else:
            return "file"

    def file_exists(self, path):
        # Does a file exist at the given path?
        self.log.debug("S3contents.GenericManager.file_exists: ('%s')", path)
        return self.fs.isfile(path)

    def dir_exists(self, path):
        # Does a directory exist at the given path?
        self.log.debug("S3contents.GenericManager.dir_exists: path('%s')",
                       path)
        return self.fs.isdir(path)

    def get(self, path, content=True, type=None, format=None):
        # Get a file or directory model.
        self.log.debug(
            "S3contents.GenericManager.get] path('%s') type(%s) format(%s)",
            path, type, format)
        path = path.strip('/')

        if type is None:
            type = self.guess_type(path)
        try:
            func = {
                "directory": self._get_directory,
                "notebook": self._get_notebook,
                "file": self._get_file,
            }[type]
        except KeyError:
            raise ValueError("Unknown type passed: '{}'".format(type))

        return func(path=path, content=content, format=format)

    def _get_directory(self, path, content=True, format=None):
        self.log.debug(
            "S3contents.GenericManager.get_directory: path('%s') content(%s) format(%s)",
            path, content, format)
        return self._directory_model_from_path(path, content=content)

    def _get_notebook(self, path, content=True, format=None):
        self.log.debug(
            "S3contents.GenericManager.get_notebook: path('%s') type(%s) format(%s)",
            path, content, format)
        return self._notebook_model_from_path(path,
                                              content=content,
                                              format=format)

    def _get_file(self, path, content=True, format=None):
        self.log.debug(
            "S3contents.GenericManager.get_file: path('%s') type(%s) format(%s)",
            path, content, format)
        return self._file_model_from_path(path, content=content, format=format)

    def _directory_model_from_path(self, path, content=False):
        self.log.debug(
            "S3contents.GenericManager._directory_model_from_path: path('%s') type(%s)",
            path, content)
        model = base_directory_model(path)
        if content:
            if not self.dir_exists(path):
                self.no_such_entity(path)
            model["format"] = "json"
            dir_content = self.fs.ls(path=path)
            model["content"] = self._convert_file_records(dir_content)
        return model

    def _notebook_model_from_path(self, path, content=False, format=None):
        """
        Build a notebook model from database record.
        """
        model = base_model(path)
        model["type"] = "notebook"
        if self.fs.isfile(path):
            model["last_modified"] = model["created"] = self.fs.lstat(
                path)["ST_MTIME"]
        else:
            model["last_modified"] = model["created"] = DUMMY_CREATED_DATE
        if content:
            if not self.fs.isfile(path):
                self.no_such_entity(path)
            file_content = self.fs.read(path)
            nb_content = reads(file_content, as_version=NBFORMAT_VERSION)
            self.mark_trusted_cells(nb_content, path)
            model["format"] = "json"
            model["content"] = nb_content
            self.validate_notebook_model(model)
        return model

    def _file_model_from_path(self, path, content=False, format=None):
        """
        Build a file model from database record.
        """
        model = base_model(path)
        model["type"] = "file"
        if self.fs.isfile(path):
            model["last_modified"] = model["created"] = self.fs.lstat(
                path)["ST_MTIME"]
        else:
            model["last_modified"] = model["created"] = DUMMY_CREATED_DATE
        if content:
            try:
                content = self.fs.read(path)
            except NoSuchFile as e:
                self.no_such_entity(e.path)
            except GenericFSError as e:
                self.do_error(str(e), 500)
            model["format"] = format or "text"
            model["content"] = content
            model["mimetype"] = mimetypes.guess_type(path)[0] or "text/plain"
            if format == "base64":
                model["format"] = format or "base64"
                from base64 import b64decode
                model["content"] = b64decode(content)
        return model

    def _convert_file_records(self, paths):
        """
        Applies _notebook_model_from_s3_path or _file_model_from_s3_path to each entry of `paths`,
        depending on the result of `guess_type`.
        """
        ret = []
        for path in paths:
            # path = self.fs.remove_prefix(path, self.prefix)  # Remove bucket prefix from paths
            if os.path.basename(path) == self.fs.dir_keep_file:
                continue
            type_ = self.guess_type(path, allow_directory=True)
            if type_ == "notebook":
                ret.append(self._notebook_model_from_path(path, False))
            elif type_ == "file":
                ret.append(self._file_model_from_path(path, False, None))
            elif type_ == "directory":
                ret.append(self._directory_model_from_path(path, False))
            else:
                self.do_error(
                    "Unknown file type %s for file '%s'" % (type_, path), 500)
        return ret

    def save(self, model, path):
        """Save a file or directory model to path.
        """
        self.log.debug("S3contents.GenericManager: save %s: '%s'", model, path)
        if "type" not in model:
            self.do_error("No model type provided", 400)
        if "content" not in model and model["type"] != "directory":
            self.do_error("No file content provided", 400)

        if model["type"] not in ("file", "directory", "notebook"):
            self.do_error("Unhandled contents type: %s" % model["type"], 400)

        try:
            if model["type"] == "notebook":
                validation_message = self._save_notebook(model, path)
            elif model["type"] == "file":
                validation_message = self._save_file(model, path)
            else:
                validation_message = self._save_directory(path)
        except Exception as e:
            self.log.error("Error while saving file: %s %s",
                           path,
                           e,
                           exc_info=True)
            self.do_error(
                "Unexpected error while saving file: %s %s" % (path, e), 500)

        model = self.get(path, type=model["type"], content=False)
        if validation_message is not None:
            model["message"] = validation_message
        return model

    def _save_notebook(self, model, path):
        nb_contents = from_dict(model['content'])
        self.check_and_sign(nb_contents, path)
        file_contents = json.dumps(model["content"])
        self.fs.write(path, file_contents)
        self.validate_notebook_model(model)
        return model.get("message")

    def _save_file(self, model, path):
        file_contents = model["content"]
        file_format = model.get('format')
        self.fs.write(path, file_contents, file_format)

    def _save_directory(self, path):
        self.fs.mkdir(path)

    def rename_file(self, old_path, new_path):
        """Rename a file or directory.

        NOTE: This method is unfortunately named on the base class.  It
        actually moves a file or a directory.
        """
        self.log.debug(
            "S3contents.GenericManager: Init rename of '%s' to '%s'", old_path,
            new_path)
        if self.file_exists(new_path) or self.dir_exists(new_path):
            self.already_exists(new_path)
        elif self.file_exists(old_path) or self.dir_exists(old_path):
            self.log.debug(
                "S3contents.GenericManager: Actually renaming '%s' to '%s'",
                old_path, new_path)
            self.fs.mv(old_path, new_path)
        else:
            self.no_such_entity(old_path)

    def delete_file(self, path):
        """Delete the file or directory at path.
        """
        self.log.debug("S3contents.GenericManager: delete_file '%s'", path)
        if self.file_exists(path) or self.dir_exists(path):
            self.fs.rm(path)
        else:
            self.no_such_entity(path)

    def is_hidden(self, path):
        """Is path a hidden directory or file?
        """
        self.log.debug("S3contents.GenericManager: is_hidden '%s'", path)
        return False
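
GenericContentsManager leaves self._fs as None; concrete backends subclass it and assign a filesystem in __init__, as Examples #1-#5 do. A minimal sketch of the pattern (MyFS is a hypothetical class implementing the GenericFS methods used above: ls, isfile, isdir, mv, rm, mkdir, read, write and lstat):

class MyContentsManager(GenericContentsManager):

    def __init__(self, *args, **kwargs):
        super(MyContentsManager, self).__init__(*args, **kwargs)
        self._fs = MyFS(log=self.log)  # hypothetical GenericFS implementation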
Example #8
class S3ContentsManager(ContentsManager, HasTraits):

    access_key_id = Unicode(help="S3/AWS access key ID", allow_none=True, default_value=None).tag(config=True, env="JPYNB_S3_ACCESS_KEY_ID")
    secret_access_key = Unicode(help="S3/AWS secret access key", allow_none=True, default_value=None).tag(config=True, env="JPYNB_S3_SECRET_ACCESS_KEY")

    endpoint_url = Unicode("s3.amazonaws.com", help="S3 endpoint URL").tag(config=True, env="JPYNB_S3_ENDPOINT_URL")
    region_name = Unicode("us-east-1", help="Region Name").tag(config=True, env="JPYNB_S3_REGION_NAME")
    bucket_name = Unicode("notebooks", help="Bucket name to store notebooks").tag(config=True, env="JPYNB_S3_BUCKET_NAME")
    prefix = Unicode("", help="Prefix path inside the specified bucket").tag(config=True)
    signature_version = Unicode(help="").tag(config=True)
    delimiter = Unicode("/", help="Path delimiter").tag(config=True)

    def __init__(self, *args, **kwargs):
        super(S3ContentsManager, self).__init__(*args, **kwargs)

        self.s3fs = S3FS(
            log=self.log,
            access_key_id=self.access_key_id,
            secret_access_key=self.secret_access_key,
            endpoint_url=self.endpoint_url,
            region_name=self.region_name,
            bucket_name=self.bucket_name,
            prefix=self.prefix,
            signature_version=self.signature_version,
            delimiter=self.delimiter
        )

    def _checkpoints_class_default(self):
        return GenericFileCheckpoints

    def do_error(self, msg, code=500):
        raise HTTPError(code, msg)

    def no_such_entity(self, path):
        self.do_error("No such entity: [{path}]".format(path=path), 404)

    def already_exists(self, path):
        thing = "File" if self.file_exists(path) else "Directory"
        self.do_error(u"%s already exists: [{path}]".format(thing=thing, path=path), 409)

    def guess_type(self, path, allow_directory=True):
        """
        Guess the type of a file.
        If allow_directory is False, don't consider the possibility that the
        file is a directory.

        Parameters
        ----------
            path: string
        """
        if path.endswith(".ipynb"):
            return "notebook"
        elif allow_directory and self.dir_exists(path):
            return "directory"
        else:
            return "file"

    def file_exists(self, path):
        # Does a file exist at the given path?
        self.log.debug("S3contents[S3manager]: file_exists '%s'", path)
        return self.s3fs.isfile(path)

    def dir_exists(self, path):
        # Does a directory exist at the given path?
        self.log.debug("S3contents[S3manager]: dir_exists '%s'", path)
        return self.s3fs.isdir(path)

    def get(self, path, content=True, type=None, format=None):
        # Get a file or directory model.
        self.log.debug("S3contents[S3manager]: get '%s' %s %s", path, type, format)
        path = path.strip('/')

        if type is None:
            type = self.guess_type(path)
        try:
            fn = {
                "directory": self._get_directory,
                "notebook": self._get_notebook,
                "file": self._get_file,
            }[type]
        except KeyError:
            raise ValueError("Unknown type passed: '{}'".format(type))

        return fn(path=path, content=content, format=format)

    def _get_directory(self, path, content=True, format=None):
        self.log.debug("S3contents[S3manager]: get_directory '%s' %s %s", path, type, format)
        return self._directory_model_from_path(path, content=content)

    def _get_notebook(self, path, content=True, format=None):
        self.log.debug("S3contents[S3manager]: get_notebook '%s' %s %s", path, content, format)
        return self._notebook_model_from_path(path, content=content, format=format)

    def _get_file(self, path, content=True, format=None):
        self.log.debug("S3contents[S3manager]: get_file '%s' %s %s", path, content, format)
        return self._file_model_from_path(path, content=content, format=format)

    def _directory_model_from_path(self, path, content=False):
        self.log.debug("S3contents[S3manager]: _directory_model_from_path '%s' %s", path, content)
        model = base_directory_model(path)
        if content:
            if not self.dir_exists(path):
                self.no_such_entity(path)
            model["format"] = "json"
            dir_content = self.s3fs.listdir(path=path, with_prefix=True)
            model["content"] = self._convert_file_records(dir_content)
        return model

    def _notebook_model_from_path(self, path, content=False, format=None):
        """
        Build a notebook model from database record.
        """
        # path = to_api_path(record['parent_name'] + record['name'])
        model = base_model(path)
        model['type'] = 'notebook'
        # model['last_modified'] = model['created'] = record['created_at']
        model['last_modified'] = model['created'] = DUMMY_CREATED_DATE
        if content:
            if not self.s3fs.isfile(path):
                self.no_such_entity(path)
            file_content = self.s3fs.read(path)
            nb_content = reads(file_content, as_version=NBFORMAT_VERSION)
            self.mark_trusted_cells(nb_content, path)
            model["format"] = "json"
            model["content"] = nb_content
            self.validate_notebook_model(model)
        return model

    def _file_model_from_path(self, path, content=False, format=None):
        """
        Build a file model from database record.
        """
        model = base_model(path)
        model['type'] = 'file'
        model['last_modified'] = model['created'] = DUMMY_CREATED_DATE
        if content:
            try:
                content = self.s3fs.read(path)
            except NoSuchFile as e:
                self.no_such_entity(e.path)
            except S3FSError as e:
                self.do_error(str(e), 500)
            model["format"] = format or "text"
            model["content"] = content
            model["mimetype"] = mimetypes.guess_type(path)[0] or "text/plain"
            if format == "base64":
                model["format"] = format or "base64"
                from base64 import b64decode
                model["content"] = b64decode(content)
        return model

    def _convert_file_records(self, paths):
        """
        Applies _notebook_model_from_s3_path or _file_model_from_s3_path to each entry of `paths`,
        depending on the result of `guess_type`.
        """
        ret = []
        for path in paths:
            path = self.s3fs.remove_prefix(path, self.prefix)  # Remove bucket prefix from paths
            if os.path.basename(path) == self.s3fs.dir_keep_file:
                continue
            type_ = self.guess_type(path, allow_directory=True)
            if type_ == "notebook":
                ret.append(self._notebook_model_from_path(path, False))
            elif type_ == "file":
                ret.append(self._file_model_from_path(path, False, None))
            elif type_ == "directory":
                ret.append(self._directory_model_from_path(path, False))
            else:
                self.do_error("Unknown file type %s for file '%s'" % (type_, path), 500)
        return ret

    def save(self, model, path):
        """Save a file or directory model to path.
        """
        self.log.debug("S3contents[S3manager]: save %s: '%s'", model, path)
        if "type" not in model:
            self.do_error("No model type provided", 400)
        if "content" not in model and model["type"] != "directory":
            self.do_error("No file content provided", 400)

        if model["type"] not in ("file", "directory", "notebook"):
            self.do_error("Unhandled contents type: %s" % model["type"], 400)

        try:
            if model["type"] == "notebook":
                validation_message = self._save_notebook(model, path)
            elif model["type"] == "file":
                validation_message = self._save_file(model, path)
            else:
                validation_message = self._save_directory(path)
        except Exception as e:
            self.log.error("Error while saving file: %s %s", path, e, exc_info=True)
            self.do_error("Unexpected error while saving file: %s %s" % (path, e), 500)

        model = self.get(path, type=model["type"], content=False)
        if validation_message is not None:
            model["message"] = validation_message
        return model

    def _save_notebook(self, model, path):
        nb_contents = from_dict(model['content'])
        self.check_and_sign(nb_contents, path)
        file_contents = json.dumps(model["content"])
        self.s3fs.write(path, file_contents)
        self.validate_notebook_model(model)
        return model.get("message")

    def _save_file(self, model, path):
        file_contents = model["content"]
        self.s3fs.write(path, file_contents)

    def _save_directory(self, path):
        self.s3fs.mkdir(path)

    def rename_file(self, old_path, new_path):
        """Rename a file or directory.

        NOTE: This method is unfortunately named on the base class.  It
        actually moves a file or a directory.
        """
        self.log.debug("S3contents[S3manager]: Init rename of '%s' to '%s'", old_path, new_path)
        if self.file_exists(new_path) or self.dir_exists(new_path):
            self.already_exists(new_path)
        elif self.file_exists(old_path) or self.dir_exists(old_path):
            self.log.debug("S3contents[S3manager]: Actually renaming '%s' to '%s'", old_path, new_path)
            self.s3fs.mv(old_path, new_path)
        else:
            self.no_such_entity(old_path)

    def delete_file(self, path):
        """Delete the file or directory at path.
        """
        self.log.debug("S3contents[S3manager]: delete_file '%s'", path)
        if self.file_exists(path) or self.dir_exists(path):
            self.s3fs.rm(path)
        else:
            self.no_such_entity(path)

    def is_hidden(self, path):
        """Is path a hidden directory or file?
        """
        self.log.debug("S3contents[S3manager]: is_hidden '%s'", path)
        return False
Example #9
class S3FS(HasTraits):

    access_key_id = Unicode(help="S3/AWS access key ID",
                            allow_none=True,
                            default_value=None).tag(
                                config=True, env="JPYNB_S3_ACCESS_KEY_ID")
    secret_access_key = Unicode(help="S3/AWS secret access key",
                                allow_none=True,
                                default_value=None).tag(
                                    config=True,
                                    env="JPYNB_S3_SECRET_ACCESS_KEY")

    endpoint_url = Unicode("s3.amazonaws.com", help="S3 endpoint URL").tag(
        config=True, env="JPYNB_S3_ENDPOINT_URL")
    region_name = Unicode("us-east-1",
                          help="Region Name").tag(config=True,
                                                  env="JPYNB_S3_REGION_NAME")
    bucket_name = Unicode("notebooks",
                          help="Bucket name to store notebooks").tag(
                              config=True, env="JPYNB_S3_BUCKET_NAME")
    prefix = Unicode(
        "", help="Prefix path inside the specified bucket").tag(config=True)
    signature_version = Unicode(help="").tag(config=True)
    delimiter = Unicode("/", help="Path delimiter").tag(config=True)

    dir_keep_file = Unicode(
        ".s3keep",
        help="Empty file to create when creating directories").tag(config=True)

    def __init__(self, log, **kwargs):
        super(S3FS, self).__init__(**kwargs)
        self.log = log

        config = None
        if self.signature_version:
            config = Config(signature_version=self.signature_version)

        self.client = boto3.client(
            "s3",
            aws_access_key_id=self.access_key_id,
            aws_secret_access_key=self.secret_access_key,
            endpoint_url=self.endpoint_url,
            region_name=self.region_name,
            config=config)

        self.resource = boto3.resource(
            "s3",
            aws_access_key_id=self.access_key_id,
            aws_secret_access_key=self.secret_access_key,
            endpoint_url=self.endpoint_url,
            region_name=self.region_name,
            config=config)

        self.bucket = self.resource.Bucket(self.bucket_name)
        self.delimiter = "/"

        if self.prefix:
            self.mkdir("")

    def get_keys(self, prefix=""):
        ret = []
        for obj in self.bucket.objects.filter(Prefix=prefix):
            ret.append(obj.key)
        return ret

    def listdir(self, path="", with_prefix=False):
        self.log.debug("S3contents[S3FS] Listing directory: `%s`", path)
        prefix = self.as_key(path)
        fnames = self.get_keys(prefix=prefix)
        fnames_no_prefix = [
            self.remove_prefix(fname, prefix=prefix) for fname in fnames
        ]
        fnames_no_prefix = [
            fname.lstrip(self.delimiter) for fname in fnames_no_prefix
        ]
        files = set(
            fname.split(self.delimiter)[0] for fname in fnames_no_prefix)
        if with_prefix:
            files = [
                self.join(prefix.strip(self.delimiter),
                          f).strip(self.delimiter) for f in files
            ]
        else:
            files = list(files)
        return map(self.as_path, files)

    def isfile(self, path):
        self.log.debug("S3contents[S3FS] Checking if `%s` is a file", path)
        key = self.as_key(path)
        if key == "":
            is_file = False
        else:
            try:
                self.client.head_object(Bucket=self.bucket_name, Key=key)
                is_file = True
            except Exception:
                is_file = False
        self.log.debug("S3contents[S3FS] `%s` is a file: %s", path, is_file)
        return is_file

    def isdir(self, path):
        self.log.debug("S3contents[S3FS] Checking if `%s` is a directory",
                       path)
        key = self.as_key(path)
        if key == "":
            return True
        if not key.endswith(self.delimiter):
            key = key + self.delimiter
        objs = list(self.bucket.objects.filter(Prefix=key))
        is_dir = len(objs) > 0
        self.log.debug("S3contents[S3FS] `%s` is a directory: %s", path,
                       is_dir)
        return is_dir

    def mv(self, old_path, new_path):
        self.cp(old_path, new_path)
        self.rm(old_path)

    def cp(self, old_path, new_path):
        self.log.debug("S3contents[S3FS] Copy `%s` to `%s`", old_path,
                       new_path)
        if self.isdir(old_path):
            old_dir_path, new_dir_path = old_path, new_path
            old_dir_key = self.as_key(old_dir_path)
            for obj in self.bucket.objects.filter(Prefix=old_dir_key):
                old_item_path = self.as_path(obj.key)
                new_item_path = old_item_path.replace(old_dir_path,
                                                      new_dir_path, 1)
                self.cp(old_item_path, new_item_path)
        elif self.isfile(old_path):
            old_key = self.as_key(old_path)
            new_key = self.as_key(new_path)
            source = "{bucket_name}/{old_key}".format(
                bucket_name=self.bucket_name, old_key=old_key)
            self.client.copy_object(Bucket=self.bucket_name,
                                    CopySource=source,
                                    Key=new_key)

    def rm(self, path):
        self.log.debug("S3contents[S3FS] Deleting: `%s`", path)
        if self.isfile(path):
            key = self.as_key(path)
            self.client.delete_object(Bucket=self.bucket_name, Key=key)
        elif self.isdir(path):
            key = self.as_key(path)
            key = key + "/"
            objects_to_delete = []
            for obj in self.bucket.objects.filter(Prefix=key):
                objects_to_delete.append({"Key": obj.key})
            self.bucket.delete_objects(Delete={"Objects": objects_to_delete})

    def mkdir(self, path):
        self.log.debug("S3contents[S3FS] Making dir: `%s`", path)
        if self.isfile(path):
            self.log.debug(
                "S3contents[S3FS] File `%s` already exists, not creating anything",
                path)
        elif self.isdir(path):
            self.log.debug(
                "S3contents[S3FS] Directory `%s` already exists, not creating anything",
                path)
        else:
            obj_path = self.join(path, self.dir_keep_file)
            self.write(obj_path, "")

    def read(self, path):
        key = self.as_key(path)
        if not self.isfile(path):
            raise NoSuchFile(self.as_path(key))
        obj = self.resource.Object(self.bucket_name, key)
        text = obj.get()["Body"].read().decode("utf-8")
        return text

    def write(self, path, content):
        key = self.as_key(path)
        self.client.put_object(Bucket=self.bucket_name, Key=key, Body=content)

    def as_key(self, path):
        """Utility: Make a path a S3 key
        """
        path_ = self.abspath(path)
        self.log.debug("S3contents[S3FS] Understanding `%s` as `%s`", path,
                       path_)
        if isinstance(path_, six.string_types):
            return path_.strip(self.delimiter)
        if isinstance(path_, list):
            return [self.as_key(item) for item in path_]

    def as_path(self, key):
        """Utility: Make a S3 key a path
        """
        key_ = self.remove_prefix(key)
        if isinstance(key_, six.string_types):
            return key_.strip(self.delimiter)

    def remove_prefix(self, text, prefix=None):
        """Utility: remove a prefix from a string
        """
        if prefix is None:
            prefix = self.prefix
        if text.startswith(prefix):
            return text[len(prefix):].strip("/")
        return text.strip("/")

    def join(self, *args):
        """Utility: join using the delimiter
        """
        return self.delimiter.join(args)

    def abspath(self, path):
        """Utility: Return a normalized absolutized version of the pathname path
        Basically prepends the path with the prefix
        """
        path = path.strip("/")
        if self.prefix:
            path = self.join(self.prefix, path)
        return path.strip("/")
Example #10
class GenericContentsManager(ContentsManager, HasTraits):

    # This makes the checkpoints get saved on this directory
    root_dir = Unicode("./", config=True)

    post_save_hook = Any(
        None,
        config=True,
        allow_none=True,
        help="""Python callable or importstring thereof
        to be called on the path of a file just saved.
        This can be used to process the file on disk,
        such as converting the notebook to a script or HTML via nbconvert.
        It will be called as (all arguments passed by keyword)::
            hook(s3_path=s3_path, model=model, contents_manager=instance)
        - s3_path: the S3 path to the file just written (sans bucket/prefix)
        - model: the model representing the file
        - contents_manager: this ContentsManager instance
        """,
    )

    def __init__(self, *args, **kwargs):
        super(GenericContentsManager, self).__init__(*args, **kwargs)
        self._fs = None

    def get_fs(self):
        return self._fs

    fs = property(get_fs)

    def _checkpoints_class_default(self):
        return GenericFileCheckpoints

    def do_error(self, msg, code=500):
        raise HTTPError(code, msg)

    def no_such_entity(self, path):
        self.do_error("No such entity: [{path}]".format(path=path), 404)

    def already_exists(self, path):
        thing = "File" if self.file_exists(path) else "Directory"
        self.do_error(
            "{thing} already exists: [{path}]".format(thing=thing, path=path),
            409)

    def guess_type(self, path, allow_directory=True):
        """
        Guess the type of a file.
        If allow_directory is False, don't consider the possibility that the
        file is a directory.

        Parameters
        ----------
            path: string
        """
        if path.endswith(".ipynb"):
            return "notebook"
        elif allow_directory and self.dir_exists(path):
            return "directory"
        else:
            return "file"

    def file_exists(self, path):
        # Does a file exist at the given path?
        self.log.debug("S3contents.GenericManager.file_exists: ('%s')", path)
        return self.fs.isfile(path)

    def dir_exists(self, path):
        # Does a directory exist at the given path?
        self.log.debug("S3contents.GenericManager.dir_exists: path('%s')",
                       path)
        return self.fs.isdir(path)

    def get(self, path, content=True, type=None, format=None):
        # Get a file or directory model.
        self.log.debug(
            "S3contents.GenericManager.get] path('%s') type(%s) format(%s)",
            path,
            type,
            format,
        )
        path = path.strip("/")

        if type is None:
            type = self.guess_type(path)
        try:
            func = {
                "directory": self._get_directory,
                "notebook": self._get_notebook,
                "file": self._get_file,
            }[type]
        except KeyError:
            raise ValueError("Unknown type passed: '{}'".format(type))

        return func(path=path, content=content, format=format)

    def _get_directory(self, path, content=True, format=None):
        self.log.debug(
            "S3contents.GenericManager._get_directory: path('%s') content(%s) format(%s)",
            path,
            content,
            format,
        )
        return self._directory_model_from_path(path, content=content)

    def _get_notebook(self, path, content=True, format=None):
        self.log.debug(
            "S3contents.GenericManager._get_notebook: path('%s') type(%s) format(%s)",
            path,
            content,
            format,
        )
        return self._notebook_model_from_path(path,
                                              content=content,
                                              format=format)

    def _get_file(self, path, content=True, format=None):
        self.log.debug(
            "S3contents.GenericManager._get_file: path('%s') type(%s) format(%s)",
            path,
            content,
            format,
        )
        return self._file_model_from_path(path, content=content, format=format)

    def _directory_model_from_path(self, path, content=False):
        self.log.debug(
            "S3contents.GenericManager._directory_model_from_path: path('%s') type(%s)",
            path,
            content,
        )
        model = base_directory_model(path)
        if self.fs.isdir(path):
            lstat = self.fs.lstat(path)
            if "ST_MTIME" in lstat and lstat["ST_MTIME"]:
                model["last_modified"] = model["created"] = lstat["ST_MTIME"]
        if content:
            if not self.dir_exists(path):
                self.no_such_entity(path)
            model["format"] = "json"
            dir_content = self.fs.ls(path=path)
            model["content"] = self._convert_file_records(dir_content)
        return model

    def _notebook_model_from_path(self, path, content=False, format=None):
        """
        Build a notebook model from database record.
        """
        model = base_model(path)
        model["type"] = "notebook"
        if self.fs.isfile(path):
            model["last_modified"] = model["created"] = self.fs.lstat(
                path)["ST_MTIME"]
        else:
            model["last_modified"] = model["created"] = DUMMY_CREATED_DATE
        if content:
            if not self.fs.isfile(path):
                self.no_such_entity(path)
            file_content, _ = self.fs.read(path, format)
            nb_content = reads(file_content, as_version=NBFORMAT_VERSION)
            self.mark_trusted_cells(nb_content, path)
            model["format"] = "json"
            model["content"] = nb_content
            self.validate_notebook_model(model)
        return model

    def _file_model_from_path(self, path, content=False, format=None):
        """
        Build a file model from database record.
        """
        model = base_model(path)
        model["type"] = "file"
        if self.fs.isfile(path):
            model["last_modified"] = model["created"] = self.fs.lstat(
                path)["ST_MTIME"]
        else:
            model["last_modified"] = model["created"] = DUMMY_CREATED_DATE
        if content:
            try:
                # Get updated format from fs.read()
                content, format_ = self.fs.read(path, format)
            except NoSuchFile as e:
                self.no_such_entity(e.path)
            except GenericFSError as e:
                self.do_error(str(e), 500)
            model["format"] = format_
            model["content"] = content
            model["mimetype"] = mimetypes.guess_type(path)[0] or "text/plain"
        return model

    def _convert_file_records(self, paths):
        """
        Applies _notebook_model_from_s3_path or _file_model_from_s3_path to each entry of `paths`,
        depending on the result of `guess_type`.
        """
        ret = []
        for path in paths:
            # path = self.fs.remove_prefix(path, self.prefix)  # Remove bucket prefix from paths
            if os.path.basename(path) == self.fs.dir_keep_file:
                continue
            type_ = self.guess_type(path, allow_directory=True)
            if type_ == "notebook":
                ret.append(self._notebook_model_from_path(path, False))
            elif type_ == "file":
                ret.append(self._file_model_from_path(path, False, None))
            elif type_ == "directory":
                ret.append(self._directory_model_from_path(path, False))
            else:
                self.do_error(
                    "Unknown file type %s for file '%s'" % (type_, path), 500)
        return ret

    def save(self, model, path):
        """Save a file or directory model to path.
        """
        self.log.debug("S3contents.GenericManager.save %s: '%s'", model, path)
        if "type" not in model:
            self.do_error("No model type provided", 400)
        if "content" not in model and model["type"] != "directory":
            self.do_error("No file content provided", 400)

        if model["type"] not in ("file", "directory", "notebook"):
            self.do_error("Unhandled contents type: %s" % model["type"], 400)

        self.run_pre_save_hook(model=model, path=path)

        try:
            if model["type"] == "notebook":
                validation_message = self._save_notebook(model, path)
            elif model["type"] == "file":
                validation_message = self._save_file(model, path)
            else:
                validation_message = self._save_directory(path)
        except Exception as e:
            self.log.error("Error while saving file: %s %s",
                           path,
                           e,
                           exc_info=True)
            self.do_error(
                "Unexpected error while saving file: %s %s" % (path, e), 500)

        model = self.get(path, type=model["type"], content=False)

        self.run_post_save_hook(model=model, s3_path=model["path"])

        if validation_message is not None:
            model["message"] = validation_message
        return model

    def _save_notebook(self, model, path):
        nb_contents = from_dict(model["content"])
        self.check_and_sign(nb_contents, path)
        file_contents = json.dumps(model["content"])
        self.fs.write(path, file_contents)
        self.validate_notebook_model(model)
        return model.get("message")

    def _save_file(self, model, path):
        file_contents = model["content"]
        file_format = model.get("format")
        self.fs.write(path, file_contents, file_format)

    def _save_directory(self, path):
        self.fs.mkdir(path)

    def rename_file(self, old_path, new_path):
        """Rename a file or directory.

        NOTE: This method is unfortunately named on the base class.  It
        actually moves a file or a directory.
        """
        self.log.debug(
            "S3contents.GenericManager.rename_file: Init rename of '%s' to '%s'",
            old_path,
            new_path,
        )
        if self.file_exists(new_path) or self.dir_exists(new_path):
            self.already_exists(new_path)
        elif self.file_exists(old_path) or self.dir_exists(old_path):
            self.log.debug(
                "S3contents.GenericManager: Actually renaming '%s' to '%s'",
                old_path,
                new_path,
            )
            self.fs.mv(old_path, new_path)
        else:
            self.no_such_entity(old_path)

    def delete_file(self, path):
        """Delete the file or directory at path.
        """
        self.log.debug("S3contents.GenericManager.delete_file '%s'", path)
        if self.file_exists(path) or self.dir_exists(path):
            self.fs.rm(path)
        else:
            self.no_such_entity(path)

    def is_hidden(self, path):
        """Is path a hidden directory or file?
        """
        self.log.debug("S3contents.GenericManager.is_hidden '%s'", path)
        return False

    @validate("post_save_hook")
    def _validate_post_save_hook(self, proposal):
        value = proposal["value"]
        if isinstance(value, string_types):
            value = import_item(value)
        if not callable(value):
            raise TraitError("post_save_hook must be callable")
        return value

    def run_post_save_hook(self, model, s3_path):
        """Run the post-save hook if defined, and log errors"""
        if self.post_save_hook:
            try:
                self.log.debug("Running post-save hook on %s", s3_path)
                self.post_save_hook(s3_path=s3_path,
                                    model=model,
                                    contents_manager=self)
            except Exception as e:
                self.log.error("Post-save hook failed o-n %s",
                               s3_path,
                               exc_info=True)
                raise HTTPError(
                    500, "Unexpected error while running post hook save: %s" %
                    e) from e
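A concrete hook makes the plumbing above easier to see. This is a minimal
sketch, not part of the example: the hook name `log_saved_file` and the
Jupyter config object `c` are assumptions, but the keyword signature matches
what run_post_save_hook passes.

# Hypothetical post-save hook; s3contents invokes it with these keywords.
def log_saved_file(s3_path, model, contents_manager):
    # s3_path is the saved object's path without the bucket/prefix.
    contents_manager.log.info("Saved %s (type=%s)", s3_path, model["type"])

# In jupyter_notebook_config.py (an import string would also pass
# _validate_post_save_hook, which accepts callables or import strings):
c.S3ContentsManager.post_save_hook = log_saved_file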
Example #11
0
class S3FS(GenericFS):

    access_key_id = Unicode(help="S3/AWS access key ID",
                            allow_none=True,
                            default_value=None).tag(
                                config=True, env="JPYNB_S3_ACCESS_KEY_ID")
    secret_access_key = Unicode(help="S3/AWS secret access key",
                                allow_none=True,
                                default_value=None).tag(
                                    config=True,
                                    env="JPYNB_S3_SECRET_ACCESS_KEY")

    endpoint_url = Unicode("s3.amazonaws.com", help="S3 endpoint URL").tag(
        config=True, env="JPYNB_S3_ENDPOINT_URL")
    region_name = Unicode("us-east-1",
                          help="Region name").tag(config=True,
                                                  env="JPYNB_S3_REGION_NAME")
    bucket = Unicode("notebooks", help="Bucket name to store notebooks").tag(
        config=True, env="JPYNB_S3_BUCKET")
    signature_version = Unicode(help="").tag(config=True)
    sse = Unicode(help="Type of server-side encryption to use").tag(
        config=True)

    prefix = Unicode(
        "", help="Prefix path inside the specified bucket").tag(config=True)
    delimiter = Unicode("/", help="Path delimiter").tag(config=True)

    dir_keep_file = Unicode(
        ".s3keep",
        help="Empty file to create when creating directories").tag(config=True)

    def __init__(self, log, **kwargs):
        super(S3FS, self).__init__(**kwargs)
        self.log = log

        client_kwargs = {
            "endpoint_url": self.endpoint_url,
            "region_name": self.region_name,
        }
        config_kwargs = {}
        if self.signature_version:
            config_kwargs["signature_version"] = self.signature_version
        s3_additional_kwargs = {}
        if self.sse:
            s3_additional_kwargs["ServerSideEncryption"] = self.sse

        self.fs = s3fs.S3FileSystem(key=self.access_key_id,
                                    secret=self.secret_access_key,
                                    client_kwargs=client_kwargs,
                                    config_kwargs=config_kwargs,
                                    s3_additional_kwargs=s3_additional_kwargs)

        self.init()

    def init(self):
        self.mkdir("")
        self.ls("")
        self.isdir("")

    #  GenericFS methods -----------------------------------------------------------------------------------------------

    def ls(self, path=""):
        path_ = self.path(path)
        self.log.debug("S3contents.S3FS: Listing directory: `%s`", path_)
        files = self.fs.ls(path_, refresh=True)
        return self.unprefix(files)

    def isfile(self, path):
        path_ = self.path(path)
        is_file = False

        exists = self.fs.exists(path_)
        if not exists:
            is_file = False
        else:
            try:
                # Info will fail if path is a dir
                self.fs.info(path_, refresh=True)
                is_file = True
            except FileNotFoundError:
                pass

        self.log.debug("S3contents.S3FS: `%s` is a file: %s", path_, is_file)
        return is_file

    def isdir(self, path):
        path_ = self.path(path)
        is_dir = False

        exists = self.fs.exists(path_)
        if not exists:
            is_dir = False
        else:
            try:
                # Info will fail if path is a dir
                self.fs.info(path_, refresh=True)
                is_dir = False
            except FileNotFoundError:
                is_dir = True

        self.log.debug("S3contents.S3FS: `%s` is a directory: %s", path_,
                       is_dir)
        return is_dir

    def mv(self, old_path, new_path):
        self.log.debug("S3contents.S3FS: Move file `%s` to `%s`", old_path,
                       new_path)
        self.cp(old_path, new_path)
        self.rm(old_path)

    def cp(self, old_path, new_path):
        old_path_, new_path_ = self.path(old_path), self.path(new_path)
        self.log.debug("S3contents.S3FS: Coping `%s` to `%s`", old_path_,
                       new_path_)

        if self.isdir(old_path):
            old_dir_path, new_dir_path = old_path, new_path
            for obj in self.ls(old_dir_path):
                old_item_path = obj
                new_item_path = old_item_path.replace(old_dir_path,
                                                      new_dir_path, 1)
                self.cp(old_item_path, new_item_path)
        elif self.isfile(old_path):
            self.fs.copy(old_path_, new_path_)

    def rm(self, path):
        path_ = self.path(path)
        self.log.debug("S3contents.S3FS: Removing: `%s`", path_)
        if self.isfile(path):
            self.log.debug("S3contents.S3FS: Removing file: `%s`", path_)
            self.fs.rm(path_)
        elif self.isdir(path):
            self.log.debug("S3contents.S3FS: Removing directory: `%s`", path_)
            self.fs.rm(path_ + self.delimiter, recursive=True)
            # self.fs.rmdir(path_ + self.delimiter, recursive=True)

    def mkdir(self, path):
        path_ = self.path(path, self.dir_keep_file)
        self.log.debug("S3contents.S3FS: Making dir: `%s`", path_)
        self.fs.touch(path_)

    def read(self, path):
        path_ = self.path(path)
        if not self.isfile(path):
            raise NoSuchFile(path_)
        with self.fs.open(path_, mode='rb') as f:
            content = f.read().decode("utf-8")
        return content

    def lstat(self, path):
        path_ = self.path(path)
        info = self.fs.info(path_, refresh=True)
        ret = {}
        ret["ST_MTIME"] = info["LastModified"]
        return ret

    def write(self, path, content):
        path_ = self.path(self.unprefix(path))
        content_ = base64.b64decode(content)
        self.log.debug("S3contents.S3FS: Writing file: `%s`", path_)
        with self.fs.open(path_, mode='wb') as f:
            f.write(content_)

    def writenotebook(self, path, content):
        path_ = self.path(self.unprefix(path))
        self.log.debug("S3contents.S3FS: Writing notebook: `%s`", path_)
        with self.fs.open(path_, mode='wb') as f:
            f.write(content.encode("utf-8"))

    #  Utilities -------------------------------------------------------------------------------------------------------

    def get_prefix(self):
        """Full prefix: bucket + optional prefix"""
        prefix = self.bucket
        if self.prefix:
            prefix += self.delimiter + self.prefix
        return prefix

    prefix_ = property(get_prefix)

    def unprefix(self, path):
        """Remove the self.prefix_ (if present) from a path or list of paths"""
        if isinstance(path, six.string_types):
            path = path[len(self.prefix_):] if path.startswith(
                self.prefix_) else path
            path = path[1:] if path.startswith(self.delimiter) else path
            return path
        if isinstance(path, (list, tuple)):
            path = [
                p[len(self.prefix_):] if p.startswith(self.prefix_) else p
                for p in path
            ]
            path = [p[1:] if p.startswith(self.delimiter) else p for p in path]
            return path

    def path(self, *path):
        """Utility to join paths including the bucket and prefix"""
        path = list(filter(None, path))
        path = self.unprefix(path)
        items = [self.prefix_] + path
        return self.delimiter.join(items)
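The three path utilities above compose as follows; a comment-only
illustration with assumed trait values, not output from the example:

# Assuming bucket="notebooks", prefix="team-a", delimiter="/":
#   fs.prefix_                                -> "notebooks/team-a"
#   fs.path("nb.ipynb")                       -> "notebooks/team-a/nb.ipynb"
#   fs.unprefix("notebooks/team-a/nb.ipynb")  -> "nb.ipynb"
#   fs.unprefix(["notebooks/team-a/a", "b"])  -> ["a", "b"]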
Example #12
0
class S3FS(GenericFS):

    access_key_id = Unicode(
        help="S3/AWS access key ID", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_ACCESS_KEY_ID")
    secret_access_key = Unicode(
        help="S3/AWS secret access key", allow_none=True, default_value=None).tag(
            config=True, env="JPYNB_S3_SECRET_ACCESS_KEY")

    endpoint_url = Unicode(
        "s3.amazonaws.com", help="S3 endpoint URL").tag(
            config=True, env="JPYNB_S3_ENDPOINT_URL")
    region_name = Unicode(
        "us-east-1", help="Region name").tag(
            config=True, env="JPYNB_S3_REGION_NAME")
    bucket = Unicode(
        "notebooks", help="Bucket name to store notebooks").tag(
            config=True, env="JPYNB_S3_BUCKET")
    signature_version = Unicode(help="").tag(config=True)
    sse = Unicode(help="Type of server-side encryption to use").tag(config=True)
    kms_key_id = Unicode(help="KMS ID to use to encrypt workbooks").tag(config=True)

    prefix = Unicode("", help="Prefix path inside the specified bucket").tag(config=True)
    delimiter = Unicode("/", help="Path delimiter").tag(config=True)

    dir_keep_file = Unicode(
        ".s3keep", help="Empty file to create when creating directories").tag(config=True)

    session_token = Unicode(
        help="S3/AWS session token",
        allow_none=True,
        default_value=None
    ).tag(config=True, env="JPYNB_S3_SESSION_TOKEN")

    def refresh_fs_connection(self):
        with open("/secrets/config.env") as fp:
            for line in fp:
                key, value = line.replace('"', '').replace(
                    'export ', '', 1).strip().split('=', 1)
                if key == "AWS_ACCESS_KEY_ID":
                    self.access_key_id = value
                elif key == "AWS_SECRET_ACCESS_KEY":
                    self.secret_access_key = value
                elif key == "AWS_SESSION_TOKEN":
                    self.session_token = value
                elif key == "AWS_S3_BUCKET":
                    self.bucket = value
                elif key == "AWS_REGION":
                    self.region_name = value
                elif key == "AWS_S3_KMS_KEY_ARN":
                    self.kms_key_id = value

    def __init__(self, log, **kwargs):
        super(S3FS, self).__init__(**kwargs)
        self.log = log
        self.refresh_fs_connection()
        client_kwargs = {
            "endpoint_url": self.endpoint_url,
            "region_name": self.region_name,
        }
        config_kwargs = {}
        if self.signature_version:
            config_kwargs["signature_version"] = self.signature_version
        s3_additional_kwargs = {}
        if self.sse:
            s3_additional_kwargs["ServerSideEncryption"] = self.sse
        if self.kms_key_id:
            s3_additional_kwargs["SSEKMSKeyId"]= self.kms_key_id

        self.fs = s3fs.S3FileSystem(key=self.access_key_id,
                                    secret=self.secret_access_key,
                                    token=self.session_token,
                                    client_kwargs=client_kwargs,
                                    config_kwargs=config_kwargs,
                                    s3_additional_kwargs=s3_additional_kwargs)

        self.init()

    def init(self):
        try:
            self.mkdir("")
            self.ls("")
            self.isdir("")
        except ClientError as ex:
            if "AccessDenied" in str(ex):
                policy = SAMPLE_ACCESS_POLICY.format(bucket=os.path.join(self.bucket, self.prefix))
                self.log.error("AccessDenied error while creating initial S3 objects. Create an IAM policy like:\n{policy}".format(policy=policy))
                sys.exit(1)
            else:
                raise ex

    #  GenericFS methods -----------------------------------------------------------------------------------------------

    def ls(self, path=""):
        path_ = self.path(path)
        self.log.debug("S3contents.S3FS Auto Reload ls: Started")
        self.refresh_fs_connection()
        self.log.debug("S3contents.S3FS Auto Reload ls: Completed")
        self.log.debug("S3contents.S3FS: Listing directory: `%s`", path_)
        files = self.fs.ls(path_, refresh=True)
        return self.unprefix(files)

    def isfile(self, path):
        path_ = self.path(path)
        is_file = False

        exists = self.fs.exists(path_)
        if not exists:
            is_file = False
        else:
            try:
                # Info will fail if path is a dir
                self.fs.info(path_)
                is_file = True
            except FileNotFoundError:
                pass

        self.log.debug("S3contents.S3FS: `%s` is a file: %s", path_, is_file)
        return is_file

    def isdir(self, path):
        path_ = self.path(path)
        is_dir = False

        exists = self.fs.exists(path_)
        if not exists:
            is_dir = False
        else:
            try:
                # Info will fail if path is a dir
                self.fs.info(path_)
                is_dir = False
            except FileNotFoundError:
                is_dir = True

        self.log.debug("S3contents.S3FS: `%s` is a directory: %s", path_, is_dir)
        return is_dir

    def mv(self, old_path, new_path):
        self.log.debug("S3contents.S3FS: Move file `%s` to `%s`", old_path, new_path)
        self.cp(old_path, new_path)
        self.rm(old_path)

    def cp(self, old_path, new_path):
        old_path_, new_path_ = self.path(old_path), self.path(new_path)
        self.log.debug("S3contents.S3FS: Coping `%s` to `%s`", old_path_, new_path_)

        if self.isdir(old_path):
            old_dir_path, new_dir_path = old_path, new_path
            for obj in self.ls(old_dir_path):
                old_item_path = obj
                new_item_path = old_item_path.replace(old_dir_path, new_dir_path, 1)
                self.cp(old_item_path, new_item_path)
        elif self.isfile(old_path):
            self.fs.copy(old_path_, new_path_)

    def rm(self, path):
        path_ = self.path(path)
        self.log.debug("S3contents.S3FS: Removing: `%s`", path_)
        if self.isfile(path):
            self.log.debug("S3contents.S3FS: Removing file: `%s`", path_)
            self.fs.rm(path_)
        elif self.isdir(path):
            self.log.debug("S3contents.S3FS: Removing directory: `%s`", path_)
            self.fs.rm(path_ + self.delimiter, recursive=True)
            # self.fs.rmdir(path_ + self.delimiter, recursive=True)

    def mkdir(self, path):
        path_ = self.path(path, self.dir_keep_file)
        self.log.debug("S3contents.S3FS: Making dir: `%s`", path_)
        self.fs.touch(path_, acl='private')

    def read(self, path):
        path_ = self.path(path)
        if not self.isfile(path):
            raise NoSuchFile(path_)
        with self.fs.open(path_, mode='rb', acl='private') as f:
            content = f.read().decode("utf-8")
        return content

    def lstat(self, path):
        path_ = self.path(path)
        info = self.fs.info(path_)
        ret = {}
        ret["ST_MTIME"] = info["LastModified"]
        return ret

    def write(self, path, content, format):
        path_ = self.path(self.unprefix(path))
        self.log.debug("S3contents.S3FS: Writing file: `%s`", path_)
        if format not in {'text', 'base64'}:
            raise HTTPError(
                400,
                "Must specify format of file contents as 'text' or 'base64'",
            )
        try:
            if format == 'text':
                content_ = content.encode('utf8')
            else:
                b64_bytes = content.encode('ascii')
                content_ = base64.b64decode(b64_bytes)
        except Exception as e:
            raise HTTPError(
                400, u'Encoding error saving %s: %s' % (path_, e)
            )
        with self.fs.open(path_, mode='wb', acl='private') as f:
            f.write(content_)

    def writenotebook(self, path, content):
        path_ = self.path(self.unprefix(path))
        self.log.debug("S3contents.S3FS: Writing notebook: `%s`", path_)
        with self.fs.open(path_, mode='wb', acl='private') as f:
            f.write(content.encode("utf-8"))

    #  Utilities -------------------------------------------------------------------------------------------------------

    def get_prefix(self):
        """Full prefix: bucket + optional prefix"""
        prefix = self.bucket
        if self.prefix:
            prefix += self.delimiter + self.prefix
        return prefix
    prefix_ = property(get_prefix)

    def unprefix(self, path):
        """Remove the self.prefix_ (if present) from a path or list of paths"""
        if isinstance(path, six.string_types):
            path = path[len(self.prefix_):] if path.startswith(self.prefix_) else path
            path = path[1:] if path.startswith(self.delimiter) else path
            return path
        if isinstance(path, (list, tuple)):
            path = [p[len(self.prefix_):] if p.startswith(self.prefix_) else p for p in path]
            path = [p[1:] if p.startswith(self.delimiter) else p for p in path]
            return path

    def path(self, *path):
        """Utility to join paths including the bucket and prefix"""
        path = list(filter(None, path))
        path = self.unprefix(path)
        items = [self.prefix_] + path
        return self.delimiter.join(items)
Example #13
0
class S3ContentsManager(GenericContentsManager):

    access_key_id = Unicode(help="S3/AWS access key ID",
                            allow_none=True,
                            default_value=None).tag(
                                config=True, env="JPYNB_S3_ACCESS_KEY_ID")
    secret_access_key = Unicode(help="S3/AWS secret access key",
                                allow_none=True,
                                default_value=None).tag(
                                    config=True,
                                    env="JPYNB_S3_SECRET_ACCESS_KEY")

    endpoint_url = Unicode("https://s3.amazonaws.com",
                           help="S3 endpoint URL").tag(
                               config=True, env="JPYNB_S3_ENDPOINT_URL")
    region_name = Unicode("us-east-1",
                          help="Region name").tag(config=True,
                                                  env="JPYNB_S3_REGION_NAME")
    bucket = Unicode("notebooks", help="Bucket name to store notebooks").tag(
        config=True, env="JPYNB_S3_BUCKET")
    prefix = Unicode(
        "", help="Prefix path inside the specified bucket").tag(config=True)
    signature_version = Unicode(help="").tag(config=True)
    delimiter = Unicode("/", help="Path delimiter").tag(config=True)
    sse = Unicode(help="Type of server-side encryption to use").tag(
        config=True)

    kms_key_id = Unicode(help="KMS ID to use to encrypt workbooks").tag(
        config=True)

    session_token = Unicode(help="S3/AWS session token",
                            allow_none=True,
                            default_value=None).tag(
                                config=True, env="JPYNB_S3_SESSION_TOKEN")

    boto3_session = Any(
        help="Place to store a custom boto3 session (passed to S3FS); "
        "could be set by init_s3_hook")
    init_s3_hook = Any(help="Optional hook for initializing S3").tag(config=True)

    def __init__(self, *args, **kwargs):
        super(S3ContentsManager, self).__init__(*args, **kwargs)

        self.run_init_s3_hook()

        self._fs = S3FS(log=self.log,
                        access_key_id=self.access_key_id,
                        secret_access_key=self.secret_access_key,
                        endpoint_url=self.endpoint_url,
                        region_name=self.region_name,
                        bucket=self.bucket,
                        prefix=self.prefix,
                        session_token=self.session_token,
                        signature_version=self.signature_version,
                        delimiter=self.delimiter,
                        sse=self.sse,
                        kms_key_id=self.kms_key_id,
                        boto3_session=self.boto3_session)

    def run_init_s3_hook(self):
        if self.init_s3_hook is not None:
            self.init_s3_hook(self)

    def _save_notebook(self, model, path):
        def save_model():
            nb_contents = from_dict(model['content'])
            self.check_and_sign(nb_contents, path)
            file_contents = json.dumps(model["content"])
            self._fs.writenotebook(path, file_contents)
            self.validate_notebook_model(model)
            return model.get("message")

        m = model['content']['metadata']
        has_versioning = all(
            k in m for k in
            ('s3_requested_version', 's3_current_version', 's3_latest_version'))

        if not has_versioning:
            return save_model()

        version_changed = m['s3_requested_version'] != m['s3_current_version']
        version_latest = m['s3_current_version'] == m['s3_latest_version']
        version_requested = m['s3_latest_version'] is not None

        version_changed_not_latest = version_changed and (
            not version_latest) and version_requested
        version_changed_latest = version_changed and version_latest and version_requested
        content_changed_not_latest = (
            (not version_changed) or
            (not version_requested)) and (not version_latest)
        content_changed_latest = ((not version_changed) or
                                  (not version_requested)) and version_latest

        if version_changed_not_latest:
            self._fs.requested_version_id_lookup[path] = m[
                's3_requested_version']

        if content_changed_not_latest:
            raise Exception('Cannot overwrite older versions')

        if content_changed_latest or version_changed_latest:
            if 's3_create_release' in m:
                if 's3_latest_release_tag' in m:
                    m['s3_latest_release_tag'] = int(
                        m['s3_latest_release_tag']) + 1
                else:
                    m['s3_latest_release_tag'] = 0

                self._fs.create_release_tag(path, m['s3_latest_release_tag'],
                                            m['s3_create_release'])

                del m['s3_create_release']
            if version_changed_latest:
                self._fs.requested_version_id_lookup[path] = m[
                    's3_requested_version']
            else:
                self._fs.requested_version_id_lookup[path] = None
            return save_model()
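The flag logic in this _save_notebook is dense; a hedged walk-through with
made-up metadata values shows the branch that protects version history:

# Hypothetical metadata: the user is viewing S3 version "v1" while "v3"
# is the latest object version.
m = {"s3_requested_version": "v1",
     "s3_current_version": "v1",
     "s3_latest_version": "v3"}
# version_changed = False   (requested == current)
# version_latest  = False   (current != latest)
# => content_changed_not_latest is True, so the save raises
#    "Cannot overwrite older versions" instead of clobbering history.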
Example #14
0
class GFFS(GenericFS):
    project = Unicode(help="GFile Project",
                      allow_none=True,
                      default_value=None).tag(config=True,
                                              env="JPYNB_GCS_PROJECT")
    region_name = Unicode("us-east-1",
                          help="Region name").tag(config=True,
                                                  env="JPYNB_GCS_REGION_NAME")

    prefix = Unicode(
        "", help="Prefix path inside the specified bucket").tag(config=True)
    separator = Unicode("/", help="Path separator").tag(config=True)

    dir_keep_file = Unicode(
        "",
        help="Empty file to create when creating directories").tag(config=True)

    def __init__(self, log, **kwargs):
        super(GFFS, self).__init__(**kwargs)
        self.log = log
        self.fs = tf.io.gfile
        self.dstat = OrderedDictCache()
        self.init()

    def init(self):
        self.mkdir("")
        self.ls("")
        assert self.isdir(""), "The root directory should exists :)"

    #  GenericFS methods -----------------------------------------------------------------------------------------------

    def ls(self, path, contain_hidden=False):
        path_ = self.path(path)
        self.log.debug("S3contents.GFFS: Listing directory: `%s`", path_)
        files = [
            path + self.separator + f for f in self.fs.listdir(path_)
            if contain_hidden or not is_file_hidden(f)
        ]
        return self.unprefix(files)

    def isfile(self, path):
        st = self.lstat(path)
        return st['type'] == 'file'

    def isdir(self, path):
        st = self.lstat(path)
        return st['type'] == 'directory'

    def mv(self, old_path, new_path):
        self.log.debug("S3contents.GFFS: Move file `%s` to `%s`", old_path,
                       new_path)
        self.cp(old_path, new_path)
        self.rm(old_path)

    def cp(self, old_path, new_path):
        old_path_, new_path_ = self.path(old_path), self.path(new_path)
        self.log.debug("S3contents.GFFS: Coping `%s` to `%s`", old_path_,
                       new_path_)

        if self.isdir(old_path):
            old_dir_path, new_dir_path = old_path, new_path
            subdirs = self.ls(old_dir_path, True)
            if subdirs:
                for obj in subdirs:
                    old_item_path = obj
                    new_item_path = old_item_path.replace(
                        old_dir_path, new_dir_path, 1)
                    self.cp(old_item_path, new_item_path)
            else:
                self.fs.mkdir(new_path_)  # empty dir
        elif self.isfile(old_path):
            self.fs.copy(old_path_, new_path_)

    def rm(self, path):
        path_ = self.path(path)
        self.log.debug("S3contents.GFFS: Removing: `%s`", path_)
        if self.isfile(path):
            self.log.debug("S3contents.GFFS: Removing file: `%s`", path_)
            self.fs.remove(path_)
        elif self.isdir(path):
            self.log.debug("S3contents.GFFS: Removing directory: `%s`", path_)
            self.fs.rmtree(path_)

    def mkdir(self, path):
        path_ = self.path(path)  #, self.dir_keep_file)
        self.log.debug("S3contents.GFFS: Making dir (touch): `%s`", path_)
        self.fs.makedirs(path_)

    def read(self, path, format=None):
        path_ = self.path(path)
        if not self.isfile(path):
            raise NoSuchFile(path_)
        with self.fs.GFile(path_, mode='rb') as f:
            if f.size() > LARGEFSIZE:

                def downchunk():
                    while True:
                        buf = f.read(n=1048576)
                        if not buf:
                            break
                        yield buf

                return downchunk(), 'base64'
            bcontent = f.read()
        if format is None or format == 'text':
            # Try to interpret as unicode if format is unknown or if unicode
            # was explicitly requested.
            try:
                self.log.debug("S3contents.GFFS: read: `%s`", path_)
                return bcontent.decode('utf8'), 'text'
            except UnicodeError:
                if format == 'text':
                    raise HTTPError(
                        400,
                        "%s is not UTF-8 encoded" % os_path,
                        reason='bad format',
                    )
        return encodebytes(bcontent).decode('ascii'), 'base64'

    def lstat(self, path):
        calltime = time.time()
        if path in self.dstat:
            st = self.dstat[path]
            if calltime - st["calltime"] < 5:
                return st
        path_ = self.path(path)
        self.log.debug("S3contents.GFFS: lstat file: `%s` `%s`", path, path_)
        try:
            info = self.fs.stat(path_)
            self.dstat[path] = {
                "calltime": calltime,
                "ST_MTIME": info.mtime_nsec // 1000000,
                "size": info.length,
                "type": "directory" if info.is_directory else "file"
            }
        except tf.errors.NotFoundError:
            self.dstat[path] = {
                "calltime": calltime,
                "ST_MTIME": 0,
                "type": None
            }
        return self.dstat[path]

    def write(self, path, content, format=None, mode='wb'):
        path_ = self.path(self.unprefix(path))
        self.log.debug("S3contents.GFFS: Writing file: `%s`", path_)
        with self.fs.GFile(path_, mode=mode) as f:
            if format == 'base64':
                b64_bytes = content.encode('ascii')
                f.write(decodebytes(b64_bytes))
            else:
                f.write(content.encode("utf-8"))

    #  Utilities -------------------------------------------------------------------------------------------------------

    def strip(self, path):
        if isinstance(path, six.string_types):
            return path.strip(self.separator)
        if isinstance(path, (list, tuple)):
            return list(map(self.strip, path))

    def join(self, *paths):
        paths = self.strip(paths)
        return self.separator.join(paths)

    def get_prefix(self):
        return self.prefix

    prefix_ = property(get_prefix)

    def unprefix(self, path):
        """Remove the self.prefix_ (if present) from a path or list of paths"""
        path = self.strip(path)
        if isinstance(path, six.string_types):
            path = path[len(self.prefix_):] if path.startswith(
                self.prefix_) else path
            path = path[1:] if path.startswith(self.separator) else path
            return path
        if isinstance(path, (list, tuple)):
            path = [
                p[len(self.prefix_):] if p.startswith(self.prefix_) else p
                for p in path
            ]
            path = [p[1:] if p.startswith(self.separator) else p for p in path]
            return path

    def path(self, *path):
        """Utility to join paths including the bucket and prefix"""
        path = list(filter(None, path))
        path = self.unprefix(path)
        items = [self.prefix_] + path
        return self.join(*items)
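GFFS.read above switches behavior on file size: objects over LARGEFSIZE come
back as a generator of 1 MiB chunks labelled 'base64' rather than a decoded
string. A consumption sketch, with `fs`, `handle`, and `process` assumed:

content, fmt = fs.read("data/large.bin")  # fs: a GFFS instance
if not isinstance(content, str):
    # Large-file path: iterate the downchunk() generator of raw chunks.
    for chunk in content:
        handle(chunk)
else:
    # Small-file path: decoded text or a base64 string, per `fmt`.
    process(content, fmt)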
Example #15
0
class S3FS(GenericFS):

    access_key_id = Unicode(help="S3/AWS access key ID",
                            allow_none=True,
                            default_value=None).tag(
                                config=True, env="JPYNB_S3_ACCESS_KEY_ID")
    secret_access_key = Unicode(help="S3/AWS secret access key",
                                allow_none=True,
                                default_value=None).tag(
                                    config=True,
                                    env="JPYNB_S3_SECRET_ACCESS_KEY")

    endpoint_url = Unicode("s3-us-gov-east-1.amazonaws.com",
                           help="S3 endpoint URL").tag(
                               config=True, env="JPYNB_S3_ENDPOINT_URL")
    region_name = Unicode("us-gov-east-1",
                          help="Region name").tag(config=True,
                                                  env="JPYNB_S3_REGION_NAME")
    bucket = Unicode("notebooks", help="Bucket name to store notebooks").tag(
        config=True, env="JPYNB_S3_BUCKET")
    signature_version = Unicode(help="").tag(config=True)
    sse = Unicode(help="Type of server-side encryption to use").tag(
        config=True)
    kms_key_id = Unicode(help="KMS ID to use to encrypt workbooks").tag(
        config=True)

    prefix = Unicode(
        "", help="Prefix path inside the specified bucket").tag(config=True)
    delimiter = Unicode("/", help="Path delimiter").tag(config=True)

    dir_keep_file = Unicode(
        ".s3keep",
        help="Empty file to create when creating directories").tag(config=True)

    session_token = Unicode(help="S3/AWS session token",
                            allow_none=True,
                            default_value=None).tag(
                                config=True, env="JPYNB_S3_SESSION_TOKEN")

    boto3_session = Any(
        help="Place to store customer boto3 session instance - likely passed in"
    )

    s3fs_additional_kwargs = Any(
        help="optional dictionary to be appended to s3fs additional kwargs"
    ).tag(config=True)

    def __init__(self, log, **kwargs):
        super(S3FS, self).__init__(**kwargs)
        self.log = log

        client_kwargs = {
            "endpoint_url": self.endpoint_url,
            "region_name": self.region_name,
        }
        config_kwargs = {}
        if self.signature_version:
            config_kwargs["signature_version"] = self.signature_version
        if self.s3fs_additional_kwargs:
            self.must_be_dictionary(self.s3fs_additional_kwargs)
            s3_additional_kwargs = self.s3fs_additional_kwargs
        else:
            s3_additional_kwargs = {}
        if self.sse:
            s3_additional_kwargs["ServerSideEncryption"] = self.sse
        if self.kms_key_id:
            s3_additional_kwargs["SSEKMSKeyId"] = self.kms_key_id

        self.fs = s3fs.S3FileSystem(
            key=self.access_key_id,
            secret=self.secret_access_key,
            token=self.session_token,
            client_kwargs=client_kwargs,
            config_kwargs=config_kwargs,
            s3_additional_kwargs=s3_additional_kwargs,
            session=self.boto3_session,
        )

        self.init()

    def init(self):
        try:
            self.mkdir("")
            self.ls("")
            self.isdir("")
        except ClientError as ex:
            if "AccessDenied" in str(ex):
                policy = SAMPLE_ACCESS_POLICY.format(
                    bucket=os.path.join(self.bucket, self.prefix))
                self.log.error(
                    "AccessDenied error while creating initial S3 objects. Create an IAM policy like:\n{policy}"
                    .format(policy=policy))
                sys.exit(1)
            else:
                raise ex

    #  GenericFS methods -----------------------------------------------------------------------------------------------

    def ls(self, path=""):
        path_ = self.path(path)
        self.log.debug("S3contents.S3FS.ls: Listing directory: `%s`", path_)
        files = self.fs.ls(path_, refresh=True)
        return self.unprefix(files)

    def isfile(self, path):
        path_ = self.path(path)
        # FileNotFoundError handled by s3fs
        is_file = self.fs.isfile(path_)

        self.log.debug("S3contents.S3FS: `%s` is a file: %s", path_, is_file)
        return is_file

    def isdir(self, path):
        path_ = self.path(path)
        # FileNotFoundError handled by s3fs
        is_dir = self.fs.isdir(path_)

        self.log.debug("S3contents.S3FS: `%s` is a directory: %s", path_,
                       is_dir)
        return is_dir

    def mv(self, old_path, new_path):
        self.log.debug("S3contents.S3FS: Move file `%s` to `%s`", old_path,
                       new_path)
        self.cp(old_path, new_path)
        self.rm(old_path)

    def cp(self, old_path, new_path):
        old_path_, new_path_ = self.path(old_path), self.path(new_path)
        self.log.debug("S3contents.S3FS: Coping `%s` to `%s`", old_path_,
                       new_path_)

        if self.isdir(old_path):
            old_dir_path, new_dir_path = old_path, new_path
            for obj in self.ls(old_dir_path):
                old_item_path = obj
                new_item_path = old_item_path.replace(old_dir_path,
                                                      new_dir_path, 1)
                self.cp(old_item_path, new_item_path)
            self.mkdir(new_path)  # Touch with dir_keep_file
        elif self.isfile(old_path):
            self.fs.copy(old_path_, new_path_)

    def rm(self, path):
        path_ = self.path(path)
        self.log.debug("S3contents.S3FS: Removing: `%s`", path_)
        if self.isfile(path):
            self.log.debug("S3contents.S3FS: Removing file: `%s`", path_)
            self.fs.rm(path_)
        elif self.isdir(path):
            self.log.debug("S3contents.S3FS: Removing directory: `%s`", path_)
            self.fs.rm(path_ + self.delimiter, recursive=True)
            # self.fs.rmdir(path_ + self.delimiter, recursive=True)

    def mkdir(self, path):
        path_ = self.path(path, self.dir_keep_file)
        self.log.debug("S3contents.S3FS: Making dir: `%s`", path_)
        self.fs.touch(path_)

    def read(self, path, format):
        path_ = self.path(path)
        if not self.isfile(path):
            raise NoSuchFile(path_)
        with self.fs.open(path_, mode="rb") as f:
            content = f.read()
        if format is None or format == "text":
            # Try to interpret as unicode if format is unknown or if unicode
            # was explicitly requested.
            try:
                return content.decode("utf-8"), "text"
            except UnicodeError:
                if format == "text":
                    err = "{} is not UTF-8 encoded".format(path_)
                    self.log.error(err)
                    raise HTTPError(400, err, reason="bad format")
        return base64.b64encode(content).decode("ascii"), "base64"

    def lstat(self, path):
        path_ = self.path(path)
        if self.fs.isdir(path_):  # Try to get status of the dir_keep_file
            path_ = self.path(path, self.dir_keep_file)
        try:
            self.fs.invalidate_cache(path_)
            info = self.fs.info(path_)
        except FileNotFoundError:
            return {"ST_MTIME": None}
        ret = {}
        ret["ST_MTIME"] = info["LastModified"]
        return ret

    def write(self, path, content, format):
        path_ = self.path(self.unprefix(path))
        self.log.debug("S3contents.S3FS: Writing file: `%s`", path_)
        if format not in {"text", "base64"}:
            raise HTTPError(
                400,
                "Must specify format of file contents as 'text' or 'base64'",
            )
        try:
            if format == "text":
                content_ = content.encode("utf8")
            else:
                b64_bytes = content.encode("ascii")
                content_ = base64.b64decode(b64_bytes)
        except Exception as e:
            raise HTTPError(400, "Encoding error saving %s: %s" % (path_, e))
        with self.fs.open(path_, mode="wb") as f:
            f.write(content_)

    def writenotebook(self, path, content):
        path_ = self.path(self.unprefix(path))
        self.log.debug("S3contents.S3FS: Writing notebook: `%s`", path_)
        with self.fs.open(path_, mode="wb") as f:
            f.write(content.encode("utf-8"))

    #  Utilities -------------------------------------------------------------------------------------------------------

    def get_prefix(self):
        """Full prefix: bucket + optional prefix"""
        prefix = self.bucket
        if prefix.startswith("s3://"):
            prefix = prefix[5:]
        if self.prefix:
            prefix += self.delimiter + self.prefix
        return prefix

    prefix_ = property(get_prefix)

    def unprefix(self, path):
        """Remove the self.prefix_ (if present) from a path or list of paths"""
        self.log.debug(
            f"S3FS.unprefix: self.prefix_: {self.prefix_} path: {path}")
        if isinstance(path, str):
            path = path[len(self.prefix_):] if path.startswith(
                self.prefix_) else path
            path = path[1:] if path.startswith(self.delimiter) else path
            return path
        if isinstance(path, (list, tuple)):
            path = [
                p[len(self.prefix_):] if p.startswith(self.prefix_) else p
                for p in path
            ]
            path = [p[1:] if p.startswith(self.delimiter) else p for p in path]
            return path

    def path(self, *path):
        """Utility to join paths including the bucket and prefix"""
        path = list(filter(None, path))
        path = self.unprefix(path)
        items = [self.prefix_] + path
        return self.delimiter.join(items)

    @staticmethod
    def must_be_dictionary(dictionary):
        if not isinstance(dictionary, dict):
            raise ValueError(
                's3fs_additional_kwargs must be a dictionary or None, its default value.'
            )
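The s3fs_additional_kwargs trait above is merged before the SSE/KMS keys, so
arbitrary per-request S3 arguments can be injected from config. A minimal
sketch; the ACL value is illustrative only, and this assumes the trait is
exposed to Jupyter config (it is tagged config=True):

# In jupyter_notebook_config.py; keys flow through to every S3 request.
c.S3FS.s3fs_additional_kwargs = {"ACL": "bucket-owner-full-control"}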
Example #16
0
class GCSFS(GenericFS):

    project = Unicode(help="GCP Project", allow_none=True,
                      default_value=None).tag(config=True,
                                              env="JPYNB_GCS_PROJECT")
    token = Unicode(help="Path to the GCP token",
                    allow_none=True,
                    default_value=None).tag(config=True,
                                            env="JPYNB_GCS_TOKEN_PATH")

    region_name = Unicode("us-east-1",
                          help="Region name").tag(config=True,
                                                  env="JPYNB_GCS_REGION_NAME")
    bucket = Unicode("notebooks", help="Bucket name to store notebooks").tag(
        config=True, env="JPYNB_GCS_BUCKET")

    prefix = Unicode(
        "", help="Prefix path inside the specified bucket").tag(config=True)
    separator = Unicode("/", help="Path separator").tag(config=True)

    dir_keep_file = Unicode(
        ".gcskeep",
        help="Empty file to create when creating directories").tag(config=True)

    def __init__(self, log, **kwargs):
        super(GCSFS, self).__init__(**kwargs)
        self.log = log

        token = os.path.expanduser(self.token)
        self.fs = gcsfs.GCSFileSystem(project=self.project, token=token)

        self.init()

    def init(self):
        self.mkdir("")
        self.ls("")
        assert self.isdir(""), "The root directory should exists :)"

    #  GenericFS methods -----------------------------------------------------------------------------------------------

    def ls(self, path):
        path_ = self.path(path)
        self.log.debug("S3contents.GCSFS: Listing directory: `%s`", path_)
        files = self.fs.ls(path_)
        return self.unprefix(files)

    def isfile(self, path):
        path_ = self.path(path)
        is_file = False

        exists = self.fs.exists(path_)
        if not exists:
            is_file = False
        else:
            try:
                # Info will fail if path is a dir
                self.fs.info(path_)
                is_file = True
            except FileNotFoundError:
                pass

        self.log.debug("S3contents.GCSFS: `%s` is a file: %s", path_, is_file)
        return is_file

    def isdir(self, path):
        # GCSFS doesn't return exists=True for a directory with no files, so
        # we need to check if the dir_keep_file exists
        is_dir = self.isfile(path + self.separator + self.dir_keep_file)
        path_ = self.path(path)
        self.log.debug("S3contents.GCSFS: `%s` is a directory: %s", path_,
                       is_dir)
        return is_dir

    def mv(self, old_path, new_path):
        self.log.debug("S3contents.GCSFS: Move file `%s` to `%s`", old_path,
                       new_path)
        self.cp(old_path, new_path)
        self.rm(old_path)

    def cp(self, old_path, new_path):
        old_path_, new_path_ = self.path(old_path), self.path(new_path)
        self.log.debug("S3contents.GCSFS: Coping `%s` to `%s`", old_path_,
                       new_path_)

        if self.isdir(old_path):
            old_dir_path, new_dir_path = old_path, new_path
            for obj in self.ls(old_dir_path):
                old_item_path = obj
                new_item_path = old_item_path.replace(old_dir_path,
                                                      new_dir_path, 1)
                self.cp(old_item_path, new_item_path)
        elif self.isfile(old_path):
            self.fs.copy(old_path_, new_path_)

    def rm(self, path):
        path_ = self.path(path)
        self.log.debug("S3contents.GCSFS: Removing: `%s`", path_)
        if self.isfile(path):
            self.log.debug("S3contents.GCSFS: Removing file: `%s`", path_)
            self.fs.rm(path_)
        elif self.isdir(path):
            self.log.debug("S3contents.GCSFS: Removing directory: `%s`", path_)
            files = self.fs.walk(path_)
            for f in files:
                self.fs.rm(f)

    def mkdir(self, path):
        path_ = self.path(path, self.dir_keep_file)
        self.log.debug("S3contents.GCSFS: Making dir (touch): `%s`", path_)
        self.fs.touch(path_)

    def read(self, path):
        path_ = self.path(path)
        if not self.isfile(path):
            raise NoSuchFile(path_)
        with self.fs.open(path_, mode='rb') as f:
            content = f.read().decode("utf-8")
        return content

    def lstat(self, path):
        path_ = self.path(path)
        info = self.fs.info(path_)
        ret = {}
        ret["ST_MTIME"] = info["updated"]
        return ret

    def write(self, path, content, format):
        path_ = self.path(self.unprefix(path))
        self.log.debug("S3contents.GCSFS: Writing file: `%s`", path_)
        with self.fs.open(path_, mode='wb') as f:
            f.write(content.encode("utf-8"))

    #  Utilities -------------------------------------------------------------------------------------------------------

    def strip(self, path):
        if isinstance(path, six.string_types):
            return path.strip(self.separator)
        if isinstance(path, (list, tuple)):
            return list(map(self.strip, path))

    def join(self, *paths):
        paths = self.strip(paths)
        return self.separator.join(paths)

    def get_prefix(self):
        """Full prefix: bucket + optional prefix"""
        prefix = self.bucket
        if self.prefix:
            prefix += self.separator + self.prefix
        return prefix

    prefix_ = property(get_prefix)

    def unprefix(self, path):
        """Remove the self.prefix_ (if present) from a path or list of paths"""
        path = self.strip(path)
        if isinstance(path, six.string_types):
            path = path[len(self.prefix_):] if path.startswith(
                self.prefix_) else path
            path = path[1:] if path.startswith(self.separator) else path
            return path
        if isinstance(path, (list, tuple)):
            path = [
                p[len(self.prefix_):] if p.startswith(self.prefix_) else p
                for p in path
            ]
            path = [p[1:] if p.startswith(self.separator) else p for p in path]
            return path

    def path(self, *path):
        """Utility to join paths including the bucket and prefix"""
        path = list(filter(None, path))
        path = self.unprefix(path)
        items = [self.prefix_] + path
        return self.join(*items)
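A hypothetical Jupyter wiring for the GCSFS backend above; the manager class
path is assumed from the s3contents package layout and does not appear in
this example:

# In jupyter_notebook_config.py; all values are illustrative.
c.NotebookApp.contents_manager_class = "s3contents.GCSContentsManager"
c.GCSContentsManager.project = "my-gcp-project"
c.GCSContentsManager.token = "~/path/to/gcp-credentials.json"
c.GCSContentsManager.bucket = "notebooks"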
Example #17
0
class GenericContentsManager(ContentsManager, HasTraits):

    # This makes the checkpoints get saved on this directory
    root_dir = Unicode("./", config=True)

    post_save_hook = Any(
        None,
        config=True,
        allow_none=True,
        help="""Python callable or importstring thereof
        to be called on the path of a file just saved.
        This can be used to process the file on disk,
        such as converting the notebook to a script or HTML via nbconvert.
        It will be called as (all arguments passed by keyword)::
            hook(s3_path=s3_path, model=model, contents_manager=instance)
        - s3_path: the S3 path to the file just written (sans bucket/prefix)
        - model: the model representing the file
        - contents_manager: this ContentsManager instance
        """,
    )

    def __init__(self, *args, **kwargs):
        super(GenericContentsManager, self).__init__(*args, **kwargs)
        self._fs = None

    def get_fs(self):
        return self._fs

    fs = property(get_fs)

    def _checkpoints_class_default(self):
        return GenericFileCheckpoints

    def do_error(self, msg, code=500):
        raise HTTPError(code, msg)

    def no_such_entity(self, path):
        self.do_error("No such entity: [{path}]".format(path=path), 404)

    def already_exists(self, path):
        thing = "File" if self.file_exists(path) else "Directory"
        self.do_error(
            "{thing} already exists: [{path}]".format(thing=thing, path=path),
            409)

    def guess_type(self, path, allow_directory=True):
        """
        Guess the type of a file.
        If allow_directory is False, don't consider the possibility that the
        file is a directory.

        Parameters
        ----------
            path : string
        """
        if path.endswith(".ipynb"):
            return "notebook"
        elif allow_directory and self.dir_exists(path):
            return "directory"
        else:
            return "file"

    def file_exists(self, path):
        # Does a file exist at the given path?
        self.log.debug("S3contents.GenericManager.file_exists: ('%s')", path)
        return self.fs.isfile(path)

    def dir_exists(self, path):
        # Does a directory exist at the given path?
        self.log.debug("S3contents.GenericManager.dir_exists: path('%s')",
                       path)
        return self.fs.isdir(path)

    def get(self, path, content=True, type=None, format=None):
        # Get a file or directory model.
        self.log.debug(
            "S3contents.GenericManager.get] path('%s') type(%s) format(%s)",
            path,
            type,
            format,
        )
        path = path.strip("/")

        if type is None:
            type = self.guess_type(path)
        try:
            func = {
                "directory": self._get_directory,
                "notebook": self._get_notebook,
                "file": self._get_file,
            }[type]
        except KeyError:
            raise ValueError("Unknown type passed: '{}'".format(type))

        return func(path=path, content=content, format=format)

    def _get_directory(self, path, content=True, format=None):
        self.log.debug(
            "S3contents.GenericManager._get_directory: path('%s') content(%s) format(%s)",
            path,
            content,
            format,
        )
        return self._directory_model_from_path(path, content=content)

    def _get_notebook(self, path, content=True, format=None):
        self.log.debug(
            "S3contents.GenericManager._get_notebook: path('%s') type(%s) format(%s)",
            path,
            content,
            format,
        )
        return self._notebook_model_from_path(path,
                                              content=content,
                                              format=format)

    def _get_file(self, path, content=True, format=None):
        self.log.debug(
            "S3contents.GenericManager._get_file: path('%s') type(%s) format(%s)",
            path,
            content,
            format,
        )
        return self._file_model_from_path(path, content=content, format=format)

    def _directory_model_from_path(self, path, content=False):
        def s3_detail_to_model(s3_detail):
            model_path = s3_detail["Key"]
            model = base_model(self.fs.unprefix(model_path))
            if s3_detail["StorageClass"] == 'DIRECTORY':
                model["created"] = model["last_modified"] = DUMMY_CREATED_DATE
                model["type"] = "directory"
                lstat = self.fs.lstat(model_path)
                if "ST_MTIME" in lstat and lstat["ST_MTIME"]:
                    model["last_modified"] = model["created"] = lstat[
                        "ST_MTIME"]
            else:
                model["last_modified"] = s3_detail.get("LastModified").replace(
                    microsecond=0, tzinfo=tzutc())
                model["created"] = model["last_modified"]
                # model["size"] = s3_detail.get("Size")
                model["type"] = "notebook" if model_path.endswith(
                    ".ipynb") else "file"
            return model

        self.log.debug(
            "S3contents.GenericManager._directory_model_from_path: path('%s') type(%s)",
            path,
            content,
        )
        model = base_directory_model(path)
        if self.fs.isdir(path):
            lstat = self.fs.lstat(path)
            if "ST_MTIME" in lstat and lstat["ST_MTIME"]:
                model["last_modified"] = model["created"] = lstat["ST_MTIME"]
        if content:
            if not self.dir_exists(path):
                self.no_such_entity(path)
            model["format"] = "json"
            prefixed_path = self.fs.path(path)
            files_s3_detail = sync(self.fs.fs.loop, self.fs.fs._lsdir,
                                   prefixed_path)
            filtered_files_s3_detail = [
                detail for detail in files_s3_detail
                if os.path.basename(detail["Key"]) != self.fs.dir_keep_file
            ]
            model["content"] = list(
                map(s3_detail_to_model, filtered_files_s3_detail))
        return model

    def _notebook_model_from_path(self, path, content=False, format=None):
        """
        Build a notebook model from database record.
        """
        model = base_model(path)
        model["type"] = "notebook"
        if self.fs.isfile(path):
            model["last_modified"] = model["created"] = self.fs.lstat(
                path)["ST_MTIME"]
        else:
            model["last_modified"] = model["created"] = DUMMY_CREATED_DATE
        if content:
            if not self.fs.isfile(path):
                self.no_such_entity(path)
            file_content, _ = self.fs.read(path, format)
            nb_content = reads(file_content, as_version=NBFORMAT_VERSION)
            self.mark_trusted_cells(nb_content, path)
            model["format"] = "json"
            model["content"] = nb_content
            self.validate_notebook_model(model)
        return model

    def _file_model_from_path(self, path, content=False, format=None):
        """
        Build a file model from database record.
        """
        model = base_model(path)
        model["type"] = "file"
        if self.fs.isfile(path):
            model["last_modified"] = model["created"] = self.fs.lstat(
                path)["ST_MTIME"]
        else:
            model["last_modified"] = model["created"] = DUMMY_CREATED_DATE
        if content:
            try:
                # Get updated format from fs.read()
                content, format_ = self.fs.read(path, format)
            except NoSuchFile as e:
                self.no_such_entity(e.path)
            except GenericFSError as e:
                self.do_error(str(e), 500)
            model["format"] = format_
            model["content"] = content
            model["mimetype"] = mimetypes.guess_type(path)[0] or "text/plain"
        return model

    def save(self, model, path):
        """Save a file or directory model to path.
        """

        # Chunked uploads
        # See https://jupyter-notebook.readthedocs.io/en/stable/extending/contents.html#chunked-saving
        chunk = model.get("chunk", None)
        if chunk is not None:
            return self._save_large_file(chunk, model, path,
                                         model.get("format"))

        self.log.debug("S3contents.GenericManager.save %s: '%s'", model, path)
        if "type" not in model:
            self.do_error("No model type provided", 400)
        if "content" not in model and model["type"] != "directory":
            self.do_error("No file content provided", 400)

        if model["type"] not in ("file", "directory", "notebook"):
            self.do_error("Unhandled contents type: %s" % model["type"], 400)

        self.run_pre_save_hook(model=model, path=path)

        try:
            if model["type"] == "notebook":
                validation_message = self._save_notebook(model, path)
            elif model["type"] == "file":
                validation_message = self._save_file(model, path)
            else:
                validation_message = self._save_directory(path)
        except Exception as e:
            self.log.error("Error while saving file: %s %s",
                           path,
                           e,
                           exc_info=True)
            self.do_error(
                "Unexpected error while saving file: %s %s" % (path, e), 500)

        model = self.get(path, type=model["type"], content=False)

        self.run_post_save_hook(model=model, s3_path=model["path"])

        if validation_message is not None:
            model["message"] = validation_message
        return model
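
    # A minimal usage sketch for save() (values are hypothetical; in practice
    # the model arrives via Jupyter's contents REST API):
    #
    #   saved = cm.save(
    #       {"type": "file", "format": "text", "content": "hello"},
    #       "folder/hello.txt",
    #   )
    #
    # The model returned is re-fetched with content=False, plus a "message"
    # key when notebook validation produced one.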

    def _save_large_file(self, chunk, model, path, format):
        if "type" not in model:
            self.do_error("No file type provided", 400)
        if model["type"] != "file":
            self.do_error(
                'File type "{}" is not supported for large file transfer'.
                format(model["type"]),
                400,
            )
        if "content" not in model and model["type"] != "directory":
            self.do_error("No file content provided", 400)

        if format not in {"text", "base64"}:
            self.do_error(
                "Must specify format of file contents as 'text' or 'base64'",
                400)

        prune_stale_chunks()

        self.log.debug("S3contents.GenericManager.save (chunk %s) %s: '%s'",
                       chunk, model, path)

        try:
            if chunk == 1:
                self.run_pre_save_hook(model=model, path=path)
            # Store the chunk in our registry
            store_content_chunk(path, model["content"])
        except Exception as e:
            self.log.error(
                "S3contents.GenericManager._save_large_file: error while saving file: %s %s",
                path,
                e,
                exc_info=True,
            )
            self.do_error(f"Unexpected error while saving file: {path} {e}")

        if chunk == -1:
            # Last chunk: assemble the buffered chunks into the full file content
            model["content"] = assemble_chunks(path)
            delete_chunks(path)
            self._save_file(model, path)

        return self.get(path, content=False)
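
    # Sketch of the chunked-upload protocol handled above: per the Jupyter
    # contents docs linked in save(), chunks are numbered from 1 and the
    # final chunk is flagged with chunk == -1 (part_1/part_n are
    # hypothetical strings):
    #
    #   cm.save({"type": "file", "format": "text", "chunk": 1,
    #            "content": part_1}, "big.csv")
    #   cm.save({"type": "file", "format": "text", "chunk": -1,
    #            "content": part_n}, "big.csv")
    #
    # Intermediate chunks are only buffered by store_content_chunk(); the
    # file is written to S3 once assemble_chunks() runs on the final chunk.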

    def _save_notebook(self, model, path):
        nb_contents = from_dict(model["content"])
        self.check_and_sign(nb_contents, path)
        file_contents = json.dumps(model["content"])
        self.fs.write(path, file_contents)
        self.validate_notebook_model(model)
        return model.get("message")

    def _save_file(self, model, path):
        file_contents = model["content"]
        file_format = model.get("format")
        self.fs.write(path, file_contents, file_format)

    def _save_directory(self, path):
        self.fs.mkdir(path)

    def rename_file(self, old_path, new_path):
        """Rename a file or directory.

        NOTE: This method is unfortunately named on the base class.  It
        actually moves a file or a directory.
        """
        self.log.debug(
            "S3contents.GenericManager.rename_file: Init rename of '%s' to '%s'",
            old_path,
            new_path,
        )
        if self.file_exists(new_path) or self.dir_exists(new_path):
            self.already_exists(new_path)
        elif self.file_exists(old_path) or self.dir_exists(old_path):
            self.log.debug(
                "S3contents.GenericManager: Actually renaming '%s' to '%s'",
                old_path,
                new_path,
            )
            self.fs.mv(old_path, new_path)
        else:
            self.no_such_entity(old_path)

    def delete_file(self, path):
        """Delete the file or directory at path.
        """
        self.log.debug("S3contents.GenericManager.delete_file '%s'", path)
        if self.file_exists(path) or self.dir_exists(path):
            self.fs.rm(path)
        else:
            self.no_such_entity(path)

    def is_hidden(self, path):
        """Is path a hidden directory or file?
        """
        self.log.debug("S3contents.GenericManager.is_hidden '%s'", path)
        return False

    @validate("post_save_hook")
    def _validate_post_save_hook(self, proposal):
        value = proposal["value"]
        if isinstance(value, string_types):
            value = import_item(value)
        if not callable(value):
            raise TraitError("post_save_hook must be callable")
        return value
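
    # The validator above accepts either a callable or a dotted import string
    # resolved via import_item, so a Jupyter config file can set (the path
    # "mypackage.hooks.on_save" is hypothetical):
    #
    #   c.S3ContentsManager.post_save_hook = "mypackage.hooks.on_save"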

    def run_post_save_hook(self, model, s3_path):
        """Run the post-save hook if defined, and log errors"""
        if self.post_save_hook:
            try:
                self.log.debug("Running post-save hook on %s", s3_path)
                self.post_save_hook(s3_path=s3_path,
                                    model=model,
                                    contents_manager=self)
            except Exception as e:
                self.log.error("Post-save hook failed o-n %s",
                               s3_path,
                               exc_info=True)
                raise HTTPError(
                    500, "Unexpected error while running post hook save: %s" %
                    e) from e
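
    # A minimal post-save hook sketch matching the keyword arguments passed
    # by run_post_save_hook above (the hook body is illustrative only):
    #
    #   def on_save(s3_path, model, contents_manager):
    #       contents_manager.log.info("Saved %s (%s)", s3_path, model["type"])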