# Example #1
# 0
class SwiftStorage(Storage):
    """Storage on OpenStack swift service.

    All objects live in a single Swift container. Swift has no real
    directories, so "directories" are emulated with ``/``-delimited key
    prefixes.
    """

    def __init__(self, storage_id, container_name, auth_config=None, transfer_config=None):
        """Create the Swift client bound to one container.

        Args:
            storage_id: identifier forwarded to the ``Storage`` base class.
            container_name: name of the Swift container holding the objects.
            auth_config: optional mapping of client options; its entries
                override everything else (including ``auth_version``).
            transfer_config: optional mapping of client options used as the
                base configuration. It is copied, never modified.
        """
        super(SwiftStorage, self).__init__(storage_id)
        # Work on a copy so the caller's transfer_config dict is not mutated.
        opts = dict(transfer_config or {})
        opts["auth_version"] = "2.0"
        if auth_config:
            opts.update(auth_config)
        self._client = SwiftService(opts)
        self._container = container_name

    def _get_file_safe(self, remote_path, local_path):
        """Download ``remote_path`` into a temp directory, then move it to ``local_path``.

        The local modification time is set from the object's ``x-timestamp``
        header so that ``_check_existing_file`` can later compare the two.

        Raises:
            RuntimeError: if the download fails or yields no result at all.
        """
        tmpdir = tempfile.mkdtemp()
        try:
            results = self._client.download(container=self._container,
                                            objects=[remote_path],
                                            options={"out_directory": tmpdir})
            downloaded = os.path.join(tmpdir, remote_path)
            has_results = False
            for r in results:
                has_results = True
                if not r["success"]:
                    raise RuntimeError("Cannot download [%s]: %s" % (remote_path, r["error"]))
                timestamp = float(r["response_dict"]["headers"]["x-timestamp"])
                os.utime(downloaded, (timestamp, timestamp))
            if not has_results:
                raise RuntimeError("Cannot download [%s]: %s" % (remote_path, "NO RESULT"))
            shutil.move(downloaded, local_path)
        finally:
            # Always drop the scratch directory, including on failure.
            shutil.rmtree(tmpdir, ignore_errors=True)

    def _check_existing_file(self, remote_path, local_path):
        """Return True when ``local_path`` looks identical to ``remote_path``.

        "Identical" means same byte size and same modification time (compared
        at one-second resolution against the ``x-timestamp`` header).
        """
        if not os.path.exists(local_path):
            LOGGER.debug('Cannot find %s', local_path)
            return False

        results = self._client.stat(self._container, objects=[remote_path])
        local_stat = os.stat(local_path)
        for r in results:
            if not r['success']:
                continue
            if int(r['headers']['content-length']) != local_stat.st_size:
                return False
            timestamp = float(r["headers"]["x-timestamp"])
            if int(local_stat.st_mtime) == int(timestamp):
                return True
        return False

    def stat(self, remote_path):
        """Describe ``remote_path``.

        Returns:
            ``{'is_dir': False, 'size': ..., 'last_modified': ...}`` for an
            object (both values are taken verbatim from the response headers,
            without conversion), ``{'is_dir': True}`` when at least one
            listing page is returned for the path used as a directory prefix,
            and ``False`` otherwise.
        """
        if not remote_path.endswith('/'):
            results = self._client.stat(self._container, objects=[remote_path])
            for r in results:
                if r['success']:
                    return {'is_dir': False,
                            'size': r['headers']['content-length'],
                            'last_modified': r['headers']['x-timestamp']}
            # Not an object: fall through and probe it as a directory prefix.
            remote_path += '/'
        results = self._client.list(container=self._container, options={"prefix": remote_path,
                                                                       "delimiter": "/"})
        for r in results:
            if r['success']:
                return {'is_dir': True}
        return False

    def push_file(self, local_path, remote_path):
        """Upload the local file ``local_path`` as object ``remote_path``.

        Raises:
            RuntimeError: if the upload fails or yields no result at all.
        """
        obj = SwiftUploadObject(local_path, object_name=remote_path)
        results = self._client.upload(self._container, [obj])
        has_results = False
        for r in results:
            has_results = True
            if not r["success"]:
                raise RuntimeError("Cannot push file [%s]>[%s]: %s" % (local_path, remote_path, r["error"]))
        if not has_results:
            raise RuntimeError("Cannot push file [%s]>[%s]: %s" % (local_path, remote_path, "NO RESULTS"))

    def stream(self, remote_path, buffer_size=1024):
        """Return a generator yielding the object's content in chunks.

        The object is first downloaded to a temporary directory (on the first
        ``next()`` call), then read back ``buffer_size`` bytes at a time.

        Raises (from the generator):
            RuntimeError: if the download fails or yields no result at all.
        """
        def generate():
            tmpdir = tempfile.mkdtemp()
            try:
                results = self._client.download(container=self._container,
                                                objects=[remote_path],
                                                options={"out_directory": tmpdir})
                has_results = False
                for r in results:
                    has_results = True
                    if not r["success"]:
                        raise RuntimeError("Cannot download file [%s]: %s" % (remote_path, r["error"]))
                if not has_results:
                    raise RuntimeError("Cannot download file [%s]: NO RESULTS" % (remote_path,))

                with open(os.path.join(tmpdir, remote_path), "rb") as f:
                    for chunk in iter(lambda: f.read(buffer_size), b''):
                        yield chunk
            finally:
                # Runs on exhaustion, on error, and when the generator is
                # closed early, so the scratch directory never leaks.
                shutil.rmtree(tmpdir, ignore_errors=True)

        return generate()

    def listdir(self, remote_path, recursive=False):
        """List entries whose key starts with ``remote_path``.

        Args:
            remote_path: key prefix to list.
            recursive: when False, the listing is delimited by ``/`` so deeper
                levels are rolled up into ``subdir`` entries.

        Returns:
            dict mapping each key to ``{'is_dir': True}`` for sub-directories
            or ``{'size': <bytes>, 'last_modified': <epoch seconds>}`` for
            objects. Unsuccessful listing pages are skipped.
        """
        # Local import: only this method needs it.
        from datetime import timezone

        options = {"prefix": remote_path}
        if not recursive:
            options["delimiter"] = "/"
        list_parts_gen = self._client.list(container=self._container,
                                           options=options)
        lsdir = {}
        for page in list_parts_gen:
            if not page["success"]:
                continue
            for item in page["listing"]:
                if "subdir" in item:
                    lsdir[item["subdir"]] = {'is_dir': True}
                    continue
                last_modified = datetime.strptime(item["last_modified"], '%Y-%m-%dT%H:%M:%S.%f')
                # Swift reports last_modified in UTC without an offset; a
                # naive datetime would be interpreted as local time.
                last_modified = last_modified.replace(tzinfo=timezone.utc)
                lsdir[item["name"]] = {'size': item["bytes"],
                                       'last_modified': last_modified.timestamp()}
        return lsdir

    def mkdir(self, remote_path):
        """No-op: directories are only implied by object key prefixes."""
        pass

    def _delete_single(self, remote_path, isdir):
        """Delete one object; does nothing when ``isdir`` is true.

        Raises:
            RuntimeError: if the deletion fails or yields no result at all.
        """
        if isdir:
            return
        results = self._client.delete(container=self._container, objects=[remote_path])
        has_results = False
        for r in results:
            has_results = True
            if not r["success"]:
                raise RuntimeError("Cannot delete file [%s]: %s" % (remote_path, r["error"]))
        if not has_results:
            raise RuntimeError("Cannot delete file [%s]: NO RESULT" % (remote_path))

    def rename(self, old_remote_path, new_remote_path):
        """Move every object under ``old_remote_path`` to ``new_remote_path``.

        Each object is copied to its new key and then deleted.

        Raises:
            RuntimeError: if a copy or delete fails or yields no result.
        """
        for f in self.listdir(old_remote_path, True):
            assert f.startswith(old_remote_path), "inconsistent listdir result"
            destination = "/%s/%s%s" % (self._container,
                                        new_remote_path,
                                        f[len(old_remote_path):])
            obj = SwiftCopyObject(f, {"destination": destination})
            results = self._client.copy(self._container, [obj])
            has_results = False
            for r in results:
                has_results = True
                if not r["success"]:
                    # Name the object that actually failed, not the prefix.
                    raise RuntimeError("Cannot copy file [%s]: %s" % (f, r["error"]))
            if not has_results:
                raise RuntimeError("Cannot copy file [%s]: NO RESULT" % (f,))
            self._delete_single(f, False)

    def exists(self, remote_path):
        """Return True when ``remote_path`` is an object or a non-empty prefix.

        For a path that is empty or ends with ``/`` any listed entry counts.
        Otherwise only an object named exactly ``remote_path`` or the
        directory ``remote_path + '/'`` counts; siblings that merely share
        the prefix (``foobar/`` for ``foo``) do not.
        """
        result = self._client.list(container=self._container, options={"prefix": remote_path,
                                                                       "delimiter": "/"})
        is_dir_query = remote_path == '' or remote_path.endswith('/')
        dir_key = remote_path + '/'
        for page in result:
            if not page["success"]:
                continue
            for item in page["listing"]:
                if is_dir_query:
                    return True
                if "subdir" in item:
                    if item["subdir"] == dir_key:
                        return True
                    continue
                name = item["name"]
                if name == remote_path or name.startswith(dir_key):
                    return True
        return False

    def isdir(self, remote_path):
        """Return True when something exists under ``remote_path`` used as a directory."""
        if not remote_path.endswith('/'):
            return self.exists(remote_path+'/')
        return self.exists(remote_path)

    def _internal_path(self, path):
        """Turn a path-like string into a Swift key by dropping one leading ``/``."""
        # OpenStack does not work with paths but keys. This function possibly adapts a
        # path-like representation to a OpenStack key.
        if path.startswith('/'):
            return path[1:]
        return path
# Example #2
# 0
class SwiftFS(HasTraits):
    """Filesystem-like helper on top of a single OpenStack Swift container.

    Swift only stores flat object names. Directories are represented by
    objects whose name ends with ``delimiter``; everything else is a file.
    """

    container = Unicode(os.environ.get('CONTAINER', 'demo'))
    storage_url = Unicode(help="The base URL for containers",
                          default_value='http://example.com',
                          config=True)

    delimiter = Unicode("/", help="Path delimiter", config=True)

    root_dir = Unicode("/", config=True)

    log = logging.getLogger('SwiftFS')

    def __init__(self, **kwargs):
        """Open the Swift connection and make sure the container is usable.

        Raises:
            HTTPError: (404) when the container request fails.
        """
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(SwiftFS, self).__init__(**kwargs)

        # With the python swift client, the connection is automagically
        # created using environment variables (I know... horrible or what?)
        self.log.info("using swift container `%s`", self.container)

        # open connection to swift container
        self.swift = SwiftService()

        # make sure container exists
        try:
            result = self.swift.post(container=self.container)
        except SwiftError as e:
            self.log.error("creating container %s", e.value)
            raise HTTPError(404, e.value)

        if not result["success"]:
            msg = "could not create container %s" % self.container
            self.log.error(msg)
            raise HTTPError(404, msg)

    # see 'list' at https://docs.openstack.org/developer/python-swiftclient/service-api.html
    # Returns a list of all objects that start with the prefix given.
    # In a proper hierarchical file-system, list-dir only returns the files
    # in that dir, so we need to filter the list to be ONLY those objects where
    # the 'hierarchical' bit of the name stops at the path given.
    @LogMethodResults()
    def listdir(self, path="", with_prefix=False, this_dir_only=True):
        """
        list all the "files" in the "directory" for the given path.

        If the 'this_dir_only' is False (it is True by default), then
        the full list of all objects in that path are returned (needed for a
        rename, for example)

        'with_prefix' is accepted for compatibility but is not used.

        A SwiftError while listing is logged and whatever was collected so
        far is returned.

        returns a list of dictionaries for each object:
            {'bytes': 11,
             'hash': '3e25960a79dbc69b674cd4ec67a72c62',
             'last_modified': '2017-06-06T08:55:36.473Z',
             'name': 'foo/bar/thingamy.bob'}
        """
        files = []

        # Get all objects that match the known path
        path = self.clean_path(path)
        _opts = {'prefix': path}
        try:
            dir_listing = self.swift.list(container=self.container,
                                          options=_opts)
            for page in dir_listing:  # each page is up to 10,000 items
                if page["success"]:
                    files.extend(page["listing"])  # page is returning a list
                else:
                    raise page["error"]
        except SwiftError as e:
            self.log.error("SwiftFS.listdir %s", e.value)

        if this_dir_only:
            # make up the pattern to compile into our regex engine
            regex_delim = re.escape(self.delimiter)
            if len(path) > 0:
                regex_path = re.escape(path.rstrip(self.delimiter))
                pattern = '^({0}{1}[^{1}]+{1}?|{0})$'.format(
                    regex_path, regex_delim)
            else:
                pattern = '^[^{0}]+{0}?$'.format(regex_delim)
            self.log.debug("restrict directory pattern is: `%s`", pattern)
            regex = re.compile(pattern, re.UNICODE)

            files = [f for f in files if regex.match(f['name'])]

        return files

    # We can 'stat' files, but not directories
    @LogMethodResults()
    def isfile(self, path):
        """Return True when ``path`` names an existing non-directory object."""
        if path is None or path == '':
            self.log.debug("SwiftFS.isfile has no path, returning False")
            return False

        if path.endswith(self.delimiter):
            return False

        path = self.clean_path(path)
        # clean_path appends the delimiter when the path is a directory; a
        # directory marker object must not be reported as a file.
        if path.endswith(self.delimiter):
            return False

        try:
            response = self.swift.stat(container=self.container,
                                       objects=[path])
        except SwiftError as e:
            self.log.error("SwiftFS.isfile %s", e.value)
            return False

        _isfile = False
        for r in response:
            if r['success']:
                _isfile = True
            else:
                self.log.error('Failed to retrieve stats for %s' %
                               r['object'])
            break  # only the first result matters
        return _isfile

    # We can 'list' directories, but not 'stat' them
    @LogMethodResults()
    def isdir(self, path):
        """Return True for the root or when listing ``path`` as a prefix succeeds."""
        # directories must have a trailing slash on them.
        # The core code seems to remove any trailing slash, so lets add it back
        # on
        if not path.endswith(self.delimiter):
            path = path + self.delimiter

        # Root directory checks
        if path == self.delimiter:  # effectively root directory
            self.log.debug("SwiftFS.isdir found root dir - returning True")
            return True

        path = self.clean_path(path)
        _opts = {}
        if re.search(r'\w', path):
            _opts = {'prefix': path}
        try:
            self.log.debug("SwiftFS.isdir setting prefix to '%s'", path)
            response = self.swift.list(container=self.container, options=_opts)
        except SwiftError as e:
            self.log.error("SwiftFS.isdir %s", e.value)
            return False

        _isdir = False
        for r in response:
            if r['success']:
                _isdir = True
            else:
                self.log.error('Failed to retrieve stats for %s' % path)
            break  # only the first page matters
        return _isdir

    @LogMethod()
    def cp(self, old_path, new_path):
        """Copy a file or directory tree from ``old_path`` to ``new_path``."""
        self._copymove(old_path, new_path, with_delete=False)

    @LogMethod()
    def mv(self, old_path, new_path):
        """Move a file or directory tree from ``old_path`` to ``new_path``."""
        self._copymove(old_path, new_path, with_delete=True)

    @LogMethod()
    def remove_container(self):
        """Delete the whole container if a stat on it succeeds.

        Swift errors are logged, never raised.
        """
        try:
            response = self.swift.stat(container=self.container)
        except SwiftError as e:
            self.log.error("SwiftFS.remove_container %s", e.value)
            return
        if not ('success' in response and response['success']):
            return

        try:
            results = self.swift.delete(container=self.container)
        except SwiftError as e:
            self.log.error("SwiftFS.remove_container %s", e.value)
            # Nothing to iterate: the delete never started.
            return
        for r in results:
            self.log.debug("SwiftFS.rm action: `%s` success: `%s`",
                           r['action'], r['success'])

    @LogMethod()
    def rm(self, path, recursive=False):
        """Remove a file or directory.

        Args:
            path: object or directory to remove.
            recursive: when True, remove everything below ``path`` first.

        Returns:
            True when the removal was carried out, False when ``path`` does
            not exist or the delete request failed.

        Raises:
            HTTPError: (400) for the root directory, or for a non-empty
                directory when ``recursive`` is False.
        """
        if path in ["", self.delimiter]:
            self.do_error('Cannot delete root directory', code=400)
            return False
        if not (self.isdir(path) or self.isfile(path)):
            return False

        if recursive:
            # The walk yields children before their parent and ends with
            # `path` itself, so each directory is empty when it is removed.
            for f in self._walk_path(path, dir_first=True):
                self.log.debug("SwiftFS.rm recurse into `%s`", f)
                self.rm(f)
            return True

        self.log.info("SwiftFS.rm not recursing for `%s`", path)
        files = self.listdir(path)
        # Listing names never carry a leading delimiter, so compare against
        # the cleaned path or a file given as '/x' would look "not empty".
        cleaned = self.clean_path(path)
        not_empty = (len(files) > 1 or
                     (len(files) == 1 and files[0]['name'] != cleaned))
        if not_empty:
            self.do_error("directory %s not empty" % path, code=400)

        try:
            response = self.swift.delete(container=self.container,
                                         objects=[cleaned])
        except SwiftError as e:
            self.log.error("SwiftFS.rm %s", e.value)
            return False
        ok = True
        for r in response:
            self.log.debug("SwiftFS.rm action: `%s` success: `%s`",
                           r['action'], r['success'])
            if not r['success']:
                ok = False
        return ok

    @LogMethod()
    def _walk_path(self, path, dir_first=False):
        """Yield ``path`` and everything below it, each entry exactly once.

        With ``dir_first=False`` a directory is yielded before its content;
        with ``dir_first=True`` it is yielded after its content (the order
        needed to delete a tree).
        """
        if not dir_first:
            yield path
        own_name = path.strip(self.delimiter)
        for f in self.listdir(path):
            name = f['name']
            # listdir also reports an object named like the path itself;
            # that one is already covered by the `yield path` statements.
            if name.strip(self.delimiter) == own_name:
                continue
            if self.guess_type(name) == 'directory':
                # The recursive call yields `name` itself, so it is not
                # yielded again here.
                for ff in self._walk_path(name, dir_first=dir_first):
                    yield ff
            else:
                yield name
        if dir_first:
            yield path

    # core function to copy or move file-objects
    # does clever recursive stuff for directory trees
    @LogMethod()
    def _copymove(self, old_path, new_path, with_delete=False):
        """Copy the tree at ``old_path`` to ``new_path``; optionally delete the source.

        Raises:
            HTTPError: when the parent of ``new_path`` does not exist, or when
                a copy fails without providing an exception.
            SwiftError / the per-object error: when a copy request fails.
        """
        # check parent directory exists
        self.checkParentDirExists(new_path)

        delim = self.delimiter
        # Compare on delimiter-free roots: names coming back from the listing
        # never have a leading delimiter, while the arguments may.
        src_root = old_path.strip(delim)
        dst_root = new_path.strip(delim)

        for f in self._walk_path(old_path):
            name = f.lstrip(delim)
            if name.startswith(src_root):
                new_f = dst_root + name[len(src_root):]
            else:
                new_f = f.replace(old_path, new_path, 1)

            if self.guess_type(f) == 'directory':
                self.mkdir(new_f)
                continue

            try:
                response = self.swift.copy(
                    self.container, [name], {
                        'destination':
                        delim + self.container + delim + new_f
                    })
            except SwiftError as e:
                self.log.error(e.value)
                raise
            for r in response:
                if r["success"]:
                    if r["action"] == "copy_object":
                        self.log.debug("object %s copied from /%s/%s" %
                                       (r["destination"], r["container"],
                                        r["object"]))
                    if r["action"] == "create_container":
                        self.log.debug("container %s created" %
                                       r["container"])
                else:
                    if "error" in r and isinstance(r["error"], Exception):
                        raise r["error"]
                    # Never carry on to the delete step after a failed copy.
                    self.do_error("could not copy %s to %s" % (name, new_f))
        # we always test for delete: file or directory...
        if with_delete:
            self.rm(old_path, recursive=True)

    # Directories are just objects that have a trailing '/'
    @LogMethod()
    def mkdir(self, path):
        """Create the directory marker object for ``path``."""
        path = path.rstrip(self.delimiter) + self.delimiter
        self._do_write(path, None)

    # This works by downloading the file to disk then reading the contents of
    # that file into memory, before deleting the file
    # NOTE this is reading text files!
    # NOTE this really only works with files in the local directory, but given
    # local filestore will disappear when the docker ends, I'm not too bothered.
    @LogMethod()
    def read(self, path):
        """Return the text content of the object at ``path``.

        Returns an empty string when the download fails.

        Raises:
            HTTPError: (400) when ``path`` is a directory.
        """
        if self.guess_type(path) == "directory":
            msg = "cannot read from path %s: it is a directory" % path
            self.do_error(msg, code=400)

        content = ''
        fhandle, localFile = tempfile.mkstemp(prefix="swiftfs_")
        os.close(fhandle)
        try:
            path = self.clean_path(path)
            try:
                response = self.swift.download(container=self.container,
                                               objects=[path],
                                               options={"out_file": localFile})
            except SwiftError as e:
                self.log.error("SwiftFS.read %s", e.value)
                return ''

            for r in response:
                if r['success']:
                    self.log.debug("SwiftFS.read: using local file %s", localFile)
                    # _do_write always stores UTF-8, so decode the same way
                    # instead of relying on the platform default.
                    with io.open(localFile, encoding='utf-8') as lf:
                        content = lf.read()
        finally:
            # Remove the scratch file on every path, not only on success.
            if os.path.exists(localFile):
                os.remove(localFile)
        return content

    # Write is 'upload' and 'upload' needs a "file" it can read from
    # We use io.BytesIO for this
    @LogMethod()
    def write(self, path, content):
        """Store ``content`` (text) as the object at ``path``.

        Raises:
            HTTPError: (400) when ``path`` is a directory or its parent
                directory does not exist.
        """
        if self.guess_type(path) == "directory":
            msg = "cannot write to path %s: it is a directory" % path
            self.do_error(msg, code=400)

        # NOTE: intermediate directories are not created here; _do_write
        # rejects the write when the parent directory is missing.
        self._do_write(path, content)

    @LogMethod()
    def _make_intermedate_dirs(self, path):
        """Create every missing directory leading to ``path``.

        Returns:
            False when a file already occupies one of the intermediate
            names, True otherwise.
        """
        # we loop over the path, checking for an object at every level
        # of the hierarchy, except the last item (which may be a file,
        # or a directory itself)
        path_parts = path.split(self.delimiter)
        current_path = ''
        for p in path_parts[:-1]:
            this_path = current_path + p + self.delimiter
            # isfile() is always False for names ending with the delimiter,
            # so probe the name without it.
            if self.isfile(current_path + p):
                self.log.error(
                    "SwiftFS._make_intermedate_dirs failure: file exists at path `%s`"
                    % this_path)
                return False
            if not self.isdir(this_path):
                self.log.debug(
                    "SwiftFS._make_intermedate_dirs making directory")
                self._do_write(this_path, None)
            current_path = this_path

        return True

    @LogMethod()
    def _do_write(self, path, content):
        """Upload a directory marker or a UTF-8 encoded file for ``path``.

        Args:
            path: target path; a directory path uploads an empty marker.
            content: text to store for files; ``None`` is stored as empty.

        Raises:
            HTTPError: (400) when the parent directory does not exist.
            SwiftError, ClientException: when the upload request fails.
        """
        # check parent directory exists
        self.checkParentDirExists(path)

        kind = self.guess_type(path)
        # Use the cleaned name as the object name.
        object_name = self.clean_path(path)
        things = []
        if kind == "directory":
            self.log.debug("SwiftFS._do_write create directory")
            things.append(SwiftUploadObject(None, object_name=object_name))
        else:
            self.log.debug("SwiftFS._do_write create file/notebook from '%s'",
                           content)
            text = content if content is not None else u''
            output = io.BytesIO(text.encode('utf-8'))
            things.append(SwiftUploadObject(output, object_name=object_name))

        # Now do the upload
        try:
            response = self.swift.upload(self.container, things)
        except SwiftError as e:
            self.log.error("SwiftFS._do_write swift-error: %s", e.value)
            raise
        except ClientException as e:
            # Log the exception itself: it is not known to expose `.value`.
            self.log.error("SwiftFS._do_write client-error: %s", e)
            raise
        for r in response:
            self.log.debug("SwiftFS._do_write action: '%s', response: '%s'",
                           r['action'], r['success'])
            if not r['success']:
                self.log.error("SwiftFS._do_write failed for '%s': %s",
                               object_name, r.get('error'))

    @LogMethodResults()
    def guess_type(self, path, allow_directory=True):
        """
        Guess the type of a file.
        If allow_directory is False, don't consider the possibility that the
        file is a directory.

        Parameters
        ----------
            path: string

        Returns
        -------
            "notebook" for names ending in ".ipynb", "directory" for names
            ending with the delimiter or for which isdir() is true,
            otherwise "file".
        """
        if path.endswith(".ipynb"):
            return "notebook"
        if allow_directory and (path.endswith(self.delimiter) or
                                self.isdir(path)):
            return "directory"
        return "file"

    @LogMethod()
    def clean_path(self, path):
        """Normalise ``path`` into an object name.

        Leading delimiters are removed; directory paths get exactly one
        trailing delimiter; the root directory becomes the empty string.
        """
        # strip off any leading '/'
        path = path.lstrip(self.delimiter)
        if self.guess_type(path) == 'directory':
            # ensure we have a / at the end of directory paths
            path = path.rstrip(self.delimiter) + self.delimiter
            if path == self.delimiter:
                path = ''
        return path

    @LogMethodResults()
    def checkParentDirExists(self, path):
        """checks if the parent directory of a path exists

        Raises HTTPError (400) when it does not.
        """
        parts = path.strip(self.delimiter).split(self.delimiter)[:-1]
        p = self.delimiter.join(parts)
        self.log.debug("SwiftFS.checkDirExists: directory name %s", p)
        if not self.isdir(p):
            self.do_error('parent directory does not exist %s' % p, code=400)

    @LogMethod()
    def do_error(self, msg, code=500):
        """Log ``msg`` and raise ``HTTPError(code, msg)``."""
        self.log.error(msg)
        raise HTTPError(code, msg)