def _unlinkold(self):
    """
    Remove this object from the file system.

    A directory object is removed outright only when it holds no other
    objects; otherwise it is demoted to a plain (non-object) directory so
    it can be reaped later when its container or parent directory is
    deleted.  Afterwards, empty non-object parent directories are garbage
    collected up to (but not including) the container root.
    """
    if self._is_dir:
        # FIXME: Ideally we should use an atomic metadata update operation
        meta = read_metadata(self._data_file)
        if dir_is_object(meta):
            # Demote to a fake-filesystem-only directory; it will be
            # removed along with the container or a parent directory.
            meta[X_OBJECT_TYPE] = DIR_NON_OBJECT
            write_metadata(self._data_file, meta)
        rmobjdir(self._data_file)
    else:
        # Regular file object: just unlink it.
        do_unlink(self._data_file)

    # Garbage collection: walk upwards removing directories that are not
    # objects, stopping at the container root or at the first directory
    # that cannot be removed (it still holds objects).
    parent = os.path.dirname(self._data_file)
    while parent and parent != self._container_path:
        if not rmobjdir(parent):
            # A directory with objects remains; stop garbage collection.
            break
        parent = os.path.dirname(parent)
Example #2
0
    def put(self, metadata):
        """
        Finalize writing the file on disk, and renames it from the temp file
        to the real location.  This should be called after the data has been
        written to the temp file.

        :param metadata: dictionary of metadata to be written
        :raises AlreadyExistsAsDir : If there exists a directory of the same
                                     name
        """
        assert self._tmppath is not None
        metadata = _adjust_metadata(metadata)
        df = self._disk_file

        if dir_is_object(metadata):
            # Directory objects are materialized in a worker thread; no
            # temp-file rename is involved.
            df._threadpool.force_run_in_thread(df._create_dir_object,
                                               df._data_file, metadata)
            return

        if df._is_dir:
            # A pre-existing directory already exists on the file
            # system, perhaps gratuitously created when another
            # object was created, or created externally to Swift
            # REST API servicing (UFO use case).
            raise AlreadyExistsAsDir('DiskFile.put(): file creation failed'
                                     ' since the target, %s, already exists'
                                     ' as a directory' % df._data_file)

        df._threadpool.force_run_in_thread(self._finalize_put, metadata)

        # Avoid the unlink() system call as part of the mkstemp context
        # cleanup.  BUG FIX: the attribute is "_tmppath" (see the assert
        # above); the original assigned to "tmppath", leaving "_tmppath"
        # set so the cleanup could unlink the freshly renamed data file.
        self._tmppath = None
    def put(self, metadata):
        """
        Finalize writing the file on disk, and renames it from the temp file
        to the real location.  This should be called after the data has been
        written to the temp file.

        :param metadata: dictionary of metadata to be written
        :raises AlreadyExistsAsDir : If there exists a directory of the same
                                     name
        """
        assert self._tmppath is not None
        metadata = _adjust_metadata(metadata)
        df = self._disk_file

        if dir_is_object(metadata):
            # Directory objects are materialized in a worker thread; no
            # temp-file rename is involved.
            df._threadpool.force_run_in_thread(
                df._create_dir_object, df._data_file, metadata)
            return

        if df._is_dir:
            # A pre-existing directory already exists on the file
            # system, perhaps gratuitously created when another
            # object was created, or created externally to Swift
            # REST API servicing (UFO use case).
            raise AlreadyExistsAsDir('DiskFile.put(): file creation failed'
                                     ' since the target, %s, already exists'
                                     ' as a directory' % df._data_file)

        df._threadpool.force_run_in_thread(self._finalize_put, metadata)

        # Avoid the unlink() system call as part of the mkstemp context
        # cleanup.  BUG FIX: the attribute is "_tmppath" (see the assert
        # above); the original assigned to "tmppath", leaving "_tmppath"
        # set so the cleanup could unlink the freshly renamed data file.
        self._tmppath = None
Example #4
0
    def _unlinkold(self):
        """
        Remove this object from the file system.

        A directory object is removed outright only when it holds no other
        objects; otherwise it is demoted to a plain (non-object) directory
        so it can be reaped later when its container or parent directory is
        deleted.  Afterwards, empty non-object parent directories are
        garbage collected up to (but not including) the container root.
        """
        if self._is_dir:
            # FIXME: Ideally we should use an atomic metadata update
            # operation
            meta = read_metadata(self._data_file)
            if dir_is_object(meta):
                # Demote to a fake-filesystem-only directory; it will be
                # removed along with the container or a parent directory.
                meta[X_OBJECT_TYPE] = DIR_NON_OBJECT
                write_metadata(self._data_file, meta)
            rmobjdir(self._data_file)
        else:
            # Regular file object: just unlink it.
            do_unlink(self._data_file)

        # Garbage collection: walk upwards removing directories that are
        # not objects, stopping at the container root or at the first
        # directory that cannot be removed (it still holds objects).
        parent = os.path.dirname(self._data_file)
        while parent and parent != self._container_path:
            if not rmobjdir(parent):
                # A directory with objects remains; stop garbage
                # collection.
                break
            parent = os.path.dirname(parent)
Example #5
0
    def unlinkold(self, timestamp):
        """
        Remove any older versions of the object file.  Any file that has an
        older timestamp than timestamp will be deleted.

        :param timestamp: timestamp to compare with each file
        """
        # Nothing on disk, or what is on disk is at least as new: no-op.
        if not self.metadata or self.metadata[X_TIMESTAMP] >= timestamp:
            return

        assert self.data_file, \
            "Have metadata, %r, but no data_file" % self.metadata

        if not self._is_dir:
            # Regular file object: just unlink it.
            do_unlink(self.data_file)
        else:
            # Marker, or object, directory.  Delete from the filesystem
            # only when it contains no objects; otherwise strip the object
            # metadata tag, demoting it to a fake-filesystem-only
            # directory removed when the container or a parent directory
            # is deleted.
            meta = read_metadata(self.data_file)
            if dir_is_object(meta):
                meta[X_OBJECT_TYPE] = DIR_NON_OBJECT
                write_metadata(self.data_file, meta)
            rmobjdir(self.data_file)

        # Garbage collection: walk upwards removing non-object
        # directories, stopping at the container root or the first
        # directory that still holds objects.
        parent = os.path.dirname(self.data_file)
        while parent and parent != self._container_path:
            if not rmobjdir(parent):
                break
            parent = os.path.dirname(parent)

        self.metadata = {}
        self.data_file = None
Example #6
0
    def put(self, fd, metadata, extension='.data'):
        """
        Finalize writing the file on disk, and renames it from the temp file
        to the real location.  This should be called after the data has been
        written to the temp file.

        :param fd: file descriptor of the temp file
        :param metadata: dictionary of metadata to be written
        :param extension: extension to be used when making the file
        :raises DiskFileError: if the target already exists with a
                               conflicting type, or the rename cannot be
                               completed
        """
        # Our caller will use '.data' here; we just ignore it since we map the
        # URL directly to the file system.

        metadata = _adjust_metadata(metadata)

        if dir_is_object(metadata):
            if not self.data_file:
                # Does not exist, create it
                data_file = os.path.join(self._obj_path, self._obj)
                _, self.metadata = self._create_dir_object(data_file, metadata)
                self.data_file = os.path.join(self._container_path, data_file)
            elif not self.is_dir:
                # Exists, but as a file
                raise DiskFileError('DiskFile.put(): directory creation failed'
                                    ' since the target, %s, already exists as'
                                    ' a file' % self.data_file)
            return

        if self._is_dir:
            # A pre-existing directory already exists on the file
            # system, perhaps gratuitously created when another
            # object was created, or created externally to Swift
            # REST API servicing (UFO use case).
            raise DiskFileError('DiskFile.put(): file creation failed since'
                                ' the target, %s, already exists as a'
                                ' directory' % self.data_file)

        # Write out metadata before fsync() to ensure it is also forced to
        # disk.
        write_metadata(fd, metadata)

        if not _relaxed_writes:
            do_fsync(fd)
            if X_CONTENT_LENGTH in metadata:
                # Don't bother doing this before fsync in case the OS gets any
                # ideas to issue partial writes.
                fsize = int(metadata[X_CONTENT_LENGTH])
                self.drop_cache(fd, 0, fsize)

        # At this point we know that the object's full directory path exists,
        # so we can just rename it directly without using Swift's
        # swift.common.utils.renamer(), which makes the directory path and
        # adds extra stat() calls.
        data_file = os.path.join(self.put_datadir, self._obj)
        # BUG FIX: bound the retry loop.  The original retried forever on
        # repeated ENOENT/EIO, which could spin indefinitely on a
        # persistently broken mount.
        max_rename_attempts = 10
        attempts = 1
        while True:
            try:
                os.rename(self.tmppath, data_file)
            except OSError as err:
                if err.errno in (errno.ENOENT, errno.EIO) \
                        and attempts < max_rename_attempts:
                    # FIXME: Why either of these two error conditions is
                    # happening is unknown at this point. This might be a FUSE
                    # issue of some sort or a possible race condition. So
                    # let's sleep on it, and double check the environment
                    # after a good nap.
                    _random_sleep()
                    # Tease out why this error occurred. The man page for
                    # rename reads:
                    #   "The link named by tmppath does not exist; or, a
                    #    directory component in data_file does not exist;
                    #    or, tmppath or data_file is an empty string."
                    assert len(self.tmppath) > 0 and len(data_file) > 0
                    tpstats = do_stat(self.tmppath)
                    tfstats = do_fstat(fd)
                    assert tfstats
                    if not tpstats or tfstats.st_ino != tpstats.st_ino:
                        # Temporary file name conflict
                        raise DiskFileError('DiskFile.put(): temporary file,'
                                            ' %s, was already renamed'
                                            ' (targeted for %s)' % (
                                                self.tmppath, data_file))
                    else:
                        # Data file target name now has a bad path!
                        dfstats = do_stat(self.put_datadir)
                        if not dfstats:
                            raise DiskFileError('DiskFile.put(): path to'
                                                ' object, %s, no longer exists'
                                                ' (targeted for %s)' % (
                                                    self.put_datadir,
                                                    data_file))
                        else:
                            is_dir = stat.S_ISDIR(dfstats.st_mode)
                            if not is_dir:
                                raise DiskFileError('DiskFile.put(): path to'
                                                    ' object, %s, no longer a'
                                                    ' directory (targeted for'
                                                    ' %s)' % (self.put_datadir,
                                                              data_file))
                            else:
                                # Let's retry since everything looks okay.
                                # NOTE: logging.warn is deprecated; use
                                # logging.warning.
                                logging.warning(
                                    "DiskFile.put(): os.rename('%s',"
                                    "'%s') initially failed (%s) but"
                                    " a stat('%s') following that"
                                    " succeeded: %r" % (
                                        self.tmppath, data_file,
                                        str(err), self.put_datadir,
                                        dfstats))
                                attempts += 1
                                continue
                else:
                    raise GlusterFileSystemOSError(
                        err.errno, "%s, os.rename('%s', '%s')" % (
                            err.strerror, self.tmppath, data_file))
            else:
                # Success!
                break

        # Avoid the unlink() system call as part of the mkstemp context cleanup
        self.tmppath = None

        self.metadata = metadata
        self.filter_metadata()

        # Mark that it actually exists now
        self.data_file = os.path.join(self.datadir, self._obj)
Example #7
0
    def put(self, metadata, extension='.data'):
        """
        Finalize writing the file on disk, and renames it from the temp file
        to the real location.  This should be called after the data has been
        written to the temp file.

        :param metadata: dictionary of metadata to be written
        :param extension: extension to be used when making the file
        :raises DiskFileError: if the target already exists with a
                               conflicting type, or the rename cannot be
                               completed
        """
        # Our caller will use '.data' here; we just ignore it since we map the
        # URL directly to the file system.

        assert self.tmppath is not None
        metadata = _adjust_metadata(metadata)
        df = self.disk_file

        if dir_is_object(metadata):
            if not df.data_file:
                # Does not exist, create it
                data_file = os.path.join(df._obj_path, df._obj)
                _, df.metadata = self.threadpool.force_run_in_thread(
                    df._create_dir_object, data_file, metadata)
                df.data_file = os.path.join(df._container_path, data_file)
            elif not df.is_dir:
                # Exists, but as a file
                raise DiskFileError('DiskFile.put(): directory creation failed'
                                    ' since the target, %s, already exists as'
                                    ' a file' % df.data_file)
            return

        if df._is_dir:
            # A pre-existing directory already exists on the file
            # system, perhaps gratuitously created when another
            # object was created, or created externally to Swift
            # REST API servicing (UFO use case).
            raise DiskFileError('DiskFile.put(): file creation failed since'
                                ' the target, %s, already exists as a'
                                ' directory' % df.data_file)

        def finalize_put():
            # Write out metadata before fsync() to ensure it is also forced to
            # disk.
            write_metadata(self.fd, metadata)

            # We call fsync() before calling drop_cache() to lower the
            # amount of redundant work the drop cache code will perform on
            # the pages (now that after fsync the pages will be all
            # clean).
            do_fsync(self.fd)
            # From the Department of the Redundancy Department, make sure
            # we call drop_cache() after fsync() to avoid redundant work
            # (pages all clean).
            drop_buffer_cache(self.fd, 0, self.upload_size)

            # At this point we know that the object's full directory path
            # exists, so we can just rename it directly without using Swift's
            # swift.common.utils.renamer(), which makes the directory path and
            # adds extra stat() calls.
            data_file = os.path.join(df.put_datadir, df._obj)
            attempts = 1
            while True:
                try:
                    os.rename(self.tmppath, data_file)
                except OSError as err:
                    if err.errno in (errno.ENOENT, errno.EIO) \
                            and attempts < MAX_RENAME_ATTEMPTS:
                        # FIXME: Why either of these two error conditions is
                        # happening is unknown at this point. This might be a
                        # FUSE issue of some sort or a possible race
                        # condition. So let's sleep on it, and double check
                        # the environment after a good nap.
                        _random_sleep()
                        # Tease out why this error occurred. The man page for
                        # rename reads:
                        #   "The link named by tmppath does not exist; or, a
                        #    directory component in data_file does not exist;
                        #    or, tmppath or data_file is an empty string."
                        assert len(self.tmppath) > 0 and len(data_file) > 0
                        tpstats = do_stat(self.tmppath)
                        tfstats = do_fstat(self.fd)
                        assert tfstats
                        if not tpstats or tfstats.st_ino != tpstats.st_ino:
                            # Temporary file name conflict
                            raise DiskFileError(
                                'DiskFile.put(): temporary file, %s, was'
                                ' already renamed (targeted for %s)' % (
                                    self.tmppath, data_file))
                        else:
                            # Data file target name now has a bad path!
                            dfstats = do_stat(df.put_datadir)
                            if not dfstats:
                                raise DiskFileError(
                                    'DiskFile.put(): path to object, %s, no'
                                    ' longer exists (targeted for %s)' % (
                                        df.put_datadir,
                                        data_file))
                            else:
                                is_dir = stat.S_ISDIR(dfstats.st_mode)
                                if not is_dir:
                                    raise DiskFileError(
                                        'DiskFile.put(): path to object, %s,'
                                        ' no longer a directory (targeted for'
                                        ' %s)' % (df.put_datadir,
                                                  data_file))
                                else:
                                    # Let's retry since everything looks okay.
                                    # BUG FIX: logging.warn is deprecated
                                    # (removed in Python 3.13); use
                                    # logging.warning.
                                    logging.warning(
                                        "DiskFile.put(): os.rename('%s','%s')"
                                        " initially failed (%s) but a"
                                        " stat('%s') following that succeeded:"
                                        " %r" % (
                                            self.tmppath, data_file,
                                            str(err), df.put_datadir,
                                            dfstats))
                                    attempts += 1
                                    continue
                    else:
                        raise GlusterFileSystemOSError(
                            err.errno, "%s, os.rename('%s', '%s')" % (
                                err.strerror, self.tmppath, data_file))
                else:
                    # Success!
                    break
            # Close here so the calling context does not have to perform this
            # in a thread.
            do_close(self.fd)

        self.threadpool.force_run_in_thread(finalize_put)

        # Avoid the unlink() system call as part of the mkstemp context
        # cleanup
        self.tmppath = None

        df.metadata = metadata
        df._filter_metadata()

        # Mark that it actually exists now
        df.data_file = os.path.join(df.datadir, df._obj)
Example #8
0
    def list_objects_iter(self, limit, marker, end_marker,
                          prefix, delimiter, path=None):
        """
        Returns tuple of name, created_at, size, content_type, etag.
        """
        assert limit >= 0
        assert not delimiter or (len(delimiter) == 1 and ord(delimiter) <= 254)

        if path is not None:
            if path:
                prefix = path = path.rstrip('/') + '/'
            else:
                prefix = path
            delimiter = '/'
        elif delimiter and not prefix:
            prefix = ''

        container_list = []

        objects = self._update_object_count()
        if objects:
            objects.sort()
        else:
            return container_list

        if end_marker:
            objects = filter_end_marker(objects, end_marker)

        # BUG FIX: 'prefix' may still be None here (path is None and
        # delimiter is falsy).  On Python 3, 'marker >= None' raises
        # TypeError; on Python 2 any str compared greater than None, so the
        # marker filter was applied.  Preserve that behavior explicitly.
        if marker and (prefix is None or marker >= prefix):
            objects = filter_marker(objects, marker)
        elif prefix:
            objects = filter_prefix_as_marker(objects, prefix)

        if prefix is None:
            # No prefix, we don't need to apply the other arguments, we just
            # return what we have.
            pass
        else:
            # We have a non-None (for all intents and purposes it is a string)
            # prefix.
            if not delimiter:
                if not prefix:
                    # We have nothing more to do
                    pass
                else:
                    objects = filter_prefix(objects, prefix)
            else:
                objects = filter_delimiter(objects, delimiter, prefix, marker,
                                           path)

        # NOTE(review): with limit == 0 this loop still emits one entry
        # (count is checked only after the append) -- confirm callers never
        # pass 0 before changing.
        count = 0
        for obj in objects:
            obj_path = os.path.join(self.datadir, obj)
            metadata = read_metadata(obj_path)
            if not metadata or not validate_object(metadata):
                if delimiter == '/' and obj_path[-1] == delimiter:
                    clean_obj_path = obj_path[:-1]
                else:
                    clean_obj_path = obj_path
                try:
                    metadata = create_object_metadata(clean_obj_path)
                except OSError as e:
                    # FIXME - total hack to get upstream swift ported unit
                    # test cases working for now.
                    if e.errno != errno.ENOENT:
                        raise
            if Glusterfs.OBJECT_ONLY and metadata \
                    and metadata[X_CONTENT_TYPE] == DIR_TYPE \
                    and not dir_is_object(metadata):
                continue
            list_item = []
            list_item.append(obj)
            if metadata:
                list_item.append(metadata[X_TIMESTAMP])
                list_item.append(int(metadata[X_CONTENT_LENGTH]))
                list_item.append(metadata[X_CONTENT_TYPE])
                list_item.append(metadata[X_ETAG])
            container_list.append(list_item)
            count += 1
            if count >= limit:
                break

        return container_list
Example #9
0
    def list_objects_iter(self, limit, marker, end_marker,
                          prefix, delimiter, path=None,
                          storage_policy_index=0,
                          out_content_type=None, reverse=False):
        """
        Returns tuple of name, created_at, size, content_type, etag.
        """
        assert limit >= 0
        assert not delimiter or (len(delimiter) == 1 and ord(delimiter) <= 254)
        if path is not None:
            if path:
                prefix = path = path.rstrip('/') + '/'
            else:
                prefix = path
            delimiter = '/'
        elif delimiter and not prefix:
            prefix = ''

        container_list = []

        if self.account == 'gsexpiring':
            objects = list_objects_gsexpiring_container(self.datadir)
        else:
            objects = self._update_object_count()
        if objects:
            objects.sort()
        else:
            # No objects in container, return empty list
            return container_list

        if marker and end_marker and reverse:
            marker, end_marker = end_marker, marker

        if end_marker:
            objects = filter_end_marker(objects, end_marker)

        # BUG FIX: 'prefix' may still be None here (path is None and
        # delimiter is falsy).  On Python 3, 'marker >= None' raises
        # TypeError; on Python 2 any str compared greater than None, so the
        # marker filter was applied.  Preserve that behavior explicitly.
        if marker and (prefix is None or marker >= prefix):
            objects = filter_marker(objects, marker)
        elif prefix:
            objects = filter_prefix_as_marker(objects, prefix)

        if prefix is None:
            # No prefix, we don't need to apply the other arguments, we just
            # return what we have.
            pass
        else:
            # We have a non-None (for all intents and purposes it is a string)
            # prefix.
            if not delimiter:
                if not prefix:
                    # We have nothing more to do
                    pass
                else:
                    objects = filter_prefix(objects, prefix)
            else:
                objects = filter_delimiter(objects, delimiter, prefix, marker,
                                           path)

        if out_content_type == 'text/plain' or \
                self.account == 'gsexpiring':
            # When out_content_type == 'text/plain':
            #
            # The client is only asking for a plain list of objects and NOT
            # asking for any extended information about objects such as
            # bytes used or etag.
            #
            # When self.account == 'gsexpiring':
            #
            # This is a JSON request sent by the object expirer to list
            # tracker objects in a container in gsexpiring volume.
            # When out_content_type is 'application/json', the caller
            # expects each record entry to have the following ordered
            # fields: (name, timestamp, size, content_type, etag)
            for obj in objects:
                container_list.append((obj, '0', 0, 'text/plain', ''))
                if len(container_list) >= limit:
                    break
            if reverse:
                container_list.reverse()
            return container_list

        # NOTE(review): with limit == 0 this loop still emits one entry
        # (count is checked only after the append) -- confirm callers never
        # pass 0 before changing.
        count = 0
        for obj in objects:
            obj_path = os.path.join(self.datadir, obj)
            try:
                metadata = read_metadata(obj_path)
            except GlusterFileSystemIOError as err:
                if err.errno in (errno.ENOENT, errno.ESTALE):
                    # obj might have been deleted by another process
                    # since the objects list was originally built
                    continue
                else:
                    raise err
            if not metadata or not validate_object(metadata):
                if delimiter == '/' and obj_path[-1] == delimiter:
                    clean_obj_path = obj_path[:-1]
                else:
                    clean_obj_path = obj_path
                try:
                    metadata = create_object_metadata(clean_obj_path)
                except OSError as e:
                    # FIXME - total hack to get upstream swift ported unit
                    # test cases working for now.
                    if e.errno not in (errno.ENOENT, errno.ESTALE):
                        raise
            if not Glusterfs._implicit_dir_objects and metadata \
                    and metadata[X_CONTENT_TYPE] == DIR_TYPE \
                    and not dir_is_object(metadata):
                continue
            list_item = []

            list_item.append(obj)

            if metadata:
                list_item.append(metadata[X_TIMESTAMP])
                list_item.append(int(metadata[X_CONTENT_LENGTH]))
                list_item.append(metadata[X_CONTENT_TYPE])
                list_item.append(metadata[X_ETAG])
            container_list.append(list_item)
            count += 1
            if count >= limit:
                break
        if reverse:
            container_list.reverse()
        return container_list
Example #10
0
    def list_objects_iter(self,
                          limit,
                          marker,
                          end_marker,
                          prefix,
                          delimiter,
                          path=None):
        """
        Returns tuple of name, created_at, size, content_type, etag.
        """
        assert limit >= 0
        assert not delimiter or (len(delimiter) == 1 and ord(delimiter) <= 254)

        if path is not None:
            if path:
                prefix = path = path.rstrip('/') + '/'
            else:
                prefix = path
            delimiter = '/'
        elif delimiter and not prefix:
            prefix = ''

        container_list = []

        objects = self._update_object_count()
        if objects:
            objects.sort()
        else:
            return container_list

        if end_marker:
            objects = filter_end_marker(objects, end_marker)

        # BUG FIX: 'prefix' may still be None here (path is None and
        # delimiter is falsy).  On Python 3, 'marker >= None' raises
        # TypeError; on Python 2 any str compared greater than None, so the
        # marker filter was applied.  Preserve that behavior explicitly.
        if marker and (prefix is None or marker >= prefix):
            objects = filter_marker(objects, marker)
        elif prefix:
            objects = filter_prefix_as_marker(objects, prefix)

        if prefix is None:
            # No prefix, we don't need to apply the other arguments, we just
            # return what we have.
            pass
        else:
            # We have a non-None (for all intents and purposes it is a string)
            # prefix.
            if not delimiter:
                if not prefix:
                    # We have nothing more to do
                    pass
                else:
                    objects = filter_prefix(objects, prefix)
            else:
                objects = filter_delimiter(objects, delimiter, prefix, marker,
                                           path)

        # NOTE(review): with limit == 0 this loop still emits one entry
        # (count is checked only after the append) -- confirm callers never
        # pass 0 before changing.
        count = 0
        for obj in objects:
            obj_path = os.path.join(self.datadir, obj)
            metadata = read_metadata(obj_path)
            if not metadata or not validate_object(metadata):
                if delimiter == '/' and obj_path[-1] == delimiter:
                    clean_obj_path = obj_path[:-1]
                else:
                    clean_obj_path = obj_path
                try:
                    metadata = create_object_metadata(clean_obj_path)
                except OSError as e:
                    # FIXME - total hack to get upstream swift ported unit
                    # test cases working for now.
                    if e.errno != errno.ENOENT:
                        raise
            if not Glusterfs._implicit_dir_objects and metadata \
                    and metadata[X_CONTENT_TYPE] == DIR_TYPE \
                    and not dir_is_object(metadata):
                continue
            list_item = []
            list_item.append(obj)
            if metadata:
                list_item.append(metadata[X_TIMESTAMP])
                list_item.append(int(metadata[X_CONTENT_LENGTH]))
                list_item.append(metadata[X_CONTENT_TYPE])
                list_item.append(metadata[X_ETAG])
            container_list.append(list_item)
            count += 1
            if count >= limit:
                break

        return container_list