Ejemplo n.º 1
0
def write_pickle(obj, dest, tmp=None, pickle_protocol=0):
    """
    Ensure that a pickle file gets written to disk.  The file is first
    written to a temporary file in the destination directory, synced to
    disk, and then renamed to its final destination name.

    This version takes advantage of Gluster's dot-prefix-dot-suffix naming
    where a file named ".thefile.name.9a7aasv" is hashed to the same
    Gluster node as "thefile.name". This ensures the renaming of a temp file
    once written does not move it to another Gluster node.

    If pickling, syncing or renaming fails, the temporary file is removed
    (best effort) and the original exception propagates to the caller.

    :param obj: python object to be pickled
    :param dest: path of final destination file
    :param tmp: path to tmp to use, defaults to None (ignored)
    :param pickle_protocol: protocol to pickle the obj with, defaults to 0
    """
    dirname = os.path.dirname(dest)
    basename = os.path.basename(dest)
    # Random suffix so concurrent writers of the same dest do not collide.
    tmpname = '.' + basename + '.' + \
        md5(basename + str(random.random())).hexdigest()
    tmppath = os.path.join(dirname, tmpname)
    renamed = False
    try:
        with open(tmppath, 'wb') as fo:
            pickle.dump(obj, fo, pickle_protocol)
            # TODO: This flush() method call turns into a flush() system call
            # We'll need to wrap this as well, but we would do this by writing
            # a context manager for our own open() method which returns an
            # object in fo which makes the gluster API call.
            fo.flush()
            do_fsync(fo)
        do_rename(tmppath, dest)
        renamed = True
    finally:
        if not renamed:
            # Do not leave a stray dot-file behind on failure.  A flag plus
            # finally (rather than except/raise) keeps the original exception
            # intact even if the cleanup itself fails.
            try:
                os.unlink(tmppath)
            except OSError:
                # Best effort: the temp file may never have been created.
                pass
Ejemplo n.º 2
0
    def put(self, fd, metadata, extension='.data'):
        """
        Complete the write of an object: store its metadata, sync the temp
        file and rename it to its real location.  Call this only after the
        object data has been written to the temp file.

        A directory-marker object is handled separately: the directory is
        created if it is not already known and only its metadata is stored.

        :param fd: file descriptor of the temp file
        :param metadata: dictionary of metadata to be written
        :param extension: accepted for caller compatibility and ignored,
                          since the URL maps directly to the file system
        :raises AlreadyExistsAsDir: if a regular object is being written but
                                    the target is flagged as a directory
        """
        metadata = _adjust_metadata(metadata)

        # Directory objects: no data file to rename, only metadata to store.
        if metadata[X_OBJECT_TYPE] == MARKER_DIR:
            if not self.data_file:
                self.data_file = os.path.join(self.datadir, self._obj)
                self._create_dir_object(self.data_file)
            self.put_metadata(metadata)
            return

        # FIXME: How can we have a directory and it not be marked as a
        # MARKER_DIR (see above)?
        if self._is_dir:
            raise AlreadyExistsAsDir(
                'File object exists as a directory: %s' % self.data_file)

        write_metadata(self.tmppath, metadata)
        if X_CONTENT_LENGTH in metadata:
            self.drop_cache(fd, 0, int(metadata[X_CONTENT_LENGTH]))
        do_fsync(fd)

        # Create each intermediate directory of the object path in turn,
        # starting from the container directory.
        if self._obj_path:
            components = self._obj_path.split('/')
            assert len(components) >= 1
            parent = self._container_path
            for component in components:
                parent = os.path.join(parent, component)
                self._create_dir_object(parent)

        final_path = os.path.join(self.datadir, self._obj)
        renamer(self.tmppath, final_path)
        do_chown(final_path, self.uid, self.gid)
        self.metadata = metadata
        self.data_file = final_path
        self.filter_metadata()
 def test_do_fsync_err(self):
     """
     do_fsync() returns None on an open descriptor (os.fsync patched) and
     raises GlusterFileSystemOSError once the descriptor has been closed.
     """
     tmpdir = mkdtemp()
     try:
         fd, _tmpfile = mkstemp(dir=tmpdir)
         try:
             # os.write() requires bytes on Python 3; a b'' literal is
             # equally valid on Python 2.6+.
             os.write(fd, b'test')
             with patch('os.fsync', mock_os_fsync):
                 self.assertTrue(fs.do_fsync(fd) is None)
         finally:
             # Close exactly once, even when the checks above fail, so the
             # descriptor is never leaked.
             os.close(fd)
         # The descriptor is closed now, so the (unpatched) fsync must fail.
         self.assertRaises(GlusterFileSystemOSError, fs.do_fsync, fd)
     finally:
         shutil.rmtree(tmpdir)
Ejemplo n.º 4
0
 def test_do_fsync_err(self):
     """
     With eventlet.tpool.execute patched, do_fsync() returns a truthy value
     on an open descriptor (os.fsync patched) and raises OSError once the
     descriptor has been closed.
     """
     tmpdir = mkdtemp()
     try:
         fd, _tmpfile = mkstemp(dir=tmpdir)
         with patch('eventlet.tpool.execute', mock_tpool_execute):
             try:
                 # os.write() requires bytes on Python 3; a b'' literal is
                 # equally valid on Python 2.6+.
                 os.write(fd, b'test')
                 with patch('os.fsync', mock_os_fsync):
                     self.assertTrue(fs.do_fsync(fd))
             finally:
                 # Close exactly once, even when the checks above fail, so
                 # the descriptor is never leaked.
                 os.close(fd)
             # The descriptor is closed now, so the (unpatched) fsync must
             # fail.
             self.assertRaises(OSError, fs.do_fsync, fd)
     finally:
         shutil.rmtree(tmpdir)
 def test_do_fsync(self):
     """do_fsync() returns None on an open descriptor (os.fsync patched)."""
     tmpdir = mkdtemp()
     try:
         fd, _tmpfile = mkstemp(dir=tmpdir)
         try:
             # os.write() requires bytes on Python 3; a b'' literal is
             # equally valid on Python 2.6+.
             os.write(fd, b'test')
             with patch('os.fsync', mock_os_fsync):
                 self.assertTrue(fs.do_fsync(fd) is None)
         except GlusterFileSystemOSError as ose:
             self.fail('Opening a temporary file failed with %s' %
                       ose.strerror)
         finally:
             # Release the descriptor on failure as well as on success.
             os.close(fd)
     finally:
         shutil.rmtree(tmpdir)
Ejemplo n.º 6
0
def write_pickle(obj, dest, tmp=None, pickle_protocol=0):
    """
    Ensure that a pickle file gets written to disk.  The destination
    directory is created if needed; the file is first written to a
    temporary file in that directory, synced to disk, and then renamed to
    its final destination name.

    This version takes advantage of Gluster's dot-prefix-dot-suffix naming
    where a file named ".thefile.name.9a7aasv" is hashed to the same
    Gluster node as "thefile.name". This ensures the renaming of a temp file
    once written does not move it to another Gluster node.

    If pickling, syncing or renaming fails, the temporary file is removed
    (best effort) and the original exception propagates to the caller.

    :param obj: python object to be pickled
    :param dest: path of final destination file
    :param tmp: path to tmp to use, defaults to None (ignored)
    :param pickle_protocol: protocol to pickle the obj with, defaults to 0
    :raises OSError: if the destination directory cannot be created for a
                     reason other than already existing
    """
    dirname = os.path.dirname(dest)
    # Create destination directory.  Skip this when dest has no directory
    # component: os.makedirs('') fails with ENOENT rather than EEXIST.
    if dirname:
        try:
            os.makedirs(dirname)
        except OSError as err:
            if err.errno != errno.EEXIST:
                raise
    basename = os.path.basename(dest)
    # Random suffix so concurrent writers of the same dest do not collide.
    tmpname = '.' + basename + '.' + \
        md5(basename + str(random.random())).hexdigest()
    tmppath = os.path.join(dirname, tmpname)
    renamed = False
    try:
        with open(tmppath, 'wb') as fo:
            pickle.dump(obj, fo, pickle_protocol)
            # TODO: This flush() method call turns into a flush() system call
            # We'll need to wrap this as well, but we would do this by writing
            # a context manager for our own open() method which returns an
            # object in fo which makes the gluster API call.
            fo.flush()
            do_fsync(fo)
        do_rename(tmppath, dest)
        renamed = True
    finally:
        if not renamed:
            # Do not leave a stray dot-file behind on failure.  A flag plus
            # finally (rather than except/raise) keeps the original exception
            # intact even if the cleanup itself fails.
            try:
                os.unlink(tmppath)
            except OSError:
                # Best effort: the temp file may never have been created.
                pass
Ejemplo n.º 7
0
 def test_do_fsync(self):
     """
     With eventlet.tpool.execute and os.fsync patched, do_fsync() returns a
     truthy value on an open descriptor.
     """
     tmpdir = mkdtemp()
     try:
         fd, _tmpfile = mkstemp(dir=tmpdir)
         try:
             # os.write() requires bytes on Python 3; a b'' literal is
             # equally valid on Python 2.6+.
             os.write(fd, b'test')
             with patch('eventlet.tpool.execute', mock_tpool_execute):
                 with patch('os.fsync', mock_os_fsync):
                     self.assertTrue(fs.do_fsync(fd))
         except OSError as ose:
             self.fail('Opening a temporary file failed with %s' %
                       ose.strerror)
         finally:
             # Release the descriptor on failure as well as on success.
             os.close(fd)
     finally:
         shutil.rmtree(tmpdir)
Ejemplo n.º 8
0
    def _finalize_put(self, metadata):
        """
        Write the metadata, sync the open temp file, rename it to the disk
        file's data file path and close this writer.

        A rename that fails with ENOENT or EIO is retried after a random
        sleep, as long as fewer than MAX_RENAME_ATTEMPTS attempts have been
        made and the temp file and the target directory still look sane.

        :param metadata: metadata handed to write_metadata() for the open
                         file descriptor
        :raises DiskFileError: if, after a failed rename, the temp file is
                               gone or is a different inode, or the target
                               directory is missing or not a directory
        :raises GlusterFileSystemOSError: if the rename fails with any other
                                          errno, or the attempts are used up
        """
        # Write out metadata before fsync() to ensure it is also forced to
        # disk.
        write_metadata(self._fd, metadata)

        # fsync() first so the pages are clean by the time do_fadvise64()
        # is called, which lowers the redundant work it has to perform.
        do_fsync(self._fd)
        do_fadvise64(self._fd, self._last_sync, self._upload_size)

        # At this point we know that the object's full directory path
        # exists, so we can just rename it directly without using Swift's
        # swift.common.utils.renamer(), which makes the directory path and
        # adds extra stat() calls.
        df = self._disk_file
        attempts = 1
        while True:
            try:
                do_rename(self._tmppath, df._data_file)
            except OSError as err:
                if err.errno not in (errno.ENOENT, errno.EIO) \
                        or attempts >= MAX_RENAME_ATTEMPTS:
                    raise GlusterFileSystemOSError(
                        err.errno, "%s, os.rename('%s', '%s')" % (
                            err.strerror, self._tmppath, df._data_file))

                # FIXME: Why either of these two error conditions is
                # happening is unknown at this point. This might be a
                # FUSE issue of some sort or a possible race
                # condition. So let's sleep on it, and double check
                # the environment after a good nap.
                _random_sleep()
                # Tease out why this error occurred. The man page for
                # rename reads:
                #   "The link named by tmppath does not exist; or, a
                #    directory component in data_file does not exist;
                #    or, tmppath or data_file is an empty string."
                assert len(self._tmppath) > 0 and len(df._data_file) > 0
                tpstats = do_stat(self._tmppath)
                tfstats = do_fstat(self._fd)
                assert tfstats
                if not tpstats or tfstats.st_ino != tpstats.st_ino:
                    # Temporary file name conflict
                    raise DiskFileError(
                        'DiskFile.put(): temporary file, %s, was'
                        ' already renamed (targeted for %s)' % (
                            self._tmppath, df._data_file))

                # Data file target name now has a bad path!
                dfstats = do_stat(df._put_datadir)
                if not dfstats:
                    raise DiskFileError(
                        'DiskFile.put(): path to object, %s, no'
                        ' longer exists (targeted for %s)' % (
                            df._put_datadir, df._data_file))
                if not stat.S_ISDIR(dfstats.st_mode):
                    # The directory lives on the disk file (df), like every
                    # other _put_datadir reference in this method.
                    raise DiskFileError(
                        'DiskFile.put(): path to object, %s,'
                        ' no longer a directory (targeted for'
                        ' %s)' % (df._put_datadir,
                                  df._data_file))

                # Let's retry since everything looks okay
                logging.warning(
                    "DiskFile.put(): os.rename('%s','%s')"
                    " initially failed (%s) but a"
                    " stat('%s') following that succeeded:"
                    " %r" % (
                        self._tmppath, df._data_file, str(err),
                        df._put_datadir, dfstats))
                attempts += 1
                continue
            else:
                # Success!
                break
        # Close here so the calling context does not have to perform this
        # in a thread.
        self.close()
Ejemplo n.º 9
0
    def put(self, fd, metadata, extension='.data'):
        """
        Finalize writing the file on disk, and renames it from the temp file
        to the real location.  This should be called after the data has been
        written to the temp file.

        For a directory object (per dir_is_object()), the directory is
        created when no data file is known yet and the method returns without
        touching the temp file.

        :param fd: file descriptor of the temp file
        :param metadata: dictionary of metadata to be written
        :param extension: extension to be used when making the file (never
                          referenced in this method)
        :raises DiskFileError: if the target already exists as the wrong
                               kind of entry (file vs. directory), or if a
                               failed rename reveals that the temp file or
                               the target directory is no longer valid
        :raises GlusterFileSystemOSError: if the rename fails with an errno
                                          other than ENOENT or EIO
        """
        # Our caller will use '.data' here; we just ignore it since we map the
        # URL directly to the file system.

        metadata = _adjust_metadata(metadata)

        if dir_is_object(metadata):
            if not self.data_file:
                # Does not exist, create it
                data_file = os.path.join(self._obj_path, self._obj)
                _, self.metadata = self._create_dir_object(data_file, metadata)
                self.data_file = os.path.join(self._container_path, data_file)
            # NOTE(review): this reads self.is_dir while the check further
            # down reads self._is_dir -- confirm both attributes exist and
            # that the difference is intentional.
            elif not self.is_dir:
                # Exists, but as a file
                raise DiskFileError('DiskFile.put(): directory creation failed'
                                    ' since the target, %s, already exists as'
                                    ' a file' % self.data_file)
            return

        if self._is_dir:
            # A pre-existing directory already exists on the file
            # system, perhaps gratuitously created when another
            # object was created, or created externally to Swift
            # REST API servicing (UFO use case).
            raise DiskFileError('DiskFile.put(): file creation failed since'
                                ' the target, %s, already exists as a'
                                ' directory' % self.data_file)

        # Write out metadata before fsync() to ensure it is also forced to
        # disk.
        write_metadata(fd, metadata)

        # When _relaxed_writes is set, both the fsync and the cache drop are
        # skipped.
        if not _relaxed_writes:
            do_fsync(fd)
            if X_CONTENT_LENGTH in metadata:
                # Don't bother doing this before fsync in case the OS gets any
                # ideas to issue partial writes.
                fsize = int(metadata[X_CONTENT_LENGTH])
                self.drop_cache(fd, 0, fsize)

        # At this point we know that the object's full directory path exists,
        # so we can just rename it directly without using Swift's
        # swift.common.utils.renamer(), which makes the directory path and
        # adds extra stat() calls.
        data_file = os.path.join(self.put_datadir, self._obj)
        # NOTE(review): this loop has no attempt limit; it keeps retrying for
        # as long as the rename fails with ENOENT/EIO while the temp file and
        # target directory still pass the checks below.
        while True:
            try:
                os.rename(self.tmppath, data_file)
            except OSError as err:
                if err.errno in (errno.ENOENT, errno.EIO):
                    # FIXME: Why either of these two error conditions is
                    # happening is unknown at this point. This might be a FUSE
                    # issue of some sort or a possible race condition. So
                    # let's sleep on it, and double check the environment
                    # after a good nap.
                    _random_sleep()
                    # Tease out why this error occurred. The man page for
                    # rename reads:
                    #   "The link named by tmppath does not exist; or, a
                    #    directory component in data_file does not exist;
                    #    or, tmppath or data_file is an empty string."
                    assert len(self.tmppath) > 0 and len(data_file) > 0
                    # Compare the inode behind the temp path with the inode
                    # behind the open descriptor to see whether the path
                    # still names the file we wrote.
                    tpstats = do_stat(self.tmppath)
                    tfstats = do_fstat(fd)
                    assert tfstats
                    if not tpstats or tfstats.st_ino != tpstats.st_ino:
                        # Temporary file name conflict
                        raise DiskFileError('DiskFile.put(): temporary file,'
                                            ' %s, was already renamed'
                                            ' (targeted for %s)' % (
                                                self.tmppath, data_file))
                    else:
                        # Data file target name now has a bad path!
                        dfstats = do_stat(self.put_datadir)
                        if not dfstats:
                            raise DiskFileError('DiskFile.put(): path to'
                                                ' object, %s, no longer exists'
                                                ' (targeted for %s)' % (
                                                    self.put_datadir,
                                                    data_file))
                        else:
                            is_dir = stat.S_ISDIR(dfstats.st_mode)
                            if not is_dir:
                                raise DiskFileError('DiskFile.put(): path to'
                                                    ' object, %s, no longer a'
                                                    ' directory (targeted for'
                                                    ' %s)' % (self.put_datadir,
                                                              data_file))
                            else:
                                # Let's retry since everything looks okay
                                logging.warn("DiskFile.put(): os.rename('%s',"
                                             "'%s') initially failed (%s) but"
                                             " a stat('%s') following that"
                                             " succeeded: %r" % (
                                                 self.tmppath, data_file,
                                                 str(err), self.put_datadir,
                                                 dfstats))
                                continue
                else:
                    # Any other errno is re-raised with the rename arguments
                    # included in the message.
                    raise GlusterFileSystemOSError(
                        err.errno, "%s, os.rename('%s', '%s')" % (
                            err.strerror, self.tmppath, data_file))
            else:
                # Success!
                break

        # Avoid the unlink() system call as part of the mkstemp context cleanup
        self.tmppath = None

        self.metadata = metadata
        self.filter_metadata()

        # Mark that it actually exists now
        # NOTE(review): built from self.datadir, whereas the rename above
        # targeted self.put_datadir -- presumably both name the same
        # directory; verify.
        self.data_file = os.path.join(self.datadir, self._obj)
Ejemplo n.º 10
0
    def _finalize_put(self, metadata):
        """
        Write the metadata, sync the open temp file, rename it to the disk
        file's data file path and close this writer.

        A rename that fails with ENOENT or EIO is retried after a random
        sleep, as long as fewer than MAX_RENAME_ATTEMPTS attempts have been
        made and the temp file and the target directory still look sane.

        :param metadata: metadata handed to write_metadata() for the open
                         file descriptor
        :raises DiskFileError: if, after a failed rename, the temp file is
                               gone or is a different inode, or the target
                               directory is missing or not a directory
        :raises GlusterFileSystemOSError: if the rename fails with any other
                                          errno, or the attempts are used up
        """
        # Write out metadata before fsync() to ensure it is also forced to
        # disk.
        write_metadata(self._fd, metadata)

        # fsync() first so the pages are clean by the time do_fadvise64()
        # is called, which lowers the redundant work it has to perform.
        do_fsync(self._fd)
        do_fadvise64(self._fd, self._last_sync, self._upload_size)

        # At this point we know that the object's full directory path
        # exists, so we can just rename it directly without using Swift's
        # swift.common.utils.renamer(), which makes the directory path and
        # adds extra stat() calls.
        df = self._disk_file
        attempts = 1
        while True:
            try:
                do_rename(self._tmppath, df._data_file)
            except OSError as err:
                if err.errno not in (errno.ENOENT, errno.EIO) \
                        or attempts >= MAX_RENAME_ATTEMPTS:
                    raise GlusterFileSystemOSError(
                        err.errno, "%s, rename('%s', '%s')" %
                        (err.strerror, self._tmppath, df._data_file))

                # FIXME: Why either of these two error conditions is
                # happening is unknown at this point. This might be a
                # FUSE issue of some sort or a possible race
                # condition. So let's sleep on it, and double check
                # the environment after a good nap.
                _random_sleep()
                # Tease out why this error occurred. The man page for
                # rename reads:
                #   "The link named by tmppath does not exist; or, a
                #    directory component in data_file does not exist;
                #    or, tmppath or data_file is an empty string."
                assert len(self._tmppath) > 0 and len(df._data_file) > 0
                tpstats = do_stat(self._tmppath)
                tfstats = do_fstat(self._fd)
                assert tfstats
                if not tpstats or tfstats.st_ino != tpstats.st_ino:
                    # Temporary file name conflict
                    raise DiskFileError(
                        'DiskFile.put(): temporary file, %s, was'
                        ' already renamed (targeted for %s)' %
                        (self._tmppath, df._data_file))

                # Data file target name now has a bad path!
                dfstats = do_stat(df._put_datadir)
                if not dfstats:
                    raise DiskFileError(
                        'DiskFile.put(): path to object, %s, no'
                        ' longer exists (targeted for %s)' %
                        (df._put_datadir, df._data_file))
                if not stat.S_ISDIR(dfstats.st_mode):
                    # The directory lives on the disk file (df), like every
                    # other _put_datadir reference in this method.
                    raise DiskFileError(
                        'DiskFile.put(): path to object, %s,'
                        ' no longer a directory (targeted for'
                        ' %s)' %
                        (df._put_datadir, df._data_file))

                # Let's retry since everything looks okay
                logging.warning(
                    "DiskFile.put(): rename('%s','%s')"
                    " initially failed (%s) but a"
                    " stat('%s') following that succeeded:"
                    " %r" %
                    (self._tmppath, df._data_file, str(err),
                     df._put_datadir, dfstats))
                attempts += 1
                continue
            else:
                # Success!
                break
        # Close here so the calling context does not have to perform this
        # in a thread.
        self.close()