Exemple #1
0
def write_pickle(obj, dest, tmp=None, pickle_protocol=0):
    """
    Ensure that a pickle file gets written to disk.  The file is first written
    to a tmp file location in the destination directory path, ensured it is
    synced to disk, then moved to its final destination name.

    This version takes advantage of Gluster's dot-prefix-dot-suffix naming
    where the a file named ".thefile.name.9a7aasv" is hashed to the same
    Gluster node as "thefile.name". This ensures the renaming of a temp file
    once written does not move it to another Gluster node.

    :param obj: python object to be pickled
    :param dest: path of final destination file
    :param tmp: path to tmp to use, defaults to None (ignored)
    :param pickle_protocol: protocol to pickle the obj with, defaults to 0
    """
    dirname = os.path.dirname(dest)
    basename = os.path.basename(dest)
    tmpname = '.' + basename + '.' + \
        md5(basename + str(random.random())).hexdigest()
    tmppath = os.path.join(dirname, tmpname)
    with open(tmppath, 'wb') as fo:
        pickle.dump(obj, fo, pickle_protocol)
        # TODO: This flush() method call turns into a flush() system call
        # We'll need to wrap this as well, but we would do this by writing
        #a context manager for our own open() method which returns an object
        # in fo which makes the gluster API call.
        fo.flush()
        do_fsync(fo)
    do_rename(tmppath, dest)
 def test_do_rename_err(self):
     try:
         srcpath = os.path.join('/tmp', str(random.random()))
         destpath = os.path.join('/tmp', str(random.random()))
         fs.do_rename(srcpath, destpath)
     except GlusterFileSystemOSError:
         pass
     else:
         self.fail("GlusterFileSystemOSError expected")
 def test_do_rename(self):
     srcpath = mkdtemp()
     try:
         destpath = os.path.join('/tmp', str(random.random()))
         fs.do_rename(srcpath, destpath)
         assert not os.path.exists(srcpath)
         assert os.path.exists(destpath)
     finally:
         os.rmdir(destpath)
Exemple #4
0
def write_pickle(obj, dest, tmp=None, pickle_protocol=0):
    """
    Ensure that a pickle file gets written to disk.  The file is first written
    to a tmp file location in the destination directory path, ensured it is
    synced to disk, then moved to its final destination name.

    This version takes advantage of Gluster's dot-prefix-dot-suffix naming
    where the a file named ".thefile.name.9a7aasv" is hashed to the same
    Gluster node as "thefile.name". This ensures the renaming of a temp file
    once written does not move it to another Gluster node.

    :param obj: python object to be pickled
    :param dest: path of final destination file
    :param tmp: path to tmp to use, defaults to None (ignored)
    :param pickle_protocol: protocol to pickle the obj with, defaults to 0
    """
    dirname = os.path.dirname(dest)
    # Create destination directory
    try:
        os.makedirs(dirname)
    except OSError as err:
        if err.errno != errno.EEXIST:
            raise
    basename = os.path.basename(dest)
    tmpname = '.' + basename + '.' + \
        md5(basename + str(random.random())).hexdigest()
    tmppath = os.path.join(dirname, tmpname)
    with open(tmppath, 'wb') as fo:
        pickle.dump(obj, fo, pickle_protocol)
        # TODO: This flush() method call turns into a flush() system call
        # We'll need to wrap this as well, but we would do this by writing
        # a context manager for our own open() method which returns an object
        # in fo which makes the gluster API call.
        fo.flush()
        do_fsync(fo)
    do_rename(tmppath, dest)
    def _finalize_put(self, metadata):
        # Write out metadata before fsync() to ensure it is also forced to
        # disk.
        write_metadata(self._fd, metadata)

        # We call fsync() before calling drop_cache() to lower the
        # amount of redundant work the drop cache code will perform on
        # the pages (now that after fsync the pages will be all
        # clean).
        do_fsync(self._fd)
        # From the Department of the Redundancy Department, make sure
        # we call drop_cache() after fsync() to avoid redundant work
        # (pages all clean).
        do_fadvise64(self._fd, self._last_sync, self._upload_size)

        # At this point we know that the object's full directory path
        # exists, so we can just rename it directly without using Swift's
        # swift.common.utils.renamer(), which makes the directory path and
        # adds extra stat() calls.
        df = self._disk_file
        attempts = 1
        while True:
            try:
                do_rename(self._tmppath, df._data_file)
            except OSError as err:
                if err.errno in (errno.ENOENT, errno.EIO) \
                        and attempts < MAX_RENAME_ATTEMPTS:
                    # FIXME: Why either of these two error conditions is
                    # happening is unknown at this point. This might be a
                    # FUSE issue of some sort or a possible race
                    # condition. So let's sleep on it, and double check
                    # the environment after a good nap.
                    _random_sleep()
                    # Tease out why this error occurred. The man page for
                    # rename reads:
                    #   "The link named by tmppath does not exist; or, a
                    #    directory component in data_file does not exist;
                    #    or, tmppath or data_file is an empty string."
                    assert len(self._tmppath) > 0 and len(df._data_file) > 0
                    tpstats = do_stat(self._tmppath)
                    tfstats = do_fstat(self._fd)
                    assert tfstats
                    if not tpstats or tfstats.st_ino != tpstats.st_ino:
                        # Temporary file name conflict
                        raise DiskFileError(
                            'DiskFile.put(): temporary file, %s, was'
                            ' already renamed (targeted for %s)' % (
                                self._tmppath, df._data_file))
                    else:
                        # Data file target name now has a bad path!
                        dfstats = do_stat(df._put_datadir)
                        if not dfstats:
                            raise DiskFileError(
                                'DiskFile.put(): path to object, %s, no'
                                ' longer exists (targeted for %s)' % (
                                    df._put_datadir, df._data_file))
                        else:
                            is_dir = stat.S_ISDIR(dfstats.st_mode)
                            if not is_dir:
                                raise DiskFileError(
                                    'DiskFile.put(): path to object, %s,'
                                    ' no longer a directory (targeted for'
                                    ' %s)' % (self._put_datadir,
                                              df._data_file))
                            else:
                                # Let's retry since everything looks okay
                                logging.warn(
                                    "DiskFile.put(): os.rename('%s','%s')"
                                    " initially failed (%s) but a"
                                    " stat('%s') following that succeeded:"
                                    " %r" % (
                                        self._tmppath, df._data_file, str(err),
                                        df._put_datadir, dfstats))
                                attempts += 1
                                continue
                else:
                    raise GlusterFileSystemOSError(
                        err.errno, "%s, os.rename('%s', '%s')" % (
                            err.strerror, self._tmppath, df._data_file))
            else:
                # Success!
                break
        # Close here so the calling context does not have to perform this
        # in a thread.
        self.close()
Exemple #6
0
    def _finalize_put(self, metadata):
        # Write out metadata before fsync() to ensure it is also forced to
        # disk.
        write_metadata(self._fd, metadata)

        # We call fsync() before calling drop_cache() to lower the
        # amount of redundant work the drop cache code will perform on
        # the pages (now that after fsync the pages will be all
        # clean).
        do_fsync(self._fd)
        # From the Department of the Redundancy Department, make sure
        # we call drop_cache() after fsync() to avoid redundant work
        # (pages all clean).
        do_fadvise64(self._fd, self._last_sync, self._upload_size)

        # At this point we know that the object's full directory path
        # exists, so we can just rename it directly without using Swift's
        # swift.common.utils.renamer(), which makes the directory path and
        # adds extra stat() calls.
        df = self._disk_file
        attempts = 1
        while True:
            try:
                do_rename(self._tmppath, df._data_file)
            except OSError as err:
                if err.errno in (errno.ENOENT, errno.EIO) \
                        and attempts < MAX_RENAME_ATTEMPTS:
                    # FIXME: Why either of these two error conditions is
                    # happening is unknown at this point. This might be a
                    # FUSE issue of some sort or a possible race
                    # condition. So let's sleep on it, and double check
                    # the environment after a good nap.
                    _random_sleep()
                    # Tease out why this error occurred. The man page for
                    # rename reads:
                    #   "The link named by tmppath does not exist; or, a
                    #    directory component in data_file does not exist;
                    #    or, tmppath or data_file is an empty string."
                    assert len(self._tmppath) > 0 and len(df._data_file) > 0
                    tpstats = do_stat(self._tmppath)
                    tfstats = do_fstat(self._fd)
                    assert tfstats
                    if not tpstats or tfstats.st_ino != tpstats.st_ino:
                        # Temporary file name conflict
                        raise DiskFileError(
                            'DiskFile.put(): temporary file, %s, was'
                            ' already renamed (targeted for %s)' %
                            (self._tmppath, df._data_file))
                    else:
                        # Data file target name now has a bad path!
                        dfstats = do_stat(df._put_datadir)
                        if not dfstats:
                            raise DiskFileError(
                                'DiskFile.put(): path to object, %s, no'
                                ' longer exists (targeted for %s)' %
                                (df._put_datadir, df._data_file))
                        else:
                            is_dir = stat.S_ISDIR(dfstats.st_mode)
                            if not is_dir:
                                raise DiskFileError(
                                    'DiskFile.put(): path to object, %s,'
                                    ' no longer a directory (targeted for'
                                    ' %s)' %
                                    (self._put_datadir, df._data_file))
                            else:
                                # Let's retry since everything looks okay
                                logging.warn(
                                    "DiskFile.put(): rename('%s','%s')"
                                    " initially failed (%s) but a"
                                    " stat('%s') following that succeeded:"
                                    " %r" %
                                    (self._tmppath, df._data_file, str(err),
                                     df._put_datadir, dfstats))
                                attempts += 1
                                continue
                else:
                    raise GlusterFileSystemOSError(
                        err.errno, "%s, rename('%s', '%s')" %
                        (err.strerror, self._tmppath, df._data_file))
            else:
                # Success!
                break
        # Close here so the calling context does not have to perform this
        # in a thread.
        self.close()