def close(self):
    """
    Close the file descriptor, if one is held.

    The descriptor is detached from the instance *before* it is closed, so
    a failing do_close() cannot leave a stale descriptor on the instance
    to be closed a second time. Descriptor 0 is treated as a real, open
    descriptor (a plain truthiness test would skip it). Negative values
    are treated as placeholders and are dropped without a close call.
    """
    if self._fd is not None:
        fd, self._fd = self._fd, None
        if fd > -1:
            do_close(fd)
def _get_etag(path_or_fd):
    """
    Compute the etag for an object given either its path or an open
    descriptor.

    FIXME: It would be great to have a translator that returns the
    md5sum() of the file as an xattr that can be simply fetched. Since we
    don't have that, we should yield after each chunk read and computed so
    that we don't consume the worker thread.

    :param path_or_fd: an int is treated as an already-open file
                       descriptor, anything else as a path to open
    :returns: whatever _read_for_etag() produces for the file
    """
    if not isinstance(path_or_fd, int):
        # Path form: used when DiskDir.list_objects_iter invokes us. We own
        # the descriptor we open here, so we always close it.
        handle = do_open(path_or_fd, os.O_RDONLY)
        try:
            return _read_for_etag(handle)
        finally:
            do_close(handle)

    # Descriptor form: an invocation from the DiskFile.open() method. Read
    # through a duplicate so that only the duplicate is closed, then put
    # the caller's descriptor back at the start of the file.
    duplicate = do_dup(path_or_fd)
    try:
        digest = _read_for_etag(duplicate)
        do_lseek(path_or_fd, 0, os.SEEK_SET)
    finally:
        do_close(duplicate)
    return digest
def mkstemp(self):
    """
    Contextmanager to make a temporary file.

    Any missing directories between the container and the object's data
    directory are created first. A hidden, randomly suffixed temporary file
    is then opened exclusively in the data directory and its descriptor is
    yielded. On exit the descriptor is closed (close errors are ignored)
    and, if the temporary path is still recorded on the instance, the
    temporary file is unlinked.
    """
    # Creating intermediate directories and corresponding metadata.
    # For optimization, check if the subdirectory already exists,
    # if exists, then it means that it also has its metadata.
    # Not checking for container, since the container should already
    # exist for the call to come here.
    if not os_path.exists(self.datadir):
        path = self._container_path
        for subdir in self._obj_path.split(os.path.sep):
            path = os.path.join(path, subdir)
            if not os_path.exists(path):
                self._create_dir_object(path)

    tmpfile = '.' + self._obj + '.' + \
        md5(self._obj + str(random.random())).hexdigest()

    self.tmppath = os.path.join(self.datadir, tmpfile)
    fd = do_open(self.tmppath, os.O_RDWR | os.O_CREAT | os.O_EXCL)
    try:
        yield fd
    finally:
        try:
            do_close(fd)
        except OSError:
            pass
        tmppath, self.tmppath = self.tmppath, None
        # The path may have been cleared while the context was active
        # (e.g. after the file was renamed into place); only unlink when
        # there is still a temporary path to remove.
        if tmppath:
            do_unlink(tmppath)
def close(self):
    """
    Release the open file handle, if there is one.

    The descriptor is cleared from the instance first; do_close() is only
    invoked for values greater than -1.
    """
    current = self._fd
    if current is None:
        return
    self._fd = None
    if current > -1:
        do_close(current)
def close(self, verify_file=True):
    """
    Close the file, if one is open.

    The handle is detached from the instance before do_close() is called,
    so a close that raises does not leave a stale handle behind to be
    closed again by a later call.

    :param verify_file: Defaults to True. Accepted for interface
                        compatibility; this implementation does not
                        consult it and performs no quarantine check.
    """
    if self.fp:
        fp, self.fp = self.fp, None
        do_close(fp)
def test_do_close(self):
    """After fs.do_close(fd), writing to fd must fail with OSError."""
    fd, tmpfile = mkstemp()
    try:
        fs.do_close(fd)
        write_failed = False
        try:
            os.write(fd, "test")
        except OSError:
            write_failed = True
        if not write_failed:
            self.fail("OSError expected")
    finally:
        # Always remove the temporary file created for the test.
        os.remove(tmpfile)
def open(self):
    """
    Open the object.

    Opens the data file read-only, stats it, and loads its metadata via
    read_metadata(). If validate_object() rejects the metadata it is
    regenerated with create_object_metadata() and read again, then the
    result is passed through self._filter_metadata().

    For a directory the descriptor is closed immediately, the object size
    is recorded as 0 and self._fd is set to the placeholder -1. For a
    regular, unexpired object the descriptor is kept in self._fd.

    The descriptor is closed on every path that does not hand it over to
    self._fd (missing stat result, expired object, or any exception), so
    a failed open does not leak it.

    .. note::

        An implementation is allowed to raise any of the following
        exceptions, but is only required to raise `DiskFileNotExist` when
        the object representation does not exist.

    :raises DiskFileNotExist: if the object does not exist
    :raises DiskFileExpired: if the object has expired
    :returns: itself for use as a context manager; None when do_fstat()
              yields no result
    """
    # Writes are always performed to a temporary file
    try:
        fd = do_open(self._data_file, os.O_RDONLY | O_CLOEXEC)
    except GlusterFileSystemOSError as err:
        if err.errno in (errno.ENOENT, errno.ENOTDIR):
            # If the file does not exist, or some part of the path does
            # not exist, raise the expected DiskFileNotExist
            raise DiskFileNotExist
        raise

    # From here on we own fd. It is only kept open when it is stored on
    # the instance for a regular, unexpired object.
    keep_open = False
    try:
        stats = do_fstat(fd)
        if not stats:
            return
        self._is_dir = stat.S_ISDIR(stats.st_mode)
        obj_size = stats.st_size

        self._metadata = read_metadata(fd)
        if not validate_object(self._metadata):
            create_object_metadata(fd)
            self._metadata = read_metadata(fd)
        assert self._metadata is not None
        self._filter_metadata()

        if self._is_dir:
            # Directories carry no data to read; the descriptor is closed
            # by the finally clause below.
            obj_size = 0
            self._fd = -1
        else:
            if self._is_object_expired(self._metadata):
                raise DiskFileExpired(metadata=self._metadata)
            self._fd = fd
            keep_open = True
    finally:
        if not keep_open:
            do_close(fd)

    self._obj_size = obj_size
    return self
def test_do_close_err_fd(self):
    """Closing the same descriptor twice must raise GlusterFileSystemOSError."""
    fd, tmpfile = mkstemp()
    try:
        fs.do_close(fd)
        second_close_failed = False
        try:
            fs.do_close(fd)
        except GlusterFileSystemOSError:
            second_close_failed = True
        if not second_close_failed:
            self.fail("GlusterFileSystemOSError expected")
    finally:
        # Always remove the temporary file created for the test.
        os.remove(tmpfile)
def close(self, verify_file=True):
    """
    Close the file, if one is open.

    Marker directories hold no open file, so for them this only checks
    that invariant and returns. Otherwise the handle is detached from the
    instance before do_close() is called, so a close that raises does not
    leave a stale handle behind to be closed again by a later call.

    :param verify_file: Defaults to True. Accepted for interface
                        compatibility; this implementation does not
                        consult it and performs no quarantine check.
    """
    # Marker directory
    if self._is_dir:
        assert not self.fp
        return
    if self.fp:
        fp, self.fp = self.fp, None
        do_close(fp)
def test_do_close_err_fp(self):
    """
    fs.do_close() on a file object whose underlying descriptor has already
    been closed must raise GlusterFileSystemIOError.
    """
    fd, tmpfile = mkstemp()
    os.close(fd)
    fp = open(tmpfile, 'w')
    try:
        # Close the descriptor behind the file object's back.
        os.close(fp.fileno())
        close_failed = False
        try:
            fs.do_close(fp)
        except GlusterFileSystemIOError:
            close_failed = True
        if not close_failed:
            self.fail("GlusterFileSystemIOError expected")
    finally:
        os.remove(tmpfile)
def __exit__(self, t, v, tb):
    """
    Context exit: drop the cached metadata and release the descriptor.

    .. note::

        Per the original author's note, the object server invokes this
        while servicing the REST API *before* the object has actually
        been read, and the implementation is responsible for handling
        that properly.
    """
    self._metadata = None
    current = self._fd
    if current is None:
        return
    # Clear the attribute first; only values above -1 are real
    # descriptors that need closing.
    self._fd = None
    if current > -1:
        do_close(current)
def open(self):
    """
    Open the object.

    Opens the data file read-only and stores the descriptor in self._fd.
    The stat result and metadata are only fetched when not already cached
    on the instance; metadata that fails validate_object() is replaced by
    the result of create_object_metadata(). For a directory the descriptor
    is closed right away and self._fd becomes -1 with a size of 0.

    .. note::

        An implementation is allowed to raise any of the following
        exceptions, but is only required to raise `DiskFileNotExist` when
        the object representation does not exist.

    :raises DiskFileNotExist: if the object does not exist
    :raises DiskFileExpired: if the object has expired
    :returns: itself for use as a context manager
    """
    # Writes are always performed to a temporary file
    try:
        self._fd = do_open(self._data_file, os.O_RDONLY | O_CLOEXEC)
    except GlusterFileSystemOSError as open_err:
        if open_err.errno not in (errno.ENOENT, errno.ENOTDIR):
            raise
        # The file, or some part of the path leading to it, does not
        # exist: raise the expected DiskFileNotExist
        raise DiskFileNotExist

    try:
        if not self._stat:
            self._stat = do_fstat(self._fd)
        size = self._stat.st_size

        if not self._metadata:
            self._metadata = read_metadata(self._fd)
        metadata_ok = validate_object(self._metadata, self._stat)
        if not metadata_ok:
            self._metadata = create_object_metadata(
                self._fd, self._stat, self._metadata)
        assert self._metadata is not None
        self._filter_metadata()

        if stat.S_ISDIR(self._stat.st_mode):
            do_close(self._fd)
            size = 0
            self._fd = -1
        elif self._is_object_expired(self._metadata):
            raise DiskFileExpired(metadata=self._metadata)
        self._obj_size = size
    except (OSError, IOError, DiskFileExpired) as exc:
        # Something went wrong. Context manager will not call
        # __exit__. So we close the fd manually here.
        self._close_fd()
        raced = hasattr(exc, 'errno') and \
            exc.errno in (errno.ENOENT, errno.ESTALE)
        if not raced:
            # Re-raise the original exception after fd has been closed
            raise
        # Handle races: ENOENT/ESTALE can be raised by read_metadata()
        # call in GlusterFS if file gets deleted by another
        # client after do_open() succeeds
        logging.warn("open(%s) succeeded but one of the subsequent "
                     "syscalls failed with ENOENT/ESTALE. Raising "
                     "DiskFileNotExist." % (self._data_file))
        raise DiskFileNotExist

    self._disk_file_open = True
    return self
def _close_fd(self):
    """
    Release the descriptor held in self._fd, if any.

    The attribute is reset to None before do_close() runs, and do_close()
    is only called for values greater than -1.
    """
    current = self._fd
    if current is None:
        return
    self._fd = None
    if current > -1:
        do_close(current)
def mkstemp(self, size=None):
    """
    Contextmanager to make a temporary file, optionally of a specified
    initial size.

    For Gluster, we first optimistically create the temporary file using
    the "rsync-friendly" .NAME.random naming. If we find that some path to
    the file does not exist, we then create that path and then create the
    temporary file again. If we get file name conflict, we'll retry using
    different random suffixes 1,000 times before giving up.

    The open descriptor is yielded to the caller. On exit it is closed
    (OSError from the close is ignored) and, if self.tmppath is still set,
    the temporary file is unlinked.

    :param size: optional size handed to fallocate(); only used when the
                 module-level _preallocate flag is truthy
    :raises DiskFileError: if no temporary file could be created within
                           the 1,000 iterations of the retry loop
    :raises GlusterFileSystemOSError: re-raised from do_open() for any
                                      errno other than EEXIST, EIO or
                                      ENOENT
    """
    # Only used for the error message raised when every attempt fails.
    data_file = os.path.join(self.put_datadir, self._obj)

    # Assume the full directory path exists to the file already, and
    # construct the proper name for the temporary file.
    for i in range(0, 1000):
        tmpfile = '.' + self._obj + '.' + md5(self._obj +
                  str(random.random())).hexdigest()
        tmppath = os.path.join(self.put_datadir, tmpfile)
        try:
            # O_EXCL makes a name collision surface as EEXIST instead of
            # silently reusing an existing file.
            fd = do_open(tmppath,
                         os.O_WRONLY | os.O_CREAT | os.O_EXCL | O_CLOEXEC)
        except GlusterFileSystemOSError as gerr:
            if gerr.errno == errno.EEXIST:
                # Retry with a different random number.
                continue
            if gerr.errno == errno.EIO:
                # FIXME: Possible FUSE issue or race condition, let's
                # sleep on it and retry the operation.
                _random_sleep()
                logging.warn("DiskFile.mkstemp(): %s ... retrying in"
                             " 0.1 secs", gerr)
                continue
            if gerr.errno != errno.ENOENT:
                # FIXME: Other cases we should handle?
                raise
            # Everything below handles ENOENT only.
            if not self._obj_path:
                # No directory hierarchy and the create failed telling us
                # the container or volume directory does not exist. This
                # could be a FUSE issue or some race condition, so let's
                # sleep a bit and retry.
                _random_sleep()
                logging.warn("DiskFile.mkstemp(): %s ... retrying in"
                             " 0.1 secs", gerr)
                continue
            if i != 0:
                # Got ENOENT after previously making the path. This could
                # also be a FUSE issue or some race condition, nap and
                # retry.
                _random_sleep()
                # NOTE(review): this call formats the message eagerly with
                # '%', unlike the two warnings above which pass gerr as a
                # logging argument.
                logging.warn("DiskFile.mkstemp(): %s ... retrying in"
                             " 0.1 secs" % gerr)
                continue

            # It looks like the path to the object does not already exist
            # (only reached on the very first iteration, i == 0).
            self._create_dir_object(self._obj_path)
            continue
        else:
            # do_open() succeeded: fd and tmppath are valid.
            break
    else:
        # We failed after 1,000 attempts to create the temporary file.
        raise DiskFileError('DiskFile.mkstemp(): failed to successfully'
                            ' create a temporary file without running'
                            ' into a name conflict after 1,000 attempts'
                            ' for: %s' % (data_file,))

    # Record the path on the instance; the cleanup below only unlinks the
    # file while this attribute is still set.
    self.tmppath = tmppath
    try:
        # Ensure it is properly owned before we make it available.
        do_fchown(fd, self.uid, self.gid)
        if _preallocate and size:
            # For XFS, fallocate() turns off speculative pre-allocation
            # until a write is issued either to the last block of the file
            # before the EOF or beyond the EOF. This means that we are
            # less likely to fragment free space with pre-allocated
            # extents that get truncated back to the known file size.
            # However, this call also turns holes into allocated but
            # unwritten extents, so that allocation occurs before the
            # write, not during XFS writeback. This effectively defeats
            # any allocation optimizations the filesystem can make at
            # writeback time.
            fallocate(fd, size)
        yield fd
    finally:
        try:
            do_close(fd)
        except OSError:
            # Best effort: a failing close must not prevent the unlink.
            pass
        if self.tmppath:
            tmppath, self.tmppath = self.tmppath, None
            do_unlink(tmppath)
def finalize_put():
    """
    Persist the uploaded data and move the temporary file into place.

    Writes the metadata, fsyncs, drops the buffer cache for the uploaded
    range, renames self.tmppath to the final data file (retrying on
    ENOENT/EIO while attempts remain below MAX_RENAME_ATTEMPTS) and
    finally closes self.fd.

    Takes no arguments: `self`, `df` and `metadata` come from the
    enclosing scope, which is not visible in this block.

    :raises DiskFileError: if, after a failed rename, the temporary file
                           no longer matches the open descriptor or the
                           target directory is missing or not a directory
    :raises GlusterFileSystemOSError: for any other rename failure, or
                                      when the retry budget is exhausted
    """
    # Write out metadata before fsync() to ensure it is also forced to
    # disk.
    write_metadata(self.fd, metadata)

    # We call fsync() before calling drop_cache() to lower the
    # amount of redundant work the drop cache code will perform on
    # the pages (now that after fsync the pages will be all
    # clean).
    do_fsync(self.fd)
    # From the Department of the Redundancy Department, make sure
    # we call drop_cache() after fsync() to avoid redundant work
    # (pages all clean).
    drop_buffer_cache(self.fd, 0, self.upload_size)

    # At this point we know that the object's full directory path
    # exists, so we can just rename it directly without using Swift's
    # swift.common.utils.renamer(), which makes the directory path and
    # adds extra stat() calls.
    data_file = os.path.join(df.put_datadir, df._obj)
    attempts = 1
    while True:
        try:
            os.rename(self.tmppath, data_file)
        except OSError as err:
            if err.errno in (errno.ENOENT, errno.EIO) \
                    and attempts < MAX_RENAME_ATTEMPTS:
                # FIXME: Why either of these two error conditions is
                # happening is unknown at this point. This might be a
                # FUSE issue of some sort or a possible race
                # condition. So let's sleep on it, and double check
                # the environment after a good nap.
                _random_sleep()
                # Tease out why this error occurred. The man page for
                # rename reads:
                #   "The link named by tmppath does not exist; or, a
                #    directory component in data_file does not exist;
                #    or, tmppath or data_file is an empty string."
                assert len(self.tmppath) > 0 and len(data_file) > 0
                # Compare the inode behind the temporary path with the
                # inode of the descriptor we have been writing to.
                tpstats = do_stat(self.tmppath)
                tfstats = do_fstat(self.fd)
                assert tfstats
                if not tpstats or tfstats.st_ino != tpstats.st_ino:
                    # Temporary file name conflict
                    raise DiskFileError(
                        'DiskFile.put(): temporary file, %s, was'
                        ' already renamed (targeted for %s)' % (
                            self.tmppath, data_file))
                else:
                    # Data file target name now has a bad path!
                    dfstats = do_stat(df.put_datadir)
                    if not dfstats:
                        raise DiskFileError(
                            'DiskFile.put(): path to object, %s, no'
                            ' longer exists (targeted for %s)' % (
                                df.put_datadir, data_file))
                    else:
                        is_dir = stat.S_ISDIR(dfstats.st_mode)
                        if not is_dir:
                            raise DiskFileError(
                                'DiskFile.put(): path to object, %s,'
                                ' no longer a directory (targeted for'
                                ' %s)' % (df.put_datadir, data_file))
                        else:
                            # Let's retry since everything looks okay
                            logging.warn(
                                "DiskFile.put(): os.rename('%s','%s')"
                                " initially failed (%s) but a"
                                " stat('%s') following that succeeded:"
                                " %r" % (
                                    self.tmppath, data_file, str(err),
                                    df.put_datadir, dfstats))
                            attempts += 1
                            continue
            else:
                # Either an errno we do not retry on, or the retry budget
                # is used up: report it with the rename arguments.
                raise GlusterFileSystemOSError(
                    err.errno, "%s, os.rename('%s', '%s')" % (
                        err.strerror, self.tmppath, data_file))
        else:
            # Success!
            break
    # Close here so the calling context does not have to perform this
    # in a thread.
    do_close(self.fd)
def writer(self, size=None):
    """
    Contextmanager to make a temporary file and yield a DiskWriter for it.

    For Gluster, we first optimistically create the temporary file using
    the "rsync-friendly" .NAME.random naming. If we find that some path to
    the file does not exist, we then create that path and then create the
    temporary file again. On a file name conflict or a transient error we
    retry with a different random suffix until MAX_OPEN_ATTEMPTS is
    reached.

    On exit the descriptor is closed (OSError from the close is ignored)
    and the temporary file is unlinked, using the DiskWriter's own
    fd/tmppath when it was created. If setup failed before the DiskWriter
    existed, the raw descriptor and path are cleaned up instead so that
    the original error propagates and nothing is leaked.

    :param size: accepted for interface compatibility; no fallocate() call
                 is made, so it is ignored
    :raises DiskFileNoSpace: if do_open() fails with ENOSPC
    :raises DiskFileError: if the temporary file could not be created
                           within MAX_OPEN_ATTEMPTS attempts
    :raises GlusterFileSystemOSError: re-raised from do_open() for any
                                      errno other than ENOSPC, ENOENT,
                                      EEXIST or EIO
    """
    # Only used for the error message raised when every attempt fails.
    data_file = os.path.join(self.put_datadir, self._obj)

    # Assume the full directory path exists to the file already, and
    # construct the proper name for the temporary file.
    attempts = 1
    cur_thread = str(getcurrent())
    while True:
        postfix = md5(self._obj + _cur_host + _cur_pid + cur_thread +
                      str(random.random())).hexdigest()
        tmpfile = '.' + self._obj + '.' + postfix
        tmppath = os.path.join(self.put_datadir, tmpfile)
        try:
            fd = do_open(tmppath,
                         os.O_WRONLY | os.O_CREAT | os.O_EXCL | O_CLOEXEC)
        except GlusterFileSystemOSError as gerr:
            if gerr.errno == errno.ENOSPC:
                # Raise DiskFileNoSpace to be handled by upper layers
                raise DiskFileNoSpace()
            if gerr.errno not in (errno.ENOENT, errno.EEXIST, errno.EIO):
                # FIXME: Other cases we should handle?
                raise
            if attempts >= MAX_OPEN_ATTEMPTS:
                # We failed after N attempts to create the temporary
                # file.
                raise DiskFileError('DiskFile.mkstemp(): failed to'
                                    ' successfully create a temporary file'
                                    ' without running into a name conflict'
                                    ' after %d of %d attempts for: %s' % (
                                        attempts, MAX_OPEN_ATTEMPTS,
                                        data_file))
            if gerr.errno == errno.EEXIST:
                # Retry with a different random number.
                attempts += 1
            elif gerr.errno == errno.EIO:
                # FIXME: Possible FUSE issue or race condition, let's
                # sleep on it and retry the operation.
                _random_sleep()
                logging.warn("DiskFile.mkstemp(): %s ... retrying in"
                             " 0.1 secs", gerr)
                attempts += 1
            elif not self._obj_path:
                # No directory hierarchy and the create failed telling us
                # the container or volume directory does not exist. This
                # could be a FUSE issue or some race condition, so let's
                # sleep a bit and retry.
                _random_sleep()
                logging.warn("DiskFile.mkstemp(): %s ... retrying in"
                             " 0.1 secs", gerr)
                attempts += 1
            elif attempts > 1:
                # Got ENOENT after previously making the path. This could
                # also be a FUSE issue or some race condition, nap and
                # retry.
                _random_sleep()
                logging.warn("DiskFile.mkstemp(): %s ... retrying in"
                             " 0.1 secs" % gerr)
                attempts += 1
            else:
                # It looks like the path to the object does not already
                # exist; don't count this as an attempt, though, since
                # we perform the open() system call optimistically.
                self._create_dir_object(self._obj_path)
        else:
            break

    dw = None
    try:
        # Ensure it is properly owned before we make it available.
        do_fchown(fd, self.uid, self.gid)
        # NOTE: we do not perform the fallocate() call at all. We ignore
        # it completely.
        dw = DiskWriter(self, fd, tmppath, self.threadpool)
        yield dw
    finally:
        if dw is None:
            # Setup failed before the DiskWriter took ownership: clean up
            # the raw descriptor and temporary file ourselves so the
            # original exception is not masked and nothing is leaked.
            try:
                do_close(fd)
            except OSError:
                pass
            do_unlink(tmppath)
        else:
            try:
                if dw.fd:
                    do_close(dw.fd)
            except OSError:
                pass
            if dw.tmppath:
                do_unlink(dw.tmppath)