def _unlinkold(self):
    """
    Remove the on-disk entity backing this DiskFile, then garbage-collect
    any now-empty, non-object parent directories up to (but not including)
    the container path.
    """
    if self._is_dir:
        # Marker, or object, directory.
        #
        # Delete from the filesystem only if it contains no objects.
        # If it does contain objects, then just remove the object
        # metadata tag which will make this directory a
        # fake-filesystem-only directory and will be deleted when the
        # container or parent directory is deleted.
        #
        # FIXME: Ideally we should use an atomic metadata update operation
        metadata = read_metadata(self._data_file)
        if dir_is_object(metadata):
            metadata[X_OBJECT_TYPE] = DIR_NON_OBJECT
            write_metadata(self._data_file, metadata)
        # rmobjdir() only removes the directory when it holds no objects,
        # so this is safe to call unconditionally here.
        rmobjdir(self._data_file)
    else:
        # Delete file object
        do_unlink(self._data_file)

    # Garbage collection of non-object directories. Now that we
    # deleted the file, determine if the current directory and any
    # parent directory may be deleted.
    dirname = os.path.dirname(self._data_file)
    while dirname and dirname != self._container_path:
        # Try to remove any directories that are not objects.
        if not rmobjdir(dirname):
            # If a directory with objects has been found, we can stop
            # garbage collection
            break
        else:
            dirname = os.path.dirname(dirname)
def put(self, metadata):
    """
    Finalize writing the file on disk, and renames it from the temp file
    to the real location.  This should be called after the data has been
    written to the temp file.

    :param metadata: dictionary of metadata to be written
    :raises AlreadyExistsAsDir: if there exists a directory of the same
                                name
    """
    assert self._tmppath is not None
    metadata = _adjust_metadata(metadata)
    df = self._disk_file

    if dir_is_object(metadata):
        df._threadpool.force_run_in_thread(df._create_dir_object,
                                           df._data_file, metadata)
        return

    if df._is_dir:
        # A pre-existing directory already exists on the file
        # system, perhaps gratuitously created when another
        # object was created, or created externally to Swift
        # REST API servicing (UFO use case).
        raise AlreadyExistsAsDir('DiskFile.put(): file creation failed'
                                 ' since the target, %s, already exists'
                                 ' as a directory' % df._data_file)

    df._threadpool.force_run_in_thread(self._finalize_put, metadata)

    # Avoid the unlink() system call as part of the mkstemp context
    # cleanup.
    # BUGFIX: was "self.tmppath = None", which set a *different* attribute
    # and left self._tmppath (the one asserted above and used by the
    # cleanup path) pointing at the just-renamed file, so cleanup could
    # still unlink it.
    self._tmppath = None
def put(self, metadata):
    """
    Finalize writing the file on disk, and renames it from the temp file
    to the real location.  This should be called after the data has been
    written to the temp file.

    :param metadata: dictionary of metadata to be written
    :raises AlreadyExistsAsDir: if there exists a directory of the same
                                name
    """
    assert self._tmppath is not None
    metadata = _adjust_metadata(metadata)
    df = self._disk_file

    if dir_is_object(metadata):
        df._threadpool.force_run_in_thread(
            df._create_dir_object, df._data_file, metadata)
        return

    if df._is_dir:
        # A pre-existing directory already exists on the file
        # system, perhaps gratuitously created when another
        # object was created, or created externally to Swift
        # REST API servicing (UFO use case).
        raise AlreadyExistsAsDir('DiskFile.put(): file creation failed'
                                 ' since the target, %s, already exists'
                                 ' as a directory' % df._data_file)

    df._threadpool.force_run_in_thread(self._finalize_put, metadata)

    # Avoid the unlink() system call as part of the mkstemp context
    # cleanup.
    # BUGFIX: was "self.tmppath = None", which created a new attribute
    # instead of clearing self._tmppath (the attribute asserted above),
    # so the mkstemp cleanup could still unlink the renamed file.
    self._tmppath = None
def unlinkold(self, timestamp):
    """
    Remove any older versions of the object file.  Any file that has an
    older timestamp than timestamp will be deleted.

    :param timestamp: timestamp to compare with each file
    """
    # Nothing to do if there is no on-disk state, or it is already at
    # least as new as the caller's timestamp.
    if not self.metadata or self.metadata[X_TIMESTAMP] >= timestamp:
        return
    assert self.data_file, \
        "Have metadata, %r, but no data_file" % self.metadata
    if self._is_dir:
        # Marker, or object, directory.
        #
        # Delete from the filesystem only if it contains
        # no objects. If it does contain objects, then just
        # remove the object metadata tag which will make this directory a
        # fake-filesystem-only directory and will be deleted
        # when the container or parent directory is deleted.
        metadata = read_metadata(self.data_file)
        if dir_is_object(metadata):
            metadata[X_OBJECT_TYPE] = DIR_NON_OBJECT
            write_metadata(self.data_file, metadata)
        # rmobjdir() itself refuses to remove a directory that still
        # contains objects, so it is safe to call here unconditionally.
        rmobjdir(self.data_file)
    else:
        # Delete file object
        do_unlink(self.data_file)

    # Garbage collection of non-object directories.
    # Now that we deleted the file, determine
    # if the current directory and any parent
    # directory may be deleted.
    dirname = os.path.dirname(self.data_file)
    while dirname and dirname != self._container_path:
        # Try to remove any directories that are not
        # objects.
        if not rmobjdir(dirname):
            # If a directory with objects has been
            # found, we can stop garbage collection
            break
        else:
            dirname = os.path.dirname(dirname)

    # Forget the deleted entity so subsequent calls see no on-disk state.
    self.metadata = {}
    self.data_file = None
def put(self, fd, metadata, extension='.data'):
    """
    Finalize writing the file on disk, and renames it from the temp file
    to the real location.  This should be called after the data has been
    written to the temp file.

    :param fd: file descriptor of the temp file
    :param metadata: dictionary of metadata to be written
    :param extension: extension to be used when making the file
    :raises DiskFileError: if the target already exists with the wrong
                           type (file vs. directory), or the rename
                           cannot be completed
    """
    # Our caller will use '.data' here; we just ignore it since we map the
    # URL directly to the file system.
    metadata = _adjust_metadata(metadata)

    if dir_is_object(metadata):
        if not self.data_file:
            # Does not exist, create it
            data_file = os.path.join(self._obj_path, self._obj)
            _, self.metadata = self._create_dir_object(data_file, metadata)
            self.data_file = os.path.join(self._container_path, data_file)
        elif not self.is_dir:
            # Exists, but as a file
            raise DiskFileError('DiskFile.put(): directory creation failed'
                                ' since the target, %s, already exists as'
                                ' a file' % self.data_file)
        return

    if self._is_dir:
        # A pre-existing directory already exists on the file
        # system, perhaps gratuitously created when another
        # object was created, or created externally to Swift
        # REST API servicing (UFO use case).
        raise DiskFileError('DiskFile.put(): file creation failed since'
                            ' the target, %s, already exists as a'
                            ' directory' % self.data_file)

    # Write out metadata before fsync() to ensure it is also forced to
    # disk.
    write_metadata(fd, metadata)
    if not _relaxed_writes:
        do_fsync(fd)
        if X_CONTENT_LENGTH in metadata:
            # Don't bother doing this before fsync in case the OS gets any
            # ideas to issue partial writes.
            fsize = int(metadata[X_CONTENT_LENGTH])
            self.drop_cache(fd, 0, fsize)

    # At this point we know that the object's full directory path exists,
    # so we can just rename it directly without using Swift's
    # swift.common.utils.renamer(), which makes the directory path and
    # adds extra stat() calls.
    data_file = os.path.join(self.put_datadir, self._obj)
    # FIX: bound the ENOENT/EIO retry loop.  Previously this loop could
    # retry forever if each failed rename was followed by stats that
    # looked healthy (e.g. a persistently flaky FUSE mount).
    max_rename_attempts = 10
    attempts = 1
    while True:
        try:
            os.rename(self.tmppath, data_file)
        except OSError as err:
            if err.errno in (errno.ENOENT, errno.EIO) \
                    and attempts < max_rename_attempts:
                # FIXME: Why either of these two error conditions is
                # happening is unknown at this point. This might be a FUSE
                # issue of some sort or a possible race condition. So
                # let's sleep on it, and double check the environment
                # after a good nap.
                _random_sleep()
                # Tease out why this error occurred. The man page for
                # rename reads:
                #   "The link named by tmppath does not exist; or, a
                #    directory component in data_file does not exist;
                #    or, tmppath or data_file is an empty string."
                assert len(self.tmppath) > 0 and len(data_file) > 0
                tpstats = do_stat(self.tmppath)
                tfstats = do_fstat(fd)
                assert tfstats
                if not tpstats or tfstats.st_ino != tpstats.st_ino:
                    # Temporary file name conflict
                    raise DiskFileError('DiskFile.put(): temporary file,'
                                        ' %s, was already renamed'
                                        ' (targeted for %s)' % (
                                            self.tmppath, data_file))
                else:
                    # Data file target name now has a bad path!
                    dfstats = do_stat(self.put_datadir)
                    if not dfstats:
                        raise DiskFileError('DiskFile.put(): path to'
                                            ' object, %s, no longer exists'
                                            ' (targeted for %s)' % (
                                                self.put_datadir,
                                                data_file))
                    else:
                        is_dir = stat.S_ISDIR(dfstats.st_mode)
                        if not is_dir:
                            raise DiskFileError('DiskFile.put(): path to'
                                                ' object, %s, no longer a'
                                                ' directory (targeted for'
                                                ' %s)' % (self.put_datadir,
                                                          data_file))
                        else:
                            # Let's retry since everything looks okay
                            logging.warn("DiskFile.put(): os.rename('%s',"
                                         "'%s') initially failed (%s) but"
                                         " a stat('%s') following that"
                                         " succeeded: %r" % (
                                             self.tmppath, data_file,
                                             str(err), self.put_datadir,
                                             dfstats))
                            attempts += 1
                            continue
            else:
                # Unexpected errno, or retries exhausted: surface it.
                raise GlusterFileSystemOSError(
                    err.errno, "%s, os.rename('%s', '%s')" % (
                        err.strerror, self.tmppath, data_file))
        else:
            # Success!
            break

    # Avoid the unlink() system call as part of the mkstemp context
    # cleanup
    self.tmppath = None

    self.metadata = metadata
    self.filter_metadata()

    # Mark that it actually exists now
    self.data_file = os.path.join(self.datadir, self._obj)
def put(self, metadata, extension='.data'):
    """
    Finalize writing the file on disk, and renames it from the temp file
    to the real location.  This should be called after the data has been
    written to the temp file.

    :param metadata: dictionary of metadata to be written
    :param extension: extension to be used when making the file
    """
    # Our caller will use '.data' here; we just ignore it since we map the
    # URL directly to the file system.
    assert self.tmppath is not None
    metadata = _adjust_metadata(metadata)
    df = self.disk_file

    if dir_is_object(metadata):
        if not df.data_file:
            # Does not exist, create it
            data_file = os.path.join(df._obj_path, df._obj)
            _, df.metadata = self.threadpool.force_run_in_thread(
                df._create_dir_object, data_file, metadata)
            df.data_file = os.path.join(df._container_path, data_file)
        elif not df.is_dir:
            # Exists, but as a file
            raise DiskFileError('DiskFile.put(): directory creation failed'
                                ' since the target, %s, already exists as'
                                ' a file' % df.data_file)
        return

    if df._is_dir:
        # A pre-existing directory already exists on the file
        # system, perhaps gratuitously created when another
        # object was created, or created externally to Swift
        # REST API servicing (UFO use case).
        raise DiskFileError('DiskFile.put(): file creation failed since'
                            ' the target, %s, already exists as a'
                            ' directory' % df.data_file)

    def finalize_put():
        # Runs in the threadpool below: fsync, drop cache, rename into
        # place, and close the fd.
        #
        # Write out metadata before fsync() to ensure it is also forced to
        # disk.
        write_metadata(self.fd, metadata)

        # We call fsync() before calling drop_cache() to lower the
        # amount of redundant work the drop cache code will perform on
        # the pages (now that after fsync the pages will be all
        # clean).
        do_fsync(self.fd)
        # From the Department of the Redundancy Department, make sure
        # we call drop_cache() after fsync() to avoid redundant work
        # (pages all clean).
        drop_buffer_cache(self.fd, 0, self.upload_size)

        # At this point we know that the object's full directory path
        # exists, so we can just rename it directly without using Swift's
        # swift.common.utils.renamer(), which makes the directory path and
        # adds extra stat() calls.
        data_file = os.path.join(df.put_datadir, df._obj)
        attempts = 1
        while True:
            try:
                os.rename(self.tmppath, data_file)
            except OSError as err:
                if err.errno in (errno.ENOENT, errno.EIO) \
                        and attempts < MAX_RENAME_ATTEMPTS:
                    # FIXME: Why either of these two error conditions is
                    # happening is unknown at this point. This might be a
                    # FUSE issue of some sort or a possible race
                    # condition. So let's sleep on it, and double check
                    # the environment after a good nap.
                    _random_sleep()
                    # Tease out why this error occurred. The man page for
                    # rename reads:
                    #   "The link named by tmppath does not exist; or, a
                    #    directory component in data_file does not exist;
                    #    or, tmppath or data_file is an empty string."
                    assert len(self.tmppath) > 0 and len(data_file) > 0
                    tpstats = do_stat(self.tmppath)
                    tfstats = do_fstat(self.fd)
                    assert tfstats
                    if not tpstats or tfstats.st_ino != tpstats.st_ino:
                        # Temporary file name conflict
                        raise DiskFileError(
                            'DiskFile.put(): temporary file, %s, was'
                            ' already renamed (targeted for %s)' % (
                                self.tmppath, data_file))
                    else:
                        # Data file target name now has a bad path!
                        dfstats = do_stat(df.put_datadir)
                        if not dfstats:
                            raise DiskFileError(
                                'DiskFile.put(): path to object, %s, no'
                                ' longer exists (targeted for %s)' % (
                                    df.put_datadir, data_file))
                        else:
                            is_dir = stat.S_ISDIR(dfstats.st_mode)
                            if not is_dir:
                                raise DiskFileError(
                                    'DiskFile.put(): path to object, %s,'
                                    ' no longer a directory (targeted for'
                                    ' %s)' % (df.put_datadir, data_file))
                            else:
                                # Let's retry since everything looks okay
                                logging.warn(
                                    "DiskFile.put(): os.rename('%s','%s')"
                                    " initially failed (%s) but a"
                                    " stat('%s') following that succeeded:"
                                    " %r" % (
                                        self.tmppath, data_file, str(err),
                                        df.put_datadir, dfstats))
                                attempts += 1
                                continue
                else:
                    # Unexpected errno, or retries exhausted.
                    raise GlusterFileSystemOSError(
                        err.errno, "%s, os.rename('%s', '%s')" % (
                            err.strerror, self.tmppath, data_file))
            else:
                # Success!
                break
        # Close here so the calling context does not have to perform this
        # in a thread.
        do_close(self.fd)

    self.threadpool.force_run_in_thread(finalize_put)

    # Avoid the unlink() system call as part of the mkstemp context
    # cleanup
    self.tmppath = None

    df.metadata = metadata
    df._filter_metadata()

    # Mark that it actually exists now
    df.data_file = os.path.join(df.datadir, df._obj)
def list_objects_iter(self, limit, marker, end_marker,
                      prefix, delimiter, path=None):
    """
    Returns tuple of name, created_at, size, content_type, etag.
    """
    assert limit >= 0
    assert not delimiter or (len(delimiter) == 1 and
                             ord(delimiter) <= 254)

    # Swift "path" query semantics: a non-None path forces a '/'
    # delimiter listing rooted at that path.
    if path is not None:
        if path:
            prefix = path = path.rstrip('/') + '/'
        else:
            prefix = path
        delimiter = '/'
    elif delimiter and not prefix:
        prefix = ''

    container_list = []

    objects = self._update_object_count()
    if objects:
        objects.sort()
    else:
        # No objects in the container; nothing to list.
        return container_list

    if end_marker:
        objects = filter_end_marker(objects, end_marker)

    if marker and marker >= prefix:
        objects = filter_marker(objects, marker)
    elif prefix:
        objects = filter_prefix_as_marker(objects, prefix)

    if prefix is None:
        # No prefix, we don't need to apply the other arguments, we just
        # return what we have.
        pass
    else:
        # We have a non-None (for all intents and purposes it is a string)
        # prefix.
        if not delimiter:
            if not prefix:
                # We have nothing more to do
                pass
            else:
                objects = filter_prefix(objects, prefix)
        else:
            objects = filter_delimiter(objects, delimiter, prefix, marker,
                                       path)

    count = 0
    for obj in objects:
        obj_path = os.path.join(self.datadir, obj)
        metadata = read_metadata(obj_path)
        if not metadata or not validate_object(metadata):
            # Missing or invalid metadata: regenerate it from the
            # filesystem (strip a trailing delimiter from subdir markers
            # first).
            if delimiter == '/' and obj_path[-1] == delimiter:
                clean_obj_path = obj_path[:-1]
            else:
                clean_obj_path = obj_path
            try:
                metadata = create_object_metadata(clean_obj_path)
            except OSError as e:
                # FIXME - total hack to get upstream swift ported unit
                # test cases working for now.
                if e.errno != errno.ENOENT:
                    raise
        # In OBJECT_ONLY mode, skip directories that are not real
        # (REST-created) directory objects.
        if Glusterfs.OBJECT_ONLY and metadata \
                and metadata[X_CONTENT_TYPE] == DIR_TYPE \
                and not dir_is_object(metadata):
            continue
        list_item = []
        list_item.append(obj)
        if metadata:
            list_item.append(metadata[X_TIMESTAMP])
            list_item.append(int(metadata[X_CONTENT_LENGTH]))
            list_item.append(metadata[X_CONTENT_TYPE])
            list_item.append(metadata[X_ETAG])
        container_list.append(list_item)
        count += 1
        if count >= limit:
            break
    return container_list
def list_objects_iter(self, limit, marker, end_marker,
                      prefix, delimiter, path=None,
                      storage_policy_index=0,
                      out_content_type=None, reverse=False):
    """
    Returns tuple of name, created_at, size, content_type, etag.
    """
    assert limit >= 0
    assert not delimiter or (len(delimiter) == 1 and
                             ord(delimiter) <= 254)

    # Swift "path" query semantics: a non-None path forces a '/'
    # delimiter listing rooted at that path.
    if path is not None:
        if path:
            prefix = path = path.rstrip('/') + '/'
        else:
            prefix = path
        delimiter = '/'
    elif delimiter and not prefix:
        prefix = ''

    container_list = []

    if self.account == 'gsexpiring':
        # Special-cased volume used by the object expirer for its
        # tracker objects.
        objects = list_objects_gsexpiring_container(self.datadir)
    else:
        objects = self._update_object_count()
    if objects:
        objects.sort()
    else:
        # No objects in container, return empty list
        return container_list

    # For a reverse listing the caller's marker/end_marker bracket the
    # range from the other end; swap them before filtering.
    if marker and end_marker and reverse:
        marker, end_marker = end_marker, marker

    if end_marker:
        objects = filter_end_marker(objects, end_marker)

    if marker and marker >= prefix:
        objects = filter_marker(objects, marker)
    elif prefix:
        objects = filter_prefix_as_marker(objects, prefix)

    if prefix is None:
        # No prefix, we don't need to apply the other arguments, we just
        # return what we have.
        pass
    else:
        # We have a non-None (for all intents and purposes it is a string)
        # prefix.
        if not delimiter:
            if not prefix:
                # We have nothing more to do
                pass
            else:
                objects = filter_prefix(objects, prefix)
        else:
            objects = filter_delimiter(objects, delimiter, prefix, marker,
                                       path)

    if out_content_type == 'text/plain' or \
            self.account == 'gsexpiring':
        # When out_content_type == 'text/plain':
        #
        #   The client is only asking for a plain list of objects and NOT
        #   asking for any extended information about objects such as
        #   bytes used or etag.
        #
        # When self.account == 'gsexpiring':
        #
        #   This is a JSON request sent by the object expirer to list
        #   tracker objects in a container in gsexpiring volume.
        #   When out_content_type is 'application/json', the caller
        #   expects each record entry to have the following ordered
        #   fields: (name, timestamp, size, content_type, etag)
        for obj in objects:
            container_list.append((obj, '0', 0, 'text/plain', ''))
            if len(container_list) >= limit:
                break
        if reverse:
            container_list.reverse()
        return container_list

    count = 0
    for obj in objects:
        obj_path = os.path.join(self.datadir, obj)
        try:
            metadata = read_metadata(obj_path)
        except GlusterFileSystemIOError as err:
            if err.errno in (errno.ENOENT, errno.ESTALE):
                # obj might have been deleted by another process
                # since the objects list was originally built
                continue
            else:
                raise err
        if not metadata or not validate_object(metadata):
            # Missing or invalid metadata: regenerate it from the
            # filesystem (strip a trailing delimiter from subdir markers
            # first).
            if delimiter == '/' and obj_path[-1] == delimiter:
                clean_obj_path = obj_path[:-1]
            else:
                clean_obj_path = obj_path
            try:
                metadata = create_object_metadata(clean_obj_path)
            except OSError as e:
                # FIXME - total hack to get upstream swift ported unit
                # test cases working for now.
                if e.errno not in (errno.ENOENT, errno.ESTALE):
                    raise
        # Skip implicit (filesystem-only) directories unless configured
        # to report them.
        if not Glusterfs._implicit_dir_objects and metadata \
                and metadata[X_CONTENT_TYPE] == DIR_TYPE \
                and not dir_is_object(metadata):
            continue
        list_item = []
        list_item.append(obj)
        if metadata:
            list_item.append(metadata[X_TIMESTAMP])
            list_item.append(int(metadata[X_CONTENT_LENGTH]))
            list_item.append(metadata[X_CONTENT_TYPE])
            list_item.append(metadata[X_ETAG])
        container_list.append(list_item)
        count += 1
        if count >= limit:
            break
    if reverse:
        container_list.reverse()
    return container_list
def list_objects_iter(self, limit, marker, end_marker,
                      prefix, delimiter, path=None):
    """
    Returns tuple of name, created_at, size, content_type, etag.

    :param limit: maximum number of entries to return (>= 0)
    :param marker: list only entries sorting after this name
    :param end_marker: list only entries sorting before this name
    :param prefix: list only entries beginning with this prefix
    :param delimiter: single-character delimiter for subdir rollup
    :param path: Swift "path" query; forces a '/' delimiter listing
                 rooted at that path
    """
    assert limit >= 0
    assert not delimiter or (len(delimiter) == 1 and
                             ord(delimiter) <= 254)

    if path is not None:
        if path:
            prefix = path = path.rstrip('/') + '/'
        else:
            prefix = path
        delimiter = '/'
    elif delimiter and not prefix:
        prefix = ''

    container_list = []

    objects = self._update_object_count()
    if objects:
        objects.sort()
    else:
        # No objects in the container; nothing to list.
        return container_list

    if end_marker:
        objects = filter_end_marker(objects, end_marker)

    if marker and marker >= prefix:
        objects = filter_marker(objects, marker)
    elif prefix:
        objects = filter_prefix_as_marker(objects, prefix)

    if prefix is None:
        # No prefix, we don't need to apply the other arguments, we just
        # return what we have.
        pass
    else:
        # We have a non-None (for all intents and purposes it is a string)
        # prefix.
        if not delimiter:
            if not prefix:
                # We have nothing more to do
                pass
            else:
                objects = filter_prefix(objects, prefix)
        else:
            objects = filter_delimiter(objects, delimiter, prefix, marker,
                                       path)

    count = 0
    for obj in objects:
        obj_path = os.path.join(self.datadir, obj)
        metadata = read_metadata(obj_path)
        if not metadata or not validate_object(metadata):
            # Missing or invalid metadata: regenerate it from the
            # filesystem (strip a trailing delimiter from subdir markers
            # first).
            if delimiter == '/' and obj_path[-1] == delimiter:
                clean_obj_path = obj_path[:-1]
            else:
                clean_obj_path = obj_path
            try:
                metadata = create_object_metadata(clean_obj_path)
            except OSError as e:
                # FIXME - total hack to get upstream swift ported unit
                # test cases working for now.
                # FIX: also tolerate ESTALE.  On a GlusterFS FUSE mount an
                # object removed by another node between building the
                # listing and stat'ing it can surface as ESTALE rather
                # than ENOENT (matches the handling in the newer
                # list_objects_iter variant).
                if e.errno not in (errno.ENOENT, errno.ESTALE):
                    raise
        # Skip implicit (filesystem-only) directories unless configured
        # to report them.
        if not Glusterfs._implicit_dir_objects and metadata \
                and metadata[X_CONTENT_TYPE] == DIR_TYPE \
                and not dir_is_object(metadata):
            continue
        list_item = []
        list_item.append(obj)
        if metadata:
            list_item.append(metadata[X_TIMESTAMP])
            list_item.append(int(metadata[X_CONTENT_LENGTH]))
            list_item.append(metadata[X_CONTENT_TYPE])
            list_item.append(metadata[X_ETAG])
        container_list.append(list_item)
        count += 1
        if count >= limit:
            break
    return container_list