def find_and_process(self):
    src_filename = time.strftime(self.filename_format)
    working_dir = os.path.join(self.target_dir,
                               ".%s-stats_tmp" % self.stats_type)
    shutil.rmtree(working_dir, ignore_errors=True)
    mkdirs(working_dir)
    tmp_filename = os.path.join(working_dir, src_filename)
    hasher = hashlib.md5()
    try:
        with open(tmp_filename, "wb") as statfile:
            statfile.write(self.get_header())
            for device in os.listdir(self.devices):
                if self.mount_check and not check_mount(self.devices,
                                                        device):
                    self.logger.error(
                        _("Device %s is not mounted, skipping.") % device)
                    continue
                db_dir = os.path.join(self.devices, device, self.data_dir)
                if not os.path.exists(db_dir):
                    self.logger.debug(
                        _("Path %s does not exist, skipping.") % db_dir)
                    continue
                for root, dirs, files in os.walk(db_dir, topdown=False):
                    for filename in files:
                        if filename.endswith(".db"):
                            db_path = os.path.join(root, filename)
                            try:
                                line_data = self.get_data(db_path)
                            except sqlite3.Error as err:
                                self.logger.info(
                                    _("Error accessing db %s: %s") %
                                    (db_path, err))
                                continue
                            if line_data:
                                statfile.write(line_data)
                                hasher.update(line_data)
        src_filename += hasher.hexdigest()
        renamer(tmp_filename, os.path.join(self.target_dir, src_filename))
    finally:
        # Always clean up the temp working dir, even if the rename fails.
        shutil.rmtree(working_dir, ignore_errors=True)
def process_object_update(self, update_path, device): """ Process the object information to be updated and update. :param update_path: path to pickled object update file :param device: path to device """ try: update = pickle.load(open(update_path, "rb")) except Exception: self.logger.exception(_("ERROR Pickle problem, quarantining %s"), update_path) renamer(update_path, os.path.join(device, "quarantined", "objects", os.path.basename(update_path))) return successes = update.get("successes", []) part, nodes = self.get_container_ring().get_nodes(update["account"], update["container"]) obj = "/%s/%s/%s" % (update["account"], update["container"], update["obj"]) success = True for node in nodes: if node["id"] not in successes: status = self.object_update(node, part, update["op"], obj, update["headers"]) if not (200 <= status < 300) and status != 404: success = False else: successes.append(node["id"]) if success: self.successes += 1 self.logger.debug(_("Update sent for %(obj)s %(path)s"), {"obj": obj, "path": update_path}) os.unlink(update_path) else: self.failures += 1 self.logger.debug(_("Update failed for %(obj)s %(path)s"), {"obj": obj, "path": update_path}) update["successes"] = successes write_pickle(update, update_path, os.path.join(device, "tmp"))
def copy_put(self, fd, tmppath):
    tpool.execute(os.fsync, fd)
    if self.obj_path:
        dir_objs = self.obj_path.split('/')
        tmp_path = ''
        if len(dir_objs):
            for dir_name in dir_objs:
                if tmp_path:
                    tmp_path = tmp_path + '/' + dir_name
                else:
                    tmp_path = dir_name
                if not self.create_dir_object(
                        os.path.join(self.container_path, tmp_path)):
                    self.logger.error("Failed in subdir %s",
                                      os.path.join(self.container_path,
                                                   tmp_path))
                    return False
    renamer(tmppath, os.path.join(self.datadir, self.obj))
    do_chown(os.path.join(self.datadir, self.obj), self.uid, self.gid)
    return True
def put(self, fd, metadata): """ Finalize writing the file on disk, and renames it from the temp file to the real location. This should be called after the data has been written to the temp file. :param fd: file descriptor of the temp file :param metadata: dictionary of metadata to be written """ assert self.tmppath is not None assert self._type == 0 # wait, what? #metadata['name'] = self.name timestamp = normalize_timestamp(metadata['X-Timestamp']) base_path = os.path.join(self.datadir, timestamp) # P3 fp = open("/tmp/dump","a") print >>fp, "posix put old", self.tmppath, "new", base_path fp.close() write_meta_file(base_path + '.meta', metadata) #if 'Content-Length' in metadata: # self.drop_cache(fd, 0, int(metadata['Content-Length'])) # XXX os.fsync maybe? #tpool.execute(fsync, fd) renamer(self.tmppath, base_path + ".data") # but not setting self.data_file here, is this right? self.metadata = metadata
def put(self, metadata, extension='.data'): """ Finalize writing the file on disk, and renames it from the temp file to the real location. This should be called after the data has been written to the temp file. :param metadata: dictionary of metadata to be written :param extension: extension to be used when making the file """ assert self.tmppath is not None timestamp = normalize_timestamp(metadata['X-Timestamp']) metadata['name'] = self.disk_file.name # Write the metadata before calling fsync() so that both data and # metadata are flushed to disk. write_metadata(self.fd, metadata) # We call fsync() before calling drop_cache() to lower the amount of # redundant work the drop cache code will perform on the pages (now # that after fsync the pages will be all clean). tpool.execute(fsync, self.fd) # From the Department of the Redundancy Department, make sure we # call drop_cache() after fsync() to avoid redundant work (pages # all clean). drop_buffer_cache(self.fd, 0, self.upload_size) invalidate_hash(os.path.dirname(self.disk_file.datadir)) # After the rename completes, this object will be available for other # requests to reference. renamer(self.tmppath, os.path.join(self.disk_file.datadir, timestamp + extension)) self.disk_file.metadata = metadata
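# Every snippet in this section leans on a ``renamer()`` helper that is not
# defined here.  The sketch below illustrates what such a helper typically
# has to do, assuming the Swift-style signature ``renamer(old, new,
# fsync=True)`` seen in later snippets; the parent-dir creation and
# directory fsync shown are assumptions for illustration, not the canonical
# implementation.
import errno
import os


def renamer(old, new, fsync=True):
    # Make sure the destination's parent directory exists.
    dirpath = os.path.dirname(new)
    try:
        os.makedirs(dirpath)
    except OSError as err:
        if err.errno != errno.EEXIST:
            raise
    # rename() is atomic within a filesystem, so readers see either the old
    # file or the new one, never a partial write.
    os.rename(old, new)
    if fsync:
        # Persist the new directory entry so the rename survives a crash.
        dirfd = os.open(dirpath, os.O_RDONLY | os.O_DIRECTORY)
        try:
            os.fsync(dirfd)
        finally:
            os.close(dirfd)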
def quarantine(self, reason): """ The database will be quarantined and a sqlite3.DatabaseError will be raised indicating the action taken. """ prefix_path = os.path.dirname(self.db_dir) partition_path = os.path.dirname(prefix_path) dbs_path = os.path.dirname(partition_path) device_path = os.path.dirname(dbs_path) quar_path = os.path.join(device_path, 'quarantined', self.db_type + 's', os.path.basename(self.db_dir)) try: renamer(self.db_dir, quar_path, fsync=False) except OSError as e: if e.errno not in (errno.EEXIST, errno.ENOTEMPTY): raise quar_path = "%s-%s" % (quar_path, uuid4().hex) renamer(self.db_dir, quar_path, fsync=False) detail = _('Quarantined %(db_dir)s to %(quar_path)s due to ' '%(reason)s') % {'db_dir': self.db_dir, 'quar_path': quar_path, 'reason': reason} self.logger.error(detail) raise sqlite3.DatabaseError(detail)
def recalculate_hashes(partition_dir, suffixes, reclaim_age=ONE_WEEK): """ Recalculates hashes for the given suffixes in the partition and updates them in the partition's hashes file. :param partition_dir: directory of the partition in which to recalculate :param suffixes: list of suffixes to recalculate :param reclaim_age: age in seconds at which tombstones should be removed """ def tpool_listdir(partition_dir): return dict(((suff, None) for suff in os.listdir(partition_dir) if len(suff) == 3 and isdir(join(partition_dir, suff)))) hashes_file = join(partition_dir, HASH_FILE) with lock_path(partition_dir): try: with open(hashes_file, 'rb') as fp: hashes = pickle.load(fp) except Exception: hashes = tpool.execute(tpool_listdir, partition_dir) for suffix in suffixes: suffix_dir = join(partition_dir, suffix) if os.path.exists(suffix_dir): hashes[suffix] = hash_suffix(suffix_dir, reclaim_age) elif suffix in hashes: del hashes[suffix] with open(hashes_file + '.tmp', 'wb') as fp: pickle.dump(hashes, fp, PICKLE_PROTOCOL) renamer(hashes_file + '.tmp', hashes_file)
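# The hashes-file helpers above serialise access to the partition with a
# ``lock_path()`` context manager that is not shown in this section.  A
# rough sketch, assuming a flock-based ``.lock`` file inside the directory;
# the lock-file name, retry interval, and timeout behaviour are assumptions
# made only to illustrate the pattern.
import contextlib
import errno
import fcntl
import os
import time


@contextlib.contextmanager
def lock_path(directory, timeout=10):
    # Hold an exclusive advisory lock on <directory>/.lock for the duration
    # of the with-block, retrying until the timeout expires.
    fd = os.open(os.path.join(directory, '.lock'), os.O_WRONLY | os.O_CREAT)
    try:
        deadline = time.time() + timeout
        while True:
            try:
                fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
                break
            except IOError as err:
                if err.errno not in (errno.EACCES, errno.EAGAIN):
                    raise
                if time.time() >= deadline:
                    raise
                time.sleep(0.01)
        yield fd
    finally:
        fcntl.flock(fd, fcntl.LOCK_UN)
        os.close(fd)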
def possibly_quarantine(self, exc_type, exc_value, exc_traceback): """ Checks the exception info to see if it indicates a quarantine situation (malformed or corrupted database). If not, the original exception will be reraised. If so, the database will be quarantined and a new sqlite3.DatabaseError will be raised indicating the action taken. """ if 'database disk image is malformed' in str(exc_value): exc_hint = 'malformed' elif 'file is encrypted or is not a database' in str(exc_value): exc_hint = 'corrupted' else: raise exc_type, exc_value, exc_traceback prefix_path = os.path.dirname(self.db_dir) partition_path = os.path.dirname(prefix_path) dbs_path = os.path.dirname(partition_path) device_path = os.path.dirname(dbs_path) quar_path = os.path.join(device_path, 'quarantined', self.db_type + 's', os.path.basename(self.db_dir)) try: renamer(self.db_dir, quar_path) except OSError as e: if e.errno not in (errno.EEXIST, errno.ENOTEMPTY): raise quar_path = "%s-%s" % (quar_path, uuid4().hex) renamer(self.db_dir, quar_path) detail = _('Quarantined %s to %s due to %s database') % \ (self.db_dir, quar_path, exc_hint) self.logger.error(detail) raise sqlite3.DatabaseError(detail)
def put(self, fd, tmppath, metadata, extension=''): """ Finalize writing the file on disk, and renames it from the temp file to the real location. This should be called after the data has been written to the temp file. :params fd: file descriptor of the temp file :param tmppath: path to the temporary file being used :param metadata: dictionary of metadata to be written :param extention: extension to be used when making the file """ #Marker dir. if extension == '.ts': return True if extension == '.meta': self.put_metadata(metadata) return True else: extension = '' if metadata[X_OBJECT_TYPE] == MARKER_DIR: self.create_dir_object(os.path.join(self.datadir, self.obj)) self.put_metadata(metadata) self.data_file = self.datadir + '/' + self.obj return True #Check if directory already exists. if self.is_dir: self.logger.error('Directory already exists %s/%s' % \ (self.datadir , self.obj)) return False #metadata['name'] = self.name timestamp = normalize_timestamp(metadata[X_TIMESTAMP]) write_metadata(tmppath, metadata) if X_CONTENT_LENGTH in metadata: self.drop_cache(fd, 0, int(metadata[X_CONTENT_LENGTH])) tpool.execute(os.fsync, fd) if self.obj_path: dir_objs = self.obj_path.split('/') tmp_path = '' if len(dir_objs): for dir_name in dir_objs: if tmp_path: tmp_path = tmp_path + '/' + dir_name else: tmp_path = dir_name if not self.create_dir_object(os.path.join(self.container_path, tmp_path)): self.logger.error("Failed in subdir %s",\ os.path.join(self.container_path,tmp_path)) return False renamer(tmppath, os.path.join(self.datadir, self.obj + extension)) do_chown(os.path.join(self.datadir, self.obj + extension), \ self.uid, self.gid) self.metadata = metadata #self.logger.error("Meta %s", self.metadata) self.data_file = self.datadir + '/' + self.obj + extension return True
def process_object_update(self, update_path, device, policy): """ Process the object information to be updated and update. :param update_path: path to pickled object update file :param device: path to device :param policy: storage policy of object update """ try: update = pickle.load(open(update_path, 'rb')) except Exception: self.logger.exception( _('ERROR Pickle problem, quarantining %s'), update_path) self.stats.quarantines += 1 self.logger.increment('quarantines') target_path = os.path.join(device, 'quarantined', 'objects', os.path.basename(update_path)) renamer(update_path, target_path, fsync=False) return successes = update.get('successes', []) part, nodes = self.get_container_ring().get_nodes( update['account'], update['container']) obj = '/%s/%s/%s' % \ (update['account'], update['container'], update['obj']) headers_out = HeaderKeyDict(update['headers']) headers_out['user-agent'] = 'object-updater %s' % os.getpid() headers_out.setdefault('X-Backend-Storage-Policy-Index', str(int(policy))) events = [spawn(self.object_update, node, part, update['op'], obj, headers_out) for node in nodes if node['id'] not in successes] success = True new_successes = False for event in events: event_success, node_id = event.wait() if event_success is True: successes.append(node_id) new_successes = True else: success = False if success: self.stats.successes += 1 self.logger.increment('successes') self.logger.debug('Update sent for %(obj)s %(path)s', {'obj': obj, 'path': update_path}) self.stats.unlinks += 1 self.logger.increment('unlinks') os.unlink(update_path) else: self.stats.failures += 1 self.logger.increment('failures') self.logger.debug('Update failed for %(obj)s %(path)s', {'obj': obj, 'path': update_path}) if new_successes: update['successes'] = successes write_pickle(update, update_path, os.path.join( device, get_tmp_dir(policy)))
def complete_rsync(self, drive, db_file, args):
    old_filename = os.path.join(self.root, drive, 'tmp', args[0])
    if os.path.exists(db_file):
        return HTTPNotFound()
    if not os.path.exists(old_filename):
        return HTTPNotFound()
    broker = self.broker_class(old_filename)
    broker.newid(args[0])
    renamer(old_filename, db_file)
    return HTTPNoContent()
def put(self, fd, tmppath, metadata, extension=''): """ Finalize writing the file on disk, and renames it from the temp file to the real location. This should be called after the data has been written to the temp file. :params fd: file descriptor of the temp file :param tmppath: path to the temporary file being used :param metadata: dictionary of metadata to be written :param extention: extension to be used when making the file """ #Marker dir. if metadata[X_OBJECT_TYPE] == MARKER_DIR: if os.path.exists(os.path.join(self.datadir, self.obj)) and \ not os.path.isdir(os.path.join(self.datadir, self.obj)): os.unlink(os.path.join(self.datadir, self.obj)) mkdirs(os.path.join(self.datadir, self.obj)) os.chown(os.path.join(self.datadir, self.obj), self.uid, self.gid) self.put_metadata(metadata) self.data_file = self.datadir + '/' + self.obj return True #Check if directory already exists. if self.is_dir: logging.error('Directory already exists %s/%s' % \ (self.datadir , self.obj)) return False #metadata['name'] = self.name timestamp = normalize_timestamp(metadata[X_TIMESTAMP]) write_metadata(fd, metadata) if X_CONTENT_LENGTH in metadata: self.drop_cache(fd, 0, int(metadata[X_CONTENT_LENGTH])) tpool.execute(os.fsync, fd) if self.obj_path: dir_objs = self.obj_path.split('/') tmp_path = '' if len(dir_objs): for dir_name in dir_objs: if tmp_path: tmp_path = tmp_path + '/' + dir_name else: tmp_path = dir_name if not self.create_dir_object(tmp_path, metadata[X_TIMESTAMP]): return False #print 'Gaurav put tmppath', tmppath, os.path.join(self.datadir, #self.obj+extension) #invalidate_hash(os.path.dirname(self.datadir)) renamer(tmppath, os.path.join(self.datadir, self.obj + extension)) os.chown(os.path.join(self.datadir, self.obj + extension), \ self.uid, self.gid) self.metadata = metadata self.data_file = self.datadir + '/' + self.obj + extension return True
def process_object_update(self, update_path, device, policy_idx): """ Process the object information to be updated and update. :param update_path: path to pickled object update file :param device: path to device :param policy_idx: storage policy index of object update """ try: update = pickle.load(open(update_path, 'rb')) except Exception: self.logger.exception( _('ERROR Pickle problem, quarantining %s'), update_path) self.logger.increment('quarantines') renamer(update_path, os.path.join( device, 'quarantined', 'objects', os.path.basename(update_path))) return successes = update.get('successes', []) part, nodes = self.get_container_ring().get_nodes( update['account'], update['container']) obj = '/%s/%s/%s' % \ (update['account'], update['container'], update['obj']) success = True new_successes = False for node in nodes: if node['id'] not in successes: headers = update['headers'].copy() headers.setdefault('X-Backend-Storage-Policy-Index', str(policy_idx)) status = self.object_update(node, part, update['op'], obj, headers) if not is_success(status) and status != HTTP_NOT_FOUND: success = False else: successes.append(node['id']) new_successes = True if success: self.successes += 1 self.logger.increment('successes') self.logger.debug('Update sent for %(obj)s %(path)s', {'obj': obj, 'path': update_path}) self.logger.increment("unlinks") os.unlink(update_path) else: self.failures += 1 self.logger.increment('failures') self.logger.debug('Update failed for %(obj)s %(path)s', {'obj': obj, 'path': update_path}) if new_successes: update['successes'] = successes write_pickle(update, update_path, os.path.join( device, get_tmp_dir(policy_idx)))
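# The updater snippets persist partially-successful updates with a
# ``write_pickle()`` helper that is not defined in this section.  A minimal
# sketch of the temp-file-then-rename pattern it implies, assuming the call
# shape ``write_pickle(obj, dest, tmp_dir)`` used above; the temp-file
# naming and fsync details are assumptions, and ``renamer`` is the helper
# sketched earlier.
import os
import pickle
import uuid


def write_pickle(obj, dest, tmp_dir):
    # Serialise to a unique temp file in tmp_dir (assumed to exist), flush
    # it to disk, then atomically move it over dest so readers never see a
    # partially written pickle.
    tmp_path = os.path.join(tmp_dir, uuid.uuid4().hex)
    with open(tmp_path, 'wb') as fo:
        pickle.dump(obj, fo)
        fo.flush()
        os.fsync(fo.fileno())
    renamer(tmp_path, dest)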
def find_and_process(self): src_filename = time.strftime(self.filename_format) working_dir = os.path.join(self.target_dir, '.stats_tmp') shutil.rmtree(working_dir, ignore_errors=True) mkdirs(working_dir) tmp_filename = os.path.join(working_dir, src_filename) hasher = hashlib.md5() with open(tmp_filename, 'wb') as statfile: # csv has the following columns: # Account Name, Container Count, Object Count, Bytes Used for device in os.listdir(self.devices): if self.mount_check and not check_mount(self.devices, device): self.logger.error( _("Device %s is not mounted, skipping.") % device) continue accounts = os.path.join(self.devices, device, account_server_data_dir) if not os.path.exists(accounts): self.logger.debug(_("Path %s does not exist, skipping.") % accounts) continue for root, dirs, files in os.walk(accounts, topdown=False): for filename in files: if filename.endswith('.db'): db_path = os.path.join(root, filename) broker = AccountBroker(db_path) if not broker.is_deleted(): (account_name, _junk, _junk, _junk, container_count, object_count, bytes_used, _junk, _junk) = broker.get_info() line_data = '"%s",%d,%d,%d\n' % ( account_name, container_count, object_count, bytes_used) statfile.write(line_data) hasher.update(line_data) file_hash = hasher.hexdigest() hash_index = src_filename.find('*') if hash_index < 0: # if there is no * in the target filename, the uploader probably # won't work because we are crafting a filename that doesn't # fit the pattern src_filename = '_'.join([src_filename, file_hash]) else: parts = src_filename[:hash_index], src_filename[hash_index + 1:] src_filename = ''.join([parts[0], file_hash, parts[1]]) renamer(tmp_filename, os.path.join(self.target_dir, src_filename)) shutil.rmtree(working_dir, ignore_errors=True)
def quarantine_db(object_file, server_type):
    """
    In the case that a corrupt file is found, move it to a quarantined area
    to allow replication to fix it.

    :param object_file: path to corrupt file
    :param server_type: type of file that is corrupt ('container' or
                        'account')
    """
    object_dir = os.path.dirname(object_file)
    quarantine_dir = os.path.abspath(
        os.path.join(object_dir, '..', '..', '..', '..', 'quarantined',
                     server_type + 's', os.path.basename(object_dir)))
    renamer(object_dir, quarantine_dir)
def put(self, fd, metadata, extension='.data'): """ Finalize writing the file on disk, and renames it from the temp file to the real location. This should be called after the data has been written to the temp file. :param fd: file descriptor of the temp file :param metadata: dictionary of metadata to be written :param extension: extension to be used when making the file """ # Our caller will use '.data' here; we just ignore it since we map the # URL directly to the file system. extension = '' metadata = _adjust_metadata(metadata) if metadata[X_OBJECT_TYPE] == MARKER_DIR: if not self.data_file: self.data_file = os.path.join(self.datadir, self._obj) self._create_dir_object(self.data_file) self.put_metadata(metadata) return # Check if directory already exists. if self._is_dir: # FIXME: How can we have a directory and it not be marked as a # MARKER_DIR (see above)? msg = 'File object exists as a directory: %s' % self.data_file raise AlreadyExistsAsDir(msg) timestamp = normalize_timestamp(metadata[X_TIMESTAMP]) write_metadata(self.tmppath, metadata) if X_CONTENT_LENGTH in metadata: self.drop_cache(fd, 0, int(metadata[X_CONTENT_LENGTH])) tpool.execute(os.fsync, fd) if self._obj_path: dir_objs = self._obj_path.split('/') assert len(dir_objs) >= 1 tmp_path = self._container_path for dir_name in dir_objs: tmp_path = os.path.join(tmp_path, dir_name) self._create_dir_object(tmp_path) newpath = os.path.join(self.datadir, self._obj) renamer(self.tmppath, newpath) do_chown(newpath, self.uid, self.gid) self.metadata = metadata self.data_file = newpath self.filter_metadata() return
def get_hashes(partition_dir, do_listdir=True, reclaim_age=ONE_WEEK): """ Get a list of hashes for the suffix dir. do_listdir causes it to mistrust the hash cache for suffix existence at the (unexpectedly high) cost of a listdir. reclaim_age is just passed on to hash_suffix. :param partition_dir: absolute path of partition to get hashes for :param do_listdir: force existence check for all hashes in the partition :param reclaim_age: age at which to remove tombstones :returns: tuple of (number of suffix dirs hashed, dictionary of hashes) """ def tpool_listdir(hashes, partition_dir): return dict(((suff, hashes.get(suff, None)) for suff in os.listdir(partition_dir) if len(suff) == 3 and isdir(join(partition_dir, suff)))) hashed = 0 hashes_file = join(partition_dir, HASH_FILE) with lock_path(partition_dir): modified = False hashes = {} try: with open(hashes_file, 'rb') as fp: hashes = pickle.load(fp) except Exception: do_listdir = True if do_listdir: hashes = tpool.execute(tpool_listdir, hashes, partition_dir) modified = True for suffix, hash_ in hashes.items(): if not hash_: suffix_dir = join(partition_dir, suffix) if os.path.exists(suffix_dir): try: hashes[suffix] = hash_suffix(suffix_dir, reclaim_age) hashed += 1 except OSError: logging.exception(_('Error hashing suffix')) hashes[suffix] = None else: del hashes[suffix] modified = True sleep() if modified: with open(hashes_file + '.tmp', 'wb') as fp: pickle.dump(hashes, fp, PICKLE_PROTOCOL) renamer(hashes_file + '.tmp', hashes_file) return hashed, hashes
def rsync_then_merge(self, drive, db_file, args): old_filename = os.path.join(self.root, drive, 'tmp', args[0]) if not os.path.exists(db_file) or not os.path.exists(old_filename): return HTTPNotFound() new_broker = self.broker_class(old_filename) existing_broker = self.broker_class(db_file) point = -1 objects = existing_broker.get_items_since(point, 1000) while len(objects): new_broker.merge_items(objects) point = objects[-1]['ROWID'] objects = existing_broker.get_items_since(point, 1000) sleep() new_broker.newid(args[0]) renamer(old_filename, db_file) return HTTPNoContent()
def finalize_put(): # Write the metadata before calling fsync() so that both data and # metadata are flushed to disk. write_metadata(self.fd, metadata) # We call fsync() before calling drop_cache() to lower the amount # of redundant work the drop cache code will perform on the pages # (now that after fsync the pages will be all clean). fsync(self.fd) # From the Department of the Redundancy Department, make sure # we call drop_cache() after fsync() to avoid redundant work # (pages all clean). drop_buffer_cache(self.fd, 0, self.upload_size) invalidate_hash(os.path.dirname(self.disk_file.datadir)) # After the rename completes, this object will be available for # other requests to reference. renamer(self.tmppath, os.path.join(self.disk_file.datadir, timestamp + extension))
def put(self, fd, tmppath, metadata,extension=''): if extension == '.ts': # TombStone marker (deleted) return True metadata[X_TYPE] = OBJECT if extension == '.meta': # Metadata recorded separately from the file self.meta_put_metadata(metadata) return True # Check if directory already exists. if self.is_dir: self.logger.error('Directory already exists %s/%s' % \ (self.datadir , self.obj)) return False meta_write_metadata(self.metafile, metadata) if X_CONTENT_LENGTH in metadata: self.drop_cache(fd, 0, int(metadata[X_CONTENT_LENGTH])) tpool.execute(os.fsync, fd) if self.obj_path: dir_objs = self.obj_path.split('/') tmp_path = '' if len(dir_objs): for dir_name in dir_objs: if tmp_path: tmp_path = tmp_path + '/' + dir_name else: tmp_path = dir_name if not self.create_dir_object(os.path.join(self.container_path, tmp_path)): self.logger.error("Failed in subdir %s",\ os.path.join(self.container_path,tmp_path)) return False renamer(tmppath, os.path.join(self.datadir, self.obj)) do_chown(os.path.join(self.datadir, self.obj), self.uid, self.gid) self.metadata = metadata return True
def rsync_then_merge(self, drive, db_file, args): old_filename = os.path.join(self.root, drive, 'tmp', args[0]) if not os.path.exists(db_file) or not os.path.exists(old_filename): return HTTPNotFound() new_broker = self.broker_class(old_filename) existing_broker = self.broker_class(db_file) point = -1 objects = existing_broker.get_items_since(point, 1000) while len(objects): new_broker.merge_items(objects) point = objects[-1]['ROWID'] objects = existing_broker.get_items_since(point, 1000) sleep() new_broker.newid(args[0]) new_broker.update_metadata(existing_broker.metadata) renamer(old_filename, db_file) return HTTPNoContent()
def process_object_update(self, update_path, device): """ Process the object information to be updated and update. :param update_path: path to pickled object update file :param device: path to device """ try: update = pickle.load(open(update_path, 'rb')) except Exception: self.logger.exception( _('ERROR Pickle problem, quarantining %s'), update_path) self.logger.increment('quarantines') renamer(update_path, os.path.join(device, 'quarantined', 'objects', os.path.basename(update_path))) return successes = update.get('successes', []) part, nodes = self.get_container_ring().get_nodes( update['account'], update['container']) obj = '/%s/%s/%s' % \ (update['account'], update['container'], update['obj']) success = True new_successes = False for node in nodes: if node['id'] not in successes: status = self.object_update(node, part, update['op'], obj, update['headers']) if not is_success(status) and status != HTTP_NOT_FOUND: success = False else: successes.append(node['id']) new_successes = True if success: self.successes += 1 self.logger.increment('successes') self.logger.debug(_('Update sent for %(obj)s %(path)s'), {'obj': obj, 'path': update_path}) os.unlink(update_path) else: self.failures += 1 self.logger.increment('failures') self.logger.debug(_('Update failed for %(obj)s %(path)s'), {'obj': obj, 'path': update_path}) if new_successes: update['successes'] = successes write_pickle(update, update_path, os.path.join(device, 'tmp'))
def _finalize_put(self, metadata, target_path): # Write the metadata before calling fsync() so that both data and # metadata are flushed to disk. write_metadata(self._fd, metadata) # We call fsync() before calling drop_cache() to lower the amount of # redundant work the drop cache code will perform on the pages (now # that after fsync the pages will be all clean). fsync(self._fd) # From the Department of the Redundancy Department, make sure we call # drop_cache() after fsync() to avoid redundant work (pages all # clean). drop_buffer_cache(self._fd, 0, self._upload_size) invalidate_hash(dirname(self._datadir)) # After the rename completes, this object will be available for other # requests to reference. renamer(self._tmppath, target_path) hash_cleanup_listdir(self._datadir)
def _finalize_put(self, metadata, target_path): #在调用fsync()之前写元数据,因此,元数据和数据都刷到磁盘上 # Write the metadata before calling fsync() so that both data and # metadata are flushed to disk. write_metadata(self.fd, metadata) # We call fsync() before calling drop_cache() to lower the amount # of redundant work the drop cache code will perform on the pages # (now that after fsync the pages will be all clean). fsync(self.fd) # From the Department of the Redundancy Department, make sure # we call drop_cache() after fsync() to avoid redundant work # (pages all clean). drop_buffer_cache(self.fd, 0, self.upload_size) invalidate_hash(dirname(self.disk_file.datadir)) # After the rename completes, this object will be available for # other requests to reference. renamer(self.tmppath, target_path) hash_cleanup_listdir(self.disk_file.datadir)
def finalize_put(): # Write the metadata before calling fsync() so that both data and # metadata are flushed to disk. write_metadata(self.fd, metadata) # We call fsync() before calling drop_cache() to lower the amount # of redundant work the drop cache code will perform on the pages # (now that after fsync the pages will be all clean). fsync(self.fd) # From the Department of the Redundancy Department, make sure # we call drop_cache() after fsync() to avoid redundant work # (pages all clean). drop_buffer_cache(self.fd, 0, self.upload_size) invalidate_hash(dirname(self.disk_file.datadir)) # After the rename completes, this object will be available for # other requests to reference. renamer(self.tmppath, join(self.disk_file.datadir, timestamp + extension)) hash_cleanup_listdir(self.disk_file.datadir)
def process_object_update(self, update_path, device): """ Process the object information to be updated and update. :param update_path: path to pickled object update file :param device: path to device """ try: update = pickle.load(open(update_path, 'rb')) except Exception: self.logger.exception( _('ERROR Pickle problem, quarantining %s'), update_path) renamer(update_path, os.path.join(device, 'quarantined', 'objects', os.path.basename(update_path))) return successes = update.get('successes', []) part, nodes = self.get_container_ring().get_nodes( update['account'], update['container']) obj = '/%s/%s/%s' % \ (update['account'], update['container'], update['obj']) success = True new_successes = False for node in nodes: if node['id'] not in successes: status = self.object_update(node, part, update['op'], obj, update['headers']) if not is_success(status) and status != HTTP_NOT_FOUND: success = False else: successes.append(node['id']) new_successes = True if success: self.successes += 1 self.logger.debug(_('Update sent for %(obj)s %(path)s'), {'obj': obj, 'path': update_path}) os.unlink(update_path) else: self.failures += 1 self.logger.debug(_('Update failed for %(obj)s %(path)s'), {'obj': obj, 'path': update_path}) if new_successes: update['successes'] = successes write_pickle(update, update_path, os.path.join(device, 'tmp'))
def quarantine_db(object_file, server_type):
    """
    In the case that a corrupt file is found, move it to a quarantined area
    to allow replication to fix it.

    :param object_file: path to corrupt file
    :param server_type: type of file that is corrupt ('container' or
                        'account')
    """
    object_dir = os.path.dirname(object_file)
    quarantine_dir = os.path.abspath(
        os.path.join(object_dir, '..', '..', '..', '..', 'quarantined',
                     server_type + 's', os.path.basename(object_dir)))
    try:
        renamer(object_dir, quarantine_dir)
    except OSError as e:
        if e.errno not in (errno.EEXIST, errno.ENOTEMPTY):
            raise
        quarantine_dir = "%s-%s" % (quarantine_dir, uuid.uuid4().hex)
        renamer(object_dir, quarantine_dir)
def put(self, fd, tmppath, metadata, extension='.data'):
    """
    Finalize writing the file on disk, and renames it from the temp file
    to the real location.  This should be called after the data has been
    written to the temp file.

    :param fd: file descriptor of the temp file
    :param tmppath: path to the temporary file being used
    :param metadata: dictionary of metadata to be written
    :param extension: extension to be used when making the file
    """
    metadata['name'] = self.name
    timestamp = normalize_timestamp(metadata['X-Timestamp'])
    write_metadata(fd, metadata)
    if 'Content-Length' in metadata:
        self.drop_cache(fd, 0, int(metadata['Content-Length']))
    tpool.execute(os.fsync, fd)
    invalidate_hash(os.path.dirname(self.datadir))
    renamer(tmppath, os.path.join(self.datadir, timestamp + extension))
    self.metadata = metadata
def put(self, fd, metadata, extension=".data"): """ Finalize writing the file on disk, and renames it from the temp file to the real location. This should be called after the data has been written to the temp file. :param fd: file descriptor of the temp file :param metadata: dictionary of metadata to be written :param extension: extension to be used when making the file """ assert self.tmppath is not None metadata["name"] = self.name timestamp = normalize_timestamp(metadata["X-Timestamp"]) write_metadata(fd, metadata) if "Content-Length" in metadata: self.drop_cache(fd, 0, int(metadata["Content-Length"])) tpool.execute(fsync, fd) invalidate_hash(os.path.dirname(self.datadir)) renamer(self.tmppath, os.path.join(self.datadir, timestamp + extension)) self.metadata = metadata
def quarantine_db(object_file, server_type): """ In the case that a corrupt file is found, move it to a quarantined area to allow replication to fix it. :param object_file: path to corrupt file :param server_type: type of file that is corrupt ('container' or 'account') """ object_dir = os.path.dirname(object_file) quarantine_dir = os.path.abspath( os.path.join(object_dir, '..', '..', '..', '..', 'quarantined', server_type + 's', os.path.basename(object_dir))) try: renamer(object_dir, quarantine_dir, fsync=False) except OSError as e: if e.errno not in (errno.EEXIST, errno.ENOTEMPTY): raise quarantine_dir = "%s-%s" % (quarantine_dir, uuid.uuid4().hex) renamer(object_dir, quarantine_dir, fsync=False)
def put(self, fd, tmppath, metadata, extension='.data'):
    """
    Finalize writing the file on disk, and renames it from the temp file
    to the real location.  This should be called after the data has been
    written to the temp file.

    :param fd: file descriptor of the temp file
    :param tmppath: path to the temporary file being used
    :param metadata: dictionary of metadata to be written
    :param extension: extension to be used when making the file
    """
    metadata['name'] = self.name
    timestamp = normalize_timestamp(metadata['X-Timestamp'])
    write_metadata(fd, metadata)
    if 'Content-Length' in metadata:
        self.drop_cache(fd, 0, int(metadata['Content-Length']))
    tpool.execute(fsync, fd)
    invalidate_hash(os.path.dirname(self.datadir))
    renamer(tmppath, os.path.join(self.datadir, timestamp + extension))
    self.metadata = metadata
def possibly_quarantine(self, exc_type, exc_value, exc_traceback): """ Checks the exception info to see if it indicates a quarantine situation (malformed or corrupted database). If not, the original exception will be reraised. If so, the database will be quarantined and a new sqlite3.DatabaseError will be raised indicating the action taken. """ if 'database disk image is malformed' in str(exc_value): exc_hint = 'malformed' elif 'malformed database schema' in str(exc_value): exc_hint = 'malformed' elif ' is not a database' in str(exc_value): # older versions said 'file is not a database' # now 'file is encrypted or is not a database' exc_hint = 'corrupted' elif 'disk I/O error' in str(exc_value): exc_hint = 'disk error while accessing' else: six.reraise(exc_type, exc_value, exc_traceback) prefix_path = os.path.dirname(self.db_dir) partition_path = os.path.dirname(prefix_path) dbs_path = os.path.dirname(partition_path) device_path = os.path.dirname(dbs_path) quar_path = os.path.join(device_path, 'quarantined', self.db_type + 's', os.path.basename(self.db_dir)) try: renamer(self.db_dir, quar_path, fsync=False) except OSError as e: if e.errno not in (errno.EEXIST, errno.ENOTEMPTY): raise quar_path = "%s-%s" % (quar_path, uuid4().hex) renamer(self.db_dir, quar_path, fsync=False) detail = _('Quarantined %(db_dir)s to %(quar_path)s due to ' '%(exc_hint)s database') % { 'db_dir': self.db_dir, 'quar_path': quar_path, 'exc_hint': exc_hint } self.logger.error(detail) raise sqlite3.DatabaseError(detail)
def quarantine(self):
    """
    In the case that a file is corrupted, move it to a quarantined area to
    allow replication to fix it.

    :returns: if quarantine is successful, path to quarantined directory
              otherwise None
    """
    # from swift.obj.replicator quarantine_renamer, get_hashes
    # if not (self.is_deleted() or self.quarantined_dir):
    #     self.quarantined_dir = quarantine_renamer(self.device_path,
    #                                               self.data_file)
    #     self.logger.increment('quarantines')
    #     return self.quarantined_dir
    # stub for now XXX
    # P3
    fp = open("/tmp/dump", "a")
    print >>fp, "posix quarantine", self.data_file + ".quar"
    fp.close()
    renamer(self.data_file, self.data_file + ".quar")
def find_and_process(self):
    src_filename = datetime.now(self.time_zone).strftime(
        self.filename_format)
    working_dir = os.path.join(self.target_dir,
                               '.%s-stats_tmp' % self.stats_type)
    shutil.rmtree(working_dir, ignore_errors=True)
    mkdirs(working_dir)
    tmp_filename = os.path.join(working_dir, src_filename)
    hasher = hashlib.md5()
    try:
        with open(tmp_filename, 'wb') as statfile:
            statfile.write(self.get_header())
            for device in os.listdir(self.devices):
                if self.mount_check and not check_mount(self.devices,
                                                        device):
                    self.logger.error(
                        _("Device %s is not mounted, skipping.") % device)
                    continue
                db_dir = os.path.join(self.devices, device, self.data_dir)
                if not os.path.exists(db_dir):
                    self.logger.debug(
                        _("Path %s does not exist, skipping.") % db_dir)
                    continue
                for root, dirs, files in os.walk(db_dir, topdown=False):
                    for filename in files:
                        if filename.endswith('.db'):
                            db_path = os.path.join(root, filename)
                            try:
                                line_data = self.get_data(db_path)
                            except sqlite3.Error as err:
                                self.logger.info(
                                    _("Error accessing db %s: %s") %
                                    (db_path, err))
                                continue
                            if line_data:
                                statfile.write(line_data)
                                hasher.update(line_data)
        src_filename += hasher.hexdigest()
        renamer(tmp_filename, os.path.join(self.target_dir, src_filename))
    finally:
        # Always clean up the temp working dir, even if the rename fails.
        shutil.rmtree(working_dir, ignore_errors=True)
def rsync_then_merge(self, drive, db_file, args): tmp_filename = os.path.join(self.root, drive, 'tmp', args[0]) if self._abort_rsync_then_merge(db_file, tmp_filename): return HTTPNotFound() new_broker = self.broker_class(tmp_filename) existing_broker = self.broker_class(db_file) db_file = existing_broker.db_file point = -1 objects = existing_broker.get_items_since(point, 1000) while len(objects): new_broker.merge_items(objects) point = objects[-1]['ROWID'] objects = existing_broker.get_items_since(point, 1000) sleep() new_broker.merge_syncs(existing_broker.get_syncs()) self._post_rsync_then_merge_hook(existing_broker, new_broker) new_broker.newid(args[0]) new_broker.update_metadata(existing_broker.metadata) if self._abort_rsync_then_merge(db_file, tmp_filename): return HTTPNotFound() renamer(tmp_filename, db_file) return HTTPNoContent()
def quarantine(self, reason):
    """
    The database will be quarantined and a sqlite3.DatabaseError will be
    raised indicating the action taken.
    """
    device_path = self.get_device_path()
    quar_path = os.path.join(device_path, 'quarantined',
                             self.db_type + 's',
                             os.path.basename(self.db_dir))
    try:
        renamer(self.db_dir, quar_path, fsync=False)
    except OSError as e:
        if e.errno not in (errno.EEXIST, errno.ENOTEMPTY):
            raise
        quar_path = "%s-%s" % (quar_path, uuid4().hex)
        renamer(self.db_dir, quar_path, fsync=False)
    detail = _('Quarantined %(db_dir)s to %(quar_path)s due to '
               '%(reason)s') % {'db_dir': self.db_dir,
                                'quar_path': quar_path,
                                'reason': reason}
    self.logger.error(detail)
    raise sqlite3.DatabaseError(detail)
def rsync_then_merge(self, drive, db_file, args): tmp_filename = os.path.join(self.root, drive, 'tmp', args[0]) if self._abort_rsync_then_merge(db_file, tmp_filename): return HTTPNotFound() new_broker = self.broker_class(tmp_filename, logger=self.logger) existing_broker = self.broker_class(db_file, logger=self.logger) db_file = existing_broker.db_file point = -1 objects = existing_broker.get_items_since(point, 1000) while len(objects): new_broker.merge_items(objects) point = objects[-1]['ROWID'] objects = existing_broker.get_items_since(point, 1000) sleep() new_broker.merge_syncs(existing_broker.get_syncs()) self._post_rsync_then_merge_hook(existing_broker, new_broker) new_broker.newid(args[0]) new_broker.update_metadata(existing_broker.metadata) if self._abort_rsync_then_merge(db_file, tmp_filename): return HTTPNotFound() renamer(tmp_filename, db_file) return HTTPNoContent()
def possibly_quarantine(self, exc_type, exc_value, exc_traceback): """ Checks the exception info to see if it indicates a quarantine situation (malformed or corrupted database). If not, the original exception will be reraised. If so, the database will be quarantined and a new sqlite3.DatabaseError will be raised indicating the action taken. """ if 'database disk image is malformed' in str(exc_value): exc_hint = 'malformed' elif 'malformed database schema' in str(exc_value): exc_hint = 'malformed' elif ' is not a database' in str(exc_value): # older versions said 'file is not a database' # now 'file is encrypted or is not a database' exc_hint = 'corrupted' elif 'disk I/O error' in str(exc_value): exc_hint = 'disk error while accessing' else: six.reraise(exc_type, exc_value, exc_traceback) prefix_path = os.path.dirname(self.db_dir) partition_path = os.path.dirname(prefix_path) dbs_path = os.path.dirname(partition_path) device_path = os.path.dirname(dbs_path) quar_path = os.path.join(device_path, 'quarantined', self.db_type + 's', os.path.basename(self.db_dir)) try: renamer(self.db_dir, quar_path, fsync=False) except OSError as e: if e.errno not in (errno.EEXIST, errno.ENOTEMPTY): raise quar_path = "%s-%s" % (quar_path, uuid4().hex) renamer(self.db_dir, quar_path, fsync=False) detail = _('Quarantined %(db_dir)s to %(quar_path)s due to ' '%(exc_hint)s database') % {'db_dir': self.db_dir, 'quar_path': quar_path, 'exc_hint': exc_hint} self.logger.error(detail) raise sqlite3.DatabaseError(detail)
def quarantine_renamer(device_path, corrupted_file_path):
    """
    In the case that a file is corrupted, move it to a quarantined area to
    allow replication to fix it.

    :param device_path: The path to the device the corrupted file is on.
    :param corrupted_file_path: The path to the file you want quarantined.
    :returns: path (str) of directory the file was moved to
    :raises OSError: re-raises non errno.EEXIST / errno.ENOTEMPTY
                     exceptions from rename
    """
    from_dir = dirname(corrupted_file_path)
    to_dir = join(device_path, 'quarantined', 'objects', basename(from_dir))
    invalidate_hash(dirname(from_dir))
    try:
        renamer(from_dir, to_dir)
    except OSError as e:
        if e.errno not in (errno.EEXIST, errno.ENOTEMPTY):
            raise
        to_dir = "%s-%s" % (to_dir, uuid.uuid4().hex)
        renamer(from_dir, to_dir)
    return to_dir
def invalidate_hash(suffix_dir):
    """
    Invalidates the hash for a suffix_dir in the partition's hashes file.

    :param suffix_dir: absolute path to suffix dir whose hash needs
                       invalidating
    """
    suffix = os.path.basename(suffix_dir)
    partition_dir = os.path.dirname(suffix_dir)
    hashes_file = join(partition_dir, HASH_FILE)
    with lock_path(partition_dir):
        try:
            with open(hashes_file, 'rb') as fp:
                hashes = pickle.load(fp)
            if suffix in hashes and not hashes[suffix]:
                return
        except Exception:
            return
        hashes[suffix] = None
        with open(hashes_file + '.tmp', 'wb') as fp:
            pickle.dump(hashes, fp, PICKLE_PROTOCOL)
        renamer(hashes_file + '.tmp', hashes_file)
class AuditorWorker(object):
    """Walk through file system to audit object"""

    def __init__(self, conf, zero_byte_only_at_fps=0):
        self.conf = conf
        self.logger = get_logger(conf, log_route='object-auditor')
        self.devices = conf.get('devices', '/srv/node')
        self.mount_check = conf.get('mount_check', 'true').lower() in \
            TRUE_VALUES
        self.max_files_per_second = float(conf.get('files_per_second', 20))
        self.max_bytes_per_second = float(conf.get('bytes_per_second',
                                                   10000000))
        self.auditor_type = 'ALL'
        self.zero_byte_only_at_fps = zero_byte_only_at_fps
        if self.zero_byte_only_at_fps:
            self.max_files_per_second = float(self.zero_byte_only_at_fps)
            self.auditor_type = 'ZBF'
        self.log_time = int(conf.get('log_time', 3600))
        self.files_running_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_files_processed = 0
        self.passes = 0
        self.quarantines = 0
        self.errors = 0

    def audit_all_objects(self, mode='once'):
        self.logger.info(_('Begin object audit "%s" mode (%s)' %
                           (mode, self.auditor_type)))
        begin = reported = time.time()
        self.total_bytes_processed = 0
        self.total_files_processed = 0
        files_running_time = 0
        all_locs = audit_location_generator(self.devices,
                                            object_server.DATADIR,
                                            mount_check=self.mount_check,
                                            logger=self.logger)
        for path, device, partition in all_locs:
            self.object_audit(path, device, partition)
            self.files_running_time = ratelimit_sleep(
                self.files_running_time, self.max_files_per_second)
            self.total_files_processed += 1
            if time.time() - reported >= self.log_time:
                self.logger.info(
                    _('Object audit (%(type)s). '
                      'Since %(start_time)s: Locally: %(passes)d passed, '
                      '%(quars)d quarantined, %(errors)d errors '
                      'files/sec: %(frate).2f , bytes/sec: %(brate).2f') % {
                        'type': self.auditor_type,
                        'start_time': time.ctime(reported),
                        'passes': self.passes,
                        'quars': self.quarantines,
                        'errors': self.errors,
                        'frate': self.passes / (time.time() - reported),
                        'brate': self.bytes_processed /
                            (time.time() - reported)})
                reported = time.time()
                self.passes = 0
                self.quarantines = 0
                self.errors = 0
                self.bytes_processed = 0
        elapsed = time.time() - begin
        self.logger.info(
            _('Object audit (%(type)s) "%(mode)s" mode '
              'completed: %(elapsed).02fs. '
              'Total files/sec: %(frate).2f , '
              'Total bytes/sec: %(brate).2f ') % {
                'type': self.auditor_type,
                'mode': mode,
                'elapsed': elapsed,
                'frate': self.total_files_processed / elapsed,
                'brate': self.total_bytes_processed / elapsed})

    def object_audit(self, path, device, partition):
        """
        Audits the given object path.

        :param path: a path to an object
        :param device: the device the path is on
        :param partition: the partition the path is on
        """
        try:
            if not path.endswith('.data'):
                return
            try:
                name = object_server.read_metadata(path)['name']
            except Exception as exc:
                raise AuditException('Error when reading metadata: %s' % exc)
            _junk, account, container, obj = name.split('/', 3)
            df = object_server.DiskFile(self.devices, device, partition,
                                        account, container, obj,
                                        keep_data_fp=True)
            if df.data_file is None:
                # file is deleted, we found the tombstone
                return
            obj_size = os.path.getsize(df.data_file)
            if obj_size != int(df.metadata['Content-Length']):
                raise AuditException('Content-Length of %s does not match '
                                     'file size of %s' %
                                     (int(df.metadata['Content-Length']),
                                      os.path.getsize(df.data_file)))
            if self.zero_byte_only_at_fps and obj_size:
                return
            etag = md5()
            for chunk in df:
                self.bytes_running_time = ratelimit_sleep(
                    self.bytes_running_time, self.max_bytes_per_second,
                    incr_by=len(chunk))
                etag.update(chunk)
                self.bytes_processed += len(chunk)
                self.total_bytes_processed += len(chunk)
            etag = etag.hexdigest()
            if etag != df.metadata['ETag']:
                raise AuditException("ETag of %s does not match file's md5 "
                                     "of %s" % (df.metadata['ETag'], etag))
        except AuditException as err:
            self.quarantines += 1
            self.logger.error(_('ERROR Object %(obj)s failed audit and will '
                                'be quarantined: %(err)s'),
                              {'obj': path, 'err': err})
            object_dir = os.path.dirname(path)
            invalidate_hash(os.path.dirname(object_dir))
            renamer_path = os.path.dirname(path)
            to_path = os.path.join(self.devices, device, 'quarantined',
                                   'objects', os.path.basename(renamer_path))
            try:
                renamer(renamer_path, to_path)
            except OSError as e:
                if e.errno == errno.EEXIST:
                    to_path = "%s-%s" % (to_path, uuid.uuid4().hex)
                    renamer(renamer_path, to_path)
def put(self, fd, tmppath, metadata, extension=''): """ Finalize writing the file on disk, and renames it from the temp file to the real location. This should be called after the data has been written to the temp file. :params fd: file descriptor of the temp file :param tmppath: path to the temporary file being used :param metadata: dictionary of metadata to be written :param extention: extension to be used when making the file """ if extension == '.ts': # TombStone marker (deleted) return True # Fix up the metadata to ensure it has a proper value for the # Content-Type metadata, as well as an X_TYPE and X_OBJECT_TYPE # metadata values. content_type = metadata['Content-Type'] if not content_type: metadata['Content-Type'] = FILE_TYPE x_object_type = FILE else: x_object_type = MARKER_DIR if content_type.lower( ) == DIR_TYPE else FILE metadata[X_TYPE] = OBJECT metadata[X_OBJECT_TYPE] = x_object_type if extension == '.meta': # Metadata recorded separately from the file self.put_metadata(metadata) return True extension = '' if metadata[X_OBJECT_TYPE] == MARKER_DIR: self.create_dir_object(os.path.join(self.datadir, self.obj)) self.put_metadata(metadata) self.data_file = self.datadir + '/' + self.obj return True # Check if directory already exists. if self.is_dir: self.logger.error('Directory already exists %s/%s' % \ (self.datadir , self.obj)) return False timestamp = normalize_timestamp(metadata[X_TIMESTAMP]) write_metadata(tmppath, metadata) if X_CONTENT_LENGTH in metadata: self.drop_cache(fd, 0, int(metadata[X_CONTENT_LENGTH])) tpool.execute(os.fsync, fd) if self.obj_path: dir_objs = self.obj_path.split('/') tmp_path = '' if len(dir_objs): for dir_name in dir_objs: if tmp_path: tmp_path = tmp_path + '/' + dir_name else: tmp_path = dir_name if not self.create_dir_object( os.path.join(self.container_path, tmp_path)): self.logger.error("Failed in subdir %s",\ os.path.join(self.container_path,tmp_path)) return False renamer(tmppath, os.path.join(self.datadir, self.obj + extension)) do_chown(os.path.join(self.datadir, self.obj + extension), \ self.uid, self.gid) self.metadata = metadata self.data_file = self.datadir + '/' + self.obj + extension return True
def revive_drive(self, device):
    disabled_name = device + "X"
    if os.path.isdir(disabled_name):
        renamer(disabled_name, device)
    else:
        os.system('sudo mount %s' % device)
def kill_drive(self, device):
    if os.path.ismount(device):
        os.system('sudo umount %s' % device)
    else:
        renamer(device, device + "X")
def process_object_update(self, update_path, device, policy): """ Process the object information to be updated and update. :param update_path: path to pickled object update file :param device: path to device :param policy: storage policy of object update """ try: update = pickle.load(open(update_path, 'rb')) except Exception as e: if getattr(e, 'errno', None) == errno.ENOENT: return self.logger.exception(_('ERROR Pickle problem, quarantining %s'), update_path) self.stats.quarantines += 1 self.logger.increment('quarantines') target_path = os.path.join(device, 'quarantined', 'objects', os.path.basename(update_path)) renamer(update_path, target_path, fsync=False) try: # If this was the last async_pending in the directory, # then this will succeed. Otherwise, it'll fail, and # that's okay. os.rmdir(os.path.dirname(update_path)) except OSError: pass return def do_update(): successes = update.get('successes', []) headers_out = HeaderKeyDict(update['headers'].copy()) headers_out['user-agent'] = 'object-updater %s' % os.getpid() headers_out.setdefault('X-Backend-Storage-Policy-Index', str(int(policy))) headers_out.setdefault('X-Backend-Accept-Redirect', 'true') headers_out.setdefault('X-Backend-Accept-Quoted-Location', 'true') container_path = update.get('container_path') if container_path: acct, cont = split_path('/' + container_path, minsegs=2) else: acct, cont = update['account'], update['container'] part, nodes = self.get_container_ring().get_nodes(acct, cont) obj = '/%s/%s/%s' % (acct, cont, update['obj']) events = [ spawn(self.object_update, node, part, update['op'], obj, headers_out) for node in nodes if node['id'] not in successes ] success = True new_successes = rewrite_pickle = False redirect = None redirects = set() for event in events: event_success, node_id, redirect = event.wait() if event_success is True: successes.append(node_id) new_successes = True else: success = False if redirect: redirects.add(redirect) if success: self.stats.successes += 1 self.logger.increment('successes') self.logger.debug('Update sent for %(obj)s %(path)s', { 'obj': obj, 'path': update_path }) self.stats.unlinks += 1 self.logger.increment('unlinks') os.unlink(update_path) try: # If this was the last async_pending in the directory, # then this will succeed. Otherwise, it'll fail, and # that's okay. os.rmdir(os.path.dirname(update_path)) except OSError: pass elif redirects: # erase any previous successes update.pop('successes', None) redirect = max(redirects, key=lambda x: x[-1])[0] redirect_history = update.setdefault('redirect_history', []) if redirect in redirect_history: # force next update to be sent to root, reset history update['container_path'] = None update['redirect_history'] = [] else: update['container_path'] = redirect redirect_history.append(redirect) self.stats.redirects += 1 self.logger.increment("redirects") self.logger.debug( 'Update redirected for %(obj)s %(path)s to %(shard)s', { 'obj': obj, 'path': update_path, 'shard': update['container_path'] }) rewrite_pickle = True else: self.stats.failures += 1 self.logger.increment('failures') self.logger.debug('Update failed for %(obj)s %(path)s', { 'obj': obj, 'path': update_path }) if new_successes: update['successes'] = successes rewrite_pickle = True return rewrite_pickle, redirect rewrite_pickle, redirect = do_update() if redirect: # make one immediate retry to the redirect location rewrite_pickle, redirect = do_update() if rewrite_pickle: write_pickle(update, update_path, os.path.join(device, get_tmp_dir(policy)))
def process_object_update(self, update_path, device, policy): """ Process the object information to be updated and update. :param update_path: path to pickled object update file :param device: path to device :param policy: storage policy of object update """ try: update = pickle.load(open(update_path, 'rb')) except Exception: self.logger.exception(_('ERROR Pickle problem, quarantining %s'), update_path) self.stats.quarantines += 1 self.logger.increment('quarantines') target_path = os.path.join(device, 'quarantined', 'objects', os.path.basename(update_path)) renamer(update_path, target_path, fsync=False) return successes = update.get('successes', []) part, nodes = self.get_container_ring().get_nodes( update['account'], update['container']) obj = '/%s/%s/%s' % \ (update['account'], update['container'], update['obj']) headers_out = HeaderKeyDict(update['headers']) headers_out['user-agent'] = 'object-updater %s' % os.getpid() headers_out.setdefault('X-Backend-Storage-Policy-Index', str(int(policy))) events = [ spawn(self.object_update, node, part, update['op'], obj, headers_out) for node in nodes if node['id'] not in successes ] success = True new_successes = False for event in events: event_success, node_id = event.wait() if event_success is True: successes.append(node_id) new_successes = True else: success = False if success: self.stats.successes += 1 self.logger.increment('successes') self.logger.debug('Update sent for %(obj)s %(path)s', { 'obj': obj, 'path': update_path }) self.stats.unlinks += 1 self.logger.increment('unlinks') os.unlink(update_path) else: self.stats.failures += 1 self.logger.increment('failures') self.logger.debug('Update failed for %(obj)s %(path)s', { 'obj': obj, 'path': update_path }) if new_successes: update['successes'] = successes write_pickle(update, update_path, os.path.join(device, get_tmp_dir(policy)))
def initialize(self, put_timestamp=None): """ Create the DB :param put_timestamp: timestamp of initial PUT request """ if self.db_file == ':memory:': tmp_db_file = None conn = get_db_connection(self.db_file, self.timeout) else: mkdirs(self.db_dir) fd, tmp_db_file = mkstemp(suffix='.tmp', dir=self.db_dir) os.close(fd) conn = sqlite3.connect(tmp_db_file, check_same_thread=False, factory=GreenDBConnection, timeout=0) # creating dbs implicitly does a lot of transactions, so we # pick fast, unsafe options here and do a big fsync at the end. with closing(conn.cursor()) as cur: cur.execute('PRAGMA synchronous = OFF') cur.execute('PRAGMA temp_store = MEMORY') cur.execute('PRAGMA journal_mode = MEMORY') conn.create_function('chexor', 3, chexor) conn.row_factory = sqlite3.Row conn.text_factory = str conn.executescript(""" CREATE TABLE outgoing_sync ( remote_id TEXT UNIQUE, sync_point INTEGER, updated_at TEXT DEFAULT 0 ); CREATE TABLE incoming_sync ( remote_id TEXT UNIQUE, sync_point INTEGER, updated_at TEXT DEFAULT 0 ); CREATE TRIGGER outgoing_sync_insert AFTER INSERT ON outgoing_sync BEGIN UPDATE outgoing_sync SET updated_at = STRFTIME('%s', 'NOW') WHERE ROWID = new.ROWID; END; CREATE TRIGGER outgoing_sync_update AFTER UPDATE ON outgoing_sync BEGIN UPDATE outgoing_sync SET updated_at = STRFTIME('%s', 'NOW') WHERE ROWID = new.ROWID; END; CREATE TRIGGER incoming_sync_insert AFTER INSERT ON incoming_sync BEGIN UPDATE incoming_sync SET updated_at = STRFTIME('%s', 'NOW') WHERE ROWID = new.ROWID; END; CREATE TRIGGER incoming_sync_update AFTER UPDATE ON incoming_sync BEGIN UPDATE incoming_sync SET updated_at = STRFTIME('%s', 'NOW') WHERE ROWID = new.ROWID; END; """) if not put_timestamp: put_timestamp = normalize_timestamp(0) self._initialize(conn, put_timestamp) conn.commit() if tmp_db_file: conn.close() with open(tmp_db_file, 'r+b') as fp: os.fsync(fp.fileno()) with lock_parent_directory(self.db_file, self.pending_timeout): if os.path.exists(self.db_file): # It's as if there was a "condition" where different parts # of the system were "racing" each other. raise DatabaseAlreadyExists(self.db_file) renamer(tmp_db_file, self.db_file) self.conn = get_db_connection(self.db_file, self.timeout) else: self.conn = conn
class ObjectAuditor(Daemon):
    """Audit objects."""

    def __init__(self, conf):
        self.conf = conf
        self.logger = get_logger(conf, 'object-auditor')
        self.devices = conf.get('devices', '/srv/node')
        self.mount_check = conf.get('mount_check', 'true').lower() in \
            ('true', 't', '1', 'on', 'yes', 'y')
        self.max_files_per_second = float(conf.get('files_per_second', 20))
        self.max_bytes_per_second = float(conf.get('bytes_per_second',
                                                   10000000))
        self.log_time = int(conf.get('log_time', 3600))
        self.files_running_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_files_processed = 0
        self.passes = 0
        self.quarantines = 0
        self.errors = 0

    def run_forever(self):
        """Run the object audit until stopped."""
        while True:
            self.run_once('forever')
            self.total_bytes_processed = 0
            self.total_files_processed = 0
            time.sleep(30)

    def run_once(self, mode='once'):
        """Run the object audit once."""
        self.logger.info(_('Begin object audit "%s" mode' % mode))
        begin = reported = time.time()
        all_locs = audit_location_generator(self.devices,
                                            object_server.DATADIR,
                                            mount_check=self.mount_check,
                                            logger=self.logger)
        for path, device, partition in all_locs:
            self.object_audit(path, device, partition)
            self.files_running_time = ratelimit_sleep(
                self.files_running_time, self.max_files_per_second)
            self.total_files_processed += 1
            if time.time() - reported >= self.log_time:
                self.logger.info(
                    _('Since %(start_time)s: Locally: %(passes)d passed '
                      'audit, %(quars)d quarantined, %(errors)d errors '
                      'files/sec: %(frate).2f , bytes/sec: %(brate).2f') % {
                        'start_time': time.ctime(reported),
                        'passes': self.passes,
                        'quars': self.quarantines,
                        'errors': self.errors,
                        'frate': self.passes / (time.time() - reported),
                        'brate': self.bytes_processed /
                            (time.time() - reported)})
                reported = time.time()
                self.passes = 0
                self.quarantines = 0
                self.errors = 0
                self.bytes_processed = 0
        elapsed = time.time() - begin
        self.logger.info(
            _('Object audit "%(mode)s" mode completed: %(elapsed).02fs. '
              'Total files/sec: %(frate).2f , '
              'Total bytes/sec: %(brate).2f ') % {
                'mode': mode,
                'elapsed': elapsed,
                'frate': self.total_files_processed / elapsed,
                'brate': self.total_bytes_processed / elapsed})

    def object_audit(self, path, device, partition):
        """
        Audits the given object path

        :param path: a path to an object
        :param device: the device the path is on
        :param partition: the partition the path is on
        """
        try:
            if not path.endswith('.data'):
                return
            try:
                name = object_server.read_metadata(path)['name']
            except Exception as exc:
                raise AuditException('Error when reading metadata: %s' % exc)
            _junk, account, container, obj = name.split('/', 3)
            df = object_server.DiskFile(self.devices, device, partition,
                                        account, container, obj,
                                        keep_data_fp=True)
            if df.data_file is None:
                # file is deleted, we found the tombstone
                return
            if os.path.getsize(df.data_file) != \
                    int(df.metadata['Content-Length']):
                raise AuditException('Content-Length of %s does not match '
                                     'file size of %s' %
                                     (int(df.metadata['Content-Length']),
                                      os.path.getsize(df.data_file)))
            etag = md5()
            for chunk in df:
                self.bytes_running_time = ratelimit_sleep(
                    self.bytes_running_time, self.max_bytes_per_second,
                    incr_by=len(chunk))
                etag.update(chunk)
                self.bytes_processed += len(chunk)
                self.total_bytes_processed += len(chunk)
            etag = etag.hexdigest()
            if etag != df.metadata['ETag']:
                raise AuditException("ETag of %s does not match file's md5 "
                                     "of %s" % (df.metadata['ETag'], etag))
        except AuditException as err:
            self.quarantines += 1
            self.logger.error(_('ERROR Object %(obj)s failed audit and will '
                                'be quarantined: %(err)s'),
                              {'obj': path, 'err': err})
            invalidate_hash(os.path.dirname(path))
            renamer_path = os.path.dirname(path)
            renamer(renamer_path,
                    os.path.join(self.devices, device, 'quarantined',
                                 'objects', os.path.basename(renamer_path)))
            return