def test_ratelimit_sleep(self):
    running_time = 0
    start = time.time()
    for i in range(100):
        running_time = utils.ratelimit_sleep(running_time, 0)
    self.assertTrue(abs((time.time() - start) * 100) < 1)

    running_time = 0
    start = time.time()
    for i in range(50):
        running_time = utils.ratelimit_sleep(running_time, 200)
    # make sure it's accurate to 10th of a second
    self.assertTrue(abs(25 - (time.time() - start) * 100) < 10)
def _one_audit_pass(self, reported):
    all_locs = audit_location_generator(self.devices,
                                        account_server.DATADIR, '.db',
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        self.account_audit(path)
        if time.time() - reported >= 3600:  # once an hour
            self.logger.info(_('Since %(time)s: Account audits: '
                               '%(passed)s passed audit, '
                               '%(failed)s failed audit'),
                             {'time': time.ctime(reported),
                              'passed': self.account_passes,
                              'failed': self.account_failures})
            dump_recon_cache({'account_audits_since': reported,
                              'account_audits_passed': self.account_passes,
                              'account_audits_failed':
                              self.account_failures},
                             self.rcache, self.logger)
            reported = time.time()
            self.account_passes = 0
            self.account_failures = 0
        self.accounts_running_time = ratelimit_sleep(
            self.accounts_running_time, self.max_accounts_per_second)
    return reported
def object_audit(self, path, device, partition):
    """
    Audits the given object path.

    :param path: a path to an object
    :param device: the device the path is on
    :param partition: the partition the path is on
    """
    try:
        try:
            name = diskfile.read_metadata(path)['name']
        except (Exception, Timeout) as exc:
            raise AuditException('Error when reading metadata: %s' % exc)
        _junk, account, container, obj = name.split('/', 3)
        df = diskfile.DiskFile(self.devices, device, partition,
                               account, container, obj, self.logger,
                               keep_data_fp=True)
        try:
            try:
                obj_size = df.get_data_file_size()
            except DiskFileNotExist:
                return
            except DiskFileError as e:
                raise AuditException(str(e))
            if self.stats_sizes:
                self.record_stats(obj_size)
            if self.zero_byte_only_at_fps and obj_size:
                self.passes += 1
                return
            for chunk in df:
                self.bytes_running_time = ratelimit_sleep(
                    self.bytes_running_time, self.max_bytes_per_second,
                    incr_by=len(chunk))
                self.bytes_processed += len(chunk)
                self.total_bytes_processed += len(chunk)
            df.close()
            if df.quarantined_dir:
                self.quarantines += 1
                self.logger.error(
                    _("ERROR Object %(path)s failed audit and will be "
                      "quarantined: ETag and file's md5 do not match"),
                    {'path': path})
        finally:
            df.close(verify_file=False)
    except AuditException as err:
        self.logger.increment('quarantines')
        self.quarantines += 1
        self.logger.error(_('ERROR Object %(obj)s failed audit and will '
                            'be quarantined: %(err)s'),
                          {'obj': path, 'err': err})
        diskfile.quarantine_renamer(
            os.path.join(self.devices, device), path)
        return
    except (Exception, Timeout):
        self.logger.increment('errors')
        self.errors += 1
        self.logger.exception(_('ERROR Trying to audit %s'), path)
        return
    self.passes += 1
def audit_all_objects(self, mode='once'):
    self.logger.info(_('Begin object audit "%s" mode (%s)') %
                     (mode, self.auditor_type))
    begin = reported = time.time()
    self.total_bytes_processed = 0
    self.total_files_processed = 0
    total_quarantines = 0
    total_errors = 0
    files_running_time = 0
    time_auditing = 0
    all_locs = audit_location_generator(self.devices,
                                        object_server.DATADIR,
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        loop_time = time.time()
        self.object_audit(path, device, partition)
        self.logger.timing_since('timing', loop_time)
        self.files_running_time = ratelimit_sleep(
            self.files_running_time, self.max_files_per_second)
        self.total_files_processed += 1
        now = time.time()
        if now - reported >= self.log_time:
            self.logger.info(_(
                'Object audit (%(type)s). '
                'Since %(start_time)s: Locally: %(passes)d passed, '
                '%(quars)d quarantined, %(errors)d errors '
                'files/sec: %(frate).2f , bytes/sec: %(brate).2f, '
                'Total time: %(total).2f, Auditing time: %(audit).2f, '
                'Rate: %(audit_rate).2f') % {
                    'type': self.auditor_type,
                    'start_time': time.ctime(reported),
                    'passes': self.passes,
                    'quars': self.quarantines,
                    'errors': self.errors,
                    'frate': self.passes / (now - reported),
                    'brate': self.bytes_processed / (now - reported),
                    'total': (now - begin),
                    'audit': time_auditing,
                    'audit_rate': time_auditing / (now - begin)})
            reported = now
            total_quarantines += self.quarantines
            total_errors += self.errors
            self.passes = 0
            self.quarantines = 0
            self.errors = 0
            self.bytes_processed = 0
        time_auditing += (now - loop_time)
    # Avoid divide by zero during very short runs
    elapsed = (time.time() - begin) or 0.000001
    self.logger.info(_(
        'Object audit (%(type)s) "%(mode)s" mode '
        'completed: %(elapsed).02fs. Total quarantined: %(quars)d, '
        'Total errors: %(errors)d, Total files/sec: %(frate).2f , '
        'Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, '
        'Rate: %(audit_rate).2f') % {
            'type': self.auditor_type, 'mode': mode, 'elapsed': elapsed,
            'quars': total_quarantines, 'errors': total_errors,
            'frate': self.total_files_processed / elapsed,
            'brate': self.total_bytes_processed / elapsed,
            'audit': time_auditing,
            'audit_rate': time_auditing / elapsed})
def object_audit(self, path, device, partition):
    """
    Audits the given object path.

    :param path: a path to an object
    :param device: the device the path is on
    :param partition: the partition the path is on
    """
    try:
        try:
            name = diskfile.read_metadata(path)['name']
        except (Exception, Timeout) as exc:
            raise AuditException('Error when reading metadata: %s' % exc)
        _junk, account, container, obj = name.split('/', 3)
        df = diskfile.DiskFile(self.devices, device, partition,
                               account, container, obj, self.logger,
                               keep_data_fp=True)
        try:
            with df.open():
                metadata = df.get_metadata()
                obj_size = int(metadata['Content-Length'])
                if self.stats_sizes:
                    self.record_stats(obj_size)
                if self.zero_byte_only_at_fps and obj_size:
                    self.passes += 1
                    return
                reader = df.reader()
            with closing(reader):
                for chunk in reader:
                    chunk_len = len(chunk)
                    self.bytes_running_time = ratelimit_sleep(
                        self.bytes_running_time,
                        self.max_bytes_per_second,
                        incr_by=chunk_len)
                    self.bytes_processed += chunk_len
                    self.total_bytes_processed += chunk_len
            if reader.was_quarantined:
                self.quarantines += 1
                self.logger.error(_('ERROR Object %(obj)s failed audit and'
                                    ' was quarantined: %(err)s'),
                                  {'obj': path,
                                   'err': reader.was_quarantined})
                return
        except DiskFileNotExist:
            return
        except DiskFileQuarantined as err:
            self.quarantines += 1
            self.logger.error(_('ERROR Object %(obj)s failed audit and was'
                                ' quarantined: %(err)s'),
                              {'obj': path, 'err': err})
    except AuditException as err:
        self.logger.increment('quarantines')
        self.quarantines += 1
        self.logger.error(_('ERROR Object %(obj)s failed audit and will'
                            ' be quarantined: %(err)s'),
                          {'obj': path, 'err': err})
        diskfile.quarantine_renamer(
            os.path.join(self.devices, device), path)
        return
    self.passes += 1
def object_sweep(self, device):
    """
    If there are async pendings on the device, walk each one and update.

    :param device: path to device
    """
    start_time = time.time()
    # loop through async pending dirs for all policies
    for asyncdir in self._listdir(device):
        # we only care about directories
        async_pending = os.path.join(device, asyncdir)
        if not os.path.isdir(async_pending):
            continue
        if not asyncdir.startswith(ASYNCDIR_BASE):
            # skip stuff like "accounts", "containers", etc.
            continue
        try:
            base, policy = split_policy_string(asyncdir)
        except PolicyError as e:
            self.logger.warning(_('Directory %(directory)r does not map '
                                  'to a valid policy (%(error)s)') % {
                                      'directory': asyncdir, 'error': e})
            continue
        for prefix in self._listdir(async_pending):
            prefix_path = os.path.join(async_pending, prefix)
            if not os.path.isdir(prefix_path):
                continue
            last_obj_hash = None
            for update in sorted(self._listdir(prefix_path),
                                 reverse=True):
                update_path = os.path.join(prefix_path, update)
                if not os.path.isfile(update_path):
                    continue
                try:
                    obj_hash, timestamp = update.split('-')
                except ValueError:
                    self.logger.increment('errors')
                    self.logger.error(
                        _('ERROR async pending file with unexpected '
                          'name %s') % (update_path))
                    continue
                if obj_hash == last_obj_hash:
                    self.logger.increment("unlinks")
                    os.unlink(update_path)
                else:
                    self.process_object_update(update_path, device,
                                               policy)
                    last_obj_hash = obj_hash
                self.objects_running_time = ratelimit_sleep(
                    self.objects_running_time,
                    self.max_objects_per_second)
            try:
                os.rmdir(prefix_path)
            except OSError:
                pass
    self.logger.timing_since('timing', start_time)
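# A note on the layout object_sweep above consumes: judging from the path
# handling in the code (ASYNCDIR_BASE, the hash-prefix directory, and the
# '<obj_hash>-<timestamp>' filename split), the on-disk structure is roughly
#
#   <device>/async_pending[-<policy_index>]/<prefix>/<obj_hash>-<timestamp>
#
# for example (a hypothetical entry, not taken from a real cluster):
#
#   /srv/node/sda1/async_pending-1/b1b/...b1b-1403619931.71722
#
# Because updates are walked newest-first (sorted(..., reverse=True)), only
# the most recent update per object hash is processed; older duplicates for
# the same hash are unlinked without being replayed.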
def test_ratelimit_sleep_with_sleep(self):
    running_time = 0
    start = time.time()
    sleeps = [0] * 7 + [0.2] * 3 + [0] * 30
    for i in sleeps:
        running_time = utils.ratelimit_sleep(running_time, 40,
                                             rate_buffer=1)
        time.sleep(i)
    # make sure it's accurate to 10th of a second
    self.assertTrue(abs(100 - (time.time() - start) * 100) < 10)
def test_ratelimit_sleep_with_incr(self):
    running_time = 0
    start = time.time()
    vals = [5, 17, 0, 3, 11, 30,
            40, 4, 13, 2, -1] * 2  # adds up to 250 (with no -1)
    total = 0
    for i in vals:
        running_time = utils.ratelimit_sleep(running_time,
                                             500, incr_by=i)
        total += i
    self.assertTrue(abs(50 - (time.time() - start) * 100) < 10)
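# The three tests above pin down the contract of utils.ratelimit_sleep: it
# paces a loop to at most max_rate increments per second, skips limiting when
# max_rate is falsy or incr_by is non-positive, forgives idle gaps of up to
# rate_buffer seconds without granting a catch-up burst, and returns an
# updated running_time bookmark for the next call. Below is a minimal sketch
# of that contract, inferred from the tests; it is an illustration only, not
# Swift's actual implementation (which sleeps via eventlet and tracks the
# bookmark in milliseconds).
import time


def ratelimit_sleep_sketch(running_time, max_rate, incr_by=1, rate_buffer=5):
    if not max_rate or incr_by <= 0:
        # rate limiting disabled, or nothing consumed: no pacing
        return running_time
    # seconds this increment is allowed to take at the target rate
    time_per_incr = float(incr_by) / max_rate
    now = time.time()
    if now - running_time > rate_buffer:
        # caller was idle longer than rate_buffer: restart the bookmark
        # instead of allowing a large burst (see the rate_buffer test)
        running_time = now
    elif running_time > now:
        # ahead of schedule: sleep off the surplus
        time.sleep(running_time - now)
    return running_time + time_per_incr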
def object_audit(self, location):
    """
    Audits the given object location.

    :param location: an audit location
                     (from diskfile.object_audit_location_generator)
    """
    def raise_dfq(msg):
        raise DiskFileQuarantined(msg)

    diskfile_mgr = self.diskfile_router[location.policy]
    # this method doesn't normally raise errors, even if the audit
    # location does not exist; if this raises an unexpected error it
    # will get logged in failsafe
    df = diskfile_mgr.get_diskfile_from_audit_location(location)
    reader = None
    try:
        with df.open():
            metadata = df.get_metadata()
            obj_size = int(metadata['Content-Length'])
            if self.stats_sizes:
                self.record_stats(obj_size)
            if obj_size and not self.zero_byte_only_at_fps:
                reader = df.reader(_quarantine_hook=raise_dfq)
        if reader:
            with closing(reader):
                for chunk in reader:
                    chunk_len = len(chunk)
                    self.bytes_running_time = ratelimit_sleep(
                        self.bytes_running_time,
                        self.max_bytes_per_second,
                        incr_by=chunk_len)
                    self.bytes_processed += chunk_len
                    self.total_bytes_processed += chunk_len
    except DiskFileNotExist:
        pass
    except DiskFileQuarantined as err:
        self.quarantines += 1
        self.logger.error(_('ERROR Object %(obj)s failed audit and was'
                            ' quarantined: %(err)s'),
                          {'obj': location, 'err': err})
    self.passes += 1

    # _ondisk_info attr is initialized to None and filled in by open
    ondisk_info_dict = df._ondisk_info or {}
    if 'unexpected' in ondisk_info_dict:
        is_rsync_tempfile = lambda fpath: RE_RSYNC_TEMPFILE.match(
            os.path.basename(fpath))
        rsync_tempfile_paths = filter(is_rsync_tempfile,
                                      ondisk_info_dict['unexpected'])
        mtime = time.time() - self.rsync_tempfile_timeout
        unlink_paths_older_than(rsync_tempfile_paths, mtime)
def container_sweep(self, path):
    """
    Walk the path looking for container DBs and process them.

    :param path: path to walk
    """
    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith('.db'):
                self.process_container(os.path.join(root, file))
                self.containers_running_time = ratelimit_sleep(
                    self.containers_running_time,
                    self.max_containers_per_second)
def object_audit(self, path, device, partition):
    """
    Audits the given object path.

    :param path: a path to an object
    :param device: the device the path is on
    :param partition: the partition the path is on
    """
    try:
        if not path.endswith(".data"):
            return
        try:
            name = object_server.read_metadata(path)["name"]
        except (Exception, Timeout) as exc:
            raise AuditException("Error when reading metadata: %s" % exc)
        _junk, account, container, obj = name.split("/", 3)
        df = object_server.DiskFile(
            self.devices, device, partition, account, container, obj, self.logger, keep_data_fp=True
        )
        try:
            if df.data_file is None:
                # file is deleted, we found the tombstone
                return
            try:
                obj_size = df.get_data_file_size()
            except DiskFileError as e:
                raise AuditException(str(e))
            except DiskFileNotExist:
                return
            if self.zero_byte_only_at_fps and obj_size:
                self.passes += 1
                return
            for chunk in df:
                self.bytes_running_time = ratelimit_sleep(
                    self.bytes_running_time, self.max_bytes_per_second, incr_by=len(chunk)
                )
                self.bytes_processed += len(chunk)
                self.total_bytes_processed += len(chunk)
            df.close()
            if df.quarantined_dir:
                self.quarantines += 1
                self.logger.error(
                    _(
                        "ERROR Object %(path)s failed audit and will be "
                        "quarantined: ETag and file's md5 do not match"
                    ),
                    {"path": path},
                )
def audit_all_objects(self, mode='once'):
    self.logger.info(_('Begin object audit "%s" mode (%s)') %
                     (mode, self.auditor_type))
    begin = reported = time.time()
    self.total_bytes_processed = 0
    self.total_files_processed = 0
    files_running_time = 0
    all_locs = audit_location_generator(self.devices,
                                        object_server.DATADIR,
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        self.object_audit(path, device, partition)
        self.files_running_time = ratelimit_sleep(
            self.files_running_time, self.max_files_per_second)
        self.total_files_processed += 1
        if time.time() - reported >= self.log_time:
            self.logger.info(_(
                'Object audit (%(type)s). '
                'Since %(start_time)s: Locally: %(passes)d passed, '
                '%(quars)d quarantined, %(errors)d errors '
                'files/sec: %(frate).2f , bytes/sec: %(brate).2f') % {
                    'type': self.auditor_type,
                    'start_time': time.ctime(reported),
                    'passes': self.passes,
                    'quars': self.quarantines,
                    'errors': self.errors,
                    'frate': self.passes / (time.time() - reported),
                    'brate': self.bytes_processed /
                    (time.time() - reported)})
            reported = time.time()
            self.passes = 0
            self.quarantines = 0
            self.errors = 0
            self.bytes_processed = 0
    # Avoid divide by zero during very short runs
    elapsed = (time.time() - begin) or 0.000001
    self.logger.info(_(
        'Object audit (%(type)s) "%(mode)s" mode '
        'completed: %(elapsed).02fs. '
        'Total files/sec: %(frate).2f , '
        'Total bytes/sec: %(brate).2f ') % {
            'type': self.auditor_type,
            'mode': mode,
            'elapsed': elapsed,
            'frate': self.total_files_processed / elapsed,
            'brate': self.total_bytes_processed / elapsed})
def object_audit(self, path, device, partition):
    """
    Audits the given object path.

    :param path: a path to an object
    :param device: the device the path is on
    :param partition: the partition the path is on
    """
    try:
        if not path.endswith('.data'):
            return
        try:
            name = object_server.read_metadata(path)['name']
        except Exception as exc:
            raise AuditException('Error when reading metadata: %s' % exc)
        _junk, account, container, obj = name.split('/', 3)
        df = object_server.DiskFile(self.devices, device, partition,
                                    account, container, obj,
                                    keep_data_fp=True)
        if df.data_file is None:
            # file is deleted, we found the tombstone
            return
        if os.path.getsize(df.data_file) != \
                int(df.metadata['Content-Length']):
            raise AuditException('Content-Length of %s does not match '
                                 'file size of %s' %
                                 (int(df.metadata['Content-Length']),
                                  os.path.getsize(df.data_file)))
        etag = md5()
        for chunk in df:
            self.bytes_running_time = ratelimit_sleep(
                self.bytes_running_time, self.max_bytes_per_second,
                incr_by=len(chunk))
            etag.update(chunk)
            self.bytes_processed += len(chunk)
            self.total_bytes_processed += len(chunk)
        etag = etag.hexdigest()
        if etag != df.metadata['ETag']:
            raise AuditException("ETag of %s does not match file's md5 of "
                                 "%s" % (df.metadata['ETag'], etag))
def container_sweep(self, path):
    """
    Walk the path looking for container DBs and process them.

    :param path: path to walk
    """
    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith('.db'):
                dbfile = os.path.join(root, file)
                try:
                    self.process_container(dbfile)
                except (Exception, Timeout) as e:
                    self.logger.exception(
                        "Error processing container %s: %s", dbfile, e)
                self.containers_running_time = ratelimit_sleep(
                    self.containers_running_time,
                    self.max_containers_per_second)
def object_audit(self, location):
    """
    Audits the given object location.

    :param location: an audit location
                     (from diskfile.object_audit_location_generator)
    """
    def raise_dfq(msg):
        raise DiskFileQuarantined(msg)

    try:
        df = self.diskfile_mgr.get_diskfile_from_audit_location(location)
        with df.open():
            metadata = df.get_metadata()
            obj_size = int(metadata['Content-Length'])
            if self.stats_sizes:
                self.record_stats(obj_size)
            if self.zero_byte_only_at_fps and obj_size:
                self.passes += 1
                return
            reader = df.reader(_quarantine_hook=raise_dfq)
        with closing(reader):
            for chunk in reader:
                chunk_len = len(chunk)
                self.bytes_running_time = ratelimit_sleep(
                    self.bytes_running_time,
                    self.max_bytes_per_second,
                    incr_by=chunk_len)
                self.bytes_processed += chunk_len
                self.total_bytes_processed += chunk_len
    except DiskFileNotExist:
        return
    except DiskFileQuarantined as err:
        self.quarantines += 1
        self.logger.error(_('ERROR Object %(obj)s failed audit and was'
                            ' quarantined: %(err)s'),
                          {'obj': location, 'err': err})
    self.passes += 1
def object_audit(self, path, device, partition):
    """
    Audits the given object path.

    :param path: a path to an object
    :param device: the device the path is on
    :param partition: the partition the path is on
    """
    try:
        if not path.endswith('.data'):
            return
        try:
            name = object_server.read_metadata(path)['name']
        except Exception as exc:
            raise AuditException('Error when reading metadata: %s' % exc)
        _junk, account, container, obj = name.split('/', 3)
        df = object_server.DiskFile(self.devices, device, partition,
                                    account, container, obj,
                                    keep_data_fp=True)
        if df.data_file is None:
            # file is deleted, we found the tombstone
            return
        obj_size = os.path.getsize(df.data_file)
        if obj_size != int(df.metadata['Content-Length']):
            raise AuditException('Content-Length of %s does not match '
                                 'file size of %s' %
                                 (int(df.metadata['Content-Length']),
                                  os.path.getsize(df.data_file)))
        if self.zero_byte_only_at_fps and obj_size:
            return
        etag = md5()
        for chunk in df:
            self.bytes_running_time = ratelimit_sleep(
                self.bytes_running_time, self.max_bytes_per_second,
                incr_by=len(chunk))
            etag.update(chunk)
            self.bytes_processed += len(chunk)
            self.total_bytes_processed += len(chunk)
        etag = etag.hexdigest()
        if etag != df.metadata['ETag']:
            raise AuditException("ETag of %s does not match file's md5 of "
                                 "%s" % (df.metadata['ETag'], etag))
def object_audit(self, location):
    """
    Audits the given object location.

    :param location: an audit location
                     (from diskfile.object_audit_location_generator)
    """
    def raise_dfq(msg):
        raise DiskFileQuarantined(msg)

    diskfile_mgr = self.diskfile_router[location.policy]
    try:
        df = diskfile_mgr.get_diskfile_from_audit_location(location)
        with df.open():
            metadata = df.get_metadata()
            obj_size = int(metadata['Content-Length'])
            if self.stats_sizes:
                self.record_stats(obj_size)
            if self.zero_byte_only_at_fps and obj_size:
                self.passes += 1
                return
            reader = df.reader(_quarantine_hook=raise_dfq)
        with closing(reader):
            for chunk in reader:
                chunk_len = len(chunk)
                self.bytes_running_time = ratelimit_sleep(
                    self.bytes_running_time,
                    self.max_bytes_per_second,
                    incr_by=chunk_len)
                self.bytes_processed += chunk_len
                self.total_bytes_processed += chunk_len
    except DiskFileNotExist:
        return
    except DiskFileQuarantined as err:
        self.quarantines += 1
        self.logger.error(
            _('ERROR Object %(obj)s failed audit and was'
              ' quarantined: %(err)s'),
            {'obj': location, 'err': err})
    self.passes += 1
def run_once(self, mode='once'):
    """Run the object audit once."""
    self.logger.info(_('Begin object audit "%s" mode') % mode)
    begin = reported = time.time()
    all_locs = audit_location_generator(self.devices,
                                        object_server.DATADIR,
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        self.object_audit(path, device, partition)
        self.files_running_time = ratelimit_sleep(
            self.files_running_time, self.max_files_per_second)
        self.total_files_processed += 1
        if time.time() - reported >= self.log_time:
            self.logger.info(_(
                'Since %(start_time)s: Locally: %(passes)d passed audit, '
                '%(quars)d quarantined, %(errors)d errors '
                'files/sec: %(frate).2f , bytes/sec: %(brate).2f') % {
                    'start_time': time.ctime(reported),
                    'passes': self.passes,
                    'quars': self.quarantines,
                    'errors': self.errors,
                    'frate': self.passes / (time.time() - reported),
                    'brate': self.bytes_processed /
                    (time.time() - reported)})
            reported = time.time()
            self.passes = 0
            self.quarantines = 0
            self.errors = 0
            self.bytes_processed = 0
    # Avoid divide by zero during very short runs
    elapsed = (time.time() - begin) or 0.000001
    self.logger.info(_(
        'Object audit "%(mode)s" mode completed: %(elapsed).02fs. '
        'Total files/sec: %(frate).2f , '
        'Total bytes/sec: %(brate).2f ') % {
            'mode': mode,
            'elapsed': elapsed,
            'frate': self.total_files_processed / elapsed,
            'brate': self.total_bytes_processed / elapsed})
def _one_audit_pass(self, reported):
    all_locs = audit_location_generator(self.devices, DATADIR, '.db',
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        self.account_audit(path)
        if time.time() - reported >= self.logging_interval:
            self.logger.info(_('Since %(time)s: Account audits: '
                               '%(passed)s passed audit, '
                               '%(failed)s failed audit'),
                             {'time': time.ctime(reported),
                              'passed': self.account_passes,
                              'failed': self.account_failures})
            dump_recon_cache({'account_audits_since': reported,
                              'account_audits_passed': self.account_passes,
                              'account_audits_failed':
                              self.account_failures},
                             self.rcache, self.logger)
            reported = time.time()
            self.account_passes = 0
            self.account_failures = 0
        self.accounts_running_time = ratelimit_sleep(
            self.accounts_running_time, self.max_accounts_per_second)
    return reported
def _one_audit_pass(self, reported):
    all_locs = audit_location_generator(self.devices, DATADIR, '.db',
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        self.container_audit(path)
        if time.time() - reported >= 3600:  # once an hour
            self.logger.info(
                _('Since %(time)s: Container audits: %(pass)s passed '
                  'audit, %(fail)s failed audit'),
                {'time': time.ctime(reported),
                 'pass': self.container_passes,
                 'fail': self.container_failures})
            dump_recon_cache(
                {'container_audits_since': reported,
                 'container_audits_passed': self.container_passes,
                 'container_audits_failed': self.container_failures},
                self.rcache, self.logger)
            reported = time.time()
            self.container_passes = 0
            self.container_failures = 0
        self.containers_running_time = ratelimit_sleep(
            self.containers_running_time,
            self.max_containers_per_second)
    return reported
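# The account and container audit passes above both drive
# audit_location_generator(devices, datadir, '.db', ...), which yields one
# (path, device, partition) tuple per matching file found under
# <devices>/<device>/<datadir>/<partition>/... The sketch below shows the
# shape of that traversal as inferred from how its results are consumed
# here; it is illustrative only (the real generator also honors mount_check,
# shuffles devices, and logs unreadable directories).
import os


def audit_location_generator_sketch(devices, datadir, suffix=''):
    for device in sorted(os.listdir(devices)):
        datadir_path = os.path.join(devices, device, datadir)
        if not os.path.isdir(datadir_path):
            continue
        for partition in os.listdir(datadir_path):
            part_path = os.path.join(datadir_path, partition)
            if not os.path.isdir(part_path):
                continue
            for root, dirs, files in os.walk(part_path):
                for fname in files:
                    # only yield files with the requested suffix,
                    # e.g. '.db' for account/container databases
                    if suffix and not fname.endswith(suffix):
                        continue
                    yield os.path.join(root, fname), device, partition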
def audit_all_objects(self, mode="once"): self.logger.info(_('Begin object audit "%s" mode (%s)' % (mode, self.auditor_type))) begin = reported = time.time() self.total_bytes_processed = 0 self.total_files_processed = 0 total_quarantines = 0 total_errors = 0 files_running_time = 0 time_auditing = 0 all_locs = audit_location_generator( self.devices, object_server.DATADIR, mount_check=self.mount_check, logger=self.logger ) for path, device, partition in all_locs: loop_time = time.time() self.object_audit(path, device, partition) self.files_running_time = ratelimit_sleep(self.files_running_time, self.max_files_per_second) self.total_files_processed += 1 now = time.time() if now - reported >= self.log_time: self.logger.info( _( "Object audit (%(type)s). " "Since %(start_time)s: Locally: %(passes)d passed, " "%(quars)d quarantined, %(errors)d errors " "files/sec: %(frate).2f , bytes/sec: %(brate).2f, " "Total time: %(total).2f, Auditing time: %(audit).2f, " "Rate: %(audit_rate).2f" ) % { "type": self.auditor_type, "start_time": time.ctime(reported), "passes": self.passes, "quars": self.quarantines, "errors": self.errors, "frate": self.passes / (now - reported), "brate": self.bytes_processed / (now - reported), "total": (now - begin), "audit": time_auditing, "audit_rate": time_auditing / (now - begin), } ) reported = now total_quarantines += self.quarantines total_errors += self.errors self.passes = 0 self.quarantines = 0 self.errors = 0 self.bytes_processed = 0 time_auditing += now - loop_time elapsed = time.time() - begin self.logger.info( _( 'Object audit (%(type)s) "%(mode)s" mode ' "completed: %(elapsed).02fs. Total quarantined: %(quars)d, " "Total errors: %(errors)d, Total files/sec: %(frate).2f , " "Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, " "Rate: %(audit_rate).2f" ) % { "type": self.auditor_type, "mode": mode, "elapsed": elapsed, "quars": total_quarantines, "errors": total_errors, "frate": self.total_files_processed / elapsed, "brate": self.total_bytes_processed / elapsed, "audit": time_auditing, "audit_rate": time_auditing / elapsed, } )
def audit_all_objects(self, mode='once', device_dirs=None):
    description = ''
    if device_dirs:
        device_dir_str = ','.join(sorted(device_dirs))
        if self.auditor_type == 'ALL':
            description = _(' - parallel, %s') % device_dir_str
        else:
            description = _(' - %s') % device_dir_str
    self.logger.info(_('Begin object audit "%(mode)s" mode (%(audi_type)s'
                       '%(description)s)') % {'mode': mode,
                                              'audi_type':
                                              self.auditor_type,
                                              'description': description})
    begin = reported = time.time()
    self.total_bytes_processed = 0
    self.total_files_processed = 0
    total_quarantines = 0
    total_errors = 0
    time_auditing = 0
    # TODO: we should move audit-location generation to the storage
    # policy, as we may (conceivably) have a different filesystem layout
    # for each. We'd still need to generate the policies to audit from
    # the actual directories found on-disk, and have appropriate error
    # reporting if we find a directory that doesn't correspond to any
    # known policy. This will require a sizable refactor, but currently
    # all diskfile managers can find all diskfile locations regardless
    # of policy -- so for now just use Policy-0's manager.
    all_locs = (self.diskfile_router[POLICIES[0]]
                .object_audit_location_generator(
                    device_dirs=device_dirs,
                    auditor_type=self.auditor_type))
    for location in all_locs:
        loop_time = time.time()
        self.failsafe_object_audit(location)
        self.logger.timing_since('timing', loop_time)
        self.files_running_time = ratelimit_sleep(
            self.files_running_time, self.max_files_per_second)
        self.total_files_processed += 1
        now = time.time()
        if now - self.last_logged >= self.log_time:
            self.logger.info(_(
                'Object audit (%(type)s). '
                'Since %(start_time)s: Locally: %(passes)d passed, '
                '%(quars)d quarantined, %(errors)d errors, '
                'files/sec: %(frate).2f, bytes/sec: %(brate).2f, '
                'Total time: %(total).2f, Auditing time: %(audit).2f, '
                'Rate: %(audit_rate).2f') % {
                    'type': '%s%s' % (self.auditor_type, description),
                    'start_time': time.ctime(reported),
                    'passes': self.passes,
                    'quars': self.quarantines,
                    'errors': self.errors,
                    'frate': self.passes / (now - reported),
                    'brate': self.bytes_processed / (now - reported),
                    'total': (now - begin),
                    'audit': time_auditing,
                    'audit_rate': time_auditing / (now - begin)})
            cache_entry = self.create_recon_nested_dict(
                'object_auditor_stats_%s' % (self.auditor_type),
                device_dirs,
                {'errors': self.errors, 'passes': self.passes,
                 'quarantined': self.quarantines,
                 'bytes_processed': self.bytes_processed,
                 'start_time': reported, 'audit_time': time_auditing})
            dump_recon_cache(cache_entry, self.rcache, self.logger)
            reported = now
            total_quarantines += self.quarantines
            total_errors += self.errors
            self.passes = 0
            self.quarantines = 0
            self.errors = 0
            self.bytes_processed = 0
            self.last_logged = now
        time_auditing += (now - loop_time)
    # Avoid divide by zero during very short runs
    elapsed = (time.time() - begin) or 0.000001
    self.logger.info(_(
        'Object audit (%(type)s) "%(mode)s" mode '
        'completed: %(elapsed).02fs. Total quarantined: %(quars)d, '
        'Total errors: %(errors)d, Total files/sec: %(frate).2f, '
        'Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, '
        'Rate: %(audit_rate).2f') % {
            'type': '%s%s' % (self.auditor_type, description),
            'mode': mode, 'elapsed': elapsed,
            'quars': total_quarantines + self.quarantines,
            'errors': total_errors + self.errors,
            'frate': self.total_files_processed / elapsed,
            'brate': self.total_bytes_processed / elapsed,
            'audit': time_auditing,
            'audit_rate': time_auditing / elapsed})
    if self.stats_sizes:
        self.logger.info(
            _('Object audit stats: %s') % json.dumps(self.stats_buckets))

    # Unset remaining partitions to not skip them in the next run
    diskfile.clear_auditor_status(self.devices, self.auditor_type)
def audit_all_objects(self, mode="once"): self.logger.info(_('Begin object audit "%s" mode (%s)') % (mode, self.auditor_type)) begin = reported = time.time() self.total_bytes_processed = 0 self.total_files_processed = 0 total_quarantines = 0 total_errors = 0 time_auditing = 0 all_locs = audit_location_generator( self.devices, object_server.DATADIR, ".data", mount_check=self.mount_check, logger=self.logger ) for path, device, partition in all_locs: loop_time = time.time() self.failsafe_object_audit(path, device, partition) self.logger.timing_since("timing", loop_time) self.files_running_time = ratelimit_sleep(self.files_running_time, self.max_files_per_second) self.total_files_processed += 1 now = time.time() if now - reported >= self.log_time: self.logger.info( _( "Object audit (%(type)s). " "Since %(start_time)s: Locally: %(passes)d passed, " "%(quars)d quarantined, %(errors)d errors " "files/sec: %(frate).2f , bytes/sec: %(brate).2f, " "Total time: %(total).2f, Auditing time: %(audit).2f, " "Rate: %(audit_rate).2f" ) % { "type": self.auditor_type, "start_time": time.ctime(reported), "passes": self.passes, "quars": self.quarantines, "errors": self.errors, "frate": self.passes / (now - reported), "brate": self.bytes_processed / (now - reported), "total": (now - begin), "audit": time_auditing, "audit_rate": time_auditing / (now - begin), } ) dump_recon_cache( { "object_auditor_stats_%s" % self.auditor_type: { "errors": self.errors, "passes": self.passes, "quarantined": self.quarantines, "bytes_processed": self.bytes_processed, "start_time": reported, "audit_time": time_auditing, } }, self.rcache, self.logger, ) reported = now total_quarantines += self.quarantines total_errors += self.errors self.passes = 0 self.quarantines = 0 self.errors = 0 self.bytes_processed = 0 time_auditing += now - loop_time # Avoid divide by zero during very short runs elapsed = (time.time() - begin) or 0.000001 self.logger.info( _( 'Object audit (%(type)s) "%(mode)s" mode ' "completed: %(elapsed).02fs. Total quarantined: %(quars)d, " "Total errors: %(errors)d, Total files/sec: %(frate).2f , " "Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, " "Rate: %(audit_rate).2f" ) % { "type": self.auditor_type, "mode": mode, "elapsed": elapsed, "quars": total_quarantines, "errors": total_errors, "frate": self.total_files_processed / elapsed, "brate": self.total_bytes_processed / elapsed, "audit": time_auditing, "audit_rate": time_auditing / elapsed, } ) if self.stats_sizes: self.logger.info(_("Object audit stats: %s") % json.dumps(self.stats_buckets))
def audit_all_objects(self, mode='once', device_dirs=None):
    description = ''
    if device_dirs:
        device_dir_str = ','.join(sorted(device_dirs))
        if self.auditor_type == 'ALL':
            description = _(' - parallel, %s') % device_dir_str
        else:
            description = _(' - %s') % device_dir_str
    self.logger.info(
        _('Begin object audit "%s" mode (%s%s)') %
        (mode, self.auditor_type, description))
    begin = reported = time.time()
    self.total_bytes_processed = 0
    self.total_files_processed = 0
    total_quarantines = 0
    total_errors = 0
    time_auditing = 0
    all_locs = self.diskfile_mgr.object_audit_location_generator(
        device_dirs=device_dirs)
    for location in all_locs:
        loop_time = time.time()
        self.failsafe_object_audit(location)
        self.logger.timing_since('timing', loop_time)
        self.files_running_time = ratelimit_sleep(
            self.files_running_time, self.max_files_per_second)
        self.total_files_processed += 1
        now = time.time()
        if now - self.last_logged >= self.log_time:
            self.logger.info(
                _('Object audit (%(type)s). '
                  'Since %(start_time)s: Locally: %(passes)d passed, '
                  '%(quars)d quarantined, %(errors)d errors '
                  'files/sec: %(frate).2f , bytes/sec: %(brate).2f, '
                  'Total time: %(total).2f, Auditing time: %(audit).2f, '
                  'Rate: %(audit_rate).2f') % {
                    'type': '%s%s' % (self.auditor_type, description),
                    'start_time': time.ctime(reported),
                    'passes': self.passes,
                    'quars': self.quarantines,
                    'errors': self.errors,
                    'frate': self.passes / (now - reported),
                    'brate': self.bytes_processed / (now - reported),
                    'total': (now - begin),
                    'audit': time_auditing,
                    'audit_rate': time_auditing / (now - begin)
                })
            cache_entry = self.create_recon_nested_dict(
                'object_auditor_stats_%s' % (self.auditor_type),
                device_dirs,
                {
                    'errors': self.errors,
                    'passes': self.passes,
                    'quarantined': self.quarantines,
                    'bytes_processed': self.bytes_processed,
                    'start_time': reported,
                    'audit_time': time_auditing
                })
            dump_recon_cache(cache_entry, self.rcache, self.logger)
            reported = now
            total_quarantines += self.quarantines
            total_errors += self.errors
            self.passes = 0
            self.quarantines = 0
            self.errors = 0
            self.bytes_processed = 0
            self.last_logged = now
        time_auditing += (now - loop_time)
    # Avoid divide by zero during very short runs
    elapsed = (time.time() - begin) or 0.000001
    self.logger.info(
        _('Object audit (%(type)s) "%(mode)s" mode '
          'completed: %(elapsed).02fs. Total quarantined: %(quars)d, '
          'Total errors: %(errors)d, Total files/sec: %(frate).2f, '
          'Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, '
          'Rate: %(audit_rate).2f') % {
            'type': '%s%s' % (self.auditor_type, description),
            'mode': mode,
            'elapsed': elapsed,
            'quars': total_quarantines + self.quarantines,
            'errors': total_errors + self.errors,
            'frate': self.total_files_processed / elapsed,
            'brate': self.total_bytes_processed / elapsed,
            'audit': time_auditing,
            'audit_rate': time_auditing / elapsed
        })
    if self.stats_sizes:
        self.logger.info(
            _('Object audit stats: %s') % json.dumps(self.stats_buckets))
def object_audit(self, path, device, partition):
    """
    Audits the given object path.

    :param path: a path to an object
    :param device: the device the path is on
    :param partition: the partition the path is on
    """
    try:
        try:
            name = diskfile.read_metadata(path)['name']
        except (Exception, Timeout) as exc:
            raise AuditException('Error when reading metadata: %s' % exc)
        _junk, account, container, obj = name.split('/', 3)
        df = self.diskfile_mgr.get_diskfile(device, partition, account,
                                            container, obj)
        try:
            with df.open():
                metadata = df.get_metadata()
                obj_size = int(metadata['Content-Length'])
                if self.stats_sizes:
                    self.record_stats(obj_size)
                if self.zero_byte_only_at_fps and obj_size:
                    self.passes += 1
                    return
                reader = df.reader()
            with closing(reader):
                for chunk in reader:
                    chunk_len = len(chunk)
                    self.bytes_running_time = ratelimit_sleep(
                        self.bytes_running_time,
                        self.max_bytes_per_second,
                        incr_by=chunk_len)
                    self.bytes_processed += chunk_len
                    self.total_bytes_processed += chunk_len
            if reader.was_quarantined:
                self.quarantines += 1
                self.logger.error(
                    _('ERROR Object %(obj)s failed audit and'
                      ' was quarantined: %(err)s'),
                    {'obj': path, 'err': reader.was_quarantined})
                return
        except DiskFileNotExist:
            return
        except DiskFileQuarantined as err:
            self.quarantines += 1
            self.logger.error(
                _('ERROR Object %(obj)s failed audit and was'
                  ' quarantined: %(err)s'),
                {'obj': path, 'err': err})
    except AuditException as err:
        self.logger.increment('quarantines')
        self.quarantines += 1
        self.logger.error(
            _('ERROR Object %(obj)s failed audit and will'
              ' be quarantined: %(err)s'),
            {'obj': path, 'err': err})
        diskfile.quarantine_renamer(os.path.join(self.devices, device),
                                    path)
        return
    self.passes += 1
def audit_all_objects(self, mode="once", device_dirs=None): description = "" if device_dirs: device_dir_str = ",".join(sorted(device_dirs)) if self.auditor_type == "ALL": description = _(" - parallel, %s") % device_dir_str else: description = _(" - %s") % device_dir_str self.logger.info(_('Begin object audit "%s" mode (%s%s)') % (mode, self.auditor_type, description)) begin = reported = time.time() self.total_bytes_processed = 0 self.total_files_processed = 0 total_quarantines = 0 total_errors = 0 time_auditing = 0 all_locs = self.diskfile_mgr.object_audit_location_generator(device_dirs=device_dirs) for location in all_locs: loop_time = time.time() self.failsafe_object_audit(location) self.logger.timing_since("timing", loop_time) self.files_running_time = ratelimit_sleep(self.files_running_time, self.max_files_per_second) self.total_files_processed += 1 now = time.time() if now - self.last_logged >= self.log_time: self.logger.info( _( "Object audit (%(type)s). " "Since %(start_time)s: Locally: %(passes)d passed, " "%(quars)d quarantined, %(errors)d errors " "files/sec: %(frate).2f , bytes/sec: %(brate).2f, " "Total time: %(total).2f, Auditing time: %(audit).2f, " "Rate: %(audit_rate).2f" ) % { "type": "%s%s" % (self.auditor_type, description), "start_time": time.ctime(reported), "passes": self.passes, "quars": self.quarantines, "errors": self.errors, "frate": self.passes / (now - reported), "brate": self.bytes_processed / (now - reported), "total": (now - begin), "audit": time_auditing, "audit_rate": time_auditing / (now - begin), } ) cache_entry = self.create_recon_nested_dict( "object_auditor_stats_%s" % (self.auditor_type), device_dirs, { "errors": self.errors, "passes": self.passes, "quarantined": self.quarantines, "bytes_processed": self.bytes_processed, "start_time": reported, "audit_time": time_auditing, }, ) dump_recon_cache(cache_entry, self.rcache, self.logger) reported = now total_quarantines += self.quarantines total_errors += self.errors self.passes = 0 self.quarantines = 0 self.errors = 0 self.bytes_processed = 0 self.last_logged = now time_auditing += now - loop_time # Avoid divide by zero during very short runs elapsed = (time.time() - begin) or 0.000001 self.logger.info( _( 'Object audit (%(type)s) "%(mode)s" mode ' "completed: %(elapsed).02fs. Total quarantined: %(quars)d, " "Total errors: %(errors)d, Total files/sec: %(frate).2f, " "Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, " "Rate: %(audit_rate).2f" ) % { "type": "%s%s" % (self.auditor_type, description), "mode": mode, "elapsed": elapsed, "quars": total_quarantines + self.quarantines, "errors": total_errors + self.errors, "frate": self.total_files_processed / elapsed, "brate": self.total_bytes_processed / elapsed, "audit": time_auditing, "audit_rate": time_auditing / elapsed, } ) if self.stats_sizes: self.logger.info(_("Object audit stats: %s") % json.dumps(self.stats_buckets))
def audit_all_objects(self, mode='once'):
    self.logger.info(
        _('Begin object audit "%s" mode (%s)') % (mode, self.auditor_type))
    begin = reported = time.time()
    self.total_bytes_processed = 0
    self.total_files_processed = 0
    total_quarantines = 0
    total_errors = 0
    time_auditing = 0
    all_locs = audit_location_generator(self.devices,
                                        object_server.DATADIR, '.data',
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        loop_time = time.time()
        self.failsafe_object_audit(path, device, partition)
        self.logger.timing_since('timing', loop_time)
        self.files_running_time = ratelimit_sleep(
            self.files_running_time, self.max_files_per_second)
        self.total_files_processed += 1
        now = time.time()
        if now - reported >= self.log_time:
            self.logger.info(
                _('Object audit (%(type)s). '
                  'Since %(start_time)s: Locally: %(passes)d passed, '
                  '%(quars)d quarantined, %(errors)d errors '
                  'files/sec: %(frate).2f , bytes/sec: %(brate).2f, '
                  'Total time: %(total).2f, Auditing time: %(audit).2f, '
                  'Rate: %(audit_rate).2f') % {
                    'type': self.auditor_type,
                    'start_time': time.ctime(reported),
                    'passes': self.passes,
                    'quars': self.quarantines,
                    'errors': self.errors,
                    'frate': self.passes / (now - reported),
                    'brate': self.bytes_processed / (now - reported),
                    'total': (now - begin),
                    'audit': time_auditing,
                    'audit_rate': time_auditing / (now - begin)
                })
            dump_recon_cache(
                {
                    'object_auditor_stats_%s' % self.auditor_type: {
                        'errors': self.errors,
                        'passes': self.passes,
                        'quarantined': self.quarantines,
                        'bytes_processed': self.bytes_processed,
                        'start_time': reported,
                        'audit_time': time_auditing
                    }
                }, self.rcache, self.logger)
            reported = now
            total_quarantines += self.quarantines
            total_errors += self.errors
            self.passes = 0
            self.quarantines = 0
            self.errors = 0
            self.bytes_processed = 0
        time_auditing += (now - loop_time)
    # Avoid divide by zero during very short runs
    elapsed = (time.time() - begin) or 0.000001
    self.logger.info(
        _('Object audit (%(type)s) "%(mode)s" mode '
          'completed: %(elapsed).02fs. Total quarantined: %(quars)d, '
          'Total errors: %(errors)d, Total files/sec: %(frate).2f , '
          'Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, '
          'Rate: %(audit_rate).2f') % {
            'type': self.auditor_type,
            'mode': mode,
            'elapsed': elapsed,
            'quars': total_quarantines,
            'errors': total_errors,
            'frate': self.total_files_processed / elapsed,
            'brate': self.total_bytes_processed / elapsed,
            'audit': time_auditing,
            'audit_rate': time_auditing / elapsed
        })
    if self.stats_sizes:
        self.logger.info(
            _('Object audit stats: %s') % json.dumps(self.stats_buckets))
def audit_all_objects(self, mode='once', device_dirs=None):
    description = ''
    if device_dirs:
        device_dir_str = ','.join(sorted(device_dirs))
        if self.auditor_type == 'ALL':
            description = _(' - parallel, %s') % device_dir_str
        else:
            description = _(' - %s') % device_dir_str
    self.logger.info(
        _('Begin object audit "%(mode)s" mode (%(audi_type)s'
          '%(description)s)') % {
            'mode': mode,
            'audi_type': self.auditor_type,
            'description': description
        })
    begin = reported = time.time()
    self.total_bytes_processed = 0
    self.total_files_processed = 0
    total_quarantines = 0
    total_errors = 0
    time_auditing = 0
    # TODO: we should move audit-location generation to the storage
    # policy, as we may (conceivably) have a different filesystem layout
    # for each. We'd still need to generate the policies to audit from
    # the actual directories found on-disk, and have appropriate error
    # reporting if we find a directory that doesn't correspond to any
    # known policy. This will require a sizable refactor, but currently
    # all diskfile managers can find all diskfile locations regardless
    # of policy -- so for now just use Policy-0's manager.
    all_locs = (
        self.diskfile_router[POLICIES[0]].object_audit_location_generator(
            device_dirs=device_dirs, auditor_type=self.auditor_type))
    for location in all_locs:
        loop_time = time.time()
        self.failsafe_object_audit(location)
        self.logger.timing_since('timing', loop_time)
        self.files_running_time = ratelimit_sleep(
            self.files_running_time, self.max_files_per_second)
        self.total_files_processed += 1
        now = time.time()
        if now - self.last_logged >= self.log_time:
            self.logger.info(
                _('Object audit (%(type)s). '
                  'Since %(start_time)s: Locally: %(passes)d passed, '
                  '%(quars)d quarantined, %(errors)d errors, '
                  'files/sec: %(frate).2f, bytes/sec: %(brate).2f, '
                  'Total time: %(total).2f, Auditing time: %(audit).2f, '
                  'Rate: %(audit_rate).2f') % {
                    'type': '%s%s' % (self.auditor_type, description),
                    'start_time': time.ctime(reported),
                    'passes': self.passes,
                    'quars': self.quarantines,
                    'errors': self.errors,
                    'frate': self.passes / (now - reported),
                    'brate': self.bytes_processed / (now - reported),
                    'total': (now - begin),
                    'audit': time_auditing,
                    'audit_rate': time_auditing / (now - begin)
                })
            cache_entry = self.create_recon_nested_dict(
                'object_auditor_stats_%s' % (self.auditor_type),
                device_dirs,
                {
                    'errors': self.errors,
                    'passes': self.passes,
                    'quarantined': self.quarantines,
                    'bytes_processed': self.bytes_processed,
                    'start_time': reported,
                    'audit_time': time_auditing
                })
            dump_recon_cache(cache_entry, self.rcache, self.logger)
            reported = now
            total_quarantines += self.quarantines
            total_errors += self.errors
            self.passes = 0
            self.quarantines = 0
            self.errors = 0
            self.bytes_processed = 0
            self.last_logged = now
        time_auditing += (now - loop_time)
    # Avoid divide by zero during very short runs
    elapsed = (time.time() - begin) or 0.000001
    self.logger.info(
        _('Object audit (%(type)s) "%(mode)s" mode '
          'completed: %(elapsed).02fs. Total quarantined: %(quars)d, '
          'Total errors: %(errors)d, Total files/sec: %(frate).2f, '
          'Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, '
          'Rate: %(audit_rate).2f') % {
            'type': '%s%s' % (self.auditor_type, description),
            'mode': mode,
            'elapsed': elapsed,
            'quars': total_quarantines + self.quarantines,
            'errors': total_errors + self.errors,
            'frate': self.total_files_processed / elapsed,
            'brate': self.total_bytes_processed / elapsed,
            'audit': time_auditing,
            'audit_rate': time_auditing / elapsed
        })
    if self.stats_sizes:
        self.logger.info(
            _('Object audit stats: %s') % json.dumps(self.stats_buckets))

    # Unset remaining partitions to not skip them in the next run
    diskfile.clear_auditor_status(self.devices, self.auditor_type)
def object_sweep(self, device):
    """
    If there are async pendings on the device, walk each one and update.

    :param device: path to device
    """
    start_time = time.time()
    last_status_update = start_time
    start_stats = self.stats.copy()
    my_pid = os.getpid()
    self.logger.info("Object update sweep starting on %s (pid: %d)",
                     device, my_pid)
    # loop through async pending dirs for all policies
    for asyncdir in self._listdir(device):
        # we only care about directories
        async_pending = os.path.join(device, asyncdir)
        if not os.path.isdir(async_pending):
            continue
        if not asyncdir.startswith(ASYNCDIR_BASE):
            # skip stuff like "accounts", "containers", etc.
            continue
        try:
            base, policy = split_policy_string(asyncdir)
        except PolicyError as e:
            # This isn't an error, but a misconfiguration. Logging a
            # warning should be sufficient.
            self.logger.warning(_('Directory %(directory)r does not map '
                                  'to a valid policy (%(error)s)') % {
                                      'directory': asyncdir, 'error': e})
            continue
        for prefix in self._listdir(async_pending):
            prefix_path = os.path.join(async_pending, prefix)
            if not os.path.isdir(prefix_path):
                continue
            last_obj_hash = None
            for update in sorted(self._listdir(prefix_path),
                                 reverse=True):
                update_path = os.path.join(prefix_path, update)
                if not os.path.isfile(update_path):
                    continue
                try:
                    obj_hash, timestamp = update.split('-')
                except ValueError:
                    self.stats.errors += 1
                    self.logger.increment('errors')
                    self.logger.error(
                        _('ERROR async pending file with unexpected '
                          'name %s') % (update_path))
                    continue
                if obj_hash == last_obj_hash:
                    self.stats.unlinks += 1
                    self.logger.increment('unlinks')
                    os.unlink(update_path)
                else:
                    self.process_object_update(update_path, device,
                                               policy)
                    last_obj_hash = obj_hash
                self.objects_running_time = ratelimit_sleep(
                    self.objects_running_time,
                    self.max_objects_per_second)
                now = time.time()
                if now - last_status_update >= self.report_interval:
                    this_sweep = self.stats.since(start_stats)
                    self.logger.info(
                        ('Object update sweep progress on %(device)s: '
                         '%(elapsed).02fs, %(stats)s (pid: %(pid)d)'),
                        {'device': device,
                         'elapsed': now - start_time,
                         'pid': my_pid,
                         'stats': this_sweep})
                    last_status_update = now
            try:
                os.rmdir(prefix_path)
            except OSError:
                pass
    self.logger.timing_since('timing', start_time)
    sweep_totals = self.stats.since(start_stats)
    self.logger.info(
        ('Object update sweep completed on %(device)s '
         'in %(elapsed).02f seconds: '
         '%(successes)d successes, %(failures)d failures, '
         '%(quarantines)d quarantines, '
         '%(unlinks)d unlinks, %(errors)d errors '
         '(pid: %(pid)d)'),
        {'device': device,
         'elapsed': time.time() - start_time,
         'pid': my_pid,
         'successes': sweep_totals.successes,
         'failures': sweep_totals.failures,
         'quarantines': sweep_totals.quarantines,
         'unlinks': sweep_totals.unlinks,
         'errors': sweep_totals.errors})
def object_audit(self, location):
    """
    Audits the given object location.

    :param location: an audit location
                     (from diskfile.object_audit_location_generator)
    """
    def raise_dfq(msg):
        raise DiskFileQuarantined(msg)

    diskfile_mgr = self.diskfile_router[location.policy]
    # this method doesn't normally raise errors, even if the audit
    # location does not exist; if this raises an unexpected error it
    # will get logged in failsafe
    df = diskfile_mgr.get_diskfile_from_audit_location(location)
    reader = None
    try:
        with df.open():
            metadata = df.get_metadata()
            obj_size = int(metadata['Content-Length'])
            if self.stats_sizes:
                self.record_stats(obj_size)
            if obj_size and not self.zero_byte_only_at_fps:
                reader = df.reader(_quarantine_hook=raise_dfq)
        if reader:
            with closing(reader):
                for chunk in reader:
                    chunk_len = len(chunk)
                    self.bytes_running_time = ratelimit_sleep(
                        self.bytes_running_time,
                        self.max_bytes_per_second,
                        incr_by=chunk_len)
                    self.bytes_processed += chunk_len
                    self.total_bytes_processed += chunk_len
    except DiskFileQuarantined as err:
        self.quarantines += 1
        self.logger.error(
            _('ERROR Object %(obj)s failed audit and was'
              ' quarantined: %(err)s'),
            {'obj': location, 'err': err})
    except DiskFileExpired:
        pass  # ignore expired objects
    except DiskFileDeleted:
        # If there is a reclaimable tombstone, we'll invalidate the hash
        # to trigger the replicator to rehash/cleanup this suffix
        ts = df._ondisk_info['ts_info']['timestamp']
        if (not self.zero_byte_only_at_fps and
                (time.time() - float(ts)) > df.manager.reclaim_age):
            df.manager.invalidate_hash(dirname(df._datadir))
    except DiskFileNotExist:
        pass
    self.passes += 1

    # _ondisk_info attr is initialized to None and filled in by open
    ondisk_info_dict = df._ondisk_info or {}
    if 'unexpected' in ondisk_info_dict:
        is_rsync_tempfile = lambda fpath: (diskfile.RE_RSYNC_TEMPFILE.
                                           match(basename(fpath)))
        rsync_tempfile_paths = filter(is_rsync_tempfile,
                                      ondisk_info_dict['unexpected'])
        mtime = time.time() - self.rsync_tempfile_timeout
        unlink_paths_older_than(rsync_tempfile_paths, mtime)
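# The tail of object_audit above removes stale rsync temporary files found in
# the object directory. rsync names its in-flight files by prefixing a dot
# and appending a random suffix (e.g. a hypothetical
# '.1403619931.02188.data.QBYCYU'), which is the shape diskfile's
# RE_RSYNC_TEMPFILE pattern is assumed to match here; any matching path whose
# mtime is older than rsync_tempfile_timeout is handed to
# unlink_paths_older_than() for removal.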
def object_audit(self, path, device, partition):
    """
    Audits the given object path.

    :param path: a path to an object
    :param device: the device the path is on
    :param partition: the partition the path is on
    """
    try:
        try:
            name = diskfile.read_metadata(path)['name']
        except (Exception, Timeout) as exc:
            raise AuditException('Error when reading metadata: %s' % exc)
        _junk, account, container, obj = name.split('/', 3)
        df = diskfile.DiskFile(self.devices, device, partition,
                               account, container, obj, self.logger,
                               keep_data_fp=True)
        try:
            try:
                obj_size = df.get_data_file_size()
            except DiskFileNotExist:
                return
            except DiskFileError as e:
                raise AuditException(str(e))
            if self.stats_sizes:
                self.record_stats(obj_size)
            if self.zero_byte_only_at_fps and obj_size:
                self.passes += 1
                return
            for chunk in df:
                self.bytes_running_time = ratelimit_sleep(
                    self.bytes_running_time, self.max_bytes_per_second,
                    incr_by=len(chunk))
                self.bytes_processed += len(chunk)
                self.total_bytes_processed += len(chunk)
            df.close()
            if df.quarantined_dir:
                self.quarantines += 1
                self.logger.error(
                    _("ERROR Object %(path)s failed audit and will be "
                      "quarantined: ETag and file's md5 do not match"),
                    {'path': path})
        finally:
            df.close(verify_file=False)
    except AuditException as err:
        self.logger.increment('quarantines')
        self.quarantines += 1
        self.logger.error(
            _('ERROR Object %(obj)s failed audit and will '
              'be quarantined: %(err)s'),
            {'obj': path, 'err': err})
        diskfile.quarantine_renamer(os.path.join(self.devices, device),
                                    path)
        return
    except (Exception, Timeout):
        self.logger.increment('errors')
        self.errors += 1
        self.logger.exception(_('ERROR Trying to audit %s'), path)
        return
    self.passes += 1
def audit_all_objects(self, mode='once'):
    self.logger.info(
        _('Begin object audit "%s" mode (%s)') %
        (mode, self.auditor_type))
    begin = reported = time.time()
    self.total_bytes_processed = 0
    self.total_files_processed = 0
    total_quarantines = 0
    total_errors = 0
    files_running_time = 0
    time_auditing = 0
    all_locs = audit_location_generator(self.devices,
                                        object_server.DATADIR,
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        loop_time = time.time()
        self.object_audit(path, device, partition)
        self.files_running_time = ratelimit_sleep(
            self.files_running_time, self.max_files_per_second)
        self.total_files_processed += 1
        now = time.time()
        if now - reported >= self.log_time:
            self.logger.info(
                _('Object audit (%(type)s). '
                  'Since %(start_time)s: Locally: %(passes)d passed, '
                  '%(quars)d quarantined, %(errors)d errors '
                  'files/sec: %(frate).2f , bytes/sec: %(brate).2f, '
                  'Total time: %(total).2f, Auditing time: %(audit).2f, '
                  'Rate: %(audit_rate).2f') % {
                    'type': self.auditor_type,
                    'start_time': time.ctime(reported),
                    'passes': self.passes,
                    'quars': self.quarantines,
                    'errors': self.errors,
                    'frate': self.passes / (now - reported),
                    'brate': self.bytes_processed / (now - reported),
                    'total': (now - begin),
                    'audit': time_auditing,
                    'audit_rate': time_auditing / (now - begin)
                })
            reported = now
            total_quarantines += self.quarantines
            total_errors += self.errors
            self.passes = 0
            self.quarantines = 0
            self.errors = 0
            self.bytes_processed = 0
        time_auditing += (now - loop_time)
    # Avoid divide by zero during very short runs
    elapsed = (time.time() - begin) or 0.000001
    self.logger.info(
        _('Object audit (%(type)s) "%(mode)s" mode '
          'completed: %(elapsed).02fs. Total quarantined: %(quars)d, '
          'Total errors: %(errors)d, Total files/sec: %(frate).2f , '
          'Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, '
          'Rate: %(audit_rate).2f') % {
            'type': self.auditor_type,
            'mode': mode,
            'elapsed': elapsed,
            'quars': total_quarantines,
            'errors': total_errors,
            'frate': self.total_files_processed / elapsed,
            'brate': self.total_bytes_processed / elapsed,
            'audit': time_auditing,
            'audit_rate': time_auditing / elapsed
        })
def object_sweep(self, device):
    """
    If there are async pendings on the device, walk each one and update.

    :param device: path to device
    """
    start_time = time.time()
    last_status_update = start_time
    start_stats = self.stats.copy()
    my_pid = os.getpid()
    self.logger.info("Object update sweep starting on %s (pid: %d)",
                     device, my_pid)
    # loop through async pending dirs for all policies
    for asyncdir in self._listdir(device):
        # we only care about directories
        async_pending = os.path.join(device, asyncdir)
        if not os.path.isdir(async_pending):
            continue
        if not asyncdir.startswith(ASYNCDIR_BASE):
            # skip stuff like "accounts", "containers", etc.
            continue
        try:
            base, policy = split_policy_string(asyncdir)
        except PolicyError as e:
            # This isn't an error, but a misconfiguration. Logging a
            # warning should be sufficient.
            self.logger.warning(_('Directory %(directory)r does not map '
                                  'to a valid policy (%(error)s)') % {
                                      'directory': asyncdir, 'error': e})
            continue
        for prefix in self._listdir(async_pending):
            prefix_path = os.path.join(async_pending, prefix)
            if not os.path.isdir(prefix_path):
                continue
            last_obj_hash = None
            for update in sorted(self._listdir(prefix_path),
                                 reverse=True):
                update_path = os.path.join(prefix_path, update)
                if not os.path.isfile(update_path):
                    continue
                try:
                    obj_hash, timestamp = update.split('-')
                except ValueError:
                    self.stats.errors += 1
                    self.logger.increment('errors')
                    self.logger.error(
                        _('ERROR async pending file with unexpected '
                          'name %s') % (update_path))
                    continue
                if obj_hash == last_obj_hash:
                    self.stats.unlinks += 1
                    self.logger.increment('unlinks')
                    os.unlink(update_path)
                else:
                    self.process_object_update(update_path, device,
                                               policy)
                    last_obj_hash = obj_hash
                self.objects_running_time = ratelimit_sleep(
                    self.objects_running_time,
                    self.max_objects_per_second)
                now = time.time()
                if now - last_status_update >= self.report_interval:
                    this_sweep = self.stats.since(start_stats)
                    self.logger.info(
                        ('Object update sweep progress on %(device)s: '
                         '%(elapsed).02fs, %(stats)s (pid: %(pid)d)'),
                        {'device': device,
                         'elapsed': now - start_time,
                         'pid': my_pid,
                         'stats': this_sweep})
                    last_status_update = now
            try:
                os.rmdir(prefix_path)
            except OSError:
                pass
    self.logger.timing_since('timing', start_time)
    sweep_totals = self.stats.since(start_stats)
    self.logger.info(
        ('Object update sweep completed on %(device)s '
         'in %(elapsed).02f seconds: '
         '%(successes)d successes, %(failures)d failures, '
         '%(quarantines)d quarantines, '
         '%(unlinks)d unlinks, %(errors)d errors, '
         '%(redirects)d redirects '
         '(pid: %(pid)d)'),
        {'device': device,
         'elapsed': time.time() - start_time,
         'pid': my_pid,
         'successes': sweep_totals.successes,
         'failures': sweep_totals.failures,
         'quarantines': sweep_totals.quarantines,
         'unlinks': sweep_totals.unlinks,
         'errors': sweep_totals.errors,
         'redirects': sweep_totals.redirects})
def audit_all_objects(self, mode='once', device_dirs=None):
    description = ''
    if device_dirs:
        device_dir_str = ','.join(sorted(device_dirs))
        if self.auditor_type == 'ALL':
            description = _(' - parallel, %s') % device_dir_str
        else:
            description = _(' - %s') % device_dir_str
    self.logger.info(_('Begin object audit "%s" mode (%s%s)') %
                     (mode, self.auditor_type, description))
    begin = reported = time.time()
    self.total_bytes_processed = 0
    self.total_files_processed = 0
    total_quarantines = 0
    total_errors = 0
    time_auditing = 0
    all_locs = self.diskfile_mgr.object_audit_location_generator(
        device_dirs=device_dirs)
    for location in all_locs:
        loop_time = time.time()
        self.failsafe_object_audit(location)
        self.logger.timing_since('timing', loop_time)
        self.files_running_time = ratelimit_sleep(
            self.files_running_time, self.max_files_per_second)
        self.total_files_processed += 1
        now = time.time()
        if now - self.last_logged >= self.log_time:
            self.logger.info(_(
                'Object audit (%(type)s). '
                'Since %(start_time)s: Locally: %(passes)d passed, '
                '%(quars)d quarantined, %(errors)d errors '
                'files/sec: %(frate).2f , bytes/sec: %(brate).2f, '
                'Total time: %(total).2f, Auditing time: %(audit).2f, '
                'Rate: %(audit_rate).2f') % {
                    'type': '%s%s' % (self.auditor_type, description),
                    'start_time': time.ctime(reported),
                    'passes': self.passes,
                    'quars': self.quarantines,
                    'errors': self.errors,
                    'frate': self.passes / (now - reported),
                    'brate': self.bytes_processed / (now - reported),
                    'total': (now - begin),
                    'audit': time_auditing,
                    'audit_rate': time_auditing / (now - begin)})
            cache_entry = self.create_recon_nested_dict(
                'object_auditor_stats_%s' % (self.auditor_type),
                device_dirs,
                {'errors': self.errors, 'passes': self.passes,
                 'quarantined': self.quarantines,
                 'bytes_processed': self.bytes_processed,
                 'start_time': reported, 'audit_time': time_auditing})
            dump_recon_cache(cache_entry, self.rcache, self.logger)
            reported = now
            total_quarantines += self.quarantines
            total_errors += self.errors
            self.passes = 0
            self.quarantines = 0
            self.errors = 0
            self.bytes_processed = 0
            self.last_logged = now
        time_auditing += (now - loop_time)
    # Avoid divide by zero during very short runs
    elapsed = (time.time() - begin) or 0.000001
    self.logger.info(_(
        'Object audit (%(type)s) "%(mode)s" mode '
        'completed: %(elapsed).02fs. Total quarantined: %(quars)d, '
        'Total errors: %(errors)d, Total files/sec: %(frate).2f, '
        'Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, '
        'Rate: %(audit_rate).2f') % {
            'type': '%s%s' % (self.auditor_type, description),
            'mode': mode, 'elapsed': elapsed,
            'quars': total_quarantines + self.quarantines,
            'errors': total_errors + self.errors,
            'frate': self.total_files_processed / elapsed,
            'brate': self.total_bytes_processed / elapsed,
            'audit': time_auditing,
            'audit_rate': time_auditing / elapsed})
    if self.stats_sizes:
        self.logger.info(
            _('Object audit stats: %s') % json.dumps(self.stats_buckets))
                                account, container, obj, self.logger,
                                keep_data_fp=True)
        if df.data_file is None:
            # file is deleted, we found the tombstone
            return
        try:
            obj_size = df.get_data_file_size()
        except DiskFileError as e:
            raise AuditException(str(e))
        except DiskFileNotExist:
            return
        if self.zero_byte_only_at_fps and obj_size:
            return
        for chunk in df:
            self.bytes_running_time = ratelimit_sleep(
                self.bytes_running_time, self.max_bytes_per_second,
                incr_by=len(chunk))
            self.bytes_processed += len(chunk)
            self.total_bytes_processed += len(chunk)
        df.close()
        if df.quarantined_dir:
            self.quarantines += 1
            self.logger.error(
                _("ERROR Object %(path)s failed audit and will be "
                  "quarantined: ETag and file's md5 do not match"),
                {'path': path})
    except AuditException as err:
        self.quarantines += 1
        self.logger.error(_('ERROR Object %(obj)s failed audit and will '
                            'be quarantined: %(err)s'),
                          {'obj': path, 'err': err})
        object_server.quarantine_renamer(