def run_once(self, *args, **kwargs):
    self._zero_stats()
    self.logger.info(_("Running object replicator in script mode."))
    override_devices = list_from_csv(kwargs.get('devices'))
    override_partitions = list_from_csv(kwargs.get('partitions'))
    override_policies = list_from_csv(kwargs.get('policies'))
    if not override_devices:
        override_devices = None
    if not override_partitions:
        override_partitions = None
    if not override_policies:
        override_policies = None
    self.replicate(
        override_devices=override_devices,
        override_partitions=override_partitions,
        override_policies=override_policies)
    total = (time.time() - self.stats['start']) / 60
    self.logger.info(
        _("Object replication complete (once). (%.02f minutes)"), total)
    if not (override_partitions or override_devices):
        replication_last = time.time()
        dump_recon_cache({'replication_stats': self.stats,
                          'replication_time': total,
                          'replication_last': replication_last,
                          'object_replication_time': total,
                          'object_replication_last': replication_last},
                         self.rcache, self.logger)
def run_once(self, *args, **kwargs):
    start = time.time()
    self.logger.info(_("Running object replicator in script mode."))
    self.replicate()
    total = (time.time() - start) / 60
    self.logger.info(
        _("Object replication complete. (%.02f minutes)"), total)
    dump_recon_cache({"object_replication_time": total},
                     self.rcache, self.logger)
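# Every variant in this collection funnels its pass statistics through the
# same call shape: dump_recon_cache(cache_dict, cache_file, logger). As a
# reading aid, here is a minimal sketch of the contract those callers rely
# on -- merge a dict of stats into a JSON cache file that swift-recon later
# reads. This is an illustration only, not Swift's implementation (the real
# helper also takes a file lock and handles nested entries), and the name
# dump_recon_cache_sketch is hypothetical.
import json
import os


def dump_recon_cache_sketch(cache_dict, cache_file, logger):
    # logger accepted for signature parity; unused in this sketch
    try:
        with open(cache_file) as fp:
            cache = json.load(fp)
    except (IOError, OSError, ValueError):
        cache = {}  # first run or unreadable cache: start fresh
    cache.update(cache_dict)
    tmp_file = cache_file + '.tmp'
    with open(tmp_file, 'w') as fp:
        json.dump(cache, fp)
    os.rename(tmp_file, cache_file)  # atomic replace on POSIX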
def _one_audit_pass(self, reported):
    all_locs = audit_location_generator(
        self.devices, account_server.DATADIR,
        mount_check=self.mount_check, logger=self.logger
    )
    for path, device, partition in all_locs:
        self.account_audit(path)
        if time.time() - reported >= 3600:  # once an hour
            self.logger.info(
                _("Since %(time)s: Account audits: "
                  "%(passed)s passed audit, "
                  "%(failed)s failed audit"),
                {"time": time.ctime(reported),
                 "passed": self.account_passes,
                 "failed": self.account_failures},
            )
            dump_recon_cache(
                {
                    "account_audits_since": reported,
                    "account_audits_passed": self.account_passes,
                    "account_audits_failed": self.account_failures,
                },
                self.rcache,
                self.logger,
            )
            reported = time.time()
            self.account_passes = 0
            self.account_failures = 0
    return reported
def _report_stats(self):
    """Report the current stats to the logs."""
    self.logger.info(
        _("Attempted to replicate %(count)d dbs in %(time).5f seconds "
          "(%(rate).5f/s)"),
        {
            "count": self.stats["attempted"],
            "time": time.time() - self.stats["start"],
            "rate": self.stats["attempted"] /
                (time.time() - self.stats["start"] + 0.0000001),
        },
    )
    self.logger.info(_("Removed %(remove)d dbs") % self.stats)
    self.logger.info(
        _("%(success)s successes, %(failure)s failures") % self.stats)
    dump_recon_cache(
        {"replication_stats": self.stats,
         "replication_time": time.time() - self.stats["start"]},
        self.rcache,
        self.logger,
    )
    self.logger.info(
        " ".join(
            "%s:%s" % item
            for item in self.stats.items()
            if item[0] in ("no_change", "hashmatch", "rsync", "diff",
                           "ts_repl", "empty", "diff_capped")
        )
    )
def run_once(self, *args, **kwargs):
    start = time.time()
    self.logger.info(_("Running object replicator in script mode."))
    override_devices = list_from_csv(kwargs.get('devices'))
    override_partitions = list_from_csv(kwargs.get('partitions'))
    override_policies = list_from_csv(kwargs.get('policies'))
    if not override_devices:
        override_devices = None
    if not override_partitions:
        override_partitions = None
    if not override_policies:
        override_policies = None
    ############################ CHANGED_CODE ############################
    # Hard-coded override: forces a single-device run for testing. Note
    # that override_devices is now always set, so the recon dump below is
    # always skipped.
    override_devices = ['sda4']
    ############################ CHANGED_CODE ############################
    self.replicate(
        override_devices=override_devices,
        override_partitions=override_partitions,
        override_policies=override_policies)
    total = (time.time() - start) / 60
    self.logger.info(
        _("Object replication complete (once). (%.02f minutes)"), total)
    if not (override_partitions or override_devices):
        dump_recon_cache({'object_replication_time': total,
                          'object_replication_last': time.time()},
                         self.rcache, self.logger)
def run_forever(self, *args, **kwargs):
    """
    Run the updater continuously.
    """
    time.sleep(random() * self.interval)
    while True:
        self.logger.info(_('Begin container update sweep'))
        begin = time.time()
        now = time.time()
        expired_suppressions = \
            [a for a, u in self.account_suppressions.items() if u < now]
        for account in expired_suppressions:
            del self.account_suppressions[account]
        pid2filename = {}
        # read from account ring to ensure it's fresh
        self.get_account_ring().get_nodes('')
        for path in self.get_paths():
            while len(pid2filename) >= self.concurrency:
                pid = os.wait()[0]
                try:
                    self._load_suppressions(pid2filename[pid])
                finally:
                    del pid2filename[pid]
            fd, tmpfilename = mkstemp()
            os.close(fd)
            pid = os.fork()
            if pid:
                pid2filename[pid] = tmpfilename
            else:
                signal.signal(signal.SIGTERM, signal.SIG_DFL)
                eventlet_monkey_patch()
                self.no_changes = 0
                self.successes = 0
                self.failures = 0
                self.new_account_suppressions = open(tmpfilename, 'w')
                forkbegin = time.time()
                self.container_sweep(path)
                elapsed = time.time() - forkbegin
                self.logger.debug(
                    _('Container update sweep of %(path)s completed: '
                      '%(elapsed).02fs, %(success)s successes, %(fail)s '
                      'failures, %(no_change)s with no changes'),
                    {'path': path, 'elapsed': elapsed,
                     'success': self.successes, 'fail': self.failures,
                     'no_change': self.no_changes})
                sys.exit()
        while pid2filename:
            pid = os.wait()[0]
            try:
                self._load_suppressions(pid2filename[pid])
            finally:
                del pid2filename[pid]
        elapsed = time.time() - begin
        self.logger.info(_('Container update sweep completed: %.02fs'),
                         elapsed)
        dump_recon_cache({'container_updater_sweep': elapsed},
                         self.rcache, self.logger)
        if elapsed < self.interval:
            time.sleep(self.interval - elapsed)
def _one_audit_pass(self, reported):
    all_locs = audit_location_generator(self.devices,
                                        account_server.DATADIR, '.db',
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        self.account_audit(path)
        if time.time() - reported >= 3600:  # once an hour
            self.logger.info(_('Since %(time)s: Account audits: '
                               '%(passed)s passed audit, '
                               '%(failed)s failed audit'),
                             {'time': time.ctime(reported),
                              'passed': self.account_passes,
                              'failed': self.account_failures})
            dump_recon_cache({'account_audits_since': reported,
                              'account_audits_passed': self.account_passes,
                              'account_audits_failed':
                                  self.account_failures},
                             self.rcache, self.logger)
            reported = time.time()
            self.account_passes = 0
            self.account_failures = 0
        self.accounts_running_time = ratelimit_sleep(
            self.accounts_running_time, self.max_accounts_per_second)
    return reported
def run_once(self, *args, **kwargs):
    self._zero_stats()
    self.logger.info(_("Running object replicator in script mode."))
    override_devices = list_from_csv(kwargs.get("devices"))
    override_partitions = list_from_csv(kwargs.get("partitions"))
    override_policies = list_from_csv(kwargs.get("policies"))
    if not override_devices:
        override_devices = None
    if not override_partitions:
        override_partitions = None
    if not override_policies:
        override_policies = None
    self.replicate(
        override_devices=override_devices,
        override_partitions=override_partitions,
        override_policies=override_policies,
    )
    total = (time.time() - self.stats["start"]) / 60
    self.logger.info(
        _("Object replication complete (once). (%.02f minutes)"), total)
    if not (override_partitions or override_devices):
        replication_last = time.time()
        dump_recon_cache(
            {
                "replication_stats": self.stats,
                "replication_time": total,
                "replication_last": replication_last,
                "object_replication_time": total,
                "object_replication_last": replication_last,
            },
            self.rcache,
            self.logger,
        )
def run_once(self, *args, **kwargs): """Run the container audit once.""" self.logger.info(_('Begin container audit "once" mode')) begin = reported = time.time() self._one_audit_pass(reported) elapsed = time.time() - begin self.logger.info(_('Container audit "once" mode completed: %.02fs'), elapsed) dump_recon_cache({"container_auditor_pass_completed": elapsed}, self.recon_container)
def run_once(self, *args, **kwargs): """Run the account audit once.""" self.logger.info(_('Begin account audit "once" mode')) begin = reported = time.time() self._one_audit_pass(reported) elapsed = time.time() - begin self.logger.info( _('Account audit "once" mode completed: %.02fs'), elapsed) dump_recon_cache({'account_auditor_pass_completed': elapsed}, self.rcache, self.logger)
def run_once(self, *args, **kwargs):
    start = time.time()
    self.logger.info(_("Running object replicator in script mode."))
    override_devices = list_from_csv(kwargs.get("devices"))
    override_partitions = list_from_csv(kwargs.get("partitions"))
    self.replicate(override_devices=override_devices,
                   override_partitions=override_partitions)
    total = (time.time() - start) / 60
    self.logger.info(
        _("Object replication complete (once). (%.02f minutes)"), total)
    if not (override_partitions or override_devices):
        dump_recon_cache(
            {"object_replication_time": total,
             "object_replication_last": time.time()},
            self.rcache, self.logger
        )
def run_forever(self, *args, **kwargs): self.logger.info(_("Starting object replicator in daemon mode.")) # Run the replicator continually while True: start = time.time() self.logger.info(_("Starting object replication pass.")) # Run the replicator self.replicate() total = (time.time() - start) / 60 self.logger.info(_("Object replication complete. (%.02f minutes)"), total) dump_recon_cache({"object_replication_time": total}, self.rcache, self.logger) self.logger.debug(_("Replication sleeping for %s seconds."), self.run_pause) sleep(self.run_pause)
def run_once(self, *args, **kwargs):
    start = time.time()
    self.logger.info(_("Running object replicator in script mode."))
    self.replicate()
    total = (time.time() - start) / 60
    self.logger.info(
        _("Object replication complete. (%.02f minutes)"), total)
    if self.recon_enable:
        try:
            dump_recon_cache('object_replication_time', total,
                             self.recon_object)
        except (Exception, Timeout):
            self.logger.exception(_('Exception dumping recon cache'))
def report(self, final=False):
    if final:
        elapsed = time() - self.report_first_time
        self.logger.info(_('Pass completed in %ds; %d containers,'
                           ' %d objects aggregated') %
                         (elapsed, self.report_containers,
                          self.report_objects))
        dump_recon_cache({'object_aggregation_pass': elapsed,
                          'aggregation_last_pass': self.report_containers},
                         self.rcache, self.logger)
    elif time() - self.report_last_time >= self.report_interval:
        elapsed = time() - self.report_first_time
        self.logger.info(_('Pass so far %ds; %d objects aggregated') %
                         (elapsed, self.report_objects))
        self.report_last_time = time()
def run_once(self, *args, **kwargs):
    start = time.time()
    self.logger.info(_("Running object reconstructor in script mode."))
    override_devices = list_from_csv(kwargs.get('devices'))
    override_partitions = [
        int(p) for p in list_from_csv(kwargs.get('partitions'))]
    self.reconstruct(
        override_devices=override_devices,
        override_partitions=override_partitions)
    total = (time.time() - start) / 60
    self.logger.info(
        _("Object reconstruction complete (once). (%.02f minutes)"), total)
    if not (override_partitions or override_devices):
        dump_recon_cache({'object_reconstruction_time': total,
                          'object_reconstruction_last': time.time()},
                         self.rcache, self.logger)
def run_forever(self, *args, **kwargs): """Run the container audit until stopped.""" reported = time.time() time.sleep(random() * self.interval) while True: self.logger.info(_("Begin container audit pass.")) begin = time.time() try: reported = self._one_audit_pass(reported) except (Exception, Timeout): self.logger.increment("errors") self.logger.exception(_("ERROR auditing")) elapsed = time.time() - begin if elapsed < self.interval: time.sleep(self.interval - elapsed) self.logger.info(_("Container audit pass completed: %.02fs"), elapsed) dump_recon_cache({"container_auditor_pass_completed": elapsed}, self.rcache, self.logger)
def run_forever(self, *args, **kwargs): """Run the updater continuously.""" time.sleep(random() * self.interval) while True: self.logger.info(_('Begin object update sweep')) begin = time.time() pids = [] # read from container ring to ensure it's fresh self.get_container_ring().get_nodes('') for device in self._listdir(self.devices): if self.mount_check and \ not ismount(os.path.join(self.devices, device)): self.logger.increment('errors') self.logger.warning( _('Skipping %s as it is not mounted'), device) continue while len(pids) >= self.concurrency: pids.remove(os.wait()[0]) pid = os.fork() if pid: pids.append(pid) else: signal.signal(signal.SIGTERM, signal.SIG_DFL) patcher.monkey_patch(all=False, socket=True, select=True, thread=True) self.successes = 0 self.failures = 0 forkbegin = time.time() self.object_sweep(os.path.join(self.devices, device)) elapsed = time.time() - forkbegin self.logger.info( _('Object update sweep of %(device)s' ' completed: %(elapsed).02fs, %(success)s successes' ', %(fail)s failures'), {'device': device, 'elapsed': elapsed, 'success': self.successes, 'fail': self.failures}) sys.exit() while pids: pids.remove(os.wait()[0]) elapsed = time.time() - begin self.logger.info(_('Object update sweep completed: %.02fs'), elapsed) dump_recon_cache({'object_updater_sweep': elapsed}, self.rcache, self.logger) if elapsed < self.interval: time.sleep(self.interval - elapsed)
def run_forever(self, *args, **kwargs): self.logger.info(_("Starting object reconstructor in daemon mode.")) # Run the reconstructor continually while True: start = time.time() self.logger.info(_("Starting object reconstruction pass.")) # Run the reconstructor self.reconstruct() total = (time.time() - start) / 60 self.logger.info( _("Object reconstruction complete. (%.02f minutes)"), total) dump_recon_cache({'object_reconstruction_time': total, 'object_reconstruction_last': time.time()}, self.rcache, self.logger) self.logger.debug('reconstruction sleeping for %s seconds.', self.interval) sleep(self.interval)
def report(self, final=False):
    """
    Emits a log line report of the progress so far, or the final progress
    if final=True.

    :param final: Set to True for the last report once the expiration
                  pass has completed.
    """
    if final:
        elapsed = time() - self.report_first_time
        self.logger.info(_("Pass completed in %ds; %d objects expired") %
                         (elapsed, self.report_objects))
        dump_recon_cache(
            {"object_expiration_pass": elapsed,
             "expired_last_pass": self.report_objects},
            self.rcache, self.logger
        )
    elif time() - self.report_last_time >= self.report_interval:
        elapsed = time() - self.report_first_time
        self.logger.info(_("Pass so far %ds; %d objects expired") %
                         (elapsed, self.report_objects))
        self.report_last_time = time()
def run_forever(self, *args, **kwargs): """Run the updater continuously.""" time.sleep(random() * self.interval) while True: self.logger.info(_('Begin object update sweep')) begin = time.time() pids = [] # read from container ring to ensure it's fresh self.get_container_ring().get_nodes('') for device in self._listdir(self.devices): if not check_drive(self.devices, device, self.mount_check): # We don't count this as an error. The occasional # unmounted drive is part of normal cluster operations, # so a simple warning is sufficient. self.logger.warning( _('Skipping %s as it is not mounted'), device) continue while len(pids) >= self.concurrency: pids.remove(os.wait()[0]) pid = os.fork() if pid: pids.append(pid) else: signal.signal(signal.SIGTERM, signal.SIG_DFL) eventlet_monkey_patch() self.stats.reset() forkbegin = time.time() self.object_sweep(os.path.join(self.devices, device)) elapsed = time.time() - forkbegin self.logger.info( ('Object update sweep of %(device)s ' 'completed: %(elapsed).02fs, %(stats)s'), {'device': device, 'elapsed': elapsed, 'stats': self.stats}) sys.exit() while pids: pids.remove(os.wait()[0]) elapsed = time.time() - begin self.logger.info(_('Object update sweep completed: %.02fs'), elapsed) dump_recon_cache({'object_updater_sweep': elapsed}, self.rcache, self.logger) if elapsed < self.interval: time.sleep(self.interval - elapsed)
def run_forever(self, *args, **kwargs): """Run the account audit until stopped.""" reported = time.time() time.sleep(random() * self.interval) while True: self.logger.info(_('Begin account audit pass.')) begin = time.time() try: reported = self._one_audit_pass(reported) except (Exception, Timeout): self.logger.increment('errors') self.logger.exception(_('ERROR auditing')) elapsed = time.time() - begin if elapsed < self.interval: time.sleep(self.interval - elapsed) self.logger.info( _('Account audit pass completed: %.02fs'), elapsed) dump_recon_cache({'account_auditor_pass_completed': elapsed}, self.rcache, self.logger)
def run_forever(self, *args, **kwargs): """Run the updater continuously.""" time.sleep(random() * self.interval) while True: self.logger.info(_("Begin object update sweep")) begin = time.time() pids = [] # read from container ring to ensure it's fresh self.get_container_ring().get_nodes("") for device in os.listdir(self.devices): if self.mount_check and not os.path.ismount(os.path.join(self.devices, device)): self.logger.increment("errors") self.logger.warn(_("Skipping %s as it is not mounted"), device) continue while len(pids) >= self.concurrency: pids.remove(os.wait()[0]) pid = os.fork() if pid: pids.append(pid) else: signal.signal(signal.SIGTERM, signal.SIG_DFL) patcher.monkey_patch(all=False, socket=True) self.successes = 0 self.failures = 0 forkbegin = time.time() self.object_sweep(os.path.join(self.devices, device)) elapsed = time.time() - forkbegin self.logger.info( _( "Object update sweep of %(device)s" " completed: %(elapsed).02fs, %(success)s successes" ", %(fail)s failures" ), {"device": device, "elapsed": elapsed, "success": self.successes, "fail": self.failures}, ) sys.exit() while pids: pids.remove(os.wait()[0]) elapsed = time.time() - begin self.logger.info(_("Object update sweep completed: %.02fs"), elapsed) dump_recon_cache({"object_updater_sweep": elapsed}, self.rcache, self.logger) if elapsed < self.interval: time.sleep(self.interval - elapsed)
def _report_stats(self): """Report the current stats to the logs.""" self.logger.info( _('Attempted to replicate %(count)d dbs in %(time).5f seconds ' '(%(rate).5f/s)'), {'count': self.stats['attempted'], 'time': time.time() - self.stats['start'], 'rate': self.stats['attempted'] / (time.time() - self.stats['start'] + 0.0000001)}) self.logger.info(_('Removed %(remove)d dbs') % self.stats) self.logger.info(_('%(success)s successes, %(failure)s failures') % self.stats) dump_recon_cache({'replication_stats': self.stats, 'replication_time': time.time() - self.stats['start'] }, self.rcache, self.logger) self.logger.info(' '.join(['%s:%s' % item for item in self.stats.items() if item[0] in ('no_change', 'hashmatch', 'rsync', 'diff', 'ts_repl', 'empty', 'diff_capped')]))
def run_forever(self, *args, **kwargs): self.logger.info(_("Starting object replicator in daemon mode.")) # Run the replicator continually while True: start = time.time() self.logger.info(_("Starting object replication pass.")) # Run the replicator self.replicate() total = (time.time() - start) / 60 self.logger.info( _("Object replication complete. (%.02f minutes)"), total) if self.recon_enable: try: dump_recon_cache('object_replication_time', total, \ self.recon_object) except (Exception, Timeout): self.logger.exception(_('Exception dumping recon cache')) self.logger.debug(_('Replication sleeping for %s seconds.'), self.run_pause) sleep(self.run_pause)
def run_once(self, *args, **kwargs): """ Run the updater once. """ patcher.monkey_patch(all=False, socket=True) self.logger.info(_('Begin container update single threaded sweep')) begin = time.time() self.no_changes = 0 self.successes = 0 self.failures = 0 for path in self.get_paths(): self.container_sweep(path) elapsed = time.time() - begin self.logger.info(_('Container update single threaded sweep completed: ' '%(elapsed).02fs, %(success)s successes, %(fail)s failures, ' '%(no_change)s with no changes'), {'elapsed': elapsed, 'success': self.successes, 'fail': self.failures, 'no_change': self.no_changes}) dump_recon_cache({'container_updater_sweep': elapsed}, self.rcache, self.logger)
def run_once(self, *args, **kwargs): """Run the updater once""" self.logger.info(_("Begin object update single threaded sweep")) begin = time.time() self.successes = 0 self.failures = 0 for device in os.listdir(self.devices): if self.mount_check and not os.path.ismount(os.path.join(self.devices, device)): self.logger.increment("errors") self.logger.warn(_("Skipping %s as it is not mounted"), device) continue self.object_sweep(os.path.join(self.devices, device)) elapsed = time.time() - begin self.logger.info( _( "Object update single threaded sweep completed: " "%(elapsed).02fs, %(success)s successes, %(fail)s failures" ), {"elapsed": elapsed, "success": self.successes, "fail": self.failures}, ) dump_recon_cache({"object_updater_sweep": elapsed}, self.rcache, self.logger)
def run_once(self, *args, **kwargs): """Run the updater once.""" self.logger.info(_('Begin object update single threaded sweep')) begin = time.time() self.successes = 0 self.failures = 0 for device in self._listdir(self.devices): if not check_drive(self.devices, device, self.mount_check): self.logger.increment('errors') self.logger.warning( _('Skipping %s as it is not mounted'), device) continue self.object_sweep(os.path.join(self.devices, device)) elapsed = time.time() - begin self.logger.info( _('Object update single threaded sweep completed: ' '%(elapsed).02fs, %(success)s successes, %(fail)s failures'), {'elapsed': elapsed, 'success': self.successes, 'fail': self.failures}) dump_recon_cache({'object_updater_sweep': elapsed}, self.rcache, self.logger)
def is_healthy(self):
    """
    Check whether our set of local devices remains the same.

    If devices have been added or removed, then we return False here so
    that we can kill off any worker processes and then distribute the
    new set of local devices across a new set of workers so that all
    devices are, once again, being worked on.

    This function may also cause recon stats to be updated.

    :returns: False if any local devices have been added or removed,
              True otherwise
    """
    # We update recon here because this is the only function we have in
    # a multiprocess replicator that gets called periodically in the
    # parent process.
    if time.time() >= self._next_rcache_update:
        update = self.aggregate_recon_update()
        dump_recon_cache(update, self.rcache, self.logger)
    return self.get_local_devices() == self.all_local_devices
def run_once(self, *args, **kwargs): """Run the updater once.""" self.logger.info(_('Begin object update single threaded sweep')) begin = time.time() self.stats.reset() for device in self._listdir(self.devices): if not check_drive(self.devices, device, self.mount_check): # We don't count this as an error. The occasional unmounted # drive is part of normal cluster operations, so a simple # warning is sufficient. self.logger.warning( _('Skipping %s as it is not mounted'), device) continue self.object_sweep(os.path.join(self.devices, device)) elapsed = time.time() - begin self.logger.info( ('Object update single-threaded sweep completed: ' '%(elapsed).02fs, %(stats)s'), {'elapsed': elapsed, 'stats': self.stats}) dump_recon_cache({'object_updater_sweep': elapsed}, self.rcache, self.logger)
def run_forever(self, *args, **kwargs): self.logger.info(_("Starting object replicator in daemon mode.")) # Run the replicator continually while True: self._zero_stats() self.logger.info(_("Starting object replication pass.")) # Run the replicator self.replicate() total = (time.time() - self.stats['start']) / 60 self.logger.info( _("Object replication complete. (%.02f minutes)"), total) replication_last = time.time() dump_recon_cache({'replication_stats': self.stats, 'replication_time': total, 'replication_last': replication_last, 'object_replication_time': total, 'object_replication_last': replication_last}, self.rcache, self.logger) self.logger.debug('Replication sleeping for %s seconds.', self.interval) sleep(self.interval)
def run_once(self, *args, **kwargs): """Run the updater once.""" self.logger.info(_('Begin object update single threaded sweep')) begin = time.time() self.stats.reset() for device in self._listdir(self.devices): try: dev_path = check_drive(self.devices, device, self.mount_check) except ValueError as err: # We don't count this as an error. The occasional unmounted # drive is part of normal cluster operations, so a simple # warning is sufficient. self.logger.warning('Skipping: %s', err) continue self.object_sweep(dev_path) elapsed = time.time() - begin self.logger.info(('Object update single-threaded sweep completed: ' '%(elapsed).02fs, %(stats)s'), { 'elapsed': elapsed, 'stats': self.stats }) dump_recon_cache({'object_updater_sweep': elapsed}, self.rcache, self.logger)
def run_once(self, *args, **kwargs): """Run the updater once.""" self.logger.info(_('Begin object update single threaded sweep')) begin = time.time() self.successes = 0 self.failures = 0 for device in os.listdir(self.devices): if self.mount_check and \ not ismount(os.path.join(self.devices, device)): self.logger.increment('errors') self.logger.warn(_('Skipping %s as it is not mounted'), device) continue self.object_sweep(os.path.join(self.devices, device)) elapsed = time.time() - begin self.logger.info( _('Object update single threaded sweep completed: ' '%(elapsed).02fs, %(success)s successes, %(fail)s failures'), { 'elapsed': elapsed, 'success': self.successes, 'fail': self.failures }) dump_recon_cache({'object_updater_sweep': elapsed}, self.rcache, self.logger)
def run_forever(self, *args, **kwargs): self.logger.info(_("Starting object replicator in daemon mode.")) # Run the replicator continually while True: self._zero_stats() self.logger.info(_("Starting object replication pass.")) # Run the replicator self.replicate() total = (time.time() - self.stats['start']) / 60 self.logger.info(_("Object replication complete. (%.02f minutes)"), total) replication_last = time.time() dump_recon_cache( { 'replication_stats': self.stats, 'replication_time': total, 'replication_last': replication_last, 'object_replication_time': total, 'object_replication_last': replication_last }, self.rcache, self.logger) self.logger.debug('Replication sleeping for %s seconds.', self.interval) sleep(self.interval)
def run_once(self, *args, **kwargs):
    rsync_reaper = eventlet.spawn(self._child_process_reaper)
    self._zero_stats()
    self.logger.info(_("Running object replicator in script mode."))
    override_devices = list_from_csv(kwargs.get('devices'))
    override_partitions = list_from_csv(kwargs.get('partitions'))
    override_policies = list_from_csv(kwargs.get('policies'))
    if not override_devices:
        override_devices = None
    if not override_partitions:
        override_partitions = None
    if not override_policies:
        override_policies = None
    self.replicate(override_devices=override_devices,
                   override_partitions=override_partitions,
                   override_policies=override_policies)
    total = (time.time() - self.stats['start']) / 60
    self.logger.info(
        _("Object replication complete (once). (%.02f minutes)"), total)
    if not (override_partitions or override_devices):
        replication_last = time.time()
        dump_recon_cache(
            {
                'replication_stats': self.stats,
                'replication_time': total,
                'replication_last': replication_last,
                'object_replication_time': total,
                'object_replication_last': replication_last
            }, self.rcache, self.logger)
    # Give rsync processes one last chance to exit, then bail out and
    # let them be init's problem
    self._child_process_reaper_queue.put(None)
    rsync_reaper.wait()
def _one_audit_pass(self, reported):
    all_locs = audit_location_generator(self.devices,
                                        account_server.DATADIR,
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        self.account_audit(path)
        if time.time() - reported >= 3600:  # once an hour
            self.logger.info(_('Since %(time)s: Account audits: '
                               '%(passed)s passed audit, '
                               '%(failed)s failed audit'),
                             {'time': time.ctime(reported),
                              'passed': self.account_passes,
                              'failed': self.account_failures})
            dump_recon_cache({'account_audits_since': reported,
                              'account_audits_passed': self.account_passes,
                              'account_audits_failed':
                                  self.account_failures},
                             self.rcache, self.logger)
            reported = time.time()
            self.account_passes = 0
            self.account_failures = 0
    return reported
def _one_audit_pass(self, reported):
    all_locs = audit_location_generator(self.devices, DATADIR, '.db',
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        self.account_audit(path)
        if time.time() - reported >= self.logging_interval:
            self.logger.info(_('Since %(time)s: Account audits: '
                               '%(passed)s passed audit, '
                               '%(failed)s failed audit'),
                             {'time': time.ctime(reported),
                              'passed': self.account_passes,
                              'failed': self.account_failures})
            dump_recon_cache({'account_audits_since': reported,
                              'account_audits_passed': self.account_passes,
                              'account_audits_failed':
                                  self.account_failures},
                             self.rcache, self.logger)
            reported = time.time()
            self.account_passes = 0
            self.account_failures = 0
        self.accounts_running_time = ratelimit_sleep(
            self.accounts_running_time, self.max_accounts_per_second)
    return reported
def run_once(self, *args, **kwargs): """ Run the updater once. """ patcher.monkey_patch(all=False, socket=True, select=True, thread=True) self.logger.info(_('Begin container update single threaded sweep')) begin = time.time() self.no_changes = 0 self.successes = 0 self.failures = 0 for path in self.get_paths(): self.container_sweep(path) elapsed = time.time() - begin self.logger.info( _('Container update single threaded sweep completed: ' '%(elapsed).02fs, %(success)s successes, %(fail)s failures, ' '%(no_change)s with no changes'), { 'elapsed': elapsed, 'success': self.successes, 'fail': self.failures, 'no_change': self.no_changes }) dump_recon_cache({'container_updater_sweep': elapsed}, self.rcache, self.logger)
def report(self, final=False):
    """
    Emits a log line report of the progress so far, or the final progress
    if final=True.

    :param final: Set to True for the last report once the expiration
                  pass has completed.
    """
    if final:
        elapsed = time() - self.report_first_time
        self.logger.info(
            _('Pass completed in %ds; %d objects expired') %
            (elapsed, self.report_objects))
        dump_recon_cache(
            {
                'object_expiration_pass': elapsed,
                'expired_last_pass': self.report_objects
            }, self.rcache, self.logger)
    elif time() - self.report_last_time >= self.report_interval:
        elapsed = time() - self.report_first_time
        self.logger.info(
            _('Pass so far %ds; %d objects expired') %
            (elapsed, self.report_objects))
        self.report_last_time = time()
def _one_audit_pass(self, reported):
    all_locs = audit_location_generator(self.devices, DATADIR, '.db',
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        self.container_audit(path)
        if time.time() - reported >= 3600:  # once an hour
            self.logger.info(
                _('Since %(time)s: Container audits: %(pass)s passed '
                  'audit, %(fail)s failed audit'),
                {'time': time.ctime(reported),
                 'pass': self.container_passes,
                 'fail': self.container_failures})
            dump_recon_cache(
                {'container_audits_since': reported,
                 'container_audits_passed': self.container_passes,
                 'container_audits_failed': self.container_failures},
                self.rcache, self.logger)
            reported = time.time()
            self.container_passes = 0
            self.container_failures = 0
        self.containers_running_time = ratelimit_sleep(
            self.containers_running_time, self.max_containers_per_second)
    return reported
def _reset_recon(recon_cache, logger):
    device_progress_recon = {'devices': {}, 'workers': {}}
    dump_recon_cache(device_progress_recon, recon_cache, logger)
def post_multiprocess_run(self):
    # This method is called after run_once using multiple workers.
    update = self.aggregate_recon_update()
    dump_recon_cache(update, self.rcache, self.logger)
def audit_all_objects(self, mode='once', device_dirs=None):
    description = ''
    if device_dirs:
        device_dir_str = ','.join(sorted(device_dirs))
        if self.auditor_type == 'ALL':
            description = _(' - parallel, %s') % device_dir_str
        else:
            description = _(' - %s') % device_dir_str
    self.logger.info(_('Begin object audit "%s" mode (%s%s)') %
                     (mode, self.auditor_type, description))
    begin = reported = time.time()
    self.total_bytes_processed = 0
    self.total_files_processed = 0
    total_quarantines = 0
    total_errors = 0
    time_auditing = 0
    all_locs = self.diskfile_mgr.object_audit_location_generator(
        device_dirs=device_dirs)
    for location in all_locs:
        loop_time = time.time()
        self.failsafe_object_audit(location)
        self.logger.timing_since('timing', loop_time)
        self.files_running_time = ratelimit_sleep(
            self.files_running_time, self.max_files_per_second)
        self.total_files_processed += 1
        now = time.time()
        if now - self.last_logged >= self.log_time:
            self.logger.info(_(
                'Object audit (%(type)s). '
                'Since %(start_time)s: Locally: %(passes)d passed, '
                '%(quars)d quarantined, %(errors)d errors, '
                'files/sec: %(frate).2f, bytes/sec: %(brate).2f, '
                'Total time: %(total).2f, Auditing time: %(audit).2f, '
                'Rate: %(audit_rate).2f') % {
                    'type': '%s%s' % (self.auditor_type, description),
                    'start_time': time.ctime(reported),
                    'passes': self.passes,
                    'quars': self.quarantines,
                    'errors': self.errors,
                    'frate': self.passes / (now - reported),
                    'brate': self.bytes_processed / (now - reported),
                    'total': (now - begin),
                    'audit': time_auditing,
                    'audit_rate': time_auditing / (now - begin)})
            cache_entry = self.create_recon_nested_dict(
                'object_auditor_stats_%s' % (self.auditor_type),
                device_dirs,
                {'errors': self.errors, 'passes': self.passes,
                 'quarantined': self.quarantines,
                 'bytes_processed': self.bytes_processed,
                 'start_time': reported, 'audit_time': time_auditing})
            dump_recon_cache(cache_entry, self.rcache, self.logger)
            reported = now
            total_quarantines += self.quarantines
            total_errors += self.errors
            self.passes = 0
            self.quarantines = 0
            self.errors = 0
            self.bytes_processed = 0
            self.last_logged = now
        time_auditing += (now - loop_time)
    # Avoid divide by zero during very short runs
    elapsed = (time.time() - begin) or 0.000001
    self.logger.info(_(
        'Object audit (%(type)s) "%(mode)s" mode '
        'completed: %(elapsed).02fs. Total quarantined: %(quars)d, '
        'Total errors: %(errors)d, Total files/sec: %(frate).2f, '
        'Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, '
        'Rate: %(audit_rate).2f') % {
            'type': '%s%s' % (self.auditor_type, description),
            'mode': mode,
            'elapsed': elapsed,
            'quars': total_quarantines + self.quarantines,
            'errors': total_errors + self.errors,
            'frate': self.total_files_processed / elapsed,
            'brate': self.total_bytes_processed / elapsed,
            'audit': time_auditing,
            'audit_rate': time_auditing / elapsed})
    if self.stats_sizes:
        self.logger.info(
            _('Object audit stats: %s') % json.dumps(self.stats_buckets))
def clear_recon_cache(self, auditor_type):
    """Clear recon cache entries"""
    dump_recon_cache({'object_auditor_stats_%s' % auditor_type: {}},
                     self.rcache, self.logger)
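# A note on the clear_recon_cache call above: it relies on a merge rule
# these snippets assume for the recon cache, namely that writing an empty
# dict for a key removes that key rather than storing {}. A hedged sketch
# of that rule (merge_recon_entry is a hypothetical helper for
# illustration, not Swift code):
def merge_recon_entry(cache, key, value):
    if isinstance(value, dict) and not value:
        cache.pop(key, None)  # empty dict acts as a delete marker
    else:
        cache[key] = value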
def audit_all_objects(self, mode='once', device_dirs=None):
    description = ''
    if device_dirs:
        device_dir_str = ','.join(sorted(device_dirs))
        if self.auditor_type == 'ALL':
            description = _(' - parallel, %s') % device_dir_str
        else:
            description = _(' - %s') % device_dir_str
    self.logger.info(_('Begin object audit "%(mode)s" mode (%(audi_type)s'
                       '%(description)s)') % {
                           'mode': mode, 'audi_type': self.auditor_type,
                           'description': description})
    begin = reported = time.time()
    self.total_bytes_processed = 0
    self.total_files_processed = 0
    total_quarantines = 0
    total_errors = 0
    time_auditing = 0
    # TODO: we should move audit-location generation to the storage
    # policy, as we may (conceivably) have a different filesystem layout
    # for each. We'd still need to generate the policies to audit from
    # the actual directories found on-disk, and have appropriate error
    # reporting if we find a directory that doesn't correspond to any
    # known policy. This will require a sizable refactor, but currently
    # all diskfile managers can find all diskfile locations regardless
    # of policy -- so for now just use Policy-0's manager.
    all_locs = (self.diskfile_router[POLICIES[0]]
                .object_audit_location_generator(
                    device_dirs=device_dirs,
                    auditor_type=self.auditor_type))
    for location in all_locs:
        loop_time = time.time()
        self.failsafe_object_audit(location)
        self.logger.timing_since('timing', loop_time)
        self.files_running_time = ratelimit_sleep(
            self.files_running_time, self.max_files_per_second)
        self.total_files_processed += 1
        now = time.time()
        if now - self.last_logged >= self.log_time:
            self.logger.info(_(
                'Object audit (%(type)s). '
                'Since %(start_time)s: Locally: %(passes)d passed, '
                '%(quars)d quarantined, %(errors)d errors, '
                'files/sec: %(frate).2f, bytes/sec: %(brate).2f, '
                'Total time: %(total).2f, Auditing time: %(audit).2f, '
                'Rate: %(audit_rate).2f') % {
                    'type': '%s%s' % (self.auditor_type, description),
                    'start_time': time.ctime(reported),
                    'passes': self.passes,
                    'quars': self.quarantines,
                    'errors': self.errors,
                    'frate': self.passes / (now - reported),
                    'brate': self.bytes_processed / (now - reported),
                    'total': (now - begin),
                    'audit': time_auditing,
                    'audit_rate': time_auditing / (now - begin)})
            cache_entry = self.create_recon_nested_dict(
                'object_auditor_stats_%s' % (self.auditor_type),
                device_dirs,
                {'errors': self.errors, 'passes': self.passes,
                 'quarantined': self.quarantines,
                 'bytes_processed': self.bytes_processed,
                 'start_time': reported, 'audit_time': time_auditing})
            dump_recon_cache(cache_entry, self.rcache, self.logger)
            reported = now
            total_quarantines += self.quarantines
            total_errors += self.errors
            self.passes = 0
            self.quarantines = 0
            self.errors = 0
            self.bytes_processed = 0
            self.last_logged = now
        time_auditing += (now - loop_time)
    # Avoid divide by zero during very short runs
    elapsed = (time.time() - begin) or 0.000001
    self.logger.info(_(
        'Object audit (%(type)s) "%(mode)s" mode '
        'completed: %(elapsed).02fs. Total quarantined: %(quars)d, '
        'Total errors: %(errors)d, Total files/sec: %(frate).2f, '
        'Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, '
        'Rate: %(audit_rate).2f') % {
            'type': '%s%s' % (self.auditor_type, description),
            'mode': mode,
            'elapsed': elapsed,
            'quars': total_quarantines + self.quarantines,
            'errors': total_errors + self.errors,
            'frate': self.total_files_processed / elapsed,
            'brate': self.total_bytes_processed / elapsed,
            'audit': time_auditing,
            'audit_rate': time_auditing / elapsed})
    if self.stats_sizes:
        self.logger.info(
            _('Object audit stats: %s') % json.dumps(self.stats_buckets))
    # Unset remaining partitions to not skip them in the next run
    diskfile.clear_auditor_status(self.devices, self.auditor_type)
def run_forever(self, *args, **kwargs): """ Run the updator continuously. """ time.sleep(random() * self.interval) while True: self.logger.info(_('Begin container update sweep')) begin = time.time() now = time.time() expired_suppressions = \ [a for a, u in self.account_suppressions.items() if u < now] for account in expired_suppressions: del self.account_suppressions[account] pid2filename = {} # read from account ring to ensure it's fresh self.get_account_ring().get_nodes('') for path in self.get_paths(): while len(pid2filename) >= self.concurrency: pid = os.wait()[0] try: self._load_suppressions(pid2filename[pid]) finally: del pid2filename[pid] fd, tmpfilename = mkstemp() os.close(fd) pid = os.fork() if pid: pid2filename[pid] = tmpfilename else: signal.signal(signal.SIGTERM, signal.SIG_DFL) patcher.monkey_patch(all=False, socket=True, select=True, thread=True) self.no_changes = 0 self.successes = 0 self.failures = 0 self.new_account_suppressions = open(tmpfilename, 'w') forkbegin = time.time() self.container_sweep(path) elapsed = time.time() - forkbegin self.logger.debug( _('Container update sweep of %(path)s completed: ' '%(elapsed).02fs, %(success)s successes, %(fail)s ' 'failures, %(no_change)s with no changes'), {'path': path, 'elapsed': elapsed, 'success': self.successes, 'fail': self.failures, 'no_change': self.no_changes}) sys.exit() while pid2filename: pid = os.wait()[0] try: self._load_suppressions(pid2filename[pid]) finally: del pid2filename[pid] elapsed = time.time() - begin self.logger.info(_('Container update sweep completed: %.02fs'), elapsed) dump_recon_cache({'container_updater_sweep': elapsed}, self.rcache, self.logger) if elapsed < self.interval: time.sleep(self.interval - elapsed)
def audit_all_objects(self, mode='once'):
    self.logger.info(_('Begin object audit "%s" mode (%s)') %
                     (mode, self.auditor_type))
    begin = reported = time.time()
    self.total_bytes_processed = 0
    self.total_files_processed = 0
    total_quarantines = 0
    total_errors = 0
    time_auditing = 0
    all_locs = audit_location_generator(self.devices,
                                        object_server.DATADIR, '.data',
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        loop_time = time.time()
        self.failsafe_object_audit(path, device, partition)
        self.logger.timing_since('timing', loop_time)
        self.files_running_time = ratelimit_sleep(
            self.files_running_time, self.max_files_per_second)
        self.total_files_processed += 1
        now = time.time()
        if now - reported >= self.log_time:
            self.logger.info(_(
                'Object audit (%(type)s). '
                'Since %(start_time)s: Locally: %(passes)d passed, '
                '%(quars)d quarantined, %(errors)d errors, '
                'files/sec: %(frate).2f, bytes/sec: %(brate).2f, '
                'Total time: %(total).2f, Auditing time: %(audit).2f, '
                'Rate: %(audit_rate).2f') % {
                    'type': self.auditor_type,
                    'start_time': time.ctime(reported),
                    'passes': self.passes,
                    'quars': self.quarantines,
                    'errors': self.errors,
                    'frate': self.passes / (now - reported),
                    'brate': self.bytes_processed / (now - reported),
                    'total': (now - begin),
                    'audit': time_auditing,
                    'audit_rate': time_auditing / (now - begin)})
            dump_recon_cache(
                {'object_auditor_stats_%s' % self.auditor_type: {
                    'errors': self.errors,
                    'passes': self.passes,
                    'quarantined': self.quarantines,
                    'bytes_processed': self.bytes_processed,
                    'start_time': reported,
                    'audit_time': time_auditing}},
                self.rcache, self.logger)
            reported = now
            total_quarantines += self.quarantines
            total_errors += self.errors
            self.passes = 0
            self.quarantines = 0
            self.errors = 0
            self.bytes_processed = 0
        time_auditing += (now - loop_time)
    # Avoid divide by zero during very short runs
    elapsed = (time.time() - begin) or 0.000001
    self.logger.info(_(
        'Object audit (%(type)s) "%(mode)s" mode '
        'completed: %(elapsed).02fs. Total quarantined: %(quars)d, '
        'Total errors: %(errors)d, Total files/sec: %(frate).2f, '
        'Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, '
        'Rate: %(audit_rate).2f') % {
            'type': self.auditor_type,
            'mode': mode,
            'elapsed': elapsed,
            'quars': total_quarantines,
            'errors': total_errors,
            'frate': self.total_files_processed / elapsed,
            'brate': self.total_bytes_processed / elapsed,
            'audit': time_auditing,
            'audit_rate': time_auditing / elapsed})
    if self.stats_sizes:
        self.logger.info(
            _('Object audit stats: %s') % json.dumps(self.stats_buckets))