Ejemplo n.º 1
0
    def run_once(self, *args, **kwargs):
        """Run one object-replication pass (daemon "once" mode).

        ``devices``, ``partitions`` and ``policies`` may arrive in *kwargs*
        as CSV strings; an empty CSV is normalised to None so the replicator
        applies no restriction for that dimension.
        """
        self._zero_stats()
        self.logger.info(_("Running object replicator in script mode."))

        override_devices = list_from_csv(kwargs.get('devices'))
        override_partitions = list_from_csv(kwargs.get('partitions'))
        override_policies = list_from_csv(kwargs.get('policies'))
        if not override_devices:
            override_devices = None
        if not override_partitions:
            override_partitions = None
        if not override_policies:
            override_policies = None

        self.replicate(
            override_devices=override_devices,
            override_partitions=override_partitions,
            override_policies=override_policies)
        # Pass duration in minutes, measured from the stats start timestamp.
        total = (time.time() - self.stats['start']) / 60
        self.logger.info(
            _("Object replication complete (once). (%.02f minutes)"), total)
        # Only a full (unrestricted) pass updates recon, so the cached
        # numbers always describe a complete sweep.
        if not (override_partitions or override_devices):
            replication_last = time.time()
            dump_recon_cache({'replication_stats': self.stats,
                              'replication_time': total,
                              'replication_last': replication_last,
                              'object_replication_time': total,
                              'object_replication_last': replication_last},
                             self.rcache, self.logger)
Ejemplo n.º 2
0
 def run_once(self, *args, **kwargs):
     """Perform one full object-replication pass and record its duration."""
     began = time.time()
     self.logger.info(_("Running object replicator in script mode."))
     self.replicate()
     # Elapsed wall-clock time, reported in minutes.
     elapsed_minutes = (time.time() - began) / 60
     self.logger.info(_("Object replication complete. (%.02f minutes)"), elapsed_minutes)
     dump_recon_cache({"object_replication_time": elapsed_minutes}, self.rcache, self.logger)
Ejemplo n.º 3
0
 def _one_audit_pass(self, reported):
     """Audit every account location once; emit an hourly progress report.

     :param reported: timestamp of the last hourly report
     :returns: timestamp of the most recent report (possibly unchanged)
     """
     all_locs = audit_location_generator(
         self.devices, account_server.DATADIR, mount_check=self.mount_check, logger=self.logger
     )
     for path, device, partition in all_locs:
         self.account_audit(path)
         if time.time() - reported >= 3600:  # once an hour
             self.logger.info(
                 _("Since %(time)s: Account audits: " "%(passed)s passed audit," "%(failed)s failed audit"),
                 {"time": time.ctime(reported), "passed": self.account_passes, "failed": self.account_failures},
             )
             # BUG FIX: removed a duplicate self.account_audit(path) call
             # here; it re-audited the same path a second time (double
             # counting it) every time the hourly report fired.
             dump_recon_cache(
                 {
                     "account_audits_since": reported,
                     "account_audits_passed": self.account_passes,
                     "account_audits_failed": self.account_failures,
                 },
                 self.rcache,
                 self.logger,
             )
             # Start a fresh hourly reporting window.
             reported = time.time()
             self.account_passes = 0
             self.account_failures = 0
     return reported
Ejemplo n.º 4
0
 def _report_stats(self):
     """Report the current stats to the logs."""
     # The tiny epsilon in the denominator guards against division by zero
     # when the pass took (effectively) no time.
     self.logger.info(
         _("Attempted to replicate %(count)d dbs in %(time).5f seconds " "(%(rate).5f/s)"),
         {
             "count": self.stats["attempted"],
             "time": time.time() - self.stats["start"],
             "rate": self.stats["attempted"] / (time.time() - self.stats["start"] + 0.0000001),
         },
     )
     self.logger.info(_("Removed %(remove)d dbs") % self.stats)
     self.logger.info(_("%(success)s successes, %(failure)s failures") % self.stats)
     dump_recon_cache(
         {"replication_stats": self.stats, "replication_time": time.time() - self.stats["start"]},
         self.rcache,
         self.logger,
     )
     # One-line summary of the per-outcome counters of interest.
     self.logger.info(
         " ".join(
             [
                 "%s:%s" % item
                 for item in self.stats.items()
                 if item[0] in ("no_change", "hashmatch", "rsync", "diff", "ts_repl", "empty", "diff_capped")
             ]
         )
     )
Ejemplo n.º 5
0
    def run_once(self, *args, **kwargs):
        """Run one object-replication pass in script ("once") mode.

        Optional CSV overrides in *kwargs* (``devices``, ``partitions``,
        ``policies``) restrict the pass; empty CSVs are normalised to None.
        """
        start = time.time()
        self.logger.info(_("Running object replicator in script mode."))

        override_devices = list_from_csv(kwargs.get('devices'))
        override_partitions = list_from_csv(kwargs.get('partitions'))
        override_policies = list_from_csv(kwargs.get('policies'))
        if not override_devices:
            override_devices = None
        if not override_partitions:
            override_partitions = None
        if not override_policies:
            override_policies = None

        # BUG FIX: a debugging hack hard-coded override_devices = ['sda4']
        # here, which silently discarded any caller-supplied device list
        # and, being always truthy, permanently disabled the recon-cache
        # dump below. Removed to restore the general behavior.
        self.replicate(
            override_devices=override_devices,
            override_partitions=override_partitions,
            override_policies=override_policies)
        total = (time.time() - start) / 60
        self.logger.info(
            _("Object replication complete (once). (%.02f minutes)"), total)
        # Recon stats only reflect complete, unrestricted passes.
        if not (override_partitions or override_devices):
            dump_recon_cache({'object_replication_time': total,
                              'object_replication_last': time.time()},
                             self.rcache, self.logger)
Ejemplo n.º 6
0
 def run_forever(self, *args, **kwargs):
     """
     Run the updater continuously.
     """
     # Random initial delay staggers updater start times across the cluster.
     time.sleep(random() * self.interval)
     while True:
         self.logger.info(_('Begin container update sweep'))
         begin = time.time()
         now = time.time()
         # Drop account suppressions whose expiry time has passed.
         expired_suppressions = \
             [a for a, u in self.account_suppressions.items()
              if u < now]
         for account in expired_suppressions:
             del self.account_suppressions[account]
         pid2filename = {}
         # read from account ring to ensure it's fresh
         self.get_account_ring().get_nodes('')
         for path in self.get_paths():
             # Cap concurrent children: reap one (and absorb the
             # suppressions it wrote) before forking another.
             while len(pid2filename) >= self.concurrency:
                 pid = os.wait()[0]
                 try:
                     self._load_suppressions(pid2filename[pid])
                 finally:
                     del pid2filename[pid]
             # Temp file lets the child hand new suppressions back to us.
             fd, tmpfilename = mkstemp()
             os.close(fd)
             pid = os.fork()
             if pid:
                 pid2filename[pid] = tmpfilename
             else:
                 # Child: restore default SIGTERM handling, re-apply the
                 # eventlet monkey patch, sweep one path, log, then exit
                 # without returning to the loop.
                 signal.signal(signal.SIGTERM, signal.SIG_DFL)
                 eventlet_monkey_patch()
                 self.no_changes = 0
                 self.successes = 0
                 self.failures = 0
                 self.new_account_suppressions = open(tmpfilename, 'w')
                 forkbegin = time.time()
                 self.container_sweep(path)
                 elapsed = time.time() - forkbegin
                 self.logger.debug(
                     _('Container update sweep of %(path)s completed: '
                       '%(elapsed).02fs, %(success)s successes, %(fail)s '
                       'failures, %(no_change)s with no changes'),
                     {'path': path, 'elapsed': elapsed,
                      'success': self.successes, 'fail': self.failures,
                      'no_change': self.no_changes})
                 sys.exit()
         # Reap any remaining children and absorb their suppression files.
         while pid2filename:
             pid = os.wait()[0]
             try:
                 self._load_suppressions(pid2filename[pid])
             finally:
                 del pid2filename[pid]
         elapsed = time.time() - begin
         self.logger.info(_('Container update sweep completed: %.02fs'),
                          elapsed)
         dump_recon_cache({'container_updater_sweep': elapsed},
                          self.rcache, self.logger)
         # Sleep out the remainder of the interval if we finished early.
         if elapsed < self.interval:
             time.sleep(self.interval - elapsed)
Ejemplo n.º 7
0
 def _one_audit_pass(self, reported):
     """Audit every account DB on this node, rate-limited, reporting hourly.

     :param reported: timestamp of the last hourly progress report
     :returns: timestamp of the most recent report (possibly unchanged)
     """
     all_locs = audit_location_generator(self.devices,
                                         account_server.DATADIR, '.db',
                                         mount_check=self.mount_check,
                                         logger=self.logger)
     for path, device, partition in all_locs:
         self.account_audit(path)
         if time.time() - reported >= 3600:  # once an hour
             self.logger.info(_('Since %(time)s: Account audits: '
                                '%(passed)s passed audit,'
                                '%(failed)s failed audit'),
                              {'time': time.ctime(reported),
                               'passed': self.account_passes,
                               'failed': self.account_failures})
             dump_recon_cache({'account_audits_since': reported,
                               'account_audits_passed': self.account_passes,
                               'account_audits_failed':
                               self.account_failures},
                              self.rcache, self.logger)
             # Reset the hourly window counters after reporting.
             reported = time.time()
             self.account_passes = 0
             self.account_failures = 0
         # Throttle so we audit at most max_accounts_per_second.
         self.accounts_running_time = ratelimit_sleep(
             self.accounts_running_time, self.max_accounts_per_second)
     return reported
Ejemplo n.º 8
0
    def run_once(self, *args, **kwargs):
        """Run one replication pass, honouring device/partition/policy overrides."""
        self._zero_stats()
        self.logger.info(_("Running object replicator in script mode."))

        # An empty CSV override collapses to None, meaning "no restriction".
        devices = list_from_csv(kwargs.get("devices")) or None
        partitions = list_from_csv(kwargs.get("partitions")) or None
        policies = list_from_csv(kwargs.get("policies")) or None

        self.replicate(
            override_devices=devices,
            override_partitions=partitions,
            override_policies=policies,
        )
        # Duration in minutes, from the stats start timestamp.
        total = (time.time() - self.stats["start"]) / 60
        self.logger.info(_("Object replication complete (once). (%.02f minutes)"), total)
        # Recon is only updated when the pass covered everything.
        if not (partitions or devices):
            now = time.time()
            dump_recon_cache(
                {
                    "replication_stats": self.stats,
                    "replication_time": total,
                    "replication_last": now,
                    "object_replication_time": total,
                    "object_replication_last": now,
                },
                self.rcache,
                self.logger,
            )
 def run_once(self, *args, **kwargs):
     """Run the container audit once."""
     self.logger.info(_('Begin container audit "once" mode'))
     begin = reported = time.time()
     self._one_audit_pass(reported)
     elapsed = time.time() - begin
     self.logger.info(_('Container audit "once" mode completed: %.02fs'), elapsed)
     # NOTE(review): dump_recon_cache is called here with only two arguments
     # (cache dict, self.recon_container) while sibling examples pass
     # (cache, rcache, logger) -- confirm which signature this code targets.
     dump_recon_cache({"container_auditor_pass_completed": elapsed}, self.recon_container)
Ejemplo n.º 10
0
 def run_once(self, *args, **kwargs):
     """Execute a single account-audit pass and publish its duration."""
     self.logger.info(_('Begin account audit "once" mode'))
     started = time.time()
     self._one_audit_pass(started)
     duration = time.time() - started
     self.logger.info(_('Account audit "once" mode completed: %.02fs'), duration)
     dump_recon_cache({'account_auditor_pass_completed': duration}, self.rcache, self.logger)
Ejemplo n.º 11
0
 def run_once(self, *args, **kwargs):
     """Run one replication pass restricted to any devices/partitions in kwargs."""
     began = time.time()
     self.logger.info(_("Running object replicator in script mode."))
     devices = list_from_csv(kwargs.get("devices"))
     partitions = list_from_csv(kwargs.get("partitions"))
     self.replicate(override_devices=devices, override_partitions=partitions)
     minutes = (time.time() - began) / 60
     self.logger.info(_("Object replication complete (once). (%.02f minutes)"), minutes)
     # Only an unrestricted pass updates the recon cache.
     if not (partitions or devices):
         dump_recon_cache(
             {"object_replication_time": minutes, "object_replication_last": time.time()}, self.rcache, self.logger
         )
Ejemplo n.º 12
0
 def run_forever(self, *args, **kwargs):
     """Replicate continuously, sleeping run_pause seconds between passes."""
     self.logger.info(_("Starting object replicator in daemon mode."))
     while True:
         pass_start = time.time()
         self.logger.info(_("Starting object replication pass."))
         self.replicate()
         # Duration of this pass, in minutes.
         minutes = (time.time() - pass_start) / 60
         self.logger.info(_("Object replication complete. (%.02f minutes)"), minutes)
         dump_recon_cache({"object_replication_time": minutes}, self.rcache, self.logger)
         self.logger.debug(_("Replication sleeping for %s seconds."), self.run_pause)
         sleep(self.run_pause)
Ejemplo n.º 13
0
 def run_once(self, *args, **kwargs):
     """Run a single object-replication pass, optionally recording recon data."""
     start = time.time()
     self.logger.info(_("Running object replicator in script mode."))
     self.replicate()
     total = (time.time() - start) / 60
     self.logger.info(
         _("Object replication complete. (%.02f minutes)"), total)
     # Recon dumping is best-effort: a failure to write the cache is logged
     # but must not fail the replication pass itself.
     if self.recon_enable:
         try:
             dump_recon_cache('object_replication_time', total, \
                 self.recon_object)
         except (Exception, Timeout):
             self.logger.exception(_('Exception dumping recon cache'))
    def report(self, final=False):
        """Log aggregation progress; on the final call also dump recon stats.

        :param final: True for the last report after the pass completes.
        """
        if final:
            elapsed = time() - self.report_first_time
            self.logger.info(_('Pass completed in %ds; %d containers,'
                               ' %d objects aggregated') %
                             (elapsed, self.report_containers,
                              self.report_objects))
            dump_recon_cache({'object_aggregation_pass': elapsed,
                              'aggregation_last_pass': self.report_containers},
                             self.rcache, self.logger)

        # Interim report, throttled to at most one per report_interval.
        elif time() - self.report_last_time >= self.report_interval:
            elapsed = time() - self.report_first_time
            self.logger.info(_('Pass so far %ds; %d objects aggregated') %
                             (elapsed, self.report_objects))
            self.report_last_time = time()
Ejemplo n.º 15
0
 def run_once(self, *args, **kwargs):
     """Run a single reconstruction pass, honouring device/partition overrides."""
     began = time.time()
     self.logger.info(_("Running object reconstructor in script mode."))
     devices = list_from_csv(kwargs.get('devices'))
     # Partition overrides arrive as CSV text; reconstruct() wants ints.
     partitions = list(map(int, list_from_csv(kwargs.get('partitions'))))
     self.reconstruct(override_devices=devices,
                      override_partitions=partitions)
     minutes = (time.time() - began) / 60
     self.logger.info(
         _("Object reconstruction complete (once). (%.02f minutes)"), minutes)
     # Only a full, unrestricted pass updates the recon cache.
     if not (partitions or devices):
         now = time.time()
         dump_recon_cache({'object_reconstruction_time': minutes,
                           'object_reconstruction_last': now},
                          self.rcache, self.logger)
 def run_forever(self, *args, **kwargs):
     """Run the container audit until stopped."""
     reported = time.time()
     # Random initial delay spreads audit start times across the cluster.
     time.sleep(random() * self.interval)
     while True:
         self.logger.info(_("Begin container audit pass."))
         begin = time.time()
         try:
             reported = self._one_audit_pass(reported)
         except (Exception, Timeout):
             self.logger.increment("errors")
             self.logger.exception(_("ERROR auditing"))
         elapsed = time.time() - begin
         # NOTE(review): the completion log and recon dump run *after* the
         # inter-pass sleep, so they trail the pass end -- confirm intended.
         if elapsed < self.interval:
             time.sleep(self.interval - elapsed)
         self.logger.info(_("Container audit pass completed: %.02fs"), elapsed)
         dump_recon_cache({"container_auditor_pass_completed": elapsed}, self.rcache, self.logger)
Ejemplo n.º 17
0
 def run_forever(self, *args, **kwargs):
     """Run the updater continuously."""
     # Random initial delay staggers start times across the cluster.
     time.sleep(random() * self.interval)
     while True:
         self.logger.info(_('Begin object update sweep'))
         begin = time.time()
         pids = []
         # read from container ring to ensure it's fresh
         self.get_container_ring().get_nodes('')
         for device in self._listdir(self.devices):
             if self.mount_check and \
                     not ismount(os.path.join(self.devices, device)):
                 self.logger.increment('errors')
                 self.logger.warning(
                     _('Skipping %s as it is not mounted'), device)
                 continue
             # Cap the number of concurrent per-device child sweeps.
             while len(pids) >= self.concurrency:
                 pids.remove(os.wait()[0])
             pid = os.fork()
             if pid:
                 pids.append(pid)
             else:
                 # Child: restore default SIGTERM handling, monkey-patch for
                 # eventlet, sweep one device, log, and exit without
                 # returning to the loop.
                 signal.signal(signal.SIGTERM, signal.SIG_DFL)
                 patcher.monkey_patch(all=False, socket=True, select=True,
                                      thread=True)
                 self.successes = 0
                 self.failures = 0
                 forkbegin = time.time()
                 self.object_sweep(os.path.join(self.devices, device))
                 elapsed = time.time() - forkbegin
                 self.logger.info(
                     _('Object update sweep of %(device)s'
                       ' completed: %(elapsed).02fs, %(success)s successes'
                       ', %(fail)s failures'),
                     {'device': device, 'elapsed': elapsed,
                      'success': self.successes, 'fail': self.failures})
                 sys.exit()
         # Reap any children still running.
         while pids:
             pids.remove(os.wait()[0])
         elapsed = time.time() - begin
         self.logger.info(_('Object update sweep completed: %.02fs'),
                          elapsed)
         dump_recon_cache({'object_updater_sweep': elapsed},
                          self.rcache, self.logger)
         # Sleep out the remainder of the interval if we finished early.
         if elapsed < self.interval:
             time.sleep(self.interval - elapsed)
Ejemplo n.º 18
0
 def run_forever(self, *args, **kwargs):
     """Reconstruct continuously, sleeping self.interval seconds between passes."""
     self.logger.info(_("Starting object reconstructor in daemon mode."))
     while True:
         began = time.time()
         self.logger.info(_("Starting object reconstruction pass."))
         self.reconstruct()
         # Duration of this pass, in minutes.
         minutes = (time.time() - began) / 60
         self.logger.info(
             _("Object reconstruction complete. (%.02f minutes)"), minutes)
         dump_recon_cache({'object_reconstruction_time': minutes,
                           'object_reconstruction_last': time.time()},
                          self.rcache, self.logger)
         self.logger.debug('reconstruction sleeping for %s seconds.',
                           self.interval)
         sleep(self.interval)
Ejemplo n.º 19
0
    def report(self, final=False):
        """
        Emits a log line report of the progress so far, or the final progress
        if final=True.

        :param final: Set to True for the last report once the expiration pass
                      has completed.
        """
        now = time()
        if final:
            elapsed = now - self.report_first_time
            self.logger.info(_("Pass completed in %ds; %d objects expired") % (elapsed, self.report_objects))
            dump_recon_cache(
                {"object_expiration_pass": elapsed, "expired_last_pass": self.report_objects}, self.rcache, self.logger
            )
        elif now - self.report_last_time >= self.report_interval:
            # Interim report, throttled to one per report_interval.
            elapsed = now - self.report_first_time
            self.logger.info(_("Pass so far %ds; %d objects expired") % (elapsed, self.report_objects))
            self.report_last_time = time()
Ejemplo n.º 20
0
 def run_forever(self, *args, **kwargs):
     """Run the updater continuously."""
     # Random initial delay staggers start times across the cluster.
     time.sleep(random() * self.interval)
     while True:
         self.logger.info(_('Begin object update sweep'))
         begin = time.time()
         pids = []
         # read from container ring to ensure it's fresh
         self.get_container_ring().get_nodes('')
         for device in self._listdir(self.devices):
             if not check_drive(self.devices, device, self.mount_check):
                 # We don't count this as an error. The occasional
                 # unmounted drive is part of normal cluster operations,
                 # so a simple warning is sufficient.
                 self.logger.warning(
                     _('Skipping %s as it is not mounted'), device)
                 continue
             # Cap the number of concurrent per-device child sweeps.
             while len(pids) >= self.concurrency:
                 pids.remove(os.wait()[0])
             pid = os.fork()
             if pid:
                 pids.append(pid)
             else:
                 # Child: restore default SIGTERM handling, monkey-patch for
                 # eventlet, sweep one device, log, and exit without
                 # returning to the loop.
                 signal.signal(signal.SIGTERM, signal.SIG_DFL)
                 eventlet_monkey_patch()
                 self.stats.reset()
                 forkbegin = time.time()
                 self.object_sweep(os.path.join(self.devices, device))
                 elapsed = time.time() - forkbegin
                 self.logger.info(
                     ('Object update sweep of %(device)s '
                      'completed: %(elapsed).02fs, %(stats)s'),
                     {'device': device, 'elapsed': elapsed,
                      'stats': self.stats})
                 sys.exit()
         # Reap any children still running.
         while pids:
             pids.remove(os.wait()[0])
         elapsed = time.time() - begin
         self.logger.info(_('Object update sweep completed: %.02fs'),
                          elapsed)
         dump_recon_cache({'object_updater_sweep': elapsed},
                          self.rcache, self.logger)
         # Sleep out the remainder of the interval if we finished early.
         if elapsed < self.interval:
             time.sleep(self.interval - elapsed)
Ejemplo n.º 21
0
 def run_forever(self, *args, **kwargs):
     """Run the account audit until stopped."""
     reported = time.time()
     # Random initial delay spreads audit start times across the cluster.
     time.sleep(random() * self.interval)
     while True:
         self.logger.info(_('Begin account audit pass.'))
         begin = time.time()
         try:
             reported = self._one_audit_pass(reported)
         except (Exception, Timeout):
             self.logger.increment('errors')
             self.logger.exception(_('ERROR auditing'))
         elapsed = time.time() - begin
         # NOTE(review): the completion log and recon dump run *after* the
         # inter-pass sleep, so they trail the pass end -- confirm intended.
         if elapsed < self.interval:
             time.sleep(self.interval - elapsed)
         self.logger.info(
             _('Account audit pass completed: %.02fs'), elapsed)
         dump_recon_cache({'account_auditor_pass_completed': elapsed},
                          self.rcache, self.logger)
Ejemplo n.º 22
0
 def run_forever(self, *args, **kwargs):
     """Run the updater continuously."""
     # Random initial delay staggers start times across the cluster.
     time.sleep(random() * self.interval)
     while True:
         self.logger.info(_("Begin object update sweep"))
         begin = time.time()
         pids = []
         # read from container ring to ensure it's fresh
         self.get_container_ring().get_nodes("")
         for device in os.listdir(self.devices):
             if self.mount_check and not os.path.ismount(os.path.join(self.devices, device)):
                 self.logger.increment("errors")
                 self.logger.warn(_("Skipping %s as it is not mounted"), device)
                 continue
             # Cap the number of concurrent per-device child sweeps.
             while len(pids) >= self.concurrency:
                 pids.remove(os.wait()[0])
             pid = os.fork()
             if pid:
                 pids.append(pid)
             else:
                 # Child: restore default SIGTERM handling, monkey-patch
                 # sockets for eventlet, sweep one device, log, and exit
                 # without returning to the loop.
                 signal.signal(signal.SIGTERM, signal.SIG_DFL)
                 patcher.monkey_patch(all=False, socket=True)
                 self.successes = 0
                 self.failures = 0
                 forkbegin = time.time()
                 self.object_sweep(os.path.join(self.devices, device))
                 elapsed = time.time() - forkbegin
                 self.logger.info(
                     _(
                         "Object update sweep of %(device)s"
                         " completed: %(elapsed).02fs, %(success)s successes"
                         ", %(fail)s failures"
                     ),
                     {"device": device, "elapsed": elapsed, "success": self.successes, "fail": self.failures},
                 )
                 sys.exit()
         # Reap any children still running.
         while pids:
             pids.remove(os.wait()[0])
         elapsed = time.time() - begin
         self.logger.info(_("Object update sweep completed: %.02fs"), elapsed)
         dump_recon_cache({"object_updater_sweep": elapsed}, self.rcache, self.logger)
         # Sleep out the remainder of the interval if we finished early.
         if elapsed < self.interval:
             time.sleep(self.interval - elapsed)
Ejemplo n.º 23
0
 def _report_stats(self):
     """Report the current stats to the logs."""
     # The tiny epsilon in the denominator guards against a zero-length pass.
     self.logger.info(
         _('Attempted to replicate %(count)d dbs in %(time).5f seconds '
           '(%(rate).5f/s)'),
         {'count': self.stats['attempted'],
          'time': time.time() - self.stats['start'],
          'rate': self.stats['attempted'] /
                     (time.time() - self.stats['start'] + 0.0000001)})
     self.logger.info(_('Removed %(remove)d dbs') % self.stats)
     self.logger.info(_('%(success)s successes, %(failure)s failures')
         % self.stats)
     dump_recon_cache({'replication_stats': self.stats,
                       'replication_time': time.time() - self.stats['start']
                      }, self.rcache, self.logger)
     # One-line summary of the per-outcome counters of interest.
     self.logger.info(' '.join(['%s:%s' % item for item in
          self.stats.items() if item[0] in
          ('no_change', 'hashmatch', 'rsync', 'diff', 'ts_repl', 'empty',
           'diff_capped')]))
Ejemplo n.º 24
0
 def run_forever(self, *args, **kwargs):
     """Replicate continuously, sleeping run_pause seconds between passes."""
     self.logger.info(_("Starting object replicator in daemon mode."))
     # Run the replicator continually
     while True:
         start = time.time()
         self.logger.info(_("Starting object replication pass."))
         # Run the replicator
         self.replicate()
         total = (time.time() - start) / 60
         self.logger.info(
             _("Object replication complete. (%.02f minutes)"), total)
         # Recon dumping is best-effort: failures are logged, never raised.
         if self.recon_enable:
             try:
                 dump_recon_cache('object_replication_time', total, \
                     self.recon_object)
             except (Exception, Timeout):
                 self.logger.exception(_('Exception dumping recon cache'))
         self.logger.debug(_('Replication sleeping for %s seconds.'),
             self.run_pause)
         sleep(self.run_pause)
Ejemplo n.º 25
0
 def run_once(self, *args, **kwargs):
     """
     Run the updater once.
     """
     # Single-threaded variant: sweep every container path inline instead of
     # forking per-device children.
     patcher.monkey_patch(all=False, socket=True)
     self.logger.info(_('Begin container update single threaded sweep'))
     begin = time.time()
     self.no_changes = 0
     self.successes = 0
     self.failures = 0
     for path in self.get_paths():
         self.container_sweep(path)
     elapsed = time.time() - begin
     self.logger.info(_('Container update single threaded sweep completed: '
         '%(elapsed).02fs, %(success)s successes, %(fail)s failures, '
         '%(no_change)s with no changes'),
         {'elapsed': elapsed, 'success': self.successes,
          'fail': self.failures, 'no_change': self.no_changes})
     dump_recon_cache({'container_updater_sweep': elapsed},
                      self.rcache, self.logger)
Ejemplo n.º 26
0
 def run_once(self, *args, **kwargs):
     """Run the updater once"""
     # Single-threaded sweep: process every mounted device inline.
     self.logger.info(_("Begin object update single threaded sweep"))
     begin = time.time()
     self.successes = 0
     self.failures = 0
     for device in os.listdir(self.devices):
         if self.mount_check and not os.path.ismount(os.path.join(self.devices, device)):
             self.logger.increment("errors")
             self.logger.warn(_("Skipping %s as it is not mounted"), device)
             continue
         self.object_sweep(os.path.join(self.devices, device))
     elapsed = time.time() - begin
     self.logger.info(
         _(
             "Object update single threaded sweep completed: "
             "%(elapsed).02fs, %(success)s successes, %(fail)s failures"
         ),
         {"elapsed": elapsed, "success": self.successes, "fail": self.failures},
     )
     dump_recon_cache({"object_updater_sweep": elapsed}, self.rcache, self.logger)
Ejemplo n.º 27
0
 def run_once(self, *args, **kwargs):
     """Run the updater once."""
     # Single-threaded sweep over every device passing the drive check.
     self.logger.info(_('Begin object update single threaded sweep'))
     begin = time.time()
     self.successes = 0
     self.failures = 0
     for device in self._listdir(self.devices):
         if not check_drive(self.devices, device, self.mount_check):
             self.logger.increment('errors')
             self.logger.warning(
                 _('Skipping %s as it is not mounted'), device)
             continue
         self.object_sweep(os.path.join(self.devices, device))
     elapsed = time.time() - begin
     self.logger.info(
         _('Object update single threaded sweep completed: '
           '%(elapsed).02fs, %(success)s successes, %(fail)s failures'),
         {'elapsed': elapsed, 'success': self.successes,
          'fail': self.failures})
     dump_recon_cache({'object_updater_sweep': elapsed},
                      self.rcache, self.logger)
Ejemplo n.º 28
0
    def is_healthy(self):
        """
        Check whether our set of local devices remains the same.

        If devices have been added or removed, then we return False here so
        that we can kill off any worker processes and then distribute the
        new set of local devices across a new set of workers so that all
        devices are, once again, being worked on.

        This function may also cause recon stats to be updated.

        :returns: False if any local devices have been added or removed,
          True otherwise
        """
        # We update recon here because this is the only function we have in
        # a multiprocess replicator that gets called periodically in the
        # parent process.
        # NOTE(review): _next_rcache_update is not advanced here, so once the
        # deadline passes every call would dump recon -- presumably it is
        # reset elsewhere in the class; confirm.
        if time.time() >= self._next_rcache_update:
            update = self.aggregate_recon_update()
            dump_recon_cache(update, self.rcache, self.logger)
        return self.get_local_devices() == self.all_local_devices
Ejemplo n.º 29
0
 def run_once(self, *args, **kwargs):
     """Run the updater once."""
     self.logger.info(_('Begin object update single threaded sweep'))
     begin = time.time()
     # Reset the aggregate stats object at the start of each pass.
     self.stats.reset()
     for device in self._listdir(self.devices):
         if not check_drive(self.devices, device, self.mount_check):
             # We don't count this as an error. The occasional unmounted
             # drive is part of normal cluster operations, so a simple
             # warning is sufficient.
             self.logger.warning(
                 _('Skipping %s as it is not mounted'), device)
             continue
         self.object_sweep(os.path.join(self.devices, device))
     elapsed = time.time() - begin
     self.logger.info(
         ('Object update single-threaded sweep completed: '
          '%(elapsed).02fs, %(stats)s'),
         {'elapsed': elapsed, 'stats': self.stats})
     dump_recon_cache({'object_updater_sweep': elapsed},
                      self.rcache, self.logger)
Ejemplo n.º 30
0
 def run_forever(self, *args, **kwargs):
     """Replicate continuously, sleeping self.interval seconds between passes."""
     self.logger.info(_("Starting object replicator in daemon mode."))
     # Run the replicator continually
     while True:
         self._zero_stats()
         self.logger.info(_("Starting object replication pass."))
         # Run the replicator
         self.replicate()
         total = (time.time() - self.stats['start']) / 60
         self.logger.info(
             _("Object replication complete. (%.02f minutes)"), total)
         replication_last = time.time()
         # Both replication_* and object_replication_* keys get the same
         # values -- presumably for compatibility with older recon
         # consumers; confirm against the recon middleware.
         dump_recon_cache({'replication_stats': self.stats,
                           'replication_time': total,
                           'replication_last': replication_last,
                           'object_replication_time': total,
                           'object_replication_last': replication_last},
                          self.rcache, self.logger)
         self.logger.debug('Replication sleeping for %s seconds.',
                           self.interval)
         sleep(self.interval)
Ejemplo n.º 31
0
 def run_once(self, *args, **kwargs):
     """Perform one object-update sweep across all usable local devices."""
     self.logger.info(_('Begin object update single threaded sweep'))
     start_time = time.time()
     self.stats.reset()
     for dev in self._listdir(self.devices):
         try:
             mounted_path = check_drive(self.devices, dev, self.mount_check)
         except ValueError as err:
             # Unmounted drives occur in normal cluster operations; warn
             # and move on rather than counting an error.
             self.logger.warning('Skipping: %s', err)
             continue
         self.object_sweep(mounted_path)
     sweep_time = time.time() - start_time
     self.logger.info(
         ('Object update single-threaded sweep completed: '
          '%(elapsed).02fs, %(stats)s'),
         {'elapsed': sweep_time, 'stats': self.stats})
     dump_recon_cache({'object_updater_sweep': sweep_time},
                      self.rcache, self.logger)
Ejemplo n.º 32
0
 def run_once(self, *args, **kwargs):
     """
     Run the updater once: sweep every local device directory for
     pending object updates, counting successes and failures, then
     record the elapsed time in the recon cache.

     Unmounted devices (when mount_check is enabled) are counted as
     errors and skipped.
     """
     self.logger.info(_('Begin object update single threaded sweep'))
     begin = time.time()
     self.successes = 0
     self.failures = 0
     for device in os.listdir(self.devices):
         if self.mount_check and \
                 not ismount(os.path.join(self.devices, device)):
             self.logger.increment('errors')
             # logger.warn is a deprecated alias of logger.warning
             self.logger.warning(_('Skipping %s as it is not mounted'),
                                 device)
             continue
         self.object_sweep(os.path.join(self.devices, device))
     elapsed = time.time() - begin
     self.logger.info(
         _('Object update single threaded sweep completed: '
           '%(elapsed).02fs, %(success)s successes, %(fail)s failures'), {
               'elapsed': elapsed,
               'success': self.successes,
               'fail': self.failures
           })
     dump_recon_cache({'object_updater_sweep': elapsed}, self.rcache,
                      self.logger)
Ejemplo n.º 33
0
 def run_forever(self, *args, **kwargs):
     """Continuously run replication passes, sleeping between them."""
     self.logger.info(_("Starting object replicator in daemon mode."))
     while True:
         self._zero_stats()
         self.logger.info(_("Starting object replication pass."))
         self.replicate()
         minutes = (time.time() - self.stats['start']) / 60
         self.logger.info(_("Object replication complete. (%.02f minutes)"),
                          minutes)
         finished_at = time.time()
         recon_entry = {'replication_stats': self.stats,
                        'replication_time': minutes,
                        'replication_last': finished_at,
                        'object_replication_time': minutes,
                        'object_replication_last': finished_at}
         dump_recon_cache(recon_entry, self.rcache, self.logger)
         self.logger.debug('Replication sleeping for %s seconds.',
                           self.interval)
         sleep(self.interval)
Ejemplo n.º 34
0
    def run_once(self, *args, **kwargs):
        """Run a single replication pass, honoring CLI override filters."""
        rsync_reaper = eventlet.spawn(self._child_process_reaper)

        self._zero_stats()
        self.logger.info(_("Running object replicator in script mode."))

        # Empty CSV overrides collapse to None, meaning "no restriction".
        devices = list_from_csv(kwargs.get('devices')) or None
        partitions = list_from_csv(kwargs.get('partitions')) or None
        policies = list_from_csv(kwargs.get('policies')) or None

        self.replicate(override_devices=devices,
                       override_partitions=partitions,
                       override_policies=policies)
        elapsed_minutes = (time.time() - self.stats['start']) / 60
        self.logger.info(
            _("Object replication complete (once). (%.02f minutes)"),
            elapsed_minutes)
        if not (partitions or devices):
            # Only a full, unfiltered pass is representative enough to
            # publish to the recon cache.
            now = time.time()
            dump_recon_cache(
                {'replication_stats': self.stats,
                 'replication_time': elapsed_minutes,
                 'replication_last': now,
                 'object_replication_time': elapsed_minutes,
                 'object_replication_last': now},
                self.rcache, self.logger)

        # Give rsync processes one last chance to exit, then bail out and
        # let them be init's problem
        self._child_process_reaper_queue.put(None)
        rsync_reaper.wait()
Ejemplo n.º 35
0
 def _one_audit_pass(self, reported):
     """
     Audit every account database found on this node once, logging
     pass/fail counts and dumping them to the recon cache at most
     once an hour.

     :param reported: timestamp of the last periodic stats report
     :returns: timestamp of the most recent stats report
     """
     all_locs = audit_location_generator(self.devices,
                                         account_server.DATADIR,
                                         mount_check=self.mount_check,
                                         logger=self.logger)
     for path, device, partition in all_locs:
         self.account_audit(path)
         if time.time() - reported >= 3600:  # once an hour
             self.logger.info(_('Since %(time)s: Account audits: '
                                '%(passed)s passed audit,'
                                '%(failed)s failed audit'),
                              {'time': time.ctime(reported),
                               'passed': self.account_passes,
                               'failed': self.account_failures})
             # Bug fix: the original called self.account_audit(path) a
             # second time here, double-auditing (and double-counting)
             # one account per reporting interval.
             dump_recon_cache({'account_audits_since': reported,
                               'account_audits_passed': self.account_passes,
                               'account_audits_failed':
                               self.account_failures},
                              self.rcache, self.logger)
             reported = time.time()
             self.account_passes = 0
             self.account_failures = 0
     return reported
Ejemplo n.º 36
0
 def _one_audit_pass(self, reported):
     """
     Audit every account DB on this node once, rate-limited, emitting
     a periodic stats line and recon dump along the way.

     :param reported: timestamp of the last periodic stats report
     :returns: timestamp of the most recent stats report
     """
     locations = audit_location_generator(self.devices, DATADIR, '.db',
                                          mount_check=self.mount_check,
                                          logger=self.logger)
     for db_path, device, partition in locations:
         self.account_audit(db_path)
         if time.time() - reported >= self.logging_interval:
             audit_counts = {'time': time.ctime(reported),
                             'passed': self.account_passes,
                             'failed': self.account_failures}
             self.logger.info(_('Since %(time)s: Account audits: '
                                '%(passed)s passed audit,'
                                '%(failed)s failed audit'), audit_counts)
             recon_update = {
                 'account_audits_since': reported,
                 'account_audits_passed': self.account_passes,
                 'account_audits_failed': self.account_failures,
             }
             dump_recon_cache(recon_update, self.rcache, self.logger)
             reported = time.time()
             self.account_passes = 0
             self.account_failures = 0
         # Throttle to at most max_accounts_per_second.
         self.accounts_running_time = ratelimit_sleep(
             self.accounts_running_time, self.max_accounts_per_second)
     return reported
Ejemplo n.º 37
0
 def run_once(self, *args, **kwargs):
     """Perform one container-update sweep over all local paths."""
     # Green the blocking primitives used during the sweep.
     patcher.monkey_patch(all=False, socket=True, select=True, thread=True)
     self.logger.info(_('Begin container update single threaded sweep'))
     sweep_start = time.time()
     self.no_changes = 0
     self.successes = 0
     self.failures = 0
     for container_path in self.get_paths():
         self.container_sweep(container_path)
     sweep_time = time.time() - sweep_start
     summary = {'elapsed': sweep_time,
                'success': self.successes,
                'fail': self.failures,
                'no_change': self.no_changes}
     self.logger.info(
         _('Container update single threaded sweep completed: '
           '%(elapsed).02fs, %(success)s successes, %(fail)s failures, '
           '%(no_change)s with no changes'), summary)
     dump_recon_cache({'container_updater_sweep': sweep_time}, self.rcache,
                      self.logger)
Ejemplo n.º 38
0
    def report(self, final=False):
        """
        Log a progress report: the final totals when ``final`` is True,
        otherwise an interim line no more often than ``report_interval``.

        :param final: Set to True for the last report once the expiration
                      pass has completed.
        """
        if final:
            elapsed = time() - self.report_first_time
            self.logger.info(
                _('Pass completed in %ds; %d objects expired') %
                (elapsed, self.report_objects))
            dump_recon_cache({'object_expiration_pass': elapsed,
                              'expired_last_pass': self.report_objects},
                             self.rcache, self.logger)
        elif time() - self.report_last_time >= self.report_interval:
            elapsed = time() - self.report_first_time
            self.logger.info(
                _('Pass so far %ds; %d objects expired') %
                (elapsed, self.report_objects))
            self.report_last_time = time()
Ejemplo n.º 39
0
 def _one_audit_pass(self, reported):
     """
     Audit every container DB on this node once, rate-limited, emitting
     an hourly stats line and recon dump along the way.

     :param reported: timestamp of the last periodic stats report
     :returns: timestamp of the most recent stats report
     """
     locations = audit_location_generator(self.devices, DATADIR, '.db',
                                          mount_check=self.mount_check,
                                          logger=self.logger)
     for db_path, device, partition in locations:
         self.container_audit(db_path)
         if time.time() - reported >= 3600:  # once an hour
             audit_counts = {'time': time.ctime(reported),
                             'pass': self.container_passes,
                             'fail': self.container_failures}
             self.logger.info(
                 _('Since %(time)s: Container audits: %(pass)s passed '
                   'audit, %(fail)s failed audit'), audit_counts)
             recon_update = {
                 'container_audits_since': reported,
                 'container_audits_passed': self.container_passes,
                 'container_audits_failed': self.container_failures,
             }
             dump_recon_cache(recon_update, self.rcache, self.logger)
             reported = time.time()
             self.container_passes = 0
             self.container_failures = 0
         # Throttle to at most max_containers_per_second.
         self.containers_running_time = ratelimit_sleep(
             self.containers_running_time, self.max_containers_per_second)
     return reported
Ejemplo n.º 40
0
def _reset_recon(recon_cache, logger):
    """Reset per-device and per-worker progress entries in the recon cache."""
    dump_recon_cache({'devices': {}, 'workers': {}}, recon_cache, logger)
Ejemplo n.º 41
0
 def post_multiprocess_run(self):
     """Persist aggregated stats after a multi-worker run_once finishes."""
     dump_recon_cache(self.aggregate_recon_update(), self.rcache, self.logger)
Ejemplo n.º 42
0
 def audit_all_objects(self, mode='once', device_dirs=None):
     """
     Audit all objects visible to this auditor, logging periodic
     progress and dumping per-auditor stats to the recon cache.

     :param mode: label used only in log output (e.g. 'once')
     :param device_dirs: optional list of device directories to restrict
                         the audit to; None means all devices
     """
     description = ''
     if device_dirs:
         device_dir_str = ','.join(sorted(device_dirs))
         if self.auditor_type == 'ALL':
             description = _(' - parallel, %s') % device_dir_str
         else:
             description = _(' - %s') % device_dir_str
     self.logger.info(
         _('Begin object audit "%s" mode (%s%s)') %
         (mode, self.auditor_type, description))
     begin = reported = time.time()
     self.total_bytes_processed = 0
     self.total_files_processed = 0
     total_quarantines = 0
     total_errors = 0
     time_auditing = 0
     all_locs = self.diskfile_mgr.object_audit_location_generator(
         device_dirs=device_dirs)
     for location in all_locs:
         loop_time = time.time()
         self.failsafe_object_audit(location)
         self.logger.timing_since('timing', loop_time)
         # Throttle to the configured max files per second.
         self.files_running_time = ratelimit_sleep(
             self.files_running_time, self.max_files_per_second)
         self.total_files_processed += 1
         now = time.time()
         if now - self.last_logged >= self.log_time:
             # Periodic progress report; the per-interval counters are
             # folded into the running totals and reset below.
             self.logger.info(
                 _('Object audit (%(type)s). '
                   'Since %(start_time)s: Locally: %(passes)d passed, '
                   '%(quars)d quarantined, %(errors)d errors '
                   'files/sec: %(frate).2f , bytes/sec: %(brate).2f, '
                   'Total time: %(total).2f, Auditing time: %(audit).2f, '
                   'Rate: %(audit_rate).2f') % {
                       'type': '%s%s' % (self.auditor_type, description),
                       'start_time': time.ctime(reported),
                       'passes': self.passes,
                       'quars': self.quarantines,
                       'errors': self.errors,
                       'frate': self.passes / (now - reported),
                       'brate': self.bytes_processed / (now - reported),
                       'total': (now - begin),
                       'audit': time_auditing,
                       'audit_rate': time_auditing / (now - begin)
                   })
             cache_entry = self.create_recon_nested_dict(
                 'object_auditor_stats_%s' % (self.auditor_type),
                 device_dirs, {
                     'errors': self.errors,
                     'passes': self.passes,
                     'quarantined': self.quarantines,
                     'bytes_processed': self.bytes_processed,
                     'start_time': reported,
                     'audit_time': time_auditing
                 })
             dump_recon_cache(cache_entry, self.rcache, self.logger)
             reported = now
             total_quarantines += self.quarantines
             total_errors += self.errors
             self.passes = 0
             self.quarantines = 0
             self.errors = 0
             self.bytes_processed = 0
             self.last_logged = now
         time_auditing += (now - loop_time)
     # Avoid divide by zero during very short runs
     elapsed = (time.time() - begin) or 0.000001
     # Final summary includes counts accumulated since the last periodic
     # report (the "+ self.quarantines" / "+ self.errors" terms).
     self.logger.info(
         _('Object audit (%(type)s) "%(mode)s" mode '
           'completed: %(elapsed).02fs. Total quarantined: %(quars)d, '
           'Total errors: %(errors)d, Total files/sec: %(frate).2f, '
           'Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, '
           'Rate: %(audit_rate).2f') % {
               'type': '%s%s' % (self.auditor_type, description),
               'mode': mode,
               'elapsed': elapsed,
               'quars': total_quarantines + self.quarantines,
               'errors': total_errors + self.errors,
               'frate': self.total_files_processed / elapsed,
               'brate': self.total_bytes_processed / elapsed,
               'audit': time_auditing,
               'audit_rate': time_auditing / elapsed
           })
     if self.stats_sizes:
         self.logger.info(
             _('Object audit stats: %s') % json.dumps(self.stats_buckets))
Ejemplo n.º 43
0
 def clear_recon_cache(self, auditor_type):
     """Reset the recon cache entry for the given auditor type."""
     empty_entry = {'object_auditor_stats_%s' % auditor_type: {}}
     dump_recon_cache(empty_entry, self.rcache, self.logger)
Ejemplo n.º 44
0
    def audit_all_objects(self, mode='once', device_dirs=None):
        """
        Audit all objects on this node, logging periodic progress and
        writing per-auditor stats into the recon cache. Any remaining
        auditor status is cleared at the end so the next run starts
        fresh.

        :param mode: label used only in log output (e.g. 'once')
        :param device_dirs: optional list of device directories to limit
                            the audit to; None audits every device
        """
        description = ''
        if device_dirs:
            device_dir_str = ','.join(sorted(device_dirs))
            if self.auditor_type == 'ALL':
                description = _(' - parallel, %s') % device_dir_str
            else:
                description = _(' - %s') % device_dir_str
        self.logger.info(
            _('Begin object audit "%(mode)s" mode (%(audi_type)s'
              '%(description)s)') % {
                  'mode': mode,
                  'audi_type': self.auditor_type,
                  'description': description
              })
        begin = reported = time.time()
        self.total_bytes_processed = 0
        self.total_files_processed = 0
        total_quarantines = 0
        total_errors = 0
        time_auditing = 0
        # TODO: we should move audit-location generation to the storage policy,
        # as we may (conceivably) have a different filesystem layout for each.
        # We'd still need to generate the policies to audit from the actual
        # directories found on-disk, and have appropriate error reporting if we
        # find a directory that doesn't correspond to any known policy. This
        # will require a sizable refactor, but currently all diskfile managers
        # can find all diskfile locations regardless of policy -- so for now
        # just use Policy-0's manager.
        all_locs = (
            self.diskfile_router[POLICIES[0]].object_audit_location_generator(
                device_dirs=device_dirs, auditor_type=self.auditor_type))
        for location in all_locs:
            loop_time = time.time()
            self.failsafe_object_audit(location)
            self.logger.timing_since('timing', loop_time)
            # Throttle to the configured max files per second.
            self.files_running_time = ratelimit_sleep(
                self.files_running_time, self.max_files_per_second)
            self.total_files_processed += 1
            now = time.time()
            if now - self.last_logged >= self.log_time:
                # Periodic progress report; the per-interval counters are
                # folded into the running totals and reset below.
                self.logger.info(
                    _('Object audit (%(type)s). '
                      'Since %(start_time)s: Locally: %(passes)d passed, '
                      '%(quars)d quarantined, %(errors)d errors, '
                      'files/sec: %(frate).2f, bytes/sec: %(brate).2f, '
                      'Total time: %(total).2f, Auditing time: %(audit).2f, '
                      'Rate: %(audit_rate).2f') % {
                          'type': '%s%s' % (self.auditor_type, description),
                          'start_time': time.ctime(reported),
                          'passes': self.passes,
                          'quars': self.quarantines,
                          'errors': self.errors,
                          'frate': self.passes / (now - reported),
                          'brate': self.bytes_processed / (now - reported),
                          'total': (now - begin),
                          'audit': time_auditing,
                          'audit_rate': time_auditing / (now - begin)
                      })
                cache_entry = self.create_recon_nested_dict(
                    'object_auditor_stats_%s' % (self.auditor_type),
                    device_dirs, {
                        'errors': self.errors,
                        'passes': self.passes,
                        'quarantined': self.quarantines,
                        'bytes_processed': self.bytes_processed,
                        'start_time': reported,
                        'audit_time': time_auditing
                    })
                dump_recon_cache(cache_entry, self.rcache, self.logger)
                reported = now
                total_quarantines += self.quarantines
                total_errors += self.errors
                self.passes = 0
                self.quarantines = 0
                self.errors = 0
                self.bytes_processed = 0
                self.last_logged = now
            time_auditing += (now - loop_time)
        # Avoid divide by zero during very short runs
        elapsed = (time.time() - begin) or 0.000001
        # Final summary includes counts accumulated since the last periodic
        # report (the "+ self.quarantines" / "+ self.errors" terms).
        self.logger.info(
            _('Object audit (%(type)s) "%(mode)s" mode '
              'completed: %(elapsed).02fs. Total quarantined: %(quars)d, '
              'Total errors: %(errors)d, Total files/sec: %(frate).2f, '
              'Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, '
              'Rate: %(audit_rate).2f') % {
                  'type': '%s%s' % (self.auditor_type, description),
                  'mode': mode,
                  'elapsed': elapsed,
                  'quars': total_quarantines + self.quarantines,
                  'errors': total_errors + self.errors,
                  'frate': self.total_files_processed / elapsed,
                  'brate': self.total_bytes_processed / elapsed,
                  'audit': time_auditing,
                  'audit_rate': time_auditing / elapsed
              })
        if self.stats_sizes:
            self.logger.info(
                _('Object audit stats: %s') % json.dumps(self.stats_buckets))

        # Unset remaining partitions to not skip them in the next run
        diskfile.clear_auditor_status(self.devices, self.auditor_type)
Ejemplo n.º 45
0
 def run_forever(self, *args, **kwargs):
     """
     Run the updater continuously: each sweep forks one worker per
     container path (up to ``concurrency`` at a time), then sleeps out
     the remainder of ``interval``.
     """
     # Random initial delay spreads load across nodes started together.
     time.sleep(random() * self.interval)
     while True:
         self.logger.info(_('Begin container update sweep'))
         begin = time.time()
         now = time.time()
         # Drop account suppressions whose expiry time has passed.
         expired_suppressions = \
             [a for a, u in self.account_suppressions.items()
              if u < now]
         for account in expired_suppressions:
             del self.account_suppressions[account]
         pid2filename = {}
         # read from account ring to ensure it's fresh
         self.get_account_ring().get_nodes('')
         for path in self.get_paths():
             # Block until a worker slot frees up, harvesting any
             # suppressions the finished child wrote to its temp file.
             while len(pid2filename) >= self.concurrency:
                 pid = os.wait()[0]
                 try:
                     self._load_suppressions(pid2filename[pid])
                 finally:
                     del pid2filename[pid]
             fd, tmpfilename = mkstemp()
             os.close(fd)
             pid = os.fork()
             if pid:
                 # Parent: remember which temp file this child reports to.
                 pid2filename[pid] = tmpfilename
             else:
                 # Child: sweep one path, log the results, then exit.
                 signal.signal(signal.SIGTERM, signal.SIG_DFL)
                 patcher.monkey_patch(all=False, socket=True, select=True,
                                      thread=True)
                 self.no_changes = 0
                 self.successes = 0
                 self.failures = 0
                 self.new_account_suppressions = open(tmpfilename, 'w')
                 forkbegin = time.time()
                 self.container_sweep(path)
                 elapsed = time.time() - forkbegin
                 self.logger.debug(
                     _('Container update sweep of %(path)s completed: '
                       '%(elapsed).02fs, %(success)s successes, %(fail)s '
                       'failures, %(no_change)s with no changes'),
                     {'path': path, 'elapsed': elapsed,
                      'success': self.successes, 'fail': self.failures,
                      'no_change': self.no_changes})
                 sys.exit()
         # Reap any remaining children before finishing the sweep.
         while pid2filename:
             pid = os.wait()[0]
             try:
                 self._load_suppressions(pid2filename[pid])
             finally:
                 del pid2filename[pid]
         elapsed = time.time() - begin
         self.logger.info(_('Container update sweep completed: %.02fs'),
                          elapsed)
         dump_recon_cache({'container_updater_sweep': elapsed},
                          self.rcache, self.logger)
         if elapsed < self.interval:
             time.sleep(self.interval - elapsed)
Ejemplo n.º 46
0
 def audit_all_objects(self, mode='once'):
     """
     Audit every .data file found on this node, logging periodic
     progress and dumping per-auditor stats to the recon cache.

     :param mode: label used only in log output (e.g. 'once')
     """
     self.logger.info(
         _('Begin object audit "%s" mode (%s)') % (mode, self.auditor_type))
     begin = reported = time.time()
     self.total_bytes_processed = 0
     self.total_files_processed = 0
     total_quarantines = 0
     total_errors = 0
     time_auditing = 0
     all_locs = audit_location_generator(self.devices,
                                         object_server.DATADIR,
                                         '.data',
                                         mount_check=self.mount_check,
                                         logger=self.logger)
     for path, device, partition in all_locs:
         loop_time = time.time()
         self.failsafe_object_audit(path, device, partition)
         self.logger.timing_since('timing', loop_time)
         # Throttle to the configured max files per second.
         self.files_running_time = ratelimit_sleep(
             self.files_running_time, self.max_files_per_second)
         self.total_files_processed += 1
         now = time.time()
         if now - reported >= self.log_time:
             # Periodic progress report; the per-interval counters are
             # folded into the running totals and reset below.
             self.logger.info(
                 _('Object audit (%(type)s). '
                   'Since %(start_time)s: Locally: %(passes)d passed, '
                   '%(quars)d quarantined, %(errors)d errors '
                   'files/sec: %(frate).2f , bytes/sec: %(brate).2f, '
                   'Total time: %(total).2f, Auditing time: %(audit).2f, '
                   'Rate: %(audit_rate).2f') % {
                       'type': self.auditor_type,
                       'start_time': time.ctime(reported),
                       'passes': self.passes,
                       'quars': self.quarantines,
                       'errors': self.errors,
                       'frate': self.passes / (now - reported),
                       'brate': self.bytes_processed / (now - reported),
                       'total': (now - begin),
                       'audit': time_auditing,
                       'audit_rate': time_auditing / (now - begin)
                   })
             dump_recon_cache(
                 {
                     'object_auditor_stats_%s' % self.auditor_type: {
                         'errors': self.errors,
                         'passes': self.passes,
                         'quarantined': self.quarantines,
                         'bytes_processed': self.bytes_processed,
                         'start_time': reported,
                         'audit_time': time_auditing
                     }
                 }, self.rcache, self.logger)
             reported = now
             total_quarantines += self.quarantines
             total_errors += self.errors
             self.passes = 0
             self.quarantines = 0
             self.errors = 0
             self.bytes_processed = 0
         time_auditing += (now - loop_time)
     # Avoid divide by zero during very short runs
     elapsed = (time.time() - begin) or 0.000001
     self.logger.info(
         _('Object audit (%(type)s) "%(mode)s" mode '
           'completed: %(elapsed).02fs. Total quarantined: %(quars)d, '
           'Total errors: %(errors)d, Total files/sec: %(frate).2f , '
           'Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, '
           'Rate: %(audit_rate).2f') % {
               'type': self.auditor_type,
               'mode': mode,
               'elapsed': elapsed,
               'quars': total_quarantines,
               'errors': total_errors,
               'frate': self.total_files_processed / elapsed,
               'brate': self.total_bytes_processed / elapsed,
               'audit': time_auditing,
               'audit_rate': time_auditing / elapsed
           })
     if self.stats_sizes:
         self.logger.info(
             _('Object audit stats: %s') % json.dumps(self.stats_buckets))