def __init__(self, id, display_name=None, description=None, notes=None, working_dir=None,
             content_unit_counts=None, last_unit_added=None, last_unit_removed=None,
             repo_obj=None):
    """
    Store the supplied repository values on the instance.

    Most arguments are stored unchanged under an attribute of the same name. Three are
    normalized first:

    * content_unit_counts falls back to a new empty dict when a falsy value is given
    * last_unit_added and last_unit_removed are passed through dateutils.ensure_tz()

    :param id: identifier stored as self.id
    :param display_name: stored as self.display_name
    :param description: stored as self.description
    :param notes: stored as self.notes
    :param working_dir: stored as self.working_dir
    :param content_unit_counts: stored as self.content_unit_counts; {} when falsy
    :param last_unit_added: timestamp (or None) run through dateutils.ensure_tz()
    :param last_unit_removed: timestamp (or None) run through dateutils.ensure_tz()
    :param repo_obj: stored as self.repo_obj
    """
    # Plain pass-through attributes.
    self.id = id
    self.display_name = display_name
    self.description = description
    self.notes = notes
    self.working_dir = working_dir

    # Never share or store a falsy counts value; substitute a fresh dict instead.
    if content_unit_counts:
        self.content_unit_counts = content_unit_counts
    else:
        self.content_unit_counts = {}

    # Timestamps go through ensure_tz() before being stored.
    self.last_unit_added = dateutils.ensure_tz(last_unit_added)
    self.last_unit_removed = dateutils.ensure_tz(last_unit_removed)

    self.repo_obj = repo_obj
def test_tz_specified(self):
    """
    Ensure that if the tz is already specified, it is used.

    The input datetime is created with dateutils.local_tz(); the value returned by
    ensure_tz() is expected to carry that same tzinfo.
    """
    dt = datetime.datetime.now(dateutils.local_tz())

    new_date = dateutils.ensure_tz(dt)

    # Compare against the timezone the input actually carried. Expecting utc_tz() here
    # would contradict the "already specified tz is used" contract this test documents.
    self.assertEquals(new_date.tzinfo, dt.tzinfo)
def test_none_object(self):
    """
    Ensure that ensure_tz() returns None when it is handed None.
    """
    result = dateutils.ensure_tz(None)
    self.assertEquals(result, None)
def test_tz_not_specified(self):
    """
    Test that a datetime without tzinfo comes back from ensure_tz() with the UTC tz set.
    """
    # utcnow() produces a datetime with no tzinfo attached.
    naive_dt = datetime.datetime.utcnow()

    stamped_dt = dateutils.ensure_tz(naive_dt)

    self.assertEquals(stamped_dt.tzinfo, dateutils.utc_tz())
def check_celery_processes(self):
    """
    Look for missing Celery processes, log and clean up as needed.

    A Worker record is treated as missing when its last_heartbeat is older than
    utcnow() - self.CELERY_TIMEOUT_SECONDS. The current time is passed through ensure_tz()
    before the comparison. For each missing worker an error is logged and _delete_worker()
    is called synchronously for cleanup.

    Records that are not missing are tallied by name prefix: scheduler, resource manager, or
    (anything else) plain worker. If there are zero resource managers or zero schedulers, an
    error is logged stating that Pulp will not operate correctly. The three tallies are then
    logged at the debug level.
    """
    msg = _(
        'Checking if pulp_workers, pulp_celerybeat, or pulp_resource_manager '
        'processes are missing for more than %d seconds'
    ) % self.CELERY_TIMEOUT_SECONDS
    _logger.debug(msg)

    # NOTE(review): the comparison below assumes worker.last_heartbeat is comparable with
    # the value ensure_tz() returns -- confirm against the Worker model.
    now = ensure_tz(datetime.utcnow())
    oldest_heartbeat_time = now - timedelta(seconds=self.CELERY_TIMEOUT_SECONDS)

    worker_count = 0
    resource_manager_count = 0
    scheduler_count = 0

    for worker in Worker.objects.all():
        if worker.last_heartbeat < oldest_heartbeat_time:
            msg = _("Worker '%s' has gone missing, removing from list of workers") % worker.name
            _logger.error(msg)
            _delete_worker(worker.name)
        elif worker.name.startswith(SCHEDULER_WORKER_NAME):
            scheduler_count += 1
        elif worker.name.startswith(RESOURCE_MANAGER_WORKER_NAME):
            resource_manager_count += 1
        else:
            worker_count += 1

    if resource_manager_count == 0:
        # The process name in this message was previously misspelled ("mananger").
        msg = _("There are 0 pulp_resource_manager processes running. Pulp will not operate "
                "correctly without at least one pulp_resource_manager process running.")
        _logger.error(msg)

    if scheduler_count == 0:
        msg = _("There are 0 pulp_celerybeat processes running. Pulp will not operate "
                "correctly without at least one pulp_celerybeat process running.")
        _logger.error(msg)

    output_dict = {
        'workers': worker_count,
        'celerybeat': scheduler_count,
        'resource_manager': resource_manager_count,
    }
    msg = _("%(workers)d pulp_worker processes, %(celerybeat)d "
            "pulp_celerybeat processes, and %(resource_manager)d "
            "pulp_resource_manager processes") % output_dict
    _logger.debug(msg)
def tick(self):
    """
    Run one iteration of the scheduler, but only on the instance holding the celerybeat lock.

    The scheduler's heartbeat is recorded first. The instance then tries to refresh its own
    CeleryBeatLock; when that succeeds it dispatches via call_tick(). Otherwise it removes
    any lock older than PULP_PROCESS_TIMEOUT_INTERVAL and tries to insert a new one. If the
    insert hits a uniqueness conflict, another instance holds the lock and this one simply
    waits for the heartbeat interval.

    :return: number of seconds before the next tick should run
    :rtype:  float
    """
    worker_watcher.handle_worker_heartbeat(CELERYBEAT_NAME)

    if celery_version.startswith('4') and self.schedule_changed:
        # Setting _heap = None is a workaround for this bug in Celery4
        # https://github.com/celery/celery/pull/3958
        # Once 3958 is released and updated in Fedora this can be removed
        self._heap = None

    current_time = ensure_tz(datetime.utcnow())
    stale_cutoff = current_time - timedelta(seconds=constants.PULP_PROCESS_TIMEOUT_INTERVAL)

    # Refresh the lock timestamp; this only matches if this instance owns the lock.
    own_lock = CeleryBeatLock.objects(name=CELERYBEAT_NAME)
    updated_count = own_lock.update(set__timestamp=datetime.utcnow())

    if updated_count != 1:
        # Drop any lock that is old enough to be considered abandoned.
        CeleryBeatLock.objects(timestamp__lte=stale_cutoff).delete()
        try:
            # Insert a new lock entry for this instance.
            acquired_at = datetime.utcnow()
            CeleryBeatLock(name=CELERYBEAT_NAME, timestamp=acquired_at).save()
            _logger.debug(_("New lock acquired by %(celerybeat_name)s") %
                          {'celerybeat_name': CELERYBEAT_NAME})

            if not self._first_lock_acq_check:
                _logger.warning(
                    _("Failover occurred: '%s' is now the primary celerybeat instance") %
                    CELERYBEAT_NAME)

            # The lock is ours now, so dispatch tasks.
            ret = self.call_tick(CELERYBEAT_NAME)
        except mongoengine.NotUniqueError:
            # Another instance holds the lock; fall back to the default wait time.
            ret = constants.PULP_PROCESS_HEARTBEAT_INTERVAL

            if self._first_lock_acq_check:
                _logger.info(
                    _("Hot spare celerybeat instance '%(celerybeat_name)s' detected.") %
                    {'celerybeat_name': CELERYBEAT_NAME})
    else:
        # This instance already held the lock and refreshed its timestamp.
        _logger.debug(_('Lock updated by %(celerybeat_name)s') %
                      {'celerybeat_name': CELERYBEAT_NAME})
        ret = self.call_tick(CELERYBEAT_NAME)

    self._first_lock_acq_check = False
    return ret
def get_resource_manager_lock(name):
    """
    Block until the resource manager lock has been acquired for the given name.

    Each pass of the loop deletes any ResourceManagerLock older than
    constants.PULP_PROCESS_TIMEOUT_INTERVAL, upserts a Worker record for this name with a
    fresh heartbeat, and then attempts to save the lock. When the save hits a uniqueness
    conflict the function sleeps for constants.PULP_PROCESS_HEARTBEAT_INTERVAL and retries.

    :param name:   The hostname of the worker
    :type  name:   basestring
    """
    assert name.startswith(constants.RESOURCE_MANAGER_WORKER_NAME)

    lock = ResourceManagerLock(name=name)

    # Becomes True once this instance has failed to get the lock at least once.
    is_hot_spare = False

    while True:
        now = dateutils.ensure_tz(datetime.utcnow())
        stale_cutoff = now - timedelta(seconds=constants.PULP_PROCESS_TIMEOUT_INTERVAL)
        ResourceManagerLock.objects(timestamp__lte=stale_cutoff).delete()

        # Create / update the worker record so that Pulp knows we exist
        this_worker = Worker.objects(name=name)
        this_worker.update_one(set__last_heartbeat=datetime.utcnow(), upsert=True)

        try:
            lock.timestamp = now
            lock.save()

            _logger.debug(
                _("Resource manager '%s' has acquired the resource manager lock") % name)

            if is_hot_spare:
                # We waited behind another instance before getting the lock.
                _logger.warning(
                    _("Failover occurred: '%s' is now the primary resource manager") % name)

            break
        except mongoengine.NotUniqueError:
            # Only log the message the first time
            if not is_hot_spare:
                _logger.info(
                    _("Hot spare pulp_resource_manager instance '%(name)s' detected.") %
                    {'name': name})
                is_hot_spare = True

            time.sleep(constants.PULP_PROCESS_HEARTBEAT_INTERVAL)
def check_celery_processes(self):
    """
    Look for missing Celery processes, log and clean up as needed.

    A Worker record is treated as missing when its last_heartbeat is older than
    utcnow() - constants.PULP_PROCESS_TIMEOUT_INTERVAL. The current time is passed through
    ensure_tz() before the comparison. For each missing worker an error is logged; scheduler
    records are removed with worker.delete(), all others with a synchronous call to
    _delete_worker().

    Records that are not missing are tallied by name prefix: scheduler, resource manager, or
    (anything else) plain worker. If there are zero resource managers or zero schedulers, an
    error is logged stating that Pulp will not operate correctly. The three tallies are then
    logged at the debug level.
    """
    msg = _('Checking if pulp_workers, pulp_celerybeat, or pulp_resource_manager processes '
            'are missing for more than %d seconds') % constants.PULP_PROCESS_TIMEOUT_INTERVAL
    _logger.debug(msg)

    # NOTE(review): the comparison below assumes worker.last_heartbeat is comparable with
    # the value ensure_tz() returns -- confirm against the Worker model.
    now = ensure_tz(datetime.utcnow())
    oldest_heartbeat_time = now - timedelta(seconds=constants.PULP_PROCESS_TIMEOUT_INTERVAL)

    worker_count = 0
    resource_manager_count = 0
    scheduler_count = 0

    for worker in Worker.objects.all():
        if worker.last_heartbeat < oldest_heartbeat_time:
            msg = _("Worker '%s' has gone missing, removing from list of workers") % worker.name
            _logger.error(msg)

            # Scheduler records are deleted directly rather than through _delete_worker().
            if worker.name.startswith(constants.SCHEDULER_WORKER_NAME):
                worker.delete()
            else:
                _delete_worker(worker.name)
        elif worker.name.startswith(constants.SCHEDULER_WORKER_NAME):
            scheduler_count += 1
        elif worker.name.startswith(constants.RESOURCE_MANAGER_WORKER_NAME):
            resource_manager_count += 1
        else:
            worker_count += 1

    if resource_manager_count == 0:
        # The process name in this message was previously misspelled ("mananger").
        msg = _("There are 0 pulp_resource_manager processes running. Pulp will not operate "
                "correctly without at least one pulp_resource_manager process running.")
        _logger.error(msg)

    if scheduler_count == 0:
        msg = _("There are 0 pulp_celerybeat processes running. Pulp will not operate "
                "correctly without at least one pulp_celerybeat process running.")
        _logger.error(msg)

    output_dict = {
        'workers': worker_count,
        'celerybeat': scheduler_count,
        'resource_manager': resource_manager_count,
    }
    msg = _("%(workers)d pulp_worker processes, %(celerybeat)d "
            "pulp_celerybeat processes, and %(resource_manager)d "
            "pulp_resource_manager processes") % output_dict
    _logger.debug(msg)
def get_resource_manager_lock(name):
    """
    Block until the resource manager lock has been acquired for the given name.

    Each pass of the loop deletes any ResourceManagerLock older than
    PULP_PROCESS_TIMEOUT_INTERVAL, upserts a Worker record for this name with a fresh
    heartbeat, and then attempts to save the lock. When the save hits a uniqueness conflict
    the function sleeps for PULP_PROCESS_HEARTBEAT_INTERVAL and retries.

    :param name:   The hostname of the worker
    :type  name:   basestring
    """
    assert name.startswith(constants.RESOURCE_MANAGER_WORKER_NAME)

    lock = ResourceManagerLock(name=name)

    # Flips to True after the first failed attempt so the hot-spare notice is logged once.
    waited_for_lock = False

    while True:
        now = dateutils.ensure_tz(datetime.utcnow())
        expiry_threshold = now - timedelta(seconds=PULP_PROCESS_TIMEOUT_INTERVAL)
        expired_locks = ResourceManagerLock.objects(timestamp__lte=expiry_threshold)
        expired_locks.delete()

        # Create / update the worker record so that Pulp knows we exist
        Worker.objects(name=name).update_one(
            set__last_heartbeat=datetime.utcnow(),
            upsert=True)

        try:
            lock.timestamp = now
            lock.save()

            acquired_msg = _("Resource manager '%s' has acquired the resource manager lock")
            _logger.debug(acquired_msg % name)

            if waited_for_lock:
                failover_msg = _("Failover occurred: '%s' is now the primary resource manager")
                _logger.warning(failover_msg % name)

            break
        except mongoengine.NotUniqueError:
            # Only log the message the first time
            if not waited_for_lock:
                spare_msg = _("Hot spare pulp_resource_manager instance '%(name)s' detected.")
                _logger.info(spare_msg % {'name': name})
                waited_for_lock = True

            time.sleep(PULP_PROCESS_HEARTBEAT_INTERVAL)
def to_python(self, value):
    """
    Convert the value with the parent field, then pass the result through ensure_tz().

    The parent class's to_python() does the actual conversion; dateutils.ensure_tz() is
    applied to whatever it returns.
    NOTE(review): presumably a value lacking a timezone is taken to already be UTC and gets
    the UTC tz attached -- confirm against dateutils.ensure_tz().

    :param value: a datetime object
    :type  value: datetime.datetime
    :return: the converted value after dateutils.ensure_tz() has been applied
    :rtype:  datetime.datetime
    """
    return dateutils.ensure_tz(super(UTCDateTimeField, self).to_python(value))
def get_online(self):
    """
    Return the subset of Worker documents with a recent heartbeat.

    A document is kept when its last_heartbeat is no older than
    PULP_PROCESS_TIMEOUT_INTERVAL seconds before the current UTC time.

    :return: mongoengine queryset object
    :rtype:  mongoengine.queryset.QuerySet
    """
    timeout = timedelta(seconds=PULP_PROCESS_TIMEOUT_INTERVAL)
    heartbeat_cutoff = ensure_tz(datetime.utcnow()) - timeout
    return self.filter(last_heartbeat__gte=heartbeat_cutoff)
def get_online(self):
    """
    Filter this queryset down to Worker documents whose heartbeat is still fresh.

    "Fresh" means last_heartbeat is at or after the current UTC time minus
    PULP_PROCESS_TIMEOUT_INTERVAL seconds.

    :return: mongoengine queryset object
    :rtype:  mongoengine.queryset.QuerySet
    """
    current_time = ensure_tz(datetime.utcnow())
    earliest_allowed = current_time - timedelta(seconds=PULP_PROCESS_TIMEOUT_INTERVAL)

    online_workers = self.filter(last_heartbeat__gte=earliest_allowed)
    return online_workers
def last_publish(self): """ Returns the timestamp of the last time this repo was published, regardless of the success or failure of the publish. If the repo was never published, this call returns None. :return: timestamp instance describing the last publish :rtype: datetime.datetime or None :raises DistributorConduitException: if any errors occur """ try: collection = RepoDistributor.get_collection() distributor = collection.find_one({'repo_id': self.repo_id, 'id': self.distributor_id}) if distributor is None: raise pulp_exceptions.MissingResource(self.repo_id) return dateutils.ensure_tz(distributor['last_publish']) except Exception, e: _logger.exception('Error getting last publish time for repo [%s]' % self.repo_id) raise DistributorConduitException(e), None, sys.exc_info()[2]
def last_publish(self): """ Returns the timestamp of the last time this repo was published, regardless of the success or failure of the publish. If the repo was never published, this call returns None. :return: timestamp instance describing the last publish :rtype: datetime.datetime or None :raises DistributorConduitException: if any errors occur """ try: collection = RepoDistributor.get_collection() distributor = collection.find_one({ 'repo_id': self.repo_id, 'id': self.distributor_id }) if distributor is None: raise pulp_exceptions.MissingResource(self.repo_id) return dateutils.ensure_tz(distributor['last_publish']) except Exception, e: _logger.exception('Error getting last publish time for repo [%s]' % self.repo_id) raise DistributorConduitException(e), None, sys.exc_info()[2]
def tick(self):
    """
    Run one scheduler iteration if, and only if, this instance owns the celerybeat lock.

    After recording the scheduler heartbeat, the method refreshes the timestamp of this
    instance's CeleryBeatLock. If exactly one lock was updated, tasks are dispatched through
    call_tick(). Otherwise locks older than PULP_PROCESS_TIMEOUT_INTERVAL are deleted and a
    new lock is inserted; a uniqueness conflict on insert means another instance is primary,
    so the heartbeat interval is returned as the wait time instead.

    :return: number of seconds before the next tick should run
    :rtype:  float
    """
    worker_watcher.handle_worker_heartbeat(CELERYBEAT_NAME)

    if celery_version.startswith('4') and self.schedule_changed:
        # Setting _heap = None is a workaround for this bug in Celery4
        # https://github.com/celery/celery/pull/3958
        # Once 3958 is released and updated in Fedora this can be removed
        self._heap = None

    now = ensure_tz(datetime.utcnow())
    expiry_threshold = now - timedelta(seconds=constants.PULP_PROCESS_TIMEOUT_INTERVAL)

    # Shared substitution mapping for the log messages that name this instance.
    name_params = {'celerybeat_name': CELERYBEAT_NAME}

    # Updating the current lock if lock is on this instance of celerybeat
    locks_updated = CeleryBeatLock.objects(name=CELERYBEAT_NAME).update(
        set__timestamp=datetime.utcnow())
    holds_lock = (locks_updated == 1)

    if holds_lock:
        _logger.debug(_('Lock updated by %(celerybeat_name)s') % name_params)
        ret = self.call_tick(CELERYBEAT_NAME)
    else:
        # check for old enough time_stamp and remove if such lock is present
        CeleryBeatLock.objects(timestamp__lte=expiry_threshold).delete()

        try:
            # Insert new lock entry
            fresh_lock = CeleryBeatLock(name=CELERYBEAT_NAME, timestamp=datetime.utcnow())
            fresh_lock.save()
            _logger.debug(_("New lock acquired by %(celerybeat_name)s") % name_params)

            if not self._first_lock_acq_check:
                failover_msg = _("Failover occurred: '%s' is now the primary celerybeat "
                                 "instance")
                _logger.warning(failover_msg % CELERYBEAT_NAME)

            # After acquiring new lock, dispatch tasks
            ret = self.call_tick(CELERYBEAT_NAME)
        except mongoengine.NotUniqueError:
            # Setting a default wait time for celerybeat instances with no lock
            ret = constants.PULP_PROCESS_HEARTBEAT_INTERVAL

            if self._first_lock_acq_check:
                spare_msg = _("Hot spare celerybeat instance '%(celerybeat_name)s' detected.")
                _logger.info(spare_msg % name_params)

    self._first_lock_acq_check = False
    return ret
def check_celery_processes(self):
    """
    Look for missing Celery processes, log and clean up as needed.

    A Worker record is treated as missing when its last_heartbeat is older than
    utcnow() - PULP_PROCESS_TIMEOUT_INTERVAL. The current time is passed through ensure_tz()
    before the comparison. For each missing worker an error is logged; scheduler records are
    removed with worker.delete(), all others with a synchronous call to _delete_worker().

    After the worker list is requested, the time elapsed since the start of the check is
    measured; if it exceeds PULP_PROCESS_HEARTBEAT_INTERVAL a warning is logged.

    Records that are not missing are tallied by name prefix: scheduler, resource manager, or
    (anything else) plain worker. If there are zero resource managers or zero schedulers, an
    error is logged stating that Pulp will not operate correctly. The three tallies are then
    logged at the debug level.
    """
    msg = _(
        'Checking if pulp_workers, pulp_celerybeat, or pulp_resource_manager processes '
        'are missing for more than %d seconds'
    ) % PULP_PROCESS_TIMEOUT_INTERVAL
    _logger.debug(msg)

    # NOTE(review): the comparison in the loop assumes worker.last_heartbeat is comparable
    # with the value ensure_tz() returns -- confirm against the Worker model.
    now = ensure_tz(datetime.utcnow())
    oldest_heartbeat_time = now - timedelta(seconds=PULP_PROCESS_TIMEOUT_INTERVAL)
    worker_list = Worker.objects.all()

    # Sample the clock once so the duration that is reported is exactly the duration that
    # was compared against the heartbeat interval.
    elapsed = ensure_tz(datetime.utcnow()) - now
    if elapsed > timedelta(seconds=PULP_PROCESS_HEARTBEAT_INTERVAL):
        sec = elapsed.total_seconds()
        msg = _(
            "Celery process check took {time}s which exceeds heartbeat interval. Consider "
            "adjusting the worker_timeout setting.").format(time=sec)
        _logger.warn(msg)

    worker_count = 0
    resource_manager_count = 0
    scheduler_count = 0

    for worker in worker_list:
        if worker.last_heartbeat < oldest_heartbeat_time:
            msg = _("Worker '%s' has gone missing, removing from list of workers") % worker.name
            _logger.error(msg)

            # Scheduler records are deleted directly rather than through _delete_worker().
            if worker.name.startswith(constants.SCHEDULER_WORKER_NAME):
                worker.delete()
            else:
                _delete_worker(worker.name)
        elif worker.name.startswith(constants.SCHEDULER_WORKER_NAME):
            scheduler_count += 1
        elif worker.name.startswith(constants.RESOURCE_MANAGER_WORKER_NAME):
            resource_manager_count += 1
        else:
            worker_count += 1

    if resource_manager_count == 0:
        msg = _(
            "There are 0 pulp_resource_manager processes running. Pulp will not operate "
            "correctly without at least one pulp_resource_manager process running."
        )
        _logger.error(msg)

    if scheduler_count == 0:
        msg = _(
            "There are 0 pulp_celerybeat processes running. Pulp will not operate "
            "correctly without at least one pulp_celerybeat process running."
        )
        _logger.error(msg)

    output_dict = {
        'workers': worker_count,
        'celerybeat': scheduler_count,
        'resource_manager': resource_manager_count,
    }
    msg = _("%(workers)d pulp_worker processes, %(celerybeat)d "
            "pulp_celerybeat processes, and %(resource_manager)d "
            "pulp_resource_manager processes") % output_dict
    _logger.debug(msg)