def _init_hr(self, get_coord):
    self.hr = coordinator.HashRing('kazoo://1.2.3.4:2181', 'group')
    self.hr.get_members = mock.Mock(return_value=['id1', 'id2', 'id3'])
    self.hr.member_id = 'id2'
    self.hr._hash = mock.Mock(side_effect=[1, 10, 20, 5, 13, 25])
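# For symmetry with the no-backend test below, a companion case can pin
# down the assignment produced by _init_hr's mocked hashes. A minimal
# sketch, assuming the ownership rule is "first member hash clockwise"
# and that the coordinator module reaches tooz via
# tooz.coordination.get_coordinator (both assumptions, not confirmed by
# this snippet):
@mock.patch('tooz.coordination.get_coordinator')
def test_get_subset_with_backend(self, get_coord):
    # members land at 1, 10 and 20 on the ring, objects at 5, 13 and 25
    # (the _hash side_effect above); with 'id2' sitting at 10, only the
    # object hashed to 5 falls into this engine's range
    self._init_hr(get_coord)
    objects = [mock.Mock(id=1), mock.Mock(id=2), mock.Mock(id=3)]
    self.assertEqual([objects[0]], self.hr.get_subset(objects))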
def test_get_subset_without_backend(self):
    hr = coordinator.HashRing('', 'group')
    objects = [mock.Mock(id=1), mock.Mock(id=2)]
    # all objects will be managed by this engine if a coordinator
    # backend is not provided
    self.assertEqual(objects, hr.get_subset(objects))
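# The two tests above constrain get_subset's contract: with no backend
# every object is returned; with a backend, only the objects whose hash
# lands in this member's arc of the ring. A minimal standalone sketch of
# that lookup, assuming one ring position per member (a hypothetical
# helper for illustration, not the class's actual method):
import bisect

def get_subset_sketch(hr, objects):
    # place every member on the ring by hashing it, then sort positions
    ring = {hr._hash(member): member for member in hr.get_members()}
    keys = sorted(ring)
    if not keys:
        return []

    owned = []
    for obj in objects:
        # an object belongs to the first member clockwise from its own
        # hash, wrapping past the end of the ring back to the start
        position = bisect.bisect(keys, hr._hash(obj.id))
        if ring[keys[position % len(keys)]] == hr.member_id:
            owned.append(obj)
    return owned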
class SaharaPeriodicTasks(periodic_task.PeriodicTasks):
    hr = coordinator.HashRing(CONF.periodic_coordinator_backend_url,
                              'sahara-periodic-tasks')

    def __init__(self):
        super(SaharaPeriodicTasks, self).__init__(CONF)

    @periodic_task.periodic_task(spacing=heartbeat_interval,
                                 run_immediately=True)
    @set_context
    def heartbeat(self, ctx):
        self.hr.heartbeat()

    @periodic_task.periodic_task(spacing=45)
    @set_context
    def update_job_statuses(self, ctx):
        LOG.debug('Updating job statuses')
        all_je = conductor.job_execution_get_all(ctx, end_time=None)
        je_to_manage = self.hr.get_subset(all_je)
        for job in je_to_manage:
            job_manager.update_job_status(job.id)

    @periodic_task.periodic_task(spacing=90)
    @set_context
    def terminate_unneeded_transient_clusters(self, ctx):
        LOG.debug('Terminating unneeded transient clusters')
        all_clusters = conductor.cluster_get_all(
            ctx, status=c_u.CLUSTER_STATUS_ACTIVE, is_transient=True)
        clusters_to_manage = self.hr.get_subset(all_clusters)

        for cluster in clusters_to_manage:
            jc = conductor.job_execution_count(ctx, end_time=None,
                                               cluster_id=cluster.id)
            if jc > 0:
                continue

            spacing = get_time_since_last_update(cluster)
            if spacing < CONF.min_transient_cluster_active_time:
                continue

            terminate_cluster(ctx, cluster, description='transient')
            # Add event log info cleanup
            context.ctx().current_instance_info = context.InstanceInfo()

    @periodic_task.periodic_task(spacing=zombie_task_spacing)
    @set_context
    def check_for_zombie_proxy_users(self, ctx):
        all_users = p.proxy_domain_users_list()
        users_to_manage = self.hr.get_subset(all_users)
        for user in users_to_manage:
            if user.name.startswith('job_'):
                je_id = user.name[4:]
                je = conductor.job_execution_get(ctx, je_id)
                if je is None or (je.info['status'] in
                                  edp.JOB_STATUSES_TERMINATED):
                    LOG.debug('Found zombie proxy user {username}'.format(
                        username=user.name))
                    p.proxy_user_delete(user_id=user.id)

    @periodic_task.periodic_task(spacing=3600)
    @set_context
    def terminate_incomplete_clusters(self, ctx):
        if CONF.cleanup_time_for_incomplete_clusters <= 0:
            return

        LOG.debug('Terminating old clusters in non-final state')

        # NOTE(alazarev) Retrieving all clusters once per hour for now.
        # Criteria support needs to be implemented in the sahara db API
        # to allow SQL filtering.
        all_clusters = [
            cluster for cluster in conductor.cluster_get_all(ctx)
            if (cluster.status not in [c_u.CLUSTER_STATUS_ACTIVE,
                                       c_u.CLUSTER_STATUS_ERROR,
                                       c_u.CLUSTER_STATUS_DELETING])
        ]
        clusters_to_manage = self.hr.get_subset(all_clusters)

        for cluster in clusters_to_manage:
            spacing = get_time_since_last_update(cluster)
            if spacing < CONF.cleanup_time_for_incomplete_clusters * 3600:
                continue

            terminate_cluster(ctx, cluster, description='incomplete')
            # Add event log info cleanup
            context.ctx().current_instance_info = context.InstanceInfo()

    @periodic_task.periodic_task(
        spacing=vb.get_verification_periodic_interval())
    @set_context
    def run_verifications(self, ctx):
        LOG.debug("Executing health checks for the clusters")
        start_dict = {'verification': {'status': 'START'}}
        all_clusters = conductor.cluster_get_all(
            ctx, status=c_u.CLUSTER_STATUS_ACTIVE)
        clusters_to_manage = self.hr.get_subset(all_clusters)

        for cluster in clusters_to_manage:
            try:
                vb.validate_verification_start(cluster)
                api.update_cluster(cluster.id, start_dict)
            except vb.CannotVerifyError:
                LOG.debug("Skipping running verification "
                          "on the cluster %s" % cluster.name)
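# A tasks class like this is typically driven from an oslo.service
# dynamic timer. A minimal wiring sketch, assuming periodic_enable,
# periodic_fuzzy_delay and periodic_interval_max are registered config
# options (illustrative names, not confirmed by this snippet):
import random

from oslo_service import threadgroup


def setup():
    if CONF.periodic_enable:
        initial_delay = None
        if CONF.periodic_fuzzy_delay:
            # stagger engines so their timers do not all fire at once
            initial_delay = random.randint(0, CONF.periodic_fuzzy_delay)

        tg = threadgroup.ThreadGroup()
        pt = SaharaPeriodicTasks()
        # run_periodic_tasks is inherited from periodic_task.PeriodicTasks
        # and returns the delay until the next task is due
        tg.add_dynamic_timer(pt.run_periodic_tasks,
                             initial_delay=initial_delay,
                             periodic_interval_max=CONF.periodic_interval_max,
                             context=None)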