Example #1
 def _init_hr(self, get_coord):
     self.hr = coordinator.HashRing('kazoo://1.2.3.4:2181', 'group')
     self.hr.get_members = mock.Mock(return_value=['id1', 'id2', 'id3'])
     self.hr.member_id = 'id2'
     self.hr._hash = mock.Mock(side_effect=[1, 10, 20, 5, 13, 25])
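The six mocked hash values presumably cover the three member ids first ('id1', 'id2', 'id3') and then the three objects later passed to get_subset, with each object assigned to the member whose hash follows it on the ring. A self-contained sketch of that assignment (an illustrative reimplementation under that assumption, not Sahara's code):

    import bisect

    def ring_owner(member_hashes, members, object_hash):
        # The object belongs to the member whose hash follows the object's
        # hash on the ring, wrapping around past the largest member hash.
        ring = dict(zip(member_hashes, members))
        keys = sorted(ring)
        pos = bisect.bisect(keys, object_hash)
        return ring[keys[pos % len(keys)]]

    members = ['id1', 'id2', 'id3']
    # With member hashes [1, 10, 20] and object hashes [5, 13, 25], only
    # the first object lands on 'id2', the member_id mocked above.
    assert ring_owner([1, 10, 20], members, 5) == 'id2'
    assert ring_owner([1, 10, 20], members, 13) == 'id3'
    assert ring_owner([1, 10, 20], members, 25) == 'id1'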
Example #2
 def test_get_subset_without_backend(self):
     hr = coordinator.HashRing('', 'group')
     objects = [mock.Mock(id=1), mock.Mock(id=2)]
     # all objects will be managed by this engine if a coordinator backend
     # is not provided
     self.assertEqual(objects, hr.get_subset(objects))
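With a backend URL configured, each engine would instead keep only its own share of the objects. A self-contained sketch of that splitting, assuming an md5-style ring like the one the tests mock (illustrative only, not Sahara's implementation):

    import bisect
    import hashlib
    from unittest import mock

    def _hash(key):
        return int(hashlib.md5(str(key).encode()).hexdigest(), 16)

    def subset_for(member_id, members, objects):
        # Every engine builds the same ring and keeps only the objects
        # that hash to its own position on it.
        ring = {_hash(m): m for m in members}
        keys = sorted(ring)

        def owner(obj):
            pos = bisect.bisect(keys, _hash(obj.id))
            return ring[keys[pos % len(keys)]]

        return [obj for obj in objects if owner(obj) == member_id]

    members = ['engine-1', 'engine-2', 'engine-3']
    objects = [mock.Mock(id=i) for i in range(10)]
    shares = [subset_for(m, members, objects) for m in members]
    # Each object is handled by exactly one engine.
    assert sorted(o.id for s in shares for o in s) == list(range(10))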
Example #3
    class SaharaPeriodicTasks(periodic_task.PeriodicTasks):
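        # Shared ring for all periodic tasks below: every engine joins the
        # same 'sahara-periodic-tasks' group and then keeps only its share
        # of the work via get_subset().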
        hr = coordinator.HashRing(CONF.periodic_coordinator_backend_url,
                                  'sahara-periodic-tasks')

        def __init__(self):
            super(SaharaPeriodicTasks, self).__init__(CONF)

        @periodic_task.periodic_task(spacing=heartbeat_interval,
                                     run_immediately=True)
        @set_context
        def heartbeat(self, ctx):
            # Keep this engine's membership in the coordination group
            # alive so the ring used by get_subset() stays current.
            self.hr.heartbeat()

        @periodic_task.periodic_task(spacing=45)
        @set_context
        def update_job_statuses(self, ctx):
            LOG.debug('Updating job statuses')
            all_je = conductor.job_execution_get_all(ctx, end_time=None)
            je_to_manage = self.hr.get_subset(all_je)
            for job in je_to_manage:
                job_manager.update_job_status(job.id)

        @periodic_task.periodic_task(spacing=90)
        @set_context
        def terminate_unneeded_transient_clusters(self, ctx):
            LOG.debug('Terminating unneeded transient clusters')
            all_clusters = conductor.cluster_get_all(
                ctx, status=c_u.CLUSTER_STATUS_ACTIVE, is_transient=True)
            clusters_to_manage = self.hr.get_subset(all_clusters)

            for cluster in clusters_to_manage:
                jc = conductor.job_execution_count(ctx,
                                                   end_time=None,
                                                   cluster_id=cluster.id)

                if jc > 0:
                    continue

                spacing = get_time_since_last_update(cluster)
                if spacing < CONF.min_transient_cluster_active_time:
                    continue

                terminate_cluster(ctx, cluster, description='transient')
                # Add event log info cleanup
                context.ctx().current_instance_info = context.InstanceInfo()

        @periodic_task.periodic_task(spacing=zombie_task_spacing)
        @set_context
        def check_for_zombie_proxy_users(self, ctx):
            all_users = p.proxy_domain_users_list()
            users_to_manage = self.hr.get_subset(all_users)
            for user in users_to_manage:
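                # Proxy users created for EDP jobs are named
                # 'job_<job execution id>'; the id is recovered from the
                # name and used to look the job execution up.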
                if user.name.startswith('job_'):
                    je_id = user.name[4:]
                    je = conductor.job_execution_get(ctx, je_id)
                    if je is None or (je.info['status']
                                      in edp.JOB_STATUSES_TERMINATED):
                        LOG.debug('Found zombie proxy user {username}'.format(
                            username=user.name))
                        p.proxy_user_delete(user_id=user.id)

        @periodic_task.periodic_task(spacing=3600)
        @set_context
        def terminate_incomplete_clusters(self, ctx):
            if CONF.cleanup_time_for_incomplete_clusters <= 0:
                return

            LOG.debug('Terminating old clusters in non-final state')

            # NOTE(alazarev) For now all clusters are retrieved once an hour.
            # Criteria support needs to be implemented in the sahara db API
            # to allow SQL-side filtering.
            all_clusters = [
                cluster for cluster in conductor.cluster_get_all(ctx)
                if (cluster.status not in [
                    c_u.CLUSTER_STATUS_ACTIVE, c_u.CLUSTER_STATUS_ERROR,
                    c_u.CLUSTER_STATUS_DELETING
                ])
            ]
            clusters_to_manage = self.hr.get_subset(all_clusters)

            for cluster in clusters_to_manage:

                spacing = get_time_since_last_update(cluster)
                if spacing < CONF.cleanup_time_for_incomplete_clusters * 3600:
                    continue

                terminate_cluster(ctx, cluster, description='incomplete')
                # Add event log info cleanup
                context.ctx().current_instance_info = context.InstanceInfo()

        @periodic_task.periodic_task(
            spacing=vb.get_verification_periodic_interval())
        @set_context
        def run_verifications(self, ctx):
            LOG.debug("Executing health checks for the clusters")
            start_dict = {'verification': {'status': 'START'}}
            all_clusters = conductor.cluster_get_all(
                ctx, status=c_u.CLUSTER_STATUS_ACTIVE)
            clusters_to_manage = self.hr.get_subset(all_clusters)

            for cluster in clusters_to_manage:
                try:
                    vb.validate_verification_start(cluster)
                    api.update_cluster(cluster.id, start_dict)
                except vb.CannotVerifyError:
                    LOG.debug("Skipping running verification "
                              "on the cluster %s" % cluster.name)