Example #1
    def test_locks_query_count(self):
        """Check that query count to pull in available jobs hasn't changed"""

        EXPECTED_QUERIES = 0

        #  object to be locked by jobs
        host_ct_key = ContentType.objects.get_for_model(
            self.host.downcast()).natural_key()
        host_id = self.host.id

        #  Create 200 host reboot and shutdown jobs in the default 'pending'
        #  state; the key point is that they are not in the 'complete' state.
        for job_num in xrange(200):
            if job_num % 2 == 0:
                RebootHostJob.objects.create(host=self.host)
            else:
                ShutdownHostJob.objects.create(host=self.host)

        #  Loads up the caches, including the _lock_cache, which should find
        #  these jobs.
        js = JobScheduler()

        reset_queries()

        #  Getting jobs here may incur a higher cost.
        js.available_jobs([(host_ct_key, host_id)])

        query_sum = len(connection.queries)
        self.assertEqual(
            query_sum,
            EXPECTED_QUERIES,
            "something changed with queries! "
            "got %s expected %s" % (query_sum, EXPECTED_QUERIES),
        )
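The same guard can also be written with Django's assertNumQueries context manager, which prints the captured SQL when the count is off. A minimal sketch, assuming the test class ultimately derives from Django's TestCase (the baseline value here is illustrative, not taken from this excerpt):

    def test_locks_query_count_with_context_manager(self):
        """Same check as above, using Django's assertNumQueries."""
        EXPECTED_QUERIES = 0  # illustrative; use the suite's agreed baseline

        host_ct_key = ContentType.objects.get_for_model(
            self.host.downcast()).natural_key()
        host_id = self.host.id

        js = JobScheduler()  # warm the caches before counting

        with self.assertNumQueries(EXPECTED_QUERIES):
            js.available_jobs([(host_ct_key, host_id)])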
Example #2
    def run(self):
        from chroma_core.services.job_scheduler.job_scheduler import JobScheduler
        from chroma_core.services.job_scheduler.job_scheduler_client import JobSchedulerRpc
        from chroma_core.services.job_scheduler.agent_rpc import AgentRpc

        super(Service, self).run()

        # Cancel anything that's left behind from a previous run
        for command in Command.objects.filter(complete=False):
            command.completed(True, True)
        Job.objects.filter(~Q(state="complete")).update(state="complete", cancelled=True)

        self._job_scheduler = JobScheduler()
        self._queue_thread = ServiceThread(QueueHandler(self._job_scheduler))
        self._rpc_thread = ServiceThread(JobSchedulerRpc(self._job_scheduler))
        self._progress_thread = ServiceThread(self._job_scheduler.progress)
        AgentRpc.start()
        self._queue_thread.start()
        self._rpc_thread.start()
        self._progress_thread.start()

        self._children_started.set()
        self._mail_alerts_thread = MailAlerts(settings.EMAIL_SENDER, settings.EMAIL_SUBJECT_PREFIX, settings.EMAIL_HOST)
        self._mail_alerts_thread.start()

        self._complete.wait()

        self.log.info("Cancelling outstanding jobs...")

        for job in Job.objects.filter(~Q(state="complete")).order_by("-id"):
            self._job_scheduler.cancel_job(job.id)
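MailAlerts is constructed from three Django settings; a hypothetical settings snippet showing what it expects (the setting names come from the code above, the values are purely illustrative):

# settings.py -- illustrative values only
EMAIL_SENDER = "chroma@example.com"   # From: address for alert mail
EMAIL_SUBJECT_PREFIX = "[chroma] "    # prefix prepended to every subject line
EMAIL_HOST = "localhost"              # SMTP relay used when sending alerts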
Example #3
    def test_no_locks_query_count(self):
        """Check that query count to pull in available jobs hasn't changed

        If this test fails, consider changing EXPECTED_QUERIES, or
        investigate why the query count regressed.
        """

        EXPECTED_QUERIES = 0

        #  no jobs locking this object
        host_ct_key = ContentType.objects.get_for_model(
            self.host.downcast()).natural_key()
        host_id = self.host.id

        #  Loads up the caches
        js = JobScheduler()

        reset_queries()
        js.available_transitions([(host_ct_key, host_id)])

        query_sum = len(connection.queries)
        self.assertEqual(
            query_sum,
            EXPECTED_QUERIES,
            "something changed with queries! "
            "got %s expected %s" % (query_sum, EXPECTED_QUERIES),
        )
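When a failure needs diagnosing (for example, deciding how many of the captured queries are setup cost), the statements can be inspected rather than just counted. A short follow-on sketch for the test above, assuming query capture is active (Django only records queries when DEBUG is enabled or inside a capture context):

        reset_queries()
        js.available_transitions([(host_ct_key, host_id)])

        # connection.queries is a list of {"sql": ..., "time": ...} dicts,
        # so individual statements can be examined as well as totalled.
        for query in connection.queries:
            print(query["sql"])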
Example #4
    def setUp(self):

        super(TestAvailableJobs, self).setUp()

        from chroma_core.services.job_scheduler.job_scheduler import JobScheduler

        from tests.unit.chroma_core.helpers import load_default_profile

        load_default_profile()

        self.JobScheduler = JobScheduler
        self.js = JobScheduler()
        volume = synthetic_volume(with_storage=False)

        # Create objects before ObjectCache init, so they are in the cache.
        self.host = synthetic_host()
        self.mgs = ManagedMgs.objects.create(volume=volume)
        self.fs = ManagedFilesystem.objects.create(name="mgsfs", mgs=self.mgs)
        self.mdt = ManagedMdt.objects.create(volume=volume,
                                             filesystem=self.fs,
                                             index=1)
        self.ost = ManagedOst.objects.create(volume=volume,
                                             filesystem=self.fs,
                                             index=1)

        # If you create objects after the cache is initialized, they will not be in it.
        ObjectCache.getInstance()
Example #5
    def setUp(self):

        super(TestAvailableJobs, self).setUp()

        from chroma_core.services.job_scheduler.job_scheduler import JobScheduler

        from tests.unit.chroma_core.helpers import load_default_profile

        load_default_profile()

        self.JobScheduler = JobScheduler
        self.js = JobScheduler()

        # Create objects before ObjectCache init, so they are in the cache.
        self.host = synthetic_host()

        (mgt, fs, mdt, ost) = create_simple_fs()

        self.mgs = mgt
        self.fs = fs
        self.mdt = mdt
        self.ost = ost

        # If you create objects after the cache is initialized, they will not be in it.
        ObjectCache.getInstance()
Example #6
    def setUp(self):
        super(TestOrderedTargets, self).setUp()

        # If the test that just ran imported storage_plugin_manager, it will
        # have instantiated its singleton, and created some DB records.
        # Django TestCase rolls back the database, so make sure that we
        # also roll back (reset) this singleton.
        import chroma_core.lib.storage_plugin.manager

        chroma_core.lib.storage_plugin.manager.storage_plugin_manager = (
            chroma_core.lib.storage_plugin.manager.StoragePluginManager())

        load_default_profile()

        self.job_scheduler = JobScheduler()
        self.no_of_nodes = 10
        self.nodes = []

        for node in range(0, self.no_of_nodes):
            self.nodes.append(synthetic_host("node%s" % node))

        for node in self.nodes:
            synthetic_volume_full(
                node, secondary_hosts=list(set(self.nodes) - set([node])))

        self.volume_ids = [volume.id for volume in Volume.objects.all()]
Example #7
    def test_no_locks_query_count(self):
        """Check that query count to pull in available jobs hasn't changed

        If this test fails, consider changing EXPECTED_QUERIES, or
        investigate why the query count regressed.
        """

        # 20131217 - mjmac: bumped to 7 for new Client management jobs
        # 20141007 - chris: change to 5 because some objects are now in the ObjectCache
        EXPECTED_QUERIES = 5  # but 3 are for setup

        host_ct_key = ContentType.objects.get_for_model(
            self.host.downcast()).natural_key()
        host_id = self.host.id

        #  Loads up the caches
        js = JobScheduler()

        reset_queries()
        js.available_jobs([
            (host_ct_key, host_id),
        ])

        query_sum = len(connection.queries)
        self.assertEqual(
            query_sum, EXPECTED_QUERIES, "something changed with queries! "
            "got %s expected %s" % (query_sum, EXPECTED_QUERIES))
Example #8
    def setUp(self):
        super(TestAvailableTransitions, self).setUp()

        self.js = JobScheduler()

        load_default_profile()

        self.host = synthetic_host()
        self.assertEqual(self.host.state, "managed")
Example #9
    def setUp(self):
        super(TestAvailableTransitions, self).setUp()

        self.js = JobScheduler()
        self.volume = synthetic_volume(with_storage=False)

        load_default_profile()

        self.host = synthetic_host()
        self.assertEqual(self.host.state, 'managed')
Example #10
    def test_object_is_locked(self):
        js = JobScheduler()
        self._fake_add_lock(js, self.host.lnet_configuration, 'lnet_up')

        lnet_configuration_ct_key = ContentType.objects.get_for_model(
            self.host.lnet_configuration.downcast()).natural_key()
        lnet_configuration_id = self.host.lnet_configuration.id

        locks = js.get_locks(lnet_configuration_ct_key, lnet_configuration_id)
        self.assertFalse(locks['read'])
        self.assertEqual(2, len(locks['write']))
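The structure returned by get_locks is a plain dict of 'read' and 'write' lists; a small hypothetical helper (not part of the JobScheduler API) for turning it into a readable assertion message:

    def _describe_locks(self, locks):
        """Summarise a get_locks() result for test failure messages."""
        return "%d read lock(s), %d write lock(s)" % (
            len(locks['read']), len(locks['write']))

    # usage:
    #   locks = js.get_locks(lnet_configuration_ct_key, lnet_configuration_id)
    #   self.assertEqual(2, len(locks['write']), self._describe_locks(locks))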
Example #11
    def test_managed_target_dne(self):
        host_ct_key = ContentType.objects.get_for_model(self.host.downcast()).natural_key()
        host_id = self.host.id

        self.host.mark_deleted()

        job_scheduler = JobScheduler()

        avail_trans = job_scheduler.available_transitions([(host_ct_key, host_id), ])[host_id]
        self.assertTrue(len(avail_trans) == 0, avail_trans)
        avail_jobs = job_scheduler.available_jobs([(host_ct_key, host_id), ])[host_id]
        self.assertEqual(self.host.state, 'managed')
        self.assertTrue(len(avail_jobs) == 3)   # Three jobs available: Force Remove, Reboot, Shutdown
Example #12
    def test_managed_target_dne(self):
        ct_id = ContentType.objects.get_for_model(self.host.downcast()).id
        host_id = self.host.id

        self.host.mark_deleted()

        job_scheduler = JobScheduler()

        composite_id = "{}:{}".format(ct_id, host_id)

        avail_trans = job_scheduler.available_transitions([(ct_id, host_id)])[composite_id]
        self.assertTrue(len(avail_trans) == 0, avail_trans)
        avail_jobs = job_scheduler.available_jobs([(ct_id, host_id)])[composite_id]
        self.assertEqual(self.host.state, "managed")
        self.assertTrue(len(avail_jobs) == 3)  # Three jobs available: Force Remove, Reboot, Shutdown
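The two variants above differ only in how the results are keyed: by plain object id in the older API, and by a "content_type_id:object_id" string in the newer one. A hypothetical helper that mirrors the format used above, for building the composite key from any model instance:

    def _composite_id(self, obj):
        """Build the 'ct_id:obj_id' key used by the newer available_* calls."""
        ct_id = ContentType.objects.get_for_model(obj.downcast()).id
        return "{}:{}".format(ct_id, obj.id)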
Example #13
    def test_host_complete_job(self):
        """If a DeployHostJob fails, the host should be left in the "undeployed" state"""

        job_scheduler = JobScheduler()

        load_default_profile()

        host = synthetic_host()
        host.state = "undeployed"
        host.save()

        deploy_host_job = DeployHostJob.objects.create(managed_host=host)
        deploy_host_job.locks_json = "{}"

        job_scheduler._complete_job(deploy_host_job,
                                    errored=True,
                                    cancelled=False)

        host = freshen(host)
        self.assertEqual(host.state, "undeployed")
Example #14
    def test_add_verb(self):
        """Test that add verb turns the jobs into the correct dictionary"""

        lnet_configuration = synthetic_host().lnet_configuration

        def _mock_get_job_class(begin_state, end_state, last_job_in_route=False):
            return ConfigureLNetJob  # a StateChangeJob
        lnet_configuration.get_job_class = _mock_get_job_class

        self.assertTrue(lnet_configuration.get_job_class(lnet_configuration.state, 'ignored') == ConfigureLNetJob)
        self.assertTrue(hasattr(lnet_configuration.get_job_class(lnet_configuration.state, 'ignored'), 'state_verb'))

        # NB: JobScheduler._fetch_jobs takes an object, but could take a class
        jobs = JobScheduler()._add_verbs(lnet_configuration, ['ignored', ])

        job_dict = jobs[0]
        self.assertTrue('verb' in job_dict)
        self.assertTrue('display_group' in job_dict)
        self.assertTrue('display_order' in job_dict)
        self.assertTrue('state' in job_dict)
        self.assertTrue('long_description' in job_dict)
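The dictionaries returned by _add_verbs carry everything a caller needs to present each job; a purely illustrative loop over the keys asserted above:

        jobs = JobScheduler()._add_verbs(lnet_configuration, ['ignored'])
        for job_dict in jobs:
            # Each entry describes one job that can be offered for the object.
            print("%s (group %s, order %s) -> %s: %s" % (
                job_dict['verb'],
                job_dict['display_group'],
                job_dict['display_order'],
                job_dict['state'],
                job_dict['long_description'],
            ))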
Example #15
def _passthrough_create_filesystem(target_data):
    ObjectCache.clear()
    return JobScheduler().create_filesystem(target_data)
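A passthrough like this is typically installed in place of the RPC client so that tests drive the scheduler in-process; a sketch under that assumption (patching JobSchedulerClient.create_filesystem is an assumption and may differ from what the real suite patches):

import mock

from chroma_core.services.job_scheduler.job_scheduler_client import JobSchedulerClient

# Route the client call straight into a fresh JobScheduler; the
# ObjectCache.clear() inside the passthrough keeps stale cache state
# from leaking between tests.
JobSchedulerClient.create_filesystem = mock.Mock(
    side_effect=_passthrough_create_filesystem)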
Example #16
    def setUp(self):
        super(JobTestCase, self).setUp()

        from chroma_core.services.http_agent import HttpAgentRpc
        from chroma_core.services.http_agent import Service as HttpAgentService

        # FIXME: have to do this before every test because otherwise one
        # test will get all the setup of StoragePluginClass records, and
        # the in-memory instance of storage_plugin_manager will expect
        # them to still be there even though they'll have been cleaned
        # out of the database.  Setting this up should be done as part
        # of the initial DB setup, before any test is started, so that
        # it's part of the baseline that's rolled back to after each test.
        import chroma_core.lib.storage_plugin.manager

        chroma_core.lib.storage_plugin.manager.storage_plugin_manager = (
            chroma_core.lib.storage_plugin.manager.StoragePluginManager())

        # Intercept attempts to call out to lustre servers
        import chroma_core.services.job_scheduler.agent_rpc

        self.old_agent_rpc = chroma_core.services.job_scheduler.agent_rpc.AgentRpc
        self.old_agent_ssh = chroma_core.services.job_scheduler.agent_rpc.AgentSsh
        MockAgentRpc.mock_servers = self.mock_servers
        MockAgentSsh.mock_servers = self.mock_servers

        chroma_core.services.job_scheduler.agent_rpc.AgentRpc = MockAgentRpc
        chroma_core.services.job_scheduler.agent_rpc.AgentSsh = MockAgentSsh

        # Any RPCs that are going to get called need to be explicitly overridden
        # to turn into local calls -- this is a catch-all to prevent any RPC
        # classes from trying to do network comms during unit tests.
        ServiceRpcInterface._call = mock.Mock(side_effect=NotImplementedError)
        ServiceQueue.put = mock.Mock()
        ServiceQueue.purge = mock.Mock()

        # Create an instance for the purposes of the test
        from chroma_core.services.plugin_runner.resource_manager import ResourceManager

        resource_manager = ResourceManager()
        from chroma_core.services.plugin_runner import AgentPluginHandlerCollection

        def patch_daemon_rpc(rpc_class, test_daemon):
            # Patch AgentDaemonRpc to call our instance instead of trying to do an RPC
            def rpc_local(fn_name, *args, **kwargs):
                # Run the response through a serialize/deserialize cycle to
                # give it that special RPC flavor.
                retval = json.loads(
                    json.dumps(getattr(test_daemon, fn_name)(*args, **kwargs)))
                log.info("patch_daemon_rpc: %s(%s %s) -> %s" %
                         (fn_name, args, kwargs, retval))
                return retval

            rpc_class._call = mock.Mock(side_effect=rpc_local)

        aphc = AgentPluginHandlerCollection(resource_manager)

        patch_daemon_rpc(AgentDaemonRpcInterface, aphc)

        aphc.update_host_resources = mock.Mock(
            side_effect=parse_synthentic_device_info)

        patch_daemon_rpc(HttpAgentRpc, HttpAgentService())

        from chroma_core.services.job_scheduler.dep_cache import DepCache
        from chroma_core.services.job_scheduler.job_scheduler import JobScheduler, RunJobThread
        from chroma_core.services.job_scheduler.job_scheduler_client import JobSchedulerRpc
        from chroma_core.services.job_scheduler.job_scheduler_notify import NotificationQueue

        ObjectCache.clear()
        self.job_scheduler = JobScheduler()
        patch_daemon_rpc(JobSchedulerRpc, self.job_scheduler)

        # self.job_scheduler.progress.put = mock.Mock(side_effect = lambda msg: self.job_scheduler.progress._handle(msg))
        # self.job_scheduler.progress.advance = mock.Mock(side_effect = lambda msg: self.job_scheduler.progress._handle(msg))

        from chroma_core.services.job_scheduler import QueueHandler

        job_scheduler_queue_handler = QueueHandler(self.job_scheduler)

        def job_scheduler_queue_immediate(body):
            log.info("job_scheduler_queue_immediate: %s" % body)
            job_scheduler_queue_handler.on_message(body)

        NotificationQueue.put = mock.Mock(
            side_effect=job_scheduler_queue_immediate)

        import chroma_core.services.job_scheduler.job_scheduler

        chroma_core.services.job_scheduler.job_scheduler._disable_database = mock.Mock()

        def _spawn_job(job):
            log.debug("functional spawn job")
            thread = RunJobThread(self.job_scheduler.progress,
                                  self.job_scheduler._db_quota, job,
                                  job.get_steps())
            self.job_scheduler._run_threads[job.id] = thread
            thread._run()

        self.job_scheduler._spawn_job = mock.Mock(side_effect=_spawn_job)

        def run_next():
            while True:
                runnable_jobs = self.job_scheduler._job_collection.ready_jobs

                log.info(
                    "run_next: %d runnable jobs of (%d pending, %d tasked)" % (
                        len(runnable_jobs),
                        len(self.job_scheduler._job_collection.pending_jobs),
                        len(self.job_scheduler._job_collection.tasked_jobs),
                    ))

                if not runnable_jobs:
                    break

                dep_cache = DepCache()
                ok_jobs, cancel_jobs = self.job_scheduler._check_jobs(
                    runnable_jobs, dep_cache)
                self.job_scheduler._job_collection.update_many(
                    ok_jobs, "tasked")
                for job in cancel_jobs:
                    self.job_scheduler._complete_job(job, False, True)
                for job in ok_jobs:
                    self.job_scheduler._spawn_job(job)

                self.drain_progress(skip_advance=True)

        JobScheduler._run_next = mock.Mock(side_effect=run_next)

        #
        # def complete_job(job, errored = False, cancelled = False):
        #     ObjectCache.clear()
        #     self.job_scheduler._complete_job(job, errored, cancelled)

        # JobScheduler.complete_job = mock.Mock(side_effect=complete_job)

        # Patch host removal because we use a _test_lun function that generates Volumes
        # with no corresponding StorageResourceRecords, so the real implementation wouldn't
        # remove them
        def fake_remove_host_resources(host_id):
            from chroma_core.models.host import Volume, VolumeNode

            for vn in VolumeNode.objects.filter(host__id=host_id):
                vn.mark_deleted()
            for volume in Volume.objects.all():
                if volume.volumenode_set.count() == 0:
                    volume.mark_deleted()

        AgentDaemonRpcInterface.remove_host_resources = mock.Mock(
            side_effect=fake_remove_host_resources)
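drain_progress is called in the run_next loop above but not defined in this excerpt; a minimal sketch of what such a helper might look like, assuming the progress object buffers messages in an internal _queue and applies them via _handle (as the commented-out put/advance lines above suggest) -- every attribute name here is an assumption:

    def drain_progress(self, skip_advance=False):
        """Synchronously apply buffered progress messages (assumed internals)."""
        while not self.job_scheduler.progress._queue.empty():
            msg = self.job_scheduler.progress._queue.get_nowait()
            if skip_advance and msg[0] == "advance":
                # Only step-advance messages are skipped; completions
                # and cancellations are still applied.
                continue
            self.job_scheduler.progress._handle(msg)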