def test_multi_user_usage(self):
    """Verify per-user resource usage reporting across several users.

    User i submits i jobs (so user 0 submits none), and each user's
    /usage endpoint is expected to report exactly i running jobs worth
    of cpus/mem once everything is scheduled.
    """
    users = self.user_factory.new_users(6)
    resources = {'cpus': 0.1, 'mem': 123}
    submitted_uuids = []
    pools, _ = util.all_pools(self.cook_url)
    try:
        # Start jobs for several users: the i-th user launches i jobs.
        for index, user in enumerate(users):
            with user:
                for _ in range(index):
                    job_uuid, resp = util.submit_job(self.cook_url,
                                                     command='sleep 480',
                                                     max_retries=2,
                                                     **resources)
                    self.assertEqual(resp.status_code, 201, resp.content)
                    submitted_uuids.append(job_uuid)
                    job = util.load_job(self.cook_url, job_uuid)
                    self.assertEqual(user.name, job['user'], job)

        # Don't query until the jobs are all running
        util.wait_for_jobs(self.cook_url, submitted_uuids, 'running')

        # Check the usage for each of our users
        for index, user in enumerate(users):
            with user:
                # Get the current usage
                resp = util.user_current_usage(self.cook_url, user=user.name)
                self.assertEqual(resp.status_code, 200, resp.content)
                usage = resp.json()

                # Check that the response structure looks as expected
                expected_keys = ['total_usage', 'pools'] if pools else ['total_usage']
                self.assertEqual(list(usage.keys()), expected_keys, usage)
                self.assertEqual(len(usage['total_usage']), 4, usage)

                # Check that each user's usage is as expected
                total = usage['total_usage']
                self.assertEqual(total['mem'], resources['mem'] * index, usage)
                self.assertEqual(total['cpus'], resources['cpus'] * index, usage)
                self.assertEqual(total['gpus'], 0, usage)
                self.assertEqual(total['jobs'], index, usage)
    finally:
        # Dump sandbox contents for any failed instances to aid debugging.
        for job_uuid in submitted_uuids:
            job = util.load_job(self.cook_url, job_uuid)
            for instance in job['instances']:
                if instance['status'] == 'failed':
                    mesos.dump_sandbox_files(util.session, instance, job)
        # Terminate all of the jobs
        if submitted_uuids:
            with self.user_factory.admin():
                util.kill_jobs(self.cook_url, submitted_uuids, assert_response=False)
def low_priority_job():
    """Reload the large job, logging the user's running and waiting jobs
    from the last hour as a debugging aid.

    Relies on ``uuid_large``, ``user``, and ``self`` captured from the
    enclosing scope.
    """
    job = util.load_job(self.cook_url, uuid_large)
    # Query window: the last hour, in epoch milliseconds.
    window_end = util.current_milli_time()
    window_start = window_end - 60 * 60 * 1000
    running = util.jobs(self.cook_url, user=user.name, state='running',
                        start=window_start, end=window_end).json()
    waiting = util.jobs(self.cook_url, user=user.name, state='waiting',
                        start=window_start, end=window_end).json()
    self.logger.info(f'Currently running jobs: {json.dumps(running, indent=2)}')
    self.logger.info(f'Currently waiting jobs: {json.dumps(waiting, indent=2)}')
    return job
def test_pool_scheduling(self):
    """Verify that quota enforcement and scheduling are applied per pool.

    For each active pool: fill the user's cpu quota with one long-running
    job, confirm it lands on a node in that pool, then submit one more job
    and assert it stays waiting with the expected unscheduled reasons
    (job ahead in queue + would-exceed-quota).
    """
    admin = self.user_factory.admin()
    user = self.user_factory.new_user()
    pools, _ = util.active_pools(self.cook_url)
    submitted = []
    try:
        default_pool = util.default_pool(self.cook_url)
        self.assertLess(1, len(pools))
        self.assertIsNotNone(default_pool)
        cpus = 0.1

        with admin:
            self.logger.info(
                f'Running tasks: {json.dumps(util.running_tasks(self.cook_url), indent=2)}')
            # Lower the user's cpu quota on every pool; the default pool
            # gets the base quota, the others get double.
            for pool in pools:
                pool_name = pool['name']
                multiplier = 1 if pool_name == default_pool else 2
                util.set_limit(self.cook_url, 'quota', user.name,
                               cpus=cpus * multiplier, pool=pool_name)

        with user:
            util.kill_running_and_waiting_jobs(self.cook_url, user.name)
            for pool in pools:
                pool_name = pool['name']

                # Submit a job that fills the user's quota on this pool
                quota = util.get_limit(self.cook_url, 'quota',
                                       user.name, pool_name).json()
                quota_cpus = quota['cpus']
                filler_uuid, _ = util.submit_job(self.cook_url, cpus=quota_cpus,
                                                 command='sleep 600',
                                                 pool=pool_name)
                submitted.append(filler_uuid)
                instance = util.wait_for_running_instance(self.cook_url, filler_uuid)
                # The filling job must have been placed in the right pool.
                self.assertEqual(pool_name, util.node_pool(instance['hostname']))

                # Submit a job that should not get scheduled
                blocked_uuid, _ = util.submit_job(self.cook_url, cpus=cpus,
                                                  command='ls', pool=pool_name)
                submitted.append(blocked_uuid)
                self.assertEqual('waiting',
                                 util.load_job(self.cook_url, blocked_uuid)['status'])

                # Assert that the unscheduled reason and data are correct;
                # retry because the scheduler populates reasons asynchronously.
                @retry(stop_max_delay=60000, wait_fixed=5000)
                def check_unscheduled_reason():
                    jobs, _ = util.unscheduled_jobs(self.cook_url, blocked_uuid)
                    self.logger.info(f'Unscheduled jobs: {jobs}')
                    self.assertEqual(blocked_uuid, jobs[0]['uuid'])
                    job_reasons = jobs[0]['reasons']
                    # Check the spot-in-queue reason
                    queued = next(r for r in job_reasons
                                  if r['reason'] == 'You have 1 other jobs ahead in the queue.')
                    self.assertEqual({'jobs': [filler_uuid]}, queued['data'])
                    # Check the exceeding-quota reason
                    over_quota = next(r for r in job_reasons
                                      if r['reason'] == reasons.JOB_WOULD_EXCEED_QUOTA)
                    expected_data = {'cpus': {'limit': quota_cpus,
                                              'usage': quota_cpus + cpus}}
                    self.assertEqual(expected_data, over_quota['data'])

                check_unscheduled_reason()
    finally:
        with admin:
            util.kill_jobs(self.cook_url, submitted, assert_response=False)
            for pool in pools:
                util.reset_limit(self.cook_url, 'quota', user.name,
                                 reason=self.current_name(), pool=pool['name'])