Exemple #1
0
 def test_job_count_quota(self):
     """Check that a user's job-count quota is enforced at submission time.

     The test walks through three scenarios against ``self.cook_url``:

     1. The admin sets the user's job-count quota to 0 (expects 201); a
        submission by the user is then rejected (expects 422).
     2. The admin resets the user's quota (expects 204); a submission by
        the user is then accepted (expects 201).
     3. The admin tries to set a count of -1, which is refused
        (expects 400).

     Cleanup kills every job the test submitted and resets the quota.
     """
     admin = self.user_factory.admin()
     user = self.user_factory.new_user()
     all_job_uuids = []
     try:
         # User with no quota can't submit jobs
         with admin:
             resp = util.set_limit(self.cook_url,
                                   'quota',
                                   user.name,
                                   count=0)
             self.assertEqual(resp.status_code, 201, resp.text)
         with user:
             _, resp = util.submit_job(self.cook_url)
             self.assertEqual(resp.status_code, 422, msg=resp.text)
         # Reset user's quota back to default, then user can submit jobs again
         with admin:
             resp = util.reset_limit(self.cook_url, 'quota', user.name)
             self.assertEqual(resp.status_code, 204, resp.text)
         with user:
             job_uuid, resp = util.submit_job(self.cook_url)
             self.assertEqual(resp.status_code, 201, msg=resp.text)
             all_job_uuids.append(job_uuid)
         # Can't set negative quota
         with admin:
             resp = util.set_limit(self.cook_url,
                                   'quota',
                                   user.name,
                                   count=-1)
             self.assertEqual(resp.status_code, 400, resp.text)
     finally:
         with admin:
             # The quota reset has to run even when killing the jobs raises;
             # otherwise a failure part-way through the test would leave the
             # user with whatever quota was set last (possibly 0).
             try:
                 util.kill_jobs(self.cook_url, all_job_uuids)
             finally:
                 util.reset_limit(self.cook_url, 'quota', user.name)
Exemple #2
0
 def test_job_cpu_quota(self):
     """Exercise enforcement of a user's cpu quota at job-submission time.

     Scenarios, in order:

     * quota of 0 cpus: a submission is rejected (422);
     * quota of 0.25 cpus: a 0.5-cpu job is rejected (422) while a
       0.25-cpu job is accepted (201);
     * quota reset (204): a plain submission is accepted (201);
     * setting a quota of -4 cpus is refused (400).

     Cleanup kills the accepted jobs and resets the quota.
     """
     admin = self.user_factory.admin()
     user = self.user_factory.new_user()
     submitted_uuids = []

     def set_cpu_quota(cpus, expected_status):
         # Must be called while acting as the admin.
         response = util.set_limit(self.cook_url,
                                   'quota',
                                   user.name,
                                   cpus=cpus)
         self.assertEqual(response.status_code, expected_status,
                          response.text)

     def submit(expected_status, **job_spec):
         # Must be called while acting as the user; hands back the job uuid.
         new_uuid, response = util.submit_job(self.cook_url, **job_spec)
         self.assertEqual(response.status_code,
                          expected_status,
                          msg=response.text)
         return new_uuid

     try:
         # A zero cpu quota blocks every submission
         with admin:
             set_cpu_quota(0, 201)
         with user:
             submit(422)

         # A 0.25-cpu quota blocks a 0.5-cpu job but admits a 0.25-cpu job
         with admin:
             set_cpu_quota(0.25, 201)
         with user:
             submit(422, cpus=0.5)
             submitted_uuids.append(submit(201, cpus=0.25))

         # After the quota is reset, a plain submission goes through again
         with admin:
             response = util.reset_limit(self.cook_url,
                                         'quota',
                                         user.name,
                                         reason=self.current_name())
             self.assertEqual(response.status_code, 204, response.text)
         with user:
             submitted_uuids.append(submit(201))

         # A negative cpu quota is refused outright
         with admin:
             set_cpu_quota(-4, 400)
     finally:
         with admin:
             util.kill_jobs(self.cook_url,
                            submitted_uuids,
                            assert_response=False)
             util.reset_limit(self.cook_url,
                              'quota',
                              user.name,
                              reason=self.current_name())
Exemple #3
0
 def test_cannot_impersonate_admin_endpoints(self):
     """Check that impersonating the admin does not unlock admin endpoints.

     The same requests are issued twice: first as ``self.admin`` (queue
     read expects 200, each limit set expects 201, each limit reset
     expects 204), then as ``self.poser`` impersonating the admin, where
     every request is expected to come back 403.
     """
     target = self.user_factory.new_user()
     # (limit type, cpus to set) pairs, exercised in this order
     limit_cases = (('quota', 20), ('share', 10))

     # admin can do admin things
     with self.admin:
         # read queue endpoint
         response = util.query_queue(self.cook_url)
         self.assertEqual(response.status_code, 200, response.text)
         for limit_type, cpus in limit_cases:
             # set the user's limit
             response = util.set_limit(self.cook_url,
                                       limit_type,
                                       target.name,
                                       cpus=cpus)
             self.assertEqual(response.status_code, 201, response.text)
             # reset the user's limit back to default
             response = util.reset_limit(self.cook_url,
                                         limit_type,
                                         target.name,
                                         reason=self.current_name())
             self.assertEqual(response.status_code, 204, response.text)

     # impersonator cannot indirectly do admin things
     with self.poser.impersonating(self.admin):
         # read queue endpoint
         response = util.query_queue(self.cook_url)
         self.assertEqual(response.status_code, 403, response.text)
         for limit_type, cpus in limit_cases:
             # set the user's limit
             response = util.set_limit(self.cook_url,
                                       limit_type,
                                       target.name,
                                       cpus=cpus)
             self.assertEqual(response.status_code, 403, response.text)
             # reset the user's limit back to default
             response = util.reset_limit(self.cook_url,
                                         limit_type,
                                         target.name,
                                         reason=self.current_name())
             self.assertEqual(response.status_code, 403, response.text)
Exemple #4
0
    def test_pool_scheduling(self):
        """
        Check per-pool scheduling and quota reporting for every active pool.

        As admin, the user's cpu quota is set to 0.1 on the default pool and
        to twice that on every other pool. Then, as the user and for each
        pool in turn:

        1. Submit a 'sleep 600' job sized to the pool's cpu quota, wait for
           a running instance and assert its host belongs to that pool.
        2. Submit a 0.1-cpu 'ls' job and assert it is 'waiting'.
        3. Retry (every 5 s, up to 60 s) until the unscheduled-jobs report
           for the second job carries both the queue-position reason
           (naming the first job) and the exceeds-quota reason with the
           expected cpu limit and usage.

        Cleanup kills all submitted jobs and resets the quota on each pool.
        """
        admin = self.user_factory.admin()
        user = self.user_factory.new_user()
        pools, _ = util.active_pools(self.cook_url)
        submitted_uuids = []
        try:
            default_pool = util.default_pool(self.cook_url)
            self.assertLess(1, len(pools))
            self.assertIsNotNone(default_pool)

            cpus = 0.1
            with admin:
                running_tasks = util.running_tasks(self.cook_url)
                self.logger.info(
                    f'Running tasks: {json.dumps(running_tasks, indent=2)}')
                for pool in pools:
                    # Lower the user's cpu quota on this pool; non-default
                    # pools get double the default pool's allowance
                    pool_name = pool['name']
                    if pool_name == default_pool:
                        quota_multiplier = 1
                    else:
                        quota_multiplier = 2
                    util.set_limit(self.cook_url,
                                   'quota',
                                   user.name,
                                   cpus=cpus * quota_multiplier,
                                   pool=pool_name)

            with user:
                util.kill_running_and_waiting_jobs(self.cook_url, user.name)
                for pool in pools:
                    pool_name = pool['name']

                    # Submit a job that uses up the whole quota on this pool
                    quota_response = util.get_limit(self.cook_url, 'quota',
                                                    user.name, pool_name)
                    quota_cpus = quota_response.json()['cpus']
                    filling_uuid, _ = util.submit_job(self.cook_url,
                                                      cpus=quota_cpus,
                                                      command='sleep 600',
                                                      pool=pool_name)
                    submitted_uuids.append(filling_uuid)
                    running_instance = util.wait_for_running_instance(
                        self.cook_url, filling_uuid)
                    host_pool = util.node_pool(running_instance['hostname'])
                    self.assertEqual(pool_name, host_pool)

                    # Submit a second job, which should stay unscheduled
                    blocked_uuid, _ = util.submit_job(self.cook_url,
                                                      cpus=cpus,
                                                      command='ls',
                                                      pool=pool_name)
                    submitted_uuids.append(blocked_uuid)
                    blocked_job = util.load_job(self.cook_url, blocked_uuid)
                    self.assertEqual('waiting', blocked_job['status'])

                    # Poll until the unscheduled reasons and data are right
                    @retry(stop_max_delay=60000, wait_fixed=5000)
                    def check_unscheduled_reason():
                        jobs, _ = util.unscheduled_jobs(
                            self.cook_url, blocked_uuid)
                        self.logger.info(f'Unscheduled jobs: {jobs}')
                        report = jobs[0]
                        self.assertEqual(blocked_uuid, report['uuid'])
                        job_reasons = report['reasons']

                        # The spot-in-queue reason names the filling job
                        queue_reason = next(
                            entry for entry in job_reasons
                            if entry['reason'] ==
                            'You have 1 other jobs ahead in the queue.')
                        expected_queue_data = {'jobs': [filling_uuid]}
                        self.assertEqual(expected_queue_data,
                                         queue_reason['data'])

                        # The exceeding-quota reason reports limit and usage
                        quota_reason = next(
                            entry for entry in job_reasons
                            if entry['reason'] ==
                            reasons.JOB_WOULD_EXCEED_QUOTA)
                        expected_quota_data = {
                            'cpus': {
                                'limit': quota_cpus,
                                'usage': quota_cpus + cpus
                            }
                        }
                        self.assertEqual(expected_quota_data,
                                         quota_reason['data'])

                    check_unscheduled_reason()
        finally:
            with admin:
                util.kill_jobs(self.cook_url,
                               submitted_uuids,
                               assert_response=False)
                for pool in pools:
                    util.reset_limit(self.cook_url,
                                     'quota',
                                     user.name,
                                     reason=self.current_name(),
                                     pool=pool['name'])
Exemple #5
0
    def trigger_preemption(self, pool):
        """
        Provoke a rebalancer preemption on the given pool (None is accepted).

        The sequence is:

        1. Create a fresh user
        2. As admin, set that user's cpu share to 0.1 and cpu quota to 1.0
        3. As the user, submit a 1.0-cpu 'sleep 600' job at priority 99
        4. Wait for that job to have a running instance
        5. Submit a 0.1-cpu job at priority 100
        6. Poll every 5 seconds, for up to 1.5 rebalancer intervals, until
           an instance of the first job reports 'Preempted by rebalancer'

        Cleanup kills both jobs and resets the user's share and quota.
        """
        admin = self.user_factory.admin()
        user = self.user_factory.new_user()
        launched_uuids = []
        small_cpus = 0.1
        large_cpus = small_cpus * 10
        base_priority = 99
        command = 'sleep 600'
        try:
            with admin:
                # Shrink the user's cpu share first, then the cpu quota
                for limit_type, limit_cpus in (('share', small_cpus),
                                               ('quota', large_cpus)):
                    util.set_limit(self.cook_url,
                                   limit_type,
                                   user.name,
                                   cpus=limit_cpus,
                                   pool=pool)

            with user:
                # The first job is sized to the whole quota
                filler_uuid, _ = util.submit_job(self.cook_url,
                                                 priority=base_priority,
                                                 cpus=large_cpus,
                                                 command=command,
                                                 pool=pool)
                launched_uuids.append(filler_uuid)
                util.wait_for_running_instance(self.cook_url, filler_uuid)

                # The second job outranks the first and should preempt it
                preemptor_uuid, _ = util.submit_job(
                    self.cook_url,
                    priority=base_priority + 1,
                    cpus=small_cpus,
                    command=command,
                    name='higher_priority_job',
                    pool=pool)
                launched_uuids.append(preemptor_uuid)

                def fetch_filler_job():
                    # Reload the first job; also log the user's running and
                    # waiting jobs from the last hour as debugging context
                    job = util.load_job(self.cook_url, filler_uuid)
                    one_hour_in_millis = 60 * 60 * 1000
                    start = util.current_milli_time() - one_hour_in_millis
                    end = util.current_milli_time()
                    running, waiting = [
                        util.jobs(self.cook_url,
                                  user=user.name,
                                  state=state,
                                  start=start,
                                  end=end).json()
                        for state in ('running', 'waiting')
                    ]
                    self.logger.info(
                        f'Currently running jobs: {json.dumps(running, indent=2)}'
                    )
                    self.logger.info(
                        f'Currently waiting jobs: {json.dumps(waiting, indent=2)}'
                    )
                    return job

                def was_preempted(job):
                    # True as soon as any instance carries the rebalancer's
                    # preemption reason
                    for instance in job['instances']:
                        self.logger.debug(
                            f'Checking if instance was preempted: {instance}')
                        reason_string = instance.get('reason_string')
                        if reason_string == 'Preempted by rebalancer':
                            return True
                    else:
                        self.logger.info(
                            f'Job has not been preempted: {job}')
                        return False

                rebalancer_settings = util.settings(
                    self.cook_url)['rebalancer']
                max_wait_ms = (
                    rebalancer_settings['interval-seconds'] * 1000 * 1.5)
                self.logger.info(
                    f'Waiting up to {max_wait_ms} milliseconds for preemption to happen'
                )
                util.wait_until(fetch_filler_job,
                                was_preempted,
                                max_wait_ms=max_wait_ms,
                                wait_interval_ms=5000)
        finally:
            with admin:
                util.kill_jobs(self.cook_url,
                               launched_uuids,
                               assert_response=False)
                for limit_type in ('share', 'quota'):
                    util.reset_limit(self.cook_url,
                                     limit_type,
                                     user.name,
                                     reason=self.current_name(),
                                     pool=pool)
Exemple #6
0
    def test_preemption(self):
        """
        Check that a higher-priority job causes a lower-priority one to be
        preempted by the rebalancer.

        As admin, a fresh user's cpu share is set to 0.1 and cpu quota to
        1.0. As the user, a 1.0-cpu 'sleep 600' job is submitted at priority
        99 and awaited until it has a running instance; a 0.1-cpu job is then
        submitted at priority 100. The test polls every 5 seconds, for up to
        1.5 rebalancer intervals, until an instance of the first job reports
        'Preempted by rebalancer'.

        Cleanup kills both jobs and resets the user's share and quota.
        """
        admin = self.user_factory.admin()
        user = self.user_factory.new_user()
        launched_uuids = []
        small_cpus = 0.1
        large_cpus = small_cpus * 10
        base_priority = 99
        command = 'sleep 600'
        try:
            with admin:
                # Shrink the user's cpu share first, then the cpu quota
                for limit_type, limit_cpus in (('share', small_cpus),
                                               ('quota', large_cpus)):
                    util.set_limit(self.cook_url,
                                   limit_type,
                                   user.name,
                                   cpus=limit_cpus)

            with user:
                # The low-priority job is sized to the whole quota
                low_uuid, _ = util.submit_job(self.cook_url,
                                              priority=base_priority,
                                              cpus=large_cpus,
                                              command=command)
                launched_uuids.append(low_uuid)
                util.wait_for_running_instance(self.cook_url, low_uuid)

                # The high-priority job should push the first one out
                high_uuid, _ = util.submit_job(self.cook_url,
                                               priority=base_priority + 1,
                                               cpus=small_cpus,
                                               command=command,
                                               name='higher_priority_job')
                launched_uuids.append(high_uuid)

                def reload_low_priority_job():
                    # Reload the first job; also log the user's running and
                    # waiting jobs from the last hour as debugging context
                    job = util.load_job(self.cook_url, low_uuid)
                    one_hour_in_millis = 60 * 60 * 1000
                    start = util.current_milli_time() - one_hour_in_millis
                    end = util.current_milli_time()

                    def jobs_in_state(state):
                        response = util.jobs(self.cook_url,
                                             user=user.name,
                                             state=state,
                                             start=start,
                                             end=end)
                        return response.json()

                    running = jobs_in_state('running')
                    waiting = jobs_in_state('waiting')
                    self.logger.info(
                        f'Currently running jobs: {json.dumps(running, indent=2)}'
                    )
                    self.logger.info(
                        f'Currently waiting jobs: {json.dumps(waiting, indent=2)}'
                    )
                    return job

                def preempted(job):
                    # True as soon as any instance carries the rebalancer's
                    # preemption reason
                    for instance in job['instances']:
                        self.logger.debug(
                            f'Checking if instance was preempted: {instance}')
                        if 'Preempted by rebalancer' == instance.get(
                                'reason_string'):
                            return True
                    self.logger.info(f'Job has not been preempted: {job}')
                    return False

                settings = util.settings(self.cook_url)
                interval_seconds = settings['rebalancer']['interval-seconds']
                max_wait_ms = interval_seconds * 1000 * 1.5
                self.logger.info(
                    f'Waiting up to {max_wait_ms} milliseconds for preemption to happen'
                )
                util.wait_until(reload_low_priority_job,
                                preempted,
                                max_wait_ms=max_wait_ms,
                                wait_interval_ms=5000)
        finally:
            with admin:
                util.kill_jobs(self.cook_url,
                               launched_uuids,
                               assert_response=False)
                for limit_type in ('share', 'quota'):
                    util.reset_limit(self.cook_url,
                                     limit_type,
                                     user.name,
                                     reason=self.current_name())