def patch_jobs(active_jobs=None, desired_jobs=None):
    """
    Reconcile the active jobs with the desired job specs.

    For every entry in ``desired_jobs``:
      * if a job with the same name is present in ``active_jobs`` it is
        updated in place via ``patch_job``;
      * otherwise a new ``StatelessJob`` is created from the spec and the
        call blocks until all of its pods are running.

    Args:
        active_jobs: mapping of job name -> existing job object. ``None``
            is treated as "no active jobs".
        desired_jobs: mapping of job name -> desired job spec. ``None``
            is treated as "nothing desired".

    Returns:
        dict mapping each desired job name to the id reported by
        ``get_job_id()`` of the patched or newly created job.
    """
    # Both parameters default to None; normalise them so that calling with
    # the defaults yields an empty result instead of an AttributeError.
    if active_jobs is None:
        active_jobs = {}
    if desired_jobs is None:
        desired_jobs = {}

    jobs = {}
    for job_name, job_spec in desired_jobs.items():
        if job_name in active_jobs:
            # job exists -> update to desired state
            existing = active_jobs[job_name]
            patch_job(existing, job_spec)
            jobs[job_name] = existing.get_job_id()
            continue

        # job does not exist -> create
        job = StatelessJob(
            job_config=job_spec,
            config=IntegrationTestConfig(
                pool_file=RESPOOL_FILE_NAME,
                max_retry_attempts=MAX_RETRY_ATTEMPTS,
            ),
        )
        job.create()
        # NOTE(review): fixed pause before waiting on pod state; presumably
        # gives the new job time to be picked up -- confirm whether it is
        # still required.
        time.sleep(10)
        job.wait_for_all_pods_running()
        jobs[job_name] = job.get_job_id()

    # TODO: Kill any undesired active job running in the canary cluster
    return jobs
def patch_jobs(active_jobs=None, desired_jobs=None):
    """
    Reconcile the active jobs with the desired job specs.

    For every entry in ``desired_jobs``:
      * if a job with the same name is present in ``active_jobs``:
          - when the ``FAILFAST`` environment variable equals ``"NO"`` the
            job is updated in place via ``patch_job``;
          - otherwise (any other value, or unset) the job is only diffed
            against the desired spec, and the pytest session is aborted
            through ``pytest.exit`` if the diff reports any removed,
            updated or added instances;
      * otherwise a new ``StatelessJob`` is created from the spec and the
        call blocks until all of its pods are running.

    Args:
        active_jobs: mapping of job name -> existing job object. ``None``
            is treated as "no active jobs".
        desired_jobs: mapping of job name -> desired job spec. ``None``
            is treated as "nothing desired". Note that in the diff path
            the spec's ``respool_id.value`` is overwritten with the value
            from the active job.

    Returns:
        dict mapping each desired job name to the id reported by
        ``get_job_id()`` of the existing or newly created job.
    """
    # Both parameters default to None; normalise them so that calling with
    # the defaults yields an empty result instead of an AttributeError.
    if active_jobs is None:
        active_jobs = {}
    if desired_jobs is None:
        desired_jobs = {}

    jobs = {}
    for job_name, job_spec in desired_jobs.items():
        if job_name not in active_jobs:
            # job does not exist -> create
            job = StatelessJob(
                job_config=job_spec,
                config=IntegrationTestConfig(
                    pool_file=RESPOOL_FILE_NAME,
                    max_retry_attempts=MAX_RETRY_ATTEMPTS,
                ),
            )
            job.create()
            # NOTE(review): fixed pause before waiting on pod state;
            # presumably gives the new job time to be picked up -- confirm
            # whether it is still required.
            time.sleep(10)
            job.wait_for_all_pods_running()
            jobs[job_name] = job.get_job_id()
            continue

        existing = active_jobs[job_name]

        # Only an explicit FAILFAST=NO allows patching an existing job.
        # Any other value (including unset) means: do not touch the job,
        # just verify it is clean.
        if os.getenv("FAILFAST") == "NO":
            # job exists -> update to desired state
            patch_job(existing, job_spec)
            jobs[job_name] = existing.get_job_id()
            continue

        # A non-empty update diff means that a previous canary test run
        # failed; block further runs until the issue is manually debugged
        # and the state is restored.
        job_spec.respool_id.value = existing.get_spec().respool_id.value
        resp = existing.get_replace_job_diff(job_spec=job_spec)
        # Parenthesised form behaves the same as the print statement on
        # Python 2 for a single argument and is valid syntax on Python 3.
        print(resp)
        is_dirty = (
            len(resp.instances_removed) > 0
            or len(resp.instances_updated) > 0
            or len(resp.instances_added) > 0
        )
        if is_dirty:
            pytest.exit(
                "canary test run was aborted, since jobs are dirty!!")
        jobs[job_name] = existing.get_job_id()

    # TODO: Kill any undesired active job running in the canary cluster
    return jobs
def test__delete_killed_job():
    """
    Create a stateless job, stop it, delete it, and verify it is gone.

    Verification has two parts:
      1. ``job.get_job()`` must raise ``grpc.RpcError`` with status code
         ``NOT_FOUND``;
      2. the job id must not appear in the result of
         ``get_active_jobs()``.
    """
    job = StatelessJob()
    job.create()
    job.wait_for_state(goal_state="RUNNING")
    # Capture the id before deletion so it can be checked afterwards.
    job_id = job.get_job_id()

    job.stop()
    job.wait_for_state(goal_state="KILLED")

    job.delete()
    # NOTE(review): fixed pause after delete; presumably lets the deletion
    # take effect before querying -- confirm whether it is still required.
    time.sleep(10)

    try:
        job.get_job()
    except grpc.RpcError as e:
        assert e.code() == grpc.StatusCode.NOT_FOUND
    else:
        # The lookup succeeded, so the job was not deleted.
        raise Exception("job not found error not received")

    # try to find the job from active_jobs
    # (previously unreachable: it sat after a `return` in the except
    # branch and an unconditional `raise`, so it never executed)
    ids = get_active_jobs()
    assert job_id not in ids