def test__create_a_stateless_job_with_3_tasks_on_3_different_hosts():
    """Create a 3-instance stateless job carrying a label plus an
    anti-affinity constraint on that same label, then verify the
    scheduler placed each task on a distinct host."""
    label_key = "job.name"
    label_value = "peloton_stateless_job"

    job = Job(
        job_file="test_stateless_job.yaml",
        config=IntegrationTestConfig(
            max_retry_attempts=100,
            pool_file='test_stateless_respool.yaml',
        ),
        options=[
            with_labels({label_key: label_value}),
            with_constraint(_label_constraint(label_key, label_value)),
            with_instance_count(3),
        ],
    )
    job.create()
    job.wait_for_state(goal_state="RUNNING")

    # Record the host each task landed on.
    hosts = set()
    for _, task in job.get_tasks().iteritems():
        hosts.add(task.get_info().runtime.host)

    kill_jobs([job])

    # The label constraint must have spread the 3 tasks across
    # 3 different hosts.
    assert len(hosts) == 3
def test__create_2_stateless_jobs_with_task_to_task_anti_affinity_between_jobs():  # noqa
    """Launch two single-instance stateless jobs whose default task
    config ANDs two label constraints — one against the job's own label
    and one against the other job's label — and verify the two tasks
    end up on two different hosts."""
    label_key = "job.name"

    def _host_label_constraint(value):
        # One label constraint on the "job.name" label. The kind /
        # condition / requirement values mirror the original literal;
        # see task.proto for the exact enum semantics — TODO confirm.
        return task_pb2.Constraint(
            type=1,
            labelConstraint=task_pb2.LabelConstraint(
                kind=1,
                condition=2,
                requirement=0,
                label=peloton_pb2.Label(
                    key="job.name",
                    value=value,
                ),
            ),
        )

    jobs = []
    for i in range(2):
        job = Job(
            job_file="test_stateless_job.yaml",
            config=IntegrationTestConfig(
                max_retry_attempts=100,
                pool_file='test_stateless_respool.yaml',
            ),
            options=[
                with_labels({label_key: "peloton_stateless_job%s" % i}),
                with_job_name("TestPelotonDockerJob_Stateless" + repr(i)),
                with_instance_count(1),
            ],
        )
        # AND together: avoid tasks of my own job AND tasks of the
        # other job ((i + 1) % 2 flips between the two job indices).
        job.job_config.defaultConfig.constraint.CopyFrom(
            task_pb2.Constraint(
                type=2,
                andConstraint=task_pb2.AndConstraint(constraints=[
                    _host_label_constraint("peloton_stateless_job%s" % i),
                    _host_label_constraint(
                        "peloton_stateless_job%s" % ((i + 1) % 2)),
                ]),
            ))
        jobs.append(job)

    for job in jobs:
        job.create()
        time.sleep(1)

    # Gather the host of every task across both jobs.
    hosts = set()
    for job in jobs:
        job.wait_for_state(goal_state="RUNNING")
        for _, task in job.get_tasks().iteritems():
            hosts.add(task.get_info().runtime.host)

    kill_jobs(jobs)

    # Mutual anti-affinity must have put the 2 tasks on 2 hosts.
    assert len(hosts) == 2
def test__dynamic_partition_pool_restrictions(peloton_client): # we start with shared=1, batch_reserved=2 # delete batch_reserved so that its hosts go to "default" delete_host_pool(util.HOSTPOOL_BATCH_RESERVED) # setup 3 host-pools with 1 host each ensure_host_pool(util.HOSTPOOL_BATCH_RESERVED, 1) ensure_host_pool(util.HOSTPOOL_SHARED, 1) ensure_host_pool(util.HOSTPOOL_STATELESS, 1) hostToPool = dict() resp = list_host_pools() for pool in resp.pools: for h in pool.hosts: hostToPool[h] = pool.name # Job has two instances with 3 cpus each. # Only one instance will run. npjob = Job( client=peloton_client, job_file="test_non_preemptible_job.yaml", config=IntegrationTestConfig(max_retry_attempts=100), ) npjob.create() npjob.wait_for_state(goal_state='RUNNING') count = 0 for t in npjob.get_tasks(): if npjob.get_task(t).state_str == "PENDING": count = count + 1 else: hostname = npjob.get_task(t).get_runtime().host assert hostToPool[hostname] == util.HOSTPOOL_BATCH_RESERVED assert count == 1 # Stateless job has 4 instances with host limit 1 # so only one instance will run sjob = Job( client=peloton_client, job_file="test_stateless_job_host_limit_1.yaml", config=IntegrationTestConfig(max_retry_attempts=100, sleep_time_sec=2), ) sjob.create() sjob.wait_for_state(goal_state="RUNNING") count = 0 for t in sjob.get_tasks(): if sjob.get_task(t).state_str == "PENDING": count = count + 1 else: hostname = sjob.get_task(t).get_runtime().host assert hostToPool[hostname] == util.HOSTPOOL_STATELESS assert count == 3 # Preemptible batch job has 12 instances with 1 CPU each, # so 4 instances will run. 
pjob = Job( client=peloton_client, job_file="test_preemptible_job.yaml", config=IntegrationTestConfig(max_retry_attempts=100, sleep_time_sec=2), ) pjob.create() pjob.wait_for_state(goal_state="RUNNING") count = 0 for t in pjob.get_tasks(): if pjob.get_task(t).state_str == "PENDING": count = count + 1 else: hostname = pjob.get_task(t).get_runtime().host assert hostToPool[hostname] == util.HOSTPOOL_SHARED assert count == 8 # Stop all jobs npjob.stop() sjob.stop() pjob.stop()