def get_jobs_by_label(self, label, name, job_states):
    """
    Query the job manager for jobs matching a cluster label, module
    name and set of job states, and return their job IDs.

    :param label: the label value of the job (matched against the
        "cluster_name" label)
    :param name: the name of the job (matched against the
        "module_name" label)
    :param job_states: the job states to filter on
    :type label: str
    :type name: str
    :type job_states: dict
    :rtype: list[str] -- the IDs of the matching jobs
    :raises Exception: re-raises any error from the Query RPC after
        logging it
    """
    request = job.QueryRequest(
        respoolID=peloton.ResourcePoolID(value=self.respool_id),
        spec=job.QuerySpec(
            # First page only; assumes at most 100 matches — TODO
            # confirm callers never need more.
            pagination=query.PaginationSpec(offset=0, limit=100),
            labels=[
                peloton.Label(key="cluster_name", value=label),
                peloton.Label(key="module_name", value=name),
            ],
            jobStates=job_states,
        ),
    )
    try:
        records = self.client.job_svc.Query(
            request,
            metadata=self.client.jobmgr_metadata,
            timeout=default_timeout,
        ).records
        return [record.id.value for record in records]
    except Exception as e:
        # Log at the RPC boundary, then propagate to the caller.
        print_fail("Exception calling Get job :%s" % str(e))
        raise
def get_job_config_spec(self, label, name, num_instance,
                        default_task_config, instance_config=None, **extra):
    """
    Creates a job.JobConfig object.

    The resource pool ID is read from ``self.respool_id`` (it is not a
    parameter of this method).

    :param label: the label value of the job (stored under the
        "cluster_name" label key)
    :param name: the name of the job (also stored under the
        "module_name" label key)
    :param num_instance: the number of instances of the job
    :param default_task_config: the default task config of the job
    :param instance_config: instance specific task config
    :param extra: extra information of the job; recognized keys are
        'job_type', 'owningTeam', 'description' and 'change_log'
    :type label: str
    :type name: str
    :type num_instance: int
    :type default_task_config: task.TaskConfig
    :type instance_config: dict<int, task.TaskConfig>
    :type extra: dict
    :rtype: job.JobConfig
    """
    return job.JobConfig(
        name=name,
        type=extra.get('job_type', job.SERVICE),
        labels=[
            peloton.Label(
                key='cluster_name',
                value=label,
            ),
            peloton.Label(
                key='module_name',
                value=name,
            ),
        ],
        owningTeam=extra.get('owningTeam', 'compute'),
        description=extra.get('description', 'compute task'),
        instanceCount=num_instance,
        defaultConfig=default_task_config,
        instanceConfig=instance_config,
        # sla is required by resmgr
        sla=job.SlaConfig(
            priority=1,
            preemptible=True,
        ),
        respoolID=peloton.ResourcePoolID(value=self.respool_id),
        changeLog=extra.get('change_log', None))
def test_placement_exclusive_job(exclusive_host, peloton_client):
    """A job constrained to the exclusive host label must land all of
    its tasks on exclusive hosts."""
    exclusive_label = peloton_pb2.Label(
        key="peloton/exclusive", value="exclusive-test-label"
    )
    excl_constraint = task_pb2.Constraint(
        type=1,  # Label constraint
        labelConstraint=task_pb2.LabelConstraint(
            kind=2,  # Host
            condition=2,  # Equal
            requirement=1,
            label=exclusive_label,
        ),
    )
    # More instances than a single exclusive host can run at once.
    test_job = Job(
        client=peloton_client,
        job_file="long_running_job.yaml",
        config=IntegrationTestConfig(max_retry_attempts=100, sleep_time_sec=2),
        options=[with_constraint(excl_constraint), with_instance_count(6)],
    )
    test_job.job_config.defaultConfig.command.value = "sleep 10"

    test_job.create()
    test_job.wait_for_state()

    # Every task must report an exclusive host in its runtime info.
    for task_info in test_job.list_tasks().value.values():
        assert "exclusive" in task_info.runtime.host
def apply(job_config):
    """
    Append each (key, value) pair from ``labels`` (captured from the
    enclosing scope) to the job's default task config as a peloton
    Label.

    :param job_config: the job config to mutate in place
    """
    # .items() (not the Python-2-only .iteritems()) works on both
    # Python 2 and 3.
    for label_key, label_value in labels.items():
        job_config.defaultConfig.labels.extend([
            peloton.Label(
                key=label_key,
                value=label_value,
            ),
        ])
def create_job(self, instance_num, use_instance_config, sleep_time):
    """
    Create a test job with the given instance count and sleep time,
    record its ID on self, and return the ID.

    :param instance_num: number of instances of the job
    :param use_instance_config: whether to give each instance its own
        task config
    :param sleep_time: sleep duration passed to the pod config
    :rtype: str -- the created job's ID
    """
    default_config = self.create_pod_config(sleep_time, 'static')

    instance_config = {}
    if use_instance_config:
        instance_config = {
            i: self.create_pod_config(sleep_time, 'instance %s' % i)
            for i in range(instance_num)
        }

    job_name = 'instance %s && sleep %s && instance config %s' % (
        instance_num, sleep_time, use_instance_config)
    job_labels = [
        peloton.Label(key='task_num', value=str(instance_num)),
        peloton.Label(key='sleep_time', value=str(sleep_time)),
        peloton.Label(
            key='use_instance_config',
            value=str(use_instance_config),
        ),
    ]

    config = job.JobConfig(
        name=job_name,
        labels=job_labels,
        owningTeam='compute',
        description='test job',
        instanceCount=instance_num,
        defaultConfig=default_config,
        instanceConfig=instance_config,
        # sla is required by resmgr
        sla=job.SlaConfig(
            priority=1,
            preemptible=False,
        ),
        respoolID=peloton.ResourcePoolID(value=self.respool_id),
    )

    resp = self.client.job_svc.Create(
        job.CreateRequest(config=config),
        metadata=self.client.jobmgr_metadata,
        timeout=default_timeout,
    )
    self.job_id = resp.jobId.value
    return self.job_id
def test__query_job_by_name_by_label(create_jobs):
    """Querying by both job name and label should return every job
    created in the RUNNING state."""
    respoolID = create_jobs[1]
    running_jobs = create_jobs[2]["RUNNING"]
    target_name = running_jobs[0].get_info().config.name

    # query by name and label
    spec_by_label = job_pb2.QuerySpec(
        name=target_name,
        labels=[peloton.Label(key="testKey0", value="testVal0")],
    )

    resp = query_by_spec(respoolID, spec_by_label)
    assert len(resp.results) == NUM_JOBS_PER_STATE
def _label_constraint(key, value):
    """
    Returns a label constraint for host limit 1

    :param key: The key fo the label
    :param value: The value of the label
    """
    match_label = peloton_pb2.Label(
        # Tasks of my own job
        key=key,
        value=value,
    )
    # NOTE(review): the numeric enum values (type=1, kind=1,
    # condition=2, requirement=0) mirror the proto definitions —
    # confirm against task_pb2 before changing.
    return task_pb2.Constraint(
        type=1,
        labelConstraint=task_pb2.LabelConstraint(
            kind=1,
            condition=2,
            requirement=0,
            label=match_label,
        ),
    )
def test__create_2_stateless_jobs_with_task_to_task_anti_affinity_between_jobs():  # noqa
    """
    Launch two single-instance stateless jobs whose task constraints
    exclude both their own tasks and the other job's tasks from
    sharing a host, then assert the two jobs ran on two distinct
    hosts.
    """
    label_key = "job.name"
    jobs = []
    for i in range(2):
        job = Job(
            job_file="test_stateless_job.yaml",
            config=IntegrationTestConfig(
                max_retry_attempts=100,
                pool_file='test_stateless_respool.yaml',
            ),
            options=[
                with_labels({label_key: "peloton_stateless_job%s" % i}),
                with_job_name("TestPelotonDockerJob_Stateless" + repr(i)),
                with_instance_count(1),
            ],
        )
        job.job_config.defaultConfig.constraint.CopyFrom(
            task_pb2.Constraint(
                type=2,
                andConstraint=task_pb2.AndConstraint(constraints=[
                    task_pb2.Constraint(
                        type=1,
                        labelConstraint=task_pb2.LabelConstraint(
                            kind=1,
                            condition=2,
                            requirement=0,
                            label=peloton_pb2.Label(
                                # Tasks of my own job
                                key="job.name",
                                value="peloton_stateless_job%s" % i,
                            ),
                        ),
                    ),
                    task_pb2.Constraint(
                        type=1,
                        labelConstraint=task_pb2.LabelConstraint(
                            kind=1,
                            condition=2,
                            requirement=0,
                            label=peloton_pb2.Label(
                                # Avoid tasks of the other job
                                key="job.name",
                                value="peloton_stateless_job%s" % ((i + 1) % 2),
                            ),
                        ),
                    ),
                ]),
            ))
        jobs.append(job)

    for job in jobs:
        job.create()
        time.sleep(1)

    # Collect the hosts the tasks actually ran on.
    hosts = set()
    for job in jobs:
        job.wait_for_state(goal_state="RUNNING")
        # .items() (not the Python-2-only .iteritems()) keeps this
        # compatible with Python 3; hosts.add() replaces the previous
        # union-of-a-singleton-set construction.
        for _, task in job.get_tasks().items():
            task_info = task.get_info()
            hosts.add(task_info.runtime.host)

    kill_jobs(jobs)

    # Ensure that the tasks run on 2 different hosts
    assert len(hosts) == 2