def create(self, in_place=False, entity_version=None):
    """
    Replace the job spec with ``self.updated_job_spec`` via ReplaceJob.

    The entity version sent with the request is the first truthy value of
    ``entity_version``, ``self.job.entity_version`` and the version
    reported by ``self.job.get_status()``.

    If the request is aborted because of an invalid entity version and the
    caller passed ``entity_version=None``, the version is read again from
    the job status and the request is re-sent; this repeats until the
    request goes through. Every other RPC error, and a version error when
    ``entity_version`` is not None, is re-raised unchanged.

    On success ``self.job.entity_version`` is set to the version carried by
    the response and the new version is logged. Nothing is returned.

    :param in_place: passed through as ``in_place`` of the update spec
    :param entity_version: entity version to send, or None to look it up
    :raises grpc.RpcError: when the RPC fails and is not retried
    """

    def build_request(version_value):
        # A fresh request is assembled for every attempt so that it always
        # carries the most recently fetched entity version.
        return stateless_svc.ReplaceJobRequest(
            job_id=v1alpha_peloton.JobID(value=self.job.job_id),
            version=v1alpha_peloton.EntityVersion(value=version_value),
            spec=self.updated_job_spec,
            update_spec=stateless.UpdateSpec(
                batch_size=self.batch_size,
                rollback_on_failure=self.roll_back_on_failure,
                max_instance_retries=self.max_instance_attempts,
                max_tolerable_instance_failures=self.max_failure_instances,
                start_paused=self.start_paused,
                in_place=in_place,
            ),
        )

    # wait for job manager leader
    self.job.wait_for_jobmgr_available()

    self.updated_job_spec.respool_id.value = self.pool.ensure_exists()

    # Prefer the caller's version, then the cached one; only query the job
    # status when neither is available.
    version_to_send = entity_version or self.job.entity_version
    if not version_to_send:
        version_to_send = self.job.get_status().version.value

    while True:
        request = build_request(version_to_send)
        try:
            resp = self.client.stateless_svc.ReplaceJob(
                request,
                metadata=self.client.jobmgr_metadata,
                timeout=self.config.rpc_timeout_sec,
            )
        except grpc.RpcError as err:
            version_rejected = (
                err.code() == grpc.StatusCode.ABORTED
                and INVALID_ENTITY_VERSION_ERR_MESSAGE in err.details()
            )
            # Only a rejected version that the caller did not specify is
            # worth retrying; anything else is propagated.
            if not (version_rejected and entity_version is None):
                raise
            # entity_version is None here, so the fresh value always comes
            # from the job status.
            version_to_send = self.job.get_status().version.value
        else:
            break

    self.job.entity_version = resp.version.value
    log.info(
        "job spec replaced with new entity version: %s",
        self.job.entity_version,
    )
def update_job(
    self,
    instance_inc,
    batch_size,
    use_instance_config,
    sleep_time,
    host_limit_1=False,
):
    """
    Replace the job with a new spec that has ``instance_inc`` more
    instances than the job currently reports.

    The new spec is built once from a pod config created with
    ``sleep_time``, "static" and ``host_limit_1``. Before each attempt the
    current job info is fetched to obtain the entity version and the
    current instance count. If ReplaceJob is aborted because of an invalid
    entity version, the job info is fetched again and the call is retried;
    any other RPC error is re-raised.

    :param instance_inc: number of instances added to the current count
    :param batch_size: batch size placed in the update spec
    :param use_instance_config: accepted but not referenced by this method
    :param sleep_time: sleep time used for the pod config, command and label
    :param host_limit_1: forwarded to ``create_pod_config``
    :return: the response of the successful ReplaceJob call
    :raises grpc.RpcError: when the RPC fails for any reason other than an
        invalid entity version
    """
    pod_config = self.create_pod_config(
        sleep_time, "static", host_limit_1=host_limit_1)

    command = "instance %s && sleep %s" % (instance_inc, sleep_time)
    label_pairs = (("task_num", instance_inc), ("sleep_time", sleep_time))
    labels = [
        v1alpha_peloton.Label(key=label_key, value=str(label_value))
        for label_key, label_value in label_pairs
    ]
    new_spec = create_stateless_job_spec(
        command,
        labels,
        instance_inc,
        pod_config,
        self.respool_id,
    )
    rollout_spec = stateless.UpdateSpec(batch_size=batch_size)

    while True:
        # first get the entity version
        current = self.get_job_info()
        current_version = current.status.version.value
        new_spec.instance_count = current.spec.instance_count + instance_inc

        request = stateless_svc.ReplaceJobRequest(
            job_id=v1alpha_peloton.JobID(value=self.job_id),
            version=v1alpha_peloton.EntityVersion(value=current_version),
            spec=new_spec,
            update_spec=rollout_spec,
        )
        try:
            resp = self.client.stateless_svc.ReplaceJob(
                request,
                metadata=self.client.jobmgr_metadata,
                timeout=default_timeout,
            )
        except grpc.RpcError as err:
            version_rejected = (
                err.code() == grpc.StatusCode.ABORTED
                and INVALID_ENTITY_VERSION_ERR_MESSAGE in err.details()
            )
            # if entity version is incorrect, just retry
            if not version_rejected:
                raise
        else:
            return resp