def stop_job(self):
    """
    Stop the job, retrying until the entity version is accepted.

    Each attempt re-reads the job info to pick up the current entity
    version and sends a StopJob request carrying that version. If the
    call is aborted because the entity version is invalid, the whole
    sequence is repeated (no retry limit). Any other RPC error is
    re-raised to the caller.

    :return: None
    """
    while True:
        # Always refresh the version; a concurrent update may have bumped it.
        current_version = self.get_job_info().status.version.value

        stop_request = stateless_svc.StopJobRequest(
            job_id=v1alpha_peloton.JobID(value=self.job_id),
            version=v1alpha_peloton.EntityVersion(value=current_version),
        )
        try:
            self.client.stateless_svc.StopJob(
                stop_request,
                metadata=self.client.jobmgr_metadata,
                timeout=default_timeout,
            )
        except grpc.RpcError as rpc_err:
            # Only a version conflict is worth another attempt.
            version_conflict = (
                rpc_err.code() == grpc.StatusCode.ABORTED
                and INVALID_ENTITY_VERSION_ERR_MESSAGE in rpc_err.details()
            )
            if not version_conflict:
                raise
        else:
            return
Beispiel #2
0
    def stop(self, ranges=None, entity_version=None):
        """
        Stop the whole job, or only the pods covered by ``ranges``.

        The job-level stop API has no notion of ranges, so when ranges are
        given each pod is stopped individually through the pod API. This is
        kept for backward compatibility with existing tests.

        :param ranges: instance ranges to stop; each range covers pod ids
            from its ``from`` attribute (inclusive) up to its ``to``
            attribute (exclusive). When None, the entire job is stopped.
        :param entity_version: entity version of the job, used for
            concurrency control on a job-level stop. When provided, it is
            used as-is and an invalid-version error is raised to the caller.
            When not provided, the cached ``self.entity_version`` (or, if
            that is unset, the version from the job status) is used, and on
            an invalid-version error the version is re-read from the job
            status and the stop is retried until it is accepted.
        :return: the StopJob response for a job-level stop, or an empty
            StopPodResponse when ranges were given.
        """
        # wait for job manager leader
        self.wait_for_jobmgr_available()

        if ranges is not None:
            # Range stop: issue one StopPod call per pod id in every range.
            for pod_range in ranges:
                first_id = getattr(pod_range, "from")
                for pod_id in range(first_id, pod_range.to):
                    pod_name = self.job_id + "-" + str(pod_id)
                    pod_request = pod_svc.StopPodRequest(
                        pod_name=v1alpha_peloton.PodName(value=pod_name))
                    self.client.pod_svc.StopPod(
                        pod_request,
                        metadata=self.client.jobmgr_metadata,
                        timeout=self.config.rpc_timeout_sec,
                    )

            log.info("stopping pods in job {0} with ranges {1}".format(
                self.job_id, ranges))
            return pod_svc.StopPodResponse()

        # Job-level stop: explicit version first, then the cached one, and
        # only as a last resort a fresh lookup of the job status.
        version_to_send = (
            entity_version
            or self.entity_version
            or self.get_status().version.value
        )

        while True:
            stop_request = stateless_svc.StopJobRequest(
                job_id=v1alpha_peloton.JobID(value=self.job_id),
                version=v1alpha_peloton.EntityVersion(value=version_to_send),
            )
            try:
                response = self.client.stateless_svc.StopJob(
                    stop_request,
                    metadata=self.client.jobmgr_metadata,
                    timeout=self.config.rpc_timeout_sec,
                )
            except grpc.RpcError as rpc_err:
                # Retry only on a version conflict, and only when the caller
                # did not pin a specific entity version.
                can_retry = (
                    rpc_err.code() == grpc.StatusCode.ABORTED
                    and INVALID_ENTITY_VERSION_ERR_MESSAGE in rpc_err.details()
                    and entity_version is None
                )
                if not can_retry:
                    raise
                # Refresh the version from the job status and try again.
                version_to_send = self.get_status().version.value
            else:
                break

        # Remember the version returned by the server for later calls.
        self.entity_version = response.version.value
        log.info("job stopped, new entity version: %s",
                 self.entity_version)
        return response