コード例 #1
0
ファイル: kubernetes_runner.py プロジェクト: suryaavala/tfx
    def WaitUntilRunning(self, deadline: float) -> None:
        """Block until the pod is running with an IP assigned, or give up.

        Repeatedly reads the pod named `self._pod_name` in `self._namespace`.
        Once the pod is in the RUNNING phase and reports a pod IP, the address
        is stored in `self._endpoint` as `<pod_ip>:<container_port>`.

        Args:
          deadline: Point in time, on the same clock as `time.time()`, after
            which polling stops.

        Raises:
          JobAborted: If the pod phase reports that the pod is done.
          DeadlineExceeded: If the deadline passes before the pod is running.
        """
        assert self._pod_name, (
            'Pod has not been created yet. You should call Start() first.')

        while time.time() < deadline:
            try:
                pod = self._k8s_core_api.read_namespaced_pod(
                    name=self._pod_name, namespace=self._namespace)
            except rest.ApiException as api_error:
                # The read failed; fall through to the sleep and poll again.
                logging.info('Continue polling after getting ApiException(%s)',
                             api_error)
            else:
                # Pod phase is one of Pending, Running, Succeeded, Failed, or
                # Unknown. Succeeded and Failed mean the pod lifecycle has
                # ended, whereas the job is expected to keep running. Unknown
                # means the pod state could not be obtained, so keep waiting
                # until the phase is confirmed.
                phase = _PodPhase(pod.status.phase)
                if phase == _PodPhase.RUNNING and pod.status.pod_ip:
                    self._endpoint = '{}:{}'.format(
                        pod.status.pod_ip, self._serving_binary.container_port)
                    return
                if phase.is_done:
                    raise error_types.JobAborted(
                        'Job has been aborted. (phase={})'.format(phase))
                logging.info('Waiting for the pod to be running. (phase=%s)',
                             phase)
            # Shared back-off for both the failed-read and not-yet-running cases.
            time.sleep(_DEFAULT_POLLING_INTERVAL_SEC)

        raise error_types.DeadlineExceeded(
            'Deadline exceeded while waiting for pod to be running.')
コード例 #2
0
ファイル: base_client.py プロジェクト: suryaavala/tfx
  def WaitUntilModelLoaded(self, deadline: float,
                           polling_interval_sec: int) -> None:
    """Poll the serving status until the model is loaded.

    Args:
      deadline: Point in time, on the same clock as `time.time()` (seconds),
        after which polling stops.
      polling_interval_sec: Seconds to sleep between two status checks while
        the model is not ready.

    Raises:
      DeadlineExceeded: If the deadline passes before the model is ready.
      ValidationFailed: If the serving status is UNAVAILABLE.
    """
    while time.time() < deadline:
      serving_status = self._GetServingStatus()

      if serving_status == types.ModelServingStatus.NOT_READY:
        # Still loading: emit a rate-limited progress message and retry later.
        logging.log_every_n_seconds(
            level=logging.INFO,
            n_seconds=10,
            msg='Waiting for model to be loaded...')
        time.sleep(polling_interval_sec)
        continue

      if serving_status == types.ModelServingStatus.UNAVAILABLE:
        raise error_types.ValidationFailed(
            'Model server failed to load the model.')

      # Any other status is treated as a successful load.
      logging.info('Model is successfully loaded.')
      return

    raise error_types.DeadlineExceeded(
        'Deadline exceeded while waiting the model to be loaded.')
コード例 #3
0
    def WaitUntilRunning(self, deadline):
        """Block until the container reports the 'running' status.

        Args:
          deadline: Point in time, on the same clock as `time.time()`, after
            which polling stops.

        Raises:
          JobAborted: If the container cannot be found, or if its status is
            anything other than 'created' or 'running'.
          DeadlineExceeded: If the deadline passes while the container is
            still in the 'created' status.
        """
        assert self._container is not None, 'container has not been started.'

        while time.time() < deadline:
            try:
                # Container attributes are cached locally; reload() is the only
                # right way to fetch the latest status from the docker engine.
                self._container.reload()
                current_status = self._container.status
            except docker_errors.NotFound:
                # When the job has been aborted and the container was started
                # with auto-removal, reload() may fail with NotFound.
                raise error_types.JobAborted(
                    'Container not found. Possibly removed after the job has been '
                    'aborted.')

            # The container is running :)
            if current_status == 'running':
                return

            # Docker status is one of {'created', 'restarting', 'running',
            # 'removing', 'paused', 'exited', or 'dead'}. Anything other than
            # 'created' and 'running' indicates the job has been aborted.
            if current_status != 'created':
                raise error_types.JobAborted(
                    'Job has been aborted (container status={})'.format(
                        current_status))

            # Just created and not yet running: wait and poll again.
            time.sleep(_POLLING_INTERVAL_SEC)

        raise error_types.DeadlineExceeded(
            'Deadline exceeded while waiting for the container to be running.')