Exemple #1
0
def wait_for_pods_to_be_deleted(
    client,
    namespace,
    pod_selector,
    timeout=datetime.timedelta(minutes=5),
    polling_interval=datetime.timedelta(seconds=30)):
    """Wait until no pods match the given selector.

    Args:
      client: K8s api client.
      namespace: Namespace to list pods in.
      pod_selector: Selector for the pods.
      timeout: datetime.timedelta; how long to wait for the pods to be
        deleted.
      polling_interval: datetime.timedelta; how long to sleep between polls.

    Raises:
      util.TimeoutError: If pods still match the selector and the next poll
        would take place after the deadline.
    """
    end_time = datetime.datetime.now() + timeout
    while True:
        pods = list_pods(client, namespace, pod_selector)

        logging.info("%s pods matched %s pods", len(pods.items), pod_selector)

        if not pods.items:
            return

        # Give up now if sleeping one more interval would overshoot the
        # deadline.
        if datetime.datetime.now() + polling_interval > end_time:
            raise util.TimeoutError("Timeout waiting for pods to be deleted.")

        # total_seconds() covers the whole interval; the .seconds attribute
        # drops the days and microseconds components of the timedelta.
        time.sleep(polling_interval.total_seconds())
Exemple #2
0
def wait_for_vm(project,
                zone,
                vm,
                timeout=datetime.timedelta(minutes=5),
                polling_interval=datetime.timedelta(seconds=10)):
    """Wait for the VM to be ready. This is measured by trying to ssh into the VM.

    Repeatedly runs `gcloud compute ssh` with a trivial command until the
    command succeeds or the timeout expires.

    Args:
      project: Value passed to gcloud as --project.
      zone: Value passed to gcloud as --zone.
      vm: The VM to ssh into.
      timeout: A datetime.timedelta expressing the amount of time to wait
        before giving up.
      polling_interval: A datetime.timedelta to represent the amount of time
        to wait between ssh attempts.

    Raises:
      TimeoutError: if we timeout waiting for the VM to accept ssh.
    """
    endtime = datetime.datetime.now() + timeout
    while True:
        try:
            util.run([
                "gcloud", "compute", "--project=" + project, "ssh",
                "--zone=" + zone, vm, "--", "echo hello world"
            ])
            logging.info("VM is ready")
            return
        except subprocess.CalledProcessError:
            # A failed ssh attempt just means the VM is not ready yet; retry
            # until the deadline passes.
            pass

        if datetime.datetime.now() > endtime:
            raise util.TimeoutError((
                "Timed out waiting for VM {0} to be sshable. Check firewall "
                "rules aren't blocking ssh.").format(vm))

        time.sleep(polling_interval.total_seconds())
Exemple #3
0
def wait_for_workflows(client,
                       namespace,
                       names,
                       timeout=datetime.timedelta(minutes=30),
                       polling_interval=datetime.timedelta(seconds=30),
                       status_callback=None):
    """Wait for multiple workflows to finish.

    A workflow counts as finished once its status phase is "Failed" or
    "Succeeded".

    Args:
      client: K8s api client.
      namespace: namespace for the workflow.
      names: Names of the workflows to wait for.
      timeout: datetime.timedelta; how long to wait for the workflows.
      polling_interval: datetime.timedelta; how long to sleep between polls.
      status_callback: (Optional): Callable. If supplied this callable is
        invoked after we poll each workflow. Callable takes a single argument
        which is the workflow object that was fetched.

    Returns:
      results: A list of the final status of the workflows, in the same order
        as names.

    Raises:
      TimeoutError: If timeout waiting for the workflows to finish.
    """
    crd_api = k8s_client.CustomObjectsApi(client)
    end_time = datetime.datetime.now() + timeout
    finished_phases = ("Failed", "Succeeded")
    while True:
        all_results = []

        for n in names:
            results = crd_api.get_namespaced_custom_object(
                GROUP, VERSION, namespace, PLURAL, n)

            all_results.append(results)
            if status_callback:
                status_callback(results)

        # Sometimes it takes a while for the argo controller to populate
        # the status field of an object, so a missing status/phase is treated
        # as "not finished yet".
        done = all(
            r.get("status", {}).get("phase", "") in finished_phases
            for r in all_results)

        if done:
            return all_results

        # Give up now if sleeping one more interval would overshoot the
        # deadline.
        if datetime.datetime.now() + polling_interval > end_time:
            raise util.TimeoutError(
                "Timeout waiting for workflows {0} in namespace {1} to finish."
                .format(",".join(names), namespace))

        # total_seconds() covers the whole interval; the .seconds attribute
        # drops the days and microseconds components of the timedelta.
        time.sleep(polling_interval.total_seconds())
Exemple #4
0
def wait_for_pods_to_be_in_phases(
    client,
    namespace,
    pod_selector,
    phases,
    timeout=datetime.timedelta(minutes=15),
    polling_interval=datetime.timedelta(seconds=30)):
    """Wait for the pods matching the selector to be in the specified state

    Args:
      client: K8s api client.
      namespace: Namespace.
      pod_selector: Selector for the pods.
      phases: List of desired phases
      timeout: datetime.timedelta; how long to wait for the pods.
      polling_interval: datetime.timedelta; how long to sleep between polls.

    Returns:
      The pod list from the last poll, once at least one pod matched the
      selector and every matching pod is in one of the desired phases.

    Raises:
      util.TimeoutError: If the pods are not all in the desired phases and
        the next poll would take place after the deadline.
    """
    # One polling interval is slept before the first poll and before the
    # deadline is computed -- presumably to give the pods time to show up;
    # TODO confirm. total_seconds() is used because the .seconds attribute
    # drops the days and microseconds components of the timedelta.
    time.sleep(polling_interval.total_seconds())
    end_time = datetime.datetime.now() + timeout
    while True:

        pods = list_pods(client, namespace, pod_selector)

        logging.info("%s pods matched %s pods", len(pods.items), pod_selector)

        is_match = True

        for p in pods.items:
            if p.status.phase not in phases:
                # for debug
                logging.info("pod in phase %s", p.status.phase)
                is_match = False

        # An empty pod list never counts as a match.
        if is_match and pods.items:
            logging.info("All pods in phase %s", phases)
            log_pods(pods)
            return pods

        # Give up now if sleeping one more interval would overshoot the
        # deadline.
        if datetime.datetime.now() + polling_interval > end_time:
            logging.info("Latest pod phases")
            log_pods(pods)
            logging.error("Timeout waiting for pods to be in phase: %s",
                          phases)
            raise util.TimeoutError(
                "Timeout waiting for pods to be in states %s" % phases)

        time.sleep(polling_interval.total_seconds())
Exemple #5
0
def wait_for_delete(client,
                    namespace,
                    name,
                    version="v1alpha1",
                    timeout=datetime.timedelta(minutes=5),
                    polling_interval=datetime.timedelta(seconds=30),
                    status_callback=None):
    """Wait for the specified job to be deleted.

    The job is considered deleted once fetching it fails with a NOT_FOUND
    API error.

    Args:
      client: K8s api client.
      namespace: namespace for the job.
      name: Name of the job.
      version: API version of the job custom resource to query.
      timeout: datetime.timedelta; how long to wait for the job.
      polling_interval: datetime.timedelta; how long to sleep between polls.
      status_callback: (Optional): Callable. If supplied this callable is
        invoked after we poll the job. Callable takes a single argument which
        is the job.

    Raises:
      rest.ApiException: If fetching the job fails with any status other
        than NOT_FOUND.
      util.TimeoutError: If the job still exists and the next poll would take
        place after the deadline.
    """
    crd_api = k8s_client.CustomObjectsApi(client)
    end_time = datetime.datetime.now() + timeout
    while True:
        try:
            results = crd_api.get_namespaced_custom_object(
                tf_job_client.TF_JOB_GROUP, version, namespace,
                tf_job_client.TF_JOB_PLURAL, name)
        except rest.ApiException as e:
            # NOT_FOUND is the success condition: the job is gone.
            if e.status == httplib.NOT_FOUND:
                return
            logging.exception("rest.ApiException thrown")
            raise
        if status_callback:
            status_callback(results)

        # Give up now if sleeping one more interval would overshoot the
        # deadline.
        if datetime.datetime.now() + polling_interval > end_time:
            raise util.TimeoutError(
                "Timeout waiting for job {0} in namespace {1} to be deleted.".
                format(name, namespace))

        # total_seconds() covers the whole interval; the .seconds attribute
        # drops the days and microseconds components of the timedelta.
        time.sleep(polling_interval.total_seconds())
Exemple #6
0
def wait_for_workflow(client,
                      namespace,
                      name,
                      timeout=datetime.timedelta(minutes=30),
                      polling_interval=datetime.timedelta(seconds=30),
                      status_callback=None):
    """Wait for the specified workflow to finish.

    The workflow counts as finished once its status phase is "Failed" or
    "Succeeded".

    Args:
      client: K8s api client.
      namespace: namespace for the workflow.
      name: Name of the workflow.
      timeout: datetime.timedelta; how long to wait for the workflow.
      polling_interval: datetime.timedelta; how long to sleep between polls.
      status_callback: (Optional): Callable. If supplied this callable is
        invoked after we poll the workflow. Callable takes a single argument
        which is the workflow object that was fetched.

    Returns:
      The workflow object from the poll in which it was seen as finished.

    Raises:
      TimeoutError: If timeout waiting for the workflow to finish.
    """
    crd_api = k8s_client.CustomObjectsApi(client)
    end_time = datetime.datetime.now() + timeout
    while True:
        results = crd_api.get_namespaced_custom_object(GROUP, VERSION,
                                                       namespace, PLURAL, name)

        if status_callback:
            status_callback(results)

        # The status field may not be populated yet on a freshly created
        # object; treat a missing status/phase as "not finished" rather than
        # failing with KeyError.
        phase = results.get("status", {}).get("phase", "")
        if phase in ["Failed", "Succeeded"]:
            return results

        # Give up now if sleeping one more interval would overshoot the
        # deadline.
        if datetime.datetime.now() + polling_interval > end_time:
            raise util.TimeoutError(
                "Timeout waiting for workflow {0} in namespace {1} to finish.".
                format(name, namespace))

        # total_seconds() covers the whole interval; the .seconds attribute
        # drops the days and microseconds components of the timedelta.
        time.sleep(polling_interval.total_seconds())