Code example #1
def scheduler(status: kopf.Status, patch: kopf.Patch, logger: kopf.Logger,
              **_: Any) -> str:
    replication = status.get("replication", {})
    replication["codeBuildStatus"] = None
    replication["codeBuildPhase"] = None
    replication["codeBuildId"] = None

    attempt = replication.get("attempt", 0) + 1
    if attempt > CONFIG["max_replication_attempts"]:
        replication["replicationStatus"] = "MaxAttemptsExceeded"
        replication["attempt"] = attempt

        patch["status"] = {"replication": replication}
    else:
        with LOCK:
            global WORKERS_IN_PROCESS
            logger.debug("WORKERS_IN_PROCESS: %s", WORKERS_IN_PROCESS)
            if WORKERS_IN_PROCESS < CONFIG["workers"]:
                WORKERS_IN_PROCESS += 1
                replication["replicationStatus"] = "Scheduled"
                replication["attempt"] = attempt

                patch["status"] = {"replication": replication}
                logger.info("Schedule Attempt: %s", replication["attempt"])

    # Fall back to "Pending" when no scheduling decision was made on this pass
    return cast(str, replication.get("replicationStatus", "Pending"))
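The handlers in these examples are plain functions; the sketch below shows, under stated assumptions, how a handler like scheduler could be wired up with kopf. The CRD coordinates ("orbit.aws", "v1", "imagereplications"), the field filter, and the CONFIG/LOCK/WORKERS_IN_PROCESS globals are illustrative assumptions, not taken from the source.

# Hedged sketch: registering the scheduler above with kopf. CRD coordinates and
# module globals are assumptions for illustration only.
import threading
from typing import Any, Dict

import kopf

CONFIG: Dict[str, Any] = {"workers": 2, "max_replication_attempts": 3}  # assumed config
LOCK = threading.Lock()
WORKERS_IN_PROCESS = 0


@kopf.on.field("orbit.aws", "v1", "imagereplications",  # hypothetical CRD coordinates
               field="status.replication.replicationStatus", value="Pending")
def schedule_replication(status: kopf.Status, patch: kopf.Patch,
                         logger: kopf.Logger, **kwargs: Any) -> str:
    # Delegate to the scheduler defined above; kopf applies whatever is
    # written into `patch` back onto the resource.
    return scheduler(status=status, patch=patch, logger=logger, **kwargs)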
Code example #2
def delete_poddefaults_from_user_namespaces(
    poddefaults: List[Dict[str, Any]],
    user_namespaces: List[str],
    client: DynamicClient,
    logger: kopf.Logger,
) -> None:
    logger.debug(
        "Deleting PodDefaults %s from user Namespaces %s",
        [pd["metadata"]["name"] for pd in poddefaults],
        user_namespaces,
    )
    for poddefault in poddefaults:
        for namespace in user_namespaces:
            try:
                delete_poddefault(
                    namespace=namespace,
                    name=poddefault["metadata"]["name"],
                    client=client,
                    logger=logger,
                )
            except Exception as e:
                logger.warning(
                    "Unable to delete PodDefault %s from Namespace %s: %s",
                    poddefault["metadata"]["name"],
                    namespace,
                    str(e),
                )
Code example #3
def delete_userspace(namespace: str, name: str, client: dynamic.DynamicClient,
                     logger: kopf.Logger) -> None:
    api = client.resources.get(api_version=ORBIT_API_VERSION,
                               group=ORBIT_API_GROUP,
                               kind="UserSpace")
    api.delete(namespace=namespace, name=name, body={})
    logger.debug("Deleted UserSpace: %s in Namesapce: %s", name, namespace)
Code example #4
def modify_poddefault(
    namespace: str,
    name: str,
    desc: str,
    client: dynamic.DynamicClient,
    logger: kopf.Logger,
) -> None:
    api = client.resources.get(api_version=KUBEFLOW_API_VERSION, group=KUBEFLOW_API_GROUP, kind="PodDefault")
    patch = {"spec": {"desc": desc}}
    api.patch(namespace=namespace, name=name, body=patch)
    logger.debug("Modified PodDefault: %s in Namespace: %s", name, namespace)
Code example #5
def modify_userspace(
    namespace: str,
    name: str,
    desc: str,
    client: dynamic.DynamicClient,
    logger: kopf.Logger,
) -> None:
    api = client.resources.get(api_version=ORBIT_API_VERSION,
                               group=ORBIT_API_GROUP,
                               kind="UserSpace")
    patch = {"spec": {"desc": desc}}
    api.patch(namespace=namespace, name=name, body=patch)
    logger.debug("Modified UserSpace: %s in Namespace: %s", name, namespace)
Code example #6
def create_poddefault(
    namespace: str,
    poddefault: Dict[str, Any],
    client: dynamic.DynamicClient,
    logger: kopf.Logger,
) -> None:
    api = client.resources.get(api_version=KUBEFLOW_API_VERSION, group=KUBEFLOW_API_GROUP, kind="PodDefault")
    api.create(namespace=namespace, body=poddefault)
    logger.debug(
        "Created PodDefault: %s in Namespace: %s",
        poddefault["metadata"]["name"],
        namespace,
    )
Code example #7
def _uninstall_chart(helm_release: str, namespace: str,
                     logger: kopf.Logger) -> bool:
    install_status = True
    cmd = f"/usr/local/bin/helm uninstall --debug --namespace {namespace} {helm_release}"
    try:
        logger.debug("running uninstall cmd: %s", cmd)
        output = run_command(cmd)
        logger.debug(output)
        logger.info("finished uninstall cmd: %s", cmd)
    except Exception:
        logger.error("errored cmd: %s", cmd)
        install_status = False
    return install_status
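run_command is a helper these examples rely on but never show. A minimal sketch, assuming it shells out and returns the command's output (several callers pass full command lines with output redirection, so a shell is assumed):

# Hedged sketch of the assumed run_command helper, not taken from the source.
import subprocess


def run_command(cmd: str) -> str:
    result = subprocess.run(
        cmd,
        shell=True,             # callers pass whole command lines, some with "> file" redirection
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        # Surface stderr so the callers' except blocks can log something useful.
        raise RuntimeError(f"Command failed ({result.returncode}): {result.stderr.strip()}")
    return result.stdout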
Code example #8
def _get_team_context(team: str, logger: kopf.Logger) -> Dict[str, Any]:
    try:
        api_instance = CoreV1Api()
        team_context_cf: V1ConfigMap = api_instance.read_namespaced_config_map(
            "orbit-team-context", team)
        team_context_str = team_context_cf.data["team"]

        logger.debug("team context: %s", team_context_str)
        team_context: Dict[str, Any] = json.loads(team_context_str)
        logger.debug("team context keys: %s", team_context.keys())
    except Exception as e:
        logger.error("Error during fetching team context configmap")
        raise e
    return team_context
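_get_team_context reads the team context as JSON from the "orbit-team-context" ConfigMap. An illustrative, assumed shape of that data, based only on the key these examples actually read later (install_team and uninstall_team_charts use "UserHelmRepository"):

# Illustrative only: data["team"] holds a JSON document; the URL is a placeholder.
example_configmap_data = {
    "team": '{"UserHelmRepository": "https://charts.example.com/my-team--userspace"}'
}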
Code example #9
def rescheduler(status: kopf.Status, patch: kopf.Patch, logger: kopf.Logger,
                **_: Any) -> str:
    logger.debug("Rescheduling")
    replication = status.get("replication", {})
    failure_delay = replication.get("failureDelay", 0)

    if failure_delay > 0:
        replication["failureDelay"] = failure_delay - 5
    else:
        replication["replicationStatus"] = "Pending"
        replication["failureDelay"] = None

    patch["status"] = {"replication": replication}
    return "Rescheduled"
Code example #10
def orbit_job_monitor(
    namespace: str,
    name: str,
    patch: kopf.Patch,
    logger: kopf.Logger,
    namespaces_idx: kopf.Index[str, Dict[str, Any]],
    jobs_idx: kopf.Index[Tuple[str, str], Dict[str, Any]],
    **_: Any,
) -> Any:
    ns: Optional[Dict[str, Any]] = None
    k8s_job: Optional[Dict[str, Any]] = None

    for ns in namespaces_idx.get(namespace, []):
        logger.debug("ns: %s", ns)

    if ns is None:
        patch["status"] = {
            "orbitJobOperator": {
                "jobStatus": "JobDetailsNotFound",
                "error": "No Namespace resource found"
            }
        }
        return "JobDetailsNotFound"

    for k8s_job in jobs_idx.get((namespace, name), []):
        logger.debug("k8s_job: %s", k8s_job)

    if k8s_job is None:  # To tackle the race condition caused by Timer
        return "JobMetadataNotFound"

    if k8s_job.get("status", {}).get("active") == 1:
        job_status = "Active"
    else:
        job_status = k8s_job.get("status", {}).get("conditions",
                                                   [{}])[0].get("type")

    k8s_job_reason = k8s_job.get("status", {}).get("conditions",
                                                   [{}])[0].get("reason")
    k8s_job_message = k8s_job.get("status", {}).get("conditions",
                                                    [{}])[0].get("message")

    patch["status"] = {
        "orbitJobOperator": {
            "jobStatus": job_status,
            "jobName": k8s_job.get("name"),
            "k8sJobReason": k8s_job_reason,
            "k8sJobMessage": k8s_job_message,
        }
    }
    return job_status
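orbit_job_monitor consumes a jobs_idx index keyed by (namespace, name). A hedged sketch of how such an index could be populated with kopf's @kopf.index decorator follows; the label filter and the way the key is derived are assumptions.

# Hedged sketch of the jobs_idx index consumed above. kopf merges the returned
# dict into the index, so each Job ends up stored under a (namespace, name) key.
from typing import Any, Dict, Tuple

import kopf


@kopf.index("batch", "v1", "jobs", labels={"app": "orbit-runner"})  # label filter assumed
def jobs_idx(namespace: str, name: str, body: kopf.Body,
             **_: Any) -> Dict[Tuple[str, str], Dict[str, Any]]:
    # The monitor looks Jobs up by namespace/name; a real implementation would
    # derive the key from owner references or labels tying the Job to its OrbitJob.
    return {(namespace, name): {"name": name, "status": body.get("status", {})}}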
Code example #11
def create_userspace(
    namespace: str,
    userspace: Dict[str, Any],
    client: dynamic.DynamicClient,
    logger: kopf.Logger,
) -> None:
    api = client.resources.get(api_version=ORBIT_API_VERSION,
                               group=ORBIT_API_GROUP,
                               kind="UserSpace")
    api.create(namespace=namespace, body=userspace)
    logger.debug(
        "Created UserSpace: %s in Namespace: %s",
        userspace["metadata"]["name"],
        namespace,
    )
Code example #12
def copy_poddefaults_to_user_namespaces(
    poddefaults: List[Dict[str, Any]],
    user_namespaces: List[str],
    client: DynamicClient,
    logger: kopf.Logger,
) -> None:
    logger.debug(
        "Copying PodDefaults %s to user Namespaces %s",
        [pd["metadata"]["name"] for pd in poddefaults],
        user_namespaces,
    )
    for poddefault in poddefaults:
        for namespace in user_namespaces:
            try:
                kwargs = {
                    "name": poddefault["metadata"]["name"],
                    "desc": poddefault["spec"]["desc"],
                    "labels": {
                        "orbit/space": "user",
                        "orbit/team": poddefault["metadata"]["labels"].get("orbit/team", None),
                    },
                }
                create_poddefault(
                    namespace=namespace,
                    poddefault=construct(**kwargs),
                    client=client,
                    logger=logger,
                )
            except ApiException as e:
                logger.warning(
                    "Unable to create PodDefault %s in Namespace %s: %s",
                    poddefault["metadata"]["name"],
                    namespace,
                    str(e.body),
                )
            except Exception as e:
                logger.warning(
                    "Failed to create PodDefault",
                    str(e),
                )
Code example #13
def update_pod_images(
    spec: kopf.Spec,
    patch: kopf.Patch,
    dryrun: bool,
    logger: kopf.Logger,
    imagereplications_idx: kopf.Index[str, str],
    **_: Any,
) -> kopf.Patch:
    if dryrun:
        logger.debug("DryRun - Skip Pod Mutation")
        return patch

    annotations = {}
    init_containers: List[Dict[str, Any]] = []
    containers: List[Dict[str, Any]] = []
    replications = {}

    def process_containers(src_containers: Optional[List[Dict[str, Any]]],
                           dest_containers: List[Dict[str, Any]]) -> None:
        for container in src_containers if src_containers else []:
            image = container.get("image", "")
            desired_image = imagereplication_utils.get_desired_image(
                image=image, config=CONFIG)
            if image != desired_image:
                container_copy = deepcopy(container)
                container_copy["image"] = desired_image
                dest_containers.append(container_copy)
                replications[image] = desired_image
                # "~1" is the JSON-Pointer escape for "/" used in webhook patch keys
                annotations[
                    f"original-container-image~1{container['name']}"] = image

    process_containers(spec.get("initContainers", []), init_containers)
    process_containers(spec.get("containers", []), containers)

    if replications:
        client = dynamic_client()
        for source, destination in replications.items():
            if not imagereplications_idx.get(destination, []):
                imagereplication_utils.create_imagereplication(
                    namespace="orbit-system",
                    source=source,
                    destination=destination,
                    client=client,
                    logger=logger,
                )
            else:
                logger.debug("Skipping ImageReplication Creation")

    if annotations:
        patch["metadata"] = {"annotations": annotations}
        patch["spec"] = {}
        if init_containers:
            patch["spec"]["initContainers"] = init_containers
        if containers:
            patch["spec"]["containers"] = containers

    logger.debug("Patch: %s", str(patch))
    return patch
Code example #14
def _install_helm_chart(
    helm_release: str,
    namespace: str,
    team: str,
    user: str,
    user_email: str,
    user_efsapid: str,
    repo: str,
    package: str,
    logger: kopf.Logger,
) -> bool:
    install_status = True
    # try to uninstall first
    try:
        cmd = f"helm uninstall --debug {helm_release} -n {team}"
        logger.debug("running cmd: %s", cmd)
        output = run_command(cmd)
        logger.debug(output)
        logger.info("finished cmd: %s", cmd)
    except Exception:
        logger.debug("helm uninstall did not find the release")

    cmd = (
        f"/usr/local/bin/helm upgrade --install --devel --debug --namespace {team} "
        f"{helm_release} {repo}/{package} "
        f"--set user={user},user_email={user_email},namespace={namespace},user_efsapid={user_efsapid}"
    )
    try:
        logger.debug("running cmd: %s", cmd)
        output = run_command(cmd)
        logger.debug(output)
        logger.info("finished cmd: %s", cmd)
    except Exception:
        logger.warning("errored cmd: %s", cmd)
        install_status = False
    return install_status
Code example #15
def filter_podsettings(
    podsettings: List[Dict[str, Any]],
    pod_labels: kopf.Labels,
    logger: kopf.Logger,
) -> List[Dict[str, Any]]:
    filtered_podsettings: List[Dict[str, Any]] = []

    def labels_match(labels: kopf.Labels,
                     selector_labels: kopf.Labels) -> bool:
        for key, value in selector_labels.items():
            label_value = labels.get(key, None)
            if label_value != value:
                logger.debug(
                    "NoHit: Label value check, label %s with value %s does not equal %s",
                    key,
                    label_value,
                    value,
                )
                return False
        return True

    def expressions_match(labels: kopf.Labels,
                          selector_expressions: List[Dict[str, Any]]) -> bool:
        for match_expression in selector_expressions:
            pod_label_value = labels.get(match_expression["key"], None)
            operator = match_expression["operator"]
            values = match_expression.get("values", [])

            if operator == "Exists" and pod_label_value is None:
                logger.debug(
                    "NoHit: Exists check, label %s does not exist",
                    match_expression["key"],
                )
                return False
            if operator == "NotExists" and pod_label_value is not None:
                logger.debug(
                    "NoHit: NotExists check, label %s does exist with value %s",
                    match_expression["key"],
                    pod_label_value,
                )
                return False
            if operator == "In" and pod_label_value not in values:
                logger.debug(
                    "NoHit: In check, label %s has value %s which is not in %s",
                    match_expression["key"],
                    pod_label_value,
                    values,
                )
                return False
            if operator == "NotIn" and pod_label_value in values:
                logger.debug(
                    "NoHit: NotIn check, label %s has value %s which is in %s",
                    match_expression["key"],
                    pod_label_value,
                    values,
                )
                return False
        return True

    for podsetting in podsettings:
        selector_labels = podsetting["spec"]["podSelector"].get(
            "matchLabels", {})
        selector_expressions = podsetting["spec"]["podSelector"].get(
            "matchExpressions", [])

        if pod_labels == {}:
            logger.debug("NoHit: Pod contains no labels to match against")
            continue
        elif selector_labels == {} and selector_expressions == []:
            logger.debug(
                "NoHit: PodSetting contains no podSelectors to match against. PodSetting: %s",
                podsetting["name"],
            )
            continue
        elif not labels_match(pod_labels, selector_labels):
            logger.debug(
                "NoHit: Pod labels and PodSetting matchLabels do not match. PodSetting: %s",
                podsetting["name"],
            )
            continue
        elif not expressions_match(pod_labels, selector_expressions):
            logger.debug(
                "NoHit: Pod labels and PodSetting matchExpressions do not match. PodSetting: %s",
                podsetting["name"],
            )
            continue
        else:
            logger.debug(
                "Hit: Pod labels and PodSetting podSelectors match. PodSetting: %s",
                podsetting["name"],
            )
            filtered_podsettings.append(podsetting)
    return filtered_podsettings
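A small usage sketch of filter_podsettings with made-up PodSettings and Pod labels; the names, labels, and the plain logging.Logger are illustrative only.

# Only "any-notebook" matches: the Pod satisfies its Exists expression but not
# the matchLabels of "gpu".
import logging

sample_podsettings = [
    {"name": "gpu",
     "spec": {"podSelector": {"matchLabels": {"orbit/node-type": "ec2"}}}},
    {"name": "any-notebook",
     "spec": {"podSelector": {"matchExpressions": [
         {"key": "notebook-name", "operator": "Exists"}]}}},
]
pod_labels = {"orbit/node-type": "fargate", "notebook-name": "demo"}

hits = filter_podsettings(podsettings=sample_podsettings,
                          pod_labels=pod_labels,
                          logger=logging.getLogger("demo"))
print([ps["name"] for ps in hits])  # ["any-notebook"]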
Code example #16
def delete_poddefault(namespace: str, name: str, client: dynamic.DynamicClient, logger: kopf.Logger) -> None:
    api = client.resources.get(api_version=KUBEFLOW_API_VERSION, group=KUBEFLOW_API_GROUP, kind="PodDefault")
    api.delete(namespace=namespace, name=name, body={})
    logger.debug("Deleted PodDefault: %s in Namesapce: %s", name, namespace)
Code example #17
def install_team(
    name: str,
    meta: kopf.Meta,
    spec: kopf.Spec,
    status: kopf.Status,
    patch: kopf.Patch,
    podsettings_idx: kopf.Index[str, Dict[str, Any]],
    logger: kopf.Logger,
    **_: Any,
) -> str:
    logger.debug("loading kubeconfig")
    load_config()

    logger.info("processing userspace cr")
    logger.debug("namespace: %s", name)

    env = spec.get("env", None)
    space = spec.get("space", None)
    team = spec.get("team", None)
    user = spec.get("user", None)
    team_efsid = spec.get("teamEfsId", None)
    user_email = spec.get("userEmail", None)

    logger.debug("new namespace: %s,%s,%s,%s", team, user, user_email, name)

    if not env or not space or not team or not user or not team_efsid or not user_email:
        logger.error(
            ("All of env, space, team, user, team_efsid, and user_email are required."
             "Found: %s, %s, %s, %s, %s, %s"),
            env,
            space,
            team,
            user,
            team_efsid,
            user_email,
        )
        patch["metadata"] = {
            "annotations": {
                "orbit/helm-chart-installation": "Skipped"
            }
        }
        return "Skipping"

    client = dynamic_client()

    try:
        logger.info(f"Creating EFS endpoint for {team}-{user}...")
        efs_ep_resp = _create_user_efs_endpoint(user=user,
                                                team_name=team,
                                                team_efsid=team_efsid,
                                                env=env)
        access_point_id = efs_ep_resp.get("AccessPointId", "")
        logger.info(f"AccessPointId is {access_point_id}")
    except Exception as e:
        logger.error(
            f"Error while creating EFS access point for user_name={user} and team={team}: {e}"
        )
        patch["status"] = {
            "userSpaceOperator": {
                "installationStatus": "Failed to create EFS AccessPoint",
                "exception": str(e)
            }
        }
        return "Failed"

    team_context = _get_team_context(team=team, logger=logger)
    logger.info("team context keys: %s", team_context.keys())
    helm_repo_url = team_context["UserHelmRepository"]
    logger.debug("Adding Helm Repository: %s at %s", team, helm_repo_url)
    repo = f"{team}--userspace"
    # add the team repo
    unique_hash = "".join(
        random.choice(string.ascii_lowercase) for i in range(6))
    run_command(f"helm repo add {repo} {helm_repo_url}")
    try:
        # In isolated envs, we cannot refresh stable, and since we don't use it, we remove it
        run_command("helm repo remove stable")
    except Exception:
        logger.info(
            "Tried to remove stable repo...got an error, but moving on")
    run_command("helm repo update")
    run_command(
        f"helm search repo --devel {repo} -o json > /tmp/{unique_hash}-charts.json"
    )
    with open(f"/tmp/{unique_hash}-charts.json", "r") as f:
        charts = json.load(f)
    run_command(
        f"helm list -n {team} -o json > /tmp/{unique_hash}-releases.json")
    with open(f"/tmp/{unique_hash}-releases.json", "r") as f:
        releaseList = json.load(f)
        releases = [r["name"] for r in releaseList]
        logger.info("current installed releases: %s", releases)

    for chart in charts:
        chart_name = chart["name"].split("/")[1]
        helm_release = f"{name}-{chart_name}"
        # do not install the chart again if it's already installed, as some charts are not upgradable.
        # namespaces might
        if helm_release not in releases:
            # install the helm package for this user space
            logger.info(
                f"install the helm package chart_name={chart_name} helm_release={helm_release}"
            )
            install_status = _install_helm_chart(
                helm_release=helm_release,
                namespace=name,
                team=team,
                user=user,
                user_email=user_email,
                user_efsapid=access_point_id,
                repo=repo,
                package=chart_name,
                logger=logger,
            )
            if install_status:
                logger.info("Helm release %s installed at %s", helm_release,
                            name)
                continue
            else:
                patch["status"] = {
                    "userSpaceOperator": {
                        "installationStatus": "Failed to install",
                        "chart_name": chart_name
                    }
                }
                return "Failed"

    logger.info("Copying PodDefaults from Team")
    logger.info("podsettings_idx:%s", podsettings_idx)

    # Construct pseudo poddefaults for each podsetting in the team namespace
    poddefaults = [
        poddefault_utils.construct(
            name=ps["name"],
            desc=ps["spec"].get("desc", ""),
            labels={
                "orbit/space": "team",
                "orbit/team": team
            },
        ) for ps in podsettings_idx.get(team, [])
    ]
    poddefault_utils.copy_poddefaults_to_user_namespaces(
        client=client,
        poddefaults=poddefaults,
        user_namespaces=[name],
        logger=logger)

    patch["metadata"] = {
        "annotations": {
            "orbit/helm-chart-installation": "Complete"
        }
    }
    patch["metadata"] = {"labels": {"userEfsApId": access_point_id}}
    patch["status"] = {
        "userSpaceOperator": {
            "installationStatus": "Installed"
        }
    }

    return "Installed"
Code example #18
def uninstall_team_charts(
    name: str,
    annotations: kopf.Annotations,
    labels: kopf.Labels,
    spec: kopf.Spec,
    patch: kopf.Patch,
    logger: kopf.Logger,
    meta: kopf.Meta,
    **_: Any,
) -> str:
    logger.debug("loading kubeconfig")
    load_config()

    logger.info("processing removed namespace %s", name)
    space = spec.get("space", None)

    if space == "team":
        logger.info("delete all namespaces that belong to the team %s", name)
        run_command(f"kubectl delete profile -l orbit/team={name}")
        time.sleep(60)
        run_command(
            f"kubectl delete namespace -l orbit/team={name},orbit/space=user")
        logger.info("all namespaces that belong to the team %s are deleted",
                    name)
    elif space == "user":
        env = spec.get("env", None)
        team = spec.get("team", None)
        user = spec.get("user", None)
        user_email = spec.get("userEmail", None)

        logger.debug("removed namespace: %s,%s,%s,%s", team, user, user_email,
                     name)

        if not env or not space or not team or not user or not user_email:
            logger.error(
                "All of env, space, team, user, and user_email are required. Found: %s, %s, %s, %s, %s",
                env,
                space,
                team,
                user,
                user_email,
            )
            return "Skipping"

        _delete_user_efs_endpoint(user_name=user,
                                  user_namespace=f"{team}-{user}",
                                  logger=logger,
                                  meta=meta)
        team_context = _get_team_context(team=team, logger=logger)
        logger.info("team context keys: %s", team_context.keys())
        helm_repo_url = team_context["UserHelmRepository"]
        repo = f"{team}--userspace"
        # add the team repo
        unique_hash = "".join(
            random.choice(string.ascii_lowercase) for i in range(6))
        run_command(f"helm repo add {repo} {helm_repo_url}")
        run_command(
            f"helm search repo --devel {repo} -o json > /tmp/{unique_hash}-charts.json"
        )
        with open(f"/tmp/{unique_hash}-charts.json", "r") as f:
            charts = json.load(f)
        run_command(
            f"helm list -n {team} -o json > /tmp/{unique_hash}-releases.json")
        with open(f"/tmp/{unique_hash}-releases.json", "r") as f:
            releaseList = json.load(f)
            releases = [r["name"] for r in releaseList]
            logger.info("current installed releases: %s", releases)

        for chart in charts:
            chart_name = chart["name"].split("/")[1]
            helm_release = f"{name}-{chart_name}"
            if helm_release in releases:
                install_status = _uninstall_chart(helm_release=helm_release,
                                                  namespace=team,
                                                  logger=logger)

                if install_status:
                    logger.info("Helm release %s installed at %s",
                                helm_release, name)
                    continue
                else:
                    patch["status"] = {
                        "userSpaceOperator": {
                            "installationStatus": "Failed to uninstall",
                            "chart_name": chart_name
                        }
                    }
                    return "Failed"

    patch["status"] = {
        "userSpaceOperator": {
            "installationStatus": "Uninstalled"
        }
    }
    return "Uninstalled"
Code example #19
def update_pod_images(
    namespace: str,
    labels: kopf.Labels,
    body: kopf.Body,
    patch: kopf.Patch,
    dryrun: bool,
    logger: kopf.Logger,
    warnings: List[str],
    namespaces_idx: kopf.Index[str, Dict[str, Any]],
    podsettings_idx: kopf.Index[str, Dict[str, Any]],
    **_: Any,
) -> kopf.Patch:
    if dryrun:
        logger.debug("DryRun - Skip Pod Mutation")
        return patch

    # This is a hack to get the only namespace from the index Store
    ns: Dict[str, Any] = {}
    for ns in cast(List[Dict[str, Any]], namespaces_idx.get(namespace, [{}])):
        logger.debug("Namespace: %s", ns)

    team = ns.get("labels", {}).get("orbit/team", None)
    if not team:
        logger.info("No 'orbit/team' label found on Pod's Namespace: %s", namespace)
        # warnings.append(f"No 'orbit/team' label found on Pod's Namespace: {namespace}")
        return patch

    team_podsettings: List[Dict[str, Any]] = cast(List[Dict[str, Any]], podsettings_idx.get(team, []))
    if not team_podsettings:
        logger.info("No PodSettings found for Pod's Team: %s", team)
        # warnings.append(f"No PodSettings found for Pod's Team: {team}")
        return patch

    filtered_podsettings = podsetting_utils.filter_podsettings(
        podsettings=team_podsettings, pod_labels=labels, logger=logger
    )
    if not filtered_podsettings:
        logger.info("No PodSetting Selectors matched the Pod")
        return patch

    applied_podsetting_names = []
    body_dict = {
        "metadata": {k: v for k, v in body["metadata"].items()},
        "spec": {k: v for k, v in body["spec"].items()},
    }
    logger.debug("BodyDict: %s", body_dict)
    mutable_body = deepcopy(body)
    for podsetting in filtered_podsettings:
        try:
            podsetting_utils.apply_settings_to_pod(namespace=ns, podsetting=podsetting, pod=mutable_body, logger=logger)
            applied_podsetting_names.append(podsetting["name"])
        except Exception as e:
            logger.exception("Error applying PodSetting %s: %s", podsetting["name"], str(e))
            warnings.append(f"Error applying PodSetting {podsetting['name']}: {str(e)}")

    if body_dict["spec"] == mutable_body["spec"] and body_dict["metadata"] == mutable_body["metadata"]:
        logger.warn("PodSetting Selectors matched the Pod but no changes were applied")
        warnings.append("PodSetting Selectors matched the Pod but no changes were applied")
        return patch

    patch["metadata"] = {}
    patch["metadata"]["annotations"] = {
        **mutable_body["metadata"].get("annotations", {}),
        **{"orbit/applied-podsettings": ",".join(applied_podsetting_names)},
    }
    patch["metadata"]["annotations"] = {k.replace("/", "~1"): v for k, v in patch["metadata"]["annotations"].items()}

    if "labels" in mutable_body["metadata"]:
        patch["metadata"]["labels"] = {k.replace("/", "~1"): v for k, v in mutable_body["metadata"]["labels"].items()}

    patch["spec"] = mutable_body["spec"]

    logger.info("Applying Patch %s", patch)
    return patch
Code example #20
def orbit_cron_job_monitor(
    namespace: str,
    name: str,
    patch: kopf.Patch,
    status: kopf.Status,
    logger: kopf.Logger,
    namespaces_idx: kopf.Index[str, Dict[str, Any]],
    cron_jobs_idx: kopf.Index[Tuple[str, str], Dict[str, Any]],
    **_: Any,
) -> Any:
    ns: Optional[Dict[str, Any]] = None
    k8s_job: Optional[Dict[str, Any]] = None

    for ns in namespaces_idx.get(namespace, []):
        logger.debug("ns: %s", ns)

    if ns is None:
        patch["status"] = {
            "orbitJobOperator": {
                "jobStatus": "JobDetailsNotFound",
                "error": "No Namespace resource found"
            }
        }
        return "JobDetailsNotFound"

    logger.debug("cron_jobs_idx: %s", cron_jobs_idx)
    for k8s_job in cron_jobs_idx.get((namespace, name), []):
        logger.debug("k8s_job: %s", k8s_job)

    if k8s_job is None:  # To tackle the race condition caused by Timer
        return "JobMetadataNotFound"

    if not k8s_job.get("status", {}):
        cron_job_status = "Activating"
    else:
        cron_job_status = "Active"

    if k8s_job.get("status"):
        for i in k8s_job.get("status", {}).get("active", [{}]):
            if i.get("name") not in status.get("orbitJobOperator",
                                               {}).get("cronJobIds", []):
                cron_job_ids: List[str] = status.get("orbitJobOperator",
                                                     {}).get("cronJobIds", [])
                cron_job_ids.append(i.get("name"))
                patch["status"] = {
                    "orbitJobOperator": {
                        "jobStatus": cron_job_status,
                        "jobName": k8s_job.get("name"),
                        "cronJobIds": cron_job_ids,
                    }
                }
            else:
                return cron_job_status
    else:
        patch["status"] = {
            "orbitJobOperator": {
                "jobStatus":
                cron_job_status,
                "jobName":
                k8s_job.get("name"),
                "cronJobIds":
                status.get("orbitJobOperator", {}).get("cronJobIds", []),
            }
        }

    return cron_job_status
Code example #21
def create_job(
    namespace: str,
    name: str,
    labels: kopf.Labels,
    annotations: kopf.Annotations,
    spec: kopf.Spec,
    status: kopf.Status,
    patch: kopf.Patch,
    logger: kopf.Logger,
    namespaces_idx: kopf.Index[str, Dict[str, Any]],
    podsettings_idx: kopf.Index[Tuple[str, str], Dict[str, Any]],
    **_: Any,
) -> str:
    ns: Optional[Dict[str, Any]] = None
    for ns in namespaces_idx.get(namespace, []):
        logger.debug("ns: %s", ns)

    if ns is None:
        patch["status"] = {
            "orbitJobOperator": {
                "jobStatus": "JobCreationFailed",
                "error": "No Namespace resource found"
            }
        }
        return "JobCreationFailed"

    env = ns["env"]
    team = ns["team"]

    global ENV_CONTEXT  # Caching
    if ENV_CONTEXT is None:
        context = _load_env_context_from_ssm(env)
        if context is None:
            patch["status"] = {
                "orbitJobOperator": {
                    "jobStatus": "JobCreationFailed",
                    "error": "Unable to load Env Context from SSM"
                }
            }
            return "JobCreationFailed"
        else:
            ENV_CONTEXT = context

    node_type = spec.get("compute", {}).get("nodeType", "fargate")
    labels = {
        "app": "orbit-runner",
        "orbit/node-type": node_type,
        "notebook-name": spec.get("notebookName", ""),
        "orbit/attach-security-group": "yes" if node_type == "ec2" else "no",
    }

    podsetting_metadata: Dict[str, Any] = {}
    for podsetting_metadata in podsettings_idx.get(
        (team, spec.get("compute", {}).get("podSetting", None)), []):
        logger.debug("PodSetting: %s", podsetting_metadata)

    job_spec = job_utils.construct_job_spec(
        env=env,
        team=team,
        env_context=ENV_CONTEXT,
        podsetting_metadata=podsetting_metadata,
        orbit_job_spec=spec,
        labels=labels,
    )

    logger.debug("spec: %s", spec)
    if spec.get("schedule"):
        cronjob_id = f"orbit-{namespace}-{spec.get('triggerName')}"
        cron_job_template: V1beta1JobTemplateSpec = V1beta1JobTemplateSpec(
            spec=job_spec)
        cron_job_spec: V1beta1CronJobSpec = V1beta1CronJobSpec(
            job_template=cron_job_template, schedule=spec.get("schedule"))
        job = V1beta1CronJob(
            api_version="batch/v1beta1",
            kind="CronJob",
            metadata=V1ObjectMeta(name=cronjob_id,
                                  labels={
                                      **labels,
                                      **spec.get("compute", {}).get(
                                          "labels", {})
                                  },
                                  namespace=namespace),
            status=V1beta1CronJobStatus(),
            spec=cron_job_spec,
        )
        kopf.adopt(job, nested="spec.template")
        cron_job_instance: V1beta1CronJob = BatchV1beta1Api(
        ).create_namespaced_cron_job(namespace=namespace, body=job)
        cronjob_instance_metadata: V1ObjectMeta = cron_job_instance.metadata
        logger.debug("Started Cron Job: %s", cronjob_instance_metadata.name)
        patch["metadata"] = {"labels": {"k8sJobType": "CronJob"}}
        patch["status"] = {
            "orbitJobOperator": {
                "jobStatus": "JobCreated",
                "jobName": cronjob_instance_metadata.name,
                "nodeType": node_type,
            }
        }
        return "CronJobCreated"
    else:
        job = V1Job(
            api_version="batch/v1",
            kind="Job",
            metadata=V1ObjectMeta(labels={
                **labels,
                **spec.get("compute", {}).get("labels", {})
            }),
            spec=job_spec,
        )

        kopf.adopt(job, nested="spec.template")
        job_instance: V1Job = BatchV1Api().create_namespaced_job(
            namespace=namespace, body=job)

        job_instance_metadata: V1ObjectMeta = job_instance.metadata
        logger.debug("Started Job: %s", job_instance_metadata.name)
        patch["metadata"] = {"labels": {"k8sJobType": "Job"}}
        patch["status"] = {
            "orbitJobOperator": {
                "jobStatus": "JobCreated",
                "jobName": job_instance_metadata.name,
                "nodeType": node_type,
            }
        }
        return "JobCreated"