コード例 #1
0
ファイル: operator.py プロジェクト: supatomic/ray
def status_handling_loop():
    """Apply queued RayCluster status updates forever.

    Blocks on ``cluster_status_q`` for ``(cluster_name, cluster_namespace,
    phase)`` tuples and forwards each one to ``operator_utils.set_status``.
    A failed update is logged with its traceback and the loop continues
    with the next queued item.
    """
    while True:
        cluster_name, cluster_namespace, phase = cluster_status_q.get()
        try:
            operator_utils.set_status(cluster_name, cluster_namespace, phase)
        except Exception:
            # str.join accepts exactly one iterable, so the two names are
            # wrapped in a list; passing them as separate arguments raises
            # TypeError from inside this handler and terminates the loop.
            log_prefix = ",".join([cluster_name, cluster_namespace])
            logger.exception(f"{log_prefix}: Error setting RayCluster status.")
コード例 #2
0
def handle_event(event_type, cluster_cr, cluster_name):
    """Run the cluster action for one event and flag the cluster on failure.

    If ``cluster_action`` raises, the exception is logged with its traceback
    and the status "Error" is written via ``operator_utils.set_status``.
    The original exception is not re-raised.
    """
    # TODO: This only detects errors in the parent process and thus doesn't
    # catch cluster-specific autoscaling failures. Fix that (perhaps at
    # the same time that we eliminate subprocesses).
    try:
        cluster_action(event_type, cluster_cr, cluster_name)
    except Exception:
        failure_message = f"Error while updating RayCluster {cluster_name}."
        logger.exception(failure_message)
        operator_utils.set_status(cluster_cr, cluster_name, "Error")
コード例 #3
0
def status_handling_loop(queue: mp.Queue):
    """Consume status updates from ``queue`` until a ``None`` sentinel arrives.

    Each non-``None`` item is unpacked as
    ``(cluster_name, cluster_namespace, phase)`` and handed to
    ``operator_utils.set_status``. An exception from that call is logged
    with its traceback and the loop carries on with the next item.
    """
    # TODO: Status will not be set if Operator restarts after `queue.put`
    # but before `set_status`.
    while True:
        update = queue.get()
        if update is None:
            # Sentinel value: stop consuming.
            return

        name, namespace, phase = update
        try:
            operator_utils.set_status(name, namespace, phase)
        except Exception:
            log_prefix = ",".join((name, namespace))
            logger.exception(f"{log_prefix}: Error setting RayCluster status.")
コード例 #4
0
def cluster_action(event_type, cluster_cr, cluster_name) -> None:
    """Create, update or delete a RayCluster in response to one event.

    The ``cluster_name`` argument is replaced by the ``"cluster_name"`` entry
    of the config produced by ``operator_utils.cr_to_config``. Event types
    other than "ADDED", "MODIFIED" and "DELETED" are ignored.
    """
    config = operator_utils.cr_to_config(cluster_cr)
    cluster_name = config["cluster_name"]

    if event_type == "ADDED":
        operator_utils.set_status(cluster_cr, cluster_name, "Running")
        new_cluster = RayCluster(config)
        ray_clusters[cluster_name] = new_cluster
        new_cluster.create_or_update()
        # Remember the generation so later MODIFIED events can be compared.
        last_generation[cluster_name] = cluster_cr["metadata"]["generation"]
        return

    if event_type == "MODIFIED":
        # metadata.generation only advances when the spec has changed.
        observed_generation = cluster_cr["metadata"]["generation"]
        if observed_generation > last_generation[cluster_name]:
            existing_cluster = ray_clusters[cluster_name]
            existing_cluster.set_config(config)
            existing_cluster.create_or_update()
            last_generation[cluster_name] = observed_generation
        return

    if event_type == "DELETED":
        # Clean up first; the bookkeeping entries are dropped only afterwards.
        ray_clusters[cluster_name].clean_up()
        del ray_clusters[cluster_name]
        del last_generation[cluster_name]
コード例 #5
0
ファイル: operator.py プロジェクト: zivzone/ray
def cluster_action(event_type: str, cluster_cr: Dict[str, Any],
                   cluster_name: str, cluster_namespace: str) -> None:
    """Create, update or delete a RayCluster in response to one event.

    Clusters are tracked in ``ray_clusters`` under the key
    ``(cluster_name, cluster_namespace)``, where ``cluster_name`` is taken
    from the config built by ``operator_utils.cr_to_config`` rather than
    from the argument. Event types other than "ADDED", "MODIFIED" and
    "DELETED" are ignored.
    """
    config = operator_utils.cr_to_config(cluster_cr)
    cluster_name = config["cluster_name"]
    key = (cluster_name, cluster_namespace)

    if event_type == "ADDED":
        operator_utils.set_status(cluster_cr, cluster_name, cluster_namespace,
                                  "Running")

        cluster = RayCluster(config)
        # Record the generation so spec changes can be detected later.
        cluster.set_generation(cluster_cr["metadata"]["generation"])
        cluster.create_or_update()

        # Register the cluster only after create_or_update has returned.
        ray_clusters[key] = cluster
        return

    if event_type == "MODIFIED":
        cluster = ray_clusters[key]
        # metadata.generation is compared to detect a spec change;
        # nothing is done unless it has increased.
        observed_generation = cluster_cr["metadata"]["generation"]
        if observed_generation > cluster.get_generation():
            cluster.set_generation(observed_generation)
            cluster.set_config(config)
            cluster.create_or_update()
        return

    if event_type == "DELETED":
        cluster = ray_clusters[key]
        cluster.clean_up()
        del ray_clusters[key]
コード例 #6
0
def status_handling_loop():
    """Forward queued status updates to ``operator_utils.set_status`` forever.

    Each item taken from ``cluster_status_q`` is unpacked as
    ``(cluster_name, cluster_namespace, status)``. Exceptions raised by
    ``set_status`` are not caught here and therefore end the loop.
    """
    while True:
        queued_update = cluster_status_q.get()
        name, namespace, new_status = queued_update
        operator_utils.set_status(name, namespace, new_status)