Example #1
0
def generate_exp_name_and_labels(script_name: str, namespace: str, name: str = None,
                                 run_kind: RunKinds = RunKinds.TRAINING) -> Tuple[str, Dict[str, str]]:
    """
    Choose the name of an experiment that is about to be submitted and build its labels.

    Three situations are handled:
      1. ``name`` is given - it is used as-is, provided that nothing with this name already exists.
      2. ``name`` is not given and ``generate_name_for_existing_exps`` returns a name for this script -
         that name and the labels returned with it are used.
      3. Otherwise a new name is generated with ``generate_name``; if ``Experiment.list`` filtered by this name
         returns any experiments, their count is appended to the name as a ``-<count>`` suffix.

    :param script_name: path of the submitted script; only its last path component is used
    :param namespace: namespace in which experiments and pods are looked up
    :param name: optional experiment name requested by the user
    :param run_kind: kind of run, passed through to the label/name helpers
    :return: tuple (experiment name, labels)
    :raises SubmitExperimentError: when ``name`` is given and an experiment with this name exists (with or
        without runs), or pods labelled with this run name are still present
    """
    # Strip any directory part; an empty/None script name is passed through unchanged.
    script_name = Path(script_name).name if script_name else script_name

    if not name:
        # CASE 2: no name requested - first ask for a name derived from experiments created from this script.
        indexed_name, indexed_labels = generate_name_for_existing_exps(script_name, namespace, run_kind=run_kind)
        if indexed_name:
            return indexed_name, indexed_labels

        # CASE 3: nothing to derive from - generate a new name and disambiguate it if needed.
        fresh_name = generate_name(script_name)
        clashing_experiments = Experiment.list(namespace=namespace, name_filter=fresh_name)
        if clashing_experiments:
            fresh_name = f'{fresh_name}-{len(clashing_experiments)}'
        return fresh_name, prepare_label(script_name, fresh_name, run_kind=run_kind)

    # CASE 1: the user asked for a specific name - any existing experiment with this name is an error.
    existing_experiment = Experiment.get(namespace=namespace, name=name)
    if existing_experiment:
        if existing_experiment.get_runs():
            raise SubmitExperimentError(Texts.EXPERIMENT_ALREADY_EXISTS_ERROR_MSG.format(name=name))
        # The experiment exists but has no associated runs.
        raise SubmitExperimentError(Texts.EXPERIMENT_INVALID_STATE_MSG.format(name=name))

    # Pods of a previous experiment with the same name may still be around.
    if list_pods(namespace=namespace, label_selector=f'runName={name}'):
        raise SubmitExperimentError(Texts.EXPERIMENT_PREV_EXP_STILL_TERMINATING)

    return name, prepare_label(script_name, name, name, run_kind=run_kind)
Example #2
0
def cancel_pods_mode(namespace: str,
                     run_name: str = None,
                     pod_ids: str = None,
                     pod_status: str = None):
    """
    Interactively delete pods of a namespace that match the given filters.

    Only pods carrying a ``runName`` label are considered. The optional filters are applied one after
    another, each narrowing the result of the previous one. The selected pods are listed, the user is asked
    for confirmation, and then every pod is deleted individually; a summary of deleted and not deleted pods
    is printed at the end.

    :param namespace: namespace whose pods are listed
    :param run_name: regular expression matched (with ``re.match``, i.e. anchored at the start) against the
        ``runName`` label of each pod
    :param pod_ids: comma-separated list of pod names; only pods whose name is on the list are kept
    :param pod_status: pod status name (case-insensitive) convertible to ``PodStatus``
    :raises SystemExit: with code 1 when ``pod_status`` is not a valid ``PodStatus``, when no pod matches the
        filters, or when at least one pod could not be deleted; with code 0 when the user declines the
        confirmation prompt
    """
    namespace_pods = k8s_pods.list_pods(namespace=namespace)

    # Pods without the 'runName' label do not belong to any run and are never touched.
    filtered_pods = [pod for pod in namespace_pods if 'runName' in pod.labels]

    if run_name:
        filtered_pods = [pod for pod in filtered_pods if re.match(run_name, pod.labels['runName'])]

    if pod_ids:
        # A set gives constant-time membership checks for each pod.
        requested_pod_names = set(pod_ids.split(','))
        filtered_pods = [pod for pod in filtered_pods if pod.name in requested_pod_names]

    if pod_status:
        try:
            converted_pod_status = PodStatus(pod_status.upper())
        except ValueError:
            handle_error(user_msg=Texts.BAD_POD_STATUS_PASSED.format(
                status_passed=pod_status,
                available_statuses=PodStatus.all_members()))
            # sys.exit instead of the site-provided exit(): the latter is intended for the interactive
            # interpreter and is not guaranteed to exist.
            sys.exit(1)

        filtered_pods = [pod for pod in filtered_pods if pod.status == converted_pod_status]

    if not filtered_pods:
        handle_error(user_msg=Texts.LACK_OF_PODS_ERROR_MSG)
        sys.exit(1)

    # Show what is going to be deleted before asking for confirmation.
    click.echo(
        Texts.WILL_BE_PURGED_LIST_HEADER.format(
            experiment_name_plural='pods',
            operation_word=Texts.DELETE_OPERATION["deleted"]))
    for pod in filtered_pods:
        click.echo(f"     - {pod.name}")

    if not click.confirm(
            Texts.CONFIRM_CANCEL_MSG.format(
                experiment_name_plural='pods',
                operation_word=Texts.DELETE_OPERATION["deletion"])):
        handle_error(user_msg=Texts.CANCELLATION_ABORTED_MSG.format(
            experiment_name_plural='pods',
            operation_word=Texts.DELETE_OPERATION["deletion"]))
        # Declining the prompt is a user decision, not a failure - hence exit code 0.
        sys.exit(0)

    deleted_pods = []
    not_deleted_pods = []

    for pod in filtered_pods:
        click.echo(Texts.CANCELING_PODS_MSG.format(pod_name=pod.name))
        try:
            pod.delete()
        except Exception:
            # Best effort: a failure to delete one pod must not stop the deletion of the remaining ones.
            handle_error(logger, Texts.OTHER_POD_CANCELLING_ERROR_MSG)
            not_deleted_pods.append(pod)
        else:
            deleted_pods.append(pod)

    if deleted_pods:
        click.echo(
            Texts.SUCCESSFULLY_CANCELLED_LIST_HEADER.format(
                experiment_name_plural='pods',
                operation_word=Texts.DELETE_OPERATION["deleted"]))
        for pod in deleted_pods:
            click.echo(f"     - {pod.name}")

    if not_deleted_pods:
        click.echo(
            Texts.FAILED_TO_CANCEL_LIST_HEADER.format(
                experiment_name_plural='pods',
                operation_word=Texts.DELETE_OPERATION["deleted"]))
        for pod in not_deleted_pods:
            click.echo(f"     - {pod.name}")
        sys.exit(1)