def generate_exp_name_and_labels(script_name: str, namespace: str, name: str = None,
                                 run_kind: RunKinds = RunKinds.TRAINING) -> Tuple[str, Dict[str, str]]:
    """
    Determine the name and the labels to use for a submitted experiment.

    Three situations are handled:

    1. ``name`` is given - it is used as-is, provided that no experiment with
       that name is found and no pods labelled ``runName=<name>`` are listed.
    2. ``name`` is not given and ``generate_name_for_existing_exps`` yields a
       name - that name and the labels returned with it are used.
    3. Otherwise a new name is produced by ``generate_name``; when
       ``Experiment.list`` filtered by that name returns any experiments, their
       count is appended to the name as a ``-<count>`` suffix.

    :param script_name: path of the submitted script; only its final path
        component is used. May be empty/None, in which case it is passed on unchanged.
    :param namespace: namespace in which experiments and pods are looked up
    :param name: experiment name requested by the user (optional)
    :param run_kind: run kind forwarded to the label/name helpers
    :return: tuple of (experiment name, labels)
    :raises SubmitExperimentError: when ``name`` is given and an experiment with
        that name already exists (with or without runs), or pods carrying that
        run name are still present.
    """
    if script_name:
        # Helpers below work with the bare file name, not the full path.
        script_name = Path(script_name).name

    if not name:
        # CASE 2: no name given - let the helper derive one from existing
        # experiments created with the same script (next index post-fix).
        reused_name, reused_labels = generate_name_for_existing_exps(script_name, namespace, run_kind=run_kind)
        if reused_name:
            return reused_name, reused_labels

        # CASE 3: no name given and nothing to reuse - generate a new name.
        fresh_name = generate_name(script_name)
        clashing_experiments = Experiment.list(namespace=namespace, name_filter=fresh_name)
        clash_count = len(clashing_experiments) if clashing_experiments else 0
        if clash_count > 0:
            # Disambiguate with the number of experiments matched by the filter.
            fresh_name = f'{fresh_name}-{clash_count}'
        return fresh_name, prepare_label(script_name, fresh_name, run_kind=run_kind)

    # CASE 1: the user passed an explicit name - it must not be taken already.
    existing_experiment = Experiment.get(namespace=namespace, name=name)
    if existing_experiment:
        existing_runs: List[Run] = existing_experiment.get_runs()
        if existing_runs:
            raise SubmitExperimentError(Texts.EXPERIMENT_ALREADY_EXISTS_ERROR_MSG.format(name=name))
        # Subcase: the experiment exists but has no associated runs.
        raise SubmitExperimentError(Texts.EXPERIMENT_INVALID_STATE_MSG.format(name=name))

    # Artifacts (pods) of a previous experiment with the same name may still be around.
    if list_pods(namespace=namespace, label_selector=f'runName={name}'):
        raise SubmitExperimentError(Texts.EXPERIMENT_PREV_EXP_STILL_TERMINATING)

    return name, prepare_label(script_name, name, name, run_kind=run_kind)
def cancel_pods_mode(namespace: str, run_name: str = None, pod_ids: str = None, pod_status: str = None):
    """
    Select run-related pods in a namespace, ask for confirmation and delete them.

    Pods returned by ``k8s_pods.list_pods`` are first narrowed to those that
    carry a ``runName`` label, then further narrowed by each optional filter
    that is provided. The resulting list is printed, the user is asked to
    confirm, and every pod is deleted one by one. A summary of deleted and
    not-deleted pods is printed at the end.

    :param namespace: namespace whose pods are listed
    :param run_name: pattern passed to ``re.match`` against each pod's
        ``runName`` label, so it is treated as a regular expression anchored at
        the start of the label (optional)
    :param pod_ids: comma-separated pod names; only pods whose name is on the
        list are kept (optional)
    :param pod_status: pod status name, upper-cased and converted to
        ``PodStatus``; only pods with that status are kept (optional)
    :raises SystemExit: with code 1 when ``pod_status`` is not a valid
        ``PodStatus``, when no pod matches the filters, or when deletion of at
        least one pod failed; with code 0 when the user declines the
        confirmation prompt.
    """
    namespace_pods = k8s_pods.list_pods(namespace=namespace)
    # Only pods labelled with a run name are candidates for cancellation.
    filtered_pods = [pod for pod in namespace_pods if 'runName' in pod.labels]

    if run_name:
        filtered_pods = [pod for pod in filtered_pods if re.match(run_name, pod.labels['runName'])]

    if pod_ids:
        requested_pod_names = set(pod_ids.split(','))
        filtered_pods = [pod for pod in filtered_pods if pod.name in requested_pod_names]

    if pod_status:
        try:
            converted_pod_status = PodStatus(pod_status.upper())
        except ValueError:
            handle_error(user_msg=Texts.BAD_POD_STATUS_PASSED.format(
                status_passed=pod_status, available_statuses=PodStatus.all_members()))
            # sys.exit instead of the site-provided exit(): the latter is meant for
            # interactive sessions and is not guaranteed to exist in every runtime.
            sys.exit(1)
        filtered_pods = [pod for pod in filtered_pods if pod.status == converted_pod_status]

    if not filtered_pods:
        handle_error(user_msg=Texts.LACK_OF_PODS_ERROR_MSG)
        sys.exit(1)

    # Show what is about to be removed before asking for confirmation.
    click.echo(
        Texts.WILL_BE_PURGED_LIST_HEADER.format(
            experiment_name_plural='pods', operation_word=Texts.DELETE_OPERATION["deleted"]))
    for pod in filtered_pods:
        click.echo(f" - {pod.name}")

    if not click.confirm(
            Texts.CONFIRM_CANCEL_MSG.format(
                experiment_name_plural='pods', operation_word=Texts.DELETE_OPERATION["deletion"])):
        handle_error(user_msg=Texts.CANCELLATION_ABORTED_MSG.format(
            experiment_name_plural='pods', operation_word=Texts.DELETE_OPERATION["deletion"]))
        # Declining the prompt is not an error, hence exit code 0.
        sys.exit(0)

    deleted_pods = []
    not_deleted_pods = []

    for pod in filtered_pods:
        click.echo(Texts.CANCELING_PODS_MSG.format(pod_name=pod.name))
        try:
            pod.delete()
            deleted_pods.append(pod)
        except Exception:
            # Best effort: report the failure and carry on with the remaining pods.
            handle_error(logger, Texts.OTHER_POD_CANCELLING_ERROR_MSG)
            not_deleted_pods.append(pod)

    if deleted_pods:
        click.echo(
            Texts.SUCCESSFULLY_CANCELLED_LIST_HEADER.format(
                experiment_name_plural='pods',
                operation_word=Texts.DELETE_OPERATION["deleted"]))
        for pod in deleted_pods:
            click.echo(f" - {pod.name}")

    if not_deleted_pods:
        click.echo(
            Texts.FAILED_TO_CANCEL_LIST_HEADER.format(
                experiment_name_plural='pods', operation_word=Texts.DELETE_OPERATION["deleted"]))
        for pod in not_deleted_pods:
            click.echo(f" - {pod.name}")
        # Signal partial failure to the caller of the command.
        sys.exit(1)