def test_delete_helm_release_failure(mocker):
    """delete_helm_release must raise RuntimeError when the mocked system command returns ("", 1, "")."""
    # Config is replaced so that only a fake config path is visible to the helm utilities.
    config_cls = mocker.patch('util.helm.Config')
    config_cls.return_value.config_path = '/usr/ogorek/nctl_config'

    # Every system command issued by util.helm yields the same failing triple.
    failing_command = mocker.patch("util.helm.execute_system_command")
    failing_command.return_value = ("", 1, "")

    with pytest.raises(RuntimeError):
        delete_helm_release(test_username)
def test_delete_helm_release_success(mocker):
    """delete_helm_release completes after exactly one system command when that command succeeds."""
    deleted_msg = f"release \"{test_username}\" deleted"
    not_found_msg = f"release: \"{test_username}\" not found"

    # Two canned results are queued, but the assertion below expects only the first to be consumed.
    command_mock = mocker.patch(
        "util.helm.execute_system_command",
        side_effect=[
            (deleted_msg, 0, deleted_msg),
            (not_found_msg, 0, not_found_msg),
        ],
    )

    config_cls = mocker.patch('util.helm.Config')
    config_cls.return_value.config_path = '/usr/ogorek/nctl_config'

    delete_helm_release(test_username)

    assert command_mock.call_count == 1
def cancel_experiment_runs(runs_to_cancel: List[Run], namespace: str) -> Tuple[List[Run], List[Run]]:
    """
    Cancel every run from the given list; all runs must live in one namespace.

    A run whose state is already CANCELLED is reported as cancelled without
    touching its helm release. For any other run the helm release is deleted
    (purge=False), the state is set to CANCELLED, the end timestamp is set to the
    current UTC time and the run is updated.

    :param runs_to_cancel: runs that should be cancelled
    :param namespace: namespace in which the runs reside
    :return: tuple (runs cancelled successfully, runs that could not be cancelled)
    """
    cancelled: List[Run] = []
    failed: List[Run] = []

    try:
        for run in runs_to_cancel:
            logger.debug(f"Cancelling {run.name} run ...")
            # experiment_name is not defined in this function - presumably a module-level name.
            start_msg = Texts.CANCELING_RUNS_START_MSG.format(
                run_name=run.name, experiment_name=experiment_name)
            click.echo(start_msg)

            try:
                # runs that are already cancelled need no further processing
                if run.state != RunStatus.CANCELLED:
                    progress_text = Texts.CANCEL_SETTING_STATUS_MSG.format(run_name=run.name)
                    with spinner(text=progress_text):
                        delete_helm_release(release_name=run.name, namespace=namespace, purge=False)
                        # mark the run as cancelled and record when it ended
                        run.state = RunStatus.CANCELLED
                        run.end_timestamp = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
                        run.update()
            except Exception:
                # a failure of one run is reported and does not stop the remaining ones
                error_msg = Texts.INCOMPLETE_CANCEL_ERROR_MSG.format(
                    run_name=run.name, experiment_name=experiment_name)
                logger.exception(error_msg)
                click.echo(error_msg)
                failed.append(run)
            else:
                cancelled.append(run)
    except Exception:
        # anything raised outside the per-run handler ends the loop; partial results are still returned
        logger.exception("Error during cancelling experiments")

    return cancelled, failed
def ctrl_c_handler_for_submit(sig, frame):
    """
    Clean up after a submission interrupted with ctrl-c and terminate the process.

    For every run in ``submitted_runs`` the k8s "run" object is deleted and its helm
    release is purged; afterwards the ``submitted_experiment`` object is deleted.
    Then all child processes of the current process are sent SIGKILL and the
    interpreter exits with status 1. ``submitted_runs``, ``submitted_namespace`` and
    ``submitted_experiment`` are names defined outside of this function.

    :param sig: first handler argument (unused)
    :param frame: second handler argument (unused)
    :raises SystemExit: always, with exit code 1
    """
    log.debug("ctrl-c pressed while submitting")
    try:
        with spinner(text=Texts.CTRL_C_PURGING_PROGRESS_MSG):
            # the guard also covers submitted_runs being None
            if submitted_runs:
                for run in submitted_runs:
                    try:
                        # delete run
                        delete_k8s_object("run", run.name)
                        # purge helm release
                        delete_helm_release(run.name, namespace=submitted_namespace, purge=True)
                    except Exception:
                        # a failure for one run must not prevent cleaning up the others
                        log.exception(Texts.ERROR_WHILE_REMOVING_RUNS)
            delete_k8s_object("experiment", submitted_experiment)
    except Exception:
        log.exception(Texts.ERROR_WHILE_REMOVING_EXPERIMENT)

    # kill every descendant of this process
    for proc in psutil.Process(os.getpid()).children(recursive=True):
        proc.send_signal(signal.SIGKILL)

    # SystemExit is raised directly instead of calling exit(): that helper is injected
    # by the site module for interactive use and is not guaranteed to exist in programs.
    raise SystemExit(1)
def purge_experiment(exp_name: str, runs_to_purge: List[Run], k8s_es_client: K8sElasticSearchClient, namespace: str) -> Tuple[List[Run], List[Run]]:
    """
    Purge the given runs of an experiment: cancel them, then remove their helm
    releases, k8s "run" objects and (for administrators only) their logs.

    The experiment itself is treated as a whole only when the number of runs listed
    for exp_name equals len(runs_to_purge). In that case its state is set to
    CANCELLING, its Argo workflows and git repo manager tag are removed, and - if
    every run was purged - the k8s "experiment" object is deleted. Otherwise the
    experiment's state is left untouched.

    Errors raised after the experiment has been fetched are logged and do not
    propagate; the lists gathered up to that point are returned.

    :param exp_name: name of the experiment to which the runs in runs_to_purge belong
    :param runs_to_purge: list of runs that should be purged, they have to belong to exp_name experiment
    :param k8s_es_client: Kubernetes ElasticSearch client used to delete run logs
    :param namespace: namespace where experiment is located
    :return: two lists - the first contains runs that were purged successfully, the second
        those that weren't (including runs that could not be cancelled)
    :raises RuntimeError: when the experiment cannot be found
    """
    logger.debug(f"Purging {exp_name} experiment ...")

    purged_runs: List[Run] = []
    not_purged_runs: List[Run] = []

    experiment = Experiment.get(name=exp_name, namespace=namespace)
    if not experiment:
        # raised before the try block below, so this error does reach the caller
        raise RuntimeError(Texts.GET_EXPERIMENT_ERROR_MSG)

    experiment_runs = Run.list(namespace=namespace, exp_name_filter=[exp_name])
    # check whether the experiment has more runs than those that should be cancelled;
    # NOTE(review): only the counts are compared, not the identity of the runs
    cancel_whole_experiment = (len(experiment_runs) == len(runs_to_purge))
    if cancel_whole_experiment:
        experiment.state = ExperimentStatus.CANCELLING
        experiment.update()

    try:
        cancelled_runs, not_cancelled_runs = cancel_experiment_runs(
            runs_to_cancel=runs_to_purge, namespace=namespace)
        # same list object - runs that fail to purge below are appended to it
        not_purged_runs = not_cancelled_runs

        if cancel_whole_experiment:
            # Delete associated workflows
            experiment_associated_workflows = [
                wf for wf in ArgoWorkflow.list(namespace=namespace)
                if wf.labels.get('experimentName') == experiment.name
            ]
            for wf in experiment_associated_workflows:
                wf.delete()

            # Remove tags from git repo manager
            try:
                delete_exp_tag_from_git_repo_manager(
                    experiment_name=experiment.name, username=namespace,
                    experiments_workdir=get_run_environment_path(''))
            except Exception:
                handle_error(logger, Texts.GIT_REPO_MANAGER_ERROR_MSG,
                             Texts.GIT_REPO_MANAGER_ERROR_MSG)
                # re-raised to the outer handler, so no run gets purged in this case
                raise

        for run in cancelled_runs:
            logger.debug(f"Purging {run.name} run ...")
            click.echo(Texts.PURGING_START_MSG.format(run_name=run.name))
            try:
                with spinner(text=Texts.PURGING_PROGRESS_MSG.format(
                        run_name=run.name)):
                    # purge helm release
                    delete_helm_release(run.name, namespace=namespace, purge=True)
                    # delete run
                    kubectl.delete_k8s_object("run", run.name)
                    purged_runs.append(run)
            except Exception as exe:
                not_purged_runs.append(run)
                logger.exception("Error during purging runs.")
                # occurrence of a NotFound error may mean that the run has been removed earlier;
                # any other error aborts the purge of the remaining runs via the outer handler
                # NOTE(review): experiment_name is not defined in this function - presumably
                # a module-level name; verify
                if "NotFound" not in str(exe):
                    click.echo(
                        Texts.INCOMPLETE_PURGE_ERROR_MSG.format(
                            experiment_name=experiment_name))
                    raise exe

            try:
                # clear run logs - attempted only when the current user is an administrator
                if is_current_user_administrator():
                    logger.debug(f"Clearing logs for {run.name} run.")
                    with spinner(text=Texts.PURGING_LOGS_PROGRESS_MSG.format(
                            run_name=run.name)):
                        k8s_es_client.delete_logs_for_run(run=run.name, namespace=namespace)
            except Exception:
                # failing to clear logs does not mark the run as not purged
                logger.exception("Error during clearing run logs.")

            # CAN-1099 - docker garbage collector has errors that prevent from correct removal of images
            # try:
            # try to remove images from docker registry
            # delete_images_for_experiment(exp_name=run.name)
            # except Exception:
            # logger.exception("Error during removing images.")

        # the experiment object is removed only when nothing was left unpurged
        if cancel_whole_experiment and not not_purged_runs:
            try:
                kubectl.delete_k8s_object("experiment", exp_name)
            except Exception:
                # problems during deleting experiments are hidden as if runs were
                # cancelled user doesn't have a possibility to remove them
                logger.exception("Error during purging experiment.")

    except Exception:
        logger.exception("Error during purging experiment.")
        return purged_runs, not_purged_runs

    return purged_runs, not_purged_runs