def status(ctx: click.Context, username: str):
    """
    Prints a table with workflows found in a namespace.

    Workflows matching the "type=build-workflow" label are left out of the
    listing.

    :param ctx: click context; not used by this function
    :param username: namespace to search in; when empty, the namespace
        returned by get_kubectl_current_context_namespace() is used instead
    """
    try:
        workflows: List[ArgoWorkflow.ArgoWorkflowCliModel] = []
        # an explicitly given username takes precedence over the kubectl context
        namespace = username or get_kubectl_current_context_namespace()

        with spinner(text=Texts.LOAD_DATA_MSG):
            # the label selector skips workflows used to build images with
            # training jobs
            listed_workflows = ArgoWorkflow.list(
                namespace=namespace,
                label_selector="type!=build-workflow")
            workflows = [item.cli_representation for item in listed_workflows]

        table = tabulate(workflows,
                         headers=MODEL_HEADERS,
                         tablefmt=TBLT_TABLE_FORMAT)
        click.echo(table)
    except Exception:
        handle_error(logger,
                     Texts.OTHER_ERROR_MSG,
                     Texts.OTHER_ERROR_MSG,
                     add_verbosity_msg=True)
        exit(1)
def workflow_list(ctx: click.Context):
    """
    Prints a table with all workflows listed by ArgoWorkflow.list() for the
    namespace returned by get_kubectl_current_context_namespace().

    On any error a message is passed to handle_error() and the process exits
    with code 1.

    :param ctx: click context; not used by this function
    """
    try:
        current_namespace = get_kubectl_current_context_namespace()
        found_workflows: List[ArgoWorkflow] = ArgoWorkflow.list(
            namespace=current_namespace)

        # one table row per workflow
        rows = [item.cli_representation for item in found_workflows]
        table = tabulate(rows, headers=HEADERS, tablefmt=TBLT_TABLE_FORMAT)
        click.echo(table)
    except Exception:
        handle_error(logger,
                     Texts.OTHER_ERROR_MSG,
                     Texts.OTHER_ERROR_MSG,
                     add_verbosity_msg=True)
        exit(1)
def workflow_list(state: State):
    """
    Prints an "orgtbl"-formatted table with all workflows listed by
    ArgoWorkflow.list() for the namespace returned by
    get_kubectl_current_context_namespace().

    On any error a message is passed to handle_error() and the process exits
    with code 1.

    :param state: command state object; not used by this function
    """
    try:
        current_namespace = get_kubectl_current_context_namespace()
        found_workflows = ArgoWorkflow.list(namespace=current_namespace)

        # one table row per workflow
        rows = [item.cli_representation for item in found_workflows]
        table = tabulate(rows, headers=HEADERS, tablefmt="orgtbl")
        click.echo(table)
    except Exception:
        handle_error(logger,
                     Texts.OTHER_ERROR_MSG,
                     Texts.OTHER_ERROR_MSG,
                     add_verbosity_msg=True)
        exit(1)
def purge_experiment(exp_name: str, runs_to_purge: List[Run],
                     k8s_es_client: K8sElasticSearchClient,
                     namespace: str) -> Tuple[List[Run], List[Run]]:
    """
    Purge experiment with a given name by cancelling runs given as a parameter.
    If the experiment contains more runs than are present in the list of runs,
    the experiment's state remains intact.

    Errors raised after the experiment has been fetched are logged and
    swallowed - the function then returns whatever has been collected so far.

    :param exp_name: name of an experiment to which belong runs passed in
        runs_to_purge parameter
    :param runs_to_purge: list of runs that should be purged, they have to
        belong to exp_name experiment
    :param k8s_es_client: Kubernetes ElasticSearch client
    :param namespace: namespace where experiment is located
    :return: two lists - first contains runs that were purged successfully,
        second - those which weren't
    :raises RuntimeError: when Experiment.get() returns no experiment
    """
    logger.debug(f"Purging {exp_name} experiment ...")

    purged_runs: List[Run] = []
    not_purged_runs: List[Run] = []

    experiment = Experiment.get(name=exp_name, namespace=namespace)
    if not experiment:
        raise RuntimeError(Texts.GET_EXPERIMENT_ERROR_MSG)

    experiment_runs = Run.list(namespace=namespace, exp_name_filter=[exp_name])
    # the whole experiment is cancelled only when every one of its runs
    # is going to be purged
    cancel_whole_experiment = (len(experiment_runs) == len(runs_to_purge))
    if cancel_whole_experiment:
        experiment.state = ExperimentStatus.CANCELLING
        experiment.update()

    try:
        cancelled_runs, not_cancelled_runs = cancel_experiment_runs(
            runs_to_cancel=runs_to_purge, namespace=namespace)
        not_purged_runs = not_cancelled_runs

        if cancel_whole_experiment:
            # Delete associated workflows
            experiment_associated_workflows = [
                wf for wf in ArgoWorkflow.list(namespace=namespace)
                if wf.labels.get('experimentName') == experiment.name
            ]
            for wf in experiment_associated_workflows:
                wf.delete()

            # Remove tags from git repo manager
            try:
                delete_exp_tag_from_git_repo_manager(
                    experiment_name=experiment.name,
                    username=namespace,
                    experiments_workdir=get_run_environment_path(''))
            except Exception:
                handle_error(logger, Texts.GIT_REPO_MANAGER_ERROR_MSG,
                             Texts.GIT_REPO_MANAGER_ERROR_MSG)
                raise

        for run in cancelled_runs:
            logger.debug(f"Purging {run.name} run ...")
            click.echo(Texts.PURGING_START_MSG.format(run_name=run.name))
            try:
                with spinner(text=Texts.PURGING_PROGRESS_MSG.format(
                        run_name=run.name)):
                    # purge helm release
                    delete_helm_release(run.name, namespace=namespace,
                                        purge=True)
                    # delete run
                    kubectl.delete_k8s_object("run", run.name)
                    purged_runs.append(run)
            except Exception as exe:
                not_purged_runs.append(run)
                logger.exception("Error during purging runs.")
                # occurrence of NotFound error may mean, that run has been
                # removed earlier - in such a case purging goes on
                if "NotFound" not in str(exe):
                    # the experiment's name is held in exp_name; no
                    # experiment_name variable exists in this function
                    click.echo(
                        Texts.INCOMPLETE_PURGE_ERROR_MSG.format(
                            experiment_name=exp_name))
                    raise

            try:
                # clear run logs
                if is_current_user_administrator():
                    logger.debug(f"Clearing logs for {run.name} run.")
                    with spinner(text=Texts.PURGING_LOGS_PROGRESS_MSG.format(
                            run_name=run.name)):
                        k8s_es_client.delete_logs_for_run(run=run.name,
                                                          namespace=namespace)
            except Exception:
                # a failure to clear logs doesn't stop purging
                logger.exception("Error during clearing run logs.")

            # CAN-1099 - docker garbage collector has errors that prevent from correct removal of images
            # try:
            # try to remove images from docker registry
            #     delete_images_for_experiment(exp_name=run.name)
            # except Exception:
            #     logger.exception("Error during removing images.")

        # the experiment object is removed only when all of its runs are gone
        if cancel_whole_experiment and not not_purged_runs:
            try:
                kubectl.delete_k8s_object("experiment", exp_name)
            except Exception:
                # problems during deleting experiments are hidden as if runs were
                # cancelled user doesn't have a possibility to remove them
                logger.exception("Error during purging experiment.")

    except Exception:
        logger.exception("Error during purging experiment.")
        return purged_runs, not_purged_runs

    return purged_runs, not_purged_runs
def get_logs(operation_name: str, start_date: str, end_date: str, match: str,
             output: bool, pager: bool, follow: bool):
    """
    Show logs for a given model export operation.

    Exactly one of operation_name and match has to be given, otherwise an
    error is reported and the process exits with code 1. The same happens on
    any error raised while the logs are being gathered.

    :param operation_name: name of an operation; workflows are filtered with
        the ^operation_name$ name filter
    :param start_date: passed to the logs generator; when empty, each
        workflow's own started_at value is used instead
    :param end_date: passed to the logs generator
    :param match: name filter passed as is to ArgoWorkflow.list()
    :param output: if set - logs are saved to a file instead of being printed
    :param pager: passed to print_logs()
    :param follow: passed to the logs generator; ignored when output is set
    """
    # exactly one of operation_name / match has to be provided
    if operation_name and match:
        handle_error(user_msg=Texts.NAME_M_BOTH_GIVEN_ERROR_MSG)
        exit(1)
    elif not operation_name and not match:
        handle_error(user_msg=Texts.NAME_M_NONE_GIVEN_ERROR_MSG)
        exit(1)

    try:
        with K8sProxy(NAUTAAppNames.ELASTICSEARCH) as proxy:
            es_client = K8sElasticSearchClient(host="127.0.0.1",
                                               port=proxy.tunnel_port,
                                               verify_certs=False,
                                               use_ssl=False)
            namespace = get_kubectl_current_context_namespace()
            if match:
                # from now on the filter is also the name used in messages
                operation_name = match
                name_filter = match
            else:
                name_filter = f'^{operation_name}$'

            workflows = ArgoWorkflow.list(namespace=namespace,
                                          name_filter=name_filter)
            if not workflows:
                raise ValueError(
                    f'Operation with given name: {operation_name} does not '
                    f'exists in namespace {namespace}.')

            # logs saved to a file are never followed
            follow_logs = bool(follow and not output)

            if output and len(workflows) > 1:
                click.echo(Texts.MORE_EXP_LOGS_MESSAGE)

            for workflow in workflows:
                # computed per workflow - start_date itself stays untouched so
                # that the first workflow's start time doesn't leak into the
                # following ones
                workflow_start_date = \
                    start_date if start_date else workflow.started_at

                ops_logs_generator = \
                    es_client.get_argo_workflow_logs_generator(
                        workflow=workflow,
                        namespace=namespace,
                        start_date=workflow_start_date,
                        end_date=end_date,
                        follow=follow_logs)

                if output:
                    save_logs_to_file(logs_generator=ops_logs_generator,
                                      instance_name=workflow.name,
                                      instance_type="operation")
                else:
                    if len(workflows) > 1:
                        click.echo(f'Operation : {workflow.name}')
                    print_logs(run_logs_generator=ops_logs_generator,
                               pager=pager)

    except K8sProxyCloseError:
        handle_error(logger, Texts.PROXY_CLOSE_LOG_ERROR_MSG,
                     Texts.PROXY_CLOSE_LOG_ERROR_MSG)
        exit(1)
    except LocalPortOccupiedError as exe:
        port_occupied_msg = Texts.LOCAL_PORT_OCCUPIED_ERROR_MSG.format(
            exception_message=exe.message)
        handle_error(logger, port_occupied_msg, port_occupied_msg)
        exit(1)
    except K8sProxyOpenError:
        handle_error(logger, Texts.PROXY_CREATION_ERROR_MSG,
                     Texts.PROXY_CREATION_ERROR_MSG)
        exit(1)
    except ValueError:
        # NOTE(review): the placeholder name used by the template isn't
        # visible here - both names seen in the previous code are supplied
        # (str.format ignores unused keyword arguments), so formatting can't
        # fail with KeyError whichever one the template uses.
        not_exists_msg = Texts.OPERATION_NOT_EXISTS_ERROR_MSG.format(
            operation_name=operation_name, experiment_name=operation_name)
        handle_error(logger, not_exists_msg, not_exists_msg)
        exit(1)
    except Exception:
        handle_error(logger, Texts.LOGS_GET_OTHER_ERROR_MSG,
                     Texts.LOGS_GET_OTHER_ERROR_MSG)
        exit(1)