def purge_user(username: str):
    """
    Remove every system artifact that belonged to a deleted user.

    K8s objects themselves are removed together with the user's namespace,
    so only the external stores are cleaned up here: the user's experiment
    logs in elasticsearch and the user's repository in the git repo manager.

    :param username: name of the user whose artifacts should be removed
    Raises an exception when any problem is detected during the removal.
    """
    try:
        # Drop the user's experiment logs stored in elasticsearch,
        # reached through the k8s API-server service proxy.
        with spinner(text=TextsDel.DELETION_DELETING_USERS_EXPERIMENTS):
            elasticsearch_client = K8sElasticSearchClient(
                host=f'{get_kubectl_host(with_port=True)}'
                     f'/api/v1/namespaces/nauta/services/nauta-elasticsearch:nauta/proxy',
                verify_certs=False,
                use_ssl=True,
                headers={'Authorization': get_api_key()})
            elasticsearch_client.delete_logs_for_namespace(username)

        # Drop the user's repository kept by the git repo manager,
        # reached through a local tunnel.
        with k8s_proxy_context_manager.K8sProxy(NAUTAAppNames.GIT_REPO_MANAGER) as proxy, \
                spinner(text=TextsDel.DELETION_DELETING_USERS_REPOSITORY):
            repo_manager_client = GitRepoManagerClient(host='127.0.0.1',
                                                       port=proxy.tunnel_port)
            repo_manager_client.delete_nauta_user(username=username)
    except K8sProxyCloseError as exe:
        logger.exception("Error during closing of a proxy.")
        raise exe
    except Exception as exe:
        logger.exception(f"Error during removal of {username} user data")
        raise exe
def logs(state: State, workflow_name: str):
    """Print elasticsearch log entries of a single Argo workflow to the console."""
    try:
        namespace = get_kubectl_current_context_namespace()
        workflow: ArgoWorkflow = ArgoWorkflow.get(namespace=namespace, name=workflow_name)
        if not workflow:
            click.echo(Texts.NOT_FOUND_MSG.format(workflow_name=workflow_name))
            exit(0)

        # Tunnel to elasticsearch and stream the workflow's log entries.
        with K8sProxy(NAUTAAppNames.ELASTICSEARCH) as proxy:
            es_client = K8sElasticSearchClient(host="127.0.0.1", port=proxy.tunnel_port,
                                               verify_certs=False, use_ssl=False)
            log_generator = es_client.get_argo_workflow_logs_generator(
                workflow=workflow,
                namespace=namespace,
                start_date=workflow.started_at)
            for entry in log_generator:
                if entry.content.isspace():
                    continue  # skip blank log lines
                click.echo(f'{entry.date} {entry.pod_name} {entry.content}')
    except K8sProxyCloseError:
        handle_error(logger, Texts.PROXY_CLOSE_LOG_ERROR_MSG, Texts.PROXY_CLOSE_USER_ERROR_MSG)
        exit(1)
    except LocalPortOccupiedError as exe:
        occupied_msg = Texts.LOCAL_PORT_OCCUPIED_ERROR_MSG.format(exception_message=exe.message)
        handle_error(logger, occupied_msg, occupied_msg)
        exit(1)
    except K8sProxyOpenError:
        handle_error(logger, Texts.PROXY_CREATION_ERROR_MSG, Texts.PROXY_CREATION_ERROR_MSG)
        exit(1)
    except Exception:
        handle_error(logger, Texts.OTHER_ERROR_MSG, Texts.OTHER_ERROR_MSG,
                     add_verbosity_msg=True)
        exit(1)
def logs(ctx: click.Context, workflow_name: str):
    """Print elasticsearch log entries of a single Argo workflow to the console."""
    try:
        namespace = get_kubectl_current_context_namespace()
        workflow: ArgoWorkflow = ArgoWorkflow.get(namespace=namespace, name=workflow_name)
        if not workflow:
            click.echo(Texts.NOT_FOUND_MSG.format(workflow_name=workflow_name))
            exit(0)

        # Reach elasticsearch through the k8s API-server service proxy
        # instead of opening a local tunnel.
        es_client = K8sElasticSearchClient(
            host=f'{get_kubectl_host(with_port=True)}'
                 f'/api/v1/namespaces/nauta/services/nauta-elasticsearch:nauta/proxy',
            verify_certs=False,
            use_ssl=True,
            headers={'Authorization': get_api_key()})

        log_generator = es_client.get_argo_workflow_logs_generator(
            workflow=workflow,
            namespace=namespace,
            start_date=workflow.started_at)
        for entry in log_generator:
            if entry.content.isspace():
                continue  # skip blank log lines
            click.echo(f'{entry.date} {entry.pod_name} {entry.content}')
    except Exception:
        handle_error(logger, Texts.OTHER_ERROR_MSG, Texts.OTHER_ERROR_MSG,
                     add_verbosity_msg=True)
        exit(1)
def test_delete_logs_for_run(mocker):
    """delete_logs_for_run must issue a delete-by-query scoped to run and namespace."""
    es_client = K8sElasticSearchClient(host='fake', port=8080, namespace='kube-system')
    delete_by_query_mock = mocker.patch.object(es_client, 'delete_by_query')

    fake_run = 'test_run'
    fake_namespace = 'fake-namespace'

    es_client.delete_logs_for_run(fake_run, fake_namespace)

    expected_query = {
        "query": {
            "bool": {
                "must": [
                    {"term": {'kubernetes.labels.runName.keyword': fake_run}},
                    {"term": {'kubernetes.namespace_name.keyword': fake_namespace}},
                ]
            }
        }
    }
    delete_by_query_mock.assert_called_with(index='_all', body=expected_query)
def test_delete_logs_for_namespace(mocker):
    """delete_logs_for_namespace must trigger exactly one delete-by-query call."""
    es_client = K8sElasticSearchClient(host='fake', port=8080, namespace='kube-system')
    delete_by_query_mock = mocker.patch.object(es_client, 'delete_by_query')

    es_client.delete_logs_for_namespace("namespace")

    assert delete_by_query_mock.call_count == 1
def test_full_log_search(mocker):
    """Without filters, the generator must yield every entry produced by es scan."""
    es_client = K8sElasticSearchClient(host='fake', port=8080, namespace='kube-system')
    scan_mock = mocker.patch('logs_aggregator.k8s_es_client.elasticsearch.helpers.scan')
    scan_mock.return_value = iter(TEST_SCAN_OUTPUT)

    produced_entries = list(es_client.get_log_generator())
    assert produced_entries == TEST_LOG_ENTRIES
def test_get_experiment_logs_time_range(mocker):
    """Experiment log query must carry run/namespace terms plus a timestamp range."""
    es_client = K8sElasticSearchClient(host='fake', port=8080, namespace='kube-system')
    log_generator_mock = mocker.patch.object(es_client, 'get_log_generator')
    log_generator_mock.return_value = iter(TEST_LOG_ENTRIES)

    fake_experiment = 'fake-experiment'
    fake_namespace = 'fake-namespace'
    fake_run = MagicMock(spec=Run)
    fake_run.name = fake_experiment
    lower_timestamp = '2018-04-17T09:28:39+00:00'
    upper_timestamp = '2018-04-17T09:28:49+00:00'

    produced_logs = es_client.get_experiment_logs_generator(run=fake_run,
                                                            namespace=fake_namespace,
                                                            start_date=lower_timestamp,
                                                            end_date=upper_timestamp)

    for produced, expected in zip(produced_logs, TEST_LOG_ENTRIES):
        assert produced == expected

    expected_query = {
        "query": {
            "bool": {
                "must": [
                    {'term': {'kubernetes.labels.runName.keyword': fake_experiment}},
                    {'term': {'kubernetes.namespace_name.keyword': fake_namespace}},
                ],
                "filter": {
                    "range": {
                        "@timestamp": {"gte": lower_timestamp, "lte": upper_timestamp}
                    }
                }
            }
        },
        "sort": {"@timestamp": {"order": "asc"}}
    }
    log_generator_mock.assert_called_with(query_body=expected_query, filters=[], index='_all')
def test_full_log_search_filter_idempotent(mocker):
    """A filter that accepts everything must not change the produced entries."""
    es_client = K8sElasticSearchClient(host='fake', port=8080, namespace='kube-system')
    scan_mock = mocker.patch('logs_aggregator.k8s_es_client.elasticsearch.helpers.scan')
    scan_mock.return_value = iter(TEST_SCAN_OUTPUT)

    accept_all = [lambda entry: True]
    assert list(es_client.get_log_generator(filters=accept_all)) == TEST_LOG_ENTRIES
def test_get_workflow_logs(mocker):
    """Workflow log query must carry workflow/namespace terms plus a start-date range."""
    es_client = K8sElasticSearchClient(host='fake', port=8080, namespace='kube-system')
    log_generator_mock = mocker.patch.object(es_client, 'get_log_generator')
    log_generator_mock.return_value = iter(TEST_LOG_ENTRIES)

    fake_namespace = 'fake-namespace'
    fake_workflow = MagicMock(spec=ArgoWorkflow)
    fake_workflow.name = 'test-workflow'
    start_timestamp = '2018-04-17T09:28:39+00:00'

    produced_logs = es_client.get_argo_workflow_logs_generator(workflow=fake_workflow,
                                                               namespace=fake_namespace,
                                                               start_date=start_timestamp)

    for produced, expected in zip(produced_logs, TEST_LOG_ENTRIES):
        assert produced == expected

    expected_query = {
        "query": {
            "bool": {
                "must": [
                    {'term': {'kubernetes.labels.workflows_argoproj_io/workflow.keyword':
                              fake_workflow.name}},
                    {'term': {'kubernetes.namespace_name.keyword': fake_namespace}},
                ],
                "filter": {
                    "range": {"@timestamp": {"gte": start_timestamp}}
                }
            }
        },
        "sort": {"@timestamp": {"order": "asc"}}
    }
    log_generator_mock.assert_called_with(query_body=expected_query, filters=[], index='_all')
def _debug_workflow_logs(workflow: ArgoWorkflow, namespace: str):
    """
    Dump a workflow's elasticsearch logs at debug level.

    Best-effort helper: any failure is logged and swallowed, never raised.
    """
    try:
        with K8sProxy(NAUTAAppNames.ELASTICSEARCH) as proxy:
            es_client = K8sElasticSearchClient(host="127.0.0.1", port=proxy.tunnel_port,
                                               verify_certs=False, use_ssl=False)
            entries = es_client.get_argo_workflow_logs_generator(
                workflow=workflow,
                namespace=namespace,
                start_date=workflow.started_at)
            log.debug(f'=== Workflow {workflow.name} logs ===')
            for entry in entries:
                if entry.content.isspace():
                    continue  # skip blank log lines
                log.debug(f'{entry.date} {entry.pod_name} {entry.content}')
            log.debug(f'=== Workflow {workflow.name} logs ===')
    except Exception:
        log.exception(f'Failed to get {workflow.name} worklfow logs.')
def purge_user(username: str):
    """
    Remove every system artifact that belonged to a deleted user.

    K8s objects are removed during removal of a namespace; this helper only
    purges the user's experiment logs from elasticsearch.

    :param username: name of the user whose artifacts should be removed
    Raises an exception when any problem is detected during the removal.
    """
    # remove data from elasticsearch
    try:
        with k8s_proxy_context_manager.K8sProxy(NAUTAAppNames.ELASTICSEARCH) as proxy, \
                spinner(text=TextsDel.DELETION_DELETING_USERS_EXPERIMENTS):
            elasticsearch_client = K8sElasticSearchClient(host="127.0.0.1",
                                                          port=proxy.tunnel_port,
                                                          verify_certs=False,
                                                          use_ssl=False)
            elasticsearch_client.delete_logs_for_namespace(username)
    except K8sProxyCloseError as exe:
        logger.exception("Error during closing of a proxy for elasticsearch.")
        raise exe
    except Exception as exe:
        logger.exception("Error during removal of data from elasticsearch")
        raise exe
def get_logs(experiment_name: str, min_severity: SeverityLevel, start_date: str,
             end_date: str, pod_ids: str, pod_status: PodStatus, match: str,
             output: bool, pager: bool, follow: bool, runs_kinds: List[RunKinds],
             instance_type: str):
    """
    Show logs for a given experiment.

    :param experiment_name: exact name of the experiment whose logs should be shown
    :param min_severity: minimal severity of logs to display
    :param start_date: lower bound for log timestamps; when empty, each run's
        creation timestamp is used instead
    :param end_date: upper bound for log timestamps
    :param pod_ids: comma-separated list of pod ids used to filter logs
    :param pod_status: status of pods used to filter logs
    :param match: regular expression selecting runs by name (mutually exclusive
        with experiment_name)
    :param output: when True, logs are saved to files instead of being printed
    :param pager: when True, logs are displayed through a pager
    :param follow: when True (and output is False), logs are streamed continuously
    :param runs_kinds: kinds of runs taken into account while listing them
    :param instance_type: instance kind name used in user-facing messages
    """
    # check whether we have runs with a given name
    # exactly one of experiment_name / match must be provided
    if experiment_name and match:
        handle_error(user_msg=Texts.NAME_M_BOTH_GIVEN_ERROR_MSG.format(instance_type=instance_type))
        exit(1)
    elif not experiment_name and not match:
        handle_error(user_msg=Texts.NAME_M_NONE_GIVEN_ERROR_MSG.format(instance_type=instance_type))
        exit(1)

    try:
        # elasticsearch is reached through a local tunnel for the whole duration
        # of log retrieval
        with K8sProxy(NAUTAAppNames.ELASTICSEARCH) as proxy:
            es_client = K8sElasticSearchClient(host="127.0.0.1", port=proxy.tunnel_port,
                                               verify_certs=False, use_ssl=False)
            namespace = get_kubectl_current_context_namespace()
            if match:
                # a regex match doubles as the display name in error messages
                experiment_name = match
                name_filter = match
            else:
                # exact-name lookup is expressed as an anchored regex
                name_filter = f'^{experiment_name}$'
            runs = Run.list(namespace=namespace, name_filter=name_filter,
                            run_kinds_filter=runs_kinds)
            if not runs:
                # handled below by the ValueError branch
                raise ValueError(f'Run with given name: {experiment_name} does not exists in namespace {namespace}.')
            # normalize CLI string inputs into the forms the ES client expects
            pod_ids = pod_ids.split(',') if pod_ids else None
            min_severity = SeverityLevel[min_severity] if min_severity else None
            pod_status = PodStatus[pod_status] if pod_status else None
            # following logs makes no sense when they are redirected to a file
            follow_logs = True if follow and not output else False
            if output and len(runs) > 1:
                click.echo(Texts.MORE_EXP_LOGS_MESSAGE)
            for run in runs:
                start_date = start_date if start_date else run.creation_timestamp
                run_logs_generator = es_client.get_experiment_logs_generator(run=run,
                                                                             namespace=namespace,
                                                                             min_severity=min_severity,
                                                                             start_date=start_date,
                                                                             end_date=end_date,
                                                                             pod_ids=pod_ids,
                                                                             pod_status=pod_status,
                                                                             follow=follow_logs)
                if output:
                    save_logs_to_file(run=run, run_logs_generator=run_logs_generator,
                                      instance_type=instance_type)
                else:
                    # prefix each run's logs with its name when showing several runs
                    if len(runs) > 1:
                        click.echo(f'Experiment : {run.name}')
                    print_logs(run_logs_generator=run_logs_generator, pager=pager)
    except K8sProxyCloseError:
        handle_error(logger, Texts.PROXY_CLOSE_LOG_ERROR_MSG, Texts.PROXY_CLOSE_USER_ERROR_MSG)
        exit(1)
    except LocalPortOccupiedError as exe:
        handle_error(logger,
                     Texts.LOCAL_PORT_OCCUPIED_ERROR_MSG.format(exception_message=exe.message),
                     Texts.LOCAL_PORT_OCCUPIED_ERROR_MSG.format(exception_message=exe.message))
        exit(1)
    except K8sProxyOpenError:
        handle_error(logger, Texts.PROXY_CREATION_ERROR_MSG, Texts.PROXY_CREATION_ERROR_MSG)
        exit(1)
    except ValueError:
        # raised above when no run matched the name/regex
        handle_error(logger,
                     Texts.EXPERIMENT_NOT_EXISTS_ERROR_MSG.format(experiment_name=experiment_name,
                                                                  instance_type=instance_type.capitalize()),
                     Texts.EXPERIMENT_NOT_EXISTS_ERROR_MSG.format(experiment_name=experiment_name,
                                                                  instance_type=instance_type.capitalize()))
        exit(1)
    except Exception:
        handle_error(logger,
                     Texts.LOGS_GET_OTHER_ERROR_MSG.format(instance_type=instance_type),
                     Texts.LOGS_GET_OTHER_ERROR_MSG.format(instance_type=instance_type))
        exit(1)
def cancel(state: State, name: str, match: str, purge: bool, pod_ids: str,
           pod_status: str, listed_runs_kinds: List[RunKinds] = None):
    """
    Cancels chosen experiments based on a name provided as a parameter.

    :param state: click context state object (unused directly here)
    :param name: exact name of the experiment/run to cancel
    :param match: regular expression selecting runs by name (mutually exclusive
        with name)
    :param purge: when True, also remove finished runs, their logs and k8s objects
    :param pod_ids: list of pod ids - when given, only those pods are cancelled
    :param pod_status: pod status - when given, only pods in that status are cancelled
    :param listed_runs_kinds: kinds of runs taken into account; defaults to
        training and jupyter runs
    NOTE(review): experiment_name and experiment_name_plural used below are
    presumably module-level names - they are not defined in this function.
    """
    if not listed_runs_kinds:
        listed_runs_kinds = [RunKinds.TRAINING, RunKinds.JUPYTER]

    # check whether we have runs with a given name
    # exactly one of name / match must be provided
    if name and match:
        handle_error(user_msg=Texts.NAME_M_BOTH_GIVEN_ERROR_MSG)
        exit(1)

    if not name and not match:
        handle_error(user_msg=Texts.NAME_M_NONE_GIVEN_ERROR_MSG)
        exit(1)

    current_namespace = get_current_namespace()

    # pod-level cancellation is a separate, short-circuit mode
    if pod_ids or pod_status:
        if not name:
            name = match

        cancel_pods_mode(namespace=current_namespace, run_name=name,
                         pod_ids=pod_ids, pod_status=pod_status)
        exit(0)

    search_for_experiment = False
    exp_to_be_cancelled = None

    if name:
        # prefer treating the given name as a whole experiment; fall back to
        # an anchored-regex run search when no matching experiment exists
        exp_to_be_cancelled = Experiment.get(namespace=current_namespace, name=name)
        exp_to_be_cancelled_kind = RunKinds(exp_to_be_cancelled.metadata['labels'].get('runKind')) \
            if exp_to_be_cancelled else None
        # ignore experiments whose kind is not one of the listed kinds
        exp_to_be_cancelled = exp_to_be_cancelled if exp_to_be_cancelled_kind in listed_runs_kinds else None

        if exp_to_be_cancelled:
            search_for_experiment = True
        else:
            name = f"^{name}$"
    else:
        name = match

    list_of_all_runs = None

    # only queued/running runs can be cancelled; purge additionally accepts
    # finished runs so their artifacts can be removed
    list_of_applicable_states = [RunStatus.QUEUED, RunStatus.RUNNING]

    if purge:
        list_of_applicable_states.extend([RunStatus.FAILED, RunStatus.COMPLETE,
                                          RunStatus.CANCELLED])

    try:
        if search_for_experiment:
            list_of_all_runs = Run.list(namespace=current_namespace, exp_name_filter=[name],
                                        run_kinds_filter=listed_runs_kinds)
        else:
            list_of_all_runs = Run.list(namespace=current_namespace, name_filter=name,
                                        run_kinds_filter=listed_runs_kinds)
    except Exception:
        handle_error(logger,
                     Texts.LIST_RUNS_ERROR_MSG.format(experiment_name_plural=experiment_name_plural),
                     Texts.LIST_RUNS_ERROR_MSG.format(experiment_name_plural=experiment_name_plural))
        exit(1)

    # Handle cancellation of experiments with no associated Runs
    if exp_to_be_cancelled and not list_of_all_runs:
        cancel_uninitialized_experiment(experiment=exp_to_be_cancelled,
                                        namespace=current_namespace, purge=purge)

    if not list_of_all_runs:
        handle_error(user_msg=Texts.LACK_OF_EXPERIMENTS_ERROR_MSG.format(
            experiment_name_plural=experiment_name_plural, experiment_name=experiment_name))
        exit(1)
    elif not purge and not [run for run in list_of_all_runs
                            if run.state in [RunStatus.QUEUED, RunStatus.RUNNING]]:
        handle_error(user_msg=Texts.LACK_OF_EXP_TO_BE_CANCELLED_ERROR_MSG.format(
            experiment_name_plural=experiment_name_plural))
        exit(1)

    # check whether we have at least one experiment in state other than CANCELLED
    list_of_runs_to_be_deleted: List[Run] = []
    names_of_cancelled_runs: List[str] = []

    if not purge:
        # check whether we have at least one experiment in state other than CANCELLED
        for run in list_of_all_runs:
            if run.state in list_of_applicable_states:
                list_of_runs_to_be_deleted.append(run)
            else:
                names_of_cancelled_runs.append(run.name)

        if not list_of_runs_to_be_deleted:
            handle_error(user_msg=Texts.EXPERIMENTS_ALREADY_CANCELLED_ERROR_MSG.format(
                experiment_name_plural=experiment_name_plural,
                operation_word=Texts.DELETE_OPERATION["deleted"]
                if experiment_name_plural == 'pods' else Texts.CANCEL_OPERATION["cancelled"]))
            exit(1)
        elif len(list_of_runs_to_be_deleted) != len(list_of_all_runs):
            # some runs are already cancelled - show both groups to the user
            click.echo(Texts.ALREADY_CANCELLED_LIST_HEADER.format(
                experiment_name_plural=experiment_name_plural,
                operation_word=Texts.DELETE_OPERATION["deleted"]
                if experiment_name_plural == 'pods' else Texts.CANCEL_OPERATION["cancelled"]))
            for name in names_of_cancelled_runs:
                click.echo(f" - {name}")
            click.echo(Texts.CAN_BE_CANCELLED_LIST_HEADER.format(
                experiment_name_plural=experiment_name_plural,
                operation_word=Texts.DELETE_OPERATION["deleted"]
                if experiment_name_plural == 'pods' else Texts.CANCEL_OPERATION["cancelled"]))
            for run in list_of_runs_to_be_deleted:
                click.echo(f" - {run.name}")
        else:
            click.echo(Texts.WILL_BE_CANCELLED_LIST_HEADER.format(
                experiment_name_plural=experiment_name_plural,
                operation_word=Texts.DELETE_OPERATION["deleted"]
                if experiment_name_plural == 'pods' else Texts.CANCEL_OPERATION["cancelled"]))
            for run in list_of_runs_to_be_deleted:
                click.echo(f" - {run.name}")
    else:
        # purge removes every matched run regardless of its state
        list_of_runs_to_be_deleted = list_of_all_runs
        click.echo(Texts.WILL_BE_PURGED_LIST_HEADER.format(
            experiment_name_plural=experiment_name_plural,
            operation_word=Texts.DELETE_OPERATION["deleted"]
            if experiment_name_plural == 'pods' else Texts.CANCEL_OPERATION["cancelled"]))
        for run in list_of_runs_to_be_deleted:
            click.echo(f" - {run.name}")

    # interactive confirmation before any destructive action
    if not click.confirm(Texts.CONFIRM_CANCEL_MSG.format(
            experiment_name_plural=experiment_name_plural,
            operation_word=Texts.DELETE_OPERATION["deletion"]
            if experiment_name_plural == 'pods' else Texts.CANCEL_OPERATION["cancellation"])):
        handle_error(user_msg=Texts.CANCELLATION_ABORTED_MSG.format(
            experiment_name_plural=experiment_name_plural,
            operation_word=Texts.DELETE_OPERATION["deletion"]
            if experiment_name_plural == 'pods' else Texts.CANCEL_OPERATION["cancellation"]))
        exit(0)

    # group runs by experiments
    exp_with_runs: defaultdict = defaultdict(list)

    for run in list_of_runs_to_be_deleted:
        exp_with_runs[run.experiment_name].append(run)

    deleted_runs = []
    not_deleted_runs = []

    if purge:
        # Connect to elasticsearch in order to purge run logs
        try:
            with K8sProxy(NAUTAAppNames.ELASTICSEARCH) as proxy:
                es_client = K8sElasticSearchClient(
                    host="127.0.0.1", port=proxy.tunnel_port,
                    verify_certs=False, use_ssl=False,
                    with_admin_privledges=is_current_user_administrator())
                # purge experiment by experiment; failure of one experiment does
                # not stop the others
                for exp_name, run_list in exp_with_runs.items():
                    try:
                        exp_del_runs, exp_not_del_runs = purge_experiment(
                            exp_name=exp_name,
                            runs_to_purge=run_list,
                            namespace=current_namespace,
                            k8s_es_client=es_client)
                        deleted_runs.extend(exp_del_runs)
                        not_deleted_runs.extend(exp_not_del_runs)
                    except Exception:
                        handle_error(logger, Texts.OTHER_CANCELLING_ERROR_MSG)
                        not_deleted_runs.extend(run_list)
        except K8sProxyCloseError:
            handle_error(logger, Texts.PROXY_CLOSING_ERROR_LOG_MSG,
                         Texts.PROXY_CLOSING_ERROR_USER_MSG)
            exit(1)
        except LocalPortOccupiedError as exe:
            handle_error(logger, Texts.PORT_OCCUPIED_ERROR_LOG_MSG,
                         Texts.PORT_OCCUPIED_ERROR_USER_MSG.format(
                             exception_message=exe.message))
            exit(1)
        except K8sProxyOpenError:
            handle_error(logger, Texts.PROXY_OPEN_ERROR_MSG, Texts.PROXY_OPEN_ERROR_MSG)
            exit(1)
    else:
        # plain cancel - no elasticsearch connection needed
        for exp_name, run_list in exp_with_runs.items():
            try:
                exp_del_runs, exp_not_del_runs = cancel_experiment(
                    exp_name=exp_name, runs_to_cancel=run_list,
                    namespace=current_namespace)
                deleted_runs.extend(exp_del_runs)
                not_deleted_runs.extend(exp_not_del_runs)
            except Exception:
                handle_error(logger, Texts.OTHER_CANCELLING_ERROR_MSG)
                not_deleted_runs.extend(run_list)

    # final summary for the user
    if deleted_runs:
        click.echo(Texts.SUCCESSFULLY_CANCELLED_LIST_HEADER.format(
            experiment_name_plural=experiment_name_plural,
            operation_word=Texts.DELETE_OPERATION["deleted"]
            if experiment_name_plural == 'pods' else Texts.CANCEL_OPERATION["cancelled"]))
        for run in deleted_runs:
            click.echo(f" - {run.name}")

    if not_deleted_runs:
        click.echo(Texts.FAILED_TO_CANCEL_LIST_HEADER.format(
            experiment_name_plural=experiment_name_plural,
            operation_word=Texts.DELETE_OPERATION["deleted"]
            if experiment_name_plural == 'pods' else Texts.CANCEL_OPERATION["cancelled"]))
        for run in not_deleted_runs:
            click.echo(f" - {run.name}")
        # non-zero exit code signals a partial failure
        sys.exit(1)
def purge_experiment(exp_name: str, runs_to_purge: List[Run],
                     k8s_es_client: K8sElasticSearchClient,
                     namespace: str) -> Tuple[List[Run], List[Run]]:
    """
    Purge experiment with a given name by cancelling runs given as a parameter.
    If given experiment contains more runs than is in the list of runs -
    experiment's state remains intact.

    :param exp_name: name of an experiment to which belong runs passed in
        run_list parameter
    :param runs_to_purge: list of runs that should be purged, they have to
        belong to exp_name experiment
    :param k8s_es_client: Kubernetes ElasticSearch client
    :param namespace: namespace where experiment is located
    :return: two list - first contains runs that were cancelled successfully,
        second - those which weren't
    """
    logger.debug(f"Purging {exp_name} experiment ...")

    purged_runs: List[Run] = []
    not_purged_runs: List[Run] = []

    experiment = Experiment.get(name=exp_name, namespace=namespace)
    if not experiment:
        raise RuntimeError(Texts.GET_EXPERIMENT_ERROR_MSG)

    experiment_runs = Run.list(namespace=namespace, exp_name_filter=[exp_name])
    # check whether experiment has more runs that should be cancelled
    cancel_whole_experiment = (len(experiment_runs) == len(runs_to_purge))
    if cancel_whole_experiment:
        experiment.state = ExperimentStatus.CANCELLING
        experiment.update()

    try:
        cancelled_runs, not_cancelled_runs = cancel_experiment_runs(
            runs_to_cancel=runs_to_purge, namespace=namespace)
        not_purged_runs = not_cancelled_runs

        if cancel_whole_experiment:
            # Delete associated workflows
            experiment_associated_workflows = [
                wf for wf in ArgoWorkflow.list(namespace=namespace)
                if wf.labels.get('experimentName') == experiment.name
            ]
            for wf in experiment_associated_workflows:
                wf.delete()

            # Remove tags from git repo manager
            try:
                delete_exp_tag_from_git_repo_manager(
                    experiment_name=experiment.name, username=namespace,
                    experiments_workdir=get_run_environment_path(''))
            except Exception:
                handle_error(logger, Texts.GIT_REPO_MANAGER_ERROR_MSG,
                             Texts.GIT_REPO_MANAGER_ERROR_MSG)
                raise

        for run in cancelled_runs:
            logger.debug(f"Purging {run.name} run ...")
            click.echo(Texts.PURGING_START_MSG.format(run_name=run.name))
            try:
                with spinner(text=Texts.PURGING_PROGRESS_MSG.format(run_name=run.name)):
                    # purge helm release
                    delete_helm_release(run.name, namespace=namespace, purge=True)
                    # delete run
                    kubectl.delete_k8s_object("run", run.name)
                    purged_runs.append(run)
            except Exception as exe:
                not_purged_runs.append(run)
                logger.exception("Error during purging runs.")
                # occurence of NotFound error may mean, that run has been removed earlier
                if "NotFound" not in str(exe):
                    # NOTE(review): experiment_name is not defined in this function -
                    # presumably a module-level name; verify against the module.
                    click.echo(Texts.INCOMPLETE_PURGE_ERROR_MSG.format(
                        experiment_name=experiment_name))
                    raise exe
            try:
                # clear run logs
                # only administrators are allowed to delete log entries
                if is_current_user_administrator():
                    logger.debug(f"Clearing logs for {run.name} run.")
                    with spinner(text=Texts.PURGING_LOGS_PROGRESS_MSG.format(run_name=run.name)):
                        k8s_es_client.delete_logs_for_run(run=run.name, namespace=namespace)
            except Exception:
                # log clearing is best-effort; a failure must not block the purge
                logger.exception("Error during clearing run logs.")

            # CAN-1099 - docker garbage collector has errors that prevent from correct removal of images
            # try:
            #     try to remove images from docker registry
            #     delete_images_for_experiment(exp_name=run.name)
            # except Exception:
            #     logger.exception("Error during removing images.")

        if cancel_whole_experiment and not not_purged_runs:
            try:
                kubectl.delete_k8s_object("experiment", exp_name)
            except Exception:
                # problems during deleting experiments are hidden as if runs were
                # cancelled user doesn't have a possibility to remove them
                logger.exception("Error during purging experiment.")
    except Exception:
        # return whatever was purged so far; caller decides how to report
        logger.exception("Error during purging experiment.")
        return purged_runs, not_purged_runs

    return purged_runs, not_purged_runs
def get_logs(experiment_name: str, min_severity: SeverityLevel, start_date: str,
             end_date: str, pod_ids: str, pod_status: PodStatus, match: str,
             output: bool, pager: bool, follow: bool, runs_kinds: List[RunKinds],
             instance_type: str):
    """
    Show logs for a given experiment.

    :param experiment_name: exact name of the experiment whose logs should be shown
    :param min_severity: minimal severity of logs to display
    :param start_date: lower bound for log timestamps; when empty, each run's
        creation timestamp is used instead
    :param end_date: upper bound for log timestamps
    :param pod_ids: comma-separated list of pod ids used to filter logs
    :param pod_status: status of pods used to filter logs
    :param match: regular expression selecting runs by name (mutually exclusive
        with experiment_name)
    :param output: when True, logs are saved to files instead of being printed
    :param pager: when True, logs are displayed through a pager
    :param follow: when True (and output is False), logs are streamed continuously
    :param runs_kinds: kinds of runs taken into account while listing them
    :param instance_type: instance kind name used in user-facing messages
    """
    # check whether we have runs with a given name
    # exactly one of experiment_name / match must be provided
    if experiment_name and match:
        handle_error(user_msg=Texts.NAME_M_BOTH_GIVEN_ERROR_MSG.format(
            instance_type=instance_type))
        exit(1)
    elif not experiment_name and not match:
        handle_error(user_msg=Texts.NAME_M_NONE_GIVEN_ERROR_MSG.format(
            instance_type=instance_type))
        exit(1)

    try:
        # elasticsearch is reached through the k8s API-server service proxy
        es_client = K8sElasticSearchClient(
            host=f'{get_kubectl_host(with_port=True)}'
            f'/api/v1/namespaces/nauta/services/nauta-elasticsearch:nauta/proxy',
            verify_certs=False, use_ssl=True,
            headers={'Authorization': get_api_key()})
        namespace = get_kubectl_current_context_namespace()
        if match:
            # a regex match doubles as the display name in error messages
            experiment_name = match
            name_filter = match
        else:
            # exact-name lookup is expressed as an anchored regex
            name_filter = f'^{experiment_name}$'
        runs = Run.list(namespace=namespace, name_filter=name_filter,
                        run_kinds_filter=runs_kinds)
        if not runs:
            # handled below by the ValueError branch
            raise ValueError(
                f'Run with given name: {experiment_name} does not exists in namespace {namespace}.'
            )
        pod_ids = pod_ids.split(',') if pod_ids else None  # type: ignore
        # following logs makes no sense when they are redirected to a file
        follow_logs = True if follow and not output else False

        if output and len(runs) > 1:
            click.echo(Texts.MORE_EXP_LOGS_MESSAGE)

        for run in runs:
            start_date = start_date if start_date else run.creation_timestamp

            run_logs_generator = es_client.get_experiment_logs_generator(
                run=run, namespace=namespace, min_severity=min_severity,
                start_date=start_date, end_date=end_date, pod_ids=pod_ids,
                pod_status=pod_status, follow=follow_logs)

            if output:
                save_logs_to_file(logs_generator=run_logs_generator,
                                  instance_name=run.name,
                                  instance_type=instance_type)
            else:
                # prefix each run's logs with its name when showing several runs
                if len(runs) > 1:
                    click.echo(f'Experiment : {run.name}')
                print_logs(run_logs_generator=run_logs_generator, pager=pager)
    except ValueError:
        # raised above when no run matched the name/regex
        handle_error(
            logger,
            Texts.EXPERIMENT_NOT_EXISTS_ERROR_MSG.format(
                experiment_name=experiment_name,
                instance_type=instance_type.capitalize()),
            Texts.EXPERIMENT_NOT_EXISTS_ERROR_MSG.format(
                experiment_name=experiment_name,
                instance_type=instance_type.capitalize()))
        exit(1)
    except Exception:
        handle_error(
            logger,
            Texts.LOGS_GET_OTHER_ERROR_MSG.format(instance_type=instance_type),
            Texts.LOGS_GET_OTHER_ERROR_MSG.format(instance_type=instance_type))
        exit(1)
def get_logs(operation_name: str, start_date: str, end_date: str, match: str,
             output: bool, pager: bool, follow: bool):
    """
    Show logs for a given model export operation.

    :param operation_name: exact name of the operation whose logs should be shown
    :param start_date: lower bound for log timestamps; when empty, each
        workflow's start time is used instead
    :param end_date: upper bound for log timestamps
    :param match: regular expression selecting operations by name (mutually
        exclusive with operation_name)
    :param output: when True, logs are saved to files instead of being printed
    :param pager: when True, logs are displayed through a pager
    :param follow: when True (and output is False), logs are streamed continuously
    """
    # check whether we have operations with a given name
    # exactly one of operation_name / match must be provided
    if operation_name and match:
        handle_error(user_msg=Texts.NAME_M_BOTH_GIVEN_ERROR_MSG)
        exit(1)
    elif not operation_name and not match:
        handle_error(user_msg=Texts.NAME_M_NONE_GIVEN_ERROR_MSG)
        exit(1)

    try:
        # elasticsearch is reached through a local tunnel for the whole
        # duration of log retrieval
        with K8sProxy(NAUTAAppNames.ELASTICSEARCH) as proxy:
            es_client = K8sElasticSearchClient(host="127.0.0.1", port=proxy.tunnel_port,
                                               verify_certs=False, use_ssl=False)
            namespace = get_kubectl_current_context_namespace()
            if match:
                # a regex match doubles as the display name in error messages
                operation_name = match
                name_filter = match
            else:
                # exact-name lookup is expressed as an anchored regex
                name_filter = f'^{operation_name}$'
            workflows = ArgoWorkflow.list(namespace=namespace, name_filter=name_filter)
            if not workflows:
                # handled below by the ValueError branch
                raise ValueError(f'Operation with given name: {operation_name} does not '
                                 f'exists in namespace {namespace}.')

            # following logs makes no sense when they are redirected to a file
            follow_logs = True if follow and not output else False

            if output and len(workflows) > 1:
                click.echo(Texts.MORE_EXP_LOGS_MESSAGE)

            for workflow in workflows:
                start_date = start_date if start_date else workflow.started_at

                ops_logs_generator = es_client.get_argo_workflow_logs_generator(
                    workflow=workflow, namespace=namespace, start_date=start_date,
                    end_date=end_date, follow=follow_logs)

                if output:
                    save_logs_to_file(logs_generator=ops_logs_generator,
                                      instance_name=workflow.name,
                                      instance_type="operation")
                else:
                    # prefix each workflow's logs with its name when showing several
                    if len(workflows) > 1:
                        click.echo(f'Operation : {workflow.name}')
                    print_logs(run_logs_generator=ops_logs_generator, pager=pager)
    except K8sProxyCloseError:
        handle_error(logger, Texts.PROXY_CLOSE_LOG_ERROR_MSG, Texts.PROXY_CLOSE_LOG_ERROR_MSG)
        exit(1)
    except LocalPortOccupiedError as exe:
        handle_error(logger,
                     Texts.LOCAL_PORT_OCCUPIED_ERROR_MSG.format(exception_message=exe.message),
                     Texts.LOCAL_PORT_OCCUPIED_ERROR_MSG.format(exception_message=exe.message))
        exit(1)
    except K8sProxyOpenError:
        handle_error(logger, Texts.PROXY_CREATION_ERROR_MSG, Texts.PROXY_CREATION_ERROR_MSG)
        exit(1)
    except ValueError:
        # raised above when no workflow matched the name/regex.
        # BUGFIX: the second format call previously used the wrong keyword
        # (experiment_name=...) for a template expecting {operation_name},
        # which made str.format raise KeyError while building the message.
        handle_error(logger,
                     Texts.OPERATION_NOT_EXISTS_ERROR_MSG.format(operation_name=operation_name),
                     Texts.OPERATION_NOT_EXISTS_ERROR_MSG.format(operation_name=operation_name))
        exit(1)
    except Exception:
        handle_error(logger, Texts.LOGS_GET_OTHER_ERROR_MSG, Texts.LOGS_GET_OTHER_ERROR_MSG)
        exit(1)