예제 #1
0
def purge_user(username: str):
    """
    Clean up the data that a removed user left behind outside of Kubernetes.

    Kubernetes objects are not handled here; they go away together with the
    user's namespace.

    :param username: name of the user whose artifacts should be removed
    :raises Exception: every error hit during the cleanup is logged and re-raised
    """
    try:
        # Step 1: drop the user's logs from elasticsearch, reached through the
        # Kubernetes API server service proxy.
        with spinner(text=TextsDel.DELETION_DELETING_USERS_EXPERIMENTS):
            elasticsearch_url = (
                f'{get_kubectl_host(with_port=True)}'
                f'/api/v1/namespaces/nauta/services/nauta-elasticsearch:nauta/proxy')
            auth_headers = {'Authorization': get_api_key()}
            logs_client = K8sElasticSearchClient(host=elasticsearch_url,
                                                 verify_certs=False,
                                                 use_ssl=True,
                                                 headers=auth_headers)
            logs_client.delete_logs_for_namespace(username)

        # Step 2: drop the user from the git repo manager, reached through a
        # local proxy tunnel.
        with k8s_proxy_context_manager.K8sProxy(NAUTAAppNames.GIT_REPO_MANAGER) as proxy:
            with spinner(text=TextsDel.DELETION_DELETING_USERS_REPOSITORY):
                repo_client = GitRepoManagerClient(host='127.0.0.1',
                                                   port=proxy.tunnel_port)
                repo_client.delete_nauta_user(username=username)
    except K8sProxyCloseError as err:
        logger.exception("Error during closing of a proxy.")
        raise err
    except Exception as err:
        logger.exception(f"Error during removal of {username} user data")
        raise err
예제 #2
0
파일: logs.py 프로젝트: yuanbw/nauta
def logs(state: State, workflow_name: str):
    """
    Print the elasticsearch logs of the Argo workflow named ``workflow_name``.

    The workflow is looked up in the namespace of the current kubectl context.
    When it does not exist, a message is echoed and the process exits with
    code 0. Every handled error ends the process with code 1.

    :param state: not used inside this function
    :param workflow_name: name of the workflow whose logs should be printed
    """
    try:
        current_namespace = get_kubectl_current_context_namespace()
        workflow: ArgoWorkflow = ArgoWorkflow.get(namespace=current_namespace, name=workflow_name)
        if not workflow:
            click.echo(Texts.NOT_FOUND_MSG.format(workflow_name=workflow_name))
            exit(0)

        with K8sProxy(NAUTAAppNames.ELASTICSEARCH) as es_proxy:
            client = K8sElasticSearchClient(host="127.0.0.1", port=es_proxy.tunnel_port,
                                            verify_certs=False, use_ssl=False)
            entries = client.get_argo_workflow_logs_generator(workflow=workflow,
                                                              namespace=current_namespace,
                                                              start_date=workflow.started_at)
            for entry in entries:
                # Entries that contain only whitespace are not printed.
                if entry.content.isspace():
                    continue
                click.echo(f'{entry.date} {entry.pod_name} {entry.content}')
    except K8sProxyCloseError:
        handle_error(logger, Texts.PROXY_CLOSE_LOG_ERROR_MSG, Texts.PROXY_CLOSE_USER_ERROR_MSG)
        exit(1)
    except LocalPortOccupiedError as exe:
        # The same formatted text serves as both the log and the user message.
        port_msg = Texts.LOCAL_PORT_OCCUPIED_ERROR_MSG.format(exception_message=exe.message)
        handle_error(logger, port_msg, port_msg)
        exit(1)
    except K8sProxyOpenError:
        handle_error(logger, Texts.PROXY_CREATION_ERROR_MSG, Texts.PROXY_CREATION_ERROR_MSG)
        exit(1)
    except Exception:
        handle_error(logger, Texts.OTHER_ERROR_MSG, Texts.OTHER_ERROR_MSG, add_verbosity_msg=True)
        exit(1)
예제 #3
0
파일: logs.py 프로젝트: pnijhara/nauta
def logs(ctx: click.Context, workflow_name: str):
    """
    Print the elasticsearch logs of the Argo workflow named ``workflow_name``.

    Elasticsearch is reached through the Kubernetes API server service proxy.
    When the workflow does not exist in the namespace of the current kubectl
    context, a message is echoed and the process exits with code 0. Any other
    error is reported and ends the process with code 1.

    :param ctx: not used inside this function
    :param workflow_name: name of the workflow whose logs should be printed
    """
    try:
        current_namespace = get_kubectl_current_context_namespace()
        workflow: ArgoWorkflow = ArgoWorkflow.get(namespace=current_namespace,
                                                  name=workflow_name)
        if not workflow:
            click.echo(Texts.NOT_FOUND_MSG.format(workflow_name=workflow_name))
            exit(0)

        elasticsearch_url = (
            f'{get_kubectl_host(with_port=True)}'
            f'/api/v1/namespaces/nauta/services/nauta-elasticsearch:nauta/proxy')
        auth_headers = {'Authorization': get_api_key()}
        client = K8sElasticSearchClient(host=elasticsearch_url,
                                        verify_certs=False,
                                        use_ssl=True,
                                        headers=auth_headers)
        entries = client.get_argo_workflow_logs_generator(
            workflow=workflow,
            namespace=current_namespace,
            start_date=workflow.started_at)
        for entry in entries:
            # Entries that contain only whitespace are not printed.
            if entry.content.isspace():
                continue
            click.echo(f'{entry.date} {entry.pod_name} {entry.content}')
    except Exception:
        handle_error(logger, Texts.OTHER_ERROR_MSG, Texts.OTHER_ERROR_MSG, add_verbosity_msg=True)
        exit(1)
예제 #4
0
def test_delete_logs_for_run(mocker):
    """delete_logs_for_run should call delete_by_query with run-name and namespace terms."""
    run_name, namespace = 'test_run', 'fake-namespace'

    es_client = K8sElasticSearchClient(host='fake', port=8080, namespace='kube-system')
    delete_by_query_mock = mocker.patch.object(es_client, 'delete_by_query')

    es_client.delete_logs_for_run(run_name, namespace)

    # Both terms have to match, in this order, over all indices.
    run_term = {"term": {'kubernetes.labels.runName.keyword': run_name}}
    namespace_term = {"term": {'kubernetes.namespace_name.keyword': namespace}}
    expected_body = {"query": {"bool": {"must": [run_term, namespace_term]}}}

    delete_by_query_mock.assert_called_with(index='_all', body=expected_body)
예제 #5
0
def test_delete_logs_for_namespace(mocker):
    """delete_logs_for_namespace should result in exactly one delete_by_query call."""
    es_client = K8sElasticSearchClient(host='fake', port=8080, namespace='kube-system')
    delete_by_query_mock = mocker.patch.object(es_client, 'delete_by_query')

    es_client.delete_logs_for_namespace("namespace")

    assert delete_by_query_mock.call_count == 1
예제 #6
0
def test_full_log_search(mocker):
    """get_log_generator without arguments should turn the stubbed scan output into TEST_LOG_ENTRIES."""
    es_client = K8sElasticSearchClient(host='fake', port=8080, namespace='kube-system')

    # Replace the elasticsearch scan helper so no real cluster is needed.
    scan_stub = mocker.patch('logs_aggregator.k8s_es_client.elasticsearch.helpers.scan')
    scan_stub.return_value = iter(TEST_SCAN_OUTPUT)

    found_entries = list(es_client.get_log_generator())
    assert found_entries == TEST_LOG_ENTRIES
예제 #7
0
def test_get_experiment_logs_time_range(mocker):
    """get_experiment_logs_generator should query by run name, namespace and the given time range."""
    experiment_name = 'fake-experiment'
    namespace = 'fake-namespace'
    start_date = '2018-04-17T09:28:39+00:00'
    end_date = '2018-04-17T09:28:49+00:00'

    es_client = K8sElasticSearchClient(host='fake', port=8080, namespace='kube-system')
    log_search_mock = mocker.patch.object(es_client, 'get_log_generator')
    log_search_mock.return_value = iter(TEST_LOG_ENTRIES)

    fake_run = MagicMock(spec=Run)
    fake_run.name = experiment_name

    produced_logs = es_client.get_experiment_logs_generator(run=fake_run,
                                                            namespace=namespace,
                                                            start_date=start_date,
                                                            end_date=end_date)

    for produced, expected in zip(produced_logs, TEST_LOG_ENTRIES):
        assert produced == expected

    # Expected elasticsearch query: two term matches, a closed time range,
    # sorted by ascending timestamp.
    run_term = {'term': {'kubernetes.labels.runName.keyword': experiment_name}}
    namespace_term = {'term': {'kubernetes.namespace_name.keyword': namespace}}
    time_range = {"range": {"@timestamp": {"gte": start_date, "lte": end_date}}}
    expected_query = {
        "query": {"bool": {"must": [run_term, namespace_term], "filter": time_range}},
        "sort": {"@timestamp": {"order": "asc"}},
    }

    log_search_mock.assert_called_with(query_body=expected_query, filters=[], index='_all')
예제 #8
0
def test_full_log_search_filter_idempotent(mocker):
    """A filter that accepts every entry should leave the log search result unchanged."""
    es_client = K8sElasticSearchClient(host='fake', port=8080, namespace='kube-system')

    # Replace the elasticsearch scan helper so no real cluster is needed.
    scan_stub = mocker.patch('logs_aggregator.k8s_es_client.elasticsearch.helpers.scan')
    scan_stub.return_value = iter(TEST_SCAN_OUTPUT)

    accept_everything = [lambda x: True]
    filtered_entries = list(es_client.get_log_generator(filters=accept_everything))

    assert filtered_entries == TEST_LOG_ENTRIES
예제 #9
0
def test_get_workflow_logs(mocker):
    """get_argo_workflow_logs_generator should query by workflow label, namespace and start date."""
    namespace = 'fake-namespace'
    workflow_name = 'test-workflow'
    workflow_start_date = '2018-04-17T09:28:39+00:00'

    es_client = K8sElasticSearchClient(host='fake', port=8080, namespace='kube-system')
    log_search_mock = mocker.patch.object(es_client, 'get_log_generator')
    log_search_mock.return_value = iter(TEST_LOG_ENTRIES)

    workflow_mock = MagicMock(spec=ArgoWorkflow)
    workflow_mock.name = workflow_name

    produced_logs = es_client.get_argo_workflow_logs_generator(workflow=workflow_mock,
                                                               namespace=namespace,
                                                               start_date=workflow_start_date)

    for produced, expected in zip(produced_logs, TEST_LOG_ENTRIES):
        assert produced == expected

    # Expected elasticsearch query: two term matches, an open-ended time range,
    # sorted by ascending timestamp.
    workflow_term = {
        'term': {'kubernetes.labels.workflows_argoproj_io/workflow.keyword': workflow_mock.name}
    }
    namespace_term = {'term': {'kubernetes.namespace_name.keyword': namespace}}
    time_range = {"range": {"@timestamp": {"gte": workflow_start_date}}}
    expected_query = {
        "query": {"bool": {"must": [workflow_term, namespace_term], "filter": time_range}},
        "sort": {"@timestamp": {"order": "asc"}},
    }

    log_search_mock.assert_called_with(query_body=expected_query, filters=[], index='_all')
예제 #10
0
파일: common.py 프로젝트: hzjai0624/nauta
def _debug_workflow_logs(workflow: ArgoWorkflow, namespace: str):
    """
    Write the elasticsearch logs of ``workflow`` to the debug log.

    Best effort only: any exception is logged and swallowed, so this function
    never propagates an error to its caller.

    :param workflow: workflow whose logs should be dumped
    :param namespace: namespace used to filter the log entries
    """
    try:
        with K8sProxy(NAUTAAppNames.ELASTICSEARCH) as es_proxy:
            client = K8sElasticSearchClient(host="127.0.0.1", port=es_proxy.tunnel_port,
                                            verify_certs=False, use_ssl=False)
            entries = client.get_argo_workflow_logs_generator(workflow=workflow,
                                                              namespace=namespace,
                                                              start_date=workflow.started_at)
            # The same banner line opens and closes the dump.
            banner = f'=== Workflow {workflow.name} logs ==='
            log.debug(banner)
            for entry in entries:
                # Entries that contain only whitespace are not logged.
                if entry.content.isspace():
                    continue
                log.debug(f'{entry.date} {entry.pod_name} {entry.content}')
            log.debug(banner)
    except Exception:
        log.exception(f'Failed to get {workflow.name} worklfow logs.')
예제 #11
0
def purge_user(username: str):
    """
    Clean up the elasticsearch data that a removed user left behind.

    Kubernetes objects are not handled here; they go away together with the
    user's namespace.

    :param username: name of the user whose artifacts should be removed
    :raises Exception: every error hit during the cleanup is logged and re-raised
    """
    try:
        # Elasticsearch is reached through a local proxy tunnel.
        with k8s_proxy_context_manager.K8sProxy(NAUTAAppNames.ELASTICSEARCH) as proxy:
            with spinner(text=TextsDel.DELETION_DELETING_USERS_EXPERIMENTS):
                logs_client = K8sElasticSearchClient(host="127.0.0.1",
                                                     port=proxy.tunnel_port,
                                                     verify_certs=False,
                                                     use_ssl=False)
                logs_client.delete_logs_for_namespace(username)
    except K8sProxyCloseError as err:
        logger.exception("Error during closing of a proxy for elasticsearch.")
        raise err
    except Exception as err:
        logger.exception("Error during removal of data from elasticsearch")
        raise err
예제 #12
0
def get_logs(experiment_name: str, min_severity: SeverityLevel, start_date: str,
             end_date: str, pod_ids: str, pod_status: PodStatus, match: str, output: bool, pager: bool, follow: bool,
             runs_kinds: List[RunKinds], instance_type: str):
    """
    Show logs for a given experiment.

    Exactly one of ``experiment_name`` and ``match`` has to be given; otherwise
    an error is reported and the process exits with code 1. Every handled
    error inside the log retrieval also ends the process with code 1.

    :param experiment_name: exact run name; it is turned into the filter ``^name$``
    :param min_severity: name of a SeverityLevel member, or a falsy value for none
    :param start_date: lower bound passed to the log query; when falsy, each
        run's own creation_timestamp is used instead
    :param end_date: upper bound passed to the log query
    :param pod_ids: comma-separated pod ids, or a falsy value for none
    :param pod_status: name of a PodStatus member, or a falsy value for none
    :param match: name filter passed unchanged to Run.list
    :param output: save logs to files instead of printing them; disables following
    :param pager: forwarded to print_logs
    :param follow: follow the logs; only honoured when ``output`` is falsy
    :param runs_kinds: run kinds passed to Run.list
    :param instance_type: instance type name used in user-facing messages
    """
    # check whether we have runs with a given name
    if experiment_name and match:
        handle_error(user_msg=Texts.NAME_M_BOTH_GIVEN_ERROR_MSG.format(instance_type=instance_type))
        exit(1)
    elif not experiment_name and not match:
        handle_error(user_msg=Texts.NAME_M_NONE_GIVEN_ERROR_MSG.format(instance_type=instance_type))
        exit(1)

    try:
        with K8sProxy(NAUTAAppNames.ELASTICSEARCH) as proxy:
            es_client = K8sElasticSearchClient(host="127.0.0.1", port=proxy.tunnel_port,
                                               verify_certs=False, use_ssl=False)
            namespace = get_kubectl_current_context_namespace()
            if match:
                # experiment_name is reused below in the "does not exist" messages.
                experiment_name = match
                name_filter = match
            else:
                name_filter = f'^{experiment_name}$'
            runs = Run.list(namespace=namespace, name_filter=name_filter, run_kinds_filter=runs_kinds)
            if not runs:
                raise ValueError(f'Run with given name: {experiment_name} does not exists in namespace {namespace}.')

            pod_ids = pod_ids.split(',') if pod_ids else None
            min_severity = SeverityLevel[min_severity] if min_severity else None
            pod_status = PodStatus[pod_status] if pod_status else None
            follow_logs = bool(follow and not output)

            if output and len(runs) > 1:
                click.echo(Texts.MORE_EXP_LOGS_MESSAGE)

            for run in runs:
                # Resolve the start date separately for every run. Rebinding
                # `start_date` itself would make all later runs inherit the
                # creation timestamp of the first run.
                run_start_date = start_date if start_date else run.creation_timestamp

                run_logs_generator = es_client.get_experiment_logs_generator(run=run, namespace=namespace,
                                                                             min_severity=min_severity,
                                                                             start_date=run_start_date,
                                                                             end_date=end_date,
                                                                             pod_ids=pod_ids, pod_status=pod_status,
                                                                             follow=follow_logs)

                if output:
                    save_logs_to_file(run=run, run_logs_generator=run_logs_generator, instance_type=instance_type)
                else:
                    if len(runs) > 1:
                        click.echo(f'Experiment : {run.name}')
                    print_logs(run_logs_generator=run_logs_generator, pager=pager)

    except K8sProxyCloseError:
        handle_error(logger, Texts.PROXY_CLOSE_LOG_ERROR_MSG, Texts.PROXY_CLOSE_USER_ERROR_MSG)
        exit(1)
    except LocalPortOccupiedError as exe:
        port_msg = Texts.LOCAL_PORT_OCCUPIED_ERROR_MSG.format(exception_message=exe.message)
        handle_error(logger, port_msg, port_msg)
        exit(1)
    except K8sProxyOpenError:
        handle_error(logger, Texts.PROXY_CREATION_ERROR_MSG, Texts.PROXY_CREATION_ERROR_MSG)
        exit(1)
    except ValueError:
        # Any ValueError raised inside the block above is reported as a missing run.
        not_exists_msg = Texts.EXPERIMENT_NOT_EXISTS_ERROR_MSG.format(experiment_name=experiment_name,
                                                                      instance_type=instance_type.capitalize())
        handle_error(logger, not_exists_msg, not_exists_msg)
        exit(1)
    except Exception:
        other_msg = Texts.LOGS_GET_OTHER_ERROR_MSG.format(instance_type=instance_type)
        handle_error(logger, other_msg, other_msg)
        exit(1)
예제 #13
0
def cancel(state: State,
           name: str,
           match: str,
           purge: bool,
           pod_ids: str,
           pod_status: str,
           listed_runs_kinds: List[RunKinds] = None):
    """
    Cancels chosen experiments based on a name provided as a parameter.

    Exactly one of ``name`` and ``match`` has to be given, otherwise an error
    is reported and the process exits with code 1. After listing the affected
    runs the user is asked for confirmation; declining exits with code 0.
    When at least one run could not be cancelled/purged the process exits
    with code 1 at the very end.

    :param state: not used inside this function
    :param name: name of an experiment; when no matching experiment of a listed
        kind exists it is treated as an exact run name (``^name$``)
    :param match: name filter passed to Run.list when ``name`` is not given
    :param purge: when set, runs in FAILED, COMPLETE and CANCELLED state are
        handled as well and runs are purged (through purge_experiment) instead
        of being cancelled
    :param pod_ids: when given (or when ``pod_status`` is given) the work is
        delegated to cancel_pods_mode and the process exits with code 0
    :param pod_status: see ``pod_ids``
    :param listed_runs_kinds: run kinds taken into account; defaults to
        TRAINING and JUPYTER

    NOTE(review): ``experiment_name`` and ``experiment_name_plural`` are not
    defined in this function - presumably module-level names; verify.
    """
    if not listed_runs_kinds:
        listed_runs_kinds = [RunKinds.TRAINING, RunKinds.JUPYTER]

    # check whether we have runs with a given name
    if name and match:
        handle_error(user_msg=Texts.NAME_M_BOTH_GIVEN_ERROR_MSG)
        exit(1)

    if not name and not match:
        handle_error(user_msg=Texts.NAME_M_NONE_GIVEN_ERROR_MSG)
        exit(1)

    current_namespace = get_current_namespace()

    # Pod-level cancellation is a separate mode that ends the command here.
    if pod_ids or pod_status:
        if not name:
            name = match

        cancel_pods_mode(namespace=current_namespace,
                         run_name=name,
                         pod_ids=pod_ids,
                         pod_status=pod_status)
        exit(0)

    # True when runs should be listed by the name of their experiment,
    # False when they should be listed by a filter on the run name.
    search_for_experiment = False
    exp_to_be_cancelled = None

    if name:
        exp_to_be_cancelled = Experiment.get(namespace=current_namespace,
                                             name=name)
        exp_to_be_cancelled_kind = RunKinds(exp_to_be_cancelled.metadata['labels'].get('runKind')) \
            if exp_to_be_cancelled else None
        # An experiment of a kind that is not listed is treated as not found.
        exp_to_be_cancelled = exp_to_be_cancelled if exp_to_be_cancelled_kind in listed_runs_kinds else None

        if exp_to_be_cancelled:
            search_for_experiment = True
        else:
            # No such experiment - fall back to an exact match on the run name.
            name = f"^{name}$"
    else:
        name = match

    list_of_all_runs = None

    # States of runs that may be cancelled; purge widens the list below.
    list_of_applicable_states = [RunStatus.QUEUED, RunStatus.RUNNING]

    if purge:
        list_of_applicable_states.extend(
            [RunStatus.FAILED, RunStatus.COMPLETE, RunStatus.CANCELLED])

    try:
        if search_for_experiment:
            list_of_all_runs = Run.list(namespace=current_namespace,
                                        exp_name_filter=[name],
                                        run_kinds_filter=listed_runs_kinds)
        else:
            list_of_all_runs = Run.list(namespace=current_namespace,
                                        name_filter=name,
                                        run_kinds_filter=listed_runs_kinds)
    except Exception:
        handle_error(
            logger,
            Texts.LIST_RUNS_ERROR_MSG.format(
                experiment_name_plural=experiment_name_plural),
            Texts.LIST_RUNS_ERROR_MSG.format(
                experiment_name_plural=experiment_name_plural))
        exit(1)

    # Handle cancellation of experiments with no associated Runs
    # NOTE(review): unless cancel_uninitialized_experiment terminates the
    # process itself, execution falls through to the "lack of experiments"
    # error below - confirm against its implementation.
    if exp_to_be_cancelled and not list_of_all_runs:
        cancel_uninitialized_experiment(experiment=exp_to_be_cancelled,
                                        namespace=current_namespace,
                                        purge=purge)

    if not list_of_all_runs:
        handle_error(user_msg=Texts.LACK_OF_EXPERIMENTS_ERROR_MSG.format(
            experiment_name_plural=experiment_name_plural,
            experiment_name=experiment_name))
        exit(1)
    elif not purge and not [
            run for run in list_of_all_runs
            if run.state in [RunStatus.QUEUED, RunStatus.RUNNING]
    ]:
        # Without purge there has to be at least one queued or running run.
        handle_error(
            user_msg=Texts.LACK_OF_EXP_TO_BE_CANCELLED_ERROR_MSG.format(
                experiment_name_plural=experiment_name_plural))
        exit(1)

    # Split the runs into those that will be handled and those that are
    # skipped because of their state.
    list_of_runs_to_be_deleted: List[Run] = []
    names_of_cancelled_runs: List[str] = []

    if not purge:
        # check whether we have at least one experiment in state other than CANCELLED
        for run in list_of_all_runs:
            if run.state in list_of_applicable_states:
                list_of_runs_to_be_deleted.append(run)
            else:
                names_of_cancelled_runs.append(run.name)

        # NOTE(review): this branch looks unreachable - the check above has
        # already exited when no run is queued or running; confirm.
        if not list_of_runs_to_be_deleted:
            handle_error(
                user_msg=Texts.EXPERIMENTS_ALREADY_CANCELLED_ERROR_MSG.format(
                    experiment_name_plural=experiment_name_plural,
                    operation_word=Texts.
                    DELETE_OPERATION["deleted"] if experiment_name_plural ==
                    'pods' else Texts.CANCEL_OPERATION["cancelled"]))
            exit(1)
        elif len(list_of_runs_to_be_deleted) != len(list_of_all_runs):
            # Only some of the runs can be cancelled - show both groups.
            click.echo(
                Texts.ALREADY_CANCELLED_LIST_HEADER.format(
                    experiment_name_plural=experiment_name_plural,
                    operation_word=Texts.DELETE_OPERATION["deleted"]
                    if experiment_name_plural == 'pods' else
                    Texts.CANCEL_OPERATION["cancelled"]))
            # The loop variable reuses (and overwrites) the `name` parameter;
            # `name` is not read again after this point.
            for name in names_of_cancelled_runs:
                click.echo(f"     - {name}")
            click.echo(
                Texts.CAN_BE_CANCELLED_LIST_HEADER.format(
                    experiment_name_plural=experiment_name_plural,
                    operation_word=Texts.DELETE_OPERATION["deleted"]
                    if experiment_name_plural == 'pods' else
                    Texts.CANCEL_OPERATION["cancelled"]))
            for run in list_of_runs_to_be_deleted:
                click.echo(f"     - {run.name}")
        else:
            # All of the listed runs can be cancelled.
            click.echo(
                Texts.WILL_BE_CANCELLED_LIST_HEADER.format(
                    experiment_name_plural=experiment_name_plural,
                    operation_word=Texts.DELETE_OPERATION["deleted"]
                    if experiment_name_plural == 'pods' else
                    Texts.CANCEL_OPERATION["cancelled"]))
            for run in list_of_runs_to_be_deleted:
                click.echo(f"     - {run.name}")
    else:
        # With purge every listed run is handled, regardless of its state.
        list_of_runs_to_be_deleted = list_of_all_runs
        click.echo(
            Texts.WILL_BE_PURGED_LIST_HEADER.format(
                experiment_name_plural=experiment_name_plural,
                operation_word=Texts.DELETE_OPERATION["deleted"]
                if experiment_name_plural == 'pods' else
                Texts.CANCEL_OPERATION["cancelled"]))
        for run in list_of_runs_to_be_deleted:
            click.echo(f"     - {run.name}")

    # Ask the user for confirmation; declining ends the command with code 0.
    if not click.confirm(
            Texts.CONFIRM_CANCEL_MSG.format(
                experiment_name_plural=experiment_name_plural,
                operation_word=Texts.DELETE_OPERATION["deletion"]
                if experiment_name_plural == 'pods' else
                Texts.CANCEL_OPERATION["cancellation"])):
        handle_error(user_msg=Texts.CANCELLATION_ABORTED_MSG.format(
            experiment_name_plural=experiment_name_plural,
            operation_word=Texts.
            DELETE_OPERATION["deletion"] if experiment_name_plural ==
            'pods' else Texts.CANCEL_OPERATION["cancellation"]))
        exit(0)

    # group runs by experiments
    exp_with_runs: defaultdict = defaultdict(list)

    for run in list_of_runs_to_be_deleted:
        exp_with_runs[run.experiment_name].append(run)

    deleted_runs = []
    not_deleted_runs = []

    if purge:
        # Connect to elasticsearch in order to purge run logs
        try:
            with K8sProxy(NAUTAAppNames.ELASTICSEARCH) as proxy:
                es_client = K8sElasticSearchClient(
                    host="127.0.0.1",
                    port=proxy.tunnel_port,
                    verify_certs=False,
                    use_ssl=False,
                    with_admin_privledges=is_current_user_administrator())
                for exp_name, run_list in exp_with_runs.items():
                    # A failure for one experiment marks its runs as not
                    # deleted and does not stop the remaining experiments.
                    try:
                        exp_del_runs, exp_not_del_runs = purge_experiment(
                            exp_name=exp_name,
                            runs_to_purge=run_list,
                            namespace=current_namespace,
                            k8s_es_client=es_client)
                        deleted_runs.extend(exp_del_runs)
                        not_deleted_runs.extend(exp_not_del_runs)
                    except Exception:
                        handle_error(logger, Texts.OTHER_CANCELLING_ERROR_MSG)
                        not_deleted_runs.extend(run_list)
        except K8sProxyCloseError:
            handle_error(logger, Texts.PROXY_CLOSING_ERROR_LOG_MSG,
                         Texts.PROXY_CLOSING_ERROR_USER_MSG)
            exit(1)
        except LocalPortOccupiedError as exe:
            handle_error(
                logger, Texts.PORT_OCCUPIED_ERROR_LOG_MSG,
                Texts.PORT_OCCUPIED_ERROR_USER_MSG.format(
                    exception_message=exe.message))
            exit(1)
        except K8sProxyOpenError:
            handle_error(logger, Texts.PROXY_OPEN_ERROR_MSG,
                         Texts.PROXY_OPEN_ERROR_MSG)
            exit(1)
    else:
        for exp_name, run_list in exp_with_runs.items():
            # A failure for one experiment marks its runs as not deleted and
            # does not stop the remaining experiments.
            try:
                exp_del_runs, exp_not_del_runs = cancel_experiment(
                    exp_name=exp_name,
                    runs_to_cancel=run_list,
                    namespace=current_namespace)
                deleted_runs.extend(exp_del_runs)
                not_deleted_runs.extend(exp_not_del_runs)
            except Exception:
                handle_error(logger, Texts.OTHER_CANCELLING_ERROR_MSG)
                not_deleted_runs.extend(run_list)

    # Final report: successfully handled runs first, then the failures.
    if deleted_runs:
        click.echo(
            Texts.SUCCESSFULLY_CANCELLED_LIST_HEADER.format(
                experiment_name_plural=experiment_name_plural,
                operation_word=Texts.DELETE_OPERATION["deleted"]
                if experiment_name_plural == 'pods' else
                Texts.CANCEL_OPERATION["cancelled"]))
        for run in deleted_runs:
            click.echo(f"     - {run.name}")

    if not_deleted_runs:
        click.echo(
            Texts.FAILED_TO_CANCEL_LIST_HEADER.format(
                experiment_name_plural=experiment_name_plural,
                operation_word=Texts.DELETE_OPERATION["deleted"]
                if experiment_name_plural == 'pods' else
                Texts.CANCEL_OPERATION["cancelled"]))
        for run in not_deleted_runs:
            click.echo(f"     - {run.name}")
        # Any failure makes the whole command end with a non-zero exit code.
        sys.exit(1)
예제 #14
0
def purge_experiment(exp_name: str, runs_to_purge: List[Run],
                     k8s_es_client: K8sElasticSearchClient,
                     namespace: str) -> Tuple[List[Run], List[Run]]:
    """
    Purge the given runs of the experiment named ``exp_name``.

    The runs are cancelled first, then their helm releases and run objects are
    deleted and - for administrators only - their logs are removed from
    elasticsearch. The experiment object itself, its Argo workflows and its
    git repo manager tag are removed only when the number of runs to purge
    equals the number of all runs of the experiment; otherwise the
    experiment's state remains intact.

    Errors raised after the experiment has been fetched and marked are logged
    and the lists collected so far are returned, so callers get a partial
    result instead of an exception.

    :param exp_name: name of the experiment to which the runs in runs_to_purge belong
    :param runs_to_purge: list of runs that should be purged, they have to belong to exp_name experiment
    :param k8s_es_client: Kubernetes ElasticSearch client
    :param namespace: namespace where experiment is located
    :return: two lists - first contains runs that were purged successfully, second - those which weren't
    :raises RuntimeError: when the experiment cannot be found

    NOTE(review): ``experiment_name`` used in the incomplete-purge message is
    not defined in this function - presumably a module-level name; verify.
    """
    logger.debug(f"Purging {exp_name} experiment ...")

    purged_runs: List[Run] = []
    not_purged_runs: List[Run] = []

    experiment = Experiment.get(name=exp_name, namespace=namespace)
    if not experiment:
        raise RuntimeError(Texts.GET_EXPERIMENT_ERROR_MSG)

    experiment_runs = Run.list(namespace=namespace, exp_name_filter=[exp_name])
    # check whether experiment has more runs that should be cancelled
    # (only the number of runs is compared, not their identity)
    cancel_whole_experiment = (len(experiment_runs) == len(runs_to_purge))
    if cancel_whole_experiment:
        experiment.state = ExperimentStatus.CANCELLING
        experiment.update()

    try:
        cancelled_runs, not_cancelled_runs = cancel_experiment_runs(
            runs_to_cancel=runs_to_purge, namespace=namespace)
        # Runs that could not be cancelled are not purged either.
        not_purged_runs = not_cancelled_runs

        if cancel_whole_experiment:
            # Delete associated workflows
            experiment_associated_workflows = [
                wf for wf in ArgoWorkflow.list(namespace=namespace)
                if wf.labels.get('experimentName') == experiment.name
            ]
            for wf in experiment_associated_workflows:
                wf.delete()

            # Remove tags from git repo manager
            try:
                delete_exp_tag_from_git_repo_manager(
                    experiment_name=experiment.name,
                    username=namespace,
                    experiments_workdir=get_run_environment_path(''))
            except Exception:
                handle_error(logger, Texts.GIT_REPO_MANAGER_ERROR_MSG,
                             Texts.GIT_REPO_MANAGER_ERROR_MSG)
                # Re-raised to abort the purge; caught by the outer handler below.
                raise

        for run in cancelled_runs:
            logger.debug(f"Purging {run.name} run ...")
            click.echo(Texts.PURGING_START_MSG.format(run_name=run.name))
            try:
                with spinner(text=Texts.PURGING_PROGRESS_MSG.format(
                        run_name=run.name)):
                    # purge helm release
                    delete_helm_release(run.name,
                                        namespace=namespace,
                                        purge=True)
                    # delete run
                    kubectl.delete_k8s_object("run", run.name)
                    purged_runs.append(run)
            except Exception as exe:
                not_purged_runs.append(run)
                logger.exception("Error during purging runs.")
                # occurence of NotFound error may mean, that run has been removed earlier
                # in that case the loop goes on; any other error aborts the
                # purge through the outer handler below
                if "NotFound" not in str(exe):
                    click.echo(
                        Texts.INCOMPLETE_PURGE_ERROR_MSG.format(
                            experiment_name=experiment_name))
                    raise exe
            try:
                # clear run logs
                if is_current_user_administrator():
                    logger.debug(f"Clearing logs for {run.name} run.")
                    with spinner(text=Texts.PURGING_LOGS_PROGRESS_MSG.format(
                            run_name=run.name)):
                        k8s_es_client.delete_logs_for_run(run=run.name,
                                                          namespace=namespace)
            except Exception:
                # Failing to clear logs does not mark the run as not purged.
                logger.exception("Error during clearing run logs.")

            # CAN-1099 - docker garbage collector has errors that prevent from correct removal of images
            # try:
            # try to remove images from docker registry
            #    delete_images_for_experiment(exp_name=run.name)
            # except Exception:
            #    logger.exception("Error during removing images.")

        # The experiment object is deleted only when all of its runs were purged.
        if cancel_whole_experiment and not not_purged_runs:
            try:
                kubectl.delete_k8s_object("experiment", exp_name)
            except Exception:
                # problems during deleting experiments are hidden as if runs were
                # cancelled user doesn't have a possibility to remove them
                logger.exception("Error during purging experiment.")

    except Exception:
        # Any error that escaped the steps above ends the purge early; the
        # lists collected so far are returned.
        logger.exception("Error during purging experiment.")
        return purged_runs, not_purged_runs

    return purged_runs, not_purged_runs
예제 #15
0
def get_logs(experiment_name: str, min_severity: SeverityLevel,
             start_date: str, end_date: str, pod_ids: str,
             pod_status: PodStatus, match: str, output: bool, pager: bool,
             follow: bool, runs_kinds: List[RunKinds], instance_type: str):
    """
    Show logs for a given experiment.

    Exactly one of experiment_name and match has to be given. Runs matching
    the resulting name filter are listed in the current kubectl context
    namespace, and logs of each of them are either saved to a file (when
    output is set) or printed (optionally through a pager).

    :param experiment_name: exact name of the experiment whose logs are shown
    :param min_severity: minimal severity passed to the logs generator
    :param start_date: lower bound of the logs' time range; when empty, the
                       creation timestamp of each run is used for that run
    :param end_date: upper bound of the logs' time range
    :param pod_ids: comma separated list of pod ids used to filter logs
    :param pod_status: pod status used to filter logs
    :param match: name filter used instead of an exact experiment name
    :param output: if True, logs are saved to a file instead of being printed
    :param pager: passed to print_logs when logs are printed
    :param follow: if True (and output is not set), logs are followed
    :param runs_kinds: kinds of runs taken into account when listing runs
    :param instance_type: name of the instance type used in messages
    On any error a message is displayed and the command exits with code 1.
    """
    # exactly one of: experiment name, match filter - has to be provided
    if experiment_name and match:
        handle_error(user_msg=Texts.NAME_M_BOTH_GIVEN_ERROR_MSG.format(
            instance_type=instance_type))
        exit(1)
    elif not experiment_name and not match:
        handle_error(user_msg=Texts.NAME_M_NONE_GIVEN_ERROR_MSG.format(
            instance_type=instance_type))
        exit(1)

    try:
        es_client = K8sElasticSearchClient(
            host=f'{get_kubectl_host(with_port=True)}'
            f'/api/v1/namespaces/nauta/services/nauta-elasticsearch:nauta/proxy',
            verify_certs=False,
            use_ssl=True,
            headers={'Authorization': get_api_key()})
        namespace = get_kubectl_current_context_namespace()
        if match:
            # from now on the match pattern is also the name shown in messages
            experiment_name = match
            name_filter = match
        else:
            name_filter = f'^{experiment_name}$'
        runs = Run.list(namespace=namespace,
                        name_filter=name_filter,
                        run_kinds_filter=runs_kinds)
        if not runs:
            # handled below by the ValueError branch
            raise ValueError(
                f'Run with given name: {experiment_name} does not exists in namespace {namespace}.'
            )
        pod_ids_filter = pod_ids.split(',') if pod_ids else None
        # following makes no sense when logs are written to a file
        follow_logs = bool(follow and not output)
        several_runs = len(runs) > 1
        if output and several_runs:
            click.echo(Texts.MORE_EXP_LOGS_MESSAGE)
        for run in runs:
            # The default start date has to be resolved separately for every
            # run. Overwriting start_date here would make all subsequent runs
            # use the creation timestamp of the first one.
            run_start_date = start_date if start_date else run.creation_timestamp
            run_logs_generator = es_client.get_experiment_logs_generator(
                run=run,
                namespace=namespace,
                min_severity=min_severity,
                start_date=run_start_date,
                end_date=end_date,
                pod_ids=pod_ids_filter,
                pod_status=pod_status,
                follow=follow_logs)
            if output:
                save_logs_to_file(logs_generator=run_logs_generator,
                                  instance_name=run.name,
                                  instance_type=instance_type)
            else:
                if several_runs:
                    click.echo(f'Experiment : {run.name}')
                print_logs(run_logs_generator=run_logs_generator, pager=pager)
    except ValueError:
        not_exists_msg = Texts.EXPERIMENT_NOT_EXISTS_ERROR_MSG.format(
            experiment_name=experiment_name,
            instance_type=instance_type.capitalize())
        handle_error(logger, not_exists_msg, not_exists_msg)
        exit(1)
    except Exception:
        other_error_msg = Texts.LOGS_GET_OTHER_ERROR_MSG.format(
            instance_type=instance_type)
        handle_error(logger, other_error_msg, other_error_msg)
        exit(1)
예제 #16
0
파일: common.py 프로젝트: pnijhara/nauta
def get_logs(operation_name: str, start_date: str, end_date: str, match: str,
             output: bool, pager: bool, follow: bool):
    """
    Show logs for a given model export operation.

    Exactly one of operation_name and match has to be given. Argo workflows
    matching the resulting name filter are listed in the current kubectl
    context namespace, and logs of each of them are either saved to a file
    (when output is set) or printed (optionally through a pager).
    Elasticsearch is reached through a local K8sProxy tunnel.

    :param operation_name: exact name of the operation whose logs are shown
    :param start_date: lower bound of the logs' time range; when empty, the
                       start time of each workflow is used for that workflow
    :param end_date: upper bound of the logs' time range
    :param match: name filter used instead of an exact operation name
    :param output: if True, logs are saved to a file instead of being printed
    :param pager: passed to print_logs when logs are printed
    :param follow: if True (and output is not set), logs are followed
    On any error a message is displayed and the command exits with code 1.
    """
    # exactly one of: operation name, match filter - has to be provided
    if operation_name and match:
        handle_error(user_msg=Texts.NAME_M_BOTH_GIVEN_ERROR_MSG)
        exit(1)
    elif not operation_name and not match:
        handle_error(user_msg=Texts.NAME_M_NONE_GIVEN_ERROR_MSG)
        exit(1)

    try:
        with K8sProxy(NAUTAAppNames.ELASTICSEARCH) as proxy:
            es_client = K8sElasticSearchClient(host="127.0.0.1",
                                               port=proxy.tunnel_port,
                                               verify_certs=False,
                                               use_ssl=False)
            namespace = get_kubectl_current_context_namespace()
            if match:
                # from now on the match pattern is also the name shown in messages
                operation_name = match
                name_filter = match
            else:
                name_filter = f'^{operation_name}$'
            workflows = ArgoWorkflow.list(namespace=namespace,
                                          name_filter=name_filter)
            if not workflows:
                # handled below by the ValueError branch
                raise ValueError(
                    f'Operation with given name: {operation_name} does not '
                    f'exists in namespace {namespace}.')

            # following makes no sense when logs are written to a file
            follow_logs = bool(follow and not output)
            several_workflows = len(workflows) > 1

            if output and several_workflows:
                click.echo(Texts.MORE_EXP_LOGS_MESSAGE)

            for workflow in workflows:
                # The default start date has to be resolved separately for
                # every workflow. Overwriting start_date here would make all
                # subsequent workflows use the start time of the first one.
                workflow_start_date = start_date if start_date else workflow.started_at

                ops_logs_generator = es_client.get_argo_workflow_logs_generator(
                    workflow=workflow,
                    namespace=namespace,
                    start_date=workflow_start_date,
                    end_date=end_date,
                    follow=follow_logs)

                if output:
                    save_logs_to_file(logs_generator=ops_logs_generator,
                                      instance_name=workflow.name,
                                      instance_type="operation")
                else:
                    if several_workflows:
                        click.echo(f'Operation : {workflow.name}')
                    print_logs(run_logs_generator=ops_logs_generator,
                               pager=pager)

    except K8sProxyCloseError:
        handle_error(logger, Texts.PROXY_CLOSE_LOG_ERROR_MSG,
                     Texts.PROXY_CLOSE_LOG_ERROR_MSG)
        exit(1)
    except LocalPortOccupiedError as exe:
        port_occupied_msg = Texts.LOCAL_PORT_OCCUPIED_ERROR_MSG.format(
            exception_message=exe.message)
        handle_error(logger, port_occupied_msg, port_occupied_msg)
        exit(1)
    except K8sProxyOpenError:
        handle_error(logger, Texts.PROXY_CREATION_ERROR_MSG,
                     Texts.PROXY_CREATION_ERROR_MSG)
        exit(1)
    except ValueError:
        # The message used to be formatted twice with two different keyword
        # names (operation_name and experiment_name), so one of the calls had
        # to fail with a KeyError. The template is defined outside of this
        # function, so both names are supplied - str.format ignores the
        # keyword that the template does not use.
        not_exists_msg = Texts.OPERATION_NOT_EXISTS_ERROR_MSG.format(
            operation_name=operation_name,
            experiment_name=operation_name)
        handle_error(logger, not_exists_msg, not_exists_msg)
        exit(1)
    except Exception:
        handle_error(logger, Texts.LOGS_GET_OTHER_ERROR_MSG,
                     Texts.LOGS_GET_OTHER_ERROR_MSG)
        exit(1)