Exemple #1
0
def verify_user_privileges(admin_command: bool, command_name: str):
    """
    Stop execution when the current user's role does not fit the command.

    :param admin_command: True for a command reserved for administrators
     (a regular user gets an error and the process exits), False for a command
     reserved for regular users (an administrator gets an error and the
     process exits)
    :param command_name: name of the command, inserted into the error message
    :return: None; the process exits with code 1 when the role does not match
     or when the role check itself raises
    """
    try:
        user_is_admin = is_current_user_administrator()

        # Pick the message template matching the kind of mismatch, if any.
        if admin_command:
            rejection_template = None if user_is_admin else Texts.USER_NOT_ADMIN_MSG
        else:
            rejection_template = Texts.USER_IS_ADMIN_MSG if user_is_admin else None

        if rejection_template is not None:
            rejection_msg = rejection_template.format(command_name=command_name)
            handle_error(logger=logger,
                         log_msg=rejection_msg,
                         user_msg=rejection_msg)
            sys.exit(1)
    except Exception:
        # SystemExit is not an Exception subclass, so the exits above pass
        # through; only genuine failures of the check end up here.
        check_failed_msg = Texts.ADMIN_CHECK_ERROR_MSG
        handle_error(logger,
                     check_failed_msg,
                     check_failed_msg,
                     add_verbosity_msg=True)
        sys.exit(1)
Exemple #2
0
def test_is_current_user_administrator_(mocker):
    """An ApiException with status 404 raised by the cluster-roles lookup must propagate."""
    not_found_error = ApiException(status=404)
    cluster_roles_mock = mocker.patch("util.k8s.k8s_info.get_cluster_roles",
                                      side_effect=not_found_error)

    with pytest.raises(ApiException):
        is_current_user_administrator()

    # The lookup is attempted exactly once.
    assert cluster_roles_mock.call_count == 1
Exemple #3
0
def is_admin(state: State):
    """
    Return the result of the administrator check for the current user.

    If the check raises, the error is reported and the process exits with
    code 1.

    :param state: CLI state; ``add_verbosity_msg`` is enabled when its
     ``verbosity`` equals 0
    """
    try:
        admin_flag = is_current_user_administrator()
    except Exception:
        check_failed_msg = Texts.ADMIN_CHECK_ERROR_MSG
        handle_error(logger,
                     check_failed_msg,
                     check_failed_msg,
                     add_verbosity_msg=state.verbosity == 0)
        exit(1)
    return admin_flag
Exemple #4
0
def is_admin():
    """
    Return the result of the administrator check for the current user.

    If the check raises, the error is reported and the process exits with
    code 1. ``add_verbosity_msg`` is enabled when the verbosity stored on the
    current click context object equals 0.
    """
    try:
        admin_flag = is_current_user_administrator()
    except Exception:
        check_failed_msg = Texts.ADMIN_CHECK_ERROR_MSG
        handle_error(
            logger,
            check_failed_msg,
            check_failed_msg,
            add_verbosity_msg=click.get_current_context().obj.verbosity == 0)
        exit(1)
    return admin_flag
Exemple #5
0
def verify_cli_dependencies():
    """
    Check the operating system and all binary dependencies of the CLI.

    The namespace passed to the dependency check is 'kube-system' for an
    administrator and the current kubectl context namespace otherwise. If the
    namespace cannot be determined, the process exits with code 1. An invalid
    OS or dependency is only reported; this function does not exit in that
    case.
    """
    try:
        if is_current_user_administrator(request_timeout=VERIFY_REQUEST_TIMEOUT):
            namespace = 'kube-system'
        else:
            namespace = get_kubectl_current_context_namespace()
    except Exception:
        namespace_failure_msg = Texts.KUBECTL_NAMESPACE_ERROR_MSG
        handle_error(logger,
                     namespace_failure_msg,
                     namespace_failure_msg,
                     add_verbosity_msg=True)
        sys.exit(1)

    try:
        check_os()
        check_all_binary_dependencies(namespace=namespace)
    except (InvalidDependencyError, InvalidOsError):
        dependency_failure_msg = Texts.INVALID_DEPENDENCY_ERROR_MSG
        handle_error(logger,
                     dependency_failure_msg,
                     dependency_failure_msg,
                     add_verbosity_msg=True)
Exemple #6
0
def create(state: State, username: str, list_only: bool, filename: str):
    """
    Adds a new user with a name given as a parameter and outputs a kubeconfig
    generated for that user.

    The command exits with code 1 when: both ``list_only`` and ``filename``
    are given, the user name is invalid, the current user is not an
    administrator, the user already exists or is being removed, the helm
    installation fails, the kubeconfig cannot be generated or the kubeconfig
    cannot be saved.

    :param state: CLI state; ``add_verbosity_msg`` is enabled on reported
     errors when its ``verbosity`` equals 0
    :param username: name of a new user
    :param list_only: if set, the generated kubeconfig is printed instead of
     being written to a file
    :param filename: name of the file the kubeconfig is written to; when
     empty, ``DEFAULT_FILENAME`` formatted with the user name is used
    """

    # The two output options exclude each other.
    if list_only and filename:
        handle_error(user_msg=Texts.F_L_OPTIONS_EXCLUSION_ERROR_MSG)
        exit(1)

    # Pre-checks. The exit() calls below raise SystemExit, which is not an
    # Exception subclass, so they are not swallowed by the outer handler.
    try:
        try:
            validate_user_name(username)
        except ValueError as exe:
            handle_error(
                logger,
                Texts.NAME_VALIDATION_ERROR_MSG.format(username=username),
                str(exe),
                add_verbosity_msg=state.verbosity == 0)
            exit(1)

        if not is_current_user_administrator():
            handle_error(logger, Texts.USER_NOT_ADMIN_ERROR_MSG,
                         Texts.USER_NOT_ADMIN_ERROR_MSG)
            exit(1)

        user_state = check_users_presence(username)

        if user_state == UserState.ACTIVE:
            handle_error(
                logger,
                Texts.USER_ALREADY_EXISTS_ERROR_MSG.format(username=username),
                Texts.USER_ALREADY_EXISTS_ERROR_MSG.format(username=username))
            exit(1)

        if user_state == UserState.TERMINATING:
            handle_error(
                logger,
                Texts.USER_BEING_REMOVED_ERROR_MSG.format(username=username),
                Texts.USER_BEING_REMOVED_ERROR_MSG.format(username=username))
            exit(1)

    except Exception:
        handle_error(
            logger,
            Texts.USER_VERIFICATION_ERROR_MSG.format(username=username),
            Texts.USER_VERIFICATION_ERROR_MSG.format(username=username),
            add_verbosity_msg=state.verbosity == 0)
        exit(1)

    # Create the user by installing the add-user helm chart, then collect the
    # token and certificate needed for the kubeconfig.
    try:
        with spinner(text=Texts.CREATING_USER_PROGRESS_MSG.format(
                username=username)):
            chart_location = os.path.join(Config().config_path,
                                          ADD_USER_CHART_NAME)

            nauta_config_map = NAUTAConfigMap()

            tiller_location = nauta_config_map.image_tiller
            tensorboard_service_location = nauta_config_map.image_tensorboard_service

            add_user_command = [
                "helm", "install", "--wait", "--namespace", username, "--name",
                username, chart_location, "--set", "global.nauta=nauta",
                "--set", f"username={username}", "--set",
                "TillerImage={}".format(tiller_location), "--set",
                f"TensorboardServiceImage={tensorboard_service_location}"
            ]
            env = os.environ.copy()
            # Put the config directory first on PATH for the command below.
            env['PATH'] = Config().config_path + os.pathsep + env['PATH']
            # NOTE(review): the command is joined into one string and run with
            # shell=True, so username ends up in a shell command line. This
            # presumably relies on validate_user_name() above rejecting unsafe
            # characters - confirm.
            _, err_code, log_output = execute_system_command(
                ' '.join(add_user_command), env=env, shell=True)

        if err_code:
            # NOTE(review): USER_ADD_ERROR_MSG is passed here without
            # .format(username=...), unlike in the handler at the end of this
            # try block - confirm which form is intended.
            handle_error(logger,
                         log_output,
                         Texts.USER_ADD_ERROR_MSG,
                         add_verbosity_msg=state.verbosity == 0)

            # Try to roll back a partially created user.
            if not delete_user(username):
                handle_error(user_msg=Texts.REMOVE_USER_ERROR_MSG.format(
                    username=username))
            sys.exit(1)

        # Failures to read the token or the certificate are reported but are
        # not fatal - an empty value is used in the kubeconfig instead.
        try:
            users_password = get_users_token(username)
        except Exception:
            handle_error(logger,
                         Texts.PASSWORD_GATHER_ERROR_MSG,
                         Texts.PASSWORD_GATHER_ERROR_MSG,
                         add_verbosity_msg=state.verbosity == 0)
            users_password = ""

        try:
            cert = get_certificate(username)
        except Exception:
            handle_error(logger,
                         Texts.CERT_GATHER_ERROR_MSG,
                         Texts.CERT_GATHER_ERROR_MSG,
                         add_verbosity_msg=state.verbosity == 0)
            cert = ""

    except Exception:
        handle_error(logger,
                     Texts.USER_ADD_ERROR_MSG.format(username=username),
                     Texts.USER_ADD_ERROR_MSG.format(username=username),
                     add_verbosity_msg=state.verbosity == 0)
        # Try to roll back a partially created user.
        if not delete_user(username):
            handle_error(user_msg=Texts.REMOVE_USER_ERROR_MSG.format(
                username=username))
        sys.exit(1)

    if is_user_created(username, 90):
        click.echo(Texts.USER_CREATION_SUCCESS_MSG.format(username=username))
    else:
        # If the user hasn't been created within 90 seconds, the app displays
        # information about it but doesn't stop processing the command - the
        # config file generated here may be useful later, once the user has
        # been created.
        click.echo(Texts.USER_NOT_READY_ERROR_MSG.format(username=username))

    try:
        kubeconfig = generate_kubeconfig(username, username,
                                         get_kubectl_host(), users_password,
                                         cert)
    except Exception:
        handle_error(logger,
                     Texts.CONFIG_CREATION_ERROR_MSG,
                     Texts.CONFIG_CREATION_ERROR_MSG,
                     add_verbosity_msg=state.verbosity == 0)
        exit(1)

    if list_only:
        click.echo(Texts.LIST_ONLY_HEADER)
        click.echo(kubeconfig)
    else:
        if not filename:
            filename = DEFAULT_FILENAME.format(username)
        try:
            with open(filename, "w") as file:
                file.write(kubeconfig)

            click.echo(Texts.CONFIG_SAVE_SUCCESS_MSG.format(filename=filename))
        except Exception:
            handle_error(logger,
                         Texts.CONFIG_SAVE_FAIL_MSG,
                         Texts.CONFIG_SAVE_FAIL_MSG,
                         add_verbosity_msg=state.verbosity == 0)
            # Saving failed - print the kubeconfig so that it isn't lost.
            click.echo(Texts.CONFIG_SAVE_FAIL_INSTRUCTIONS_MSG)
            click.echo(kubeconfig)
            sys.exit(1)
Exemple #7
0
def test_is_current_user_administrator_is_not(mocker):
    """An ApiException with status 403 from the cluster-roles lookup yields a falsy result."""
    forbidden_error = ApiException(status=403)
    cluster_roles_mock = mocker.patch("util.k8s.k8s_info.get_cluster_roles",
                                      side_effect=forbidden_error)

    admin_flag = is_current_user_administrator()

    assert not admin_flag
    assert cluster_roles_mock.call_count == 1
Exemple #8
0
def test_is_current_user_administrator_is(mocker):
    """When the cluster-roles lookup succeeds, the result is truthy."""
    cluster_roles_mock = mocker.patch("util.k8s.k8s_info.get_cluster_roles")

    admin_flag = is_current_user_administrator()

    assert admin_flag
    assert cluster_roles_mock.call_count == 1
Exemple #9
0
def verify(state: State):
    """
    Verify the operating system, kubectl, the cluster connection and all
    remaining dependencies, then save the detected dependency versions.

    Steps, in order:

    1. the operating system check,
    2. the kubectl version check (an unsupported version is fatal),
    3. the connection to the cluster and port forwarding checks,
    4. the version check of every other dependency (an unsupported version
       only produces a warning; a missing dependency or a failed check is
       fatal).

    Every fatal problem is reported through ``handle_error`` and ends the
    process with exit code 1.

    :param state: CLI state; ``add_verbosity_msg`` is enabled on most reported
     errors when its ``verbosity`` equals 0
    """
    show_verbosity_hint = state.verbosity == 0

    try:
        with spinner(text=Texts.CHECKING_OS_MSG):
            check_os()
        click.echo(Texts.OS_SUPPORTED_MSG)
    except InvalidOsError as exception:
        handle_error(logger,
                     str(exception),
                     str(exception),
                     add_verbosity_msg=True)
        exit(1)

    dependencies = get_dependency_map()
    kubectl_dependency_name = 'kubectl'
    kubectl_dependency_spec = dependencies[kubectl_dependency_name]

    with spinner(text=Texts.VERIFYING_DEPENDENCY_MSG.format(
            dependency_name=kubectl_dependency_name)):
        valid, installed_version = check_dependency(
            dependency_name=kubectl_dependency_name,
            dependency_spec=kubectl_dependency_spec)

    supported_versions_sign = '>='
    logger.info(
        Texts.VERSION_CHECKING_MSG.format(
            dependency_name=kubectl_dependency_name,
            installed_version=installed_version,
            supported_versions_sign=supported_versions_sign,
            expected_version=kubectl_dependency_spec.expected_version))

    if valid:
        click.echo(
            Texts.DEPENDENCY_VERIFICATION_SUCCESS_MSG.format(
                dependency_name=kubectl_dependency_name))
    else:
        # Format the template once and use it for both the log and the user
        # message, so that the user never sees unfilled placeholders.
        invalid_kubectl_msg = Texts.KUBECTL_INVALID_VERSION_ERROR_MSG.format(
            installed_version=installed_version,
            supported_versions_sign=supported_versions_sign,
            expected_version=kubectl_dependency_spec.expected_version)
        handle_error(logger,
                     invalid_kubectl_msg,
                     invalid_kubectl_msg,
                     add_verbosity_msg=show_verbosity_hint)
        exit(1)

    # kubectl has been handled above; skip it in the loop over dependencies.
    del dependencies[kubectl_dependency_name]

    try:
        with spinner(text=Texts.CHECKING_CONNECTION_TO_CLUSTER_MSG):
            check_connection_to_cluster()
        with spinner(text=Texts.CHECKING_PORT_FORWARDING_FROM_CLUSTER_MSG):
            check_port_forwarding()
    except KubectlConnectionError as e:
        handle_error(logger,
                     str(e),
                     str(e),
                     add_verbosity_msg=show_verbosity_hint)
        exit(1)
    except FileNotFoundError:
        handle_error(logger,
                     Texts.KUBECTL_NOT_INSTALLED_ERROR_MSG,
                     Texts.KUBECTL_NOT_INSTALLED_ERROR_MSG,
                     add_verbosity_msg=show_verbosity_hint)
        exit(1)

    # Administrators check dependencies in 'kube-system', other users in the
    # namespace of the current kubectl context.
    try:
        namespace = 'kube-system' if is_current_user_administrator(
        ) else get_kubectl_current_context_namespace()
    except Exception:
        handle_error(logger,
                     Texts.GET_K8S_NAMESPACE_ERROR_MSG,
                     Texts.GET_K8S_NAMESPACE_ERROR_MSG,
                     add_verbosity_msg=show_verbosity_hint)
        exit(1)

    dependency_versions = {}
    for dependency_name, dependency_spec in dependencies.items():
        try:
            supported_versions_sign = '==' if dependency_spec.match_exact_version else '>='
            with spinner(text=Texts.VERIFYING_DEPENDENCY_MSG.format(
                    dependency_name=dependency_name)):
                valid, installed_version = check_dependency(
                    dependency_name=dependency_name,
                    dependency_spec=dependency_spec,
                    namespace=namespace)
            dependency_versions[dependency_name] = installed_version
            logger.info(
                Texts.VERSION_CHECKING_MSG.format(
                    dependency_name=dependency_name,
                    installed_version=installed_version,
                    supported_versions_sign=supported_versions_sign,
                    expected_version=dependency_spec.expected_version))
            if valid:
                click.echo(
                    Texts.DEPENDENCY_VERIFICATION_SUCCESS_MSG.format(
                        dependency_name=dependency_name))
            else:
                # An unsupported version is only a warning for dependencies
                # other than kubectl.
                click.echo(
                    Texts.INVALID_VERSION_WARNING_MSG.format(
                        dependency_name=dependency_name,
                        installed_version=installed_version,
                        supported_versions_sign=supported_versions_sign,
                        expected_version=dependency_spec.expected_version))
        except FileNotFoundError:
            not_installed_msg = Texts.DEPENDENCY_NOT_INSTALLED_ERROR_MSG.format(
                dependency_name=dependency_name)
            handle_error(logger,
                         not_installed_msg,
                         not_installed_msg,
                         add_verbosity_msg="client" not in dependency_name)
            exit(1)
        except (RuntimeError, ValueError, TypeError):
            version_check_msg = Texts.DEPENDENCY_VERSION_CHECK_ERROR_MSG.format(
                dependency_name=dependency_name)
            handle_error(logger,
                         version_check_msg,
                         version_check_msg,
                         add_verbosity_msg=show_verbosity_hint)
            exit(1)
        except Exception:
            other_error_msg = Texts.DEPENDENCY_VERIFICATION_OTHER_ERROR_MSG.format(
                dependency_name=dependency_name)
            handle_error(logger,
                         other_error_msg,
                         other_error_msg,
                         add_verbosity_msg=show_verbosity_hint)
            exit(1)
    else:
        # The loop contains no break, so this block runs whenever the loop
        # finishes without exiting, i.e. every dependency was checked without
        # a fatal error (version warnings included).
        # Save dependency versions in a file
        save_dependency_versions(dependency_versions)
Exemple #10
0
def cancel(state: State,
           name: str,
           match: str,
           purge: bool,
           pod_ids: str,
           pod_status: str,
           listed_runs_kinds: List[RunKinds] = None):
    """
    Cancels chosen experiments based on a name provided as a parameter.

    Exactly one of ``name`` and ``match`` has to be given. When ``pod_ids`` or
    ``pod_status`` is given, the work is delegated to ``cancel_pods_mode`` and
    the process exits with code 0. Otherwise matching runs are listed, the
    user is asked for confirmation and the runs are cancelled (or purged when
    ``purge`` is set). The process exits with code 1 when nothing can be
    cancelled or when at least one run could not be cancelled.

    :param state: CLI state (not read in this function)
    :param name: name of an experiment; if no experiment of a listed kind has
     this name, it is turned into the pattern ``^name$`` and used as a run
     name filter
    :param match: value used as a run name filter when ``name`` is not given
    :param purge: if set, runs in FAILED, COMPLETE and CANCELLED states are
     also processed and runs are purged instead of cancelled
    :param pod_ids: passed to ``cancel_pods_mode``
    :param pod_status: passed to ``cancel_pods_mode``
    :param listed_runs_kinds: kinds of runs taken into account; defaults to
     TRAINING and JUPYTER
    """
    # NOTE(review): experiment_name and experiment_name_plural used below are
    # not defined in this function - presumably module-level names; confirm.
    if not listed_runs_kinds:
        listed_runs_kinds = [RunKinds.TRAINING, RunKinds.JUPYTER]

    # name and match exclude each other, but one of them is required
    if name and match:
        handle_error(user_msg=Texts.NAME_M_BOTH_GIVEN_ERROR_MSG)
        exit(1)

    if not name and not match:
        handle_error(user_msg=Texts.NAME_M_NONE_GIVEN_ERROR_MSG)
        exit(1)

    current_namespace = get_current_namespace()

    # pod mode - only pods of a run are cancelled, then the command ends
    if pod_ids or pod_status:
        if not name:
            name = match

        cancel_pods_mode(namespace=current_namespace,
                         run_name=name,
                         pod_ids=pod_ids,
                         pod_status=pod_status)
        exit(0)

    search_for_experiment = False
    exp_to_be_cancelled = None

    if name:
        exp_to_be_cancelled = Experiment.get(namespace=current_namespace,
                                             name=name)
        exp_to_be_cancelled_kind = RunKinds(exp_to_be_cancelled.metadata['labels'].get('runKind')) \
            if exp_to_be_cancelled else None
        # an experiment of a kind that isn't listed is treated as not found
        exp_to_be_cancelled = exp_to_be_cancelled if exp_to_be_cancelled_kind in listed_runs_kinds else None

        if exp_to_be_cancelled:
            search_for_experiment = True
        else:
            # no such experiment - look for runs with exactly this name
            name = f"^{name}$"
    else:
        name = match

    list_of_all_runs = None

    list_of_applicable_states = [RunStatus.QUEUED, RunStatus.RUNNING]

    if purge:
        list_of_applicable_states.extend(
            [RunStatus.FAILED, RunStatus.COMPLETE, RunStatus.CANCELLED])

    try:
        if search_for_experiment:
            list_of_all_runs = Run.list(namespace=current_namespace,
                                        exp_name_filter=[name],
                                        run_kinds_filter=listed_runs_kinds)
        else:
            list_of_all_runs = Run.list(namespace=current_namespace,
                                        name_filter=name,
                                        run_kinds_filter=listed_runs_kinds)
    except Exception:
        handle_error(
            logger,
            Texts.LIST_RUNS_ERROR_MSG.format(
                experiment_name_plural=experiment_name_plural),
            Texts.LIST_RUNS_ERROR_MSG.format(
                experiment_name_plural=experiment_name_plural))
        exit(1)

    # Handle cancellation of experiments with no associated Runs
    # NOTE(review): unless cancel_uninitialized_experiment() ends the process,
    # the next check reports a lack of experiments and exits with 1 - confirm.
    if exp_to_be_cancelled and not list_of_all_runs:
        cancel_uninitialized_experiment(experiment=exp_to_be_cancelled,
                                        namespace=current_namespace,
                                        purge=purge)

    if not list_of_all_runs:
        handle_error(user_msg=Texts.LACK_OF_EXPERIMENTS_ERROR_MSG.format(
            experiment_name_plural=experiment_name_plural,
            experiment_name=experiment_name))
        exit(1)
    elif not purge and not [
            run for run in list_of_all_runs
            if run.state in [RunStatus.QUEUED, RunStatus.RUNNING]
    ]:
        # without purge only QUEUED and RUNNING runs can be cancelled
        handle_error(
            user_msg=Texts.LACK_OF_EXP_TO_BE_CANCELLED_ERROR_MSG.format(
                experiment_name_plural=experiment_name_plural))
        exit(1)

    # split runs into those that will be processed and those that are skipped
    list_of_runs_to_be_deleted: List[Run] = []
    names_of_cancelled_runs: List[str] = []

    # The operation word in the messages below is taken from DELETE_OPERATION
    # when experiment_name_plural equals 'pods' and from CANCEL_OPERATION
    # otherwise.
    if not purge:
        # check whether we have at least one run in an applicable state
        for run in list_of_all_runs:
            if run.state in list_of_applicable_states:
                list_of_runs_to_be_deleted.append(run)
            else:
                names_of_cancelled_runs.append(run.name)

        if not list_of_runs_to_be_deleted:
            handle_error(
                user_msg=Texts.EXPERIMENTS_ALREADY_CANCELLED_ERROR_MSG.format(
                    experiment_name_plural=experiment_name_plural,
                    operation_word=Texts.
                    DELETE_OPERATION["deleted"] if experiment_name_plural ==
                    'pods' else Texts.CANCEL_OPERATION["cancelled"]))
            exit(1)
        elif len(list_of_runs_to_be_deleted) != len(list_of_all_runs):
            # only some of the runs can be cancelled - display both lists
            click.echo(
                Texts.ALREADY_CANCELLED_LIST_HEADER.format(
                    experiment_name_plural=experiment_name_plural,
                    operation_word=Texts.DELETE_OPERATION["deleted"]
                    if experiment_name_plural == 'pods' else
                    Texts.CANCEL_OPERATION["cancelled"]))
            # NOTE: this loop rebinds the name parameter; it is not read
            # afterwards
            for name in names_of_cancelled_runs:
                click.echo(f"     - {name}")
            click.echo(
                Texts.CAN_BE_CANCELLED_LIST_HEADER.format(
                    experiment_name_plural=experiment_name_plural,
                    operation_word=Texts.DELETE_OPERATION["deleted"]
                    if experiment_name_plural == 'pods' else
                    Texts.CANCEL_OPERATION["cancelled"]))
            for run in list_of_runs_to_be_deleted:
                click.echo(f"     - {run.name}")
        else:
            click.echo(
                Texts.WILL_BE_CANCELLED_LIST_HEADER.format(
                    experiment_name_plural=experiment_name_plural,
                    operation_word=Texts.DELETE_OPERATION["deleted"]
                    if experiment_name_plural == 'pods' else
                    Texts.CANCEL_OPERATION["cancelled"]))
            for run in list_of_runs_to_be_deleted:
                click.echo(f"     - {run.name}")
    else:
        # with purge every listed run is processed, whatever its state
        list_of_runs_to_be_deleted = list_of_all_runs
        click.echo(
            Texts.WILL_BE_PURGED_LIST_HEADER.format(
                experiment_name_plural=experiment_name_plural,
                operation_word=Texts.DELETE_OPERATION["deleted"]
                if experiment_name_plural == 'pods' else
                Texts.CANCEL_OPERATION["cancelled"]))
        for run in list_of_runs_to_be_deleted:
            click.echo(f"     - {run.name}")

    # ask for confirmation; a refusal ends the command with exit code 0
    if not click.confirm(
            Texts.CONFIRM_CANCEL_MSG.format(
                experiment_name_plural=experiment_name_plural,
                operation_word=Texts.DELETE_OPERATION["deletion"]
                if experiment_name_plural == 'pods' else
                Texts.CANCEL_OPERATION["cancellation"])):
        handle_error(user_msg=Texts.CANCELLATION_ABORTED_MSG.format(
            experiment_name_plural=experiment_name_plural,
            operation_word=Texts.
            DELETE_OPERATION["deletion"] if experiment_name_plural ==
            'pods' else Texts.CANCEL_OPERATION["cancellation"]))
        exit(0)

    # group runs by experiments
    exp_with_runs: defaultdict = defaultdict(list)

    for run in list_of_runs_to_be_deleted:
        exp_with_runs[run.experiment_name].append(run)

    deleted_runs = []
    not_deleted_runs = []

    if purge:
        # Connect to elasticsearch in order to purge run logs
        try:
            with K8sProxy(NAUTAAppNames.ELASTICSEARCH) as proxy:
                es_client = K8sElasticSearchClient(
                    host="127.0.0.1",
                    port=proxy.tunnel_port,
                    verify_certs=False,
                    use_ssl=False,
                    with_admin_privledges=is_current_user_administrator())
                for exp_name, run_list in exp_with_runs.items():
                    try:
                        exp_del_runs, exp_not_del_runs = purge_experiment(
                            exp_name=exp_name,
                            runs_to_purge=run_list,
                            namespace=current_namespace,
                            k8s_es_client=es_client)
                        deleted_runs.extend(exp_del_runs)
                        not_deleted_runs.extend(exp_not_del_runs)
                    except Exception:
                        # a failure for one experiment doesn't stop
                        # processing of the remaining ones
                        handle_error(logger, Texts.OTHER_CANCELLING_ERROR_MSG)
                        not_deleted_runs.extend(run_list)
        except K8sProxyCloseError:
            handle_error(logger, Texts.PROXY_CLOSING_ERROR_LOG_MSG,
                         Texts.PROXY_CLOSING_ERROR_USER_MSG)
            exit(1)
        except LocalPortOccupiedError as exe:
            handle_error(
                logger, Texts.PORT_OCCUPIED_ERROR_LOG_MSG,
                Texts.PORT_OCCUPIED_ERROR_USER_MSG.format(
                    exception_message=exe.message))
            exit(1)
        except K8sProxyOpenError:
            handle_error(logger, Texts.PROXY_OPEN_ERROR_MSG,
                         Texts.PROXY_OPEN_ERROR_MSG)
            exit(1)
    else:
        for exp_name, run_list in exp_with_runs.items():
            try:
                exp_del_runs, exp_not_del_runs = cancel_experiment(
                    exp_name=exp_name,
                    runs_to_cancel=run_list,
                    namespace=current_namespace)
                deleted_runs.extend(exp_del_runs)
                not_deleted_runs.extend(exp_not_del_runs)
            except Exception:
                # a failure for one experiment doesn't stop processing of the
                # remaining ones
                handle_error(logger, Texts.OTHER_CANCELLING_ERROR_MSG)
                not_deleted_runs.extend(run_list)

    # summary of the operation
    if deleted_runs:
        click.echo(
            Texts.SUCCESSFULLY_CANCELLED_LIST_HEADER.format(
                experiment_name_plural=experiment_name_plural,
                operation_word=Texts.DELETE_OPERATION["deleted"]
                if experiment_name_plural == 'pods' else
                Texts.CANCEL_OPERATION["cancelled"]))
        for run in deleted_runs:
            click.echo(f"     - {run.name}")

    if not_deleted_runs:
        click.echo(
            Texts.FAILED_TO_CANCEL_LIST_HEADER.format(
                experiment_name_plural=experiment_name_plural,
                operation_word=Texts.DELETE_OPERATION["deleted"]
                if experiment_name_plural == 'pods' else
                Texts.CANCEL_OPERATION["cancelled"]))
        for run in not_deleted_runs:
            click.echo(f"     - {run.name}")
        # at least one run wasn't cancelled - signal it with the exit code
        sys.exit(1)
Exemple #11
0
def purge_experiment(exp_name: str, runs_to_purge: List[Run],
                     k8s_es_client: K8sElasticSearchClient,
                     namespace: str) -> Tuple[List[Run], List[Run]]:
    """
    Purge experiment with a given name by cancelling and removing runs given as a parameter.
    If given experiment contains more runs than is in the list of runs - experiment's state
    remains intact.

    :param exp_name: name of an experiment to which belong runs passed in runs_to_purge parameter
    :param runs_to_purge: list of runs that should be purged, they have to belong to exp_name experiment
    :param k8s_es_client: Kubernetes ElasticSearch client, used to delete logs of purged runs
    :param namespace: namespace where experiment is located
    :return: two lists - first contains runs that were purged successfully, second - those which weren't
    :raises RuntimeError: when the experiment cannot be found; errors raised later, during
     purging, are logged and the lists gathered so far are returned
    """
    logger.debug(f"Purging {exp_name} experiment ...")

    purged_runs: List[Run] = []
    not_purged_runs: List[Run] = []

    experiment = Experiment.get(name=exp_name, namespace=namespace)
    if not experiment:
        raise RuntimeError(Texts.GET_EXPERIMENT_ERROR_MSG)

    experiment_runs = Run.list(namespace=namespace, exp_name_filter=[exp_name])
    # the whole experiment is cancelled only if all of its runs are purged
    cancel_whole_experiment = (len(experiment_runs) == len(runs_to_purge))
    if cancel_whole_experiment:
        experiment.state = ExperimentStatus.CANCELLING
        experiment.update()

    try:
        cancelled_runs, not_cancelled_runs = cancel_experiment_runs(
            runs_to_cancel=runs_to_purge, namespace=namespace)
        # runs that couldn't be cancelled can't be purged either; the same
        # list object is extended below
        not_purged_runs = not_cancelled_runs

        if cancel_whole_experiment:
            # Delete associated workflows
            experiment_associated_workflows = [
                wf for wf in ArgoWorkflow.list(namespace=namespace)
                if wf.labels.get('experimentName') == experiment.name
            ]
            for wf in experiment_associated_workflows:
                wf.delete()

            # Remove tags from git repo manager
            try:
                delete_exp_tag_from_git_repo_manager(
                    experiment_name=experiment.name,
                    username=namespace,
                    experiments_workdir=get_run_environment_path(''))
            except Exception:
                handle_error(logger, Texts.GIT_REPO_MANAGER_ERROR_MSG,
                             Texts.GIT_REPO_MANAGER_ERROR_MSG)
                # re-raised to the outer handler, which ends purging
                raise

        for run in cancelled_runs:
            logger.debug(f"Purging {run.name} run ...")
            click.echo(Texts.PURGING_START_MSG.format(run_name=run.name))
            try:
                with spinner(text=Texts.PURGING_PROGRESS_MSG.format(
                        run_name=run.name)):
                    # purge helm release
                    delete_helm_release(run.name,
                                        namespace=namespace,
                                        purge=True)
                    # delete run
                    kubectl.delete_k8s_object("run", run.name)
                    purged_runs.append(run)
            except Exception as exe:
                not_purged_runs.append(run)
                logger.exception("Error during purging runs.")
                # occurrence of NotFound error may mean that run has been removed earlier
                if "NotFound" not in str(exe):
                    # NOTE(review): experiment_name is not defined in this
                    # function - presumably a module-level name; confirm.
                    click.echo(
                        Texts.INCOMPLETE_PURGE_ERROR_MSG.format(
                            experiment_name=experiment_name))
                    raise exe
            try:
                # clear run logs - done only for administrators
                if is_current_user_administrator():
                    logger.debug(f"Clearing logs for {run.name} run.")
                    with spinner(text=Texts.PURGING_LOGS_PROGRESS_MSG.format(
                            run_name=run.name)):
                        k8s_es_client.delete_logs_for_run(run=run.name,
                                                          namespace=namespace)
            except Exception:
                # a failure to clear logs is only logged
                logger.exception("Error during clearing run logs.")

            # CAN-1099 - docker garbage collector has errors that prevent from correct removal of images
            # try:
            # try to remove images from docker registry
            #    delete_images_for_experiment(exp_name=run.name)
            # except Exception:
            #    logger.exception("Error during removing images.")

        # the experiment object is removed only when all of its runs were purged
        if cancel_whole_experiment and not not_purged_runs:
            try:
                kubectl.delete_k8s_object("experiment", exp_name)
            except Exception:
                # problems during deleting experiments are hidden as if runs were
                # cancelled user doesn't have a possibility to remove them
                logger.exception("Error during purging experiment.")

    except Exception:
        # any other error ends purging; lists gathered so far are returned
        logger.exception("Error during purging experiment.")
        return purged_runs, not_purged_runs

    return purged_runs, not_purged_runs
Exemple #12
0
def submit(state: State, script_location: str, script_folder_location: str,
           template: str, name: str, pack_param: List[Tuple[str, str]],
           parameter_range: List[Tuple[str, str]],
           parameter_set: Tuple[str, ...], env: List[str],
           script_parameters: Tuple[str, ...], requirements: str):
    """
    Submit a training experiment and print a table with the created runs.

    The command is refused for administrators. The process exits with code 1
    when the user is an administrator, when the submission fails, or when at
    least one of the created runs is in the FAILED state.

    :param state: CLI state (not read in this function)
    :param script_location: script to run, or a directory; for a directory
     the default script location (and, when ``requirements`` is empty, the
     default requirements location) inside it is used
    :param script_folder_location: optional folder, validated when given and
     passed to ``submit_experiment``
    :param template: pack template, validated and passed to ``submit_experiment``
    :param name: passed to ``submit_experiment``
    :param pack_param: pack parameters, validated and passed to ``submit_experiment``
    :param parameter_range: passed to ``submit_experiment``
    :param parameter_set: passed to ``submit_experiment``
    :param env: passed to ``submit_experiment`` as ``env_variables``
    :param script_parameters: passed to ``submit_experiment``
    :param requirements: passed to ``submit_experiment`` as ``requirements_file``
    """
    if is_current_user_administrator():
        handle_error(logger, Texts.USER_IS_ADMIN_LOG_MSG,
                     Texts.USER_IS_ADMIN_USR_MSG)
        exit(1)

    logger.debug(Texts.SUBMIT_START_LOG_MSG)
    validate_script_location(script_location)
    validate_pack_params(pack_param)
    validate_pack(template)

    # A directory was given - resolve the defaults located inside it.
    if os.path.isdir(script_location):
        script_directory = script_location
        if not requirements:
            requirements = get_default_requirements_location(
                script_directory=script_directory)
        script_location = get_default_script_location(
            script_directory=script_directory)

    if script_folder_location:
        validate_script_folder_location(script_folder_location)

    click.echo(Texts.SUBMIT_START_USER_MSG)

    runs_list = None
    # noinspection PyBroadException
    try:
        runs_list, runs_errors, _ = submit_experiment(
            run_kind=RunKinds.TRAINING,
            script_location=script_location,
            script_folder_location=script_folder_location,
            template=template,
            name=name,
            pack_params=pack_param,
            parameter_range=parameter_range,
            parameter_set=parameter_set,
            script_parameters=script_parameters,
            env_variables=env,
            requirements_file=requirements)
    except K8sProxyCloseError as proxy_error:
        handle_error(user_msg=proxy_error.message)
        click.echo(proxy_error.message)
        if not runs_list:
            exit(1)
    except SubmitExperimentError as submit_error:
        submit_error_msg = Texts.SUBMIT_ERROR_MSG.format(
            exception_message=submit_error.message)
        handle_error(user_msg=submit_error_msg)
        exit(1)
    except Exception:
        handle_error(user_msg=Texts.SUBMIT_OTHER_ERROR_MSG)
        exit(1)

    # display information about status of a training
    table_rows = []
    for run in runs_list:
        table_rows.append(
            (run.cli_representation.name, run.cli_representation.parameters,
             run.cli_representation.status,
             format_run_message(runs_errors.get(run.name, ""))))
    table_headers = [RUN_NAME, RUN_PARAMETERS, RUN_STATUS, RUN_MESSAGE]
    click.echo(tabulate(table_rows, headers=table_headers, tablefmt="orgtbl"))

    # a single FAILED run is enough for the application to return exit code != 0
    failed_run_found = False
    for run in runs_list:
        if run.state == RunStatus.FAILED:
            failed_run_found = True
            break

    if failed_run_found:
        handle_error(logger, Texts.FAILED_RUNS_LOG_MSG)
        exit(1)
Exemple #13
0
def delete(state: State, username: str, purge: bool):
    """
    Deletes a user with a name given as a parameter.

    The command first checks that the caller is an administrator and that the
    user is present and not already being removed, then asks for confirmation.
    After confirmation it deletes the user, stores a "1" entry under the
    username in the USER_DEL_CM config map, optionally purges the user and
    finally polls (up to 60 times, sleeping one second between attempts) until
    the user and that config map entry are no longer reported.

    :param state: CLI state object; a verbosity hint is added to error messages
                  when state.verbosity equals 0
    :param username: name of a user that should be deleted
    :param purge: if set - command removes also all artifacts associated with a user
    """

    def report_failure(log_msg, user_msg):
        # state.verbosity is read only at the moment an error is really reported
        handle_error(logger,
                     log_msg,
                     user_msg,
                     add_verbosity_msg=state.verbosity == 0)

    # --- preconditions -------------------------------------------------------
    # exit() raises SystemExit, which the "except Exception" below lets through.
    try:
        caller_is_admin = is_current_user_administrator()
        if not caller_is_admin:
            handle_error(user_msg=Texts.USER_NOT_ADMIN_ERROR_MSG)
            exit(1)

        click.echo(Texts.DELETION_CHECK_PRESENCE)
        presence = check_users_presence(username)

        if presence == UserState.NOT_EXISTS:
            handle_error(
                user_msg=Texts.USER_NOT_EXISTS_ERROR_MSG.format(username=username))
            exit(1)
        elif presence == UserState.TERMINATING:
            handle_error(user_msg=Texts.USER_BEING_REMOVED_ERROR_MSG)
            exit(1)
    except Exception:
        report_failure(Texts.USER_PRESENCE_VERIFICATION_ERROR_MSG,
                       Texts.USER_PRESENCE_VERIFICATION_ERROR_MSG)
        exit(1)

    # --- confirmation --------------------------------------------------------
    click.echo()
    confirmation_prompt = Texts.DELETE_CONFIRM_MSG.format(username=username)
    if not click.confirm(confirmation_prompt):
        click.echo(Texts.DELETE_ABORT_MSG)
        exit(0)

    click.echo()

    # --- deletion ------------------------------------------------------------
    try:
        click.echo(Texts.DELETION_START_DELETING)
        delete_user(username)

        patch_config_map_data(name=USER_DEL_CM,
                              namespace=NAUTA_NAMESPACE,
                              key=username,
                              value="1")

        if purge:
            try:
                click.echo(Texts.DELETION_START_PURGING)
                purge_user(username)
            except Exception:
                # a failed purge is only reported - the user itself may
                # still have been deleted, so the command carries on
                handle_error(logger, Texts.PURGE_ERROR_MSG,
                             Texts.PURGE_ERROR_MSG)

        # CAN-616 - wait until user has been really deleted
        max_checks = 60
        with spinner(text=Texts.DELETION_VERIFICATION_OF_DELETING) as progress:
            removal_confirmed = False
            for _ in range(max_checks):
                presence = check_users_presence(username)
                deletion_markers = get_config_map_data(
                    name=USER_DEL_CM,
                    namespace=NAUTA_NAMESPACE,
                    request_timeout=1)

                user_gone = not presence or presence == UserState.NOT_EXISTS
                if user_gone and (not deletion_markers
                                  or not deletion_markers.get(username)):
                    removal_confirmed = True
                    break
                time.sleep(1)

            if not removal_confirmed:
                # polling limit reached - deletion is still under way
                progress.hide()
                click.echo()
                click.echo(Texts.DELETE_IN_PROGRESS_MSG)
                exit(0)

        click.echo()
        click.echo(Texts.DELETE_SUCCESS_MSG.format(username=username))
    except K8sProxyCloseError:
        report_failure(Texts.PROXY_ERROR_LOG_MSG, Texts.PROXY_ERROR_USER_MSG)
        exit(1)
    except Exception:
        report_failure(Texts.OTHER_ERROR_LOG_MSG, Texts.OTHER_ERROR_USER_MSG)
        exit(1)