Code Example #1
File: test_logs.py  Project: pnijhara/nauta
def test_show_logs_match(mocker):
    es_client_mock = mocker.patch(
        'commands.common.logs_utils.K8sElasticSearchClient')

    es_client_instance = es_client_mock.return_value
    es_client_instance.get_experiment_logs_generator.return_value = TEST_LOG_ENTRIES

    get_kubectl_host_mock = mocker.patch(
        'commands.common.logs_utils.get_kubectl_host')
    get_api_key_mock = mocker.patch('commands.common.logs_utils.get_api_key')

    get_current_namespace_mock = mocker.patch(
        'commands.common.logs_utils.get_kubectl_current_context_namespace')
    fake_experiment_1_name = 'fake-experiment-1'
    fake_experiment_2_name = 'fake-experiment-2'
    list_runs_mock = mocker.patch('commands.common.logs_utils.Run.list')
    list_runs_mock.return_value = [
        Run(name=fake_experiment_1_name,
            experiment_name=fake_experiment_1_name),
        Run(name=fake_experiment_2_name,
            experiment_name=fake_experiment_2_name)
    ]

    runner = CliRunner()
    result = runner.invoke(logs.logs, ['-m', 'fake-experiment'])

    assert get_kubectl_host_mock.call_count == 1, 'kubectl host was not retrieved'
    assert get_api_key_mock.call_count == 1, 'k8s api key was not retrieved'
    assert get_current_namespace_mock.call_count == 1, 'namespace was not retrieved'
    assert list_runs_mock.call_count == 1, 'run was not retrieved'
    assert es_client_instance.get_experiment_logs_generator.call_count == 2, 'Experiment logs were not retrieved'

    assert fake_experiment_1_name in result.output
    assert fake_experiment_2_name in result.output
Code Example #2
def prepare_list_of_runs(parameter_range: List[Tuple[str, str]],
                         experiment_name: str, parameter_set: Tuple[str, ...],
                         template_name: str) -> List[Run]:
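    """
    Build the list of Run objects for an experiment. When neither parameter
    ranges nor parameter sets are given, a single Run named after the experiment
    is created; otherwise one Run is created for every combination of set and
    range parameters, named <experiment_name>-<index>.
    """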

    run_list: List[Run] = []

    if not parameter_range and not parameter_set:
        run_list = [
            Run(name=experiment_name,
                experiment_name=experiment_name,
                pod_selector={
                    'matchLabels': {
                        'app': template_name,
                        'release': experiment_name
                    }
                })
        ]
    else:
        list_of_range_parameters: List[Tuple[str, ...]] = [("", )]
        list_of_set_parameters = [("", )]

        if parameter_range:
            list_of_range_parameters = analyze_pr_parameters_list(
                parameter_range)

        if parameter_set:
            list_of_set_parameters = analyze_ps_parameters_list(parameter_set)

        run_index = 1
        for set_param in list_of_set_parameters:
            for range_param in list_of_range_parameters:
                current_run_name = experiment_name + "-" + str(run_index)
                current_params: Tuple[str, ...] = ()

                if len(set_param) >= 1 and set_param[0]:
                    current_params = set_param

                if len(range_param) >= 1 and range_param[0]:
                    current_params = current_params + range_param

                run_list.append(
                    Run(name=current_run_name,
                        experiment_name=experiment_name,
                        parameters=current_params,
                        pod_selector={
                            'matchLabels': {
                                'app': template_name,
                                'release': current_run_name
                            }
                        }))
                run_index += 1
    return run_list
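For reference, a minimal invocation might look like the sketch below. It is purely illustrative: the argument values are assumed here, with the parameter syntax inferred from the tests in Code Examples #12, #13 and #27.

runs = prepare_list_of_runs(parameter_range=[("param1", "{0, 1}")],
                            experiment_name="my-exp",
                            parameter_set=(),
                            template_name="tf-training")
# Expected result: two Runs named "my-exp-1" and "my-exp-2", with
# parameters ("param1=0",) and ("param1=1",) respectively.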
Code Example #3
def generate_name_for_existing_exps(script_name: str, namespace: str,
                                    run_kind: RunKinds = RunKinds.TRAINING) -> Tuple[Optional[str], Dict[str, str]]:
    exp_list = list_k8s_experiments_by_label(namespace=namespace,
                                             label_selector=f"script_name={script_name},name_origin")
    if not exp_list:
        return None, {}

    # 1. Find newest experiment name
    newest_exp = None
    for exp in exp_list:
        if not newest_exp:
            newest_exp = exp
        elif exp.metadata.creation_timestamp > newest_exp.metadata.creation_timestamp:
            newest_exp = exp
    name_origin = newest_exp.metadata.labels['name_origin']

    names_of_experiments_with_the_same_origin = []
    for exp in exp_list:
        if exp.metadata.labels['name_origin'] == name_origin:
            names_of_experiments_with_the_same_origin.append(exp.metadata.name)

    # 2. Count experiments (runs) matching the same origin name of the experiment
    runs_of_exp_list = Run.list(namespace=namespace, exp_name_filter=names_of_experiments_with_the_same_origin)

    counter = 1
    if runs_of_exp_list:
        counter = len(runs_of_exp_list) + 1

    calculated_name = f"{name_origin}-{counter}"
    return calculated_name, prepare_label(script_name, calculated_name, name_origin, run_kind=run_kind)
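A minimal usage sketch; the script name is taken from other snippets on this page, while the namespace and the run count behind the returned counter are hypothetical:

result_name, labels = generate_name_for_existing_exps(
    script_name='mnist_single_node.py', namespace='user-1')
# With e.g. three existing Runs sharing the origin name "mnist-training",
# result_name would be "mnist-training-4".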
Code Example #4
File: user.py  Project: zhcf/nauta
    def list(cls,
             namespace: str = None,
             custom_objects_api: CustomObjectsApi = None):
        """
        Return list of users.
        :namespace:
        :return: List of User objects
        """
        logger.debug('Listing users.')
        k8s_custom_object_api = custom_objects_api if custom_objects_api \
            else PlatformResourceApiClient.get()

        raw_users = k8s_custom_object_api.list_cluster_custom_object(
            group=cls.api_group_name,
            plural=cls.crd_plural_name,
            version=cls.crd_version)

        users = [
            User.from_k8s_response_dict(user_dict)
            for user_dict in raw_users['items']
        ]

        # Get experiment runs for each user
        # TODO: CHANGE IMPLEMENTATION TO USE AGGREGATED USER DATA AFTER CAN-366
        runs = Run.list(custom_objects_api=k8s_custom_object_api)
        user_map = {user.name: user for user in users}

        for run in runs:
            if user_map.get(run.namespace):
                user_map[run.namespace].experiment_runs.append(run)
            else:
                logger.error(
                    f"Run exists for non-existing user {run.namespace}")

        return users
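A minimal usage sketch; the name and experiment_runs attributes are the ones populated in the method body above:

users = User.list()
for user in users:
    print(user.name, len(user.experiment_runs))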
Code Example #5
File: test_logs.py  Project: pnijhara/nauta
def test_show_logs_failure(mocker):
    es_client_mock = mocker.patch(
        'commands.common.logs_utils.K8sElasticSearchClient')
    es_client_instance = es_client_mock.return_value
    es_client_instance.get_experiment_logs_generator.side_effect = RuntimeError

    get_kubectl_host_mock = mocker.patch(
        'commands.common.logs_utils.get_kubectl_host')
    get_api_key_mock = mocker.patch('commands.common.logs_utils.get_api_key')

    get_current_namespace_mock = mocker.patch(
        'commands.common.logs_utils.get_kubectl_current_context_namespace')
    fake_experiment_name = 'fake-experiment'
    list_runs_mock = mocker.patch('commands.common.logs_utils.Run.list')
    list_runs_mock.return_value = [
        Run(name=fake_experiment_name, experiment_name=fake_experiment_name)
    ]

    runner = CliRunner()

    result = runner.invoke(logs.logs, [fake_experiment_name])

    assert get_kubectl_host_mock.call_count == 1, 'kubectl host was not retrieved'
    assert get_api_key_mock.call_count == 1, 'k8s api key was not retrieved'
    assert get_current_namespace_mock.call_count == 1, 'namespace was not retrieved'
    assert list_runs_mock.call_count == 1, 'run was not retrieved'
    assert es_client_instance.get_experiment_logs_generator.call_count == 1, 'Experiment logs retrieval was not called'
    assert result.exit_code == 1
Code Example #6
File: test_logs.py  Project: pnijhara/nauta
def test_show_logs_from_two_experiments(mocker):
    es_client_mock = mocker.patch(
        'commands.common.logs_utils.K8sElasticSearchClient')
    es_client_instance = es_client_mock.return_value
    es_client_instance.get_experiment_logs_generator.return_value = TEST_LOG_ENTRIES

    get_kubectl_host_mock = mocker.patch(
        'commands.common.logs_utils.get_kubectl_host')
    get_api_key_mock = mocker.patch('commands.common.logs_utils.get_api_key')

    get_current_namespace_mock = mocker.patch(
        'commands.common.logs_utils.get_kubectl_current_context_namespace')

    fake_experiment_name = 'fake-experiment'
    list_runs_mock = mocker.patch('commands.common.logs_utils.Run.list')
    list_runs_mock.return_value = [
        Run(name=fake_experiment_name, experiment_name=fake_experiment_name)
    ]

    runner = CliRunner()
    m = mock_open()
    with patch("builtins.open", m) as open_mock:
        exception = RuntimeError()
        exception.message = "Cause of an error"
        open_mock.return_value.__enter__.side_effect = exception
        result = runner.invoke(logs.logs, ['fake-experiment', '-o'], input='y')

    assert CmdsCommonTexts.LOGS_STORING_ERROR.format(
        exception_message=exception.message) in result.output
    assert get_kubectl_host_mock.call_count == 1, 'kubectl host was not retrieved'
    assert get_api_key_mock.call_count == 1, 'k8s api key was not retrieved'
    assert get_current_namespace_mock.call_count == 1, 'namespace was not retrieved'
    assert list_runs_mock.call_count == 1, 'run was not retrieved'
    assert es_client_instance.get_experiment_logs_generator.call_count == 1, 'Experiment logs were not retrieved'
Code Example #7
def create_fake_run(experiment: Experiment) -> Run:
    return Run(name=experiment.name, experiment_name=experiment.name, metrics={},
               parameters=experiment.parameters_spec, pod_count=0,
               pod_selector={}, state=RunStatus.CREATING, namespace=experiment.namespace,
               creation_timestamp=experiment.creation_timestamp,
               template_name=experiment.template_name,
               template_version=experiment.template_version)
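A minimal usage sketch, assuming an Experiment fetched with Experiment.get as in the other examples on this page (the experiment name and namespace are placeholders):

experiment = Experiment.get(name='my-exp', namespace='user-1')
if experiment:
    fake_run = create_fake_run(experiment)
    assert fake_run.state == RunStatus.CREATING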
Code Example #8
File: test_logs.py  Project: pnijhara/nauta
def test_show_logs_to_file_success(mocker):
    es_client_mock = mocker.patch(
        'commands.common.logs_utils.K8sElasticSearchClient')
    es_client_instance = es_client_mock.return_value
    es_client_instance.get_experiment_logs_generator.return_value = TEST_LOG_ENTRIES

    get_kubectl_host_mock = mocker.patch(
        'commands.common.logs_utils.get_kubectl_host')
    get_api_key_mock = mocker.patch('commands.common.logs_utils.get_api_key')

    get_current_namespace_mock = mocker.patch(
        'commands.common.logs_utils.get_kubectl_current_context_namespace')
    fake_experiment_name = 'fake-experiment'
    list_runs_mock = mocker.patch('commands.common.logs_utils.Run.list')
    list_runs_mock.return_value = [
        Run(name=fake_experiment_name, experiment_name=fake_experiment_name)
    ]

    runner = CliRunner()
    m = mock_open()
    with patch("builtins.open", m) as open_mock:
        runner.invoke(logs.logs, ['fake-experiment', '-o'], input='y')

    assert get_kubectl_host_mock.call_count == 1, 'kubectl host was not retrieved'
    assert get_api_key_mock.call_count == 1, 'k8s api key was not retrieved'
    assert get_current_namespace_mock.call_count == 1, 'namespace was not retrieved'
    assert list_runs_mock.call_count == 1, 'run was not retrieved'
    assert es_client_instance.get_experiment_logs_generator.call_count == 1, 'Experiment logs were not retrieved'
    assert open_mock.call_count == 1, "File wasn't saved."
Code Example #9
def test_show_logs_failure_proxy_problem(mocker, exception):
    es_client_mock = mocker.patch(
        'commands.experiment.logs.K8sElasticSearchClient')
    es_client_instance = es_client_mock.return_value
    es_client_instance.get_experiment_logs_generator.side_effect = RuntimeError

    proxy_mock = mocker.patch.object(logs, 'K8sProxy')
    proxy_mock.side_effect = exception
    get_current_namespace_mock = mocker.patch(
        'commands.experiment.logs.get_kubectl_current_context_namespace')
    fake_experiment_name = 'fake-experiment'
    list_runs_mock = mocker.patch('commands.experiment.logs.Run.list')
    list_runs_mock.return_value = [
        Run(name=fake_experiment_name, experiment_name=fake_experiment_name)
    ]

    runner = CliRunner()

    result = runner.invoke(logs.logs, [fake_experiment_name])

    assert proxy_mock.call_count == 1, 'port forwarding was not initiated'
    assert get_current_namespace_mock.call_count == 0, 'namespace was retrieved'
    assert list_runs_mock.call_count == 0, 'run was retrieved'
    assert es_client_instance.get_experiment_logs_generator.call_count == 0, 'Experiment logs retrieval was called'
    assert result.exit_code == 1
Code Example #10
File: stream.py  Project: yuanbw/nauta
def stream(state: State, name: str, data: str, method_verb: InferenceVerb):
    """
    Perform stream inference task on launched prediction instance.
    """
    method_verb = InferenceVerb(method_verb)
    try:
        namespace = get_kubectl_current_context_namespace()

        # TODO: check if kind field of inference instance Run is correct
        inference_instance = Run.get(name=name, namespace=namespace)
        if not inference_instance:
            handle_error(user_msg=Texts.INSTANCE_NOT_EXISTS_ERROR_MSG.format(
                name=name))
            exit(1)
        if inference_instance.state != RunStatus.RUNNING:
            handle_error(user_msg=Texts.INSTANCE_NOT_RUNNING_ERROR_MSG.format(
                name=name, running_code=RunStatus.RUNNING.value))
            exit(1)

        inference_instance_url = get_inference_instance_url(
            inference_instance=inference_instance)
        stream_url = f'{inference_instance_url}:{method_verb.value}'
    except Exception:
        handle_error(logger,
                     Texts.INSTANCE_GET_FAIL_ERROR_MSG.format(name=name),
                     Texts.INSTANCE_GET_FAIL_ERROR_MSG.format(name=name),
                     add_verbosity_msg=state.verbosity == 0)
        exit(1)

    try:
        with open(data, 'r', encoding='utf-8') as data_file:
            stream_data = json.load(data_file)
    except (json.JSONDecodeError, IOError):
        handle_error(logger, Texts.JSON_LOAD_ERROR_MSG.format(data=data),
                     Texts.JSON_LOAD_ERROR_MSG.format(data=data))
        exit(1)

    try:
        api_key = get_api_key()
        headers = {
            'Authorization': api_key,
            'Accept': 'application/json',
            'Content-Type': 'application/json'
        }
        with spinner(text=Texts.WAITING_FOR_RESPONSE_MSG):
            stream_response = requests.post(
                stream_url,
                data=json.dumps(stream_data),  # nosec - request to k8s cluster
                verify=False,
                headers=headers)
        stream_response.raise_for_status()
        click.echo(stream_response.text)
    except Exception as e:
        error_msg = Texts.INFERENCE_OTHER_ERROR_MSG.format(exception=e)
        if hasattr(e, 'response'):
            error_msg += Texts.INFERENCE_ERROR_RESPONSE_MSG.format(
                response_text=e.response.text)  # type: ignore
        handle_error(logger, error_msg, error_msg)
        exit(1)
Code Example #11
def test_list_runs_from_namespace(mock_k8s_api_client: CustomObjectsApi):
    raw_runs_single_namespace = dict(LIST_RUNS_RESPONSE_RAW)
    raw_runs_single_namespace['items'] = [raw_runs_single_namespace['items'][0]]
    mock_k8s_api_client.list_namespaced_custom_object.return_value = raw_runs_single_namespace

    runs = Run.list(namespace='namespace-1')

    assert [TEST_RUNS[0]] == runs
Code Example #12
def test_create_list_of_runs_ps_only(mocker):
    experiment_name = "experiment_name"
    template_name = "template_name"
    mocker.patch("platform_resources.experiment_utils.generate_exp_name_and_labels",
                 side_effect=[(experiment_name, {})])

    multiple_two_params = ("{param1:0, param2:1}", "{param1:2,param3:3}")
    multiple_two_params_list_result = \
        [Run(name=experiment_name + "-1", experiment_name=experiment_name,
             parameters=("param1=0", "param2=1")),
         Run(name=experiment_name + "-2", experiment_name=experiment_name,
             parameters=("param1=2", "param3=3"))]
    output = prepare_list_of_runs(parameter_range=[], experiment_name=experiment_name,
                                  parameter_set=multiple_two_params, template_name=template_name)
    assert len(output) == 2
    for expected_run, result_run in zip(multiple_two_params_list_result, output):
        assert expected_run.parameters == result_run.parameters
Code Example #13
def test_create_list_of_runs_pr_only(mocker):
    experiment_name = "experiment_name"
    template_name = "template_name"
    mocker.patch(
        "platform_resources.experiment_utils.generate_exp_name_and_labels",
        side_effect=[(experiment_name, {})])

    two_params_list = [("param1", "{0, 1}"), ("param2", "{0...2:1}")]
    two_params_list_result = \
        [Run(name=experiment_name + "-1", experiment_name=experiment_name,
             parameters=("param1=0", "param2=0")),
         Run(name=experiment_name + "-2", experiment_name=experiment_name,
             parameters=("param1=0", "param2=1")),
         Run(name=experiment_name + "-3", experiment_name=experiment_name,
             parameters=("param1=0", "param2=2")),
         Run(name=experiment_name + "-4", experiment_name=experiment_name,
             parameters=("param1=1", "param2=0")),
         Run(name=experiment_name + "-5", experiment_name=experiment_name,
             parameters=("param1=1", "param2=1")),
         Run(name=experiment_name + "-6", experiment_name=experiment_name,
             parameters=("param1=1", "param2=2"))]

    output = prepare_list_of_runs(parameter_range=two_params_list,
                                  experiment_name=experiment_name,
                                  parameter_set=(),
                                  template_name=template_name)
    assert len(output) == 6
    for expected_run, result_run in zip(two_params_list_result, output):
        assert expected_run.parameters == result_run.parameters
Code Example #14
def list_runs_in_cli(verbosity_lvl: int, all_users: bool, name: str, status: RunStatus,
                     listed_runs_kinds: List[RunKinds], runs_list_headers: List[str], with_metrics: bool,
                     count: int = None, brief: bool = False):
    """
    Display a list of selected runs in the cli.

    :param verbosity_lvl: level at which error messages should be logged or displayed
    :param all_users: whether to display runs regardless of their owner
    :param name: regular expression that names of the shown runs have to match
    :param status: display only runs with this status
    :param listed_runs_kinds: list of kinds of runs that will be listed
    :param runs_list_headers: headers displayed on top of the table shown in the cli
    :param with_metrics: whether to show the metrics column
    :param count: number of rows displayed; if not given, the list is not limited
    :param brief: when True, only the experiment name, submission date, owner and state are printed
    """

    try:
        namespace = None if all_users else get_kubectl_current_context_namespace()
        status = RunStatus[status] if status else None

        # The list experiments command actually lists Run resources rather than Experiment
        # resources, with one exception: for an initializing run, nctl displays the data of
        # the experiment instead of the data of the run
        runs = replace_initializing_runs(
            Run.list(namespace=namespace, state_list=[status], name_filter=name, run_kinds_filter=listed_runs_kinds))
        runs_representations = [run.cli_representation for run in runs]
        if brief:
            runs_table_data = [
                (run_representation.name, run_representation.submission_date, run_representation.submitter,
                 run_representation.status)
                for run_representation in runs_representations
            ]
        elif with_metrics:
            runs_table_data = runs_representations
        else:
            runs_table_data = [
                (run_representation.name, run_representation.parameters, run_representation.submission_date,
                 run_representation.start_date, run_representation.duration,
                 run_representation.submitter, run_representation.status, run_representation.template_name)
                for run_representation in runs_representations
            ]
        click.echo(tabulate(runs_table_data if not count else runs_table_data[-count:],
                            headers=runs_list_headers, tablefmt="orgtbl"))
    except InvalidRegularExpressionError:
        handle_error(logger, Texts.INVALID_REGEX_ERROR_MSG, Texts.INVALID_REGEX_ERROR_MSG,
                     add_verbosity_msg=verbosity_lvl == 0)
        exit(1)
    except Exception:
        handle_error(logger, Texts.OTHER_ERROR_MSG, Texts.OTHER_ERROR_MSG, add_verbosity_msg=verbosity_lvl == 0)
        exit(1)
Code Example #15
def cancel_experiment(exp_name: str, runs_to_cancel: List[Run],
                      namespace: str) -> Tuple[List[Run], List[Run]]:
    """
    Cancel experiment with a given name by cancelling runs given as a parameter. If given experiment
    contains more runs than is in the list of runs - experiment's state remains intact.

    :param exp_name: name of an experiment to which belong runs passed in run_list parameter
    :param runs_to_cancel: list of runs that should be deleted, they have to belong to exp_name experiment
    :param namespace: namespace where experiment is located
    :return: two list - first contains runs that were cancelled successfully, second - those which weren't
    """
    logger.debug(f"Cancelling {exp_name} experiment ...")

    deleted_runs: List[Run] = []
    not_deleted_runs: List[Run] = []

    experiment = Experiment.get(name=exp_name, namespace=namespace)
    if not experiment:
        raise RuntimeError(Texts.GET_EXPERIMENT_ERROR_MSG)

    experiment_runs = Run.list(namespace=namespace,
                               exp_name_filter=[exp_name],
                               excl_state=RunStatus.CANCELLED)
    # check whether the experiment has more runs than those that should be cancelled
    cancel_whole_experiment = (len(experiment_runs) == len(runs_to_cancel))
    if cancel_whole_experiment:
        experiment.state = ExperimentStatus.CANCELLING
        experiment.update()

    try:
        deleted_runs, not_deleted_runs = cancel_experiment_runs(
            runs_to_cancel=runs_to_cancel, namespace=namespace)

        if cancel_whole_experiment and not not_deleted_runs:
            try:
                # change an experiment state to CANCELLED
                experiment.state = ExperimentStatus.CANCELLED
                experiment.update()
            except Exception:
                # problems during updating the experiment are hidden; once the runs
                # are cancelled, the user has no possibility to remove them
                logger.exception(
                    "Error during cancelling Experiment resource.")

    except Exception:
        logger.exception("Error during cancelling experiment.")
        return deleted_runs, not_deleted_runs

    return deleted_runs, not_deleted_runs
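A minimal usage sketch, assuming the runs were fetched the same way the function itself lists them (the experiment name and namespace are placeholders):

runs = Run.list(namespace='user-1', exp_name_filter=['my-exp'],
                excl_state=RunStatus.CANCELLED)
cancelled, failed = cancel_experiment(exp_name='my-exp',
                                      runs_to_cancel=runs,
                                      namespace='user-1')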
Code Example #16
def list_unitialized_experiments_in_cli(verbosity_lvl: int, all_users: bool,
                                        name: str, headers: List[str], listed_runs_kinds: List[RunKinds] = None,
                                        count: int = None, brief: bool = False):
    """
    Display a list of selected runs in the cli.

    :param verbosity_lvl: level at which error messages should be logged or displayed
    :param all_users: whether to display runs regardless of their owner or not
    :param name: regular expression to which names of the shown runs have to match
    :param headers: headers which will be displayed on top of a table shown in the cli
    :param count: number of rows displayed on a list. If not given - content of a list is not limited
    """

    if not listed_runs_kinds:
        listed_runs_kinds = [RunKinds.TRAINING, RunKinds.JUPYTER]

    try:
        namespace = None if all_users else get_kubectl_current_context_namespace()

        creating_experiments = Experiment.list(namespace=namespace,
                                               state=ExperimentStatus.CREATING,
                                               run_kinds_filter=listed_runs_kinds,
                                               name_filter=name)
        runs = Run.list(namespace=namespace, name_filter=name, run_kinds_filter=listed_runs_kinds)

        # Get Experiments without associated Runs
        names_of_experiment_with_runs = set()
        for run in runs:
            names_of_experiment_with_runs.add(run.experiment_name)

        uninitialized_experiments = [experiment for experiment in creating_experiments
                                     if experiment.name not in names_of_experiment_with_runs]

        displayed_items_count = count if count else len(uninitialized_experiments)
        click.echo(tabulate([uninitialized_experiment_cli_representation(experiment)
                             for experiment in uninitialized_experiments][-displayed_items_count:],
                            headers=headers, tablefmt="orgtbl"))
    except InvalidRegularExpressionError:
        handle_error(logger, Texts.INVALID_REGEX_ERROR_MSG, Texts.INVALID_REGEX_ERROR_MSG,
                     add_verbosity_msg=verbosity_lvl == 0)
        exit(1)
    except Exception:
        handle_error(logger, Texts.OTHER_ERROR_MSG, Texts.OTHER_ERROR_MSG, add_verbosity_msg=verbosity_lvl == 0)
        exit(1)
Code Example #17
def test_show_logs_success(mocker):
    es_client_mock = mocker.patch('commands.common.K8sElasticSearchClient')
    es_client_instance = es_client_mock.return_value
    es_client_instance.get_experiment_logs_generator.return_value = TEST_LOG_ENTRIES

    proxy_mock = mocker.patch.object(common, 'K8sProxy')

    get_current_namespace_mock = mocker.patch('commands.common.get_kubectl_current_context_namespace')
    fake_experiment_name = 'fake-experiment'
    list_runs_mock = mocker.patch('commands.common.Run.list')
    list_runs_mock.return_value = [Run(name=fake_experiment_name, experiment_name=fake_experiment_name)]

    runner = CliRunner()
    runner.invoke(logs.logs, [fake_experiment_name])

    assert proxy_mock.call_count == 1, 'port forwarding was not initiated'
    assert get_current_namespace_mock.call_count == 1, 'namespace was not retrieved'
    assert list_runs_mock.call_count == 1, 'run was not retrieved'
    assert es_client_instance.get_experiment_logs_generator.call_count == 1, 'Experiment logs were not retrieved'
Code Example #18
def test_list_runs_name_filter(mock_k8s_api_client: CustomObjectsApi):
    mock_k8s_api_client.list_cluster_custom_object.return_value = LIST_RUNS_RESPONSE_RAW
    runs = Run.list(name_filter=TEST_RUNS[1].name)
    assert [TEST_RUNS[1]] == runs
Code Example #19
def get_logs(experiment_name: str, min_severity: SeverityLevel, start_date: str,
             end_date: str, pod_ids: str, pod_status: PodStatus, match: str, output: bool, pager: bool, follow: bool,
             runs_kinds: List[RunKinds], instance_type: str):
    """
    Show logs for a given experiment.
    """
    # check whether we have runs with a given name
    if experiment_name and match:
        handle_error(user_msg=Texts.NAME_M_BOTH_GIVEN_ERROR_MSG.format(instance_type=instance_type))
        exit(1)
    elif not experiment_name and not match:
        handle_error(user_msg=Texts.NAME_M_NONE_GIVEN_ERROR_MSG.format(instance_type=instance_type))
        exit(1)

    try:
        with K8sProxy(NAUTAAppNames.ELASTICSEARCH) as proxy:
            es_client = K8sElasticSearchClient(host="127.0.0.1", port=proxy.tunnel_port,
                                               verify_certs=False, use_ssl=False)
            namespace = get_kubectl_current_context_namespace()
            if match:
                experiment_name = match
                name_filter = match
            else:
                name_filter = f'^{experiment_name}$'
            runs = Run.list(namespace=namespace, name_filter=name_filter, run_kinds_filter=runs_kinds)
            if not runs:
                raise ValueError(f'Run with given name: {experiment_name} does not exist in namespace {namespace}.')

            pod_ids = pod_ids.split(',') if pod_ids else None
            min_severity = SeverityLevel[min_severity] if min_severity else None
            pod_status = PodStatus[pod_status] if pod_status else None
            follow_logs = follow and not output

            if output and len(runs) > 1:
                click.echo(Texts.MORE_EXP_LOGS_MESSAGE)

            for run in runs:
                start_date = start_date if start_date else run.creation_timestamp

                run_logs_generator = es_client.get_experiment_logs_generator(run=run, namespace=namespace,
                                                                             min_severity=min_severity,
                                                                             start_date=start_date, end_date=end_date,
                                                                             pod_ids=pod_ids, pod_status=pod_status,
                                                                             follow=follow_logs)

                if output:
                    save_logs_to_file(run=run, run_logs_generator=run_logs_generator, instance_type=instance_type)
                else:
                    if len(runs) > 1:
                        click.echo(f'Experiment : {run.name}')
                    print_logs(run_logs_generator=run_logs_generator, pager=pager)

    except K8sProxyCloseError:
        handle_error(logger, Texts.PROXY_CLOSE_LOG_ERROR_MSG, Texts.PROXY_CLOSE_USER_ERROR_MSG)
        exit(1)
    except LocalPortOccupiedError as exe:
        handle_error(logger, Texts.LOCAL_PORT_OCCUPIED_ERROR_MSG.format(exception_message=exe.message),
                     Texts.LOCAL_PORT_OCCUPIED_ERROR_MSG.format(exception_message=exe.message))
        exit(1)
    except K8sProxyOpenError:
        handle_error(logger, Texts.PROXY_CREATION_ERROR_MSG, Texts.PROXY_CREATION_ERROR_MSG)
        exit(1)
    except ValueError:
        handle_error(logger, Texts.EXPERIMENT_NOT_EXISTS_ERROR_MSG.format(experiment_name=experiment_name,
                                                                          instance_type=instance_type.capitalize()),
                     Texts.EXPERIMENT_NOT_EXISTS_ERROR_MSG.format(experiment_name=experiment_name,
                                                                  instance_type=instance_type.capitalize()))
        exit(1)
    except Exception:
        handle_error(logger, Texts.LOGS_GET_OTHER_ERROR_MSG.format(instance_type=instance_type),
                     Texts.LOGS_GET_OTHER_ERROR_MSG.format(instance_type=instance_type))
        exit(1)
Code Example #20
from unittest.mock import MagicMock
from kubernetes.client import V1Pod, V1PodStatus, V1Event, V1ObjectReference, V1ObjectMeta

from commands.experiment import view
from platform_resources.run import Run, RunStatus
from platform_resources.experiment import Experiment
from cli_text_consts import ExperimentViewCmdTexts as Texts
from util.k8s.k8s_statistics import ResourceUsage
from util.k8s.k8s_info import PodStatus

TEST_RUNS = [
    Run(name='test-experiment',
        parameters=['a 1', 'b 2'],
        creation_timestamp='2018-04-26T13:43:01Z',
        namespace='namespace-1',
        state=RunStatus.RUNNING,
        template_name='test-ex-template',
        metrics={'any metrics': 'a'},
        experiment_name='experiment_name',
        pod_count=1,
        pod_selector={}),
    Run(name='test-experiment-2',
        parameters=['a 1', 'b 2'],
        creation_timestamp='2018-05-08T13:05:04Z',
        namespace='namespace-2',
        state=RunStatus.COMPLETE,
        template_name='test-ex-template',
        metrics={'any metrics': 'a'},
        experiment_name='experiment_name',
        pod_count=1,
        pod_selector={})
]
Code Example #21
File: logs_utils.py  Project: pnijhara/nauta
def get_logs(experiment_name: str, min_severity: SeverityLevel,
             start_date: str, end_date: str, pod_ids: str,
             pod_status: PodStatus, match: str, output: bool, pager: bool,
             follow: bool, runs_kinds: List[RunKinds], instance_type: str):
    """
    Show logs for a given experiment.
    """
    # check whether we have runs with a given name
    if experiment_name and match:
        handle_error(user_msg=Texts.NAME_M_BOTH_GIVEN_ERROR_MSG.format(
            instance_type=instance_type))
        exit(1)
    elif not experiment_name and not match:
        handle_error(user_msg=Texts.NAME_M_NONE_GIVEN_ERROR_MSG.format(
            instance_type=instance_type))
        exit(1)

    try:
        es_client = K8sElasticSearchClient(
            host=f'{get_kubectl_host(with_port=True)}'
            f'/api/v1/namespaces/nauta/services/nauta-elasticsearch:nauta/proxy',
            verify_certs=False,
            use_ssl=True,
            headers={'Authorization': get_api_key()})
        namespace = get_kubectl_current_context_namespace()
        if match:
            experiment_name = match
            name_filter = match
        else:
            name_filter = f'^{experiment_name}$'
        runs = Run.list(namespace=namespace,
                        name_filter=name_filter,
                        run_kinds_filter=runs_kinds)
        if not runs:
            raise ValueError(
                f'Run with given name: {experiment_name} does not exist in namespace {namespace}.'
            )
        pod_ids = pod_ids.split(',') if pod_ids else None  # type: ignore
        follow_logs = follow and not output
        if output and len(runs) > 1:
            click.echo(Texts.MORE_EXP_LOGS_MESSAGE)
        for run in runs:
            start_date = start_date if start_date else run.creation_timestamp
            run_logs_generator = es_client.get_experiment_logs_generator(
                run=run,
                namespace=namespace,
                min_severity=min_severity,
                start_date=start_date,
                end_date=end_date,
                pod_ids=pod_ids,
                pod_status=pod_status,
                follow=follow_logs)
            if output:
                save_logs_to_file(logs_generator=run_logs_generator,
                                  instance_name=run.name,
                                  instance_type=instance_type)
            else:
                if len(runs) > 1:
                    click.echo(f'Experiment : {run.name}')
                print_logs(run_logs_generator=run_logs_generator, pager=pager)
    except ValueError:
        handle_error(
            logger,
            Texts.EXPERIMENT_NOT_EXISTS_ERROR_MSG.format(
                experiment_name=experiment_name,
                instance_type=instance_type.capitalize()),
            Texts.EXPERIMENT_NOT_EXISTS_ERROR_MSG.format(
                experiment_name=experiment_name,
                instance_type=instance_type.capitalize()))
        exit(1)
    except Exception:
        handle_error(
            logger,
            Texts.LOGS_GET_OTHER_ERROR_MSG.format(instance_type=instance_type),
            Texts.LOGS_GET_OTHER_ERROR_MSG.format(instance_type=instance_type))
        exit(1)
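For orientation, the tests in Code Examples #1, #5, #6 and #8 drive this function through the Click command logs.logs; the invocations below mirror those tests:

runner = CliRunner()
runner.invoke(logs.logs, ['-m', 'fake-experiment'])              # match runs by regular expression
runner.invoke(logs.logs, ['fake-experiment', '-o'], input='y')   # save logs to a file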
Code Example #22
def cancel(state: State,
           name: str,
           match: str,
           purge: bool,
           pod_ids: str,
           pod_status: str,
           listed_runs_kinds: List[RunKinds] = None):
    """
    Cancels chosen experiments based on a name provided as a parameter.
    """
    if not listed_runs_kinds:
        listed_runs_kinds = [RunKinds.TRAINING, RunKinds.JUPYTER]

    # check whether we have runs with a given name
    if name and match:
        handle_error(user_msg=Texts.NAME_M_BOTH_GIVEN_ERROR_MSG)
        exit(1)

    if not name and not match:
        handle_error(user_msg=Texts.NAME_M_NONE_GIVEN_ERROR_MSG)
        exit(1)

    current_namespace = get_current_namespace()

    if pod_ids or pod_status:
        if not name:
            name = match

        cancel_pods_mode(namespace=current_namespace,
                         run_name=name,
                         pod_ids=pod_ids,
                         pod_status=pod_status)
        exit(0)

    search_for_experiment = False
    exp_to_be_cancelled = None

    if name:
        exp_to_be_cancelled = Experiment.get(namespace=current_namespace,
                                             name=name)
        exp_to_be_cancelled_kind = RunKinds(exp_to_be_cancelled.metadata['labels'].get('runKind')) \
            if exp_to_be_cancelled else None
        exp_to_be_cancelled = exp_to_be_cancelled if exp_to_be_cancelled_kind in listed_runs_kinds else None

        if exp_to_be_cancelled:
            search_for_experiment = True
        else:
            name = f"^{name}$"
    else:
        name = match

    list_of_all_runs = None

    list_of_applicable_states = [RunStatus.QUEUED, RunStatus.RUNNING]

    if purge:
        list_of_applicable_states.extend(
            [RunStatus.FAILED, RunStatus.COMPLETE, RunStatus.CANCELLED])

    try:
        if search_for_experiment:
            list_of_all_runs = Run.list(namespace=current_namespace,
                                        exp_name_filter=[name],
                                        run_kinds_filter=listed_runs_kinds)
        else:
            list_of_all_runs = Run.list(namespace=current_namespace,
                                        name_filter=name,
                                        run_kinds_filter=listed_runs_kinds)
    except Exception:
        handle_error(
            logger,
            Texts.LIST_RUNS_ERROR_MSG.format(
                experiment_name_plural=experiment_name_plural),
            Texts.LIST_RUNS_ERROR_MSG.format(
                experiment_name_plural=experiment_name_plural))
        exit(1)

    # Handle cancellation of experiments with no associated Runs
    if exp_to_be_cancelled and not list_of_all_runs:
        cancel_uninitialized_experiment(experiment=exp_to_be_cancelled,
                                        namespace=current_namespace,
                                        purge=purge)

    if not list_of_all_runs:
        handle_error(user_msg=Texts.LACK_OF_EXPERIMENTS_ERROR_MSG.format(
            experiment_name_plural=experiment_name_plural,
            experiment_name=experiment_name))
        exit(1)
    elif not purge and not [
            run for run in list_of_all_runs
            if run.state in [RunStatus.QUEUED, RunStatus.RUNNING]
    ]:
        handle_error(
            user_msg=Texts.LACK_OF_EXP_TO_BE_CANCELLED_ERROR_MSG.format(
                experiment_name_plural=experiment_name_plural))
        exit(1)

    list_of_runs_to_be_deleted: List[Run] = []
    names_of_cancelled_runs: List[str] = []

    if not purge:
        # check whether we have at least one experiment in state other than CANCELLED
        for run in list_of_all_runs:
            if run.state in list_of_applicable_states:
                list_of_runs_to_be_deleted.append(run)
            else:
                names_of_cancelled_runs.append(run.name)

        if not list_of_runs_to_be_deleted:
            handle_error(
                user_msg=Texts.EXPERIMENTS_ALREADY_CANCELLED_ERROR_MSG.format(
                    experiment_name_plural=experiment_name_plural,
                    operation_word=Texts.
                    DELETE_OPERATION["deleted"] if experiment_name_plural ==
                    'pods' else Texts.CANCEL_OPERATION["cancelled"]))
            exit(1)
        elif len(list_of_runs_to_be_deleted) != len(list_of_all_runs):
            click.echo(
                Texts.ALREADY_CANCELLED_LIST_HEADER.format(
                    experiment_name_plural=experiment_name_plural,
                    operation_word=Texts.DELETE_OPERATION["deleted"]
                    if experiment_name_plural == 'pods' else
                    Texts.CANCEL_OPERATION["cancelled"]))
            for name in names_of_cancelled_runs:
                click.echo(f"     - {name}")
            click.echo(
                Texts.CAN_BE_CANCELLED_LIST_HEADER.format(
                    experiment_name_plural=experiment_name_plural,
                    operation_word=Texts.DELETE_OPERATION["deleted"]
                    if experiment_name_plural == 'pods' else
                    Texts.CANCEL_OPERATION["cancelled"]))
            for run in list_of_runs_to_be_deleted:
                click.echo(f"     - {run.name}")
        else:
            click.echo(
                Texts.WILL_BE_CANCELLED_LIST_HEADER.format(
                    experiment_name_plural=experiment_name_plural,
                    operation_word=Texts.DELETE_OPERATION["deleted"]
                    if experiment_name_plural == 'pods' else
                    Texts.CANCEL_OPERATION["cancelled"]))
            for run in list_of_runs_to_be_deleted:
                click.echo(f"     - {run.name}")
    else:
        list_of_runs_to_be_deleted = list_of_all_runs
        click.echo(
            Texts.WILL_BE_PURGED_LIST_HEADER.format(
                experiment_name_plural=experiment_name_plural,
                operation_word=Texts.DELETE_OPERATION["deleted"]
                if experiment_name_plural == 'pods' else
                Texts.CANCEL_OPERATION["cancelled"]))
        for run in list_of_runs_to_be_deleted:
            click.echo(f"     - {run.name}")

    if not click.confirm(
            Texts.CONFIRM_CANCEL_MSG.format(
                experiment_name_plural=experiment_name_plural,
                operation_word=Texts.DELETE_OPERATION["deletion"]
                if experiment_name_plural == 'pods' else
                Texts.CANCEL_OPERATION["cancellation"])):
        handle_error(user_msg=Texts.CANCELLATION_ABORTED_MSG.format(
            experiment_name_plural=experiment_name_plural,
            operation_word=Texts.
            DELETE_OPERATION["deletion"] if experiment_name_plural ==
            'pods' else Texts.CANCEL_OPERATION["cancellation"]))
        exit(0)

    # group runs by experiments
    exp_with_runs: defaultdict = defaultdict(list)

    for run in list_of_runs_to_be_deleted:
        exp_with_runs[run.experiment_name].append(run)

    deleted_runs = []
    not_deleted_runs = []

    if purge:
        # Connect to elasticsearch in order to purge run logs
        try:
            with K8sProxy(NAUTAAppNames.ELASTICSEARCH) as proxy:
                es_client = K8sElasticSearchClient(
                    host="127.0.0.1",
                    port=proxy.tunnel_port,
                    verify_certs=False,
                    use_ssl=False,
                    with_admin_privledges=is_current_user_administrator())
                for exp_name, run_list in exp_with_runs.items():
                    try:
                        exp_del_runs, exp_not_del_runs = purge_experiment(
                            exp_name=exp_name,
                            runs_to_purge=run_list,
                            namespace=current_namespace,
                            k8s_es_client=es_client)
                        deleted_runs.extend(exp_del_runs)
                        not_deleted_runs.extend(exp_not_del_runs)
                    except Exception:
                        handle_error(logger, Texts.OTHER_CANCELLING_ERROR_MSG)
                        not_deleted_runs.extend(run_list)
        except K8sProxyCloseError:
            handle_error(logger, Texts.PROXY_CLOSING_ERROR_LOG_MSG,
                         Texts.PROXY_CLOSING_ERROR_USER_MSG)
            exit(1)
        except LocalPortOccupiedError as exe:
            handle_error(
                logger, Texts.PORT_OCCUPIED_ERROR_LOG_MSG,
                Texts.PORT_OCCUPIED_ERROR_USER_MSG.format(
                    exception_message=exe.message))
            exit(1)
        except K8sProxyOpenError:
            handle_error(logger, Texts.PROXY_OPEN_ERROR_MSG,
                         Texts.PROXY_OPEN_ERROR_MSG)
            exit(1)
    else:
        for exp_name, run_list in exp_with_runs.items():
            try:
                exp_del_runs, exp_not_del_runs = cancel_experiment(
                    exp_name=exp_name,
                    runs_to_cancel=run_list,
                    namespace=current_namespace)
                deleted_runs.extend(exp_del_runs)
                not_deleted_runs.extend(exp_not_del_runs)
            except Exception:
                handle_error(logger, Texts.OTHER_CANCELLING_ERROR_MSG)
                not_deleted_runs.extend(run_list)

    if deleted_runs:
        click.echo(
            Texts.SUCCESSFULLY_CANCELLED_LIST_HEADER.format(
                experiment_name_plural=experiment_name_plural,
                operation_word=Texts.DELETE_OPERATION["deleted"]
                if experiment_name_plural == 'pods' else
                Texts.CANCEL_OPERATION["cancelled"]))
        for run in deleted_runs:
            click.echo(f"     - {run.name}")

    if not_deleted_runs:
        click.echo(
            Texts.FAILED_TO_CANCEL_LIST_HEADER.format(
                experiment_name_plural=experiment_name_plural,
                operation_word=Texts.DELETE_OPERATION["deleted"]
                if experiment_name_plural == 'pods' else
                Texts.CANCEL_OPERATION["cancelled"]))
        for run in not_deleted_runs:
            click.echo(f"     - {run.name}")
        sys.exit(1)
Code Example #23
def purge_experiment(exp_name: str, runs_to_purge: List[Run],
                     k8s_es_client: K8sElasticSearchClient,
                     namespace: str) -> Tuple[List[Run], List[Run]]:
    """
       Purge experiment with a given name by cancelling runs given as a parameter. If given experiment
       contains more runs than is in the list of runs - experiment's state remains intact.

       :param exp_name: name of an experiment to which belong runs passed in run_list parameter
       :param runs_to_purge: list of runs that should be purged, they have to belong to exp_name experiment
       :param k8s_es_client: Kubernetes ElasticSearch client
       :param namespace: namespace where experiment is located
       :return: two list - first contains runs that were cancelled successfully, second - those which weren't
       """
    logger.debug(f"Purging {exp_name} experiment ...")

    purged_runs: List[Run] = []
    not_purged_runs: List[Run] = []

    experiment = Experiment.get(name=exp_name, namespace=namespace)
    if not experiment:
        raise RuntimeError(Texts.GET_EXPERIMENT_ERROR_MSG)

    experiment_runs = Run.list(namespace=namespace, exp_name_filter=[exp_name])
    # check whether the experiment has more runs than those that should be purged
    cancel_whole_experiment = (len(experiment_runs) == len(runs_to_purge))
    if cancel_whole_experiment:
        experiment.state = ExperimentStatus.CANCELLING
        experiment.update()

    try:
        cancelled_runs, not_cancelled_runs = cancel_experiment_runs(
            runs_to_cancel=runs_to_purge, namespace=namespace)
        not_purged_runs = not_cancelled_runs

        if cancel_whole_experiment:
            # Delete associated workflows
            experiment_associated_workflows = [
                wf for wf in ArgoWorkflow.list(namespace=namespace)
                if wf.labels.get('experimentName') == experiment.name
            ]
            for wf in experiment_associated_workflows:
                wf.delete()

            # Remove tags from git repo manager
            try:
                delete_exp_tag_from_git_repo_manager(
                    experiment_name=experiment.name,
                    username=namespace,
                    experiments_workdir=get_run_environment_path(''))
            except Exception:
                handle_error(logger, Texts.GIT_REPO_MANAGER_ERROR_MSG,
                             Texts.GIT_REPO_MANAGER_ERROR_MSG)
                raise

        for run in cancelled_runs:
            logger.debug(f"Purging {run.name} run ...")
            click.echo(Texts.PURGING_START_MSG.format(run_name=run.name))
            try:
                with spinner(text=Texts.PURGING_PROGRESS_MSG.format(
                        run_name=run.name)):
                    # purge helm release
                    delete_helm_release(run.name,
                                        namespace=namespace,
                                        purge=True)
                    # delete run
                    kubectl.delete_k8s_object("run", run.name)
                    purged_runs.append(run)
            except Exception as exe:
                not_purged_runs.append(run)
                logger.exception("Error during purging runs.")
                # an occurrence of a NotFound error may mean that the run has been removed earlier
                if "NotFound" not in str(exe):
                    click.echo(
                        Texts.INCOMPLETE_PURGE_ERROR_MSG.format(
                            experiment_name=exp_name))
                    raise exe
            try:
                # clear run logs
                if is_current_user_administrator():
                    logger.debug(f"Clearing logs for {run.name} run.")
                    with spinner(text=Texts.PURGING_LOGS_PROGRESS_MSG.format(
                            run_name=run.name)):
                        k8s_es_client.delete_logs_for_run(run=run.name,
                                                          namespace=namespace)
            except Exception:
                logger.exception("Error during clearing run logs.")

            # CAN-1099 - docker garbage collector has errors that prevent from correct removal of images
            # try:
            # try to remove images from docker registry
            #    delete_images_for_experiment(exp_name=run.name)
            # except Exception:
            #    logger.exception("Error during removing images.")

        if cancel_whole_experiment and not not_purged_runs:
            try:
                kubectl.delete_k8s_object("experiment", exp_name)
            except Exception:
                # problems during deleting the experiment are hidden; once the runs
                # are cancelled, the user has no possibility to remove them
                logger.exception("Error during purging experiment.")

    except Exception:
        logger.exception("Error during purging experiment.")
        return purged_runs, not_purged_runs

    return purged_runs, not_purged_runs
Code Example #24
    state=ExperimentStatus.CREATING,
    template_name='test-ex-template',
    template_namespace='test-ex-namespace',
    metadata={'labels': {
        'runKind': 'training'
    }})

RUN_QUEUED = Run(
    name="exp-mnist-single-node.py-18.05.17-16.05.45-1-tf-training",
    parameters=['mnist_single_node.py', '--data_dir', '/app'],
    state=RunStatus.QUEUED,
    metrics={'accuracy': 52.322},
    experiment_name="experiment-1",
    pod_count=1,
    pod_selector={
        'matchLabels': {
            'app': 'tf-training',
            'draft': 'exp-mnist-single-node.py-18.05.17-16.05.45-1',
            'release': 'exp-mnist-single-node.py-18.05.17-16.05.45-1'
        }
    },
    namespace="mciesiel-dev",
    creation_timestamp="2018-05-17T14:05:52Z",
    template_name="tf-training")
RUN_CANCELLED = Run(
    name="exp-mnist-single-node.py-18.05.17-16.05.45-1-tf-training",
    parameters=['mnist_single_node.py', '--data_dir', '/app'],
    state=RunStatus.CANCELLED,
    metrics={'accuracy': 52.322},
    experiment_name="experiment-name-will-be-added-soon",
    pod_count=1,
Code Example #25
def test_list_runs(mock_k8s_api_client):
    mock_k8s_api_client.list_cluster_custom_object.return_value = LIST_RUNS_RESPONSE_RAW
    runs = Run.list()
    assert runs == TEST_RUNS
Code Example #26
                                creation_timestamp='2018-04-26T13:43:01Z',
                                namespace='namespace-1',
                                state=ExperimentStatus.CREATING,
                                template_name='jupyter',
                                template_namespace='test-ex-namespace')

NON_JUPYTER_EXPERIMENT = Experiment(name='test-experiment-2',
                                    parameters_spec=['a 1', 'b 2'],
                                    creation_timestamp='2018-05-08T13:05:04Z',
                                    namespace='namespace-2',
                                    state=ExperimentStatus.SUBMITTED,
                                    template_name='test-ex-template',
                                    template_namespace='test-ex-namespace')
SUBMITTED_RUNS = [
    Run(name="exp-mnist-single-node.py-18.05.17-16.05.45-1-tf-training",
        experiment_name=CORRECT_INTERACT_NAME,
        state=RunStatus.QUEUED)
]

KO_EXPERIMENT = KubernetesObject(spec=JUPYTER_EXPERIMENT,
                                 metadata=client.V1ObjectMeta())


class InteractMocks:
    def __init__(self, mocker):
        self.mocker = mocker
        self.get_namespace = mocker.patch(
            "commands.experiment.interact.get_kubectl_current_context_namespace",
            side_effect=[EXPERIMENT_NAMESPACE, EXPERIMENT_NAMESPACE])
        self.get_experiment = mocker.patch(
            "commands.experiment.interact.Experiment.get", return_value=None)
Code Example #27
def test_create_list_of_runs_pr_and_ps(mocker):
    experiment_name = "experiment_name"
    template_name = "template_name"
    mocker.patch(
        "platform_resources.experiment_utils.generate_exp_name_and_labels",
        side_effect=[(experiment_name, {})])

    two_params_list = [("param1", "{0, 1}"), ("param2", "{0...2:1}")]
    multiple_two_params = ("{param3:0, param4:1}", "{param3:2,param4:3}")

    expected_result = [
        Run(name=experiment_name + "-1",
            experiment_name=experiment_name,
            parameters=("param3=0", "param4=1", "param1=0", "param2=0")),
        Run(name=experiment_name + "-2",
            experiment_name=experiment_name,
            parameters=("param3=0", "param4=1", "param1=0", "param2=1")),
        Run(name=experiment_name + "-3",
            experiment_name=experiment_name,
            parameters=("param3=0", "param4=1", "param1=0", "param2=2")),
        Run(name=experiment_name + "-4",
            experiment_name=experiment_name,
            parameters=("param3=0", "param4=1", "param1=1", "param2=0")),
        Run(name=experiment_name + "-5",
            experiment_name=experiment_name,
            parameters=("param3=0", "param4=1", "param1=1", "param2=1")),
        Run(name=experiment_name + "-6",
            experiment_name=experiment_name,
            parameters=("param3=0", "param4=1", "param1=1", "param2=2")),
        Run(name=experiment_name + "-7",
            experiment_name=experiment_name,
            parameters=("param3=2", "param4=3", "param1=0", "param2=0")),
        Run(name=experiment_name + "-8",
            experiment_name=experiment_name,
            parameters=("param3=2", "param4=3", "param1=0", "param2=1")),
        Run(name=experiment_name + "-9",
            experiment_name=experiment_name,
            parameters=("param3=2", "param4=3", "param1=0", "param2=2")),
        Run(name=experiment_name + "-10",
            experiment_name=experiment_name,
            parameters=("param3=2", "param4=3", "param1=1", "param2=0")),
        Run(name=experiment_name + "-11",
            experiment_name=experiment_name,
            parameters=("param3=2", "param4=3", "param1=1", "param2=1")),
        Run(name=experiment_name + "-12",
            experiment_name=experiment_name,
            parameters=("param3=2", "param4=3", "param1=1", "param2=2"))
    ]

    output = prepare_list_of_runs(two_params_list,
                                  experiment_name,
                                  multiple_two_params,
                                  template_name=template_name)
    assert len(output) == 12

    for expected_run, result_run in zip(expected_result, output):
        assert expected_run.parameters == result_run.parameters
Code Example #28
def test_list_runs_filter_status(mock_k8s_api_client: CustomObjectsApi):
    mock_k8s_api_client.list_cluster_custom_object.return_value = LIST_RUNS_RESPONSE_RAW
    runs = Run.list(state_list=[RunStatus.QUEUED])
    assert [TEST_RUNS[0]] == runs
Code Example #29
import dateutil

from commands.common import list_utils
from platform_resources.experiment import Experiment
from platform_resources.run import Run, RunStatus

TEST_RUNS = [
    Run(name='test-experiment',
        parameters=('a 1', 'b 2'),
        metrics={
            'acc': 52.2,
            'loss': 1.62345
        },
        creation_timestamp='2018-04-26T13:43:01Z',
        namespace='namespace-1',
        state=RunStatus.QUEUED,
        experiment_name='test-experiment',
        pod_count=0,
        pod_selector={}),
    Run(name='test-experiment-2',
        parameters=('a 1', 'b 2'),
        metrics={
            'acc': 52.2,
            'loss': 1.62345
        },
        creation_timestamp='2018-05-08T13:05:04Z',
        namespace='namespace-2',
        state=RunStatus.COMPLETE,
        experiment_name='test-experiment',
        pod_count=0,  # closing fields assumed to mirror the first fixture entry
        pod_selector={})
]
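The fixture timestamps are ISO-8601 strings, and comparing them chronologically
requires parsing, presumably what dateutil is imported for. Note that the
parser submodule has to be imported explicitly; a small sketch:

import dateutil.parser  # a bare `import dateutil` does not expose .parser

first = dateutil.parser.parse('2018-04-26T13:43:01Z')
second = dateutil.parser.parse('2018-05-08T13:05:04Z')
assert first < second  # timezone-aware datetimes compare chronologically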
Code example #30
def view(context, state: State, experiment_name: str, tensorboard: bool,
         username: str):
    """
    Displays details of an experiment.
    """
    try:
        if username:
            namespace = username
        else:
            namespace = get_kubectl_current_context_namespace()

        run = Run.get(name=experiment_name, namespace=namespace)
        if not run:
            handle_error(user_msg=Texts.EXPERIMENT_NOT_FOUND_ERROR_MSG.format(
                experiment_name=experiment_name))
            exit(2)

        click.echo(
            tabulate([run.cli_representation],
                     headers=EXPERIMENTS_LIST_HEADERS,
                     tablefmt="orgtbl"))

        click.echo(Texts.PODS_PARTICIPATING_LIST_HEADER)

        pods = get_namespaced_pods(label_selector="runName=" + experiment_name,
                                   namespace=namespace)

        tabular_output = []
        containers_resources = []
        pending_pods = []

        for pod in pods:
            status_string = ""

            if pod.status.conditions:
                for cond in pod.status.conditions:
                    msg = "\n" if not cond.reason else "\n reason: " + \
                                                       wrap_text(cond.reason, width=POD_CONDITIONS_MAX_WIDTH)
                    msg = msg + ", \n message: " + wrap_text(cond.message, width=POD_CONDITIONS_MAX_WIDTH) \
                        if cond.message else msg
                    status_string += wrap_text(
                        cond.type + ": " + cond.status,
                        width=POD_CONDITIONS_MAX_WIDTH) + msg + "\n"
            else:
                pod_events = get_pod_events(namespace=namespace,
                                            name=pod.metadata.name)

                for event in pod_events:
                    msg = "\n" if not event.reason else "\n reason: " + \
                                                        wrap_text(event.reason, width=POD_CONDITIONS_MAX_WIDTH)
                    msg = msg + ", \n message: " + wrap_text(event.message, width=POD_CONDITIONS_MAX_WIDTH) \
                        if event.message else msg
                    status_string += msg + "\n"

            if pod.status.phase.upper() == PodStatus.PENDING.value:
                pending_pods.append(pod.metadata.name)

            container_statuses = defaultdict(lambda: None)
            if pod.status.container_statuses:
                for container_status in pod.status.container_statuses:
                    container_statuses[
                        container_status.name] = container_status.state

            container_details = []

            for container in pod.spec.containers:
                container_description = Texts.CONTAINER_DETAILS_MSG.format(
                    name=container.name,
                    status=container_status_to_msg(
                        container_statuses[container.name]),
                    volumes=container_volume_mounts_to_msg(
                        container.volume_mounts, spaces=2),
                    resources=container_resources_to_msg(container.resources,
                                                         spaces=4))
                container_details.append(container_description)
                containers_resources.append(container.resources)

            container_details = ''.join(container_details)

            tabular_output.append([
                pod.metadata.name,
                wrap_text(pod.metadata.uid, width=UID_MAX_WIDTH, spaces=0),
                status_string, container_details
            ])
        click.echo(
            tabulate(tabular_output,
                     Texts.PODS_TABLE_HEADERS,
                     tablefmt="orgtbl"))

        try:
            cpu_requests_sum = sum_cpu_resources([
                container_resource.requests["cpu"]
                for container_resource in containers_resources
                if container_resource.requests
                and container_resource.requests.get("cpu")
            ])
            mem_requests_sum = sum_mem_resources([
                container_resource.requests["memory"]
                for container_resource in containers_resources
                if container_resource.requests
                and container_resource.requests.get("memory")
            ])
            cpu_limits_sum = sum_cpu_resources([
                container_resource.limits["cpu"]
                for container_resource in containers_resources
                if container_resource.limits
                and container_resource.limits.get("cpu")
            ])
            mem_limits_sum = sum_mem_resources([
                container_resource.limits["memory"]
                for container_resource in containers_resources
                if container_resource.limits
                and container_resource.limits.get("memory")
            ])
        except ValueError as exception:
            handle_error(
                logger,
                Texts.RESOURCES_SUM_PARSING_ERROR_MSG.format(
                    error_msg=str(exception)),
                Texts.RESOURCES_SUM_PARSING_ERROR_MSG.format(
                    error_msg=str(exception)))
            # Bail out here; otherwise the sums printed below would be undefined.
            exit(1)

        click.echo(Texts.RESOURCES_SUM_LIST_HEADER)
        click.echo(
            tabulate(list(
                zip(Texts.RESOURCES_SUM_TABLE_ROWS_HEADERS, [
                    cpu_requests_sum, mem_requests_sum, cpu_limits_sum,
                    mem_limits_sum
                ])),
                     Texts.RESOURCES_SUM_TABLE_HEADERS,
                     tablefmt="orgtbl"))

        if tensorboard:
            click.echo()
            context.invoke(tensorboard_command,
                           experiment_name=[experiment_name])

        if pending_pods:
            click.echo()
            try:
                cpu = False
                memory = False
                for pod in pending_pods:
                    events_list = get_pod_events(namespace=namespace, name=pod)
                    for event in events_list:
                        if "insufficient cpu" in event.message.lower():
                            cpu = True
                        elif "insufficient memory" in event.message.lower():
                            memory = True
                        if cpu and memory:
                            break
                    if cpu and memory:
                        break

                if not cpu and not memory:
                    exit(0)

                if cpu and memory:
                    resources = "number of cpus and amount of memory"
                elif cpu:
                    resources = "number of cpus"
                else:
                    resources = "amount of memory"

                click.echo(
                    Texts.INSUFFICIENT_RESOURCES_MESSAGE.format(
                        resources=resources))
                click.echo()
                top_cpu_users, top_mem_users = get_highest_usage()
                click.echo(
                    Texts.TOP_CPU_CONSUMERS.format(consumers=", ".join(
                        [res.user_name for res in top_cpu_users[:3]])))
                click.echo(
                    Texts.TOP_MEMORY_CONSUMERS.format(consumers=", ".join(
                        [res.user_name for res in top_mem_users[:3]])))
            except Exception:
                click.echo(Texts.PROBLEMS_WHILE_GATHERING_USAGE_DATA)
                logger.exception(
                    Texts.PROBLEMS_WHILE_GATHERING_USAGE_DATA_LOGS)

    except Exception:
        handle_error(logger, Texts.VIEW_OTHER_ERROR_MSG,
                     Texts.VIEW_OTHER_ERROR_MSG)
        exit(1)
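For context on the summing step above: Kubernetes resource quantities are
strings, so CPU can arrive as millicores ("500m") or whole cores ("1"), and
adding them up means normalising first. A minimal sketch of the CPU case under
that assumption; the project's own sum_cpu_resources may accept more formats:

from typing import List


def sum_cpu_quantities(quantities: List[str]) -> str:
    # Illustrative only: handles the "500m" (millicores) and "2" (cores)
    # spellings; anything else raises ValueError, the failure mode the view
    # command catches around its resource sums.
    total_millicores = 0
    for quantity in quantities:
        if quantity.endswith("m"):
            total_millicores += int(quantity[:-1])
        else:
            total_millicores += int(float(quantity) * 1000)
    return "{0}m".format(total_millicores)


# sum_cpu_quantities(["500m", "1", "250m"]) == "1750m"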