Beispiel #1
0
def status(ctx: click.Context, username: str):
    """
    Returns status of a model

    :param username: if checked - searches for model for a certain user
    """
    try:
        # A given username selects that user's namespace; otherwise fall
        # back to the current kubectl context.
        namespace = username if username else get_kubectl_current_context_namespace()

        workflows: List[ArgoWorkflow.ArgoWorkflowCliModel] = []
        with spinner(text=Texts.LOAD_DATA_MSG):
            # filtering out workflows used to build images with training jobs
            for workflow in ArgoWorkflow.list(namespace=namespace,
                                              label_selector="type!=build-workflow"):
                workflows.append(workflow.cli_representation)

        click.echo(tabulate(workflows,
                            headers=MODEL_HEADERS,
                            tablefmt=TBLT_TABLE_FORMAT))
    except Exception:
        handle_error(logger,
                     Texts.OTHER_ERROR_MSG,
                     Texts.OTHER_ERROR_MSG,
                     add_verbosity_msg=True)
        exit(1)
Beispiel #2
0
def tensorboard(state: State, no_launch: bool,
                tensorboard_service_client_port: Optional[int],
                port_number: Optional[int], experiment_name: List[str]):
    """
    Subcommand for launching tensorboard with credentials

    :param state: CLI state object (its verbosity level controls error output)
    :param no_launch: passed through to launch_app_with_proxy; when set, the
        app is not opened automatically
    :param tensorboard_service_client_port: local port for the proxy to the
        tensorboard service; chosen automatically when None
    :param port_number: local port passed to launch_app_with_proxy for the
        created tensorboard instance
    :param experiment_name: experiments whose runs should be shown
    """
    current_namespace = get_kubectl_current_context_namespace()

    # Keep a tunnel to the in-cluster tensorboard service open for the
    # whole create-and-poll sequence below.
    with spinner(Texts.TB_WAITING_MSG) as proxy_spinner, \
            K8sProxy(nauta_app_name=NAUTAAppNames.TENSORBOARD_SERVICE, app_name='tensorboard-service',
                     namespace=current_namespace, port=tensorboard_service_client_port) as proxy:

        tensorboard_service_client = TensorboardServiceClient(
            address=f'http://127.0.0.1:{proxy.tunnel_port}')

        requested_runs = build_tensorboard_run_list(
            exp_list=experiment_name, current_namespace=current_namespace)

        # noinspection PyBroadException
        try:
            tb = tensorboard_service_client.create_tensorboard(requested_runs)
            # Runs rejected by the service are reported to the user, but the
            # command carries on with the remaining ones.
            if tb.invalid_runs:
                list_of_invalid_runs = ', '.join([
                    f'{item.get("owner")}/{item.get("name")}'
                    for item in tb.invalid_runs
                ])
                click.echo(
                    Texts.TB_INVALID_RUNS_MSG.format(
                        invalid_runs=list_of_invalid_runs))
        except Exception as exe:
            err_message = Texts.TB_CREATE_ERROR_MSG
            # On a 422 the exception's own text replaces the generic message.
            if hasattr(
                    exe, 'error_code'
            ) and exe.error_code == HTTPStatus.UNPROCESSABLE_ENTITY:  # type: ignore
                err_message = str(exe)
            handle_error(logger,
                         err_message,
                         err_message,
                         add_verbosity_msg=state.verbosity == 0)
            sys.exit(1)

        # Poll the service until the tensorboard instance reports RUNNING,
        # then attach a proxy to it and (optionally) launch it.
        for i in range(TENSORBOARD_TRIES_COUNT):
            # noinspection PyTypeChecker
            # tb.id is str
            tb = tensorboard_service_client.get_tensorboard(tb.id)
            if not tb:
                continue
            if tb.status == TensorboardStatus.RUNNING:
                proxy_spinner.hide()
                launch_app_with_proxy(k8s_app_name=NAUTAAppNames.TENSORBOARD,
                                      no_launch=no_launch,
                                      namespace=current_namespace,
                                      port=port_number,
                                      app_name=f"tensorboard-{tb.id}")
                return
            logger.warning(
                Texts.TB_WAITING_FOR_TB_MSG.format(
                    tb_id=tb.id, tb_status_value=tb.status.value))
            sleep(TENSORBOARD_CHECK_BACKOFF_SECONDS)

        # All tries exhausted without reaching RUNNING state.
        click.echo(Texts.TB_TIMEOUT_ERROR_MSG)
        sys.exit(2)
Beispiel #3
0
def logs(state: State, workflow_name: str):
    """
    Print log entries gathered in Elasticsearch for a given Argo workflow.

    :param state: CLI state object
    :param workflow_name: name of the workflow whose logs should be shown
    """
    try:
        namespace = get_kubectl_current_context_namespace()
        workflow: ArgoWorkflow = ArgoWorkflow.get(namespace=namespace, name=workflow_name)
        if not workflow:
            click.echo(Texts.NOT_FOUND_MSG.format(workflow_name=workflow_name))
            exit(0)

        # Tunnel to the in-cluster Elasticsearch for the duration of the
        # log retrieval; the tunnel is plain HTTP on a local port.
        with K8sProxy(NAUTAAppNames.ELASTICSEARCH) as proxy:
            es_client = K8sElasticSearchClient(host="127.0.0.1", port=proxy.tunnel_port,
                                               verify_certs=False, use_ssl=False)
            start_date = workflow.started_at
            workflow_logs_generator = es_client.get_argo_workflow_logs_generator(workflow=workflow,
                                                                                 namespace=namespace,
                                                                                 start_date=start_date)
            # Whitespace-only entries are skipped to keep the output readable.
            for log_entry in workflow_logs_generator:
                if not log_entry.content.isspace():
                    click.echo(f'{log_entry.date} {log_entry.pod_name} {log_entry.content}')
    except K8sProxyCloseError:
        handle_error(logger, Texts.PROXY_CLOSE_LOG_ERROR_MSG, Texts.PROXY_CLOSE_USER_ERROR_MSG)
        exit(1)
    except LocalPortOccupiedError as exe:
        handle_error(logger, Texts.LOCAL_PORT_OCCUPIED_ERROR_MSG.format(exception_message=exe.message),
                     Texts.LOCAL_PORT_OCCUPIED_ERROR_MSG.format(exception_message=exe.message))
        exit(1)
    except K8sProxyOpenError:
        handle_error(logger, Texts.PROXY_CREATION_ERROR_MSG, Texts.PROXY_CREATION_ERROR_MSG)
        exit(1)
    except Exception:
        handle_error(logger, Texts.OTHER_ERROR_MSG, Texts.OTHER_ERROR_MSG, add_verbosity_msg=True)
        exit(1)
Beispiel #4
0
def logs(ctx: click.Context, workflow_name: str):
    """Print Elasticsearch log entries gathered for a given Argo workflow."""
    try:
        namespace = get_kubectl_current_context_namespace()
        workflow: ArgoWorkflow = ArgoWorkflow.get(namespace=namespace,
                                                  name=workflow_name)
        if not workflow:
            click.echo(Texts.NOT_FOUND_MSG.format(workflow_name=workflow_name))
            exit(0)

        # Reach Elasticsearch through the kubernetes API server service proxy.
        es_host = (f'{get_kubectl_host(with_port=True)}'
                   f'/api/v1/namespaces/nauta/services/nauta-elasticsearch:nauta/proxy')
        es_client = K8sElasticSearchClient(host=es_host,
                                           verify_certs=False,
                                           use_ssl=True,
                                           headers={'Authorization': get_api_key()})

        entries = es_client.get_argo_workflow_logs_generator(
            workflow=workflow, namespace=namespace,
            start_date=workflow.started_at)
        # Whitespace-only entries are skipped to keep the output readable.
        for entry in entries:
            if not entry.content.isspace():
                click.echo(f'{entry.date} {entry.pod_name} {entry.content}')
    except Exception:
        handle_error(logger,
                     Texts.OTHER_ERROR_MSG,
                     Texts.OTHER_ERROR_MSG,
                     add_verbosity_msg=True)
        exit(1)
Beispiel #5
0
def status(state: State, model_name: str, status: PodPhase, username: str):
    """
    Returns status of a model

    :param model_name: name of a model data of which should be displayed
    :param status: status of a model step that should be displayed
    :param username: if checked - searches for model for a certain user
    """
    try:
        # A given username selects that user's namespace; otherwise use the
        # current kubectl context.
        if not username:
            namespace = get_kubectl_current_context_namespace()
        else:
            namespace = username
        with spinner(text=Texts.LOAD_DATA_MSG):
            workflow: ArgoWorkflow = ArgoWorkflow.get(namespace=namespace, name=model_name)

        if not workflow:
            click.echo(Texts.MODEL_NOT_FOUND.format(model_name=model_name))
            exit(0)
        click.echo('\nOperation details:\n')
        click.echo(tabulate([workflow.cli_representation], headers=MODEL_HEADERS, tablefmt=TBLT_TABLE_FORMAT))
        click.echo('\nOperation steps:\n')
        # When a status filter is given, only steps in that phase are listed.
        if workflow.steps:
            click.echo(tabulate([step.cli_representation for step in workflow.steps
                                 if status is None or status == step.phase], headers=STEP_HEADERS,
                                tablefmt=TBLT_TABLE_FORMAT))
        else:
            click.echo(Texts.LACK_OF_STEPS)
    except Exception:
        handle_error(logger, Texts.OTHER_ERROR_MSG, Texts.OTHER_ERROR_MSG, add_verbosity_msg=True)
        exit(1)
Beispiel #6
0
def export(path: str, format: str, operation_options: Tuple[str, ...]):
    """
    Create an Argo workflow exporting a saved model to the requested format.

    :param path: path to the saved model directory to be exported
    :param format: export format; must match one of the workflow template
        files under <config>/workflows/exports
    :param operation_options: extra parameters forwarded to the workflow
    """
    additional_params_str = " ".join(operation_options)
    format = format.lower()

    workflow_exports_files = os.listdir(
        f'{Config().config_path}/workflows/exports')
    # BUGFIX: str.rstrip('.yaml') strips any trailing run of the characters
    # '.', 'y', 'a', 'm', 'l' (e.g. 'my.yaml' -> 'm'); use splitext to drop
    # only the extension, as done elsewhere in this file.
    formats = [
        os.path.splitext(file)[0] for file in workflow_exports_files
        if file.endswith('.yaml')
    ]

    if format not in formats:
        click.echo(f'Format: {format} does not exist. Choose from: {formats}')
        sys.exit(2)

    try:
        current_namespace = get_kubectl_current_context_namespace()

        export_workflow = ArgoWorkflow.from_yaml(
            f'{Config().config_path}/workflows/exports/{format}.yaml')

        # Parameters consumed by the export workflow template.
        export_workflow.parameters = {
            'cluster-registry-address': NAUTAConfigMap().registry,
            'saved-model-dir-path': path,
            'additional-params': additional_params_str
        }

        export_workflow.create(namespace=current_namespace)
    except Exception:
        error_msg = 'Failed to create export workflow.'
        click.echo(error_msg)
        logger.exception(error_msg)
        sys.exit(1)

    click.echo(f'Successfully created export workflow: {export_workflow.name}')
Beispiel #7
0
def stream(state: State, name: str, data: str, method_verb: InferenceVerb):
    """
    Perform stream inference task on launched prediction instance.

    :param state: CLI state object (its verbosity level controls error output)
    :param name: name of the prediction instance to query
    :param data: path to a JSON file containing the request payload
    :param method_verb: inference verb appended to the instance URL
    """
    method_verb = InferenceVerb(method_verb)
    # Resolve the inference instance and build its URL.
    try:
        namespace = get_kubectl_current_context_namespace()

        # TODO: check if kind field of inference instance Run is correct
        inference_instance = Run.get(name=name, namespace=namespace)
        if not inference_instance:
            handle_error(user_msg=Texts.INSTANCE_NOT_EXISTS_ERROR_MSG.format(
                name=name))
            exit(1)
        # Only a RUNNING instance can serve requests.
        if not inference_instance.state == RunStatus.RUNNING:
            handle_error(user_msg=Texts.INSTANCE_NOT_RUNNING_ERROR_MSG.format(
                name=name, running_code=RunStatus.RUNNING.value))
            exit(1)

        inference_instance_url = get_inference_instance_url(
            inference_instance=inference_instance)
        stream_url = f'{inference_instance_url}:{method_verb.value}'
    except Exception:
        handle_error(logger,
                     Texts.INSTANCE_GET_FAIL_ERROR_MSG.format(name=name),
                     Texts.INSTANCE_GET_FAIL_ERROR_MSG.format(name=name),
                     add_verbosity_msg=state.verbosity == 0)
        exit(1)

    # Load the request payload from the given JSON file.
    try:
        with open(data, 'r', encoding='utf-8') as data_file:
            stream_data = json.load(data_file)
    except (json.JSONDecodeError, IOError):
        handle_error(logger, Texts.JSON_LOAD_ERROR_MSG.format(data=data),
                     Texts.JSON_LOAD_ERROR_MSG.format(data=data))
        exit(1)

    # POST the payload to the instance URL and echo the raw response body.
    try:
        api_key = get_api_key()
        headers = {
            'Authorization': api_key,
            'Accept': 'application/json',
            'Content-Type': 'application/json'
        }
        with spinner(text=Texts.WAITING_FOR_RESPONSE_MSG):
            stream_response = requests.post(
                stream_url,
                data=json.dumps(stream_data),  # nosec - request to k8s cluster
                verify=False,
                headers=headers)
        stream_response.raise_for_status()
        click.echo(stream_response.text)
    except Exception as e:
        error_msg = Texts.INFERENCE_OTHER_ERROR_MSG.format(exception=e)
        # HTTP errors carry a response - surface its body to the user.
        if hasattr(e, 'response'):
            error_msg += Texts.INFERENCE_ERROR_RESPONSE_MSG.format(
                response_text=e.response.text)  # type: ignore
        handle_error(logger, error_msg, error_msg)
        exit(1)
Beispiel #8
0
def export(path: str, format: str, operation_options: Tuple[str, ...]):
    """
    Create an Argo workflow exporting a saved model.

    :param path: path to the saved model, or the special FORMATS_OPTION value
        which makes the command list available export formats instead
    :param format: export format; must match one of the workflow template
        files under <config>/workflows/exports
    :param operation_options: extra parameters forwarded to the workflow
    """
    # Special value of "path": just list available export workflows and leave.
    if path == FORMATS_OPTION:
        try:
            list_of_workflows = get_list_of_workflows(EXPORT_WORKFLOWS_LOCATION)
        except Exception:
            handle_error(logger, Texts.EXPORT_LIST_ERROR_MSG, Texts.EXPORT_LIST_ERROR_MSG)
            sys.exit(1)

        click.echo(tabulate(list_of_workflows, headers=EXPORT_LIST_HEADERS,
                            tablefmt=TBLT_TABLE_FORMAT))
        sys.exit(0)

    config_path = Config().config_path
    formats: List[str] = []
    if os.path.isdir(config_path):
        workflow_exports_files = os.listdir(f'{config_path}/workflows/exports')
        formats = [os.path.splitext(file)[0] for file in workflow_exports_files if file.endswith('.yaml')]

    if not format:
        click.echo(Texts.MISSING_EXPORT_FORMAT.format(formats=formats))
        sys.exit(2)

    format = format.lower()

    if format not in formats:
        click.echo(Texts.WRONG_EXPORT_FORMAT.format(format=format, formats=formats))
        sys.exit(2)

    additional_params_str = " ".join(operation_options)

    try:
        current_namespace = get_kubectl_current_context_namespace()

        # Reuse the config_path computed above instead of instantiating
        # Config() a second time.
        export_workflow = ArgoWorkflow.from_yaml(f'{config_path}/workflows/exports/{format}.yaml')

        # Parameters consumed by the export workflow template.
        export_workflow.parameters = {
            'cluster-registry-address': NAUTAConfigMap().registry,
            'saved-model-dir-path': path,
            'additional-params': additional_params_str
        }

        export_workflow.create(namespace=current_namespace)

        # Re-fetch the workflow so its CLI representation reflects cluster state.
        workflow: ArgoWorkflow = ArgoWorkflow.get(namespace=current_namespace, name=export_workflow.name)
    except Exception:
        error_msg = 'Failed to create export workflow.'
        click.echo(error_msg)
        logger.exception(error_msg)
        sys.exit(1)

    click.echo(tabulate([workflow.cli_representation], headers=MODEL_HEADERS, tablefmt=TBLT_TABLE_FORMAT))
    # Plain literal - the previous f-string had no placeholders.
    click.echo('\nSuccessfully created export workflow')
Beispiel #9
0
def view(state: State, workflow_name: str):
    """Display details and the raw status of a single Argo workflow."""
    try:
        namespace = get_kubectl_current_context_namespace()
        workflow: ArgoWorkflow = ArgoWorkflow.get(namespace=namespace,
                                                  name=workflow_name)
        if not workflow:
            click.echo(Texts.NOT_FOUND_MSG.format(workflow_name=workflow_name))
            exit(0)
        summary = [workflow.cli_representation]
        click.echo(tabulate(summary, headers=HEADERS, tablefmt="orgtbl"))
        click.echo('\nWorkflow status:\n')
        click.echo(yaml.dump(workflow.status))
    except Exception:
        handle_error(logger, Texts.OTHER_ERROR_MSG, Texts.OTHER_ERROR_MSG,
                     add_verbosity_msg=True)
        exit(1)
Beispiel #10
0
def get_inference_instance_url(inference_instance: Run, model_name: str = None) -> str:
    """
    Get URL to inference instance.

    When model_name is not given it is taken from the instance's
    'modelName' annotation.
    """
    if not model_name:
        model_name = inference_instance.metadata['annotations']['modelName']

    host = get_kubectl_host(replace_https=False)
    namespace = get_kubectl_current_context_namespace()

    return (f'{host}/api/v1/namespaces/{namespace}/'
            f'services/{inference_instance.name}:rest-port/proxy/v1/models/{model_name}')
Beispiel #11
0
def workflow_list(state: State):
    """Print a table of all Argo workflows in the current namespace."""
    try:
        namespace = get_kubectl_current_context_namespace()
        rows = [item.cli_representation
                for item in ArgoWorkflow.list(namespace=namespace)]
        click.echo(tabulate(rows, headers=HEADERS, tablefmt="orgtbl"))
    except Exception:
        handle_error(logger, Texts.OTHER_ERROR_MSG, Texts.OTHER_ERROR_MSG,
                     add_verbosity_msg=True)
        exit(1)
Beispiel #12
0
def verify_cli_dependencies():
    """Verify OS and binary dependencies required by the CLI."""
    # Administrators are checked against kube-system; regular users against
    # their own context namespace.
    try:
        if is_current_user_administrator(request_timeout=VERIFY_REQUEST_TIMEOUT):
            namespace = 'kube-system'
        else:
            namespace = get_kubectl_current_context_namespace()
    except Exception:
        error_msg = Texts.KUBECTL_NAMESPACE_ERROR_MSG
        handle_error(logger, error_msg, error_msg, add_verbosity_msg=True)
        sys.exit(1)

    try:
        check_os()
        check_all_binary_dependencies(namespace=namespace)
    except (InvalidDependencyError, InvalidOsError):
        error_msg = Texts.INVALID_DEPENDENCY_ERROR_MSG
        handle_error(logger, error_msg, error_msg, add_verbosity_msg=True)
Beispiel #13
0
def list_runs_in_cli(verbosity_lvl: int, all_users: bool, name: str, status: RunStatus,
                     listed_runs_kinds: List[RunKinds], runs_list_headers: List[str], with_metrics: bool,
                     count: int = None, brief: bool = False):
    """
    Display a list of selected runs in the cli.

    :param verbosity_lvl: level at which error messages should be logged or displayed
    :param all_users: whether to display runs regardless of their owner or not
    :param status: display runs with this status
    :param name: regular expression to which names of the shown runs have to match
    :param listed_runs_kinds: list of kinds of runs that will be listed out
    :param runs_list_headers: headers which will be displayed on top of a table shown in the cli
    :param with_metrics: whether to show metrics column or not
    :param count: number of rows displayed on a list. If not given - content of a list is not limited
    :param brief: when true only experiment name, submission date, owner and state will be print
    """

    try:
        # namespace=None lists runs of every user, not just the current one.
        namespace = None if all_users else get_kubectl_current_context_namespace()
        status = RunStatus[status] if status else None

        # List experiments command is actually listing Run resources instead of Experiment resources with one
        # exception - if run is initialized - nctl displays data of an experiment instead of data of a run
        runs = replace_initializing_runs(
            Run.list(namespace=namespace, state_list=[status], name_filter=name, run_kinds_filter=listed_runs_kinds))
        runs_representations = [run.cli_representation for run in runs]
        if brief:
            # Brief view: name, submission date, owner and state only.
            runs_table_data = [
                (run_representation.name, run_representation.submission_date, run_representation.submitter,
                 run_representation.status)
                for run_representation in runs_representations
            ]
        elif with_metrics:
            # The full CLI representation is used as-is (includes metrics).
            runs_table_data = runs_representations
        else:
            # Default view: all columns except metrics.
            runs_table_data = [
                (run_representation.name, run_representation.parameters, run_representation.submission_date,
                 run_representation.start_date, run_representation.duration,
                 run_representation.submitter, run_representation.status, run_representation.template_name)
                for run_representation in runs_representations
            ]
        # A positive count limits the output to the last "count" rows.
        click.echo(tabulate(runs_table_data if not count else runs_table_data[-count:],
                            headers=runs_list_headers, tablefmt="orgtbl"))
    except InvalidRegularExpressionError:
        handle_error(logger, Texts.INVALID_REGEX_ERROR_MSG, Texts.INVALID_REGEX_ERROR_MSG,
                     add_verbosity_msg=verbosity_lvl == 0)
        exit(1)
    except Exception:
        handle_error(logger, Texts.OTHER_ERROR_MSG, Texts.OTHER_ERROR_MSG, add_verbosity_msg=verbosity_lvl == 0)
        exit(1)
Beispiel #14
0
def workflow_list(ctx: click.Context):
    """Print a table of all Argo workflows in the current namespace."""
    try:
        namespace = get_kubectl_current_context_namespace()
        workflows: List[ArgoWorkflow] = ArgoWorkflow.list(namespace=namespace)
        rows = [item.cli_representation for item in workflows]
        click.echo(tabulate(rows, headers=HEADERS, tablefmt=TBLT_TABLE_FORMAT))
    except Exception:
        handle_error(logger, Texts.OTHER_ERROR_MSG, Texts.OTHER_ERROR_MSG,
                     add_verbosity_msg=True)
        exit(1)
Beispiel #15
0
def cancel(state: State, workflow_name: str):
    """Delete (cancel) the Argo workflow with the given name."""
    try:
        namespace = get_kubectl_current_context_namespace()
        workflow: ArgoWorkflow = ArgoWorkflow.get(name=workflow_name,
                                                  namespace=namespace)
        if not workflow:
            click.echo(Texts.NOT_FOUND_MSG.format(workflow_name=workflow_name))
            exit(0)
        progress_text = Texts.PROGRESS_MSG.format(workflow_name=workflow_name)
        with spinner(text=progress_text):
            workflow.delete()
        click.echo(Texts.SUCCESS_MSG.format(workflow_name=workflow_name))
    except Exception:
        handle_error(logger, Texts.OTHER_ERROR_MSG, Texts.OTHER_ERROR_MSG,
                     add_verbosity_msg=True)
        exit(1)
Beispiel #16
0
def list_unitialized_experiments_in_cli(verbosity_lvl: int, all_users: bool,
                                        name: str, headers: List[str], listed_runs_kinds: List[RunKinds] = None,
                                        count: int = None, brief: bool = False):
    """
    Display a list of selected runs in the cli.

    :param verbosity_lvl: level at which error messages should be logged or displayed
    :param all_users: whether to display runs regardless of their owner or not
    :param name: regular expression to which names of the shown runs have to match
    :param headers: headers which will be displayed on top of a table shown in the cli
    :param listed_runs_kinds: kinds of runs taken into account; defaults to
        training and jupyter runs when not given
    :param count: number of rows displayed on a list. If not given - content of a list is not limited
    :param brief: accepted for signature symmetry with list_runs_in_cli;
        not used inside this function
    """

    if not listed_runs_kinds:
        listed_runs_kinds = [RunKinds.TRAINING, RunKinds.JUPYTER]

    try:
        # namespace=None lists experiments of every user.
        namespace = None if all_users else get_kubectl_current_context_namespace()

        creating_experiments = Experiment.list(namespace=namespace,
                                               state=ExperimentStatus.CREATING,
                                               run_kinds_filter=listed_runs_kinds,
                                               name_filter=name)
        runs = Run.list(namespace=namespace, name_filter=name, run_kinds_filter=listed_runs_kinds)

        # Get Experiments without associated Runs
        names_of_experiment_with_runs = set()
        for run in runs:
            names_of_experiment_with_runs.add(run.experiment_name)

        # An experiment still in CREATING state without any run is treated
        # as uninitialized.
        uninitialized_experiments = [experiment for experiment in creating_experiments
                                     if experiment.name not in names_of_experiment_with_runs]

        # A positive count limits the output to the last "count" rows.
        displayed_items_count = count if count else len(uninitialized_experiments)
        click.echo(tabulate([uninitialized_experiment_cli_representation(experiment)
                             for experiment in uninitialized_experiments][-displayed_items_count:],
                            headers=headers, tablefmt="orgtbl"))
    except InvalidRegularExpressionError:
        handle_error(logger, Texts.INVALID_REGEX_ERROR_MSG, Texts.INVALID_REGEX_ERROR_MSG,
                     add_verbosity_msg=verbosity_lvl == 0)
        exit(1)
    except Exception:
        handle_error(logger, Texts.OTHER_ERROR_MSG, Texts.OTHER_ERROR_MSG, add_verbosity_msg=verbosity_lvl == 0)
        exit(1)
Beispiel #17
0
def modify_draft_toml(experiment_folder: str, registry: str):
    """Point the experiment's draft.toml at the current namespace and registry."""
    log.debug("Modify draft.toml - start")
    toml_path = os.path.join(experiment_folder, "draft.toml")
    temp_toml_path = os.path.join(experiment_folder, "draft_temp.toml")
    namespace = k8s_info.get_kubectl_current_context_namespace()

    with open(toml_path, "r") as toml_file:
        draft_config = toml.load(toml_file)

    log.debug(draft_config["environments"])
    # Mutate the nested "development" section in place.
    development = draft_config["environments"]["development"]
    development["namespace"] = namespace
    development["registry"] = registry
    development["wait"] = False

    # Write to a temporary file first, then swap it in over the original.
    with open(temp_toml_path, "w") as toml_file:
        toml.dump(draft_config, toml_file)

    shutil.move(temp_toml_path, toml_path)
    log.debug("Modify draft.toml - end")
Beispiel #18
0
def submit(state: State, workflow_path: str):
    """Create an Argo workflow from a YAML spec and show its CLI summary."""
    try:
        workflow: ArgoWorkflow = ArgoWorkflow.from_yaml(workflow_path)
        namespace = get_kubectl_current_context_namespace()
        with spinner(text=Texts.PROGRESS_MSG):
            workflow.create(namespace=namespace)
            workflow.namespace = namespace  # Set namespace, to properly display owner in CLI
        summary = [workflow.cli_representation]
        click.echo(tabulate(summary, headers=HEADERS, tablefmt=TBLT_TABLE_FORMAT))
    except IOError as e:
        message = Texts.LOAD_SPEC_ERROR_MSG.format(msg=str(e))
        handle_error(logger, message, message)
        exit(1)
    except Exception:
        handle_error(logger, Texts.OTHER_ERROR_MSG, Texts.OTHER_ERROR_MSG,
                     add_verbosity_msg=True)
        exit(1)
Beispiel #19
0
def process(path: str, kind: str, options: Tuple[str, ...]):
    """
    Create an Argo workflow processing a saved model.

    :param path: path to the saved model directory to be processed
    :param kind: kind of processing; must match one of the workflow template
        files under <config>/workflows/processes
    :param options: extra parameters forwarded to the workflow
    """
    additional_params_str = " ".join(options)
    kind = kind.lower()
    config_path = Config().config_path
    process_path = f'{config_path}/workflows/processes'
    kinds: List[str] = []
    if os.path.isdir(process_path):
        # Reuse process_path instead of rebuilding the same path string.
        process_kinds = os.listdir(process_path)
        kinds = [
            os.path.splitext(file)[0] for file in process_kinds
            if file.endswith('.yaml')
        ]

    if kind not in kinds:
        click.echo(Texts.WRONG_PROCESS_KIND.format(process=kind, kinds=kinds))
        sys.exit(2)

    try:
        current_namespace = get_kubectl_current_context_namespace()

        process_workflow = ArgoWorkflow.from_yaml(
            f'{config_path}/workflows/processes/{kind}.yaml')

        # Parameters consumed by the process workflow template.
        process_workflow.parameters = {
            'cluster-registry-address': NAUTAConfigMap().registry,
            'saved-model-dir-path': path,
            'additional-params': additional_params_str
        }

        process_workflow.create(namespace=current_namespace)
    except Exception:
        # BUGFIX: the message previously said 'export workflow' (copy-paste
        # from the export command) - this is the process command.
        error_msg = 'Failed to create process workflow.'
        click.echo(error_msg)
        logger.exception(error_msg)
        sys.exit(1)

    click.echo(
        f'Successfully created process workflow: {process_workflow.name}')
Beispiel #20
0
def get_logs(experiment_name: str, min_severity: SeverityLevel,
             start_date: str, end_date: str, pod_ids: str,
             pod_status: PodStatus, match: str, output: bool, pager: bool,
             follow: bool, runs_kinds: List[RunKinds], instance_type: str):
    """
    Show logs for a given experiment.

    :param experiment_name: exact name of the experiment; mutually exclusive with match
    :param min_severity: minimal severity of shown entries, forwarded to the log query
    :param start_date: lower bound for log entries; defaults per run to its
        creation timestamp (but see the NOTE inside the loop below)
    :param end_date: upper bound for log entries, forwarded to the log query
    :param pod_ids: comma-separated pod ids used to narrow the query
    :param pod_status: pod status filter forwarded to the log query
    :param match: regular expression selecting runs; mutually exclusive with experiment_name
    :param output: when True logs are saved to files instead of being printed
    :param pager: passed through to print_logs to control paged display
    :param follow: when True (and output is False) logs are followed continuously
    :param runs_kinds: kinds of runs taken into account
    :param instance_type: instance type name used in user-facing messages
    """
    # check whether we have runs with a given name
    if experiment_name and match:
        handle_error(user_msg=Texts.NAME_M_BOTH_GIVEN_ERROR_MSG.format(
            instance_type=instance_type))
        exit(1)
    elif not experiment_name and not match:
        handle_error(user_msg=Texts.NAME_M_NONE_GIVEN_ERROR_MSG.format(
            instance_type=instance_type))
        exit(1)

    try:
        # Reach Elasticsearch through the kubernetes API server service proxy.
        es_client = K8sElasticSearchClient(
            host=f'{get_kubectl_host(with_port=True)}'
            f'/api/v1/namespaces/nauta/services/nauta-elasticsearch:nauta/proxy',
            verify_certs=False,
            use_ssl=True,
            headers={'Authorization': get_api_key()})
        namespace = get_kubectl_current_context_namespace()
        # A regex match selects many runs; an explicit name is anchored to
        # require an exact match.
        if match:
            experiment_name = match
            name_filter = match
        else:
            name_filter = f'^{experiment_name}$'
        runs = Run.list(namespace=namespace,
                        name_filter=name_filter,
                        run_kinds_filter=runs_kinds)
        if not runs:
            raise ValueError(
                f'Run with given name: {experiment_name} does not exists in namespace {namespace}.'
            )
        pod_ids = pod_ids.split(',') if pod_ids else None  # type: ignore
        # Following is only possible when printing to the console.
        follow_logs = True if follow and not output else False
        if output and len(runs) > 1:
            click.echo(Texts.MORE_EXP_LOGS_MESSAGE)
        for run in runs:
            # NOTE(review): start_date is rebound here, so when it was not
            # given, every run after the first reuses the FIRST run's
            # creation_timestamp - confirm this is intended.
            start_date = start_date if start_date else run.creation_timestamp
            run_logs_generator = es_client.get_experiment_logs_generator(
                run=run,
                namespace=namespace,
                min_severity=min_severity,
                start_date=start_date,
                end_date=end_date,
                pod_ids=pod_ids,
                pod_status=pod_status,
                follow=follow_logs)
            if output:
                save_logs_to_file(logs_generator=run_logs_generator,
                                  instance_name=run.name,
                                  instance_type=instance_type)
            else:
                if len(runs) > 1:
                    click.echo(f'Experiment : {run.name}')
                print_logs(run_logs_generator=run_logs_generator, pager=pager)
    except ValueError:
        handle_error(
            logger,
            Texts.EXPERIMENT_NOT_EXISTS_ERROR_MSG.format(
                experiment_name=experiment_name,
                instance_type=instance_type.capitalize()),
            Texts.EXPERIMENT_NOT_EXISTS_ERROR_MSG.format(
                experiment_name=experiment_name,
                instance_type=instance_type.capitalize()))
        exit(1)
    except Exception:
        handle_error(
            logger,
            Texts.LOGS_GET_OTHER_ERROR_MSG.format(instance_type=instance_type),
            Texts.LOGS_GET_OTHER_ERROR_MSG.format(instance_type=instance_type))
        exit(1)
Beispiel #21
0
def submit_experiment(template: str, name: str = None, run_kind: RunKinds = RunKinds.TRAINING,
                      script_location: str = None, script_parameters: Tuple[str, ...] = None,
                      pack_params: List[Tuple[str, str]] = None, parameter_range: List[Tuple[str, str]] = None,
                      parameter_set: Tuple[str, ...] = None,
                      script_folder_location: str = None,
                      env_variables: List[str] = None,
                      requirements_file: str = None) -> Tuple[List[Run], Dict[str, str], str]:
    """
    Prepare, optionally confirm (for multi-run experiments) and submit an experiment
    together with all of its runs.

    :param template: name of the pack (template) used to run the experiment
    :param name: user-provided experiment name; auto-generated when None
    :param run_kind: kind of the created runs (training by default)
    :param script_location: path to the training script (may be rewritten by
                            prepare_experiment_environment and is returned to the caller)
    :param script_parameters: parameters passed to the training script
    :param pack_params: pack parameters as (name, value) pairs; stored as Run annotations
    :param parameter_range: "-pr" style parameter-range specifications
    :param parameter_set: "-ps" style parameter-set specifications
    :param script_folder_location: path of a folder with additional scripts
    :param env_variables: environment variables to be set in experiment pods
    :param requirements_file: path to a pip requirements file
    :return: tuple of (created runs, {run name: submission error message}, final script location)
    :raises SubmitExperimentError: when any stage of the submission fails
    :raises K8sProxyCloseError: when closing a proxy/tunnel fails
    """
    # Normalize optional collection arguments so downstream code can iterate them safely.
    script_parameters = script_parameters if script_parameters else ()
    parameter_set = parameter_set if parameter_set else ()
    parameter_range = parameter_range if parameter_range else []
    # Fix: default pack_params to an empty list - run.create() below builds a dict
    # comprehension over it, which would raise TypeError if it stayed None.
    pack_params = pack_params if pack_params else []

    log.debug("Submit experiment - start")
    try:
        namespace = get_kubectl_current_context_namespace()
        # Remember namespace globally so the Ctrl-C handler can clean up.
        global submitted_namespace
        submitted_namespace = namespace
    except Exception:
        message = Texts.GET_NAMESPACE_ERROR_MSG
        log.exception(message)
        raise SubmitExperimentError(message)

    try:
        with spinner(text=Texts.PREPARING_RESOURCE_DEFINITIONS_MSG):
            experiment_name, labels = generate_exp_name_and_labels(script_name=script_location,
                                                                   namespace=namespace, name=name,
                                                                   run_kind=run_kind)
            runs_list = prepare_list_of_runs(experiment_name=experiment_name, parameter_range=parameter_range,
                                             parameter_set=parameter_set, template_name=template)
    except SubmitExperimentError as exe:
        log.exception(str(exe))
        raise exe
    except Exception:
        message = Texts.SUBMIT_PREPARATION_ERROR_MSG
        log.exception(message)
        raise SubmitExperimentError(message)

    # Remember experiment name globally so the Ctrl-C handler can clean up.
    global submitted_experiment
    submitted_experiment = experiment_name

    # Ctrl-C handling
    signal.signal(signal.SIGINT, ctrl_c_handler_for_submit)
    signal.signal(signal.SIGTERM, ctrl_c_handler_for_submit)

    try:
        config = Config()

        # start port forwarding
        # noinspection PyBroadException
        with K8sProxy(NAUTAAppNames.DOCKER_REGISTRY, port=config.local_registry_port) as proxy:
            # Save port that was actually used in configuration
            if proxy.tunnel_port != config.local_registry_port:
                config.local_registry_port = proxy.tunnel_port

            experiment_run_folders = []  # List of local directories used by experiment's runs
            try:
                # run socat if on Windows or Mac OS - docker there cannot reach the
                # forwarded registry port directly
                if get_current_os() in (OS.WINDOWS, OS.MACOS):
                    # noinspection PyBroadException
                    try:
                        with spinner(text=Texts.CLUSTER_CONNECTION_MSG):
                            socat.start(proxy.tunnel_port)
                    except Exception:
                        error_msg = Texts.LOCAL_DOCKER_TUNNEL_ERROR_MSG
                        log.exception(error_msg)
                        raise SubmitExperimentError(error_msg)

                cluster_registry_port = get_app_service_node_port(nauta_app_name=NAUTAAppNames.DOCKER_REGISTRY)

                # prepare environments for all experiment's runs
                for experiment_run in runs_list:
                    # Merge globally passed script parameters with per-run parameters.
                    if script_parameters and experiment_run.parameters:
                        current_script_parameters = script_parameters + experiment_run.parameters
                    elif script_parameters:
                        current_script_parameters = script_parameters
                    elif experiment_run.parameters:
                        current_script_parameters = experiment_run.parameters
                    else:
                        current_script_parameters = ""

                    run_folder, script_location, pod_count = \
                        prepare_experiment_environment(experiment_name=experiment_name,
                                                       run_name=experiment_run.name,
                                                       local_script_location=script_location,
                                                       script_folder_location=script_folder_location,  # noqa: E501
                                                       script_parameters=current_script_parameters,
                                                       pack_type=template, pack_params=pack_params,
                                                       local_registry_port=proxy.tunnel_port,
                                                       cluster_registry_port=cluster_registry_port,
                                                       env_variables=env_variables,
                                                       requirements_file=requirements_file)
                    # Set correct pod count
                    if not pod_count or pod_count < 1:
                        raise SubmitExperimentError('Unable to determine pod count: make sure that values.yaml '
                                                    'file in your pack has podCount field with positive integer value.')
                    experiment_run.pod_count = pod_count

                    experiment_run_folders.append(run_folder)
                    script_name = None
                    if script_location is not None:
                        script_name = os.path.basename(script_location)

                    # Prepend script_name parameter to run description only for display purposes.
                    experiment_run.parameters = script_parameters if not experiment_run.parameters \
                        else experiment_run.parameters + script_parameters
                    if experiment_run.parameters and script_name:
                        experiment_run.parameters = (script_name, ) + experiment_run.parameters
                    elif script_name:
                        experiment_run.parameters = (script_name, )
            except SubmitExperimentError as e:
                log.exception(Texts.ENV_CREATION_ERROR_MSG)
                e.message += f' {Texts.ENV_CREATION_ERROR_MSG}'
                raise
            except Exception:
                # any error in this step breaks execution of this command
                message = Texts.ENV_CREATION_ERROR_MSG
                log.exception(message)
                # just in case - remove folders that were created with a success
                for experiment_run_folder in experiment_run_folders:
                    delete_environment(experiment_run_folder)
                # Fix: abort submission here - previously execution fell through and
                # tried to submit runs whose environments were just deleted.
                raise SubmitExperimentError(message)

            # if ps or pr option is used - first ask whether experiment(s) should be submitted
            if parameter_range or parameter_set:
                click.echo(Texts.CONFIRM_SUBMIT_MSG)
                click.echo(tabulate({RUN_NAME: [run.name for run in runs_list],
                                     RUN_PARAMETERS: ["\n".join(run.parameters) if run.parameters
                                                      else "" for run in runs_list]},
                                    headers=[RUN_NAME, RUN_PARAMETERS], tablefmt="orgtbl"))

                if not click.confirm(Texts.CONFIRM_SUBMIT_QUESTION_MSG, default=True):
                    for experiment_run_folder in experiment_run_folders:
                        delete_environment(experiment_run_folder)
                    exit()

            # create Experiment model
            # TODO template_name & template_namespace should be filled after Template implementation
            parameter_range_spec = [f'-pr {param_name} {param_value}' for param_name, param_value in parameter_range]
            parameter_set_spec = [f'-ps {ps_spec}' for ps_spec in parameter_set]
            experiment_parameters_spec = list(script_parameters) + parameter_range_spec + parameter_set_spec
            experiment = experiments_model.Experiment(name=experiment_name, template_name=template,
                                                      parameters_spec=experiment_parameters_spec,
                                                      template_namespace="template-namespace")

            experiment.create(namespace=namespace, labels=labels)

            # submit runs
            run_errors = {}
            for run, run_folder in zip(runs_list, experiment_run_folders):
                try:
                    run.state = RunStatus.QUEUED
                    with spinner(text=Texts.CREATING_RESOURCES_MSG.format(run_name=run.name)):
                        # Add Run object with runKind label and pack params as annotations
                        run.create(namespace=namespace, labels={'runKind': run_kind.value},
                                   annotations={pack_param_name: pack_param_value
                                                for pack_param_name, pack_param_value in pack_params})
                        submitted_runs.append(run)
                        submit_draft_pack(run_folder, namespace=namespace)
                except Exception as exe:
                    # A failed run does not abort the whole submission - record the
                    # error and try to mark the run as FAILED.
                    delete_environment(run_folder)
                    try:
                        run.state = RunStatus.FAILED
                        run_errors[run.name] = str(exe)
                        run.update()
                    except Exception as rexe:
                        # update of non-existing run may fail
                        log.debug(Texts.ERROR_DURING_PATCHING_RUN.format(str(rexe)))

            # Delete experiment if no Runs were submitted
            if not submitted_runs:
                click.echo(Texts.SUBMISSION_FAIL_ERROR_MSG)
                delete_k8s_object("experiment", experiment_name)

            # Change experiment status to submitted
            experiment.state = experiments_model.ExperimentStatus.SUBMITTED
            experiment.update()
    except LocalPortOccupiedError as exe:
        click.echo(exe.message)
        raise SubmitExperimentError(exe.message)
    except K8sProxyCloseError:
        log.exception('Error during closing of a proxy for a {}'.format(NAUTAAppNames.DOCKER_REGISTRY))
        raise K8sProxyCloseError(Texts.PROXY_CLOSE_ERROR_MSG)
    except K8sProxyOpenError:
        error_msg = Texts.PROXY_OPEN_ERROR_MSG
        log.exception(error_msg)
        raise SubmitExperimentError(error_msg)
    except SubmitExperimentError:
        raise
    except Exception as exe:
        error_msg = Texts.SUBMIT_OTHER_ERROR_MSG
        log.exception(error_msg)
        raise SubmitExperimentError(error_msg) from exe
    finally:
        # Always tear down the socat tunnel and per-experiment semaphores,
        # regardless of how submission ended.
        with spinner(text=Texts.CLUSTER_CONNECTION_CLOSING_MSG):
            # noinspection PyBroadException
            try:
                socat.stop()
            except Exception:
                log.exception("Error during closing of a proxy for a local docker-host tunnel")
                raise K8sProxyCloseError(Texts.DOCKER_TUNNEL_CLOSE_ERROR_MSG)
        # remove semaphores from all exp folders
        remove_sempahore(experiment_name)

    log.debug("Submit - finish")
    return runs_list, run_errors, script_location
Beispiel #22
0
def interact(ctx: click.Context, name: str, filename: str,
             pack_param: List[Tuple[str, str]], no_launch: bool,
             port_number: int, env: List[str], template: str):
    """
    Starts an interactive session with Jupyter Notebook.

    Either reconnects to an existing Jupyter experiment with the given name, or
    submits a new one, waits for its pods to become RUNNING, and launches the
    notebook app (unless no_launch is set). Exits the process with a non-zero
    code on any failure.
    """
    current_namespace = get_kubectl_current_context_namespace()

    # Warn the user when many notebooks are already running in this namespace.
    jupyters_number = calculate_number_of_running_jupyters(current_namespace)
    if jupyters_number > ACCEPTED_NUMBER_OF_NOTEBOOKS:
        if not click.confirm(
                Texts.TOO_MANY_JUPYTERS.format(
                    jupyter_number=str(jupyters_number))):
            click.echo(Texts.INTERACT_ABORT_MSG)
            sys.exit(0)

    create_new_notebook = True
    jupyter_experiment = None

    if name:
        try:
            jupyter_experiment = Experiment.get(name=name,
                                                namespace=current_namespace)

            # Passing a filename together with an existing session is ambiguous.
            if jupyter_experiment and filename:
                handle_error(user_msg=Texts.FILENAME_BUT_SESSION_EXISTS)
                sys.exit(1)

            # Reuse the script name recorded on the existing experiment, if any.
            if jupyter_experiment:
                metadata = jupyter_experiment.metadata
                if metadata and metadata.get("labels") and metadata.get(
                        "labels").get("script_name"):
                    filename = metadata.get("labels").get("script_name")
        except Exception:
            handle_error(logger, Texts.EXPERIMENT_GET_ERROR_MSG,
                         Texts.EXPERIMENT_GET_ERROR_MSG)
            sys.exit(1)

        # if experiment exists and is not based on jupyter image - we need to ask a user to choose another name
        if jupyter_experiment and jupyter_experiment.template_name not in JUPYTER_NOTEBOOK_TEMPLATES_NAMES:
            handle_error(user_msg=Texts.NAME_ALREADY_USED.format(name=name))
            sys.exit(1)

        # if experiment exists but its state is different than RUNNING - display info about a need of purging of
        # this experiment
        if jupyter_experiment and jupyter_experiment.state not in \
                [ExperimentStatus.SUBMITTED, ExperimentStatus.CREATING]:
            handle_error(
                user_msg=Texts.EXP_WITH_THE_SAME_NAME_MUST_BE_PURGED.format(
                    name=name))
            sys.exit(1)

        # Creating a brand-new experiment requires confirmation unless --force.
        if not jupyter_experiment and (
                not click.get_current_context().obj.force
                and not click.confirm(Texts.CONFIRM_EXPERIMENT_CREATION)):
            sys.exit(0)

        if jupyter_experiment:
            create_new_notebook = False
        else:
            try:
                check_experiment_name(value=name)
            except click.BadParameter as exe:
                handle_error(user_msg=str(exe))
                sys.exit(1)

    number_of_retries = 0
    if create_new_notebook:
        # A freshly submitted notebook may not be reachable immediately - allow retries.
        number_of_retries = 5
        try:
            exp_name = name
            if not name and not filename:
                exp_name = generate_name("jup")

            click.echo(Texts.SUBMITTING_EXPERIMENT_USER_MSG)
            runs, runs_errors, filename = submit_experiment(
                run_kind=RunKinds.JUPYTER,
                script_location=filename,
                script_folder_location=None,
                template=template,
                name=exp_name,
                parameter_range=[],
                parameter_set=(),
                script_parameters=(),
                pack_params=pack_param,
                env_variables=env)
            # Show the submission result table to the user.
            click.echo(
                tabulate(
                    {
                        RUN_NAME:
                        [run.cli_representation.name for run in runs],
                        RUN_PARAMETERS:
                        [run.cli_representation.parameters for run in runs],
                        RUN_STATUS:
                        [run.cli_representation.status for run in runs],
                        RUN_MESSAGE:
                        [runs_errors.get(run.name, "") for run in runs]
                    },
                    headers=[
                        RUN_NAME, RUN_PARAMETERS, RUN_STATUS, RUN_MESSAGE
                    ],
                    tablefmt=TBLT_TABLE_FORMAT))
            if runs:
                name = runs[0].name
            else:
                # run wasn't created - error
                raise RuntimeError("Run wasn't created")

        except K8sProxyCloseError as exe:
            handle_error(user_msg=exe.message)
            sys.exit(1)
        except SubmitExperimentError as exe:
            handle_error(
                logger,
                Texts.SUBMIT_ERROR_MSG.format(exception_message=exe.message),
                Texts.SUBMIT_ERROR_MSG.format(exception_message=exe.message))
            sys.exit(1)
        except Exception:
            handle_error(logger, Texts.SUBMIT_OTHER_ERROR_MSG,
                         Texts.SUBMIT_OTHER_ERROR_MSG)
            sys.exit(1)
    else:
        # if jupyter service exists - the system only connects to it
        click.echo(Texts.SESSION_EXISTS_MSG)

    # Build the URL suffix pointing at the file to open inside Jupyter.
    url_end = ""
    if filename:
        # only Jupyter notebooks are opened directly, other files are opened in edit mode
        url_end = f"/notebooks/output/experiment/"
        if jupyter_experiment and filename.endswith(".py"):
            # An existing session converts .py scripts to .ipynb notebooks.
            filename = filename[:filename.index(".py", -3)] + ".ipynb"
        if not filename.endswith(".ipynb"):
            url_end = "/edit/"
        url_end = url_end + Path(filename).name

    # wait until all jupyter pods are ready
    for i in range(JUPYTER_CHECK_POD_READY_TRIES):
        try:
            if check_pods_status(run_name=name,
                                 namespace=current_namespace,
                                 status=PodStatus.RUNNING):
                break
        except Exception:
            handle_error(logger, Texts.NOTEBOOK_STATE_CHECK_ERROR_MSG)
            sys.exit(1)
        time.sleep(1)
    else:
        # Loop exhausted without break - pods never became ready.
        handle_error(user_msg=Texts.NOTEBOOK_NOT_READY_ERROR_MSG)
        sys.exit(1)

    try:
        launch_app(k8s_app_name=NAUTAAppNames.JUPYTER,
                   app_name=name,
                   no_launch=no_launch,
                   number_of_retries=number_of_retries,
                   url_end=url_end,
                   port=port_number)
    except LaunchError as exe:
        handle_error(logger, exe.message, exe.message)
        sys.exit(1)
    except ProxyClosingError:
        handle_error(user_msg=Texts.PROXY_CLOSING_ERROR_MSG)
        sys.exit(1)
    except Exception:
        handle_error(logger, Texts.SESSION_LAUNCH_OTHER_ERROR_MSG,
                     Texts.SESSION_LAUNCH_OTHER_ERROR_MSG)
        sys.exit(1)
Beispiel #23
0
def verify(state: State):
    """
    Verify the local installation: OS support, kubectl presence and version,
    connectivity to the cluster, and versions of all remaining dependencies.

    On success the detected dependency versions are persisted via
    save_dependency_versions(); any failed check terminates the process
    with exit code 1.
    """
    try:
        with spinner(text=Texts.CHECKING_OS_MSG):
            check_os()
        click.echo(Texts.OS_SUPPORTED_MSG)
    except InvalidOsError as exception:
        handle_error(logger,
                     str(exception),
                     str(exception),
                     add_verbosity_msg=True)
        exit(1)

    # kubectl is checked first and separately - every later check needs it.
    dependencies = get_dependency_map()
    kubectl_dependency_name = 'kubectl'
    kubectl_dependency_spec = dependencies[kubectl_dependency_name]

    with spinner(text=Texts.VERIFYING_DEPENDENCY_MSG.format(
            dependency_name=kubectl_dependency_name)):
        valid, installed_version = check_dependency(
            dependency_name=kubectl_dependency_name,
            dependency_spec=kubectl_dependency_spec)

    supported_versions_sign = '>='
    logger.info(
        Texts.VERSION_CHECKING_MSG.format(
            dependency_name=kubectl_dependency_name,
            installed_version=installed_version,
            supported_versions_sign=supported_versions_sign,
            expected_version=kubectl_dependency_spec.expected_version))

    if valid:
        click.echo(
            Texts.DEPENDENCY_VERIFICATION_SUCCESS_MSG.format(
                dependency_name=kubectl_dependency_name))
    else:
        handle_error(
            logger,
            Texts.KUBECTL_INVALID_VERSION_ERROR_MSG.format(
                installed_version=installed_version,
                supported_versions_sign=supported_versions_sign,
                expected_version=  # noqa
                kubectl_dependency_spec.expected_version),
            Texts.KUBECTL_INVALID_VERSION_ERROR_MSG,
            add_verbosity_msg=state.verbosity == 0)
        exit(1)

    # kubectl has been handled above - don't check it again in the loop below.
    del dependencies[kubectl_dependency_name]

    try:
        with spinner(text=Texts.CHECKING_CONNECTION_TO_CLUSTER_MSG):
            check_connection_to_cluster()
        with spinner(text=Texts.CHECKING_PORT_FORWARDING_FROM_CLUSTER_MSG):
            check_port_forwarding()
    except KubectlConnectionError as e:
        handle_error(logger,
                     str(e),
                     str(e),
                     add_verbosity_msg=state.verbosity == 0)
        exit(1)
    except FileNotFoundError:
        handle_error(logger,
                     Texts.KUBECTL_NOT_INSTALLED_ERROR_MSG,
                     Texts.KUBECTL_NOT_INSTALLED_ERROR_MSG,
                     add_verbosity_msg=state.verbosity == 0)
        exit(1)

    # Administrators verify against kube-system; regular users against their own namespace.
    try:
        namespace = 'kube-system' if is_current_user_administrator(
        ) else get_kubectl_current_context_namespace()
    except Exception:
        handle_error(logger,
                     Texts.GET_K8S_NAMESPACE_ERROR_MSG,
                     Texts.GET_K8S_NAMESPACE_ERROR_MSG,
                     add_verbosity_msg=state.verbosity == 0)
        exit(1)

    dependency_versions = {}
    for dependency_name, dependency_spec in dependencies.items():
        try:
            supported_versions_sign = '==' if dependency_spec.match_exact_version else '>='
            with spinner(text=Texts.VERIFYING_DEPENDENCY_MSG.format(
                    dependency_name=dependency_name)):
                valid, installed_version = check_dependency(
                    dependency_name=dependency_name,
                    dependency_spec=dependency_spec,
                    namespace=namespace)
            dependency_versions[dependency_name] = installed_version
            logger.info(
                Texts.VERSION_CHECKING_MSG.format(
                    dependency_name=dependency_name,
                    installed_version=installed_version,
                    supported_versions_sign=supported_versions_sign,
                    expected_version=dependency_spec.expected_version))
            if valid:
                click.echo(
                    Texts.DEPENDENCY_VERIFICATION_SUCCESS_MSG.format(
                        dependency_name=dependency_name))
            else:
                # A version mismatch is only a warning here, not a hard failure.
                click.echo(
                    Texts.INVALID_VERSION_WARNING_MSG.format(
                        dependency_name=dependency_name,
                        installed_version=installed_version,
                        supported_versions_sign=supported_versions_sign,
                        expected_version=dependency_spec.expected_version))
        except FileNotFoundError:
            handle_error(logger,
                         Texts.DEPENDENCY_NOT_INSTALLED_ERROR_MSG.format(
                             dependency_name=dependency_name),
                         Texts.DEPENDENCY_NOT_INSTALLED_ERROR_MSG.format(
                             dependency_name=dependency_name),
                         add_verbosity_msg="client" not in dependency_name)
            exit(1)
        except (RuntimeError, ValueError, TypeError):
            handle_error(logger,
                         Texts.DEPENDENCY_VERSION_CHECK_ERROR_MSG.format(
                             dependency_name=dependency_name),
                         Texts.DEPENDENCY_VERSION_CHECK_ERROR_MSG.format(
                             dependency_name=dependency_name),
                         add_verbosity_msg=state.verbosity == 0)
            exit(1)
        except Exception:
            handle_error(logger,
                         Texts.DEPENDENCY_VERIFICATION_OTHER_ERROR_MSG.format(
                             dependency_name=dependency_name),
                         Texts.DEPENDENCY_VERIFICATION_OTHER_ERROR_MSG.format(
                             dependency_name=dependency_name),
                         add_verbosity_msg=state.verbosity == 0)
            exit(1)
    else:
        # This block is entered if all dependencies were validated successfully
        # Save dependency versions in a file
        save_dependency_versions(dependency_versions)
Beispiel #24
0
def launch(state: State, name: str, model_location: str,
           local_model_location: str, model_name: str,
           pack_param: List[Tuple[str, str]], requirements: str):
    """
    Starts a new prediction instance that can be used for performing prediction, classification and
    regression tasks on trained model.

    Requires either model_location (remote) or local_model_location. On success
    prints a table describing the created instance plus its URL and authorization
    header; exits with code 1 on any failure.
    """
    # Exactly one source of the model is required.
    if not model_location and not local_model_location:
        handle_error(user_msg=Texts.MISSING_MODEL_LOCATION_ERROR_MSG.format(
            local_model_location=local_model_location))
        exit(1)

    if local_model_location:
        validate_local_model_location(local_model_location)

    click.echo('Submitting prediction instance.')
    try:
        # Derive defaults: model name from the path's basename, instance name
        # generated from the model name.
        model_path = model_location.rstrip(
            '/') if model_location else local_model_location.rstrip('/')
        model_name = model_name if model_name else os.path.basename(model_path)
        name = name if name else generate_name(
            name=model_name, prefix=INFERENCE_INSTANCE_PREFIX)
        inference_instance = start_inference_instance(
            name=name,
            model_location=model_location,
            model_name=model_name,
            local_model_location=local_model_location,
            requirements=requirements,
            pack_params=pack_param)
        if inference_instance.state == RunStatus.FAILED:
            raise RuntimeError('Inference instance submission failed.')
    except Exception:
        handle_error(logger,
                     Texts.INSTANCE_START_ERROR_MSG,
                     Texts.INSTANCE_START_ERROR_MSG,
                     add_verbosity_msg=state.verbosity == 0)
        exit(1)

    click.echo(
        tabulate([[
            inference_instance.cli_representation.name, model_location,
            inference_instance.cli_representation.status
        ]],
                 headers=Texts.TABLE_HEADERS,
                 tablefmt="orgtbl"))

    # Show the user how to reach the new instance (URL + auth header).
    try:
        namespace = get_kubectl_current_context_namespace()
        authorization_header = get_authorization_header(
            service_account_name=name, namespace=namespace)
        inference_instance_url = get_inference_instance_url(
            inference_instance=inference_instance, model_name=model_name)
        click.echo(
            Texts.INSTANCE_INFO_MSG.format(
                inference_instance_url=inference_instance_url,
                authorization_header=authorization_header))
    except Exception:
        handle_error(logger,
                     Texts.INSTANCE_URL_ERROR_MSG,
                     Texts.INSTANCE_URL_ERROR_MSG,
                     add_verbosity_msg=state.verbosity == 0)
        exit(1)
Beispiel #25
0
def get_logs(experiment_name: str, min_severity: SeverityLevel, start_date: str,
             end_date: str, pod_ids: str, pod_status: PodStatus, match: str, output: bool, pager: bool, follow: bool,
             runs_kinds: List[RunKinds], instance_type: str):
    """
    Show logs for a given experiment.

    Either experiment_name (exact match) or match (regex) must be given - never
    both. Logs for each matched run are printed (optionally paged/followed) or
    saved to a file when output is set. Exits with code 1 on any failure.

    :param experiment_name: exact name of the experiment whose logs are shown
    :param min_severity: minimal log severity to include
    :param start_date: lower bound for log timestamps; per run it defaults to
                       the run's creation timestamp
    :param end_date: upper bound for log timestamps
    :param pod_ids: comma-separated list of pod ids to filter by
    :param pod_status: pod status to filter by
    :param match: regex selecting experiments (mutually exclusive with experiment_name)
    :param output: save logs to files instead of printing them
    :param pager: display printed logs through a pager
    :param follow: stream logs continuously (ignored when output is set)
    :param runs_kinds: kinds of runs taken into account
    :param instance_type: name of the instance type used in user-facing messages
    """
    # check whether we have runs with a given name
    if experiment_name and match:
        handle_error(user_msg=Texts.NAME_M_BOTH_GIVEN_ERROR_MSG.format(instance_type=instance_type))
        exit(1)
    elif not experiment_name and not match:
        handle_error(user_msg=Texts.NAME_M_NONE_GIVEN_ERROR_MSG.format(instance_type=instance_type))
        exit(1)

    try:
        with K8sProxy(NAUTAAppNames.ELASTICSEARCH) as proxy:
            es_client = K8sElasticSearchClient(host="127.0.0.1", port=proxy.tunnel_port,
                                               verify_certs=False, use_ssl=False)
            namespace = get_kubectl_current_context_namespace()
            if match:
                experiment_name = match
                name_filter = match
            else:
                # Anchor the regex so only the exact name matches.
                name_filter = f'^{experiment_name}$'
            runs = Run.list(namespace=namespace, name_filter=name_filter, run_kinds_filter=runs_kinds)
            if not runs:
                raise ValueError(f'Run with given name: {experiment_name} does not exist in namespace {namespace}.')

            # CLI passes raw strings - convert them to the structures the client expects.
            pod_ids = pod_ids.split(',') if pod_ids else None
            min_severity = SeverityLevel[min_severity] if min_severity else None
            pod_status = PodStatus[pod_status] if pod_status else None
            # Following logs makes no sense when they are written to a file.
            follow_logs = True if follow and not output else False

            if output and len(runs) > 1:
                click.echo(Texts.MORE_EXP_LOGS_MESSAGE)

            for run in runs:
                # Fix: compute the fallback per run instead of overwriting start_date,
                # which previously made every later run reuse the first run's
                # creation timestamp.
                run_start_date = start_date if start_date else run.creation_timestamp

                run_logs_generator = es_client.get_experiment_logs_generator(run=run, namespace=namespace,
                                                                             min_severity=min_severity,
                                                                             start_date=run_start_date,
                                                                             end_date=end_date,
                                                                             pod_ids=pod_ids, pod_status=pod_status,
                                                                             follow=follow_logs)

                if output:
                    save_logs_to_file(run=run, run_logs_generator=run_logs_generator, instance_type=instance_type)
                else:
                    if len(runs) > 1:
                        click.echo(f'Experiment : {run.name}')
                    print_logs(run_logs_generator=run_logs_generator, pager=pager)

    except K8sProxyCloseError:
        handle_error(logger, Texts.PROXY_CLOSE_LOG_ERROR_MSG, Texts.PROXY_CLOSE_USER_ERROR_MSG)
        exit(1)
    except LocalPortOccupiedError as exe:
        handle_error(logger, Texts.LOCAL_PORT_OCCUPIED_ERROR_MSG.format(exception_message=exe.message),
                     Texts.LOCAL_PORT_OCCUPIED_ERROR_MSG.format(exception_message=exe.message))
        exit(1)
    except K8sProxyOpenError:
        handle_error(logger, Texts.PROXY_CREATION_ERROR_MSG, Texts.PROXY_CREATION_ERROR_MSG)
        exit(1)
    except ValueError:
        handle_error(logger, Texts.EXPERIMENT_NOT_EXISTS_ERROR_MSG.format(experiment_name=experiment_name,
                                                                          instance_type=instance_type.capitalize()),
                     Texts.EXPERIMENT_NOT_EXISTS_ERROR_MSG.format(experiment_name=experiment_name,
                                                                  instance_type=instance_type.capitalize()))
        exit(1)
    except Exception:
        handle_error(logger, Texts.LOGS_GET_OTHER_ERROR_MSG.format(instance_type=instance_type),
                     Texts.LOGS_GET_OTHER_ERROR_MSG.format(instance_type=instance_type))
        exit(1)
Beispiel #26
0
def submit_experiment(
    template: str,
    name: str = None,
    run_kind: RunKinds = RunKinds.TRAINING,
    script_location: str = None,
    script_parameters: Tuple[str, ...] = None,
    pack_params: List[Tuple[str, str]] = None,
    parameter_range: List[Tuple[str, str]] = None,
    parameter_set: Tuple[str, ...] = None,
    script_folder_location: str = None,
    env_variables: List[str] = None,
    requirements_file: str = None
) -> Tuple[List[Run], Dict[str, str], Optional[str]]:
    """
    Prepare, upload, build and submit an experiment based on the given pack template.

    Workflow (as implemented below):
      1. Resolve the current namespace and generate experiment name/labels.
      2. Prepare a Run list (expanded for -pr/-ps parameter ranges/sets) and a
         local environment folder per run.
      3. If parameter ranges/sets were used, ask the user to confirm submission.
      4. Create the Experiment object, upload it to the git repo manager and
         build the experiment image via an Argo workflow.
      5. Create and submit each Run; collect per-run errors.

    :param template: name of the pack template used to run the experiment
    :param name: optional user-provided experiment name
    :param run_kind: kind of the runs created for this experiment
    :param script_location: path to the training script (may be None)
    :param script_parameters: parameters passed to the script
    :param pack_params: pack parameters as (name, value) pairs
    :param parameter_range: -pr specs as (name, value) pairs
    :param parameter_set: -ps specs
    :param script_folder_location: folder uploaded alongside the script
    :param env_variables: environment variables passed to the runs
    :param requirements_file: path to a pip requirements file
    :return: (list of runs, dict of per-run error messages, resolved script location)
    :raises SubmitExperimentError: on any failure during preparation/submission
    :raises K8sProxyCloseError: when the docker registry proxy cannot be closed
    """
    # Normalize optional collection arguments to empty collections.
    script_parameters: Union[Tuple[str, ...], Tuple[(
    )]] = script_parameters if script_parameters else ()
    parameter_set: Union[Tuple[str, ...],
                         Tuple[()]] = parameter_set if parameter_set else ()
    parameter_range = parameter_range if parameter_range else []
    pack_params = pack_params if pack_params else []

    log.debug("Submit experiment - start")
    try:
        namespace = get_kubectl_current_context_namespace()
        # Stored globally so the Ctrl-C handler can clean up this namespace.
        global submitted_namespace
        submitted_namespace = namespace
    except Exception:
        message = Texts.GET_NAMESPACE_ERROR_MSG
        log.exception(message)
        raise SubmitExperimentError(message)

    try:
        with spinner(text=Texts.PREPARING_RESOURCE_DEFINITIONS_MSG):
            experiment_name, labels = generate_exp_name_and_labels(
                script_name=script_location,
                namespace=namespace,
                name=name,
                run_kind=run_kind)
            runs_list = prepare_list_of_runs(experiment_name=experiment_name,
                                             parameter_range=parameter_range,
                                             parameter_set=parameter_set,
                                             template_name=template)
    except SubmitExperimentError as exe:
        log.exception(str(exe))
        raise exe
    except Exception:
        message = Texts.SUBMIT_PREPARATION_ERROR_MSG
        log.exception(message)
        raise SubmitExperimentError(message)

    # Stored globally so the Ctrl-C handler can clean up this experiment.
    global submitted_experiment
    submitted_experiment = experiment_name

    # Ctrl-C handling
    signal.signal(signal.SIGINT, ctrl_c_handler_for_submit)
    signal.signal(signal.SIGTERM, ctrl_c_handler_for_submit)

    try:
        experiment_run_folders = [
        ]  # List of local directories used by experiment's runs
        try:
            cluster_registry_port = get_app_service_node_port(
                nauta_app_name=NAUTAAppNames.DOCKER_REGISTRY)
            # prepare environments for all experiment's runs
            for experiment_run in runs_list:
                # Merge global script parameters with per-run (-pr/-ps) ones.
                if script_parameters and experiment_run.parameters:
                    current_script_parameters = script_parameters + experiment_run.parameters
                elif script_parameters:
                    current_script_parameters = script_parameters
                elif experiment_run.parameters:
                    current_script_parameters = experiment_run.parameters
                else:
                    current_script_parameters = None
                run_folder, script_location, pod_count = \
                    prepare_experiment_environment(experiment_name=experiment_name,
                                                   run_name=experiment_run.name,
                                                   local_script_location=script_location,
                                                   script_folder_location=script_folder_location,  # noqa: E501
                                                   script_parameters=current_script_parameters,
                                                   pack_type=template, pack_params=pack_params,
                                                   cluster_registry_port=cluster_registry_port,
                                                   env_variables=env_variables,
                                                   requirements_file=requirements_file,
                                                   username=namespace,
                                                   run_kind=run_kind)
                # Set correct pod count
                if not pod_count or pod_count < 1:
                    raise SubmitExperimentError(
                        'Unable to determine pod count: make sure that values.yaml '
                        'file in your pack has podCount field with positive integer value.'
                    )

                experiment_run.pod_count = pod_count
                experiment_run_folders.append(run_folder)
                script_name = None
                if script_location is not None:
                    script_name = os.path.basename(script_location)
                # Prepend script_name parameter to run description only for display purposes.
                experiment_run.parameters = script_parameters if not experiment_run.parameters \
                    else experiment_run.parameters + script_parameters
                if experiment_run.parameters and script_name:
                    experiment_run.parameters = (
                        script_name, ) + experiment_run.parameters
                elif script_name:
                    experiment_run.parameters = (script_name, )
        except SubmitExperimentError as e:
            log.exception(Texts.ENV_CREATION_ERROR_MSG)
            e.message += f' {Texts.ENV_CREATION_ERROR_MSG}'
            raise
        except Exception:
            # any error in this step breaks execution of this command
            message = Texts.ENV_CREATION_ERROR_MSG
            log.exception(message)
            # just in case - remove folders that were created with a success
            for experiment_run_folder in experiment_run_folders:
                delete_environment(experiment_run_folder)
            # BUGFIX: previously execution fell through to the submission
            # steps even though environment preparation failed; abort like
            # every other error path in this function does.
            raise SubmitExperimentError(message)
        # if ps or pr option is used - first ask whether experiment(s) should be submitted
        if parameter_range or parameter_set:
            click.echo(Texts.CONFIRM_SUBMIT_MSG)
            click.echo(
                tabulate(
                    {
                        RUN_NAME: [run.name for run in runs_list],
                        RUN_PARAMETERS: [
                            "\n".join(run.parameters) if run.parameters else ""
                            for run in runs_list
                        ]
                    },
                    headers=[RUN_NAME, RUN_PARAMETERS],
                    tablefmt=TBLT_TABLE_FORMAT))
            if not click.confirm(Texts.CONFIRM_SUBMIT_QUESTION_MSG,
                                 default=True):
                # User resigned - clean up and leave without error.
                for experiment_run_folder in experiment_run_folders:
                    delete_environment(experiment_run_folder)
                exit()
        # create Experiment model
        # TODO template_name & template_namespace should be filled after Template implementation
        parameter_range_spec = [
            f'-pr {param_name} {param_value}'
            for param_name, param_value in parameter_range
        ]
        parameter_set_spec = [f'-ps {ps_spec}' for ps_spec in parameter_set]
        experiment_parameters_spec = list(
            script_parameters) + parameter_range_spec + parameter_set_spec
        template_version = get_template_version(template)
        experiment = experiments_model.Experiment(
            name=experiment_name,
            template_name=template,
            parameters_spec=experiment_parameters_spec,
            template_namespace="template-namespace",
            template_version=template_version)
        experiment.create(namespace=namespace, labels=labels)

        with spinner('Uploading experiment...'):
            try:
                upload_experiment_to_git_repo_manager(
                    experiments_workdir=get_run_environment_path(''),
                    experiment_name=experiment_name,
                    run_name=runs_list[0].name,
                    username=namespace)
            except Exception:
                log.exception('Failed to upload experiment.')
                # Best effort: mark the experiment FAILED before aborting.
                try:
                    experiment.state = experiments_model.ExperimentStatus.FAILED
                    experiment.update()
                except Exception:
                    log.exception(
                        f'Failed to set state of {experiment.name} experiment '
                        f'to {experiments_model.ExperimentStatus.FAILED}')
                raise SubmitExperimentError('Failed to upload experiment.')

        with spinner('Building experiment image...'):
            try:
                image_build_workflow: ExperimentImageBuildWorkflow = ExperimentImageBuildWorkflow.from_yaml(
                    yaml_template_path=
                    f'{Config().config_path}/workflows/{EXP_IMAGE_BUILD_WORKFLOW_SPEC}',
                    username=namespace,
                    experiment_name=experiment_name)
                image_build_workflow.create(namespace=namespace)
                image_build_workflow.wait_for_completion()
            except Exception:
                error_msg = 'Failed to build experiment image.'
                log.exception(error_msg)
                # Try to get workflow logs
                _debug_workflow_logs(workflow=image_build_workflow,
                                     namespace=namespace)

                if image_build_workflow.name:
                    error_msg += f' Run nctl workflow logs {image_build_workflow.name} command for more details.'

                # Best effort: mark the experiment FAILED before aborting.
                try:
                    experiment.state = experiments_model.ExperimentStatus.FAILED
                    experiment.update()
                except Exception:
                    log.exception(
                        f'Failed to set state of {experiment.name} experiment '
                        f'to {experiments_model.ExperimentStatus.FAILED}')
                raise SubmitExperimentError(error_msg)
        # submit runs
        run_errors: Dict[str, str] = {}
        for run, run_folder in zip(runs_list, experiment_run_folders):
            try:
                run.state = RunStatus.QUEUED
                with spinner(text=Texts.CREATING_RESOURCES_MSG.format(
                        run_name=run.name)):
                    # Add Run object with runKind label and pack params as annotations
                    run.create(namespace=namespace,
                               labels={'runKind': run_kind.value},
                               annotations={
                                   pack_param_name: pack_param_value
                                   for pack_param_name, pack_param_value in
                                   pack_params
                               })
                    submitted_runs.append(run)
                    submit_draft_pack(run_name=run.name,
                                      run_folder=run_folder,
                                      namespace=namespace)
            except Exception as exe:
                delete_environment(run_folder)
                try:
                    run.state = RunStatus.FAILED
                    run_errors[run.name] = str(exe)
                    run.update()
                except Exception as rexe:
                    # update of non-existing run may fail
                    log.debug(Texts.ERROR_DURING_PATCHING_RUN.format(
                        str(rexe)))
        # Delete experiment if no Runs were submitted
        if not submitted_runs:
            click.echo(Texts.SUBMISSION_FAIL_ERROR_MSG)
            delete_k8s_object("experiment", experiment_name)
        # Change experiment status to submitted
        # NOTE(review): this update also runs when the experiment was just
        # deleted above - presumably update() on a deleted object fails and is
        # caught by the generic handler below; confirm intended behavior.
        experiment.state = experiments_model.ExperimentStatus.SUBMITTED
        experiment.update()
    except LocalPortOccupiedError as exe:
        click.echo(exe.message)
        raise SubmitExperimentError(exe.message)
    except K8sProxyCloseError:
        log.exception('Error during closing of a proxy for a {}'.format(
            NAUTAAppNames.DOCKER_REGISTRY))
        raise K8sProxyCloseError(Texts.PROXY_CLOSE_ERROR_MSG)
    except K8sProxyOpenError:
        error_msg = Texts.PROXY_OPEN_ERROR_MSG
        log.exception(error_msg)
        raise SubmitExperimentError(error_msg)
    except SubmitExperimentError:
        raise
    except Exception as exe:
        error_msg = Texts.SUBMIT_OTHER_ERROR_MSG
        log.exception(error_msg)
        raise SubmitExperimentError(error_msg) from exe
    finally:
        # remove semaphores from all exp folders
        remove_sempahore(experiment_name)

    log.debug("Submit - finish")
    return runs_list, run_errors, script_location
Beispiel #27
0
def view(context, state: State, experiment_name: str, tensorboard: bool,
         username: str):
    """
    Displays details of an experiment: its run row, the pods participating in
    it (with conditions/events, container volumes and resources), a summary of
    requested/limited cpu+memory, and - for pending pods - hints about
    insufficient cluster resources and the top resource consumers.

    :param context: click context, used to invoke the tensorboard command
    :param state: CLI state object
    :param experiment_name: name of the experiment to display
    :param tensorboard: if True - also launch tensorboard for this experiment
    :param username: if given - look up the experiment in this user's namespace
    """
    try:
        if username:
            namespace = username
        else:
            namespace = get_kubectl_current_context_namespace()

        run = Run.get(name=experiment_name, namespace=namespace)
        if not run:
            handle_error(user_msg=Texts.EXPERIMENT_NOT_FOUND_ERROR_MSG.format(
                experiment_name=experiment_name))
            exit(2)

        click.echo(
            tabulate([run.cli_representation],
                     headers=EXPERIMENTS_LIST_HEADERS,
                     tablefmt="orgtbl"))

        click.echo(Texts.PODS_PARTICIPATING_LIST_HEADER)

        pods = get_namespaced_pods(label_selector="runName=" + experiment_name,
                                   namespace=namespace)

        tabular_output = []
        containers_resources = []
        pending_pods = []

        for pod in pods:
            status_string = ""

            if pod.status.conditions:
                # Render each pod condition with optional reason/message.
                for cond in pod.status.conditions:
                    msg = "\n" if not cond.reason else "\n reason: " + \
                                                       wrap_text(cond.reason, width=POD_CONDITIONS_MAX_WIDTH)
                    msg = msg + ", \n message: " + wrap_text(cond.message, width=POD_CONDITIONS_MAX_WIDTH) \
                        if cond.message else msg
                    status_string += wrap_text(
                        cond.type + ": " + cond.status,
                        width=POD_CONDITIONS_MAX_WIDTH) + msg + "\n"
            else:
                # No conditions yet - fall back to pod events for status info.
                pod_events = get_pod_events(namespace=namespace,
                                            name=pod.metadata.name)

                for event in pod_events:
                    msg = "\n" if not event.reason else "\n reason: " + \
                                                        wrap_text(event.reason, width=POD_CONDITIONS_MAX_WIDTH)
                    msg = msg + ", \n message: " + wrap_text(event.message, width=POD_CONDITIONS_MAX_WIDTH) \
                        if event.message else msg
                    status_string += msg + "\n"

            if pod.status.phase.upper() == PodStatus.PENDING.value:
                pending_pods.append(pod.metadata.name)

            # Map container name -> state; defaults to None for containers
            # without a reported status.
            container_statuses = defaultdict(lambda: None)
            if pod.status.container_statuses:
                for container_status in pod.status.container_statuses:
                    container_statuses[
                        container_status.name] = container_status.state

            container_details = []

            for container in pod.spec.containers:
                container_description = Texts.CONTAINER_DETAILS_MSG.format(
                    name=container.name,
                    status=container_status_to_msg(
                        container_statuses[container.name]),
                    volumes=container_volume_mounts_to_msg(
                        container.volume_mounts, spaces=2),
                    resources=container_resources_to_msg(container.resources,
                                                         spaces=4))
                container_details.append(container_description)
                containers_resources.append(container.resources)

            container_details = ''.join(container_details)

            tabular_output.append([
                pod.metadata.name,
                wrap_text(pod.metadata.uid, width=UID_MAX_WIDTH, spaces=0),
                status_string, container_details
            ])
        click.echo(
            tabulate(tabular_output,
                     Texts.PODS_TABLE_HEADERS,
                     tablefmt="orgtbl"))

        # BUGFIX: pre-initialize the sums so the summary table below can still
        # be printed when resource parsing fails - previously the except
        # branch left these names unbound, causing a NameError that was
        # swallowed by the outer handler and misreported as VIEW_OTHER_ERROR_MSG.
        cpu_requests_sum = mem_requests_sum = cpu_limits_sum = mem_limits_sum = ''
        try:
            cpu_requests_sum = sum_cpu_resources([
                container_resource.requests["cpu"]
                for container_resource in containers_resources
                if container_resource.requests
                and container_resource.requests.get("cpu")
            ])
            mem_requests_sum = sum_mem_resources([
                container_resource.requests["memory"]
                for container_resource in containers_resources
                if container_resource.requests
                and container_resource.requests.get("memory")
            ])
            cpu_limits_sum = sum_cpu_resources([
                container_resource.limits["cpu"]
                for container_resource in containers_resources
                if container_resource.limits
                and container_resource.limits.get("cpu")
            ])
            mem_limits_sum = sum_mem_resources([
                container_resource.limits["memory"]
                for container_resource in containers_resources
                if container_resource.limits
                and container_resource.limits.get("memory")
            ])
        except ValueError as exception:
            handle_error(
                logger,
                Texts.RESOURCES_SUM_PARSING_ERROR_MSG.format(
                    error_msg=str(exception)),
                Texts.RESOURCES_SUM_PARSING_ERROR_MSG.format(
                    error_msg=str(exception)))

        click.echo(Texts.RESOURCES_SUM_LIST_HEADER)
        click.echo(
            tabulate(list(
                zip(Texts.RESOURCES_SUM_TABLE_ROWS_HEADERS, [
                    cpu_requests_sum, mem_requests_sum, cpu_limits_sum,
                    mem_limits_sum
                ])),
                     Texts.RESOURCES_SUM_TABLE_HEADERS,
                     tablefmt="orgtbl"))

        if tensorboard:
            click.echo()
            context.invoke(tensorboard_command,
                           experiment_name=[experiment_name])

        if pending_pods:
            click.echo()
            try:
                # Scan pending pods' events for insufficient cpu/memory hints;
                # stop early once both kinds were seen.
                cpu = False
                memory = False
                for pod in pending_pods:
                    events_list = get_pod_events(namespace=namespace, name=pod)
                    for event in events_list:
                        if "insufficient cpu" in event.message.lower():
                            cpu = True
                        elif "insufficient memory" in event.message.lower():
                            memory = True
                        if cpu and memory:
                            break
                    if cpu and memory:
                        break

                if not cpu and not memory:
                    exit(0)

                if cpu and memory:
                    resources = "number of cpus and amount of memory"
                elif cpu:
                    resources = "number of cpus"
                else:
                    resources = "amount of memory"

                click.echo(
                    Texts.INSUFFICIENT_RESOURCES_MESSAGE.format(
                        resources=resources))
                click.echo()
                # Show up to three top cpu/memory consumers as a hint.
                top_cpu_users, top_mem_users = get_highest_usage()
                click.echo(
                    Texts.TOP_CPU_CONSUMERS.format(consumers=", ".join([
                        res.user_name for res in
                        top_cpu_users[0:3 if len(top_cpu_users
                                                 ) > 2 else len(top_cpu_users)]
                    ])))
                click.echo(
                    Texts.TOP_MEMORY_CONSUMERS.format(consumers=", ".join([
                        res.user_name for res in
                        top_mem_users[0:3 if len(top_mem_users
                                                 ) > 2 else len(top_mem_users)]
                    ])))
            except Exception:
                click.echo(Texts.PROBLEMS_WHILE_GATHERING_USAGE_DATA)
                logger.exception(
                    Texts.PROBLEMS_WHILE_GATHERING_USAGE_DATA_LOGS)

    except Exception:
        handle_error(logger, Texts.VIEW_OTHER_ERROR_MSG,
                     Texts.VIEW_OTHER_ERROR_MSG)
        exit(1)
Beispiel #28
0
def get_logs(operation_name: str, start_date: str, end_date: str, match: str,
             output: bool, pager: bool, follow: bool):
    """
    Show logs for a given model export operation.

    Exactly one of `operation_name` / `match` must be provided: a concrete
    operation name or a regex matching multiple operations. Logs are fetched
    from Elasticsearch through a k8s proxy and either printed (optionally via
    a pager) or saved to files when `output` is set.

    :param operation_name: exact name of the export operation
    :param start_date: only logs produced after this date are shown
    :param end_date: only logs produced before this date are shown
    :param match: regex used to select operations instead of an exact name
    :param output: if True - save logs to files instead of printing
    :param pager: if True - display logs through a pager
    :param follow: if True (and not saving to files) - stream logs
    """
    # check whether we have operations with a given name
    if operation_name and match:
        handle_error(user_msg=Texts.NAME_M_BOTH_GIVEN_ERROR_MSG)
        exit(1)
    elif not operation_name and not match:
        handle_error(user_msg=Texts.NAME_M_NONE_GIVEN_ERROR_MSG)
        exit(1)

    try:
        with K8sProxy(NAUTAAppNames.ELASTICSEARCH) as proxy:
            es_client = K8sElasticSearchClient(host="127.0.0.1",
                                               port=proxy.tunnel_port,
                                               verify_certs=False,
                                               use_ssl=False)
            namespace = get_kubectl_current_context_namespace()
            if match:
                operation_name = match
                name_filter = match
            else:
                # Anchor the filter so only the exact name matches.
                name_filter = f'^{operation_name}$'
            workflows = ArgoWorkflow.list(namespace=namespace,
                                          name_filter=name_filter)
            if not workflows:
                raise ValueError(
                    f'Operation with given name: {operation_name} does not '
                    f'exists in namespace {namespace}.')

            # Following logs makes no sense when saving them to a file.
            follow_logs = follow and not output

            if output and len(workflows) > 1:
                click.echo(Texts.MORE_EXP_LOGS_MESSAGE)

            for workflow in workflows:
                # Default the start date to the workflow's start time.
                start_date = start_date if start_date else workflow.started_at

                ops_logs_generator = es_client.get_argo_workflow_logs_generator(
                    workflow=workflow,
                    namespace=namespace,
                    start_date=start_date,
                    end_date=end_date,
                    follow=follow_logs)

                if output:
                    save_logs_to_file(logs_generator=ops_logs_generator,
                                      instance_name=workflow.name,
                                      instance_type="operation")
                else:
                    if len(workflows) > 1:
                        click.echo(f'Operation : {workflow.name}')
                    print_logs(run_logs_generator=ops_logs_generator,
                               pager=pager)

    except K8sProxyCloseError:
        handle_error(logger, Texts.PROXY_CLOSE_LOG_ERROR_MSG,
                     Texts.PROXY_CLOSE_LOG_ERROR_MSG)
        exit(1)
    except LocalPortOccupiedError as exe:
        handle_error(
            logger,
            Texts.LOCAL_PORT_OCCUPIED_ERROR_MSG.format(
                exception_message=exe.message),
            Texts.LOCAL_PORT_OCCUPIED_ERROR_MSG.format(
                exception_message=exe.message))
        exit(1)
    except K8sProxyOpenError:
        handle_error(logger, Texts.PROXY_CREATION_ERROR_MSG,
                     Texts.PROXY_CREATION_ERROR_MSG)
        exit(1)
    except ValueError:
        # BUGFIX: second format() previously used the wrong keyword
        # (experiment_name=) for OPERATION_NOT_EXISTS_ERROR_MSG.
        handle_error(
            logger,
            Texts.OPERATION_NOT_EXISTS_ERROR_MSG.format(
                operation_name=operation_name),
            Texts.OPERATION_NOT_EXISTS_ERROR_MSG.format(
                operation_name=operation_name))
        exit(1)
    except Exception:
        handle_error(logger, Texts.LOGS_GET_OTHER_ERROR_MSG,
                     Texts.LOGS_GET_OTHER_ERROR_MSG)
        exit(1)
Beispiel #29
0
def launch(ctx: click.Context, name: str, model_location: str,
           local_model_location: str, model_name: str,
           pack_param: List[Tuple[str, str]], requirements: str,
           runtime: InferenceRuntime):
    """
    Starts a new prediction instance that can be used for performing prediction, classification and
    regression tasks on trained model.
    """
    # A model has to be supplied either remotely or locally.
    if not (model_location or local_model_location):
        handle_error(user_msg=Texts.MISSING_MODEL_LOCATION_ERROR_MSG.format(
            local_model_location=local_model_location))
        exit(1)

    if local_model_location:
        validate_local_model_location(local_model_location)

    click.echo('Submitting prediction instance.')
    try:
        # Pick the serving template matching the requested runtime.
        if InferenceRuntime(runtime) == InferenceRuntime.OVMS:
            chosen_template = INFERENCE_TEMPLATE_OVMS
        else:
            chosen_template = INFERENCE_TEMPLATE_TFSERVING

        # Derive defaults for model name and instance name from the path.
        effective_location = model_location if model_location else local_model_location
        normalized_path = effective_location.rstrip('/')
        model_name = model_name or os.path.basename(normalized_path)
        name = name or generate_name(name=model_name,
                                     prefix=INFERENCE_INSTANCE_PREFIX)

        inference_instance = start_inference_instance(
            name=name,
            model_location=model_location,
            model_name=model_name,
            local_model_location=local_model_location,
            template=chosen_template,
            requirements=requirements,
            pack_params=pack_param)
        if inference_instance.state == RunStatus.FAILED:
            raise RuntimeError('Inference instance submission failed.')
    except Exception:
        handle_error(logger,
                     Texts.INSTANCE_START_ERROR_MSG,
                     Texts.INSTANCE_START_ERROR_MSG,
                     add_verbosity_msg=ctx.obj.verbosity == 0)
        exit(1)

    instance_row = [
        inference_instance.cli_representation.name, model_location,
        inference_instance.cli_representation.status
    ]
    click.echo(
        tabulate([instance_row],
                 headers=Texts.TABLE_HEADERS,
                 tablefmt=TBLT_TABLE_FORMAT))

    try:
        namespace = get_kubectl_current_context_namespace()
        auth_header = get_authorization_header(service_account_name=name,
                                               namespace=namespace)
        instance_url = get_inference_instance_url(
            inference_instance=inference_instance, model_name=model_name)
        click.echo(
            Texts.INSTANCE_INFO_MSG.format(
                inference_instance_url=instance_url,
                authorization_header=auth_header))
    except Exception:
        handle_error(logger,
                     Texts.INSTANCE_URL_ERROR_MSG,
                     Texts.INSTANCE_URL_ERROR_MSG,
                     add_verbosity_msg=ctx.obj.verbosity == 0)
        exit(1)

    # wait till pod is ready - no more than 40 seconds
    for _ in range(40):
        instance_pods = get_namespaced_pods(label_selector=f'runName={name}',
                                            namespace=namespace)
        if instance_pods \
                and all(pod.status.phase == 'Running' for pod in instance_pods) \
                and all(container.ready
                        for pod in instance_pods
                        for container in pod.status.container_statuses):
            break
        if instance_pods and any(pod.status.phase == 'Failed'
                                 for pod in instance_pods):
            handle_error(logger,
                         Texts.INSTANCE_START_ERROR_MSG,
                         Texts.INSTANCE_START_ERROR_MSG,
                         add_verbosity_msg=ctx.obj.verbosity == 0)
            exit(1)

        time.sleep(1)
    else:
        # Loop exhausted without the pods becoming ready.
        handle_error(logger,
                     Texts.PREDICTION_INSTANCE_NOT_READY.format(name=name),
                     Texts.PREDICTION_INSTANCE_NOT_READY.format(name=name),
                     add_verbosity_msg=ctx.obj.verbosity == 0)
        exit(0)