Ejemplo n.º 1
0
def stream(state: State, name: str, data: str, method_verb: InferenceVerb):
    """
    Perform stream inference task on launched prediction instance.

    The function works in three steps: it looks up the inference instance
    and builds the request URL, loads the request body from a JSON file,
    and finally POSTs that body to the instance and echoes the response
    text. A failure in any step is reported through ``handle_error`` and
    ends the process with ``exit(1)``.

    :param state: CLI state; only ``state.verbosity`` is read, to set
        ``add_verbosity_msg`` when the instance lookup fails.
    :param name: name of the inference instance (looked up with ``Run.get``
        in the namespace of the current kubectl context).
    :param data: path to a UTF-8 encoded JSON file used as the request body.
    :param method_verb: value converted with ``InferenceVerb(...)``; its
        ``value`` is appended to the instance URL after a colon.
    """
    method_verb = InferenceVerb(method_verb)
    try:
        namespace = get_kubectl_current_context_namespace()

        # TODO: check if kind field of inference instance Run is correct
        inference_instance = Run.get(name=name, namespace=namespace)
        if not inference_instance:
            handle_error(user_msg=Texts.INSTANCE_NOT_EXISTS_ERROR_MSG.format(
                name=name))
            exit(1)
        if inference_instance.state != RunStatus.RUNNING:
            handle_error(user_msg=Texts.INSTANCE_NOT_RUNNING_ERROR_MSG.format(
                name=name, running_code=RunStatus.RUNNING.value))
            exit(1)

        inference_instance_url = get_inference_instance_url(
            inference_instance=inference_instance)
        stream_url = f'{inference_instance_url}:{method_verb.value}'
    except Exception:
        # exit() above raises SystemExit, which is not an Exception subclass,
        # so the early exits are not swallowed by this handler.
        get_fail_msg = Texts.INSTANCE_GET_FAIL_ERROR_MSG.format(name=name)
        handle_error(logger,
                     get_fail_msg,
                     get_fail_msg,
                     add_verbosity_msg=state.verbosity == 0)
        exit(1)

    try:
        with open(data, 'r', encoding='utf-8') as data_file:
            stream_data = json.load(data_file)
    except (json.JSONDecodeError, IOError):
        json_load_msg = Texts.JSON_LOAD_ERROR_MSG.format(data=data)
        handle_error(logger, json_load_msg, json_load_msg)
        exit(1)

    try:
        api_key = get_api_key()
        headers = {
            'Authorization': api_key,
            'Accept': 'application/json',
            'Content-Type': 'application/json'
        }
        with spinner(text=Texts.WAITING_FOR_RESPONSE_MSG):
            stream_response = requests.post(
                stream_url,
                data=json.dumps(stream_data),  # nosec - request to k8s cluster
                verify=False,
                headers=headers)
        stream_response.raise_for_status()
        click.echo(stream_response.text)
    except Exception as e:
        error_msg = Texts.INFERENCE_OTHER_ERROR_MSG.format(exception=e)
        # requests exceptions always expose ``response``, but it is None when
        # no reply was received (e.g. connection errors). Compare against
        # None explicitly: a requests Response is falsy for 4xx/5xx statuses,
        # which are exactly the replies worth showing to the user.
        response = getattr(e, 'response', None)
        if response is not None:
            error_msg += Texts.INFERENCE_ERROR_RESPONSE_MSG.format(
                response_text=response.text)
        handle_error(logger, error_msg, error_msg)
        exit(1)
Ejemplo n.º 2
0
def view(context, state: State, experiment_name: str, tensorboard: bool,
         username: str):
    """
    Displays details of an experiment.

    Output, in order:

    1. a one-row table with the run's ``cli_representation``;
    2. a table of the pods selected by the ``runName=<experiment_name>``
       label, with their conditions (or events when there are no
       conditions) and per-container details;
    3. a table with the summed CPU/memory requests and limits of all
       containers;
    4. optionally the output of ``tensorboard_command``;
    5. when pending pods report insufficient CPU and/or memory, a hint
       together with the top three CPU and memory consumers.

    :param context: object whose ``invoke`` is used to run
        ``tensorboard_command`` (presumably the click context).
    :param state: CLI state; not read by this function.
    :param experiment_name: name of the run to display.
    :param tensorboard: when true, ``tensorboard_command`` is invoked for
        the experiment.
    :param username: namespace to search in; when empty, the namespace of
        the current kubectl context is used.

    Exits with code 2 when the run does not exist, with code 1 on any other
    error, and with code 0 straight away when pods are pending but none of
    their events mention insufficient CPU or memory.
    """
    try:
        if username:
            namespace = username
        else:
            namespace = get_kubectl_current_context_namespace()

        run = Run.get(name=experiment_name, namespace=namespace)
        if not run:
            handle_error(user_msg=Texts.EXPERIMENT_NOT_FOUND_ERROR_MSG.format(
                experiment_name=experiment_name))
            exit(2)

        click.echo(
            tabulate([run.cli_representation],
                     headers=EXPERIMENTS_LIST_HEADERS,
                     tablefmt="orgtbl"))

        click.echo(Texts.PODS_PARTICIPATING_LIST_HEADER)

        pods = get_namespaced_pods(label_selector="runName=" + experiment_name,
                                   namespace=namespace)

        tabular_output = []
        containers_resources = []
        pending_pods = []

        for pod in pods:
            status_string = _pod_status_text(pod, namespace)

            if pod.status.phase.upper() == PodStatus.PENDING.value:
                pending_pods.append(pod.metadata.name)

            # Map container name -> state; containers without a reported
            # status are looked up with .get() and yield None.
            container_states = {}
            if pod.status.container_statuses:
                for container_status in pod.status.container_statuses:
                    container_states[
                        container_status.name] = container_status.state

            container_details = []
            for container in pod.spec.containers:
                container_details.append(
                    Texts.CONTAINER_DETAILS_MSG.format(
                        name=container.name,
                        status=container_status_to_msg(
                            container_states.get(container.name)),
                        volumes=container_volume_mounts_to_msg(
                            container.volume_mounts, spaces=2),
                        resources=container_resources_to_msg(
                            container.resources, spaces=4)))
                containers_resources.append(container.resources)

            tabular_output.append([
                pod.metadata.name,
                wrap_text(pod.metadata.uid, width=UID_MAX_WIDTH, spaces=0),
                status_string, ''.join(container_details)
            ])
        click.echo(
            tabulate(tabular_output,
                     Texts.PODS_TABLE_HEADERS,
                     tablefmt="orgtbl"))

        try:
            cpu_requests_sum = sum_cpu_resources(
                _resource_quantities(containers_resources, "requests", "cpu"))
            mem_requests_sum = sum_mem_resources(
                _resource_quantities(containers_resources, "requests",
                                     "memory"))
            cpu_limits_sum = sum_cpu_resources(
                _resource_quantities(containers_resources, "limits", "cpu"))
            mem_limits_sum = sum_mem_resources(
                _resource_quantities(containers_resources, "limits",
                                     "memory"))
        except ValueError as exception:
            parsing_error_msg = Texts.RESOURCES_SUM_PARSING_ERROR_MSG.format(
                error_msg=str(exception))
            handle_error(logger, parsing_error_msg, parsing_error_msg)
            # Stop here: at least one of the sums is unbound, so continuing
            # would only raise NameError below and report a second, generic
            # error on top of the specific one just shown.
            exit(1)

        click.echo(Texts.RESOURCES_SUM_LIST_HEADER)
        click.echo(
            tabulate(list(
                zip(Texts.RESOURCES_SUM_TABLE_ROWS_HEADERS, [
                    cpu_requests_sum, mem_requests_sum, cpu_limits_sum,
                    mem_limits_sum
                ])),
                     Texts.RESOURCES_SUM_TABLE_HEADERS,
                     tablefmt="orgtbl"))

        if tensorboard:
            click.echo()
            context.invoke(tensorboard_command,
                           experiment_name=[experiment_name])

        if pending_pods:
            click.echo()
            try:
                cpu, memory = _insufficient_resources(pending_pods, namespace)

                if not cpu and not memory:
                    # SystemExit is not an Exception, so neither handler
                    # below intercepts this exit.
                    exit(0)

                if cpu and memory:
                    resources = "number of cpus and amount of memory"
                elif cpu:
                    resources = "number of cpus"
                else:
                    resources = "amount of memory"

                click.echo(
                    Texts.INSUFFICIENT_RESOURCES_MESSAGE.format(
                        resources=resources))
                click.echo()
                top_cpu_users, top_mem_users = get_highest_usage()
                # Slicing already clamps to the sequence length, so [:3]
                # yields "up to three" entries.
                click.echo(
                    Texts.TOP_CPU_CONSUMERS.format(consumers=", ".join(
                        [res.user_name for res in top_cpu_users[:3]])))
                click.echo(
                    Texts.TOP_MEMORY_CONSUMERS.format(consumers=", ".join(
                        [res.user_name for res in top_mem_users[:3]])))
            except Exception:
                click.echo(Texts.PROBLEMS_WHILE_GATHERING_USAGE_DATA)
                logger.exception(
                    Texts.PROBLEMS_WHILE_GATHERING_USAGE_DATA_LOGS)

    except Exception:
        handle_error(logger, Texts.VIEW_OTHER_ERROR_MSG,
                     Texts.VIEW_OTHER_ERROR_MSG)
        exit(1)


def _reason_and_message_text(reason, message):
    """Format the optional reason/message pair of a pod condition or event."""
    text = "\n"
    if reason:
        text = "\n reason: " + wrap_text(reason,
                                         width=POD_CONDITIONS_MAX_WIDTH)
    if message:
        text += ", \n message: " + wrap_text(message,
                                             width=POD_CONDITIONS_MAX_WIDTH)
    return text


def _pod_status_text(pod, namespace):
    """Describe a pod by its conditions, or by its events if it has none."""
    if pod.status.conditions:
        return "".join(
            wrap_text(cond.type + ": " + cond.status,
                      width=POD_CONDITIONS_MAX_WIDTH) +
            _reason_and_message_text(cond.reason, cond.message) + "\n"
            for cond in pod.status.conditions)

    pod_events = get_pod_events(namespace=namespace, name=pod.metadata.name)
    return "".join(
        _reason_and_message_text(event.reason, event.message) + "\n"
        for event in pod_events)


def _resource_quantities(containers_resources, kind, resource_name):
    """Collect the set, non-empty ``resource_name`` values of ``kind``.

    ``kind`` is the attribute to read from each resources object
    ("requests" or "limits"); entries where that mapping or the requested
    key is missing/empty are skipped.
    """
    quantities = []
    for container_resource in containers_resources:
        mapping = getattr(container_resource, kind)
        if mapping and mapping.get(resource_name):
            quantities.append(mapping[resource_name])
    return quantities


def _insufficient_resources(pending_pods, namespace):
    """Return ``(cpu, memory)`` flags found in the pending pods' events."""
    cpu = False
    memory = False
    for pod_name in pending_pods:
        for event in get_pod_events(namespace=namespace, name=pod_name):
            # Events may carry no message at all; treat that as empty text.
            message = (event.message or "").lower()
            # Independent checks: one message can name both resources.
            if "insufficient cpu" in message:
                cpu = True
            if "insufficient memory" in message:
                memory = True
            if cpu and memory:
                return cpu, memory
    return cpu, memory
Ejemplo n.º 3
0
def test_get_run_not_found(mock_k8s_api_client: CustomObjectsApi):
    """Run.get yields None when the mocked lookup raises ApiException 404."""
    not_found = ApiException(status=404)
    mock_k8s_api_client.get_namespaced_custom_object.side_effect = not_found

    assert Run.get(name=RUN_NAME, namespace=NAMESPACE) is None
Ejemplo n.º 4
0
def test_get_run_failure(mock_k8s_api_client: CustomObjectsApi):
    """Run.get lets an ApiException with status 500 reach the caller."""
    server_error = ApiException(status=500)
    mock_k8s_api_client.get_namespaced_custom_object.side_effect = server_error

    with pytest.raises(ApiException):
        Run.get(name=RUN_NAME, namespace=NAMESPACE)
Ejemplo n.º 5
0
def test_get_run_from_namespace(mock_k8s_api_client: CustomObjectsApi):
    """Run.get builds a Run (exactly that type) from the mocked raw response."""
    api_call = mock_k8s_api_client.get_namespaced_custom_object
    api_call.return_value = GET_RUN_RESPONSE_RAW

    fetched = Run.get(name=RUN_NAME, namespace=NAMESPACE)

    assert fetched is not None
    assert type(fetched) is Run