def stream(state: State, name: str, data: str, method_verb: InferenceVerb):
    """
    Perform stream inference task on launched prediction instance.
    """
    method_verb = InferenceVerb(method_verb)
    try:
        namespace = get_kubectl_current_context_namespace()
        # TODO: check if kind field of inference instance Run is correct
        inference_instance = Run.get(name=name, namespace=namespace)
        if not inference_instance:
            handle_error(user_msg=Texts.INSTANCE_NOT_EXISTS_ERROR_MSG.format(name=name))
            exit(1)
        if inference_instance.state != RunStatus.RUNNING:
            handle_error(user_msg=Texts.INSTANCE_NOT_RUNNING_ERROR_MSG.format(
                name=name, running_code=RunStatus.RUNNING.value))
            exit(1)
        inference_instance_url = get_inference_instance_url(inference_instance=inference_instance)
        stream_url = f'{inference_instance_url}:{method_verb.value}'
    except Exception:
        handle_error(logger,
                     Texts.INSTANCE_GET_FAIL_ERROR_MSG.format(name=name),
                     Texts.INSTANCE_GET_FAIL_ERROR_MSG.format(name=name),
                     add_verbosity_msg=state.verbosity == 0)
        exit(1)

    try:
        with open(data, 'r', encoding='utf-8') as data_file:
            stream_data = json.load(data_file)
    except (json.JSONDecodeError, IOError):
        handle_error(logger,
                     Texts.JSON_LOAD_ERROR_MSG.format(data=data),
                     Texts.JSON_LOAD_ERROR_MSG.format(data=data))
        exit(1)

    try:
        api_key = get_api_key()
        headers = {
            'Authorization': api_key,
            'Accept': 'application/json',
            'Content-Type': 'application/json'
        }
        with spinner(text=Texts.WAITING_FOR_RESPONSE_MSG):
            stream_response = requests.post(
                stream_url,
                data=json.dumps(stream_data),
                verify=False,  # nosec - request to k8s cluster
                headers=headers)
        stream_response.raise_for_status()
        click.echo(stream_response.text)
    except Exception as e:
        error_msg = Texts.INFERENCE_OTHER_ERROR_MSG.format(exception=e)
        if hasattr(e, 'response'):
            error_msg += Texts.INFERENCE_ERROR_RESPONSE_MSG.format(
                response_text=e.response.text)  # type: ignore
        handle_error(logger, error_msg, error_msg)
        exit(1)
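
# --- Illustrative sketch (not part of the command above) --------------------
# The stream URL built above follows the TensorFlow Serving REST scheme,
# i.e. <instance-url>:<verb>, where <verb> is e.g. "predict". A minimal
# stand-alone client performing the same POST might look like this; the URL,
# token, and payload in the usage comment are hypothetical placeholders.
import json
import requests

def post_inference(stream_url: str, api_key: str, payload: dict) -> str:
    headers = {
        'Authorization': api_key,
        'Accept': 'application/json',
        'Content-Type': 'application/json',
    }
    # verify=False mirrors the command above: the request goes through the
    # kube-apiserver proxy, typically behind a self-signed certificate.
    response = requests.post(stream_url, data=json.dumps(payload),
                             headers=headers, verify=False)  # nosec
    response.raise_for_status()
    return response.text

# Example usage (hypothetical values):
# print(post_inference(
#     'https://localhost:8443/api/v1/namespaces/ns/services/inf:rest-port'
#     '/proxy/v1/models/mymodel:predict',
#     'Bearer <token>',
#     {'instances': [[1.0, 2.0, 5.0]]}))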
def test_get_inference_instance_url_run_description(mocker):
    fake_instance = MagicMock(spec=Run)
    fake_instance.name = 'inf'
    fake_host = 'https://localhost:8443'
    fake_namespace = 'fake_namespace'
    fake_model_name = 'fake_model'

    get_kubectl_host_mock = mocker.patch('commands.predict.common.get_kubectl_host')
    get_kubectl_host_mock.return_value = fake_host
    get_namespace_mock = mocker.patch('commands.predict.common.get_kubectl_current_context_namespace')
    get_namespace_mock.return_value = fake_namespace

    instance_url = get_inference_instance_url(inference_instance=fake_instance,
                                              model_name=fake_model_name)

    assert instance_url == f'{fake_host}/api/v1/namespaces/{fake_namespace}/' \
                           f'services/{fake_instance.name}:rest-port/proxy/v1/models/{fake_model_name}'
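
# --- Hypothetical reference implementation ----------------------------------
# A minimal version of get_inference_instance_url consistent with the test
# above. The real implementation lives in commands.predict.common and may
# resolve the port name differently; "rest-port" is assumed here, as pinned
# down by the test's expected URL.
def get_inference_instance_url_sketch(kubectl_host: str, namespace: str,
                                      instance_name: str, model_name: str) -> str:
    # Build the kube-apiserver service-proxy URL for the serving pod's REST
    # port, then append the TF Serving model path.
    return (f'{kubectl_host}/api/v1/namespaces/{namespace}/'
            f'services/{instance_name}:rest-port/proxy/v1/models/{model_name}')

# Sanity check against the values used in the test above:
assert get_inference_instance_url_sketch(
    'https://localhost:8443', 'fake_namespace', 'inf', 'fake_model'
) == ('https://localhost:8443/api/v1/namespaces/fake_namespace/'
      'services/inf:rest-port/proxy/v1/models/fake_model')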
def launch(state: State, name: str, model_location: str, local_model_location: str,
           model_name: str, pack_param: List[Tuple[str, str]], requirements: str):
    """
    Starts a new prediction instance that can be used for performing prediction,
    classification and regression tasks on a trained model.
    """
    if not model_location and not local_model_location:
        handle_error(user_msg=Texts.MISSING_MODEL_LOCATION_ERROR_MSG.format(
            local_model_location=local_model_location))
        exit(1)

    if local_model_location:
        validate_local_model_location(local_model_location)

    click.echo('Submitting prediction instance.')
    try:
        model_path = model_location.rstrip('/') if model_location else local_model_location.rstrip('/')
        model_name = model_name if model_name else os.path.basename(model_path)
        name = name if name else generate_name(name=model_name, prefix=INFERENCE_INSTANCE_PREFIX)
        inference_instance = start_inference_instance(
            name=name,
            model_location=model_location,
            model_name=model_name,
            local_model_location=local_model_location,
            requirements=requirements,
            pack_params=pack_param)
        if inference_instance.state == RunStatus.FAILED:
            raise RuntimeError('Inference instance submission failed.')
    except Exception:
        handle_error(logger,
                     Texts.INSTANCE_START_ERROR_MSG,
                     Texts.INSTANCE_START_ERROR_MSG,
                     add_verbosity_msg=state.verbosity == 0)
        exit(1)

    click.echo(tabulate(
        [[inference_instance.cli_representation.name,
          model_location,
          inference_instance.cli_representation.status]],
        headers=Texts.TABLE_HEADERS,
        tablefmt="orgtbl"))

    try:
        namespace = get_kubectl_current_context_namespace()
        authorization_header = get_authorization_header(service_account_name=name,
                                                        namespace=namespace)
        inference_instance_url = get_inference_instance_url(
            inference_instance=inference_instance, model_name=model_name)
        click.echo(Texts.INSTANCE_INFO_MSG.format(
            inference_instance_url=inference_instance_url,
            authorization_header=authorization_header))
    except Exception:
        handle_error(logger,
                     Texts.INSTANCE_URL_ERROR_MSG,
                     Texts.INSTANCE_URL_ERROR_MSG,
                     add_verbosity_msg=state.verbosity == 0)
        exit(1)
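
# --- Illustrative sketch -----------------------------------------------------
# How the defaults above are derived: model_name falls back to the last path
# component of the (remote or local) model location, and the run name is
# generated from it. The 'pred-' prefix below is a hypothetical stand-in for
# INFERENCE_INSTANCE_PREFIX; the project's generate_name helper may also add
# a uniquifying suffix.
import os

def derive_defaults(model_location: str, local_model_location: str,
                    model_name: str = '', name: str = ''):
    model_path = (model_location or local_model_location).rstrip('/')
    model_name = model_name or os.path.basename(model_path)
    name = name or f'pred-{model_name}'  # hypothetical naming scheme
    return model_name, name

# e.g. derive_defaults('/mnt/input/models/resnet50/', '') ->
#      ('resnet50', 'pred-resnet50')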
def launch(ctx: click.Context, name: str, model_location: str, local_model_location: str,
           model_name: str, pack_param: List[Tuple[str, str]], requirements: str,
           runtime: InferenceRuntime):
    """
    Starts a new prediction instance that can be used for performing prediction,
    classification and regression tasks on a trained model.
    """
    if not model_location and not local_model_location:
        handle_error(user_msg=Texts.MISSING_MODEL_LOCATION_ERROR_MSG.format(
            local_model_location=local_model_location))
        exit(1)

    if local_model_location:
        validate_local_model_location(local_model_location)

    click.echo('Submitting prediction instance.')
    try:
        template = INFERENCE_TEMPLATE_OVMS if InferenceRuntime(runtime) == InferenceRuntime.OVMS \
            else INFERENCE_TEMPLATE_TFSERVING
        model_path = model_location.rstrip('/') if model_location else local_model_location.rstrip('/')
        model_name = model_name if model_name else os.path.basename(model_path)
        name = name if name else generate_name(name=model_name, prefix=INFERENCE_INSTANCE_PREFIX)
        inference_instance = start_inference_instance(
            name=name,
            model_location=model_location,
            model_name=model_name,
            local_model_location=local_model_location,
            template=template,
            requirements=requirements,
            pack_params=pack_param)
        if inference_instance.state == RunStatus.FAILED:
            raise RuntimeError('Inference instance submission failed.')
    except Exception:
        handle_error(logger,
                     Texts.INSTANCE_START_ERROR_MSG,
                     Texts.INSTANCE_START_ERROR_MSG,
                     add_verbosity_msg=ctx.obj.verbosity == 0)
        exit(1)

    click.echo(tabulate(
        [[inference_instance.cli_representation.name,
          model_location,
          inference_instance.cli_representation.status]],
        headers=Texts.TABLE_HEADERS,
        tablefmt=TBLT_TABLE_FORMAT))

    try:
        namespace = get_kubectl_current_context_namespace()
        authorization_header = get_authorization_header(service_account_name=name,
                                                        namespace=namespace)
        inference_instance_url = get_inference_instance_url(
            inference_instance=inference_instance, model_name=model_name)
        click.echo(Texts.INSTANCE_INFO_MSG.format(
            inference_instance_url=inference_instance_url,
            authorization_header=authorization_header))
    except Exception:
        handle_error(logger,
                     Texts.INSTANCE_URL_ERROR_MSG,
                     Texts.INSTANCE_URL_ERROR_MSG,
                     add_verbosity_msg=ctx.obj.verbosity == 0)
        exit(1)

    # Wait until the pods are ready - no more than 40 seconds.
    for _ in range(40):
        pods = get_namespaced_pods(label_selector=f'runName={name}', namespace=namespace)
        if pods and all(pod.status.phase == 'Running' for pod in pods) \
                and all(container.ready for pod in pods
                        for container in pod.status.container_statuses):
            break
        if pods and any(pod.status.phase == 'Failed' for pod in pods):
            handle_error(logger,
                         Texts.INSTANCE_START_ERROR_MSG,
                         Texts.INSTANCE_START_ERROR_MSG,
                         add_verbosity_msg=ctx.obj.verbosity == 0)
            exit(1)
        time.sleep(1)
    else:
        handle_error(logger,
                     Texts.PREDICTION_INSTANCE_NOT_READY.format(name=name),
                     Texts.PREDICTION_INSTANCE_NOT_READY.format(name=name),
                     add_verbosity_msg=ctx.obj.verbosity == 0)
        # Exit code 0: the instance was submitted successfully even though
        # readiness was not confirmed within the timeout.
        exit(0)
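
# --- Illustrative sketch -----------------------------------------------------
# The readiness wait above, extracted into a reusable helper. The pod objects
# are assumed to be the kubernetes client's V1Pod. Note that
# status.container_statuses can be None while containers are still being
# created; the guard below handles that case explicitly, which is an
# assumption about the desired behaviour rather than the project's code.
import time

def wait_until_ready(get_pods, timeout_s: int = 40) -> bool:
    """Poll once per second. Returns True once all pods are Running with all
    containers ready, False on timeout. Raises RuntimeError if any pod fails."""
    for _ in range(timeout_s):
        pods = get_pods()
        if pods and any(pod.status.phase == 'Failed' for pod in pods):
            raise RuntimeError('prediction instance pod failed')
        if pods and all(pod.status.phase == 'Running' for pod in pods) \
                and all(container.ready
                        for pod in pods
                        for container in (pod.status.container_statuses or [])):
            return True
        time.sleep(1)
    return False

# Example usage (hypothetical get_namespaced_pods call):
# ready = wait_until_ready(
#     lambda: get_namespaced_pods(label_selector=f'runName={name}',
#                                 namespace=namespace))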