def test_preprocess_json_request(mocker, input_format, request_body, expected_output):
    """Check that preprocess_json_request converts row-format bodies via
    _row_to_column and hands back the converted payload unchanged.

    Parametrized (fixture-provided) arguments:
        input_format: detected request layout (row or column format).
        request_body: raw JSON body handed to the preprocessor.
        expected_output: value the mocked converter returns, and the
            value the call is expected to produce.
    """
    column_converter = mocker.patch(
        'ie_serving.server.rest_msg_processing._row_to_column')
    column_converter.return_value = expected_output

    output = preprocess_json_request(
        request_body, input_format, [DEFAULT_INPUT_KEY])

    # The converter must only be invoked for row-formatted requests.
    if input_format == ROW_FORMAT:
        assert column_converter.called
    assert output == expected_output
def on_post(self, req, resp, model_name, requested_version=0):
    """Handle a REST predict request for the given model/version.

    Flow: validate the requested model spec, parse and validate the JSON
    body, deserialize inputs, run inference under the engine's in-use
    lock, then serialize results to the falcon response. All failure
    branches set an HTTP error status and a JSON ``{'error': ...}`` body.

    Fixes relative to the previous revision:
    - repaired a log format string that had been split by a stray literal
      newline (a syntax error as written);
    - hoisted the repeated ``self.models[model_name].engines[version]``
      lookup into a local, matching the newer handler variant;
    - the ``in_use`` lock is now released in a ``finally`` so it cannot
      leak if ``infer`` raises something other than ``ValueError``.
    """
    valid_model_spec, version = check_availability_of_requested_model(
        models=self.models, requested_version=requested_version,
        model_name=model_name)
    if not valid_model_spec:
        resp.status = falcon.HTTP_NOT_FOUND
        logger.debug("PREDICT, invalid model spec from request, "
                     "{} - {}".format(model_name, requested_version))
        err_out_json = {
            'error': WRONG_MODEL_SPEC.format(model_name, requested_version)
        }
        resp.body = json.dumps(err_out_json)
        return

    body = req.media
    if type(body) is not dict:
        resp.status = falcon.HTTP_400
        resp.body = json.dumps({'error': 'Invalid JSON in request body'})
        return

    # Single lookup of the serving engine for this model/version.
    engine = self.models[model_name].engines[version]

    input_format = get_input_format(body, engine.input_key_names)
    if input_format == INVALID_FORMAT:
        resp.status = falcon.HTTP_400
        resp.body = json.dumps(
            {'error': 'Invalid inputs in request body'})
        return

    inputs = preprocess_json_request(body, input_format,
                                     engine.input_key_names)

    start_time = datetime.datetime.now()
    occurred_problem, inference_input, batch_size, code = \
        prepare_input_data(models=self.models, model_name=model_name,
                           version=version, data=inputs, rest=True)
    deserialization_end_time = datetime.datetime.now()
    duration = \
        (deserialization_end_time - start_time).total_seconds() * 1000
    logger.debug(
        "PREDICT; input deserialization completed; {}; {}; {}ms".format(
            model_name, version, duration))

    if occurred_problem:
        # ``inference_input`` carries the error message in this branch.
        resp.status = code
        err_out_json = {'error': inference_input}
        logger.debug(
            "PREDICT, problem with input data. Exit code {}".format(code))
        resp.body = json.dumps(err_out_json)
        return

    engine.in_use.acquire()
    inference_start_time = datetime.datetime.now()
    try:
        inference_output = engine.infer(inference_input, batch_size)
        inference_end_time = datetime.datetime.now()
    except ValueError as error:
        resp.status = falcon.HTTP_400
        err_out_json = {'error': 'Malformed input data'}
        logger.debug("PREDICT, problem with inference. "
                     "Corrupted input: {}".format(error))
        resp.body = json.dumps(err_out_json)
        return
    finally:
        # Released on every path, including unexpected exceptions
        # (the previous version leaked the lock on non-ValueError errors).
        engine.in_use.release()

    duration = \
        (inference_end_time - inference_start_time).total_seconds() * 1000
    logger.debug(
        "PREDICT; inference execution completed; {}; {}; {}ms".format(
            model_name, version, duration))

    # numpy arrays are not JSON-serializable; convert to nested lists.
    for key, value in inference_output.items():
        inference_output[key] = value.tolist()

    response = prepare_json_response(
        OUTPUT_REPRESENTATION[input_format], inference_output,
        engine.model_keys['outputs'])
    resp.status = falcon.HTTP_200
    resp.body = json.dumps(response)

    serialization_end_time = datetime.datetime.now()
    duration = \
        (serialization_end_time - inference_end_time).total_seconds() * 1000
    logger.debug("PREDICT; inference results serialization completed;"
                 " {}; {}; {}ms".format(model_name, version, duration))
    return
def on_post(self, req, resp, model_name, requested_version=0):
    """Handle a REST predict request for the given model/version.

    Flow: validate the requested model spec, parse and validate the JSON
    body, deserialize inputs, reshape the network if the input shapes
    require it, run inference under the engine's in-use lock, then
    serialize results to the falcon response. All failure branches set an
    HTTP error status and a JSON ``{'error': ...}`` body.

    Fixes relative to the previous revision:
    - repaired a log format string that had been split by a stray literal
      newline (a syntax error as written);
    - the ``in_use`` lock is now released in a ``finally`` so it cannot
      leak if reshape detection, ``reshape`` or ``infer`` raises
      unexpectedly between ``acquire()`` and ``release()``.
    """
    valid_model_spec, version = check_availability_of_requested_model(
        models=self.models, requested_version=requested_version,
        model_name=model_name)
    if not valid_model_spec:
        resp.status = falcon.HTTP_NOT_FOUND
        logger.debug("PREDICT, invalid model spec from request, "
                     "{} - {}".format(model_name, requested_version))
        err_out_json = {
            'error': WRONG_MODEL_SPEC.format(model_name, requested_version)
        }
        resp.body = json.dumps(err_out_json)
        return

    body = req.media
    if type(body) is not dict:
        resp.status = falcon.HTTP_400
        resp.body = json.dumps({'error': 'Invalid JSON in request body'})
        return

    target_engine = self.models[model_name].engines[version]

    input_format = get_input_format(body, target_engine.input_key_names)
    if input_format == INVALID_FORMAT:
        resp.status = falcon.HTTP_400
        resp.body = json.dumps(
            {'error': 'Invalid inputs in request body'})
        return

    inputs = preprocess_json_request(body, input_format,
                                     target_engine.input_key_names)

    start_time = datetime.datetime.now()
    inference_input, error_message = \
        prepare_input_data(target_engine=target_engine, data=inputs,
                           service_type=REST)
    deserialization_end_time = datetime.datetime.now()
    duration = \
        (deserialization_end_time - start_time).total_seconds() * 1000
    logger.debug(
        "PREDICT; input deserialization completed; {}; {}; {}ms".format(
            model_name, version, duration))

    if error_message is not None:
        resp.status = code = statusCodes['invalid_arg'][REST]
        err_out_json = {'error': error_message}
        logger.debug(
            "PREDICT, problem with input data. Exit code {}".format(code))
        resp.body = json.dumps(err_out_json)
        return

    target_engine.in_use.acquire()
    try:
        # Reshape the network if the incoming input shapes are
        # incompatible with the currently loaded shapes.
        reshape_param = target_engine.detect_shapes_incompatibility(
            inference_input)
        if reshape_param is not None:
            error_message = target_engine.reshape(reshape_param)
            if error_message is not None:
                resp.status = falcon.HTTP_400
                err_out_json = {'error': error_message}
                resp.body = json.dumps(err_out_json)
                return

        inference_start_time = datetime.datetime.now()
        inference_output, error_message = target_engine.infer(
            inference_input)
        if error_message is not None:
            resp.status = falcon.HTTP_400
            err_out_json = {'error': error_message}
            resp.body = json.dumps(err_out_json)
            return
        inference_end_time = datetime.datetime.now()
    finally:
        # Released on every path, including unexpected exceptions
        # (the previous version leaked the lock if one was raised here).
        target_engine.in_use.release()

    duration = \
        (inference_end_time - inference_start_time).total_seconds() * 1000
    logger.debug(
        "PREDICT; inference execution completed; {}; {}; {}ms".format(
            model_name, version, duration))

    # numpy arrays are not JSON-serializable; convert to nested lists.
    for key, value in inference_output.items():
        inference_output[key] = value.tolist()

    response = prepare_json_response(OUTPUT_REPRESENTATION[input_format],
                                     inference_output,
                                     target_engine.model_keys['outputs'])
    resp.status = falcon.HTTP_200
    resp.body = json.dumps(response)

    serialization_end_time = datetime.datetime.now()
    duration = \
        (serialization_end_time - inference_end_time).total_seconds() * 1000
    logger.debug("PREDICT; inference results serialization completed;"
                 " {}; {}; {}ms".format(model_name, version, duration))
    return