def create_plan_model(models_dir, max_batch, model_version, input_shape,
                      output0_shape, output1_shape, input_dtype, output0_dtype,
                      output1_dtype, input_memory_format,
                      output_memory_format):
    if not tu.validate_for_trt_model(input_dtype, output0_dtype, output1_dtype,
                                     input_shape, output0_shape,
                                     output1_shape):
        return

    create_plan_modelconfig(models_dir, max_batch, model_version, input_shape,
                            output0_shape, output1_shape, input_dtype,
                            output0_dtype, output1_dtype, input_memory_format,
                            output_memory_format, None)

    if (not tu.shape_is_fixed(input_shape) or
            not tu.shape_is_fixed(output0_shape) or
            not tu.shape_is_fixed(output1_shape)):
        create_plan_dynamic_modelfile(models_dir, max_batch, model_version,
                                      input_shape, output0_shape,
                                      output1_shape, input_dtype,
                                      output0_dtype, output1_dtype,
                                      input_memory_format,
                                      output_memory_format)
    else:
        create_plan_fixed_modelfile(models_dir, max_batch, model_version,
                                    input_shape, output0_shape, output1_shape,
                                    input_dtype, output0_dtype, output1_dtype,
                                    input_memory_format, output_memory_format)
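# A minimal usage sketch for create_plan_model. The model directory, shapes,
# and dtypes below are illustrative assumptions, not values from the original
# suite; trt.TensorFormat.LINEAR is the stock TensorRT format enum:
#
#   import numpy as np
#   import tensorrt as trt
#
#   create_plan_model("/tmp/qa_models", max_batch=8, model_version=1,
#                     input_shape=(16,), output0_shape=(16,),
#                     output1_shape=(16,), input_dtype=np.float32,
#                     output0_dtype=np.float32, output1_dtype=np.float32,
#                     input_memory_format=trt.TensorFormat.LINEAR,
#                     output_memory_format=trt.TensorFormat.LINEAR)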
def _trt_reshape(self, dtype, input_shapes, output_shapes=None, no_batch=True):
    # 'shapes' is list of shapes, one for each input.
    if output_shapes is None:
        output_shapes = input_shapes

    if tu.validate_for_trt_model(dtype, dtype, dtype, input_shapes[0],
                                 input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(
                self,
                'plan',
                bs,
                dtype,
                input_shapes,
                output_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(
                self,
                'plan_nobatch',
                1,
                dtype,
                input_shapes,
                output_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
def _trt_reshape(self, dtype, input_shapes, output_shapes=None, no_batch=True):
    # 'shapes' is list of shapes, one for each input.
    if output_shapes is None:
        output_shapes = input_shapes

    if tu.validate_for_trt_model(dtype, dtype, dtype, input_shapes[0],
                                 input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'plan', bs, dtype, input_shapes,
                          output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'plan_nobatch', 1, dtype, input_shapes,
                          output_shapes)
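# Hedged sketch of how a test case might drive _trt_reshape. The method name
# and the reshape-compatible shapes are illustrative assumptions, not taken
# from the original file; input_shapes is a tuple of one shape per input:
#
#   def test_ff1(self):
#       self._trt_reshape(np.float32, ([16],), ([2, 8],))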
def _full_exact(self, req_raw, input_dtype, output0_dtype, output1_dtype,
                swap):
    input_size = 16

    if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_exact(self, 'graphdef', (input_size,), bs, req_raw,
                           input_dtype, output0_dtype, output1_dtype,
                           swap=swap)
            iu.infer_exact(self, 'savedmodel', (input_size,), bs, req_raw,
                           input_dtype, output0_dtype, output1_dtype,
                           swap=swap)
        # model that does not support batching
        iu.infer_exact(self, 'graphdef_nobatch', (input_size,), 1, req_raw,
                       input_dtype, output0_dtype, output1_dtype, swap=swap)
        iu.infer_exact(self, 'savedmodel_nobatch', (input_size,), 1, req_raw,
                       input_dtype, output0_dtype, output1_dtype, swap=swap)

    if tu.validate_for_c2_model(input_dtype, output0_dtype, output1_dtype):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_exact(self, 'netdef', (input_size,), bs, req_raw,
                           input_dtype, output0_dtype, output1_dtype,
                           swap=swap)
        # model that does not support batching
        iu.infer_exact(self, 'netdef_nobatch', (input_size,), 1, req_raw,
                       input_dtype, output0_dtype, output1_dtype, swap=swap)

    if tu.validate_for_trt_model(input_dtype, output0_dtype, output1_dtype):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_exact(self, 'plan', (input_size, 1, 1), bs, req_raw,
                           input_dtype, output0_dtype, output1_dtype,
                           swap=swap)
        # model that does not support batching
        iu.infer_exact(self, 'plan_nobatch', (input_size, 1, 1), 1, req_raw,
                       input_dtype, output0_dtype, output1_dtype, swap=swap)

    # the custom model is src/custom/addsub... it does not swap
    # the inputs so always set to False
    if tu.validate_for_custom_model(input_dtype, output0_dtype, output1_dtype):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_exact(self, 'custom', (input_size,), bs, req_raw,
                           input_dtype, output0_dtype, output1_dtype,
                           swap=False)
        # model that does not support batching
        iu.infer_exact(self, 'custom_nobatch', (input_size,), 1, req_raw,
                       input_dtype, output0_dtype, output1_dtype, swap=False)
def _full_exact(self, input_dtype, output0_dtype, output1_dtype, output0_raw,
                output1_raw, swap):

    def _infer_exact_helper(tester, pf, tensor_shape, batch_size, input_dtype,
                            output0_dtype, output1_dtype, output0_raw=True,
                            output1_raw=True, model_version=None, swap=False,
                            outputs=("OUTPUT0", "OUTPUT1"),
                            use_http=USE_HTTP, use_grpc=USE_GRPC,
                            use_http_json_tensors=True,
                            skip_request_id_check=True, use_streaming=True,
                            correlation_id=0):
        for bs in (1, batch_size):
            # model that does not support batching
            if bs == 1:
                iu.infer_exact(
                    tester, pf + "_nobatch", tensor_shape, bs, input_dtype,
                    output0_dtype, output1_dtype, output0_raw=output0_raw,
                    output1_raw=output1_raw, model_version=model_version,
                    swap=swap, outputs=outputs, use_http=use_http,
                    use_grpc=use_grpc,
                    use_http_json_tensors=use_http_json_tensors,
                    skip_request_id_check=skip_request_id_check,
                    use_streaming=use_streaming,
                    correlation_id=correlation_id,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            # model that supports batching
            iu.infer_exact(
                tester, pf, (bs,) + tensor_shape, bs, input_dtype,
                output0_dtype, output1_dtype, output0_raw=output0_raw,
                output1_raw=output1_raw, model_version=model_version,
                swap=swap, outputs=outputs, use_http=use_http,
                use_grpc=use_grpc,
                use_http_json_tensors=use_http_json_tensors,
                skip_request_id_check=skip_request_id_check,
                use_streaming=use_streaming, correlation_id=correlation_id,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

    input_size = 16

    all_ensemble_prefix = ["simple_", "sequence_", "fan_"]
    ensemble_prefix = [""]
    if ENSEMBLES and OS_WINDOWS:
        for prefix in all_ensemble_prefix:
            if tu.validate_for_ensemble_model(prefix, input_dtype,
                                              output0_dtype, output1_dtype,
                                              (input_size,), (input_size,),
                                              (input_size,)):
                ensemble_prefix.append(prefix)

    if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                (input_size,), (input_size,), (input_size,)):
        for prefix in ensemble_prefix:
            for pf in ["graphdef", "savedmodel"]:
                if pf in BACKENDS:
                    _infer_exact_helper(self, prefix + pf, (input_size,), 8,
                                        input_dtype, output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw, swap=swap)

    if not CPU_ONLY and tu.validate_for_trt_model(
            input_dtype, output0_dtype, output1_dtype, (input_size, 1, 1),
            (input_size, 1, 1), (input_size, 1, 1)):
        for prefix in ensemble_prefix:
            if 'plan' in BACKENDS:
                if input_dtype == np.int8:
                    _infer_exact_helper(self, prefix + 'plan',
                                        (input_size, 1, 1), 8, input_dtype,
                                        output0_dtype, output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw, swap=swap)
                else:
                    _infer_exact_helper(self, prefix + 'plan', (input_size,),
                                        8, input_dtype, output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw, swap=swap)

    if tu.validate_for_onnx_model(input_dtype, output0_dtype, output1_dtype,
                                  (input_size,), (input_size,),
                                  (input_size,)):
        for prefix in ensemble_prefix:
            if 'onnx' in BACKENDS:
                _infer_exact_helper(self, prefix + 'onnx', (input_size,), 8,
                                    input_dtype, output0_dtype, output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw, swap=swap)

    if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
                                      output1_dtype, (input_size,),
                                      (input_size,), (input_size,)):
        for prefix in ensemble_prefix:
            if 'libtorch' in BACKENDS:
                _infer_exact_helper(self, prefix + 'libtorch', (input_size,),
                                    8, input_dtype, output0_dtype,
                                    output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw, swap=swap)
            if prefix == "":
                if 'python' in BACKENDS:
                    _infer_exact_helper(self, prefix + 'python',
                                        (input_size,), 8, input_dtype,
                                        output0_dtype, output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw, swap=swap)
def _full_exact(self, input_dtype, output0_dtype, output1_dtype, output0_raw,
                output1_raw, swap):

    def _infer_exact_helper(tester, pf, tensor_shape, batch_size, input_dtype,
                            output0_dtype, output1_dtype, output0_raw=True,
                            output1_raw=True, model_version=None, swap=False,
                            outputs=("OUTPUT0", "OUTPUT1"), use_http=True,
                            use_grpc=True, skip_request_id_check=False,
                            use_streaming=True, correlation_id=0):
        for bs in (1, batch_size):
            iu.infer_exact(tester, pf, (bs,) + tensor_shape, bs, input_dtype,
                           output0_dtype, output1_dtype, output0_raw,
                           output1_raw, model_version, swap, outputs,
                           use_http, use_grpc, skip_request_id_check,
                           use_streaming, correlation_id)

    input_size = 16

    if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                (input_size,), (input_size,), (input_size,)):
        for pf in ["graphdef", "savedmodel"]:
            _infer_exact_helper(self, pf, (input_size,), 8, input_dtype,
                                output0_dtype, output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw, swap=swap)

    if tu.validate_for_c2_model(input_dtype, output0_dtype, output1_dtype,
                                (input_size,), (input_size,), (input_size,)):
        _infer_exact_helper(self, 'netdef', (input_size,), 8, input_dtype,
                            output0_dtype, output1_dtype,
                            output0_raw=output0_raw, output1_raw=output1_raw,
                            swap=swap)

    if tu.validate_for_trt_model(input_dtype, output0_dtype, output1_dtype,
                                 (input_size, 1, 1), (input_size, 1, 1),
                                 (input_size, 1, 1)):
        if input_dtype == np.int8:
            _infer_exact_helper(self, 'plan', (input_size, 1, 1), 8,
                                input_dtype, output0_dtype, output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw, swap=swap)
        else:
            _infer_exact_helper(self, 'plan', (input_size,), 8, input_dtype,
                                output0_dtype, output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw, swap=swap)

    if tu.validate_for_onnx_model(input_dtype, output0_dtype, output1_dtype,
                                  (input_size,), (input_size,),
                                  (input_size,)):
        _infer_exact_helper(self, 'onnx', (input_size,), 8, input_dtype,
                            output0_dtype, output1_dtype,
                            output0_raw=output0_raw, output1_raw=output1_raw,
                            swap=swap)

    if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
                                      output1_dtype, (input_size,),
                                      (input_size,), (input_size,)):
        _infer_exact_helper(self, 'libtorch', (input_size,), 8, input_dtype,
                            output0_dtype, output1_dtype,
                            output0_raw=output0_raw, output1_raw=output1_raw,
                            swap=swap)
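# Hedged sketch of a typical caller: infer tests in this style usually wrap
# _full_exact in per-dtype test methods. The method name and dtype choices
# below are illustrative assumptions, not from the original file:
#
#   def test_raw_fff(self):
#       self._full_exact(np.float32, np.float32, np.float32,
#                        output0_raw=True, output1_raw=True, swap=True)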
def _full_exact(self, input_dtype, output0_dtype, output1_dtype, input_shape,
                output0_shape, output1_shape, output0_raw=True,
                output1_raw=True, swap=False):

    def _infer_exact_helper(tester, pf, tensor_shape, batch_size, input_dtype,
                            output0_dtype, output1_dtype, output0_raw=True,
                            output1_raw=True, model_version=None, swap=False,
                            outputs=("OUTPUT0", "OUTPUT1"), use_http=True,
                            use_grpc=True, skip_request_id_check=False,
                            use_streaming=True, correlation_id=0):
        for bs in (1, batch_size):
            # model that does not support batching
            if bs == 1:
                iu.infer_exact(tester, pf + "_nobatch", tensor_shape, bs,
                               input_dtype, output0_dtype, output1_dtype,
                               output0_raw, output1_raw, model_version, swap,
                               outputs, use_http, use_grpc,
                               skip_request_id_check, use_streaming,
                               correlation_id)
            # model that supports batching
            iu.infer_exact(tester, pf, tensor_shape, bs, input_dtype,
                           output0_dtype, output1_dtype, output0_raw,
                           output1_raw, model_version, swap, outputs,
                           use_http, use_grpc, skip_request_id_check,
                           use_streaming, correlation_id)

    all_ensemble_prefix = ["simple_", "sequence_", "fan_"]
    ensemble_prefix = [""]
    for prefix in all_ensemble_prefix:
        if tu.validate_for_ensemble_model(prefix, input_dtype, output0_dtype,
                                          output1_dtype, input_shape,
                                          input_shape, input_shape):
            ensemble_prefix.append(prefix)

    if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                input_shape, output0_shape, output1_shape):
        for prefix in ensemble_prefix:
            for pf in ["graphdef", "savedmodel"]:
                _infer_exact_helper(self, prefix + pf, input_shape, 8,
                                    input_dtype, output0_dtype, output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw, swap=swap)

    if tu.validate_for_trt_model(input_dtype, output0_dtype, output1_dtype,
                                 input_shape, output0_shape, output1_shape):
        for prefix in ensemble_prefix:
            if input_dtype == np.int8:
                _infer_exact_helper(self, prefix + 'plan',
                                    input_shape + (1, 1), 8, input_dtype,
                                    output0_dtype, output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw, swap=swap)
            else:
                _infer_exact_helper(self, prefix + 'plan', input_shape, 8,
                                    input_dtype, output0_dtype, output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw, swap=swap)

    if tu.validate_for_c2_model(input_dtype, output0_dtype, output1_dtype,
                                input_shape, output0_shape, output1_shape):
        for prefix in ensemble_prefix:
            _infer_exact_helper(self, prefix + 'netdef', input_shape, 8,
                                input_dtype, output0_dtype, output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw, swap=swap)

    # the custom model is src/custom/addsub... it does not swap
    # the inputs so always set to False
    if tu.validate_for_custom_model(input_dtype, output0_dtype, output1_dtype,
                                    input_shape, output0_shape,
                                    output1_shape):
        # No basic ensemble models are created against custom models
        _infer_exact_helper(self, 'custom', input_shape, 8, input_dtype,
                            output0_dtype, output1_dtype,
                            output0_raw=output0_raw, output1_raw=output1_raw,
                            swap=False)

    if tu.validate_for_onnx_model(input_dtype, output0_dtype, output1_dtype,
                                  input_shape, output0_shape, output1_shape):
        # No basic ensemble models are created against custom models [TODO]
        _infer_exact_helper(self, 'onnx', input_shape, 8, input_dtype,
                            output0_dtype, output1_dtype,
                            output0_raw=output0_raw, output1_raw=output1_raw,
                            swap=swap)

    if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
                                      output1_dtype, input_shape,
                                      output0_shape, output1_shape):
        # No basic ensemble models are created against custom models [TODO]
        _infer_exact_helper(self, 'libtorch', input_shape, 8, input_dtype,
                            output0_dtype, output1_dtype,
                            output0_raw=output0_raw, output1_raw=output1_raw,
                            swap=swap)
def _full_exact(self, input_dtype, output0_dtype, output1_dtype, output0_raw,
                output1_raw, swap):

    def _infer_exact_helper(tester, pf, tensor_shape, batch_size, input_dtype,
                            output0_dtype, output1_dtype, output0_raw=True,
                            output1_raw=True, model_version=None, swap=False,
                            outputs=("OUTPUT0", "OUTPUT1"), use_http=True,
                            use_grpc=True, use_http_json_tensors=True,
                            skip_request_id_check=True, use_streaming=True,
                            correlation_id=0):
        for bs in (1, batch_size):
            # model that does not support batching
            if bs == 1:
                iu.infer_exact(
                    tester, pf + "_nobatch", tensor_shape, bs, input_dtype,
                    output0_dtype, output1_dtype, output0_raw, output1_raw,
                    model_version, swap, outputs, use_http, use_grpc,
                    use_http_json_tensors, skip_request_id_check,
                    use_streaming, correlation_id,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            # model that supports batching
            iu.infer_exact(
                tester, pf, (bs,) + tensor_shape, bs, input_dtype,
                output0_dtype, output1_dtype, output0_raw, output1_raw,
                model_version, swap, outputs, use_http, use_grpc,
                use_http_json_tensors, skip_request_id_check, use_streaming,
                correlation_id,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

    input_size = 16

    all_ensemble_prefix = ["simple_", "sequence_", "fan_"]
    ensemble_prefix = [""]
    if ENSEMBLES and ("custom" in BACKENDS):
        for prefix in all_ensemble_prefix:
            if tu.validate_for_ensemble_model(prefix, input_dtype,
                                              output0_dtype, output1_dtype,
                                              (input_size,), (input_size,),
                                              (input_size,)):
                ensemble_prefix.append(prefix)

    if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                (input_size,), (input_size,), (input_size,)):
        for prefix in ensemble_prefix:
            for pf in ["graphdef", "savedmodel"]:
                if pf in BACKENDS:
                    _infer_exact_helper(self, prefix + pf, (input_size,), 8,
                                        input_dtype, output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw, swap=swap)

    if tu.validate_for_c2_model(input_dtype, output0_dtype, output1_dtype,
                                (input_size,), (input_size,), (input_size,)):
        for prefix in ensemble_prefix:
            if 'netdef' in BACKENDS:
                _infer_exact_helper(self, prefix + 'netdef', (input_size,), 8,
                                    input_dtype, output0_dtype, output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw, swap=swap)

    if not CPU_ONLY and tu.validate_for_trt_model(
            input_dtype, output0_dtype, output1_dtype, (input_size, 1, 1),
            (input_size, 1, 1), (input_size, 1, 1)):
        for prefix in ensemble_prefix:
            if 'plan' in BACKENDS:
                if input_dtype == np.int8:
                    _infer_exact_helper(self, prefix + 'plan',
                                        (input_size, 1, 1), 8, input_dtype,
                                        output0_dtype, output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw, swap=swap)
                else:
                    _infer_exact_helper(self, prefix + 'plan', (input_size,),
                                        8, input_dtype, output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw, swap=swap)

    # the custom model is src/custom/addsub... it does not swap
    # the inputs so always set to False
    if tu.validate_for_custom_model(input_dtype, output0_dtype, output1_dtype,
                                    (input_size,), (input_size,),
                                    (input_size,)):
        # No basic ensemble models are created against custom models
        if 'custom' in BACKENDS:
            _infer_exact_helper(self, 'custom', (input_size,), 8, input_dtype,
                                output0_dtype, output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw, swap=False)

    if tu.validate_for_onnx_model(input_dtype, output0_dtype, output1_dtype,
                                  (input_size,), (input_size,),
                                  (input_size,)):
        for prefix in ensemble_prefix:
            if 'onnx' in BACKENDS:
                _infer_exact_helper(self, prefix + 'onnx', (input_size,), 8,
                                    input_dtype, output0_dtype, output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw, swap=swap)

    if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
                                      output1_dtype, (input_size,),
                                      (input_size,), (input_size,)):
        for prefix in ensemble_prefix:
            if 'libtorch' in BACKENDS:
                _infer_exact_helper(self, prefix + 'libtorch', (input_size,),
                                    8, input_dtype, output0_dtype,
                                    output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw, swap=swap)
def _full_exact(self, input_dtype, output0_dtype, output1_dtype, input_shape,
                output0_shape, output1_shape, output0_raw=True,
                output1_raw=True, swap=False):

    def _infer_exact_helper(tester, pf, tensor_shape, batch_size, input_dtype,
                            output0_dtype, output1_dtype, output0_raw=True,
                            output1_raw=True, model_version=None, swap=False,
                            outputs=("OUTPUT0", "OUTPUT1"), use_http=True,
                            use_grpc=True, skip_request_id_check=False,
                            use_streaming=True, correlation_id=0):
        for bs in (1, batch_size):
            # model that does not support batching
            if bs == 1:
                iu.infer_exact(
                    tester, pf + "_nobatch", tensor_shape, bs, input_dtype,
                    output0_dtype, output1_dtype, output0_raw=output0_raw,
                    output1_raw=output1_raw, model_version=model_version,
                    swap=swap, outputs=outputs, use_http=use_http,
                    use_grpc=use_grpc,
                    skip_request_id_check=skip_request_id_check,
                    use_streaming=use_streaming,
                    correlation_id=correlation_id,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            # model that supports batching
            iu.infer_exact(
                tester, pf, (bs,) + tensor_shape, bs, input_dtype,
                output0_dtype, output1_dtype, output0_raw=output0_raw,
                output1_raw=output1_raw, model_version=model_version,
                swap=swap, outputs=outputs, use_http=use_http,
                use_grpc=use_grpc,
                skip_request_id_check=skip_request_id_check,
                use_streaming=use_streaming, correlation_id=correlation_id,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

    all_ensemble_prefix = ["simple_", "sequence_", "fan_"]
    ensemble_prefix = [""]
    for prefix in all_ensemble_prefix:
        if tu.validate_for_ensemble_model(prefix, input_dtype, output0_dtype,
                                          output1_dtype, input_shape,
                                          input_shape, input_shape):
            ensemble_prefix.append(prefix)

    if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                input_shape, output0_shape, output1_shape):
        for prefix in ensemble_prefix:
            for pf in ["graphdef", "savedmodel"]:
                _infer_exact_helper(self, prefix + pf, input_shape, 8,
                                    input_dtype, output0_dtype, output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw, swap=swap)

    if tu.validate_for_trt_model(input_dtype, output0_dtype, output1_dtype,
                                 input_shape, output0_shape, output1_shape):
        for prefix in ensemble_prefix:
            if input_dtype == np.int8:
                _infer_exact_helper(self, prefix + 'plan',
                                    input_shape + (1, 1), 8, input_dtype,
                                    output0_dtype, output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw, swap=swap)
            else:
                _infer_exact_helper(self, prefix + 'plan', input_shape, 8,
                                    input_dtype, output0_dtype, output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw, swap=swap)

    if tu.validate_for_onnx_model(input_dtype, output0_dtype, output1_dtype,
                                  input_shape, output0_shape, output1_shape):
        # No basic ensemble models are created against custom models [TODO]
        _infer_exact_helper(self, 'onnx', input_shape, 8, input_dtype,
                            output0_dtype, output1_dtype,
                            output0_raw=output0_raw, output1_raw=output1_raw,
                            swap=swap)

    if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
                                      output1_dtype, input_shape,
                                      output0_shape, output1_shape):
        # No basic ensemble models are created against custom models [TODO]
        _infer_exact_helper(self, 'libtorch', input_shape, 8, input_dtype,
                            output0_dtype, output1_dtype,
                            output0_raw=output0_raw, output1_raw=output1_raw,
                            swap=swap)
def create_plan_modelconfig(models_dir, max_batch, model_version, input_shape,
                            output0_shape, output1_shape, input_dtype,
                            output0_dtype, output1_dtype, input_memory_format,
                            output_memory_format, version_policy):
    if not tu.validate_for_trt_model(input_dtype, output0_dtype, output1_dtype,
                                     input_shape, output0_shape,
                                     output1_shape):
        return

    # Unpack version policy
    version_policy_str = "{ latest { num_versions: 1 }}"
    if version_policy is not None:
        type, val = version_policy
        if type == 'latest':
            version_policy_str = "{{ latest {{ num_versions: {} }}}}".format(
                val)
        elif type == 'specific':
            version_policy_str = "{{ specific {{ versions: {} }}}}".format(val)
        else:
            version_policy_str = "{ all { }}"

    # Use a different model name for different kinds of models
    base_name = "plan_nobatch" if max_batch == 0 else "plan"
    base_name += "_" + trt_format_to_string(
        input_memory_format) + "_" + trt_format_to_string(output_memory_format)
    model_name = tu.get_model_name(base_name, input_dtype, output0_dtype,
                                   output1_dtype)

    config_dir = models_dir + "/" + model_name

    if -1 in input_shape:
        profile_index = 0
        config = '''
name: "{}"
platform: "tensorrt_plan"
max_batch_size: {}
version_policy: {}
input [
  {{
    name: "INPUT0"
    data_type: {}
    dims: [ {} ]
  }},
  {{
    name: "INPUT1"
    data_type: {}
    dims: [ {} ]
  }}
]
output [
  {{
    name: "OUTPUT0"
    data_type: {}
    dims: [ {} ]
  }},
  {{
    name: "OUTPUT1"
    data_type: {}
    dims: [ {} ]
  }}
]
instance_group [
  {{
    profile:"{}"
  }}
]
'''.format(model_name, max_batch, version_policy_str,
           np_to_model_dtype(input_dtype), tu.shape_to_dims_str(input_shape),
           np_to_model_dtype(input_dtype), tu.shape_to_dims_str(input_shape),
           np_to_model_dtype(output0_dtype),
           tu.shape_to_dims_str(output0_shape),
           np_to_model_dtype(output1_dtype),
           tu.shape_to_dims_str(output1_shape), profile_index)
    else:
        config = '''
name: "{}"
platform: "tensorrt_plan"
max_batch_size: {}
version_policy: {}
input [
  {{
    name: "INPUT0"
    data_type: {}
    dims: [ {} ]
  }},
  {{
    name: "INPUT1"
    data_type: {}
    dims: [ {} ]
  }}
]
output [
  {{
    name: "OUTPUT0"
    data_type: {}
    dims: [ {} ]
  }},
  {{
    name: "OUTPUT1"
    data_type: {}
    dims: [ {} ]
  }}
]
'''.format(model_name, max_batch, version_policy_str,
           np_to_model_dtype(input_dtype), tu.shape_to_dims_str(input_shape),
           np_to_model_dtype(input_dtype), tu.shape_to_dims_str(input_shape),
           np_to_model_dtype(output0_dtype),
           tu.shape_to_dims_str(output0_shape),
           np_to_model_dtype(output1_dtype),
           tu.shape_to_dims_str(output1_shape))

    try:
        os.makedirs(config_dir)
    except OSError as ex:
        pass  # ignore existing dir

    with open(config_dir + "/config.pbtxt", "w") as cfile:
        cfile.write(config)
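# version_policy is unpacked above as a (kind, value) pair. A hedged sketch
# of the accepted forms; the arguments elided with ... are whatever the
# caller already passes, and the values shown are illustrative:
#
#   create_plan_modelconfig(..., version_policy=None)               # latest 1
#   create_plan_modelconfig(..., version_policy=('latest', 2))      # latest 2
#   create_plan_modelconfig(..., version_policy=('specific', [1]))  # only v1
#   create_plan_modelconfig(..., version_policy=('all', None))      # all versions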
def _full_exact(self, req_raw, input_dtype, output0_dtype, output1_dtype,
                swap):
    input_size = 16

    if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_exact(self, 'graphdef', (input_size,), bs, req_raw,
                           input_dtype, output0_dtype, output1_dtype,
                           swap=swap)
            iu.infer_exact(self, 'savedmodel', (input_size,), bs, req_raw,
                           input_dtype, output0_dtype, output1_dtype,
                           swap=swap)
        # model that does not support batching
        iu.infer_exact(self, 'graphdef_nobatch', (input_size,), 1, req_raw,
                       input_dtype, output0_dtype, output1_dtype, swap=swap)
        iu.infer_exact(self, 'savedmodel_nobatch', (input_size,), 1, req_raw,
                       input_dtype, output0_dtype, output1_dtype, swap=swap)

    if tu.validate_for_c2_model(input_dtype, output0_dtype, output1_dtype):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_exact(self, 'netdef', (input_size,), bs, req_raw,
                           input_dtype, output0_dtype, output1_dtype,
                           swap=swap)
        # model that does not support batching
        iu.infer_exact(self, 'netdef_nobatch', (input_size,), 1, req_raw,
                       input_dtype, output0_dtype, output1_dtype, swap=swap)

    if tu.validate_for_trt_model(input_dtype, output0_dtype, output1_dtype):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_exact(self, 'plan', (input_size, 1, 1), bs, req_raw,
                           input_dtype, output0_dtype, output1_dtype,
                           swap=swap)
        # model that does not support batching
        iu.infer_exact(self, 'plan_nobatch', (input_size, 1, 1), 1, req_raw,
                       input_dtype, output0_dtype, output1_dtype, swap=swap)
def _full_exact(self, input_dtype, output0_dtype, output1_dtype, output0_raw,
                output1_raw, swap):

    def _infer_exact_helper(tester, pf, tensor_shape, batch_size, input_dtype,
                            output0_dtype, output1_dtype, output0_raw=True,
                            output1_raw=True, model_version=None, swap=False,
                            outputs=("OUTPUT0", "OUTPUT1"),
                            use_http=USE_HTTP, use_grpc=USE_GRPC,
                            use_http_json_tensors=True,
                            skip_request_id_check=True, use_streaming=True,
                            correlation_id=0):
        for bs in (1, batch_size):
            # model that does not support batching
            if bs == 1:
                iu.infer_exact(
                    tester, pf + "_nobatch", tensor_shape, bs, input_dtype,
                    output0_dtype, output1_dtype, output0_raw=output0_raw,
                    output1_raw=output1_raw, model_version=model_version,
                    swap=swap, outputs=outputs, use_http=use_http,
                    use_grpc=use_grpc,
                    use_http_json_tensors=use_http_json_tensors,
                    skip_request_id_check=skip_request_id_check,
                    use_streaming=use_streaming,
                    correlation_id=correlation_id,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            # model that supports batching
            iu.infer_exact(
                tester, pf, (bs,) + tensor_shape, bs, input_dtype,
                output0_dtype, output1_dtype, output0_raw=output0_raw,
                output1_raw=output1_raw, model_version=model_version,
                swap=swap, outputs=outputs, use_http=use_http,
                use_grpc=use_grpc,
                use_http_json_tensors=use_http_json_tensors,
                skip_request_id_check=skip_request_id_check,
                use_streaming=use_streaming, correlation_id=correlation_id,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

    input_size = 16

    all_ensemble_prefix = ["simple_", "sequence_", "fan_"]
    ensemble_prefix = [""]
    if ENSEMBLES:
        for prefix in all_ensemble_prefix:
            if tu.validate_for_ensemble_model(prefix, input_dtype,
                                              output0_dtype, output1_dtype,
                                              (input_size,), (input_size,),
                                              (input_size,)):
                ensemble_prefix.append(prefix)

    if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                (input_size,), (input_size,), (input_size,)):
        for prefix in ensemble_prefix:
            for pf in ["graphdef", "savedmodel"]:
                if pf in BACKENDS:
                    _infer_exact_helper(self, prefix + pf, (input_size,), 8,
                                        input_dtype, output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw, swap=swap)

    if not CPU_ONLY and tu.validate_for_trt_model(
            input_dtype, output0_dtype, output1_dtype, (input_size, 1, 1),
            (input_size, 1, 1), (input_size, 1, 1)):
        for prefix in ensemble_prefix:
            if 'plan' in BACKENDS:
                if input_dtype == np.int8:
                    _infer_exact_helper(self, prefix + 'plan',
                                        (input_size, 1, 1), 8, input_dtype,
                                        output0_dtype, output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw, swap=swap)
                else:
                    _infer_exact_helper(self, prefix + 'plan', (input_size,),
                                        8, input_dtype, output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw, swap=swap)

    if tu.validate_for_onnx_model(input_dtype, output0_dtype, output1_dtype,
                                  (input_size,), (input_size,),
                                  (input_size,)):
        for prefix in ensemble_prefix:
            if 'onnx' in BACKENDS:
                _infer_exact_helper(self, prefix + 'onnx', (input_size,), 8,
                                    input_dtype, output0_dtype, output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw, swap=swap)

    if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
                                      output1_dtype, (input_size,),
                                      (input_size,), (input_size,)):
        # Due to PyTorch bug
        # https://github.com/pytorch/pytorch/issues/66930 we can't
        # run this test with int8 input and int32 outputs.
        if ((input_dtype == np.int8) and (output0_dtype == np.int32) and
                (output1_dtype == np.int32)):
            print('skipping pytorch test for int8_int32_int32')
        else:
            for prefix in ensemble_prefix:
                if 'libtorch' in BACKENDS:
                    # Skip batching for PyTorch String I/O
                    if ((input_dtype == np_dtype_string) or
                            (output0_dtype == np_dtype_string) or
                            (output1_dtype == np_dtype_string)):
                        iu.infer_exact(
                            self,
                            prefix + 'libtorch_nobatch',
                            (input_size,),
                            1,  # batch_size
                            input_dtype,
                            output0_dtype,
                            output1_dtype,
                            output0_raw=output0_raw,
                            output1_raw=output1_raw,
                            swap=swap,
                            use_http=USE_HTTP,
                            use_grpc=USE_GRPC,
                            use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                            use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
                    else:
                        _infer_exact_helper(self, prefix + 'libtorch',
                                            (input_size,), 8, input_dtype,
                                            output0_dtype, output1_dtype,
                                            output0_raw=output0_raw,
                                            output1_raw=output1_raw,
                                            swap=swap)

    for prefix in ensemble_prefix:
        if prefix != "":
            continue
        if 'python_dlpack' in BACKENDS:
            _infer_exact_helper(self, prefix + 'python_dlpack',
                                (input_size,), 8, input_dtype, output0_dtype,
                                output1_dtype, output0_raw=output0_raw,
                                output1_raw=output1_raw, swap=swap)
        elif 'python' in BACKENDS:
            _infer_exact_helper(self, prefix + 'python', (input_size,), 8,
                                input_dtype, output0_dtype, output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw, swap=swap)
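# Hedged sketch of a string-typed caller that would exercise the PyTorch
# no-batch path above. The method name is an illustrative assumption;
# np_dtype_string is the object-dtype alias these tests already use:
#
#   def test_raw_ooo(self):
#       self._full_exact(np_dtype_string, np_dtype_string, np_dtype_string,
#                        output0_raw=True, output1_raw=True, swap=False)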