def _full_zero(self, dtype, shapes):
    # 'shapes' is list of shapes, one for each input.

    # For validation assume any shape can be used...
    if tu.validate_for_tf_model(dtype, dtype, dtype, shapes[0], shapes[0],
                                shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'graphdef', bs, dtype, shapes, shapes)
            iu.infer_zero(self, 'savedmodel', bs, dtype, shapes, shapes)
        # model that does not support batching
        iu.infer_zero(self, 'graphdef_nobatch', 1, dtype, shapes, shapes)
        iu.infer_zero(self, 'savedmodel_nobatch', 1, dtype, shapes, shapes)

    if tu.validate_for_c2_model(dtype, dtype, dtype, shapes[0], shapes[0],
                                shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'netdef', bs, dtype, shapes, shapes)
        # model that does not support batching
        iu.infer_zero(self, 'netdef_nobatch', 1, dtype, shapes, shapes)

    if tu.validate_for_onnx_model(dtype, dtype, dtype, shapes[0], shapes[0],
                                  shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'onnx', bs, dtype, shapes, shapes)
        # model that does not support batching
        iu.infer_zero(self, 'onnx_nobatch', 1, dtype, shapes, shapes)

    for name in ["simple_zero", "sequence_zero", "fan_zero"]:
        if tu.validate_for_ensemble_model(name, dtype, dtype, dtype,
                                          shapes[0], shapes[0], shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                iu.infer_zero(self, name, bs, dtype, shapes, shapes)
            # model that does not support batching
            iu.infer_zero(self, name + '_nobatch', 1, dtype, shapes, shapes)
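# Hypothetical usage sketch (not part of the test suite): drivers such as
# _full_zero above are normally invoked from small per-dtype/per-shape test
# methods. The class, method names, and shapes below are illustrative
# assumptions, and the stub _full_zero only stands in for the real driver.
import unittest
import numpy as np


class InferZeroUsageExample(unittest.TestCase):

    def _full_zero(self, dtype, shapes):
        # Stand-in for the real driver defined above; only checks the inputs.
        self.assertTrue(all(len(shape) > 0 for shape in shapes))

    def test_ff1(self):
        self._full_zero(np.float32, ([1],))

    def test_ff3(self):
        self._full_zero(np.float32, ([1], [8], [2, 2]))


if __name__ == "__main__":
    unittest.main()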
def _full_exact(self, input_dtype, output0_dtype, output1_dtype, output0_raw,
                output1_raw, swap):

    def _infer_exact_helper(tester, pf, tensor_shape, batch_size, input_dtype,
                            output0_dtype, output1_dtype, output0_raw=True,
                            output1_raw=True, model_version=None, swap=False,
                            outputs=("OUTPUT0", "OUTPUT1"),
                            use_http=USE_HTTP, use_grpc=USE_GRPC,
                            use_http_json_tensors=True,
                            skip_request_id_check=True, use_streaming=True,
                            correlation_id=0):
        for bs in (1, batch_size):
            # model that does not support batching
            if bs == 1:
                iu.infer_exact(
                    tester, pf + "_nobatch", tensor_shape, bs, input_dtype,
                    output0_dtype, output1_dtype, output0_raw=output0_raw,
                    output1_raw=output1_raw, model_version=model_version,
                    swap=swap, outputs=outputs, use_http=use_http,
                    use_grpc=use_grpc,
                    use_http_json_tensors=use_http_json_tensors,
                    skip_request_id_check=skip_request_id_check,
                    use_streaming=use_streaming,
                    correlation_id=correlation_id,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

            # model that supports batching
            iu.infer_exact(
                tester, pf, (bs,) + tensor_shape, bs, input_dtype,
                output0_dtype, output1_dtype, output0_raw=output0_raw,
                output1_raw=output1_raw, model_version=model_version,
                swap=swap, outputs=outputs, use_http=use_http,
                use_grpc=use_grpc,
                use_http_json_tensors=use_http_json_tensors,
                skip_request_id_check=skip_request_id_check,
                use_streaming=use_streaming, correlation_id=correlation_id,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

    input_size = 16

    all_ensemble_prefix = ["simple_", "sequence_", "fan_"]
    ensemble_prefix = [""]
    if ENSEMBLES and OS_WINDOWS:
        for prefix in all_ensemble_prefix:
            if tu.validate_for_ensemble_model(prefix, input_dtype,
                                              output0_dtype, output1_dtype,
                                              (input_size,), (input_size,),
                                              (input_size,)):
                ensemble_prefix.append(prefix)

    if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                (input_size,), (input_size,), (input_size,)):
        for prefix in ensemble_prefix:
            for pf in ["graphdef", "savedmodel"]:
                if pf in BACKENDS:
                    _infer_exact_helper(self, prefix + pf, (input_size,), 8,
                                        input_dtype, output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw, swap=swap)

    if not CPU_ONLY and tu.validate_for_trt_model(
            input_dtype, output0_dtype, output1_dtype, (input_size, 1, 1),
            (input_size, 1, 1), (input_size, 1, 1)):
        for prefix in ensemble_prefix:
            if 'plan' in BACKENDS:
                if input_dtype == np.int8:
                    _infer_exact_helper(self, prefix + 'plan',
                                        (input_size, 1, 1), 8, input_dtype,
                                        output0_dtype, output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw, swap=swap)
                else:
                    _infer_exact_helper(self, prefix + 'plan', (input_size,),
                                        8, input_dtype, output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw, swap=swap)

    if tu.validate_for_onnx_model(input_dtype, output0_dtype, output1_dtype,
                                  (input_size,), (input_size,),
                                  (input_size,)):
        for prefix in ensemble_prefix:
            if 'onnx' in BACKENDS:
                _infer_exact_helper(self, prefix + 'onnx', (input_size,), 8,
                                    input_dtype, output0_dtype, output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw, swap=swap)

    if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
                                      output1_dtype, (input_size,),
                                      (input_size,), (input_size,)):
        for prefix in ensemble_prefix:
            if 'libtorch' in BACKENDS:
                _infer_exact_helper(self, prefix + 'libtorch', (input_size,),
                                    8, input_dtype, output0_dtype,
                                    output1_dtype, output0_raw=output0_raw,
                                    output1_raw=output1_raw, swap=swap)

            if prefix == "":
                if 'python' in BACKENDS:
                    _infer_exact_helper(self, prefix + 'python',
                                        (input_size,), 8, input_dtype,
                                        output0_dtype, output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw, swap=swap)
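# Minimal sketch (assumed names, not taken from the suite) of the batching
# pattern used by _infer_exact_helper above: the non-batching
# "<model>_nobatch" variant runs only at batch size 1, while the batching
# variant gets the batch dimension prepended to the tensor shape at every
# batch size.
def run_batched_and_nobatch(tensor_shape, max_batch_size, run_fn):
    for bs in (1, max_batch_size):
        if bs == 1:
            # model that does not support batching: shape has no batch dim
            run_fn("model_nobatch", tensor_shape, bs)
        # model that supports batching: prepend the batch dimension
        run_fn("model", (bs,) + tensor_shape, bs)


if __name__ == "__main__":
    run_batched_and_nobatch(
        (16,), 8, lambda name, shape, bs: print(name, shape, bs))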
def _full_exact(self, input_dtype, output0_dtype, output1_dtype, input_shape,
                output0_shape, output1_shape, output0_raw=True,
                output1_raw=True, swap=False):

    def _infer_exact_helper(tester, pf, tensor_shape, batch_size, input_dtype,
                            output0_dtype, output1_dtype, output0_raw=True,
                            output1_raw=True, model_version=None, swap=False,
                            outputs=("OUTPUT0", "OUTPUT1"), use_http=True,
                            use_grpc=True, skip_request_id_check=False,
                            use_streaming=True, correlation_id=0):
        for bs in (1, batch_size):
            # model that does not support batching
            if bs == 1:
                iu.infer_exact(tester, pf + "_nobatch", tensor_shape, bs,
                               input_dtype, output0_dtype, output1_dtype,
                               output0_raw, output1_raw, model_version, swap,
                               outputs, use_http, use_grpc,
                               skip_request_id_check, use_streaming,
                               correlation_id)
            # model that supports batching
            iu.infer_exact(tester, pf, tensor_shape, bs, input_dtype,
                           output0_dtype, output1_dtype, output0_raw,
                           output1_raw, model_version, swap, outputs,
                           use_http, use_grpc, skip_request_id_check,
                           use_streaming, correlation_id)

    all_ensemble_prefix = ["simple_", "sequence_", "fan_"]
    ensemble_prefix = [""]
    for prefix in all_ensemble_prefix:
        if tu.validate_for_ensemble_model(prefix, input_dtype, output0_dtype,
                                          output1_dtype, input_shape,
                                          input_shape, input_shape):
            ensemble_prefix.append(prefix)

    if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                input_shape, output0_shape, output1_shape):
        for prefix in ensemble_prefix:
            for pf in ["graphdef", "savedmodel"]:
                _infer_exact_helper(self, prefix + pf, input_shape, 8,
                                    input_dtype, output0_dtype, output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw, swap=swap)

    if tu.validate_for_c2_model(input_dtype, output0_dtype, output1_dtype,
                                input_shape, output0_shape, output1_shape):
        for prefix in ensemble_prefix:
            _infer_exact_helper(self, prefix + 'netdef', input_shape, 8,
                                input_dtype, output0_dtype, output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw, swap=swap)

    # the custom model is src/custom/addsub... it does not swap
    # the inputs so always set to False
    if tu.validate_for_custom_model(input_dtype, output0_dtype, output1_dtype,
                                    input_shape, output0_shape,
                                    output1_shape):
        # No basic ensemble models are created against custom models
        _infer_exact_helper(self, 'custom', input_shape, 8, input_dtype,
                            output0_dtype, output1_dtype,
                            output0_raw=output0_raw,
                            output1_raw=output1_raw, swap=False)

    if tu.validate_for_onnx_model(input_dtype, output0_dtype, output1_dtype,
                                  input_shape, output0_shape, output1_shape):
        # No basic ensemble models are created against custom models [TODO]
        _infer_exact_helper(self, 'onnx', input_shape, 8, input_dtype,
                            output0_dtype, output1_dtype,
                            output0_raw=output0_raw,
                            output1_raw=output1_raw, swap=swap)
def _full_reshape(self, dtype, input_shapes, output_shapes=None,
                  no_batch=True):
    # 'shapes' is list of shapes, one for each input.
    if output_shapes is None:
        output_shapes = input_shapes

    # For validation assume any shape can be used...
    if tu.validate_for_tf_model(dtype, dtype, dtype, input_shapes[0],
                                input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            full_shapes = [[bs] + input_shape for input_shape in input_shapes]
            full_output_shapes = [[bs] + output_shape
                                  for output_shape in output_shapes]
            iu.infer_zero(
                self, 'graphdef', bs, dtype, full_shapes, full_output_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            iu.infer_zero(
                self, 'savedmodel', bs, dtype, full_shapes,
                full_output_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(
                self, 'graphdef_nobatch', 1, dtype, input_shapes,
                output_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            iu.infer_zero(
                self, 'savedmodel_nobatch', 1, dtype, input_shapes,
                output_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

    if tu.validate_for_onnx_model(dtype, dtype, dtype, input_shapes[0],
                                  input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            full_shapes = [[bs] + input_shape for input_shape in input_shapes]
            full_output_shapes = [[bs] + output_shape
                                  for output_shape in output_shapes]
            iu.infer_zero(
                self, 'onnx', bs, dtype, full_shapes, full_output_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(
                self, 'onnx_nobatch', 1, dtype, input_shapes, output_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

    # Skip for libtorch string I/O
    if tu.validate_for_libtorch_model(dtype, dtype, dtype, input_shapes[0],
                                      input_shapes[0], input_shapes[0]) and \
            (dtype != np_dtype_string):
        # skip variable size reshape on libtorch for now,
        # see "gen_qa_reshape_model.py" for detail
        if dtype != np.int32:
            # model that does not support batching
            if no_batch:
                iu.infer_zero(
                    self, 'libtorch_nobatch', 1, dtype, input_shapes,
                    output_shapes,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

            # model that supports batching
            for bs in (1, 8):
                full_shapes = [[bs] + input_shape
                               for input_shape in input_shapes]
                full_output_shapes = [[bs] + output_shape
                                      for output_shape in output_shapes]
                iu.infer_zero(
                    self, 'libtorch', bs, dtype, full_shapes,
                    full_output_shapes,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

    for name in ["simple_reshape", "sequence_reshape", "fan_reshape"]:
        # [TODO] Skip variable size reshape on ensemble for now.
        # Needs rework on how ensembles for reshape are generated
        if dtype == np.int32:
            break
        if tu.validate_for_ensemble_model(name, dtype, dtype, dtype,
                                          input_shapes[0], input_shapes[0],
                                          input_shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                full_shapes = [[bs] + input_shape
                               for input_shape in input_shapes]
                full_output_shapes = [[bs] + output_shape
                                      for output_shape in output_shapes]
                iu.infer_zero(
                    self, name, bs, dtype, full_shapes, full_output_shapes,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            # model that does not support batching
            if no_batch:
                iu.infer_zero(
                    self, name + '_nobatch', 1, dtype, input_shapes,
                    output_shapes,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
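# Hypothetical helper (not defined in the suite) mirroring the "[bs] + shape"
# construction used in _full_reshape above: for models that support batching,
# every per-input shape gets the batch size prepended before being handed to
# iu.infer_zero.
def build_full_shapes(batch_size, shapes):
    return [[batch_size] + list(shape) for shape in shapes]


if __name__ == "__main__":
    # [[8, 4, 4], [8, 2], [8, 1, 8]]
    print(build_full_shapes(8, [[4, 4], [2], [1, 8]]))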
def _full_exact(self, input_dtype, output0_dtype, output1_dtype, output0_raw,
                output1_raw, swap):

    def _infer_exact_helper(tester, pf, tensor_shape, batch_size, input_dtype,
                            output0_dtype, output1_dtype, output0_raw=True,
                            output1_raw=True, model_version=None, swap=False,
                            outputs=("OUTPUT0", "OUTPUT1"), use_http=True,
                            use_grpc=True, skip_request_id_check=False,
                            use_streaming=True, correlation_id=0):
        for bs in (1, batch_size):
            iu.infer_exact(tester, pf, (bs,) + tensor_shape, bs, input_dtype,
                           output0_dtype, output1_dtype, output0_raw,
                           output1_raw, model_version, swap, outputs,
                           use_http, use_grpc, skip_request_id_check,
                           use_streaming, correlation_id)

    input_size = 16

    if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                (input_size,), (input_size,), (input_size,)):
        for pf in ["graphdef", "savedmodel"]:
            _infer_exact_helper(self, pf, (input_size,), 8, input_dtype,
                                output0_dtype, output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw, swap=swap)

    if tu.validate_for_c2_model(input_dtype, output0_dtype, output1_dtype,
                                (input_size,), (input_size,), (input_size,)):
        _infer_exact_helper(self, 'netdef', (input_size,), 8, input_dtype,
                            output0_dtype, output1_dtype,
                            output0_raw=output0_raw,
                            output1_raw=output1_raw, swap=swap)

    if tu.validate_for_trt_model(input_dtype, output0_dtype, output1_dtype,
                                 (input_size, 1, 1), (input_size, 1, 1),
                                 (input_size, 1, 1)):
        if input_dtype == np.int8:
            _infer_exact_helper(self, 'plan', (input_size, 1, 1), 8,
                                input_dtype, output0_dtype, output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw, swap=swap)
        else:
            _infer_exact_helper(self, 'plan', (input_size,), 8, input_dtype,
                                output0_dtype, output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw, swap=swap)

    if tu.validate_for_onnx_model(input_dtype, output0_dtype, output1_dtype,
                                  (input_size,), (input_size,),
                                  (input_size,)):
        _infer_exact_helper(self, 'onnx', (input_size,), 8, input_dtype,
                            output0_dtype, output1_dtype,
                            output0_raw=output0_raw,
                            output1_raw=output1_raw, swap=swap)

    if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
                                      output1_dtype, (input_size,),
                                      (input_size,), (input_size,)):
        _infer_exact_helper(self, 'libtorch', (input_size,), 8, input_dtype,
                            output0_dtype, output1_dtype,
                            output0_raw=output0_raw,
                            output1_raw=output1_raw, swap=swap)
def _full_zero(self, dtype, shapes):
    # 'shapes' is list of shapes, one for each input.

    # For validation assume any shape can be used...
    if tu.validate_for_tf_model(dtype, dtype, dtype, shapes[0], shapes[0],
                                shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            batch_shapes = [[bs] + shape for shape in shapes]
            iu.infer_zero(
                self, 'graphdef', bs, dtype, batch_shapes, batch_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            iu.infer_zero(
                self, 'savedmodel', bs, dtype, batch_shapes, batch_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
        # model that does not support batching
        iu.infer_zero(self, 'graphdef_nobatch', 1, dtype, shapes, shapes,
                      use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                      use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
        iu.infer_zero(self, 'savedmodel_nobatch', 1, dtype, shapes, shapes,
                      use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                      use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

    if tu.validate_for_onnx_model(dtype, dtype, dtype, shapes[0], shapes[0],
                                  shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            batch_shapes = [[bs] + shape for shape in shapes]
            iu.infer_zero(
                self, 'onnx', bs, dtype, batch_shapes, batch_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
        # model that does not support batching
        iu.infer_zero(self, 'onnx_nobatch', 1, dtype, shapes, shapes,
                      use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                      use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

    for name in ["simple_zero", "sequence_zero", "fan_zero"]:
        if tu.validate_for_ensemble_model(name, dtype, dtype, dtype,
                                          shapes[0], shapes[0], shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                batch_shapes = [[bs] + shape for shape in shapes]
                iu.infer_zero(
                    self, name, bs, dtype, batch_shapes, batch_shapes,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            # model that does not support batching
            iu.infer_zero(
                self, name + '_nobatch', 1, dtype, shapes, shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
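# A sketch of how flags such as TEST_SYSTEM_SHARED_MEMORY and
# TEST_CUDA_SHARED_MEMORY used above are presumably derived. The variable
# names match the code, but reading them from the environment this way is an
# assumption for illustration, not verified against the suite.
import os

TEST_SYSTEM_SHARED_MEMORY = bool(
    int(os.environ.get('TEST_SYSTEM_SHARED_MEMORY', 0)))
TEST_CUDA_SHARED_MEMORY = bool(
    int(os.environ.get('TEST_CUDA_SHARED_MEMORY', 0)))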
def _full_exact(self, input_dtype, output0_dtype, output1_dtype, output0_raw,
                output1_raw, swap):

    def _infer_exact_helper(tester, pf, tensor_shape, batch_size, input_dtype,
                            output0_dtype, output1_dtype, output0_raw=True,
                            output1_raw=True, model_version=None, swap=False,
                            outputs=("OUTPUT0", "OUTPUT1"), use_http=True,
                            use_grpc=True, use_http_json_tensors=True,
                            skip_request_id_check=True, use_streaming=True,
                            correlation_id=0):
        for bs in (1, batch_size):
            # model that does not support batching
            if bs == 1:
                iu.infer_exact(
                    tester, pf + "_nobatch", tensor_shape, bs, input_dtype,
                    output0_dtype, output1_dtype, output0_raw, output1_raw,
                    model_version, swap, outputs, use_http, use_grpc,
                    use_http_json_tensors, skip_request_id_check,
                    use_streaming, correlation_id,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

            # model that supports batching
            iu.infer_exact(
                tester, pf, (bs,) + tensor_shape, bs, input_dtype,
                output0_dtype, output1_dtype, output0_raw, output1_raw,
                model_version, swap, outputs, use_http, use_grpc,
                use_http_json_tensors, skip_request_id_check, use_streaming,
                correlation_id,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

    input_size = 16

    all_ensemble_prefix = ["simple_", "sequence_", "fan_"]
    ensemble_prefix = [""]
    if ENSEMBLES and ("custom" in BACKENDS):
        for prefix in all_ensemble_prefix:
            if tu.validate_for_ensemble_model(prefix, input_dtype,
                                              output0_dtype, output1_dtype,
                                              (input_size,), (input_size,),
                                              (input_size,)):
                ensemble_prefix.append(prefix)

    if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                (input_size,), (input_size,), (input_size,)):
        for prefix in ensemble_prefix:
            for pf in ["graphdef", "savedmodel"]:
                if pf in BACKENDS:
                    _infer_exact_helper(self, prefix + pf, (input_size,), 8,
                                        input_dtype, output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw, swap=swap)

    if tu.validate_for_c2_model(input_dtype, output0_dtype, output1_dtype,
                                (input_size,), (input_size,), (input_size,)):
        for prefix in ensemble_prefix:
            if 'netdef' in BACKENDS:
                _infer_exact_helper(self, prefix + 'netdef', (input_size,), 8,
                                    input_dtype, output0_dtype, output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw, swap=swap)

    if not CPU_ONLY and tu.validate_for_trt_model(
            input_dtype, output0_dtype, output1_dtype, (input_size, 1, 1),
            (input_size, 1, 1), (input_size, 1, 1)):
        for prefix in ensemble_prefix:
            if 'plan' in BACKENDS:
                if input_dtype == np.int8:
                    _infer_exact_helper(self, prefix + 'plan',
                                        (input_size, 1, 1), 8, input_dtype,
                                        output0_dtype, output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw, swap=swap)
                else:
                    _infer_exact_helper(self, prefix + 'plan', (input_size,),
                                        8, input_dtype, output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw, swap=swap)

    # the custom model is src/custom/addsub... it does not swap
    # the inputs so always set to False
    if tu.validate_for_custom_model(input_dtype, output0_dtype, output1_dtype,
                                    (input_size,), (input_size,),
                                    (input_size,)):
        # No basic ensemble models are created against custom models
        if 'custom' in BACKENDS:
            _infer_exact_helper(self, 'custom', (input_size,), 8, input_dtype,
                                output0_dtype, output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw, swap=False)

    if tu.validate_for_onnx_model(input_dtype, output0_dtype, output1_dtype,
                                  (input_size,), (input_size,),
                                  (input_size,)):
        for prefix in ensemble_prefix:
            if 'onnx' in BACKENDS:
                _infer_exact_helper(self, prefix + 'onnx', (input_size,), 8,
                                    input_dtype, output0_dtype, output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw, swap=swap)

    if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
                                      output1_dtype, (input_size,),
                                      (input_size,), (input_size,)):
        for prefix in ensemble_prefix:
            if 'libtorch' in BACKENDS:
                _infer_exact_helper(self, prefix + 'libtorch', (input_size,),
                                    8, input_dtype, output0_dtype,
                                    output1_dtype, output0_raw=output0_raw,
                                    output1_raw=output1_raw, swap=swap)
def _full_exact(self, input_dtype, output0_dtype, output1_dtype, input_shape,
                output0_shape, output1_shape, output0_raw=True,
                output1_raw=True, swap=False):

    def _infer_exact_helper(tester, pf, tensor_shape, batch_size, input_dtype,
                            output0_dtype, output1_dtype, output0_raw=True,
                            output1_raw=True, model_version=None, swap=False,
                            outputs=("OUTPUT0", "OUTPUT1"), use_http=True,
                            use_grpc=True, skip_request_id_check=False,
                            use_streaming=True, correlation_id=0):
        for bs in (1, batch_size):
            # model that does not support batching
            if bs == 1:
                iu.infer_exact(
                    tester, pf + "_nobatch", tensor_shape, bs, input_dtype,
                    output0_dtype, output1_dtype, output0_raw=output0_raw,
                    output1_raw=output1_raw, model_version=model_version,
                    swap=swap, outputs=outputs, use_http=use_http,
                    use_grpc=use_grpc,
                    skip_request_id_check=skip_request_id_check,
                    use_streaming=use_streaming,
                    correlation_id=correlation_id,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

            # model that supports batching
            iu.infer_exact(
                tester, pf, (bs,) + tensor_shape, bs, input_dtype,
                output0_dtype, output1_dtype, output0_raw=output0_raw,
                output1_raw=output1_raw, model_version=model_version,
                swap=swap, outputs=outputs, use_http=use_http,
                use_grpc=use_grpc,
                skip_request_id_check=skip_request_id_check,
                use_streaming=use_streaming, correlation_id=correlation_id,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

    all_ensemble_prefix = ["simple_", "sequence_", "fan_"]
    ensemble_prefix = [""]
    for prefix in all_ensemble_prefix:
        if tu.validate_for_ensemble_model(prefix, input_dtype, output0_dtype,
                                          output1_dtype, input_shape,
                                          input_shape, input_shape):
            ensemble_prefix.append(prefix)

    if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                input_shape, output0_shape, output1_shape):
        for prefix in ensemble_prefix:
            for pf in ["graphdef", "savedmodel"]:
                _infer_exact_helper(self, prefix + pf, input_shape, 8,
                                    input_dtype, output0_dtype, output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw, swap=swap)

    if tu.validate_for_trt_model(input_dtype, output0_dtype, output1_dtype,
                                 input_shape, output0_shape, output1_shape):
        for prefix in ensemble_prefix:
            if input_dtype == np.int8:
                _infer_exact_helper(self, prefix + 'plan',
                                    input_shape + (1, 1), 8, input_dtype,
                                    output0_dtype, output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw, swap=swap)
            else:
                _infer_exact_helper(self, prefix + 'plan', input_shape, 8,
                                    input_dtype, output0_dtype, output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw, swap=swap)

    if tu.validate_for_onnx_model(input_dtype, output0_dtype, output1_dtype,
                                  input_shape, output0_shape, output1_shape):
        # No basic ensemble models are created against custom models [TODO]
        _infer_exact_helper(self, 'onnx', input_shape, 8, input_dtype,
                            output0_dtype, output1_dtype,
                            output0_raw=output0_raw,
                            output1_raw=output1_raw, swap=swap)

    if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
                                      output1_dtype, input_shape,
                                      output0_shape, output1_shape):
        # No basic ensemble models are created against custom models [TODO]
        _infer_exact_helper(self, 'libtorch', input_shape, 8, input_dtype,
                            output0_dtype, output1_dtype,
                            output0_raw=output0_raw,
                            output1_raw=output1_raw, swap=swap)
def _full_reshape(self, dtype, input_shapes, output_shapes=None,
                  no_batch=True):
    # 'shapes' is list of shapes, one for each input.
    if output_shapes is None:
        output_shapes = input_shapes

    # For validation assume any shape can be used...
    if tu.validate_for_tf_model(dtype, dtype, dtype, input_shapes[0],
                                input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'graphdef', bs, dtype, input_shapes,
                          output_shapes)
            iu.infer_zero(self, 'savedmodel', bs, dtype, input_shapes,
                          output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'graphdef_nobatch', 1, dtype, input_shapes,
                          output_shapes)
            iu.infer_zero(self, 'savedmodel_nobatch', 1, dtype, input_shapes,
                          output_shapes)

    if tu.validate_for_c2_model(dtype, dtype, dtype, input_shapes[0],
                                input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'netdef', bs, dtype, input_shapes,
                          output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'netdef_nobatch', 1, dtype, input_shapes,
                          output_shapes)

    if tu.validate_for_custom_model(dtype, dtype, dtype, input_shapes[0],
                                    input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'custom', bs, dtype, input_shapes,
                          output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'custom_nobatch', 1, dtype, input_shapes,
                          output_shapes)

    if tu.validate_for_onnx_model(dtype, dtype, dtype, input_shapes[0],
                                  input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'onnx', bs, dtype, input_shapes,
                          output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'onnx_nobatch', 1, dtype, input_shapes,
                          output_shapes)

    if tu.validate_for_libtorch_model(dtype, dtype, dtype, input_shapes[0],
                                      input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'libtorch', bs, dtype, input_shapes,
                          output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'libtorch_nobatch', 1, dtype, input_shapes,
                          output_shapes)

    for name in ["simple_reshape", "sequence_reshape", "fan_reshape"]:
        if tu.validate_for_ensemble_model(name, dtype, dtype, dtype,
                                          input_shapes[0], input_shapes[0],
                                          input_shapes[0]):
            if len(input_shapes) <= 2:
                # Skip cases that reshape to zero-sized tensors
                # (known from qa/common/gen_qa_reshape_model.py)
                return
            # model that supports batching
            for bs in (1, 8):
                iu.infer_zero(self, name, bs, dtype, input_shapes,
                              output_shapes)
            # model that does not support batching
            if no_batch:
                iu.infer_zero(self, name + '_nobatch', 1, dtype, input_shapes,
                              output_shapes)
def _full_reshape(self, dtype, input_shapes, output_shapes=None,
                  no_batch=True):
    # 'shapes' is list of shapes, one for each input.
    if output_shapes is None:
        output_shapes = input_shapes

    # For validation assume any shape can be used...
    if tu.validate_for_tf_model(dtype, dtype, dtype, input_shapes[0],
                                input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'graphdef', bs, dtype, input_shapes,
                          output_shapes)
            iu.infer_zero(self, 'savedmodel', bs, dtype, input_shapes,
                          output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'graphdef_nobatch', 1, dtype, input_shapes,
                          output_shapes)
            iu.infer_zero(self, 'savedmodel_nobatch', 1, dtype, input_shapes,
                          output_shapes)

    if tu.validate_for_c2_model(dtype, dtype, dtype, input_shapes[0],
                                input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'netdef', bs, dtype, input_shapes,
                          output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'netdef_nobatch', 1, dtype, input_shapes,
                          output_shapes)

    if tu.validate_for_custom_model(dtype, dtype, dtype, input_shapes[0],
                                    input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'custom', bs, dtype, input_shapes,
                          output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'custom_nobatch', 1, dtype, input_shapes,
                          output_shapes)

    if tu.validate_for_onnx_model(dtype, dtype, dtype, input_shapes[0],
                                  input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'onnx', bs, dtype, input_shapes,
                          output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'onnx_nobatch', 1, dtype, input_shapes,
                          output_shapes)

    if tu.validate_for_libtorch_model(dtype, dtype, dtype, input_shapes[0],
                                      input_shapes[0], input_shapes[0]):
        # skip variable size reshape on libtorch for now,
        # see "gen_qa_reshape_model.py" for detail
        if dtype != np.int32:
            # model that supports batching
            for bs in (1, 8):
                iu.infer_zero(self, 'libtorch', bs, dtype, input_shapes,
                              output_shapes)
            # model that does not support batching
            if no_batch:
                iu.infer_zero(self, 'libtorch_nobatch', 1, dtype,
                              input_shapes, output_shapes)

    for name in ["simple_reshape", "sequence_reshape", "fan_reshape"]:
        # [TODO] Skip variable size reshape on ensemble for now.
        # Needs rework on how ensembles for reshape are generated
        if dtype == np.int32:
            break
        if tu.validate_for_ensemble_model(name, dtype, dtype, dtype,
                                          input_shapes[0], input_shapes[0],
                                          input_shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                iu.infer_zero(self, name, bs, dtype, input_shapes,
                              output_shapes)
            # model that does not support batching
            if no_batch:
                iu.infer_zero(self, name + '_nobatch', 1, dtype, input_shapes,
                              output_shapes)
def _full_exact(self, input_dtype, output0_dtype, output1_dtype, output0_raw,
                output1_raw, swap):

    def _infer_exact_helper(tester, pf, tensor_shape, batch_size, input_dtype,
                            output0_dtype, output1_dtype, output0_raw=True,
                            output1_raw=True, model_version=None, swap=False,
                            outputs=("OUTPUT0", "OUTPUT1"),
                            use_http=USE_HTTP, use_grpc=USE_GRPC,
                            use_http_json_tensors=True,
                            skip_request_id_check=True, use_streaming=True,
                            correlation_id=0):
        for bs in (1, batch_size):
            # model that does not support batching
            if bs == 1:
                iu.infer_exact(
                    tester, pf + "_nobatch", tensor_shape, bs, input_dtype,
                    output0_dtype, output1_dtype, output0_raw=output0_raw,
                    output1_raw=output1_raw, model_version=model_version,
                    swap=swap, outputs=outputs, use_http=use_http,
                    use_grpc=use_grpc,
                    use_http_json_tensors=use_http_json_tensors,
                    skip_request_id_check=skip_request_id_check,
                    use_streaming=use_streaming,
                    correlation_id=correlation_id,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

            # model that supports batching
            iu.infer_exact(
                tester, pf, (bs,) + tensor_shape, bs, input_dtype,
                output0_dtype, output1_dtype, output0_raw=output0_raw,
                output1_raw=output1_raw, model_version=model_version,
                swap=swap, outputs=outputs, use_http=use_http,
                use_grpc=use_grpc,
                use_http_json_tensors=use_http_json_tensors,
                skip_request_id_check=skip_request_id_check,
                use_streaming=use_streaming, correlation_id=correlation_id,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

    input_size = 16

    all_ensemble_prefix = ["simple_", "sequence_", "fan_"]
    ensemble_prefix = [""]
    if ENSEMBLES:
        for prefix in all_ensemble_prefix:
            if tu.validate_for_ensemble_model(prefix, input_dtype,
                                              output0_dtype, output1_dtype,
                                              (input_size,), (input_size,),
                                              (input_size,)):
                ensemble_prefix.append(prefix)

    if tu.validate_for_tf_model(input_dtype, output0_dtype, output1_dtype,
                                (input_size,), (input_size,), (input_size,)):
        for prefix in ensemble_prefix:
            for pf in ["graphdef", "savedmodel"]:
                if pf in BACKENDS:
                    _infer_exact_helper(self, prefix + pf, (input_size,), 8,
                                        input_dtype, output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw, swap=swap)

    if not CPU_ONLY and tu.validate_for_trt_model(
            input_dtype, output0_dtype, output1_dtype, (input_size, 1, 1),
            (input_size, 1, 1), (input_size, 1, 1)):
        for prefix in ensemble_prefix:
            if 'plan' in BACKENDS:
                if input_dtype == np.int8:
                    _infer_exact_helper(self, prefix + 'plan',
                                        (input_size, 1, 1), 8, input_dtype,
                                        output0_dtype, output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw, swap=swap)
                else:
                    _infer_exact_helper(self, prefix + 'plan', (input_size,),
                                        8, input_dtype, output0_dtype,
                                        output1_dtype,
                                        output0_raw=output0_raw,
                                        output1_raw=output1_raw, swap=swap)

    if tu.validate_for_onnx_model(input_dtype, output0_dtype, output1_dtype,
                                  (input_size,), (input_size,),
                                  (input_size,)):
        for prefix in ensemble_prefix:
            if 'onnx' in BACKENDS:
                _infer_exact_helper(self, prefix + 'onnx', (input_size,), 8,
                                    input_dtype, output0_dtype, output1_dtype,
                                    output0_raw=output0_raw,
                                    output1_raw=output1_raw, swap=swap)

    if tu.validate_for_libtorch_model(input_dtype, output0_dtype,
                                      output1_dtype, (input_size,),
                                      (input_size,), (input_size,)):
        # Due to PyTorch bug
        # https://github.com/pytorch/pytorch/issues/66930 we can't
        # run this test with int8 input and int32 outputs.
        if ((input_dtype == np.int8) and (output0_dtype == np.int32) and
                (output1_dtype == np.int32)):
            print('skipping pytorch test for int8_int32_int32')
        else:
            for prefix in ensemble_prefix:
                if 'libtorch' in BACKENDS:
                    # Skip batching for PyTorch String I/O
                    if ((input_dtype == np_dtype_string) or
                            (output0_dtype == np_dtype_string) or
                            (output1_dtype == np_dtype_string)):
                        iu.infer_exact(
                            self,
                            prefix + 'libtorch_nobatch',
                            (input_size,),
                            1,  # batch_size
                            input_dtype,
                            output0_dtype,
                            output1_dtype,
                            output0_raw=output0_raw,
                            output1_raw=output1_raw,
                            swap=swap,
                            use_http=USE_HTTP,
                            use_grpc=USE_GRPC,
                            use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                            use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
                    else:
                        _infer_exact_helper(self, prefix + 'libtorch',
                                            (input_size,), 8, input_dtype,
                                            output0_dtype, output1_dtype,
                                            output0_raw=output0_raw,
                                            output1_raw=output1_raw,
                                            swap=swap)

    for prefix in ensemble_prefix:
        if prefix != "":
            continue
        if 'python_dlpack' in BACKENDS:
            _infer_exact_helper(self, prefix + 'python_dlpack',
                                (input_size,), 8, input_dtype, output0_dtype,
                                output1_dtype, output0_raw=output0_raw,
                                output1_raw=output1_raw, swap=swap)
        elif 'python' in BACKENDS:
            _infer_exact_helper(self, prefix + 'python', (input_size,), 8,
                                input_dtype, output0_dtype, output1_dtype,
                                output0_raw=output0_raw,
                                output1_raw=output1_raw, swap=swap)
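# A sketch of how BACKENDS, ENSEMBLES, USE_HTTP, and USE_GRPC referenced
# above could be populated. The environment-variable names and the default
# backend list are assumptions for illustration, not verified against the
# suite.
import os

BACKENDS = os.environ.get(
    'BACKENDS', "graphdef savedmodel onnx libtorch plan python").split()
ENSEMBLES = bool(int(os.environ.get('ENSEMBLES', 1)))
USE_HTTP = bool(int(os.environ.get('USE_HTTP', 1)))
USE_GRPC = bool(int(os.environ.get('USE_GRPC', 1)))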