def _trt_reshape(self, dtype, input_shapes, output_shapes=None, no_batch=True):
    # 'input_shapes' is a list of shapes, one for each input.
    if output_shapes is None:
        output_shapes = input_shapes

    if tu.validate_for_trt_model(dtype, dtype, dtype, input_shapes[0],
                                 input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(
                self, 'plan', bs, dtype, input_shapes, output_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(
                self, 'plan_nobatch', 1, dtype, input_shapes, output_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
def check_response(self, bs, dtype, shape, priority, timeout_us, thresholds):
    try:
        start_ms = int(round(time.time() * 1000))
        iu.infer_zero(self, "custom", bs, dtype, shape, shape,
                      model_version=1, use_grpc=False, use_streaming=False,
                      priority=priority, timeout_us=timeout_us)
        end_ms = int(round(time.time() * 1000))

        lt_ms = thresholds[0]
        gt_ms = thresholds[1]
        if lt_ms is not None:
            self.assertTrue(
                (end_ms - start_ms) < lt_ms,
                "expected less than " + str(lt_ms) + "ms response time, got " +
                str(end_ms - start_ms) + " ms")
        if gt_ms is not None:
            self.assertTrue(
                (end_ms - start_ms) > gt_ms,
                "expected greater than " + str(gt_ms) +
                "ms response time, got " + str(end_ms - start_ms) + " ms")
    except Exception as ex:
        self.add_deferred_exception(ex)
def check_response(self, bs, dtype, shapes, priority, timeout_us, thresholds,
                   is_http_trial=True):
    full_shapes = [[bs, ] + shape for shape in shapes]
    try:
        start_ms = int(round(time.time() * 1000))
        iu.infer_zero(self, "custom", bs, dtype, full_shapes, full_shapes,
                      model_version=1, use_http_json_tensors=False,
                      use_http=is_http_trial, use_grpc=(not is_http_trial),
                      use_streaming=False, priority=priority,
                      timeout_us=timeout_us)
        end_ms = int(round(time.time() * 1000))

        lt_ms = thresholds[0]
        gt_ms = thresholds[1]
        if lt_ms is not None:
            self.assertTrue(
                (end_ms - start_ms) < lt_ms,
                "expected less than " + str(lt_ms) + "ms response time, got " +
                str(end_ms - start_ms) + " ms")
        if gt_ms is not None:
            self.assertTrue(
                (end_ms - start_ms) > gt_ms,
                "expected greater than " + str(gt_ms) +
                "ms response time, got " + str(end_ms - start_ms) + " ms")
    except Exception as ex:
        self.add_deferred_exception(ex)
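# Hedged usage sketch (not from the original source): a priority/timeout test
# could drive check_response() from worker threads and then surface whatever
# add_deferred_exception() recorded. The test name, the tensor shape, the
# priority/timeout values, and check_deferred_exception() are illustrative
# assumptions; 'threading' and 'numpy as np' are assumed to be imported.
def test_priority_sketch(self):
    threads = []
    # High-priority request: expect a response in under 5 seconds.
    threads.append(
        threading.Thread(target=self.check_response,
                         args=(8, np.float32, [[1000, 1000]], 1, 0,
                               (5000, None))))
    # Low-priority request: expect it to be delayed past 5 seconds.
    threads.append(
        threading.Thread(target=self.check_response,
                         args=(8, np.float32, [[1000, 1000]], 2, 0,
                               (None, 5000))))
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    # Re-raise the first exception deferred by check_response, if any.
    self.check_deferred_exception()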
def test_ensemble_zero_dimension_reshape(self):
    for shapes in [([1],), ([1], [8])]:
        for name in ["simple_reshape", "sequence_reshape", "fan_reshape"]:
            # model that supports batching
            for bs in (1, 8):
                try:
                    iu.infer_zero(self, name, bs, np.float32, shapes, shapes)
                    self.fail("Unexpected success in infer")
                except InferenceServerException as ex:
                    self.assertEqual("inference:0", ex.server_id())
                    self.assertTrue(
                        "but model configuration specifies shape []" in
                        ex.message())
def _trt_reshape(self, dtype, input_shapes, output_shapes=None, no_batch=True):
    # 'input_shapes' is a list of shapes, one for each input.
    if output_shapes is None:
        output_shapes = input_shapes

    if tu.validate_for_trt_model(dtype, dtype, dtype, input_shapes[0],
                                 input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'plan', bs, dtype, input_shapes, output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'plan_nobatch', 1, dtype, input_shapes,
                          output_shapes)
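# Hedged usage sketch (assumed, not from the original source): a TensorRT
# reshape test case would pass per-input shapes plus the shapes the model
# reshapes them to, one entry per input, e.g. a [4, 4] input flattened to
# [16]. The test name and the shapes are illustrative.
def test_trt_reshape_sketch(self):
    self._trt_reshape(np.float32, ([4, 4],), ([16],))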
def _full_reshape(self, dtype, input_shapes, output_shapes=None, no_batch=True):
    # 'input_shapes' is a list of shapes, one for each input.
    if output_shapes is None:
        output_shapes = input_shapes

    # For validation assume any shape can be used...
    if tu.validate_for_tf_model(dtype, dtype, dtype, input_shapes[0],
                                input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            full_shapes = [[bs, ] + input_shape
                           for input_shape in input_shapes]
            full_output_shapes = [[bs, ] + output_shape
                                  for output_shape in output_shapes]
            iu.infer_zero(
                self, 'graphdef', bs, dtype, full_shapes, full_output_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            iu.infer_zero(
                self, 'savedmodel', bs, dtype, full_shapes, full_output_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(
                self, 'graphdef_nobatch', 1, dtype, input_shapes,
                output_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            iu.infer_zero(
                self, 'savedmodel_nobatch', 1, dtype, input_shapes,
                output_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

    if tu.validate_for_onnx_model(dtype, dtype, dtype, input_shapes[0],
                                  input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            full_shapes = [[bs, ] + input_shape
                           for input_shape in input_shapes]
            full_output_shapes = [[bs, ] + output_shape
                                  for output_shape in output_shapes]
            iu.infer_zero(
                self, 'onnx', bs, dtype, full_shapes, full_output_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(
                self, 'onnx_nobatch', 1, dtype, input_shapes, output_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

    # Skip for libtorch string I/O
    if tu.validate_for_libtorch_model(dtype, dtype, dtype, input_shapes[0],
                                      input_shapes[0], input_shapes[0]) and \
            (dtype != np_dtype_string):
        # skip variable size reshape on libtorch for now,
        # see "gen_qa_reshape_model.py" for detail
        if dtype != np.int32:
            # model that does not support batching
            if no_batch:
                iu.infer_zero(
                    self, 'libtorch_nobatch', 1, dtype, input_shapes,
                    output_shapes,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            # model that supports batching
            for bs in (1, 8):
                full_shapes = [[bs, ] + input_shape
                               for input_shape in input_shapes]
                full_output_shapes = [[bs, ] + output_shape
                                      for output_shape in output_shapes]
                iu.infer_zero(
                    self, 'libtorch', bs, dtype, full_shapes,
                    full_output_shapes,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

    for name in ["simple_reshape", "sequence_reshape", "fan_reshape"]:
        # [TODO] Skip variable size reshape on ensemble for now.
        # Need rework on how ensembles for reshape are generated
        if dtype == np.int32:
            break
        if tu.validate_for_ensemble_model(name, dtype, dtype, dtype,
                                          input_shapes[0], input_shapes[0],
                                          input_shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                full_shapes = [[bs, ] + input_shape
                               for input_shape in input_shapes]
                full_output_shapes = [[bs, ] + output_shape
                                      for output_shape in output_shapes]
                iu.infer_zero(
                    self, name, bs, dtype, full_shapes, full_output_shapes,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            # model that does not support batching
            if no_batch:
                iu.infer_zero(
                    self, name + '_nobatch', 1, dtype, input_shapes,
                    output_shapes,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
def _full_zero(self, dtype, shapes):
    # 'shapes' is a list of shapes, one for each input.
    # For validation assume any shape can be used...
    if tu.validate_for_tf_model(dtype, dtype, dtype, shapes[0], shapes[0],
                                shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'graphdef', bs, dtype, shapes)
            iu.infer_zero(self, 'savedmodel', bs, dtype, shapes)
        # model that does not support batching
        iu.infer_zero(self, 'graphdef_nobatch', 1, dtype, shapes)
        iu.infer_zero(self, 'savedmodel_nobatch', 1, dtype, shapes)

    if tu.validate_for_c2_model(dtype, dtype, dtype, shapes[0], shapes[0],
                                shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'netdef', bs, dtype, shapes)
        # model that does not support batching
        iu.infer_zero(self, 'netdef_nobatch', 1, dtype, shapes)
def _full_zero(self, dtype, shapes):
    # 'shapes' is a list of shapes, one for each input.
    # For validation assume any shape can be used...
    if tu.validate_for_tf_model(dtype, dtype, dtype, shapes[0], shapes[0],
                                shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            batch_shapes = [[bs, ] + shape for shape in shapes]
            iu.infer_zero(
                self, 'graphdef', bs, dtype, batch_shapes, batch_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            iu.infer_zero(
                self, 'savedmodel', bs, dtype, batch_shapes, batch_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
        # model that does not support batching
        iu.infer_zero(self, 'graphdef_nobatch', 1, dtype, shapes, shapes,
                      use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                      use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
        iu.infer_zero(self, 'savedmodel_nobatch', 1, dtype, shapes, shapes,
                      use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                      use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

    if tu.validate_for_onnx_model(dtype, dtype, dtype, shapes[0], shapes[0],
                                  shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            batch_shapes = [[bs, ] + shape for shape in shapes]
            iu.infer_zero(
                self, 'onnx', bs, dtype, batch_shapes, batch_shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
        # model that does not support batching
        iu.infer_zero(self, 'onnx_nobatch', 1, dtype, shapes, shapes,
                      use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                      use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)

    for name in ["simple_zero", "sequence_zero", "fan_zero"]:
        if tu.validate_for_ensemble_model(name, dtype, dtype, dtype,
                                          shapes[0], shapes[0], shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                batch_shapes = [[bs, ] + shape for shape in shapes]
                iu.infer_zero(
                    self, name, bs, dtype, batch_shapes, batch_shapes,
                    use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                    use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
            # model that does not support batching
            iu.infer_zero(
                self, name + '_nobatch', 1, dtype, shapes, shapes,
                use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY,
                use_cuda_shared_memory=TEST_CUDA_SHARED_MEMORY)
def _full_reshape(self, dtype, input_shapes, output_shapes=None, no_batch=True):
    # 'input_shapes' is a list of shapes, one for each input.
    if output_shapes is None:
        output_shapes = input_shapes

    # For validation assume any shape can be used...
    if tu.validate_for_tf_model(dtype, dtype, dtype, input_shapes[0],
                                input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'graphdef', bs, dtype, input_shapes,
                          output_shapes)
            iu.infer_zero(self, 'savedmodel', bs, dtype, input_shapes,
                          output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'graphdef_nobatch', 1, dtype, input_shapes,
                          output_shapes)
            iu.infer_zero(self, 'savedmodel_nobatch', 1, dtype, input_shapes,
                          output_shapes)

    if tu.validate_for_c2_model(dtype, dtype, dtype, input_shapes[0],
                                input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'netdef', bs, dtype, input_shapes,
                          output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'netdef_nobatch', 1, dtype, input_shapes,
                          output_shapes)

    if tu.validate_for_custom_model(dtype, dtype, dtype, input_shapes[0],
                                    input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'custom', bs, dtype, input_shapes,
                          output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'custom_nobatch', 1, dtype, input_shapes,
                          output_shapes)

    if tu.validate_for_onnx_model(dtype, dtype, dtype, input_shapes[0],
                                  input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'onnx', bs, dtype, input_shapes,
                          output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'onnx_nobatch', 1, dtype, input_shapes,
                          output_shapes)

    if tu.validate_for_libtorch_model(dtype, dtype, dtype, input_shapes[0],
                                      input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'libtorch', bs, dtype, input_shapes,
                          output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'libtorch_nobatch', 1, dtype, input_shapes,
                          output_shapes)

    for name in ["simple_reshape", "sequence_reshape", "fan_reshape"]:
        if tu.validate_for_ensemble_model(name, dtype, dtype, dtype,
                                          input_shapes[0], input_shapes[0],
                                          input_shapes[0]):
            if len(input_shapes) <= 2:
                # Skip cases that reshape to zero-sized tensors
                # (known from qa/common/gen_qa_reshape_model.py)
                return
            # model that supports batching
            for bs in (1, 8):
                iu.infer_zero(self, name, bs, dtype, input_shapes,
                              output_shapes)
            # model that does not support batching
            if no_batch:
                iu.infer_zero(self, name + '_nobatch', 1, dtype,
                              input_shapes, output_shapes)
def _full_zero(self, dtype, shapes):
    # 'shapes' is a list of shapes, one for each input.
    # For validation assume any shape can be used...
    if tu.validate_for_tf_model(dtype, dtype, dtype, shapes[0], shapes[0],
                                shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'graphdef', bs, dtype, shapes, shapes)
            iu.infer_zero(self, 'savedmodel', bs, dtype, shapes, shapes)
        # model that does not support batching
        iu.infer_zero(self, 'graphdef_nobatch', 1, dtype, shapes, shapes)
        iu.infer_zero(self, 'savedmodel_nobatch', 1, dtype, shapes, shapes)

    if tu.validate_for_c2_model(dtype, dtype, dtype, shapes[0], shapes[0],
                                shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'netdef', bs, dtype, shapes, shapes)
        # model that does not support batching
        iu.infer_zero(self, 'netdef_nobatch', 1, dtype, shapes, shapes)

    for name in ["simple_zero", "sequence_zero", "fan_zero"]:
        if tu.validate_for_ensemble_model(name, dtype, dtype, dtype,
                                          shapes[0], shapes[0], shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                iu.infer_zero(self, name, bs, dtype, shapes, shapes)
            # model that does not support batching
            iu.infer_zero(self, name + '_nobatch', 1, dtype, shapes, shapes)
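# Hedged usage sketch (assumed, not from the original source): the "zero"
# helpers are meant to be called with shapes containing a zero-sized
# dimension, so each request carries empty tensors end to end. Test names
# and shapes below are illustrative.
def test_zero_single_input_sketch(self):
    self._full_zero(np.float32, ([1, 0],))

def test_zero_multi_input_sketch(self):
    self._full_zero(np.float32, ([0], [0, 8], [1, 0]))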
def _full_reshape(self, dtype, input_shapes, output_shapes=None, no_batch=True):
    # 'input_shapes' is a list of shapes, one for each input.
    if output_shapes is None:
        output_shapes = input_shapes

    # For validation assume any shape can be used...
    if tu.validate_for_tf_model(dtype, dtype, dtype, input_shapes[0],
                                input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'graphdef', bs, dtype, input_shapes,
                          output_shapes)
            iu.infer_zero(self, 'savedmodel', bs, dtype, input_shapes,
                          output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'graphdef_nobatch', 1, dtype, input_shapes,
                          output_shapes)
            iu.infer_zero(self, 'savedmodel_nobatch', 1, dtype, input_shapes,
                          output_shapes)

    if tu.validate_for_c2_model(dtype, dtype, dtype, input_shapes[0],
                                input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'netdef', bs, dtype, input_shapes,
                          output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'netdef_nobatch', 1, dtype, input_shapes,
                          output_shapes)

    if tu.validate_for_custom_model(dtype, dtype, dtype, input_shapes[0],
                                    input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'custom', bs, dtype, input_shapes,
                          output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'custom_nobatch', 1, dtype, input_shapes,
                          output_shapes)
def _full_reshape(self, dtype, input_shapes, output_shapes=None, no_batch=True):
    # 'input_shapes' is a list of shapes, one for each input.
    if output_shapes is None:
        output_shapes = input_shapes

    # For validation assume any shape can be used...
    if tu.validate_for_tf_model(dtype, dtype, dtype, input_shapes[0],
                                input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'graphdef', bs, dtype, input_shapes,
                          output_shapes)
            iu.infer_zero(self, 'savedmodel', bs, dtype, input_shapes,
                          output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'graphdef_nobatch', 1, dtype, input_shapes,
                          output_shapes)
            iu.infer_zero(self, 'savedmodel_nobatch', 1, dtype, input_shapes,
                          output_shapes)

    if tu.validate_for_c2_model(dtype, dtype, dtype, input_shapes[0],
                                input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'netdef', bs, dtype, input_shapes,
                          output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'netdef_nobatch', 1, dtype, input_shapes,
                          output_shapes)

    if tu.validate_for_custom_model(dtype, dtype, dtype, input_shapes[0],
                                    input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'custom', bs, dtype, input_shapes,
                          output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'custom_nobatch', 1, dtype, input_shapes,
                          output_shapes)

    if tu.validate_for_onnx_model(dtype, dtype, dtype, input_shapes[0],
                                  input_shapes[0], input_shapes[0]):
        # model that supports batching
        for bs in (1, 8):
            iu.infer_zero(self, 'onnx', bs, dtype, input_shapes,
                          output_shapes)
        # model that does not support batching
        if no_batch:
            iu.infer_zero(self, 'onnx_nobatch', 1, dtype, input_shapes,
                          output_shapes)

    if tu.validate_for_libtorch_model(dtype, dtype, dtype, input_shapes[0],
                                      input_shapes[0], input_shapes[0]):
        # skip variable size reshape on libtorch for now,
        # see "gen_qa_reshape_model.py" for detail
        if dtype != np.int32:
            # model that supports batching
            for bs in (1, 8):
                iu.infer_zero(self, 'libtorch', bs, dtype, input_shapes,
                              output_shapes)
            # model that does not support batching
            if no_batch:
                iu.infer_zero(self, 'libtorch_nobatch', 1, dtype,
                              input_shapes, output_shapes)

    for name in ["simple_reshape", "sequence_reshape", "fan_reshape"]:
        # [TODO] Skip variable size reshape on ensemble for now.
        # Need rework on how ensembles for reshape are generated
        if dtype == np.int32:
            break
        if tu.validate_for_ensemble_model(name, dtype, dtype, dtype,
                                          input_shapes[0], input_shapes[0],
                                          input_shapes[0]):
            # model that supports batching
            for bs in (1, 8):
                iu.infer_zero(self, name, bs, dtype, input_shapes,
                              output_shapes)
            # model that does not support batching
            if no_batch:
                iu.infer_zero(self, name + '_nobatch', 1, dtype,
                              input_shapes, output_shapes)
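# Hedged usage sketch (assumed, not from the original source): a reshape test
# pairs each input shape with the shape the model reshapes it to; omitting
# output_shapes (as in the second call) exercises the default path where
# outputs keep the input shapes. Test names and shapes are illustrative.
def test_reshape_two_inputs_sketch(self):
    self._full_reshape(np.float32,
                       input_shapes=([2, 2], [8]),
                       output_shapes=([4], [2, 4]))

def test_reshape_identity_sketch(self):
    self._full_reshape(np.float32, input_shapes=([16],), no_batch=False)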