def get_calibration_table_yolov3_variant(model_path, augmented_model_path, calibration_dataset):
    """Generate and save the calibration table for the YOLOv3 variant model.

    Creates an entropy calibrator for ``model_path``, restricts it to the
    CUDA execution provider, feeds it the contents of ``calibration_dataset``
    one stride at a time, and passes the computed ranges to
    ``write_calibration_table``.

    Args:
        model_path: Model handed to ``create_calibrator``.
        augmented_model_path: Forwarded to ``create_calibrator`` as
            ``augmented_model_path`` and to every data reader as
            ``model_path``.
        calibration_dataset: Directory of calibration data. Its entry count
            (``os.listdir``) determines how many strides are processed.
    """
    calibrator = create_calibrator(
        model_path,
        [],
        augmented_model_path=augmented_model_path,
        calibrate_method=CalibrationMethod.Entropy,
    )
    calibrator.set_execution_providers(["CUDAExecutionProvider"])

    # A DataReader can handle the dataset with batch or serial processing,
    # depending on its implementation. The two approaches below show the
    # different ways to generate the calibration table.

    # 1. Serial processing
    #    A single data reader could do serial processing, but some machines
    #    don't have sufficient memory to hold all dataset images and all
    #    intermediate output. So multiple data readers each handle a
    #    different stride of the dataset, one by one.
    #    DataReader uses serial processing when batch_size is 1.
    width = 608
    height = 608
    stride = 20
    batch_size = 1
    total_data_size = len(os.listdir(calibration_dataset))

    # The range itself yields each stride's start offset, so no separate
    # counter has to be kept in sync with the loop.
    for start_index in range(0, total_data_size, stride):
        data_reader = YoloV3VariantDataReader(
            calibration_dataset,
            width=width,
            height=height,
            start_index=start_index,
            # May point past the dataset on the final stride; this is passed
            # through unchanged, exactly as before.
            end_index=start_index + stride,
            stride=stride,
            batch_size=batch_size,
            model_path=augmented_model_path,
        )
        calibrator.collect_data(data_reader)

    # 2. Batch processing (much faster)
    #    Batch processing requires less memory for intermediate output, so a
    #    single data reader can handle the whole dataset in batches. If that
    #    still runs out of memory, use multiple data readers just like the
    #    serial approach above.
    #    DataReader uses batch processing when batch_size > 1.
    #
    # batch_size = 20
    # stride = 1000
    # data_reader = YoloV3VariantDataReader(calibration_dataset,
    #                                       width=width,
    #                                       height=height,
    #                                       stride=stride,
    #                                       batch_size=batch_size,
    #                                       model_path=augmented_model_path)
    # calibrator.collect_data(data_reader)

    write_calibration_table(calibrator.compute_range())
    print('calibration table generated and saved.')
def get_calibration_table(model_path, augmented_model_path, calibration_dataset):
    """Generate and save the calibration table for the YOLOv3 model.

    Creates a calibrator for ``model_path`` with the default calibration
    settings of ``create_calibrator``, feeds it the contents of
    ``calibration_dataset`` one stride at a time, and passes the computed
    ranges to ``write_calibration_table``.

    Args:
        model_path: Model handed to ``create_calibrator``.
        augmented_model_path: Forwarded to ``create_calibrator`` as
            ``augmented_model_path`` and to every data reader as
            ``model_path``.
        calibration_dataset: Directory of calibration data. Its entry count
            (``os.listdir``) determines how many strides are processed.
    """
    calibrator = create_calibrator(model_path, None, augmented_model_path=augmented_model_path)

    # A DataReader can handle the dataset with batch or serial processing,
    # depending on its implementation. The two approaches below show the
    # different ways to generate the calibration table.

    # 1. Serial processing
    #    A single DataReader could do serial processing, but some machines
    #    don't have sufficient memory to hold all dataset images and all
    #    intermediate output. So multiple DataReaders each handle a
    #    different stride of the dataset, one by one.
    #    DataReader uses serial processing when batch_size is 1.
    stride = 2000
    total_data_size = len(os.listdir(calibration_dataset))

    # The range itself yields each stride's start offset, so no separate
    # counter has to be kept in sync with the loop.
    for start_index in range(0, total_data_size, stride):
        data_reader = YoloV3DataReader(
            calibration_dataset,
            start_index=start_index,
            # May point past the dataset on the final stride; this is passed
            # through unchanged, exactly as before.
            end_index=start_index + stride,
            stride=stride,
            batch_size=1,
            model_path=augmented_model_path,
        )
        calibrator.collect_data(data_reader)

    # 2. Batch processing (much faster)
    #    Batch processing requires less memory for intermediate output, so a
    #    single DataReader can handle the whole dataset in batches. If that
    #    still runs out of memory, use multiple DataReaders just like the
    #    serial approach above.
    #    DataReader uses batch processing when batch_size > 1.
    #
    # data_reader = YoloV3DataReader(calibration_dataset, stride=1000, batch_size=20, model_path=augmented_model_path)
    # calibrator.collect_data(data_reader)

    write_calibration_table(calibrator.compute_range())
    print('calibration table generated and saved.')
"ORT_TENSORRT_INT8_CALIBRATION_TABLE_NAME"] = "calibration.flatbuffers" # Calibration table name os.environ[ "ORT_TENSORRT_ENGINE_CACHE_ENABLE"] = "1" # Enable engine caching execution_provider = ["TensorrtExecutionProvider"] # Convert static batch to dynamic batch [new_model_path, input_name] = convert_model_batch_to_dynamic(model_path) # Get calibration and prediction dataset size [calibration_dataset_size, prediction_dataset_size] = get_dataset_size(ilsvrc2012_dataset_path, calibration_dataset_size) # Generate INT8 calibration table if calibration_table_generation_enable: calibrator = create_calibrator( new_model_path, [], augmented_model_path=augmented_model_path) calibrator.set_execution_providers(["CUDAExecutionProvider"]) data_reader = ImageNetDataReader(ilsvrc2012_dataset_path, start_index=0, end_index=calibration_dataset_size, stride=calibration_dataset_size, batch_size=batch_size, model_path=augmented_model_path, input_name=input_name) calibrator.collect_data(data_reader) write_calibration_table(calibrator.compute_range()) # Run prediction in Tensorrt EP data_reader = ImageNetDataReader(ilsvrc2012_dataset_path, start_index=calibration_dataset_size, end_index=calibration_dataset_size +