def benchmark_using_loadgen():
    "Perform the benchmark using the Python API for the LoadGen library"

    global num_classes
    global model_output_volume

    pycuda_context, max_batch_size, input_volume, model_output_volume, num_layers = initialize_predictor()
    num_classes = len(class_labels)

    scenario = {
        'SingleStream':  lg.TestScenario.SingleStream,
        'MultiStream':   lg.TestScenario.MultiStream,
        'Server':        lg.TestScenario.Server,
        'Offline':       lg.TestScenario.Offline,
    }[LOADGEN_SCENARIO]

    mode = {
        'AccuracyOnly':    lg.TestMode.AccuracyOnly,
        'PerformanceOnly': lg.TestMode.PerformanceOnly,
        'SubmissionRun':   lg.TestMode.SubmissionRun,
    }[LOADGEN_MODE]

    ts = lg.TestSettings()
    # user.conf is loaded after mlperf.conf, so its values take precedence.
    ts.FromConfig(MLPERF_CONF_PATH, MODEL_NAME, LOADGEN_SCENARIO)
    ts.FromConfig(USER_CONF_PATH, MODEL_NAME, LOADGEN_SCENARIO)
    ts.scenario = scenario
    ts.mode = mode

    if LOADGEN_MULTISTREAMNESS:
        ts.multi_stream_samples_per_query = int(LOADGEN_MULTISTREAMNESS)

    if LOADGEN_COUNT_OVERRIDE:
        ts.min_query_count = int(LOADGEN_COUNT_OVERRIDE)
        ts.max_query_count = int(LOADGEN_COUNT_OVERRIDE)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(LOADGEN_DATASET_SIZE, LOADGEN_BUFFER_SIZE,
                          load_query_samples, unload_query_samples)

    log_settings = lg.LogSettings()
    log_settings.enable_trace = False
    lg.StartTestWithLogSettings(sut, qsl, ts, log_settings)

    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
    pycuda_context.pop()
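For context, `benchmark_using_loadgen()` depends on SUT and QSL callbacks (`issue_queries`, `flush_queries`, `process_latencies`, `load_query_samples`, `unload_query_samples`) that are defined elsewhere in the harness. Below is a minimal sketch of what such callbacks can look like with the LoadGen Python API used above; `predict_for_sample` is a hypothetical stand-in for the real TensorRT inference path, not part of this harness.

```python
import array
import numpy as np
import mlperf_loadgen as lg


def predict_for_sample(sample_index):
    "Hypothetical stand-in: a real harness would run TensorRT inference here."
    return np.zeros(10, dtype=np.float32)


def issue_queries(query_samples):
    "Run inference for each sample and report the result back to LoadGen."
    for qs in query_samples:
        data = predict_for_sample(qs.index)
        # LoadGen expects a raw pointer and byte length for each response.
        serialized = array.array('B', data.tobytes())
        ptr, length = serialized.buffer_info()
        lg.QuerySamplesComplete([lg.QuerySampleResponse(qs.id, ptr, length)])


def flush_queries():
    "Nothing buffered in this synchronous sketch."
    pass


def process_latencies(latencies_ns):
    "LoadGen reports per-query latencies in nanoseconds; logging them is optional."
    pass


def load_query_samples(sample_indices):
    "Preload and preprocess the requested samples into RAM."
    pass


def unload_query_samples(sample_indices):
    "Free the preloaded samples."
    pass
```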
def main():
    setup_time_begin = time.time()

    # Cleanup results directory
    if os.path.isdir(DETECTIONS_OUT_DIR):
        shutil.rmtree(DETECTIONS_OUT_DIR)
    os.mkdir(DETECTIONS_OUT_DIR)

    pycuda_context, max_batch_size, input_volume, output_volume, num_layers = initialize_predictor()

    print('Images dir: ' + IMAGE_DIR)
    print('Image list file: ' + IMAGE_LIST_FILE)
    print('Batch size: {}'.format(BATCH_SIZE))
    print('Batch count: {}'.format(BATCH_COUNT))
    print('Detections dir: ' + DETECTIONS_OUT_DIR)
    print('Normalize: {}'.format(MODEL_NORMALIZE_DATA))
    print('Subtract mean: {}'.format(SUBTRACT_MEAN))
    print('Per-channel means to subtract: {}'.format(GIVEN_CHANNEL_MEANS))
    print('Data layout: {}'.format(MODEL_DATA_LAYOUT))
    print('DLA mode used: {}'.format(MODEL_USE_DLA))
    print('Model image height: {}'.format(MODEL_IMAGE_HEIGHT))
    print('Model image width: {}'.format(MODEL_IMAGE_WIDTH))
    print('Model image channels: {}'.format(MODEL_IMAGE_CHANNELS))
    print('Model input data type: {}'.format(MODEL_INPUT_DATA_TYPE))
    print('Model (internal) data type: {}'.format(MODEL_DATA_TYPE))
    print('Model BGR colours: {}'.format(MODEL_COLOURS_BGR))
    print('Model max_batch_size: {}'.format(max_batch_size))
    print('Model output volume (number of outputs per one prediction): {}'.format(output_volume))
    print('Model num_layers: {}'.format(num_layers))
    print('Number of class_labels: {}'.format(num_classes))
    print('Post-detection confidence score threshold: {}'.format(SCORE_THRESHOLD))
    print('')

    setup_time = time.time() - setup_time_begin

    # Run batched mode
    test_time_begin = time.time()
    total_load_time = 0
    next_batch_offset = 0
    total_inference_time = 0
    first_inference_time = 0
    images_loaded = 0

    for batch_index in range(BATCH_COUNT):
        batch_number = batch_index + 1

        begin_time = time.time()
        current_batch_offset = next_batch_offset
        batch_data, next_batch_offset = load_preprocessed_batch(image_filenames, current_batch_offset)

        load_time = time.time() - begin_time
        total_load_time += load_time
        images_loaded += BATCH_SIZE

        trimmed_batch_results, inference_time_s = inference_for_given_batch(batch_data)

        print("[batch {} of {}] loading={:.2f} ms, inference={:.2f} ms".format(
            batch_number, BATCH_COUNT, load_time * 1000, inference_time_s * 1000))

        total_inference_time += inference_time_s
        # Remember inference_time for the first batch
        if batch_index == 0:
            first_inference_time = inference_time_s

        # Process results
        for index_in_batch in range(BATCH_SIZE):
            single_image_predictions = trimmed_batch_results[index_in_batch]
            # The last float32 slot stores the detection count as raw int32 bits,
            # so it is reinterpreted with .view('int32') rather than cast.
            num_boxes = single_image_predictions[MODEL_MAX_PREDICTIONS * 7].view('int32')
            global_image_index = current_batch_offset + index_in_batch
            width_orig, height_orig = original_w_h[global_image_index]

            filename_orig = image_filenames[global_image_index]
            detections_filename = os.path.splitext(filename_orig)[0] + '.txt'
            detections_filepath = os.path.join(DETECTIONS_OUT_DIR, detections_filename)

            with open(detections_filepath, 'w') as det_file:
                det_file.write('{:d} {:d}\n'.format(width_orig, height_orig))
                for row in range(num_boxes):
                    (image_id, ymin, xmin, ymax, xmax,
                     confidence, class_number) = single_image_predictions[row * 7:(row + 1) * 7]

                    if confidence >= SCORE_THRESHOLD:
                        class_number = int(class_number)
                        if class_map:
                            class_number = class_map[class_number]
                        image_id = int(image_id)
                        # Scale normalized box coordinates back to the original image size
                        x1 = xmin * width_orig
                        y1 = ymin * height_orig
                        x2 = xmax * width_orig
                        y2 = ymax * height_orig
                        class_label = class_labels[class_number - bg_class_offset]
                        det_file.write('{:.2f} {:.2f} {:.2f} {:.2f} {:.3f} {} {}\n'.format(
                            x1, y1, x2, y2, confidence, class_number, class_label))

    pycuda_context.pop()

    test_time = time.time() - test_time_begin

    # Average over all batches but the first one, which includes warm-up overhead
    if BATCH_COUNT > 1:
        avg_inference_time = (total_inference_time - first_inference_time) / (images_loaded - BATCH_SIZE)
    else:
        avg_inference_time = total_inference_time / images_loaded

    avg_load_time = total_load_time / images_loaded

    # Store benchmarking results:
    output_dict = {
        'run_time_state': {
            'setup_time_s': setup_time,
            'test_time_s': test_time,
            'images_load_time_total_s': total_load_time,
            'images_load_time_avg_s': avg_load_time,
            'prediction_time_total_s': total_inference_time,
            'prediction_time_avg_s': avg_inference_time,

            'avg_time_ms': avg_inference_time * 1000,
            'avg_fps': 1.0 / avg_inference_time,
            'batch_time_ms': avg_inference_time * 1000 * BATCH_SIZE,
            'batch_size': BATCH_SIZE,
        }
    }
    with open('tmp-ck-timer.json', 'w') as out_file:
        json.dump(output_dict, out_file, indent=4, sort_keys=True)
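The per-image output buffer decoded above packs up to `MODEL_MAX_PREDICTIONS` rows of 7 float32 values `(image_id, ymin, xmin, ymax, xmax, confidence, class)`, with the actual box count written as raw int32 bits into the final float32 slot. A self-contained sketch of that decoding follows; `MAX_PREDICTIONS` and the detection values are made-up stand-ins.

```python
import numpy as np

MAX_PREDICTIONS = 100  # stand-in for MODEL_MAX_PREDICTIONS
buf = np.zeros(MAX_PREDICTIONS * 7 + 1, dtype=np.float32)

# One fake detection: (image_id, ymin, xmin, ymax, xmax, confidence, class)
buf[0:7] = [0.0, 0.10, 0.20, 0.55, 0.60, 0.87, 3.0]

# Write the box count as int32 bits into the last float32 slot.
buf[MAX_PREDICTIONS * 7:].view(np.int32)[0] = 1

# Reading it back requires reinterpreting the bits, not casting the float.
num_boxes = buf[MAX_PREDICTIONS * 7].view('int32')
for row in range(num_boxes):
    image_id, ymin, xmin, ymax, xmax, confidence, cls = buf[row * 7:(row + 1) * 7]
    print(int(image_id), confidence, int(cls))
```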
def main():
    setup_time_begin = time.time()

    # Cleanup results directory
    if os.path.isdir(RESULTS_DIR):
        shutil.rmtree(RESULTS_DIR)
    os.mkdir(RESULTS_DIR)

    pycuda_context, max_batch_size, input_volume, output_volume, num_layers = initialize_predictor()
    num_classes = len(class_labels)

    print('Images dir: ' + IMAGE_DIR)
    print('Image list file: ' + IMAGE_LIST_FILE)
    print('Batch size: {}'.format(BATCH_SIZE))
    print('Batch count: {}'.format(BATCH_COUNT))
    print('Results dir: ' + RESULTS_DIR)
    print('Normalize: {}'.format(MODEL_NORMALIZE_DATA))
    print('Subtract mean: {}'.format(SUBTRACT_MEAN))
    print('Per-channel means to subtract: {}'.format(GIVEN_CHANNEL_MEANS))
    print('Data layout: {}'.format(MODEL_DATA_LAYOUT))
    print('DLA mode used: {}'.format(MODEL_USE_DLA))
    print('Model image height: {}'.format(MODEL_IMAGE_HEIGHT))
    print('Model image width: {}'.format(MODEL_IMAGE_WIDTH))
    print('Model image channels: {}'.format(MODEL_IMAGE_CHANNELS))
    print('Model input data type: {}'.format(MODEL_INPUT_DATA_TYPE))
    print('Model (internal) data type: {}'.format(MODEL_DATA_TYPE))
    print('Model BGR colours: {}'.format(MODEL_COLOURS_BGR))
    print('Model max_batch_size: {}'.format(max_batch_size))
    print('Model output volume (number of outputs per one prediction): {}'.format(output_volume))
    print('Model num_layers: {}'.format(num_layers))
    print('Number of class_labels: {}'.format(num_classes))
    print('')

    setup_time = time.time() - setup_time_begin

    # Run batched mode
    test_time_begin = time.time()
    image_index = 0
    total_load_time = 0
    total_classification_time = 0
    first_classification_time = 0
    images_loaded = 0

    for batch_index in range(BATCH_COUNT):
        batch_number = batch_index + 1

        begin_time = time.time()
        batch_data, image_index = load_preprocessed_batch(image_list, image_index)
        load_time = time.time() - begin_time
        total_load_time += load_time
        images_loaded += BATCH_SIZE

        trimmed_batch_results, inference_time_s = inference_for_given_batch(batch_data)

        print("[batch {} of {}] loading={:.2f} ms, inference={:.2f} ms".format(
            batch_number, BATCH_COUNT, load_time * 1000, inference_time_s * 1000))

        total_classification_time += inference_time_s
        # Remember first batch prediction time
        if batch_index == 0:
            first_classification_time = inference_time_s

        # Process results
        for index_in_batch in range(BATCH_SIZE):
            one_batch_result = trimmed_batch_results[index_in_batch]
            if output_volume == 1:
                # The model only emits the top-1 class index, so synthesize
                # a one-hot "softmax" vector from it.
                arg_max = int(one_batch_result[0])
                softmax_vector = [0] * arg_max + [1] + [0] * (num_classes - arg_max - 1)
            else:
                # Skipping the background class on the left (if present)
                softmax_vector = one_batch_result[-num_classes:]
            global_index = batch_index * BATCH_SIZE + index_in_batch
            res_file = os.path.join(RESULTS_DIR, image_list[global_index])
            with open(res_file + '.txt', 'w') as f:
                for prob in softmax_vector:
                    f.write('{}\n'.format(prob))

    pycuda_context.pop()

    test_time = time.time() - test_time_begin

    # Average over all batches but the first one, which includes warm-up overhead
    if BATCH_COUNT > 1:
        avg_classification_time = (total_classification_time - first_classification_time) / (images_loaded - BATCH_SIZE)
    else:
        avg_classification_time = total_classification_time / images_loaded

    avg_load_time = total_load_time / images_loaded

    # Store benchmarking results:
    output_dict = {
        'setup_time_s': setup_time,
        'test_time_s': test_time,
        'images_load_time_total_s': total_load_time,
        'images_load_time_avg_s': avg_load_time,
        'prediction_time_total_s': total_classification_time,
        'prediction_time_avg_s': avg_classification_time,

        'avg_time_ms': avg_classification_time * 1000,
        'avg_fps': 1.0 / avg_classification_time,
        'batch_time_ms': avg_classification_time * 1000 * BATCH_SIZE,
        'batch_size': BATCH_SIZE,
    }
    with open('tmp-ck-timer.json', 'w') as out_file:
        json.dump(output_dict, out_file, indent=4, sort_keys=True)
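The `output_volume == 1` branch above handles models that return only a top-1 class index: it synthesizes a degenerate one-hot "softmax" vector so the downstream accuracy scripts can consume both model flavours uniformly. A tiny illustration with made-up numbers:

```python
num_classes = 5
arg_max = 2  # top-1 class index returned by the model

# Place a single 1 at position arg_max, zeros everywhere else.
one_hot = [0] * arg_max + [1] + [0] * (num_classes - arg_max - 1)
assert one_hot == [0, 0, 1, 0, 0]
```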