Example 1
def benchmark_using_loadgen():
    "Perform the benchmark using python API for the LoadGen library"

    global num_classes
    global model_output_volume

    pycuda_context, max_batch_size, input_volume, model_output_volume, num_layers = initialize_predictor()
    num_classes = len(class_labels)

    scenario = {
        'SingleStream': lg.TestScenario.SingleStream,
        'MultiStream': lg.TestScenario.MultiStream,
        'Server': lg.TestScenario.Server,
        'Offline': lg.TestScenario.Offline,
    }[LOADGEN_SCENARIO]

    mode = {
        'AccuracyOnly': lg.TestMode.AccuracyOnly,
        'PerformanceOnly': lg.TestMode.PerformanceOnly,
        'SubmissionRun': lg.TestMode.SubmissionRun,
    }[LOADGEN_MODE]

    ts = lg.TestSettings()
    ts.FromConfig(MLPERF_CONF_PATH, MODEL_NAME, LOADGEN_SCENARIO)
    ts.FromConfig(USER_CONF_PATH, MODEL_NAME, LOADGEN_SCENARIO)
    ts.scenario = scenario
    ts.mode = mode

    if LOADGEN_MULTISTREAMNESS:
        ts.multi_stream_samples_per_query = int(LOADGEN_MULTISTREAMNESS)

    if LOADGEN_COUNT_OVERRIDE:
        ts.min_query_count = int(LOADGEN_COUNT_OVERRIDE)
        ts.max_query_count = int(LOADGEN_COUNT_OVERRIDE)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(LOADGEN_DATASET_SIZE, LOADGEN_BUFFER_SIZE,
                          load_query_samples, unload_query_samples)

    log_settings = lg.LogSettings()
    log_settings.enable_trace = False
    lg.StartTestWithLogSettings(sut, qsl, ts, log_settings)

    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
    pycuda_context.pop()
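
The SUT and QSL constructors above only register callbacks; this example does not show their bodies. Below is a minimal sketch of what they typically look like, assuming a hypothetical preprocess_image() loader and predict_batch() inference helper (neither is part of the LoadGen API); responses are returned through lg.QuerySamplesComplete():

import array
import mlperf_loadgen as lg

loaded_samples = {}    # index -> preprocessed sample (simple in-memory cache)

def load_query_samples(sample_indices):
    # Called by LoadGen before it issues queries referencing these indices.
    for index in sample_indices:
        loaded_samples[index] = preprocess_image(index)    # hypothetical helper

def unload_query_samples(sample_indices):
    # Called by LoadGen once the samples are no longer needed.
    for index in sample_indices:
        loaded_samples.pop(index, None)

def issue_queries(query_samples):
    # Each query sample carries an opaque response id and a dataset index.
    for sample in query_samples:
        prediction = predict_batch(loaded_samples[sample.index])    # hypothetical helper
        response_bytes = array.array('B', prediction.tobytes())
        address, byte_count = response_bytes.buffer_info()    # 'B' elements are 1 byte each
        lg.QuerySamplesComplete([lg.QuerySampleResponse(sample.id, address, byte_count)])

def flush_queries():
    pass

def process_latencies(latencies_ns):
    # Receives the measured latencies in nanoseconds; recording them is optional.
    pass
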
Example 2

def main():
    setup_time_begin = time.time()

    # Cleanup results directory
    if os.path.isdir(DETECTIONS_OUT_DIR):
        shutil.rmtree(DETECTIONS_OUT_DIR)
    os.mkdir(DETECTIONS_OUT_DIR)

    pycuda_context, max_batch_size, input_volume, output_volume, num_layers = initialize_predictor()
    num_classes = len(class_labels)

    print('Images dir: ' + IMAGE_DIR)
    print('Image list file: ' + IMAGE_LIST_FILE)
    print('Batch size: {}'.format(BATCH_SIZE))
    print('Batch count: {}'.format(BATCH_COUNT))
    print('Detections dir: ' + DETECTIONS_OUT_DIR)
    print('Normalize: {}'.format(MODEL_NORMALIZE_DATA))
    print('Subtract mean: {}'.format(SUBTRACT_MEAN))
    print('Per-channel means to subtract: {}'.format(GIVEN_CHANNEL_MEANS))

    print("Data layout: {}".format(MODEL_DATA_LAYOUT))
    print("DLA mode used: {}".format(MODEL_USE_DLA))
    print('Model image height: {}'.format(MODEL_IMAGE_HEIGHT))
    print('Model image width: {}'.format(MODEL_IMAGE_WIDTH))
    print('Model image channels: {}'.format(MODEL_IMAGE_CHANNELS))
    print('Model input data type: {}'.format(MODEL_INPUT_DATA_TYPE))
    print('Model (internal) data type: {}'.format(MODEL_DATA_TYPE))
    print('Model BGR colours: {}'.format(MODEL_COLOURS_BGR))
    print('Model max_batch_size: {}'.format(max_batch_size))
    print('Model output volume (number of outputs per one prediction): {}'.format(output_volume))
    print('Model num_layers: {}'.format(num_layers))
    print('Number of class_labels: {}'.format(num_classes))
    print('Post-detection confidence score threshold: {}'.format(
        SCORE_THRESHOLD))
    print("")

    setup_time = time.time() - setup_time_begin

    # Run batched mode
    test_time_begin = time.time()
    total_load_time = 0
    next_batch_offset = 0
    total_inference_time = 0
    first_inference_time = 0
    images_loaded = 0

    for batch_index in range(BATCH_COUNT):
        batch_number = batch_index + 1

        begin_time = time.time()
        current_batch_offset = next_batch_offset
        batch_data, next_batch_offset = load_preprocessed_batch(
            image_filenames, current_batch_offset)

        load_time = time.time() - begin_time
        total_load_time += load_time
        images_loaded += BATCH_SIZE

        trimmed_batch_results, inference_time_s = inference_for_given_batch(
            batch_data)

        print("[batch {} of {}] loading={:.2f} ms, inference={:.2f} ms".format(
            batch_number, BATCH_COUNT, load_time * 1000,
            inference_time_s * 1000))

        total_inference_time += inference_time_s
        # Remember inference_time for the first batch
        if batch_index == 0:
            first_inference_time = inference_time_s

        # Process results
        for index_in_batch in range(BATCH_SIZE):
            single_image_predictions = trimmed_batch_results[index_in_batch]
            # The number of valid boxes is stored right after the
            # MODEL_MAX_PREDICTIONS fixed-size slots of 7 values each:
            num_boxes = single_image_predictions[MODEL_MAX_PREDICTIONS * 7].view('int32')
            global_image_index = current_batch_offset + index_in_batch
            width_orig, height_orig = original_w_h[global_image_index]

            filename_orig = image_filenames[global_image_index]
            detections_filename = os.path.splitext(filename_orig)[0] + '.txt'
            detections_filepath = os.path.join(DETECTIONS_OUT_DIR,
                                               detections_filename)

            with open(detections_filepath, 'w') as det_file:
                det_file.write('{:d} {:d}\n'.format(width_orig, height_orig))

                for row in range(num_boxes):
                    (image_id, ymin, xmin, ymax, xmax, confidence,
                     class_number) = single_image_predictions[row * 7:(row + 1) * 7]

                    if confidence >= SCORE_THRESHOLD:
                        class_number = int(class_number)
                        if class_map:
                            class_number = class_map[class_number]

                        image_id = int(image_id)
                        x1 = xmin * width_orig
                        y1 = ymin * height_orig
                        x2 = xmax * width_orig
                        y2 = ymax * height_orig
                        class_label = class_labels[class_number -
                                                   bg_class_offset]
                        det_file.write(
                            '{:.2f} {:.2f} {:.2f} {:.2f} {:.3f} {} {}\n'.
                            format(x1, y1, x2, y2, confidence, class_number,
                                   class_label))

    pycuda_context.pop()

    test_time = time.time() - test_time_begin

    if BATCH_COUNT > 1:
        avg_inference_time = (total_inference_time - first_inference_time) / (
            images_loaded - BATCH_SIZE)
    else:
        avg_inference_time = total_inference_time / images_loaded

    avg_load_time = total_load_time / images_loaded

    # Store benchmarking results:
    output_dict = {
        'run_time_state': {
            'setup_time_s': setup_time,
            'test_time_s': test_time,
            'images_load_time_total_s': total_load_time,
            'images_load_time_avg_s': avg_load_time,
            'prediction_time_total_s': total_inference_time,
            'prediction_time_avg_s': avg_inference_time,
            'avg_time_ms': avg_inference_time * 1000,
            'avg_fps': 1.0 / avg_inference_time,
            'batch_time_ms': avg_inference_time * 1000 * BATCH_SIZE,
            'batch_size': BATCH_SIZE,
        }
    }
    with open('tmp-ck-timer.json', 'w') as out_file:
        json.dump(output_dict, out_file, indent=4, sort_keys=True)
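
Each detections file written above starts with the original image dimensions, followed by one line per detection, in the field order used by the det_file.write() calls. A small parser, handy for sanity-checking the output (read_detections_file is a hypothetical helper, not part of the example):

def read_detections_file(filepath):
    """Parse one detections file; returns ((width, height), detections)."""
    detections = []
    with open(filepath) as det_file:
        width, height = map(int, det_file.readline().split())
        for line in det_file:
            # class_label may contain spaces, so split at most 6 times:
            x1, y1, x2, y2, confidence, class_number, class_label = line.split(None, 6)
            detections.append({
                'box': (float(x1), float(y1), float(x2), float(y2)),
                'confidence': float(confidence),
                'class_number': int(class_number),
                'class_label': class_label.strip(),
            })
    return (width, height), detections
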
Example 3

def main():
    setup_time_begin = time.time()

    # Cleanup results directory
    if os.path.isdir(RESULTS_DIR):
        shutil.rmtree(RESULTS_DIR)
    os.mkdir(RESULTS_DIR)

    pycuda_context, max_batch_size, input_volume, output_volume, num_layers = initialize_predictor()
    num_classes = len(class_labels)

    print('Images dir: ' + IMAGE_DIR)
    print('Image list file: ' + IMAGE_LIST_FILE)
    print('Batch size: {}'.format(BATCH_SIZE))
    print('Batch count: {}'.format(BATCH_COUNT))
    print('Results dir: ' + RESULTS_DIR)
    print('Normalize: {}'.format(MODEL_NORMALIZE_DATA))
    print('Subtract mean: {}'.format(SUBTRACT_MEAN))
    print('Per-channel means to subtract: {}'.format(GIVEN_CHANNEL_MEANS))

    print("Data layout: {}".format(MODEL_DATA_LAYOUT) )
    print("DLA mode used: {}".format(MODEL_USE_DLA) )
    print('Model image height: {}'.format(MODEL_IMAGE_HEIGHT))
    print('Model image width: {}'.format(MODEL_IMAGE_WIDTH))
    print('Model image channels: {}'.format(MODEL_IMAGE_CHANNELS))
    print('Model input data type: {}'.format(MODEL_INPUT_DATA_TYPE))
    print('Model (internal) data type: {}'.format(MODEL_DATA_TYPE))
    print('Model BGR colours: {}'.format(MODEL_COLOURS_BGR))
    print('Model max_batch_size: {}'.format(max_batch_size))
    print('Model output volume (number of outputs per one prediction): {}'.format(output_volume))
    print('Model num_layers: {}'.format(num_layers))
    print('Number of class_labels: {}'.format(num_classes))
    print("")


    setup_time = time.time() - setup_time_begin

    # Run batched mode
    test_time_begin = time.time()
    image_index = 0
    total_load_time = 0
    total_classification_time = 0
    first_classification_time = 0
    images_loaded = 0

    for batch_index in range(BATCH_COUNT):
        batch_number = batch_index + 1

        begin_time = time.time()
        batch_data, image_index = load_preprocessed_batch(image_list, image_index)

        load_time = time.time() - begin_time
        total_load_time += load_time
        images_loaded += BATCH_SIZE

        trimmed_batch_results, inference_time_s = inference_for_given_batch(batch_data)

        print("[batch {} of {}] loading={:.2f} ms, inference={:.2f} ms".format(
                      batch_number, BATCH_COUNT, load_time*1000, inference_time_s*1000))

        total_classification_time += inference_time_s
        # Remember first batch prediction time
        if batch_index == 0:
            first_classification_time = inference_time_s

        # Process results
        for index_in_batch in range(BATCH_SIZE):
            one_batch_result = trimmed_batch_results[index_in_batch]
            if output_volume == 1:
                # The model emits a single class index rather than a probability vector:
                arg_max = int(one_batch_result[0])
                softmax_vector = [0] * arg_max + [1] + [0] * (num_classes - arg_max - 1)
            else:
                softmax_vector = one_batch_result[-num_classes:]    # skipping the background class on the left (if present)
            global_index = batch_index * BATCH_SIZE + index_in_batch
            res_file = os.path.join(RESULTS_DIR, image_list[global_index])
            with open(res_file + '.txt', 'w') as f:
                for prob in softmax_vector:
                    f.write('{}\n'.format(prob))
                
    pycuda_context.pop()

    test_time = time.time() - test_time_begin
 
    if BATCH_COUNT > 1:
        avg_classification_time = (total_classification_time - first_classification_time) / (images_loaded - BATCH_SIZE)
    else:
        avg_classification_time = total_classification_time / images_loaded

    avg_load_time = total_load_time / images_loaded

    # Store benchmarking results:
    output_dict = {
        'setup_time_s': setup_time,
        'test_time_s': test_time,
        'images_load_time_total_s': total_load_time,
        'images_load_time_avg_s': avg_load_time,
        'prediction_time_total_s': total_classification_time,
        'prediction_time_avg_s': avg_classification_time,

        'avg_time_ms': avg_classification_time * 1000,
        'avg_fps': 1.0 / avg_classification_time,
        'batch_time_ms': avg_classification_time * 1000 * BATCH_SIZE,
        'batch_size': BATCH_SIZE,
    }
    with open('tmp-ck-timer.json', 'w') as out_file:
        json.dump(output_dict, out_file, indent=4, sort_keys=True)
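
The per-image result files written above contain one probability per line, in class order. A minimal sketch of scoring them for top-1 accuracy, assuming a hypothetical ground_truth dict that maps each image filename to its correct class index (the example itself does not provide labels):

def top1_accuracy(results_dir, ground_truth):
    correct = total = 0
    for filename in os.listdir(results_dir):
        if not filename.endswith('.txt'):
            continue
        with open(os.path.join(results_dir, filename)) as res_file:
            probs = [float(line) for line in res_file]
        predicted = max(range(len(probs)), key=probs.__getitem__)
        total += 1
        correct += int(predicted == ground_truth[filename[:-len('.txt')]])
    return correct / total if total else 0.0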