Example #1
import psutil

# Create the session and time inference over all inputs, using every
# logical core for intra-op parallelism.
def run_model(baseline_model, all_inputs, use_gpu, use_openmp,
              graph_optimization_level):
    session = create_session(baseline_model,
                             use_gpu,
                             use_openmp,
                             graph_optimization_level,
                             num_threads=psutil.cpu_count(logical=True),
                             wait_policy='ACTIVE')
    output_names = [output.name for output in session.get_outputs()]
    results, latency_list = onnxruntime_inference(session, all_inputs,
                                                  output_names)
    return results, latency_list, output_names
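All three examples delegate execution and timing to an onnxruntime_inference helper defined elsewhere in the module. A minimal sketch of its assumed behavior, where each entry of all_inputs is an input-feed dict for session.run (names and structure are an assumption, not the original helper):

import time

def onnxruntime_inference(session, all_inputs, output_names):
    results = []
    latency_list = []
    for inputs in all_inputs:
        start = time.time()
        # session.run returns one array per requested output name.
        result = session.run(output_names, inputs)
        latency_list.append(time.time() - start)
        results.append(result)
    return results, latency_list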
Example #2
import psutil

# Pin intra-op threads to the physical core count; optionally disable all
# graph optimizations for comparison.
def run_model(model_path, all_inputs, use_gpu, disable_optimization):
    import onnxruntime

    graph_optimization_level = None
    if disable_optimization:
        graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL

    intra_op_num_threads = psutil.cpu_count(logical=False)

    session = create_session(model_path, use_gpu, intra_op_num_threads,
                             graph_optimization_level)

    output_names = [output.name for output in session.get_outputs()]
    results, latency_list = onnxruntime_inference(session, all_inputs,
                                                  output_names)
    return results, latency_list, output_names
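The create_session helper is likewise defined elsewhere in the module. A minimal sketch matching Example #2's four-argument call, assuming the standard onnxruntime SessionOptions API; Example #1 evidently targets an older or extended variant that also accepts use_openmp, num_threads, and wait_policy:

import onnxruntime

def create_session(model_path, use_gpu, intra_op_num_threads,
                   graph_optimization_level=None):
    sess_options = onnxruntime.SessionOptions()
    if graph_optimization_level is not None:
        sess_options.graph_optimization_level = graph_optimization_level
    if intra_op_num_threads is not None:
        sess_options.intra_op_num_threads = intra_op_num_threads
    # Prefer CUDA when a GPU is requested; fall back to CPU otherwise.
    providers = (['CUDAExecutionProvider', 'CPUExecutionProvider']
                 if use_gpu else ['CPUExecutionProvider'])
    return onnxruntime.InferenceSession(model_path, sess_options,
                                        providers=providers)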
Example #3
import psutil

# OpenMP-aware variant: when ONNX Runtime is built with OpenMP, threading is
# handled by the OpenMP runtime, so a single intra-op thread is requested.
def run_model(model_path, all_inputs, use_gpu, use_openmp,
              disable_optimization):
    # onnxruntime must be imported only after the OpenMP environment
    # variables are set, so the import is delayed until this point.
    import onnxruntime

    graph_optimization_level = None
    if disable_optimization:
        graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL

    intra_op_num_threads = 1 if use_openmp else psutil.cpu_count(logical=False)

    session = create_session(model_path, use_gpu, intra_op_num_threads,
                             graph_optimization_level)

    output_names = [output.name for output in session.get_outputs()]
    results, latency_list = onnxruntime_inference(session, all_inputs,
                                                  output_names)
    return results, latency_list, output_names
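Since Example #3 defers the onnxruntime import until after OpenMP is configured, a caller would set the OpenMP environment variables before invoking run_model. A hypothetical usage sketch (the variable values and model path are illustrative, not from the original source):

import os
import psutil

# OpenMP reads these at load time, so they must be set before the first
# onnxruntime import inside run_model.
os.environ["OMP_NUM_THREADS"] = str(psutil.cpu_count(logical=False))
os.environ["OMP_WAIT_POLICY"] = "ACTIVE"

# all_inputs: a list of input-feed dicts prepared by the caller.
results, latency_list, output_names = run_model(
    "model.onnx", all_inputs, use_gpu=False, use_openmp=True,
    disable_optimization=False)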