def run_model(baseline_model, all_inputs, use_gpu, use_openmp, graph_optimization_level):
    """Build a session for *baseline_model* and benchmark it over *all_inputs*.

    Returns a ``(results, latency_list, output_names)`` tuple as produced by
    ``onnxruntime_inference``.

    NOTE(review): this file defines ``run_model`` more than once; only the
    last definition is live at import time — confirm which variant callers use.
    """
    # Use all logical cores and keep worker threads spinning between runs.
    session = create_session(
        baseline_model,
        use_gpu,
        use_openmp,
        graph_optimization_level,
        num_threads=psutil.cpu_count(logical=True),
        wait_policy='ACTIVE',
    )

    output_names = [out.name for out in session.get_outputs()]
    results, latency_list = onnxruntime_inference(session, all_inputs, output_names)
    return results, latency_list, output_names
def run_model(model_path, all_inputs, use_gpu, disable_optimization):
    """Run the model at *model_path* over *all_inputs* and measure latency.

    When *disable_optimization* is true, graph optimizations are turned off
    (``ORT_DISABLE_ALL``); otherwise the session default is used.
    Returns a ``(results, latency_list, output_names)`` tuple.

    NOTE(review): this file defines ``run_model`` more than once; only the
    last definition is live at import time — confirm which variant callers use.
    """
    import onnxruntime

    # None lets create_session fall back to its default optimization level.
    graph_optimization_level = (
        onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL
        if disable_optimization
        else None
    )

    # One intra-op thread per physical core.
    intra_op_num_threads = psutil.cpu_count(logical=False)
    session = create_session(model_path, use_gpu, intra_op_num_threads, graph_optimization_level)

    output_names = [out.name for out in session.get_outputs()]
    results, latency_list = onnxruntime_inference(session, all_inputs, output_names)
    return results, latency_list, output_names
def run_model(model_path, all_inputs, use_gpu, use_openmp, disable_optimization):
    """Run the model at *model_path* over *all_inputs* and measure latency.

    *use_openmp* selects the threading model: with OpenMP enabled the
    onnxruntime intra-op pool is pinned to a single thread so OpenMP owns
    the parallelism. Returns ``(results, latency_list, output_names)``.

    NOTE(review): this file defines ``run_model`` more than once; only the
    last definition is live at import time — confirm which variant callers use.
    """
    # onnxruntime must be imported after the OpenMP environment variables
    # are set, so the import is deliberately delayed to call time.
    import onnxruntime

    # None lets create_session fall back to its default optimization level.
    if disable_optimization:
        graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL
    else:
        graph_optimization_level = None

    if use_openmp:
        # OpenMP handles threading; keep ORT's own intra-op pool minimal.
        intra_op_num_threads = 1
    else:
        intra_op_num_threads = psutil.cpu_count(logical=False)

    session = create_session(model_path, use_gpu, intra_op_num_threads, graph_optimization_level)

    output_names = [out.name for out in session.get_outputs()]
    results, latency_list = onnxruntime_inference(session, all_inputs, output_names)
    return results, latency_list, output_names