def run(args):
    # Run the ONNX model with the nGraph backend and check the outputs
    # against the reference test data.
    onnx_filename = os.path.join(args.test_dir, args.model_file)
    input_names, output_names = run_onnx_util.onnx_input_output_names(
        onnx_filename)
    test_data_dir = os.path.join(args.test_dir, 'test_data_set_0')
    inputs, outputs = run_onnx_util.load_test_data(
        test_data_dir, input_names, output_names)

    model = onnx.load(onnx_filename)
    ng_func = import_onnx_model(model)
    runtime = ng.runtime(backend_name=args.backend)
    computation = runtime.computation(ng_func)

    inputs = [v for n, v in inputs]
    outputs = [v for n, v in outputs]
    actual_outputs = computation(*inputs)

    for i, (name, expected, actual) in enumerate(
            zip(output_names, outputs, actual_outputs)):
        np.testing.assert_allclose(expected, actual,
                                   rtol=1e-3, atol=1e-4, err_msg=name)
        print('%s: OK' % name)
    print('ALL OK')

    def compute():
        computation(*inputs)

    return run_onnx_util.run_benchmark(compute, args.iterations)
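# All of the runners in this section share a run_onnx_util module. The
# following is a minimal, hypothetical sketch of what its data-loading
# helpers could look like, assuming the standard ONNX test-data layout of
# input_*.pb / output_*.pb TensorProto files; the real helpers may differ.

import os

import onnx
import onnx.numpy_helper


def onnx_input_output_names(onnx_filename):
    # Graph inputs that are also initializers are weights, not real inputs.
    model = onnx.load(onnx_filename)
    initializer_names = {init.name for init in model.graph.initializer}
    input_names = [i.name for i in model.graph.input
                   if i.name not in initializer_names]
    output_names = [o.name for o in model.graph.output]
    return input_names, output_names


def load_test_data(test_data_dir, input_names, output_names):
    # Returns lists of (name, numpy_array) pairs in graph order.
    def load(prefix, names):
        arrays = []
        for i, name in enumerate(names):
            pb = os.path.join(test_data_dir, '%s_%d.pb' % (prefix, i))
            tensor = onnx.TensorProto()
            with open(pb, 'rb') as f:
                tensor.ParseFromString(f.read())
            arrays.append((name, onnx.numpy_helper.to_array(tensor)))
        return arrays

    return load('input', input_names), load('output', output_names)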
def run(args):
    # Run the ONNX model with onnxruntime and check the outputs against
    # the reference test data.
    onnx_filename = run_onnx_util.onnx_model_file(args.test_dir, args.model_file)
    input_names, output_names = run_onnx_util.onnx_input_output_names(
        onnx_filename)
    test_data_dir = os.path.join(args.test_dir, 'test_data_set_0')
    inputs, outputs = run_onnx_util.load_test_data(
        test_data_dir, input_names, output_names)

    sess = rt.InferenceSession(onnx_filename)

    inputs = dict(inputs)
    outputs = [v for n, v in outputs]
    actual_outputs = sess.run(output_names, inputs)

    for i, (name, expected, actual) in enumerate(
            zip(output_names, outputs, actual_outputs)):
        np.testing.assert_allclose(expected, actual,
                                   rtol=1e-3, atol=1e-4, err_msg=name)
        print('%s: OK' % name)
    print('ALL OK')

    def compute():
        sess.run(output_names, inputs)

    return run_onnx_util.run_benchmark(compute, args.iterations)
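# Two more run_onnx_util helpers are assumed by the runners: a model-file
# resolver and a benchmark loop. A plausible sketch, not the actual
# implementation:

import os
import time


def onnx_model_file(test_dir, model_file):
    # Fall back to the conventional name when no explicit file is given.
    if model_file:
        return os.path.join(test_dir, model_file)
    return os.path.join(test_dir, 'model.onnx')


def run_benchmark(compute, iterations):
    # Time `iterations` calls of compute() and report the mean latency.
    if iterations <= 0:
        return 0.0
    start = time.time()
    for _ in range(iterations):
        compute()
    elapsed = (time.time() - start) / iterations
    print('Elapsed: %.3f msec' % (elapsed * 1000))
    return elapsed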
def run(args):
    # Convert the ONNX model with the dldt (OpenVINO) model optimizer and
    # run the result with the inference engine.
    test_dir = os.path.abspath(args.test_dir)
    test_dir_name = test_dir.split(os.path.sep)[-1]
    onnx_filename = run_onnx_util.onnx_model_file(test_dir, args.model_file)
    input_names, output_names = run_onnx_util.onnx_input_output_names(
        onnx_filename)
    test_data_dir = os.path.join(test_dir, 'test_data_set_0')
    inputs, outputs = run_onnx_util.load_test_data(
        test_data_dir, input_names, output_names)

    mo_output_dir = os.path.join(
        'out', 'dldt_{}.{}'.format(test_dir_name, args.data_type.lower()))
    mo_model_xml = os.path.join(mo_output_dir, 'model.xml')
    mo_model_bin = os.path.join(mo_output_dir, 'model.bin')

    # Make the optimized model unless one already exists.
    not_found_mo = True
    if not os.path.exists(mo_output_dir):
        os.makedirs(mo_output_dir, exist_ok=True)
    elif os.path.exists(mo_model_xml) and os.path.exists(mo_model_bin):
        not_found_mo = False

    if args.force_mo or not_found_mo:
        args.input_model = onnx_filename
        args.output_dir = mo_output_dir
        from mo.main import driver
        driver(args)
    else:
        log.basicConfig(format="[ %(levelname)s ] %(message)s",
                        level=args.log_level, stream=sys.stdout)

    # Compute with the inference engine.
    return inference(args, mo_model_xml, mo_model_bin, inputs, outputs)
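# The dldt runner delegates to an inference() helper that is not shown in
# this section. A rough sketch of what it might do with the OpenVINO
# Inference Engine Python API (class and property names vary between
# releases, and args.device is an assumed flag, so treat this purely as an
# illustration):

import numpy as np

import run_onnx_util


def inference(args, model_xml, model_bin, inputs, outputs):
    from openvino.inference_engine import IECore

    ie = IECore()
    net = ie.read_network(model=model_xml, weights=model_bin)
    exec_net = ie.load_network(network=net, device_name=args.device)

    # Assumes the model optimizer kept the original ONNX input names.
    actual = exec_net.infer(inputs=dict(inputs))

    # Compare by position; IR output names may not match the ONNX names.
    for (name, expected), actual_value in zip(outputs, actual.values()):
        np.testing.assert_allclose(expected, actual_value,
                                   rtol=1e-3, atol=1e-4, err_msg=name)
        print('%s: OK' % name)
    print('ALL OK')

    def compute():
        exec_net.infer(inputs=dict(inputs))

    return run_onnx_util.run_benchmark(compute, args.iterations)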
def run(args):
    # Run the ONNX model with the onnx-tf (TensorFlow) backend and check
    # the outputs against the reference test data.
    onnx_filename = os.path.join(args.test_dir, args.model_file)
    input_names, output_names = run_onnx_util.onnx_input_output_names(
        onnx_filename)
    test_data_dir = os.path.join(args.test_dir, 'test_data_set_0')
    inputs, outputs = run_onnx_util.load_test_data(
        test_data_dir, input_names, output_names)

    model = onnx.load(onnx_filename)
    tf_model = onnx_tf.backend.prepare(model)

    inputs = dict(inputs)
    outputs = dict(outputs)
    actual_outputs = tf_model.run(inputs)

    for name in output_names:
        expected = outputs[name]
        actual = actual_outputs[name]
        np.testing.assert_allclose(expected, actual,
                                   rtol=1e-3, atol=1e-4, err_msg=name)
        print('%s: OK' % name)
    print('ALL OK')

    def compute():
        tf_model.run(inputs)

    return run_onnx_util.run_benchmark(compute, args.iterations)
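# Each of these run() functions is typically driven by a small argparse
# wrapper. A hypothetical example of how the onnx-tf runner above could be
# invoked; the flag names here are illustrative, not necessarily the real
# CLI of these scripts:

import argparse


def main():
    parser = argparse.ArgumentParser(description='Run an ONNX test case')
    parser.add_argument('test_dir',
                        help='Directory with the model and test_data_set_0')
    parser.add_argument('--model_file', default='model.onnx')
    parser.add_argument('--iterations', type=int, default=1,
                        help='Number of benchmark iterations')
    args = parser.parse_args()
    run(args)


if __name__ == '__main__':
    main()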
def run(args):
    # Compile the ONNX model with TVM (NNVM or Relay frontend) and check
    # the outputs against the reference test data.
    onnx_model = onnx.load_model(
        run_onnx_util.onnx_model_file(args.test_dir, args.model_file))
    ctx = tvm.gpu()
    input_names, output_names = run_onnx_util.onnx_input_output_names(
        os.path.join(args.test_dir, args.model_file))
    test_data_dir = os.path.join(args.test_dir, 'test_data_set_0')
    inputs, outputs = run_onnx_util.load_test_data(
        test_data_dir, input_names, output_names)
    inputs = dict(inputs)

    graph_module = None
    if args.frontend == 'nnvm':
        graph_module = build_graph_nnvm(args, ctx, onnx_model, inputs, input_names)
    elif args.frontend == 'relay':
        graph_module = build_graph_relay(args, ctx, onnx_model, inputs, input_names)
    else:
        raise RuntimeError('Invalid frontend: {}'.format(args.frontend))

    graph_module.run()

    for i, (name, expected) in enumerate(outputs):
        tvm_output = tvm.nd.empty(expected.shape, expected.dtype, ctx=ctx)
        actual = graph_module.get_output(i, tvm_output).asnumpy()
        np.testing.assert_allclose(expected, actual,
                                   rtol=1e-3, atol=1e-4, err_msg=name)
        print('%s: OK' % name)
    print('ALL OK')

    def compute():
        graph_module.run()
        cupy.cuda.device.Device().synchronize()

    return run_onnx_util.run_benchmark(compute, args.iterations)
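# build_graph_relay() and build_graph_nnvm() are defined elsewhere. As an
# illustration only, a Relay-based builder could look roughly like the
# following; the from_onnx/build return values changed across TVM
# releases, so this is an assumption rather than the real helper:

import tvm
from tvm import relay
from tvm.contrib import graph_runtime


def build_graph_relay(args, ctx, onnx_model, inputs, input_names):
    # Shapes of the real inputs drive the Relay frontend.
    shape_dict = {name: inputs[name].shape for name in input_names}
    sym, params = relay.frontend.from_onnx(onnx_model, shape=shape_dict)

    target = tvm.target.cuda()
    with relay.build_config(opt_level=args.opt_level):
        graph, lib, params = relay.build(sym, target, params=params)

    graph_module = graph_runtime.create(graph, lib, ctx)
    graph_module.set_input(**params)
    for name in input_names:
        graph_module.set_input(name, tvm.nd.array(inputs[name], ctx=ctx))
    return graph_module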
def run(args):
    # Compile the ONNX model with NNVM and check the outputs against the
    # reference test data.
    onnx_model = onnx.load_model(os.path.join(args.test_dir, args.model_file))
    symbol, params = nnvm.frontend.from_onnx(onnx_model)
    input_names = symbol.list_input_names()
    output_names = symbol.list_output_names()
    test_data_dir = os.path.join(args.test_dir, 'test_data_set_0')
    inputs, outputs = run_onnx_util.load_test_data(
        test_data_dir, input_names, output_names)
    inputs = dict(inputs)

    # assert len(input_names) == len(inputs) + len(params)
    # assert len(output_names) == len(outputs)

    graph, lib, params = compile(
        symbol, args.target, input_names, inputs, params,
        args.opt_level, args.autotvm_log)

    if args.dump_nnvm:
        print(graph.ir())
        print(graph.json())

    ctx = tvm.gpu()

    # Prepare inputs.
    tvm_inputs = {}
    for name, value in inputs.items():
        tvm_inputs[name] = tvm.nd.array(value, ctx=ctx)
    for name, value in params.items():
        tvm_inputs[name] = tvm.nd.array(value, ctx=ctx)

    graph_module = None
    if args.debug:
        try:
            graph_module = debug_runtime.create(graph, lib, ctx)
        except Exception:
            print('debug_runtime is disabled. '
                  'Set USE_GRAPH_RUNTIME_DEBUG=ON and rebuild TVM')
    if graph_module is None:
        graph_module = graph_runtime.create(graph, lib, ctx)

    graph_module.set_input(**tvm_inputs)
    graph_module.run()

    for i, (name, expected) in enumerate(outputs):
        tvm_output = tvm.nd.empty(expected.shape, expected.dtype, ctx=ctx)
        actual = graph_module.get_output(i, tvm_output).asnumpy()
        np.testing.assert_allclose(expected, actual,
                                   rtol=1e-3, atol=1e-4, err_msg=name)
        print('%s: OK' % name)
    print('ALL OK')

    def compute():
        graph_module.run()
        cupy.cuda.device.Device().synchronize()

    return run_onnx_util.run_benchmark(compute, args.iterations)
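# The compile() helper used above is also defined elsewhere. A sketch of
# how NNVM compilation with an optional AutoTVM tuning log might be put
# together (assumed, not the actual code):

import nnvm.compiler
from tvm import autotvm


def compile(symbol, target, input_names, inputs, params,
            opt_level, autotvm_log):
    # Shapes and dtypes come from both the test inputs and the weights.
    shape_dict = {name: value.shape for name, value in inputs.items()}
    shape_dict.update({name: value.shape for name, value in params.items()})
    dtype_dict = {name: value.dtype for name, value in inputs.items()}
    dtype_dict.update({name: value.dtype for name, value in params.items()})

    def build():
        with nnvm.compiler.build_config(opt_level=opt_level):
            return nnvm.compiler.build(
                symbol, target, shape=shape_dict, dtype=dtype_dict,
                params=params)

    if autotvm_log:
        # Reuse tuned schedules when a tuning log is available.
        with autotvm.apply_history_best(autotvm_log):
            return build()
    return build()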
def run(args):
    # Build a TensorRT engine from the ONNX model and check the outputs
    # against the reference test data.
    onnx_filename = os.path.join(args.test_dir, args.model_file)
    input_names, output_names = run_onnx_util.onnx_input_output_names(
        onnx_filename)
    test_data_dir = os.path.join(args.test_dir, 'test_data_set_0')
    inputs, outputs = run_onnx_util.load_test_data(
        test_data_dir, input_names, output_names)

    with open(onnx_filename, 'rb') as f:
        onnx_proto = f.read()

    if args.debug:
        logger = tensorrt.Logger(tensorrt.Logger.Severity.INFO)
    else:
        logger = tensorrt.Logger()

    builder = tensorrt.Builder(logger)
    if args.fp16_mode:
        builder.fp16_mode = True
    # TODO(hamaji): Infer batch_size from inputs.
    builder.max_batch_size = args.batch_size
    network = builder.create_network()
    parser = tensorrt.OnnxParser(network, logger)
    if not parser.parse(onnx_proto):
        for i in range(parser.num_errors):
            sys.stderr.write('ONNX import failure: %s\n' % parser.get_error(i))
        raise RuntimeError('ONNX import failed')
    engine = builder.build_cuda_engine(network)
    context = engine.create_execution_context()

    # Sanity-check that the engine bindings match the test data.
    assert len(inputs) + len(outputs) == engine.num_bindings
    for i, (_, input) in enumerate(inputs):
        assert args.batch_size == input.shape[0]
        assert input.shape[1:] == engine.get_binding_shape(i)
    for i, (_, output) in enumerate(outputs):
        assert args.batch_size == output.shape[0]
        i += len(inputs)
        assert output.shape[1:] == engine.get_binding_shape(i)

    inputs = [v for n, v in inputs]
    outputs = [v for n, v in outputs]
    gpu_inputs = to_gpu(inputs)
    gpu_outputs = []
    for output in outputs:
        gpu_outputs.append(cupy.zeros_like(cupy.array(output)))
    bindings = [a.data.ptr for a in gpu_inputs]
    bindings += [a.data.ptr for a in gpu_outputs]

    context.execute(args.batch_size, bindings)
    actual_outputs = to_cpu(gpu_outputs)

    for i, (name, expected, actual) in enumerate(
            zip(output_names, outputs, actual_outputs)):
        np.testing.assert_allclose(expected, actual,
                                   rtol=args.rtol, atol=args.atol, err_msg=name)
        print('%s: OK' % name)
    print('ALL OK')

    def compute():
        context.execute(args.batch_size, bindings)
        cupy.cuda.device.Device().synchronize()

    return run_onnx_util.run_benchmark(compute, args.iterations)
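# to_gpu() and to_cpu() are small helpers that are not shown in this
# section; with cupy they can plausibly be as simple as the following
# (an assumption about their shape, not the original definitions):

import cupy


def to_gpu(arrays):
    # Copy each NumPy array to device memory.
    return [cupy.array(a) for a in arrays]


def to_cpu(arrays):
    # Copy each CuPy array back to host memory as a NumPy array.
    return [cupy.asnumpy(a) for a in arrays]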