def test_bind_input_types(self):
    """
    Runs an ``Identity`` model through :epkg:`SessionIOBinding` for a
    range of numpy element types, on CPU and, when the provider is
    listed by onnxruntime, on CUDA. For every combination the input is
    bound twice: once as an OrtValue (``bind_ortvalue_input``) and once
    through its raw description (``bind_input`` with device, dtype,
    shape and data pointer). Both outputs must equal the input.
    """
    opset = onnx_opset_version()

    def run_and_compare(session, binding, target, expected):
        # Binds the output on the target device, runs the session and
        # compares the first output with the expected array.
        binding.bind_output('Y', target)
        session._sess.run_with_iobinding(binding, None)
        got = binding.get_outputs()[0].numpy()
        assert_almost_equal(expected, got)

    # pairs (device, list of providers)
    cpu_device = C_OrtDevice(
        C_OrtDevice.cpu(), C_OrtDevice.default_memory(), 0)
    targets = [(cpu_device, ['CPUExecutionProvider'])]
    if "CUDAExecutionProvider" in onnxrt.get_all_providers():
        cuda_device = C_OrtDevice(
            C_OrtDevice.cuda(), C_OrtDevice.default_memory(), 0)
        targets.append((cuda_device, ['CUDAExecutionProvider']))

    element_types = (
        np.float32, np.float64,
        np.int32, np.uint32,
        np.int64, np.uint64,
        np.int16, np.uint16,
        np.int8, np.uint8,
        np.float16, np.bool_,
    )

    for device, provider in targets:
        for dtype in element_types:
            with self.subTest(dtype=dtype, device=str(device)):
                x = np.arange(8).reshape((-1, 2)).astype(dtype)
                n_cols = x.shape[1]
                proto_dtype = NP_TYPE_TO_TENSOR_TYPE[x.dtype]

                # model: Y = Identity(X), first dimension is dynamic
                X = helper.make_tensor_value_info(
                    'X', proto_dtype, [None, n_cols])
                Y = helper.make_tensor_value_info(
                    'Y', proto_dtype, [None, n_cols])
                node_identity = helper.make_node('Identity', ['X'], ['Y'])
                graph_def = helper.make_graph(
                    [node_identity], 'lr', [X], [Y], [])
                model_def = helper.make_model(
                    graph_def, producer_name='dummy', ir_version=7,
                    producer_version="0",
                    opset_imports=[helper.make_operatorsetid('', opset)])
                session = onnxrt.InferenceSession(
                    model_def.SerializeToString(), providers=provider)

                # first way: the input is bound as an OrtValue
                binding = SessionIOBinding(session._sess)
                ort_value = C_OrtValue.ortvalue_from_numpy(x, device)
                binding.bind_ortvalue_input('X', ort_value)
                run_and_compare(session, binding, device, x)

                # second way: the input is bound through its description,
                # reusing the buffer held by ort_value
                binding = SessionIOBinding(session._sess)
                binding.bind_input(
                    'X', device, dtype, x.shape, ort_value.data_ptr())
                run_and_compare(session, binding, device, x)
def forward(self, inputs, training=False, forward_outputs_cache=None):
    """
    Implements forward function.

    :param inputs: inputs
    :param training: only inference or training as well
    :param forward_outputs_cache: optional container which receives
        the outputs of the forward pass when *training* is True,
        a new :epkg:`OrtValueVector` is created if it is None,
        it is not used when *training* is False
    :return: output as :epkg:`OrtValueVector`
    :raises RuntimeError: when *training* is False and the number of
        converted inputs differs from the number of expected
        input names
    """
    logger = self._logger
    cls = self.__class__

    def _log(msg, *args):
        logger.debug("[%s.forward] (%dI) " + msg,
                     cls.__name__, len(inputs), *args)

    if logger is not None:
        _log("begin with gradient" if training else "begin")
        _log("torch function %r", type(cls))
        _log("ort class %r", cls)
        _log("create OrtValueVector (through dlpack)")

    forward_inputs = cls.input_to_ort(inputs, cls._devices, cls._debug)

    if training:
        # The cache is selected with ``is None`` and not with ``or``:
        # a cache given by the caller is usually still empty at this
        # point, and an empty container may evaluate to False (it does
        # if OrtValueVector follows the usual container truthiness),
        # in which case ``or`` would silently replace it with a new
        # vector and the caller's cache would never be filled.
        if forward_outputs_cache is None:
            forward_outputs = OrtValueVector()
        else:
            forward_outputs = forward_outputs_cache

        # the state is stored on the instance (self.states_)
        state = PartialGraphExecutionState()
        self.states_.append(state)
        if logger is not None:
            _log("run_forward")
        cls._training_agent.run_forward(
            forward_inputs, forward_outputs, state, cls._cache)

        self.save_for_backward(inputs)
        if logger is not None:
            _log("end")
        return forward_outputs

    # inference only
    # what about bind_input (+ data_ptr)
    if len(forward_inputs) != len(cls._grad_input_names):
        raise RuntimeError(  # pragma: no cover
            "Size mismatch len(inputs)=%d, len(onnx inputs)=%d."
            % (len(forward_inputs), len(cls._grad_input_names)))
    iobinding = SessionIOBinding(cls._sess_eval._sess)
    if logger is not None:
        _log("bind inputs %r", cls._grad_input_names)
    for name, inp in zip(cls._grad_input_names, forward_inputs):
        iobinding.bind_ortvalue_input(name, inp)

    # bind output
    if logger is not None:
        _log("bind outputs %r", cls._output_names)
    for name, dev in zip(cls._output_names,
                         cls._fw_no_grad_output_device_info):
        iobinding.bind_output(name, dev)
        # if the shape is known in advance
        # iobinding.bind_output(
        #    output_desc.name, torch_tensor.device.type,
        #    _utils.get_device_index(target_device),
        #    _utils.dtype_torch_to_numpy(torch_tensor.dtype),
        #    list(torch_tensor.size()), torch_tensor.data_ptr())

    if logger is not None:
        _log("grad_enabled=False (run_with_iobinding)")
    cls._sess_eval._sess.run_with_iobinding(iobinding, cls._run_options)
    if logger is not None:
        _log("get_outputs")
    ortvalues = iobinding.get_outputs()
    if logger is not None:
        _log("to torck.tensor (%d)", len(ortvalues))
        _log("end")
    return ortvalues