def execution_session_run_forward(execution_session, onnx_model, device, *inputs):
    """Runs the forward graph on execution_session with given model inputs and device"""

    # Verify every input tensor lives on the expected device before running.
    _utils._check_same_device(device, "Input argument to forward", *inputs)

    # TODO: Try to reuse the output buffers as some of the output tensors are same sizes,
    # especially the backward graph outputs.
    # REVIEW(codemzs): Consolidate Training Agent with InferenceAgent on C++ side to not
    # have the need for passing IOBinding.
    state = C.PartialGraphExecutionState()

    # Wrap each torch tensor as an OrtValue for the session.
    forward_inputs = C.OrtValueVector()
    for tensor in inputs:
        forward_inputs.append(_utils._ortvalue_from_torch_tensor(tensor))

    forward_outputs = C.OrtValueVector()
    # Execute the forward graph; outputs are written into forward_outputs.
    execution_session.run_forward(forward_inputs, forward_outputs, state)

    # Convert the OrtValue outputs back to torch tensors.
    user_outputs = tuple(map(_utils._ortvalue_to_torch_tensor, forward_outputs))

    # Verify the outputs came back on the expected device as well.
    _utils._check_same_device(device, "Output argument from forward", *user_outputs)

    # Record per-output metadata alongside the partial-graph state for the backward pass.
    output_info = [(out.shape, out.device, out.dtype) for out in user_outputs]
    return user_outputs, RunStateInfo(state, output_info)
def execution_session_run_forward(execution_session, onnx_model, device, gradient_accumulation_manager, *inputs):
    """Runs the forward graph on execution_session with given model inputs and device"""

    # TODO: Try to reuse the output buffers as some of the output tensors are same sizes,
    # especially the backward graph outputs.
    # REVIEW(codemzs): Consolidate Training Agent with InferenceAgent on C++ side to not
    # have the need for passing IOBinding.
    state = C.PartialGraphExecutionState()
    forward_inputs = C.OrtValueVector()
    forward_inputs.reserve(len(inputs))

    for tensor in inputs:
        # TODO: Non-contiguous tensor input in execution_session_run_forward, need tensor copy.
        # DLPack export requires a dense layout, so densify lazily only when needed.
        dense = tensor if tensor.is_contiguous() else tensor.contiguous()
        if dense.device.type == "ort":
            # Already an ORT-backed tensor: hand it over directly.
            forward_inputs.push_back(C.aten_ort_tensor_to_ort_value(dense))
        else:
            # Export through dlpack; the second argument flags bool tensors
            # (presumably so ORT interprets the dlpack payload correctly — see helper).
            forward_inputs.push_back(_utils._torch_tensor_to_dlpack(dense), dense.dtype == torch.bool)

    forward_outputs = C.OrtValueVector()
    # Execute the forward graph, passing the accumulation cache through to the session.
    execution_session.run_forward(forward_inputs, forward_outputs, state, gradient_accumulation_manager.cache)

    # The manager converts outputs to torch tensors and may refresh its cache.
    user_outputs = gradient_accumulation_manager.extract_outputs_and_maybe_update_cache(forward_outputs, device)

    # Record per-output metadata alongside the partial-graph state for the backward pass.
    output_info = [(out.shape, out.device, out.dtype) for out in user_outputs]
    return user_outputs, _RunStateInfo(state, output_info)
def execution_session_run_forward(execution_session, onnx_model, *inputs):
    """Runs the forward graph on execution_session with given model inputs and device"""

    # TODO: Try to reuse the output buffers as some of the output tensors are same sizes,
    # especially the backward graph outputs.
    # REVIEW(codemzs): Consolidate Training Agent with InferenceAgent on C++ side to not
    # have the need for passing IOBinding.
    state = C.PartialGraphExecutionState()
    forward_inputs = C.OrtValueVector()
    forward_inputs.reserve(len(inputs))

    # Export each tensor through dlpack; the second argument flags bool tensors.
    for tensor in inputs:
        forward_inputs.push_back(to_dlpack(tensor), tensor.dtype == torch.bool)

    forward_outputs = C.OrtValueVector()
    # Execute the forward graph; outputs are written into forward_outputs.
    execution_session.run_forward(forward_inputs, forward_outputs, state)

    # Convert the OrtValue outputs back to torch tensors.
    user_outputs = tuple(map(_utils._ortvalue_to_torch_tensor, forward_outputs))

    # Record per-output metadata alongside the partial-graph state for the backward pass.
    output_info = [(out.shape, out.device, out.dtype) for out in user_outputs]
    return user_outputs, RunStateInfo(state, output_info)
def execution_session_run_forward(execution_session, onnx_model, device, gradient_accumulation_manager, *inputs):
    """Runs the forward graph on execution_session with given model inputs and device.

    Args:
        execution_session: session object exposing ``run_forward``.
        onnx_model: the exported ONNX model (kept for interface compatibility; not read here).
        device: device the extracted user outputs should be placed on.
        gradient_accumulation_manager: supplies the output cache passed to the session and
            converts/extracts the user outputs.
        *inputs: flattened torch tensors fed to the forward graph.

    Returns:
        Tuple ``(user_outputs, run_info)`` where ``run_info`` is a ``_RunStateInfo``
        holding the partial-graph execution state plus (shape, device, dtype) per output.
    """
    # Clear all gradient functions, to avoid a deadlock issue.
    # Check the called function for more detailed comments.
    clear_all_grad_fns()

    # TODO: Try to reuse the output buffers as some of the output tensors are same sizes,
    # especially the backward graph outputs.
    # REVIEW(codemzs): Consolidate Training Agent with InferenceAgent on C++ side to not
    # have the need for passing IOBinding.
    state = C.PartialGraphExecutionState()
    forward_inputs = C.OrtValueVector()
    forward_inputs.reserve(len(inputs))
    for input in inputs:
        # TODO: Non-contiguous tensor input in execution_session_run_forward, need tensor copy.
        # FIX: make the tensor contiguous before export, matching the sibling variant of this
        # function — DLPack export assumes a dense layout, so a strided/transposed input would
        # otherwise be handed to ORT incorrectly.
        if not input.is_contiguous():
            input = input.contiguous()
        if input.device.type == "ort":
            # Already an ORT-backed tensor: hand it over directly.
            forward_inputs.push_back(C.aten_ort_tensor_to_ort_value(input))
        else:
            valid_ort_tensor = _utils._torch_tensor_to_dlpack(input)
            # Second argument flags bool tensors (presumably so ORT interprets the
            # dlpack payload correctly — see _torch_tensor_to_dlpack).
            forward_inputs.push_back(valid_ort_tensor, input.dtype == torch.bool)

    forward_outputs = C.OrtValueVector()
    # Run and return module outputs; the accumulation cache is threaded through to the session.
    execution_session.run_forward(forward_inputs, forward_outputs, state, gradient_accumulation_manager.cache)
    user_outputs = gradient_accumulation_manager.extract_outputs_and_maybe_update_cache(forward_outputs, device)

    output_info = [(output.shape, output.device, output.dtype) for output in user_outputs]
    run_info = _RunStateInfo(state, output_info)
    # Return user outputs and forward run information
    return user_outputs, run_info