def prepare_io_binding(
    ort_session,
    input_ids,
    position_ids,
    attention_mask,
    past,
    output_buffers,
    output_shapes,
    beam_select_idx=None,
    input_log_probs=None,
    input_unfinished_sents=None,
    prev_step_results=None,
    prev_step_scores=None,
):
    """Return an IO binding object for a session.

    The base inputs (``input_ids``, ``position_ids``, ``attention_mask``,
    ``past``) and all outputs are bound by ``Gpt2Helper.prepare_io_binding``.
    This function then binds whichever of the five optional inputs were
    supplied, using each tensor's device type, size and raw data pointer.

    Args:
        ort_session: Session forwarded to ``Gpt2Helper.prepare_io_binding`` and
            to ``TypeHelper.get_io_numpy_type_map``.
            NOTE(review): presumably an onnxruntime ``InferenceSession`` -- confirm.
        input_ids, position_ids, attention_mask, past: Forwarded unchanged to
            ``Gpt2Helper.prepare_io_binding``.
        output_buffers, output_shapes: Forwarded unchanged to
            ``Gpt2Helper.prepare_io_binding``.
        beam_select_idx, input_log_probs, input_unfinished_sents,
        prev_step_results, prev_step_scores: Optional tensors; each one that is
            not ``None`` is bound under its own parameter name.

    Returns:
        The IO binding object produced by ``Gpt2Helper.prepare_io_binding``
        with the extra inputs bound.

    Raises:
        AssertionError: If a supplied optional tensor is not contiguous.
        KeyError: If a supplied optional input name is missing from the type
            map returned by ``TypeHelper.get_io_numpy_type_map``.
    """
    # Base inputs and every output are handled by the shared GPT-2 helper.
    binding = Gpt2Helper.prepare_io_binding(
        ort_session,
        input_ids,
        position_ids,
        attention_mask,
        past=past,
        output_buffers=output_buffers,
        output_shapes=output_shapes,
    )

    # Element types are looked up per input name from the session.
    numpy_type_of = TypeHelper.get_io_numpy_type_map(ort_session)

    # (input name, tensor) pairs in the order they are bound.
    optional_inputs = (
        ("beam_select_idx", beam_select_idx),
        ("input_log_probs", input_log_probs),
        ("input_unfinished_sents", input_unfinished_sents),
        ("prev_step_results", prev_step_results),
        ("prev_step_scores", prev_step_scores),
    )

    for input_name, value in optional_inputs:
        if value is None:
            continue

        # The binding receives a raw pointer, so the data must be contiguous.
        assert value.is_contiguous()

        device_type = value.device.type
        element_type = numpy_type_of[input_name]
        shape = list(value.size())
        binding.bind_input(
            input_name,
            device_type,
            0,
            element_type,
            shape,
            value.data_ptr(),
        )

    return binding
def prepare_io_binding(
    ort_session,
    input_ids,
    position_ids,
    attention_mask,
    past,
    output_buffers,
    output_shapes,
    beam_select_idx=None,
    input_log_probs=None,
    input_unfinished_sents=None,
    prev_step_results=None,
    prev_step_scores=None,
):
    """Return an IO binding object for a session.

    The binding is first created by ``Gpt2Helper.prepare_io_binding``. This
    function then binds, in order: every ``past_{i}`` tensor, the attention
    mask, the optional beam-search inputs, and every session output.

    The floating-point element type is ``numpy.float16`` when the buffer of the
    session's second output has dtype ``torch.float16``; otherwise it is
    ``numpy.float32``.

    Args:
        ort_session: Session object; ``get_outputs()`` is used to enumerate
            output names.
            NOTE(review): presumably an onnxruntime ``InferenceSession`` -- confirm.
        input_ids: Tensor forwarded to ``Gpt2Helper.prepare_io_binding``. Its
            data pointer also stands in for any ``past`` tensor whose own
            pointer is zero.
        position_ids: Forwarded to ``Gpt2Helper.prepare_io_binding``.
        attention_mask: Optional tensor, bound as ``attention_mask`` with the
            floating-point element type.
        past: Optional iterable of tensors, bound as ``past_0``, ``past_1``, ...
        output_buffers: Mapping from output name to the tensor receiving it.
        output_shapes: Mapping from output name to the shape to bind.
        beam_select_idx: Optional tensor, bound as ``numpy.longlong``.
        input_log_probs: Optional tensor, bound with the floating-point type.
        input_unfinished_sents: Optional tensor, bound as ``numpy.bool_``.
        prev_step_results: Optional tensor, bound as ``numpy.longlong``.
        prev_step_scores: Optional tensor, bound with the floating-point type.

    Returns:
        The IO binding object with all inputs and outputs bound.

    Raises:
        AssertionError: If any bound input tensor is not contiguous.
        KeyError: If ``output_buffers`` or ``output_shapes`` lacks an entry for
            one of the session's outputs.
    """
    # Bind inputs and outputs to the onnxruntime session via the shared helper.
    io_binding = Gpt2Helper.prepare_io_binding(
        ort_session,
        input_ids,
        position_ids,
        attention_mask,
        past=past,
        output_buffers=output_buffers,
        output_shapes=output_shapes,
    )

    # The buffer of the second output decides between fp16 and fp32.
    data_type = output_buffers[ort_session.get_outputs()[1].name].dtype
    float_type = numpy.float16 if data_type == torch.float16 else numpy.float32
    int64_type = numpy.longlong
    # ``numpy.bool`` was deprecated in NumPy 1.20 and removed in 1.24;
    # ``numpy.bool_`` is available in every release.
    bool_type = numpy.bool_

    # Bind inputs
    if past is not None:
        for i, past_i in enumerate(past):
            assert past_i.is_contiguous()
            data_ptr = past_i.data_ptr()
            if data_ptr == 0:
                # When past_sequence_length is 0, its data_ptr will be zero.
                # IO Binding asserts that data_ptr shall not be zero, so pass
                # the data pointer of input_ids instead. Actual data is not
                # used for past in that case, so it does not matter.
                data_ptr = input_ids.data_ptr()
            io_binding.bind_input(
                f"past_{i}",
                past_i.device.type,
                0,
                float_type,
                list(past_i.size()),
                data_ptr,
            )

    # (input name, tensor, element type) in the order the inputs are bound.
    optional_inputs = (
        ("attention_mask", attention_mask, float_type),
        ("beam_select_idx", beam_select_idx, int64_type),
        ("input_log_probs", input_log_probs, float_type),
        ("input_unfinished_sents", input_unfinished_sents, bool_type),
        ("prev_step_results", prev_step_results, int64_type),
        ("prev_step_scores", prev_step_scores, float_type),
    )
    for name, tensor, element_type in optional_inputs:
        if tensor is None:
            continue
        # The binding receives a raw pointer, so the data must be contiguous.
        assert tensor.is_contiguous()
        io_binding.bind_input(
            name,
            tensor.device.type,
            0,
            element_type,
            list(tensor.size()),
            tensor.data_ptr(),
        )

    # Bind outputs
    int64_outputs = {"output_selected_indices", "last_state", "current_step_results"}
    for output in ort_session.get_outputs():
        output_name = output.name
        output_buffer = output_buffers[output_name]
        logger.debug(
            f"{output_name} device type={output_buffer.device.type} shape={list(output_buffer.size())}"
        )

        if output_name in int64_outputs:
            element_type = int64_type
        elif output_name == "output_unfinished_sents":
            element_type = bool_type
        else:
            element_type = float_type

        io_binding.bind_output(
            output_name,
            output_buffer.device.type,
            0,
            element_type,
            output_shapes[output_name],
            output_buffer.data_ptr(),
        )

    return io_binding