Example #1
0
    def prepare_io_binding(
        ort_session,
        input_ids,
        position_ids,
        attention_mask,
        past,
        output_buffers,
        output_shapes,
        beam_select_idx=None,
        input_log_probs=None,
        input_unfinished_sents=None,
        prev_step_results=None,
        prev_step_scores=None,
    ):
        """Return an IO binding object for a session.

        The core binding is delegated to ``Gpt2Helper.prepare_io_binding``,
        which is given ``ort_session``, ``input_ids``, ``position_ids``,
        ``attention_mask``, ``past``, ``output_buffers`` and ``output_shapes``.
        Each of the remaining optional tensors that is not ``None`` is then
        bound as an additional input under its own parameter name.

        Args:
            ort_session: Session whose inputs/outputs are being bound.
            input_ids, position_ids, attention_mask, past: Forwarded unchanged
                to ``Gpt2Helper.prepare_io_binding``.
            output_buffers, output_shapes: Forwarded unchanged to
                ``Gpt2Helper.prepare_io_binding``.
            beam_select_idx, input_log_probs, input_unfinished_sents,
            prev_step_results, prev_step_scores: Optional tensors; each must be
                contiguous when supplied.

        Returns:
            The IO binding object with the extra inputs bound.

        Raises:
            AssertionError: If a supplied optional tensor is not contiguous.
        """
        # Base binding: the shared helper takes care of the common inputs and the outputs.
        binding = Gpt2Helper.prepare_io_binding(
            ort_session,
            input_ids,
            position_ids,
            attention_mask,
            past=past,
            output_buffers=output_buffers,
            output_shapes=output_shapes,
        )

        # Element type of every session input/output, keyed by name.
        np_type_of = TypeHelper.get_io_numpy_type_map(ort_session)

        # Extra inputs, listed in the order they are bound.
        extra_inputs = (
            ("beam_select_idx", beam_select_idx),
            ("input_log_probs", input_log_probs),
            ("input_unfinished_sents", input_unfinished_sents),
            ("prev_step_results", prev_step_results),
            ("prev_step_scores", prev_step_scores),
        )
        for input_name, value in extra_inputs:
            if value is None:
                continue

            # The binding receives a raw pointer, so the memory must be contiguous.
            assert value.is_contiguous()
            binding.bind_input(
                input_name,
                value.device.type,
                0,
                np_type_of[input_name],
                list(value.size()),
                value.data_ptr(),
            )

        return binding
Example #2
0
    def prepare_io_binding(ort_session,
                           input_ids,
                           position_ids,
                           attention_mask,
                           past,
                           output_buffers,
                           output_shapes,
                           beam_select_idx=None,
                           input_log_probs=None,
                           input_unfinished_sents=None,
                           prev_step_results=None,
                           prev_step_scores=None):
        """Return an IO binding object for a session.

        The binding is first created by ``Gpt2Helper.prepare_io_binding`` and
        then the ``past_{i}``, ``attention_mask`` and optional beam-search
        inputs, followed by every session output, are (re)bound with an
        explicit element type.

        The floating-point element type is taken from the buffer of the
        session's second output: ``numpy.float16`` when that buffer's dtype is
        ``torch.float16``, otherwise ``numpy.float32``.

        Args:
            ort_session: Session providing ``get_outputs()``.
            input_ids: Forwarded to ``Gpt2Helper.prepare_io_binding``; its data
                pointer is also used as a stand-in for empty ``past`` tensors.
            position_ids: Forwarded to ``Gpt2Helper.prepare_io_binding``.
            attention_mask: Optional tensor bound as ``attention_mask`` with
                the floating-point element type.
            past: Optional iterable of tensors bound as ``past_0``, ``past_1``, ...
            output_buffers: Mapping from output name to the tensor that
                receives that output.
            output_shapes: Mapping from output name to the shape to bind.
            beam_select_idx: Optional tensor bound as ``numpy.longlong``.
            input_log_probs: Optional tensor bound with the floating-point type.
            input_unfinished_sents: Optional tensor bound as ``numpy.bool_``.
            prev_step_results: Optional tensor bound as ``numpy.longlong``.
            prev_step_scores: Optional tensor bound with the floating-point type.

        Returns:
            The populated IO binding object.

        Raises:
            AssertionError: If any supplied input tensor is not contiguous.
        """

        # Bind inputs and outputs to onnxruntime session
        io_binding = Gpt2Helper.prepare_io_binding(
            ort_session,
            input_ids,
            position_ids,
            attention_mask,
            past=past,
            output_buffers=output_buffers,
            output_shapes=output_shapes)

        # Bind inputs
        data_type = output_buffers[ort_session.get_outputs()[1].name].dtype
        float_type = numpy.float16 if data_type == torch.float16 else numpy.float32

        if past is not None:
            for i, past_i in enumerate(past):
                assert past_i.is_contiguous()

                data_ptr = past_i.data_ptr()
                if data_ptr == 0:
                    # When past_sequence_length is 0, its data_ptr will be zero. IO Binding asserts that data_ptr shall not be zero.
                    # Here we workaround and pass data pointer of input_ids. Actual data is not used for past so it does not matter.
                    data_ptr = input_ids.data_ptr()

                io_binding.bind_input(f'past_{i}',
                                      past_i.device.type, 0, float_type,
                                      list(past_i.size()), data_ptr)

        if attention_mask is not None:
            assert attention_mask.is_contiguous()
            io_binding.bind_input('attention_mask', attention_mask.device.type,
                                  0, float_type, list(attention_mask.size()),
                                  attention_mask.data_ptr())

        # Optional inputs as (name, tensor, element type), in binding order.
        # numpy.bool_ is used rather than numpy.bool: the latter was a
        # deprecated alias of the builtin bool and is missing from
        # NumPy 1.24-1.26, where accessing it raises AttributeError.
        optional_inputs = (
            ("beam_select_idx", beam_select_idx, numpy.longlong),
            ("input_log_probs", input_log_probs, float_type),
            ("input_unfinished_sents", input_unfinished_sents, numpy.bool_),
            ("prev_step_results", prev_step_results, numpy.longlong),
            ("prev_step_scores", prev_step_scores, float_type),
        )
        for input_name, tensor, np_type in optional_inputs:
            if tensor is None:
                continue
            assert tensor.is_contiguous()
            io_binding.bind_input(
                input_name,
                tensor.device.type,
                0,
                np_type,
                list(tensor.size()),
                tensor.data_ptr(),
            )

        # Bind outputs
        longlong_outputs = ("output_selected_indices", "last_state",
                            "current_step_results")
        for output in ort_session.get_outputs():
            output_name = output.name
            output_buffer = output_buffers[output_name]
            logger.debug(
                f"{output_name} device type={output_buffer.device.type} shape={list(output_buffer.size())}"
            )

            # Pick the element type from the output name; anything not listed
            # explicitly is bound with the floating-point type.
            if output_name in longlong_outputs:
                np_type = numpy.longlong
            elif output_name == "output_unfinished_sents":
                np_type = numpy.bool_
            else:
                np_type = float_type

            io_binding.bind_output(
                output_name,
                output_buffer.device.type,
                0,
                np_type,
                output_shapes[output_name],
                output_buffer.data_ptr(),
            )

        return io_binding