Code Example #1
File: model.py Project: spnettec/kura
    def execute(self, requests):
        output0_dtype = self.output0_dtype
        output1_dtype = self.output1_dtype

        responses = []

        for request in requests:
            THRESHOLD = 0.20

            # Get input
            x_recon = pb_utils.get_input_tensor_by_name(
                request, "RECONSTR0").as_numpy()
            x_orig = pb_utils.get_input_tensor_by_name(request,
                                                       "ORIG0").as_numpy()

            # Mean squared error between the reconstructed input and the original input
            reconstruction_score = np.mean((x_orig - x_recon)**2, axis=1)

            anomaly = reconstruction_score > THRESHOLD

            # Create output tensors
            out_tensor_0 = pb_utils.Tensor(
                "ANOMALY_SCORE0", reconstruction_score.astype(output0_dtype))
            out_tensor_1 = pb_utils.Tensor("ANOMALY0",
                                           anomaly.astype(output1_dtype))

            inference_response = pb_utils.InferenceResponse(
                output_tensors=[out_tensor_0, out_tensor_1])
            responses.append(inference_response)

        return responses
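
Code Example #1 reads self.output0_dtype and self.output1_dtype, which are normally filled in by an initialize method that is not shown here. A minimal sketch of such an initialize, assuming the output names from this model's config (ANOMALY_SCORE0, ANOMALY0) and `import json` at module scope, might look like:

    def initialize(self, args):
        # Parse the model configuration passed in by Triton.
        model_config = json.loads(args['model_config'])

        # Look up the configured datatypes of the two outputs and convert
        # them to numpy dtypes for use in execute().
        output0_config = pb_utils.get_output_config_by_name(
            model_config, "ANOMALY_SCORE0")
        output1_config = pb_utils.get_output_config_by_name(
            model_config, "ANOMALY0")
        self.output0_dtype = pb_utils.triton_string_to_numpy(
            output0_config['data_type'])
        self.output1_dtype = pb_utils.triton_string_to_numpy(
            output1_config['data_type'])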
Code Example #2
    def execute(self, requests):
        """ Create a response sender object and use that
        for sending the response.
        """

        # This model does not support batching, so `requests` should always
        # contain a single request.
        if len(requests) != 1:
            raise pb_utils.TritonModelException(
                "unsupported batch size " + str(len(requests)))

        output0_dtype = self.output0_dtype
        output1_dtype = self.output1_dtype

        response_sender = requests[0].get_response_sender()
        in_0 = pb_utils.get_input_tensor_by_name(requests[0], "INPUT0")
        in_1 = pb_utils.get_input_tensor_by_name(requests[0], "INPUT1")
        out_0, out_1 = (in_0.as_numpy() + in_1.as_numpy(),
                        in_0.as_numpy() - in_1.as_numpy())

        out_tensor_0 = pb_utils.Tensor("OUTPUT0", out_0.astype(output0_dtype))
        out_tensor_1 = pb_utils.Tensor("OUTPUT1", out_1.astype(output1_dtype))
        response = pb_utils.InferenceResponse([out_tensor_0, out_tensor_1])

        response_sender.send(response)
        response_sender.send(
            flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
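
The two send calls above show the minimal single-response flow. If the model were configured as decoupled, the same sender could emit several responses before closing the stream; the following is only an illustrative sketch (the loop count and the reuse of INPUT0 are made up for the example), not part of the original model:

        # Hypothetical decoupled variant: stream three partial responses,
        # then close the stream with the FINAL flag.
        for i in range(3):
            partial = pb_utils.Tensor(
                "OUTPUT0", (in_0.as_numpy() + i).astype(output0_dtype))
            response_sender.send(pb_utils.InferenceResponse([partial]))
        response_sender.send(
            flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)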
Code Example #3
    def execute(self, requests):
        output0_dtype = self.output0_dtype
        output1_dtype = self.output1_dtype

        responses = []
        for request in requests:
            in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1")

            # If both of the tensors are in CPU, use NumPy.
            if in_0.is_cpu() and in_1.is_cpu():
                if (in_0.as_numpy().dtype.type is np.bytes_
                        or in_0.as_numpy().dtype == np.object_):
                    out_0, out_1 = (
                        in_0.as_numpy().astype(np.int32) - in_1.as_numpy().astype(np.int32),
                        in_0.as_numpy().astype(np.int32) + in_1.as_numpy().astype(np.int32))
                    out_tensor_0 = pb_utils.Tensor("OUTPUT0",
                                                   out_0.astype(output0_dtype))
                    out_tensor_1 = pb_utils.Tensor("OUTPUT1",
                                                   out_1.astype(output1_dtype))
                else:
                    in_0_pytorch, in_1_pytorch = from_dlpack(
                        in_0.to_dlpack()), from_dlpack(in_1.to_dlpack())
                    out_0, out_1 = (in_0_pytorch - in_1_pytorch,
                                    in_0_pytorch + in_1_pytorch)

                    if self.output0_dtype == np.object_:
                        out_tensor_0 = pb_utils.Tensor(
                            "OUTPUT0",
                            out_0.numpy().astype(output0_dtype))
                    else:
                        out_0 = out_0.type(
                            self.numpy_to_pytorch_dtype[output0_dtype])
                        out_tensor_0 = pb_utils.Tensor.from_dlpack(
                            "OUTPUT0", to_dlpack(out_0))

                    if self.output1_dtype == np.object_:
                        out_tensor_1 = pb_utils.Tensor(
                            "OUTPUT1",
                            out_1.numpy().astype(output1_dtype))
                    else:
                        out_1 = out_1.type(
                            self.numpy_to_pytorch_dtype[output1_dtype])
                        out_tensor_1 = pb_utils.Tensor.from_dlpack(
                            "OUTPUT1", to_dlpack(out_1))

            else:
                in_0_pytorch, in_1_pytorch = from_dlpack(
                    in_0.to_dlpack()).cuda(), from_dlpack(
                        in_1.to_dlpack()).cuda()
                out_0, out_1 = (in_0_pytorch - in_1_pytorch,
                                in_0_pytorch + in_1_pytorch)
                out_tensor_0 = pb_utils.Tensor.from_dlpack(
                    "OUTPUT0", to_dlpack(out_0))
                out_tensor_1 = pb_utils.Tensor.from_dlpack(
                    "OUTPUT1", to_dlpack(out_1))

            responses.append(
                pb_utils.InferenceResponse([out_tensor_0, out_tensor_1]))

        return responses
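
Code Example #3 also assumes a self.numpy_to_pytorch_dtype lookup table built earlier (typically in initialize). A plausible sketch of that mapping, limited to common dtypes, would be:

        # Assumed mapping from numpy dtypes to torch dtypes, used when casting
        # tensors before re-exporting them through DLPack.
        self.numpy_to_pytorch_dtype = {
            np.bool_: torch.bool,
            np.uint8: torch.uint8,
            np.int8: torch.int8,
            np.int16: torch.int16,
            np.int32: torch.int32,
            np.int64: torch.int64,
            np.float16: torch.float16,
            np.float32: torch.float32,
            np.float64: torch.float64,
        }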
Code Example #4
    def execute(self, requests: List[InferenceRequest]) -> List[InferenceResponse]:
        """Transforms the input batches by running through a NVTabular workflow.transform
        function.
        """
        responses = []
        for request in requests:
            # transform the triton tensors to a dict of name:numpy tensor
            input_tensors = {
                name: _convert_tensor(get_input_tensor_by_name(request, name))
                for name in self.input_dtypes
            }

            # multihots are represented as a tuple of (values, offsets)
            for name, dtype in self.input_multihots.items():
                values = _convert_tensor(get_input_tensor_by_name(request, name + "__values"))
                offsets = _convert_tensor(get_input_tensor_by_name(request, name + "__nnzs"))
                input_tensors[name] = (values, offsets)

            raw_tensor_tuples = self.runner.run_workflow(input_tensors)

            result = [Tensor(name, data) for name, data in raw_tensor_tuples]

            responses.append(InferenceResponse(result))

        return responses
Code Example #5
    def execute(self, requests):
        """Model supporting optional inputs. If the input is not provided, an
        input tensor of size 1 containing scalar 5 will be used."""
        responses = []
        for request in requests:
            input0_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            input1_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT1")

            if input0_tensor is not None:
                input0_numpy = input0_tensor.as_numpy()
            else:
                input0_numpy = np.array([5], dtype=np.int32)

            if input1_tensor is not None:
                input1_numpy = input1_tensor.as_numpy()
            else:
                input1_numpy = np.array([5], dtype=np.int32)

            output0_tensor = pb_utils.Tensor("OUTPUT0",
                                             input0_numpy + input1_numpy)
            output1_tensor = pb_utils.Tensor("OUTPUT1",
                                             input0_numpy - input1_numpy)
            responses.append(
                pb_utils.InferenceResponse([output0_tensor, output1_tensor]))

        return responses
Code Example #6
File: model.py Project: bnookala/server
    def execute(self, requests):
        responses = []
        for request in requests:
            input0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            print('ISCPU', input0.is_cpu())
            gpu_output = pb_utils.get_input_tensor_by_name(
                request, "GPU_OUTPUT").as_numpy()

            if input0.is_cpu():
                if not gpu_output[0]:
                    output0 = pb_utils.Tensor.from_dlpack(
                        "OUTPUT0", input0.to_dlpack())
                else:
                    output0_pytorch = from_dlpack(input0.to_dlpack()).cuda()
                    output0 = pb_utils.Tensor.from_dlpack(
                        "OUTPUT0", to_dlpack(output0_pytorch))
            else:
                if gpu_output[0]:
                    output0 = pb_utils.Tensor.from_dlpack(
                        "OUTPUT0", input0.to_dlpack())
                else:
                    output0_pytorch = from_dlpack(input0.to_dlpack()).cpu()
                    output0 = pb_utils.Tensor.from_dlpack(
                        "OUTPUT0", to_dlpack(output0_pytorch))

            next_gpu_output = pb_utils.Tensor("NEXT_GPU_OUTPUT",
                                              gpu_output[1:])
            responses.append(
                pb_utils.InferenceResponse([output0, next_gpu_output]))

        return responses
Code Example #7
    def execute(self, requests):
        """ This function is called on inference request.
        """

        output0_dtype = self.output0_dtype
        output1_dtype = self.output1_dtype

        responses = []
        for request in requests:
            input_tensors = request.inputs()
            in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1")
            if (in_0.as_numpy().dtype.type is np.bytes_
                    or in_0.as_numpy().dtype == np.object_):
                out_0, out_1 = (
                    in_0.as_numpy().astype(np.int32) - in_1.as_numpy().astype(np.int32),
                    in_0.as_numpy().astype(np.int32) + in_1.as_numpy().astype(np.int32))
            else:
                out_0, out_1 = (in_0.as_numpy() - in_1.as_numpy(),
                                in_0.as_numpy() + in_1.as_numpy())

            out_tensor_0 = pb_utils.Tensor("OUTPUT0",
                                           out_0.astype(output0_dtype))
            out_tensor_1 = pb_utils.Tensor("OUTPUT1",
                                           out_1.astype(output1_dtype))
            responses.append(
                pb_utils.InferenceResponse([out_tensor_0, out_tensor_1]))
        return responses
Code Example #8
    def execute(self, requests):
        """`execute` MUST be implemented in every Python model. `execute`
        function receives a list of pb_utils.InferenceRequest as the only
        argument. This function is called when an inference request is made
        for this model. Depending on the batching configuration (e.g. Dynamic
        Batching) used, `requests` may contain multiple requests. Every
        Python model must create one pb_utils.InferenceResponse for every
        pb_utils.InferenceRequest in `requests`. If there is an error, you can
        set the error argument when creating a pb_utils.InferenceResponse.

        Parameters
        ----------
        requests : list
          A list of pb_utils.InferenceRequest

        Returns
        -------
        list
          A list of pb_utils.InferenceResponse. The length of this list must
          be the same as `requests`
        """

        output0_dtype = self.output0_dtype
        output1_dtype = self.output1_dtype

        responses = []

        # Every Python model must iterate over every one of the requests
        # and create a pb_utils.InferenceResponse for each of them.
        for request in requests:
            # Get INPUT0
            in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            # Get INPUT1
            in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1")

            out_0, out_1 = (in_0.as_numpy() + in_1.as_numpy(),
                            in_0.as_numpy() - in_1.as_numpy())

            # Create output tensors. You need pb_utils.Tensor
            # objects to create pb_utils.InferenceResponse.
            out_tensor_0 = pb_utils.Tensor("OUTPUT0",
                                           out_0.astype(output0_dtype))
            out_tensor_1 = pb_utils.Tensor("OUTPUT1",
                                           out_1.astype(output1_dtype))

            # Create InferenceResponse. You can set an error here in case
            # there was a problem with handling this inference request.
            # Below is an example of how you can set errors in inference
            # response:
            #
            # pb_utils.InferenceResponse(
            #    output_tensors=..., TritonError("An error occurred"))
            inference_response = pb_utils.InferenceResponse(
                output_tensors=[out_tensor_0, out_tensor_1])
            responses.append(inference_response)

        # You should return a list of pb_utils.InferenceResponse. Length
        # of this list must match the length of `requests` list.
        return responses
Code Example #9
File: model.py Project: VibhuJawa/rapids-examples
    def execute(self, requests):
        """`execute` must be implemented in every Python model. `execute`
        function receives a list of pb_utils.InferenceRequest as the only
        argument. This function is called when an inference is requested
        for this model. Depending on the batching configuration (e.g. Dynamic
        Batching) used, `requests` may contain multiple requests. Every
        Python model must create one pb_utils.InferenceResponse for every
        pb_utils.InferenceRequest in `requests`. If there is an error, you can
        set the error argument when creating a pb_utils.InferenceResponse.

        Parameters
        ----------
        requests : list
          A list of pb_utils.InferenceRequest

        Returns
        -------
        list
          A list of pb_utils.InferenceResponse. The length of this list must
          be the same as `requests`
        """

        responses = []

        # Every Python model must iterate over every one of the requests
        # and create a pb_utils.InferenceResponse for each of them.
        for request in requests:
            # Get the input tensors as DLPack capsules
            input_ids = pb_utils.get_input_tensor_by_name(
                request, "input_ids").to_dlpack()
            attention_mask = pb_utils.get_input_tensor_by_name(
                request, "attention_mask").to_dlpack()

            # TODO: Set environment variable to prevent to(self.device)
            input_ids = from_dlpack(input_ids).long().to(self.device)
            attention_mask = from_dlpack(attention_mask).long().to(self.device)

            with torch.no_grad():
                outputs = self.model(input_ids, attention_mask)
                conf, preds = torch.max(outputs, dim=1)
                preds = preds.int()

            out_tensor_0 = pb_utils.Tensor("preds", preds.cpu().numpy())

            # Create InferenceResponse. You can set an error here in case
            # there was a problem with handling this inference request.
            # Below is an example of how you can set errors in inference
            # response:
            #
            # pb_utils.InferenceResponse(
            #    output_tensors=..., TritonError("An error occurred"))
            inference_response = pb_utils.InferenceResponse(
                output_tensors=[out_tensor_0])
            responses.append(inference_response)

        # You should return a list of pb_utils.InferenceResponse. Length
        # of this list must match the length of `requests` list.
        return responses
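
Code Example #9 relies on self.model and self.device being prepared during initialize, which is not shown. A sketch of such an initialize, assuming a TorchScript classifier stored next to model.py (the file name traced_model.pt is illustrative) and `import os` / `import torch` at module scope:

    def initialize(self, args):
        # Choose a device; a real model would typically honor
        # args['model_instance_device_id'] instead of this simple check.
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")

        # Illustrative path inside the model repository version directory.
        model_path = os.path.join(args['model_repository'],
                                  args['model_version'], 'traced_model.pt')
        self.model = torch.jit.load(model_path, map_location=self.device)
        self.model.eval()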
Code Example #10
File: model.py Project: spnettec/kura
    def execute(self, requests):
        output0_dtype = self.output0_dtype

        responses = []

        for request in requests:
            acc_x = pb_utils.get_input_tensor_by_name(request,
                                                      "ACC_X").as_numpy()
            acc_y = pb_utils.get_input_tensor_by_name(request,
                                                      "ACC_Y").as_numpy()
            acc_z = pb_utils.get_input_tensor_by_name(request,
                                                      "ACC_Z").as_numpy()
            gyro_x = pb_utils.get_input_tensor_by_name(request,
                                                       "GYRO_X").as_numpy()
            gyro_y = pb_utils.get_input_tensor_by_name(request,
                                                       "GYRO_Y").as_numpy()
            gyro_z = pb_utils.get_input_tensor_by_name(request,
                                                       "GYRO_Z").as_numpy()
            humidity = pb_utils.get_input_tensor_by_name(
                request, "HUMIDITY").as_numpy()
            pressure = pb_utils.get_input_tensor_by_name(
                request, "PRESSURE").as_numpy()
            temp_hum = pb_utils.get_input_tensor_by_name(
                request, "TEMP_HUM").as_numpy()
            temp_press = pb_utils.get_input_tensor_by_name(
                request, "TEMP_PRESS").as_numpy()

            out_0 = np.array([
                acc_y, acc_x, acc_z, pressure, temp_press, temp_hum, humidity,
                gyro_x, gyro_y, gyro_z
            ]).transpose()

            #                  ACC_Y     ACC_X     ACC_Z    PRESSURE   TEMP_PRESS   TEMP_HUM   HUMIDITY    GYRO_X    GYRO_Y    GYRO_Z
            min_vals = np.array([
                -0.132551, -0.049693, 0.759847, 976.001709, 38.724998,
                40.220890, 13.003981, -1.937896, -0.265019, -0.250647
            ])
            max_vals = np.array([
                0.093099, 0.150289, 1.177543, 1007.996338, 46.093750,
                48.355824, 23.506138, 1.923712, 0.219204, 0.671759
            ])

            # Min-max scaling using the training-set ranges above
            out_0_scaled = (out_0 - min_vals) / (max_vals - min_vals)

            # Create output tensor
            out_tensor_0 = pb_utils.Tensor("INPUT0",
                                           out_0_scaled.astype(output0_dtype))

            inference_response = pb_utils.InferenceResponse(
                output_tensors=[out_tensor_0])
            responses.append(inference_response)

        return responses
Code Example #11
    def execute(self,
                requests: List[InferenceRequest]) -> List[InferenceResponse]:
        """Transforms the input batches by running through a NVTabular workflow.transform
        function.
        """
        responses = []
        for request in requests:
            # create a cudf DataFrame from the triton request
            input_df = cudf.DataFrame({
                name: _convert_tensor(get_input_tensor_by_name(request, name))
                for name in self.input_dtypes
            })

            for name, dtype in self.input_multihots.items():
                values = as_column(
                    _convert_tensor(
                        get_input_tensor_by_name(request, name + "__values")))
                nnzs = as_column(
                    _convert_tensor(
                        get_input_tensor_by_name(request, name + "__nnzs")))
                input_df[name] = build_column(None,
                                              dtype=dtype,
                                              size=nnzs.size - 1,
                                              children=(nnzs, values))

            # use our NVTabular workflow to transform the dataframe
            output_df = nvtabular.workflow._transform_partition(
                input_df, [self.workflow.column_group])

            # convert back to a triton response
            output_tensors = []
            for name in output_df.columns:
                col = output_df[name]
                if is_list_dtype(col.dtype):
                    # convert list values to match TF dataloader
                    values = col.list.leaves.values_host.astype(
                        self.output_dtypes[name + "__values"])
                    values = values.reshape(len(values), 1)
                    output_tensors.append(Tensor(name + "__values", values))

                    offsets = col._column.offsets.values_host.astype(
                        self.output_dtypes[name + "__nnzs"])
                    nnzs = offsets[1:] - offsets[:-1]
                    nnzs = nnzs.reshape(len(nnzs), 1)
                    output_tensors.append(Tensor(name + "__nnzs", nnzs))
                else:
                    d = col.values_host.astype(self.output_dtypes[name])
                    d = d.reshape(len(d), 1)
                    output_tensors.append(Tensor(name, d))

            responses.append(InferenceResponse(output_tensors))

        return responses
Code Example #12
    def execute(self, requests):
        responses = []

        for request in requests:
            in0 = pb_utils.get_input_tensor_by_name(request, "PYTHON_INPUT_0")
            in0_t = in0.as_numpy()
            decoded = []
            for inp in in0_t:
                aud_sr = decode_audio(inp.tobytes())
                decoded.append((aud_sr[0], aud_sr[0].shape[0]))
            max_len = 0
            for dec in decoded:
                max_len = max_len if max_len > dec[1] else dec[1]
            audio = []
            audio_lens = []
            for aud, length in decoded:
                # Right-pad every clip to the batch maximum so np.array()
                # below produces a rectangular array.
                audio.append(np.pad(aud, (0, max_len - aud.shape[0])))
                audio_lens.append(length)
            audio_array = np.array(audio)
            len_array = np.array(audio_lens)
            dec_t = torch.Tensor(audio_array)
            len_t = torch.Tensor(len_array)
            dec_t = dec_t.cuda()
            len_t = len_t.cuda()
            out_audio, out_len = self.feat_proc(dec_t, len_t)
            out0_tensor = pb_utils.Tensor.from_dlpack(
                "PYTHON_OUTPUT_0", torch.utils.dlpack.to_dlpack(out_audio))

            response = pb_utils.InferenceResponse(output_tensors=[out0_tensor])
            responses.append(response)
        return responses
Code Example #13
File: model.py Project: lgardenhire/NVTabular
    def execute(self, requests: List[InferenceRequest]) -> List[InferenceResponse]:
        """Transforms the input batches by running through a NVTabular workflow.transform
        function.
        """
        responses = []
        for request in requests:
            # create a cudf DataFrame from the triton request
            input_df = cudf.DataFrame(
                {
                    name: _convert_tensor(get_input_tensor_by_name(request, name))
                    for name in self.workflow.column_group.input_column_names
                }
            )

            # use our NVTabular workflow to transform the dataframe
            output_df = nvtabular.workflow._transform_partition(
                input_df, [self.workflow.column_group]
            )

            # convert back to a triton response
            output_tensors = []
            for col in output_df.columns:
                d = output_df[col].values_host.astype(self.output_dtypes[col])
                d = d.reshape(len(d), 1)
                output_tensors.append(Tensor(col, d))

            responses.append(InferenceResponse(output_tensors))

        return responses
Code Example #14
    def execute(self,
                requests: List[InferenceRequest]) -> List[InferenceResponse]:
        """Transforms the input batches by running through a NVTabular workflow.transform
        function.
        """
        responses = []
        for request in requests:
            # create a cudf DataFrame from the triton request
            input_df = cudf.DataFrame({
                name: _convert_tensor(get_input_tensor_by_name(request, name))
                for name in self.workflow.column_group.input_column_names
            })

            # use our NVTabular workflow to transform the dataframe
            output_df = nvtabular.workflow._transform_partition(
                input_df, [self.workflow.column_group])

            output_tensors = []
            for col, val in self.output_columns.items():
                d = _convert_cudf2numpy(output_df[val["columns"]],
                                        val["dtype"])
                output_tensors.append(Tensor(col, d))

            responses.append(InferenceResponse(output_tensors))

        return responses
Code Example #15
    def execute(self, requests):

        output0_dtype = self.output0_dtype

        responses = []

        for request in requests:
            in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            input_smiles = in_0.as_numpy()[0].decode()
            print('processing', input_smiles)
            generated_smiles, neighboring_embeddings, pad_mask = \
                self.find_similars_smiles_list(input_smiles,
                                               num_requested=10,
                                               force_unique=True)

            out_0 = np.array(generated_smiles).astype(np.object_)

            out_tensor_0 = pb_utils.Tensor("OUTPUT0",
                                           out_0.astype(output0_dtype))

            # pb_utils.InferenceResponse(
            #    output_tensors=..., TritonError("An error occurred"))
            inference_response = pb_utils.InferenceResponse(
                output_tensors=[out_tensor_0])
            responses.append(inference_response)

        return responses
Code Example #16
    def execute(self, requests):
        for request in requests:
            input0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            gpu_output = pb_utils.get_input_tensor_by_name(
                request, "GPU_OUTPUT").as_numpy()

            thread = threading.Thread(target=self.response_thread,
                                      args=(request.get_response_sender(),
                                            input0, gpu_output))

            thread.daemon = True

            with self.inflight_thread_count_lck:
                self.inflight_thread_count += 1

            thread.start()
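
Code Example #16 starts a thread per request targeting self.response_thread, which is not shown in the snippet. A sketch of what such a helper could look like, assuming it echoes INPUT0 back (the output name is illustrative) and then closes the response stream:

    def response_thread(self, response_sender, input0, gpu_output):
        # Build a single response from the captured input, send it, then mark
        # the stream complete.
        out_tensor = pb_utils.Tensor("OUTPUT0", input0.as_numpy())
        response_sender.send(pb_utils.InferenceResponse([out_tensor]))
        response_sender.send(
            flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

        # Decrement the in-flight counter that execute() incremented, so that
        # finalize() can wait for all outstanding threads.
        with self.inflight_thread_count_lck:
            self.inflight_thread_count -= 1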
Code Example #17
File: model.py Project: Beam-wi/tritonserver
 def execute(self, requests):
     responses = []
     for request in requests:
         input_tensor = pb_utils.get_input_tensor_by_name(request, "IN")
         out_tensor = pb_utils.Tensor("OUT", input_tensor.as_numpy())
         # Attach an explicit error to every response (same pattern as Code
         # Example #21); without this, `error` would be undefined.
         error = pb_utils.TritonError('An error occurred during execution')
         responses.append(pb_utils.InferenceResponse([out_tensor], error))
     return responses
Code Example #18
 def execute(self, requests):
     """ This function is called on inference request.
     """
     responses = []
     for request in requests:
         input_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT0")
         out_tensor = pb_utils.Tensor("OUTPUT0", input_tensor.as_numpy())
         responses.append(pb_utils.InferenceResponse([out_tensor]))
     return responses
Code Example #19
File: model.py Project: luvwinnie/server
 def execute(self, requests):
     responses = []
     for request in requests:
         in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
         out_tensor_0 = pb_utils.Tensor(
             "OUTPUT0",
             in_0.as_numpy().astype(self._dtypes[self._index]))
         self._index += 1
         responses.append(pb_utils.InferenceResponse([out_tensor_0]))
     return responses
Code Example #20
 def execute(self, requests):
     """
     Identity model in Python backend.
     """
     responses = []
     for request in requests:
         input_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT0")
         out_tensor = pb_utils.Tensor("OUTPUT0", input_tensor.as_numpy())
         responses.append(pb_utils.InferenceResponse([out_tensor]))
     return responses
Code Example #21
 def execute(self, requests):
     """ This function is called on inference request.
     """
     responses = []
     for request in requests:
         input_tensor = pb_utils.get_input_tensor_by_name(request, "IN")
         out_tensor = pb_utils.Tensor("OUT", input_tensor.as_numpy())
         error = pb_utils.TritonError('An error occurred during execution')
         responses.append(pb_utils.InferenceResponse([out_tensor], error))
     return responses
Code Example #22
 def execute(self, requests):
     """
     The body of this model doesn't matter. The main purpose of this model is
     to test correct handling of Python errors in the `finalize` function.
     """
     responses = []
     for request in requests:
         input_tensor = pb_utils.get_input_tensor_by_name(request, "IN")
         out_tensor = pb_utils.Tensor("OUT", input_tensor.as_numpy())
         responses.append(pb_utils.InferenceResponse([out_tensor], error))
     return responses
Code Example #23
    def execute(self, requests):
        """Identity model in Python backend that works with GPU and CPU
        tensors."""

        responses = []
        for request in requests:
            input_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            out_tensor = pb_utils.Tensor.from_dlpack("OUTPUT0",
                                                     input_tensor.to_dlpack())
            responses.append(pb_utils.InferenceResponse([out_tensor]))
        return responses
Code Example #24
 def execute(self, requests):
     """
     The main purpose of this function is to check whether undefined
     variables are correctly handled in the `initialize` function. The body
     of this function is never called or used.
     """
     responses = []
     for request in requests:
         input_tensor = pb_utils.get_input_tensor_by_name(request, "IN")
         out_tensor = pb_utils.Tensor("OUT", input_tensor.as_numpy())
         responses.append(pb_utils.InferenceResponse([out_tensor], error))
     return responses
Code Example #25
File: model.py Project: luvwinnie/server
    def execute(self, requests):
        responses = []
        for request in requests:
            input_tensor = pb_utils.get_input_tensor_by_name(request, "IN")
            # This tensor is read-only, we need to make a copy
            input_data_ro = input_tensor.as_numpy()
            input_data = np.array(input_data_ro)
            result = self.model(torch.tensor(input_data))

            out_tensor = pb_utils.Tensor("OUT", result.detach().numpy())
            responses.append(pb_utils.InferenceResponse([out_tensor]))
        return responses
Code Example #26
 def execute(self, requests):
     """
     This model ensures that errors in the execute function are properly
     handled.
     """
     responses = []
     for request in requests:
         input_tensor = pb_utils.get_input_tensor_by_name(request, "IN")
         out_tensor = pb_utils.Tensor("OUT", input_tensor.as_numpy())
         lorem_ipsum  # intentionally undefined name to trigger an error
         responses.append(pb_utils.InferenceResponse([out_tensor]))
     return responses
Code Example #27
    def execute(self,
                requests: List[InferenceRequest]) -> List[InferenceResponse]:
        """Transforms the input batches by running through a NVTabular workflow.transform
        function.
        """
        responses = []
        for request in requests:
            # transform the triton tensors to a dict of name:numpy tensor
            input_tensors = {
                name: _convert_tensor(get_input_tensor_by_name(request, name))
                for name in self.input_dtypes
            }

            # multihots are represented as a tuple of (values, offsets)
            for name, dtype in self.input_multihots.items():
                values = _convert_tensor(
                    get_input_tensor_by_name(request, name + "__values"))
                offsets = _convert_tensor(
                    get_input_tensor_by_name(request, name + "__nnzs"))
                input_tensors[name] = (values, offsets)

            # use our NVTabular workflow to transform the dataset
            transformed, kind = _transform_tensors(input_tensors,
                                                   self.workflow.column_group)

            # if we don't have tensors in numpy format, convert back so that
            # we can return them to triton
            if kind != Supports.CPU_DICT_ARRAY:
                transformed, kind = convert_format(transformed, kind,
                                                   Supports.CPU_DICT_ARRAY)

            # convert to the format expected by the DL models
            if self.output_model == "hugectr":
                response = self._transform_hugectr_outputs(transformed)
            else:
                response = self._transform_outputs(transformed)
            responses.append(response)

        return responses
Code Example #28
    def execute(self, requests):
        """ Tries to create a response sender object and use that
        for sending the response.
        """

        output0_dtype = self.output0_dtype
        output1_dtype = self.output1_dtype

        responses = []
        for request in requests:
            in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1")
            out_0, out_1 = (in_0.as_numpy() + in_1.as_numpy(),
                            in_0.as_numpy() - in_1.as_numpy())

            out_tensor_0 = pb_utils.Tensor("OUTPUT0",
                                           out_0.astype(output0_dtype))
            out_tensor_1 = pb_utils.Tensor("OUTPUT1",
                                           out_1.astype(output1_dtype))
            responses.append(
                pb_utils.InferenceResponse([out_tensor_0, out_tensor_1]))
        return responses
Code Example #29
    def execute(self, requests):
        """ This function is called on inference request.
        """

        responses = []
        for request in requests:
            in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            out_tensor_0 = pb_utils.Tensor(
                "OUTPUT0",
                np.array(['123456'], dtype=self._dtypes[self._index]))
            self._index += 1
            responses.append(pb_utils.InferenceResponse([out_tensor_0]))
        return responses
Code Example #30
    def execute(self, requests: List[InferenceRequest]) -> List[InferenceResponse]:
        """Transforms the input batches by running through a NVTabular workflow.transform
        function.
        """
        responses = []
        for request in requests:
            # create a cudf DataFrame from the triton request
            input_df = cudf.DataFrame(
                {
                    name: _convert_tensor(get_input_tensor_by_name(request, name))
                    for name in self.workflow.column_group.input_column_names
                }
            )

            # use our NVTabular workflow to transform the dataframe
            output_df = nvtabular.workflow._transform_partition(
                input_df, [self.workflow.column_group]
            )

            output_tensors = []
            if "conts" in self.column_types:
                output_tensors.append(
                    Tensor(
                        "DES",
                        _convert_cudf2numpy(output_df[self.column_types["conts"]], np.float32),
                    )
                )
            else:
                output_tensors.append(Tensor("DES", np.array([[]], np.float32)))

            if "cats" in self.column_types:
                output_df[self.column_types["cats"]] = (
                    output_df[self.column_types["cats"]] + self.slot_sizes
                )
                cats_np = _convert_cudf2numpy(output_df[self.column_types["cats"]], np.int64)
                output_tensors.append(
                    Tensor(
                        "CATCOLUMN",
                        cats_np,
                    )
                )
            else:
                cats_np = np.array([[]], np.int64)
                output_tensors.append(Tensor("CATCOLUMN", cats_np))

            len_cats_np = cats_np.shape[1]
            row_index = np.arange(len_cats_np + 1, dtype=np.int32).reshape(1, len_cats_np + 1)
            output_tensors.append(Tensor("ROWINDEX", row_index))

            responses.append(InferenceResponse(output_tensors))

        return responses