def wrap_all_outputs(result, training_mode_flag):
        def extract_context(result):
            # Search for context among all outputs.
            ctx = None
            for arg in result:
                if not isinstance(arg, torch.Tensor) or not hasattr(
                        arg, 'grad_fn'):
                    continue
                # Use the first context we see because all tensors in
                # result share the same one.
                ctx = arg.grad_fn
                break
            if training_mode_flag:
                # Must extract one valid context from result tensors.
                assert ctx is not None
            else:
                # Context must not be present in non-training mode.
                assert ctx is None

            return ctx

        if isinstance(result, torch.Tensor):
            ctx = extract_context([result])
            return [ctx, to_dlpack(result)]
        elif isinstance(result, tuple) or isinstance(result, list):
            ctx = extract_context(result)
            wrapped = [ctx]
            wrapped.extend(list(to_dlpack(value) for value in result))
            # Inside the returned list, the first element is the context and
            # the rest are DLPack tensors.
            return wrapped
        else:
            raise TypeError('Unsupported returned type: ', type(result))
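A minimal usage sketch (not part of the example above): wrapping a single leaf tensor in non-training mode, assuming the wrap_all_outputs defined above is in scope and torch is installed. The returned list holds the (absent) autograd context followed by a DLPack capsule.

import torch
from torch.utils.dlpack import from_dlpack

t = torch.ones(3)                    # leaf tensor, its grad_fn is None
wrapped = wrap_all_outputs(t, training_mode_flag=False)
assert wrapped[0] is None            # no autograd context outside training
roundtrip = from_dlpack(wrapped[1])  # shares memory with t
assert torch.equal(roundtrip, t)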
def wrap_all_outputs(result):
    if isinstance(result, torch.Tensor):
        return [to_dlpack(result)]
    elif isinstance(result, tuple) or isinstance(result, list):
        return [to_dlpack(value) for value in result if value is not None]
    else:
        raise Exception('Unsupported returned type: ', type(result))
Example #3
    def _test_gpu_bls_add_sub(self, is_input0_gpu, is_input1_gpu):
        input0 = torch.rand(16)
        input1 = torch.rand(16)

        if is_input0_gpu:
            input0 = input0.to('cuda')

        if is_input1_gpu:
            input1 = input1.to('cuda')

        input0_pb = pb_utils.Tensor.from_dlpack('INPUT0', to_dlpack(input0))
        input1_pb = pb_utils.Tensor.from_dlpack('INPUT1', to_dlpack(input1))
        gpu_bls_return = self._get_gpu_bls_outputs(input0_pb, input1_pb)
        if gpu_bls_return:
            output0_dlpack, output1_dlpack = gpu_bls_return
        else:
            return False

        expected_output_0 = from_dlpack(
            input0_pb.to_dlpack()).to('cpu') + from_dlpack(
                input1_pb.to_dlpack()).to('cpu')
        expected_output_1 = from_dlpack(
            input0_pb.to_dlpack()).to('cpu') - from_dlpack(
                input1_pb.to_dlpack()).to('cpu')

        output0_matches = torch.all(
            expected_output_0 == from_dlpack(output0_dlpack).to('cpu'))
        output1_matches = torch.all(
            expected_output_1 == from_dlpack(output1_dlpack).to('cpu'))
        if not output0_matches or not output1_matches:
            return False

        return True
Example #4
    def inference_torch(self, *args, **kw):
        if self.input_names is None:
            assert len(args) == 0 and len(kw) != 0
            for k, v in kw.items():
                if isinstance(v, torch.Tensor):
                    self.tvm_context.set_input(k, tvm.nd.from_dlpack(dlpack.to_dlpack(v)))
                else:
                    self.tvm_context.set_input(k, tvm.nd.array(v, ctx=self.ctx))
        else:
            assert len(args) == len(self.input_names)
            for k, v in zip(self.input_names, args):
                if isinstance(v, torch.Tensor):
                    self.tvm_context.set_input(k, tvm.nd.from_dlpack(dlpack.to_dlpack(v)))
                else:
                    self.tvm_context.set_input(k, tvm.nd.array(v, ctx=self.ctx))
        self.tvm_context.run()
        num_outputs = self.tvm_context.get_num_outputs()
        outputs = []
        for i in range(num_outputs):
            out = self.tvm_context.get_output(i).to_dlpack()
            outputs.append(dlpack.from_dlpack(out))
        if len(outputs) == 1:
            return outputs[0]
        return tuple(outputs)
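A minimal zero-copy sketch of the DLPack hop used above, assuming tvm and torch are installed and that torch.utils.dlpack is imported as dlpack, as in the method above.

import torch
import tvm
from torch.utils import dlpack

x = torch.arange(6, dtype=torch.float32).reshape(2, 3)
nd_x = tvm.nd.from_dlpack(dlpack.to_dlpack(x))  # TVM NDArray sharing x's memory
back = dlpack.from_dlpack(nd_x.to_dlpack())     # back to a torch.Tensor, still zero-copy
assert torch.equal(x, back)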
Example #5
    def execute(self, requests):
        responses = []
        for request in requests:
            input0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            print('ISCPU', input0.is_cpu())
            gpu_output = pb_utils.get_input_tensor_by_name(
                request, "GPU_OUTPUT").as_numpy()

            if input0.is_cpu():
                if not gpu_output[0]:
                    output0 = pb_utils.Tensor.from_dlpack(
                        "OUTPUT0", input0.to_dlpack())
                else:
                    output0_pytorch = from_dlpack(input0.to_dlpack()).cuda()
                    output0 = pb_utils.Tensor.from_dlpack(
                        "OUTPUT0", to_dlpack(output0_pytorch))
            else:
                if gpu_output[0]:
                    output0 = pb_utils.Tensor.from_dlpack(
                        "OUTPUT0", input0.to_dlpack())
                else:
                    output0_pytorch = from_dlpack(input0.to_dlpack()).cpu()
                    output0 = pb_utils.Tensor.from_dlpack(
                        "OUTPUT0", to_dlpack(output0_pytorch))

            next_gpu_output = pb_utils.Tensor("NEXT_GPU_OUTPUT",
                                              gpu_output[1:])
            responses.append(
                pb_utils.InferenceResponse([output0, next_gpu_output]))

        return responses
Example #6
    def _test_gpu_bls_add_sub(self, is_input0_gpu, is_input1_gpu):
        input0 = torch.rand(16)
        input1 = torch.rand(16)

        if is_input0_gpu:
            input0 = input0.to('cuda')

        if is_input1_gpu:
            input1 = input1.to('cuda')

        input0_pb = pb_utils.Tensor.from_dlpack('INPUT0', to_dlpack(input0))
        input1_pb = pb_utils.Tensor.from_dlpack('INPUT1', to_dlpack(input1))
        output0_dlpack, output1_dlpack = self._get_gpu_bls_outputs(
            input0_pb, input1_pb)

        expected_output_0 = from_dlpack(
            input0_pb.to_dlpack()).to('cpu') + from_dlpack(
                input1_pb.to_dlpack()).to('cpu')
        expected_output_1 = from_dlpack(
            input0_pb.to_dlpack()).to('cpu') - from_dlpack(
                input1_pb.to_dlpack()).to('cpu')

        self.assertTrue(
            torch.all(
                expected_output_0 == from_dlpack(output0_dlpack).to('cpu')))
        self.assertTrue(
            torch.all(
                expected_output_1 == from_dlpack(output1_dlpack).to('cpu')))
Example #7
    def execute(self, requests):
        output0_dtype = self.output0_dtype
        output1_dtype = self.output1_dtype

        responses = []
        for request in requests:
            in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1")

            # If both tensors are on the CPU, use NumPy.
            if in_0.is_cpu() and in_1.is_cpu():
                if (in_0.as_numpy().dtype.type is np.bytes_
                        or in_0.as_numpy().dtype == np.object_):
                    out_0, out_1 = (
                        in_0.as_numpy().astype(np.int32) - in_1.as_numpy().astype(np.int32),
                        in_0.as_numpy().astype(np.int32) + in_1.as_numpy().astype(np.int32))
                    out_tensor_0 = pb_utils.Tensor("OUTPUT0",
                                                   out_0.astype(output0_dtype))
                    out_tensor_1 = pb_utils.Tensor("OUTPUT1",
                                                   out_1.astype(output1_dtype))
                else:
                    in_0_pytorch, in_1_pytorch = from_dlpack(
                        in_0.to_dlpack()), from_dlpack(in_1.to_dlpack())
                    out_0, out_1 = (in_0_pytorch - in_1_pytorch,
                                    in_0_pytorch + in_1_pytorch)

                    if self.output0_dtype == np.object_:
                        out_tensor_0 = pb_utils.Tensor(
                            "OUTPUT0",
                            out_0.numpy().astype(output0_dtype))
                    else:
                        out_0 = out_0.type(
                            self.numpy_to_pytorch_dtype[output0_dtype])
                        out_tensor_0 = pb_utils.Tensor.from_dlpack(
                            "OUTPUT0", to_dlpack(out_0))

                    if self.output1_dtype == np.object_:
                        out_tensor_1 = pb_utils.Tensor(
                            "OUTPUT1",
                            out_1.numpy().astype(output1_dtype))
                    else:
                        out_1 = out_1.type(
                            self.numpy_to_pytorch_dtype[output1_dtype])
                        out_tensor_1 = pb_utils.Tensor.from_dlpack(
                            "OUTPUT1", to_dlpack(out_1))

            else:
                in_0_pytorch, in_1_pytorch = from_dlpack(
                    in_0.to_dlpack()).cuda(), from_dlpack(
                        in_1.to_dlpack()).cuda()
                out_0, out_1 = (in_0_pytorch - in_1_pytorch,
                                in_0_pytorch + in_1_pytorch)
                out_tensor_0 = pb_utils.Tensor.from_dlpack(
                    "OUTPUT0", to_dlpack(out_0))
                out_tensor_1 = pb_utils.Tensor.from_dlpack(
                    "OUTPUT1", to_dlpack(out_1))

            responses.append(
                pb_utils.InferenceResponse([out_tensor_0, out_tensor_1]))

        return responses
Example #8
    def test_pytorch_cpu_tensor_to_submatrix(self):
        '''
        Note that we have two ways to convert a
        PyTorch CPU tensor to kaldi's FloatSubMatrix:

        Method 1:
            v = kaldi.SubMatrixFromDLPack(to_dlpack(tensor))

        Method 2:
            v = kaldi.DLPackFloatSubMatrix.from_dlpack(to_dlpack(tensor))
        '''
        tensor = torch.arange(6).reshape(2, 3).float()

        m = kaldi.SubMatrixFromDLPack(to_dlpack(tensor))
        self.assertIsInstance(m, kaldi.FloatSubMatrix)

        m[0, 0] = 100  # also changes tensor, since memory is shared
        self.assertEqual(tensor[0, 0], 100)

        del m

        # memory is shared between `m` and `tensor`
        m = kaldi.DLPackFloatSubMatrix.from_dlpack(to_dlpack(tensor))
        m[0, 1] = 200
        self.assertEqual(tensor[0, 1], 200)
Example #9
    def generate(dim):
        shape = np.random.randint(1, 10, [dim]).tolist()
        if self.as_tensor:
            data = to_dlpack(torch.rand(size=[self.batch_size] + shape, device=self.device))
        else:
            data = [to_dlpack(torch.rand(shape, device=self.device)) for _ in range(self.batch_size)]
        return data
Example #10
    def __init__(self, indexes: torch.Tensor, data: torch.Tensor):
        assert indexes.dtype == torch.int32
        assert data.dtype == torch.float32
        assert data.shape[1] == 2
        self.indexes = indexes
        self.data = data
        super().__init__(to_dlpack(self.indexes), to_dlpack(self.data))
Example #11
    def response_thread(self, response_sender, input0, gpu_output):
        # Sleep 5 seconds to make sure the main thread has exited.
        time.sleep(5)

        if input0.is_cpu():
            if not gpu_output[0]:
                output0 = pb_utils.Tensor.from_dlpack("OUTPUT0",
                                                      input0.to_dlpack())
            else:
                output0_pytorch = from_dlpack(input0.to_dlpack()).cuda()
                output0 = pb_utils.Tensor.from_dlpack(
                    "OUTPUT0", to_dlpack(output0_pytorch))
        else:
            if gpu_output[0]:
                output0 = pb_utils.Tensor.from_dlpack("OUTPUT0",
                                                      input0.to_dlpack())
            else:
                output0_pytorch = from_dlpack(input0.to_dlpack()).cpu()
                output0 = pb_utils.Tensor.from_dlpack(
                    "OUTPUT0", to_dlpack(output0_pytorch))

        next_gpu_output = pb_utils.Tensor("NEXT_GPU_OUTPUT", gpu_output[1:])
        infer_response = pb_utils.InferenceResponse([output0, next_gpu_output])

        # Number of times to repeat the response
        response_repeat = 2
        for _ in range(response_repeat):
            response_sender.send(infer_response)

        response_sender.send(
            flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

        with self.inflight_thread_count_lck:
            self.inflight_thread_count -= 1
Example #12
def to_cugraph(edge_index: Tensor,
               edge_weight: Optional[Tensor] = None,
               relabel_nodes: bool = True):
    r"""Converts a graph given by :obj:`edge_index` and optional
    :obj:`edge_weight` into a :obj:`cugraph` graph object.

    Args:
        relabel_nodes (bool, optional): If set to :obj:`True`,
            :obj:`cugraph` will remove any isolated nodes, leading to a
            relabeling of nodes. (default: :obj:`True`)
    """
    import cudf
    import cugraph

    df = cudf.from_dlpack(to_dlpack(edge_index.t()))

    if edge_weight is not None:
        assert edge_weight.dim() == 1
        df[2] = cudf.from_dlpack(to_dlpack(edge_weight))

    return cugraph.from_cudf_edgelist(
        df,
        source=0,
        destination=1,
        edge_attr=2 if edge_weight is not None else None,
        renumber=relabel_nodes)
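A hedged usage sketch for the converter above, assuming a CUDA device and the RAPIDS cudf/cugraph packages are installed; the edge list below is made up for illustration.

import torch

edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], device='cuda')
edge_weight = torch.ones(4, device='cuda')

graph = to_cugraph(edge_index, edge_weight, relabel_nodes=True)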
Example #13
    def test_get_cfsa_vec_size_multiple(self):
        s1 = r'''
        0 1 1
        0 2 2
        1 3 3
        2 3 3
        3 16 -1
        16
        '''

        fsa1 = k2.string_to_fsa(s1)
        cfsa1 = k2.Cfsa(fsa1)

        s2 = r'''
        0 1 1
        0 2 2
        1 3 3
        3 10 -1
        10
        '''
        fsa2 = k2.string_to_fsa(s2)
        cfsa2 = k2.Cfsa(fsa2)

        cfsa_std_vec = k2.CfsaStdVec()

        cfsa_std_vec.push_back(cfsa1)

        cfsa_std_vec.push_back(cfsa2)
        self.assertEqual(len(cfsa_std_vec), 2)
        num_bytes = k2.get_cfsa_vec_size(cfsa_std_vec)
        # the value is taken from the corresponding fsa_test.cc
        self.assertEqual(num_bytes, 360)

        # now test from dlpack
        if SKIP_DLPACK:
            print('skip dlpack test')
            return
        else:
            print('Do dlpack testing')

        num_int32 = num_bytes // 4
        tensor = torch.empty((num_int32, ), dtype=torch.int32)
        dlpack = to_dlpack(tensor)

        cfsa_vec = k2.create_cfsa_vec(dlpack, cfsa_std_vec)
        self.assertEqual(cfsa_vec.num_fsas(), 2)
        self.assertEqual(cfsa_vec[0], cfsa1)
        self.assertEqual(cfsa_vec[1], cfsa2)

        self.assertEqual(tensor[0], 1)  # version
        self.assertEqual(tensor[1], 2)  # num_fsas
        self.assertEqual(tensor[2], 64 // 4)  # state_offsets_start

        # construct a CfsaVec from a `torch::Tensor` which has already been filled
        dlpack = to_dlpack(tensor.clone())
        cfsa_vec = k2.create_cfsa_vec(dlpack)
        self.assertEqual(cfsa_vec.num_fsas(), 2)
        self.assertEqual(cfsa_vec[0], cfsa1)
        self.assertEqual(cfsa_vec[1], cfsa2)
def wrap_all_outputs(result):
    if isinstance(result, torch.Tensor):
        return [to_dlpack(result)]
    elif isinstance(result, tuple) or isinstance(result, list):
        return [to_dlpack(value) if value is not None else None for value in result]
    else:
        raise wrap_exception(ORTModuleIOError,
                             TypeError(f'ORTModule does not support the following model output type {type(result)}.'))
    def wrap_all_outputs(result, training_mode_flag):
        # This is mainly to hold grad_fn references by registering them into our PyNodeSharedPointerPool.
        def register_context(result):
            # Search for context among all outputs.
            ctx = None
            # All forward outputs of torch.autograd.Function share the same gradient function pointer,
            # so here we just take the first tensor that has a grad_fn attribute.
            # (https://github.com/pytorch/pytorch/blob/15532595209d2daf34d35e10f8d3d3b64966aea2/torch/csrc/autograd/custom_function.cpp#L267)
            first_tensor_output = None
            for arg in result:
                if not isinstance(arg, torch.Tensor) or not hasattr(arg, 'grad_fn'):
                    continue
                # Use the first context we see because all tensors in
                # result share the same one.
                ctx = arg.grad_fn
                first_tensor_output = arg
                break
            if training_mode_flag:
                # Must extract one valid context from result tensors.
                assert ctx is not None

                #         FORWARD                                                    BACKWARD FUNCTION CONNECTIONS
                # input_1 (leaf, constructed by from_dlpack)   <----reference----  AccumulateGrad gradient function
                #             ↓                                                                 ↑
                # autograd.Function apply()                        ------------>    autograd.Function backward()
                #             ↓                                    |                            ↑
                #    output_1, output_2   --- shared_ptr<PyNode> ---                            ↑
                #             ↓                                                       previous gradient function

                # We remove the edges between the current autograd.Function's gradient function and
                # its inputs' gradient functions (e.g. the AccumulateGrad gradient function); the
                # AccumulateGrad gradient function will then be destroyed, releasing its reference to input_1
                # (https://github.com/pytorch/pytorch/blob/15532595209d2daf34d35e10f8d3d3b64966aea2/torch/csrc/autograd/functions/accumulate_grad.cpp#L21).
                # The next edges are stored in the Node, from which we can get the next gradient function.
                # https://github.com/pytorch/pytorch/blob/15532595209d2daf34d35e10f8d3d3b64966aea2/torch/csrc/autograd/function.h#L527
                # Filter out the Nones in saved_tensors.
                saved_tensors = [t for t in ctx.saved_tensors if t is not None]
                torch_interop_utils.clear_grad_fns_for_next_edges(first_tensor_output, saved_tensors)
                torch_interop_utils.register_grad_fn(id(ctx), first_tensor_output)
            else:
                # Context must not be present in non-training mode.
                assert ctx is None
            return ctx

        if isinstance(result, torch.Tensor):
            ctx = register_context([result])
            return [ctx, to_dlpack(result)]
        elif isinstance(result, tuple) or isinstance(result, list):
            ctx = register_context(result)
            wrapped = [ctx]
            wrapped.extend(list(to_dlpack(value) if value is not None else None for value in result))
            # Inside the returned list, the first element is the context and
            # the rest are DLPack tensors.
            return wrapped
        else:
            raise wrap_exception(ORTModuleIOError,
                                 TypeError(f'ORTModule does not support the following model output type {type(result)}.'))
Example #16
def pytorch_adapter(fun, in1, in2):
    with torch.cuda.stream(torch_stream):
        tin1 = [torch_dlpack.from_dlpack(dltensor) for dltensor in in1]
        tin2 = [torch_dlpack.from_dlpack(dltensor) for dltensor in in2]
        tout1, tout2 = fun(tin1, tin2)
        out1, out2 = [torch_dlpack.to_dlpack(tout) for tout in tout1], \
                     [torch_dlpack.to_dlpack(tout) for tout in tout2]
    torch_stream.synchronize()
    return out1, out2
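A hedged sketch of a callback that pytorch_adapter could wrap (the name add_and_mul is made up): it receives two lists of torch tensors rebuilt from the DLPack capsules and must return two lists, matching the (tin1, tin2) -> (tout1, tout2) contract used above. torch_stream is assumed to be a torch.cuda.Stream created elsewhere.

def add_and_mul(tin1, tin2):
    # Elementwise sum and product over corresponding tensors in the two lists.
    tout1 = [a + b for a, b in zip(tin1, tin2)]
    tout2 = [a * b for a, b in zip(tin1, tin2)]
    return tout1, tout2

# out1, out2 = pytorch_adapter(add_and_mul, in1, in2)  # in1, in2: lists of DLPack capsules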
Example #17
    def predict(self,
                input_data,
                max_seq_len=128,
                batch_size=32,
                threshold=0.5):
        """
        Predict the class with the trained model

        :param input_data: input text data for prediction
        :type input_data: cudf.Series
        :param max_seq_len: Limits the length of the sequence returned by the tokenizer. If the tokenized sentence is shorter than max_seq_len, the output will be padded with 0s. If the tokenized sentence is longer than max_seq_len, it will be truncated to max_seq_len.
        :type max_seq_len: int
        :param batch_size: batch size
        :type batch_size: int
        :param threshold: results with probabilities higher than this will be labeled as positive
        :type threshold: float
        :return: predictions, probabilities: predictions are labels (0 or 1) based on minimum threshold
        :rtype: cudf.Series, cudf.Series

        Examples
        --------
        >>> from cuml.preprocessing.model_selection import train_test_split
        >>> emails_train, emails_test, labels_train, labels_test = train_test_split(train_emails_df, 'label', train_size=0.8)
        >>> sc.train_model(emails_train, labels_train)
        >>> predictions = sc.predict(emails_test, threshold=0.8)
        """
        predict_inputs, predict_masks = self._bert_uncased_tokenize(
            input_data, max_seq_len)
        predict_inputs = predict_inputs.type(torch.LongTensor).to(self._device)
        predict_masks = predict_masks.to(self._device)

        predict_inputs = predict_inputs.type(torch.LongTensor)
        predict_data = TensorDataset(predict_inputs, predict_masks)
        predict_sampler = SequentialSampler(predict_data)
        predict_dataloader = DataLoader(predict_data,
                                        sampler=predict_sampler,
                                        batch_size=batch_size)

        preds = cudf.Series()
        probs = cudf.Series()
        for batch in predict_dataloader:
            batch = tuple(t.to(self._device) for t in batch)
            b_input_ids, b_input_masks = batch
            with torch.no_grad():
                logits = self._model(b_input_ids,
                                     token_type_ids=None,
                                     attention_mask=b_input_masks)[0]
                b_probs = torch.sigmoid(logits[:, 1])
                b_preds = b_probs.ge(threshold)

            b_probs = cudf.io.from_dlpack(to_dlpack(b_probs))
            b_preds = cudf.io.from_dlpack(to_dlpack(b_preds))
            preds = preds.append(b_preds)
            probs = probs.append(b_probs)

        return preds, probs
Example #18
    def evaluate_model(self,
                       test_data,
                       labels,
                       max_seq_len=128,
                       batch_size=32):
        """
        Evaluate trained model

        :param test_data: test data to evaluate model
        :type test_data: cudf.Series
        :param labels: labels for each element in test_data
        :type labels: cudf.Series
        :param max_seq_len: Limits the length of the sequence returned by the tokenizer. If the tokenized sentence is shorter than max_seq_len, the output will be padded with 0s. If the tokenized sentence is longer than max_seq_len, it will be truncated to max_seq_len.
        :type max_seq_len: int
        :param batch_size: batch size
        :type batch_size: int

        Examples
        --------
        >>> from cuml.preprocessing.model_selection import train_test_split
        >>> emails_train, emails_test, labels_train, labels_test = train_test_split(train_emails_df, 'label', train_size=0.8)
        >>> sc.evaluate_model(emails_test, labels_test)
        """
        self._model.eval()
        test_gdf = cudf.DataFrame()
        test_gdf["text"] = test_data
        test_gdf["label"] = labels

        test_dataset = Dataset(test_gdf)
        test_dataloader = DataLoader(test_dataset, batchsize=batch_size)

        eval_accuracy = 0
        nb_eval_steps = 0
        for df in test_dataloader.get_chunks():
            b_input_ids, b_input_mask = self._bert_uncased_tokenize(
                df["text"], max_seq_len)
            b_labels = torch.tensor(df["label"].to_numpy())
            with torch.no_grad():
                logits = self._model(b_input_ids,
                                     token_type_ids=None,
                                     attention_mask=b_input_mask)[0]

            logits = logits.type(torch.DoubleTensor).to(self._device)
            logits = cupy.fromDlpack(to_dlpack(logits))
            label_ids = b_labels.type(torch.IntTensor).to(self._device)
            label_ids = cupy.fromDlpack(to_dlpack(label_ids))
            temp_eval_accuracy = self._flatten_accuracy(logits, label_ids)

            eval_accuracy += temp_eval_accuracy
            nb_eval_steps += 1

        accuracy = eval_accuracy / nb_eval_steps

        return float(accuracy)
Example #19
    def _train(self, train_dataloader, validation_dataloader, model, epochs):
        model.train()  # Enable training mode
        for _ in trange(epochs, desc="Epoch"):
            tr_loss = 0  # Tracking variables
            nb_tr_examples, nb_tr_steps = 0, 0
            for step, batch in enumerate(train_dataloader):
                batch = tuple(t.to(self._device)
                              for t in batch)  # Add batch to GPU
                b_input_ids, b_input_mask, b_labels = batch  # Unpack the inputs from dataloader
                self._optimizer.zero_grad()  # Clear out the gradients
                loss = model(b_input_ids,
                             token_type_ids=None,
                             attention_mask=b_input_mask,
                             labels=b_labels)[0]  # forward pass

                loss.sum().backward()
                self._optimizer.step()  # update parameters
                tr_loss += loss.sum().item()  # get a numeric value
                nb_tr_examples += b_input_ids.size(0)
                nb_tr_steps += 1

            print("Train loss: {}".format(tr_loss / nb_tr_steps))

            # Put the model in evaluation mode to evaluate loss on the
            # validation set.
            model.eval()

            eval_accuracy = 0
            nb_eval_steps = 0

            for batch in validation_dataloader:
                batch = tuple(t.to(self._device) for t in batch)

                b_input_ids, b_input_mask, b_labels = batch

                # Tell the model not to compute or store gradients, saving
                # memory and speeding up validation.
                with torch.no_grad():
                    # Forward pass, calculate logit predictions.
                    logits = model(b_input_ids,
                                   token_type_ids=None,
                                   attention_mask=b_input_mask)[0]
                logits = cupy.fromDlpack(to_dlpack(logits))
                label_ids = cupy.fromDlpack(to_dlpack(b_labels))
                # logits = logits.detach().cpu().numpy()
                # label_ids = b_labels.to('cpu').numpy()
                temp_eval_accuracy = self._flatten_accuracy(logits, label_ids)

                eval_accuracy += temp_eval_accuracy
                nb_eval_steps += 1

            print("Validation Accuracy: {}".format(eval_accuracy /
                                                   nb_eval_steps))

        return model
Example #20
    def forward(ctx, input, proj):
        batchsize = input.shape[0]
        ctx.save_for_backward(input)
        if input.shape[1] != 1:
            raise NotImplementedError
        sp = Config.getScanParam()
        out = input.clone()
        for i in range(batchsize):
            cupy_input = fromDlpack(to_dlpack(out[i, 0, :, :]))
            cupy_proj = fromDlpack(to_dlpack(proj[i, 0, :, :]))
            CTOperator.SART2D(cupy_proj, sp, sp['order'], cupy_input)
        return out
Example #21
    def __call__(self, loc, fg_score, anchor, img_size, scale=1.):
        """
        Args:
         - loc: (N,4)
         - fg_score: (N,)
         - anchor: (9, 4)
         - img_size: (2)
        """

        if self.parent_model.training:
            n_pre_nms = self.n_train_pre_nms
            n_post_nms = self.n_train_post_nms
        else:
            n_pre_nms = self.n_test_pre_nms
            n_post_nms = self.n_test_post_nms

        loc = cp.fromDlpack(to_dlpack(loc))
        fg_score = cp.fromDlpack(to_dlpack(fg_score))
        anchor = cp.asarray(anchor)
        roi = loc2bbox(anchor, loc)

        # clip
        roi[:, slice(0, 4, 2)] = cp.clip(roi[:, slice(0, 4, 2)], 0,
                                         img_size[1])
        roi[:, slice(1, 4, 2)] = cp.clip(roi[:, slice(1, 4, 2)], 0,
                                         img_size[0])

        # remove boxes smaller than the minimum size threshold
        min_size = self.min_size * scale
        hs = roi[:, 3] - roi[:, 1]
        ws = roi[:, 2] - roi[:, 0]
        keep = cp.where((hs > min_size) & (ws > min_size))[0]
        roi = roi[keep, :]
        fg_score = fg_score[keep]

        # sort by score in descending order
        order = cp.argsort(fg_score.ravel())[::-1]
        if n_pre_nms > 0:
            order = order[0:n_pre_nms]
        roi = roi[order, :]

        keep = non_maximum_suppression(cp.ascontiguousarray(cp.asarray(roi)),
                                       thresh=self.nms_thresh)

        if n_post_nms > 0:
            keep = keep[:n_post_nms]
        roi = roi[keep]
        return roi
Example #22
def _to_o3d(tensor, min_ndim=0, max_len=None):
    """Convert Tensorflow, PyTorch and Numpy tensors to Open3D tensor without
    copying. Python lists and tuples are also accepted, but will be copied. If
    max_len is specified, a tuple of tensors is returned, with the input split
    into tuple elements along the first dimension.  This allows converting a
    batch of variable size data.

    Args:
        tensor: Input tensor to be converted.
        min_ndim (int): Tensor shape will be padded with ones on the left to
            reach this minimum number of dimensions.
        max_len (int): The max size along the first dimension. Other data will
            be discarded.
    """
    if max_len is not None:
        return tuple(
            _to_o3d(tensor[k], min_ndim - 1)
            for k in range(min(max_len, len(tensor))))
    if isinstance(tensor, o3d.core.Tensor):
        pass
    elif tf is not None and isinstance(tensor, tf.Tensor):
        tensor = o3d.core.Tensor.from_dlpack(tf_dlpack.to_dlpack(tensor))
    elif torch is not None and isinstance(tensor, torch.Tensor):
        tensor = o3d.core.Tensor.from_dlpack(torch_dlpack.to_dlpack(tensor))
    else:
        tensor = o3d.core.Tensor.from_numpy(np.asarray(tensor))
    exp_shape = tensor.shape
    for _ in range(tensor.ndim, min_ndim):
        exp_shape.insert(0, 1)
    return tensor.reshape(exp_shape)
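A hedged usage sketch for _to_o3d, assuming open3d and torch are installed: a rank-2 tensor is converted zero-copy and padded on the left with singleton dimensions until it reaches min_ndim.

import torch

t = torch.rand(4, 5)
o3d_t = _to_o3d(t, min_ndim=3)
assert tuple(o3d_t.shape) == (1, 4, 5)  # one leading dimension added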
def test_dlpack_tensor_list_cpu_direct_creation():
    arr = torch.rand(size=[3, 5, 6], device="cpu")
    tensor_list = TensorListCPU(to_dlpack(arr), "NHWC")
    dali_torch_tensor = convert_to_torch(tensor_list,
                                         device=arr.device,
                                         dtype=arr.dtype)
    assert torch.all(arr.eq(dali_torch_tensor))
def test_dlpack_tensor_list_gpu_to_cpu():
    arr = torch.rand(size=[3, 5, 6], device="cuda")
    tensor_list = TensorListGPU(to_dlpack(arr), "NHWC")
    dali_torch_tensor = convert_to_torch(tensor_list,
                                         device=arr.device,
                                         dtype=arr.dtype)
    assert torch.all(arr.cpu().eq(dali_torch_tensor.cpu()))
def test_dlpack_tensor_gpu_direct_creation():
    arr = torch.rand(size=[3, 5, 6], device="cuda")
    tensor = TensorGPU(to_dlpack(arr))
    dali_torch_tensor = convert_to_torch(tensor,
                                         device=arr.device,
                                         dtype=arr.dtype)
    assert torch.all(arr.eq(dali_torch_tensor))
Example #26
def update_W(W, a_last, z, u, rho):
    size = dist.get_world_size()
    rank = dist.get_rank()
    # convert to pytorch data

    # update W
    temp1 = z + u / rho

    temp1 = from_dlpack(toDlpack(temp1))
    a_last = from_dlpack(toDlpack(a_last))

    data1 = torch.mm(temp1, torch.t(a_last))
    data2 = torch.mm(a_last, torch.t(a_last))
    data = torch.cat((data1, data2), 0)
    # data = comm.reduce(data, op=MPI.SUM, root=0)
    dist.reduce(data, dst=0, op=dist.ReduceOp.SUM)

    if rank == 0:
        middle_pos = data1.shape[0]
        data1 = data[0:middle_pos]
        data2 = data[middle_pos:]
        inverse_data = torch.pinverse(data2)
        W = torch.mm(data1, inverse_data)
    else:
        W = from_dlpack(toDlpack(W))
        # W = None
    dist.broadcast(W, src=0)

    # convert to cupy data
    W = fromDlpack(to_dlpack(W))
    return W
Example #27
    def execution_session_run_forward(execution_session, onnx_model, device, *inputs):
        """Runs the forward graph on execution_session with given model inputs and device"""

        # Assert that the input and model device match
        _utils._check_same_device(device, "Input argument to forward", *inputs)

        # TODO: Try to reuse the output buffers as some of the output tensors are same sizes,
        #   especially the backward graph outputs.
        # REVIEW(codemzs): Consolidate Training Agent with InferenceAgent on C++ side to not
        # have the need for passing IOBinding.
        state = C.PartialGraphExecutionState()
        forward_inputs = C.OrtValueVector()
        forward_inputs.reserve(len(inputs))
        for input in inputs:
            forward_inputs.push_back(to_dlpack(input), input.dtype == torch.bool)

        forward_outputs = C.OrtValueVector()
        # Run and return module outputs.
        execution_session.run_forward(forward_inputs, forward_outputs, state)
        user_outputs = tuple(_utils._ortvalue_to_torch_tensor(forward_output) for forward_output in forward_outputs)

        output_info = [(output.shape, output.device, output.dtype) for output in user_outputs]
        run_info = RunStateInfo(state, output_info)
        # Return user outputs and forward run information
        return user_outputs, run_info
Example #28
def th2nd():
    ans = np.array([[1., 1., 1., 1.], [0., 0., 0., 0.], [0., 0., 0., 0.]])
    x = th.zeros((3, 4))
    dl = dlpack.to_dlpack(x)
    y = nd.from_dlpack(dl)
    x[0] = 1
    assert np.allclose(y.asnumpy(), ans)
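A hedged companion sketch going the other direction (MXNet to PyTorch), assuming the same np, nd and dlpack imports as above and that mxnet provides nd.to_dlpack_for_write; memory should be shared in this direction as well.

def nd2th():
    ans = np.array([[1., 1., 1., 1.], [0., 0., 0., 0.], [0., 0., 0., 0.]])
    x = nd.zeros((3, 4))
    y = dlpack.from_dlpack(nd.to_dlpack_for_write(x))  # torch.Tensor sharing x's memory
    x[0] = 1
    nd.waitall()  # make sure the asynchronous in-place write has completed
    assert np.allclose(y.numpy(), ans)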
Example #29
    def test_dlpack_gpu_tensors(self):
        # Test different dtypes
        pytorch_dtypes = [
            torch.float16, torch.float32, torch.float64, torch.int8,
            torch.int16, torch.int32, torch.int64, torch.uint8, torch.bool
        ]

        for pytorch_dtype in pytorch_dtypes:
            pytorch_tensor = torch.rand(
                [100], dtype=torch.float16, device='cuda') * 100
            pytorch_tensor = pytorch_tensor.type(pytorch_dtype)
            dlpack_tensor = to_dlpack(pytorch_tensor)
            pb_tensor = pb_utils.Tensor.from_dlpack('test_tensor',
                                                    dlpack_tensor)

            # Convert the tensor back to DLPack and ensure that both tensors are
            # the same
            pytorch_tensor_dlpack = from_dlpack(pb_tensor.to_dlpack())
            self.assertTrue(torch.all(pytorch_tensor_dlpack == pytorch_tensor))

            # DLPack does not properly support bool type:
            # https://github.com/google/jax/issues/4719
            if pytorch_dtype != torch.bool:
                self.assertTrue(
                    pytorch_tensor.type() == pytorch_tensor_dlpack.type())
            else:
                self.assertFalse(
                    pytorch_tensor.type() == pytorch_tensor_dlpack.type())
Example #30
def torch2cupy(tensor):
    """
    :param tensor: PyTorch CUDA tensor.
    :return: CuPy tensor.
    """
    dx = to_dlpack(tensor)
    return cupy.fromDlpack(dx)
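A hedged companion helper (not from the example above): the reverse conversion from CuPy back to PyTorch without copying, using the same legacy DLPack API as torch2cupy.

from torch.utils.dlpack import from_dlpack


def cupy2torch(array):
    """
    :param array: CuPy array on a CUDA device.
    :return: PyTorch CUDA tensor sharing the same memory.
    """
    return from_dlpack(array.toDlpack())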