Python get_device_from_arrayの例、chainer.cuda.get_device_from_array Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_lstm.py プロジェクト: jekbradbury/chainer

    def check_forward(self, x1_data, x2_data, x3_data):
        """Feed three inputs to the link and compare with ``functions.lstm``.

        After each call the link output and its stored ``h``/``c`` state are
        compared against values computed directly with ``functions.lstm``.

        Args:
            x1_data: First input array. It is wrapped in a
                ``chainer.Variable`` when ``self.input_variable`` is true.
            x2_data: Second input array; ``len(x2_data)`` is the batch size
                used to split the previous hidden state.
            x3_data: Third input array.
        """
        xp = self.link.xp
        x1 = chainer.Variable(x1_data) if self.input_variable else x1_data
        h1 = self.link(x1)
        with cuda.get_device_from_array(x1_data):
            c0 = chainer.Variable(xp.zeros((len(self.x1), self.out_size),
                                           dtype=self.x1.dtype))
            c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x1))
        testing.assert_allclose(h1.data, h1_expect.data)
        testing.assert_allclose(self.link.h.data, h1_expect.data)
        testing.assert_allclose(self.link.c.data, c1_expect.data)

        batch = len(x2_data)
        x2 = chainer.Variable(x2_data) if self.input_variable else x2_data
        h1_in, h1_rest = functions.split_axis(
            self.link.h.data, [batch], axis=0)
        y2 = self.link(x2)
        # Look the device up from the raw array, as in the first step: ``x1``
        # may be a Variable rather than an array when ``input_variable`` is
        # set.
        with cuda.get_device_from_array(x1_data):
            c2_expect, y2_expect = \
                functions.lstm(c1_expect,
                               self.link.upward(x2) + self.link.lateral(h1_in))
        testing.assert_allclose(y2.data, y2_expect.data)
        testing.assert_allclose(self.link.h.data[:batch], y2_expect.data)
        testing.assert_allclose(self.link.h.data[batch:], h1_rest.data)

        x3 = chainer.Variable(x3_data) if self.input_variable else x3_data
        # Keep the hidden state held before the third call for comparison.
        h2_rest = self.link.h
        y3 = self.link(x3)
        c3_expect, y3_expect = \
            functions.lstm(c2_expect, self.link.upward(x3))
        testing.assert_allclose(y3.data, y3_expect.data)
        testing.assert_allclose(self.link.h.data, h2_rest.data)

コード例 #2

0

ファイルを表示

ファイル: variable.py プロジェクト: watarufk/chainer

    def addgrad(self, var):
        """Add the gradient of ``var`` onto the gradient of this variable.

        Nothing happens when ``var`` has no gradient variable. If this
        variable has no data yet, it is first initialized with the shape of
        ``var``. When the source gradient and this variable's data are on
        devices with different ids, the source gradient is copied to this
        variable's device before being added.

        Args:
            var (Variable): Source variable.

        """
        incoming = var._grad_var
        if incoming is None:
            # The source carries no gradient, so there is nothing to add.
            return

        if self.data is None:
            self.initialize(var.shape)
        current = self._grad_var

        incoming_id = cuda.get_device_from_array(incoming.data).id
        target_id = cuda.get_device_from_array(self.data).id
        if incoming_id != target_id:
            incoming = chainer.functions.copy(incoming, target_id)

        if current is None:
            self._grad_var = incoming
        else:
            self._grad_var = incoming + current

コード例 #3

0

ファイルを表示

ファイル: test_lstm.py プロジェクト: thibaultbarbie/chainer

    def check_forward(self, x1_data, x2_data, x3_data):
        """Feed three inputs to the link and compare with ``functions.lstm``.

        After each call the link output and its stored ``h``/``c`` state are
        compared against values computed directly with ``functions.lstm``.

        Args:
            x1_data: First input array. It is wrapped in a
                ``chainer.Variable`` when ``self.input_variable`` is true.
            x2_data: Second input array; ``len(x2_data)`` is the batch size
                used to split the previous hidden state.
            x3_data: Third input array.
        """
        xp = self.link.xp
        x1 = chainer.Variable(x1_data) if self.input_variable else x1_data
        h1 = self.link(x1)
        with cuda.get_device_from_array(x1_data):
            c0 = chainer.Variable(
                xp.zeros((len(self.x1), self.out_size), dtype=self.x1.dtype))
            c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x1))
        testing.assert_allclose(h1.data, h1_expect.data)
        testing.assert_allclose(self.link.h.data, h1_expect.data)
        testing.assert_allclose(self.link.c.data, c1_expect.data)

        batch = len(x2_data)
        x2 = chainer.Variable(x2_data) if self.input_variable else x2_data
        h1_in, h1_rest = functions.split_axis(self.link.h.data, [batch],
                                              axis=0)
        y2 = self.link(x2)
        # Look the device up from the raw array, as in the first step: ``x1``
        # may be a Variable rather than an array when ``input_variable`` is
        # set.
        with cuda.get_device_from_array(x1_data):
            c2_expect, y2_expect = \
                functions.lstm(c1_expect,
                               self.link.upward(x2) + self.link.lateral(h1_in))
        testing.assert_allclose(y2.data, y2_expect.data)
        testing.assert_allclose(self.link.h.data[:batch], y2_expect.data)
        testing.assert_allclose(self.link.h.data[batch:], h1_rest.data)

        x3 = chainer.Variable(x3_data) if self.input_variable else x3_data
        # Keep the hidden state held before the third call for comparison.
        h2_rest = self.link.h
        y3 = self.link(x3)
        c3_expect, y3_expect = \
            functions.lstm(c2_expect, self.link.upward(x3))
        testing.assert_allclose(y3.data, y3_expect.data)
        testing.assert_allclose(self.link.h.data, h2_rest.data)

コード例 #4

0

ファイルを表示

ファイル: variable.py プロジェクト: kiyomaro927/chainer

    def addgrad(self, var):
        """Accumulate the gradient held by ``var`` into this variable.

        Returns immediately when ``var`` has no gradient variable. If this
        variable's data is still ``None`` it is initialized with
        ``var.shape`` first. A source gradient that lives on a device with a
        different id than this variable's data is copied over before the
        addition.

        Args:
            var (Variable): Source variable.

        """
        source_grad = var._grad_var
        if source_grad is None:
            return

        if self.data is None:
            self.initialize(var.shape)
        own_grad = self._grad_var

        source_device = cuda.get_device_from_array(source_grad.data)
        own_device = cuda.get_device_from_array(self.data)
        needs_transfer = source_device.id != own_device.id
        if needs_transfer:
            source_grad = chainer.functions.copy(source_grad, own_device.id)

        # Without an existing gradient the (possibly copied) source is
        # adopted as-is; otherwise the two are summed.
        if own_grad is None:
            self._grad_var = source_grad
        else:
            self._grad_var = source_grad + own_grad

コード例 #5

0

ファイルを表示

ファイル: test_variable.py プロジェクト: delta2323/chainer

    def test_to_gpu_from_another_gpu(self):
        """Check ``to_gpu(1)`` puts data and grad on device 1 unchanged."""
        xp = cuda.cupy
        var = chainer.Variable(xp.zeros(3, dtype=np.float32))
        var.grad = xp.ones_like(var.data)
        # Snapshots taken before the transfer, used as expected values.
        data_before = var.data.copy()
        grad_before = var.grad.copy()

        var.to_gpu(1)

        for moved in (var.data, var.grad):
            self.assertEqual(int(cuda.get_device_from_array(moved)), 1)
        xp.testing.assert_array_equal(var.data, data_before)
        xp.testing.assert_array_equal(var.grad, grad_before)

コード例 #6

0

ファイルを表示

    def test_to_gpu_from_another_gpu(self):
        """Verify that ``to_gpu(1)`` relocates data and grad to device 1."""
        cupy = cuda.cupy
        variable = chainer.Variable(cupy.zeros(3, dtype=np.float32))
        variable.grad = cupy.ones_like(variable.data)
        expected_data = variable.data.copy()
        expected_grad = variable.grad.copy()

        variable.to_gpu(1)

        # Both arrays must report device id 1 and keep their contents.
        data_device = cuda.get_device_from_array(variable.data)
        self.assertEqual(int(data_device), 1)
        grad_device = cuda.get_device_from_array(variable.grad)
        self.assertEqual(int(grad_device), 1)
        cupy.testing.assert_array_equal(variable.data, expected_data)
        cupy.testing.assert_array_equal(variable.grad, expected_grad)

コード例 #7

0

ファイルを表示

def _sum_sqnorm(arr):
    """Return the sum of squared elements over every array in ``arr``.

    Each array is flattened and dotted with itself inside its own device
    context, and the partial results are accumulated per device id. When
    exactly one device id was seen the partial sums are added inside that
    device's context; otherwise each partial sum is converted with
    ``float`` before being added.

    Args:
        arr: Sequence of arrays (indexable; ``arr[0]`` is read when a single
            device was used).
    """
    per_device = collections.defaultdict(float)
    for array in arr:
        with cuda.get_device_from_array(array) as dev:
            flat = array.ravel()
            sq = flat.dot(flat)
            per_device[int(dev)] += sq

    if len(per_device) != 1:
        return sum([float(v) for v in six.itervalues(per_device)])

    # If only a single device is used, aggregate square norms on it.
    with cuda.get_device_from_array(arr[0]):
        return sum(six.itervalues(per_device))

コード例 #8

0

ファイルを表示

 def check_addgrad(self, src, dst, expect,
                   clear_src_grad=False, clear_dst_grad=False):
     """Run ``addgrad`` from a source to a destination variable and verify.

     Two variables are built from ``src`` and ``dst``, each using its own
     array as gradient. After optionally clearing either gradient, the
     source is accumulated into the destination; the resulting gradient
     must equal ``expect`` and be on the same device as the data.

     Args:
         src: Array used as data and gradient of the source variable.
         dst: Array used as data and gradient of the destination variable.
         expect: Expected destination gradient after ``addgrad``.
         clear_src_grad (bool): Clear the source gradient beforehand.
         clear_dst_grad (bool): Clear the destination gradient beforehand.
     """
     source_var = chainer.Variable(src)
     source_var.grad = src
     target_var = chainer.Variable(dst)
     target_var.grad = dst

     pending = ((source_var, clear_src_grad), (target_var, clear_dst_grad))
     for variable, should_clear in pending:
         if should_clear:
             variable.cleargrad()

     target_var.addgrad(source_var)

     array_module = cuda.get_array_module(dst)
     array_module.testing.assert_array_equal(target_var.grad, expect)
     data_device = cuda.get_device_from_array(target_var.data)
     grad_device = cuda.get_device_from_array(target_var.grad)
     self.assertEqual(data_device, grad_device)

コード例 #9

0

ファイルを表示

ファイル: optimizer.py プロジェクト: jekbradbury/chainer

    def prepare(self):
        """Prepares for an update.

        A state dictionary is created through ``init_state`` for every
        parameter that has none yet. For parameters that already have one,
        the arrays stored in it are moved to the CPU or GPU so that they
        follow the device of the parameter array.

        """
        states = self._states
        for name, param in self.target.namedparams():
            if name not in states:
                # Parameter seen for the first time: build a fresh state.
                fresh = {}
                self.init_state(param, fresh)
                states[name] = fresh
                continue

            state = states[name]
            with cuda.get_device_from_array(param.data) as dev:
                if int(dev) != -1:  # gpu
                    cupy = cuda.cupy
                    for key, value in six.iteritems(state):
                        if isinstance(value, numpy.ndarray):
                            state[key] = cuda.to_gpu(value)
                        elif (isinstance(value, cupy.ndarray) and
                              value.device != dev):
                            state[key] = cupy.copy(value)
                else:  # cpu
                    for key, value in six.iteritems(state):
                        if isinstance(value, cuda.ndarray):
                            state[key] = value.get()

コード例 #10

0

ファイルを表示

ファイル: optimizer.py プロジェクト: jekbradbury/chainer

    def accumulate_grads(self, grads):
        """Accumulates gradients from other source.

        Each array in ``grads`` is added in place to the gradient of the
        corresponding parameter of the target link. The incoming array is
        first brought to where the parameter gradient lives: to the CPU for
        NumPy gradients, otherwise to the GPU (copied across devices when
        the two GPU arrays are on different devices).

        Args:
            grads (Iterable): Iterable of gradient arrays to be accumulated.

        .. deprecated:: v1.5
           Use the :meth:`chainer.Link.addgrads` method of the target link
           instead.

        """
        for param, incoming in zip(self.target.params(), grads):
            own_grad = param.grad
            if isinstance(own_grad, numpy.ndarray):
                own_grad += cuda.to_cpu(incoming)
                continue

            with cuda.get_device_from_array(own_grad):
                crosses_devices = (isinstance(incoming, cuda.ndarray) and
                                   own_grad.device != incoming.device)
                if crosses_devices:
                    addend = cuda.copy(incoming, out_device=own_grad.device)
                else:
                    addend = cuda.to_gpu(incoming)
                own_grad += addend

コード例 #11

0

ファイルを表示

ファイル: chainer.py プロジェクト: DeNA/ChainerPruner

    def __call__(self, opt):
        """Add ``self.rate * sign(param)`` to the gradients of pruning targets.

        Only parameters whose name contains ``'gamma'`` are considered. When
        ``self.target_layers`` is not ``None``, the name must additionally
        contain at least one of its entries. A warning is logged when no
        parameter was selected.

        Args:
            opt: Optimizer whose ``target.namedparams()`` are visited.
        """
        count = 0
        for name, param in opt.target.namedparams():
            p, g = param.data, param.grad
            if p is None or g is None:
                # Skip just this parameter; returning here would silently
                # drop every remaining parameter and the final warning.
                continue

            # Links that have a gamma (BatchNormalization) are the pruning
            # targets. If targets are given, only BatchNormalization links
            # whose name contains one of them are used; if targets is None,
            # every BatchNormalization link is used.
            if 'gamma' not in name:
                continue
            if self.target_layers is not None and not any(
                    target in name for target in self.target_layers):
                continue

            # pruning targets
            count += 1
            logger.debug('Lasso: {}'.format(name))

            # TODO(tkato) reporter

            xp = backend.get_array_module(p)
            with cuda.get_device_from_array(p) as dev:
                sign = xp.sign(p)
                if int(dev) == -1:
                    g += self.rate * sign
                else:
                    kernel = cuda.elementwise('T s, T decay', 'T g',
                                              'g += decay * s', 'lasso')
                    kernel(sign, self.rate, g)
        if count == 0:
            logger.warning('Lasso is not apply')

コード例 #12

0

ファイルを表示

    def get_xp(self, array):
        """Return the array module matching the device of ``array``.

        Args:
            array: An array, or a ``Variable`` whose ``data`` is inspected.

        Returns:
            ``np`` when the device id is ``-1``, otherwise ``cuda.cupy``.
        """
        raw = array.data if isinstance(array, Variable) else array

        with cuda.get_device_from_array(raw) as dev:
            if int(dev) == -1:
                return np
            return cuda.cupy

コード例 #13

0

ファイルを表示

ファイル: optimizer.py プロジェクト: nnaoi/chainer

    def _prepare(self, param):
        """Make sure the state exists and its arrays follow ``param.data``.

        When ``self.state`` is ``None`` an empty state is created and
        ``init_state`` is called. Every NumPy/CuPy array stored in the state
        whose device id differs from that of ``param.data`` is then moved to
        the GPU (device id >= 0) or to the CPU.

        Args:
            param: Parameter whose ``data`` determines the device.
        """
        with cuda.get_device_from_array(param.data) as device:
            state = self.state
            if state is None:
                state = self._state = {}
                self.init_state(param)

            target_id = device.id
            move = cuda.to_gpu if target_id >= 0 else cuda.to_cpu
            for name, value in six.iteritems(state):
                if isinstance(value, (numpy.ndarray, cuda.ndarray)):
                    current_id = cuda.get_device_from_array(value).id
                    if current_id != target_id:
                        state[name] = move(value)

コード例 #14

0

ファイルを表示

ファイル: test_tree_lstm.py プロジェクト: MakotoSeto/chainer

def _child_sum_tree_lstm(func, *inputs):
    """Compute a child-sum Tree-LSTM step directly with array operations.

    Args:
        func: Object exposing the sub-links ``W_x``, ``W_h_aio`` and
            ``W_h_f`` whose ``W``/``b`` data are read.
        *inputs: The first half are cell states, followed by hidden states,
            and the last element is the input ``x``.

    Returns:
        tuple: ``(c_next, y)``, the new cell state and the output.
    """
    half = len(inputs) // 2
    cs = inputs[:half]
    hs = inputs[half:-1]
    x = inputs[-1]
    xp = cuda.get_array_module(x)
    with cuda.get_device_from_array(x):
        weight_x = func.W_x.W.data.T
        bias_x = func.W_x.b.data
        weight_h_aio = func.W_h_aio.W.data.T
        W_hf = func.W_h_f.W.data.T

        # Split the stacked weights/biases into per-gate pieces.
        W_xa, W_xi, W_xo, W_xf = xp.split(weight_x, 4, 1)
        b_a, b_i, b_o, b_f = xp.split(bias_x[None, ], 4, 1)
        W_ha, W_hi, W_ho = xp.split(weight_h_aio, 3, 1)

        h_total = sum(hs)
        a = xp.tanh(x.dot(W_xa) + h_total.dot(W_ha) + b_a)
        i = _sigmoid(x.dot(W_xi) + h_total.dot(W_hi) + b_i)
        o = _sigmoid(x.dot(W_xo) + h_total.dot(W_ho) + b_o)
        # One forget gate per child hidden state.
        forget_gates = [
            _sigmoid(x.dot(W_xf) + h.dot(W_hf) + b_f) for h in hs]

        c_next = a * i + sum(f * c for f, c in zip(forget_gates, cs))
        y = o * xp.tanh(c_next)
    return c_next, y

コード例 #15

0

ファイルを表示

ファイル: faster_rcnn.py プロジェクト: zhangxujinsh/AU_R-CNN

    def predict(self, imgs, bbox):  # receives one batch of data
        """Predict AU labels and scores from images and bounding boxes.

        The model is called with ``layers=["fc"]`` inside the device context
        of ``imgs`` and with back-propagation disabled.

        Args:
            imgs: Batch of images passed to ``self.__call__``. Presumably of
                shape ``(B, C, H, W)`` in CHW/RGB format with values in
                ``[0, 255]``, as stated by the earlier documentation; this is
                not checked here.
            bbox: Bounding boxes passed to ``self.__call__``. Presumably of
                shape ``(B, R, 4)``; not checked here.

        Returns:
            tuple: ``(pred_label, roi_scores)``.

            * **pred_label** : Result of ``self.fetch_labels_from_scores``
              applied to the score array.
            * **roi_scores** : The ``data`` array of the first output of
              ``self.__call__``; the inline comment in the code gives its
              shape as ``(R', class_num)``.
        """
        xp = cuda.get_array_module(imgs)

        with cuda.get_device_from_array(imgs), \
                chainer.function.no_backprop_mode():
            roi_scores, _, _ = self.__call__(
                imgs, bbox, layers=["fc"])  # shape = R', class_num
            pred_label = self.fetch_labels_from_scores(xp, roi_scores.data)
        return pred_label, roi_scores.data

コード例 #16

0

ファイルを表示

ファイル: copy.py プロジェクト: kiyomaro927/chainer

 def forward(self, inputs):
     """Move the single input array to ``self.out_device``.

     The device id of the input is remembered in ``self._in_device``.

     Args:
         inputs: One-element sequence holding the input array.

     Returns:
         tuple: One-element tuple with the array on the CPU when
         ``self.out_device`` is ``-1``, otherwise on that GPU.
     """
     (array,) = inputs
     self._in_device = cuda.get_device_from_array(array).id
     if int(self.out_device) != -1:
         return (cuda.to_gpu(array, self.out_device),)
     return (cuda.to_cpu(array),)

コード例 #17

0

ファイルを表示

ファイル: textrec_metrics.py プロジェクト: BenJamesbabala/see-1

    def calc_accuracy(self, x, t):
        """Compute the fraction of predicted words that match their labels.

        The predictions are stacked along a new leading axis, converted to
        class ids with softmax + argmax over axis 2, and transposed. Both
        predictions and labels go through ``self.strip_prediction`` and are
        decoded with ``self.label_to_char`` before being compared as
        strings. The result is also passed to
        ``self.scale_area_loss_factor``.

        Args:
            x: Three-element sequence; only the first element (the
                predictions) is used.
            t: Label array.

        Returns:
            The overall accuracy.
        """
        predictions, _, _ = x
        expanded = [F.expand_dims(prediction, axis=0)
                    for prediction in predictions]
        stacked = F.concat(expanded, axis=0)

        self.xp = cuda.get_array_module(stacked[0], t)
        accuracies = []

        with cuda.get_device_from_array(stacked.data):
            probabilities = F.softmax(stacked, axis=2).data
            class_ids = self.xp.argmax(probabilities, axis=2)
            class_ids = self.xp.transpose(class_ids, (1, 0))

            words = self.strip_prediction(class_ids)
            labels = self.strip_prediction(t)

            num_correct_words = 0
            for word, label in zip(words, labels):
                decoded_word = "".join(map(self.label_to_char, word))
                decoded_label = "".join(map(self.label_to_char, label))
                if decoded_word == decoded_label:
                    num_correct_words += 1

            accuracies.append(num_correct_words / len(labels))

        overall_accuracy = sum(accuracies) / max(len(accuracies), 1)
        self.scale_area_loss_factor(overall_accuracy)
        return overall_accuracy

コード例 #18

0

ファイルを表示

ファイル: gwle_graph_conv_model.py プロジェクト: xiaoye77/chainer-chemistry

    def __call__(self, atom_array, adj, wle_array=None, is_real_node=None):
        """Run the graph convolution layers and the readout.

        ``atom_array`` is embedded with ``self.embed`` when its dtype is
        int32 and used as-is otherwise. When ``self.with_wle`` is set, the
        embedding of ``wle_array`` is mixed in through a sigmoid gate. The
        update layers are then applied ``self.n_update_layers`` times, each
        optionally followed by batch normalization, dropout and activation.

        Args:
            atom_array: Node input; int32 ids or already-embedded features.
            adj: Adjacency input, rescaled with ``rescale_adj`` when
                ``self.scale_adj`` is set.
            wle_array: Input for ``self.embed_wle``; read only when
                ``self.with_wle`` is set.
            is_real_node: Passed through to the readout layers.

        Returns:
            The per-step readouts concatenated on axis 1
            (``concat_hidden``), their sum (``sum_hidden``), or the output of
            the first readout layer applied to the final hidden state.
        """
        self.reset_state()

        if atom_array.dtype == self.xp.int32:
            h = self.embed(atom_array)
        else:
            # TODO: GraphLinear or GraphMLP can be used.
            h = atom_array

        h0 = functions.copy(h, cuda.get_device_from_array(h.data).id)

        # all Combined NLE processes are done here.
        if self.with_wle:
            h_s = self.embed_wle(wle_array)

            # gated sum
            gate = functions.sigmoid(self.gate_W1(h) + self.gate_W2(h_s))
            h = (1.0 - gate) * h + gate * h_s

        additional_kwargs = self.preprocess_addtional_kwargs(
            atom_array, adj, wle_array=wle_array, is_real_node=is_real_node)

        if self.scale_adj:
            adj = rescale_adj(adj)

        readouts = []
        for step in range(self.n_update_layers):
            # With weight tying every step reuses the first layer.
            layer_index = 0 if self.weight_tying else step
            update_layer = self.update_layers[layer_index]
            h = update_layer(h=h, adj=adj, **additional_kwargs)

            if self.use_batchnorm:
                h = self.bnorms[layer_index](h)

            if self.dropout_ratio > 0.:
                h = functions.dropout(h, ratio=self.dropout_ratio)

            if self.activation is not None and step < self.n_activation:
                h = self.activation(h)

            if self.concat_hidden or self.sum_hidden:
                readouts.append(
                    self.readout_layers[step](h=h,
                                              h0=h0,
                                              is_real_node=is_real_node,
                                              **additional_kwargs))

        if self.concat_hidden:
            return functions.concat(readouts, axis=1)
        if self.sum_hidden:
            return functions.sum(functions.stack(readouts), axis=0)
        return self.readout_layers[0](h=h,
                                      h0=h0,
                                      is_real_node=is_real_node)

コード例 #19

0

ファイルを表示

    def localization_net(self, images):
        """Produce ``self.num_timesteps`` localization outputs for ``images``.

        The images go through batch normalization, a convolution stem and
        the residual stages ``rs1_*`` to ``rs3_*``; the resulting feature
        map is stored in ``self.localization_vis_anchor``. After average
        pooling, the LSTM and ``transform_2`` are applied once per timestep
        and every output is reshaped to ``(-1, 2, 3)`` and passed through
        ``rotation_dropout``.

        Args:
            images: Input image batch.

        Returns:
            All per-timestep outputs concatenated along axis 0.
        """
        self.lstm.reset_state()
        self.transform_2.reset_state()

        normalized = self.data_bn(images)
        h = F.relu(self.bn0(self.conv0(normalized)))
        h = F.max_pooling_2d(h, 3, stride=2, pad=1)

        # The rs4_1 / rs4_2 stages are disabled in the original code.
        residual_stages = (self.rs1_1, self.rs1_2,
                           self.rs2_1, self.rs2_2,
                           self.rs3_1, self.rs3_2)
        for stage in residual_stages:
            h = stage(h)

        self.localization_vis_anchor = h

        h = F.average_pooling_2d(h, 5, stride=1)

        localizations = []

        with cuda.get_device_from_array(h.data):
            for _ in range(self.num_timesteps):
                lstm_prediction = F.relu(self.lstm(h))
                transformed = F.reshape(
                    self.transform_2(lstm_prediction), (-1, 2, 3))
                dropped = rotation_dropout(
                    transformed, ratio=self.dropout_ratio)
                localizations.append(dropped)

        return F.concat(localizations, axis=0)

コード例 #20

0

ファイルを表示

ファイル: evaluator.py プロジェクト: DeepLearningT/see_tf

    def calc_accuracy(self, predictions, labels):
        """Update the word counters and return the last compared pair.

        The predictions are concatenated along a new axis 2 and separated
        along axis 0; the labels are reshaped to
        ``(1, self.args.timesteps, -1)`` and separated along axis 2. For each
        pair, class ids are obtained with softmax + argmax, predictions and
        labels go through ``self.strip_prediction`` and are decoded with
        ``self.label_to_char``. ``self.num_words`` is incremented for every
        compared pair and ``self.num_correct_words`` for every match.

        Args:
            predictions: Iterable of individual predictions.
            labels: Label array.

        Returns:
            tuple: ``(word, label)``, the decoded strings of the last
            compared pair, or ``(None, None)`` when nothing was compared.
        """
        # concat all individual predictions and slice for each time step
        batch_predictions = F.concat(
            [F.expand_dims(p, axis=2) for p in predictions], axis=2)

        t = F.reshape(labels, (1, self.args.timesteps, -1))

        # Dedicated names for the returned pair: the loop variables below
        # must not shadow them (or the ``labels`` argument).
        last_word, last_label = None, None
        with cuda.get_device_from_array(batch_predictions.data):
            pairs = zip(F.separate(batch_predictions, axis=0),
                        F.separate(t, axis=2))
            for prediction, step_labels in pairs:
                classification = F.softmax(prediction, axis=2)
                classification = classification.data
                classification = self.xp.argmax(classification, axis=2)

                words = self.strip_prediction(classification)
                stripped_labels = self.strip_prediction(step_labels.data)

                for word_ids, label_ids in zip(words, stripped_labels):
                    last_word = "".join(map(self.label_to_char, word_ids))
                    last_label = "".join(map(self.label_to_char, label_ids))
                    if last_word == last_label:
                        self.num_correct_words += 1
                    self.num_words += 1

        return last_word, last_label

コード例 #21

0

ファイルを表示

ファイル: updater.py プロジェクト: JirenJin/chainer-DRCN

def get_gaussian_noise(X, std):
    """Return a Gaussian-perturbed copy of ``X``, clipped to ``[0, 1]``.

    The values are moved to the CPU, used as the mean of a normal
    distribution with scale ``std``, cast to float32, clipped, and sent
    back to the GPU with the device looked up from ``X.data``.

    Args:
        X: Input whose ``data`` attribute is used for the device lookup.
            NOTE(review): ``X.data`` and ``cuda.to_cpu(X)`` expect different
            kinds of object for a Variable vs. an array; confirm what the
            callers pass.
        std: Standard deviation of the noise.
    """
    device = cuda.get_device_from_array(X.data)
    host_values = cuda.to_cpu(X)
    noisy = np.random.normal(host_values, scale=std).astype('f')
    clipped = np.clip(noisy, 0., 1.)
    return cuda.to_gpu(clipped, device=device)

コード例 #22

0

ファイルを表示

ファイル: updater.py プロジェクト: JirenJin/chainer-DRCN

def get_impulse_noise(X, level):
    """Return ``X`` with elements randomly zeroed out.

    The values are moved to the CPU and multiplied by a float32 mask drawn
    from a binomial distribution (one trial, success probability
    ``1 - level``), then sent back to the GPU with the device looked up
    from ``X.data``.

    Args:
        X: Input whose ``data`` attribute is used for the device lookup.
            NOTE(review): ``X.data`` and ``cuda.to_cpu(X)`` expect different
            kinds of object for a Variable vs. an array; confirm what the
            callers pass.
        level: Probability that an element is zeroed.
    """
    keep_probability = 1. - level
    device = cuda.get_device_from_array(X.data)
    host_values = cuda.to_cpu(X)
    mask = np.random.binomial(
        1, keep_probability, size=host_values.shape).astype('f')
    return cuda.to_gpu(host_values * mask, device=device)

コード例 #23

0

ファイルを表示

    def __call__(self, images):
        """Produce ``self.num_timesteps`` localization outputs for ``images``.

        The images pass through a convolution stem and the stages ``rs1`` to
        ``rs3`` with pooling in between; the feature map before the last
        pooling is stored in ``self.vis_anchor``. The LSTM and
        ``transform_2`` are then applied once per timestep, and each output
        is reshaped to ``(-1, 2, 3)`` and passed through
        ``rotation_dropout``.

        Args:
            images: Input image batch.

        Returns:
            All per-timestep outputs concatenated along axis 0.
        """
        self.lstm.reset_state()

        stem = F.relu(self.bn0(self.conv0(images)))
        h = F.average_pooling_2d(stem, 2, stride=2)

        h = F.max_pooling_2d(self.rs1(h), 2, stride=2)
        h = F.max_pooling_2d(self.rs2(h), 2, stride=2)

        # The rs4 stage is disabled in the original code.
        h = self.rs3(h)
        self.vis_anchor = h
        h = F.average_pooling_2d(h, 5, stride=2)

        localizations = []
        # Predict N two-dimensional affine transformation matrices A.
        with cuda.get_device_from_array(h.data):
            for _ in range(self.num_timesteps):
                lstm_prediction = F.relu(self.lstm(h))
                transformed = F.reshape(
                    self.transform_2(lstm_prediction), (-1, 2, 3))
                # Rotation dropout, meant to prevent excessive rotation.
                dropped = rotation_dropout(
                    transformed, ratio=self.dropout_ratio)
                localizations.append(dropped)

        return F.concat(localizations, axis=0)

コード例 #24

0

ファイルを表示

    def calc_accuracy(self, x, t):
        """Average the word accuracy over all timesteps.

        ``t`` is reshaped to ``(batch_size, self.num_timesteps, -1)`` and
        separated along axis 1. For each timestep the predictions (stacked
        along a new leading axis when given as a list) are converted to
        class ids with softmax + argmax over axis 2 and transposed. Both
        predictions and labels go through ``self.strip_prediction`` and are
        decoded with ``self.label_to_char`` before being compared as
        strings. The result is also passed to
        ``self.scale_area_loss_factor``.

        Args:
            x: Three-element sequence; only the first element (the
                per-timestep predictions) is used.
            t: Label array.

        Returns:
            Mean of the per-timestep accuracies.
        """
        batch_predictions, _, _ = x
        self.xp = cuda.get_array_module(batch_predictions[0], t)
        batch_size = t.shape[0]
        t = F.reshape(t, (batch_size, self.num_timesteps, -1))
        accuracies = []

        per_step = zip(batch_predictions, F.separate(t, axis=1))
        for predictions, step_labels in per_step:
            if isinstance(predictions, list):
                expanded = [F.expand_dims(p, axis=0) for p in predictions]
                predictions = F.concat(expanded, axis=0)
            with cuda.get_device_from_array(predictions.data):
                probabilities = F.softmax(predictions, axis=2).data
                class_ids = self.xp.argmax(probabilities, axis=2)
                class_ids = self.xp.transpose(class_ids, (1, 0))

                words = self.strip_prediction(class_ids)
                labels = self.strip_prediction(step_labels.data)

                num_correct_words = 0
                for word, label in zip(words, labels):
                    decoded_word = "".join(map(self.label_to_char, word))
                    decoded_label = "".join(map(self.label_to_char, label))
                    if decoded_word == decoded_label:
                        num_correct_words += 1

                accuracies.append(num_correct_words / len(labels))

        overall_accuracy = sum(accuracies) / max(len(accuracies), 1)
        self.scale_area_loss_factor(overall_accuracy)
        return overall_accuracy

コード例 #25

0

ファイルを表示

ファイル: optimizer.py プロジェクト: kiyomaro927/chainer

 def __call__(self, rule, param):
     """Clip ``param.grad`` in place to the configured bounds.

     Does nothing when the parameter has no gradient.

     Args:
         rule: Unused by this method.
         param: Parameter whose gradient is clipped to
             ``[self.lower_bound, self.upper_bound]``.
     """
     grad = param.grad
     if grad is not None:
         array_module = cuda.get_array_module(grad)
         with cuda.get_device_from_array(grad):
             array_module.clip(
                 grad, self.lower_bound, self.upper_bound, out=grad)

コード例 #26

0

ファイルを表示

ファイル: variable.py プロジェクト: watarufk/chainer

    def zerograd(self):
        """Fill the gradient array with zeros.

        A ``DeprecationWarning`` is always emitted. Nothing else happens
        when the variable has no data. If there is no gradient variable yet,
        a zero array shaped like the data is assigned as the gradient;
        otherwise the existing gradient variable is unchained from the
        graph and its array is filled with zeros.

        .. deprecated:: v1.15
           Use :meth:`cleargrad` instead.

        """
        message = (
            'Variable.zerograd is deprecated. Use Variable.cleargrad instead.')
        warnings.warn(message, DeprecationWarning)

        if self.data is None:
            return

        with cuda.get_device_from_array(self.data) as dev:
            grad_var = self._grad_var
            if grad_var is not None:
                grad_var.unchain()
                grad_var.data.fill(0)
            else:
                xp = cuda.cupy if dev.id != -1 else numpy
                self.grad = xp.zeros_like(self.data)

コード例 #27

0

ファイルを表示

    def __call__(self, atom_array, adj):
        """Forward propagation

        Args:
            atom_array (numpy.ndarray): minibatch of molecular which is
                represented with atom IDs (representing C, O, S, ...)
                `atom_array[mol_index, atom_index]` represents `mol_index`-th
                molecule's `atom_index`-th atomic number
            adj (numpy.ndarray): minibatch of adjancency matrix with edge-type
                information

        Returns:
            ~chainer.Variable: minibatch of fingerprint
        """
        # reset state
        self.update_layer.reset_state()

        # int32 input is treated as ids and embedded; anything else is used
        # directly as features.
        is_id_array = (atom_array.dtype == numpy.int32
                       or atom_array.dtype == cuda.cupy.int32)
        if is_id_array:
            h = self.embed(atom_array)  # (minibatch, max_num_atoms)
        else:
            h = atom_array
        h0 = functions.copy(h, cuda.get_device_from_array(h.data).id)

        readouts = []
        for step in range(self.n_layers):
            h = self.update(h, adj, step)
            if self.concat_hidden:
                readouts.append(self.readout(h, h0, step))

        if not self.concat_hidden:
            return self.readout(h, h0, 0)
        return functions.concat(readouts, axis=2)

コード例 #28

0

ファイルを表示

    def __call__(self, sparse_batch, is_real_node=None):
        """Run the message layers on a sparse batch and read out the result.

        ``sparse_batch.x`` is embedded with ``self.embed`` when its dtype is
        int32 and passed through ``self.first_mlp`` otherwise. The update
        layers are applied ``self.n_message_layers`` times, with a ReLU
        after every step except the last.

        Args:
            sparse_batch: Object providing ``x``, ``edge_index`` and
                ``batch``.
            is_real_node: Passed through to the readout layers.

        Returns:
            The node features when ``self.node_embedding`` is set; otherwise
            the per-step readouts concatenated on axis 1
            (``concat_hidden``) or the first readout layer's output.
        """
        node_input = sparse_batch.x
        if node_input.dtype == self.xp.int32:
            h = self.embed(sparse_batch.x)  # (minibatch, max_num_atoms)
        else:
            h = self.first_mlp(sparse_batch.x)

        h0 = functions.copy(h, cuda.get_device_from_array(h.data).id)

        readouts = []
        last_step = self.n_message_layers - 1
        for step in range(self.n_message_layers):
            # With weight tying every step reuses the first layer.
            layer_index = 0 if self.weight_tying else step
            h = self.update_layers[layer_index](
                h, sparse_batch.edge_index)
            if step != last_step:
                h = functions.relu(h)
            if self.concat_hidden:
                readouts.append(
                    self.readout_layers[step](h, h0, is_real_node))

        if self.node_embedding:
            return h

        if self.concat_hidden:
            return functions.concat(readouts, axis=1)
        # NOTE(review): this call passes ``sparse_batch.batch`` while the
        # per-step readout above does not; confirm which argument list the
        # readout layers expect.
        return self.readout_layers[0](h, sparse_batch.batch, h0, is_real_node)

コード例 #29

0

ファイルを表示

    def I_up_loss(self, z, lossfun=None, converter=concat_examples):
        """Calculate I_up_loss(z, z_test)

        The gradient for ``z`` is computed and registered under the
        ``STATE_GRAD_ORIGINAL`` key. For every parameter of the target, the
        negated element-wise product of the stored ``STATE_HINV_V`` entry
        and that gradient is summed, and all sums are added up.

        Note that `calc_s_test` must be executed beforehand, z_test is used
        in this method.

        Args:
            z: Input handed to ``self._calc_and_register_grad``.
            lossfun: Loss function; ``self.target`` is used when ``None``.
            converter: Converter handed to ``self._calc_and_register_grad``.

        Returns:
            The accumulated loss value.

        """
        # TODO: currently, z must be 1 minibatch size.
        # Change this to use `elementwise_grad` so that we can calculate
        # I_up_loss for each z much more efficiently.

        # use self.target.__call__ as loss function if not set.
        loss_function = self.target if lossfun is None else lossfun

        # [Note] Use `grad_original` state
        self._calc_and_register_grad(
            z, self.STATE_GRAD_ORIGINAL, loss_function, converter)
        states = self._infl_states

        final_loss = 0
        for name, param in self.target.namedparams():
            with cuda.get_device_from_array(param.data):
                state = states[name]
                xp = cuda.get_array_module(param.data)
                hinv_v = state[self.STATE_HINV_V]
                grad_original = state[self.STATE_GRAD_ORIGINAL]
                final_loss += xp.sum(-hinv_v * grad_original)
        return final_loss

コード例 #30

0

ファイルを表示

ファイル: optimizer.py プロジェクト: MakotoSeto/chainer

    def _prepare(self, param):
        """Create the state if needed and align its arrays with ``param``.

        An empty state is created and ``init_state`` called when
        ``self.state`` is ``None``. NumPy/CuPy arrays in the state whose
        device id differs from that of ``param.data`` are moved to the GPU
        when the parameter's device id is non-negative, and to the CPU
        otherwise. Non-array entries are left untouched.

        Args:
            param: Parameter whose ``data`` determines the device.
        """
        with cuda.get_device_from_array(param.data) as device:
            state = self.state
            if state is None:
                state = self._state = {}
                self.init_state(param)

            for key, entry in six.iteritems(state):
                is_array = isinstance(entry, (numpy.ndarray, cuda.ndarray))
                if not is_array:
                    continue
                entry_device = cuda.get_device_from_array(entry)
                if entry_device.id == device.id:
                    continue
                if device.id >= 0:
                    state[key] = cuda.to_gpu(entry)
                else:
                    state[key] = cuda.to_cpu(entry)

コード例 #31

0

ファイルを表示

ファイル: ggnn_dev_jknet.py プロジェクト: Minys233/GCN-BMP

    def __call__(self, atom_array, adj):
        """Run the update layers and read out a graph-level output.

        ``atom_array`` is embedded with ``self.embed`` when its dtype is
        int32 and used as-is otherwise. ``self.update`` is applied
        ``self.n_layers`` times, followed by dropout whenever
        ``self.dropout_rate`` is non-zero.

        Args:
            atom_array: Node input; int32 ids or already-embedded features.
            adj: Adjacency input passed to ``self.update``.

        Returns:
            The per-step readouts concatenated on axis 1
            (``concat_hidden``), the readout of the aggregated hidden states
            (``layer_aggr``), or the readout of the final hidden state.
        """
        # The state reset of the update layers is disabled in the original
        # code.
        if atom_array.dtype == self.xp.int32:
            h = self.embed(atom_array)  # (minibatch, max_num_atoms)
        else:
            h = atom_array
        h0 = functions.copy(h, cuda.get_device_from_array(h.data).id)

        readouts = []
        hidden_states = []
        for step in range(self.n_layers):
            h = self.update(h, adj, step)

            if self.dropout_rate != 0.0:
                h = functions.dropout(h, ratio=self.dropout_rate)

            if self.concat_hidden:
                readouts.append(self.readout(h, h0, step))

            if self.layer_aggr:
                hidden_states.append(h)

        if self.concat_hidden:
            return functions.concat(readouts, axis=1)
        if self.layer_aggr:
            aggregated = self.aggr(hidden_states)
            return self.readout(aggregated, h0, 0)
        return self.readout(h, h0, 0)

コード例 #32

0

ファイルを表示

ファイル: ggnn_chin.py プロジェクト: Minys233/GCN-BMP

    def __call__(self, atom_array, adj, is_real_node=None):
        """Forward propagation

        Args:
            atom_array (numpy.ndarray): minibatch of molecular which is
                represented with atom IDs (representing C, O, S, ...)
                `atom_array[mol_index, atom_index]` represents `mol_index`-th
                molecule's `atom_index`-th atomic number
            adj (numpy.ndarray): minibatch of adjancency matrix with edge-type
                information
            is_real_node (numpy.ndarray): 2-dim array (minibatch, num_nodes).
                1 for real node, 0 for virtual node.
                If `None`, all node is considered as real node.

        Returns:
            ~chainer.Variable: minibatch of fingerprint
        """
        # reset state
        self.reset_state()

        # int32 input is treated as ids and embedded; anything else is used
        # directly as features.
        needs_embedding = atom_array.dtype == self.xp.int32
        # (minibatch, max_num_atoms)
        h = self.embed(atom_array) if needs_embedding else atom_array
        h0 = functions.copy(h, cuda.get_device_from_array(h.data).id)

        readouts = []
        for step in range(self.n_layers):
            # With weight tying every step reuses the first layer.
            layer_index = 0 if self.weight_tying else step
            h = self.update_layers[layer_index](h, adj)
            if self.concat_hidden:
                readouts.append(
                    self.readout_layers[step](h, h0, is_real_node))

        if not self.concat_hidden:
            return self.readout_layers[0](h, h0, is_real_node)
        return functions.concat(readouts, axis=1)

コード例 #33

0

ファイルを表示

ファイル: optimizer.py プロジェクト: nnaoi/chainer

 def __call__(self, rule, param):
     """Limit every gradient element to the configured interval, in place.

     Returns without doing anything when ``param.grad`` is ``None``.

     Args:
         rule: Unused by this method.
         param: Parameter whose gradient is clipped between
             ``self.lower_bound`` and ``self.upper_bound``.
     """
     gradient = param.grad
     if gradient is None:
         return
     array_module = cuda.get_array_module(gradient)
     lower, upper = self.lower_bound, self.upper_bound
     with cuda.get_device_from_array(gradient):
         array_module.clip(gradient, lower, upper, out=gradient)

コード例 #34

0

ファイルを表示

def _concat_arrays(arrays, padding):
    """Stack ``arrays`` along a new leading axis.

    When ``padding`` is not ``None`` the work is delegated to
    ``_concat_arrays_with_padding``. Otherwise every array gets a new
    leading axis and the results are concatenated on the device of the
    first array.
    """
    if padding is None:
        first = arrays[0]
        xp = cuda.get_array_module(first)
        with cuda.get_device_from_array(first):
            expanded = [array[None] for array in arrays]
            return xp.concatenate(expanded)

    return _concat_arrays_with_padding(arrays, padding)

コード例 #35

0

ファイルを表示

ファイル: copy.py プロジェクト: MakotoSeto/chainer

 def forward_gpu(self, x):
     """Copy the single input array to ``self.out_device``.

     The device of the input is remembered in ``self._in_device``. An
     ``out_device`` of -1 means the array is copied to the CPU.

     Returns:
         tuple: 1-tuple holding the copied array.
     """
     self.retain_inputs(())
     source = x[0]
     self._in_device = cuda.get_device_from_array(source)
     if self.out_device != -1:
         return cuda.copy(source, out_device=self.out_device),
     return cuda.to_cpu(source),

コード例 #36

0

ファイルを表示

 def init_state(self, param):
     """Create the zero-filled ``'v'`` state entry for ``param``.

     The array is allocated with the shape/dtype of ``param.data`` on the
     same device. If ``ia.all_ready`` accepts it, the entry is replaced by
     an ``ia.array`` built with ``itype=ia.ideep4py.wgt_array``.
     """
     data = param.data
     xp = cuda.get_array_module(data)
     with cuda.get_device_from_array(data):
         velocity = xp.zeros_like(data)
     self.state['v'] = velocity
     if ia.all_ready((velocity, )):
         self.state['v'] = ia.array(velocity,
                                    itype=ia.ideep4py.wgt_array)

コード例 #37

0

ファイルを表示

ファイル: test_tree_lstm.py プロジェクト: lanpa/chainer-tensorboard-example

def _child_sum_tree_lstm(func, *inputs):
    """Compute a child-sum TreeLSTM step directly on arrays.

    ``inputs`` is laid out as: the first half are the child cell states,
    the remaining entries except the last are the child hidden states, and
    the last entry is the input ``x``. Weights and biases are read from
    ``func.W_x``, ``func.W_h_aio`` and ``func.W_h_f``.

    Returns:
        tuple: ``(c_next, y)``, the new cell state and the output.
    """
    half = len(inputs) // 2
    cs, hs, x = inputs[:half], inputs[half:-1], inputs[-1]
    xp = cuda.get_array_module(x)
    with cuda.get_device_from_array(x):
        # Input-side weights/bias hold four gates (a, i, o, f) side by side.
        W_xa, W_xi, W_xo, W_xf = xp.split(func.W_x.W.data.T, 4, 1)
        b_a, b_i, b_o, b_f = xp.split(func.W_x.b.data[None, ], 4, 1)
        # Hidden-side weights: a, i, o share one matrix; f has its own.
        W_ha, W_hi, W_ho = xp.split(func.W_h_aio.W.data.T, 3, 1)
        W_hf = func.W_h_f.W.data.T

        h_total = sum(hs)
        a = xp.tanh(x.dot(W_xa) + h_total.dot(W_ha) + b_a)
        i = _sigmoid(x.dot(W_xi) + h_total.dot(W_hi) + b_i)
        o = _sigmoid(x.dot(W_xo) + h_total.dot(W_ho) + b_o)
        # One forget gate per child, driven by that child's hidden state.
        f_list = [_sigmoid(x.dot(W_xf) + h.dot(W_hf) + b_f) for h in hs]

        c_next = a * i + sum(f * c for f, c in zip(f_list, cs))
        y = o * xp.tanh(c_next)
    return c_next, y

コード例 #38

0

ファイルを表示

ファイル: variable.py プロジェクト: kiyomaro927/chainer

    def zerograd(self):
        """Fill the gradient array with zeros.

        The gradient variable is unchained from the computational graph by
        this method, because zero-filling breaks the backprop validity.
        Emits a ``DeprecationWarning`` on every call and does nothing else
        when the variable has no data.

        .. deprecated:: v1.15
           Use :meth:`cleargrad` instead.

        """
        warnings.warn(
            'Variable.zerograd is deprecated. Use Variable.cleargrad instead.',
            DeprecationWarning)

        data = self.data
        if data is None:
            return

        with cuda.get_device_from_array(data) as dev:
            grad_var = self._grad_var
            if grad_var is not None:
                # Existing gradient: detach it from the graph, zero in place.
                grad_var.unchain()
                grad_var.data.fill(0)
                return

            # No gradient yet: allocate zeros with the matching array module.
            if dev.id == -1:
                xp = numpy
            else:
                xp = cuda.cupy
            self.grad = xp.zeros_like(data)

コード例 #39

0

ファイルを表示

ファイル: gradient_clipping.py プロジェクト: asi1024/chainer

 def __call__(self, opt):
     """Rescale all gradients so their global L2 norm is at most threshold.

     The scaling rate is ``threshold / norm``. On CPU the method returns
     early when no clipping is needed; on other devices the rate is capped
     at 1 and the multiplication is always performed.
     """
     grads = [p.grad for p in opt.target.params(False)]
     sqnorm = _sum_sqnorm(grads)
     with cuda.get_device_from_array(sqnorm) as dev:
         xp = backend.get_array_module(sqnorm)
         rate = self.threshold / xp.sqrt(sqnorm)
         if int(dev) != -1:
             # Off the CPU, cap the rate at 1 and always multiply.
             rate = rate.clip(None, 1)
         elif rate >= 1:
             # On the CPU, skip the update when nothing needs clipping.
             return
     for param in opt.target.params(False):
         gradient = param.grad
         with cuda.get_device_from_array(gradient):
             gradient *= rate

コード例 #40

0

ファイルを表示

    def __call__(self, atom_array, adj):
        """Run the update layers and read out a graph-level representation.

        int32 ``atom_array`` input is embedded first; any other dtype is
        used directly as the initial hidden state. When ``readout_func`` is
        ``'ggnn'`` every readout layer additionally receives a copy of the
        initial hidden state as ``h0``.

        Returns:
            The per-step readouts concatenated along axis 1 when
            ``concat_hidden`` is set, otherwise the first readout layer
            applied to the final hidden state.
        """
        self.reset_state()
        h = (self.embed(atom_array)
             if atom_array.dtype == self.xp.int32 else atom_array)

        if self.readout_func != 'ggnn':
            readout_layers = self.readout_layers
        else:
            # Bind the initial state so readouts can be called with h only.
            device_id = cuda.get_device_from_array(h.data).id
            h0 = functions.copy(h, device_id)
            readout_layers = [
                partial(layer, h0=h0) for layer in self.readout_layers]

        outputs = []
        for step in range(self.n_layers):
            # With weight tying every step reuses the first update layer.
            if self.weight_tying:
                layer_index = 0
            else:
                layer_index = step
            h = self.update_layers[layer_index](h, adj)
            if self.concat_hidden:
                outputs.append(readout_layers[step](h))

        if not self.concat_hidden:
            return readout_layers[0](h)
        return functions.concat(outputs, axis=1)

コード例 #41

0

ファイルを表示

    def __call__(self, images):
        """Extract features from ``images`` and predict localizations.

        The feature map after ``rs3`` is stored in ``self.vis_anchor``.
        For each of ``num_timesteps`` steps the LSTM is fed the same pooled
        feature map, its output is transformed and reshaped to
        ``(-1, 2, 3)``, and passed through ``rotation_dropout``.

        Returns:
            All per-step results concatenated along axis 0.
        """
        self.lstm.reset_state()
        self.transform_2.reset_state()

        features = F.relu(self.bn0(self.conv0(images)))
        features = F.average_pooling_2d(features, 2, stride=2)

        features = F.max_pooling_2d(self.rs1(features), 2, stride=2)
        features = F.max_pooling_2d(self.rs2(features), 2, stride=2)

        features = self.rs3(features)
        self.vis_anchor = features
        features = F.average_pooling_2d(features, 5, stride=2)

        localizations = []
        with cuda.get_device_from_array(features.data):
            for _ in range(self.num_timesteps):
                hidden = F.relu(self.lstm(features))
                affine = F.reshape(self.transform_2(hidden), (-1, 2, 3))
                localizations.append(
                    rotation_dropout(affine, ratio=self.dropout_ratio))

        return F.concat(localizations, axis=0)

コード例 #42

0

ファイルを表示

ファイル: ggnn_dev.py プロジェクト: Minys233/GCN-BMP

    def __call__(self, atom_array, adj):
        """Run ``n_layers`` update steps and return a graph-level output.

        Side effects: ``self.atoms_list`` collects the hidden state after
        every step and ``self.g_vec_list`` collects the readout of every
        step; both are reset at the start of each call.

        Returns:
            When ``concat_hidden`` is set, the per-step readouts
            concatenated along axis 1. Otherwise the sum of the final
            hidden state over axis 1.
        """
        # reset state
        self.atoms_list = []
        self.g_vec_list = []
        self.update_layer.reset_state()
        # Only int32 input is embedded; anything else is used as features.
        if atom_array.dtype == self.xp.int32:
            h = self.embed(atom_array)  # (minibatch, max_num_atoms)
        else:
            h = atom_array
        # Copy of the initial hidden state on the same device, for readout.
        h0 = functions.copy(h, cuda.get_device_from_array(h.data).id)
        g_list = []
        # Only consumed by the commented-out att_readout call below.
        h_list = []
        for step in range(self.n_layers):
            h = self.update(h, adj, step)

            if self.dropout_rate != 0.0:
                h = functions.dropout(h, ratio=self.dropout_rate)

            if self.concat_hidden:
                g = self.readout(h, h0, step)
                g_list.append(g)

            h_list.append(h)
            self.atoms_list.append(h)
            # NOTE(review): with concat_hidden this repeats the readout call
            # made just above for the same step - confirm this is intended.
            g_vec = self.readout(h, h0, step)
            self.g_vec_list.append(g_vec)

        if self.concat_hidden:
            return functions.concat(g_list, axis=1)
        else:
            # NOTE(review): this readout result is overwritten two lines
            # below, so the returned value is the plain sum over axis 1.
            g = self.readout(h, h0, 0)
            # g = self.att_readout(h, h_list, 0)
            g = functions.sum(h, axis=1)
            return g

コード例 #43

0

ファイルを表示

ファイル: optim.py プロジェクト: musyoku/adversarial-autoencoder

def _sum_sqnorm(arr):
	"""Return the sum of squared L2 norms of all arrays in ``arr``.

	Squared norms are first accumulated per device id and the per-device
	totals are then converted to ``float`` and added together.
	"""
	per_device = collections.defaultdict(float)
	for x in arr:
		with cuda.get_device_from_array(x) as dev:
			flat = x.ravel()
			per_device[int(dev)] += flat.dot(flat)
	total = 0
	for partial_sum in six.itervalues(per_device):
		total += float(partial_sum)
	return total

コード例 #44

0

ファイルを表示

ファイル: optimizer.py プロジェクト: jekbradbury/chainer

 def __call__(self, opt):
     """Scale all gradients down when their global L2 norm exceeds threshold.

     Gradients are multiplied in place by ``threshold / norm`` only when
     that rate is below 1.
     """
     sqnorm = _sum_sqnorm([p.grad for p in opt.target.params()])
     rate = self.threshold / numpy.sqrt(sqnorm)
     # Written as "not <" so that a NaN rate also leaves gradients untouched.
     if not rate < 1:
         return
     for param in opt.target.params():
         gradient = param.grad
         with cuda.get_device_from_array(gradient):
             gradient *= rate

コード例 #45

0

ファイルを表示

ファイル: optimizer.py プロジェクト: MakotoSeto/chainer

 def __call__(self, rule, param):
     """Add ``rate * param.data`` to ``param.grad`` in place.

     On CPU this is a plain array update; otherwise an elementwise kernel
     named ``weight_decay`` performs the same update. ``rule`` is unused.
     """
     weights, gradient = param.data, param.grad
     with cuda.get_device_from_array(weights) as dev:
         if int(dev) != -1:
             decay_kernel = cuda.elementwise(
                 'T p, T decay', 'T g', 'g += decay * p', 'weight_decay')
             decay_kernel(weights, self.rate, gradient)
         else:
             gradient += self.rate * weights

コード例 #46

0

ファイルを表示

ファイル: optimizer.py プロジェクト: jekbradbury/chainer

 def __call__(self, opt):
     """Add ``rate * data`` to the gradient of every target parameter.

     The update is done in place: directly on CPU, and through
     ``self.kernel()`` on other devices.
     """
     decay = self.rate
     for param in opt.target.params():
         weights, gradient = param.data, param.grad
         with cuda.get_device_from_array(weights) as dev:
             if int(dev) != -1:
                 self.kernel()(weights, decay, gradient)
             else:
                 gradient += decay * weights

コード例 #47

0

ファイルを表示

ファイル: optimizer.py プロジェクト: jekbradbury/chainer

 def __call__(self, opt):
     """Add noise from ``self.noise_func`` to every parameter gradient.

     The noise is generated inside the gradient's device context and added
     in place: directly on CPU, through ``self.kernel()`` otherwise.
     """
     for param in opt.target.params():
         gradient = param.grad
         xp = cuda.get_array_module(gradient)
         with cuda.get_device_from_array(gradient) as dev:
             noise = self.noise_func(
                 xp, gradient.shape, gradient.dtype, self, opt)
             if int(dev) != -1:
                 self.kernel()(noise, gradient)
             else:
                 gradient += noise

コード例 #48

0

ファイルを表示

ファイル: utils.py プロジェクト: kzky/works

def add_normal_noise(h, sigma=0.03):
    """Randomly add zero-mean Gaussian noise to a variable.

    A single draw of ``np.random.randint(0, 2)`` decides whether noise is
    applied. When it is, float32 noise with standard deviation ``sigma``
    and the shape of ``h.data`` is generated on the host, moved to the
    device that holds ``h.data`` if needed, and added to ``h``.

    Args:
        h: Variable to perturb; its ``data`` attribute supplies the shape
            and the device.
        sigma (float): Standard deviation of the noise.

    Returns:
        ``h + noise`` when noise was applied, otherwise ``h`` unchanged.
    """
    if np.random.randint(0, 2):
        n = np.random.normal(0, sigma, h.data.shape).astype(np.float32)
        # Query the device of the underlying array, not of the Variable:
        # get_device_from_array only recognises device arrays, so passing
        # the wrapper would always report the CPU and leave the noise on
        # the host even when ``h`` lives on a GPU.
        device = cuda.get_device_from_array(h.data)
        if device.id == -1:
            n_ = Variable(n)
        else:
            n_ = Variable(cuda.to_gpu(n, device.id))
        h = h + n_
    return h

コード例 #49

0

ファイルを表示

ファイル: variable.py プロジェクト: delta2323/chainer

    def addgrad(self, var):
        """Accumulates the gradient array from given source variable.

        This method adds the gradient of a given variable to the gradient of
        this variable. The accumulation is even done across the host and
        different devices. If this variable has uninitialized data/grad arrays,
        this method initializes it with the shape of the given variable and
        then accumulates the gradient.

        Args:
            var (Variable): Source variable.

        """
        src = var._node._grad
        if src is None:
            # Nothing to accumulate.
            return

        if self.data is None:
            self.initialize(var.shape)
        dst = self._node._grad

        src_dev = cuda.get_device_from_array(src)
        dst_dev = cuda.get_device_from_array(self.data)

        if src_dev.id != dst_dev.id:
            # Bring the source gradient to the destination device first.
            if dst_dev.id < 0:
                moved = cuda.to_cpu(src)
            else:
                moved = cuda.to_gpu(src, device=dst_dev)

            if dst is None:
                self._node.grad = moved
                return
            with dst_dev:
                dst += moved
            return

        # Same device: copy when there is no gradient yet, else add in place.
        with dst_dev:
            if dst is not None:
                dst += src
            else:
                xp = cuda.get_array_module(src)
                self._node.grad = xp.copy(src)

コード例 #50

0

ファイルを表示

ファイル: base_calculator.py プロジェクト: ir5/chainer-chemistry

 def __init__(self, model, target_extractor=None, output_extractor=None,
              device=None, logger=None):
     """Store the model and the helpers used by the calculator.

     Args:
         model (chainer.Chain): Model to run.
         target_extractor: Stored as ``self.target_extractor``.
         output_extractor: Stored as ``self.output_extractor``.
         device: Device id to use. When ``None`` the id is inferred from
             the arrays of the model's parameters (-1 if none of them is
             a device array).
         logger: Logger to use; defaults to ``getLogger(__name__)``.
     """
     self.model = model  # type: chainer.Chain
     if device is not None:
         self._device = device
     else:
         # get_device_from_array only recognises raw device arrays, so it
         # must be handed each parameter's array; passing the Parameter
         # objects themselves would always resolve to the CPU (-1).
         param_arrays = [param.data for param in model.params()]
         self._device = cuda.get_device_from_array(*param_arrays).id
     self.target_extractor = target_extractor
     self.output_extractor = output_extractor
     self.logger = logger or getLogger(__name__)

コード例 #51

0

ファイルを表示

ファイル: optim.py プロジェクト: musyoku/adversarial-autoencoder

	def __call__(self, opt):
		"""Scale all gradients down when their global L2 norm exceeds threshold.

		Returns immediately when the norm is zero. Otherwise gradients are
		multiplied in place by ``threshold / norm`` only when that rate is
		below 1.
		"""
		grads = [p.grad for p in opt.target.params(False)]
		norm = np.sqrt(_sum_sqnorm(grads))
		if norm == 0:
			return
		rate = self.threshold / norm
		# Written as "not <" so that a NaN rate also leaves gradients untouched.
		if not rate < 1:
			return
		for param in opt.target.params(False):
			gradient = param.grad
			with cuda.get_device_from_array(gradient):
				gradient *= rate

コード例 #52

0

ファイルを表示

ファイル: anchor_target_layer.py プロジェクト: quan821223/pulmonary-nodules-MaskRCNN

    def __call__(self, feat_h, feat_w, gt_boxes, img_info):
        r"""Calculate classification labels and bbox regression targets.

        Args:
            feat_h (int): The height of feature map.
            feat_w (int): The width of feature map.
            gt_boxes (:class:`~chainer.Variable`): The ground truth bounding
                boxes and their class labels. The shape should be
                :math:`(1, n_gt_boxes, 5)` and the batchsize should be 1.
                Each 5-dimensional vector has :math:`(x1, y1, x2, y2, cls_id)`.
                The scale of these values is at the input image scale.
            img_info (:class:`~chainer.Variable`): The input image info. It
                contains :math:`(height, width)` and the batchsize should be 1.
                So the shape should be :math:`(1, 2)`.

        Returns:
            bbox_labels (:class:`~numpy.ndarray` or :class:`~cupy.ndarray`):
                Classification labels of all anchor boxes. It contains values
                from :math:`{-1, 0, 1}` and the numbers of negative (=0) and
                positive (=1) are the same.
            bbox_reg_targets (:class:`~numpy.ndarray` or
                    :class:`~cupy.ndarray`):
                The regression targets of bounding box transformation
                parameters, cast to float32.
            inds_inside (:class:`~numpy.ndarray` or :class:`~cupy.ndarray`):
                Indices of the anchor boxes that lie inside the input image,
                out of all possible anchor boxes (`all_anchors`), of which
                there are :math:`K \times A`. This should be used to select
                proposals to be compared with the above two targets.
            n_all_bbox (int): The number of all possible bbox. This value
                is always larger than `len(inds_inside)`.

        """
        if self.type_check_enable:
            self._check_data_type_forward(gt_boxes, img_info)

        # Currently it assumes that the batchsize is always 1
        boxes = gt_boxes.data[0]
        info = img_info.data[0]

        with cuda.get_device_from_array(boxes):
            xp = cuda.get_array_module(boxes)
            # (feat_h x feat_w x n_anchors, 4)
            all_bbox = xp.asarray(self._generate_all_bbox(feat_h, feat_w))
            inds_inside, all_inside_bbox = keep_inside(all_bbox, info)
            argmax_overlaps_inds, bbox_labels = self._create_bbox_labels(
                inds_inside, all_inside_bbox, boxes)

            # Convert fixed anchors in (x, y, w, h) to (dx, dy, dw, dh)
            matched_boxes = boxes[argmax_overlaps_inds]
            bbox_reg_targets = bbox_transform(
                all_inside_bbox, matched_boxes).astype(xp.float32)
            n_all_bbox = len(all_bbox)

        return bbox_labels, bbox_reg_targets, inds_inside, n_all_bbox

コード例 #53

0

ファイルを表示

ファイル: extend.py プロジェクト: ir5/chainer-chemistry

def extend_arrays_to_shape(arrays, out_shape, value=0):
    """Pack ``arrays`` into one array of shape ``out_shape``.

    The result is filled with ``value`` and has the dtype of the first
    array. The ``i``-th array is written to ``result[i]``, starting at the
    origin of every axis; the rest keeps the fill value.
    """
    # Ref: `_concat_arrays_with_padding` method in chainer convert.py
    # https://github.com/chainer/chainer/blob/master/chainer/dataset/convert.py
    first = arrays[0]
    xp = cuda.get_array_module(first)
    with cuda.get_device_from_array(first):
        result = xp.full(out_shape, value, dtype=first.dtype)
        for index, src in enumerate(arrays):
            region = (index,) + tuple(slice(dim) for dim in src.shape)
            result[region] = src
    return result

コード例 #54

0

ファイルを表示

ファイル: optimizer.py プロジェクト: MakotoSeto/chainer

 def __call__(self, rule, param):
     """Add ``rate * sign(param.data)`` to ``param.grad`` in place.

     On CPU this is a plain array update; otherwise an elementwise kernel
     named ``lasso`` performs the same update. ``rule`` is unused.
     """
     weights, gradient = param.data, param.grad
     xp = cuda.get_array_module(weights)
     with cuda.get_device_from_array(weights) as dev:
         sign = xp.sign(weights)
         if int(dev) != -1:
             lasso_kernel = cuda.elementwise(
                 'T s, T decay', 'T g', 'g += decay * s', 'lasso')
             lasso_kernel(sign, self.rate, gradient)
         else:
             gradient += self.rate * sign

コード例 #55

0

ファイルを表示

ファイル: optimizer.py プロジェクト: MakotoSeto/chainer

 def __call__(self, rule, param):
     """Add noise from ``self.noise_func`` to ``param.grad`` in place.

     The noise is generated inside the gradient's device context. On CPU
     it is added directly; otherwise an elementwise kernel named
     ``gradient_noise`` performs the addition.
     """
     gradient = param.grad
     xp = cuda.get_array_module(gradient)
     with cuda.get_device_from_array(gradient) as dev:
         noise = self.noise_func(
             xp, gradient.shape, gradient.dtype, self, rule)
         if int(dev) != -1:
             noise_kernel = cuda.elementwise(
                 'T noise', 'T g', 'g += noise', 'gradient_noise')
             noise_kernel(noise, gradient)
         else:
             gradient += noise

コード例 #56

0

ファイルを表示

ファイル: optimizer.py プロジェクト: jekbradbury/chainer

 def __call__(self, opt):
     """Add ``rate * sign(data)`` to the gradient of every target parameter.

     The update is done in place: directly on CPU, and through
     ``self.kernel()`` on other devices.
     """
     decay = self.rate
     for param in opt.target.params():
         weights, gradient = param.data, param.grad
         xp = cuda.get_array_module(weights)
         # The sign is computed before entering the device context.
         sign = xp.sign(weights)
         with cuda.get_device_from_array(weights) as dev:
             if int(dev) != -1:
                 self.kernel()(sign, decay, gradient)
             else:
                 gradient += decay * sign

コード例 #57

0

ファイルを表示

ファイル: convert.py プロジェクト: jekbradbury/chainer

def _concat_arrays(arrays, padding):
    """Stack ``arrays`` along a new leading axis.

    If the first element is neither a ``numpy.ndarray`` nor a
    ``cuda.ndarray`` (e.g. built-in ints or floats), the whole input is
    first converted with ``numpy.asarray``. When ``padding`` is not
    ``None`` the work is delegated to ``_concat_arrays_with_padding``.
    """
    # Convert `arrays` to numpy.ndarray if `arrays` consists of the built-in
    # types such as int or float.
    if not isinstance(arrays[0], (numpy.ndarray, cuda.ndarray)):
        arrays = numpy.asarray(arrays)

    if padding is None:
        first = arrays[0]
        xp = cuda.get_array_module(first)
        with cuda.get_device_from_array(first):
            expanded = [array[None] for array in arrays]
            return xp.concatenate(expanded)

    return _concat_arrays_with_padding(arrays, padding)

コード例 #58

0

ファイルを表示

ファイル: theano_function.py プロジェクト: MakotoSeto/chainer

    def forward(self, inputs):
        """Run ``self.forward_func`` on host copies of ``inputs``.

        All inputs are copied to the CPU before the call. If the inputs
        were not NumPy arrays, the outputs are copied back to the device
        the inputs came from.

        Args:
            inputs: Sequence of input arrays.

        Returns:
            tuple: Output arrays of ``self.forward_func``.
        """
        gpu = cuda.get_array_module(*inputs) is not numpy
        # Record the source device while the inputs are still device
        # arrays. Looking it up after the host copy (or passing the list
        # itself instead of the unpacked arrays) can never find a device,
        # so outputs would land on whichever device happens to be current.
        device = cuda.get_device_from_array(*inputs)
        inputs = [cuda.to_cpu(x) for x in inputs]

        outputs = self.forward_func(*inputs)

        if gpu:
            # TODO(unno): We can remove redundant gpu-cpu copy using
            # theano.sandbox.cuda.CudaNdarray.gpudata
            outputs = [cuda.to_gpu(x, device) for x in outputs]

        return tuple(outputs)