Example #1
    def copydata(self, var):
        """Copies the data array from given source variable.

        This method copies the data array from given variable to this variable.
        The copy is done even if the arrays reside on different devices,
        including across the host and a GPU device. If this variable has an
        uninitialized data array, this method initializes it by the data array
        of the given variable. Similarly, if the given variable has an
        uninitialized data array, this method initializes it by the data array
        of this variable (``self``). If both are uninitialized, this method
        does nothing.

        Args:
            var (Variable): Source variable.

        """
        src = var.data
        dst = self.data
        if src is None:
            if dst is None:
                return
            var.initialize(self.shape)
            src = var.data
        elif dst is None:
            self.initialize(src.shape)
            dst = self.data
        src_xp = cuda.get_array_module(src)
        dst_xp = cuda.get_array_module(dst)
        if dst_xp is src_xp:
            dst_xp.copyto(dst, src)
        elif dst_xp is numpy:
            dst_xp.copyto(dst, src.get())
        else:
            dst.set(src)
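
A minimal usage sketch of the method above, assuming a CUDA-capable GPU with CuPy installed; the variable names are illustrative only:

import numpy as np
import chainer
from chainer import cuda

src = chainer.Variable(np.arange(6, dtype=np.float32).reshape(2, 3))
dst = chainer.Variable(cuda.to_gpu(np.zeros((2, 3), dtype=np.float32)))
dst.copydata(src)            # host-to-device copy handled internally
print(cuda.to_cpu(dst.data))
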
Example #2
File: experiments.py  Project: kzky/works
    def train(self, x):
        # Encoder/Decoder
        h = self.encoder(x)
        x_rec = self.decoder(h)
        l_rec = self.recon_loss(x, x_rec)
        self.cleargrads()
        l_rec.backward()
        self.optimizer_enc.update()
        self.optimizer_dec.update()

        # Discriminator
        h = Variable(h.data)  # disconnect
        xp = cuda.get_array_module(x)
        z = Variable(cuda.to_gpu(xp.random.rand(x.shape[0], self.dim).astype(xp.float32), self.device))
        x_gen = self.decoder(self.generator0(z))
        d_x_gen = self.discriminator(x_gen)
        d_x_real = self.discriminator(x)
        l_dis = self.lsgan_loss(d_x_gen, d_x_real)
        self.cleargrads()
        l_dis.backward()
        self.optimizer_dis.update()
        
        # Generator
        xp = cuda.get_array_module(x)
        z = Variable(cuda.to_gpu(xp.random.rand(x.shape[0], self.dim).astype(xp.float32), self.device))
        x_gen = self.decoder(self.generator0(z))
        d_x_gen = self.discriminator(x_gen)
        h_gen = self.encoder(x_gen)
        l_gen = self.lsgan_loss(d_x_gen)
        self.cleargrads()
        l_gen.backward()
        self.optimizer_gen.update()
Example #3
def _preprocess_const(x, value):
    xp = cuda.get_array_module(x)
    if not numpy.isscalar(value) and cuda.get_array_module(value) != xp:
        # TODO(unno): We can transfer arrays automatically
        raise TypeError('Cannot mix cupy.ndarray and numpy.ndarray')

    b = xp.broadcast(x, value)
    if b.shape != x.shape:
        raise ValueError('Failed to broadcast arrays')
    return utils.force_type(x.dtype, value)
Example #4
    def check_backward_consistency_regression(self, x_data, gy_data,
                                              use_cudnn=True):
        # Regression test against the two-dimensional max pooling layer.

        if len(self.dims) != 2:
            return

        ksize = self.ksize
        stride = self.stride
        pad = self.pad
        xp = cuda.get_array_module(x_data)

        # Backward computation for N-dimensional max pooling layer.
        x_nd = chainer.Variable(xp.array(x_data))
        func_nd = functions.MaxPoolingND(self.ndim, ksize, stride=stride,
                                         pad=pad, use_cudnn=use_cudnn,
                                         cover_all=self.cover_all)
        y_nd = func_nd(x_nd)
        y_nd.grad = gy_data
        y_nd.backward()

        # Backward computation for two-dimensional max pooling layer.
        x_2d = chainer.Variable(xp.array(x_data))
        func_2d = functions.MaxPooling2D(ksize, stride=stride, pad=pad,
                                         use_cudnn=use_cudnn,
                                         cover_all=self.cover_all)
        y_2d = func_2d(x_2d)
        y_2d.grad = gy_data
        y_2d.backward()

        # Test that the two result gradients are close enough.
        testing.assert_allclose(x_nd.grad, x_2d.grad)
Example #5
def _bbox_transform_inv(boxes, deltas):
    xp = get_array_module(boxes)

    if boxes.shape[0] == 0:
        return xp.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx = deltas[:, 0::4]
    dy = deltas[:, 1::4]
    dw = deltas[:, 2::4]
    dh = deltas[:, 3::4]

    pred_ctr_x = dx * widths[:, xp.newaxis] + ctr_x[:, xp.newaxis]
    pred_ctr_y = dy * heights[:, xp.newaxis] + ctr_y[:, xp.newaxis]
    pred_w = xp.exp(dw) * widths[:, xp.newaxis]
    pred_h = xp.exp(dh) * heights[:, xp.newaxis]

    pred_boxes = xp.zeros(deltas.shape, dtype=deltas.dtype)
    # x1
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
    # y1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
    # x2
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
    # y2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h

    return pred_boxes
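
A small NumPy-only check of the decoding above, with a made-up box and zero deltas; it assumes `_bbox_transform_inv` is in scope:

import numpy as np

boxes = np.array([[0., 0., 10., 20.]], dtype=np.float32)   # (x1, y1, x2, y2)
deltas = np.zeros((1, 4), dtype=np.float32)
print(_bbox_transform_inv(boxes, deltas))
# Zero deltas keep the box centre and the (width, height) computed with the
# +1 convention unchanged.
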
Example #6
File: maxout.py  Project: hillbig/chainer
    def backward(self, inputs, grad_outputs):
        gy = grad_outputs[0]
        x = _as_mat(inputs[0])
        W = inputs[1]

        xp = cuda.get_array_module(*inputs)
        # gradient of z = xW + b
        gz = xp.zeros((gy.shape[0], W.shape[1], gy.shape[1]), x.dtype)
        if xp == numpy:
            idx0 = xp.arange(len(gy))[:, None]
            idx1 = xp.arange(gy.shape[1])
            gz[idx0, self.argmax, idx1] = gy
        else:
            gz_r = xp.rollaxis(gz, 1)
            cuda.elementwise(
                'T gy, S argmax, int32 n', 'raw T gz',
                'gz[argmax * n + i] = gy', 'maxout_bwd'
            )(gy, self.argmax, gz_r.size // len(gz_r), gz_r)
        gx = xp.tensordot(gz, W, ((1, 2), (1, 2))).reshape(inputs[0].shape)
        gW = xp.tensordot(x, gz, (0, 0))

        if len(inputs) == 3:
            gb = gz.sum(axis=0)
            return gx, gW, gb
        else:
            return gx, gW
Example #7
    def update_parameter_by_meta_learner(
            self, model_params, loss, 
            x_l0, x_l1, y_l):

        # Forward meta-learner
        namedparams = model_params
        for i, elm in enumerate(namedparams.items()):  # parameter-loop
            k, p = elm
            with cuda.get_device_from_id(self.device):
                shape = p.shape
                xp = cuda.get_array_module(p.data)

                x = p.grad
                grad = xp.reshape(x, (np.prod(shape), ))
                meta_learner = self.meta_learners[i]
                g = meta_learner(Variable(grad))  # forward
                w = p - F.reshape(g, shape)
                self.model_params[k] = w

        # Train meta-learner with main objective
        y_pred = self.model(x_l0, self.model_params)
        loss_ce = F.softmax_cross_entropy(y_pred, y_l)
        
        self.cleargrads()  # need to clear W's grad left over from loss_rec.backward()
        for meta_learner in self.meta_learners:
            meta_learner.cleargrads()
        loss_ce.backward(retain_grad=True)
        for opt in self.opt_meta_learners:
            opt.update()

        loss_ce.unchain_backward()  # TODO: is this the proper place to unchain?
Example #8
def _offset2grid(offset, kh, kw, sy, sx, ph, pw, h, w):
    n, khkw2, out_h, out_w = offset.shape
    khkw = int(khkw2 / 2)
    xp = cuda.get_array_module(offset)

    ys, xs = xp.meshgrid(
        xp.arange(0, sy * out_h, sy, dtype=numpy.float32),
        xp.arange(0, sx * out_w, sx, dtype=numpy.float32), indexing='ij',
        copy=False
    )
    filter_offset_x = xp.tile(xp.arange(kw, dtype=numpy.float32), kh)
    filter_offset_y = xp.repeat(xp.arange(kh, dtype=numpy.float32), kw)
    x_coord = (offset[:, :khkw] + xs[None, None] +
               filter_offset_x[None, :, None, None])
    y_coord = (offset[:, khkw:] + ys[None, None] +
               filter_offset_y[None, :, None, None])

    # The values of this variable are clipped to the range [-1, 1].
    # The coordinate (-1, -1) corresponds to the upper-left
    # corner of the input image.
    x_coord = (x_coord / (w + 2 * pw - 1) - 0.5) * 2
    y_coord = (y_coord / (h + 2 * ph - 1) - 0.5) * 2

    # Shape of `coord` is (n, 2 * kh * kw, out_h, out_w)
    coord = concat.concat([x_coord, y_coord], axis=1)
    return coord
Example #9
File: lstm.py  Project: musyoku/NLP
	def backward(self, inputs, grad_outputs):
		xp = cuda.get_array_module(inputs[0])
		context, weight = inputs
		weight = weight[:, self.index].reshape(-1, 1)
		z = xp.zeros((context.shape[0], 2), dtype=xp.float32)
		z[:, self.index] = xp.sum(grad_outputs[0] * context, axis=1)
		return grad_outputs[0] * weight, z
Example #10
    def check_backward_consistency_regression(self, x_data, gy_data):
        # Regression test against the two-dimensional unpooling layer.

        ndim = len(self.dims)
        if ndim != 2:
            return

        ksize = self.ksize
        stride = self.stride
        pad = self.pad
        xp = cuda.get_array_module(x_data)

        # Backward computation for N-dimensional unpooling layer.
        x_nd = chainer.Variable(xp.array(x_data))
        func_nd = functions.UnpoolingND(ndim, ksize, stride=stride,
                                        pad=pad, cover_all=self.cover_all)
        y_nd = func_nd(x_nd)
        y_nd.grad = gy_data
        y_nd.backward()

        # Backward computation for two-dimensional unpooling layer.
        x_2d = chainer.Variable(xp.array(x_data))
        func_2d = functions.Unpooling2D(ksize, stride=stride, pad=pad,
                                        cover_all=self.cover_all)
        y_2d = func_2d.apply((x_2d,))[0]
        y_2d.grad = gy_data
        y_2d.backward()

        # Test that the two result gradients are close enough.
        opt = self.check_backward_options
        testing.assert_allclose(
            x_nd.grad, x_2d.grad, atol=opt['atol'], rtol=opt['rtol'])
Example #11
File: variable.py  Project: BRETT71/chainer
    def addgrad(self, var):
        """Accumulates the gradient array from given source variable.

        This method just runs ``self.grad += var.grad``, except that the
        accumulation is done even across the host and different devices.

        Args:
            var (Variable): Source variable.

        """
        src = var._grad
        dst = self._grad
        if src is None:
            raise ValueError('Source gradient is not set.')
        if dst is None:
            raise ValueError('Target gradient is not set.')

        xp = cuda.get_array_module(dst)
        if xp is numpy:
            dst += cuda.to_cpu(src)
        elif isinstance(src, numpy.ndarray):
            dst += cuda.to_gpu(src, device=dst)
        else:
            dst_dev = dst.device
            if dst_dev == src.device:
                dst += src
            else:
                with dst_dev:
                    dst += xp.copy(src)
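
A CPU-only usage sketch of ``addgrad``; the same call also works when the two gradients live on different devices:

import numpy as np
import chainer

a = chainer.Variable(np.zeros(3, dtype=np.float32))
b = chainer.Variable(np.zeros(3, dtype=np.float32))
a.grad = np.array([1., 2., 3.], dtype=np.float32)
b.grad = np.ones(3, dtype=np.float32)
a.addgrad(b)     # equivalent to a.grad += b.grad, but device-aware
print(a.grad)    # [2. 3. 4.]
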
Example #12
    def backward(self, inputs, grads):
        xp = cuda.get_array_module(*inputs)
        _, indices, _ = inputs
        g = grads[0]
        gv = g[xp.arange(len(indices)), indices]
        g[xp.arange(len(indices)), indices] = 0
        return g, None, gv
Example #13
    def backward(self, x_orig, gy):
        # TODO(beam2d): Support backprop on inference mode
        assert self.use_batch_mean and not self.is_finetune

        ldim, cdim, rdim = self._internal_shape(x_orig[0])
        gy = gy[0].reshape(ldim, cdim, rdim)
        inv_m = 1. / (ldim * rdim)

        gbeta = gy.sum(axis=(0, 2), keepdims=True)
        self.gbeta += gbeta

        ggamma = (gy * self.x_hat).sum(axis=(0, 2), keepdims=True)
        self.ggamma += ggamma

        if cuda.get_array_module(*x_orig) == numpy:
            coeff = self.gamma / self.std
            gx = coeff * (gy - (self.x_hat * ggamma + gbeta) * inv_m)
        else:
            gx = cuda.elementwise(
                'T gy, T gbeta, T ggamma, T x_hat, T gamma, T std, T inv_m',
                'T gx',
                'gx = gamma / std * (gy - (x_hat * ggamma + gbeta) * inv_m)',
                'bn_bwd')(
                    gy, gbeta, ggamma, self.x_hat, self.gamma, self.std, inv_m)

        return gx.reshape(x_orig[0].shape),
Example #14
File: cnn_model_001.py  Project: kzky/works
    def __call__(self, x, test=False):
        # add gaussian noise
        xp = cuda.get_array_module(x.data)
        with cuda.get_device(self.device):
            noise = xp.random.randn(*x.shape) * 0.15
            x.data += noise
        
        # (conv -> act -> bn) x 3 -> maxpool -> dropout
        h = self.bn_conv0(self.act(self.conv0(x), 0.1), test)
        h = self.bn_conv1(self.act(self.conv1(h), 0.1), test)
        h = self.bn_conv2(self.act(self.conv2(h), 0.1), test)
        h = F.max_pooling_2d(h, (2, 2))  # 32 -> 16
        h = F.dropout(h, 0.5, not test)
        
        # (conv -> act -> bn) x 3 -> maxpool -> dropout
        h = self.bn_conv3(self.act(self.conv3(h), 0.1), test)
        h = self.bn_conv4(self.act(self.conv4(h), 0.1), test)
        h = self.bn_conv5(self.act(self.conv5(h), 0.1), test)
        h = F.max_pooling_2d(h, (2, 2))  # 16 -> 8
        h = F.dropout(h, 0.5, not test)
        
        # conv -> act -> bn -> (nin -> act -> bn) x 2
        h = self.bn_conv6(self.act(self.conv6(h), 0.1), test) # 8 -> 6
        h = self.bn_conv7(self.act(self.conv7(h), 0.1), test)
        h = self.bn_conv8(self.act(self.conv8(h), 0.1), test)

        h = F.average_pooling_2d(h, (6, 6))
        h = self.linear(h)
        
        return h
Example #15
    def backward(self, inputs, grad_outputs):
        x, gamma = inputs[:2]
        gy = grad_outputs[0]

        head_ndim = gamma.ndim + 1
        expander = (None, Ellipsis) + (None,) * (x.ndim - head_ndim)
        m = gamma.dtype.type(x.size // gamma.size)

        axis = (0,) + tuple(range(head_ndim, x.ndim))
        gbeta = gy.sum(axis=axis)
        ggamma = (gy * self.x_hat).sum(axis=axis)

        xp = cuda.get_array_module(x)
        if len(inputs) == 5:
            var = inputs[4]
            gs = gamma / self.std
            gmean = -gs * gbeta
            gvar = -0.5 * gamma / var * ggamma
            gx = gs[expander] * gy
            return gx, ggamma, gbeta, gmean, gvar

        if xp is numpy:
            gx = (gamma / self.std)[expander] * (
                gy - (self.x_hat * ggamma[expander] + gbeta[expander]) / m)
        else:
            inv_m = numpy.float32(1) / m
            gx = cuda.elementwise(
                'T gy, T x_hat, T gamma, T std, T ggamma, T gbeta, T inv_m',
                'T gx',
                'gx = (gamma / std) * (gy - (x_hat * ggamma + gbeta) * inv_m)',
                'bn_bwd')(gy, self.x_hat, gamma[expander], self.std[expander],
                          ggamma[expander], gbeta[expander], inv_m)
        return gx, ggamma, gbeta
Example #16
    def backward(self, x, gy):
        xp = cuda.get_array_module(*x)
        gx = utils.force_array(xp.cos(x[0]))
        xp.square(gx, out=gx)
        xp.reciprocal(gx, out=gx)
        gx *= gy[0]
        return gx,
Example #17
    def __init__(self, initializer=None, shape=None, name=None):
        if initializer is None:
            initializer = constant.NaN()
        elif numpy.isscalar(initializer):
            initializer = constant.Constant(initializer)
        if shape is None:
            if isinstance(initializer, (numpy.ndarray, cuda.ndarray)):
                # parameter initialized by the initial array
                super(Parameter, self).__init__(initializer, name=name)
            else:
                # uninitialized parameter
                super(Parameter, self).__init__(name=name)
                self.initializer = initializer
                dtype = getattr(initializer, 'dtype', numpy.float32)
                self._grad_initializer = constant.NaN(dtype)
        else:
            # parameter initialized with a given shape
            if isinstance(initializer, (numpy.ndarray, cuda.ndarray)):
                xp = cuda.get_array_module(initializer)
                initializer = constant.Constant(initializer)
            else:
                xp = numpy
            data = initializers.generate_array(initializer, shape, xp)
            grad = xp.full_like(data, numpy.nan)
            super(Parameter, self).__init__(data, name=name, grad=grad)

        self.update_rule = None
Example #18
    def backward(self, inputs, gy):
        xp = cuda.get_array_module(*inputs)
        x, gamma, beta = inputs
        gy = gy[0]

        g_beta = gy.sum(axis=0)
        g_scaled_x = gy

        g_gamma = xp.sum(g_scaled_x * self.x_hat, axis=0)
        g_x_hat = g_scaled_x * gamma[None, ]

        g_inv_std = xp.sum(g_x_hat * self.x_mu, axis=1, keepdims=True)
        g_x_mu_1 = g_x_hat * self.inv_std

        g_std = g_inv_std * (- 1. / self.var)
        g_var = g_std * 0.5 * self.inv_std

        n_units = x.shape[1]
        g_squ_x_mu = _broadcast_to(xp, g_var * 1. / n_units, x.shape)
        g_x_mu_2 = g_squ_x_mu * 2 * self.x_mu

        g_x_1 = g_x_mu_1 + g_x_mu_2
        g_mu = xp.sum(g_x_1, axis=1, keepdims=True) * (- 1.)

        g_x_2 = _broadcast_to(xp, g_mu * 1. / n_units, x.shape)

        g_x = g_x_1 + g_x_2

        return g_x, g_gamma, g_beta,
Example #19
def variable_repr(var):
    """Return the string representation of a variable.

    Args:
        var (~chainer.Variable): Input Variable.

    .. seealso:: numpy.array_repr
    """
    xp = cuda.get_array_module(var)
    if xp is numpy:
        arr = var.data
    else:
        arr = var.data.get()

    if var.name:
        prefix = 'variable ' + var.name
    else:
        prefix = 'variable'

    if arr is None:
        lst = 'None'
    elif arr.size > 0 or arr.shape == (0,):
        lst = numpy.array2string(arr, None, None, None, ', ', prefix + '(')
    else:  # show zero-length shape unless it is (0,)
        lst = '[], shape=%s' % (repr(arr.shape),)

    return '%s(%s)' % (prefix, lst)
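
A quick sketch of what this helper produces, assuming `variable_repr` is in scope (in recent Chainer versions `repr()` of a Variable goes through it); the exact float formatting depends on the NumPy version:

import numpy as np
import chainer

v = chainer.Variable(np.array([[1., 2.], [3., 4.]], dtype=np.float32), name='W')
print(variable_repr(v))
# variable W([[1., 2.],
#             [3., 4.]])
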
Example #20
File: sum.py  Project: MakotoSeto/chainer
    def forward(self, x):
        self.retain_inputs(())
        self._in_shape = x[0].shape
        self._in_dtype = x[0].dtype
        self._xp = cuda.get_array_module(*x)
        return self._xp.asarray(
            x[0].sum(axis=self.axis, keepdims=self.keepdims)),
Example #21
    def forward(self, inputs):
        xp = cuda.get_array_module(*inputs)
        x, h_tm1, c_tm1, q = inputs
        
        batchsize = x.shape[0]
        
        self.z       = xp.empty((batchsize,self.out_size*4),dtype=np.dtype('float32'))
        self.c       = xp.empty((batchsize,self.out_size),dtype=np.dtype('float32'))
        self.h       = xp.empty((batchsize,self.out_size),dtype=np.dtype('float32'))

        if xp is np:
            self.z = np.dot(x, self.W.T, out=self.z)
            self.z += np.dot(h_tm1, self.V.T)
            self.z += np.dot(q, self.U.T)
            if not self.nobias:
                self.z += self.b
            
            _lstm_forward_cpu(z=self.z, c_tm1=c_tm1, c=self.c, 
                         h=self.h, out_size=self.out_size)
        else:
            self.z = cp.dot(x, self.W.T, out=self.z)
            gpu.utils.dot_add(A=h_tm1, B=self.V, C=self.z, transb=True)
            gpu.utils.dot_add(A=q, B=self.U, C=self.z, transb=True)
            if not self.nobias:
                gpu.utils.addVec2Mat(self.z, self.b)
            _lstm_forward_gpu(z=self.z, c_tm1=c_tm1, c=self.c, 
                         h=self.h, out_size=self.out_size)
            
        return self.h, self.c
Example #22
File: crop.py  Project: fangzheng354/ram
    def forward(self, x):
        xp = cuda.get_array_module(*x)
        n, c = x[0].shape[:2]
        y = xp.zeros((n, c, self.size, self.size), dtype=numpy.float32)
        for k in range(n):
            y[k] = x[0][k, :, self.i1[k, 0]:self.i2[k, 0], self.i1[k, 1]:self.i2[k, 1]]
        return y,
Example #23
    def check_proposal_target_creator(
            self, bbox, label, roi, proposal_target_creator):
        xp = cuda.get_array_module(roi)
        sample_roi, gt_roi_loc, gt_roi_label =\
            proposal_target_creator(roi, bbox, label)

        # Test types
        self.assertIsInstance(sample_roi, xp.ndarray)
        self.assertIsInstance(gt_roi_loc, xp.ndarray)
        self.assertIsInstance(gt_roi_label, xp.ndarray)

        sample_roi = cuda.to_cpu(sample_roi)
        gt_roi_loc = cuda.to_cpu(gt_roi_loc)
        gt_roi_label = cuda.to_cpu(gt_roi_label)

        # Test shapes
        self.assertEqual(sample_roi.shape, (self.n_sample, 4))
        self.assertEqual(gt_roi_loc.shape, (self.n_sample, 4))
        self.assertEqual(gt_roi_label.shape, (self.n_sample,))

        # Test foreground and background labels
        np.testing.assert_equal(np.sum(gt_roi_label >= 0), self.n_sample)
        n_pos = np.sum(gt_roi_label >= 1)
        n_neg = np.sum(gt_roi_label == 0)
        self.assertLessEqual(n_pos, self.n_sample * self.pos_ratio)
        self.assertLessEqual(n_neg, self.n_sample - n_pos)
Example #24
    def backward(self, indexes, grad_outputs):
        anchor, positive, negative = self.get_retained_inputs()

        N = anchor.shape[0]
        x_dim = anchor.shape[1]

        xp = cuda.get_array_module(anchor)
        tmp = xp.repeat(self.dist_hinge[:, None], x_dim, axis=1)
        mask = xp.array(tmp > 0, dtype=numpy.float32)

        gy, = grad_outputs
        if self.reduce == 'mean':
            g = gy / N
        else:
            g = gy[:, None]

        tmp = 2 * chainer.functions.broadcast_to(g, mask.shape) * mask

        ret = []
        if 0 in indexes:
            ret.append(tmp * (negative - positive))
        if 1 in indexes:
            ret.append(tmp * (positive - anchor))
        if 2 in indexes:
            ret.append(tmp * (anchor - negative))

        return ret
Example #25
    def check_backward(self, x_data, W_data, b_data, y_grad):
        xp = cuda.get_array_module(x_data)

        if not self.c_contiguous:
            x_data = xp.asfortranarray(x_data)
            W_data = xp.asfortranarray(W_data)
            y_grad = xp.asfortranarray(y_grad)
            self.assertFalse(x_data.flags.c_contiguous)
            self.assertFalse(W_data.flags.c_contiguous)
            self.assertFalse(y_grad.flags.c_contiguous)
            if b_data is not None:
                b = xp.empty((len(b_data) * 2,), dtype=self.b.dtype)
                b[::2] = b_data
                b_data = b[::2]
                self.assertFalse(b_data.flags.c_contiguous)

        args = (x_data, W_data)
        if b_data is not None:
            args = args + (b_data,)

        with chainer.using_config('use_cudnn', self.use_cudnn):
            with chainer.using_config('cudnn_deterministic',
                                      self.cudnn_deterministic):
                gradient_check.check_backward(
                    convolution_2d.Convolution2DFunction(
                        self.stride, self.pad, self.cover_all),
                    args, y_grad, **self.check_backward_options)
Example #26
File: concat.py  Project: Accent-/chainer
    def backward(self, xs, gy):
        if not xs[:-1]:
            return gy

        xp = cuda.get_array_module(*xs)
        sizes = numpy.array([x.shape[self.axis] for x in xs[:-1]]).cumsum()
        return xp.split(gy[0], sizes, axis=self.axis)
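
The backward pass above splits the upstream gradient at the cumulative section sizes; a NumPy-only illustration of the same idea with made-up shapes:

import numpy

# Two inputs of widths 2 and 3 were concatenated along axis=1, so the gradient
# of the concatenated output is split back at column 2.
gy = numpy.arange(10).reshape(2, 5)
sizes = numpy.array([2]).cumsum()   # cumulative widths of all but the last input
gx0, gx1 = numpy.split(gy, sizes, axis=1)
print(gx0.shape, gx1.shape)         # (2, 2) (2, 3)
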
Example #27
    def forward(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        kh, kw = W.shape[2:]

        xp = cuda.get_array_module(*x)
        if xp is numpy:
            self.col = conv.im2col_cpu(
                x, kh, kw, self.sy, self.sx, self.ph, self.pw)
        else:
            self.col = conv.im2col_gpu(
                x, kh, kw, self.sy, self.sx, self.ph, self.pw)

        B, C, KY, KX, IY, IX = self.col.shape
        D = W.shape[0]  # (D, C, KY, KX)
        c_ = self.col.transpose(1, 0, 4, 5, 2, 3) \
            .reshape((C, B * IY * IX, KY * KX))
        w_ = W.transpose(1, 2, 3, 0).reshape((C, KY * KX, D))

        # (C, B*IY*IX, KY*KX), (C, KY*KX, D)-> (C, B*IY*IX, D)
        y = _matmul(c_, w_, xp).astype(x.dtype, copy=False)

        # (C, B*IY*IX, D) -> (B, C*D, IY, IX)
        y = y.reshape((C, B, IY * IX, D)).transpose(1, 0, 3, 2) \
            .reshape((B, C * D, IY, IX))

        if b is not None:
            y += b[None, :, None, None]
        return y,
Example #28
File: test_ctc.py  Project: BRETT71/chainer
    def check_forward(self, t_data, xs_data):
        x = tuple(chainer.Variable(x_data) for x_data in xs_data)
        t = chainer.Variable(t_data)
        loss = functions.connectionist_temporal_classification(x, t, 2)
        loss_value = float(loss.data)

        # compute expected value by recursive computation.
        xp = cuda.get_array_module(self.x)
        xt = xp.swapaxes(self.x, 0, 1)
        for b in range(xt.shape[0]):
            for t in range(xt.shape[1]):
                xt[b][t] = numpy.exp(xt[b][t]) / numpy.sum(numpy.exp(xt[b][t]))
        loss_expect = 0
        batch_size = xt.shape[0]
        for b in range(batch_size):
            loss_expect += -math.log(self.alpha(xt[b],
                                                self.l[b],
                                                self.x.shape[0]-1,
                                                self.l[b].shape[0]-1)
                                     + self.alpha(xt[b],
                                                  self.l[b],
                                                  self.x.shape[0]-1,
                                                  self.l[b].shape[0]-2))
        loss_expect /= batch_size
        self.assertAlmostEqual(loss_expect, loss_value, places=5)
Example #29
    def __call__(self, rule, param):
        grad = param.grad
        if grad is None:
            return
        xp = cuda.get_array_module(grad)
        with cuda.get_device_from_array(grad):
            xp.clip(grad, self.lower_bound, self.upper_bound, out=grad)
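
The hook clips a parameter's gradient in place; a CPU-only sketch of just that operation, with hypothetical bounds:

import numpy

grad = numpy.array([-5., 0.5, 7.], dtype=numpy.float32)
lower_bound, upper_bound = -1., 1.
numpy.clip(grad, lower_bound, upper_bound, out=grad)   # in-place, as in the hook
print(grad)   # [-1.   0.5  1. ]
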
Example #30
    def backward(self, inputs, grad_outputs):
        xp = cuda.get_array_module(*inputs)
        x, W = inputs
        gy = grad_outputs[0]
        gW = xp.zeros_like(W)

        if xp is numpy:
            # It is equivalent to `numpy.add.at(gW, x, gy)` but ufunc.at is
            # too slow.
            for ix, igy in six.moves.zip(x.ravel(),
                                         gy.reshape(x.size, -1)):
                if ix == self.ignore_label:
                    continue
                gW[ix] += igy
        else:
            if self.ignore_label is None:
                cuda.elementwise(
                    'T gy, int32 x, int32 n_out', 'raw T gW',
                    'int w_ind[] = {x, i % n_out}; atomicAdd(&gW[w_ind], gy)',
                    'embed_id_bwd')(
                        gy, xp.expand_dims(x, -1), gW.shape[1], gW)
            else:
                cuda.elementwise(
                    'T gy, int32 x, int32 n_out, int32 ignore', 'raw T gW',
                    '''
                    if (x != ignore) {
                      int w_ind[] = {x, i % n_out};
                      atomicAdd(&gW[w_ind], gy);
                    }
                    ''',
                    'embed_id_bwd_ignore_label')(
                        gy, xp.expand_dims(x, -1), gW.shape[1],
                        self.ignore_label, gW)
        return None, gW
Example #31
    def forward(self, inputs):
        self.retain_inputs((0, 1))
        x, gamma, beta = inputs
        xp = cuda.get_array_module(x)
        if self.running_mean is None:
            self.running_mean = xp.zeros_like(gamma)
            self.running_var = xp.zeros_like(gamma)
        self.mode = _BNMode(x, gamma)

        # expander inserts singleton dimensions into gamma and beta so that
        # they can be broadcast against x.
        head_ndim = gamma.ndim + 1
        expander = (None, Ellipsis) + (None, ) * (x.ndim - head_ndim)
        self.expander = expander
        self.axis = (0, ) + tuple(range(head_ndim, x.ndim))
        self.use_cudnn = self.mode.can_use_cudnn(xp)

        if self.use_cudnn:
            x = cuda.cupy.ascontiguousarray(x)

            gamma = cuda.cupy.ascontiguousarray(gamma)
            beta = cuda.cupy.ascontiguousarray(beta)
            dtype = x.dtype
            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(_as4darray(x))
            derivedBnDesc = cudnn.create_uninitialized_tensor_descriptor()
            cudnn_mode = self.mode.get_cudnn_mode()
            libcudnn.deriveBNTensorDescriptor(derivedBnDesc.value,
                                              x_desc.value, cudnn_mode)
            one = numpy.array(1, dtype=dtype).ctypes
            zero = numpy.array(0, dtype=dtype).ctypes
            y = cuda.cupy.empty_like(x)
            # Factor used in the moving average
            factor = 1 - self.decay

            if self.mean is None:
                # Output cache to speed up backward pass.
                self.mean = xp.empty_like(gamma)
                # Output cache to speed up backward pass.
                self.inv_std = xp.empty_like(gamma)
            # Note: cuDNN computes the mini-batch mean and variance
            # internally. We can simply (optionally) pass
            # it the running-average mean and variance arrays.
            # Note: This API seems to set the inverse of the standard deviation
            # (instead of variance) to resultSaveInvVariance argument. The
            # current implementation of our BN depends on this behavior so that
            # we can reduce the number of reduction kernels.
            libcudnn.batchNormalizationForwardTraining(
                handle, cudnn_mode, one.data, zero.data, x_desc.value,
                x.data.ptr, x_desc.value, y.data.ptr, derivedBnDesc.value,
                gamma.data.ptr, beta.data.ptr, factor,
                self.running_mean.data.ptr, self.running_var.data.ptr,
                self.eps, self.mean.data.ptr, self.inv_std.data.ptr)
        else:
            gamma = gamma[expander]
            beta = beta[expander]
            self.mean = x.mean(axis=self.axis)
            var = x.var(axis=self.axis)
            var += self.eps
            self.inv_std = var**(-0.5)
            y = _apply_bn_fwd(xp, x, self.mean[expander],
                              self.inv_std[expander], gamma, beta)
            # Update running statistics
            m = x.size // gamma.size
            adjust = m / max(m - 1., 1.)  # unbiased estimation
            self.running_mean *= self.decay
            self.running_mean += (1 - self.decay) * self.mean
            self.running_var *= self.decay
            self.running_var += (1 - self.decay) * adjust * var

        return y,
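
The running-statistics update in the non-cuDNN branch above is an exponential moving average with an m/(m-1) correction to make the variance estimate unbiased; a NumPy-only sketch of just that bookkeeping, with made-up values:

import numpy

decay = 0.9
running_mean = numpy.zeros(3, dtype=numpy.float32)
running_var = numpy.ones(3, dtype=numpy.float32)
batch_mean = numpy.array([0.5, -0.2, 0.1], dtype=numpy.float32)
batch_var = numpy.array([1.2, 0.8, 1.0], dtype=numpy.float32)

m = 32                           # elements averaged per channel (x.size // gamma.size)
adjust = m / max(m - 1., 1.)     # unbiased estimation, as in the code above
running_mean = decay * running_mean + (1 - decay) * batch_mean
running_var = decay * running_var + (1 - decay) * adjust * batch_var
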
Example #32
    def backward_preprocess(self, function, in_data, out_grad):
        self.xp = cuda.get_array_module(*(in_data + out_grad))
        self._preprocess()
Example #33
    def init_state(self, param):
        xp = cuda.get_array_module(param.data)
        with cuda.get_device_from_array(param.data):
            self.state['msg'] = xp.zeros_like(param.data)
            self.state['msdx'] = xp.zeros_like(param.data)
Example #34
def connectionist_temporal_classification(
        x, t, blank_symbol, input_length=None, label_length=None):
    """Connectionist Temporal Classification loss function.

    Connectionist Temporal Classification (CTC) [Graves2006]_ is a loss
    function for sequence labeling where the alignment between the inputs and
    the targets is unknown. See also [Graves2012]_.

    Args:
        x (sequence of Variable): RNN output at each time. ``x`` must be a list
            of :class:`~chainer.Variable` s. Each element of ``x``, ``x[i]``
            is a :class:`~chainer.Variable` representing output of RNN at time
            ``i``.
        t (Variable): Expected label sequence.
        blank_symbol (int): Index of blank_symbol.
            This value must be non-negative.
        input_length (Variable): Length of the valid sequence for each sample
            in the mini-batch ``x`` (optional). If ``input_length`` is omitted,
            all of ``x`` is regarded as valid input.
        label_length (Variable): Length of the valid sequence for each sample
            in the mini-batch ``t`` (optional). If ``label_length`` is omitted,
            all of ``t`` is regarded as valid input.

    Returns:
        Variable: A variable holding a scalar value of the CTC loss.

    .. note::
       You need to input ``x`` without applying an activation function (e.g.
       softmax), because this function applies softmax to ``x`` internally
       before calculating the CTC loss, to avoid numerical limitations.
       You also need to apply softmax to the forwarded values yourself before
       decoding them.

    .. note::
       This function is differentiable only by ``x``.

    .. note::
       This function supports (batch, sequence, 1-dimensional input)-data.

    .. [Graves2006] Alex Graves, Santiago Fernandez,\
    Faustino Gomez, Jurgen Schmidhuber,\
    `Connectionist Temporal Classification: Labelling Unsegmented\
    Sequence Data with Recurrent Neural Networks\
    <ftp://ftp.idsia.ch/pub/juergen/icml2006.pdf>`_

    .. [Graves2012] Alex Graves,\
    `Supervised Sequence Labelling with Recurrent Neural Networks\
    <http://www.cs.toronto.edu/~graves/preprint.pdf>`_

    """
    if not isinstance(x, collections.Sequence):
        raise TypeError('x must be a list of Variables')
    if not isinstance(blank_symbol, int):
        raise TypeError('blank_symbol must be non-negative integer.')
    assert blank_symbol >= 0
    assert blank_symbol < x[0].data.shape[1]
    # This implementation only supports 1-dimensional data.
    # TODO(jnishi): Support d(>1)-dimensional inputs.
    assert(len(x[0].data.shape) == 2)

    if input_length is None:
        xp = cuda.get_array_module(x[0].data)
        input_length = chainer.Variable(
            xp.full((len(x[0].data),), len(x), dtype=numpy.int32),
            volatile='auto')
        label_length = chainer.Variable(
            xp.full((len(t.data),), len(t.data[0]), dtype=numpy.int32),
            volatile='auto')

    # Batch size check.
    assert len(x[0].data) == len(t.data)
    assert len(x[0].data) == len(input_length.data)
    assert len(x[0].data) == len(label_length.data)

    # Length check.
    assert len(x) >= max(input_length.data)
    assert len(t.data[0]) >= max(label_length.data)

    return ConnectionistTemporalClassification(blank_symbol)(
        input_length, label_length, t, *x)
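
A minimal CPU-only usage sketch with made-up data, assuming a recent Chainer where the function is exposed as ``chainer.functions.connectionist_temporal_classification``:

import numpy as np
import chainer
import chainer.functions as F

# 4 time steps, batch size 1, 3 output symbols; index 2 is the blank symbol.
xs = [chainer.Variable(np.random.randn(1, 3).astype(np.float32)) for _ in range(4)]
t = chainer.Variable(np.array([[0, 1]], dtype=np.int32))
loss = F.connectionist_temporal_classification(xs, t, 2)
loss.backward()
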
Example #35
File: variable.py  Project: tk5a/chainer
    def backward(self, retain_grad=False):
        """Runs error backpropagation (a.k.a. backprop) from this variable.

        On backprop, :meth:`Function.backward` is called on each
        :class:`Function` object appearing in the backward graph starting from
        this variable. The backward graph is represented by backward references
        from variable nodes to their creators, and from functions to their
        input variable nodes. The backprop stops at all root nodes. Some
        functions set ``None`` as the gradients of some inputs, in which case
        further backprop does not take place at those inputs.

        This method uses :data:`grad` as the initial error array. Users can
        manually set a gradient array before calling this method. If
        :data:`data` contains only one element (i.e., it is a scalar) and
        :data:`grad` is ``None``, then this method automatically uses 1.0 as
        the initial error. This is useful when starting backprop from a
        scalar loss value.

        Args:
            retain_grad (bool): If ``True``, the gradient arrays of all
                intermediate variables are kept. Otherwise, :data:`grad` of the
                intermediate variables is set to ``None`` at the appropriate
                timing, which may reduce the maximum memory consumption.

                In most model-training use cases, the purpose of backprop is to
                compute the gradients of parameters, not of all variables, so
                it is recommended to leave this flag set to ``False``.

        """
        if self.creator is None:
            return
        initial_device = None
        if cuda.available and isinstance(self.data, cuda.cupy.ndarray):
            try:
                initial_device = cuda.Device()
            except cuda.cupy.cuda.runtime.CUDARuntimeError as e:
                if e.status != 38:  # cudaErrorNoDevice
                    raise

        is_debug = chainer.is_debug()

        cand_funcs = []
        seen_set = set()
        seen_vars = set()
        need_copy = set()

        # Initialize error by 1, if this is a loss variable
        if self.data.size == 1 and self.grad is None:
            with cuda.get_device_from_array(self.data) as device:
                if device is cuda.DummyDevice:
                    self.grad = numpy.ones_like(self.data)
                else:
                    self.grad = cuda.cupy.ones_like(self.data)

        def add_cand(cand):
            if cand not in seen_set:
                # Negate since heapq is min-heap
                heapq.heappush(cand_funcs, (-cand.rank, len(seen_set), cand))
                seen_set.add(cand)

        add_cand(self.creator)

        while cand_funcs:
            _, _, func = heapq.heappop(cand_funcs)
            outputs = [y() for y in func.outputs]  # access via weak ref

            in_data = tuple([x.data for x in func.inputs])
            out_grad = tuple([None if y is None else y.grad for y in outputs])
            hooks = chainer.get_function_hooks()
            if func._n_local_function_hooks != 0:
                hooks = collections.OrderedDict(hooks)
                hooks.update(func.local_function_hooks)
            hooks = hooks.values()  # avoid six for performance

            cuda.get_device_from_array(*(in_data + out_grad)).use()
            for hook in hooks:
                hook.backward_preprocess(func, in_data, out_grad)
            func.output_data = tuple(
                [None if y is None else y.data for y in outputs])
            gxs = func.backward(in_data, out_grad)
            assert len(gxs) == len(in_data)
            if not getattr(func, '_retain_after_backward', False):
                func.output_data = None
            for hook in hooks:
                hook.backward_postprocess(func, in_data, out_grad)

            if is_debug:
                for gx in gxs:
                    if gx is None:
                        continue
                    cuda.get_device_from_array(gx).use()
                    if cuda.get_array_module(gx).isnan(gx).any():
                        msg = 'NaN is detected on backward computation'
                        raise RuntimeError(msg)

            if not retain_grad:
                for y in outputs:
                    if y is not None and y is not self.node:
                        y.grad = None
            for x, gx in zip(func.inputs, gxs):
                if gx is None:
                    continue
                if not x.requires_grad:
                    continue

                _check_grad_type(func, x, gx)

                # Accumulate the gradient to x. It is a bit tricky to handle
                # branches and parameter gradient accumulation correctly.
                id_x = id(x)
                if x.creator is None:  # leaf
                    if x._grad is None:
                        x.grad = gx
                        need_copy.add(id_x)
                    else:
                        cuda.get_device_from_array(gx).use()
                        if id_x in need_copy:
                            x.grad = utils.force_array(x._grad + gx)  # copy
                            need_copy.remove(id_x)
                        else:
                            x._grad += gx
                else:  # not a leaf
                    add_cand(x.creator)
                    if id_x not in seen_vars:  # 1st visit
                        x.grad = gx
                        seen_vars.add(id_x)
                        need_copy.add(id_x)
                    else:
                        cuda.get_device_from_array(gx).use()
                        if id_x in need_copy:  # 2nd visit
                            x.grad = utils.force_array(gx + x._grad)  # copied
                            need_copy.remove(id_x)
                        else:  # 3rd or later visit
                            x._grad += gx
            del gxs  # to reduce memory usage
            if initial_device is not None:
                initial_device.use()
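
A minimal CPU-only usage sketch of ``backward``: for a scalar loss the initial gradient defaults to 1.0, as described in the docstring above:

import numpy as np
import chainer
import chainer.functions as F

x = chainer.Variable(np.array([1., 2., 3.], dtype=np.float32))
loss = F.sum(x * x)   # scalar loss
loss.backward()       # initial error defaults to 1.0 for a scalar
print(x.grad)         # [2. 4. 6.]
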
Example #36
    def f(self, xs):
        xp = cuda.get_array_module(*xs)
        return xp.exp(xs[0]),
Example #37
def _zeros_like(x):
    xp = cuda.get_array_module(x)
    return xp.zeros_like(x)
Example #38
def _full_like(x, val):
    xp = cuda.get_array_module(x)
    return xp.full_like(x, val)
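
Examples #36 through #38 all rely on the same dispatch pattern: ``cuda.get_array_module`` returns either ``numpy`` or ``cupy`` depending on where its arguments live, so one code path serves both backends. A CPU-only check of that dispatch:

import numpy
from chainer import cuda

x = numpy.ones((2, 2), dtype=numpy.float32)
xp = cuda.get_array_module(x)   # numpy here; cupy for a GPU-resident array
assert xp is numpy
print(xp.full_like(x, 3.0))
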
Example #39
    def backward_postprocess(self, function, in_data, out_grad):
        xp = cuda.get_array_module(*(in_data + out_grad))
        assert xp == self.xp
        self._postprocess(function_namer(function, in_data), bwd=True)
Example #40
    def forward_preprocess(self, function, in_data):
        self.xp = cuda.get_array_module(*in_data)
        self._preprocess()
Example #41
    def check_orthogonality(self, w):
        self.initializer(w)
        xp = cuda.get_array_module(w)
        testing.assert_allclose(w, xp.ones((), dtype=numpy.float32) * 2)
Example #42
    def forward_postprocess(self, function, in_data):
        xp = cuda.get_array_module(*in_data)
        assert xp == self.xp
        self._postprocess(function_namer(function, in_data))
Example #43
File: ceil.py  Project: satuma777/chainer
    def backward(self, x, grad_outputs):
        xp = cuda.get_array_module(*x)
        return xp.zeros_like(x[0]),
Example #44
    def check_orthogonality(self, w):
        self.initializer(w)
        xp = cuda.get_array_module(w)
        w = w.reshape(len(w), -1)
        dots = xp.tensordot(w, w, (1, 1))
        testing.assert_allclose(dots, xp.identity(len(w)))
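
The check above flattens each output slice and verifies that the rows are mutually orthonormal; a NumPy-only sketch of the same check with a hand-built orthonormal matrix:

import numpy

w = numpy.array([[1., 0., 0.],
                 [0., 1., 0.]], dtype=numpy.float32)   # rows are orthonormal
w2 = w.reshape(len(w), -1)
dots = numpy.tensordot(w2, w2, (1, 1))                 # pairwise row dot products
numpy.testing.assert_allclose(dots, numpy.identity(len(w2)))
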
Example #45
    def __call__(self,
                 roi,
                 bbox,
                 label,
                 mask,
                 levels,
                 loc_normalize_mean=(0., 0., 0., 0.),
                 loc_normalize_std=(0.1, 0.1, 0.2, 0.2),
                 mask_size=14,
                 binary_mask=True):
        """
        binary_mask = False -> keypoint
        """
        xp = cuda.get_array_module(roi)
        roi = cuda.to_cpu(roi)
        bbox = cuda.to_cpu(bbox)
        label = cuda.to_cpu(label)
        mask = cuda.to_cpu(mask)
        levels = cuda.to_cpu(levels)

        n_bbox, _ = bbox.shape
        n_proposal = roi.shape[0]
        roi = np.concatenate((roi, bbox), axis=0)

        # assign feature levels of ground truth boxes
        bbox_levels = map_rois_to_fpn_levels(np, bbox)
        levels = np.concatenate([levels, bbox_levels])

        pos_roi_per_image = np.round(self.n_sample * self.pos_ratio)
        iou = bbox_iou(roi, bbox)
        gt_assignment = iou.argmax(axis=1)
        max_iou = iou.max(axis=1)
        # Offset range of classes from [0, n_fg_class - 1] to [1, n_fg_class].
        # The label with value 0 is the background.
        gt_roi_label = label[gt_assignment] + 1

        # Select foreground RoIs as those with >= pos_iou_thresh IoU.
        pos_index = np.where(max_iou >= self.pos_iou_thresh)[0]
        pos_roi_per_this_image = int(min(pos_roi_per_image, pos_index.size))
        if pos_index.size > 0:
            pos_index = np.random.choice(pos_index,
                                         size=pos_roi_per_this_image,
                                         replace=False)

        # Select background RoIs as those within
        # [neg_iou_thresh_lo, neg_iou_thresh_hi).
        neg_index = np.where((max_iou < self.neg_iou_thresh_hi)
                             & (max_iou >= self.neg_iou_thresh_lo))[0]
        neg_roi_per_this_image = self.n_sample - pos_roi_per_this_image
        neg_roi_per_this_image = int(
            min(neg_roi_per_this_image, neg_index.size))
        if neg_index.size > 0:
            neg_index = np.random.choice(neg_index,
                                         size=neg_roi_per_this_image,
                                         replace=False)

        # The indices that we're selecting (both positive and negative).
        keep_index = np.append(pos_index, neg_index)
        gt_roi_label = gt_roi_label[keep_index]
        gt_roi_label[pos_roi_per_this_image:] = 0  # negative labels --> 0
        sample_roi = roi[keep_index]
        sample_levels = levels[keep_index]

        # Compute offsets and scales to match sampled RoIs to the GTs.
        gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]])
        gt_roi_loc = ((gt_roi_loc - np.array(loc_normalize_mean, np.float32)) /
                      np.array(loc_normalize_std, np.float32))

        # https://engineer.dena.jp/2017/12/chainercvmask-r-cnn.html
        gt_roi_mask = []
        _, h, w = mask.shape
        if binary_mask:
            for i, idx in enumerate(gt_assignment[pos_index]):
                A = mask[idx,
                         np.max((int(sample_roi[i, 0]),
                                 0)):np.min((int(sample_roi[i, 2]), h)),
                         np.max((int(sample_roi[i, 1]),
                                 0)):np.min((int(sample_roi[i, 3]), w))]
                gt_roi_mask.append(
                    cv2.resize(A, (mask_size, mask_size)).astype(np.int32))
        else:
            for i, idx in enumerate(gt_assignment[pos_index]):
                m = np.zeros((mask_size, mask_size), dtype=np.int32)
                # reminder: the shape of keypoints is (N, 17, 3); N is the number of bboxes, 17 is the number of keypoints, and 3 is (x, y, v)
                # v=0: unlabeled, v=1: labeled but invisible, v=2: labeled and visible

                # bbox's (y0, x0), (y1, x1)
                y0, x0, y1, x1 = list(map(int, sample_roi[i, :4]))
                kp = mask[idx]  # shape is (17, 3)
                # convert keypoints coordinate (y, x) into mask coordinate system [0, mask_size]x[0, mask_size]
                kp[:, :2] = (kp[:, :2] - [y0, x0]) / \
                    [max(y1 - y0, 1), max(x1 - x0, 1)] * mask_size
                # Treat the position within the mask_size x mask_size space as a class label (softmax cross entropy is applied later)
                # Entries set to -1 are ignored
                keypoint_labels = np.zeros(kp.shape[0], dtype=np.int32)
                for j, r in enumerate(kp):
                    y, x, v = list(map(int, r))
                    if v == 2 and 0 <= y and y < mask_size and 0 <= x and x < mask_size:
                        keypoint_labels[j] = y * mask_size + x

                    else:
                        keypoint_labels[j] = -1

                gt_roi_mask.append(keypoint_labels)

        gt_roi_mask = xp.array(gt_roi_mask)

        if xp != np:
            sample_roi = cuda.to_gpu(sample_roi)
            gt_roi_loc = cuda.to_gpu(gt_roi_loc)
            gt_roi_label = cuda.to_gpu(gt_roi_label)
            gt_roi_mask = cuda.to_gpu(gt_roi_mask)
            sample_levels = cuda.to_gpu(sample_levels)
        return sample_roi, sample_levels, gt_roi_loc, gt_roi_label, gt_roi_mask
Example #46
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('resume')
    parser.add_argument('--nb_trials', type=int, default=50)
    parser.add_argument('--model', default='c5')
    parser.add_argument('--gpu', '-g', type=int, default=0)
    parser.add_argument('--nb_valid', type=int, default=10000)
    parser.add_argument('--seed', type=int, default=1701)
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args()
    start = time.time()
    logger.initialize("grad_"+args.model)
    logger.info(vars(args))
    np.random.seed(args.seed)
    save_dir = logger.get_savedir()
    logger.info("Written to {}".format(save_dir))
    logger.info('GPU: {}'.format(args.gpu))
    train_all, test = get_cifar10()

    if args.debug:
        valid = train_all[200:400]
    else:
        valid_choice = np.random.choice(range(len(train_all)),
                                        args.nb_valid, replace=False)
        valid = [x for idx, x in enumerate(train_all) if idx in valid_choice]

    print(len(valid))

    model = get_model(args.model, args.gpu, args.resume)

    # Get one image per iteration
    valid_iter = chainer.iterators.SerialIterator(
        valid, 1, repeat=False, shuffle=False)

    if not os.path.exists("grads"):
        os.makedirs("grads")

    chainer.config.train = False
    chainer.config.enable_backprop = True
    for idx, tup in enumerate(valid_iter):
        print(idx)
        img = tup[0][0]
        # Tile image to calculate all the trials at once
        inp = np.tile(img.copy()[np.newaxis, ...], (args.nb_trials, 1, 1, 1))
        label = tup[0][1][np.newaxis, ...]
        sigma = (inp.max() - inp.min()) * 0.025  # noise level
        model.cleargrads()
        inp = inp + np.random.randn(*inp.shape).astype(np.float32) * sigma  # Add noise to every image
        x = Variable(cuda.to_gpu(inp, args.gpu))
        xp = cuda.get_array_module(x)
        pred = model.get_feature(x, False)
        # print(class_list[int(cuda.to_cpu(xp.argmax(pred.data)))], class_list[int(label)])
        pred.grad = xp.ones(pred.shape, dtype=np.float32)
        pred.backward()
        mean_grad = cuda.to_cpu(xp.mean(x.grad.copy(), axis=0))
        mean_grad = np.max(np.abs(mean_grad), axis=0)
        mean_grad = color.gray2rgb(mean_grad)
        mean_grad = clip_image(mean_grad)
        orig_img = np.transpose(img, (1, 2, 0))
        masked = orig_img * mean_grad
        out = np.concatenate((mean_grad, masked, orig_img), axis=1)
        plt.imsave("grads/{:05d}.png".format(idx), out)
        model.cleargrads()

    print(time.time()-start)
Example #47
def n_step_rnn_base(n_layers, dropout_ratio, hx, ws, bs, xs,
                    activation, use_bi_direction, **kwargs):
    """n_step_rnn_base(n_layers, dropout_ratio, hx, ws, bs, xs, activation, use_bi_direction)

    Base function for Stack RNN/BiRNN functions.

    This function is used in :func:`chainer.functions.n_step_birnn` and
    :func:`chainer.functions.n_step_rnn`.
    Its behavior depends on the following arguments,
    ``activation`` and ``use_bi_direction``.

    .. warning::

       ``train`` and ``use_cudnn`` arguments are not supported anymore since
       v2.
       Instead, use ``chainer.using_config('train', train)`` and
       ``chainer.using_config('use_cudnn', use_cudnn)`` respectively.
       See :func:`chainer.using_config`.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is the number of layers and
            is equal to ``n_layers``, ``B`` is the mini-batch size, and ``N``
            is the dimension of the hidden units.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents the weights for the i-th layer.
            Each ``ws[i]`` is a list containing two matrices.
            ``ws[i][j]`` corresponds to ``W_j`` in the equation.
            Only ``ws[0][j]`` where ``0 <= j < 1`` has shape ``(I, N)``, as it
            is multiplied with the input variables. All other matrices have
            shape ``(N, N)``.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represents the biases for the i-th layer.
            Each ``bs[i]`` is a list containing two vectors.
            ``bs[i][j]`` corresponds to ``b_j`` in the equation.
            The shape of each vector is ``(N,)``, where ``N`` is the dimension
            of the hidden units.
        xs (list of chainer.Variable): A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is
            mini-batch size for time ``t``, and ``I`` is size of input units.
            Note that this function supports variable-length sequences.
            When sequences have different lengths, sort them in descending
            order by length and transpose the sorted sequences;
            :func:`~chainer.functions.transpose_sequence` transposes a list
            of :class:`~chainer.Variable` s holding sequences.
            ``xs`` therefore needs to satisfy
            ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        activation (str): Activation function name.
            Please select ``tanh`` or ``relu``.
        use_bi_direction (bool): If ``True``, this function uses
            Bi-directional RNN.

    Returns:
        tuple: This function returns a tuple containing two elements,
            ``hy`` and ``ys``.

            - ``hy`` is the updated hidden states, whose shape is the same as
              that of ``hx``.
            - ``ys`` is a list of :class:`~chainer.Variable` s. Each element
              ``ys[t]`` holds the hidden states of the last layer corresponding
              to the input ``xs[t]``. Its shape is ``(B_t, N)`` where ``B_t``
              is the mini-batch size for time ``t``, and ``N`` is the size of
              the hidden units. Note that ``B_t`` equals the batch size of
              ``xs[t]``.

    .. seealso::
       :func:`chainer.functions.n_step_rnn`
       :func:`chainer.functions.n_step_birnn`

    """  # NOQA

    argument.check_unexpected_kwargs(
        kwargs, train='train argument is not supported anymore. '
        'Use chainer.using_config',
        use_cudnn='use_cudnn argument is not supported anymore. '
        'Use chainer.using_config')
    argument.assert_kwargs_empty(kwargs)

    activation_list = ['tanh', 'relu']
    if activation not in activation_list:
        candidate = ','.join(activation_list)
        raise ValueError('Invalid activation: "%s". Please select from [%s]'
                         % (activation, candidate))

    xp = cuda.get_array_module(hx)

    if xp is not numpy and chainer.should_use_cudnn('>=auto', 5000):
        states = get_random_state().create_dropout_states(dropout_ratio)
        # flatten all input variables
        inputs = tuple(itertools.chain(
            (hx, ),
            itertools.chain.from_iterable(ws),
            itertools.chain.from_iterable(bs),
            xs))
        if use_bi_direction:
            # Bi-directional RNN
            if activation == 'tanh':
                rnn = NStepBiRNNTanh(n_layers, states)
            elif activation == 'relu':
                rnn = NStepBiRNNReLU(n_layers, states)
        else:
            # Uni-directional RNN
            if activation == 'tanh':
                rnn = NStepRNNTanh(n_layers, states)
            elif activation == 'relu':
                rnn = NStepRNNReLU(n_layers, states)

        ret = rnn(*inputs)
        hy, = ret[:1]
        ys = ret[1:]
        return hy, ys

    else:

        direction = 2 if use_bi_direction else 1
        hx = split_axis.split_axis(hx, n_layers * direction, axis=0,
                                   force_tuple=True)
        hx = [reshape.reshape(h, h.shape[1:]) for h in hx]

        xws = [_stack_weight([w[0]]) for w in ws]
        hws = [_stack_weight([w[1]]) for w in ws]
        xbs = [_stack_weight([b[0]]) for b in bs]
        hbs = [_stack_weight([b[1]]) for b in bs]

        xs_next = xs
        hy = []
        for layer in six.moves.range(n_layers):

            def _one_directional_loop(di):
                # di=0, forward RNN
                # di=1, backward RNN
                xs_list = xs_next if di == 0 else reversed(xs_next)
                layer_idx = direction * layer + di
                h = hx[layer_idx]
                h_list = []
                for x in xs_list:
                    batch = x.shape[0]
                    if h.shape[0] > batch:
                        h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    else:
                        h_rest = None

                    if layer > 0:
                        x = dropout.dropout(x, ratio=dropout_ratio)

                    rnn_in = (linear.linear(x, xws[layer_idx],
                                            xbs[layer_idx]) +
                              linear.linear(h, hws[layer_idx], hbs[layer_idx]))
                    if activation == 'tanh':
                        h_bar = tanh.tanh(rnn_in)
                    elif activation == 'relu':
                        h_bar = relu.relu(rnn_in)

                    if h_rest is not None:
                        h = concat.concat([h_bar, h_rest], axis=0)
                    else:
                        h = h_bar
                    h_list.append(h_bar)
                return h, h_list

            # Forward RNN
            h, h_forward = _one_directional_loop(di=0)
            hy.append(h)

            if use_bi_direction:
                # Backward RNN
                h, h_backward = _one_directional_loop(di=1)
                h_backward.reverse()
                # Concat
                xs_next = [concat.concat([hfi, hbi], axis=1) for (hfi, hbi) in
                           six.moves.zip(h_forward, h_backward)]
                hy.append(h)
            else:
                # Uni-directional RNN
                xs_next = h_forward

        ys = xs_next
        hy = stack.stack(hy)
        return hy, tuple(ys)
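
The non-cuDNN branch above reduces each time step to two linear transforms followed by the activation, mirroring the line linear.linear(x, xws[...]) + linear.linear(h, hws[...]). Below is a minimal NumPy-only sketch of that per-step recurrence for a single unidirectional layer; the toy shapes and weight names are made up for illustration and ignore variable-length batches and dropout.

import numpy as np

def simple_rnn_layer(xs, h0, w_x, b_x, w_h, b_h, activation=np.tanh):
    # Per-step recurrence used by the fallback loop above:
    #   h = f(x @ w_x.T + b_x + h @ w_h.T + b_h)
    h = h0
    hs = []
    for x in xs:                       # xs: list of (batch, in_size) arrays
        h = activation(x.dot(w_x.T) + b_x + h.dot(w_h.T) + b_h)
        hs.append(h)
    return h, hs                       # final hidden state, per-step outputs

rng = np.random.RandomState(0)
xs = [rng.randn(3, 4).astype(np.float32) for _ in range(2)]   # T=2, batch=3
h0 = np.zeros((3, 5), dtype=np.float32)                       # hidden size 5
w_x, w_h = rng.randn(5, 4), rng.randn(5, 5)
b_x, b_h = np.zeros(5), np.zeros(5)
h_last, ys = simple_rnn_layer(xs, h0, w_x, b_x, w_h, b_h)
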
Example #48
0
def add_noise(h, test, sigma=0.2):
    xp = cuda.get_array_module(h.data)
    if test:
        return h
    else:
        return h + sigma * xp.random.randn(*h.data.shape)
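
add_noise injects zero-mean Gaussian noise with standard deviation sigma only at training time and returns the input unchanged when test is True. A rough usage sketch follows (float64 data is used so the dtype matches the float64 noise produced by xp.random.randn):

import numpy as np
import chainer

h = chainer.Variable(np.zeros((2, 3)))     # float64 to match randn's dtype
h_noisy = add_noise(h, test=False)         # Variable with Gaussian noise added
h_clean = add_noise(h, test=True)          # returned unchanged at test time
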
Example #49
0
    def forward(self, inputs):
        x, = inputs
        xp = cuda.get_array_module(x)
        return xp.expand_dims(x, self.axis),
Example #50
0
    def _compute_core(self, *inputs):
        # Usually, backward() is not necessary for calculating occlusion
        with chainer.using_config('enable_backprop', self.enable_backprop):
            original_result = self.eval_fun(*inputs)
        target_var = self.get_target_var(inputs)
        original_target_array = target_var.array.copy()
        original_score = self.get_output_var(original_result)

        xp = cuda.get_array_module(target_var.array)
        value = 0.

        # fill with `value`
        target_dim = target_var.ndim
        batch_size = target_var.shape[0]
        occlusion_window_shape = [1] * target_dim
        occlusion_window_shape[0] = batch_size
        for axis, size in zip(self.slide_axis, self.size):
            occlusion_window_shape[axis] = size
        occlusion_scores_shape = [1] * target_dim
        occlusion_scores_shape[0] = batch_size
        for axis, size in zip(self.slide_axis, self.size):
            occlusion_scores_shape[axis] = target_var.shape[axis]
        occlusion_window = xp.ones(occlusion_window_shape,
                                   dtype=target_var.dtype) * value
        occlusion_scores = xp.zeros(occlusion_scores_shape, dtype=xp.float32)

        def _extract_index(slide_axis, size, start_indices):
            colon = slice(None)
            index = [colon] * target_dim
            for axis, size, start in zip(slide_axis, size, start_indices):
                index[axis] = slice(start, start + size, 1)
            return tuple(index)

        end_list = [target_var.data.shape[axis] - size + 1 for axis, size
                    in zip(self.slide_axis, self.size)]

        for start in itertools.product(*[six.moves.range(end)
                                         for end in end_list]):
            occlude_index = _extract_index(self.slide_axis, self.size, start)

            if self.target_extractor is None:
                inputs[0].array = original_target_array.copy()
                inputs[0].array[occlude_index] = occlusion_window
                with chainer.using_config('enable_backprop',
                                          self.enable_backprop):
                    occluded_result = self.eval_fun(*inputs)
            else:
                def mask_target_var(hook, args, _target_var):
                    _target_var.array = original_target_array.copy()
                    _target_var.array[occlude_index] = occlusion_window

                self.target_extractor.add_process(
                    '/saliency/mask_target_var', mask_target_var)
                with chainer.using_config('enable_backprop',
                                          self.enable_backprop):
                    occluded_result = self.eval_fun(*inputs)
                self.target_extractor.delete_process(
                    '/saliency/mask_target_var')

            occluded_score = self.get_output_var(occluded_result)
            score_diff_var = original_score - occluded_score  # (bs, 1)
            # expand_dim for ch_axis
            score_diff = xp.reshape(score_diff_var.array,
                                    occlusion_window_shape)
            occlusion_scores[occlude_index] += score_diff
        outputs = (occlusion_scores,)
        return outputs
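
The loop above is occlusion-based saliency: a constant-valued window is slid over the target array, the model is re-evaluated for every window position, and the resulting score drop is accumulated at the occluded positions. The following self-contained NumPy sketch shows the same idea in one dimension; the scoring function is made up purely for illustration.

import numpy as np

def occlusion_saliency_1d(x, score_fn, size=2, value=0.0):
    # Slide a window of `size`, fill it with `value`, and record how much the
    # score drops at each occluded position (larger drop = more salient).
    scores = np.zeros_like(x, dtype=np.float32)
    base = score_fn(x)
    for start in range(x.shape[0] - size + 1):
        occluded = x.copy()
        occluded[start:start + size] = value
        scores[start:start + size] += base - score_fn(occluded)
    return scores

x = np.array([0.1, 2.0, 0.1, 0.1], dtype=np.float32)
saliency = occlusion_saliency_1d(x, score_fn=np.sum)   # peaks around index 1
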
Example #51
0
    def __call__(self, array):
        xp = cuda.get_array_module(array)
        array[...] = xp.asarray(self.fill_value)
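
This initializer-style callable overwrites an existing array in place with a constant, and it works for NumPy and CuPy arrays alike because xp is taken from the array itself. Below is a NumPy-only usage sketch; the Fill class name and its constructor are assumptions added for illustration.

import numpy as np

class Fill(object):
    # Minimal holder for the __call__ shown above (NumPy-only sketch).
    def __init__(self, fill_value):
        self.fill_value = fill_value

    def __call__(self, array):
        array[...] = np.asarray(self.fill_value)

w = np.empty((2, 3), dtype=np.float32)
Fill(0.5)(w)          # w is now filled with 0.5 in place
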
Example #52
0
File: variable.py  Project: taura/chainer
    def backward(self, retain_grad=False):
        """Runs error backpropagation (a.k.a. backprop) from this variable.

        On backprop, :meth:`Function.backward` is called on each
        :class:`Function` object appearing in the backward graph starting from
        this variable. The backward graph is represented by backward references
        from variables to their creators, and from functions to their inputs.
        Backprop stops at all root variables. Some functions set ``None`` as
        the gradient of some inputs, in which case no further backprop takes
        place at those input variables.

        This method uses :data:`grad` as the initial error array. Users can
        set a gradient array manually before calling this method. If
        :data:`data` contains only one element (i.e., it is a scalar) and
        :data:`grad` is ``None``, this method automatically uses 1.0 as the
        initial error. This is useful when starting backprop from a scalar
        loss value.

        Args:
            retain_grad (bool): If ``True``, the gradient arrays of all
                intermediate variables are kept. Otherwise, :data:`grad` of
                each intermediate variable is set to ``None`` at an
                appropriate time, which may reduce the maximum memory
                consumption.

                In most training setups, the purpose of backprop is to compute
                gradients of parameters, not of intermediate variables, so it
                is recommended to leave this flag ``False``.

        """
        if self.creator is None:
            return
        initial_device = None
        if cuda.available:
            try:
                initial_device = cuda.Device()
            except cuda.cupy.cuda.runtime.CUDARuntimeError as e:
                if e.status != 38:  # cudaErrorNoDevice
                    raise

        is_debug = chainer.is_debug()

        cand_funcs = []
        seen_set = set()
        seen_vars = set()
        need_copy = set()

        # Initialize error by 1, if this is a loss variable
        if self.data.size == 1 and self.grad is None:
            with cuda.get_device(self.data) as device:
                if device is cuda.DummyDevice:
                    self.grad = numpy.ones_like(self.data)
                else:
                    self.grad = cuda.cupy.ones_like(self.data)

        def add_cand(cand):
            if cand not in seen_set:
                # Negate since heapq is min-heap
                heapq.heappush(cand_funcs, (-cand.rank, len(seen_set), cand))
                seen_set.add(cand)

        add_cand(self.creator)

        while cand_funcs:
            _, _, func = heapq.heappop(cand_funcs)
            outputs = [y() for y in func.outputs]  # access via weak ref

            in_data = tuple([x.data for x in func.inputs])
            out_grad = ()
            # if gradient accumulation is enabled
            if mkld.enable_acc_gradF((in_data,)) and in_data[0].ndim == 4 and all(isinstance(xi, numpy.ndarray) for xi in in_data):
                out_grad_tmp = tuple([None if y is None else y.grad for y in outputs])
                acc_grad_tuple = tuple([None if y is None else y.acc_grad for y in outputs])
                for grad_tmp, acc_grad in zip(out_grad_tmp, acc_grad_tuple):
                    if len(acc_grad) == 0:
                        # no accumulation needed; just pass the grad through
                        out_grad += (grad_tmp,)
                    else:
                        """
                        acc_grad's length is not 0, means need to do grad accumulate
                        call native MKLDNN sum primitive
                        """
                        y = numpy.empty((grad_tmp.shape), dtype=grad_tmp.dtype)
                        acc_grad += (grad_tmp,)
                        mkldnn_sum = mkldnn.Sum_F32()
                        mkldnn_sum.sum4d_gx(acc_grad, y)
                        out_grad += (y,)
            else:
                out_grad = tuple([None if y is None else y.grad for y in outputs])
            hooks = chainer.get_function_hooks()
            if func._n_local_function_hooks != 0:
                hooks = collections.OrderedDict(hooks)
                hooks.update(func.local_function_hooks)

            cuda.get_device(*(in_data + out_grad)).use()
            for hook in six.itervalues(hooks):
                hook.backward_preprocess(func, in_data, out_grad)

            if isinstance(func, chainer.functions.connection.convolution_2d.Convolution2DFunction):
                _x = func.inputs[0]
                if _x.creator is None and func.in_chain is True:
                    func.mkldnn_opt = True

            cosim_output = func.backward_cpu_cosim(in_data, out_grad)
            gxs = func.backward(in_data, out_grad)
            assert len(gxs) == len(in_data)
            func.cpu_cosim_verify_result(gxs, cosim_output)

            for hook in six.itervalues(hooks):
                hook.backward_postprocess(func, in_data, out_grad)

            if is_debug:
                for gx in gxs:
                    if gx is None:
                        continue
                    cuda.get_device(gx).use()
                    if cuda.get_array_module(gx).isnan(gx).any():
                        msg = 'NaN is detected on backward computation'
                        raise RuntimeError(msg)

            if not retain_grad:
                for y in outputs:
                    if y is not None and y is not self:
                        y.grad = None
            for x, gx in zip(func.inputs, gxs):
                if gx is None:
                    continue

                _check_grad_type(func, x, gx)

                # Accumulate the gradient to x. It is a bit tricky to handle
                # branches and parameter gradient accumulation correctly.
                id_x = id(x)
                if x.creator is None:  # leaf
                    if x._grad is None:  # 1st visit
                        x.grad = gx
                        need_copy.add(id_x)
                    else:
                        cuda.get_device(gx).use()
                        if id_x in need_copy:  # 2nd visit
                            if mkld.enable_acc_gradF((in_data,)) and in_data[0].ndim == 4 and all(isinstance(xi, numpy.ndarray) for xi in in_data):
                                # enable_acc_grad: defer the accumulation and only record the grad
                                x.acc_grad += (gx,)
                            else:
                                x.grad = utils.force_array(x.grad + gx)  # copy
                            need_copy.remove(id_x)  # remove from the set on the 2nd visit
                        else:
                            if mkld.enable_acc_gradF((in_data,)) and in_data[0].ndim == 4 and all(isinstance(xi, numpy.ndarray) for xi in in_data):
                                # enable_acc_grad: defer the accumulation and only record the grad
                                if len(x.acc_grad) > 0:  # means 3rd or later visit for variable x
                                    x.acc_grad += (gx,)
                                else:  # means this variable is W or b
                                    x._grad += gx
                            else:
                                x._grad += gx  # 3rd or later visit
                else:  # not a leaf
                    add_cand(x.creator)
                    if id_x not in seen_vars:  # 1st visit
                        x.grad = gx
                        seen_vars.add(id_x)
                        need_copy.add(id_x)
                    else:
                        cuda.get_device(gx).use()
                        if id_x in need_copy:  # 2nd visit
                            if mkld.enable_acc_gradF((in_data,)) and in_data[0].ndim == 4 and all(isinstance(xi, numpy.ndarray) for xi in in_data):
                                # enable_acc_grad: defer the accumulation and only record the grad
                                x.acc_grad += (gx,)
                            else:
                                x._grad = utils.force_array(gx + x._grad)  # copied
                            need_copy.remove(id_x)
                        else:  # 3rd or later visit
                            if mkld.enable_acc_gradF((in_data,)) and in_data[0].ndim == 4 and all(isinstance(xi, numpy.ndarray) for xi in in_data):
                                # enable_acc_grad: defer the accumulation and only record the grad
                                x.acc_grad += (gx,)
                            else:
                                x._grad += gx
            del gxs  # to reduce memory usage
            if initial_device is not None:
                initial_device.use()
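
Whatever the MKL-DNN accumulation path does internally, the entry point is used like the ordinary Variable.backward(): build a scalar loss, call backward(), and read the accumulated gradients from the leaf variables. A minimal usage sketch with plain chainer operations (the toy network and data are arbitrary):

import numpy as np
import chainer
import chainer.functions as F

x = chainer.Variable(np.array([[1.0, 2.0]], dtype=np.float32))
w = chainer.Variable(np.array([[0.5], [0.25]], dtype=np.float32))
loss = F.sum(F.matmul(x, w) ** 2)   # scalar loss

loss.backward()                     # grad defaults to 1.0 for a scalar loss
print(w.grad)                       # -> [[2.], [4.]] for these toy values
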
Example #53
0
    def total_variation2(self, x, tau=None):
        xp = cuda.get_array_module(x.data)
        dx = x[:, :, 1:, :] - x[:, :, :-1, :]
        dy = x[:, :, :, 1:] - x[:, :, :, :-1]
        return F.average(F.absolute(dx)) + F.average(F.absolute(dy))
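
total_variation2 computes the anisotropic total variation of an NCHW batch: the mean absolute difference between neighbouring pixels along the height and width axes (the xp and tau arguments above are left unused). An equivalent NumPy-only calculation, using the same slicing, for checking the value:

import numpy as np

def total_variation_np(x):
    # x: NCHW array; returns mean |dH| + mean |dW|, matching the slicing above.
    dx = x[:, :, 1:, :] - x[:, :, :-1, :]
    dy = x[:, :, :, 1:] - x[:, :, :, :-1]
    return np.abs(dx).mean() + np.abs(dy).mean()

img = np.arange(2 * 3 * 4 * 4, dtype=np.float32).reshape(2, 3, 4, 4)
tv = total_variation_np(img)   # 4.0 + 1.0 = 5.0 for this ramp image
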
Example #54
0
    def forward(self, xs):
        self.retain_inputs(())
        self._in_shapes = [x.shape for x in xs]
        xp = cuda.get_array_module(*xs)
        return xp.dstack(xs),
Example #55
0
File: variable.py  Project: zzak/chainer
    def _backward_main(self, retain_grad):
        self._node._check_old_style_gradient()
        if self.creator_node is None:
            return
        initial_device = None
        if cuda.available and isinstance(self.data, cuda.cupy.ndarray):
            try:
                initial_device = cuda.Device()
            except cuda.cupy.cuda.runtime.CUDARuntimeError as e:
                if e.status != 38:  # cudaErrorNoDevice
                    raise

        is_debug = chainer.is_debug()

        cand_funcs = []
        seen_set = set()
        grads = {}

        # Initialize error by 1, if this is a loss variable
        if self.data.size == 1 and self._grad_var is None:
            with cuda.get_device_from_array(self.data) as device:
                if device is cuda.DummyDevice:
                    self.grad = numpy.ones_like(self.data)
                else:
                    self.grad = cuda.cupy.ones_like(self.data)
        grads[self._node] = self._grad_var

        def add_cand(cand):
            if cand not in seen_set:
                # Negate since heapq is min-heap
                heapq.heappush(cand_funcs, (-cand.rank, len(seen_set), cand))
                seen_set.add(cand)

        add_cand(self.creator_node)

        def get_grad(node):
            if node is None:
                return None
            if node in grads:
                return grads[node]
            return node.grad_var

        while cand_funcs:
            _, _, func = heapq.heappop(cand_funcs)
            inputs = func.inputs
            target_input_indexes = [
                i for i, x in enumerate(inputs) if x.requires_grad
            ]
            if not target_input_indexes:
                continue
            outputs = [y() for y in func.outputs]  # access via weak ref

            in_data = tuple([x.data for x in inputs])
            out_grad = tuple([get_grad(y) for y in outputs])
            out_grad_data = tuple(
                [None if g is None else g.data for g in out_grad])
            hooks = chainer.get_function_hooks()
            if func._n_local_function_hooks != 0:
                hooks = collections.OrderedDict(hooks)
                hooks.update(func.local_function_hooks)
            hooks = hooks.values()  # avoid six for performance

            cuda.get_device_from_array(*in_data).use()
            for hook in hooks:
                hook.backward_preprocess(func, in_data, out_grad_data)

            # Collect the current input gradients.
            #
            # Note (Tokui): When the same variable is passed to multiple input
            # slots (e.g. an expression like ``f(x, x)``), it makes the
            # gradient accumulation complicated since the back-propagated
            # gradients w.r.t. the first and second argument should be
            # accumulated to the current gradient w.r.t. the same variable.
            # In this case, the current implementation passes the current
            # gradient only to the first occurrence of the variable in the
            # input tuple and passes ``None`` to the rest of the occurrences.
            # For example, when the input variables are ``(x, x)``, the
            # input gradient passed to the ``backward_accumulate`` method is
            # ``(gx, None)`` where ``gx`` is the current gradient of ``x``.
            # See also the docstring of ``FunctionNode.backward_accumulate``.
            target_inputs = [inputs[i] for i in target_input_indexes]
            in_grad = []
            for i, index_i in enumerate(target_input_indexes):
                x = inputs[index_i]
                if x in target_inputs[:i]:
                    # Pass ``None`` for duplicated input variables except for
                    # the first occurrence (see the comment above).
                    gx = None
                elif x in grads:
                    gx = grads[x]
                elif x.creator_node is None:
                    x._check_old_style_gradient()
                    # accumulate the gradient only if the node is a leaf
                    gx = x.grad_var
                else:
                    gx = None
                in_grad.append(gx)

            gxs = func.backward_accumulate(
                target_input_indexes, out_grad, in_grad)

            assert len(gxs) == len(in_grad)
            for hook in hooks:
                hook.backward_postprocess(func, in_data, out_grad_data)

            if is_debug:
                for gx in gxs:
                    if gx is None:
                        continue
                    gx_data = gx.data
                    if gx_data.dtype.kind == 'f':
                        cuda.get_device_from_array(gx_data).use()
                        if cuda.get_array_module(gx_data).isnan(gx_data).any():
                            raise RuntimeError(
                                'NaN is detected on backward computation of '
                                '{}'.format(func.label))

            if not retain_grad:
                for y in outputs:
                    if y is not None and y is not self.node:
                        grads[y] = None
                        y_var = y.get_variable_or_none()
                        if y_var is not None:
                            y_var._grad_var = None

            for i, gx in enumerate(gxs):
                if gx is None:
                    continue

                x = target_inputs[i]
                if not x.requires_grad:
                    continue

                _check_grad_type(func, x, gx.data)

                if x in target_inputs[:i]:
                    # Accumulate the duplicated gradients here. See the comment
                    # above the code that builds ``in_grad``.
                    cur_gx = grads[x]
                    grads[x] = gx if cur_gx is None else gx + cur_gx
                else:
                    grads[x] = gx

                x_var = x.get_variable_or_none()
                if x_var is not None:
                    x_var._grad_var = grads[x]

                if x.creator_node is not None:
                    add_cand(x.creator_node)

            del gxs  # to reduce memory usage
            if initial_device is not None:
                initial_device.use()
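
The long note in the middle of _backward_main deals with expressions that pass the same variable to several input slots, such as f(x, x): only the first occurrence receives the current gradient, and the duplicated contributions are summed into grads[x] afterwards. The tiny script below exercises exactly that case; both occurrences' gradients end up accumulated on x.

import numpy as np
import chainer

x = chainer.Variable(np.array([3.0], dtype=np.float32))
y = x * x          # one variable feeding two input slots of the same function
y.backward()
print(x.grad)      # [6.] == d(x*x)/dx evaluated at x=3
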
Example #56
0
    def __init__(self, q_values, q_values_formatter=lambda x: x):
        assert isinstance(q_values, chainer.Variable)
        self.xp = cuda.get_array_module(q_values.data)
        self.q_values = q_values
        self.n_actions = q_values.data.shape[1]
        self.q_values_formatter = q_values_formatter
Example #57
0
    def forward(self, xs):
        xp = cuda.get_array_module(*xs)
        return xp.dstack(xs),
Example #58
0
    def backward(self, x, gy):
        x, = x
        xp = cuda.get_array_module(x)
        gy, = gy
        gy_former, gy_latter = xp.split(gy, 2, axis=self.axis)
        return gy_former * (x > 0) - gy_latter * (-x > 0),
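
This backward matches a CReLU-style forward that concatenates relu(x) and relu(-x) along self.axis: the first half of gy flows through where x > 0 and the negated second half where -x > 0. That the forward is CReLU is inferred from the split/sign pattern rather than stated in the snippet, so treat the following NumPy sketch of the forward/backward pair as an illustration under that assumption.

import numpy as np

def crelu_forward(x, axis=1):
    # Concatenate relu(x) and relu(-x), doubling the size along `axis`.
    return np.concatenate([np.maximum(x, 0), np.maximum(-x, 0)], axis=axis)

def crelu_backward(x, gy, axis=1):
    # Mirror of the backward above: split gy and route each half by the sign of x.
    gy_former, gy_latter = np.split(gy, 2, axis=axis)
    return gy_former * (x > 0) - gy_latter * (-x > 0)

x = np.array([[1.0, -2.0]], dtype=np.float32)
y = crelu_forward(x)                      # [[1., 0., 0., 2.]]
gx = crelu_backward(x, np.ones_like(y))   # [[1., -1.]]
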
Example #59
0
    def __call__(self, rule, param):
        grad = param.grad
        xp = cuda.get_array_module(grad)
        with cuda.get_device_from_array(grad):
            xp.clip(grad, self.lower_bound, self.upper_bound, out=grad)
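
The two-argument __call__(rule, param) has the shape of a per-parameter optimizer hook, and the body clips every gradient element into [lower_bound, upper_bound] in place. Below is a NumPy-only sketch that completes such a hook class and applies it to a stand-in parameter; the class and attribute names and the FakeParam helper are assumptions added for illustration.

import numpy as np

class HardClip(object):
    # Sketch of a per-parameter hook wrapping the __call__ above (NumPy-only).
    name = 'HardClip'
    call_for_each_param = True

    def __init__(self, lower_bound, upper_bound):
        self.lower_bound = lower_bound
        self.upper_bound = upper_bound

    def __call__(self, rule, param):
        grad = param.grad
        np.clip(grad, self.lower_bound, self.upper_bound, out=grad)

class FakeParam(object):          # stand-in object with just a .grad attribute
    def __init__(self, grad):
        self.grad = grad

p = FakeParam(np.array([-5.0, 0.5, 5.0], dtype=np.float32))
HardClip(-1.0, 1.0)(None, p)      # p.grad is now [-1., 0.5, 1.]
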
Example #60
0
File: function.py  Project: zori/chainer
    def __call__(self, *inputs):
        """Applies forward propagation with chaining backward references.

        Basic behavior is expressed in documentation of :class:`Function`
        class.

        .. note::

           If the :data:`~Variable.data` attributes of the input variables
           reside on a GPU device, the appropriate device is selected before
           the :meth:`forward` method is called, so in most cases implementers
           do not need to take care of device selection.

        Args:
            inputs: Tuple of input :class:`Variable`, :class:`numpy.ndarray`,
                or :class:`cupy.ndarray` objects. The volatile flags of all
                input variables must agree. If an input is a
                :class:`numpy.ndarray` or a :class:`cupy.ndarray`, it is
                automatically wrapped with :class:`Variable`.

        Returns:
            One :class:`Variable` object or a tuple of multiple
            :class:`Variable` objects.

        """

        inputs = [x if isinstance(x, chainer.Variable)
                  else chainer.Variable(x, volatile=flag.AUTO)
                  for x in inputs]

        in_data = tuple([x.data for x in inputs])
        if chainer.is_debug():
            self._stack = traceback.extract_stack()

        if self.type_check_enable:
            self._check_data_type_forward(in_data)

        hooks = chainer.get_function_hooks()
        if self._n_local_function_hooks != 0:
            hooks = collections.OrderedDict(hooks)
            hooks.update(self.local_function_hooks)
        for hook in six.itervalues(hooks):
            hook.forward_preprocess(self, in_data)
        # Forward prop
        with cuda.get_device(*in_data):
            outputs = self.forward(in_data)
            assert type(outputs) == tuple
        for hook in six.itervalues(hooks):
            hook.forward_postprocess(self, in_data)

        if chainer.is_debug():
            if any(out.dtype.kind == 'f' and
                   cuda.get_array_module(out).isnan(out).any()
                   for out in outputs):
                msg = 'NaN is detected on forward computation'
                raise RuntimeError(msg)

        out_v = flag.aggregate_flags([x.volatile for x in inputs])
        ret = tuple([variable.Variable(y, volatile=out_v) for y in outputs])

        if out_v == 'on':
            build_graph = False
        elif out_v == 'off':
            build_graph = True
        else:
            build_graph = getattr(_thread_local, 'default_backprop', True)

        if build_graph:
            # Topological ordering
            self.rank = max([x.rank for x in inputs]) if inputs else 0
            # Backward edges
            for y in ret:
                y.set_creator(self)
            self.inputs = inputs
            # Forward edges (must be weak references)
            self.outputs = tuple([weakref.ref(y) for y in ret])

        if len(ret) == 1:
            return ret[0]
        else:
            return ret
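
The __call__ above is the machinery that runs whenever a user applies an old-style Function: raw arrays are wrapped into Variables, forward() runs on the appropriate device, and backward references are recorded unless all inputs are volatile. A minimal custom Function in that same old-style API is sketched below; whether a given chainer version still accepts this API is an assumption.

import numpy as np
import chainer

class Square(chainer.Function):
    # Minimal old-style Function: y = x ** 2.
    def forward(self, inputs):
        x, = inputs
        return x ** 2,

    def backward(self, inputs, grad_outputs):
        x, = inputs
        gy, = grad_outputs
        return 2 * x * gy,

x = chainer.Variable(np.array([3.0], dtype=np.float32))
y = Square()(x)    # dispatched through the __call__ shown above
y.backward()
print(x.grad)      # [6.]
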