Example #1
    def update_core_chainerx(self, param):
        """Updates the ChainerX parameter.

        This method can be overridden to implement custom update logic.
        The default implementation is to convert the parameter to a
        memory-shared NumPy/CuPy parameter and call the corresponding update
        method.

        See :meth:`update_core` for details.

        Args:
            param (~chainer.Variable): Variable to be updated.

        """
        grad_array = param.grad
        backend_name = param.array.device.backend.name
        if backend_name not in ('native', 'cuda'):
            raise RuntimeError(
                'Default implementation of Optimizer.update_core_chainerx is '
                'only provided for native or cuda backends (actual: {}). '
                'Override Optimizer.update_core_chainerx() to implement '
                'custom update logic.'.format(backend_name))

        # Convert state arrays to NumPy/CuPy
        chainerx_state_arrays = {}
        for state_name, st in self.state.items():
            if isinstance(st, chainerx.ndarray):
                fallback_arr = backend.from_chx(st)
                self.state[state_name] = fallback_arr
                chainerx_state_arrays[state_name] = (st, fallback_arr)

        # Create a temporary parameter with memory-shared NumPy/CuPy array
        # If the ChainerX parameter has a cached NumPy/CuPy copy, use the
        # cache and avoid redundant conversion. Else, create the cache here
        # and use it.
        if param._chainerx_fallback_array is None:
            param._chainerx_fallback_array = backend.from_chx(
                param.array)

        temp_param = variable.Variable._init_unchecked(
            param._chainerx_fallback_array, is_chainerx_array=False)

        if grad_array is not None:
            temp_param._set_grad_without_check(
                backend.from_chx(grad_array))

        # Update
        self.update_core(temp_param)

        # Restore state arrays
        for state_name, (arr, fallback_arr) in chainerx_state_arrays.items():
            cur_arr = self.state[state_name]
            if cur_arr is not fallback_arr:
                # The optimizer altered the reference of the state, instead of
                # updating it in-place. We need to convert the new state back
                # to ChainerX.
                arr = backend.to_chx(cur_arr)
            self.state[state_name] = arr
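
The docstring above describes converting a ChainerX parameter to a memory-shared NumPy/CuPy array. A minimal sketch of that conversion, assuming a ChainerX-enabled build and the native:0 device:

import numpy
import chainerx
from chainer import backend

# from_chx returns a memory-shared NumPy array for a native ChainerX array;
# to_chx wraps it back without copying.
x = chainerx.zeros((2, 3), dtype='float32', device='native:0')
y = backend.from_chx(x)            # numpy.ndarray backed by the same memory
assert isinstance(y, numpy.ndarray)
y += 1                             # the update is visible through x as well
assert numpy.array_equal(chainerx.to_numpy(x), y)
z = backend.to_chx(y)              # back to a chainerx.ndarray
assert isinstance(z, chainerx.ndarray)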
Example #2
    def update_core_chainerx(self, param):
        """Updates the ChainerX parameter.

        This method can be overridden to implement custom update logic.
        The default implementation is to convert the parameter to a
        memory-shared NumPy/CuPy parameter and call the corresponding update
        method.

        See :meth:`update_core` for details.

        Args:
            param (~chainer.Variable): Variable to be updated.

        """
        grad_array = param.grad
        backend_name = param.array.device.backend.name
        if backend_name not in ('native', 'cuda'):
            raise RuntimeError(
                'Default implementation of Optimizer.update_core_chainerx is '
                'only provided for native or cuda backends (actual: {}). '
                'Override Optimizer.update_core_chainerx() to implement '
                'custom update logic.'.format(backend_name))

        # Convert state arrays to NumPy/CuPy
        chainerx_state_arrays = {}
        for state_name, st in self.state.items():
            if isinstance(st, chainerx.ndarray):
                fallback_arr = backend.from_chx(st)
                self.state[state_name] = fallback_arr
                chainerx_state_arrays[state_name] = (st, fallback_arr)

        # Create a temporary parameter with memory-shared NumPy/CuPy array
        # If the ChainerX parameter has a cached NumPy/CuPy copy, use the
        # cache and avoid redundant conversion. Else, create the cache here
        # and use it.
        if param._chainerx_fallback_array is None:
            param._chainerx_fallback_array = backend.from_chx(param.array)

        temp_param = variable.Variable._init_unchecked(
            param._chainerx_fallback_array, is_chainerx_array=False)

        if grad_array is not None:
            temp_param._set_grad_without_check(backend.from_chx(grad_array))

        # Update
        self.update_core(temp_param)

        # Restore state arrays
        for state_name, (arr, fallback_arr) in chainerx_state_arrays.items():
            cur_arr = self.state[state_name]
            if cur_arr is not fallback_arr:
                # The optimizer altered the reference of the state, instead of
                # updating it in-place. We need to convert the new state back
                # to ChainerX.
                arr = backend.to_chx(cur_arr)
            self.state[state_name] = arr
Example #3
    def backward(self, target_input_indexes, grad_outputs):
        retained_inputs = self.get_retained_inputs()
        inputs = [None] * len(self.inputs)
        in_data = [None] * len(self.inputs)
        for retained, i_in in six.moves.zip(retained_inputs,
                                            self._input_indexes_to_retain):
            inputs[i_in] = retained
            in_data[i_in] = None if retained is None else retained.array
        in_data = tuple(in_data)

        grad_out_data = tuple(
            [None if grad is None else grad.array for grad in grad_outputs])

        is_chainerx_fallback_mode = self._is_chainerx_fallback_mode
        if is_chainerx_fallback_mode:
            # Convert input and output gradients to numpy/cupy
            in_data = backend.from_chx(in_data)
            grad_out_data = backend.from_chx(grad_out_data)

        # Call Function.backward
        with chainer.using_device(
                backend.get_device_from_array(*(in_data + grad_out_data))):
            if is_chainerx_fallback_mode:
                # Enable attribute fallback
                with function_node._chainerx_attribute_fallback(
                        self._function, self.chainerx_device):
                    gxs = self._function.backward(in_data, grad_out_data)
            else:
                gxs = self._function.backward(in_data, grad_out_data)

        # Check gradients
        for x, gx in six.moves.zip(self.inputs, gxs):
            if gx is not None:
                variable._check_grad_type(self, x, True, gx)

        # Convert input gradients back to ChainerX
        if is_chainerx_fallback_mode:
            gxs = backend.to_chx(gxs)

        ret = []
        for i in target_input_indexes:
            if gxs[i] is None:
                g = None
            else:
                # Intentionally not passing requires_grad=False so that
                # backprop routines can raise an error when a further backprop
                # is attempted against this gradient variable.
                g = variable.Variable(gxs[i])
                if g.xp is not chainerx:
                    g.node._old_style_grad_generator = self._function.label
            ret.append(g)

        return tuple(ret)
Example #4
    def backward(self, target_input_indexes, grad_outputs):
        retained_inputs = self.get_retained_inputs()
        inputs = [None] * len(self.inputs)
        in_data = [None] * len(self.inputs)
        for retained, i_in in six.moves.zip(
                retained_inputs, self._input_indexes_to_retain):
            inputs[i_in] = retained
            in_data[i_in] = None if retained is None else retained.array
        in_data = tuple(in_data)

        grad_out_data = tuple([None if grad is None else grad.data
                               for grad in grad_outputs])

        is_chainerx_fallback_mode = self._is_chainerx_fallback_mode
        if is_chainerx_fallback_mode:
            # Convert input and output gradients to numpy/cupy
            in_data = backend.from_chx(in_data)
            grad_out_data = backend.from_chx(grad_out_data)

        # Call Function.backward
        with cuda.get_device_from_array(*(in_data + grad_out_data)):
            if is_chainerx_fallback_mode:
                # Enable attribute fallback
                with function_node._chainerx_attribute_fallback(
                        self._function, self.chainerx_device):
                    gxs = self._function.backward(in_data, grad_out_data)
            else:
                gxs = self._function.backward(in_data, grad_out_data)

        # Check gradients
        for x, gx in six.moves.zip(self.inputs, gxs):
            if gx is not None:
                variable._check_grad_type(self, x, True, gx)

        # Convert input gradients back to ChainerX
        if is_chainerx_fallback_mode:
            gxs = backend.to_chx(gxs)

        ret = []
        for i in target_input_indexes:
            if gxs[i] is None:
                g = None
            else:
                # Intentionally not passing requires_grad=False so that
                # backprop routines can raise an error when a further backprop
                # is attempted against this gradient variable.
                g = variable.Variable(gxs[i])
                if g.xp is not chainerx:
                    g.node._old_style_grad_generator = self._function.label
            ret.append(g)

        return tuple(ret)
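
For reference, a small sketch of the pass-through behaviour that the fallback branch above relies on: from_chx maps over tuples, leaves None entries alone, and returns non-ChainerX arrays unchanged (the array name below is illustrative only):

import numpy
from chainer import backend

# Non-ChainerX arrays pass through from_chx untouched, and tuples are
# converted element-wise with None preserved, which is how in_data and
# grad_out_data are handled in the backward() above.
a = numpy.arange(6, dtype=numpy.float32).reshape(2, 3)
converted = backend.from_chx((a, None))
assert converted[0] is a
assert converted[1] is None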
Example #5
    def forward_cpu(self, x):
        func = self.func
        ndim = func.ndim
        ksize = func.ksize
        stride = func.stride
        pad = func.pad
        cover_all = func.cover_all
        indexes = backend.from_chx(func.indexes)

        col = conv_nd.im2col_nd_cpu(x[0],
                                    ksize,
                                    stride,
                                    pad,
                                    pval=-float('inf'),
                                    cover_all=cover_all)
        n, c = col.shape[:2]
        mid = (len(col.shape) - 2) // 2 + 2
        ksize = col.shape[2:mid]
        outs = col.shape[mid:]
        # (n, c, k_1 * k_2 * ... * k_N, out_1, out_2, ..., out_N)
        ksize_total = functools.reduce(mul, ksize)
        col_shape = (n, c) + (ksize_total, ) + outs
        col = col.reshape(col_shape)
        # (n, c, out_1, ..., out_N, k_1 * .. * k_N)
        col_indexes = (0, 1) + tuple(six.moves.range(3, 3 + ndim)) + (2, )
        col = col.transpose(col_indexes)
        col = col.reshape(-1, ksize_total)

        indexes = indexes.ravel()
        col = col[numpy.arange(len(indexes)), indexes]
        return col.reshape((n, c) + outs),
Example #6
    def forward_gpu(self, gy):
        func = self.func

        if func.is_cudnn_used:
            return func.backward_cudnn(gy)

        ndim = func.ndim
        ksize = func.ksize
        stride = func.stride
        pad = func.pad
        in_shape = func._in_shape
        in_dtype = func._in_dtype
        indexes = backend.from_chx(func.indexes)

        n, c = in_shape[:2]
        dims = in_shape[2:]
        ys = gy[0].shape[2:]
        gx = cuda.cupy.empty(in_shape, in_dtype)

        in_params, out_params, operation, name = \
            max_pooling_nd_kernel.MaxPoolingNDKernelBackward.generate(ndim)
        cuda.elementwise(in_params, out_params, operation,
                         name)(gy[0].reduced_view(), indexes.reduced_view(),
                               *(dims + ys + ksize + stride + pad + (gx, )))
        return gx,
Example #7
    def forward_cpu(self, inputs):
        class_weight = backend.from_chx(self.class_weight)

        self.retain_inputs((0, 1))
        x, t = inputs
        if chainer.is_debug():
            _check_input_values(x, t, self.ignore_label)

        log_y = log_softmax._log_softmax(x)
        if self.cache_score:
            self.y = numpy.exp(log_y)
        if class_weight is not None:
            shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
            log_y *= _broadcast_to(class_weight.reshape(shape), x.shape)
        log_yd = numpy.rollaxis(log_y, 1)
        log_yd = log_yd.reshape(len(log_yd), -1)
        t_valid = t != self.ignore_label
        t = t * t_valid
        log_p = log_yd[t.ravel(), numpy.arange(t.size)]

        log_p *= t_valid.ravel()
        if self.reduce == 'mean':
            # deal with the case where the SoftmaxCrossEntropy is
            # unpickled from the old version
            if self.normalize:
                count = t_valid.sum()
            else:
                count = len(x)
            self._coeff = 1.0 / max(count, 1)

            y = log_p.sum(keepdims=True) * (-self._coeff)
            return y.reshape(()),
        else:
            return -log_p.reshape(t.shape),
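
A small NumPy-only illustration of the class_weight broadcast used above (shapes chosen arbitrarily for the sketch):

import numpy

# For x of shape (N, C, ...), the weight vector of length C is reshaped so
# that it lines up with axis 1 and broadcasts over all remaining axes.
x = numpy.zeros((4, 3, 5), dtype=numpy.float32)           # (N, C, W)
class_weight = numpy.array([1.0, 2.0, 0.5], dtype=numpy.float32)
shape = [1 if d != 1 else -1 for d in range(x.ndim)]      # [1, -1, 1]
weight = numpy.broadcast_to(class_weight.reshape(shape), x.shape)
assert weight.shape == x.shape
assert weight[0, 1, 0] == 2.0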
Example #8
    def _chainerx_apply_fallback_preprocess(self, in_data, inputs):
        chainerx_in_data = in_data
        in_data = []
        device = None
        for data, x in six.moves.zip(chainerx_in_data, inputs):
            if data is None:
                fallback_data = None
            else:
                # Use the cached fallback arrays as inputs if they exist.
                x_is_variable = isinstance(x, variable.Variable)
                if x_is_variable and x._chainerx_fallback_array is not None:
                    fallback_data = x._chainerx_fallback_array
                    if device is None:
                        device = x.device
                else:
                    fallback_data = backend.from_chx(data)
                    if device is None:
                        device = backend.ChainerxDevice(data.device)

                    # Update the fallback cache if possible.
                    if x_is_variable:
                        x._chainerx_fallback_array = fallback_data

            in_data.append(fallback_data)

        in_data = tuple(in_data)
        return chainerx_in_data, in_data, device
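
The method above caches the fallback array on the Variable (x._chainerx_fallback_array) so repeated conversions are avoided. A hedged sketch of the same cache-or-convert pattern, using an ordinary dict keyed by id() instead of the private Variable attribute:

from chainer import backend

# Illustrative only: a plain-dict stand-in for the per-Variable cache used
# in the snippet above.
_fallback_cache = {}

def cached_from_chx(arr):
    fallback = _fallback_cache.get(id(arr))
    if fallback is None:
        fallback = backend.from_chx(arr)
        _fallback_cache[id(arr)] = fallback
    return fallback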
Example #9
    def forward_gpu(self, gys):
        if self._used_cudnn:
            x, = self.apoolnd.get_retained_inputs()
            return self.apoolnd.backward_gpu((x.data, ), gys)

        is_pad_value_none = self.pad_value is None

        gy, = gys
        n, c = self._in_shape[:2]
        idims = self._in_shape[2:]
        odims = gy.shape[2:]
        if is_pad_value_none:
            coeff = self.apoolnd.coeff
            # This conversion from chainerx to cupy exists here for
            # double backward of chainerx on cuda.
            coeff = backend.from_chx(coeff)
            gy *= coeff
        gx = cuda.cupy.empty(self._in_shape, self._in_dtype)

        in_params, out_params, operation, name = \
            average_pooling_nd_kernel.AveragePoolingNDKernelBackward.generate(
                self.ndim)
        cuda.elementwise(in_params, out_params, operation, name)(
            gy.reduced_view(),
            *(idims + odims + self.ksize + self.stride + self.pad + (gx, )))

        if not is_pad_value_none:
            gx /= functools.reduce(operator.mul, self.ksize)
        return gx,
Example #10
    def _make_samples(self, t):
        size = int(t.shape[0])
        # first one is the positive, and others are sampled negatives
        samples = self.sampler((size, self.sample_size + 1))
        samples = backend.from_chx(samples)
        samples[:, 0] = t
        return samples
Example #11
    def forward_gpu(self, gys):
        if self._used_cudnn:
            x, = self.apoolnd.get_retained_inputs()
            return self.apoolnd.backward_gpu((x.data,), gys)

        is_pad_value_none = self.pad_value is None

        gy, = gys
        n, c = self._in_shape[:2]
        idims = self._in_shape[2:]
        odims = gy.shape[2:]
        if is_pad_value_none:
            coeff = self.apoolnd.coeff
            # This conversion from chainerx to cupy exists here for
            # double backward of chainerx on cuda.
            coeff = backend.from_chx(coeff)
            gy *= coeff
        gx = cuda.cupy.empty(self._in_shape, self._in_dtype)

        in_params, out_params, operation, name = \
            average_pooling_nd_kernel.AveragePoolingNDKernelBackward.generate(
                self.ndim)
        cuda.elementwise(in_params, out_params, operation, name)(
            gy.reduced_view(),
            *(idims + odims + self.ksize + self.stride + self.pad
              + (gx,)))

        if not is_pad_value_none:
            gx /= functools.reduce(operator.mul, self.ksize)
        return gx,
Example #12
    def forward_gpu(self, inputs):
        t = backend.from_chx(self.t)  # Workaround for ChainerX.

        gx = cuda.cupy.zeros(self.shape, self.dtype)
        gx = cuda.elementwise('S t, T gloss', 'raw T gx',
                              'int ind[] = {i, t}; gx[ind] = gloss;',
                              'getitem_bwd')(t, inputs[0], gx)
        return gx,
Example #13
    def forward_gpu(self, inputs):
        class_weight = backend.from_chx(self.class_weight)

        self.retain_inputs((0, 1))
        cupy = cuda.cupy
        x, t = inputs
        if chainer.is_debug():
            _check_input_values(x, t, self.ignore_label)

        if x.size == 0:
            y = cupy.zeros(t.shape, dtype=x.dtype)
            if self.cache_score:
                self.y = y
            if self.reduce == 'mean':
                return y.sum(),
            else:
                return y,
        log_y = log_softmax._log_softmax(x)
        if self.cache_score:
            self.y = cupy.exp(log_y)
        if class_weight is not None:
            shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
            log_y *= cupy.broadcast_to(class_weight.reshape(shape), x.shape)

        log_y = cupy.rollaxis(log_y, 1, log_y.ndim)

        if self.reduce == 'mean':
            # Reduction is performed in a promoted dtype
            reduc_dtype = _reduction_dtype(x.dtype)
            if self.normalize:
                count = (t != self.ignore_label).sum(dtype=reduc_dtype)
                count = cupy.maximum(1, count)
                coeff = 1. / count
            else:
                coeff = cupy.array(1. / max(1, len(t)), dtype=reduc_dtype)
            self._coeff = coeff

            ret = cuda.reduce(
                'S t, raw T log_y, int32 n_channel, raw U coeff, '
                'S ignore_label', 'U out',
                't == ignore_label ? T(0) : log_y[_j * n_channel + t]',
                'a + b', 'out = static_cast<U>(a * -coeff[0])', '0',
                'crossent_fwd')(t, log_y.reduced_view(), log_y.shape[-1],
                                self._coeff, self.ignore_label)
            ret = ret.astype(log_y.dtype, copy=False)
        else:
            ret = cuda.elementwise(
                'S t, raw T log_y, int32 n_channel, T ignore', 'T out', '''
                if (t == ignore) {
                  out = 0;
                } else {
                  out = -log_y[i * n_channel + t];
                }
                ''', 'softmax_crossent_no_reduce_fwd')(t, log_y.reduced_view(),
                                                       log_y.shape[-1],
                                                       self.ignore_label)
            ret = ret.reshape(t.shape)
        return ret,
Example #14
    def getattribute(self, name):
        value = sup.__getattribute__(name)
        if isinstance(value, chainerx.ndarray):
            fallback_arr = fallback_array_cache.get(name)
            if fallback_arr is None:
                fallback_arr = backend.from_chx(value)
                fallback_array_cache[name] = fallback_arr
            return fallback_arr
        return value
Example #15
    def output_data(self):
        """A tuple of the retained output arrays.

        It has the same length as the :attr:`outputs`. Elements that are not
        retained are set to ``None``.

        """
        if self.node._is_chainerx_fallback_mode:
            return backend.from_chx(self.node.output_data)
        return self.node.output_data
Example #16
    def output_data(self):
        """A tuple of the retained output arrays.

        It has the same length as the :attr:`outputs`. Elements that are not
        retained are set to ``None``.

        """
        if self.node._is_chainerx_fallback_mode:
            return backend.from_chx(self.node.output_data)
        return self.node.output_data
Example #17
    def forward(self, axis, gamma, x, x_layout, xp, expander,
                beta, eps, decay, running_mean, running_var):
        interm_dtype = numpy.promote_types(x.dtype, gamma.dtype)

        gamma = gamma[expander].astype(interm_dtype, copy=False)
        beta = beta[expander].astype(interm_dtype, copy=False)
        mean, var = self.get_mean_and_var(axis, gamma,
                                          x, xp, interm_dtype)
        if xp is numpy:
            inv_std = numpy.reciprocal(numpy.sqrt(
                var + eps, dtype=interm_dtype))
        else:
            inv_std = cuda.cupyx.rsqrt(
                var + eps, dtype=interm_dtype)

        y = _apply_bn_fwd(xp, x, mean[expander],
                          inv_std[expander], gamma, beta)

        # Update running statistics if given
        if running_mean is not None:
            m = x.size // gamma.size
            adjust = m / max(m - 1., 1.)  # unbiased estimation

            xp = backend.get_array_module(
                running_mean, running_var)

            if xp is chainerx:
                running_mean, running_var = backend.from_chx(
                    (running_mean, running_var))

            if xp is numpy:
                running_mean *= decay
                running_mean += (1 - decay) * mean
                running_var *= decay
                running_var += (1 - decay) * adjust * var
            else:
                # running_mean and running_var have the same dtype as x,
                # while mean and var have interm_dtype, which is promoted
                # from x's dtype.
                cuda.elementwise(
                    'T mean, T var, U decay, U adjust',
                    'U r_mean, U r_var',
                    '''
                    r_mean = r_mean * decay + mean * (1 - decay);
                    r_var = r_var * decay + var * (1 - decay) * adjust;
                    ''',
                    'update_mean_var')(mean, var, decay, adjust,
                                       running_mean, running_var)

            if xp is chainerx:
                running_mean = backend.to_chx(running_mean)
                running_var = backend.to_chx(running_var)

        y_layout = x_layout
        return y, y_layout, running_mean, running_var, mean, var, inv_std, None
Example #18
    def forward_gpu(self, inputs):
        t = backend.from_chx(self.t)  # Workaround for ChainerX.

        gx = cuda.cupy.zeros(self.shape, self.dtype)
        gx = cuda.elementwise(
            'S t, T gloss',
            'raw T gx',
            'int ind[] = {i, t}; gx[ind] = gloss;',
            'getitem_bwd'
        )(t, inputs[0], gx)
        return gx,
Example #19
    def from_chx(self):
        """Converts parameter variables and persistent values from ChainerX \
to NumPy/CuPy devices without any copy."""
        d = self.__dict__
        for name in self._params:
            d[name].from_chx()
        for name in self._persistent:
            if not numpy.isscalar(d[name]):
                d[name] = backend.from_chx(d[name])

        if isinstance(self._device, backend.ChainerxDevice):
            self._device = self._device.fallback_device

        return self
Example #20
    def from_chx(self):
        """Converts parameter variables and persistent values from ChainerX \
to NumPy/CuPy devices without any copy."""
        d = self.__dict__
        for name in self._params:
            d[name].from_chx()
        for name in self._persistent:
            if not numpy.isscalar(d[name]):
                d[name] = backend.from_chx(d[name])

        if isinstance(self._device, backend.ChainerxDevice):
            self._device = self._device.fallback_device

        return self
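
A hedged usage sketch for Link.from_chx as documented above, assuming a ChainerX-enabled build (L.Linear and the 'native:0' device string are only used for illustration):

import numpy
import chainerx
import chainer.links as L

# Move a link's parameters to ChainerX and back; from_chx restores
# memory-shared NumPy arrays without copying the data.
link = L.Linear(3, 2)
link.to_device('native:0')        # parameters become chainerx arrays
assert link.W.xp is chainerx
link.from_chx()                   # back to NumPy, sharing the same memory
assert link.W.xp is numpy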
Example #21
    def test_from_chx(self, backend_config):
        arr = backend_config.get_array(numpy.ones((2, 3), numpy.float32))
        arr_converted = backend.from_chx(arr)

        src_device = backend_config.device
        if src_device.xp is chainerx:
            dst_xp = src_device.fallback_device.xp
            assert isinstance(arr_converted, dst_xp.ndarray)
            if dst_xp is cuda.cupy:
                assert arr_converted.device.id == src_device.device.index
        else:
            assert arr is arr_converted

        with backend_config:
            self.check_equal_memory_shared(arr, arr_converted)
Example #22
    def test_from_chx(self, backend_config):
        arr = backend_config.get_array(numpy.ones((2, 3), numpy.float32))
        arr_converted = backend.from_chx(arr)

        src_device = backend_config.device
        if src_device.xp is chainerx:
            dst_xp = src_device.fallback_device.xp
            assert isinstance(arr_converted, dst_xp.ndarray)
            if dst_xp is cuda.cupy:
                assert arr_converted.device.id == src_device.device.index
        else:
            assert arr is arr_converted

        with backend_config:
            self.check_equal_memory_shared(arr, arr_converted)
Example #23
    def forward(self, xs):
        a = xs[0]
        b = xs[1]
        y = a.copy()
        xp = backend.get_array_module(a)
        slices = tuple([
            backend.from_chx(s) if isinstance(s, chainerx.ndarray) else s
            for s in self.slices])
        if y[slices].shape != b.shape:
            raise ValueError(
                'Chainer does not support automatic broadcasting '
                'of variables.')
        if xp is numpy:
            numpy.add.at(y, slices, b),
        else:
            cuda.cupyx.scatter_add(y, slices, b),
        return y,
Example #24
    def forward(self, xs):
        a = xs[0]
        b = xs[1]
        y = a.copy()
        xp = backend.get_array_module(a)
        slices = tuple([
            backend.from_chx(s) if isinstance(s, chainerx.ndarray) else s
            for s in self.slices
        ])
        if y[slices].shape != b.shape:
            raise ValueError('Chainer does not support automatic broadcasting '
                             'of variables.')
        if xp is numpy:
            numpy.add.at(y, slices, b),
        else:
            cuda.cupyx.scatter_add(y, slices, b),
        return y,
Example #25
    def forward_cpu(self, inputs):
        class_weight = backend.from_chx(self.class_weight)

        self.retain_inputs((0, 1))
        x, t = inputs
        if x.ndim == t.ndim and x.shape == t.shape:
            self.soft_target = True
        if chainer.is_debug() and not self.soft_target:
            _check_input_values(x, t, self.ignore_label)

        log_y = log_softmax._log_softmax(x)
        if self.cache_score:
            self.y = numpy.exp(log_y)

        if self.soft_target:
            return self._soft_target_loss(numpy, x, t, log_y)

        if class_weight is not None:
            shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
            log_y *= _broadcast_to(class_weight.reshape(shape), x.shape)
        log_yd = numpy.rollaxis(log_y, 1)
        log_yd = log_yd.reshape(len(log_yd), -1)
        t_valid = t != self.ignore_label
        t = t * t_valid
        log_p = log_yd[t.ravel(), numpy.arange(t.size)]

        log_p *= t_valid.ravel()
        if self.reduce == 'mean':
            if self.normalize:
                count = t_valid.sum()
            else:
                count = len(x)
            self._coeff = 1.0 / max(count, 1)

            # Perform reduction in a promoted dtype
            reduc_dtype = _reduction_dtype(x.dtype)
            y = log_p.sum(keepdims=True, dtype=reduc_dtype)
            y = y * (-self._coeff)
            y = y.astype(x.dtype, copy=False)
            return y.reshape(()),
        else:
            return -log_p.reshape(t.shape),
Example #26
    def forward(self, inputs):
        slices = tuple([
            backend.from_chx(s) if isinstance(s, chainerx.ndarray) else s
            for s in self.slices
        ])

        gy, = inputs
        xp = backend.get_array_module(*inputs)
        gx = xp.zeros(self._in_shape, gy.dtype)
        if xp is numpy:
            try:
                numpy.add.at(gx, slices, gy)
            except IndexError:
                done = False
                # In numpy<1.13, 0-dim boolean index is not supported in
                # numpy.add.at and it's supported for 0-dim arr in
                # arr.__getitem__.
                if not _numpy_supports_0d_bool_index and len(slices) == 1:
                    idx = numpy.asanyarray(slices[0])
                    if idx.dtype == numpy.dtype(bool):
                        # Convert the array and the mask to 1-dim.
                        # numpy.add.at with them is supported in older numpy.
                        numpy.add.at(gx[None], idx[None], gy)
                        done = True

                if not done:
                    msg = '''
GetItem does not support backward for this slices. The slices argument is not
supported by numpy.add.at, while it is supported by numpy.ndarray.__getitem__.

Please report this error to the issue tracker with the stack trace,
the information of your environment, and your script:
https://github.com/chainer/chainer/issues/new.
'''
                    raise IndexError(msg)
        else:
            gx.scatter_add(slices, inputs[0])
        return gx,
Example #27
    def forward_cpu(self, inputs):
        class_weight = backend.from_chx(self.class_weight)

        self.retain_inputs((0, 1))
        x, t = inputs
        if chainer.is_debug():
            _check_input_values(x, t, self.ignore_label)

        log_y = log_softmax._log_softmax(x)
        if self.cache_score:
            self.y = numpy.exp(log_y)
        if class_weight is not None:
            shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
            log_y *= _broadcast_to(class_weight.reshape(shape), x.shape)
        log_yd = numpy.rollaxis(log_y, 1)
        log_yd = log_yd.reshape(len(log_yd), -1)
        t_valid = t != self.ignore_label
        t = t * t_valid
        log_p = log_yd[t.ravel(), numpy.arange(t.size)]

        log_p *= t_valid.ravel()
        if self.reduce == 'mean':
            # deal with the case where the SoftmaxCrossEntropy is
            # unpickled from the old version
            if self.normalize:
                count = t_valid.sum()
            else:
                count = len(x)
            self._coeff = 1.0 / max(count, 1)

            # Perform reduction in a promoted dtype
            reduc_dtype = _reduction_dtype(x.dtype)
            y = log_p.sum(keepdims=True, dtype=reduc_dtype)
            y = y * (-self._coeff)
            y = y.astype(x.dtype, copy=False)
            return y.reshape(()),
        else:
            return -log_p.reshape(t.shape),
Example #28
    def forward(self, inputs):
        slices = tuple([
            backend.from_chx(s) if isinstance(s, chainerx.ndarray) else s
            for s in self.slices])

        gy, = inputs
        xp = backend.get_array_module(*inputs)
        gx = xp.zeros(self._in_shape, gy.dtype)
        if xp is numpy:
            try:
                numpy.add.at(gx, slices, gy)
            except IndexError:
                done = False
                # In numpy<1.13, 0-dim boolean index is not supported in
                # numpy.add.at and it's supported for 0-dim arr in
                # arr.__getitem__.
                if not _numpy_supports_0d_bool_index and len(slices) == 1:
                    idx = numpy.asanyarray(slices[0])
                    if idx.dtype == numpy.dtype(bool):
                        # Convert the array and the mask to 1-dim.
                        # numpy.add.at with them is supported in older numpy.
                        numpy.add.at(gx[None], idx[None], gy)
                        done = True

                if not done:
                    msg = '''
GetItem does not support backward for this slices. The slices argument is not
supported by numpy.add.at, while it is supported by numpy.ndarray.__getitem__.

Please report this error to the issue tracker with the stack trace,
the information of your environment, and your script:
https://github.com/chainer/chainer/issues/new.
'''
                    raise IndexError(msg)
        else:
            gx.scatter_add(slices, inputs[0])
        return gx,
Example #29
    def forward_gpu(self, inputs):
        func = self.func

        if func.is_cudnn_used:
            x = func.get_retained_inputs()[0].array
            return self._forward_gpu_compute_indexes_again((x, inputs[0]))

        ndim = func.ndim
        ksize = func.ksize
        stride = func.stride
        pad = func.pad
        cover_all = func.cover_all
        indexes = backend.from_chx(func.indexes)

        x, = inputs
        in_shape = x.shape
        in_dtype = x.dtype

        n, c = in_shape[:2]
        dims = in_shape[2:]

        ys = tuple(
            conv_nd.get_conv_outsize(d, k, s, p, cover_all)
            for (d, k, s, p) in six.moves.zip(dims, ksize, stride, pad))
        # (n, c, y_1, y_2, ..., y_N)
        y_shape = (n, c) + ys
        y = cuda.cupy.empty(y_shape, dtype=x.dtype)

        cls = max_pooling_nd_kernel.MaxPoolingNDKernelForwardWithIndexes
        in_params, out_params, operation, name = cls.generate(ndim)
        cuda.elementwise(in_params, out_params, operation, name)(
            x.reduced_view(),
            *(dims + ys + ksize + stride + pad + (indexes.reduced_view(), y)))

        self._in_shape = in_shape
        self._in_dtype = in_dtype
        return y,
Example #30
    def forward_gpu(self, gys):
        func = self.func

        if func.is_cudnn_used:
            return func.backward_cudnn(gys)

        ndim = func.ndim
        pad_value = func.pad_value
        ksize = func.ksize
        stride = func.stride
        pad = func.pad
        in_shape = func._in_shape
        in_dtype = func._in_dtype

        is_pad_value_none = pad_value is None

        gy, = gys
        n, c = in_shape[:2]
        idims = in_shape[2:]
        odims = gy.shape[2:]
        if is_pad_value_none:
            # This conversion from chainerx to cupy exists here for
            # double backward of chainerx on cuda.
            coeff = backend.from_chx(func.coeff)
            gy *= coeff
        gx = cuda.cupy.empty(in_shape, in_dtype)

        in_params, out_params, operation, name = \
            average_pooling_nd_kernel.AveragePoolingNDKernelBackward.generate(
                ndim)
        cuda.elementwise(in_params, out_params, operation,
                         name)(gy.reduced_view(), *(idims + odims + ksize +
                                                    stride + pad + (gx, )))

        if not is_pad_value_none:
            gx /= functools.reduce(operator.mul, ksize)
        return gx,
Example #31
    def forward_cpu(self, inputs):
        t = backend.from_chx(self.t)  # Workaround for ChainerX.

        gx = numpy.zeros(self.shape, self.dtype)
        gx[six.moves.range(self.t.size), t] = inputs[0]
        return gx,
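
A NumPy-only sketch of why the stored index array is converted first: the fancy-indexed assignment used above needs a NumPy array, not a chainerx.ndarray (the values below are made up for the illustration):

import numpy
from chainer import backend

# Stand-in for the stored index array; from_chx is a no-op here because t is
# already a NumPy array, but it converts a chainerx.ndarray when needed.
t = backend.from_chx(numpy.array([2, 0, 1]))
gloss = numpy.array([1.0, 2.0, 3.0], dtype=numpy.float32)
gx = numpy.zeros((3, 4), dtype=numpy.float32)
gx[numpy.arange(t.size), t] = gloss    # the same scatter as in forward_cpu above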
Example #32
    def forward(self, xs):
        slices = tuple([
            backend.from_chx(s) if isinstance(s, chainerx.ndarray) else s
            for s in self.slices
        ])
        return utils.force_array(xs[0][slices]),
Example #33
    def visit_array(self, arr):
        assert isinstance(arr, chainer.get_array_types())
        return backend.from_chx(arr)
Example #34
    def visit_array(self, arr):
        assert isinstance(arr, chainer.get_array_types())
        return backend.from_chx(arr)
Example #35
    def forward(self, inputs):
        self.retain_inputs((0, 1))
        x, gamma, beta = inputs

        xp = backend.get_array_module(x)
        if self.running_mean is None:
            self.running_mean = xp.zeros_like(gamma, dtype=x.dtype)
            self.running_var = xp.zeros_like(gamma, dtype=x.dtype)

        self.axis = _compute_axis(x.ndim, gamma.ndim, self.axis)
        self.key_axis = _compute_key_axis(x.ndim, gamma.ndim, self.axis)

        if all(x.shape[i] == 1 for i in self.axis):
            if 0 in self.axis:
                warnings.warn(
                    'A batch with no more than one sample has been given'
                    ' to F.batch_normalization. F.batch_normalization'
                    ' will always output a zero tensor for such batches.'
                    ' This could be caused by incorrect configuration in'
                    ' your code (such as running evaluation while'
                    ' chainer.config.train=True),'
                    ' but could also happen in the last batch of training'
                    ' if non-repeating iterator is used.',
                    UserWarning)
            else:
                warnings.warn(
                    'F.batch_normalization received a batch with single'
                    ' dimensions along all axes that are used for aggregating'
                    ' statistics. F.batch_normalization'
                    ' will always output a zero tensor for such batches.',
                    UserWarning)

        # TODO(niboshi): Refactor calculation of expander and axis into a
        # function and call it just before they are used.

        # expander inserts singleton dimensions to gamma and beta so that they
        # can be broadcasted with x.
        expander = [None for _ in range(x.ndim)]
        for i in self.key_axis:
            expander[i] = slice(None)
        expander = tuple(expander)
        self.expander = expander

        self.mode = _BNMode(x, gamma, self.key_axis)
        self.use_cudnn = self.mode.can_use_cudnn(xp)
        self.use_ideep = self.mode.can_use_ideep()

        if self.use_ideep:
            # TODO(niboshi): Refactor iDeep part into a separate method
            expand_dim = False
            if x.ndim == 2:
                expand_dim = True
                x = x[:, :, None, None]

            y, self.mean, self.var, self.inv_std = (
                intel64.ideep.batchNormalization.Forward(
                    intel64.ideep.array(x.astype(gamma.dtype, copy=False)),
                    intel64.ideep.array(gamma),
                    intel64.ideep.array(beta),
                    None,
                    None,
                    self.eps
                ))
            y = y.astype(x.dtype, copy=False)

            m = x.size // gamma.size
            adjust = m / max(m - 1., 1.)

            # Update running_mean
            if isinstance(self.running_mean, intel64.ideep.mdarray):
                self.running_mean.inplace_axpby(
                    self.decay, (1 - self.decay), self.mean)
            else:
                self.running_mean *= self.decay
                self.running_mean += self.mean * (1 - self.decay)

            # Update running_var
            if isinstance(self.running_var, intel64.ideep.mdarray):
                self.running_var.inplace_axpby(
                    self.decay, (1 - self.decay), self.var * adjust)
            else:
                self.running_var *= self.decay
                self.running_var += self.var * adjust * (1 - self.decay)

            if expand_dim:
                y = numpy.squeeze(y, axis=(2, 3))

        elif self.use_cudnn:
            # self.mean and self.inv_std are used as buffers to save
            # intermediate results computed during forward pass. These buffers
            # are used to speed-up backward pass.
            y, self.mean, self.inv_std = (
                cudnn.batch_normalization_forward_training(
                    x, gamma, beta, self.running_mean, self.running_var,
                    None, None, self.eps, self.decay,
                    self.mode.is_for_conv2d, self.mode.get_cudnn_mode(),
                    chainer.is_debug()))
        else:
            # Generic CPU and GPU implementation

            gamma = gamma[expander]
            beta = beta[expander]
            self.mean = x.mean(axis=self.axis, dtype=gamma.dtype)
            var = x.var(axis=self.axis, dtype=gamma.dtype)
            if xp is numpy:
                self.inv_std = numpy.reciprocal(numpy.sqrt(
                    var + self.eps, dtype=gamma.dtype))
            else:
                self.inv_std = cuda.cupyx.rsqrt(var + self.eps,
                                                dtype=gamma.dtype)
            y = _apply_bn_fwd(xp, x, self.mean[expander],
                              self.inv_std[expander], gamma, beta)
            # Update running statistics
            m = x.size // gamma.size
            adjust = m / max(m - 1., 1.)  # unbiased estimation

            xp = backend.get_array_module(self.running_mean, self.running_var)
            if xp is chainerx:
                self.running_mean, self.running_var = backend.from_chx(
                    (self.running_mean, self.running_var))

            self.running_mean *= self.decay
            self.running_mean += (1 - self.decay) * self.mean
            self.running_var *= self.decay
            self.running_var += (1 - self.decay) * adjust * var

            if xp is chainerx:
                self.running_mean = backend.to_chx(self.running_mean)
                self.running_var = backend.to_chx(self.running_var)

        return y,
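
A hedged sketch of the running-statistics branch above: get_array_module detects ChainerX buffers, the update runs on the memory-shared fallback arrays, and to_chx restores ChainerX handles afterwards (shapes and decay below are arbitrary):

import numpy
import chainerx
from chainer import backend

running_mean = chainerx.zeros((3,), dtype='float32', device='native:0')
mean = numpy.ones(3, dtype=numpy.float32)
decay = 0.9

xp = backend.get_array_module(running_mean)
if xp is chainerx:
    # Drop to the memory-shared NumPy array so in-place ops work.
    running_mean = backend.from_chx(running_mean)

running_mean *= decay
running_mean += (1 - decay) * mean

if xp is chainerx:
    # Convert the handle back to a chainerx.ndarray.
    running_mean = backend.to_chx(running_mean)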
Example #36
    def forward(self, xs):
        slices = tuple([
            backend.from_chx(s) if isinstance(s, chainerx.ndarray) else s
            for s in self.slices])
        return utils.force_array(xs[0][slices]),
Example #37
    def forward_gpu(self, inputs):
        class_weight = backend.from_chx(self.class_weight)

        self.retain_inputs((0, 1))
        cupy = cuda.cupy
        x, t = inputs
        if chainer.is_debug():
            _check_input_values(x, t, self.ignore_label)

        if x.size == 0:
            y = cupy.zeros(t.shape, dtype=x.dtype)
            if self.cache_score:
                self.y = y
            if self.reduce == 'mean':
                return y.sum(),
            else:
                return y,
        log_y = log_softmax._log_softmax(x)
        if self.cache_score:
            self.y = cupy.exp(log_y)
        if class_weight is not None:
            shape = [1 if d != 1 else -1 for d in six.moves.range(x.ndim)]
            log_y *= cupy.broadcast_to(class_weight.reshape(shape), x.shape)

        log_y = cupy.rollaxis(log_y, 1, log_y.ndim)

        if self.reduce == 'mean':
            # Reduction is performed in a promoted dtype
            reduc_dtype = _reduction_dtype(x.dtype)
            if self.normalize:
                count = (t != self.ignore_label).sum(dtype=reduc_dtype)
                count = cupy.maximum(1, count)
                coeff = 1. / count
            else:
                coeff = cupy.array(1. / max(1, len(t)), dtype=reduc_dtype)
            self._coeff = coeff

            ret = cuda.reduce(
                'S t, raw T log_y, int32 n_channel, raw U coeff, '
                'S ignore_label',
                'U out',
                't == ignore_label ? T(0) : log_y[_j * n_channel + t]',
                'a + b', 'out = static_cast<U>(a * -coeff[0])', '0',
                'crossent_fwd'
            )(t, log_y.reduced_view(), log_y.shape[-1],
              self._coeff, self.ignore_label)
            ret = ret.astype(log_y.dtype, copy=False)
        else:
            ret = cuda.elementwise(
                'S t, raw T log_y, int32 n_channel, T ignore', 'T out',
                '''
                if (t == ignore) {
                  out = 0;
                } else {
                  out = -log_y[i * n_channel + t];
                }
                ''',
                'softmax_crossent_no_reduce_fwd'
            )(t, log_y.reduced_view(), log_y.shape[-1], self.ignore_label)
            ret = ret.reshape(t.shape)
        return ret,
Example #38
    def forward(self, inputs):
        self.retain_inputs((0, 1))
        x, gamma, beta = inputs

        xp = backend.get_array_module(x)
        if self.running_mean is None:
            self.running_mean = xp.zeros_like(gamma, dtype=x.dtype)
            self.running_var = xp.zeros_like(gamma, dtype=x.dtype)

        self.axis = _compute_axis(x.ndim, gamma.ndim, self.axis)
        self.key_axis = _compute_key_axis(x.ndim, gamma.ndim, self.axis)

        if all(x.shape[i] == 1 for i in self.axis):
            if 0 in self.axis:
                warnings.warn(
                    'A batch with no more than one sample has been given'
                    ' to F.batch_normalization. F.batch_normalization'
                    ' will always output a zero tensor for such batches.'
                    ' This could be caused by incorrect configuration in'
                    ' your code (such as running evaluation while'
                    ' chainer.config.train=True),'
                    ' but could also happen in the last batch of training'
                    ' if non-repeating iterator is used.', UserWarning)
            else:
                warnings.warn(
                    'F.batch_normalization received a batch with single'
                    ' dimensions along all axes that are used for aggregating'
                    ' statistics. F.batch_normalization'
                    ' will always output a zero tensor for such batches.',
                    UserWarning)

        # TODO(niboshi): Refactor calculation of expander and axis into a
        # function and call it just before they are used.

        # expander inserts singleton dimensions to gamma and beta so that they
        # can be broadcasted with x.
        expander = [None for _ in range(x.ndim)]
        for i in self.key_axis:
            expander[i] = slice(None)
        expander = tuple(expander)
        self.expander = expander

        self.mode = _BNMode(x, gamma, self.key_axis)
        self.use_cudnn = self.mode.can_use_cudnn(xp)
        self.use_ideep = self.mode.can_use_ideep()

        if self.use_ideep:
            # TODO(niboshi): Refactor iDeep part into a separate method
            expand_dim = False
            if x.ndim == 2:
                expand_dim = True
                x = x[:, :, None, None]

            y, self.mean, self.var, self.inv_std = (
                intel64.ideep.batchNormalization.Forward(
                    intel64.ideep.array(x.astype(gamma.dtype, copy=False)),
                    intel64.ideep.array(gamma), intel64.ideep.array(beta),
                    None, None, self.eps))
            y = y.astype(x.dtype, copy=False)

            m = x.size // gamma.size
            adjust = m / max(m - 1., 1.)

            # Update running_mean
            if isinstance(self.running_mean, intel64.ideep.mdarray):
                self.running_mean.inplace_axpby(self.decay, (1 - self.decay),
                                                self.mean)
            else:
                self.running_mean *= self.decay
                self.running_mean += self.mean * (1 - self.decay)

            # Update running_var
            if isinstance(self.running_var, intel64.ideep.mdarray):
                self.running_var.inplace_axpby(self.decay, (1 - self.decay),
                                               self.var * adjust)
            else:
                self.running_var *= self.decay
                self.running_var += self.var * adjust * (1 - self.decay)

            if expand_dim:
                y = numpy.squeeze(y, axis=(2, 3))

        elif self.use_cudnn:
            # self.mean and self.inv_std are used as buffers to save
            # intermediate results computed during forward pass. These buffers
            # are used to speed-up backward pass.
            y, self.mean, self.inv_std = (
                cudnn.batch_normalization_forward_training(
                    x, gamma, beta, self.running_mean, self.running_var, None,
                    None, self.eps, self.decay, self.mode.is_for_conv2d,
                    self.mode.get_cudnn_mode(), chainer.is_debug()))
        else:
            # Generic CPU and GPU implementation

            gamma = gamma[expander]
            beta = beta[expander]
            self.mean = x.mean(axis=self.axis, dtype=gamma.dtype)
            var = x.var(axis=self.axis, dtype=gamma.dtype)
            if xp is numpy:
                self.inv_std = numpy.reciprocal(
                    numpy.sqrt(var + self.eps, dtype=gamma.dtype))
            else:
                self.inv_std = cuda.cupyx.rsqrt(var + self.eps,
                                                dtype=gamma.dtype)
            y = _apply_bn_fwd(xp, x, self.mean[expander],
                              self.inv_std[expander], gamma, beta)
            # Update running statistics
            m = x.size // gamma.size
            adjust = m / max(m - 1., 1.)  # unbiased estimation

            xp = backend.get_array_module(self.running_mean, self.running_var)
            if xp is chainerx:
                self.running_mean, self.running_var = backend.from_chx(
                    (self.running_mean, self.running_var))

            self.running_mean *= self.decay
            self.running_mean += (1 - self.decay) * self.mean
            self.running_var *= self.decay
            self.running_var += (1 - self.decay) * adjust * var

            if xp is chainerx:
                self.running_mean = backend.to_chx(self.running_mean)
                self.running_var = backend.to_chx(self.running_var)

        return y,
Example #39
    def forward_cpu(self, inputs):
        t = backend.from_chx(self.t)  # Workaround for ChainerX.

        gx = numpy.zeros(self.shape, self.dtype)
        gx[six.moves.range(self.t.size), t] = inputs[0]
        return gx,