Example #1
    def __call__(self, x, test=False, finetune=False):
        """Invokes the forward propagation of BatchNormalization.

        BatchNormalization accepts additional arguments, which control three
        different running modes.

        Args:
            x (Variable): An input variable.
            test (bool): If ``True``, BatchNormalization runs in testing mode;
                it normalizes the input using precomputed statistics.
            finetune (bool): If ``True``, BatchNormalization runs in finetuning
                mode; it accumulates the input array to compute population
                statistics for normalization, and normalizes the input using
                batch statistics.

        If ``test`` and ``finetune`` are both ``False``, then
        BatchNormalization runs in training mode; it computes moving averages
        of mean and variance for evaluation during training, and normalizes the
        input using batch statistics.

        """
        use_batch_mean = not test or finetune

        if use_batch_mean:
            ret = batch_normalization.batch_normalization(
                x, self.gamma, self.beta, self.eps)
            func = ret.creator
            if finetune:
                self.N += 1
                decay = 1. / self.N
            else:
                decay = self.decay
            m = x.data.size // self.gamma.data.size
            adjust = m / max(m - 1., 1.)  # unbiased estimation

            self.avg_mean = cuda.to_gpu(self.avg_mean) * decay
            func.mean = cuda.to_gpu(func.mean) * (
                1 - decay)  # reuse buffer as a temporary
            self.avg_mean += func.mean
            del func.mean
            self.avg_var = cuda.to_gpu(self.avg_var) * decay
            func.var = cuda.to_gpu(
                func.var) * (1 - decay) * adjust  # reuse buffer as a temporary
            self.avg_var += func.var
            del func.var

#            self.avg_mean *= decay
#            func.mean *= 1 - decay  # reuse buffer as a temporary
#            self.avg_mean += func.mean
#            del func.mean
#            self.avg_var *= decay
#            func.var *= (1 - decay) * adjust  # reuse buffer as a temporary
#            self.avg_var += func.var
#            del func.var
        else:
            mean = variable.Variable(self.avg_mean, volatile='auto')
            var = variable.Variable(self.avg_var, volatile='auto')
            ret = batch_normalization.fixed_batch_normalization(
                x, self.gamma, self.beta, mean, var, self.eps)
        return ret
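
The moving-average bookkeeping in the branch above is an exponential update of
the stored statistics, with the batch variance rescaled to an unbiased estimate.
A minimal NumPy sketch of that update rule (illustrative only; the function and
argument names are not from the original code):

import numpy as np

def update_running_stats(avg_mean, avg_var, batch_mean, batch_var, decay, m):
    # Same arithmetic as above: new_avg = decay * old_avg + (1 - decay) * batch_stat,
    # with the batch variance rescaled by m / (m - 1) to make it unbiased.
    adjust = m / max(m - 1.0, 1.0)
    new_mean = decay * avg_mean + (1 - decay) * batch_mean
    new_var = decay * avg_var + (1 - decay) * adjust * batch_var
    return new_mean, new_var
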
Example #2
    def __call__(self, x_batch, train=True):
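        # F.dropout expects a drop ratio, so convert from the configured keep rate.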
        ratio = 1 - self.keep_rate

        x_batch = Variable(x_batch, volatile=True)
        x = embed_id.embed_id(x_batch, self.W)
        x = Variable(x.data, volatile=not train)

        gamma = Variable(self.__mod.array(1.0, dtype=self.__mod.float32),
                         volatile=not train,
                         name='gamma')
        beta = Variable(self.__mod.array(0.0, dtype=self.__mod.float32),
                        volatile=not train,
                        name='beta')

        x = batch_normalization(x,
                                gamma,
                                beta,
                                eps=2e-5,
                                running_mean=None,
                                running_var=None,
                                decay=0.9,
                                use_cudnn=False)
        x = F.dropout(x, ratio, train)

        c, h, hs = self.rnn(x, train)
        return h
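
Because gamma is fixed to 1 and beta to 0 above, the batch normalization step
amounts to standardizing the embeddings with batch statistics. A rough NumPy
sketch of that reduction (which axes get aggregated depends on the shape of
gamma in the real function, so the axis used here is an assumption):

import numpy as np

def standardize(x, eps=2e-5, axis=0):
    # With gamma = 1 and beta = 0, batch normalization reduces to
    # (x - mean) / sqrt(var + eps) over the aggregation axis.
    mean = x.mean(axis=axis, keepdims=True)
    var = x.var(axis=axis, keepdims=True)
    return (x - mean) / np.sqrt(var + eps)
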
Example #4
    def compute_F(self, in_data, out_grad_data):
        x = in_data[0]
        gy = out_grad_data[0]
        ndim = len(x.shape)
        if ndim not in (2, 4):
            raise RuntimeError(
                'len(x.shape) must be 2 or 4, not {}.'.format(ndim))

        xp = cuda.get_array_module(x)
        n = x.shape[0]
        gy_scale = n
        if self._loss_scale is not None:
            gy_scale *= 1.0 / self._loss_scale

        # Re-compute BN forward with gamma=1 and beta=0
        avg_mean = self.link.avg_mean
        _gamma = xp.ones(avg_mean.shape, dtype=x.dtype)
        _beta = xp.zeros(avg_mean.shape, dtype=x.dtype)
        h = batch_normalization(x, _gamma, _beta, eps=self.link.eps).data

        if ndim == 2:
            gy = gy_scale * gy
            gyh = gy * h
        elif ndim == 4:
            # data layout of gy: NCHW
            h = h.transpose(0, 2, 3, 1)
            gy = gy.transpose(0, 2, 3, 1)

            # data layout of gy: NHWC
            gy = gy * gy_scale  # copy
            gyh = gy * h

            gyh = gyh.sum(axis=(1, 2))
            gy = gy.sum(axis=(1, 2))
            # data layout of gy: NC

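        # gyh holds the per-sample gradient w.r.t. gamma (gy times the normalized
        # input) and gy the per-sample gradient w.r.t. beta; both are stacked when
        # the link has both parameters.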
        if self.link.beta is None:
            grad = gyh
        elif self.link.gamma is None:
            grad = gy
        else:
            grad = xp.hstack((gyh, gy))

        if self.diagonalize:
            if grad.dtype == xp.float16:
                grad = cast(grad, xp.float32).data
            F = xp.diag((grad * grad).mean(axis=0))
        else:
            F_scale = 1 / n
            if grad.dtype == xp.float16:
                grad = cast(grad, xp.float32).data
            F = grad.T.dot(grad) * F_scale

        return F
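
The final step forms an empirical Fisher-style block from the per-sample
parameter gradients: either the full Gram matrix grad.T.dot(grad) / n or only
its diagonal. A short NumPy sketch of that step (names are illustrative; grad is
assumed to be 2-D with one row per sample):

import numpy as np

def fisher_block(grad, diagonalize=False):
    # Each row of grad stacks the per-sample gamma and beta gradients.
    n = grad.shape[0]
    if diagonalize:
        # Keep only the per-parameter second moments.
        return np.diag((grad * grad).mean(axis=0))
    # Full second-moment (Gram) matrix averaged over the batch.
    return grad.T.dot(grad) / n
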
Example #5
def group_normalization(x, groups, gamma, beta, eps=1e-5):
    """Group normalization function.

    This function implements "group normalization", which divides the channels
    into groups, computes the mean and variance within each group, normalizes
    by these statistics, and then scales and shifts the result.


    Args:
        x (:class:`~chainer.Variable` or :ref:`ndarray`): Batch tensors.
            First dimension of this value must be the size of minibatch and
            second dimension must be the number of channels.
            Moreover, this value must have one or more following dimensions,
            such as height and width.
        groups (int):
            The number of channel groups.
            This value must be a divisor of the number of channels.
        gamma (:class:`~chainer.Variable` or :ref:`ndarray`):
            Scaling parameter.
        beta (:class:`~chainer.Variable` or :ref:`ndarray`):
            Shifting parameter.
        eps (float): Epsilon value for numerical stability of normalization.


    Returns:
        ~chainer.Variable: The output variable which has the same shape
        as :math:`x`.

    See: `Group Normalization <https://arxiv.org/abs/1803.08494>`_
    """
    if x.ndim <= 2:
        raise ValueError('Input dimension must be greater than 2, '
                         'including batch size dimension '
                         '(first dimension).')

    if not isinstance(groups, int):
        raise TypeError('Argument \'groups\' must be of type int.')

    xp = backend.get_array_module(x)

    batch_size, channels = x.shape[:2]
    original_shape = x.shape

    if channels % groups != 0:
        raise ValueError('Argument \'groups\' must be a divisor '
                         'of the number of channels.')

    # By doing this reshaping, calling the batch_normalization function becomes
    # equivalent to Group Normalization.
    # A redundant dimension is added in order to utilize ideep64/cuDNN.
    x = reshape.reshape(x, (1, batch_size * groups, -1, 1))

    with cuda.get_device_from_array(x.array):
        dummy_gamma = xp.ones(batch_size * groups).astype(xp.float32)
        dummy_beta = xp.zeros(batch_size * groups).astype(xp.float32)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        x = batch_normalization.batch_normalization(x,
                                                    dummy_gamma,
                                                    dummy_beta,
                                                    eps=eps)

    x = reshape.reshape(x, original_shape)

    target_shape = [1, channels] + [1] * (x.ndim - 2)
    gamma_broadcast = broadcast.broadcast_to(
        reshape.reshape(gamma, target_shape), x.shape)
    beta_broadcast = broadcast.broadcast_to(
        reshape.reshape(beta, target_shape), x.shape)

    return x * gamma_broadcast + beta_broadcast
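
The reshape-then-batch-normalize trick above is equivalent to normalizing each
group of channels directly. A plain NumPy sketch of that direct computation
(for illustration only; it assumes gamma and beta are 1-D arrays of length C and
ignores the ideep64/cuDNN considerations the real implementation addresses):

import numpy as np

def group_norm_reference(x, groups, gamma, beta, eps=1e-5):
    # x: (N, C, ...); gamma, beta: (C,); groups must divide C.
    n, c = x.shape[:2]
    y = x.reshape(n, groups, -1)
    mean = y.mean(axis=2, keepdims=True)
    var = y.var(axis=2, keepdims=True)
    y = (y - mean) / np.sqrt(var + eps)
    y = y.reshape(x.shape)
    param_shape = (1, c) + (1,) * (x.ndim - 2)
    return y * gamma.reshape(param_shape) + beta.reshape(param_shape)
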
Example #6
def group_normalization(x, groups, gamma, beta, eps=1e-5):
    """Group normalization function.

    This function implements "group normalization", which divides the channels
    into groups, computes the mean and variance within each group, normalizes
    by these statistics, and then scales and shifts the result.


    Args:
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Batch tensors.
            First dimension of this value must be the size of minibatch and
            second dimension must be the number of channels.
            Moreover, this value must have one or more following dimensions,
            such as height and width.
        groups (int):
            The number of channel groups.
            This value must be a divisor of the number of channels.
        gamma (~chainer.Variable): Scaling parameter.
        beta (~chainer.Variable): Shifting parameter.
        eps (float): Epsilon value for numerical stability of normalization.


    Returns:
        ~chainer.Variable: The output variable which has the same shape
        as :math:`x`.

    See: `Group Normalization <https://arxiv.org/abs/1803.08494>`_
    """
    if x.ndim <= 2:
        raise ValueError('Input dimension must be greater than 2, '
                         'including batch size dimension '
                         '(first dimension).')

    if not isinstance(groups, int):
        raise TypeError('Argument \'groups\' must be of type int.')

    xp = backend.get_array_module(x)

    batch_size, channels = x.shape[:2]
    original_shape = x.shape

    if channels % groups != 0:
        raise ValueError('Argument \'groups\' must be a divisor '
                         'of the number of channels.')

    # By doing this reshaping, calling the batch_normalization function becomes
    # equivalent to Group Normalization.
    # A redundant dimension is added in order to utilize ideep64/cuDNN.
    x = reshape.reshape(x, (1, batch_size * groups, -1, 1))

    with cuda.get_device_from_array(x.array):
        dummy_gamma = xp.ones(batch_size * groups).astype(xp.float32)
        dummy_beta = xp.zeros(batch_size * groups).astype(xp.float32)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        x = batch_normalization.batch_normalization(
            x, dummy_gamma, dummy_beta, eps=eps)

    x = reshape.reshape(x, original_shape)

    target_shape = [1, channels] + [1] * (x.ndim - 2)
    gamma_broadcast = broadcast.broadcast_to(
        reshape.reshape(gamma, target_shape), x.shape)
    beta_broadcast = broadcast.broadcast_to(
        reshape.reshape(beta, target_shape), x.shape)

    return x * gamma_broadcast + beta_broadcast
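
A short usage sketch for the function above (shapes chosen for illustration;
depending on the Chainer version, gamma and beta may need to be wrapped in
Variable rather than passed as plain arrays):

import numpy as np

# 8 channels split into 4 groups of 2; gamma and beta have one entry per channel.
x = np.random.randn(2, 8, 5, 5).astype(np.float32)
gamma = np.ones(8, dtype=np.float32)
beta = np.zeros(8, dtype=np.float32)
y = group_normalization(x, groups=4, gamma=gamma, beta=beta)  # same shape as x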