Example #1
    def test_clear_input_if_no_need_grad_batch_normalization(self, batch_stat):
        x1 = nn.Variable([1, 1, 2], need_grad=True)
        x2 = nn.Variable([1, 1, 1], need_grad=True)
        x3 = nn.Variable([1, 1, 1], need_grad=True)
        x4 = nn.Variable([1, 1, 1], need_grad=True)
        x5 = nn.Variable([1, 1, 1], need_grad=True)

        x = F.identity(x1)
        beta = F.identity(x2)
        gamma = F.identity(x3)
        if batch_stat:
            y = F.batch_normalization(
                x, beta, gamma, x4, x5, batch_stat=batch_stat)
        else:
            mean = F.identity(x4)
            var = F.identity(x5)
            y = F.batch_normalization(
                x, beta, gamma, mean, var, batch_stat=batch_stat)

        answer = []
        answer.append([False])
        answer.append([False])
        answer.append([False])
        if not batch_stat:
            answer.append([False])
            answer.append([False])
        answer.append([False, True, False, False, False])

        y.forward(clear_no_need_grad=True)
        self.check_input_data_clear_called_flags(answer)
Example #2
def test_batch_normalization_forward_backward(seed, axis, decay_rate, eps,
                                              output_stat, batch_stat, ctx,
                                              func_name):
    from nbla_test_utils import function_tester
    rng = np.random.RandomState(seed)
    inputs = list(create_inputs(rng, axis))
    axes = [axis]
    if ctx.backend[0].split(':')[0] != 'cpu' and not batch_stat:
        pytest.skip(
            "cuda and cudnn implementation for batch_stat==False is not implemented yet"
        )
    else:
        function_tester(
            rng,
            F.batch_normalization,
            ref_batch_normalization,
            inputs,
            func_args=[axes, decay_rate, eps, batch_stat, output_stat],
            backward=[True, True, True, False, False],
            ctx=ctx,
            func_name=func_name,
            dstep=1e-2,
            atol_b=1e-2)

    # Check if running mean and var works.
    vinputs = []
    for i in inputs:
        vinputs.append(nn.Variable(i.shape, True))
        vinputs[-1].d = i
    for i in range(5):
        inputs[0] = rng.randn(*inputs[0].shape)
        vinputs[0].d[...] = inputs[0]
        ref_y = ref_batch_normalization(
            *(inputs + [axes, decay_rate, eps, batch_stat, output_stat]))
        with nn.context_scope(ctx), nn.auto_forward():
            y = F.batch_normalization(
                *(vinputs + [axes, decay_rate, eps, batch_stat, output_stat]))
        assert np.allclose(vinputs[3].d, inputs[3], atol=1e-7)
        assert np.allclose(vinputs[4].d, inputs[4])

    # Check if global stat mode works
    batch_stat = False
    if output_stat:
        return
    ref_y = ref_batch_normalization(
        *(inputs + [axes, decay_rate, eps, batch_stat, output_stat]))
    with nn.context_scope(ctx), nn.auto_forward():
        y = F.batch_normalization(
            *(vinputs + [axes, decay_rate, eps, batch_stat, output_stat]))
    assert np.allclose(ref_y, y.d, atol=1e-6)
    def ref_batch_normalization(x, beta, gamma, rmean, rvar, comm, axes, decay_rate,
                                eps, batch_stat, output_stat):

        orig = x - device_id
        inputs = []
        for i in range(n_devices):
            inputs.append(orig + i)
        x = np.concatenate(inputs)

        vx = nn.Variable(x.shape, True)
        vx.d = x
        vbeta = nn.Variable(beta.shape, True)
        vbeta.d = beta
        vgamma = nn.Variable(gamma.shape, True)
        vgamma.d = gamma
        vrmean = nn.Variable(rmean.shape, True)
        vrmean.d = rmean
        vrvar = nn.Variable(rvar.shape, True)
        vrvar.d = rvar
        with nn.context_scope(ctx):
            out = F.batch_normalization(vx, vbeta, vgamma, vrmean, vrvar,
                                        batch_stat=batch_stat, output_stat=output_stat,
                                        axes=axes, decay_rate=decay_rate, eps=eps)
        if output_stat:
            out[0].forward()
            rmean[...] = vrmean.d.copy()
            rvar[...] = vrvar.d.copy()
            return out[0].d[device_id*2:(device_id+1)*2], out[1].d, out[2].d
        out.forward()
        rmean[...] = vrmean.d.copy()
        rvar[...] = vrvar.d.copy()
        return out.d[device_id*2:(device_id+1)*2]
    def ref_batch_normalize_grad(x, beta, gamma, rmean, rvar,
                                 dy,
                                 comm, axes, decay_rate,
                                 eps, batch_stat, output_stat):
        orig = x - device_id
        inputs = []
        for i in range(n_devices):
            inputs.append(orig + i)
        x = np.concatenate(inputs)

        vx = nn.Variable(x.shape, True)
        vx.d = x
        vx.g = 0
        vbeta = nn.Variable(beta.shape, True)
        vbeta.d = beta
        vbeta.g = 0
        vgamma = nn.Variable(gamma.shape, True)
        vgamma.d = gamma
        vgamma.g = 0
        vrmean = nn.Variable(rmean.shape, True)
        vrmean.d = rmean
        vrvar = nn.Variable(rvar.shape, True)
        vrvar.d = rvar
        with nn.context_scope(ctx):
            out = F.batch_normalization(vx, vbeta, vgamma, vrmean, vrvar,
                                        batch_stat=batch_stat, output_stat=output_stat, axes=axes, decay_rate=decay_rate, eps=eps)
        f = out.parent
        f.forward([vx, vbeta, vgamma, vrmean, vrvar], [out])
        for i in range(n_devices):
            out.g[2*i:2*(i+1)] = dy
        f.backward([vx, vbeta, vgamma, vrmean, vrvar], [out])

        return np.concatenate([vx.g[device_id*2:(device_id+1)*2].flatten(), vbeta.g.flatten(), vgamma.g.flatten()])
def ref_fused_batch_normalization(x, beta, gamma, rmean, rvar, z, axes,
                                  decay_rate, eps, batch_stat, nonlinearity,
                                  output_stat):
    with nn.context_scope(cpu_context):
        xvar = nn.Variable.from_numpy_array(x)
        betavar = nn.Variable.from_numpy_array(beta)
        gammavar = nn.Variable.from_numpy_array(gamma)
        rmeanvar = nn.Variable.from_numpy_array(rmean)
        rvarvar = nn.Variable.from_numpy_array(rvar)
        if z is not None:
            zvar = nn.Variable.from_numpy_array(z)
        with nn.auto_forward():
            bn = F.batch_normalization(xvar, betavar, gammavar, rmeanvar,
                                       rvarvar, axes, decay_rate, eps,
                                       batch_stat, output_stat)
            if z is None:
                if output_stat:
                    y = bn[0]
                else:
                    y = bn
            else:
                if output_stat:
                    y = F.add2(bn[0], zvar)
                else:
                    y = F.add2(bn, zvar)
            y = F.relu(y)
        rmean[:] = rmeanvar.d
        rvar[:] = rvarvar.d
        if output_stat:
            return y.d, bn[1].d, bn[2].d
        else:
            return y.d
Example #6
def INByBatchNorm(inp,
                  axes=[1],
                  decay_rate=0.9,
                  eps=1e-5,
                  fix_parameters=True):
    """Instance Normalization (implemented using BatchNormalization)
    Instance normalization is equivalent to batch normalization when the batch size is one;
    in other words, it normalizes over the spatial dimension(s), i.e., over all dimensions
    except the batch and feature dimensions.
    """
    assert len(axes) == 1

    shape_stat = [1 for _ in inp.shape]
    shape_stat[axes[0]] = inp.shape[axes[0]]
    beta = get_parameter_or_create("beta", shape_stat, ConstantInitializer(0),
                                   not fix_parameters)
    gamma = get_parameter_or_create("gamma", shape_stat,
                                    ConstantInitializer(1), not fix_parameters)
    mean = get_parameter_or_create("mean", shape_stat, ConstantInitializer(0),
                                   False)
    var = get_parameter_or_create("var", shape_stat, ConstantInitializer(0),
                                  False)
    return F.batch_normalization(inp,
                                 beta,
                                 gamma,
                                 mean,
                                 var,
                                 axes,
                                 decay_rate,
                                 eps,
                                 batch_stat=True,
                                 output_stat=False)
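
Below is a minimal, hypothetical usage sketch of INByBatchNorm; the parameter scope name, the input shape, and the batch size of one are illustrative assumptions, not part of the source.

# Hypothetical usage sketch for INByBatchNorm (scope name and shape are assumed).
import numpy as np
import nnabla as nn

x = nn.Variable.from_numpy_array(
    np.random.randn(1, 16, 32, 32).astype(np.float32))  # a single sample
with nn.parameter_scope("in_bn"):
    # With batch size one, batch normalization over axes=[1] reduces over the
    # spatial dimensions only, which is exactly instance normalization.
    y = INByBatchNorm(x, axes=[1])
y.forward()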
Example #7
    def connect(self, fname, inputs, args):
        if fname in ['Convolution', 'Deconvolution']:
            # TODO: address leading batch dimension
            args['channel_last'] = True
            x = inputs[0]
            w = inputs[1]
            b = inputs[2] if len(inputs) == 3 else None
            scope = self.get_parameter_scope(w)
            with nn.parameter_scope(scope):
                wd = w.d.copy().transpose(0, 2, 3, 1)
                w = nn.parameter.get_parameter_or_create('W_cl', wd.shape, wd)
            o = F.convolution(x, w, b, **args)
        elif fname == 'BatchNormalization':
            # TODO: address leading batch dimension
            x = inputs[0]
            beta = inputs[1]
            gamma = inputs[2]
            mean = inputs[3]
            var = inputs[4]
            args['axes'] = [len(x.shape) - 1]
            scope = self.get_parameter_scope(beta)
            with nn.parameter_scope(scope):
                beta_d = beta.d.copy().transpose(0, 2, 3, 1)
                gamma_d = gamma.d.copy().transpose(0, 2, 3, 1)
                mean_d = mean.d.copy().transpose(0, 2, 3, 1)
                var_d = var.d.copy().transpose(0, 2, 3, 1)
                beta = nn.parameter.get_parameter_or_create(
                    'beta_cl', beta_d.shape, beta_d, beta.need_grad)
                gamma = nn.parameter.get_parameter_or_create(
                    'gamma_cl', gamma_d.shape, gamma_d, gamma.need_grad)
                mean = nn.parameter.get_parameter_or_create(
                    'mean_cl', mean_d.shape, mean_d, mean.need_grad)
                var = nn.parameter.get_parameter_or_create(
                    'var_cl', var_d.shape, var_d, var.need_grad)
            o = F.batch_normalization(x, beta, gamma, mean, var, **args)
        elif fname in ['MaxPooling', 'AveragePooling', 'SumPooling']:
            args['channel_last'] = True
            o = self._call_function(fname, inputs, args)
        elif fname in ['Concatenate']:
            args['axis'] = len(inputs[0].shape) - 1
            o = self._call_function(fname, inputs, args)
        elif fname == 'Affine':
            x = inputs[0]

            _, h_s, w_s, c_s = inputs[0].shape
            _, b_s = inputs[1].shape
            wd = inputs[1].d.copy()
            wd = np.reshape(wd, (c_s, h_s, w_s, b_s))
            wd = np.transpose(wd, (1, 2, 0, 3))
            wd = np.reshape(wd, (-1, b_s))
            w = nn.parameter.get_parameter_or_create('w_cl', wd.shape, wd,
                                                     False)

            b = inputs[2] if len(inputs) == 3 else None
            o = F.affine(x, w, b, **args)
        else:
            o = self._call_function(fname, inputs, args)
        return o
Example #8
def CCBN(h,
         y,
         n_classes,
         decay_rate=0.999,
         test=False,
         fix_parameters=False,
         coefs=[1.0]):
    """Categorical Conditional Batch Normaliazation"""
    # Call the batch normalization once
    shape_stat = [1 for _ in h.shape]
    shape_stat[1] = h.shape[1]
    gamma_tmp = nn.Variable.from_numpy_array(np.ones(shape_stat))
    beta_tmp = nn.Variable.from_numpy_array(np.zeros(shape_stat))
    mean = get_parameter_or_create("mean", shape_stat,
                                   ConstantInitializer(0.0), False)
    var = get_parameter_or_create("var", shape_stat, ConstantInitializer(1.0),
                                  False)
    h = F.batch_normalization(h,
                              beta_tmp,
                              gamma_tmp,
                              mean,
                              var,
                              decay_rate=decay_rate,
                              batch_stat=not test)

    # Condition the gamma and beta with the class label
    b, c = h.shape[0:2]

    def embed_func(y, initializer):
        if type(y) != list:
            o = embed(y,
                      n_classes,
                      c,
                      initializer=initializer,
                      sn=False,
                      test=test)
        else:
            y_list = y
            o = reduce(lambda x, y: x + y, [
                coef * embed(y,
                             n_classes,
                             c,
                             initializer=initializer,
                             sn=False,
                             test=test) for coef, y in zip(coefs, y_list)
            ])
        return o

    with nn.parameter_scope("gamma"):
        gamma = embed_func(y, ConstantInitializer(1.0))
        gamma = F.reshape(gamma, [b, c] + [1 for _ in range(len(h.shape[2:]))])
        gamma = F.broadcast(gamma, h.shape)
    with nn.parameter_scope("beta"):
        beta = embed_func(y, ConstantInitializer(0.0))
        beta = F.reshape(beta, [b, c] + [1 for _ in range(len(h.shape[2:]))])
        beta = F.broadcast(beta, h.shape)
    return gamma * h + beta
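
The conditioning step above can be illustrated with a small, self-contained sketch: per-class gamma and beta are looked up with F.embed and applied to the normalized activations. The shapes, class count, and constant embedding weights below are assumptions for illustration and do not reproduce the embed helper used by CCBN.

# Illustrative sketch of class-conditional scale/shift (not the CCBN helper itself).
import numpy as np
import nnabla as nn
import nnabla.functions as F

b, c, n_classes = 4, 32, 10
h = nn.Variable.from_numpy_array(np.random.randn(b, c, 8, 8).astype(np.float32))
label = nn.Variable.from_numpy_array(np.random.randint(0, n_classes, size=(b,)))
gamma_w = nn.Variable.from_numpy_array(np.ones((n_classes, c), dtype=np.float32))
beta_w = nn.Variable.from_numpy_array(np.zeros((n_classes, c), dtype=np.float32))
# Look up a per-sample (gamma, beta) row by class index and broadcast over space.
gamma = F.reshape(F.embed(label, gamma_w), (b, c, 1, 1))
beta = F.reshape(F.embed(label, beta_w), (b, c, 1, 1))
out = F.broadcast(gamma, h.shape) * h + F.broadcast(beta, h.shape)
out.forward()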
Example #9
    def __init__(self, x, weight, bias, beta, gamma, rmean, rvar, z,
                 base_axis, pad, stride, dilation, group, channel_last,
                 decay_rate, eps, batch_stat,
                 nonlinearity, nonlinearity_args, pad_mode, constant_value):

        from collections import OrderedDict
        inputs = OrderedDict()
        xvar = nn.Variable.from_numpy_array(x)
        weightvar = nn.Variable.from_numpy_array(weight)
        inputs['x'] = xvar
        inputs['weight'] = weightvar
        biasvar = None
        betavar = None
        gammavar = None
        rmeanvar = None
        rvarvar = None
        zvar = None
        if bias is not None:
            biasvar = nn.Variable.from_numpy_array(bias)
            inputs['bias'] = biasvar
        if beta is not None:
            betavar = nn.Variable.from_numpy_array(beta)
            gammavar = nn.Variable.from_numpy_array(gamma)
            rmeanvar = nn.Variable.from_numpy_array(rmean)
            rvarvar = nn.Variable.from_numpy_array(rvar)
            inputs['beta'] = betavar
            inputs['gamma'] = gammavar
            inputs['rmean'] = rmeanvar
            inputs['rvar'] = rvarvar
        if z is not None:
            zvar = nn.Variable.from_numpy_array(z)
            inputs['z'] = zvar

        spatial_dims = xvar.ndim - (base_axis + 1)
        assert (len(pad) == spatial_dims or len(pad) == 2 * spatial_dims)
        if len(pad) == spatial_dims:
            pad_width = tuple(p for _ in range(2) for p in pad)
        else:  # if len(pad) == 2 * spatial_dims:
            pad_width = pad
        h = F.pad(xvar, pad_width, pad_mode, constant_value)
        conv_pad = (0,) * spatial_dims
        h = F.convolution(h, weightvar, biasvar, base_axis,
                          conv_pad, stride, dilation, group, channel_last)
        if beta is not None:
            h = F.batch_normalization(h, betavar, gammavar, rmeanvar, rvarvar,
                                      [h.ndim - 1 if channel_last else base_axis],
                                      decay_rate, eps, batch_stat)
        if z is not None:
            h = F.add2(h, zvar)
        h = ref_activation(h, nonlinearity, nonlinearity_args)
        self.input_dict = inputs
        self.output = h
def test_batch_normalization_forward_backward(seed, axis, decay_rate, eps,
                                              output_stat, ctx, func_name):
    from nbla_test_utils import function_tester
    rng = np.random.RandomState(seed)
    inputs = list(create_inputs(rng, axis))
    axes = [axis]
    batch_stat = True
    function_tester(rng, F.batch_normalization, ref_batch_normalization,
                    inputs,
                    func_args=[axes, decay_rate, eps, batch_stat, output_stat],
                    backward=[True, True, True, False, False],
                    ctx=ctx, func_name=func_name, dstep=1e-2, atol_b=1e-2)

    # Check if running mean and var works.
    vinputs = []
    for i in inputs:
        vinputs.append(nn.Variable(i.shape, True))
        vinputs[-1].d = i
    for i in range(5):
        inputs[0] = rng.randn(*inputs[0].shape)
        vinputs[0].d[...] = inputs[0]
        ref_y = ref_batch_normalization(
            *(inputs + [axes, decay_rate, eps, batch_stat, output_stat]))
        with nn.context_scope(ctx), nn.auto_forward():
            y = F.batch_normalization(
                *(vinputs + [axes, decay_rate, eps, batch_stat, output_stat]))
        assert np.allclose(vinputs[3].d, inputs[3])
        assert np.allclose(vinputs[4].d, inputs[4], atol=1e-3)

    # Check if global stat mode works
    batch_stat = False
    if output_stat:
        return
    ref_y = ref_batch_normalization(
        *(inputs + [axes, decay_rate, eps, batch_stat, output_stat]))
    with nn.context_scope(ctx), nn.auto_forward():
        y = F.batch_normalization(
            *(vinputs + [axes, decay_rate, eps, batch_stat, output_stat]))
    assert np.allclose(ref_y, y.d, atol=1e-6)
Example #11
def normalize(inp, layer_name, bn_batch_stat, activation, args, init_params):
    if args.norm == 'batch_norm':
        if init_params is None:
            inp = PF.batch_normalization(
                inp, batch_stat=bn_batch_stat, name=layer_name)
        else:
            inp = F.batch_normalization(inp, init_params[layer_name + '/bn/beta'], init_params[layer_name + '/bn/gamma'],
                                        mean=None, variance=None, batch_stat=bn_batch_stat)

    if activation is not None:
        return activation(inp)
    else:
        return inp
def batch_normalization(inp,
                        axes=[1],
                        decay_rate=0.9,
                        eps=1e-5,
                        batch_stat=True,
                        output_stat=False):
    """
    Batch normalization layer.

    .. math::
        \\begin{array}{lcl}
        \\mu &=& \\frac{1}{M} \\sum x_i\\\\
        \\sigma^2 &=& \\frac{1}{M} \\sum \\left(x_i - \\mu\\right)^2\\\\
        \\hat{x}_i &=& \\frac{x_i - \\mu}{\\sqrt{\\sigma^2 + \\epsilon}} \\\\
        y_i &=& \\hat{x}_i \\gamma + \\beta.
        \\end{array}

    where :math:`x_i` and :math:`y_i` are the input and output, respectively.
    At test time, the mean and variance computed by the moving average during training are used.

    Args:
        inp (~nnabla.Variable): N-D array of input.
        axes (:obj:`tuple` of :obj:`int`): Axes mean and variance are taken.
        decay_rate (float): Decay rate of running mean and variance.
        eps (float): Tiny value to avoid zero division by std.
        batch_stat (bool): Use mini-batch statistics rather than running ones.
        output_stat (bool): Output batch mean and variance.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    References:

        - Ioffe and Szegedy, Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift. https://arxiv.org/abs/1502.03167

    """
    assert len(axes) == 1
    shape_stat = [1 for _ in inp.shape]
    shape_stat[axes[0]] = inp.shape[axes[0]]
    beta = get_parameter_or_create("beta", shape_stat, ConstantInitializer(0),
                                   True)
    gamma = get_parameter_or_create("gamma", shape_stat,
                                    ConstantInitializer(1), True)
    mean = get_parameter_or_create("mean", shape_stat, ConstantInitializer(0),
                                   False)
    var = get_parameter_or_create("var", shape_stat, ConstantInitializer(0),
                                  False)
    return F.batch_normalization(inp, beta, gamma, mean, var, axes, decay_rate,
                                 eps, batch_stat, output_stat)
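
A minimal, hypothetical usage sketch of the layer defined above; the scope name, input shape, and the training/test call pattern are assumptions for illustration.

# Hypothetical usage sketch (scope name and input shape are assumed).
import numpy as np
import nnabla as nn

x = nn.Variable.from_numpy_array(
    np.random.randn(8, 16, 32, 32).astype(np.float32))
with nn.parameter_scope("bn1"):
    y_train = batch_normalization(x, axes=[1], batch_stat=True)   # mini-batch statistics
with nn.parameter_scope("bn1"):
    y_test = batch_normalization(x, axes=[1], batch_stat=False)   # running mean/var
y_train.forward()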
Example #13
def _normalize(x, norm_type, channel_axis=1):
    if norm_type.lower() == "in":
        return F.instance_normalization(x,
                                        gamma=None,
                                        beta=None,
                                        channel_axis=channel_axis)
    elif norm_type.lower() == "bn":
        return F.batch_normalization(x,
                                     gamma=None,
                                     beta=None,
                                     mean=None,
                                     variance=None,
                                     axes=channel_axis)
    else:
        raise ValueError("unknown norm_type: {}".format(norm_type))
Example #14
def BN(inp, axes=[1], decay_rate=0.9, eps=1e-5,
       batch_stat=True, output_stat=False, fix_parameters=False):
    """Batch Normalization
    """
    shape_stat = [1 for _ in inp.shape]
    shape_stat[axes[0]] = inp.shape[axes[0]]
    beta = get_parameter_or_create(
        "beta", shape_stat, ConstantInitializer(0), not fix_parameters)
    gamma = get_parameter_or_create(
        "gamma", shape_stat, ConstantInitializer(1), not fix_parameters)
    mean = get_parameter_or_create(
        "mean", shape_stat, ConstantInitializer(0), False)
    var = get_parameter_or_create(
        "var", shape_stat, ConstantInitializer(0), False)
    return F.batch_normalization(inp, beta, gamma, mean, var, axes,
                                 decay_rate, eps, batch_stat, output_stat)
Example #15
def batch_normalization(inp, axes=[1], decay_rate=0.9, eps=1e-5,
                        batch_stat=True, output_stat=False):
    """
    Batch normalization layer.

    .. math::
        \\begin{array}{lcl}
        \\mu &=& \\frac{1}{M} \\sum x_i\\\\
        \\sigma^2 &=& \\frac{1}{M} \\sum \\left(x_i - \\mu\\right)^2\\\\
        \\hat{x}_i &=& \\frac{x_i - \\mu}{\\sqrt{\\sigma^2 + \\epsilon}} \\\\
        y_i &=& \\hat{x}_i \\gamma + \\beta.
        \\end{array}

    where :math:`x_i` and :math:`y_i` are the input and output, respectively.
    At test time, the mean and variance computed by the moving average during training are used.

    Args:
        inp (~nnabla.Variable): N-D array of input.
        axes (:obj:`tuple` of :obj:`int`): Axes mean and variance are taken.
        decay_rate (float): Decay rate of running mean and variance.
        eps (float): Tiny value to avoid zero division by std.
        batch_stat (bool): Use mini-batch statistics rather than running ones.
        output_stat (bool): Output batch mean and variance.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    References:

        - Ioffe and Szegedy, Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift. https://arxiv.org/abs/1502.03167

    """
    assert len(axes) == 1
    shape_stat = [1 for _ in inp.shape]
    shape_stat[axes[0]] = inp.shape[axes[0]]
    beta = get_parameter_or_create(
        "beta", shape_stat, ConstantInitializer(0), True)
    gamma = get_parameter_or_create(
        "gamma", shape_stat, ConstantInitializer(1), True)
    mean = get_parameter_or_create(
        "mean", shape_stat, ConstantInitializer(0), False)
    var = get_parameter_or_create(
        "var", shape_stat, ConstantInitializer(0), False)
    return F.batch_normalization(inp, beta, gamma, mean, var, axes,
                                 decay_rate, eps, batch_stat, output_stat)
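
As a quick sanity check of the formulas in the docstring, the normalized output in training mode can be reproduced with NumPy; the shapes, seed, and tolerance below are arbitrary choices, not from the source.

# NumPy check of the docstring formula against F.batch_normalization
# (beta=0, gamma=1; shapes, seed, and eps are arbitrary).
import numpy as np
import nnabla as nn
import nnabla.functions as F

rng = np.random.RandomState(0)
x = rng.randn(8, 4, 5).astype(np.float32)
eps = 1e-5
mu = x.mean(axis=(0, 2), keepdims=True)              # mean over all but the feature axis
sigma2 = ((x - mu) ** 2).mean(axis=(0, 2), keepdims=True)
ref = (x - mu) / np.sqrt(sigma2 + eps)

xv = nn.Variable.from_numpy_array(x)
beta = nn.Variable.from_numpy_array(np.zeros((1, 4, 1), dtype=np.float32))
gamma = nn.Variable.from_numpy_array(np.ones((1, 4, 1), dtype=np.float32))
mean = nn.Variable.from_numpy_array(np.zeros((1, 4, 1), dtype=np.float32))  # running mean (updated in place)
var = nn.Variable.from_numpy_array(np.zeros((1, 4, 1), dtype=np.float32))   # running variance (updated in place)
with nn.auto_forward():
    y = F.batch_normalization(xv, beta, gamma, mean, var, axes=[1],
                              decay_rate=0.9, eps=eps, batch_stat=True)
assert np.allclose(y.d, ref, atol=1e-5)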
def test_batch_normalization_for_multiple_axes_forward_backward(seed, axes, decay_rate, eps,
                                                                output_stat, ctx, func_name):
    rng = np.random.RandomState(seed)
    inputs = list(create_inputs_for_multiple_axes(rng, axes))
    vinputs = []
    for i in inputs:
        vinputs.append(nn.Variable(i.shape, True))
        vinputs[-1].d = i

    # Check if global stat mode works
    batch_stat = False
    if output_stat:
        return
    ref_y = ref_batch_normalization_for_multiple_axes(
        *(inputs + [axes, decay_rate, eps, batch_stat, output_stat]))
    with nn.context_scope(ctx), nn.auto_forward():
        y = F.batch_normalization(
            *(vinputs + [axes, decay_rate, eps, batch_stat, output_stat]))
    assert_allclose(ref_y, y.d, atol=1e-6)
    def __init__(self, x, weight, bias, beta, gamma, rmean, rvar, z, base_axis,
                 pad, stride, dilation, group, channel_last, decay_rate, eps,
                 batch_stat, nonlinearity, nonlinearity_args):

        from collections import OrderedDict
        inputs = OrderedDict()
        xvar = nn.Variable.from_numpy_array(x)
        weightvar = nn.Variable.from_numpy_array(weight)
        inputs['x'] = xvar
        inputs['weight'] = weightvar
        biasvar = None
        betavar = None
        gammavar = None
        rmeanvar = None
        rvarvar = None
        zvar = None
        if bias is not None:
            biasvar = nn.Variable.from_numpy_array(bias)
            inputs['bias'] = biasvar
        if beta is not None:
            betavar = nn.Variable.from_numpy_array(beta)
            gammavar = nn.Variable.from_numpy_array(gamma)
            rmeanvar = nn.Variable.from_numpy_array(rmean)
            rvarvar = nn.Variable.from_numpy_array(rvar)
            inputs['beta'] = betavar
            inputs['gamma'] = gammavar
            inputs['rmean'] = rmeanvar
            inputs['rvar'] = rvarvar
        if z is not None:
            zvar = nn.Variable.from_numpy_array(z)
            inputs['z'] = zvar
        h = F.convolution(xvar, weightvar, biasvar, base_axis, pad, stride,
                          dilation, group, channel_last)
        if beta is not None:
            h = F.batch_normalization(
                h, betavar, gammavar, rmeanvar, rvarvar,
                [h.ndim - 1 if channel_last else base_axis], decay_rate, eps,
                batch_stat)
        if z is not None:
            h = F.add2(h, zvar)
        h = ref_activation(h, nonlinearity, nonlinearity_args)
        self.input_dict = inputs
        self.output = h
def ref_grad_fused_batch_normalization(x, beta, gamma, rmean, rvar, z, dy,
                                       axes, decay_rate, eps, batch_stat,
                                       nonlinearity, output_stat, **kw):
    with nn.context_scope(cpu_context):
        xvar = nn.Variable.from_numpy_array(x, need_grad=True)
        xvar.g = 0
        betavar = nn.Variable.from_numpy_array(beta, need_grad=True)
        betavar.g = 0
        gammavar = nn.Variable.from_numpy_array(gamma, need_grad=True)
        gammavar.g = 0
        rmeanvar = nn.Variable.from_numpy_array(rmean)
        rmeanvar.g = 0
        rvarvar = nn.Variable.from_numpy_array(rvar)
        rvarvar.g = 0
        zvar = None
        if z is not None:
            zvar = nn.Variable.from_numpy_array(z, need_grad=True)
            zvar.g = 0
        with nn.auto_forward():
            bn = F.batch_normalization(xvar, betavar, gammavar, rmeanvar,
                                       rvarvar, axes, decay_rate, eps,
                                       batch_stat, output_stat)
            if z is None:
                if output_stat:
                    y1 = bn[0]
                else:
                    y1 = bn
            else:
                if output_stat:
                    y1 = F.add2(bn[0], zvar)
                else:
                    y1 = F.add2(bn, zvar)
            y = F.relu(y1)
        y.g = dy
        y.backward(dy)
        concat = [xvar.g.flatten(), betavar.g.flatten(), gammavar.g.flatten()]
        if z is not None:
            concat.append(zvar.g.flatten())
        return np.concatenate(concat)
Example #19
def test_batch_normalization_forward_backward(seed, axis, decay_rate, eps,
                                              output_stat, batch_stat, ctx, func_name,
                                              no_scale, no_bias, no_mean, no_variance):
    from nbla_test_utils import function_tester
    rng = np.random.RandomState(seed)
    inputs = list(create_inputs(rng, axis))
    axes = [axis]
    if not batch_stat and (no_mean or no_variance):
        # check prohibited condition for mean=None and variance=None
        vinputs = []
        for i in inputs:
            vinputs.append(nn.Variable(i.shape, True))

        vinputs = mask_vinputs(
            vinputs, no_scale, no_bias, no_mean, no_variance)
        with pytest.raises(ValueError):
            F.batch_normalization(*vinputs, axes=axes, decay_rate=decay_rate,
                                  eps=eps, batch_stat=batch_stat, output_stat=output_stat)
        return
    else:
        inputs = mask_inputs(inputs, no_scale, no_bias, no_mean, no_variance)
        function_tester(rng, F.batch_normalization, ref_batch_normalization,
                        inputs,
                        func_args=[axes, decay_rate, eps,
                                   batch_stat, output_stat],
                        backward=[True, not no_bias,
                                  not no_scale, False, False],
                        ctx=ctx, func_name=func_name, dstep=1e-2, atol_b=1e-2)

    # Check if running mean and var works.
    if no_mean and no_variance:
        return

    vinputs = []
    for i in inputs:
        vinputs.append(nn.Variable(i.shape, True))
        vinputs[-1].d = i

    vinputs = mask_vinputs(vinputs, no_scale, no_bias, no_mean, no_variance)

    for i in range(5):
        inputs[0] = rng.randn(*inputs[0].shape)
        vinputs[0].d[...] = inputs[0]
        ref_y = ref_batch_normalization(
            *(inputs + [axes, decay_rate, eps, batch_stat, output_stat]))
        with nn.context_scope(ctx), nn.auto_forward():
            y = F.batch_normalization(
                *(vinputs + [axes, decay_rate, eps, batch_stat, output_stat]))
        if not no_mean:
            assert_allclose(vinputs[3].d, inputs[3], atol=1e-7)

        if not no_variance:
            assert_allclose(vinputs[4].d, inputs[4])

    # Check if global stat mode works
    batch_stat = False

    if no_mean or no_variance:
        return

    if output_stat:
        return
    ref_y = ref_batch_normalization(
        *(inputs + [axes, decay_rate, eps, batch_stat, output_stat]))
    with nn.context_scope(ctx), nn.auto_forward():
        y = F.batch_normalization(
            *(vinputs + [axes, decay_rate, eps, batch_stat, output_stat]))
    assert_allclose(ref_y, y.d, atol=1e-6)
    def __call__(self, inp, test=False):
        return F.batch_normalization(inp, self.beta, self.gamma, self.mean,
                                     self.var, self.axes, self.decay_rate,
                                     self.eps, not test, self.output_stat)
Example #21
    def call(self, input):
        return F.batch_normalization(input, self._beta, self._gamma,
                                     self._mean, self._var, self._axes,
                                     self._decay_rate, self._eps,
                                     self.training, self._output_stat)