def ref_group_normalization(x, beta, gamma, num_groups, channel_axis,
                            batch_axis, eps, output_stat):
    cdim = x.shape[channel_axis]

    if cdim % num_groups > 0:
        raise ValueError()

    shape = x.shape[:channel_axis] + (num_groups, int(cdim / num_groups))
    if channel_axis < len(x.shape) - 1:
        shape += x.shape[channel_axis + 1:]

    tmp = x.reshape(shape).copy()

    ignore_axes = _force_list(batch_axis) + [
        channel_axis,
    ]

    axes = tuple(_get_axes_excluding(len(shape), ignore_axes))

    x_mean = tmp.mean(axis=axes, keepdims=True)
    x_std = tmp.std(axis=axes, keepdims=True)

    if output_stat:
        return ((tmp - x_mean) / (x_std + eps) * gamma + beta).reshape(
            x.shape), x_mean, x_std

    return ((tmp - x_mean) / (x_std + eps) * gamma + beta).reshape(x.shape)
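
The reshape above splits the channel axis C into (num_groups, C // num_groups) so that the statistics are taken per group rather than per channel. Below is a minimal NumPy-only sketch of that idea, assuming a hypothetical (N, C, H, W) input and hard-coded reduction axes instead of the _get_axes_excluding helper:

import numpy as np

x = np.random.randn(2, 8, 4, 4).astype(np.float32)   # hypothetical (N, C, H, W) input, C = 8
num_groups = 4
eps = 1e-5

n, c, h, w = x.shape
grouped = x.reshape(n, num_groups, c // num_groups, h, w)

# reduce over every axis except batch (0) and group (1), mirroring ignore_axes above
mean = grouped.mean(axis=(2, 3, 4), keepdims=True)
std = grouped.std(axis=(2, 3, 4), keepdims=True)

normed = ((grouped - mean) / (std + eps)).reshape(x.shape)  # gamma/beta omitted for brevity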
Example n. 2
def test_layer_normalization_forward_backward(seed, x_shape, batch_axis, output_stat):
    rng = np.random.RandomState(seed)
    input = rng.randn(*x_shape).astype(np.float32)

    stat_shape = tuple([x_shape[i] if i in _force_list(batch_axis) else 1
                        for i in range(len(x_shape))])

    beta = rng.randn(*stat_shape).astype(np.float32)
    gamma = rng.randn(*stat_shape).astype(np.float32)
    eps = 1e-05

    x = nn.Variable.from_numpy_array(input)
    v_beta = nn.Variable.from_numpy_array(beta)
    v_gamma = nn.Variable.from_numpy_array(gamma)

    output = F.layer_normalization(
        x, v_beta, v_gamma, batch_axis, eps, output_stat)
    ref = ref_layer_normalization(
        input, beta, gamma, batch_axis, eps, output_stat)

    if output_stat:
        tmp = F.sink(*output)
        tmp.forward()
        tmp.backward()

        for o, r in zip(output, ref):
            assert o.shape == r.shape
            assert np.allclose(o.d, r, atol=1e-2, rtol=1e-5)

    else:
        output.forward()
        output.backward()

        assert np.allclose(output.d, ref, atol=1e-2, rtol=1e-5)
Example n. 3
def create_inputs(rng, x_shape, batch_axis, channel_axis, no_scale, no_bias,
                  broadcast_affine_params):
    x = np.array(rng.randn(*x_shape).astype(np.float32))

    channel_axis += len(x_shape) * (channel_axis < 0)

    if broadcast_affine_params:
        affine_param_shape = tuple([
            x_shape[i] if i in [
                channel_axis,
            ] else 1 for i in range(len(x_shape))
        ])
    else:
        batch_axis = _force_list(batch_axis)
        batch_axis = [i + len(x_shape) * (i < 0) for i in batch_axis]
        affine_param_shape = tuple([
            x_shape[i] if i in batch_axis + [
                channel_axis,
            ] else 1 for i in range(len(x_shape))
        ])

    beta = None if no_bias else rng.randn(
        *affine_param_shape).astype(np.float32)
    gamma = None if no_scale else rng.randn(
        *affine_param_shape).astype(np.float32)

    return x, beta, gamma
Example n. 4
def create_inputs(rng, x_shape, batch_axis, channel_axis, no_scale, no_bias):
    x = np.array(rng.randn(*x_shape).astype(np.float32))

    stat_shape = tuple([x_shape[i] if i in _force_list(batch_axis) + [channel_axis, ] else 1
                        for i in range(len(x_shape))])

    beta = None if no_bias else rng.randn(*stat_shape).astype(np.float32)
    gamma = None if no_scale else rng.randn(*stat_shape).astype(np.float32)

    return x, beta, gamma
Example n. 5
def create_inputs(rng, x_shape, batch_axis, no_scale, no_bias):
    x = rng.randn(*x_shape).astype(np.float32)

    stat_shape = list(x_shape)
    for baxis in _force_list(batch_axis):
        stat_shape[baxis+len(x_shape)*(baxis < 0)] = 1

    beta = None if no_bias else rng.randn(*stat_shape).astype(np.float32)
    gamma = None if no_scale else rng.randn(*stat_shape).astype(np.float32)

    return x, beta, gamma
Example n. 6
def ref_layer_normalization(x, beta, gamma, batch_axis, eps, output_stat):
    batch_axis = _force_list(batch_axis)

    axes = tuple(_get_axes_excluding(len(x.shape), batch_axis))

    x_mean = x.mean(axis=axes, keepdims=True)
    x_std = x.std(axis=axes, keepdims=True)

    if output_stat:
        return (x - x_mean) / (x_std + eps) * gamma + beta, x_mean, x_std

    return (x - x_mean) / (x_std + eps) * gamma + beta
Example n. 7
def ref_instance_normalization(x, beta, gamma, channel_axis, batch_axis, eps, output_stat):
    ignore_axes = _force_list(batch_axis) + [channel_axis, ]

    axes = tuple(_get_axes_excluding(len(x.shape), ignore_axes))

    x_mean = x.mean(axis=axes, keepdims=True)
    x_std = x.std(axis=axes, keepdims=True)

    if output_stat:
        return (x - x_mean) / (x_std + eps) * gamma + beta, x_mean, x_std

    return (x - x_mean) / (x_std + eps) * gamma + beta
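
For context, the reference above reduces over every axis except the batch and channel axes, i.e. it computes per-sample, per-channel statistics. A minimal NumPy-only sketch, assuming a hypothetical (N, C, H, W) input with hard-coded spatial axes:

import numpy as np

x = np.random.randn(2, 3, 4, 4).astype(np.float32)   # hypothetical (N, C, H, W) input
eps = 1e-5

mean = x.mean(axis=(2, 3), keepdims=True)             # statistics per sample and per channel
std = x.std(axis=(2, 3), keepdims=True)

normed = (x - mean) / (std + eps)                      # gamma/beta omitted for brevity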
Example n. 8
def test_group_normalization_forward_backward(seed, num_groups, x_shape,
                                              batch_axis, channel_axis,
                                              output_stat):
    from nnabla.normalization_functions import _force_list

    rng = np.random.RandomState(seed)
    input = np.array(rng.randn(*x_shape).astype(np.float32))

    stat_shape = [
        x_shape[i] if i in _force_list(batch_axis) else 1
        for i in range(len(x_shape) + 1)
    ]
    stat_shape[channel_axis] = num_groups
    stat_shape[channel_axis + 1] = int(x_shape[channel_axis] / num_groups)

    beta = rng.randn(*stat_shape).astype(np.float32)
    gamma = rng.randn(*stat_shape).astype(np.float32)

    eps = 1e-05

    x = nn.Variable.from_numpy_array(input)
    v_beta = nn.Variable.from_numpy_array(beta)
    v_gamma = nn.Variable.from_numpy_array(gamma)

    output = F.group_normalization(x, v_beta, v_gamma, num_groups,
                                   channel_axis, batch_axis, eps, output_stat)
    ref = ref_group_normalization(input, beta, gamma, num_groups, channel_axis,
                                  batch_axis, eps, output_stat)

    if output_stat:
        tmp = F.sink(*output)
        tmp.forward()
        tmp.backward()

        for o, r in zip(output, ref):
            assert o.shape == r.shape
            assert np.allclose(o.d, r, atol=1e-2, rtol=1e-5)

    else:
        output.forward()
        output.backward()

        assert output.shape == ref.shape
        assert np.allclose(output.d, ref, atol=1e-2, rtol=1e-5)
Example n. 9
def ref_layer_normalization(x, beta, gamma, batch_axis, eps, output_stat):
    batch_axis = _force_list(batch_axis)

    axes = tuple(_get_axes_excluding(len(x.shape), batch_axis))

    x_mean = x.mean(axis=axes, keepdims=True)
    x_var = x.var(axis=axes, keepdims=True)

    norm = (x - x_mean) / (x_var + eps)**0.5

    if gamma is not None:
        norm *= gamma

    if beta is not None:
        norm += beta

    if output_stat:
        return norm, x_mean, x_var

    return norm
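
As a quick sanity check of the reference above, the output before gamma and beta is applied should have roughly zero mean and unit variance over the non-batch axes. A small NumPy-only sketch, assuming hypothetical shapes and batch_axis = 0:

import numpy as np

x = np.random.randn(4, 3, 5).astype(np.float32)       # batch_axis = 0
axes = (1, 2)                                           # every axis except the batch axis
eps = 1e-5

norm = (x - x.mean(axis=axes, keepdims=True)) / (x.var(axis=axes, keepdims=True) + eps) ** 0.5

assert np.allclose(norm.mean(axis=axes), 0.0, atol=1e-4)
assert np.allclose(norm.var(axis=axes), 1.0, atol=1e-3)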
Example n. 10
def ref_group_normalization(x, beta, gamma, num_groups, channel_axis,
                            batch_axis, eps, output_stat):
    cdim = x.shape[channel_axis]

    if cdim % num_groups > 0:
        raise ValueError()

    shape = x.shape[:channel_axis] + (num_groups, int(cdim / num_groups))

    channel_axis += x.ndim * (channel_axis < 0)
    batch_axis = _force_list(batch_axis)
    batch_axis = [b + x.ndim * (b < 0) for b in batch_axis]

    if channel_axis < len(x.shape) - 1:
        shape += x.shape[channel_axis + 1:]

    tmp = x.reshape(shape).copy()

    ignore_axes = batch_axis + [
        channel_axis,
    ]

    axes = tuple(_get_axes_excluding(len(shape), ignore_axes))

    x_mean = tmp.mean(axis=axes, keepdims=True)
    x_var = tmp.var(axis=axes, keepdims=True)

    norm = (tmp - x_mean) / (x_var + eps)**0.5

    norm = norm.reshape(x.shape)

    if gamma is not None:
        norm *= gamma

    if beta is not None:
        norm += beta

    if output_stat:
        return norm, x_mean, x_var

    return norm
Example n. 11
def test_pf_group_normalization(g_rng, num_groups, inshape, batch_axis,
                                channel_axis, output_stat, fix_parameters,
                                param_init):
    from nnabla.normalization_functions import _force_list, _get_axes_excluding

    def ref_group_normalization(x, beta, gamma, num_groups, channel_axis,
                                batch_axis, eps, output_stat):
        cdim = x.shape[channel_axis]

        if cdim % num_groups > 0:
            raise ValueError()

        shape = x.shape[:channel_axis] + (num_groups, int(cdim / num_groups))
        if channel_axis < len(x.shape) - 1:
            shape += x.shape[channel_axis + 1:]

        tmp = x.reshape(shape).copy()

        ignore_axes = _force_list(batch_axis) + [
            channel_axis,
        ]

        axes = tuple(_get_axes_excluding(len(shape), ignore_axes))

        x_mean = tmp.mean(axis=axes, keepdims=True)
        x_std = tmp.std(axis=axes, keepdims=True)

        if output_stat:
            return ((tmp - x_mean) / (x_std + eps) * gamma + beta).reshape(
                x.shape), x_mean, x_std

        return ((tmp - x_mean) / (x_std + eps) * gamma + beta).reshape(x.shape)

    eps = 1e-5

    p_shape = [1 for _ in range(len(inshape) + 1)]
    p_shape[channel_axis] = num_groups
    p_shape[channel_axis + 1] = int(inshape[channel_axis] / num_groups)
    p_shape = tuple(p_shape)

    x_npy = g_rng.randn(*inshape)

    if param_init:
        beta_init = np.ones(p_shape)
        gamma_init = np.ones(p_shape) * 2
        param_init = dict(beta=beta_init, gamma=gamma_init)
    else:
        beta_init = np.zeros(p_shape)
        gamma_init = np.ones(p_shape)

    x = nn.Variable.from_numpy_array(x_npy)

    kw = {}
    insert_if_not_default(kw, 'channel_axis', channel_axis, 1)
    insert_if_not_default(kw, 'batch_axis', batch_axis, 0)
    insert_if_not_default(kw, 'eps', eps, 1e-5)
    insert_if_not_default(kw, 'output_stat', output_stat, False)
    insert_if_not_default(kw, 'fix_parameters', fix_parameters, False)
    insert_if_not_none(kw, 'param_init', param_init)

    # Check creation
    y = PF.group_normalization(x, num_groups, **kw)
    y = _force_list(y)  # just to simplify after execution

    # Check parameter values before execution ( reshape(Add2(Mul2(h, g), b)) )
    h = y[0]
    b = h.parent.inputs[0].parent.inputs[1]
    g = h.parent.inputs[0].parent.inputs[0].parent.inputs[1]
    assert np.allclose(b.d, beta_init)
    assert np.allclose(g.d, gamma_init)

    # Check execution
    forward_backward_all(*y)

    # Check values
    ref = ref_group_normalization(x_npy, beta_init, gamma_init, num_groups,
                                  channel_axis, batch_axis, eps, output_stat)
    if not output_stat:
        ref = [ref]

    for i in range(len(ref)):
        assert np.allclose(y[i].d, ref[i], atol=1e-2, rtol=1e-5)

    # Check created parameters
    assert len(nn.get_parameters()) == 2
    assert len(nn.get_parameters(grad_only=False)) == 2
    beta, gamma = [
        nn.get_parameters()['group_normalization/' + name]
        for name in ['beta', 'gamma']
    ]
    assert beta.shape == p_shape
    assert gamma.shape == p_shape

    assert beta.need_grad
    assert gamma.need_grad

    b = h.parent.inputs[0].parent.inputs[1]
    g = h.parent.inputs[0].parent.inputs[0].parent.inputs[1]
    assert b.need_grad == (not fix_parameters)
    assert g.need_grad == (not fix_parameters)
Example n. 12
def test_pf_instance_normalization(g_rng, inshape, batch_axis, channel_axis,
                                   output_stat, fix_parameters, param_init):
    from nnabla.normalization_functions import _force_list, _get_axes_excluding

    def ref_instance_normalization(x, beta, gamma, channel_axis, batch_axis,
                                   eps, output_stat):

        ignore_axes = _force_list(batch_axis) + [
            channel_axis,
        ]

        axes = tuple(_get_axes_excluding(len(x.shape), ignore_axes))

        x_mean = x.mean(axis=axes, keepdims=True)
        x_std = x.std(axis=axes, keepdims=True)

        if output_stat:
            return (x - x_mean) / (x_std + eps) * gamma + beta, x_mean, x_std

        return (x - x_mean) / (x_std + eps) * gamma + beta

    eps = 1e-5

    p_shape = tuple(
        [inshape[i] if i == channel_axis else 1 for i in range(len(inshape))])

    x_npy = g_rng.randn(*inshape)

    if param_init:
        beta_init = np.ones(p_shape)
        gamma_init = np.ones(p_shape) * 2
        param_init = dict(beta=beta_init, gamma=gamma_init)
    else:
        beta_init = np.zeros(p_shape)
        gamma_init = np.ones(p_shape)

    x = nn.Variable.from_numpy_array(x_npy)

    kw = {}
    insert_if_not_default(kw, 'channel_axis', channel_axis, 1)
    insert_if_not_default(kw, 'batch_axis', batch_axis, 0)
    insert_if_not_default(kw, 'eps', eps, 1e-5)
    insert_if_not_default(kw, 'output_stat', output_stat, False)
    insert_if_not_default(kw, 'fix_parameters', fix_parameters, False)
    insert_if_not_none(kw, 'param_init', param_init)

    # Check creation
    y = PF.instance_normalization(x, **kw)
    y = _force_list(y)  # just to simplify after execution

    # Check parameter values before execution
    h = y[0]
    b = h.parent.inputs[1]
    g = h.parent.inputs[0].parent.inputs[1]
    assert np.allclose(b.d, beta_init)
    assert np.allclose(g.d, gamma_init)

    # Check execution
    forward_backward_all(*y)

    # Check values
    ref = ref_instance_normalization(x_npy, beta_init, gamma_init,
                                     channel_axis, batch_axis, eps,
                                     output_stat)
    if not output_stat:
        ref = [ref]

    for i in range(len(ref)):
        assert np.allclose(y[i].d, ref[i], atol=1e-2, rtol=1e-5)

    # Check created parameters
    assert len(nn.get_parameters()) == 2
    assert len(nn.get_parameters(grad_only=False)) == 2
    beta, gamma = [
        nn.get_parameters()['instance_normalization/' + name]
        for name in ['beta', 'gamma']
    ]
    assert beta.shape == p_shape
    assert gamma.shape == p_shape

    assert beta.need_grad
    assert gamma.need_grad

    b = h.parent.inputs[1]
    g = h.parent.inputs[0].parent.inputs[1]
    assert b.need_grad == (not fix_parameters)
    assert g.need_grad == (not fix_parameters)