def ref_group_normalization(x, beta, gamma, num_groups, channel_axis,
                            batch_axis, eps, output_stat):
    cdim = x.shape[channel_axis]

    if cdim % num_groups > 0:
        raise ValueError(
            "channel dim ({}) must be divisible by num_groups ({})".format(
                cdim, num_groups))

    # Split the channel axis into (num_groups, channels_per_group).
    shape = x.shape[:channel_axis] + (num_groups, cdim // num_groups)
    if channel_axis < len(x.shape) - 1:
        shape += x.shape[channel_axis + 1:]

    tmp = x.reshape(shape).copy()

    # Reduce over all axes except the batch axes and the group axis.
    ignore_axes = _force_list(batch_axis) + [channel_axis, ]
    axes = tuple(_get_axes_excluding(len(shape), ignore_axes))

    x_mean = tmp.mean(axis=axes, keepdims=True)
    x_std = tmp.std(axis=axes, keepdims=True)

    if output_stat:
        return ((tmp - x_mean) / (x_std + eps) * gamma + beta).reshape(
            x.shape), x_mean, x_std

    return ((tmp - x_mean) / (x_std + eps) * gamma + beta).reshape(x.shape)
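# The helpers used throughout this file come from
# nnabla.normalization_functions. A minimal sketch of their assumed behavior
# (not the actual implementations) for readers following the references above:
def _force_list(axis):
    # Wrap a scalar axis into a list; pass an iterable through as a list.
    return list(axis) if hasattr(axis, '__iter__') else [axis]


def _get_axes_excluding(ndim, ignore_axes):
    # All axes of an ndim-dimensional array except those in ignore_axes.
    return [a for a in range(ndim) if a not in ignore_axes]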
def test_layer_normalization_forward_backward(seed, x_shape, batch_axis,
                                              output_stat):
    rng = np.random.RandomState(seed)
    input = rng.randn(*x_shape).astype(np.float32)

    stat_shape = tuple([x_shape[i] if i in _force_list(batch_axis) else 1
                        for i in range(len(x_shape))])

    beta = rng.randn(*stat_shape).astype(np.float32)
    gamma = rng.randn(*stat_shape).astype(np.float32)
    eps = 1e-05

    x = nn.Variable.from_numpy_array(input)
    v_beta = nn.Variable.from_numpy_array(beta)
    v_gamma = nn.Variable.from_numpy_array(gamma)

    output = F.layer_normalization(
        x, v_beta, v_gamma, batch_axis, eps, output_stat)
    ref = ref_layer_normalization(
        input, beta, gamma, batch_axis, eps, output_stat)

    if output_stat:
        tmp = F.sink(*output)
        tmp.forward()
        tmp.backward()

        for o, r in zip(output, ref):
            assert o.shape == r.shape
            assert np.allclose(o.d, r, atol=1e-2, rtol=1e-5)
    else:
        output.forward()
        output.backward()

        assert np.allclose(output.d, ref, atol=1e-2, rtol=1e-5)
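# The test functions in this file are presumably driven by pytest
# parametrization; the decorators are not shown here. A hypothetical setup
# for the test above (parameter values are illustrative, not from the source)
# might look like:
import pytest


@pytest.mark.parametrize("seed", [313])
@pytest.mark.parametrize("x_shape, batch_axis", [((2, 4, 8, 8), 0)])
@pytest.mark.parametrize("output_stat", [False, True])
def test_layer_normalization_forward_backward(seed, x_shape, batch_axis,
                                              output_stat):
    ...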
def create_inputs(rng, x_shape, batch_axis, channel_axis, no_scale, no_bias,
                  broadcast_affine_params):
    x = np.array(rng.randn(*x_shape).astype(np.float32))

    # Normalize a negative channel_axis to a non-negative index.
    channel_axis += len(x_shape) * (channel_axis < 0)

    if broadcast_affine_params:
        # Affine parameters span only the channel axis and broadcast over
        # the batch and spatial axes.
        affine_param_shape = tuple([
            x_shape[i] if i in [channel_axis, ] else 1
            for i in range(len(x_shape))
        ])
    else:
        # Affine parameters span the batch axes as well.
        batch_axis = _force_list(batch_axis)
        batch_axis = [i + len(x_shape) * (i < 0) for i in batch_axis]
        affine_param_shape = tuple([
            x_shape[i] if i in batch_axis + [channel_axis, ] else 1
            for i in range(len(x_shape))
        ])

    beta = None if no_bias else rng.randn(
        *affine_param_shape).astype(np.float32)
    gamma = None if no_scale else rng.randn(
        *affine_param_shape).astype(np.float32)

    return x, beta, gamma
def create_inputs(rng, x_shape, batch_axis, channel_axis, no_scale, no_bias):
    x = np.array(rng.randn(*x_shape).astype(np.float32))

    stat_shape = tuple([
        x_shape[i] if i in _force_list(batch_axis) + [channel_axis, ] else 1
        for i in range(len(x_shape))
    ])

    beta = None if no_bias else rng.randn(*stat_shape).astype(np.float32)
    gamma = None if no_scale else rng.randn(*stat_shape).astype(np.float32)

    return x, beta, gamma
def create_inputs(rng, x_shape, batch_axis, no_scale, no_bias):
    x = rng.randn(*x_shape).astype(np.float32)

    stat_shape = list(x_shape)
    for baxis in _force_list(batch_axis):
        stat_shape[baxis + len(x_shape) * (baxis < 0)] = 1

    beta = None if no_bias else rng.randn(*stat_shape).astype(np.float32)
    gamma = None if no_scale else rng.randn(*stat_shape).astype(np.float32)

    return x, beta, gamma
def ref_layer_normalization(x, beta, gamma, batch_axis, eps, output_stat):
    batch_axis = _force_list(batch_axis)

    axes = tuple(_get_axes_excluding(len(x.shape), batch_axis))

    x_mean = x.mean(axis=axes, keepdims=True)
    x_std = x.std(axis=axes, keepdims=True)

    if output_stat:
        return (x - x_mean) / (x_std + eps) * gamma + beta, x_mean, x_std

    return (x - x_mean) / (x_std + eps) * gamma + beta
def ref_instance_normalization(x, beta, gamma, channel_axis, batch_axis, eps,
                               output_stat):
    ignore_axes = _force_list(batch_axis) + [channel_axis, ]

    axes = tuple(_get_axes_excluding(len(x.shape), ignore_axes))

    x_mean = x.mean(axis=axes, keepdims=True)
    x_std = x.std(axis=axes, keepdims=True)

    if output_stat:
        return (x - x_mean) / (x_std + eps) * gamma + beta, x_mean, x_std

    return (x - x_mean) / (x_std + eps) * gamma + beta
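# A minimal usage sketch of the reference above (shapes are illustrative
# assumptions; numpy is assumed imported as np, as elsewhere in this file):
# instance normalization on an NCHW tensor reduces over H and W only, with
# per-channel affine parameters broadcasting against x.
x = np.random.randn(2, 3, 8, 8).astype(np.float32)
beta = np.zeros((1, 3, 1, 1), dtype=np.float32)
gamma = np.ones((1, 3, 1, 1), dtype=np.float32)
y = ref_instance_normalization(x, beta, gamma, channel_axis=1, batch_axis=0,
                               eps=1e-5, output_stat=False)
assert y.shape == x.shape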
def test_group_normalization_forward_backward(seed, num_groups, x_shape,
                                              batch_axis, channel_axis,
                                              output_stat):
    from nnabla.normalization_functions import _force_list

    rng = np.random.RandomState(seed)
    input = np.array(rng.randn(*x_shape).astype(np.float32))

    # The affine parameters live in the grouped shape, which has one more
    # axis than x: the channel axis is split into
    # (num_groups, channels_per_group).
    stat_shape = [
        x_shape[i] if i in _force_list(batch_axis) else 1
        for i in range(len(x_shape) + 1)
    ]
    stat_shape[channel_axis] = num_groups
    stat_shape[channel_axis + 1] = x_shape[channel_axis] // num_groups

    beta = rng.randn(*stat_shape).astype(np.float32)
    gamma = rng.randn(*stat_shape).astype(np.float32)
    eps = 1e-05

    x = nn.Variable.from_numpy_array(input)
    v_beta = nn.Variable.from_numpy_array(beta)
    v_gamma = nn.Variable.from_numpy_array(gamma)

    output = F.group_normalization(x, v_beta, v_gamma, num_groups,
                                   channel_axis, batch_axis, eps, output_stat)
    ref = ref_group_normalization(input, beta, gamma, num_groups,
                                  channel_axis, batch_axis, eps, output_stat)

    if output_stat:
        tmp = F.sink(*output)
        tmp.forward()
        tmp.backward()

        for o, r in zip(output, ref):
            assert o.shape == r.shape
            assert np.allclose(o.d, r, atol=1e-2, rtol=1e-5)
    else:
        output.forward()
        output.backward()

        assert output.shape == ref.shape
        assert np.allclose(output.d, ref, atol=1e-2, rtol=1e-5)
def ref_layer_normalization(x, beta, gamma, batch_axis, eps, output_stat):
    batch_axis = _force_list(batch_axis)

    axes = tuple(_get_axes_excluding(len(x.shape), batch_axis))

    x_mean = x.mean(axis=axes, keepdims=True)
    x_var = x.var(axis=axes, keepdims=True)

    norm = (x - x_mean) / (x_var + eps) ** 0.5

    if gamma is not None:
        norm *= gamma

    if beta is not None:
        norm += beta

    if output_stat:
        return norm, x_mean, x_var

    return norm
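# Note that this variant normalizes with sqrt(var + eps) and makes the affine
# parameters optional, while the earlier std-based references use (std + eps).
# For small eps the two normalizations are close but not identical; a quick
# self-contained check (values are illustrative; numpy assumed as np):
x = np.random.randn(4, 8).astype(np.float32)
mean = x.mean(axis=1, keepdims=True)
var = x.var(axis=1, keepdims=True)
std_based = (x - mean) / (np.sqrt(var) + 1e-5)
var_based = (x - mean) / np.sqrt(var + 1e-5)
print(np.abs(std_based - var_based).max())  # small, but nonzero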
def ref_group_normalization(x, beta, gamma, num_groups, channel_axis,
                            batch_axis, eps, output_stat):
    cdim = x.shape[channel_axis]

    if cdim % num_groups > 0:
        raise ValueError(
            "channel dim ({}) must be divisible by num_groups ({})".format(
                cdim, num_groups))

    shape = x.shape[:channel_axis] + (num_groups, cdim // num_groups)

    # Normalize possibly negative axes to non-negative indices.
    channel_axis += x.ndim * (channel_axis < 0)
    batch_axis = _force_list(batch_axis)
    batch_axis = [b + x.ndim * (b < 0) for b in batch_axis]

    if channel_axis < len(x.shape) - 1:
        shape += x.shape[channel_axis + 1:]

    tmp = x.reshape(shape).copy()

    ignore_axes = batch_axis + [channel_axis, ]
    axes = tuple(_get_axes_excluding(len(shape), ignore_axes))

    x_mean = tmp.mean(axis=axes, keepdims=True)
    x_var = tmp.var(axis=axes, keepdims=True)

    norm = (tmp - x_mean) / (x_var + eps) ** 0.5
    norm = norm.reshape(x.shape)

    if gamma is not None:
        norm *= gamma

    if beta is not None:
        norm += beta

    if output_stat:
        return norm, x_mean, x_var

    return norm
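# A minimal usage sketch for the reference above (shapes are illustrative
# assumptions): 2 groups over the channel axis of an NCHW tensor, with
# per-channel affine parameters that broadcast against x after the norm is
# reshaped back to x.shape.
x = np.random.randn(2, 4, 3, 3).astype(np.float32)
beta = np.zeros((1, 4, 1, 1), dtype=np.float32)
gamma = np.ones((1, 4, 1, 1), dtype=np.float32)
y = ref_group_normalization(x, beta, gamma, num_groups=2, channel_axis=1,
                            batch_axis=0, eps=1e-5, output_stat=False)
assert y.shape == x.shape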
def test_pf_group_normalization(g_rng, num_groups, inshape, batch_axis,
                                channel_axis, output_stat, fix_parameters,
                                param_init):
    from nnabla.normalization_functions import _force_list, _get_axes_excluding

    def ref_group_normalization(x, beta, gamma, num_groups, channel_axis,
                                batch_axis, eps, output_stat):
        cdim = x.shape[channel_axis]

        if cdim % num_groups > 0:
            raise ValueError(
                "channel dim ({}) must be divisible by num_groups ({})".format(
                    cdim, num_groups))

        shape = x.shape[:channel_axis] + (num_groups, cdim // num_groups)
        if channel_axis < len(x.shape) - 1:
            shape += x.shape[channel_axis + 1:]

        tmp = x.reshape(shape).copy()

        ignore_axes = _force_list(batch_axis) + [channel_axis, ]
        axes = tuple(_get_axes_excluding(len(shape), ignore_axes))

        x_mean = tmp.mean(axis=axes, keepdims=True)
        x_std = tmp.std(axis=axes, keepdims=True)

        if output_stat:
            return ((tmp - x_mean) / (x_std + eps) * gamma + beta).reshape(
                x.shape), x_mean, x_std

        return ((tmp - x_mean) / (x_std + eps) * gamma + beta).reshape(x.shape)

    eps = 1e-5

    # Parameters live in the grouped shape (one more axis than the input).
    p_shape = [1 for _ in range(len(inshape) + 1)]
    p_shape[channel_axis] = num_groups
    p_shape[channel_axis + 1] = inshape[channel_axis] // num_groups
    p_shape = tuple(p_shape)

    x_npy = g_rng.randn(*inshape)

    if param_init:
        beta_init = np.ones(p_shape)
        gamma_init = np.ones(p_shape) * 2
        param_init = dict(beta=beta_init, gamma=gamma_init)
    else:
        beta_init = np.zeros(p_shape)
        gamma_init = np.ones(p_shape)

    x = nn.Variable.from_numpy_array(x_npy)

    kw = {}
    insert_if_not_default(kw, 'channel_axis', channel_axis, 1)
    insert_if_not_default(kw, 'batch_axis', batch_axis, 0)
    insert_if_not_default(kw, 'eps', eps, 1e-5)
    insert_if_not_default(kw, 'output_stat', output_stat, False)
    insert_if_not_default(kw, 'fix_parameters', fix_parameters, False)
    insert_if_not_none(kw, 'param_init', param_init)

    # Check creation
    y = PF.group_normalization(x, num_groups, **kw)
    y = _force_list(y)  # just to simplify after execution

    # Check parameter values before execution ( reshape(Add2(Mul2(h, g), b)) )
    h = y[0]
    b = h.parent.inputs[0].parent.inputs[1]
    g = h.parent.inputs[0].parent.inputs[0].parent.inputs[1]
    assert np.allclose(b.d, beta_init)
    assert np.allclose(g.d, gamma_init)

    # Check execution
    forward_backward_all(*y)

    # Check values
    ref = ref_group_normalization(x_npy, beta_init, gamma_init, num_groups,
                                  channel_axis, batch_axis, eps, output_stat)
    if not output_stat:
        ref = [ref]

    for i in range(len(ref)):
        assert np.allclose(y[i].d, ref[i], atol=1e-2, rtol=1e-5)

    # Check created parameters
    assert len(nn.get_parameters()) == 2
    assert len(nn.get_parameters(grad_only=False)) == 2
    beta, gamma = [nn.get_parameters()['group_normalization/' + name]
                   for name in ['beta', 'gamma']]
    assert beta.shape == p_shape
    assert gamma.shape == p_shape
    assert beta.need_grad
    assert gamma.need_grad

    b = h.parent.inputs[0].parent.inputs[1]
    g = h.parent.inputs[0].parent.inputs[0].parent.inputs[1]
    assert b.need_grad == (not fix_parameters)
    assert g.need_grad == (not fix_parameters)
def test_pf_instance_normalization(g_rng, inshape, batch_axis, channel_axis,
                                   output_stat, fix_parameters, param_init):
    from nnabla.normalization_functions import _force_list, _get_axes_excluding

    def ref_instance_normalization(x, beta, gamma, channel_axis, batch_axis,
                                   eps, output_stat):
        ignore_axes = _force_list(batch_axis) + [channel_axis, ]
        axes = tuple(_get_axes_excluding(len(x.shape), ignore_axes))

        x_mean = x.mean(axis=axes, keepdims=True)
        x_std = x.std(axis=axes, keepdims=True)

        if output_stat:
            return (x - x_mean) / (x_std + eps) * gamma + beta, x_mean, x_std

        return (x - x_mean) / (x_std + eps) * gamma + beta

    eps = 1e-5

    # Per-channel parameter shape, e.g. (1, C, 1, 1) for NCHW input.
    p_shape = tuple(
        [inshape[i] if i == channel_axis else 1 for i in range(len(inshape))])

    x_npy = g_rng.randn(*inshape)

    if param_init:
        beta_init = np.ones(p_shape)
        gamma_init = np.ones(p_shape) * 2
        param_init = dict(beta=beta_init, gamma=gamma_init)
    else:
        beta_init = np.zeros(p_shape)
        gamma_init = np.ones(p_shape)

    x = nn.Variable.from_numpy_array(x_npy)

    kw = {}
    insert_if_not_default(kw, 'channel_axis', channel_axis, 1)
    insert_if_not_default(kw, 'batch_axis', batch_axis, 0)
    insert_if_not_default(kw, 'eps', eps, 1e-5)
    insert_if_not_default(kw, 'output_stat', output_stat, False)
    insert_if_not_default(kw, 'fix_parameters', fix_parameters, False)
    insert_if_not_none(kw, 'param_init', param_init)

    # Check creation
    y = PF.instance_normalization(x, **kw)
    y = _force_list(y)  # just to simplify after execution

    # Check parameter values before execution ( Add2(Mul2(h, g), b) )
    h = y[0]
    b = h.parent.inputs[1]
    g = h.parent.inputs[0].parent.inputs[1]
    assert np.allclose(b.d, beta_init)
    assert np.allclose(g.d, gamma_init)

    # Check execution
    forward_backward_all(*y)

    # Check values
    ref = ref_instance_normalization(x_npy, beta_init, gamma_init,
                                     channel_axis, batch_axis, eps,
                                     output_stat)
    if not output_stat:
        ref = [ref]

    for i in range(len(ref)):
        assert np.allclose(y[i].d, ref[i], atol=1e-2, rtol=1e-5)

    # Check created parameters
    assert len(nn.get_parameters()) == 2
    assert len(nn.get_parameters(grad_only=False)) == 2
    beta, gamma = [nn.get_parameters()['instance_normalization/' + name]
                   for name in ['beta', 'gamma']]
    assert beta.shape == p_shape
    assert gamma.shape == p_shape
    assert beta.need_grad
    assert gamma.need_grad

    b = h.parent.inputs[1]
    g = h.parent.inputs[0].parent.inputs[1]
    assert b.need_grad == (not fix_parameters)
    assert g.need_grad == (not fix_parameters)