def test_clear_input_if_no_need_grad_batch_normalization(self, batch_stat):
    x1 = nn.Variable([1, 1, 2], need_grad=True)
    x2 = nn.Variable([1, 1, 1], need_grad=True)
    x3 = nn.Variable([1, 1, 1], need_grad=True)
    x4 = nn.Variable([1, 1, 1], need_grad=True)
    x5 = nn.Variable([1, 1, 1], need_grad=True)

    x = F.identity(x1)
    beta = F.identity(x2)
    gamma = F.identity(x3)
    if batch_stat:
        y = F.batch_normalization(
            x, beta, gamma, x4, x5, batch_stat=batch_stat)
    else:
        mean = F.identity(x4)
        var = F.identity(x5)
        y = F.batch_normalization(
            x, beta, gamma, mean, var, batch_stat=batch_stat)

    answer = []
    answer.append([False])
    answer.append([False])
    answer.append([False])
    if not batch_stat:
        answer.append([False])
        answer.append([False])
    answer.append([False, True, False, False, False])

    y.forward(clear_no_need_grad=True)
    self.check_input_data_clear_called_flags(answer)

def test_batch_normalization_forward_backward(seed, axis, decay_rate, eps,
                                              output_stat, batch_stat, ctx,
                                              func_name):
    from nbla_test_utils import function_tester
    rng = np.random.RandomState(seed)
    inputs = list(create_inputs(rng, axis))
    axes = [axis]
    if ctx.backend[0].split(':')[0] != 'cpu' and not batch_stat:
        pytest.skip(
            "cuda and cudnn implementation for batch_stat==False "
            "is not implemented yet")
    else:
        function_tester(
            rng, F.batch_normalization, ref_batch_normalization, inputs,
            func_args=[axes, decay_rate, eps, batch_stat, output_stat],
            backward=[True, True, True, False, False],
            ctx=ctx, func_name=func_name, dstep=1e-2, atol_b=1e-2)

    # Check if running mean and var works.
    vinputs = []
    for i in inputs:
        vinputs.append(nn.Variable(i.shape, True))
        vinputs[-1].d = i
    for i in range(5):
        inputs[0] = rng.randn(*inputs[0].shape)
        vinputs[0].d[...] = inputs[0]
        ref_y = ref_batch_normalization(
            *(inputs + [axes, decay_rate, eps, batch_stat, output_stat]))
        with nn.context_scope(ctx), nn.auto_forward():
            y = F.batch_normalization(
                *(vinputs + [axes, decay_rate, eps, batch_stat, output_stat]))
        assert np.allclose(vinputs[3].d, inputs[3], atol=1e-7)
        assert np.allclose(vinputs[4].d, inputs[4])

    # Check if global stat mode works
    batch_stat = False
    if output_stat:
        return
    ref_y = ref_batch_normalization(
        *(inputs + [axes, decay_rate, eps, batch_stat, output_stat]))
    with nn.context_scope(ctx), nn.auto_forward():
        y = F.batch_normalization(
            *(vinputs + [axes, decay_rate, eps, batch_stat, output_stat]))
    assert np.allclose(ref_y, y.d, atol=1e-6)

def ref_batch_normalization(x, beta, gamma, rmean, rvar, comm, axes,
                            decay_rate, eps, batch_stat, output_stat):
    # `device_id`, `n_devices`, and `ctx` are free variables provided by the
    # enclosing multi-device test module.
    orig = x - device_id
    inputs = []
    for i in range(n_devices):
        inputs.append(orig + i)
    x = np.concatenate(inputs)
    vx = nn.Variable(x.shape, True)
    vx.d = x
    vbeta = nn.Variable(beta.shape, True)
    vbeta.d = beta
    vgamma = nn.Variable(gamma.shape, True)
    vgamma.d = gamma
    vrmean = nn.Variable(rmean.shape, True)
    vrmean.d = rmean
    vrvar = nn.Variable(rvar.shape, True)
    vrvar.d = rvar
    with nn.context_scope(ctx):
        out = F.batch_normalization(vx, vbeta, vgamma, vrmean, vrvar,
                                    batch_stat=batch_stat,
                                    output_stat=output_stat,
                                    axes=axes,
                                    decay_rate=decay_rate,
                                    eps=eps)
    if output_stat:
        out[0].forward()
        rmean[...] = vrmean.d.copy()
        rvar[...] = vrvar.d.copy()
        return out[0].d[device_id*2:(device_id+1)*2], out[1].d, out[2].d
    out.forward()
    rmean[...] = vrmean.d.copy()
    rvar[...] = vrvar.d.copy()
    return out.d[device_id*2:(device_id+1)*2]

def ref_batch_normalize_grad(x, beta, gamma, rmean, rvar, dy, comm, axes,
                             decay_rate, eps, batch_stat, output_stat):
    orig = x - device_id
    inputs = []
    for i in range(n_devices):
        inputs.append(orig + i)
    x = np.concatenate(inputs)
    vx = nn.Variable(x.shape, True)
    vx.d = x
    vx.g = 0
    vbeta = nn.Variable(beta.shape, True)
    vbeta.d = beta
    vbeta.g = 0
    vgamma = nn.Variable(gamma.shape, True)
    vgamma.d = gamma
    vgamma.g = 0
    vrmean = nn.Variable(rmean.shape, True)
    vrmean.d = rmean
    vrvar = nn.Variable(rvar.shape, True)
    vrvar.d = rvar
    with nn.context_scope(ctx):
        out = F.batch_normalization(vx, vbeta, vgamma, vrmean, vrvar,
                                    batch_stat=batch_stat,
                                    output_stat=output_stat,
                                    axes=axes,
                                    decay_rate=decay_rate,
                                    eps=eps)
    f = out.parent
    f.forward([vx, vbeta, vgamma, vrmean, vrvar], [out])
    for i in range(n_devices):
        out.g[2*i:2*(i+1)] = dy
    f.backward([vx, vbeta, vgamma, vrmean, vrvar], [out])
    return np.concatenate([vx.g[device_id*2:(device_id+1)*2].flatten(),
                           vbeta.g.flatten(),
                           vgamma.g.flatten()])

def ref_fused_batch_normalization(x, beta, gamma, rmean, rvar, z, axes,
                                  decay_rate, eps, batch_stat, nonlinearity,
                                  output_stat):
    with nn.context_scope(cpu_context):
        xvar = nn.Variable.from_numpy_array(x)
        betavar = nn.Variable.from_numpy_array(beta)
        gammavar = nn.Variable.from_numpy_array(gamma)
        rmeanvar = nn.Variable.from_numpy_array(rmean)
        rvarvar = nn.Variable.from_numpy_array(rvar)
        if z is not None:
            zvar = nn.Variable.from_numpy_array(z)
        with nn.auto_forward():
            bn = F.batch_normalization(xvar, betavar, gammavar, rmeanvar,
                                       rvarvar, axes, decay_rate, eps,
                                       batch_stat, output_stat)
            if z is None:
                if output_stat:
                    y = bn[0]
                else:
                    y = bn
            else:
                if output_stat:
                    y = F.add2(bn[0], zvar)
                else:
                    y = F.add2(bn, zvar)
            y = F.relu(y)
        rmean[:] = rmeanvar.d
        rvar[:] = rvarvar.d
    if output_stat:
        return y.d, bn[1].d, bn[2].d
    else:
        return y.d

def INByBatchNorm(inp, axes=[1], decay_rate=0.9, eps=1e-5,
                  fix_parameters=True):
    """Instance Normalization (implemented using BatchNormalization)

    Instance normalization is equivalent to batch normalization when the
    batch size is one. In other words, it normalizes over the spatial
    dimension(s), i.e., all dimensions except the batch and feature
    dimensions.
    """
    assert len(axes) == 1
    shape_stat = [1 for _ in inp.shape]
    shape_stat[axes[0]] = inp.shape[axes[0]]
    beta = get_parameter_or_create(
        "beta", shape_stat, ConstantInitializer(0), not fix_parameters)
    gamma = get_parameter_or_create(
        "gamma", shape_stat, ConstantInitializer(1), not fix_parameters)
    mean = get_parameter_or_create(
        "mean", shape_stat, ConstantInitializer(0), False)
    var = get_parameter_or_create(
        "var", shape_stat, ConstantInitializer(0), False)
    return F.batch_normalization(inp, beta, gamma, mean, var, axes,
                                 decay_rate, eps, batch_stat=True,
                                 output_stat=False)

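# A minimal usage sketch for INByBatchNorm above, added for illustration and
# not part of the original source. It assumes the same module-level imports
# as the helper (nnabla as nn, nnabla.functions as F, get_parameter_or_create,
# ConstantInitializer). A dedicated parameter scope keeps the created
# beta/gamma/mean/var parameters separate from other layers.
def _in_by_bn_usage_sketch():
    import nnabla as nn
    # Batch size 1, as required for the instance-normalization equivalence.
    x = nn.Variable((1, 16, 32, 32))
    with nn.parameter_scope("block1/in"):
        y = INByBatchNorm(x, axes=[1])
    return y
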
def connect(self, fname, inputs, args):
    if fname in ['Convolution', 'Deconvolution']:
        # TODO: address leading batch dimension
        args['channel_last'] = True
        x = inputs[0]
        w = inputs[1]
        b = inputs[2] if len(inputs) == 3 else None
        scope = self.get_parameter_scope(w)
        with nn.parameter_scope(scope):
            wd = w.d.copy().transpose(0, 2, 3, 1)
            w = nn.parameter.get_parameter_or_create('W_cl', wd.shape, wd)
        o = F.convolution(x, w, b, **args)
    elif fname == 'BatchNormalization':
        # TODO: address leading batch dimension
        x = inputs[0]
        beta = inputs[1]
        gamma = inputs[2]
        mean = inputs[3]
        var = inputs[4]
        args['axes'] = [len(x.shape) - 1]
        scope = self.get_parameter_scope(beta)
        with nn.parameter_scope(scope):
            beta_d = beta.d.copy().transpose(0, 2, 3, 1)
            gamma_d = gamma.d.copy().transpose(0, 2, 3, 1)
            mean_d = mean.d.copy().transpose(0, 2, 3, 1)
            var_d = var.d.copy().transpose(0, 2, 3, 1)
            beta = nn.parameter.get_parameter_or_create(
                'beta_cl', beta_d.shape, beta_d, beta.need_grad)
            gamma = nn.parameter.get_parameter_or_create(
                'gamma_cl', gamma_d.shape, gamma_d, gamma.need_grad)
            mean = nn.parameter.get_parameter_or_create(
                'mean_cl', mean_d.shape, mean_d, mean.need_grad)
            var = nn.parameter.get_parameter_or_create(
                'var_cl', var_d.shape, var_d, var.need_grad)
        o = F.batch_normalization(x, beta, gamma, mean, var, **args)
    elif fname in ['MaxPooling', 'AveragePooling', 'SumPooling']:
        args['channel_last'] = True
        o = self._call_function(fname, inputs, args)
    elif fname in ['Concatenate']:
        args['axis'] = len(inputs[0].shape) - 1
        o = self._call_function(fname, inputs, args)
    elif fname == 'Affine':
        x = inputs[0]
        _, h_s, w_s, c_s = inputs[0].shape
        _, b_s = inputs[1].shape
        wd = inputs[1].d.copy()
        wd = np.reshape(wd, (c_s, h_s, w_s, b_s))
        wd = np.transpose(wd, (1, 2, 0, 3))
        wd = np.reshape(wd, (-1, b_s))
        w = nn.parameter.get_parameter_or_create('w_cl', wd.shape, wd, False)
        b = inputs[2] if len(inputs) == 3 else None
        o = F.affine(x, w, b, **args)
    else:
        o = self._call_function(fname, inputs, args)
    return o

def CCBN(h, y, n_classes, decay_rate=0.999, test=False, fix_parameters=False,
         coefs=[1.0]):
    """Categorical Conditional Batch Normalization"""
    # Call the batch normalization once
    shape_stat = [1 for _ in h.shape]
    shape_stat[1] = h.shape[1]
    gamma_tmp = nn.Variable.from_numpy_array(np.ones(shape_stat))
    beta_tmp = nn.Variable.from_numpy_array(np.zeros(shape_stat))
    mean = get_parameter_or_create(
        "mean", shape_stat, ConstantInitializer(0.0), False)
    var = get_parameter_or_create(
        "var", shape_stat, ConstantInitializer(1.0), False)
    h = F.batch_normalization(h, beta_tmp, gamma_tmp, mean, var,
                              decay_rate=decay_rate, batch_stat=not test)

    # Condition the gamma and beta with the class label
    b, c = h.shape[0:2]

    def embed_func(y, initializer):
        if type(y) != list:
            o = embed(y, n_classes, c, initializer=initializer, sn=False,
                      test=test)
        else:
            y_list = y
            o = reduce(lambda x, y: x + y,
                       [coef * embed(y, n_classes, c, initializer=initializer,
                                     sn=False, test=test)
                        for coef, y in zip(coefs, y_list)])
        return o

    with nn.parameter_scope("gamma"):
        gamma = embed_func(y, ConstantInitializer(1.0))
        gamma = F.reshape(gamma, [b, c] + [1 for _ in range(len(h.shape[2:]))])
        gamma = F.broadcast(gamma, h.shape)
    with nn.parameter_scope("beta"):
        beta = embed_func(y, ConstantInitializer(0.0))
        beta = F.reshape(beta, [b, c] + [1 for _ in range(len(h.shape[2:]))])
        beta = F.broadcast(beta, h.shape)
    return gamma * h + beta

def __init__(self, x, weight, bias, beta, gamma, rmean, rvar, z, base_axis,
             pad, stride, dilation, group, channel_last, decay_rate, eps,
             batch_stat, nonlinearity, nonlinearity_args, pad_mode,
             constant_value):
    from collections import OrderedDict
    inputs = OrderedDict()
    xvar = nn.Variable.from_numpy_array(x)
    weightvar = nn.Variable.from_numpy_array(weight)
    inputs['x'] = xvar
    inputs['weight'] = weightvar
    biasvar = None
    betavar = None
    gammavar = None
    rmeanvar = None
    rvarvar = None
    zvar = None
    if bias is not None:
        biasvar = nn.Variable.from_numpy_array(bias)
        inputs['bias'] = biasvar
    if beta is not None:
        betavar = nn.Variable.from_numpy_array(beta)
        gammavar = nn.Variable.from_numpy_array(gamma)
        rmeanvar = nn.Variable.from_numpy_array(rmean)
        rvarvar = nn.Variable.from_numpy_array(rvar)
        inputs['beta'] = betavar
        inputs['gamma'] = gammavar
        inputs['rmean'] = rmeanvar
        inputs['rvar'] = rvarvar
    if z is not None:
        zvar = nn.Variable.from_numpy_array(z)
        inputs['z'] = zvar
    spatial_dims = xvar.ndim - (base_axis + 1)
    assert (len(pad) == spatial_dims or len(pad) == 2 * spatial_dims)
    if len(pad) == spatial_dims:
        pad_width = tuple(p for _ in range(2) for p in pad)
    else:  # if len(pad) == 2 * spatial_dims:
        pad_width = pad
    h = F.pad(xvar, pad_width, pad_mode, constant_value)
    conv_pad = (0,) * spatial_dims
    h = F.convolution(h, weightvar, biasvar, base_axis, conv_pad, stride,
                      dilation, group, channel_last)
    if beta is not None:
        h = F.batch_normalization(
            h, betavar, gammavar, rmeanvar, rvarvar,
            [h.ndim - 1 if channel_last else base_axis],
            decay_rate, eps, batch_stat)
    if z is not None:
        h = F.add2(h, zvar)
    h = ref_activation(h, nonlinearity, nonlinearity_args)
    self.input_dict = inputs
    self.output = h

def test_batch_normalization_forward_backward(seed, axis, decay_rate, eps,
                                              output_stat, ctx, func_name):
    from nbla_test_utils import function_tester
    rng = np.random.RandomState(seed)
    inputs = list(create_inputs(rng, axis))
    axes = [axis]
    batch_stat = True
    function_tester(rng, F.batch_normalization, ref_batch_normalization,
                    inputs,
                    func_args=[axes, decay_rate, eps, batch_stat, output_stat],
                    backward=[True, True, True, False, False],
                    ctx=ctx, func_name=func_name, dstep=1e-2, atol_b=1e-2)

    # Check if running mean and var works.
    vinputs = []
    for i in inputs:
        vinputs.append(nn.Variable(i.shape, True))
        vinputs[-1].d = i
    for i in range(5):
        inputs[0] = rng.randn(*inputs[0].shape)
        vinputs[0].d[...] = inputs[0]
        ref_y = ref_batch_normalization(
            *(inputs + [axes, decay_rate, eps, batch_stat, output_stat]))
        with nn.context_scope(ctx), nn.auto_forward():
            y = F.batch_normalization(
                *(vinputs + [axes, decay_rate, eps, batch_stat, output_stat]))
        assert np.allclose(vinputs[3].d, inputs[3])
        assert np.allclose(vinputs[4].d, inputs[4], atol=1e-3)

    # Check if global stat mode works
    batch_stat = False
    if output_stat:
        return
    ref_y = ref_batch_normalization(
        *(inputs + [axes, decay_rate, eps, batch_stat, output_stat]))
    with nn.context_scope(ctx), nn.auto_forward():
        y = F.batch_normalization(
            *(vinputs + [axes, decay_rate, eps, batch_stat, output_stat]))
    assert np.allclose(ref_y, y.d, atol=1e-6)

def normalize(inp, layer_name, bn_batch_stat, activation, args, init_params):
    if args.norm == 'batch_norm':
        if init_params is None:
            inp = PF.batch_normalization(
                inp, batch_stat=bn_batch_stat, name=layer_name)
        else:
            inp = F.batch_normalization(inp,
                                        init_params[layer_name + '/bn/beta'],
                                        init_params[layer_name + '/bn/gamma'],
                                        mean=None,
                                        variance=None,
                                        batch_stat=bn_batch_stat)
    if activation is not None:
        return activation(inp)
    else:
        return inp

def batch_normalization(inp, axes=[1], decay_rate=0.9, eps=1e-5,
                        batch_stat=True, output_stat=False):
    """
    Batch normalization layer.

    .. math::

        \\begin{array}{lcl}
        \\mu &=& \\frac{1}{M} \\sum x_i\\\\
        \\sigma^2 &=& \\frac{1}{M} \\sum \\left(x_i - \\mu\\right)^2\\\\
        \\hat{x}_i &=& \\frac{x_i - \\mu}{\\sqrt{\\sigma^2 + \\epsilon}}\\\\
        y_i &=& \\hat{x}_i \\gamma + \\beta.
        \\end{array}

    where :math:`x_i, y_i` are the inputs.
    At test time, the mean and variance accumulated by the moving average
    during training are used instead.

    Args:
        inp (~nnabla.Variable): N-D array of input.
        axes (:obj:`tuple` of :obj:`int`): Axes over which the mean and
            variance are computed.
        decay_rate (float): Decay rate of running mean and variance.
        eps (float): Tiny value to avoid zero division by std.
        batch_stat (bool): Use mini-batch statistics rather than running ones.
        output_stat (bool): Output batch mean and variance.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    References:
        - Ioffe and Szegedy, Batch Normalization: Accelerating Deep Network
          Training by Reducing Internal Covariate Shift.
          https://arxiv.org/abs/1502.03167
    """
    assert len(axes) == 1
    shape_stat = [1 for _ in inp.shape]
    shape_stat[axes[0]] = inp.shape[axes[0]]
    beta = get_parameter_or_create(
        "beta", shape_stat, ConstantInitializer(0), True)
    gamma = get_parameter_or_create(
        "gamma", shape_stat, ConstantInitializer(1), True)
    mean = get_parameter_or_create(
        "mean", shape_stat, ConstantInitializer(0), False)
    var = get_parameter_or_create(
        "var", shape_stat, ConstantInitializer(0), False)
    return F.batch_normalization(inp, beta, gamma, mean, var, axes,
                                 decay_rate, eps, batch_stat, output_stat)

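# A minimal usage sketch for the batch_normalization layer above, added for
# illustration and not part of the original source. It assumes nnabla is
# imported as nn and the layer is exposed exactly as defined here. The running
# mean/var parameters created under the scope are updated while
# batch_stat=True and are consumed when batch_stat=False (inference).
def _batch_normalization_usage_sketch(test=False):
    import nnabla as nn
    x = nn.Variable((8, 32, 28, 28))
    with nn.parameter_scope("conv1/bn"):
        # Training: normalize with mini-batch statistics and update the
        # running statistics. Inference: use the accumulated running stats.
        y = batch_normalization(x, axes=[1], decay_rate=0.9, eps=1e-5,
                                batch_stat=not test)
    return y
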
def _normalize(x, norm_type, channel_axis=1):
    if norm_type.lower() == "in":
        return F.instance_normalization(x, gamma=None, beta=None,
                                        channel_axis=channel_axis)
    elif norm_type.lower() == "bn":
        return F.batch_normalization(x, gamma=None, beta=None,
                                     mean=None, variance=None,
                                     axes=channel_axis)
    else:
        raise ValueError("unknown norm_type: {}".format(norm_type))

def BN(inp, axes=[1], decay_rate=0.9, eps=1e-5, batch_stat=True,
       output_stat=False, fix_parameters=False):
    """Batch Normalization"""
    shape_stat = [1 for _ in inp.shape]
    shape_stat[axes[0]] = inp.shape[axes[0]]
    beta = get_parameter_or_create(
        "beta", shape_stat, ConstantInitializer(0), not fix_parameters)
    gamma = get_parameter_or_create(
        "gamma", shape_stat, ConstantInitializer(1), not fix_parameters)
    mean = get_parameter_or_create(
        "mean", shape_stat, ConstantInitializer(0), False)
    var = get_parameter_or_create(
        "var", shape_stat, ConstantInitializer(0), False)
    return F.batch_normalization(inp, beta, gamma, mean, var, axes,
                                 decay_rate, eps, batch_stat, output_stat)

def batch_normalization(inp, axes=[1], decay_rate=0.9, eps=1e-5,
                        batch_stat=True, output_stat=False):
    """
    Batch normalization layer.

    .. math::

        \\begin{array}{lcl}
        \\mu &=& \\frac{1}{M} \\sum x_i\\\\
        \\sigma^2 &=& \\frac{1}{M} \\sum \\left(x_i - \\mu\\right)^2\\\\
        \\hat{x}_i &=& \\frac{x_i - \\mu}{\\sqrt{\\sigma^2 + \\epsilon}}\\\\
        y_i &=& \\hat{x}_i \\gamma + \\beta.
        \\end{array}

    where :math:`x_i, y_i` are the inputs.
    At test time, the mean and variance accumulated by the moving average
    during training are used instead.

    Args:
        inp (~nnabla.Variable): N-D array of input.
        axes (:obj:`tuple` of :obj:`int`): Axes over which the mean and
            variance are computed.
        decay_rate (float): Decay rate of running mean and variance.
        eps (float): Tiny value to avoid zero division by std.
        batch_stat (bool): Use mini-batch statistics rather than running ones.
        output_stat (bool): Output batch mean and variance.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    References:
        - Ioffe and Szegedy, Batch Normalization: Accelerating Deep Network
          Training by Reducing Internal Covariate Shift.
          https://arxiv.org/abs/1502.03167
    """
    assert len(axes) == 1
    shape_stat = [1 for _ in inp.shape]
    shape_stat[axes[0]] = inp.shape[axes[0]]
    beta = get_parameter_or_create(
        "beta", shape_stat, ConstantInitializer(0), True)
    gamma = get_parameter_or_create(
        "gamma", shape_stat, ConstantInitializer(1), True)
    mean = get_parameter_or_create(
        "mean", shape_stat, ConstantInitializer(0), False)
    var = get_parameter_or_create(
        "var", shape_stat, ConstantInitializer(0), False)
    return F.batch_normalization(inp, beta, gamma, mean, var, axes,
                                 decay_rate, eps, batch_stat, output_stat)

def test_batch_normalization_for_multiple_axes_forward_backward(
        seed, axes, decay_rate, eps, output_stat, ctx, func_name):
    rng = np.random.RandomState(seed)
    inputs = list(create_inputs_for_multiple_axes(rng, axes))
    vinputs = []
    for i in inputs:
        vinputs.append(nn.Variable(i.shape, True))
        vinputs[-1].d = i

    # Check if global stat mode works
    batch_stat = False
    if output_stat:
        return
    ref_y = ref_batch_normalization_for_multiple_axes(
        *(inputs + [axes, decay_rate, eps, batch_stat, output_stat]))
    with nn.context_scope(ctx), nn.auto_forward():
        y = F.batch_normalization(
            *(vinputs + [axes, decay_rate, eps, batch_stat, output_stat]))
    assert_allclose(ref_y, y.d, atol=1e-6)

def __init__(self, x, weight, bias, beta, gamma, rmean, rvar, z, base_axis,
             pad, stride, dilation, group, channel_last, decay_rate, eps,
             batch_stat, nonlinearity, nonlinearity_args):
    from collections import OrderedDict
    inputs = OrderedDict()
    xvar = nn.Variable.from_numpy_array(x)
    weightvar = nn.Variable.from_numpy_array(weight)
    inputs['x'] = xvar
    inputs['weight'] = weightvar
    biasvar = None
    betavar = None
    gammavar = None
    rmeanvar = None
    rvarvar = None
    zvar = None
    if bias is not None:
        biasvar = nn.Variable.from_numpy_array(bias)
        inputs['bias'] = biasvar
    if beta is not None:
        betavar = nn.Variable.from_numpy_array(beta)
        gammavar = nn.Variable.from_numpy_array(gamma)
        rmeanvar = nn.Variable.from_numpy_array(rmean)
        rvarvar = nn.Variable.from_numpy_array(rvar)
        inputs['beta'] = betavar
        inputs['gamma'] = gammavar
        inputs['rmean'] = rmeanvar
        inputs['rvar'] = rvarvar
    if z is not None:
        zvar = nn.Variable.from_numpy_array(z)
        inputs['z'] = zvar
    h = F.convolution(xvar, weightvar, biasvar, base_axis, pad, stride,
                      dilation, group, channel_last)
    if beta is not None:
        h = F.batch_normalization(
            h, betavar, gammavar, rmeanvar, rvarvar,
            [h.ndim - 1 if channel_last else base_axis],
            decay_rate, eps, batch_stat)
    if z is not None:
        h = F.add2(h, zvar)
    h = ref_activation(h, nonlinearity, nonlinearity_args)
    self.input_dict = inputs
    self.output = h

def ref_grad_fused_batch_normalization(x, beta, gamma, rmean, rvar, z, dy,
                                       axes, decay_rate, eps, batch_stat,
                                       nonlinearity, output_stat, **kw):
    with nn.context_scope(cpu_context):
        xvar = nn.Variable.from_numpy_array(x, need_grad=True)
        xvar.g = 0
        betavar = nn.Variable.from_numpy_array(beta, need_grad=True)
        betavar.g = 0
        gammavar = nn.Variable.from_numpy_array(gamma, need_grad=True)
        gammavar.g = 0
        rmeanvar = nn.Variable.from_numpy_array(rmean)
        rmeanvar.g = 0
        rvarvar = nn.Variable.from_numpy_array(rvar)
        rvarvar.g = 0
        zvar = None
        if z is not None:
            zvar = nn.Variable.from_numpy_array(z, need_grad=True)
            zvar.g = 0
        with nn.auto_forward():
            bn = F.batch_normalization(xvar, betavar, gammavar, rmeanvar,
                                       rvarvar, axes, decay_rate, eps,
                                       batch_stat, output_stat)
            if z is None:
                if output_stat:
                    y1 = bn[0]
                else:
                    y1 = bn
            else:
                if output_stat:
                    y1 = F.add2(bn[0], zvar)
                else:
                    y1 = F.add2(bn, zvar)
            y = F.relu(y1)
        y.g = dy
        y.backward(dy)
    concat = [xvar.g.flatten(), betavar.g.flatten(), gammavar.g.flatten()]
    if z is not None:
        concat.append(zvar.g.flatten())
    return np.concatenate(concat)

def test_batch_normalization_forward_backward(seed, axis, decay_rate, eps,
                                              output_stat, batch_stat, ctx,
                                              func_name, no_scale, no_bias,
                                              no_mean, no_variance):
    from nbla_test_utils import function_tester
    rng = np.random.RandomState(seed)
    inputs = list(create_inputs(rng, axis))
    axes = [axis]

    if not batch_stat and (no_mean or no_variance):
        # check prohibited condition for mean=None and variance=None
        vinputs = []
        for i in inputs:
            vinputs.append(nn.Variable(i.shape, True))
        vinputs = mask_vinputs(
            vinputs, no_scale, no_bias, no_mean, no_variance)

        with pytest.raises(ValueError):
            F.batch_normalization(*vinputs,
                                  axes=axes,
                                  decay_rate=decay_rate,
                                  eps=eps,
                                  batch_stat=batch_stat,
                                  output_stat=output_stat)
        return
    else:
        inputs = mask_inputs(inputs, no_scale, no_bias, no_mean, no_variance)

    function_tester(rng, F.batch_normalization, ref_batch_normalization,
                    inputs,
                    func_args=[axes, decay_rate, eps, batch_stat, output_stat],
                    backward=[True, not no_bias, not no_scale, False, False],
                    ctx=ctx, func_name=func_name, dstep=1e-2, atol_b=1e-2)

    # Check if running mean and var works.
    if no_mean and no_variance:
        return

    vinputs = []
    for i in inputs:
        vinputs.append(nn.Variable(i.shape, True))
        vinputs[-1].d = i
    vinputs = mask_vinputs(vinputs, no_scale, no_bias, no_mean, no_variance)

    for i in range(5):
        inputs[0] = rng.randn(*inputs[0].shape)
        vinputs[0].d[...] = inputs[0]
        ref_y = ref_batch_normalization(
            *(inputs + [axes, decay_rate, eps, batch_stat, output_stat]))
        with nn.context_scope(ctx), nn.auto_forward():
            y = F.batch_normalization(
                *(vinputs + [axes, decay_rate, eps, batch_stat, output_stat]))
        if not no_mean:
            assert_allclose(vinputs[3].d, inputs[3], atol=1e-7)
        if not no_variance:
            assert_allclose(vinputs[4].d, inputs[4])

    # Check if global stat mode works
    batch_stat = False
    if no_mean or no_variance:
        return
    if output_stat:
        return
    ref_y = ref_batch_normalization(
        *(inputs + [axes, decay_rate, eps, batch_stat, output_stat]))
    with nn.context_scope(ctx), nn.auto_forward():
        y = F.batch_normalization(
            *(vinputs + [axes, decay_rate, eps, batch_stat, output_stat]))
    assert_allclose(ref_y, y.d, atol=1e-6)

def __call__(self, inp, test=False):
    return F.batch_normalization(inp, self.beta, self.gamma, self.mean,
                                 self.var, self.axes, self.decay_rate,
                                 self.eps, not test, self.output_stat)

def call(self, input):
    return F.batch_normalization(input, self._beta, self._gamma, self._mean,
                                 self._var, self._axes, self._decay_rate,
                                 self._eps, self.training, self._output_stat)

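# A small self-contained sketch (added for illustration; not part of the
# original source) checking that F.batch_normalization with batch_stat=False,
# as used by the inference paths above (`not test`, `self.training`),
# reduces to an affine transform with the given running statistics:
#     y = gamma * (x - mean) / sqrt(var + eps) + beta
def _global_stats_batch_normalization_check():
    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    rng = np.random.RandomState(0)
    x = rng.randn(4, 3, 5).astype(np.float32)
    beta = rng.randn(1, 3, 1).astype(np.float32)
    gamma = rng.randn(1, 3, 1).astype(np.float32)
    mean = rng.randn(1, 3, 1).astype(np.float32)
    var = rng.rand(1, 3, 1).astype(np.float32) + 0.5
    eps = 1e-5

    with nn.auto_forward():
        y = F.batch_normalization(
            nn.Variable.from_numpy_array(x),
            nn.Variable.from_numpy_array(beta),
            nn.Variable.from_numpy_array(gamma),
            nn.Variable.from_numpy_array(mean),
            nn.Variable.from_numpy_array(var),
            axes=[1], eps=eps, batch_stat=False)

    ref = gamma * (x - mean) / np.sqrt(var + eps) + beta
    assert np.allclose(y.d, ref, atol=1e-6)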