def test_op_batch_normalization(use_cudnn, sample, device_id, precision):
    """Check the forward pass of non-spatial batch normalization.

    The reference output is computed with NumPy from fixed running
    statistics (mean 1, variance 2) and fixed scale/bias (3 and 4) and is
    compared with the CNTK node through ``unittest_helper``.  The call that
    passes ``spatial`` positionally and omits ``running_count`` is only
    checked for emitting a warning.
    """
    np_dtype = PRECISION_TO_TYPE[precision]
    eps = 0.00001
    device = cntk_device(device_id)

    # Flatten the samples into a column: one scalar per row.
    column = AA(sample, dtype=np_dtype).reshape(-1, 1)

    running_mean_value = 1
    running_var_value = 2
    scale_value = 3
    bias_value = 4

    # NumPy reference, evaluated row by row.
    std = np.sqrt(running_var_value + eps)
    reference_rows = []
    for row in column:
        reference_rows.append(
            (row - running_mean_value) / std * scale_value + bias_value)
    expected_forward = AA(reference_rows)

    scale = Parameter(init=AA([scale_value], dtype=np_dtype),
                      dtype=np_dtype, device=device)
    bias = Parameter(init=AA([bias_value], dtype=np_dtype),
                     dtype=np_dtype, device=device)
    run_mean = constant(running_mean_value, shape=(1),
                        dtype=np_dtype, device=device)
    run_variance = constant(running_var_value, shape=(1),
                            dtype=np_dtype, device=device)
    run_count = constant(0, dtype=np_dtype, device=device)

    # Aliased on import so the builtin ``input`` is not shadowed.
    from cntk import batch_normalization, input as input_var

    a = input_var(shape=(1), dtype=np_dtype, needs_gradient=False, name='a')

    # No running_count here: this call form is expected to warn.
    with pytest.warns(Warning):
        legacy_op = batch_normalization(
            a, scale, bias, run_mean, run_variance, False,
            epsilon=eps, use_cudnn_engine=use_cudnn)

    op_node = batch_normalization(
        a, scale, bias, run_mean, run_variance,
        running_count=run_count,
        spatial=False,
        epsilon=eps,
        use_cudnn_engine=use_cudnn)

    unittest_helper(op_node, {a: column}, expected_forward,
                    expected_backward=None,
                    device_id=device_id, precision=precision)
def test_batchnorm(device_id):
    """Smoke-test the gradient of batch normalization on a float16 input.

    The input variable is float16 while scale, bias and the running
    statistics are created with dtype 'float'.  The test contains no
    assertions: it only requires that ``grad`` can be called.  It is
    skipped when ``device_id`` is -1.
    """
    if device_id == -1:
        pytest.skip('Test only runs on GPU')

    feature_shape = (3, )
    features = C.input_variable(feature_shape, dtype='float16')

    scale = C.parameter(feature_shape, init=1, dtype='float')
    bias = C.parameter(feature_shape, init=2, dtype='float')
    run_mean = C.constant(3, shape=feature_shape, dtype='float')
    run_variance = C.constant(4, shape=feature_shape, dtype='float')
    run_count = C.constant(0, shape=(), dtype='float')

    bn = C.batch_normalization(
        features, scale, bias, run_mean, run_variance,
        running_count=run_count,
        spatial=False,
        normalization_time_constant=5000,
        blend_time_constant=0,
        epsilon=0.00001,
        use_cudnn_engine=True,
        disable_regularization=True)

    # A single sample with three features, cast to half precision.
    half_batch = AA([[1, 2, 3]]).astype(np.float16)
    bn.grad(half_batch, wrt=[scale, bias])
def test_BatchNormalization(tmpdir):
    """Build a non-spatial float32 batch-normalization node and verify it.

    The node and its input data are handed to ``verify_one_input`` together
    with ``tmpdir`` and the name 'BatchNormalization'.
    """
    np_dtype = np.float32

    # 5 samples having 4 classes
    raw_samples = [
        [1, 1, 2, 3],
        [0, 0, 0, 0],
        [3, 3, 4, 4],
        [1000, 1000, 1000, 1000],
        [10000, 10000, 10000, 10000],
    ]
    eps = 0.00001

    # Flattened to a column: the 5 x 4 values become 20 rows of one scalar.
    column = np.asarray(raw_samples, dtype=np_dtype).reshape(-1, 1)

    running_mean_value = 1
    running_var_value = 2
    scale_value = 3
    bias_value = 4

    scale = C.Parameter(init=np.asarray([scale_value], dtype=np_dtype),
                        dtype=np_dtype)
    bias = C.Parameter(init=np.asarray([bias_value], dtype=np_dtype),
                       dtype=np_dtype)
    run_mean = C.ops.constant(running_mean_value, shape=(1), dtype=np_dtype)
    run_variance = C.ops.constant(running_var_value, shape=(1),
                                  dtype=np_dtype)
    run_count = C.ops.constant(0, dtype=np_dtype)

    a = C.input_variable(shape=(1), dtype=np_dtype,
                         needs_gradient=False, name='a')

    op_node = C.batch_normalization(
        a, scale, bias, run_mean, run_variance,
        running_count=run_count,
        spatial=False,
        epsilon=eps)

    verify_one_input(op_node, column, tmpdir, 'BatchNormalization')
def test_BatchNormalization(tmpdir, dtype):
    """Build a non-spatial batch-normalization node for ``dtype`` and verify it.

    The float16 case is skipped ("TO BE FIXED").  For every other dtype the
    node is built and passed to ``verify_one_input`` while
    ``C.default_options(dtype=dtype)`` is active.
    """
    if dtype == np.float16:
        pytest.skip("TO BE FIXED")

    with C.default_options(dtype=dtype):
        # 5 samples having 4 classes
        raw_samples = [
            [1, 1, 2, 3],
            [0, 0, 0, 0],
            [3, 3, 4, 4],
            [1000, 1000, 1000, 1000],
            [10000, 10000, 10000, 10000],
        ]
        eps = 0.00001

        # Flattened to a column: the 5 x 4 values become 20 rows of one scalar.
        column = np.asarray(raw_samples, dtype=dtype).reshape(-1, 1)

        running_mean_value = 1
        running_var_value = 2
        scale_value = 3
        bias_value = 4

        scale = C.Parameter(init=np.asarray([scale_value], dtype=dtype),
                            dtype=dtype)
        bias = C.Parameter(init=np.asarray([bias_value], dtype=dtype),
                           dtype=dtype)
        run_mean = C.ops.constant(running_mean_value, shape=(1), dtype=dtype)
        run_variance = C.ops.constant(running_var_value, shape=(1),
                                      dtype=dtype)
        run_count = C.ops.constant(0, dtype=dtype)

        a = C.input_variable(shape=(1), dtype=dtype,
                             needs_gradient=False, name='a')

        op_node = C.batch_normalization(
            a, scale, bias, run_mean, run_variance,
            running_count=run_count,
            spatial=False,
            epsilon=eps)

        verify_one_input(op_node, column, tmpdir, 'BatchNormalization')
def batch_normalization(operand, scale, bias, running_mean, running_inv_std,
                        special, normalization_time_constant=0,
                        blend_time_constant=0, epsilon=0.00001,
                        use_cudnn_engine=False, name=''):
    '''
    Thin wrapper around ``cntk.batch_normalization``.

    ``operand`` is passed through ``sanitize_input``; every other argument
    is forwarded unchanged, positionally and in signature order.  The
    ``output()`` of the created node is returned.

    TODO: document the semantics of the individual arguments.

    Args:
        operand: input of the node; sanitized before use
        scale: forwarded as-is
        bias: forwarded as-is
        running_mean: forwarded as-is
        running_inv_std: forwarded as-is
        special: forwarded as-is
            (NOTE(review): possibly a misspelling of ``spatial`` - confirm)
        normalization_time_constant: forwarded as-is
        blend_time_constant: forwarded as-is
        epsilon: forwarded as-is
        use_cudnn_engine: forwarded as-is
        name (str): the name of the node in the network

    Returns:
        :class:`cntk.Function`
    '''
    # Imported under an alias because this wrapper carries the same name.
    from cntk import batch_normalization as _cntk_batch_normalization

    sanitized_operand = sanitize_input(operand)
    node = _cntk_batch_normalization(
        sanitized_operand, scale, bias, running_mean, running_inv_std,
        special, normalization_time_constant, blend_time_constant,
        epsilon, use_cudnn_engine, name)
    return node.output()
def test_op_batch_normalization_spatial_shape_inference(channels, input_size, device_id, precision):
    """Check shape inference for the parameters of spatial batch normalization.

    Scale, bias and the running mean/variance are declared with
    ``C.InferredDimension``.  After the node has been constructed each of
    them must have the shape ``(channels,)``.
    """
    np_dtype = PRECISION_TO_TYPE[precision]
    device = cntk_device(device_id)

    spatial = True
    eps = 0.01

    init_scale = 1
    init_bias = 2
    init_mean = 3
    init_var = 4
    init_count = 2

    input_shape = (channels, input_size, input_size)
    inferred_shape = (C.InferredDimension,)

    features = C.input_variable(input_shape, dtype=np_dtype)
    scale = C.parameter(inferred_shape, init=init_scale,
                        dtype=np_dtype, device=device)
    bias = C.parameter(inferred_shape, init=init_bias,
                       dtype=np_dtype, device=device)
    run_mean = C.constant(init_mean, shape=inferred_shape,
                          dtype=np_dtype, device=device)
    run_var = C.constant(init_var, shape=inferred_shape,
                         dtype=np_dtype, device=device)
    run_count = C.constant(init_count, shape=(),
                           dtype=np_dtype, device=device)

    # The node itself is not inspected; it is built before the shape checks.
    bn = C.batch_normalization(
        features, scale, bias, run_mean, run_var, spatial,
        normalization_time_constant=-1,
        epsilon=eps,
        running_count=run_count)

    expected_shape = (channels,)
    for inferred in (scale, bias, run_mean, run_var):
        assert inferred.shape == expected_shape
def test_op_batch_normalization(use_cudnn, sample, device_id, precision):
    """Check the forward pass of non-spatial batch normalization.

    The samples are reshaped to ``(-1, 1, 1)``.  The reference output is
    computed with NumPy from fixed running statistics (mean 1, variance 2)
    and fixed scale/bias (3 and 4), then compared with the CNTK node through
    ``unittest_helper``.  The call that omits ``running_count`` is only
    checked for emitting a warning.
    """
    np_dtype = PRECISION_TO_TYPE[precision]
    eps = 0.00001
    device = cntk_device(device_id)

    data = AA(sample, dtype=np_dtype).reshape(-1, 1, 1)

    running_mean_value, running_var_value = 1, 2
    scale_value, bias_value = 3, 4

    # NumPy reference, one entry per leading-axis element of ``data``.
    std = np.sqrt(running_var_value + eps)
    expected_forward = AA([
        (entry - running_mean_value) / std * scale_value + bias_value
        for entry in data
    ])

    scale = Parameter(init=AA([scale_value], dtype=np_dtype), device=device)
    bias = Parameter(init=AA([bias_value], dtype=np_dtype), device=device)
    run_mean = constant(running_mean_value, shape=(1), device=device)
    run_variance = constant(running_var_value, shape=(1), device=device)
    run_count = constant(0, device=device)

    from cntk import batch_normalization

    a = I(shape=(1), dtype=np_dtype, needs_gradient=False, name='a')

    # No running_count here: this call form is expected to warn.
    with pytest.warns(Warning):
        legacy_op = batch_normalization(
            a, scale, bias, run_mean, run_variance, False,
            epsilon=eps, use_cudnn_engine=use_cudnn)

    op_node = batch_normalization(
        a, scale, bias, run_mean, run_variance,
        running_count=run_count,
        spatial=False,
        epsilon=eps,
        use_cudnn_engine=use_cudnn)

    unittest_helper(op_node, {a: data}, expected_forward,
                    expected_backward=None,
                    device_id=device_id, precision=precision)
def test_op_batch_normalization(use_cudnn, sample, device_id, precision):
    """Check the evaluated output of non-spatial batch normalization.

    The node is evaluated directly with ``eval``.  Each result is compared,
    in shape and (within ``TOLERANCE_ABSOLUTE``) in value, with a NumPy
    reference built from fixed running statistics (mean 1, variance 2) and
    fixed scale/bias (3 and 4).
    """
    np_dtype = PRECISION_TO_TYPE[precision]
    eps = 0.00001

    data = AA(sample, dtype=np_dtype).reshape(-1, 1, 1)

    running_mean_value = 1
    running_var_value = 2
    scale_value = 3
    bias_value = 4

    # NumPy reference, one entry per leading-axis element of ``data``.
    std = np.sqrt(running_var_value + eps)
    reference = []
    for entry in data:
        reference.append(
            (entry - running_mean_value) / std * scale_value + bias_value)
    expected_forward = AA(reference)

    scale = Parameter(init=AA([scale_value], dtype=np_dtype))
    bias = Parameter(init=AA([bias_value], dtype=np_dtype))
    run_mean = Constant(running_mean_value, shape=(1), dtype=np_dtype)
    run_variance = Constant(running_var_value, shape=(1), dtype=np_dtype)

    from cntk import batch_normalization

    # Local name chosen so the builtin ``input`` is not shadowed; the node
    # name itself is unchanged.
    input_var = I(shape=(1), dtype=np_dtype,
                  needs_gradient=False, name='input')

    op = batch_normalization(
        input_var, scale, bias, run_mean, run_variance, False,
        epsilon=eps, use_cudnn_engine=use_cudnn)

    actual_forward = op.eval({input_var: data})

    # Pairs are compared up to the shorter of the two sequences.
    for actual, expected in zip(actual_forward, expected_forward):
        assert actual.shape == AA(expected).shape
        assert np.allclose(actual, expected, atol=TOLERANCE_ABSOLUTE)
def test_op_batch_normalization_numpy(shape, spatial, device_id, precision):
    """Compare batch normalization with a NumPy reference implementation.

    Checked in this order:

    * ``eval`` output against the formula using the initial running
      mean/variance,
    * the output returned by ``grad`` against the formula using the
      statistics of the batch,
    * the gradients with respect to scale and bias,
    * the running variance, running mean and running count after ``grad``.
    """
    import sys

    # The original author reports that the NumPy reference below does not
    # work on Python 2.7 (no cause recorded), so the test is skipped there.
    if sys.version_info[0] < 3:
        pytest.skip("Only works on Python 3+")

    np_dtype = PRECISION_TO_TYPE[precision]
    device = cntk_device(device_id)

    if not spatial:
        # One parameter per input element; reduce over the batch axis only.
        param_shape = (np.prod(shape), )
        reduced_shape = ()
        reduce_dims = (0, )
    else:
        # One parameter per leading-axis entry; reduce over the batch axis
        # and the trailing axes.
        param_shape = (shape[0], )
        reduced_shape = shape[1:]
        reduce_dims = (0, 2, 3, 4)[0:len(shape)]

    batch_size = 3
    batch = 10 * np.random.random((batch_size, ) + shape).astype(np_dtype)

    init_scale = 1
    init_bias = 2
    init_mean = 3
    init_var = 4
    init_count = 2
    epsilon = 0.01

    features = C.input_variable(shape, dtype=np_dtype)
    scale = C.parameter(param_shape, init=init_scale,
                        dtype=np_dtype, device=device)
    bias = C.parameter(param_shape, init=init_bias,
                       dtype=np_dtype, device=device)
    run_mean = C.constant(init_mean, shape=param_shape,
                          dtype=np_dtype, device=device)
    run_var = C.constant(init_var, shape=param_shape,
                         dtype=np_dtype, device=device)
    run_count = C.constant(init_count, shape=(),
                           dtype=np_dtype, device=device)

    # use negative normalization_time_constant for easier exp_avg compute
    bn = C.batch_normalization(
        features, scale, bias, run_mean, run_var, spatial,
        normalization_time_constant=-1,
        epsilon=epsilon,
        running_count=run_count)

    eval_output = bn.eval(batch, device=device)
    expected_eval = ((batch - init_mean) / np.sqrt(init_var + epsilon)
                     * init_scale + init_bias)
    assert np.allclose(expected_eval, eval_output)

    bwd = bn.grad(batch, wrt=bn.parameters, outputs=[bn], device=device)

    def expand_to_batch(stat):
        # Tile a reduced statistic over ``reduced_shape`` and repeat it
        # ``batch_size`` times along a new leading axis.
        tiled = [np.ones(reduced_shape) * entry for entry in stat]
        return np.asarray([tiled] * batch_size)

    # Relies on true division (the operands are ints).
    exp_avg = batch_size / (init_count + batch_size)

    mean = np.mean(batch, reduce_dims)
    mean_b = expand_to_batch(mean)
    reduced_count = batch_size * np.prod(reduced_shape)
    var = np.mean((batch - mean_b)**2, reduce_dims)
    # the output variance is unbiased, while computation uses biased variance
    var_out = var * reduced_count / (reduced_count - 1)
    var_b = expand_to_batch(var)

    x_hat = (batch - mean_b) / np.sqrt(var_b + epsilon)
    expected_output = init_scale * x_hat + init_bias
    d_scale = np.sum(x_hat, reduce_dims)
    d_bias = np.sum(np.ones_like(x_hat), reduce_dims)

    assert np.allclose(expected_output, bwd[1], atol=1e-6)
    assert np.allclose(d_scale.reshape(param_shape), bwd[0][scale],
                       atol=1e-2)
    assert np.allclose(d_bias.reshape(param_shape), bwd[0][bias])

    expected_run_var = (init_var * (1 - exp_avg)
                        + var_out.reshape(param_shape) * exp_avg)
    assert np.allclose(expected_run_var, run_var.value)

    expected_run_mean = (init_mean * (1 - exp_avg)
                         + mean.reshape(param_shape) * exp_avg)
    assert np.allclose(expected_run_mean, run_mean.value)

    assert run_count.value == init_count + batch_size
def test_op_batch_normalization_numpy(shape, spatial, device_id, precision):
    """Validate batch normalization against reference values from NumPy.

    The test first evaluates the node and expects the output to follow the
    initial running mean/variance.  It then calls ``grad`` and checks the
    returned output, the scale/bias gradients, and the updated running
    variance, running mean and running count.
    """
    import sys

    # According to the original note the NumPy code below does not work in
    # Python 2.7 (reason not recorded), hence the skip.
    if sys.version_info[0] < 3:
        pytest.skip("Only works on Python 3+")

    np_dtype = PRECISION_TO_TYPE[precision]
    device = cntk_device(device_id)

    if spatial:
        param_shape, reduced_shape = (shape[0],), shape[1:]
        reduce_dims = (0, 2, 3, 4)[0:len(shape)]
    else:
        param_shape, reduced_shape, reduce_dims = (np.prod(shape),), (), (0,)

    batch_size = 3
    samples = 10 * np.random.random((batch_size,) + shape).astype(np_dtype)

    init_scale, init_bias = 1, 2
    init_mean, init_var = 3, 4
    init_count = 2
    epsilon = 0.01

    net_input = C.input_variable(shape, dtype=np_dtype)
    scale = C.parameter(param_shape, init=init_scale,
                        dtype=np_dtype, device=device)
    bias = C.parameter(param_shape, init=init_bias,
                       dtype=np_dtype, device=device)
    run_mean = C.constant(init_mean, shape=param_shape,
                          dtype=np_dtype, device=device)
    run_var = C.constant(init_var, shape=param_shape,
                         dtype=np_dtype, device=device)
    run_count = C.constant(init_count, shape=(),
                           dtype=np_dtype, device=device)

    # use negative normalization_time_constant for easier exp_avg compute
    bn = C.batch_normalization(net_input, scale, bias, run_mean, run_var,
                               spatial,
                               normalization_time_constant=-1,
                               epsilon=epsilon,
                               running_count=run_count)

    fwd = bn.eval(samples, device=device)
    y_fwd = ((samples - init_mean) / np.sqrt(init_var + epsilon)
             * init_scale + init_bias)
    assert np.allclose(y_fwd, fwd)

    bwd = bn.grad(samples, wrt=bn.parameters, outputs=[bn], device=device)

    # Relies on true division (the operands are ints).
    exp_avg = batch_size / (init_count + batch_size)

    mean = np.mean(samples, reduce_dims)
    # Loop variables are named so they do not shadow the sample batch.
    mean_b = np.asarray(
        [[np.ones(reduced_shape) * m for m in mean]] * batch_size)
    reduced_count = batch_size * np.prod(reduced_shape)
    var = np.mean((samples - mean_b) ** 2, reduce_dims)
    # the output variance is unbiased, while computation uses biased variance
    var_out = var * reduced_count / (reduced_count - 1)
    var_b = np.asarray(
        [[np.ones(reduced_shape) * v for v in var]] * batch_size)

    x_hat = (samples - mean_b) / np.sqrt(var_b + epsilon)
    y = init_scale * x_hat + init_bias
    d_scale = np.sum(x_hat, reduce_dims)
    d_bias = np.sum(np.ones_like(x_hat), reduce_dims)

    keep = 1 - exp_avg  # weight of the initial running statistic
    assert np.allclose(y, bwd[1], atol=1e-6)
    assert np.allclose(d_scale.reshape(param_shape), bwd[0][scale],
                       atol=1e-2)
    assert np.allclose(d_bias.reshape(param_shape), bwd[0][bias])
    assert np.allclose(
        init_var * keep + var_out.reshape(param_shape) * exp_avg,
        run_var.value)
    assert np.allclose(
        init_mean * keep + mean.reshape(param_shape) * exp_avg,
        run_mean.value)
    assert run_count.value == init_count + batch_size