Beispiel #1
0
def test_op_batch_normalization(use_cudnn, sample, device_id, precision):
    """Check the non-spatial batch-normalization forward pass against NumPy.

    ``sample`` is reshaped so that every element becomes one single-valued
    row.  The reference output is
    ``(x - mean) / sqrt(var + epsilon) * scale + bias`` using the fixed
    statistics defined below.  The op is built twice: first without
    ``running_count``, which is expected to emit a warning, and then with
    it; only the second node is handed to ``unittest_helper``.
    """
    from cntk import batch_normalization, input

    dtype = PRECISION_TO_TYPE[precision]
    epsilon = 0.00001
    dev = cntk_device(device_id)

    mean, var = 1, 2
    init_scale, init_bias = 3, 4

    t = AA(sample, dtype=dtype).reshape(-1, 1)
    expected_forward = AA([
        (row - mean) / np.sqrt(var + epsilon) * init_scale + init_bias
        for row in t
    ])

    # dtype/device are shared by every parameter and constant below.
    placement = dict(dtype=dtype, device=dev)
    scale = Parameter(init=AA([init_scale], dtype=dtype), **placement)
    bias = Parameter(init=AA([init_bias], dtype=dtype), **placement)
    run_mean = constant(mean, shape=(1), **placement)
    run_variance = constant(var, shape=(1), **placement)
    run_count = constant(0, **placement)

    a = input(shape=(1), dtype=dtype, needs_gradient=False, name='a')

    engine_opts = dict(epsilon=epsilon, use_cudnn_engine=use_cudnn)

    # Building the op without running_count is expected to raise a warning.
    with pytest.warns(Warning):
        op = batch_normalization(a, scale, bias, run_mean, run_variance,
                                 False, **engine_opts)

    op_node = batch_normalization(a, scale, bias, run_mean, run_variance,
                                  running_count=run_count,
                                  spatial=False,
                                  **engine_opts)

    unittest_helper(op_node,
                    {a: t},
                    expected_forward,
                    expected_backward=None,
                    device_id=device_id,
                    precision=precision)
Beispiel #2
0
def test_batchnorm(device_id):
    """Smoke-test the batch-normalization gradient with a float16 input.

    The input variable is float16 while scale, bias and the running
    statistics are created with dtype 'float'.  The test is skipped when
    ``device_id`` is -1 and otherwise only checks that ``grad`` with respect
    to scale and bias runs; nothing is asserted about the returned values.
    """
    if device_id == -1:
        pytest.skip('Test only runs on GPU')

    shape = (3, )
    stats_dtype = 'float'

    i = C.input_variable(shape, dtype='float16')
    scale = C.parameter(shape, init=1, dtype=stats_dtype)
    bias = C.parameter(shape, init=2, dtype=stats_dtype)
    run_mean = C.constant(3, shape=shape, dtype=stats_dtype)
    run_variance = C.constant(4, shape=shape, dtype=stats_dtype)
    run_count = C.constant(0, shape=(), dtype=stats_dtype)

    bn_options = dict(
        running_count=run_count,
        spatial=False,
        normalization_time_constant=5000,
        blend_time_constant=0,
        epsilon=0.00001,
        use_cudnn_engine=True,
        disable_regularization=True,
    )
    bn = C.batch_normalization(i, scale, bias, run_mean, run_variance,
                               **bn_options)

    # A single sample holding three float16 values.
    data = AA([[1, 2, 3]]).astype(np.float16)
    bn.grad(data, wrt=[scale, bias])
Beispiel #3
0
def test_BatchNormalization(tmpdir):
    """Build a float32 non-spatial batch-normalization node and verify it.

    The 5x4 ``sample`` grid is reshaped to one value per row and passed,
    together with the node, ``tmpdir`` and the label 'BatchNormalization',
    to ``verify_one_input``.
    """
    dtype = np.float32
    epsilon = 0.00001

    sample = [  # 5 samples having 4 classes
        [1, 1, 2, 3],
        [0, 0, 0, 0],
        [3, 3, 4, 4],
        [1000, 1000, 1000, 1000],
        [10000, 10000, 10000, 10000]]

    # Reshaped to one value per row.
    t = np.asarray(sample, dtype=dtype).reshape(-1, 1)

    mean, var = 1, 2
    init_scale, init_bias = 3, 4

    scale = C.Parameter(init=np.asarray([init_scale], dtype=dtype),
                        dtype=dtype)
    bias = C.Parameter(init=np.asarray([init_bias], dtype=dtype),
                       dtype=dtype)
    run_mean = C.ops.constant(mean, shape=(1), dtype=dtype)
    run_variance = C.ops.constant(var, shape=(1), dtype=dtype)
    run_count = C.ops.constant(0, dtype=dtype)

    a = C.input_variable(shape=(1), dtype=dtype, needs_gradient=False,
                         name='a')

    op_node = C.batch_normalization(a,
                                    scale,
                                    bias,
                                    run_mean,
                                    run_variance,
                                    running_count=run_count,
                                    spatial=False,
                                    epsilon=epsilon)

    verify_one_input(op_node, t, tmpdir, 'BatchNormalization')
Beispiel #4
0
def test_BatchNormalization(tmpdir, dtype):
    """Build a non-spatial batch-normalization node for ``dtype`` and verify it.

    float16 is skipped ("TO BE FIXED").  For other dtypes the whole setup
    runs inside ``C.default_options(dtype=dtype)``; the 5x4 ``sample`` grid
    is reshaped to one value per row and passed with the node, ``tmpdir``
    and the label 'BatchNormalization' to ``verify_one_input``.
    """
    if dtype == np.float16:
        pytest.skip("TO BE FIXED")

    with C.default_options(dtype=dtype):
        epsilon = 0.00001

        sample = [  # 5 samples having 4 classes
            [1, 1, 2, 3],
            [0, 0, 0, 0],
            [3, 3, 4, 4],
            [1000, 1000, 1000, 1000],
            [10000, 10000, 10000, 10000]]

        # Reshaped to one value per row.
        t = np.asarray(sample, dtype=dtype).reshape(-1, 1)

        mean, var = 1, 2
        init_scale, init_bias = 3, 4

        scale = C.Parameter(init=np.asarray([init_scale], dtype=dtype),
                            dtype=dtype)
        bias = C.Parameter(init=np.asarray([init_bias], dtype=dtype),
                           dtype=dtype)
        run_mean = C.ops.constant(mean, shape=(1), dtype=dtype)
        run_variance = C.ops.constant(var, shape=(1), dtype=dtype)
        run_count = C.ops.constant(0, dtype=dtype)

        a = C.input_variable(shape=(1), dtype=dtype, needs_gradient=False,
                             name='a')

        op_node = C.batch_normalization(a,
                                        scale,
                                        bias,
                                        run_mean,
                                        run_variance,
                                        running_count=run_count,
                                        spatial=False,
                                        epsilon=epsilon)

        verify_one_input(op_node, t, tmpdir, 'BatchNormalization')
Beispiel #5
0
def batch_normalization(operand, scale, bias, running_mean, running_inv_std, special,
                        normalization_time_constant=0, blend_time_constant=0,
                        epsilon=0.00001, use_cudnn_engine=False, name=''):
    '''
    Thin wrapper around ``cntk.batch_normalization``.

    ``operand`` is passed through ``sanitize_input``; every other argument is
    forwarded unchanged, positionally and in the order listed below, and
    ``.output()`` of the resulting node is returned.

    Args:
        operand: input of the node; sanitized with ``sanitize_input``
        scale: forwarded as-is
        bias: forwarded as-is
        running_mean: forwarded as-is
        running_inv_std: forwarded as-is
        special: forwarded as-is as the sixth positional argument
         (NOTE(review): possibly meant to be the ``spatial`` flag - confirm)
        normalization_time_constant: forwarded as-is (default 0)
        blend_time_constant: forwarded as-is (default 0)
        epsilon: forwarded as-is (default 0.00001)
        use_cudnn_engine: forwarded as-is (default False)
        name (str): the name of the node in the network
    Returns:
        the value of ``.output()`` called on the node created by
        ``cntk.batch_normalization``
    '''
    from cntk import batch_normalization as _cntk_batch_normalization

    node = _cntk_batch_normalization(
        sanitize_input(operand), scale, bias, running_mean, running_inv_std,
        special, normalization_time_constant, blend_time_constant,
        epsilon, use_cudnn_engine, name)
    return node.output()
Beispiel #6
0
def test_op_batch_normalization_spatial_shape_inference(channels, input_size, device_id, precision):
    """Check that spatial batch normalization infers its parameter shapes.

    Scale, bias and the running mean/variance are declared with shape
    ``(C.InferredDimension,)``.  After the op is constructed on an input of
    shape ``(channels, input_size, input_size)``, each of them must report
    shape ``(channels,)``.
    """
    dtype = PRECISION_TO_TYPE[precision]
    dev = cntk_device(device_id)

    spatial = True
    epsilon = 0.01
    init_scale, init_bias, init_mean, init_var, init_count = 1, 2, 3, 4, 2

    shape = (channels, input_size, input_size)
    param_shape = (C.InferredDimension,)

    placement = dict(dtype=dtype, device=dev)

    i = C.input_variable(shape, dtype=dtype)
    scale = C.parameter(param_shape, init=init_scale, **placement)
    bias = C.parameter(param_shape, init=init_bias, **placement)
    run_mean = C.constant(init_mean, shape=param_shape, **placement)
    run_var = C.constant(init_var, shape=param_shape, **placement)
    run_count = C.constant(init_count, shape=(), **placement)

    # Only constructing the op matters here; it is never evaluated.
    bn = C.batch_normalization(i,
                               scale,
                               bias,
                               run_mean,
                               run_var,
                               spatial,
                               normalization_time_constant=-1,
                               epsilon=epsilon,
                               running_count=run_count)

    expected_shape = (channels,)
    for param in (scale, bias, run_mean, run_var):
        assert param.shape == expected_shape
Beispiel #7
0
def batch_normalization(operand, scale, bias, running_mean, running_inv_std, special,
                        normalization_time_constant=0, blend_time_constant=0,
                        epsilon=0.00001, use_cudnn_engine=False, name=''):
    '''
    Create a batch-normalization node by delegating to
    ``cntk.batch_normalization``.

    Only ``operand`` is touched (via ``sanitize_input``); all remaining
    arguments are handed on positionally in signature order.  The function
    returns whatever ``.output()`` yields on the created node.

    Args:
        operand: node input, run through ``sanitize_input`` first
        scale: passed through unchanged
        bias: passed through unchanged
        running_mean: passed through unchanged
        running_inv_std: passed through unchanged
        special: passed through unchanged in the sixth positional slot
         (NOTE(review): may be a misspelling of ``spatial`` - confirm)
        normalization_time_constant: passed through unchanged (default 0)
        blend_time_constant: passed through unchanged (default 0)
        epsilon: passed through unchanged (default 0.00001)
        use_cudnn_engine: passed through unchanged (default False)
        name (str): the name of the node in the network
    Returns:
        result of ``.output()`` on the node built by
        ``cntk.batch_normalization``
    '''
    from cntk import batch_normalization as _delegate

    forwarded = (
        sanitize_input(operand), scale, bias, running_mean, running_inv_std,
        special, normalization_time_constant, blend_time_constant,
        epsilon, use_cudnn_engine, name,
    )
    return _delegate(*forwarded).output()
Beispiel #8
0
def test_op_batch_normalization(use_cudnn, sample, device_id, precision):
    """Check the non-spatial batch-normalization forward pass against NumPy.

    ``sample`` is reshaped to ``(-1, 1, 1)`` and the reference output
    ``(x - mean) / sqrt(var + epsilon) * scale + bias`` is computed per
    entry.  The op is first built without ``running_count`` (expected to
    warn) and then with it; the latter node goes to ``unittest_helper``.
    """
    from cntk import batch_normalization

    dtype = PRECISION_TO_TYPE[precision]
    epsilon = 0.00001
    dev = cntk_device(device_id)

    mean, var = 1, 2
    init_scale, init_bias = 3, 4

    t = AA(sample, dtype=dtype).reshape(-1, 1, 1)
    expected_forward = AA([
        (entry - mean) / np.sqrt(var + epsilon) * init_scale + init_bias
        for entry in t
    ])

    scale = Parameter(init=AA([init_scale], dtype=dtype), device=dev)
    bias = Parameter(init=AA([init_bias], dtype=dtype), device=dev)
    run_mean = constant(mean, shape=(1), device=dev)
    run_variance = constant(var, shape=(1), device=dev)
    run_count = constant(0, device=dev)

    a = I(shape=(1), dtype=dtype, needs_gradient=False, name='a')

    engine_opts = dict(epsilon=epsilon, use_cudnn_engine=use_cudnn)

    # Leaving out running_count is expected to produce a warning.
    with pytest.warns(Warning):
        op = batch_normalization(a, scale, bias, run_mean, run_variance,
                                 False, **engine_opts)

    op_node = batch_normalization(a,
                                  scale,
                                  bias,
                                  run_mean,
                                  run_variance,
                                  running_count=run_count,
                                  spatial=False,
                                  **engine_opts)

    unittest_helper(op_node,
                    {a: t},
                    expected_forward,
                    expected_backward=None,
                    device_id=device_id,
                    precision=precision)
def test_op_batch_normalization(use_cudnn, sample, device_id, precision):
    """Evaluate non-spatial batch normalization and compare with NumPy.

    ``sample`` is reshaped to ``(-1, 1, 1)``; the expected value of each
    entry is ``(x - mean) / sqrt(var + epsilon) * scale + bias``.  The op is
    evaluated directly with ``eval`` and every result is checked for shape
    and for closeness within ``TOLERANCE_ABSOLUTE``.  ``device_id`` is not
    used in the body.
    """
    from cntk import batch_normalization

    dtype = PRECISION_TO_TYPE[precision]
    epsilon = 0.00001

    mean, var = 1, 2
    init_scale, init_bias = 3, 4

    t = AA(sample, dtype=dtype).reshape(-1, 1, 1)
    expected_forward = AA([
        (entry - mean) / np.sqrt(var + epsilon) * init_scale + init_bias
        for entry in t
    ])

    scale = Parameter(init=AA([init_scale], dtype=dtype))
    bias = Parameter(init=AA([init_bias], dtype=dtype))
    run_mean = Constant(mean, shape=(1), dtype=dtype)
    run_variance = Constant(var, shape=(1), dtype=dtype)

    # Local name avoids shadowing the builtin; the node name stays 'input'.
    net_input = I(shape=(1), dtype=dtype, needs_gradient=False, name='input')

    op = batch_normalization(net_input, scale, bias, run_mean, run_variance,
                             False,
                             epsilon=epsilon,
                             use_cudnn_engine=use_cudnn)

    actual_forward = op.eval({net_input: t})

    for actual, wanted in zip(actual_forward, expected_forward):
        assert actual.shape == AA(wanted).shape
        assert np.allclose(actual, wanted, atol=TOLERANCE_ABSOLUTE)
Beispiel #10
0
def test_op_batch_normalization_numpy(shape, spatial, device_id, precision):
    """Validate batch normalization against a NumPy reference.

    Checked in order: the ``eval`` output, which must match normalization
    with the initial running statistics; the output and the scale/bias
    gradients returned by ``grad``, which must match values derived from
    the batch statistics; and the running mean, variance and count read
    back after ``grad``.
    """
    # for some reason the numpy code below does not work in python 2.7
    # NOTE(review): one candidate cause is the `/` in exp_avg, which is
    # integer division on Python 2 unless true division is enabled for this
    # file - unconfirmed.
    import sys
    if sys.version_info[0] < 3:
        pytest.skip("Only works on Python 3+")

    dtype = PRECISION_TO_TYPE[precision]
    dev = cntk_device(device_id)

    if not spatial:
        # One parameter per input element; reduce over the batch axis only.
        param_shape = (np.prod(shape), )
        reduced_shape = ()
        reduce_dims = (0, )
    else:
        # One parameter per entry of the leading axis of `shape`; reduce
        # over the batch axis and the remaining axes.
        param_shape = (shape[0], )
        reduced_shape = shape[1:]
        reduce_dims = (0, 2, 3, 4)[0:len(shape)]

    batch_size = 3
    x = 10 * np.random.random((batch_size, ) + shape).astype(dtype)

    init_scale, init_bias = 1, 2
    init_mean, init_var = 3, 4
    init_count = 2
    epsilon = 0.01

    placement = dict(dtype=dtype, device=dev)

    i = C.input_variable(shape, dtype=dtype)
    scale = C.parameter(param_shape, init=init_scale, **placement)
    bias = C.parameter(param_shape, init=init_bias, **placement)
    run_mean = C.constant(init_mean, shape=param_shape, **placement)
    run_var = C.constant(init_var, shape=param_shape, **placement)
    run_count = C.constant(init_count, shape=(), **placement)

    #use negative normalization_time_constant for easier exp_avg compute
    bn = C.batch_normalization(i, scale, bias, run_mean, run_var, spatial,
                               normalization_time_constant=-1,
                               epsilon=epsilon,
                               running_count=run_count)

    # Forward evaluation is compared against the initial running statistics.
    fwd = bn.eval(x, device=dev)
    y_fwd = (x - init_mean) / np.sqrt(init_var + epsilon) * init_scale + init_bias
    assert np.allclose(y_fwd, fwd)

    bwd = bn.grad(x, wrt=bn.parameters, outputs=[bn], device=dev)
    exp_avg = batch_size / (init_count + batch_size)

    def spread_over_batch(stat):
        # Expand each entry of `stat` over reduced_shape, then repeat the
        # result once per sample so it lines up with `x`.
        return np.asarray([[np.ones(reduced_shape) * entry
                            for entry in stat]] * batch_size)

    mean = np.mean(x, reduce_dims)
    mean_b = spread_over_batch(mean)
    reduced_count = batch_size * np.prod(reduced_shape)
    var = np.mean((x - mean_b)**2, reduce_dims)
    #the output variance is unbiased, while computation uses biased variance
    var_out = var * reduced_count / (reduced_count - 1)
    var_b = spread_over_batch(var)
    x_hat = (x - mean_b) / np.sqrt(var_b + epsilon)
    y = init_scale * x_hat + init_bias

    d_scale = np.sum(x_hat, reduce_dims)
    d_bias = np.sum(np.ones_like(x_hat), reduce_dims)

    assert np.allclose(y, bwd[1], atol=1e-6)
    assert np.allclose(d_scale.reshape(param_shape), bwd[0][scale], atol=1e-2)
    assert np.allclose(d_bias.reshape(param_shape), bwd[0][bias])

    # Running statistics are blended with the batch statistics by exp_avg.
    expected_run_var = (init_var * (1 - exp_avg)
                        + var_out.reshape(param_shape) * exp_avg)
    expected_run_mean = (init_mean * (1 - exp_avg)
                         + mean.reshape(param_shape) * exp_avg)
    assert np.allclose(expected_run_var, run_var.value)
    assert np.allclose(expected_run_mean, run_mean.value)
    assert run_count.value == init_count + batch_size
Beispiel #11
0
def test_op_batch_normalization_numpy(shape, spatial, device_id, precision):
    """Cross-check batch normalization with a hand-written NumPy model.

    The ``eval`` output is compared with normalization by the initial
    running statistics.  After ``grad``, the returned output and the
    scale/bias gradients are compared with values computed from the batch
    statistics, and the running mean, variance and count are compared with
    their expected updated values.
    """
    # for some reason the numpy code below does not work in python 2.7
    # NOTE(review): possibly because `/` in exp_avg is integer division on
    # Python 2 unless true division is enabled for this file - unconfirmed.
    import sys
    if sys.version_info[0] < 3:
        pytest.skip("Only works on Python 3+")

    dtype = PRECISION_TO_TYPE[precision]
    dev = cntk_device(device_id)

    if not spatial:
        # A parameter for every input element; only the batch axis is reduced.
        param_shape = (np.prod(shape),)
        reduced_shape = ()
        reduce_dims = (0,)
    else:
        # A parameter for every entry of the leading axis of `shape`; the
        # batch axis and the remaining axes are reduced.
        param_shape = (shape[0],)
        reduced_shape = shape[1:]
        reduce_dims = (0, 2, 3, 4)[0:len(shape)]

    batch_size = 3
    x = 10 * np.random.random((batch_size,) + shape).astype(dtype)

    init_scale, init_bias = 1, 2
    init_mean, init_var = 3, 4
    init_count = 2
    epsilon = 0.01

    i = C.input_variable(shape, dtype=dtype)
    scale = C.parameter(param_shape, init=init_scale, dtype=dtype, device=dev)
    bias = C.parameter(param_shape, init=init_bias, dtype=dtype, device=dev)
    run_mean = C.constant(init_mean, shape=param_shape, dtype=dtype, device=dev)
    run_var = C.constant(init_var, shape=param_shape, dtype=dtype, device=dev)
    run_count = C.constant(init_count, shape=(), dtype=dtype, device=dev)

    #use negative normalization_time_constant for easier exp_avg compute
    bn = C.batch_normalization(
        i, scale, bias, run_mean, run_var, spatial,
        normalization_time_constant=-1,
        epsilon=epsilon,
        running_count=run_count)

    # Forward pass: expected values use the initial running statistics.
    fwd = bn.eval(x, device=dev)
    y_fwd = (x - init_mean) / np.sqrt(init_var + epsilon) * init_scale + init_bias
    assert np.allclose(y_fwd, fwd)

    bwd = bn.grad(x, wrt=bn.parameters, outputs=[bn], device=dev)
    exp_avg = batch_size / (init_count + batch_size)

    def expand_to_batch(stat):
        # Fill reduced_shape with each entry of `stat` and repeat the result
        # for every sample so it can be combined with `x`.
        per_sample = [np.ones(reduced_shape) * entry for entry in stat]
        return np.asarray([per_sample] * batch_size)

    mean = np.mean(x, reduce_dims)
    mean_b = expand_to_batch(mean)
    reduced_count = batch_size * np.prod(reduced_shape)
    var = np.mean((x - mean_b) ** 2, reduce_dims)
    #the output variance is unbiased, while computation uses biased variance
    var_out = var * reduced_count / (reduced_count - 1)
    var_b = expand_to_batch(var)
    x_hat = (x - mean_b) / np.sqrt(var_b + epsilon)
    y = init_scale * x_hat + init_bias

    d_scale = np.sum(x_hat, reduce_dims)
    d_bias = np.sum(np.ones_like(x_hat), reduce_dims)

    assert np.allclose(y, bwd[1], atol=1e-6)
    assert np.allclose(d_scale.reshape(param_shape), bwd[0][scale], atol=1e-2)
    assert np.allclose(d_bias.reshape(param_shape), bwd[0][bias])

    # Running statistics: old value and batch value blended by exp_avg.
    keep = 1 - exp_avg
    assert np.allclose(init_var * keep + var_out.reshape(param_shape) * exp_avg,
                       run_var.value)
    assert np.allclose(init_mean * keep + mean.reshape(param_shape) * exp_avg,
                       run_mean.value)
    assert run_count.value == init_count + batch_size