import numpy as np
import mxnet as mx
from mxnet.test_utils import (check_numeric_gradient, check_symbolic_forward,
                              check_symbolic_backward)


def test_batchnorm_training():
    for shape in [(2, 3), (2, 3, 2, 2)]:
        data_tmp = np.random.normal(size=shape)
        s = (shape[1],)
        gamma = np.ones(s)
        beta = np.ones(s)
        gamma[1] = 3
        beta[0] = 3
        rolling_mean = np.random.uniform(size=s)
        rolling_std = np.random.uniform(size=s)

        data = mx.symbol.Variable('data')
        test = mx.symbol.BatchNorm(data, fix_gamma=False)
        check_numeric_gradient(test, [data_tmp, gamma, beta],
                               [rolling_mean, rolling_std],
                               numeric_eps=1e-3, check_eps=5e-2)

        # Gamma must be fixed at one when fix_gamma is True.
        gamma = np.ones(s)
        test = mx.symbol.BatchNorm(data, fix_gamma=True)
        check_numeric_gradient(test, [data_tmp, gamma, beta],
                               [rolling_mean, rolling_std],
                               numeric_eps=1e-3, check_eps=5e-2)
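
# For reference, a minimal NumPy sketch of the training-mode forward pass that
# batch normalization computes over the channel axis (axis 1). The helper name
# and the eps default are illustrative assumptions, not part of the test
# utilities or the BatchNorm operator's API.
def _batchnorm_forward_ref(x, gamma, beta, eps=1e-5):
    # Normalize over every axis except the channel axis, then scale and shift.
    axes = tuple(i for i in range(x.ndim) if i != 1)
    shape = [1] * x.ndim
    shape[1] = x.shape[1]
    mean = x.mean(axis=axes).reshape(shape)
    var = x.var(axis=axes).reshape(shape)
    x_hat = (x - mean) / np.sqrt(var + eps)
    return gamma.reshape(shape) * x_hat + beta.reshape(shape)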

def test_pow_fn():
    shape = (3, 4)
    exp = mx.symbol.Variable("exp")
    y = mx.sym.pow(2, exp)
    x = np.ones(shape) * 3
    check_numeric_gradient(y, [x])
    check_symbolic_forward(y, [x], [2**x])
    check_symbolic_backward(y, [x], [np.ones(shape)], [np.log(2) * 2**x])

def test_scalar_pow():
    data = mx.symbol.Variable('data')
    shape = (1, 1)
    data_tmp = np.ones(shape)
    test = data**2
    check_numeric_gradient(test, [data_tmp])
    check_symbolic_forward(test, [data_tmp], [data_tmp**2])
    check_symbolic_backward(test, [data_tmp], [np.ones(shape)], [2 * data_tmp])

def test_symbol_pow():
    shape = (1, 1)
    data = mx.symbol.Variable('data')
    data_tmp = np.ones(shape) * 2
    exp = mx.symbol.Variable('exp')
    exp_tmp = np.ones(shape) * 3
    test = data**exp
    check_numeric_gradient(test, [data_tmp, exp_tmp])
    check_symbolic_forward(test, [data_tmp, exp_tmp], [data_tmp**exp_tmp])
    # d/dx x**e = e * x**(e - 1); d/de x**e = x**e * log(x)
    data_dir = data_tmp**(exp_tmp - 1) * exp_tmp
    exp_dir = data_tmp**exp_tmp * np.log(data_tmp)
    check_symbolic_backward(test, [data_tmp, exp_tmp], [np.ones(shape)],
                            [data_dir, exp_dir])
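
# A quick standalone sanity check (pure NumPy; the helper name and the sample
# point are illustrative assumptions) of the derivatives asserted above:
# for f(x, e) = x**e, df/dx = e * x**(e - 1) and df/de = x**e * log(x),
# each compared against a central finite difference.
def _check_pow_grads(x=2.0, e=3.0, h=1e-6):
    df_dx = ((x + h)**e - (x - h)**e) / (2 * h)
    df_de = (x**(e + h) - x**(e - h)) / (2 * h)
    assert abs(df_dx - e * x**(e - 1)) < 1e-4
    assert abs(df_de - x**e * np.log(x)) < 1e-4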

def test_unpooling_backward():
    np.random.seed(1)
    # Check identity preservation: a 1x1 max pool followed by unpooling is the
    # identity map, so the gradient of the data input must be all ones.
    for shape in [(1, 3, 5, 5), (3, 1, 5, 5), (1, 7, 4, 4)]:
        for pad in [(0, 0)]:  # , (0, 1), (1, 0), (1, 1)]:
            data_s = mx.symbol.Variable('data')
            data = np.arange(np.prod(shape), dtype='float32').reshape(shape)
            pl = mx.symbol.Pooling(data=data_s, pool_type='max', kernel=(1, 1),
                                   stride=(1, 1), pad=pad, name='pool')
            upl = mx.symbol.Unpooling(pl, data_s, pl, kernel=(1, 1),
                                      stride=(1, 1), pad=pad, name='unpool')
            exec_ = upl.simple_bind(ctx=mx.cpu(), data=shape)
            exec_.forward(is_train=True)
            exec_.backward(mx.nd.ones(shape))
            exec_.grad_arrays[0].wait_to_read()
            assert np.all(exec_.grad_arrays[0].asnumpy() == np.ones(shape)), (
                str(exec_.grad_arrays[0].asnumpy()))

    # Check the numeric gradient for 2x2 pooling with every pad combination.
    for shape in [(1, 3, 6, 5), (3, 1, 6, 5), (1, 7, 5, 5)]:
        for pad in [(0, 0), (0, 1), (1, 0), (1, 1)]:
            data_s = mx.symbol.Variable('data')
            pl = mx.symbol.Pooling(data=data_s, pool_type='max', kernel=(2, 2),
                                   stride=(2, 2), pad=pad, name='pool')
            upl = mx.symbol.Unpooling(pl, data_s, pl, kernel=(2, 2),
                                      stride=(2, 2), pad=pad, name='unpool')
            check_numeric_gradient(
                upl,
                [np.arange(np.prod(shape), dtype='float32').reshape(shape)],
                [], numeric_eps=1e-2, check_eps=7e-2)

    # Test the special case of a smaller stride than kernel size: windows
    # overlap, so one input element can receive gradient from several windows.
    shape = (1, 1, 5, 5)
    data_s = mx.symbol.Variable('data')
    data = np.arange(np.prod(shape), dtype='float32').reshape(shape)
    data[0, 0, 0, 3] = 7
    data[0, 0, 1, 3] = 7
    data[0, 0, 0, 4] = 7
    data[0, 0, 1, 4] = 7
    data[0, 0, 1, 2] = 7
    pldata_s = mx.symbol.Variable('pldata')
    pldata = np.array([[6., 7., 7.], [16., 18., 19.], [21., 23., 24.]],
                      dtype='float32').reshape((1, 1, 3, 3))
    upl = mx.symbol.Unpooling(pldata_s, data_s, pldata_s, kernel=(3, 3),
                              stride=(2, 2), pad=(1, 1), name='unpool')
    exec_ = upl.bind(ctx=mx.cpu(),
                     args={'data': mx.nd.array(data),
                           'pldata': mx.nd.array(pldata)},
                     args_grad={'data': mx.nd.zeros(data.shape),
                                'pldata': mx.nd.zeros(pldata.shape)})
    exec_.forward(is_train=True)
    exec_.backward(
        mx.nd.array(np.arange(np.prod(shape), dtype='float32').reshape(shape)))
    grad = exec_.grad_arrays[0].asnumpy()
    assert np.all(grad[0, 0, 0, 1:3] == 3.)
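
# A minimal NumPy sketch of the scatter at the heart of max-unpooling (which
# is also the gradient rule for max pooling itself): each pooled entry flows
# back only to the argmax position of its window. Non-overlapping 2x2 windows
# with stride 2 are assumed here, and the helper name is illustrative; the
# Unpooling symbol under test additionally handles padding and overlapping
# windows, which this sketch does not cover.
def _max_unpool_scatter_ref(x, pooled):
    out = np.zeros_like(x)
    for i in range(0, x.shape[0], 2):
        for j in range(0, x.shape[1], 2):
            window = x[i:i + 2, j:j + 2]
            r, c = np.unravel_index(np.argmax(window), window.shape)
            out[i + r, j + c] = pooled[i // 2, j // 2]
    return out


# E.g. _max_unpool_scatter_ref(np.arange(16.).reshape(4, 4), np.ones((2, 2)))
# places a one at the bottom-right (argmax) corner of each 2x2 window.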