def test_gaussian(backend, args):
    be = NervanaObject.be
    dim1, dim2 = args
    shape = (dim1, dim2)
    Wdev = be.empty(shape)
    gaussian_init = Gaussian(loc=10000, scale=1)
    gaussian_init.fill(Wdev)
    Whost = Wdev.get()
    flat = Whost.flatten()
    # Not a very robust test: with loc=10000 and scale=1 every sample should
    # land far above zero, so this only catches gross initialization failures.
    for elt in flat:
        assert elt >= 0
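The loop above only checks the sign of each sample, which its own comment concedes is weak. A sharper statistical check is possible, since the sample mean and standard deviation of a large tensor should land close to loc and scale. A minimal sketch, using a hypothetical helper check_gaussian_stats (not part of this suite) with illustrative tolerances:

def check_gaussian_stats(be, shape=(1000, 1000), loc=10000.0, scale=1.0):
    # fill a device tensor with Gaussian(loc, scale) samples
    Wdev = be.empty(shape)
    Gaussian(loc=loc, scale=scale).fill(Wdev)
    flat = Wdev.get().flatten()
    # sample mean should sit within ~5 standard errors of loc
    assert abs(flat.mean() - loc) < 5.0 * scale / np.sqrt(flat.size)
    # sample std should be within 1% of scale for ~1e6 samples
    assert abs(flat.std() - scale) < 0.01 * scale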
def test_ref_compare_rand(backend_default, reflstmargs):
    # run comparison with reference code
    # for Gaussian random init
    np.random.seed(seed=0)
    seq_len, input_size, hidden_size, batch_size = reflstmargs
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size
    check_lstm(seq_len, input_size, hidden_size, batch_size, Gaussian())
def test_ref_compare_rand(backend, refgruargs):
    # run comparison with reference code
    # for Gaussian random init
    seq_len, input_size, hidden_size, batch_size = refgruargs
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size
    check_rnn(seq_len, input_size, hidden_size, batch_size, Gaussian())
def test_conv(backend_cpu64, convargs):
    nin, nifm, fside, batch_size, dil_h, dil_w = convargs
    fshape = (fside, fside, fside)
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size
    sz = nin * nin * nifm * batch_size
    epsilon = 1.0e-5
    inp = np.arange(sz) * 2.5 * epsilon
    np.random.shuffle(inp)
    inp = inp.reshape((nin * nin * nifm, batch_size))
    lshape = (nifm, nin, nin)

    init = Gaussian()
    layer = ConvWithReset(fshape,
                          strides=2,
                          padding=fside - 1,
                          dilation=dict(dil_d=1, dil_h=dil_h, dil_w=dil_w),
                          init=init)
    pert_frac = 0.1  # test 10% of the inputs
    # select pert_frac fraction of inps to perturb
    pert_cnt = int(np.ceil(inp.size * pert_frac))
    pert_inds = np.random.permutation(inp.size)[0:pert_cnt]

    (max_abs, max_rel) = general_gradient_comp(layer,
                                               inp,
                                               epsilon=epsilon,
                                               lshape=lshape,
                                               pert_inds=pert_inds)
    assert max_abs < 1.0e-7
def gradient_calc(seq_len, input_size, hidden_size, batch_size,
                  epsilon=None, rand_scale=None, inp_bl=None):
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size

    input_shape = (input_size, seq_len * batch_size)

    # generate input if one is not given
    if inp_bl is None:
        inp_bl = np.random.randn(*input_shape)

    # neon gru instance
    gru = GRU(hidden_size, init=Gaussian(), activation=Tanh(),
              gate_activation=Logistic())
    inpa = gru.be.array(np.copy(inp_bl))

    # run fprop on the baseline input
    gru.configure((input_size, seq_len))
    gru.prev_layer = True
    gru.allocate()
    gru.set_deltas([gru.be.iobuf(gru.in_shape)])
    out_bl = gru.fprop(inpa).get()

    # random scaling/hash to generate fake loss
    if rand_scale is None:
        rand_scale = np.random.random(out_bl.shape) * 2.0 - 1.0
    # loss function would be:
    # loss_bl = np.sum(rand_scale * out_bl)

    # run back prop with rand_scale as the errors
    # use copy to avoid any interactions
    deltas_neon = gru.bprop(gru.be.array(np.copy(rand_scale))).get()

    # add a perturbation to each input element
    grads_est = np.zeros(inpa.shape)
    inp_pert = inp_bl.copy()
    for pert_ind in range(inpa.size):
        save_val = inp_pert.flat[pert_ind]

        inp_pert.flat[pert_ind] = save_val + epsilon
        reset_gru(gru)
        gru.allocate()
        out_pos = gru.fprop(gru.be.array(inp_pert)).get()

        inp_pert.flat[pert_ind] = save_val - epsilon
        reset_gru(gru)
        gru.allocate()
        out_neg = gru.fprop(gru.be.array(inp_pert)).get()

        # calculate the loss with perturbations
        loss_pos = np.sum(rand_scale * out_pos)
        loss_neg = np.sum(rand_scale * out_neg)

        # compute the gradient estimate
        grad = 0.5 / float(epsilon) * (loss_pos - loss_neg)
        grads_est.flat[pert_ind] = grad

        # reset the perturbed input element
        inp_pert.flat[pert_ind] = save_val

    del gru
    return (grads_est, deltas_neon)
def gradient_calc(seq_len, input_size, hidden_size, batch_size,
                  epsilon=None, rand_scale=None, inp_bl=None):
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size

    input_shape = (input_size, seq_len * batch_size)

    # generate input if one is not given
    if inp_bl is None:
        inp_bl = np.random.randn(*input_shape)

    # neon rnn instance
    rnn = Recurrent(hidden_size, Gaussian(), Tanh())
    inpa = rnn.be.array(np.copy(inp_bl))

    # run fprop on the baseline input
    out_bl = rnn.fprop(inpa).get()

    # random scaling/hash to generate fake loss
    if rand_scale is None:
        rand_scale = np.random.random(out_bl.shape) * 2.0 - 1.0
    # loss function would be:
    # loss_bl = np.sum(rand_scale * out_bl)

    # run back prop with rand_scale as the errors
    # use copy to avoid any interactions
    deltas_neon = rnn.bprop(rnn.be.array(np.copy(rand_scale))).get()

    # add a perturbation to each input element
    grads_est = np.zeros(inpa.shape)
    inp_pert = inp_bl.copy()
    for pert_ind in range(inpa.size):
        save_val = inp_pert.flat[pert_ind]

        inp_pert.flat[pert_ind] = save_val + epsilon
        reset_rnn(rnn)
        out_pos = rnn.fprop(rnn.be.array(inp_pert)).get()

        inp_pert.flat[pert_ind] = save_val - epsilon
        reset_rnn(rnn)
        out_neg = rnn.fprop(rnn.be.array(inp_pert)).get()

        # calculate the loss with perturbations
        loss_pos = np.sum(rand_scale * out_pos)
        loss_neg = np.sum(rand_scale * out_neg)

        # compute the gradient estimate
        grad = 0.5 * (loss_pos - loss_neg) / epsilon
        grads_est.flat[pert_ind] = grad

        # reset the perturbed input element
        inp_pert.flat[pert_ind] = save_val

    del rnn
    return (grads_est, deltas_neon)
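Both gradient_calc variants return the numerical estimate alongside the analytic bprop deltas, and a caller is expected to compare the two. A minimal sketch of such a check (check_gradients is hypothetical, and the tolerance is an illustrative guess rather than the suite's real setting):

def check_gradients(seq_len=5, input_size=3, hidden_size=4, batch_size=2):
    grads_est, deltas_neon = gradient_calc(seq_len, input_size, hidden_size,
                                           batch_size, epsilon=1.0e-5)
    # numerical and analytic gradients should agree to within the
    # finite-difference error floor
    assert np.allclose(grads_est, deltas_neon, rtol=0.0, atol=1.0e-4)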
def test_ref_compare_rand_init_state(backend_default, refgruargs):
    seq_len, input_size, hidden_size, batch_size = refgruargs
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size
    check_gru(seq_len, input_size, hidden_size, batch_size, Gaussian(),
              add_init_state=True)
def run(be, fake_dilation, fsz, stride, pad, dilation):
    K = 8
    strides = stride
    padding = pad
    be.rng = be.gen_rng(be.rng_seed)

    # grow the input until the dilated conv produces a usable output size
    in_shape = 16
    while out_shape(in_shape, fsz, stride, dilation, pad) < 3:
        in_shape *= 2
    train_shape = (1, in_shape, in_shape)

    inp = be.array(be.rng.randn(np.prod(train_shape), be.bsz))
    init = Gaussian()
    layers = [
        Conv((5, 5, K), init=init),
        Conv((fsz, fsz, K), strides=strides, padding=padding, init=init,
             dilation=dict(dil_d=1, dil_h=dilation, dil_w=dilation)),
        Conv((3, 3, K), init=init),
        Affine(nout=1, init=init)
    ]
    model = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    model.initialize(train_shape, cost)

    if fake_dilation:
        # Perform regular convolution with an expanded filter.
        weights = save(model)
        new_layers = layers
        # Replace the middle (dilated) layer.
        new_fsz = dilated_fsz(fsz, dilation)
        new_layers[1] = Conv((new_fsz, new_fsz, K), strides=strides,
                             padding=padding, init=init)
        model = Model(layers=new_layers)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(train_shape, cost)
        load(weights, model, K, fsz, dilation)

    print(model)
    model.optimizer = GradientDescentMomentum(learning_rate=0.01,
                                              momentum_coef=0.9)
    outputs = fprop(model, inp)
    weights = bprop(model, outputs)
    model.optimizer.optimize(model.layers_to_optimize, epoch=0)
    return outputs.get(), weights.get()
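run() leans on two helpers, dilated_fsz and out_shape, that are defined elsewhere. The standard dilated-convolution geometry pins down what they must compute: a filter of size fsz with dilation d covers d * (fsz - 1) + 1 input positions. A sketch under that assumption:

def dilated_fsz(fsz, dilation):
    # effective spatial extent of a dilated filter
    return dilation * (fsz - 1) + 1


def out_shape(in_shape, fsz, stride, dilation, pad):
    # standard convolution output size, using the dilated filter extent
    return (in_shape + 2 * pad - dilated_fsz(fsz, dilation)) // stride + 1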
def test_ref_compare_rand(backend_default, refgruargs):
    # run comparison with reference code
    # for Gaussian random init
    seq_len, input_size, hidden_size, batch_size = refgruargs
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size
    try:
        check_rnn(seq_len, input_size, hidden_size, batch_size, Gaussian())
    except Exception:
        # xfail for cpu backend on pascal
        if not isinstance(NervanaObject.be, NervanaCPU):
            check_rnn(seq_len, input_size, hidden_size, batch_size, Gaussian())
        else:
            if os.getenv("PLATFORM"):
                platform = os.getenv("PLATFORM")
            else:
                if os.path.exists("/proc/cpuinfo"):
                    cat_cmd = ('cat /proc/cpuinfo | grep "model name" | '
                               'tail -1 | cut -f 2 -d \':\' | '
                               'cut -f 3 -d \')\' | cut -f 1 -d \'@\' | '
                               'cut -f 2,3 -d \' \'')
                    cpu_model_name = subp.check_output(cat_cmd, shell=True)
                else:
                    cpu_model_name = "unknown"
                # check_output returns bytes, so compare against both the
                # str and bytes forms of the model string
                if cpu_model_name in ('CPU E5-2699A\n', b'CPU E5-2699A\n'):
                    platform = "BDW"
                else:
                    platform = "unknown"
            if platform == 'BDW':
                pytest.xfail(reason="xfail issue #1041 with {} PLATFORM"
                             .format(platform))
            else:
                check_rnn(seq_len, input_size, hidden_size, batch_size,
                          Gaussian())
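The shell pipeline above is fragile; the model string can be pulled out in plain Python instead. A sketch (cpu_model is hypothetical, not a helper from this suite):

def cpu_model():
    # parse /proc/cpuinfo directly rather than shelling out to grep/cut
    try:
        with open("/proc/cpuinfo") as f:
            lines = [ln for ln in f if ln.startswith("model name")]
    except IOError:
        return "unknown"
    # e.g. "model name : Intel(R) Xeon(R) CPU E5-2699A v4 @ 2.40GHz"
    return lines[-1].split(":", 1)[1].strip() if lines else "unknown"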
def test_gan_container(backend_default):
    """
    Set up a GenerativeAdversarial container and make sure generator
    and discriminator layers get configured correctly.
    """
    init_norm = Gaussian(loc=0.0, scale=0.01)

    # set up container and ensure layers get wired up correctly
    generator = Sequential([Affine(nout=10, init=init_norm),
                            Affine(nout=100, init=init_norm)])
    discriminator = Sequential([Affine(nout=100, init=init_norm),
                                Affine(nout=1, init=init_norm)])
    layers = GenerativeAdversarial(generator, discriminator)

    assert len(layers.layers) == 4
    assert layers.layers[0].nout == 10
    assert layers.layers[1].nout == 100
    assert layers.layers[2].nout == 100
    assert layers.layers[3].nout == 1
    assert layers.generator.layers == layers.layers[0:2]
    assert layers.discriminator.layers == layers.layers[2:4]
def test_bias(backend_cpu64, biasargs):
    n, batch_size = biasargs
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size
    init = Gaussian()
    layer = BiasWithReset(init=init)
    inp = np.random.randn(n, batch_size)
    epsilon = 1.0e-5

    pert_frac = 0.1  # test 10% of the inputs
    # select pert_frac fraction of inps to perturb
    pert_cnt = int(np.ceil(inp.size * pert_frac))
    pert_inds = np.random.permutation(inp.size)[0:pert_cnt]

    (max_abs, max_rel) = general_gradient_comp(layer,
                                               inp,
                                               epsilon=epsilon,
                                               lshape=inp.shape,
                                               pert_inds=pert_inds)
    assert max_abs < 1.0e-7
def test_mlp(backend_cpu64, mlpargs):
    # run the gradient check on an mlp
    nin, nout, batch_size = mlpargs
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size
    init = Gaussian()
    layer = LinearWithReset(nout=nout, init=init)
    inp = np.random.randn(nin, batch_size)
    epsilon = 1.0e-5

    pert_frac = 0.1  # test 10% of the inputs
    # select pert_frac fraction of inps to perturb
    pert_cnt = int(np.ceil(inp.size * pert_frac))
    pert_inds = np.random.permutation(inp.size)[0:pert_cnt]

    (max_abs, max_rel) = general_gradient_comp(layer,
                                               inp,
                                               epsilon=epsilon,
                                               pert_inds=pert_inds)
    assert max_abs < 1.0e-7
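test_conv, test_bias, and test_mlp all route through general_gradient_comp, which presumably applies the same central-difference recipe spelled out in gradient_calc above: perturb one input element by plus and minus epsilon, fprop, reduce to a scalar loss, and difference. The core step, condensed into a sketch (central_diff and loss_fn are illustrative names, not the suite's API):

def central_diff(loss_fn, inp, ind, epsilon):
    # numerical gradient of loss_fn with respect to one element of inp
    saved = inp.flat[ind]
    inp.flat[ind] = saved + epsilon
    loss_pos = loss_fn(inp)
    inp.flat[ind] = saved - epsilon
    loss_neg = loss_fn(inp)
    inp.flat[ind] = saved  # restore the perturbed element
    return (loss_pos - loss_neg) / (2.0 * epsilon)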
from builtins import zip

import numpy as np

from neon import NervanaObject
from neon.layers import Sequential, Conv, MergeSum, SkipNode, Activation
from neon.initializers.initializer import Gaussian, IdentityInit
from neon.transforms import Rectlin
from utils import allclose_with_out

try:
    from neon.backends.nervanamkl import NervanaMKL
except ImportError:
    # stub out the class
    class NervanaMKL(object):
        pass

init1 = Gaussian(scale=0.01)
relu = Rectlin()
batch_size = 64


def conv_params(fsize, nfm, stride=1, relu=True):
    return dict(fshape=(fsize, fsize, nfm),
                strides=stride,
                padding=(1 if fsize > 1 else 0),
                activation=(Rectlin() if relu else None),
                init=init1,
                batch_norm=True)


def id_params(nfm):
    # identity shortcut: strided 1x1 projection with IdentityInit and no
    # nonlinearity, following neon's cifar10_msra convention for the fields
    # past fshape
    return dict(fshape=(1, 1, nfm),
                strides=2,
                padding=0,
                activation=None,
                init=IdentityInit())
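conv_params and id_params build keyword-argument dicts for Conv, shaped for a ResNet-style residual block: a main path of 3x3 convolutions summed with either a SkipNode identity or a 1x1 projection shortcut. A sketch of how they would plausibly be wired together (module_factory is hypothetical here, though every layer it uses is already imported above):

def module_factory(nfm, stride=1):
    # main path: two 3x3 convs, the second without its own ReLU
    mainpath = [Conv(**conv_params(3, nfm, stride=stride)),
                Conv(**conv_params(3, nfm, relu=False))]
    # shortcut: identity when shapes match, 1x1 projection when downsampling
    sidepath = [SkipNode() if stride == 1 else Conv(**id_params(nfm))]
    # sum the two branches, then apply the block's ReLU
    return [MergeSum([mainpath, sidepath]), Activation(relu)]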