def build_model(self):
    # setup weight initialization function
    init_norm = Gaussian(loc=0.0, scale=0.01)

    # setup model layers
    layers = [Affine(nout=100, init=init_norm, bias=Uniform(),
                     activation=Rectlin()),
              Affine(nout=10, init=init_norm, bias=Uniform(),
                     activation=Logistic(shortcut=True))]

    # setup cost function as CrossEntropy
    self.cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    # setup optimizer
    self.optimizer = GradientDescentMomentum(
        0.1, momentum_coef=0.9, stochastic_round=self.args.rounding)

    # initialize model object
    self.model = ModelDist(layers=layers)
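A minimal sketch of how the objects built here are typically consumed, assuming neon's standard Model.fit API; train_set, Callbacks, and the run method are illustrative assumptions, not part of the original code:

def run(self, train_set):
    self.build_model()
    # fit drives fprop/bprop over train_set using the cost and
    # optimizer assembled in build_model
    self.model.fit(train_set,
                   optimizer=self.optimizer,
                   num_epochs=self.args.epochs,
                   cost=self.cost,
                   callbacks=Callbacks(self.model))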
def test_uniform(backend, args):
    be = NervanaObject.be
    dim1, dim2 = args
    shape = (dim1, dim2)
    Wdev = be.empty(shape)
    uniform_init = Uniform(low=-5, high=15)
    uniform_init.fill(Wdev)
    Whost = Wdev.get()
    flat = Whost.flatten()
    for elt in flat:
        assert elt <= 15 and elt >= -5
    return
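The per-element loop above makes one Python-level comparison per entry; an equivalent vectorized check (a sketch with the same semantics) compares the whole array at once:

# bounds check without the Python loop: all draws from
# Uniform(low=-5, high=15) must land in [-5, 15]
assert np.all((Whost >= -5) & (Whost <= 15))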
def test_concat_l1_l1(backend_default, allrand_args):
    # test two linear layers that are merged with concat
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    # Diff size inputs and outputs
    nins = [128, 1024]
    nouts = [64, 2048]
    batch_size = 16
    NervanaObject.be.bsz = batch_size
    be = NervanaObject.be

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layers = [Sequential(Affine(nout=nout, init=init_unif)) for nout in nouts]
    inputs = [be.array(dtypeu(np.random.random((nin, batch_size))))
              for nin in nins]
    merge = MergeMultistream(layers, merge="stack")
    assert len(inputs) == len(layers)
    merge.configure(inputs)
    merge.allocate()
    merge.set_deltas(None)
    out = merge.fprop(inputs).get()

    sublayers = [s.layers[0] for s in layers]
    weights = [layer.W.get() for layer in sublayers]
    out_exp = np.concatenate([np.dot(w, inp.get())
                              for (w, inp) in zip(weights, inputs)])
    assert np.allclose(out, out_exp, atol=1e-3)

    err_lst = [dtypeu(np.random.random((nout, batch_size))) for nout in nouts]
    err_concat = np.concatenate(err_lst)
    merge.bprop(be.array(err_concat))
    dW_exp_lst = [np.dot(err, inp.get().T)
                  for (err, inp) in zip(err_lst, inputs)]
    for layer, dW_exp in zip(sublayers, dW_exp_lst):
        assert np.allclose(layer.dW.get(), dW_exp)
    return
def test_linear_ones(backend_default, basic_linargs):
    # basic sanity check with all ones on the inputs and weights;
    # check that each row in the output is the sum of the weights for
    # that output.  this check will confirm that the correct number
    # of operations is being run
    nin, nout, batch_size = basic_linargs
    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size
    dtypeu = np.float32

    init_unif = Uniform(low=1.0, high=1.0)
    layer = Linear(nout=nout, init=init_unif)
    inp = layer.be.array(dtypeu(np.ones((nin, batch_size))))
    layer.configure(nin)
    layer.allocate()
    out = layer.fprop(inp).asnumpyarray()
    w = layer.W.asnumpyarray()
    sums = np.sum(w, 1).reshape((nout, 1)) * np.ones((1, batch_size))

    # for larger layers need to estimate numerical precision
    # atol = est_mm_prec(w, inp.asnumpyarray())
    assert np.allclose(sums, out, atol=0.0, rtol=0.0), \
        '%e' % np.max(np.abs(out - sums))
    return
def test_linear_zeros(backend_default, basic_linargs):
    # basic sanity check with 0 weights random inputs
    nin, nout, batch_size = basic_linargs
    NervanaObject.be.bsz = batch_size
    dtypeu = np.float32

    init_unif = Uniform(low=0.0, high=0.0)
    layer = Linear(nout=nout, init=init_unif)
    inp = layer.be.array(dtypeu(np.random.random((nin, batch_size))))
    layer.configure(nin)
    layer.prev_layer = True  # Hack to force delta buffer allocation
    layer.allocate()
    layer.set_deltas([layer.be.iobuf(nin)])
    out = layer.fprop(inp).get()

    assert np.min(out) == 0.0 and np.max(out) == 0.0

    err = dtypeu(np.zeros((nout, batch_size)))
    deltas = layer.bprop(layer.be.array(err)).get()
    assert np.min(deltas) == 0.0 and np.max(deltas) == 0.0

    dw = layer.dW.get()
    assert np.min(dw) == 0.0 and np.max(dw) == 0.0
    return
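Why these checks can demand exact zeros rather than a tolerance: the layer computes out = W.dot(x), deltas = W.T.dot(err), and dW = err.dot(x.T), so with W == 0 and err == 0 every product term is an exact floating-point zero and no rounding can occur:

# exact-zero expectations implied by W = 0 and err = 0
# out    = W @ x      -> all zeros
# deltas = W.T @ err  -> all zeros
# dW     = err @ x.T  -> all zeros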
def test_concat_sequence_l1_l1(backend_default, allrand_args):
    # test two linear layers that are merged with concat
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    # Diff size input steps
    nin = 128
    steps = [32, 64]
    nout = 256
    batch_size = 16
    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layers = [Linear(nout=nout, init=init_unif) for _ in range(2)]
    inputs = [layers[0].be.array(dtypeu(np.random.random((nin, batch_size * step))))
              for step in steps]
    merge = MergeConcatSequence(layers)
    assert len(inputs) == len(layers)
    merge.configure(inputs)
    merge.allocate()
    out = merge.fprop(inputs).asnumpyarray()

    weights = [layer.W.asnumpyarray() for layer in layers]
    out_exp = np.concatenate([np.dot(w, inp.get())
                              for (w, inp) in zip(weights, inputs)], axis=1)
    assert np.allclose(out, out_exp, atol=1e-3)

    err_lst = [dtypeu(np.random.random((nout, batch_size * step)))
               for step in steps]
    err_concat = layers[0].be.array(np.concatenate(err_lst, axis=1))
    merge.bprop(err_concat)
    dW_exp_lst = [np.dot(err, inp.asnumpyarray().T)
                  for (err, inp) in zip(err_lst, inputs)]
    for layer, dW_exp in zip(layers, dW_exp_lst):
        assert np.allclose(layer.dW.asnumpyarray(), dW_exp)
    return
def test_sum_l1_l1(backend_default, allrand_args):
    # test two linear layers that are merged with sum
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    # Diff size inputs and outputs
    nins = [128, 1024]
    nouts = [64, 64]
    batch_size = 16
    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layers = [Linear(nout=nout, init=init_unif) for nout in nouts]
    inputs = [layers[0].be.array(dtypeu(np.random.random((nin, batch_size))))
              for nin in nins]
    merge = MergeSum(layers)
    assert len(inputs) == len(layers)
    merge.configure(inputs)
    merge.allocate()
    out = merge.fprop(inputs).asnumpyarray()

    weights = [layer.W.asnumpyarray() for layer in layers]
    out_exp = sum([np.dot(w, inp.get()) for (w, inp) in zip(weights, inputs)])
    assert np.allclose(out, out_exp, atol=1e-3)

    err = dtypeu(np.random.random((nouts[0], batch_size)))
    merge.bprop(layers[0].be.array(err))
    dW_exp_lst = [np.dot(err, inp.asnumpyarray().T) for inp in inputs]
    for layer, dW_exp in zip(layers, dW_exp_lst):
        assert np.allclose(layer.dW.asnumpyarray(), dW_exp)
    return
def test_conv_zeros(backend_default, zeros_convargs):
    fshape, nofm, batch_size = zeros_convargs
    NervanaObject.be.bsz = batch_size

    # basic sanity check with 0 weights random inputs
    init_unif = Uniform(low=0.0, high=0.0)
    inshape = (3, 32, 32)
    insize = np.prod(inshape)
    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=1, padding=0, init=init_unif)
    inp = neon_layer.be.array(np.random.random((insize, batch_size)))
    inp.lshape = inshape
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_layer.set_deltas([neon_layer.be.iobuf(inshape)])
    out = neon_layer.fprop(inp).get()
    assert np.min(out) == 0.0 and np.max(out) == 0.0

    err = np.zeros(out.shape)
    deltas = neon_layer.bprop(neon_layer.be.array(err)).get()
    assert np.min(deltas) == 0.0 and np.max(deltas) == 0.0

    dw = neon_layer.dW.get()
    assert np.min(dw) == 0.0 and np.max(dw) == 0.0
    return
def test_conv_ones(backend_default, ones_convargs):
    dtypeu = np.float32
    indim, nifm, fshape, nofm, batch_size = ones_convargs
    NervanaObject.be.bsz = batch_size

    # weights set to one
    init_unif = Uniform(low=1.0, high=1.0)
    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)
    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=1, padding=0, init=init_unif)
    inp = neon_layer.be.array(np.ones((insize, batch_size)))
    inp.lshape = inshape
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_layer.set_deltas([neon_layer.be.iobuf(inshape)])

    # run fprop
    out = neon_layer.fprop(inp).get()
    out_exp = fshape * fshape * nifm
    assert np.min(out) == out_exp and np.max(out) == out_exp

    # generate err array
    err = np.ones(out.shape)

    # run bprop
    neon_layer.bprop(neon_layer.be.array(err)).get()
    dw = neon_layer.dW.get()

    # generate the reference layer
    ref_layer = ConvLayerRef(1, batch_size, identity,
                             inshape[0], inshape[1:3],
                             (fshape, fshape), nofm, 1, dtypeu)
    # init weights to ones
    ref_layer.weights = np.ones(neon_layer.W.shape).T.astype(dtypeu)

    # run bprop
    ref_layer.bprop(err.T.astype(dtypeu), inp.get().T.astype(dtypeu), 1.0)

    # expected output for updates is a uniform matrix with
    # all elements == ofmsize*batch_size
    updates_exp = ref_layer.ofmsize * batch_size

    # check dw from neon layer
    assert np.max(dw) == updates_exp and np.min(dw) == updates_exp

    # the deltas are more complicated since the matrices are not
    # uniform; going to use the reference code directly here.
    # no tolerance here, should be exact
    dd = np.abs(ref_layer.berror.T - neon_layer.deltas.get())
    assert np.max(dd) == 0.0
    return
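To make the all-ones expectations concrete, a worked example under assumed sizes (fshape=5, nifm=3, indim=32; the actual values come from the ones_convargs fixture):

# fprop: each output element sums a 5x5x3 receptive field of ones:
#     out_exp = 5 * 5 * 3 = 75
# bprop updates: with stride 1 and no padding the output map is 28x28,
# so ofmsize = 28 * 28 = 784, and after bprop with an all-ones err
# every dW element accumulates
#     updates_exp = 784 * batch_size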
def test_affine_wrapper(backend_default):
    """
    Verify that the Affine wrapper constructs the right layer objects.
    """
    nout = 11
    aff = Affine(nout, Uniform())
    assert isinstance(aff, list)
    assert len(aff) == 1
    assert isinstance(aff[0], Linear)
    assert aff[0].nout == nout

    aff = Affine(nout, Uniform(), bias=Uniform())
    assert isinstance(aff, list)
    assert len(aff) == 2
    assert isinstance(aff[0], Linear)
    assert isinstance(aff[1], Bias)

    aff = Affine(nout, Uniform(), activation=Rectlin())
    assert isinstance(aff, list)
    assert len(aff) == 2
    assert isinstance(aff[0], Linear)
    assert isinstance(aff[1], Activation)

    aff = Affine(nout, Uniform(), bias=Uniform(), activation=Rectlin())
    assert isinstance(aff, list)
    assert len(aff) == 3
    assert isinstance(aff[0], Linear)
    assert isinstance(aff[1], Bias)
    assert isinstance(aff[2], Activation)
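Taken together these assertions pin down the wrapper's contract. A minimal sketch of logic consistent with them (an illustration, not neon's actual source):

def affine_sketch(nout, init, bias=None, activation=None):
    # always a Linear first; Bias and Activation are appended only
    # when the corresponding argument is supplied
    layers = [Linear(nout=nout, init=init)]
    if bias is not None:
        layers.append(Bias(init=bias))
    if activation is not None:
        layers.append(Activation(transform=activation))
    return layers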
def test_conv_wrapper(backend_default):
    """
    Verify that the Conv wrapper constructs the right layer objects.
    """
    conv = Conv((4, 4, 3), Uniform())
    assert isinstance(conv, list)
    assert len(conv) == 1
    assert isinstance(conv[0], Convolution)

    conv = Conv((4, 4, 3), Uniform(), bias=Uniform())
    assert isinstance(conv, list)
    assert len(conv) == 1
    assert isinstance(conv[0], Convolution_bias)

    conv = Conv((4, 4, 3), Uniform(), activation=Rectlin())
    assert isinstance(conv, list)
    assert len(conv) == 2
    assert isinstance(conv[0], Convolution)
    assert isinstance(conv[1], Activation)

    conv = Conv((4, 4, 3), Uniform(), bias=Uniform(), activation=Rectlin())
    assert isinstance(conv, list)
    assert isinstance(conv[0], Convolution_bias)
    assert isinstance(conv[1], Activation)
    assert len(conv) == 2
def test_all_rand(backend_default, allrand_args, deltas_buffer):
    # test with random weights and random inputs
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    inp_rng = [0.0, rngmax]
    nin = 1024
    nout = 2048
    batch_size = 16
    NervanaObject.be.bsz = batch_size

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layer = Linear(nout=nout, init=init_unif)
    inp = np.random.random((nin, batch_size))
    inp *= inp_rng[1] - inp_rng[0]
    inp += inp_rng[0]
    inp = inp.astype(dtypeu)
    layer.configure(nin)
    layer.prev_layer = True  # Hack to force delta buffer allocation
    layer.allocate()

    layer.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    layer.set_deltas(deltas_buffer)

    out = layer.fprop(layer.be.array(inp)).get()
    w = layer.W.get()

    # the expected output using numpy
    out_exp = np.dot(w, inp)

    # for larger layers need to estimate numerical precision
    atol = 2 * est_mm_prec(w, inp, ntrials=1)
    assert np.allclose(out_exp, out, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(out - out_exp)), atol)

    err = np.random.random((nout, batch_size))
    err = err * (inp_rng[1] - inp_rng[0]) + inp_rng[0]
    err = err.astype(dtypeu)
    deltas = layer.bprop(layer.be.array(err)).get()
    dw = layer.dW.get()

    deltas_exp = np.dot(w.T, err)
    atol = 2 * est_mm_prec(w.T, err, ntrials=1)
    assert np.allclose(deltas_exp, deltas, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(deltas_exp - deltas)), atol)

    dw_exp = np.dot(err, inp.T)
    atol = 2 * est_mm_prec(err, inp.T, ntrials=1)
    assert np.allclose(dw_exp, dw, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(dw_exp - dw)), atol)
    return
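est_mm_prec is a test-suite helper not shown in this section; judging from its call sites here (and the permute-based atol estimates in the conv tests below), one plausible implementation sketch is:

def est_mm_prec_sketch(a, b, ntrials=1):
    # estimate float32 matmul round-off by permuting the reduction
    # order: exact arithmetic would give identical results, so the
    # observed spread bounds the numerical error of the dot product
    ref = np.dot(a, b)
    worst = 0.0
    for _ in range(ntrials):
        perm = np.random.permutation(a.shape[1])
        out = np.dot(a[:, perm], b[perm, :])
        worst = max(worst, np.max(np.abs(out - ref)))
    return worst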
def test_concat_sequence_l1_l1(backend_default, allrand_args, deltas_buffer):
    # test two linear layers that are merged with concat
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    # Diff size input steps
    nin = 128
    steps = [32, 64]
    nout = 256
    batch_size = 16
    NervanaObject.be.bsz = batch_size
    be = NervanaObject.be

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layers = [Sequential(Affine(nout=nout, init=init_unif)) for _ in (0, 1)]
    inputs = [be.array(dtypeu(np.random.random((nin, batch_size * step))))
              for step in steps]
    merge = MergeMultistream(layers, merge="recurrent")
    assert len(inputs) == len(layers)
    merge.configure(inputs)
    merge.allocate()

    merge.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    merge.set_deltas(deltas_buffer)

    out = merge.fprop(inputs).get()

    sublayers = [s.layers[0] for s in layers]
    weights = [layer.W.get() for layer in sublayers]
    out_exp = np.concatenate([np.dot(w, inp.get())
                              for (w, inp) in zip(weights, inputs)], axis=1)
    assert allclose_with_out(out, out_exp, atol=1e-3)

    err_lst = [dtypeu(np.random.random((nout, batch_size * step)))
               for step in steps]
    err_concat = be.array(np.concatenate(err_lst, axis=1))
    merge.bprop(err_concat)
    dW_exp_lst = [np.dot(err, inp.get().T)
                  for (err, inp) in zip(err_lst, inputs)]
    for layer, dW_exp in zip(sublayers, dW_exp_lst):
        assert allclose_with_out(layer.dW.get(), dW_exp)
    return
def test_linear_zeros(backend, basic_linargs):
    # basic sanity check with 0 weights random inputs
    nin, nout, batch_size = basic_linargs
    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size
    dtypeu = np.float32

    init_unif = Uniform(low=0.0, high=0.0)
    layer = Linear(nout=nout, init=init_unif)
    inp = layer.be.array(dtypeu(np.random.random((nin, batch_size))))
    out = layer.fprop(inp).get()

    assert np.min(out) == 0.0 and np.max(out) == 0.0

    err = dtypeu(np.zeros((nout, batch_size)))
    deltas = layer.bprop(layer.be.array(err)).asnumpyarray()
    assert np.min(deltas) == 0.0 and np.max(deltas) == 0.0

    dw = layer.dW.asnumpyarray()
    assert np.min(dw) == 0.0 and np.max(dw) == 0.0
    return
def test_conv_wrapper(backend_default):
    """
    Verify that the Conv wrapper constructs the right layer objects.
    """
    conv = Conv((4, 4, 3), Uniform())
    assert isinstance(conv, list)
    assert len(conv) == 1
    assert isinstance(conv[0], Convolution)

    conv = Conv((4, 4, 3), Uniform(), bias=Uniform())
    assert isinstance(conv, list)
    # temp roll back conv_bias
    if False and conv[0].be.is_mkl():
        assert len(conv) == 1
        assert isinstance(conv[0], Convolution_bias)
    else:
        assert len(conv) == 2
        assert isinstance(conv[0], Convolution)
        assert isinstance(conv[1], Bias)

    conv = Conv((4, 4, 3), Uniform(), activation=Rectlin())
    assert isinstance(conv, list)
    assert len(conv) == 2
    assert isinstance(conv[0], Convolution)
    assert isinstance(conv[1], Activation)

    conv = Conv((4, 4, 3), Uniform(), bias=Uniform(), activation=Rectlin())
    assert isinstance(conv, list)
    # temp roll back conv_bias
    if False and conv[0].be.is_mkl():
        assert isinstance(conv[0], Convolution_bias)
        assert isinstance(conv[1], Activation)
        assert len(conv) == 2
    else:
        assert isinstance(conv[0], Convolution)
        assert isinstance(conv[1], Bias)
        assert isinstance(conv[2], Activation)
        assert len(conv) == 3
def test_conv_ones(backend_default, ones_convargs, deltas_buffer):
    dtypeu = np.float32
    indim, nifm, fshape, nofm, batch_size, stride, pad = ones_convargs
    if (isinstance(NervanaObject.be, NervanaGPU) and
            NervanaObject.be.compute_capability < (5, 0)):
        if nifm % 4 != 0:
            pytest.skip(msg="C dim must be a multiple of 4 for Kepler bprop kernel")
    NervanaObject.be.bsz = batch_size

    # weights set to one
    init_unif = Uniform(low=1.0, high=1.0)
    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)
    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=stride, padding=pad, init=init_unif)
    inp = neon_layer.be.array(np.ones((insize, batch_size)))
    inp.lshape = inshape
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()

    neon_layer.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    neon_layer.set_deltas(deltas_buffer)

    # run fprop
    out = neon_layer.fprop(inp).get()

    # generate the reference layer
    ref_layer = ConvLayerRef(1, batch_size, identity,
                             inshape[0], inshape[1:3],
                             (fshape, fshape), nofm, stride, dtypeu,
                             padding=pad)
    # init weights to ones
    ref_layer.weights = np.ones(neon_layer.W.shape).T.astype(dtypeu)
    ref_layer.fprop(inp.get().T)
    out_exp = ref_layer.y.copy()
    assert allclose_with_out(out_exp.T, out, atol=0.0, rtol=0.0)

    # generate err array
    err = np.ones(out.shape).astype(np.float32)

    # run bprop
    neon_layer.bprop(neon_layer.be.array(err))
    dw = neon_layer.dW.get()

    # run bprop
    ref_layer.bprop(err.T.astype(dtypeu), 1.0)

    # expected output for updates is a uniform matrix with
    # all elements == ofmsize*batch_size
    updates_exp = ref_layer.updates.T

    # check dw from neon layer
    assert allclose_with_out(dw, updates_exp, atol=0.0, rtol=0.0)

    # the deltas are more complicated since the matrices are not
    # uniform; going to use the reference code directly here.
    # no tolerance here, should be exact
    dd = np.abs(ref_layer.berror_nopad.T - neon_layer.deltas.get())
    try:
        assert np.max(dd) == 0.0
    except AssertionError:
        if ones_convargs in ((32, 32, 3, 32, 64, 2, 0),
                             (32, 32, 3, 16, 64, 2, 0),
                             (32, 32, 3, 64, 64, 2, 0)):
            pytest.xfail(reason="xfail before mkl update. issue: #1020")
        else:
            assert np.max(dd) == 0.0
    return
def test_conv_rand(backend_default, rand_convargs):
    indim, nifm, fshape, nofm, batch_size, stride, rng_max, w_rng, pad = rand_convargs
    NervanaObject.be.bsz = batch_size
    inp_rng = [0.0, rng_max]
    dtypeu = np.float32
    init_unif = Uniform(low=w_rng[0], high=w_rng[1])

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    # generate neon conv layer
    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=stride, padding=pad, init=init_unif)

    # generate the reference layer
    ref_layer = ConvLayerRef(1, batch_size, identity,
                             inshape[0], inshape[1:3],
                             (fshape, fshape), nofm, stride, dtypeu,
                             padding=pad)

    # setup input in range inp_rng
    inpa = np.random.random((insize, batch_size))
    inpa *= inp_rng[1] - inp_rng[0]
    inpa += inp_rng[0]
    inpa = inpa.astype(dtypeu)
    inp = neon_layer.be.array(inpa)
    inp.lshape = inshape

    # run fprop on neon
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_layer.set_deltas([neon_layer.be.iobuf(inshape)])
    neon_out = neon_layer.fprop(inp).get()

    # pull neon weights into ref layer weights
    ref_layer.weights = neon_layer.W.get().T
    ref_layer.fprop(inpa.T)
    ref_out = np.copy(ref_layer.y)

    # estimate the numerical precision by permuting the order of ops
    # in the ref layer fprop calculation
    ref_layer.fprop(inpa.T, permute=True)
    ref_out_perm = ref_layer.y
    atol = 4 * np.max(np.abs(ref_out - ref_out_perm))

    # compare ref and neon layer fprop outputs
    # using the empirically determined atol
    assert allclose_with_out(ref_out.T, neon_out, atol=atol, rtol=1.e-4)

    # generate random deltas array
    erra = np.random.random(neon_out.shape)
    erra *= (inp_rng[1] - inp_rng[0])
    erra += inp_rng[0]
    erra = erra.astype(dtypeu)
    err = neon_layer.be.array(erra)

    # run neon bprop
    neon_deltas = neon_layer.bprop(err).get()
    neon_dW = neon_layer.dW.get()

    # run ref code bprop
    ref_layer.bprop(erra.T, 1.0)
    ref_deltas = np.copy(ref_layer.berror_nopad.T)
    ref_dW = np.copy(ref_layer.updates)

    # estimate precision using permutation
    # of operation order on ref layer code
    ref_layer.bprop(erra.T, 1.0, permute=True)
    ref_deltas_perm = ref_layer.berror_nopad.T
    ref_dW_perm = ref_layer.updates

    atol = 4 * np.max(np.abs(ref_deltas - ref_deltas_perm))
    assert allclose_with_out(ref_deltas, neon_deltas, atol=atol, rtol=1.e-4)

    atol = 4 * np.max(np.abs(ref_dW - ref_dW_perm))
    assert allclose_with_out(ref_dW.T, neon_dW, atol=atol, rtol=1.e-4)
    return
def test_conv_rand(backend, rand_convargs):
    indim, nifm, fshape, nofm, batch_size, rng_max, w_rng = rand_convargs
    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size
    inp_rng = [0.0, rng_max]
    dtypeu = np.float32
    init_unif = Uniform(low=w_rng[0], high=w_rng[1])

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    # generate neon conv layer
    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=1, padding=0, init=init_unif)

    # generate the reference layer
    ref_layer = ConvLayerRef(1, batch_size, identity,
                             inshape[0], inshape[1:3],
                             (fshape, fshape), nofm, 1, dtypeu)

    # setup input in range inp_rng
    inpa = np.random.random((insize, batch_size))
    inpa *= inp_rng[1] - inp_rng[0]
    inpa += inp_rng[0]
    inpa = inpa.astype(dtypeu)
    inp = neon_layer.be.array(inpa)
    inp.lshape = inshape

    # run fprop on neon
    neon_out = neon_layer.fprop(inp).get()

    # pull neon weights into ref layer weights
    ref_layer.weights = neon_layer.W.get().T
    ref_layer.fprop(inpa.T)
    ref_out = np.copy(ref_layer.y)

    # estimate the numerical precision by permuting the order of ops
    # in the ref layer fprop calculation
    ref_layer.fprop(inpa.T, permute=True)
    ref_out_perm = ref_layer.y
    atol = np.max(np.abs(ref_out - ref_out_perm))
    atol += 10  # fudge factor

    # compare ref and neon layer fprop outputs
    # using the empirically determined atol
    assert np.allclose(ref_out.T, neon_out, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(ref_out.T - neon_out)), atol)

    # generate random deltas array
    erra = np.random.random(neon_out.shape)
    erra *= (inp_rng[1] - inp_rng[0])
    erra += inp_rng[0]
    erra = erra.astype(dtypeu)
    err = neon_layer.be.array(erra)

    # run neon bprop
    neon_deltas = neon_layer.bprop(err).get()
    neon_dW = neon_layer.dW.get()

    # run ref code bprop
    ref_layer.bprop(erra.T, inpa.T, 1.0)
    ref_deltas = np.copy(ref_layer.berror.T)
    ref_dW = np.copy(ref_layer.updates)

    # estimate precision using permutation
    # of operation order on ref layer code
    ref_layer.bprop(erra.T, inpa.T, 1.0, permute=True)
    ref_deltas_perm = ref_layer.berror.T
    ref_dW_perm = ref_layer.updates

    atol = np.max(np.abs(ref_deltas - ref_deltas_perm))
    atol *= 10.0  # fudge factor
    assert np.allclose(ref_deltas, neon_deltas, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(ref_deltas - neon_deltas)), atol)

    atol = np.max(np.abs(ref_dW - ref_dW_perm))
    atol *= 10.0
    print('atol on bprop dW = %e' % atol)
    assert np.allclose(ref_dW.T, neon_dW, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(ref_dW.T - neon_dW)), atol)
    return