# Assumed imports for these neon tests (module paths may differ across neon
# versions); ConvLayerRef, identity, and allclose_with_out are reference
# helpers defined alongside this test module.
import numpy as np
import pytest

from neon import NervanaObject
from neon.backends.nervanagpu import NervanaGPU
from neon.backends.nervanamkl import NervanaMKL
from neon.initializers.initializer import Uniform
from neon.layers.layer import Convolution

from utils import allclose_with_out


def test_conv_rand(backend_default, rand_convargs, deltas_buffer):
    indim, nifm, fshape, nofm, batch_size, stride, rng_max, w_rng, pad = rand_convargs
    if isinstance(NervanaObject.be, NervanaGPU) and NervanaObject.be.compute_capability < (5, 0):
        if nifm % 4 != 0:
            pytest.skip(msg="C dim must be a multiple of 4 for Kepler bprop kernel")
    if isinstance(NervanaObject.be, NervanaMKL):
        pytest.xfail(reason="Known MKL bug. See #913")

    NervanaObject.be.bsz = batch_size
    inp_rng = [0.0, rng_max]
    dtypeu = np.float32
    init_unif = Uniform(low=w_rng[0], high=w_rng[1])

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    # generate neon conv layer
    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=stride, padding=pad, init=init_unif)

    # generate the reference layer
    ref_layer = ConvLayerRef(1,
                             batch_size,
                             identity,
                             inshape[0],
                             inshape[1:3],
                             (fshape, fshape),
                             nofm,
                             stride,
                             dtypeu,
                             padding=pad)

    # setup input in range inp_rng
    inpa = np.random.random((insize, batch_size))
    inpa *= inp_rng[1] - inp_rng[0]
    inpa += inp_rng[0]

    inpa = inpa.astype(dtypeu)
    inp = neon_layer.be.array(inpa)
    inp.lshape = inshape

    # run fprop on neon
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()

    neon_layer.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    neon_layer.set_deltas(deltas_buffer)

    neon_out = neon_layer.fprop(inp).get()

    # pull neon weights into ref layer weights
    ref_layer.weights = neon_layer.W.get().T
    ref_layer.fprop(inpa.T)
    ref_out = np.copy(ref_layer.y)

    # estimate the numerical precision by permuting the order of ops in the
    # ref layer fprop calculation
    ref_layer.fprop(inpa.T, permute=True)
    ref_out_perm = ref_layer.y
    atol = 4 * np.max(np.abs(ref_out - ref_out_perm))

    # compare ref and neon layer fprop outputs using the empirically
    # determined atol
    assert allclose_with_out(ref_out.T, neon_out, atol=atol, rtol=1.e-4)

    # generate random deltas array
    erra = np.random.random(neon_out.shape)
    erra *= (inp_rng[1] - inp_rng[0])
    erra += inp_rng[0]

    erra = erra.astype(dtypeu)
    err = neon_layer.be.array(erra)

    # run neon bprop
    neon_deltas = neon_layer.bprop(err).get()
    neon_dW = neon_layer.dW.get()

    # run ref code bprop
    ref_layer.bprop(erra.T, 1.0)
    ref_deltas = np.copy(ref_layer.berror_nopad.T)
    ref_dW = np.copy(ref_layer.updates)

    # estimate precision using a permutation of the operation order in the
    # ref layer code
    ref_layer.bprop(erra.T, 1.0, permute=True)
    ref_deltas_perm = ref_layer.berror_nopad.T
    ref_dW_perm = ref_layer.updates

    atol = 4 * np.max(np.abs(ref_deltas - ref_deltas_perm))
    assert allclose_with_out(ref_deltas, neon_deltas, atol=atol, rtol=1.e-4)

    atol = 4 * np.max(np.abs(ref_dW - ref_dW_perm))
    assert allclose_with_out(ref_dW.T, neon_dW, atol=atol, rtol=1.e-4)
    return
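
# A minimal standalone sketch of the tolerance-estimation idea used above:
# reordering float32 accumulation perturbs the result by roughly the rounding
# error of the computation, so the spread between orderings gives an empirical
# atol. The helper name is illustrative, not part of neon.
def permuted_sum_spread(x, trials=10):
    x = np.asarray(x, dtype=np.float32).ravel()
    base = float(x.sum())
    spread = 0.0
    for _ in range(trials):
        # sum the same values in a random order and track the worst deviation
        perm = np.random.permutation(x.size)
        spread = max(spread, abs(float(x[perm].sum()) - base))
    return spread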
def test_conv_ones(backend_default, ones_convargs, deltas_buffer):
    dtypeu = np.float32
    indim, nifm, fshape, nofm, batch_size, stride, pad = ones_convargs
    if isinstance(NervanaObject.be, NervanaGPU) and NervanaObject.be.compute_capability < (5, 0):
        if nifm % 4 != 0:
            pytest.skip(msg="C dim must be a multiple of 4 for Kepler bprop kernel")
    if isinstance(NervanaObject.be, NervanaMKL):
        pytest.xfail(reason="Known MKL bug. See #913")

    NervanaObject.be.bsz = batch_size

    # weights set to one
    init_unif = Uniform(low=1.0, high=1.0)

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=stride, padding=pad, init=init_unif)
    inp = neon_layer.be.array(np.ones((insize, batch_size)))
    inp.lshape = inshape
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()

    neon_layer.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    neon_layer.set_deltas(deltas_buffer)

    # run fprop
    out = neon_layer.fprop(inp).get()

    # generate the reference layer
    ref_layer = ConvLayerRef(1,
                             batch_size,
                             identity,
                             inshape[0],
                             inshape[1:3],
                             (fshape, fshape),
                             nofm,
                             stride,
                             dtypeu,
                             padding=pad)
    # init weights to ones
    ref_layer.weights = np.ones(neon_layer.W.shape).T.astype(dtypeu)
    ref_layer.fprop(inp.get().T)
    out_exp = ref_layer.y.copy()
    assert allclose_with_out(out_exp.T, out, atol=0.0, rtol=0.0)

    # generate err array
    err = np.ones(out.shape).astype(np.float32)

    # run bprop
    neon_layer.bprop(neon_layer.be.array(err))
    dw = neon_layer.dW.get()

    # run ref bprop
    ref_layer.bprop(err.T.astype(dtypeu), 1.0)

    # expected output for updates is a uniform matrix with all elements
    # == ofmsize * batch_size
    updates_exp = ref_layer.updates.T

    # check dw from neon layer
    assert allclose_with_out(dw, updates_exp, atol=0.0, rtol=0.0)

    # the deltas are more complicated since the matrices are not uniform,
    # so use the reference code directly here; no tolerance, should be exact
    dd = np.abs(ref_layer.berror_nopad.T - neon_layer.deltas.get())
    assert np.max(dd) == 0.0
    return
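
# A hedged, standalone sketch of the direct 2-D convolution a reference layer
# like ConvLayerRef computes during fprop, usable as an independent
# cross-check. The function name and the NCHW/KCRS layouts are illustrative
# assumptions, not neon APIs.
def naive_conv2d(x, w, stride=1, pad=0):
    # x: (N, C, H, W) input, w: (K, C, R, S) weights -> y: (N, K, P, Q)
    N, C, H, Wd = x.shape
    K, _, R, S = w.shape
    xp = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)))
    P = (H + 2 * pad - R) // stride + 1
    Q = (Wd + 2 * pad - S) // stride + 1
    y = np.zeros((N, K, P, Q), dtype=x.dtype)
    for p in range(P):
        for q in range(Q):
            # dot each receptive-field patch against all K filters at once
            patch = xp[:, :, p * stride:p * stride + R, q * stride:q * stride + S]
            y[:, :, p, q] = np.tensordot(patch, w, axes=([1, 2, 3], [1, 2, 3]))
    return y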
# Legacy variant of test_conv_ones using the older neon API (no deltas_buffer
# fixture, `be.bs` alias); it would shadow the version above if kept in the
# same module.
def test_conv_ones(backend_default, ones_convargs):
    dtypeu = np.float32
    indim, nifm, fshape, nofm, batch_size = ones_convargs
    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size

    # weights set to one
    init_unif = Uniform(low=1.0, high=1.0)

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=1, padding=0, init=init_unif)
    inp = neon_layer.be.array(np.ones((insize, batch_size)))
    inp.lshape = inshape
    neon_layer.configure(inshape)
    neon_layer.allocate()

    # run fprop
    out = neon_layer.fprop(inp).get()
    out_exp = fshape * fshape * nifm
    assert np.min(out) == out_exp and np.max(out) == out_exp

    # generate err array
    err = np.ones(out.shape)

    # run bprop
    neon_layer.bprop(neon_layer.be.array(err)).get()
    dw = neon_layer.dW.get()

    # generate the reference layer
    ref_layer = ConvLayerRef(1,
                             batch_size,
                             identity,
                             inshape[0],
                             inshape[1:3],
                             (fshape, fshape),
                             nofm,
                             1,
                             dtypeu)
    # init weights to ones
    ref_layer.weights = np.ones(neon_layer.W.shape).T.astype(dtypeu)

    # run ref bprop
    ref_layer.bprop(err.T.astype(dtypeu), inp.get().T.astype(dtypeu), 1.0)

    # expected output for updates is a uniform matrix with all elements
    # == ofmsize * batch_size
    updates_exp = ref_layer.ofmsize * batch_size

    # check dw from neon layer
    assert np.max(dw) == updates_exp and np.min(dw) == updates_exp

    # the deltas are more complicated since the matrices are not uniform,
    # so use the reference code directly here; no tolerance, should be exact
    dd = np.abs(ref_layer.berror.T - neon_layer.deltas.get())
    assert np.max(dd) == 0.0
    return
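
# Worked arithmetic behind the closed-form asserts above (illustrative helper,
# not part of neon): with all-ones input and weights, each fprop output sums
# fshape*fshape*nifm ones, and with all-ones deltas each dW element collects
# one unit contribution per output pixel per batch item.
def expected_ones_values(indim, nifm, fshape, batch_size):
    out_exp = fshape * fshape * nifm              # receptive-field size
    ofmsize = (indim - fshape + 1) ** 2           # output map size, stride 1, pad 0
    dw_exp = ofmsize * batch_size
    return out_exp, dw_exp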
# Legacy variant of test_conv_rand using the same older neon API; shadowed by
# the version above if both live in one module.
def test_conv_rand(backend_default, rand_convargs):
    indim, nifm, fshape, nofm, batch_size, rng_max, w_rng = rand_convargs
    NervanaObject.be.bsz = batch_size
    inp_rng = [0.0, rng_max]
    dtypeu = np.float32
    init_unif = Uniform(low=w_rng[0], high=w_rng[1])

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    # generate neon conv layer
    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=1, padding=0, init=init_unif)

    # generate the reference layer
    ref_layer = ConvLayerRef(1,
                             batch_size,
                             identity,
                             inshape[0],
                             inshape[1:3],
                             (fshape, fshape),
                             nofm,
                             1,
                             dtypeu)

    # setup input in range inp_rng
    inpa = np.random.random((insize, batch_size))
    inpa *= inp_rng[1] - inp_rng[0]
    inpa += inp_rng[0]

    inpa = inpa.astype(dtypeu)
    inp = neon_layer.be.array(inpa)
    inp.lshape = inshape

    # run fprop on neon
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_layer.set_deltas([neon_layer.be.iobuf(inshape)])
    neon_out = neon_layer.fprop(inp).get()

    # pull neon weights into ref layer weights
    ref_layer.weights = neon_layer.W.get().T
    ref_layer.fprop(inpa.T)
    ref_out = np.copy(ref_layer.y)

    # estimate the numerical precision by permuting the order of ops in the
    # ref layer fprop calculation
    ref_layer.fprop(inpa.T, permute=True)
    ref_out_perm = ref_layer.y
    atol = np.max(np.abs(ref_out - ref_out_perm))
    atol += 10  # fudge factor

    # compare ref and neon layer fprop outputs using the empirically
    # determined atol
    assert np.allclose(ref_out.T, neon_out, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(ref_out.T - neon_out)), atol)

    # generate random deltas array
    erra = np.random.random(neon_out.shape)
    erra *= (inp_rng[1] - inp_rng[0])
    erra += inp_rng[0]

    erra = erra.astype(dtypeu)
    err = neon_layer.be.array(erra)

    # run neon bprop
    neon_deltas = neon_layer.bprop(err).get()
    neon_dW = neon_layer.dW.get()

    # run ref code bprop
    ref_layer.bprop(erra.T, inpa.T, 1.0)
    ref_deltas = np.copy(ref_layer.berror.T)
    ref_dW = np.copy(ref_layer.updates)

    # estimate precision using a permutation of the operation order in the
    # ref layer code
    ref_layer.bprop(erra.T, inpa.T, 1.0, permute=True)
    ref_deltas_perm = ref_layer.berror.T
    ref_dW_perm = ref_layer.updates

    atol = np.max(np.abs(ref_deltas - ref_deltas_perm))
    atol *= 10.0  # fudge factor
    assert np.allclose(ref_deltas, neon_deltas, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(ref_deltas - neon_deltas)), atol)

    atol = np.max(np.abs(ref_dW - ref_dW_perm))
    atol *= 10.0
    print('atol on bprop dW = %e' % atol)
    assert np.allclose(ref_dW.T, neon_dW, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(ref_dW.T - neon_dW)), atol)
    return
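
# The newer tests above use allclose_with_out, which reports the deviation on
# failure instead of a bare assert message; a minimal sketch of that pattern
# (illustrative name, not neon's actual implementation):
def allclose_report(a, b, atol=0.0, rtol=1e-4):
    ok = np.allclose(a, b, atol=atol, rtol=rtol)
    if not ok:
        # surface the worst deviation so tolerance failures are diagnosable
        print('max abs diff %e exceeds atol %e' % (np.max(np.abs(a - b)), atol))
    return ok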
def test_convolution(transformer_factory):
    """
    test convolution forward and backward paths
    """
    N = 128
    C, K = 3, 8
    D, T = 1, 1
    H = W = 32
    R = S = 2

    padding = dict(pad_d=0, pad_h=0, pad_w=0)
    strides = dict(str_d=1, str_h=1, str_w=1)
    dilation = dict(dil_d=1, dil_h=1, dil_w=1)
    conv_params = padding.copy()
    conv_params.update(strides)
    conv_params.update(dilation)

    ax_i = ng.make_axes([ax.C, ax.D, ax.H, ax.W, ax.N])
    ax_f = ng.make_axes([ax.C, ax.T, ax.R, ax.S, ax.K])
    ax_i.set_shape((C, D, H, W, N))
    ax_f.set_shape((C, T, R, S, K))

    ax_o = ng.make_axes([
        ng.make_axis(roles=[ar.features_input]).named('C'),
        ng.make_axis(roles=[ar.features_0]).named('D'),
        ng.make_axis(roles=[ar.features_1]).named('H'),
        ng.make_axis(roles=[ar.features_2]).named('W'),
        ax.N
    ])

    ax_o[:-1].set_shape((K,
                         output_dim(D, T, padding['pad_d'], strides['str_d']),
                         output_dim(H, R, padding['pad_h'], strides['str_h']),
                         output_dim(W, S, padding['pad_w'], strides['str_w'])))

    inputs = ng.placeholder(axes=ax_i)
    filters = ng.placeholder(axes=ax_f)

    # randomly initialize
    input_value = rng.uniform(-1, 1, ax_i)
    filter_value = rng.uniform(-1, 1, ax_f)

    assert input_value.shape == ax_i.lengths
    assert filter_value.shape == ax_f.lengths

    output = ng.convolution(conv_params, inputs, filters, axes=ax_o)
    targets = ng.placeholder(axes=output.axes)

    costs = ng.cross_entropy_binary(ng.sigmoid(output), targets)
    error = ng.sum(costs, out_axes=()) / ng.batch_size(costs)
    d_inputs = ng.deriv(error, inputs)
    d_filters = ng.deriv(error, filters)

    targets_value = rng.uniform(.1, 0.9, output.axes)

    with executor([output, error, d_inputs, d_filters],
                  inputs, filters, targets) as conv_executor:
        result_ng, err_ng, gradI_ng, gradF_ng = \
            conv_executor(input_value, filter_value, targets_value)

    # Now compute reference values via NEON
    NervanaObject.be.bsz = N
    neon_layer = Convolution(fshape=(R, S, K), padding=padding, strides=strides)

    inp = neon_layer.be.array(input_value.reshape(C * H * W * D, N))
    neon_layer.W = neon_layer.be.array(filter_value.reshape(C * R * S * T, K))
    neon_layer.dW = neon_layer.be.empty_like(neon_layer.W)
    neon_layer.configure((C, H, W))
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_layer.set_deltas(DummyDeltaBuffers())

    result_ne = neon_layer.fprop(inp).get().reshape(output.axes.lengths)

    act_result_ne = 1. / (1.0 + np.exp(-result_ne))
    err = neon_layer.be.array(
        (act_result_ne - targets_value).reshape(-1, N) / float(N))
    gradI_ne = neon_layer.bprop(err).get().reshape(ax_i.lengths)
    gradF_ne = neon_layer.dW.get().reshape(ax_f.lengths)

    # Compare fprop
    ng.testing.assert_allclose(result_ng, result_ne, rtol=0, atol=1e-6)

    # Compare bprop
    ng.testing.assert_allclose(gradI_ng, gradI_ne, rtol=0, atol=1e-6)

    # Compare update
    ng.testing.assert_allclose(gradF_ng, gradF_ne, rtol=0, atol=1e-4)
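
# Hedged sketch of the two pieces of arithmetic the ngraph test relies on.
# First, the conv output-size formula assumed behind output_dim, for
# dilation 1 (argument order mirrors the call sites above; illustrative name):
def conv_output_dim(X, S, padding, stride):
    # standard convolution output size along one spatial axis
    return (X + 2 * padding - S) // stride + 1

# Second, the shortcut used to build `err` above: for binary cross-entropy
# composed with a sigmoid, d(cost)/d(logits) simplifies to
# (sigmoid(logits) - targets); the test divides by N to match the batch-mean
# error it asks ngraph to differentiate.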