def three():
    image_size = 32
    batch_size = 32
    input_filters = 512
    output_filters = 512
    np.random.seed(123)
    with make_backend(batch_size=batch_size,
                      datatype=np.float32, device_id=0) as be:
        W = np.random.randn(input_filters, 3, 3, output_filters).astype(np.float32)
        W_cuda = MyTensor.from_np(W)
        print('type(W_cuda)', type(W_cuda))

        inputs = np.zeros((input_filters, image_size, image_size, batch_size),
                          dtype=np.float32)
        inputs[:] = np.random.randn(*inputs.shape)
        inputs_cuda = MyTensor.from_np(inputs)
        print('type(inputs_cuda)', type(inputs_cuda))

        conv = Convolution((3, 3, output_filters), strides=1, padding=1, be=be)  # , init=init)
        print('created conv')
        conv.W = W_cuda
        conv.configure((input_filters, image_size, image_size))
        conv.W = W_cuda
        print('configure done')

        outputs = np.zeros((image_size * image_size * output_filters, batch_size),
                           dtype=np.float32)
        outputs_cuda = MyTensor.from_np(outputs)
        conv.outputs = outputs_cuda

        conv.fprop(inputs_cuda)
        cuda.Context.synchronize()
        for it in range(3):
            start = time.time()
            conv.fprop(inputs_cuda)
            cuda.Context.synchronize()
            print('time=', time.time() - start)

        # outputs = outputs_cuda.get()
        outputs_cuda.to_host()
        print(outputs[1:3, 1:3])
        print('outputs.shape', outputs.shape)
        printDims(W=W, I=inputs)
        check(W=W, I=inputs, O=outputs, c=0, h=0, w=0, n=0, eps=1e-3)
        check(W=W, I=inputs, O=outputs, c=0, h=0, w=0, n=1, eps=1e-3)
        check(W=W, I=inputs, O=outputs, c=0, h=0, w=1, n=0, eps=1e-3)
        check(W=W, I=inputs, O=outputs, c=0, h=1, w=0, n=0, eps=1e-3)
        check(W=W, I=inputs, O=outputs, c=1, h=0, w=0, n=0, eps=1e-3)
        check(W=W, I=inputs, O=outputs, c=3, h=2, w=1, n=27, eps=1e-3)
        check(W=W, I=inputs, O=outputs, c=17, h=25, w=7, n=27, eps=1e-3)
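# Hypothetical reconstruction of the `check` helper called above (its real
# implementation is not shown in this file). It recomputes one output element
# by direct summation, assuming CHWN activation layout, a (C_in, R, S, C_out)
# filter layout, and the stride-1 / padding-1 configuration used in three(),
# then compares against the GPU result to within eps.
def check(W, I, O, c, h, w, n, eps):
    C_in, R, S, C_out = W.shape
    _, H, W_img, N = I.shape
    expected = 0.0
    for ci in range(C_in):
        for r in range(R):
            for s in range(S):
                h_in = h + r - 1  # padding == 1
                w_in = w + s - 1
                if 0 <= h_in < H and 0 <= w_in < W_img:
                    expected += I[ci, h_in, w_in, n] * W[ci, r, s, c]
    # assumes outputs are flattened as (C_out * H * W, N) in CHWN order
    actual = O[c * H * W_img + h * W_img + w, n]
    assert abs(actual - expected) < eps, (actual, expected)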
def test_conv_zeros(backend_default, zeros_convargs):
    fshape, nofm, batch_size = zeros_convargs
    NervanaObject.be.bsz = batch_size

    # basic sanity check with 0 weights random inputs
    init_unif = Uniform(low=0.0, high=0.0)
    inshape = (3, 32, 32)
    insize = np.prod(inshape)
    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=1, padding=0, init=init_unif)
    inp = neon_layer.be.array(np.random.random((insize, batch_size)))
    inp.lshape = inshape
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_layer.set_deltas([neon_layer.be.iobuf(inshape)])
    out = neon_layer.fprop(inp).get()
    assert np.min(out) == 0.0 and np.max(out) == 0.0

    err = np.zeros(out.shape)
    deltas = neon_layer.bprop(neon_layer.be.array(err)).get()
    assert np.min(deltas) == 0.0 and np.max(deltas) == 0.0

    dw = neon_layer.dW.get()
    assert np.min(dw) == 0.0 and np.max(dw) == 0.0
    return
def test_conv_ones(backend_default, ones_convargs):
    dtypeu = np.float32
    indim, nifm, fshape, nofm, batch_size = ones_convargs
    NervanaObject.be.bsz = batch_size

    # weights set to one
    init_unif = Uniform(low=1.0, high=1.0)

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=1, padding=0, init=init_unif)
    inp = neon_layer.be.array(np.ones((insize, batch_size)))
    inp.lshape = inshape
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_layer.set_deltas([neon_layer.be.iobuf(inshape)])

    # run fprop
    out = neon_layer.fprop(inp).get()
    out_exp = fshape * fshape * nifm
    assert np.min(out) == out_exp and np.max(out) == out_exp

    # generate err array
    err = np.ones(out.shape)

    # run bprop
    neon_layer.bprop(neon_layer.be.array(err)).get()
    dw = neon_layer.dW.get()

    # generate the reference layer
    ref_layer = ConvLayerRef(1, batch_size, identity, inshape[0], inshape[1:3],
                             (fshape, fshape), nofm, 1, dtypeu)
    # init weights to ones
    ref_layer.weights = np.ones(neon_layer.W.shape).T.astype(dtypeu)

    # run bprop
    ref_layer.bprop(err.T.astype(dtypeu), inp.get().T.astype(dtypeu), 1.0)

    # expected output for updates is a uniform matrix with
    # all elements == ofmsize * batch_size
    updates_exp = ref_layer.ofmsize * batch_size

    # check dw from the neon layer
    assert np.max(dw) == updates_exp and np.min(dw) == updates_exp

    # the deltas are more complicated since the matrices are not
    # uniform, so use the reference code directly here;
    # no tolerance, this should be exact
    dd = np.abs(ref_layer.berror.T - neon_layer.deltas.get())
    assert np.max(dd) == 0.0
    return
def test_conv_ones(backend_default, ones_convargs):
    dtypeu = np.float32
    indim, nifm, fshape, nofm, batch_size = ones_convargs
    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size

    # weights set to one
    init_unif = Uniform(low=1.0, high=1.0)

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=1, padding=0, init=init_unif)
    inp = neon_layer.be.array(np.ones((insize, batch_size)))
    inp.lshape = inshape
    neon_layer.configure(inshape)
    neon_layer.allocate()

    # run fprop
    out = neon_layer.fprop(inp).get()
    out_exp = fshape * fshape * nifm
    assert np.min(out) == out_exp and np.max(out) == out_exp

    # generate err array
    err = np.ones(out.shape)

    # run bprop
    neon_layer.bprop(neon_layer.be.array(err)).get()
    dw = neon_layer.dW.get()

    # generate the reference layer
    ref_layer = ConvLayerRef(1, batch_size, identity, inshape[0], inshape[1:3],
                             (fshape, fshape), nofm, 1, dtypeu)
    # init weights to ones
    ref_layer.weights = np.ones(neon_layer.W.shape).T.astype(dtypeu)

    # run bprop
    ref_layer.bprop(err.T.astype(dtypeu), inp.get().T.astype(dtypeu), 1.0)

    # expected output for updates is a uniform matrix with
    # all elements == ofmsize * batch_size
    updates_exp = ref_layer.ofmsize * batch_size

    # check dw from the neon layer
    assert np.max(dw) == updates_exp and np.min(dw) == updates_exp

    # the deltas are more complicated since the matrices are not
    # uniform, so use the reference code directly here;
    # no tolerance, this should be exact
    dd = np.abs(ref_layer.berror.T - neon_layer.deltas.get())
    assert np.max(dd) == 0.0
    return
def test_conv_rand(backend_default, rand_convargs):
    indim, nifm, fshape, nofm, batch_size, stride, rng_max, w_rng, pad = rand_convargs
    NervanaObject.be.bsz = batch_size
    inp_rng = [0.0, rng_max]
    dtypeu = np.float32
    init_unif = Uniform(low=w_rng[0], high=w_rng[1])

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    # generate neon conv layer
    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=stride, padding=pad, init=init_unif)

    # generate the reference layer
    ref_layer = ConvLayerRef(1, batch_size, identity, inshape[0], inshape[1:3],
                             (fshape, fshape), nofm, stride, dtypeu, padding=pad)

    # setup input in range inp_rng
    inpa = np.random.random((insize, batch_size))
    inpa *= inp_rng[1] - inp_rng[0]
    inpa += inp_rng[0]
    inpa = inpa.astype(dtypeu)
    inp = neon_layer.be.array(inpa)
    inp.lshape = inshape

    # run fprop on neon
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_layer.set_deltas([neon_layer.be.iobuf(inshape)])
    neon_out = neon_layer.fprop(inp).get()

    # pull neon weights into ref layer weights
    ref_layer.weights = neon_layer.W.get().T
    ref_layer.fprop(inpa.T)
    ref_out = np.copy(ref_layer.y)

    # estimate the numerical precision by
    # permuting order of ops in ref layer
    # fprop calculation
    ref_layer.fprop(inpa.T, permute=True)
    ref_out_perm = ref_layer.y
    atol = 4 * np.max(np.abs(ref_out - ref_out_perm))

    # compare ref and neon layer fprop outputs
    # using the empirically determined atol
    assert allclose_with_out(ref_out.T, neon_out, atol=atol, rtol=1.e-4)

    # generate random deltas array
    erra = np.random.random(neon_out.shape)
    erra *= (inp_rng[1] - inp_rng[0])
    erra += inp_rng[0]
    erra = erra.astype(dtypeu)
    err = neon_layer.be.array(erra)

    # run neon bprop
    neon_deltas = neon_layer.bprop(err).get()
    neon_dW = neon_layer.dW.get()

    # run ref code bprop
    ref_layer.bprop(erra.T, 1.0)
    ref_deltas = np.copy(ref_layer.berror_nopad.T)
    ref_dW = np.copy(ref_layer.updates)

    # estimate precision using permutation
    # of operation order on ref layer code
    ref_layer.bprop(erra.T, 1.0, permute=True)
    ref_deltas_perm = ref_layer.berror_nopad.T
    ref_dW_perm = ref_layer.updates

    atol = 4 * np.max(np.abs(ref_deltas - ref_deltas_perm))
    assert allclose_with_out(ref_deltas, neon_deltas, atol=atol, rtol=1.e-4)

    atol = 4 * np.max(np.abs(ref_dW - ref_dW_perm))
    assert allclose_with_out(ref_dW.T, neon_dW, atol=atol, rtol=1.e-4)
    return
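# Note on the tolerance heuristic above: running the reference computation a
# second time with a permuted summation order gives an empirical measure of
# floating-point rounding noise, and a small multiple of that spread is used
# as the absolute tolerance. A minimal sketch of the same idea (assumes numpy
# imported as np, as elsewhere in this file; names here are illustrative):
def empirical_atol(out_a, out_b, factor=4.0):
    # out_a and out_b are numerically equivalent arrays whose elements were
    # accumulated in different orders; their spread bounds the rounding error
    return factor * np.max(np.abs(out_a - out_b))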
def test_convolution(transformer_factory):
    """
    test convolution forward path
    """
    N = 128
    C, K = 3, 8
    D, T = 1, 1
    H = W = 32
    R = S = 2

    padding = dict(pad_d=0, pad_h=0, pad_w=0)
    strides = dict(str_d=1, str_h=1, str_w=1)
    conv_params = padding.copy()
    conv_params.update(strides)

    ax_i = ng.make_axes([ax.C, ax.D, ax.H, ax.W, ax.N])
    ax_f = ng.make_axes([ax.C, ax.T, ax.R, ax.S, ax.K])
    ax_i.set_shape((C, D, H, W, N))
    ax_f.set_shape((C, T, R, S, K))
    ax_o = ng.make_axes([
        ng.make_axis(ax_f.role_axes(ar.Channelout)[0].length,
                     name='C', roles=[ar.Channel]),
        spatial_axis(ax_i, ax_f, padding['pad_d'], strides['str_d'], role=ar.Depth),
        spatial_axis(ax_i, ax_f, padding['pad_h'], strides['str_h'], role=ar.Height),
        spatial_axis(ax_i, ax_f, padding['pad_w'], strides['str_w'], role=ar.Width),
        ax.N
    ])

    inputs = ng.placeholder(axes=ax_i)
    filters = ng.placeholder(axes=ax_f)

    # randomly initialize
    input_value = rng.uniform(-1, 1, ax_i)
    filter_value = rng.uniform(-1, 1, ax_f)

    assert input_value.shape == ax_i.lengths
    assert filter_value.shape == ax_f.lengths

    output = ng.convolution(conv_params, inputs, filters, axes=ax_o)
    targets = ng.placeholder(axes=output.axes)

    costs = ng.cross_entropy_binary(ng.sigmoid(output), targets)
    error = ng.sum(costs, out_axes=()) / ng.batch_size(costs)
    d_inputs = ng.deriv(error, inputs)
    d_filters = ng.deriv(error, filters)

    targets_value = rng.uniform(.1, 0.9, output.axes)

    conv_executor = executor([output, error, d_inputs, d_filters],
                             inputs, filters, targets)
    result_ng, err_ng, gradI_ng, gradF_ng = conv_executor(
        input_value, filter_value, targets_value)

    # Now compute reference values via NEON
    NervanaObject.be.bsz = N
    neon_layer = Convolution(fshape=(R, S, K), padding=padding, strides=strides)

    inp = neon_layer.be.array(input_value.reshape(C * H * W * D, N))
    neon_layer.W = neon_layer.be.array(filter_value.reshape(C * R * S * T, K))
    neon_layer.dW = neon_layer.be.empty_like(neon_layer.W)
    neon_layer.configure((C, H, W))
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_layer.set_deltas(DummyDeltaBuffers())

    result_ne = neon_layer.fprop(inp).get().reshape(output.axes.lengths)

    act_result_ne = 1. / (1.0 + np.exp(-result_ne))
    err = neon_layer.be.array(
        (act_result_ne - targets_value).reshape(-1, N) / float(N))
    gradI_ne = neon_layer.bprop(err).get().reshape(ax_i.lengths)
    gradF_ne = neon_layer.dW.get().reshape(ax_f.lengths)

    # Compare fprop
    np.testing.assert_allclose(result_ng, result_ne, rtol=0, atol=1e-6)

    # Compare bprop
    np.testing.assert_allclose(gradI_ng, gradI_ne, rtol=0, atol=1e-6)

    # Compare update
    np.testing.assert_allclose(gradF_ng, gradF_ne, rtol=0, atol=1e-4)
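# Axis-naming convention used in test_convolution above (standard in the
# Nervana stack): activations are C x D x H x W x N (input channels, depth,
# height, width, batch) and filters are C x T x R x S x K (input channels;
# filter depth, height, width; output channels). The output axes ax_o combine
# the K output channels with the strided/padded spatial extents and batch N.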
def test_conv_ones(backend_default, ones_convargs):
    dtypeu = np.float32
    indim, nifm, fshape, nofm, batch_size, stride, pad = ones_convargs
    NervanaObject.be.bsz = batch_size

    # weights set to one
    init_unif = Uniform(low=1.0, high=1.0)

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=stride, padding=pad, init=init_unif)
    inp = neon_layer.be.array(np.ones((insize, batch_size)))
    inp.lshape = inshape
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_layer.set_deltas([neon_layer.be.iobuf(inshape)])

    # run fprop
    out = neon_layer.fprop(inp).get()

    # generate the reference layer
    ref_layer = ConvLayerRef(1, batch_size, identity, inshape[0], inshape[1:3],
                             (fshape, fshape), nofm, stride, dtypeu, padding=pad)
    # init weights to ones
    ref_layer.weights = np.ones(neon_layer.W.shape).T.astype(dtypeu)
    ref_layer.fprop(inp.get().T)
    out_exp = ref_layer.y.copy()
    assert np.allclose(out_exp.T, out, atol=0.0, rtol=0.0)

    # generate err array
    err = np.ones(out.shape).astype(np.float32)

    # run bprop
    neon_layer.bprop(neon_layer.be.array(err))
    dw = neon_layer.dW.get()

    # run bprop on the reference layer
    ref_layer.bprop(err.T.astype(dtypeu), 1.0)

    # expected updates come directly from the reference layer
    updates_exp = ref_layer.updates.T

    # check dw from neon layer
    assert np.allclose(dw, updates_exp, atol=0.0, rtol=0.0)

    # the deltas are more complicated since the matrices are not
    # uniform, so use the reference code directly here;
    # no tolerance, this should be exact
    dd = np.abs(ref_layer.berror_nopad.T - neon_layer.deltas.get())
    assert np.max(dd) == 0.0
    return
conv.W = W_cuda
print('type(W_cuda)', type(W_cuda))

inputs = np.zeros((input_filters, image_size, image_size, batch_size),
                  dtype=np.float32)
inputs[:] = np.random.randn(*inputs.shape)
inputs_cuda = gpuarray.to_gpu(inputs)
print('type(inputs_cuda)', type(inputs_cuda))

conv.configure((input_filters, image_size, image_size))
print('configure done')

outputs = np.zeros((image_size * image_size * output_filters, batch_size),
                   dtype=np.float32)
outputs_cuda = gpuarray.to_gpu(outputs)
conv.outputs = outputs_cuda

conv.fprop(inputs_cuda)
for it in range(3):
    start = time.time()
    for i in range(10):
        conv.fprop(inputs_cuda)
    cuda.Context.synchronize()
    print('time=', time.time() - start)

outputs = outputs_cuda.get()
print(outputs[1:3, 1:3])

assert abs(outputs[1, 1] - 1.33960593) < 1e-4
assert abs(outputs[1, 2] + 6.06682396) < 1e-4
assert abs(outputs[2, 2] - 8.76905346) < 1e-4
def test_conv_ones(backend_default, ones_convargs, deltas_buffer):
    dtypeu = np.float32
    indim, nifm, fshape, nofm, batch_size, stride, pad = ones_convargs
    if (isinstance(NervanaObject.be, NervanaGPU) and
            NervanaObject.be.compute_capability < (5, 0)):
        if nifm % 4 != 0:
            pytest.skip(msg="C dim must be a multiple of 4 for Kepler bprop kernel")

    NervanaObject.be.bsz = batch_size

    # weights set to one
    init_unif = Uniform(low=1.0, high=1.0)

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=stride, padding=pad, init=init_unif)
    inp = neon_layer.be.array(np.ones((insize, batch_size)))
    inp.lshape = inshape
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()

    neon_layer.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    neon_layer.set_deltas(deltas_buffer)

    # run fprop
    out = neon_layer.fprop(inp).get()

    # generate the reference layer
    ref_layer = ConvLayerRef(1, batch_size, identity, inshape[0], inshape[1:3],
                             (fshape, fshape), nofm, stride, dtypeu, padding=pad)
    # init weights to ones
    ref_layer.weights = np.ones(neon_layer.W.shape).T.astype(dtypeu)
    ref_layer.fprop(inp.get().T)
    out_exp = ref_layer.y.copy()
    assert allclose_with_out(out_exp.T, out, atol=0.0, rtol=0.0)

    # generate err array
    err = np.ones(out.shape).astype(np.float32)

    # run bprop
    neon_layer.bprop(neon_layer.be.array(err))
    dw = neon_layer.dW.get()

    # run bprop on the reference layer
    ref_layer.bprop(err.T.astype(dtypeu), 1.0)

    # expected updates come directly from the reference layer
    updates_exp = ref_layer.updates.T

    # check dw from neon layer
    assert allclose_with_out(dw, updates_exp, atol=0.0, rtol=0.0)

    # the deltas are more complicated since the matrices are not
    # uniform, so use the reference code directly here;
    # no tolerance, this should be exact
    dd = np.abs(ref_layer.berror_nopad.T - neon_layer.deltas.get())
    try:
        assert np.max(dd) == 0.0
    except AssertionError:
        if ones_convargs in ((32, 32, 3, 32, 64, 2, 0),
                             (32, 32, 3, 16, 64, 2, 0),
                             (32, 32, 3, 64, 64, 2, 0)):
            pytest.xfail(reason="xfail before mkl update. issue: #1020")
        else:
            assert np.max(dd) == 0.0
    return
class ResidualModule(LayerContainer):
    """
    Layer container that encapsulates a sequential main branch plus a residual
    skip branch, optionally containing a projection.

    Arguments:
        layers (list): The main-branch layers: a Sequential, a list of layers
                       (including layer containers), or a single Layer.
        projection (Initializer, optional): If a valid Initializer is supplied,
            the skip branch will perform a 1x1 convolution with appropriate
            striding to match the size and shape of the output of the main
            branch. The default is None, which means that the input to the
            module will be added directly to the output of the main branch
            with no projection applied. NB: IdentityInit is treated
            differently from regular Initializers in that the projection is
            applied, but those identity weights are never updated.
    """

    def __init__(self, layers, projection=None, name="residual"):
        super(ResidualModule, self).__init__(name)

        if isinstance(layers, Sequential):
            self.layers = [layers]
        elif isinstance(layers, list):
            if isinstance(layers[0], Sequential):
                self.layers = layers
            else:
                self.layers = [Sequential(layers)]
        elif isinstance(layers, Layer):
            self.layers = [Sequential([layers])]
        else:
            raise ValueError("Incompatible element for ResidualModule container")
        convlayers = [l for l in self.layers[0].layers if type(l) is Convolution]
        nofm = convlayers[-1].convparams["K"]
        skip_stride = convlayers[-2].convparams["str_h"]

        self.owns_output = True
        self.error_views = None
        self.projection = projection
        if projection is not None:
            self.skip_layer = Convolution((1, 1, nofm), init=projection,
                                          strides=skip_stride)
            if projection.name != "Identity":
                self.layers.append(self.skip_layer)
        else:
            self.skip_layer = None

    def configure(self, in_obj):
        """
        Sets shape-based parameters of this layer given an input tuple or int
        or input layer.

        Arguments:
            in_obj (int, tuple, Layer or Tensor or dataset): object that
                provides shape information for layer

        Returns:
            (tuple): shape of output data
        """
        super(ResidualModule, self).configure(in_obj)
        self.layers[0].configure(in_obj)
        self.out_shape = self.layers[0].out_shape
        if self.skip_layer is not None:
            self.skip_layer.configure(in_obj)
        return self

    # deserialization is not yet automated for this
    @classmethod
    def gen_class(cls, pdict):
        key = "projection"
        if pdict.get(key, None) is not None:
            config = pdict[key].get("config", {})
            pdict[key] = load_class(pdict[key]["type"]).gen_class(config)
        return super(ResidualModule, cls).gen_class(pdict)

    def nested_str(self, level=0):
        ss = super(ResidualModule, self).nested_str(level)
        if self.skip_layer is not None:
            ss += "\n" + " " * level + self.skip_layer.nested_str(level + 1)
        return ss

    def allocate(self, shared_outputs=None):
        self.outputs = self.be.iobuf(self.out_shape, shared=shared_outputs)
        self.layers[0].allocate(self.outputs)
        if self.skip_layer is not None:
            self.skip_layer.allocate(self.outputs)

    def set_deltas(self, delta_buffers):
        assert len(delta_buffers) == 4, "Need extra delta buffer pool for residual layers"
        self.layers[0].allocate_deltas(delta_buffers[1:3])
        self.layers[0].layers[0].set_deltas(delta_buffers[0:1])
        if self.skip_layer is not None:
            self.skip_layer.set_deltas(delta_buffers[0:1])
        self.deltas = self.be.iobuf(self.in_shape, shared=delta_buffers[0])
        delta_buffers.reverse()

    def fprop(self, inputs, inference=False):
        self.inputs = inputs
        self.layers[0].fprop(inputs, inference)
        if self.skip_layer is not None:
            self.skip_layer.fprop(inputs, inference, beta=1.0)
        else:
            self.outputs[:] = self.outputs + inputs
        return self.outputs

    def bprop(self, error, alpha=1.0, beta=0.0):
        if self.skip_layer is not None:
            self.skip_layer.bprop(error, alpha=alpha)
        else:
            self.deltas[:] = error
        self.layers[0].bprop(error, alpha=alpha, beta=1.0)
        return self.deltas

    def get_terminal(self):
        return self.layers[0].get_terminal()
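# A minimal usage sketch for ResidualModule (an illustration, not taken from
# this file): the main branch is a pair of 3x3 convolutions, and supplying an
# initializer as `projection` adds a strided 1x1 convolution on the skip path.
# Assumes neon's layer and initializer classes are available as shown.
from neon.initializers import Kaiming
from neon.layers import Convolution as Conv

main_branch = [Conv((3, 3, 64), strides=2, padding=1, init=Kaiming()),
               Conv((3, 3, 64), strides=1, padding=1, init=Kaiming())]
res = ResidualModule(main_branch, projection=Kaiming())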
def test_conv_rand(backend, rand_convargs):
    indim, nifm, fshape, nofm, batch_size, rng_max, w_rng = rand_convargs
    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size
    inp_rng = [0.0, rng_max]
    dtypeu = np.float32
    init_unif = Uniform(low=w_rng[0], high=w_rng[1])

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    # generate neon conv layer
    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=1, padding=0, init=init_unif)

    # generate the reference layer
    ref_layer = ConvLayerRef(1, batch_size, identity, inshape[0], inshape[1:3],
                             (fshape, fshape), nofm, 1, dtypeu)

    # setup input in range inp_rng
    inpa = np.random.random((insize, batch_size))
    inpa *= inp_rng[1] - inp_rng[0]
    inpa += inp_rng[0]
    inpa = inpa.astype(dtypeu)
    inp = neon_layer.be.array(inpa)
    inp.lshape = inshape

    # run fprop on neon
    neon_out = neon_layer.fprop(inp).get()

    # pull neon weights into ref layer weights
    ref_layer.weights = neon_layer.W.get().T
    ref_layer.fprop(inpa.T)
    ref_out = np.copy(ref_layer.y)

    # estimate the numerical precision by
    # permuting order of ops in ref layer
    # fprop calculation
    ref_layer.fprop(inpa.T, permute=True)
    ref_out_perm = ref_layer.y
    atol = np.max(np.abs(ref_out - ref_out_perm))
    atol += 10  # fudge factor

    # compare ref and neon layer fprop outputs
    # using the empirically determined atol
    assert np.allclose(ref_out.T, neon_out, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(ref_out.T - neon_out)), atol)

    # generate random deltas array
    erra = np.random.random(neon_out.shape)
    erra *= (inp_rng[1] - inp_rng[0])
    erra += inp_rng[0]
    erra = erra.astype(dtypeu)
    err = neon_layer.be.array(erra)

    # run neon bprop
    neon_deltas = neon_layer.bprop(err).get()
    neon_dW = neon_layer.dW.get()

    # run ref code bprop
    ref_layer.bprop(erra.T, inpa.T, 1.0)
    ref_deltas = np.copy(ref_layer.berror.T)
    ref_dW = np.copy(ref_layer.updates)

    # estimate precision using permutation
    # of operation order on ref layer code
    ref_layer.bprop(erra.T, inpa.T, 1.0, permute=True)
    ref_deltas_perm = ref_layer.berror.T
    ref_dW_perm = ref_layer.updates

    atol = np.max(np.abs(ref_deltas - ref_deltas_perm))
    atol *= 10.0  # fudge factor
    assert np.allclose(ref_deltas, neon_deltas, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(ref_deltas - neon_deltas)), atol)

    atol = np.max(np.abs(ref_dW - ref_dW_perm))
    atol *= 10.0
    print('atol on bprop dW = %e' % atol)
    assert np.allclose(ref_dW.T, neon_dW, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(ref_dW.T - neon_dW)), atol)
    return