def three():
    """Benchmark and spot-check fprop of a custom CUDA Convolution layer.

    Builds a 3x3 conv (512 -> 512 filters) on 32x32 inputs, times three
    forward passes, then verifies a handful of output elements against a
    reference `check` helper. All tensors use CHWN layout, as constructed
    below. Relies on module-level `np`, `time`, `make_backend`, `MyTensor`,
    `Convolution`, `cuda`, `printDims`, and `check`.
    """
    image_size = 32
    batch_size = 32
    input_filters = 512
    output_filters = 512
    # Fixed seed so the spot-checks below see reproducible data.
    np.random.seed(123)
    with make_backend(batch_size=batch_size, datatype=np.float32, device_id=0) as be:
        # Weights: (Cin, R, S, Cout) = (512, 3, 3, 512).
        W = np.random.randn(input_filters,3,3,output_filters).astype(np.float32)
        W_cuda = MyTensor.from_np(W)
        print('type(W_cuda)', type(W_cuda))
        # Inputs: (C, H, W, N) — channels-first, batch last.
        inputs = np.zeros((input_filters,image_size, image_size,batch_size), dtype=np.float32)
        inputs[:] = np.random.randn(*inputs.shape)
        inputs_cuda = MyTensor.from_np(inputs)
        print('type(inputs_cuda)', type(inputs_cuda))
        conv = Convolution((3, 3, output_filters), strides=1, padding=1, be=be) #, init=init)
        print('created conv')
        conv.W = W_cuda
        conv.configure((input_filters,image_size, image_size))
        # W is assigned again after configure() — presumably configure()
        # resets/reallocates the layer's weights. TODO(review): confirm this
        # double assignment is required and not a leftover.
        conv.W = W_cuda
        print('configure done')
        # Output buffer flattened to (H*W*Cout, N).
        outputs = np.zeros((image_size * image_size * output_filters, batch_size), dtype=np.float32)
        outputs_cuda = MyTensor.from_np(outputs)
        conv.outputs = outputs_cuda
        # Warm-up pass (kernel compilation / caches) before timing.
        conv.fprop(inputs_cuda)
        cuda.Context.synchronize()
        for it in range(3):
            start = time.time()
            conv.fprop(inputs_cuda)
            # Kernel launches are async; synchronize so the timing is real.
            cuda.Context.synchronize()
            print('time=', time.time() - start)
        # outputs = outputs_cuda.get()
        # NOTE(review): assumes to_host() copies device data back into the
        # `outputs` ndarray this MyTensor was created from — confirm.
        outputs_cuda.to_host()
        print(outputs[1:3,1:3])
        print('outputs.shape', outputs.shape)
        printDims(W=W, I=inputs)
        # Spot-check a few (channel, row, col, batch) output elements.
        check(W=W, I=inputs, O=outputs, c=0, h=0, w=0, n=0, eps=1e-3)
        check(W=W, I=inputs, O=outputs, c=0, h=0, w=0, n=1, eps=1e-3)
        check(W=W, I=inputs, O=outputs, c=0, h=0, w=1, n=0, eps=1e-3)
        check(W=W, I=inputs, O=outputs, c=0, h=1, w=0, n=0, eps=1e-3)
        check(W=W, I=inputs, O=outputs, c=1, h=0, w=0, n=0, eps=1e-3)
        check(W=W, I=inputs, O=outputs, c=3, h=2, w=1, n=27, eps=1e-3)
        check(W=W, I=inputs, O=outputs, c=17, h=25, w=7, n=27, eps=1e-3)
def test_convolution(transformer_factory):
    """Test the convolution forward and backward paths.

    Computes fprop output, input gradient, and filter gradient through the
    ngraph convolution under a sigmoid + binary cross-entropy loss, then
    compares all three against a reference neon ``Convolution`` layer.

    Fix vs. original: the ``inputs``/``filters`` placeholders were created
    twice; the first pair was dead code, immediately shadowed before use,
    and has been removed.
    """
    # Problem dimensions: batch N, channels C->K, depth D, filter T/R/S.
    N = 128
    C, K = 3, 8
    D, T = 1, 1
    H = W = 32
    R = S = 2
    padding = dict(pad_d=0, pad_h=0, pad_w=0)
    strides = dict(str_d=1, str_h=1, str_w=1)
    conv_params = padding.copy()
    conv_params.update(strides)

    # Input / filter axes in (C, D, H, W, N) and (C, T, R, S, K) order.
    ax_i = ng.make_axes([ax.C, ax.D, ax.H, ax.W, ax.N])
    ax_f = ng.make_axes([ax.C, ax.T, ax.R, ax.S, ax.K])
    ax_i.set_shape((C, D, H, W, N))
    ax_f.set_shape((C, T, R, S, K))
    # Output axes: output channels plus conv-derived spatial extents.
    ax_o = ng.make_axes([
        ng.make_axis(ax_f.role_axes(ar.Channelout)[0].length,
                     name='C', roles=[ar.Channel]),
        spatial_axis(ax_i, ax_f, padding['pad_d'], strides['str_d'], role=ar.Depth),
        spatial_axis(ax_i, ax_f, padding['pad_h'], strides['str_h'], role=ar.Height),
        spatial_axis(ax_i, ax_f, padding['pad_w'], strides['str_w'], role=ar.Width),
        ax.N
    ])

    # Randomly initialize host-side values for inputs and filters.
    input_value = rng.uniform(-1, 1, ax_i)
    filter_value = rng.uniform(-1, 1, ax_f)
    assert input_value.shape == ax_i.lengths
    assert filter_value.shape == ax_f.lengths

    inputs = ng.placeholder(ax_i)
    filters = ng.placeholder(ax_f)

    output = ng.convolution(conv_params, inputs, filters, axes=ax_o)
    targets = ng.placeholder(axes=output.axes)

    # Sigmoid + binary cross-entropy loss, averaged over the batch.
    costs = ng.cross_entropy_binary(ng.sigmoid(output), targets)
    error = ng.sum(costs, out_axes=()) / ng.batch_size(costs)
    d_inputs = ng.deriv(error, inputs)
    d_filters = ng.deriv(error, filters)

    targets_value = rng.uniform(.1, 0.9, output.axes)

    conv_executor = executor([output, error, d_inputs, d_filters],
                             inputs, filters, targets)
    result_ng, err_ng, gradI_ng, gradF_ng = conv_executor(
        input_value, filter_value, targets_value)

    # Now compute reference values via NEON
    NervanaObject.be.bsz = N
    neon_layer = Convolution(fshape=(R, S, K), padding=padding, strides=strides)

    # neon expects flattened (C*H*W*D, N) inputs and (C*R*S*T, K) weights.
    inp = neon_layer.be.array(input_value.reshape(C * H * W * D, N))
    neon_layer.W = neon_layer.be.array(filter_value.reshape(C * R * S * T, K))
    neon_layer.dW = neon_layer.be.empty_like(neon_layer.W)
    neon_layer.configure((C, H, W))
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_layer.set_deltas(DummyDeltaBuffers())

    result_ne = neon_layer.fprop(inp).get().reshape(output.axes.lengths)

    # Reproduce the loss gradient fed into neon's bprop: sigmoid(output)
    # minus targets, normalized by batch size.
    act_result_ne = 1. / (1.0 + np.exp(-result_ne))
    err = neon_layer.be.array(
        (act_result_ne - targets_value).reshape(-1, N) / float(N))
    gradI_ne = neon_layer.bprop(err).get().reshape(ax_i.lengths)
    gradF_ne = neon_layer.dW.get().reshape(ax_f.lengths)

    # Compare fprop
    np.testing.assert_allclose(result_ng, result_ne, rtol=0, atol=1e-6)

    # Compare bprop
    np.testing.assert_allclose(gradI_ng, gradI_ne, rtol=0, atol=1e-6)

    # Compare update
    np.testing.assert_allclose(gradF_ng, gradF_ne, rtol=0, atol=1e-4)
import time

# Benchmark script: one forward pass of a 3x3 Convolution (32 -> 32
# filters) on 64x64 inputs, batch 128, with weights/inputs/outputs moved to
# the GPU via pycuda's gpuarray. Relies on module-level `np`, `gpuarray`,
# `make_backend`, and `Convolution`.
image_size = 64
batch_size = 128
input_filters = 32
output_filters = 32
# Fixed seed for reproducible data.
np.random.seed(123)
with make_backend(batch_size=batch_size, datatype=np.float32, device_id=0) as be:
    conv = Convolution((3, 3, output_filters), strides=1, padding=1, be=be)
    print('created conv')
    # Weights: (Cin, R, S, Cout) = (32, 3, 3, 32).
    W = np.random.randn(input_filters,3,3,output_filters).astype(np.float32)
    W_cuda = gpuarray.to_gpu(W)
    conv.W = W_cuda
    print('type(W_cuda)', type(W_cuda))
    # Inputs: (C, H, W, N) — channels-first, batch last.
    inputs = np.zeros((input_filters,image_size, image_size,batch_size), dtype=np.float32)
    inputs[:] = np.random.randn(*inputs.shape)
    inputs_cuda = gpuarray.to_gpu(inputs)
    print('type(inputs_cuda)', type(inputs_cuda))
    # configure() takes the per-sample input shape (C, H, W).
    conv.configure((input_filters,image_size, image_size))
    print('configure done')
    # Output buffer flattened to (H*W*Cout, N).
    outputs = np.zeros((image_size * image_size * output_filters, batch_size), dtype=np.float32)
    outputs_cuda = gpuarray.to_gpu(outputs)
    conv.outputs = outputs_cuda
    # NOTE(review): launch is asynchronous; no synchronize/timing here, so
    # this only verifies fprop runs without error.
    conv.fprop(inputs_cuda)