Ejemplo n.º 1
0
def three():
    """Time and spot-check a 3x3 convolution forward pass on the backend.

    Draws seeded random weights and inputs (512 input and 512 output
    filters, 32x32 images, batch of 32), runs ``Convolution.fprop`` once
    untimed and then three more times under a wall-clock timer, and finally
    hands a fixed set of output coordinates to ``check``.
    """
    side = 32
    n_batch = 32
    c_in = 512
    c_out = 512

    # Seed before any draw: weights are sampled first, inputs second, so the
    # order of the two randn calls below determines the data.
    np.random.seed(123)

    with make_backend(batch_size=n_batch, datatype=np.float32, device_id=0) as be:
        weights = np.random.randn(c_in, 3, 3, c_out).astype(np.float32)
        weights_dev = MyTensor.from_np(weights)
        print('type(W_cuda)', type(weights_dev))

        images = np.random.randn(c_in, side, side, n_batch).astype(np.float32)
        images_dev = MyTensor.from_np(images)
        print('type(inputs_cuda)', type(images_dev))

        conv = Convolution((3, 3, c_out), strides=1, padding=1, be=be)
        print('created conv')
        conv.W = weights_dev

        conv.configure((c_in, side, side))
        # NOTE(review): W is assigned a second time after configure();
        # presumably a guard in case configure() touches W -- confirm.
        conv.W = weights_dev
        print('configure done')

        result = np.zeros((side * side * c_out, n_batch), dtype=np.float32)
        result_dev = MyTensor.from_np(result)
        conv.outputs = result_dev

        # First pass is not timed; synchronize so it has finished before the
        # timed passes start.
        conv.fprop(images_dev)
        cuda.Context.synchronize()

        for _ in range(3):
            started = time.time()
            conv.fprop(images_dev)
            # Wait for the device so the interval covers the whole pass.
            cuda.Context.synchronize()
            print('time=', time.time() - started)

        # NOTE(review): to_host() presumably copies the device buffer back
        # into `result`, the array the tensor was built from -- confirm
        # against MyTensor.
        result_dev.to_host()
        print(result[1:3, 1:3])
        print('outputs.shape', result.shape)
        printDims(W=weights, I=images)

        # (c, h, w, n) output coordinates handed to check(), in this order.
        probes = (
            (0, 0, 0, 0),
            (0, 0, 0, 1),
            (0, 0, 1, 0),
            (0, 1, 0, 0),
            (1, 0, 0, 0),
            (3, 2, 1, 27),
            (17, 25, 7, 27),
        )
        for c, h, w, n in probes:
            check(W=weights, I=images, O=result, c=c, h=h, w=w, n=n, eps=1e-3)
Ejemplo n.º 2
0
def test_convolution(transformer_factory):
    """
    Test convolution forward and backward paths against a NEON reference.

    Builds a convolution through ``ng`` with a sigmoid / binary
    cross-entropy cost, evaluates the output plus the derivatives with
    respect to the inputs and the filters, and compares all three with the
    values produced by a NEON ``Convolution`` layer (fprop, bprop, dW).

    Arguments:
        transformer_factory: not referenced in the body; presumably a pytest
            fixture that sets up the transformer under test -- confirm in
            the suite's conftest.
    """
    N = 128
    C, K = 3, 8
    D, T = 1, 1
    H = W = 32
    R = S = 2

    padding = dict(pad_d=0, pad_h=0, pad_w=0)
    strides = dict(str_d=1, str_h=1, str_w=1)
    conv_params = padding.copy()
    conv_params.update(strides)

    ax_i = ng.make_axes([ax.C, ax.D, ax.H, ax.W, ax.N])
    ax_f = ng.make_axes([ax.C, ax.T, ax.R, ax.S, ax.K])
    ax_i.set_shape((C, D, H, W, N))
    ax_f.set_shape((C, T, R, S, K))
    # Output axes: a channel axis sized from the filter's Channelout axis,
    # one spatial_axis() per depth/height/width role, then the batch axis.
    ax_o = ng.make_axes([
        ng.make_axis(ax_f.role_axes(ar.Channelout)[0].length,
                     name='C',
                     roles=[ar.Channel]),
        spatial_axis(ax_i,
                     ax_f,
                     padding['pad_d'],
                     strides['str_d'],
                     role=ar.Depth),
        spatial_axis(ax_i,
                     ax_f,
                     padding['pad_h'],
                     strides['str_h'],
                     role=ar.Height),
        spatial_axis(ax_i,
                     ax_f,
                     padding['pad_w'],
                     strides['str_w'],
                     role=ar.Width), ax.N
    ])

    # Placeholders are created exactly once (the original built each twice
    # and discarded the first pair).
    inputs = ng.placeholder(axes=ax_i)
    filters = ng.placeholder(axes=ax_f)

    # randomly initialize
    input_value = rng.uniform(-1, 1, ax_i)
    filter_value = rng.uniform(-1, 1, ax_f)

    assert input_value.shape == ax_i.lengths
    assert filter_value.shape == ax_f.lengths

    output = ng.convolution(conv_params, inputs, filters, axes=ax_o)
    targets = ng.placeholder(axes=output.axes)

    # Scalar cost: binary cross-entropy of sigmoid(output), summed over all
    # axes and divided by the batch size.
    costs = ng.cross_entropy_binary(ng.sigmoid(output), targets)
    error = ng.sum(costs, out_axes=()) / ng.batch_size(costs)
    d_inputs = ng.deriv(error, inputs)
    d_filters = ng.deriv(error, filters)

    targets_value = rng.uniform(.1, 0.9, output.axes)

    conv_executor = executor([output, error, d_inputs, d_filters], inputs,
                             filters, targets)
    result_ng, err_ng, gradI_ng, gradF_ng = conv_executor(
        input_value, filter_value, targets_value)

    # Now compute reference values via NEON
    NervanaObject.be.bsz = N
    neon_layer = Convolution(fshape=(R, S, K),
                             padding=padding,
                             strides=strides)

    # NEON is fed 2-D arrays: all non-batch (resp. non-K) dims flattened.
    inp = neon_layer.be.array(input_value.reshape(C * H * W * D, N))
    neon_layer.W = neon_layer.be.array(filter_value.reshape(C * R * S * T, K))
    neon_layer.dW = neon_layer.be.empty_like(neon_layer.W)
    neon_layer.configure((C, H, W))
    # NOTE(review): prev_layer = True presumably makes bprop produce input
    # deltas -- confirm against the NEON layer implementation.
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_layer.set_deltas(DummyDeltaBuffers())

    result_ne = neon_layer.fprop(inp).get().reshape(output.axes.lengths)

    # Error fed to NEON's bprop: (sigmoid(output) - targets) / N, i.e. the
    # derivative of the cost defined above with respect to the conv output.
    act_result_ne = 1. / (1.0 + np.exp(-result_ne))
    err = neon_layer.be.array(
        (act_result_ne - targets_value).reshape(-1, N) / float(N))
    gradI_ne = neon_layer.bprop(err).get().reshape(ax_i.lengths)
    gradF_ne = neon_layer.dW.get().reshape(ax_f.lengths)

    # Compare fprop
    np.testing.assert_allclose(result_ng, result_ne, rtol=0, atol=1e-6)

    # Compare bprop
    np.testing.assert_allclose(gradI_ng, gradI_ne, rtol=0, atol=1e-6)

    # Compare update
    np.testing.assert_allclose(gradF_ng, gradF_ne, rtol=0, atol=1e-4)
Ejemplo n.º 3
0
import time

# Script: run one forward pass of a 3x3 Convolution layer on seeded random
# data (64x64 images, batch of 128, 32 input and 32 output filters).
image_size = 64
batch_size = 128
input_filters = 32
output_filters = 32

# Fixed seed so the random weights and inputs below are reproducible.
np.random.seed(123)

with make_backend(batch_size=batch_size,
            datatype=np.float32, device_id=0) as be:
    conv = Convolution((3, 3, output_filters), strides=1, padding=1, be=be)
    print('created conv')
    # Random float32 weights with shape (input_filters, 3, 3, output_filters),
    # transferred with gpuarray.to_gpu and installed on the layer.
    W = np.random.randn(input_filters,3,3,output_filters).astype(np.float32)
    W_cuda = gpuarray.to_gpu(W)
    conv.W = W_cuda

    print('type(W_cuda)', type(W_cuda))

    # Random inputs with shape (input_filters, image_size, image_size,
    # batch_size); assigning into the float32 buffer casts randn's output.
    inputs = np.zeros((input_filters,image_size, image_size,batch_size), dtype=np.float32)
    inputs[:] = np.random.randn(*inputs.shape)
    inputs_cuda = gpuarray.to_gpu(inputs)

    print('type(inputs_cuda)', type(inputs_cuda))

    conv.configure((input_filters,image_size, image_size))
    print('configure done')
    # Zero-filled output buffer with one column per batch item, attached to
    # the layer before the forward pass.
    outputs = np.zeros((image_size * image_size * output_filters, batch_size), dtype=np.float32)
    outputs_cuda = gpuarray.to_gpu(outputs)
    conv.outputs = outputs_cuda
    # NOTE(review): nothing visible here copies outputs_cuda back to the
    # host, so `outputs` itself presumably still holds zeros -- confirm.
    conv.fprop(inputs_cuda)