Example #1
def test_conv_zeros(backend_default, zeros_convargs):
    fshape, nofm, batch_size = zeros_convargs

    NervanaObject.be.bsz = batch_size

    # basic sanity check: zero weights, random inputs
    init_unif = Uniform(low=0.0, high=0.0)
    inshape = (3, 32, 32)
    insize = np.prod(inshape)
    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=1, padding=0, init=init_unif)
    inp = neon_layer.be.array(np.random.random((insize, batch_size)))
    inp.lshape = inshape
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_layer.set_deltas([neon_layer.be.iobuf(inshape)])
    out = neon_layer.fprop(inp).get()
    assert np.min(out) == 0.0 and np.max(out) == 0.0

    err = np.zeros(out.shape)
    deltas = neon_layer.bprop(neon_layer.be.array(err)).get()
    assert np.min(deltas) == 0.0 and np.max(deltas) == 0.0

    dw = neon_layer.dW.get()
    assert np.min(dw) == 0.0 and np.max(dw) == 0.0
    return
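Why the zeros test can demand exact equality: once the convolution is unrolled into a matrix product, an all-zero weight matrix yields an exactly zero output, and an all-zero error yields an exactly zero weight update, with no rounding involved. A minimal standalone NumPy sketch of that algebra (made-up shapes, not part of the test):

import numpy as np

patches = np.random.random((27, 5))      # K*K*C unrolled input columns, 5 output positions
W = np.zeros((8, 27))                    # 8 output feature maps, all-zero weights

out = W.dot(patches)                     # fprop analogue: exactly zero
dW = np.zeros_like(out).dot(patches.T)   # update analogue with a zero error: exactly zero
assert out.max() == 0.0 and dW.max() == 0.0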
Example #2
def test_conv_zeros(backend, zeros_convargs):
    fshape, nofm, batch_size = zeros_convargs

    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size

    # basic sanity check: zero weights, random inputs
    init_unif = Uniform(low=0.0, high=0.0)
    inshape = (3, 32, 32)
    insize = np.prod(inshape)
    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=1,
                             padding=0,
                             init=init_unif)
    inp = neon_layer.be.array(np.random.random((insize, batch_size)))
    inp.lshape = inshape

    out = neon_layer.fprop(inp).get()
    assert np.min(out) == 0.0 and np.max(out) == 0.0

    err = np.zeros(out.shape)
    deltas = neon_layer.bprop(neon_layer.be.array(err)).get()
    assert np.min(deltas) == 0.0 and np.max(deltas) == 0.0

    dw = neon_layer.dW.get()
    assert np.min(dw) == 0.0 and np.max(dw) == 0.0
    return
Example #3
def test_conv_ones(backend, ones_convargs):
    dtypeu = np.float32
    indim, nifm, fshape, nofm, batch_size = ones_convargs
    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size

    # weights set to one
    init_unif = Uniform(low=1.0, high=1.0)

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=1,
                             padding=0,
                             init=init_unif)
    inp = neon_layer.be.array(np.ones((insize, batch_size)))
    inp.lshape = inshape

    # run fprop
    out = neon_layer.fprop(inp).get()
    out_exp = fshape * fshape * nifm
    assert np.min(out) == out_exp and np.max(out) == out_exp

    # generate err array
    err = np.ones(out.shape)

    # run bprop
    neon_layer.bprop(neon_layer.be.array(err)).get()
    dw = neon_layer.dW.get()

    # generate the reference layer
    ref_layer = ConvLayerRef(1, batch_size, identity, inshape[0], inshape[1:3],
                             (fshape, fshape), nofm, 1, dtypeu)

    # init weights to ones
    ref_layer.weights = np.ones(neon_layer.W.shape).T.astype(dtypeu)

    # run bprop
    ref_layer.bprop(err.T.astype(dtypeu), inp.get().T.astype(dtypeu), 1.0)

    # expected output for updates is uniform matrix with
    # all elements == ofmsize*batch_size
    updates_exp = ref_layer.ofmsize * batch_size

    # check dw from neon layer
    assert np.max(dw) == updates_exp and np.min(dw) == updates_exp

    # the deltas are more complicated since the matrices are not
    # uniform, so use the reference code directly here;
    # no tolerance here, the match should be exact
    dd = np.abs(ref_layer.berror.T - neon_layer.deltas.get())
    assert np.max(dd) == 0.0

    return
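The two expected constants in this test come from simple counting: with all-ones weights and all-ones inputs, every output pixel is a dot product of fshape * fshape * nifm ones, and with all-ones errors each weight accumulates one contribution per output position per batch item, i.e. ofmsize * batch_size. A standalone sketch with placeholder values (not drawn from ones_convargs):

import numpy as np

fshape, nifm, indim, batch_size = 3, 4, 8, 16
ofm_dim = indim - fshape + 1          # output size for stride 1, no padding
ofmsize = ofm_dim * ofm_dim

out_exp = fshape * fshape * nifm      # value of every fprop output element
updates_exp = ofmsize * batch_size    # value of every element of dW
assert out_exp == int(np.ones((fshape, fshape, nifm)).sum())
print(out_exp, updates_exp)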
Example #4
def three():
    image_size = 32
    batch_size = 32
    input_filters = 512
    output_filters = 512

    np.random.seed(123)
    
    with make_backend(batch_size=batch_size,
            datatype=np.float32, device_id=0) as be:
        W = np.random.randn(input_filters,3,3,output_filters).astype(np.float32)
        W_cuda = MyTensor.from_np(W)

        print('type(W_cuda)', type(W_cuda))

        inputs = np.zeros((input_filters,image_size, image_size,batch_size), dtype=np.float32)
        inputs[:] = np.random.randn(*inputs.shape)
        inputs_cuda = MyTensor.from_np(inputs)

        print('type(inputs_cuda)', type(inputs_cuda))

        conv = Convolution((3, 3, output_filters), strides=1, padding=1, be=be) #, init=init)
        print('created conv')
        conv.W = W_cuda

        conv.configure((input_filters,image_size, image_size))
        conv.W = W_cuda
        print('configure done')
        outputs = np.zeros((image_size * image_size * output_filters, batch_size), dtype=np.float32)
        outputs_cuda = MyTensor.from_np(outputs)
        conv.outputs = outputs_cuda
        conv.fprop(inputs_cuda)
        cuda.Context.synchronize()
        for it in range(3):
            start = time.time()
            conv.fprop(inputs_cuda)
            cuda.Context.synchronize()
            print('time=', time.time() - start)

    #    outputs = outputs_cuda.get()
        outputs_cuda.to_host()
        print(outputs[1:3,1:3])
        print('outputs.shape', outputs.shape)
        printDims(W=W, I=inputs)
        check(W=W, I=inputs, O=outputs, c=0, h=0, w=0, n=0, eps=1e-3)
        check(W=W, I=inputs, O=outputs, c=0, h=0, w=0, n=1, eps=1e-3)
        check(W=W, I=inputs, O=outputs, c=0, h=0, w=1, n=0, eps=1e-3)
        check(W=W, I=inputs, O=outputs, c=0, h=1, w=0, n=0, eps=1e-3)
        check(W=W, I=inputs, O=outputs, c=1, h=0, w=0, n=0, eps=1e-3)
        check(W=W, I=inputs, O=outputs, c=3, h=2, w=1, n=27, eps=1e-3)
        check(W=W, I=inputs, O=outputs, c=17, h=25, w=7, n=27, eps=1e-3)
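The loop above gives meaningful timings only because of the warm-up fprop and the cuda.Context.synchronize() calls: kernel launches are asynchronous, so without synchronizing, the clock would mostly measure launch overhead. A generic sketch of the same pattern, assuming a CUDA context is already active as in the example (fn is any callable that enqueues GPU work):

import time
import pycuda.driver as cuda

def time_gpu(fn, iters=3):
    fn()                               # warm-up: kernel selection, allocations
    cuda.Context.synchronize()
    best = float('inf')
    for _ in range(iters):
        start = time.time()
        fn()
        cuda.Context.synchronize()     # wait for the queued work to finish
        best = min(best, time.time() - start)
    return best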
Example #5
def simple1():
    image_size = 3
    batch_size = 32
    input_filters = 4
    output_filters = 4

    np.random.seed(123)

    with make_backend(batch_size=batch_size,
            datatype=np.float32, device_id=0) as be:

        W = np.random.randn(input_filters,3,3,output_filters).astype(np.float32)
        print('W.shape', W.shape)
        W_cuda = MyTensor.from_np(W)

        inputs = np.zeros((input_filters,image_size, image_size,batch_size), dtype=np.float32)
        inputs[:] = np.random.randn(*inputs.shape)
        inputs_cuda = MyTensor.from_np(inputs)

        print('type(inputs_cuda)', type(inputs_cuda))

        conv = Convolution((3, 3, output_filters), strides=1, padding=1, be=be) #, init=init)
        print('created conv')

        conv.configure((input_filters,image_size, image_size))
        conv.W = W_cuda
        print('configure done')
        outputs = np.zeros((image_size * image_size * output_filters, batch_size), dtype=np.float32)
        outputs_cuda = MyTensor.from_np(outputs)
        conv.outputs = outputs_cuda
        conv.fprop(inputs_cuda)
        cuda.Context.synchronize()

        for it in range(3):
            start = time.time()
            conv.fprop(inputs_cuda)
            cuda.Context.synchronize()
            print('time=', time.time() - start)

    #    outputs = outputs_cuda.get()
        outputs_cuda.to_host()
        printDims(W=W, I=inputs)
        check(W=W, I=inputs, O=outputs, c=0, h=0, w=0, n=0)
        check(W=W, I=inputs, O=outputs, c=0, h=0, w=0, n=1)
        check(W=W, I=inputs, O=outputs, c=0, h=1, w=0, n=0)
        check(W=W, I=inputs, O=outputs, c=0, h=0, w=1, n=0)
        check(W=W, I=inputs, O=outputs, c=1, h=0, w=0, n=0)
        check(W=W, I=inputs, O=outputs, c=3, h=2, w=1, n=27)

        print('outputs.shape', outputs.shape)
Example #6
    def __init__(self, layers, projection=None, name="residual"):
        super(ResidualModule, self).__init__(name)

        if isinstance(layers, Sequential):
            self.layers = [layers]
        elif isinstance(layers, list):
            if isinstance(layers[0], Sequential):
                self.layers = layers
            else:
                self.layers = [Sequential(layers)]
        elif isinstance(layers, Layer):
            self.layers = [Sequential([layers])]
        else:
            raise ValueError("Incompatible element for ResidualModule container")

        convlayers = [l for l in self.layers[0].layers if type(l) is Convolution]
        nofm = convlayers[-1].convparams["K"]
        skip_stride = convlayers[-2].convparams["str_h"]

        self.owns_output = True
        self.error_views = None
        self.projection = projection
        if projection is not None:
            self.skip_layer = Convolution((1, 1, nofm), init=projection, strides=skip_stride)
            if projection.name != "Identity":
                self.layers.append(self.skip_layer)
        else:
            self.skip_layer = None
Example #7
def test_conv_ones(backend_default, ones_convargs):
    dtypeu = np.float32
    indim, nifm, fshape, nofm, batch_size = ones_convargs
    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size

    # weights set to one
    init_unif = Uniform(low=1.0, high=1.0)

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    neon_layer = Convolution(fshape=(fshape, fshape, nofm), strides=1, padding=0, init=init_unif)
    inp = neon_layer.be.array(np.ones((insize, batch_size)))
    inp.lshape = inshape
    neon_layer.configure(inshape)
    neon_layer.allocate()
    # run fprop
    out = neon_layer.fprop(inp).get()
    out_exp = fshape * fshape * nifm
    assert np.min(out) == out_exp and np.max(out) == out_exp

    # generate err array
    err = np.ones(out.shape)

    # run bprop
    neon_layer.bprop(neon_layer.be.array(err)).get()
    dw = neon_layer.dW.get()

    # generate the reference layer
    ref_layer = ConvLayerRef(1, batch_size, identity, inshape[0], inshape[1:3], (fshape, fshape), nofm, 1, dtypeu)

    # init weights to ones
    ref_layer.weights = np.ones(neon_layer.W.shape).T.astype(dtypeu)

    # run bprop
    ref_layer.bprop(err.T.astype(dtypeu), inp.get().T.astype(dtypeu), 1.0)

    # expected output for updates is uniform matrix with
    # all elements == ofmsize*batch_size
    updates_exp = ref_layer.ofmsize * batch_size

    # check dw from neon layer
    assert np.max(dw) == updates_exp and np.min(dw) == updates_exp

    # the deltas are more complicated since the matrices are not
    # uniform, so use the reference code directly here;
    # no tolerance here, the match should be exact
    dd = np.abs(ref_layer.berror.T - neon_layer.deltas.get())
    assert np.max(dd) == 0.0

    return
Example #8
def test_convolution(transformer_factory):
    """
    test convolution forward path
    """
    N = 128
    C, K = 3, 8
    D, T = 1, 1
    H = W = 32
    R = S = 2

    padding = dict(pad_d=0, pad_h=0, pad_w=0)
    strides = dict(str_d=1, str_h=1, str_w=1)
    conv_params = padding.copy()
    conv_params.update(strides)

    ax_i = ng.make_axes([ax.C, ax.D, ax.H, ax.W, ax.N])
    ax_f = ng.make_axes([ax.C, ax.T, ax.R, ax.S, ax.K])
    ax_i.set_shape((C, D, H, W, N))
    ax_f.set_shape((C, T, R, S, K))
    ax_o = ng.make_axes([
        ng.make_axis(ax_f.role_axes(ar.Channelout)[0].length,
                     name='C',
                     roles=[ar.Channel]),
        spatial_axis(ax_i,
                     ax_f,
                     padding['pad_d'],
                     strides['str_d'],
                     role=ar.Depth),
        spatial_axis(ax_i,
                     ax_f,
                     padding['pad_h'],
                     strides['str_h'],
                     role=ar.Height),
        spatial_axis(ax_i,
                     ax_f,
                     padding['pad_w'],
                     strides['str_w'],
                     role=ar.Width), ax.N
    ])

    inputs = ng.placeholder(axes=ax_i)
    filters = ng.placeholder(axes=ax_f)

    # randomly initialize
    input_value = rng.uniform(-1, 1, ax_i)
    filter_value = rng.uniform(-1, 1, ax_f)

    assert input_value.shape == ax_i.lengths
    assert filter_value.shape == ax_f.lengths

    inputs = ng.placeholder(ax_i)
    filters = ng.placeholder(ax_f)

    output = ng.convolution(conv_params, inputs, filters, axes=ax_o)
    targets = ng.placeholder(axes=output.axes)

    costs = ng.cross_entropy_binary(ng.sigmoid(output), targets)
    error = ng.sum(costs, out_axes=()) / ng.batch_size(costs)
    d_inputs = ng.deriv(error, inputs)
    d_filters = ng.deriv(error, filters)

    targets_value = rng.uniform(.1, 0.9, output.axes)

    conv_executor = executor([output, error, d_inputs, d_filters], inputs,
                             filters, targets)
    result_ng, err_ng, gradI_ng, gradF_ng = conv_executor(
        input_value, filter_value, targets_value)

    # Now compute reference values via NEON
    NervanaObject.be.bsz = N
    neon_layer = Convolution(fshape=(R, S, K),
                             padding=padding,
                             strides=strides)

    inp = neon_layer.be.array(input_value.reshape(C * H * W * D, N))
    neon_layer.W = neon_layer.be.array(filter_value.reshape(C * R * S * T, K))
    neon_layer.dW = neon_layer.be.empty_like(neon_layer.W)
    neon_layer.configure((C, H, W))
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_layer.set_deltas(DummyDeltaBuffers())

    result_ne = neon_layer.fprop(inp).get().reshape(output.axes.lengths)

    act_result_ne = 1. / (1.0 + np.exp(-result_ne))
    err = neon_layer.be.array(
        (act_result_ne - targets_value).reshape(-1, N) / float(N))
    gradI_ne = neon_layer.bprop(err).get().reshape(ax_i.lengths)
    gradF_ne = neon_layer.dW.get().reshape(ax_f.lengths)

    # Compare fprop
    np.testing.assert_allclose(result_ng, result_ne, rtol=0, atol=1e-6)

    # Compare bprop
    np.testing.assert_allclose(gradI_ng, gradI_ne, rtol=0, atol=1e-6)

    # Compare update
    np.testing.assert_allclose(gradF_ng, gradF_ne, rtol=0, atol=1e-4)
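The error tensor handed to the neon bprop, (sigmoid(out) - target) / N, is the analytic derivative of the mean binary cross-entropy with respect to the pre-activation, which is why it can be compared against ng.deriv of the same cost. A quick standalone finite-difference check of that identity:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def bce(x, t):
    s = sigmoid(x)
    return -t * np.log(s) - (1.0 - t) * np.log(1.0 - s)

x, t, eps = 0.37, 0.8, 1e-6
numeric = (bce(x + eps, t) - bce(x - eps, t)) / (2 * eps)
analytic = sigmoid(x) - t
assert abs(numeric - analytic) < 1e-6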
Example #9
def test_conv_rand(backend_default, rand_convargs):
    indim, nifm, fshape, nofm, batch_size, stride, rng_max, w_rng, pad = rand_convargs
    NervanaObject.be.bsz = batch_size
    inp_rng = [0.0, rng_max]
    dtypeu = np.float32
    init_unif = Uniform(low=w_rng[0], high=w_rng[1])

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    # generate neon conv layer
    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=stride, padding=pad, init=init_unif)

    # generate the reference layer
    ref_layer = ConvLayerRef(1,
                             batch_size,
                             identity,
                             inshape[0],
                             inshape[1:3],
                             (fshape, fshape),
                             nofm,
                             stride,
                             dtypeu,
                             padding=pad)

    # setup input in range inp_rng
    inpa = np.random.random((insize, batch_size))
    inpa *= inp_rng[1] - inp_rng[0]
    inpa += inp_rng[0]
    inpa = inpa.astype(dtypeu)
    inp = neon_layer.be.array(inpa)
    inp.lshape = inshape

    # run fprop on neon
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_layer.set_deltas([neon_layer.be.iobuf(inshape)])
    neon_out = neon_layer.fprop(inp).get()

    # pull neon weights into ref layer weights
    ref_layer.weights = neon_layer.W.get().T
    ref_layer.fprop(inpa.T)
    ref_out = np.copy(ref_layer.y)

    # estimate the numerical precision by
    # permuting order of ops in ref layer
    # fprop calculation
    ref_layer.fprop(inpa.T, permute=True)
    ref_out_perm = ref_layer.y
    atol = 4*np.max(np.abs(ref_out - ref_out_perm))

    # compare ref and neon layer fprop outputs
    # using the empirically determined atol
    assert allclose_with_out(ref_out.T, neon_out, atol=atol, rtol=1.e-4)

    # generate random deltas array
    erra = np.random.random(neon_out.shape)
    erra *= (inp_rng[1] - inp_rng[0])
    erra += inp_rng[0]

    erra = erra.astype(dtypeu)
    err = neon_layer.be.array(erra)

    # run neon bprop
    neon_deltas = neon_layer.bprop(err).get()
    neon_dW = neon_layer.dW.get()

    # run ref code bprop
    ref_layer.bprop(erra.T, 1.0)
    ref_deltas = np.copy(ref_layer.berror_nopad.T)
    ref_dW = np.copy(ref_layer.updates)

    # estimate precision using permutation
    # of operation order on ref layer code
    ref_layer.bprop(erra.T, 1.0, permute=True)
    ref_deltas_perm = ref_layer.berror_nopad.T
    ref_dW_perm = ref_layer.updates

    atol = 4*np.max(np.abs(ref_deltas - ref_deltas_perm))
    assert allclose_with_out(ref_deltas, neon_deltas, atol=atol, rtol=1.e-4)

    atol = 4*np.max(np.abs(ref_dW - ref_dW_perm))
    assert allclose_with_out(ref_dW.T, neon_dW, atol=atol, rtol=1.e-4)
    return
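The permute=True reruns exist only to size the tolerance: floating-point accumulation is not associative, so repeating the reference computation with a permuted order of operations gives an empirical estimate of the rounding noise, which is then inflated (4x here) into the atol used against neon. A standalone sketch of the idea:

import numpy as np

rng = np.random.RandomState(0)
x = rng.uniform(0.0, 1e4, size=100000).astype(np.float32)

s1 = np.sum(x)                             # one accumulation order
s2 = np.sum(x[rng.permutation(x.size)])    # same data, permuted order
atol = 4 * abs(float(s1) - float(s2))      # empirical tolerance with headroom
print(s1, s2, atol)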
Example #10
def test_conv_ones(backend_default, ones_convargs):
    dtypeu = np.float32
    indim, nifm, fshape, nofm, batch_size, stride, pad = ones_convargs
    NervanaObject.be.bsz = batch_size

    # weights set to one
    init_unif = Uniform(low=1.0, high=1.0)

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=stride, padding=pad, init=init_unif)
    inp = neon_layer.be.array(np.ones((insize, batch_size)))
    inp.lshape = inshape
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_layer.set_deltas([neon_layer.be.iobuf(inshape)])
    # run fprop
    out = neon_layer.fprop(inp).get()

    # generate the reference layer
    ref_layer = ConvLayerRef(1,
                             batch_size,
                             identity,
                             inshape[0],
                             inshape[1:3],
                             (fshape, fshape),
                             nofm,
                             stride,
                             dtypeu,
                             padding=pad)
    # init weights to ones
    ref_layer.weights = np.ones(neon_layer.W.shape).T.astype(dtypeu)
    ref_layer.fprop(inp.get().T)
    out_exp = ref_layer.y.copy()
    assert np.allclose(out_exp.T, out, atol=0.0, rtol=0.0)

    # generate err array
    err = np.ones(out.shape).astype(np.float32)

    # run bprop
    neon_layer.bprop(neon_layer.be.array(err))
    dw = neon_layer.dW.get()

    # run bprop
    ref_layer.bprop(err.T.astype(dtypeu), 1.0)

    # expected output for updates is uniform matrix with
    # all elements == ofmsize*batch_size
    updates_exp = ref_layer.updates.T

    # check dw from neon layer
    assert np.allclose(dw, updates_exp, atol=0.0, rtol=0.0)

    # the deltas are more complicated since the matrices are not
    # uniform, so use the reference code directly here;
    # no tolerance here, the match should be exact
    dd = np.abs(ref_layer.berror_nopad.T - neon_layer.deltas.get())
    assert np.max(dd) == 0.0

    return
Example #11
import numpy as np
import pycuda.driver as cuda
#import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import time

image_size = 64
batch_size = 128
input_filters = 32
output_filters = 32

np.random.seed(123)

with make_backend(batch_size=batch_size,
            datatype=np.float32, device_id=0) as be:
    conv = Convolution((3, 3, output_filters), strides=1, padding=1, be=be)
    print('created conv')
    W = np.random.randn(input_filters,3,3,output_filters).astype(np.float32)
    W_cuda = gpuarray.to_gpu(W)
    conv.W = W_cuda

    print('type(W_cuda)', type(W_cuda))

    inputs = np.zeros((input_filters,image_size, image_size,batch_size), dtype=np.float32)
    inputs[:] = np.random.randn(*inputs.shape)
    inputs_cuda = gpuarray.to_gpu(inputs)

    print('type(inputs_cuda)', type(inputs_cuda))

    conv.configure((input_filters,image_size, image_size))
    print('configure done')
Example #12
def test_conv_rand(backend, rand_convargs):
    indim, nifm, fshape, nofm, batch_size, rng_max, w_rng = rand_convargs
    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size
    inp_rng = [0.0, rng_max]
    dtypeu = np.float32
    init_unif = Uniform(low=w_rng[0], high=w_rng[1])

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    # generate neon conv layer
    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=1,
                             padding=0,
                             init=init_unif)

    # generate the reference layer
    ref_layer = ConvLayerRef(1, batch_size, identity, inshape[0], inshape[1:3],
                             (fshape, fshape), nofm, 1, dtypeu)

    # setup input in range inp_rng
    inpa = np.random.random((insize, batch_size))
    inpa *= inp_rng[1] - inp_rng[0]
    inpa += inp_rng[0]
    inpa = inpa.astype(dtypeu)
    inp = neon_layer.be.array(inpa)
    inp.lshape = inshape

    # run fprop on neon
    neon_out = neon_layer.fprop(inp).get()

    # pull neon weights into ref layer weights
    ref_layer.weights = neon_layer.W.get().T
    ref_layer.fprop(inpa.T)
    ref_out = np.copy(ref_layer.y)

    # estimate the numerical precision by
    # permuting order of ops in ref layer
    # fprop calculation
    ref_layer.fprop(inpa.T, permute=True)
    ref_out_perm = ref_layer.y
    atol = np.max(np.abs(ref_out - ref_out_perm))
    atol += 10  # fudge factor

    # compare ref and neon layer fprop outputs
    # using the empirically determined atol
    assert np.allclose(ref_out.T, neon_out, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(ref_out.T - neon_out)), atol)

    # generate random deltas array
    erra = np.random.random(neon_out.shape)
    erra *= (inp_rng[1] - inp_rng[0])
    erra += inp_rng[0]

    erra = erra.astype(dtypeu)
    err = neon_layer.be.array(erra)

    # run neon bprop
    neon_deltas = neon_layer.bprop(err).get()
    neon_dW = neon_layer.dW.get()

    # run ref code bprop
    ref_layer.bprop(erra.T, inpa.T, 1.0)
    ref_deltas = np.copy(ref_layer.berror.T)
    ref_dW = np.copy(ref_layer.updates)

    # estimate precision using permutation
    # of operation order on ref layer code
    ref_layer.bprop(erra.T, inpa.T, 1.0, permute=True)
    ref_deltas_perm = ref_layer.berror.T
    ref_dW_perm = ref_layer.updates

    atol = np.max(np.abs(ref_deltas - ref_deltas_perm))
    atol *= 10.0  # fudge factor
    assert np.allclose(ref_deltas, neon_deltas, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(ref_deltas - neon_deltas)), atol)

    atol = np.max(np.abs(ref_dW - ref_dW_perm))
    atol *= 10.0
    print('atol on bprop dW = %e' % atol)
    assert np.allclose(ref_dW.T, neon_dW, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(ref_dW.T - neon_dW)), atol)
    return
Example #13
class ResidualModule(LayerContainer):
    """
    Layer that encapsulates a sequential container plus a residual skip branch,
    optionally containing a projection.

    Arguments:
        layers (list, Sequential, or Layer): Layers making up the main branch; may be a
                                             list of layers (including layer containers),
                                             a Sequential container, or a single Layer.
        projection (Initializer, optional): If a valid Initializer is supplied, then the skip
                                            layer will perform a 1x1 convolution with
                                            appropriate striding to match the size and shape
                                            of the output of the main branch.  The default is
                                            None, which means that the input to the module will
                                            be added directly to the output of the main branch
                                            with no projection applied.  NB: IdentityInit is
                                            treated differently from regular Initializers in
                                            that the projection is applied, but the identity
                                            weights are never updated.

    """

    def __init__(self, layers, projection=None, name="residual"):
        super(ResidualModule, self).__init__(name)

        if isinstance(layers, Sequential):
            self.layers = [layers]
        elif isinstance(layers, list):
            if isinstance(layers[0], Sequential):
                self.layers = layers
            else:
                self.layers = [Sequential(layers)]
        elif isinstance(layers, Layer):
            self.layers = [Sequential([layers])]
        else:
            raise ValueError("Incompatible element for ResidualModule container")

        convlayers = [l for l in self.layers[0].layers if type(l) is Convolution]
        nofm = convlayers[-1].convparams["K"]
        skip_stride = convlayers[-2].convparams["str_h"]

        self.owns_output = True
        self.error_views = None
        self.projection = projection
        if projection is not None:
            self.skip_layer = Convolution((1, 1, nofm), init=projection, strides=skip_stride)
            if projection.name != "Identity":
                self.layers.append(self.skip_layer)
        else:
            self.skip_layer = None

    def configure(self, in_obj):
        """
        Sets shape-based parameters of this layer given an input tuple, int,
        or input layer.

        Arguments:
            in_obj (int, tuple, Layer or Tensor or dataset): object that provides shape
                                                             information for layer

        Returns:
            (tuple): shape of output data
        """
        super(ResidualModule, self).configure(in_obj)
        self.layers[0].configure(in_obj)
        self.out_shape = self.layers[0].out_shape
        if self.skip_layer is not None:
            self.skip_layer.configure(in_obj)
        return self

    # deserialization is not yet automated for this
    @classmethod
    def gen_class(cls, pdict):
        key = "projection"
        if pdict.get(key, None) is not None:
            config = pdict[key].get("config", {})
            pdict[key] = load_class(pdict[key]["type"]).gen_class(config)
        return super(ResidualModule, cls).gen_class(pdict)

    def nested_str(self, level=0):
        ss = super(ResidualModule, self).nested_str(level)
        if self.skip_layer is not None:
            ss += "\n" + "  " * level + self.skip_layer.nested_str(level + 1)
        return ss

    def allocate(self, shared_outputs=None):
        self.outputs = self.be.iobuf(self.out_shape, shared=shared_outputs)
        self.layers[0].allocate(self.outputs)
        if self.skip_layer is not None:
            self.skip_layer.allocate(self.outputs)

    def set_deltas(self, delta_buffers):
        assert len(delta_buffers) == 4, "Need extra delta buffer pool for residual layers"
        self.layers[0].allocate_deltas(delta_buffers[1:3])
        self.layers[0].layers[0].set_deltas(delta_buffers[0:1])
        if self.skip_layer is not None:
            self.skip_layer.set_deltas(delta_buffers[0:1])
        self.deltas = self.be.iobuf(self.in_shape, shared=delta_buffers[0])
        delta_buffers.reverse()

    def fprop(self, inputs, inference=False):
        self.inputs = inputs
        self.layers[0].fprop(inputs, inference)
        if self.skip_layer is not None:
            self.skip_layer.fprop(inputs, inference, beta=1.0)
        else:
            self.outputs[:] = self.outputs + inputs
        return self.outputs

    def bprop(self, error, alpha=1.0, beta=0.0):
        if self.skip_layer is not None:
            self.skip_layer.bprop(error, alpha=alpha)
        else:
            self.deltas[:] = error
        self.layers[0].bprop(error, alpha=alpha, beta=1.0)
        return self.deltas

    def get_terminal(self):
        return self.layers[0].get_terminal()
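A minimal usage sketch (not from the source; it reuses only names that appear in these examples and assumes an initialized neon backend). Uniform stands in for whatever initializer a real model would use, and the main path must contain at least two Convolution layers because the skip stride is read from the second-to-last one:

init = Uniform(low=-0.1, high=0.1)

main_path = [Convolution((3, 3, 64), strides=2, padding=1, init=init),
             Convolution((3, 3, 64), strides=1, padding=1, init=init)]

# projection=init turns the skip branch into a learned 1x1 convolution whose
# stride matches the main path, so both branches produce the same output shape;
# projection=None adds the module input directly to the main-branch output.
module = ResidualModule(main_path, projection=init)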
Example #14
def test_conv_rand(backend, rand_convargs):
    indim, nifm, fshape, nofm, batch_size, rng_max, w_rng = rand_convargs
    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size
    inp_rng = [0.0, rng_max]
    dtypeu = np.float32
    init_unif = Uniform(low=w_rng[0], high=w_rng[1])

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    # generate neon conv layer
    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=1, padding=0, init=init_unif)

    # generate the reference layer
    ref_layer = ConvLayerRef(1,
                             batch_size,
                             identity,
                             inshape[0],
                             inshape[1:3],
                             (fshape, fshape),
                             nofm,
                             1,
                             dtypeu)

    # setup input in range inp_rng
    inpa = np.random.random((insize, batch_size))
    inpa *= inp_rng[1] - inp_rng[0]
    inpa += inp_rng[0]
    inpa = inpa.astype(dtypeu)
    inp = neon_layer.be.array(inpa)
    inp.lshape = inshape

    # run fprop on neon
    neon_out = neon_layer.fprop(inp).get()

    # pull neon weights into ref layer weights
    ref_layer.weights = neon_layer.W.get().T
    ref_layer.fprop(inpa.T)
    ref_out = np.copy(ref_layer.y)

    # estimate the numerical precision by
    # permuting order of ops in ref layer
    # fprop calculation
    ref_layer.fprop(inpa.T, permute=True)
    ref_out_perm = ref_layer.y
    atol = np.max(np.abs(ref_out - ref_out_perm))
    atol += 10  # fudge factor

    # compare ref and neon layer fprop outputs
    # using the empirically determined atol
    assert np.allclose(ref_out.T, neon_out, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(ref_out.T - neon_out)), atol)

    # generate random deltas array
    erra = np.random.random(neon_out.shape)
    erra *= (inp_rng[1] - inp_rng[0])
    erra += inp_rng[0]

    erra = erra.astype(dtypeu)
    err = neon_layer.be.array(erra)

    # run neon bprop
    neon_deltas = neon_layer.bprop(err).get()
    neon_dW = neon_layer.dW.get()

    # run ref code bprop
    ref_layer.bprop(erra.T, inpa.T, 1.0)
    ref_deltas = np.copy(ref_layer.berror.T)
    ref_dW = np.copy(ref_layer.updates)

    # estimate precision using permutation
    # of operation order on ref layer code
    ref_layer.bprop(erra.T, inpa.T, 1.0, permute=True)
    ref_deltas_perm = ref_layer.berror.T
    ref_dW_perm = ref_layer.updates

    atol = np.max(np.abs(ref_deltas - ref_deltas_perm))
    atol *= 10.0  # fudge factor
    assert np.allclose(ref_deltas, neon_deltas, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(ref_deltas - neon_deltas)), atol)

    atol = np.max(np.abs(ref_dW - ref_dW_perm))
    atol *= 10.0
    print('atol on bprop dW = %e' % atol)
    assert np.allclose(ref_dW.T, neon_dW, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(ref_dW.T - neon_dW)), atol)
    return
Example #15
def test_conv_ones(backend_default, ones_convargs):
    dtypeu = np.float32
    indim, nifm, fshape, nofm, batch_size, stride, pad = ones_convargs
    NervanaObject.be.bsz = batch_size

    # weights set to one
    init_unif = Uniform(low=1.0, high=1.0)

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=stride,
                             padding=pad,
                             init=init_unif)
    inp = neon_layer.be.array(np.ones((insize, batch_size)))
    inp.lshape = inshape
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_layer.set_deltas([neon_layer.be.iobuf(inshape)])
    # run fprop
    out = neon_layer.fprop(inp).get()

    # generate the reference layer
    ref_layer = ConvLayerRef(1,
                             batch_size,
                             identity,
                             inshape[0],
                             inshape[1:3], (fshape, fshape),
                             nofm,
                             stride,
                             dtypeu,
                             padding=pad)
    # init weights to ones
    ref_layer.weights = np.ones(neon_layer.W.shape).T.astype(dtypeu)
    ref_layer.fprop(inp.get().T)
    out_exp = ref_layer.y.copy()
    assert np.allclose(out_exp.T, out, atol=0.0, rtol=0.0)

    # generate err array
    err = np.ones(out.shape).astype(np.float32)

    # run bprop
    neon_layer.bprop(neon_layer.be.array(err))
    dw = neon_layer.dW.get()

    # run bprop
    ref_layer.bprop(err.T.astype(dtypeu), 1.0)

    # expected output for updates is uniform matrix with
    # all elements == ofmsize*batch_size
    updates_exp = ref_layer.updates.T

    # check dw from neon layer
    assert np.allclose(dw, updates_exp, atol=0.0, rtol=0.0)

    # the deltas are more complicated since the matrices are not
    # uniform, so use the reference code directly here;
    # no tolerance here, the match should be exact
    dd = np.abs(ref_layer.berror_nopad.T - neon_layer.deltas.get())
    assert np.max(dd) == 0.0

    return
Example #16
def test_conv_rand(backend_default, rand_convargs):
    indim, nifm, fshape, nofm, batch_size, stride, rng_max, w_rng, pad = rand_convargs
    NervanaObject.be.bsz = batch_size
    inp_rng = [0.0, rng_max]
    dtypeu = np.float32
    init_unif = Uniform(low=w_rng[0], high=w_rng[1])

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    # generate neon conv layer
    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=stride,
                             padding=pad,
                             init=init_unif)

    # generate the reference layer
    ref_layer = ConvLayerRef(1,
                             batch_size,
                             identity,
                             inshape[0],
                             inshape[1:3], (fshape, fshape),
                             nofm,
                             stride,
                             dtypeu,
                             padding=pad)

    # setup input in range inp_rng
    inpa = np.random.random((insize, batch_size))
    inpa *= inp_rng[1] - inp_rng[0]
    inpa += inp_rng[0]
    inpa = inpa.astype(dtypeu)
    inp = neon_layer.be.array(inpa)
    inp.lshape = inshape

    # run fprop on neon
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_layer.set_deltas([neon_layer.be.iobuf(inshape)])
    neon_out = neon_layer.fprop(inp).get()

    # pull neon weights into ref layer weights
    ref_layer.weights = neon_layer.W.get().T
    ref_layer.fprop(inpa.T)
    ref_out = np.copy(ref_layer.y)

    # estimate the numerical precision by
    # permuting order of ops in ref layer
    # fprop calculation
    ref_layer.fprop(inpa.T, permute=True)
    ref_out_perm = ref_layer.y
    atol = 4 * np.max(np.abs(ref_out - ref_out_perm))

    # compare ref and neon layer fprop outputs
    # using the empirically determined atol
    assert allclose_with_out(ref_out.T, neon_out, atol=atol, rtol=1.e-4)

    # generate random deltas array
    erra = np.random.random(neon_out.shape)
    erra *= (inp_rng[1] - inp_rng[0])
    erra += inp_rng[0]

    erra = erra.astype(dtypeu)
    err = neon_layer.be.array(erra)

    # run neon bprop
    neon_deltas = neon_layer.bprop(err).get()
    neon_dW = neon_layer.dW.get()

    # run ref code bprop
    ref_layer.bprop(erra.T, 1.0)
    ref_deltas = np.copy(ref_layer.berror_nopad.T)
    ref_dW = np.copy(ref_layer.updates)

    # estimate precision using permutation
    # of operation order on ref layer code
    ref_layer.bprop(erra.T, 1.0, permute=True)
    ref_deltas_perm = ref_layer.berror_nopad.T
    ref_dW_perm = ref_layer.updates

    atol = 4 * np.max(np.abs(ref_deltas - ref_deltas_perm))
    assert allclose_with_out(ref_deltas, neon_deltas, atol=atol, rtol=1.e-4)

    atol = 4 * np.max(np.abs(ref_dW - ref_dW_perm))
    assert allclose_with_out(ref_dW.T, neon_dW, atol=atol, rtol=1.e-4)
    return
Example #17
def test_conv_ones(backend_default, ones_convargs, deltas_buffer):
    dtypeu = np.float32
    indim, nifm, fshape, nofm, batch_size, stride, pad = ones_convargs
    if isinstance(NervanaObject.be, NervanaGPU) and NervanaObject.be.compute_capability < (5, 0):
        if nifm % 4 != 0:
            pytest.skip(msg="C dim must be a multiple of 4 for Kepler bprop kernel")

    NervanaObject.be.bsz = batch_size

    # weights set to one
    init_unif = Uniform(low=1.0, high=1.0)

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=stride, padding=pad, init=init_unif)
    inp = neon_layer.be.array(np.ones((insize, batch_size)))
    inp.lshape = inshape
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()

    neon_layer.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    neon_layer.set_deltas(deltas_buffer)

    # run fprop
    out = neon_layer.fprop(inp).get()

    # generate the reference layer
    ref_layer = ConvLayerRef(1,
                             batch_size,
                             identity,
                             inshape[0],
                             inshape[1:3],
                             (fshape, fshape),
                             nofm,
                             stride,
                             dtypeu,
                             padding=pad)
    # init weights to ones
    ref_layer.weights = np.ones(neon_layer.W.shape).T.astype(dtypeu)
    ref_layer.fprop(inp.get().T)
    out_exp = ref_layer.y.copy()
    assert allclose_with_out(out_exp.T, out, atol=0.0, rtol=0.0)

    # generate err array
    err = np.ones(out.shape).astype(np.float32)

    # run bprop
    neon_layer.bprop(neon_layer.be.array(err))
    dw = neon_layer.dW.get()

    # run bprop
    ref_layer.bprop(err.T.astype(dtypeu), 1.0)

    # expected output for updates is uniform matrix with
    # all elements == ofmsize*batch_size
    updates_exp = ref_layer.updates.T

    # check dw from neon layer
    assert allclose_with_out(dw, updates_exp, atol=0.0, rtol=0.0)

    # the deltas are more complicated since the matrices are not
    # uniform, so use the reference code directly here;
    # no tolerance here, the match should be exact
    dd = np.abs(ref_layer.berror_nopad.T - neon_layer.deltas.get())
    try:
        assert np.max(dd) == 0.0
    except AssertionError:
        if ones_convargs in ((32, 32, 3, 32, 64, 2, 0),
                             (32, 32, 3, 16, 64, 2, 0),
                             (32, 32, 3, 64, 64, 2, 0)):
            pytest.xfail(reason="xfail before mkl update. issue: #1020")
        else:
            assert np.max(dd) == 0.0

    return