Example #1
def test_wgan_cost(backend_default):
    """
    Set up a Wasserstein GANCost transform and make sure cost and errors are getting
    computed correctly.
    """
    be = backend_default
    cost = GANCost(func="wasserstein")
    y_data = be.iobuf(5).fill(1.)
    y_noise = be.iobuf(5).fill(2.)
    output = be.iobuf(1)
    expected = be.iobuf(1)
    delta = be.iobuf(5)

    # fprop for discriminator cost
    output[:] = cost(y_data, y_noise)
    expected[:] = be.sum(y_data - y_noise, axis=0)
    tensors_allclose(output, expected)

    # bprop for wasserstein cost
    delta[:] = cost.bprop_data(y_data)
    assert allclose_with_out(delta.get(), 1.)

    delta[:] = cost.bprop_noise(y_noise)
    assert allclose_with_out(delta.get(), -1.)

    delta[:] = cost.bprop_generator(y_noise)
    assert allclose_with_out(delta.get(), 1.)
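
For reference, the arithmetic checked above can be reproduced in plain NumPy. This is a minimal sketch, not neon code; the helper name and shapes are illustrative:

import numpy as np

def wasserstein_dis_cost(y_data, y_noise):
    # discriminator cost: sum of (y_data - y_noise) over the feature axis
    return np.sum(y_data - y_noise, axis=0)

y_data = np.full((5, 4), 1.0)    # 5 units, batch of 4
y_noise = np.full((5, 4), 2.0)

cost = wasserstein_dis_cost(y_data, y_noise)   # -5.0 for every example
grad_data = np.ones_like(y_data)               # matches bprop_data      -> +1
grad_noise = -np.ones_like(y_noise)            # matches bprop_noise     -> -1
grad_generator = np.ones_like(y_noise)         # matches bprop_generator -> +1
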
Example #2
def test_model_get_outputs_rnn(backend_default, data):

    dataset = PTB(50, path=data)
    dataiter = dataset.train_iter

    # weight initialization
    init = Constant(0.08)

    # model initialization
    layers = [
        Recurrent(150, init, activation=Logistic()),
        Affine(len(dataiter.vocab), init, bias=init, activation=Rectlin())
    ]

    model = Model(layers=layers)
    output = model.get_outputs(dataiter)

    assert output.shape == (dataiter.ndata, dataiter.seq_length, dataiter.nclass)

    # since the weights are initialized to a constant and the model is untrained,
    # the values along the feature dim should all be the same
    assert allclose_with_out(output[0, 0], output[0, 0, 0], rtol=0, atol=1e-4)
    assert allclose_with_out(output[0, 1], output[0, 1, 0], rtol=0, atol=1e-4)

    # along the time dim, the values should be increasing:
    assert np.alltrue(output[0, 2] > output[0, 1])
    assert np.alltrue(output[0, 1] > output[0, 0])
Example #3
def test_modified_gan_cost(backend_default):
    """
    Set up a modified GANCost transform and make sure cost and errors are getting
    computed correctly.
    """
    be = backend_default
    cost = GANCost(cost_type="dis", func="modified")

    y_data = be.iobuf(5).fill(1.)
    y_noise = be.iobuf(5).fill(2.)
    output = be.iobuf(1)
    expected = be.iobuf(1)
    delta = be.iobuf(5)

    # fprop for discriminator cost
    output[:] = cost(y_data, y_noise)
    expected[:] = -be.sum(be.safelog(y_data) + be.safelog(1-y_noise), axis=0)
    tensors_allclose(output, expected)

    # bprop for modified cost
    delta[:] = cost.bprop_data(y_data)
    assert allclose_with_out(delta.get(), -1. / 1)

    delta[:] = cost.bprop_noise(y_noise)
    assert allclose_with_out(delta.get(), 1. - 2.)

    delta[:] = cost.bprop_generator(y_noise)
    assert allclose_with_out(delta.get(), -1. / 2.)
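
A matching NumPy sketch for the modified (non-saturating) GAN cost this test appears to exercise; the clip stands in for neon's safelog, and the gradient formulas are the standard ones, annotated with the values the assertions expect:

import numpy as np

def modified_dis_cost(y_data, y_noise, eps=1e-32):
    # discriminator cost: -sum(log(y_data) + log(1 - y_noise)) over the feature axis
    return -np.sum(np.log(np.clip(y_data, eps, None)) +
                   np.log(np.clip(1.0 - y_noise, eps, None)), axis=0)

cost = modified_dis_cost(np.full((5, 4), 0.9), np.full((5, 4), 0.3))

y_data, y_noise = 1.0, 2.0
grad_data = -1.0 / y_data             # bprop_data      -> -1.0
grad_noise = 1.0 / (1.0 - y_noise)    # bprop_noise     -> -1.0 (written as 1. - 2. above)
grad_generator = -1.0 / y_noise       # bprop_generator -> -0.5
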
Example #4
def test_biSum(backend_default, fargs, deltas_buffer):

    seq_len, input_size, hidden_size, batch_size = fargs
    input_size *= 2

    in_shape = (input_size, seq_len)
    NervanaObject.be.bsz = batch_size

    bisum = BiSum()
    bisum.configure(in_shape)
    bisum.prev_layer = True

    bisum.allocate()
    bisum.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    bisum.set_deltas(deltas_buffer)

    # inputs
    inp_np = np.random.random((input_size, seq_len * batch_size))
    inp_be = bisum.be.array(inp_np)

    # outputs
    out_be = bisum.fprop(inp_be)
    del_be = bisum.bprop(out_be)

    out_ref = bisum.be.empty_like(out_be)
    out_ref[:] = inp_be[:input_size // 2] + inp_be[input_size // 2:]
    assert out_be.shape[0] * 2 == inp_be.shape[0]
    assert allclose_with_out(out_be.get(), out_ref.get(), rtol=0.0, atol=1.0e-5)

    assert allclose_with_out(del_be[:input_size // 2].get(), out_be.get(), rtol=0.0, atol=1.0e-5)
    assert allclose_with_out(del_be[input_size // 2:].get(), out_be.get(), rtol=0.0, atol=1.0e-5)
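
Read in plain NumPy, the BiSum behaviour asserted above is simply: fprop adds the two halves of the feature dimension, and bprop copies the incoming delta back into both halves. A minimal sketch under the same layout assumptions:

import numpy as np

inp = np.random.random((8, 6))    # input_size=8 (two directions of 4), seq_len*batch=6
out = inp[:4] + inp[4:]           # fprop: forward half + backward half
delta = np.vstack([out, out])     # bprop: both halves receive the same delta
assert delta.shape == inp.shape
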
def compare_helper(op, inA, inB, ng, nc, dtype):
    numpy_result = math_helper(np, op, inA, inB, dtype=np.float32).astype(dtype)

    nervanaGPU_result = math_helper(ng, op, inA, inB, dtype=dtype).get()
    allclose_with_out(numpy_result, nervanaGPU_result, rtol=0, atol=1e-5)

    nervanaCPU_result = math_helper(nc, op, inA, inB, dtype=dtype).get()
    allclose_with_out(numpy_result, nervanaCPU_result, rtol=0, atol=1e-5)
Example #6
def test_bibn(backend_default, fargs):

    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    NervanaObject.be.bsz = batch_size

    hidden_size = min(10, hidden_size)

    # setup the bi-directional rnn
    init_glorot = GlorotUniform()
    birnn = BiBNRNN(hidden_size, activation=Rectlinclip(slope=0), init=init_glorot)
    birnn.configure(in_shape)
    birnn.prev_layer = True
    birnn.allocate()
    birnn.set_deltas([birnn.be.iobuf(birnn.in_shape)])

    # test fprop

    # set the ff buffer
    inp_np = np.random.random(birnn.h_ff_buffer.shape)
    inp_be = birnn.be.array(inp_np)
    birnn.h_ff_buffer[:] = inp_np

    # compare the bn output with calling the backend bn
    xsum = birnn.be.zeros_like(birnn.xmean)
    xvar = birnn.be.zeros_like(birnn.xvar)
    gmean = birnn.be.zeros_like(birnn.gmean)
    gvar = birnn.be.zeros_like(birnn.gvar)
    gamma = birnn.be.ones(birnn.gamma.shape)
    beta = birnn.be.zeros_like(birnn.beta)
    grad_gamma = birnn.be.zeros_like(gamma)
    grad_beta = birnn.be.zeros_like(beta)
    out_ref = birnn.be.zeros_like(birnn.h_ff_buffer)

    xsum[:] = birnn.be.sum(birnn.h_ff_buffer, axis=1)
    birnn.be.compound_fprop_bn(
        birnn.h_ff_buffer, xsum, xvar, gmean, gvar,
        gamma, beta, out_ref, birnn.eps, birnn.rho,
        accumbeta=0, relu=False)

    # call the bibnrnn layer fprop_bn
    out_bn = birnn._fprop_bn(birnn.h_ff_buffer, inference=False)

    assert allclose_with_out(out_bn.get(), out_ref.get(), rtol=0.0, atol=1.0e-5)

    # test bprop
    err_np = np.random.random(birnn.h_ff_buffer.shape)
    err_be = birnn.be.array(err_np)

    err_out_ref = birnn.be.empty_like(err_be)
    birnn.be.compound_bprop_bn(err_out_ref, grad_gamma, grad_beta,
                               err_be,
                               inp_be, xsum, xvar, gamma,
                               birnn.eps)

    err_out_bn = birnn._bprop_bn(err_be, out_bn)

    assert allclose_with_out(err_out_bn.get(), err_out_ref.get(), rtol=0.0, atol=2.5e-5)
Example #7
def test_all_rand(backend_default, allrand_args, deltas_buffer):
    # test with random weights and random inputs
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    inp_rng = [0.0, rngmax]
    nin = 1024
    nout = 2048
    batch_size = 16
    NervanaObject.be.bsz = batch_size

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layer = Linear(nout=nout, init=init_unif)
    inp = np.random.random((nin, batch_size))
    inp *= inp_rng[1] - inp_rng[0]
    inp += inp_rng[0]
    inp = inp.astype(dtypeu)
    layer.configure(nin)
    layer.prev_layer = True  # Hack to force delta buffer allocation
    layer.allocate()

    layer.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    layer.set_deltas(deltas_buffer)

    out = layer.fprop(layer.be.array(inp)).get()
    w = layer.W.get()

    # the expected output using numpy
    out_exp = np.dot(w, inp)

    # for larger layers need to estimate numerical precision
    atol = 2 * est_mm_prec(w, inp, ntrials=1)
    assert allclose_with_out(out_exp, out, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(out - out_exp)), atol)

    err = np.random.random((nout, batch_size))
    err = err * (inp_rng[1] - inp_rng[0]) + inp_rng[0]
    err = err.astype(dtypeu)
    deltas = layer.bprop(layer.be.array(err)).get()
    dw = layer.dW.get()

    deltas_exp = np.dot(w.T, err)
    atol = 2 * est_mm_prec(w.T, err, ntrials=1)
    assert allclose_with_out(deltas_exp, deltas, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(deltas_exp - deltas)), atol)

    dw_exp = np.dot(err, inp.T)
    atol = 2 * est_mm_prec(err, inp.T, ntrials=1)
    assert allclose_with_out(dw_exp, dw, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(dw_exp - dw)), atol)

    return
def test_recurrent_mean(backend_default, refgruargs, deltas_buffer):
    seq_len, nin, batch_size = refgruargs
    NervanaObject.be.bsz = batch_size

    in_shape = (nin, seq_len)
    layer = RecurrentMean()
    layer.configure(in_shape)
    layer.prev_layer = True
    layer.allocate()

    layer.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    layer.set_deltas(deltas_buffer)

    # zeros
    inp = layer.be.zeros((nin, seq_len * batch_size))
    out = layer.fprop(inp)
    err = layer.bprop(out).get()
    assert np.all(out.get() == np.zeros((nin, batch_size)))
    assert np.all(err == inp.get())

    # ones
    inp = layer.be.ones((nin, seq_len * batch_size))
    out = layer.fprop(inp)
    err = layer.bprop(out).get()
    assert np.all(out.get() == np.ones((nin, batch_size)))
    assert np.all(err == 1. / seq_len * inp.get())

    # random
    rinp = np.random.random((nin, batch_size))
    inp = np.repeat(rinp, repeats=seq_len, axis=1)
    inp_g = layer.be.array(inp)
    out = layer.fprop(inp_g)
    err = layer.bprop(out)
    assert allclose_with_out(out.get(), rinp)
    assert allclose_with_out(err.get(), 1. / seq_len * inp)

    # full random
    inp = np.random.random((nin, seq_len * batch_size))
    inp_g = layer.be.array(inp)
    out = layer.fprop(inp_g)
    err = layer.bprop(out)
    out_comp = np.zeros(out.shape)
    err_comp = np.zeros(inp.shape)
    for i in range(seq_len):
        out_comp[:] = out_comp + inp[:, i * batch_size:(i + 1) * batch_size]
        err_comp[:, i * batch_size:(i + 1) * batch_size] = out.get() / float(seq_len)
    out_comp[:] /= float(seq_len)

    assert allclose_with_out(out_comp, out.get())
    assert allclose_with_out(err_comp, err.get())
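
The reference built in the "full random" case above can be written compactly; this sketch assumes the same column layout (time step i occupies columns [i*batch_size:(i+1)*batch_size]) and the helper name is ours:

import numpy as np

def recurrent_mean_ref(inp, seq_len, batch_size):
    nin = inp.shape[0]
    steps = inp.reshape(nin, seq_len, batch_size)
    out = steps.mean(axis=1)                       # fprop: average over time steps
    err = np.tile(out / seq_len, (1, seq_len))     # bprop: every step receives out / seq_len
    return out, err
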
Example #9
def test_schedule(backend_default):
    """
    Test constant rate, fixed step and various modes of programmable steps.
    """
    lr_init = 0.1

    # default scheduler has a constant learning rate
    sch = Schedule()
    for epoch in range(10):
        lr = sch.get_learning_rate(learning_rate=lr_init, epoch=epoch)
        assert lr == lr_init

    # test a uniform step schedule
    step_config = 2
    change = 0.5
    sch = Schedule(step_config=step_config, change=change)
    for epoch in range(10):
        lr = sch.get_learning_rate(learning_rate=lr_init, epoch=epoch)
        # test a repeated call for the same epoch
        lr2 = sch.get_learning_rate(learning_rate=lr_init, epoch=epoch)
        # print epoch, lr, lr2
        assert allclose_with_out(lr, lr_init * change**(np.floor(epoch // step_config)))
        assert allclose_with_out(lr2, lr_init * change**(np.floor(epoch // step_config)))

    # test a list step schedule
    sch = Schedule(step_config=[2, 3], change=.1)
    assert allclose_with_out(.1, sch.get_learning_rate(learning_rate=.1, epoch=0))
    assert allclose_with_out(.1, sch.get_learning_rate(learning_rate=.1, epoch=1))
    assert allclose_with_out(.01, sch.get_learning_rate(learning_rate=.1, epoch=2))
    # test a repeated call for the same epoch
    assert allclose_with_out(.01, sch.get_learning_rate(learning_rate=.1, epoch=2))
    assert allclose_with_out(.001, sch.get_learning_rate(learning_rate=.1, epoch=3))
    assert allclose_with_out(.001, sch.get_learning_rate(learning_rate=.1, epoch=4))
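
Both schedules above reduce to simple closed forms; a short sketch of what the assertions compute:

# uniform step schedule: lr(epoch) = lr_init * change ** (epoch // step_config)
lr_init, change, step_config = 0.1, 0.5, 2
lrs = [lr_init * change ** (epoch // step_config) for epoch in range(6)]
# -> [0.1, 0.1, 0.05, 0.05, 0.025, 0.025]

# list step schedule: the rate is multiplied by `change` once for every configured
# epoch that has been reached, e.g. step_config=[2, 3], change=0.1 gives
# epochs 0-1 -> 0.1, epoch 2 -> 0.01, epochs 3+ -> 0.001
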
def test_recurrent_last(backend_default, refgruargs, deltas_buffer):
    seq_len, nin, batch_size = refgruargs
    NervanaObject.be.bsz = batch_size

    in_shape = (nin, seq_len)
    layer = RecurrentLast()
    layer.configure(in_shape)
    layer.prev_layer = True
    layer.allocate()

    layer.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    layer.set_deltas(deltas_buffer)

    # zeros
    inp = layer.be.zeros((nin, seq_len * batch_size))
    out = layer.fprop(inp)
    err = layer.bprop(out).get()
    assert np.all(out.get() == np.zeros((nin, batch_size)))
    assert np.all(err == inp.get())

    # ones
    inp = layer.be.ones((nin, seq_len * batch_size))
    out = layer.fprop(inp)
    err = layer.bprop(out).get()
    assert np.all(out.get() == np.ones((nin, batch_size)))
    assert np.all(err[:, -batch_size:] == inp.get()[:, -batch_size:])
    assert np.all(
        err[:, :-batch_size] == np.zeros((nin, (seq_len - 1) * batch_size)))

    # random
    rinp = np.random.random((nin, batch_size))
    inp = np.repeat(rinp, repeats=seq_len, axis=1)
    inp_g = layer.be.array(inp)
    out = layer.fprop(inp_g)
    err = layer.bprop(out)
    assert allclose_with_out(out.get(), rinp)
    assert allclose_with_out(err[:, -batch_size:].get(), rinp)

    # full random
    inp = np.random.random((nin, seq_len * batch_size))
    inp_g = layer.be.array(inp)
    out = layer.fprop(inp_g)
    err = layer.bprop(out)
    out_comp = np.zeros(out.shape)
    err_comp = np.zeros(inp.shape)
    out_comp[:] = inp[:, -batch_size:]
    err_comp[:, -batch_size:] = out.get()
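
For comparison with RecurrentMean above, the RecurrentLast reference being built here keeps only the final step's block of columns on fprop and routes the delta only into that block on bprop. A minimal sketch under the same layout assumptions:

import numpy as np

def recurrent_last_ref(inp, seq_len, batch_size):
    out = inp[:, -batch_size:].copy()   # fprop: keep the last time step
    err = np.zeros_like(inp)
    err[:, -batch_size:] = out          # bprop: delta flows only into the last step
    return out, err
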
Example #11
def test_biLSTM_fprop(backend_default, fargs):

    # basic sanity check with 0 weights random inputs
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    out_shape = (hidden_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # setup the bi-directional rnn
    init_glorot = GlorotUniform()
    bilstm = BiLSTM(hidden_size, gate_activation=Logistic(), init=init_glorot,
                    activation=Tanh(), reset_cells=True)
    bilstm.configure(in_shape)
    bilstm.prev_layer = True
    bilstm.allocate()

    # same weight
    nout = hidden_size
    bilstm.W_input_b[:] = bilstm.W_input_f
    bilstm.W_recur_b[:] = bilstm.W_recur_f
    bilstm.b_b[:] = bilstm.b_f
    bilstm.dW[:] = 0

    # inputs - random and flipped left-to-right inputs
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))

    rl = con(lr_rev, axis=1)
    inp_lr = bilstm.be.array(lr)
    inp_rl = bilstm.be.array(rl)

    # outputs
    out_lr = bilstm.fprop(inp_lr).get().copy()

    bilstm.h_buffer[:] = 0
    out_rl = bilstm.fprop(inp_rl).get().copy()

    # views
    out_lr_f_s = get_steps(out_lr[:nout], out_shape)
    out_lr_b_s = get_steps(out_lr[nout:], out_shape)
    out_rl_f_s = get_steps(out_rl[:nout], out_shape)
    out_rl_b_s = get_steps(out_rl[nout:], out_shape)

    # asserts
    for x_f, x_b, y_f, y_b in zip(out_lr_f_s, out_lr_b_s,
                                  reversed(out_rl_f_s), reversed(out_rl_b_s)):
        assert allclose_with_out(x_f, y_b, rtol=0.0, atol=1.0e-5)
        assert allclose_with_out(x_b, y_f, rtol=0.0, atol=1.0e-5)
Example #12
def checkSequentialMatchesBatch():
    """ check LSTM I/O forward/backward interactions """

    n, b, d = (5, 3, 4)  # sequence length, batch size, hidden size
    input_size = 10
    WLSTM = LSTM.init(input_size, d)  # input size, hidden size
    X = np.random.randn(n, b, input_size)
    h0 = np.random.randn(b, d)
    c0 = np.random.randn(b, d)

    # sequential forward
    cprev = c0
    hprev = h0
    caches = [{} for t in range(n)]
    Hcat = np.zeros((n, b, d))
    for t in range(n):
        xt = X[t:t + 1]
        _, cprev, hprev, cache = LSTM.forward(xt, WLSTM, cprev, hprev)
        caches[t] = cache
        Hcat[t] = hprev

    # sanity check: perform batch forward to check that we get the same thing
    H, _, _, batch_cache = LSTM.forward(X, WLSTM, c0, h0)
    assert allclose_with_out(H, Hcat), "Sequential and Batch forward don't match!"

    # eval loss
    wrand = np.random.randn(*Hcat.shape)
    # loss = np.sum(Hcat * wrand)
    dH = wrand

    # get the batched version gradients
    BdX, BdWLSTM, Bdc0, Bdh0 = LSTM.backward(dH, batch_cache)

    # now perform sequential backward
    dX = np.zeros_like(X)
    dWLSTM = np.zeros_like(WLSTM)
    dc0 = np.zeros_like(c0)
    dh0 = np.zeros_like(h0)
    dcnext = None
    dhnext = None
    for t in reversed(range(n)):
        dht = dH[t].reshape(1, b, d)
        dx, dWLSTMt, dcprev, dhprev = LSTM.backward(
            dht, caches[t], dcnext, dhnext)
        dhnext = dhprev
        dcnext = dcprev

        dWLSTM += dWLSTMt  # accumulate LSTM gradient
        dX[t] = dx[0]
        if t == 0:
            dc0 = dcprev
            dh0 = dhprev

    # and make sure the gradients match
    neon_logger.display('Making sure batched version agrees with sequential version: '
                        '(should all be True)')
    neon_logger.display(np.allclose(BdX, dX))
    neon_logger.display(np.allclose(BdWLSTM, dWLSTM))
    neon_logger.display(np.allclose(Bdc0, dc0))
    neon_logger.display(np.allclose(Bdh0, dh0))
Example #13
def test_padding(backend_default, poolargs):
    fshape, nifm, padding, stride, in_sz, batch_size = poolargs

    NervanaObject.be.bsz = batch_size

    # basic sanity check with random inputs
    inshape = (nifm, in_sz, in_sz)
    insize = np.prod(inshape)
    neon_layer = Pooling(fshape=fshape, strides=stride, padding=padding)

    inp = neon_layer.be.array(np.random.random((insize, batch_size)))
    inp.lshape = inshape
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_layer.set_deltas([neon_layer.be.iobuf(inshape)])

    out = neon_layer.fprop(inp).get()

    ncheck = [0, batch_size // 2, batch_size - 1]

    (out_exp, check_inds) = ref_pooling(inp, inp.lshape,
                                        (fshape, fshape),
                                        padding,
                                        (stride, stride),
                                        neon_layer.be,
                                        ncheck=ncheck)

    out_shape = list(out_exp.shape[0:3])
    out_shape.append(batch_size)
    outa = out.reshape(out_shape)

    assert allclose_with_out(out_exp, outa[:, :, :, check_inds], atol=0.0, rtol=0.0)
def test_hdf5meansubtract(backend_default, meansubhdf):
    NervanaObject.be.bsz = 128
    bsz = 128

    datit = HDF5Iterator(meansubhdf[0])
    datit.allocate()
    typ = meansubhdf[1]
    mn = datit.mean.get()
    assert typ in ['chan_mean', 'full_mean']

    cnt_image = 0
    max_len = datit.ndata
    MAX_CNT = max_len*datit.inp.shape[1]
    for x, t in datit:
        x_ = x.get().flatten()
        x_exp = (np.arange(len(x_)) + cnt_image) % MAX_CNT
        x_exp = x_exp.reshape((-1, np.prod(datit.lshape))).T
        if typ == 'chan_mean':
            x_exp = x_exp.reshape((datit.lshape[0], -1)) - mn
        elif typ == 'full_mean':
            x_exp = x_exp.reshape((-1, bsz)) - mn
        x_exp = x_exp.flatten()
        assert allclose_with_out(x_, x_exp, atol=0.0, rtol=1.0e-7)
        cnt_image += len(x_)

    datit.cleanup()
Example #15
def test_linear_ones(backend_default, basic_linargs, deltas_buffer):
    # basic sanity check with all ones on the inputs
    # and weights, check that each row in output
    # is the sum of the weights for that output
    # this check will confirm that the correct number
    # of operations is being run
    nin, nout, batch_size = basic_linargs
    NervanaObject.be.bsz = batch_size

    dtypeu = np.float32

    init_unif = Uniform(low=1.0, high=1.0)
    layer = Linear(nout=nout, init=init_unif)
    inp = layer.be.array(dtypeu(np.ones((nin, batch_size))))
    layer.configure(nin)
    layer.prev_layer = True  # Hack to force delta buffer allocation
    layer.allocate()

    layer.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    layer.set_deltas(deltas_buffer)

    out = layer.fprop(inp).get()
    w = layer.W.get()
    sums = np.sum(w, 1).reshape((nout, 1)) * np.ones((1, batch_size))

    # for larger layers need to estimate numerical precision
    # atol = est_mm_prec(w, inp.get())
    assert allclose_with_out(sums, out, atol=0.0, rtol=0.0), \
        '%e' % np.max(np.abs(out - sums))
    return
Example #16
def test_softmax_big_inputs(backend_default):
    np.random.seed(1)

    be = backend_default
    assert be.bsz >= 128, 'This test needs a large batch size'

    act = Softmax()
    Nout = 1000  # 1000 input and output units to softmax

    # random inputs
    x_ = np.random.random((Nout, be.bsz))

    x = be.iobuf(Nout)
    # init input to softmax
    x[:] = x_

    # numpy softmax
    mx = np.max(x_, axis=0)
    ex = np.exp(x_ - mx)
    y_ = ex/np.sum(ex, axis=0)

    # in-place softmax on device
    x[:] = act(x)

    assert allclose_with_out(y_, x.get(), atol=0.0, rtol=1.0e-5)
Example #17
def gradient_check(seq_len, input_size, hidden_size, batch_size,
                   threshold=1.0e-3):
    # 'threshold' is the maximum allowed fractional difference
    #             between the gradient estimate and the bprop deltas
    # for a given set of layer parameters, calculate the gradients and
    # compare them to the derivatives obtained with the bprop function.
    # Repeat this for a range of perturbations and use the perturbation
    # size with the best results.  This is necessary for 32-bit computations

    min_max_err = -1.0  # minimum max error
    neon_logger.display('Perturb mag, max grad diff')
    for pert_exp in range(-5, 0):
        # need to generate the scaling and input outside
        # having an issue with the random number generator
        # when these are generated inside the gradient_calc
        # function
        input_shape = (input_size, seq_len * batch_size)
        output_shape = (hidden_size, seq_len * batch_size)

        rand_scale = np.random.random(output_shape) * 2.0 - 1.0
        inp = np.random.randn(*input_shape)

        pert_mag = 10.0**pert_exp
        (grad_est, deltas) = gradient_calc(seq_len,
                                           input_size,
                                           hidden_size,
                                           batch_size,
                                           epsilon=pert_mag,
                                           rand_scale=rand_scale,
                                           inp_bl=inp)
        dd = np.max(np.abs(grad_est - deltas))
        neon_logger.display('%e, %e' % (pert_mag, dd))
        if min_max_err < 0.0 or dd < min_max_err:
            min_max_err = dd
        # reset the seed so models are same in each run
        allclose_with_out(grad_est, deltas, rtol=0.0, atol=0.0)
        NervanaObject.be.rng_reset()

    # check that best value of worst case error is less than threshold
    neon_logger.display('Worst case error %e with perturbation %e' % (min_max_err, pert_mag))
    neon_logger.display('Threshold %e' % (threshold))
    assert min_max_err < threshold
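
The sweep above relies on a finite-difference gradient estimate produced by gradient_calc (not shown in this snippet). A self-contained sketch of such an estimate, using a central difference on a toy function rather than a neon layer:

import numpy as np

def numerical_grad(f, x, eps=1e-3):
    # central-difference estimate of df/dx, element by element
    grad = np.zeros_like(x)
    for idx in np.ndindex(*x.shape):
        x_p, x_m = x.copy(), x.copy()
        x_p[idx] += eps
        x_m[idx] -= eps
        grad[idx] = (f(x_p) - f(x_m)) / (2 * eps)
    return grad

x = np.random.randn(3, 2)
est = numerical_grad(lambda v: np.sum(v ** 2), x)   # analytic gradient is 2*x
assert np.allclose(est, 2 * x, atol=1e-4)
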
def test_dilated_conv(backend_default, fargs_tests):

    fsz = fargs_tests[0]
    dil = fargs_tests[1]
    stride = fargs_tests[2]
    be = backend_default

    o1, w1 = run(be, False, fsz, stride, 1, dil)
    o2, w2 = run(be, True, fsz, stride, 1, dil)
    # Verify that the results of faked dilation match those of actual dilation.
    assert allclose_with_out(o1, o2, atol=1e-1, rtol=4e-3)
    try:
        assert allclose_with_out(w1, w2, atol=0, rtol=1e-3)
    except Exception:
        if not isinstance(NervanaObject.be, NervanaGPU):
            assert allclose_with_out(w1, w2, atol=1e-1, rtol=1e-3)
        else:
            assert allclose_with_out(w1, w2, atol=0, rtol=1e-3)
Example #19
def test_dconv_rand(backend_default, rand_convargs, deltas_buffer):
    indim, nifm, fshape, nofm, batch_size, rngmax, w_rng = rand_convargs
    if isinstance(NervanaObject.be, NervanaGPU) and NervanaObject.be.compute_capability < (5, 0):
        if nofm % 4 != 0:
            pytest.skip(msg="C dim must be a multiple of 4 for Kepler bprop kernel")
    NervanaObject.be.bsz = batch_size
    dtypeu = np.float32
    inp_rng = [0.0, rngmax]

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    inshape = (indim, indim, nifm)
    insize = np.prod(inshape)

    # generate neon deconv layer
    # need to switch to nofm here...
    neon_layer = Deconvolution(fshape=(fshape, fshape, nofm), strides=1,
                               padding=0, init=init_unif)
    insize = np.prod(inshape)

    # generate reference deconv layer
    ref_layer = DeconvRefLayer(1, batch_size, identity, inshape[0], inshape[1:3],
                               (fshape, fshape), nofm, 1, dtypeu)

    # setup input in range inp_rng
    inpa = np.random.random((insize, batch_size))
    inpa *= (inp_rng[1] - inp_rng[0])
    inpa += inp_rng[0]
    inpa = inpa.astype(dtypeu)
    inp = neon_layer.be.array(inpa)
    inp.lshape = inshape

    # run fprop on neon
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_out = neon_layer.fprop(inp).get()

    neon_layer.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    neon_layer.set_deltas(deltas_buffer)

    # pull neon weights into ref layer weights
    ref_layer.weights = neon_layer.W.get().T
    ref_out = np.copy(ref_layer.berror)

    # estimate the numerical precision
    ref_layer.fprop(inpa.T, permute=True)
    ref_out2 = ref_layer.berror
    atol = 10 * np.max(np.abs(ref_out - ref_out2))
    assert allclose_with_out(ref_out.T, neon_out, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(ref_out.T - neon_out)), atol)

    # generate err array
    erra = np.random.random(neon_out.shape)
    erra *= (inp_rng[1] - inp_rng[0])
    erra += inp_rng[0]
    erra = erra.astype(dtypeu)
Example #20
def test_shift_schedule(backend_default):
    """
    Test binary shift learning rate schedule
    """
    lr_init = 0.1
    interval = 1
    sch = ShiftSchedule(interval)
    for epoch in range(10):
        lr = sch.get_learning_rate(learning_rate=lr_init, epoch=epoch)
        assert allclose_with_out(lr, lr_init / (2 ** epoch))
Example #21
def test_exp_schedule(backend_default):
    """
    Test exponential learning rate schedule
    """
    lr_init = 0.1
    decay = 0.01
    sch = ExpSchedule(decay)
    for epoch in range(10):
        lr = sch.get_learning_rate(learning_rate=lr_init, epoch=epoch)
        assert allclose_with_out(lr, lr_init / (1. + decay * epoch))
Example #22
def test_power_schedule(backend_default):
    """
    Test the PowerSchedule class
    """
    sch = PowerSchedule(step_config=2, change=0.5)

    target_lr = [1.0, 1.0, 0.5, 0.5, 0.25, 0.25, 0.125, 0.125]

    for e, lr in enumerate(target_lr):
        assert allclose_with_out(lr, sch.get_learning_rate(learning_rate=1.0, epoch=e))
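
Taken together, the three schedule tests above check these closed forms (per epoch e):

lr_init = 0.1
shift = [lr_init / (2 ** e) for e in range(4)]           # ShiftSchedule(interval=1)
decay = [lr_init / (1. + 0.01 * e) for e in range(4)]    # ExpSchedule(decay=0.01)
power = [1.0 * 0.5 ** (e // 2) for e in range(8)]        # PowerSchedule(step_config=2, change=0.5)
# power -> [1.0, 1.0, 0.5, 0.5, 0.25, 0.25, 0.125, 0.125]
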
Example #23
def test_lookuptable_rand_error(backend_default, basic_linargs, deltas_buffer):
    nin, nout, batch_size, vocab_size = basic_linargs
    NervanaObject.be.bsz = batch_size

    dtypeu = np.float32

    init_glorot = GlorotUniform()
    layer = LookupTable(
        vocab_size=vocab_size, embedding_dim=nout, init=init_glorot)

    inp = np.random.random_integers(0, vocab_size - 1, size=nin * batch_size)
    layer.configure(nin)
    layer.allocate()

    layer.prev_layer = True  # Hack to force delta buffer allocation
    layer.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    layer.set_deltas(deltas_buffer)

    inputs = layer.be.array(inp.reshape((nin, batch_size)))
    out = layer.fprop(inputs).get()
    W = layer.W.get()
    for i in range(nin * batch_size):
        assert np.all(W[inp[i]].T == out[:, i])

    err = dtypeu(np.random.random((nout, nin * batch_size)))
    layer.bprop(layer.be.array(err)).get()

    dw = layer.dW.get()
    unqidx, count = np.unique(inp, return_counts=True)
    dw_exp = np.zeros((1, nout))
    for wrd_id, cnt in zip(unqidx, count):
        dw_exp[:] = 0
        cnt_exp = 0
        for i, w_id in enumerate(inp):
            if w_id == wrd_id:
                dw_exp[:] = dw_exp[:] + err[:, i]
                cnt_exp += 1
        assert allclose_with_out(dw[wrd_id, :], dw_exp, atol=0, rtol=1e-4)
        assert allclose_with_out(dw_exp, dw[wrd_id, :], atol=0, rtol=1e-4)
        assert cnt == cnt_exp

    return
Example #24
def test_biRNN_bprop(backend_default, fargs, deltas_buffer):

    # basic sanity check with 0 weights random inputs
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # setup the bi-directional rnn
    init_glorot = GlorotUniform()
    birnn = BiRNN(hidden_size, activation=Rectlinclip(slope=0), init=init_glorot)
    birnn.configure(in_shape)
    birnn.prev_layer = True
    birnn.allocate()

    birnn.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    birnn.set_deltas(deltas_buffer)

    # same weight for bi-rnn backward and rnn weights
    birnn.W_input_b[:] = birnn.W_input_f
    birnn.W_recur_b[:] = birnn.W_recur_f
    birnn.b_b[:] = birnn.b_f
    birnn.dW[:] = 0

    # same weight for bi-directional rnn
    init_glorot = GlorotUniform()
    rnn = Recurrent(hidden_size, activation=Rectlinclip(slope=0), init=init_glorot)
    rnn.configure(in_shape)
    rnn.prev_layer = True
    rnn.allocate()

    rnn.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    rnn.set_deltas(deltas_buffer)

    # inputs and views
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))
    rl = con(lr_rev, axis=1)

    # allocate gpu buffers
    inp_lr = birnn.be.array(lr)
    inp_rl = birnn.be.array(rl)

    # outputs
    out_lr_g = birnn.fprop(inp_lr)
    del_lr = birnn.bprop(out_lr_g).get().copy()
    birnn.h_buffer[:] = 0
    out_rl_g = birnn.fprop(inp_rl)
    del_rl = birnn.bprop(out_rl_g).get().copy()

    del_lr_s = get_steps(del_lr, in_shape)
    del_rl_s = get_steps(del_rl, in_shape)
    for (x, y) in zip(del_lr_s, reversed(del_rl_s)):
        assert allclose_with_out(x, y, rtol=0.0, atol=1.0e-5)
Example #25
def test_roipooling_bprop_ref(backend_default, rois=None, inputs=None, outputs_fprop_ref=None,
                              input_errors=None):

    if rois is None and inputs is None and outputs_fprop_ref is None and input_errors is None:
        return

    (bsz, img_fm_c, img_fm_h, img_fm_w) = inputs.shape
    (rois_per_batch, _, roi_size, _) = input_errors.shape

    outputs_fprop_ref_in = outputs_fprop_ref.reshape(rois_per_batch, -1).T
    feature_maps = inputs.reshape(bsz, -1).T.astype(np.float, order='C')
    input_errors_in = input_errors.reshape(
        rois_per_batch, -1).T.astype(np.float, order='C')

    # compare with GPU kernel, need to call fprop first, then bprop
    NervanaObject.be.bsz = bsz
    be = NervanaObject.be
    input_dev = be.array(feature_maps)
    rois_dev = be.array(rois)
    output_shape = (img_fm_c, roi_size, roi_size, rois_per_batch)
    outputs_dev = be.zeros(output_shape, dtype=np.float32)
    # make sure the dtype is int
    argmax_dev = be.zeros(output_shape, dtype=np.int32)
    input_error_dev = be.array(input_errors_in)
    output_error_dev = be.zeros(outputs_fprop_ref_in.shape)

    be.roipooling_fprop(input_dev, rois_dev, outputs_dev, argmax_dev, rois_per_batch,
                        img_fm_c, img_fm_h, img_fm_w, roi_size, roi_size, spatial_scale)

    outputs_fprop_be = outputs_dev.get().reshape(-1, rois_per_batch)

    assert allclose_with_out(
        outputs_fprop_ref_in, outputs_fprop_be, atol=1e-6, rtol=0)

    start_time = timeit()
    be.roipooling_bprop(input_error_dev, rois_dev, output_error_dev, argmax_dev,
                        rois_per_batch, img_fm_c, img_fm_h, img_fm_w, roi_size,
                        roi_size, spatial_scale)
    neon_logger.display("NervanaGPU roipooling bprop (sec): {}".format(timeit() - start_time))
    outputs_backend = output_error_dev.get()

    assert allclose_with_out(outputs_fprop_ref_in, outputs_backend, atol=1e-6, rtol=0)
Example #26
def test_concat_sequence_l1_l1(backend_default, allrand_args, deltas_buffer):
    # test two linear layers that are merged with concat
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    # Diff size input steps
    nin = 128
    steps = [32, 64]
    nout = 256
    batch_size = 16
    NervanaObject.be.bsz = batch_size
    be = NervanaObject.be

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layers = [Sequential(Affine(nout=nout, init=init_unif)) for _ in (0, 1)]
    inputs = [be.array(dtypeu(np.random.random((nin, batch_size * step))))
              for step in steps]
    merge = MergeMultistream(layers, merge="recurrent")
    assert(len(inputs) == len(layers))
    merge.configure(inputs)
    merge.allocate()

    merge.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    merge.set_deltas(deltas_buffer)

    out = merge.fprop(inputs).get()

    sublayers = [s.layers[0] for s in layers]
    weights = [layer.W.get() for layer in sublayers]
    out_exp = np.concatenate([np.dot(w, inp.get()) for (w, inp) in zip(weights, inputs)], axis=1)

    assert allclose_with_out(out, out_exp, atol=1e-3)

    err_lst = [dtypeu(np.random.random((nout, batch_size * step))) for step in steps]
    err_concat = be.array(np.concatenate(err_lst, axis=1))
    merge.bprop(err_concat)
    dW_exp_lst = [np.dot(err, inp.get().T) for (err, inp) in zip(err_lst, inputs)]

    for layer, dW_exp in zip(sublayers, dW_exp_lst):
        assert allclose_with_out(layer.dW.get(), dW_exp)
    return
Example #27
def test_step_schedule(backend_default):
    """
    Test the StepSchedule class
    """
    step_config = [1, 4, 5]
    change = [0.1, 0.3, 0.4]
    sch = StepSchedule(step_config=step_config, change=change)

    target_lr = [1.0, 0.1, 0.1, 0.1, 0.3, 0.4, 0.4, 0.4, 0.4]

    for e, lr in enumerate(target_lr):
        assert allclose_with_out(lr, sch.get_learning_rate(learning_rate=1.0, epoch=e))
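
A compact sketch of the lookup that StepSchedule is being tested for, reproducing the target_lr list above (the helper name is ours):

def step_schedule_ref(lr, epoch, step_config, change):
    # keep the incoming rate until each configured epoch is reached, then
    # switch to the corresponding entry of `change`
    for step, new_lr in zip(step_config, change):
        if epoch >= step:
            lr = new_lr
    return lr

target = [step_schedule_ref(1.0, e, [1, 4, 5], [0.1, 0.3, 0.4]) for e in range(9)]
# -> [1.0, 0.1, 0.1, 0.1, 0.3, 0.4, 0.4, 0.4, 0.4]
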
    def test_hard_coded(self):
        """
        The most basic test case
        """
        be = self.be
        x0 = be.array(np.ones((3, 3)) * 1, name='x0', dtype=self.dtype)
        x1 = be.array(np.ones((3, 3)) * 2, name='x1', dtype=self.dtype)
        x2 = be.array(np.ones((3, 3)) * 3, name='x2', dtype=self.dtype)
        x3 = be.array(np.ones((3, 3)) * 5, name='x3', dtype=self.dtype)

        f = x0 * x0 - x1 * x0 + x0 * x2 - x2 * x1 * x0 + x3 * x3 * x3
        ad = Autodiff(f, be)

        x0_grad = be.array(np.ones((3, 3)) * -3, dtype=self.dtype)
        x1_grad = be.array(np.ones((3, 3)) * -4, dtype=self.dtype)
        x2_grad = be.array(np.ones((3, 3)) * -1, dtype=self.dtype)
        x3_grad = be.array(np.ones((3, 3)) * 75, dtype=self.dtype)

        assert allclose_with_out(ad.get_grad_asnumpyarray([x0])[0], x0_grad.get(), atol=1e-5)
        assert allclose_with_out(ad.get_grad_asnumpyarray([x1])[0], x1_grad.get(), atol=1e-5)
        assert allclose_with_out(ad.get_grad_asnumpyarray([x2])[0], x2_grad.get(), atol=1e-5)
        assert allclose_with_out(ad.get_grad_asnumpyarray([x3])[0], x3_grad.get(), atol=1e-5)
Example #29
def test_roipooling_fprop_ref(backend_default, rois=None, inputs=None, outputs_ref=None):

    if rois is None and inputs is None and outputs_ref is None:
        return

    (bsz, img_fm_c, img_fm_h, img_fm_w) = inputs.shape
    (rois_per_batch, _, roi_size, _) = outputs_ref.shape
    outputs_ref_in = outputs_ref.reshape(rois_per_batch, -1).T
    rois_per_image = rois_per_batch // bsz
    feature_maps = inputs.reshape(bsz, -1).T.astype(np.float, order='C')

    # run the numpy roi fprop (function inside this test script)
    outputs_np = fprop_roipooling_ref(feature_maps, rois,
                                      img_fm_c, img_fm_h, img_fm_w,
                                      bsz, rois_per_image, roi_size, roi_size)

    assert allclose_with_out(outputs_ref_in, outputs_np, atol=1e-6, rtol=0)

    # call NervanaGPU roipooling kernel
    NervanaObject.be.bsz = bsz
    be = NervanaObject.be
    input_dev = be.array(feature_maps)
    rois_dev = be.array(rois)
    output_shape = (img_fm_c, roi_size, roi_size, rois_per_batch)
    outputs_dev = be.zeros(output_shape, dtype=np.float32)
    # make sure the dtype is int
    argmax_dev = be.zeros(output_shape, dtype=np.int32)

    start_time = timeit()
    be.roipooling_fprop(input_dev, rois_dev, outputs_dev, argmax_dev, rois_per_batch,
                        img_fm_c, img_fm_h, img_fm_w, roi_size, roi_size, spatial_scale)

    outputs_backend = outputs_dev.get().reshape(-1, rois_per_batch)

    neon_logger.display("Nervana backend roipooling fprop (sec): {}".format(timeit() - start_time))

    assert allclose_with_out(outputs_ref_in, outputs_backend, atol=1e-6, rtol=0)
Example #30
def test_roipooling_fprop_random(backend_default, fargs):

    rois_per_image, img_fm_c, img_fm_h, img_fm_w, roi_size, bsz = fargs

    # generate a random feature map and some random ROIs
    feature_maps = np.random.random(
        (img_fm_c, img_fm_h, img_fm_w, bsz)).reshape(-1, bsz)
    rois_per_batch = rois_per_image * bsz

    rois_idx = np.vstack([i * np.ones((rois_per_image, 1)) for i in range(bsz)])
    rois = np.random.random((rois_per_batch, 4)) * min(img_fm_h, img_fm_w)

    rois = np.zeros((rois_per_batch, 4))
    rois[:, 0] = np.random.random((rois_per_batch,)) * 10 / spatial_scale
    rois[:, 1] = np.random.random((rois_per_batch,)) * 25 / spatial_scale
    rois[:, 2] = (
        np.random.random((rois_per_batch,)) * 27 + (img_fm_w - 27)) / spatial_scale
    rois[:, 3] = (
        np.random.random((rois_per_batch,)) * 12 + (img_fm_h - 12)) / spatial_scale

    rois = np.hstack((rois_idx, rois))

    # run the numpy roi fprop (function inside this test script)
    outputs_np = fprop_roipooling_ref(feature_maps, rois,
                                      img_fm_c, img_fm_h, img_fm_w,
                                      bsz, rois_per_image, roi_size, roi_size)

    # call backend roipooling kernel
    NervanaObject.be.bsz = bsz
    be = NervanaObject.be
    input_dev = be.array(feature_maps)
    rois_dev = be.array(rois)
    output_shape = (img_fm_c, roi_size, roi_size, rois_per_batch)
    outputs_dev = be.zeros(output_shape)
    # make sure the dtype is int
    argmax_dev = be.zeros(output_shape, np.int32)

    start_time = timeit()
    be.roipooling_fprop(input_dev, rois_dev, outputs_dev, argmax_dev, rois_per_batch,
                        img_fm_c, img_fm_h, img_fm_w, roi_size, roi_size, spatial_scale)
    neon_logger.display("Nervana backend roipooling fprop (sec): {}".format(timeit() - start_time))

    outputs_be = outputs_dev.get().reshape(-1, rois_per_batch)
    assert allclose_with_out(outputs_np, outputs_be, atol=1e-6, rtol=0)
Example #31
def test_model_serialize(backend_default, data):
    dataset = MNIST(path=data)
    (X_train, y_train), (X_test, y_test), nclass = dataset.load_data()
    train_set = ArrayIterator([X_train, X_train],
                              y_train,
                              nclass=nclass,
                              lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = Sequential([
        Conv((5, 5, 16),
             init=init_norm,
             bias=Constant(0),
             activation=Rectlin()),
        Pooling(2),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ])
    path2 = Sequential([
        Affine(nout=100,
               init=init_norm,
               bias=Constant(0),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ])
    layers = [
        MergeMultistream(layers=[path1, path2], merge="stack"),
        Affine(nout=20, init=init_norm, batch_norm=True, activation=Rectlin()),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))
    ]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1,
                                            momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    mlp.initialize(train_set, cost=mlp.cost)
    n_test = 3
    num_epochs = 3
    # Train model for num_epochs and n_test batches
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Serialize model
    mlp.save_params(tmp_save, keep_states=True)

    # Load model
    mlp = Model(tmp_save)

    mlp.initialize(train_set)
    outputs = []
    pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Check outputs, states, and params are the same
    for output, output_exp in zip(outputs, outputs_exp):
        assert allclose_with_out(output.get(), output_exp.get())

    for pd, pd_exp in zip(pdicts, pdicts_exp):
        for s, s_e in zip(pd['states'], pd_exp['states']):
            if isinstance(s, list):  # this is the batch norm case
                for _s, _s_e in zip(s, s_e):
                    assert allclose_with_out(_s, _s_e)
            else:
                assert allclose_with_out(s, s_e)
        for p, p_e in zip(pd['params'], pd_exp['params']):
            assert type(p) == type(p_e)
            if isinstance(p, list):  # this is the batch norm case
                for _p, _p_e in zip(p, p_e):
                    assert allclose_with_out(_p, _p_e)
            elif isinstance(p, np.ndarray):
                assert allclose_with_out(p, p_e)
            else:
                assert p == p_e

    os.remove(tmp_save)
Example #32
def check_lstm(seq_len, input_size, hidden_size,
               batch_size, init_func, inp_moms=[0.0, 1.0]):
    # init_func is the initializer for the model params
    # inp_moms is the [ mean, std dev] of the random input
    input_shape = (input_size, seq_len * batch_size)
    hidden_shape = (hidden_size, seq_len * batch_size)
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size

    # neon LSTM
    lstm = LSTM(hidden_size,
                init_func,
                activation=Tanh(),
                gate_activation=Logistic())

    inp = np.random.rand(*input_shape) * inp_moms[1] + inp_moms[0]
    inpa = lstm.be.array(inp)
    # run neon fprop
    lstm.configure((input_size, seq_len))
    lstm.prev_layer = True  # Hack to force allocating a delta buffer
    lstm.allocate()

    dtree = DeltasTree()
    lstm.allocate_deltas(dtree)
    dtree.allocate_buffers()
    lstm.set_deltas(dtree)

    lstm.fprop(inpa)

    # reference numpy LSTM
    lstm_ref = RefLSTM()
    WLSTM = lstm_ref.init(input_size, hidden_size)

    # make ref weights and biases with neon model
    WLSTM[0, :] = lstm.b.get().T
    WLSTM[1:input_size + 1, :] = lstm.W_input.get().T
    WLSTM[input_size + 1:] = lstm.W_recur.get().T

    # transpose input X and do fprop
    inp_ref = inp.copy().T.reshape(seq_len, batch_size, input_size)
    (Hout_ref, cprev, hprev, batch_cache) = lstm_ref.forward(inp_ref,
                                                             WLSTM)

    # the output needs transpose as well
    Hout_ref = Hout_ref.reshape(seq_len * batch_size, hidden_size).T
    IFOGf_ref = batch_cache['IFOGf'].reshape(seq_len * batch_size, hidden_size * 4).T
    Ct_ref = batch_cache['Ct'].reshape(seq_len * batch_size, hidden_size).T

    # compare results
    neon_logger.display('====Verifying IFOG====')
    assert allclose_with_out(lstm.ifog_buffer.get(),
                             IFOGf_ref,
                             rtol=0.0,
                             atol=1.5e-5)

    neon_logger.display('====Verifying cell states====')
    assert allclose_with_out(lstm.c_act_buffer.get(),
                             Ct_ref,
                             rtol=0.0,
                             atol=1.5e-5)

    neon_logger.display('====Verifying hidden states====')
    assert allclose_with_out(lstm.outputs.get(),
                             Hout_ref,
                             rtol=0.0,
                             atol=1.5e-5)

    neon_logger.display('fprop is verified')

    # now test the bprop
    # generate random deltas tensor
    deltas = np.random.randn(*hidden_shape)

    lstm.bprop(lstm.be.array(deltas))
    # grab the delta W from gradient buffer
    dWinput_neon = lstm.dW_input.get()
    dWrecur_neon = lstm.dW_recur.get()
    db_neon = lstm.db.get()

    deltas_ref = deltas.copy().T.reshape(seq_len, batch_size, hidden_size)
    (dX_ref, dWLSTM_ref, dc0_ref, dh0_ref) = lstm_ref.backward(deltas_ref,
                                                               batch_cache)
    dWrecur_ref = dWLSTM_ref[-hidden_size:, :]
    dWinput_ref = dWLSTM_ref[1:input_size + 1, :]
    db_ref = dWLSTM_ref[0, :]
    dX_ref = dX_ref.reshape(seq_len * batch_size, input_size).T

    # compare results
    neon_logger.display('Making sure neon LSTM match numpy LSTM in bprop')
    neon_logger.display('====Verifying update on W_recur====')

    assert allclose_with_out(dWrecur_neon,
                             dWrecur_ref.T,
                             rtol=0.0,
                             atol=1.5e-5)

    neon_logger.display('====Verifying update on W_input====')
    assert allclose_with_out(dWinput_neon,
                             dWinput_ref.T,
                             rtol=0.0,
                             atol=1.5e-5)

    neon_logger.display('====Verifying update on bias====')
    assert allclose_with_out(db_neon.flatten(),
                             db_ref,
                             rtol=0.0,
                             atol=1.5e-5)

    neon_logger.display('====Verifying output delta====')
    assert allclose_with_out(lstm.out_deltas_buffer.get(),
                             dX_ref,
                             rtol=0.0,
                             atol=1.5e-5)

    neon_logger.display('bprop is verified')

    return
Example #33
def test_beamsearch(backend_default):
    """
    Simulated beam search on a minibatch of 2, for 4 time steps. The
    LSTM states are real, but the "softmax outputs" z are hardcoded and
    not taken from the network.
    The network outputs 6 tokens, with probabilities like exp(1), exp(5), exp(7).

    The test asserts that the score_lists assigned by _beamsearch_step(z_list)
    are equal to the probabilities computed manually by adding probabilities
    to z_list.
    """
    be = backend_default

    batch_size = 2
    be.bsz = batch_size
    time_steps = 4
    nout = 6
    num_beams = 3

    # create unused layers
    activation = Tanh()
    gate_activation = Logistic()
    init_ary = np.eye(nout)
    init = Array(init_ary)
    encoder = LSTM(nout,
                   init,
                   activation=activation,
                   gate_activation=gate_activation,
                   name="Enc")
    decoder = LSTM(nout,
                   init,
                   activation=activation,
                   gate_activation=gate_activation,
                   name="Dec")

    class DummyFProp():
        """
        Constructs an artificial beam search example with known correct outputs.
        This is called inside a nested loop over steps, num_life. In the first
        time step there is one life beam, after that, 3 life beams per step.
        There are 4 time steps total. Each beamsearch_step builds one list over
        num_life beams.

        At t=0, the winners for ex0 are 1, 4, 5 (indexed by their position) and
        winners for ex1 are 2,4,5. From there we continue the beam for ex0:
            12, 13, 14              6+2=8 6+3=9  6+2=8
            40, 43, 45  with scores 5+4=9 5+3=8  5+7=12 three new winners 45, 52, 55
            50, 52, 55              5+4=9 5+6=11 5+5=10

        for ex2
            1 4 5  with scores   5 4 7
        we get the three winners 1, 4, 5 and continue (just taking the
        3 in order, no sorting)
            10 12 13 14 (not unique!)  5+2=7  5+2=7  5+3=8
            41 42 43       with scores 4+6=10 4+5=9  4+7=11 winners  43 51 52
            51 52 53                   7+4=11 7+6=13 7+3=10 scores   11 11 13
        continue from the three winners 43 51 52
            431 433 434             11+10=21 11+3=14 11+9=20
            511 512 513 with scores 11+6=17  11+5=16 11+7=18  winners 431 434 520
            520 521 522             13+8=21  13+4=17 13+6=19  scores   21  20  21
        continue from three winners 431 511 513 (going along beams, the matches
        in a beam)
            4310 4312 4313 4314             21+2=23  21+2=23 21+3=24 21+10=31 (not unique!)
            4341 4342 4343      with scores 20+10=30 20+5=25 20+7=27        winners 4314 4341 5204
            5200 5202 5204                  21+8=29  21+6=27 21+10=31       scores    31   30   31
        overall winners are 4314 4341 5204

        """
        def __init__(self):
            self.i = -1
            # t=0
            #                                 X        x  x  <-- winners: 1, 4, 5  (for example 0)
            z = be.array(
                np.exp(np.array([[1, 6, 2, 1, 5, 5], [1, 5, 2, 2, 4, 7]]))).T

            # t=1
            #                                     x  x  x  <-- given we picked 4: new winners 2,3,4
            z1 = be.array(
                np.exp(np.array([[1, 1, 2, 3, 2, 1], [2, 1, 2, 3, 2, 1]]))).T
            #                               x        x     x  <-- given we picked 5:
            #                                                     new winners 0,3,[5]
            #                                                     score 12
            z2 = be.array(
                np.exp(np.array([[4, 1, 2, 3, 1, 7], [2, 6, 5, 7, 2, 4]]))).T
            #                               x     X        X  <-- given we picked 1:
            #                                                     new winners 0,[2],[5]
            #                                                     scores 12, 11
            z3 = be.array(
                np.exp(np.array([[4, 1, 6, 3, 1, 5], [1, 4, 6, 3, 2, 1]]))).T

            # t=2
            # example 0: given constructed (1, 5), score 11: 3, 4; scores 21, 20
            z4 = be.array(
                np.exp(np.array([[1, 1, 2, 10, 9, 1], [2, 10, 2, 3, 9, 1]]))).T
            # example 0: given constructed (5, 5), score 12: none selected from this beam
            z5 = be.array(
                np.exp(np.array([[4, 1, 2, 3, 1, 7], [2, 6, 5, 7, 2, 4]]))).T
            # example 0: given constructed (1, 2), score 12: 1; score 20
            z6 = be.array(
                np.exp(np.array([[4, 8, 6, 3, 1, 5], [8, 4, 6, 3, 1, 1]]))).T

            # t=3
            # example 0: given constructed (1, 5, 4), score 20: 1, score 30
            z7 = be.array(
                np.exp(np.array([[1, 10, 2, 1, 1, 1], [2, 1, 2, 3, 10, 1]]))).T
            # example 0: given constructed (1, 2, 1), score 20: 5, score 30
            z8 = be.array(
                np.exp(np.array([[4, 1, 2, 3, 1, 10], [2, 10, 5, 7, 2, 4]]))).T
            # example 0: given constructed (1, 5, 3), score 21: 4, score 31
            z9 = be.array(
                np.exp(np.array([[4, 8, 6, 3, 10, 5], [8, 4, 6, 3, 10, 1]]))).T

            self.z_list = [z, z1, z2, z3, z4, z5, z6, z7, z8, z9]

        def fprop(self, z, inference=True, init_state=None):
            self.i += 1
            return self.z_list[self.i]

    def final_state():
        return be.zeros_like(decoder.h[-1])

    class InObj(NervanaObject):
        def __init__(self):
            self.shape = (nout, time_steps)
            self.decoder_shape = (nout, time_steps)

    decoder.fprop = DummyFProp().fprop
    layers = Seq2Seq([encoder, decoder], decoder_connections=[0])
    layers.decoder._recurrent[0].final_state = final_state

    in_obj = InObj()
    layers.configure(in_obj)  # made zeros because zeros have shape
    layers.allocate()
    layers.allocate_deltas(None)
    beamsearch = BeamSearch(layers)
    inputs = be.iobuf(in_obj.shape)
    beamsearch.beamsearch(inputs, num_beams=num_beams)

    ex0 = np.array([[1, 5, 4, 1], [1, 2, 1, 5], [1, 5, 3, 4]])
    ex1 = np.array([[5, 1, 4, 4], [5, 1, 1, 1], [5, 2, 0, 4]])

    # extract all candidates
    examples = reformat_samples(beamsearch, num_beams, batch_size)
    assert allclose_with_out(examples[0], ex0)
    assert allclose_with_out(examples[1], ex1)
def test_dconv_rand(backend_default, rand_convargs, deltas_buffer):
    indim, nifm, fshape, nofm, batch_size, rngmax, w_rng = rand_convargs
    if isinstance(NervanaObject.be,
                  NervanaGPU) and NervanaObject.be.compute_capability < (5, 0):
        if nofm % 4 != 0:
            pytest.skip(
                msg="C dim must be a multiple of 4 for Kepler bprop kernel")
    NervanaObject.be.bsz = batch_size
    dtypeu = np.float32
    inp_rng = [0.0, rngmax]

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    inshape = (indim, indim, nifm)
    insize = np.prod(inshape)

    # generate neon deconv layer
    # need to switch to nofm here...
    neon_layer = Deconvolution(fshape=(fshape, fshape, nofm),
                               strides=1,
                               padding=0,
                               init=init_unif)
    insize = np.prod(inshape)

    # generate reference deconv layer
    ref_layer = DeconvRefLayer(1, batch_size, identity, inshape[0],
                               inshape[1:3], (fshape, fshape), nofm, 1, dtypeu)

    # setup input in range inp_rng
    inpa = np.random.random((insize, batch_size))
    inpa *= (inp_rng[1] - inp_rng[0])
    inpa += inp_rng[0]
    inpa = inpa.astype(dtypeu)
    inp = neon_layer.be.array(inpa)
    inp.lshape = inshape

    # run fprop on neon
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_out = neon_layer.fprop(inp).get()

    neon_layer.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    neon_layer.set_deltas(deltas_buffer)

    # pull neon weights into ref layer weights
    ref_layer.weights = neon_layer.W.get().T
    ref_out = np.copy(ref_layer.berror)

    # estimate the numerical precision
    ref_layer.fprop(inpa.T, permute=True)
    ref_out2 = ref_layer.berror
    atol = 10 * np.max(np.abs(ref_out - ref_out2))
    assert allclose_with_out(ref_out.T, neon_out, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(ref_out.T - neon_out)), atol)

    # generate err array
    erra = np.random.random(neon_out.shape)
    erra *= (inp_rng[1] - inp_rng[0])
    erra += inp_rng[0]
    erra = erra.astype(dtypeu)
Example #35
def check_rnn(seq_len, input_size, hidden_size,
              batch_size, init_func, inp_moms=[0.0, 1.0]):
    # init_func is the initializer for the model params
    # inp_moms is the [ mean, std dev] of the random input
    input_shape = (input_size, seq_len * batch_size)
    output_shape = (hidden_size, seq_len * batch_size)
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size

    # ======== create models ========
    # neon RNN
    rnn = Recurrent(hidden_size, init_func, activation=Tanh())

    # reference numpy RNN
    rnn_ref = RefRecurrent(input_size, hidden_size)
    Wxh = rnn_ref.Wxh
    Whh = rnn_ref.Whh
    bh = rnn_ref.bh

    # ========= generate data =================
    # generate random input tensor
    inp = np.random.rand(*input_shape) * inp_moms[1] + inp_moms[0]
    inpa = rnn.be.array(inp)
    # generate random deltas tensor
    deltas = np.random.randn(*output_shape)

    # the reference code expects these shapes:
    # input_shape: (seq_len, input_size, batch_size)
    # output_shape: (seq_len, hidden_size, batch_size)
    inp_ref = inp.copy().T.reshape(
        seq_len, batch_size, input_size).swapaxes(1, 2)
    deltas_ref = deltas.copy().T.reshape(
        seq_len, batch_size, hidden_size).swapaxes(1, 2)

    # ========= running models ==========
    # run neon fprop
    rnn.configure((input_size, seq_len))
    rnn.prev_layer = True
    rnn.allocate()
    rnn.set_deltas([rnn.be.iobuf(rnn.in_shape)])
    rnn.fprop(inpa)

    # weights are only initialized after doing fprop, so now
    # make ref weights and biases the same with neon model
    Wxh[:] = rnn.W_input.get()
    Whh[:] = rnn.W_recur.get()
    bh[:] = rnn.b.get()

    (dWxh_ref, dWhh_ref, db_ref, h_ref_list,
     dh_ref_list, d_out_ref) = rnn_ref.lossFun(inp_ref, deltas_ref)

    # now test the bprop
    rnn.bprop(rnn.be.array(deltas))
    # grab the delta W from gradient buffer
    dWxh_neon = rnn.dW_input.get()
    dWhh_neon = rnn.dW_recur.get()
    db_neon = rnn.db.get()

    # comparing outputs
    neon_logger.display('====Verifying hidden states====')
    assert allclose_with_out(rnn.outputs.get(),
                             h_ref_list,
                             rtol=0.0,
                             atol=1.0e-5)
    neon_logger.display('fprop is verified')

    neon_logger.display('====Verifying update on W and b ====')
    neon_logger.display('dWxh')
    assert allclose_with_out(dWxh_neon,
                             dWxh_ref,
                             rtol=0.0,
                             atol=1.0e-5)
    neon_logger.display('dWhh')
    assert allclose_with_out(dWhh_neon,
                             dWhh_ref,
                             rtol=0.0,
                             atol=1.0e-5)

    neon_logger.display('====Verifying update on bias====')
    neon_logger.display('db')
    assert allclose_with_out(db_neon,
                             db_ref,
                             rtol=0.0,
                             atol=1.0e-5)

    neon_logger.display('bprop is verified')

    return
Beispiel #36
0
def test_bibn(backend_default, fargs, deltas_buffer):

    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    NervanaObject.be.bsz = batch_size

    hidden_size = min(10, hidden_size)

    # setup the bi-directional rnn
    init_glorot = GlorotUniform()
    birnn = BiBNRNN(hidden_size, activation=Rectlinclip(slope=0), init=init_glorot)
    birnn.configure(in_shape)
    birnn.prev_layer = True

    birnn.allocate()
    birnn.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    birnn.set_deltas(deltas_buffer)

    # test fprop

    # set the ff buffer
    inp_np = np.random.random(birnn.h_ff_buffer.shape)
    inp_be = birnn.be.array(inp_np)
    birnn.h_ff_buffer[:] = inp_np
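    # h_ff_buffer appears to hold the feed-forward (pre-batch-norm) activations of the
    # bi-directional layer; writing to it directly exercises the BN path in isolation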

    # compare the bn output with calling the backend bn
    xsum = birnn.be.zeros_like(birnn.xmean)
    xvar = birnn.be.zeros_like(birnn.xvar)
    gmean = birnn.be.zeros_like(birnn.gmean)
    gvar = birnn.be.zeros_like(birnn.gvar)
    gamma = birnn.be.ones(birnn.gamma.shape)
    beta = birnn.be.zeros_like(birnn.beta)
    grad_gamma = birnn.be.zeros_like(gamma)
    grad_beta = birnn.be.zeros_like(beta)
    out_ref = birnn.be.zeros_like(birnn.h_ff_buffer)

    xsum[:] = birnn.be.sum(birnn.h_ff_buffer, axis=1)

    birnn.be.compound_fprop_bn(
        birnn.h_ff_buffer, xsum, xvar, gmean, gvar,
        gamma, beta, out_ref, birnn.eps, birnn.rho, False,
        accumbeta=0, relu=False)

    # call the bibnrnn layer fprop_bn
    out_bn = birnn._fprop_bn(birnn.h_ff_buffer, inference=False)

    assert allclose_with_out(out_bn.get(), out_ref.get(), rtol=0.0, atol=1.0e-5)

    # test bprop
    err_np = np.random.random(birnn.h_ff_buffer.shape)
    err_be = birnn.be.array(err_np)

    err_out_ref = birnn.be.empty_like(err_be)
    birnn.be.compound_bprop_bn(err_out_ref, grad_gamma, grad_beta,
                               err_be,
                               inp_be, xsum, xvar, gamma,
                               birnn.eps)

    err_out_bn = birnn._bprop_bn(err_be, out_bn)

    assert allclose_with_out(err_out_bn.get(), err_out_ref.get(), rtol=0.0, atol=2.5e-5)
Beispiel #37
0
def test_branch_model_fork_cpu(backend_cpu64):
    from neon.layers import BranchNode, Tree
    np.random.seed(0)
    be = NervanaObject.be
    be.bsz = 32
    bnode = BranchNode()
    i1 = inception([(32,), (32, 32), ('max', 16)])
    top1 = top_branch()
    top2 = top_branch()
    p1 = Sequential(main_branch() + [bnode, i1] + top1)
    p2 = [bnode] + top2

    alpha2 = 0.3
    neon_layer = Tree([p1, p2], alphas=[1.0, alpha2])
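    # the alphas scale each branch's error signal during bprop, which is why the
    # reference deltas below multiply the second branch's deltas by alpha2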

    inshape = (4, 224, 224)
    insize = np.prod(inshape)
    inpa = np.random.random((insize, batch_size))
    neon_layer.configure(inshape)
    inp = neon_layer.be.array(inpa)

    neon_layer.allocate()

    neon_layer.layers[0].layers[0].prev_layer = True
    neon_layer.allocate_deltas()

    neon_out_dev = neon_layer.fprop(inp)
    neon_out = [d.get() for d in neon_out_dev]

    # Now make the reference pathways:
    main_trunk2 = Sequential(main_branch())
    main_trunk2.configure(inshape)
    main2 = main_trunk2.layers
    main2[0].prev_layer = True
    main2[0].deltas = be.iobuf(inshape)

    branch2 = Sequential(top_branch())
    lbranch2 = branch2.layers
    (b1, b2, b3) = inception_bare(i1, [(32,), (32, 32), ('max', 16)])

    for bb in (b1, b2, b3, lbranch2):
        oshape = inshape
        for ll in main2 + bb:
            oshape = ll.configure(oshape)

    main1_trunk = neon_layer.layers[0].layers[:8]
    for ll, lo in zip(main2, main1_trunk):
        if ll.has_params:
            ll.set_params({'params': {'W': lo.W.get()}})
        ll.allocate()
        temp_deltas = DeltasTree()
        temp_deltas.proc_layer(ll)
        temp_deltas.allocate_buffers()
        ll.set_deltas(temp_deltas)

    for ll, lo in zip(lbranch2, neon_layer.layers[1].layers[1:]):
        if ll.has_params:
            ll.set_params({'params': {'W': lo.W.get()}})

    for bb in (b1, b2, b3, lbranch2):
        for ll in bb:
            ll.allocate()
            temp_deltas = DeltasTree()
            temp_deltas.proc_layer(ll)
            temp_deltas.allocate_buffers()
            ll.set_deltas(temp_deltas)

    # Create the combined output buffer
    merge_output = be.empty_like(neon_layer.layers[0].layers[9].outputs)

    x = inp
    for ll in main2:
        x = ll.fprop(x)
    main2_out = x

    start = 0
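    # fprop each inception branch off the shared trunk output and stack the branch
    # outputs along the feature axis, mirroring what the merge/broadcast layer produces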
    for bb in (b1, b2, b3):
        xb = main2_out
        for ll in bb:
            xb = ll.fprop(xb)
        end = start + xb.shape[0]
        merge_output[start:end] = xb
        start = end

    x = merge_output

    top_trunk = Sequential(top1).layers
    for ll in top_trunk:
        x = ll.fprop(x)

    neon_out_ref = x.get()
    assert allclose_with_out(neon_out_ref, neon_out[0], rtol=0)

    # Now do second branch
    neon_out_ref2 = branch2.fprop(main2_out).get()
    assert allclose_with_out(neon_out_ref2, neon_out[1])

    neon_logger.display("Beginning Back prop")
    erra = [np.random.random(d.shape) for d in neon_out]
    err = [be.array(d) for d in erra]
    neon_layer.layers[0].layers[0].deltas = be.iobuf(inshape)
    neon_layer.bprop(err)

    bottom_neon_deltas = neon_layer.layers[0].layers[1].deltas.get()
    middle_neon_deltas = neon_layer.layers[1].layers[1].deltas.get()

    err0 = err[0]
    for ll in reversed(top_trunk):
        err0 = ll.bprop(err0)

    err1 = err[1]
    for ll in reversed(lbranch2):
        err1 = ll.bprop(err1)

    for bb, errb in zip((b1, b2, b3), neon_layer.layers[0].layers[-5].error_views):
        for ll in reversed(bb):
            errb = ll.bprop(errb)

    # Now sum up the deltas at the root of the branch layer and compare
    ref_deltas = be.zeros_like(b1[0].deltas)
    ref_deltas[:] = alpha2 * lbranch2[0].deltas
    ref_deltas[:] = ref_deltas + b3[0].deltas + b2[0].deltas + b1[0].deltas
    neon_ref_deltas = ref_deltas.get()
    assert allclose_with_out(middle_neon_deltas, neon_ref_deltas, rtol=0)

    x = ref_deltas
    main2[0].deltas = be.iobuf(inshape)

    for ll in reversed(main2):
        x = ll.bprop(x)

    bottom_neon_ref_deltas = main2[1].deltas.get()
    assert allclose_with_out(bottom_neon_deltas, bottom_neon_ref_deltas, rtol=0)
Beispiel #38
0
def test_branch_model(backend_gpu):
    np.random.seed(0)
    be = NervanaObject.be
    be.bsz = 64
    main1 = main_branch()
    i1 = inception([(32,), (32, 32), ('max', 16)])
    top = top_branch()
    neon_layer = Sequential(main1 + i1 + top)

    inshape = (4, 224, 224)
    insize = np.prod(inshape)
    inpa = np.random.random((insize, batch_size))
    neon_layer.configure(inshape)
    inp = neon_layer.be.array(inpa)
    neon_layer.allocate()
    neon_logger.display(neon_layer.nested_str())
    neon_layer.layers[0].prev_layer = True

    neon_layer.allocate_deltas()

    neon_out = neon_layer.fprop(inp).get()

    # Now make the reference pathways:
    main_trunk2 = Sequential(main_branch())
    main_trunk2.configure(inshape)
    main2 = main_trunk2.layers
    main2[0].prev_layer = True
    main2[0].deltas = be.iobuf(inshape)
    (b1, b2, b3) = inception_bare(i1, [(32,), (32, 32), ('max', 16)])

    for bb in (b1, b2, b3):
        oshape = inshape
        for ll in main2 + bb:
            oshape = ll.configure(oshape)

    main1_trunk = neon_layer.layers[:8]
    for ll, lo in zip(main2, main1_trunk):
        if ll.has_params:
            ll.set_params({'params': {'W': lo.W.get()}})
        ll.allocate()

        temp_buff = DeltasTree()
        ll.allocate_deltas(temp_buff)
        temp_buff.allocate_buffers()
        ll.set_deltas(temp_buff)

    for bb in (b1, b2, b3):
        for ll in bb:
            ll.allocate()
            temp_buff = DeltasTree()
            ll.allocate_deltas(temp_buff)
            temp_buff.allocate_buffers()
            ll.set_deltas(temp_buff)

    # Create the combined output buffer
    merge_output = be.empty_like(neon_layer.layers[8].outputs)

    x = inp
    for ll in main2:
        x = ll.fprop(x)

    start = 0
    for bb in (b1, b2, b3):
        xb = x
        for ll in bb:
            xb = ll.fprop(xb)
        end = start + xb.shape[0]
        merge_output[start:end] = xb
        start = end

    x = merge_output

    top_trunk = Sequential(top).layers
    for ll in top_trunk:
        x = ll.fprop(x)

    neon_out_ref = x.get()
    assert allclose_with_out(neon_out, neon_out_ref, rtol=0)

    neon_logger.display("Beginning Back prop")
    erra = np.random.random(neon_out.shape)
    err = be.array(erra)
    for ll in reversed(neon_layer.layers[8:]):
        err = ll.bprop(err)

    neon_deltas = err.get()
    for bb, errb in zip((b1, b2, b3), neon_layer.layers[8].error_views):
        for ll in reversed(bb):
            errb = ll.bprop(errb)

    # Now sum up the deltas at the root of the branch layer and compare
    ref_deltas = be.zeros_like(b1[0].deltas)
    ref_deltas[:] = b3[0].deltas + b2[0].deltas + b1[0].deltas

    neon_ref_deltas = ref_deltas.get()

    assert allclose_with_out(neon_deltas, neon_ref_deltas, rtol=0)
Beispiel #39
0
def test_conv_rand(backend_default, rand_convargs, deltas_buffer):

    indim, nifm, fshape, nofm, batch_size, stride, rng_max, w_rng, pad = rand_convargs
    if isinstance(NervanaObject.be, NervanaGPU) and NervanaObject.be.compute_capability < (5, 0):
        if nifm % 4 != 0:
            pytest.skip(msg="C dim must be a multiple of 4 for Kepler bprop kernel")
    NervanaObject.be.bsz = batch_size
    inp_rng = [0.0, rng_max]
    dtypeu = np.float32
    init_unif = Uniform(low=w_rng[0], high=w_rng[1])

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    # generate neon conv layer
    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=stride, padding=pad, init=init_unif)

    # generate the reference layer
    ref_layer = ConvLayerRef(1,
                             batch_size,
                             identity,
                             inshape[0],
                             inshape[1:3],
                             (fshape, fshape),
                             nofm,
                             stride,
                             dtypeu,
                             padding=pad)

    # setup input in range inp_rng
    inpa = np.random.random((insize, batch_size))
    inpa *= inp_rng[1] - inp_rng[0]
    inpa += inp_rng[0]
    inpa = inpa.astype(dtypeu)
    inp = neon_layer.be.array(inpa)
    inp.lshape = inshape

    # run fprop on neon
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()

    neon_layer.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    neon_layer.set_deltas(deltas_buffer)

    neon_out = neon_layer.fprop(inp).get()

    # pull neon weights into ref layer weights
    ref_layer.weights = neon_layer.W.get().T
    ref_layer.fprop(inpa.T)
    ref_out = np.copy(ref_layer.y)

    # estimate the numerical precision by
    # permuting order of ops in ref layer
    # fprop calculation
    ref_layer.fprop(inpa.T, permute=True)
    ref_out_perm = ref_layer.y
    atol = 4 * np.max(np.abs(ref_out - ref_out_perm))

    # compare ref and neon layer fprop outputs
    # using the empirically determined atol
    assert allclose_with_out(ref_out.T, neon_out, atol=atol, rtol=1.e-4)

    # generate random deltas array
    erra = np.random.random(neon_out.shape)
    erra *= (inp_rng[1] - inp_rng[0])
    erra += inp_rng[0]

    erra = erra.astype(dtypeu)
    err = neon_layer.be.array(erra)

    # run neon bprop
    neon_deltas = neon_layer.bprop(err).get()
    neon_dW = neon_layer.dW.get()

    # run ref code bprop
    ref_layer.bprop(erra.T, 1.0)
    ref_deltas = np.copy(ref_layer.berror_nopad.T)
    ref_dW = np.copy(ref_layer.updates)

    # estimate precision using permutation
    # of operation order on ref layer code
    ref_layer.bprop(erra.T, 1.0, permute=True)
    ref_deltas_perm = ref_layer.berror_nopad.T
    ref_dW_perm = ref_layer.updates

    atol = 4 * np.max(np.abs(ref_deltas - ref_deltas_perm))
    assert allclose_with_out(ref_deltas, neon_deltas, atol=atol, rtol=1.e-4)

    atol = 4 * np.max(np.abs(ref_dW - ref_dW_perm))
    assert allclose_with_out(ref_dW.T, neon_dW, atol=atol, rtol=1.e-4)
    return
Beispiel #40
0
def check_gru(seq_len, input_size, hidden_size,
              batch_size, init_func, inp_moms=[0.0, 1.0], add_init_state=False):
    # init_func is the initializer for the model params
    # inp_moms is the [mean, std dev] of the random input
    input_shape = (input_size, seq_len * batch_size)
    output_shape = (hidden_size, seq_len * batch_size)
    slice_shape = (hidden_size, batch_size)

    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size

    # neon GRU
    gru = GRU(hidden_size,
              init_func,
              activation=Tanh(),
              gate_activation=Logistic())

    # generate random input tensor
    inp = np.random.rand(*input_shape) * inp_moms[1] + inp_moms[0]
    inp_dev = gru.be.array(inp)
    # generate random deltas tensor
    deltas = np.random.randn(*output_shape)

    # run neon fprop
    gru.configure((input_size, seq_len))
    gru.prev_layer = True
    gru.allocate()

    test_buffer = DeltasTree()
    gru.allocate_deltas(test_buffer)
    test_buffer.allocate_buffers()
    gru.set_deltas(test_buffer)

    if add_init_state:
        init_state = np.random.rand(*slice_shape)*inp_moms[1] + inp_moms[0]
        init_state_dev = gru.be.array(init_state)
        gru.fprop(inp_dev, init_state=init_state_dev)
    else:
        gru.fprop(inp_dev)

    # reference numpy GRU
    gru_ref = RefGRU(input_size, hidden_size)
    WGRU = gru_ref.weights

    # make the ref weights and biases the same as the neon model's
    r_range = list(range(hidden_size))
    z_range = list(range(hidden_size, hidden_size * 2))
    c_range = list(range(hidden_size * 2, hidden_size * 3))
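    # neon stacks the GRU gate parameters along the output axis in the order
    # reset (r), update (z), candidate (c); these row ranges slice out each gate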

    WGRU[gru_ref.weights_ind_br][:] = gru.b.get()[r_range]
    WGRU[gru_ref.weights_ind_bz][:] = gru.b.get()[z_range]
    WGRU[gru_ref.weights_ind_bc][:] = gru.b.get()[c_range]

    WGRU[gru_ref.weights_ind_Wxr][:] = gru.W_input.get()[r_range]
    WGRU[gru_ref.weights_ind_Wxz][:] = gru.W_input.get()[z_range]
    WGRU[gru_ref.weights_ind_Wxc][:] = gru.W_input.get()[c_range]

    WGRU[gru_ref.weights_ind_Rhr][:] = gru.W_recur.get()[r_range]
    WGRU[gru_ref.weights_ind_Rhz][:] = gru.W_recur.get()[z_range]
    WGRU[gru_ref.weights_ind_Rhc][:] = gru.W_recur.get()[c_range]

    # transpose input X and do fprop
    # the reference code expects these shapes:
    # input_shape: (seq_len, input_size, batch_size)
    # output_shape: (seq_len, hidden_size, batch_size)
    inp_ref = inp.copy().T.reshape(
        seq_len, batch_size, input_size).swapaxes(1, 2)
    deltas_ref = deltas.copy().T.reshape(
        seq_len, batch_size, hidden_size).swapaxes(1, 2)

    if add_init_state:
        init_state_ref = init_state.copy()
        (dWGRU_ref, h_ref_list, dh_ref_list,
            dr_ref_list, dz_ref_list, dc_ref_list) = gru_ref.lossFun(inp_ref,
                                                                     deltas_ref,
                                                                     init_state_ref)
    else:
        (dWGRU_ref, h_ref_list, dh_ref_list,
            dr_ref_list, dz_ref_list, dc_ref_list) = gru_ref.lossFun(inp_ref,
                                                                     deltas_ref)

    neon_logger.display('====Verifying hidden states====')
    assert allclose_with_out(gru.outputs.get(),
                             h_ref_list,
                             rtol=0.0,
                             atol=1.0e-5)

    neon_logger.display('fprop is verified')

    # now test the bprop
    neon_logger.display('Making sure neon GRU matches numpy GRU in bprop')
    gru.bprop(gru.be.array(deltas))
    # grab the delta W from gradient buffer
    dWinput_neon = gru.dW_input.get()
    dWrecur_neon = gru.dW_recur.get()
    db_neon = gru.db.get()
    dWxr_neon = dWinput_neon[r_range]
    dWxz_neon = dWinput_neon[z_range]
    dWxc_neon = dWinput_neon[c_range]
    dWrr_neon = dWrecur_neon[r_range]
    dWrz_neon = dWrecur_neon[z_range]
    dWrc_neon = dWrecur_neon[c_range]
    dbr_neon = db_neon[r_range]
    dbz_neon = db_neon[z_range]
    dbc_neon = db_neon[c_range]

    drzc_neon = gru.rzhcan_delta_buffer.get()
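    # rzhcan_delta_buffer appears to hold the stacked deltas of the r, z and candidate
    # (hcan) pre-activations, so the same row ranges slice out the per-gate deltas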
    dr_neon = drzc_neon[r_range]
    dz_neon = drzc_neon[z_range]
    dc_neon = drzc_neon[c_range]

    dWxr_ref = dWGRU_ref[gru_ref.dW_ind_Wxr]
    dWxz_ref = dWGRU_ref[gru_ref.dW_ind_Wxz]
    dWxc_ref = dWGRU_ref[gru_ref.dW_ind_Wxc]
    dWrr_ref = dWGRU_ref[gru_ref.dW_ind_Rhr]
    dWrz_ref = dWGRU_ref[gru_ref.dW_ind_Rhz]
    dWrc_ref = dWGRU_ref[gru_ref.dW_ind_Rhc]
    dbr_ref = dWGRU_ref[gru_ref.dW_ind_br]
    dbz_ref = dWGRU_ref[gru_ref.dW_ind_bz]
    dbc_ref = dWGRU_ref[gru_ref.dW_ind_bc]

    # neon_logger.display '====Verifying hidden deltas ===='
    neon_logger.display('====Verifying r deltas ====')
    assert allclose_with_out(dr_neon,
                             dr_ref_list,
                             rtol=0.0,
                             atol=1.0e-5)

    neon_logger.display('====Verifying z deltas ====')
    assert allclose_with_out(dz_neon,
                             dz_ref_list,
                             rtol=0.0,
                             atol=1.0e-5)

    neon_logger.display('====Verifying hcan deltas ====')
    assert allclose_with_out(dc_neon,
                             dc_ref_list,
                             rtol=0.0,
                             atol=1.0e-5)

    neon_logger.display('====Verifying update on W_input====')
    neon_logger.display('dWxr')
    assert allclose_with_out(dWxr_neon,
                             dWxr_ref,
                             rtol=0.0,
                             atol=1.0e-5)
    neon_logger.display('dWxz')
    assert allclose_with_out(dWxz_neon,
                             dWxz_ref,
                             rtol=0.0,
                             atol=1.0e-5)
    neon_logger.display('dWxc')
    assert allclose_with_out(dWxc_neon,
                             dWxc_ref,
                             rtol=0.0,
                             atol=1.0e-5)

    neon_logger.display('====Verifying update on W_recur====')

    neon_logger.display('dWrr')
    assert allclose_with_out(dWrr_neon,
                             dWrr_ref,
                             rtol=0.0,
                             atol=1.0e-5)
    neon_logger.display('dWrz')
    assert allclose_with_out(dWrz_neon,
                             dWrz_ref,
                             rtol=0.0,
                             atol=1.0e-5)
    neon_logger.display('dWrc')
    assert allclose_with_out(dWrc_neon,
                             dWrc_ref,
                             rtol=0.0,
                             atol=1.0e-5)

    neon_logger.display('====Verifying update on bias====')
    neon_logger.display('dbr')
    assert allclose_with_out(dbr_neon,
                             dbr_ref,
                             rtol=0.0,
                             atol=1.0e-5)
    neon_logger.display('dbz')
    assert allclose_with_out(dbz_neon,
                             dbz_ref,
                             rtol=0.0,
                             atol=1.0e-5)
    neon_logger.display('dbc')
    assert allclose_with_out(dbc_neon,
                             dbc_ref,
                             rtol=0.0,
                             atol=1.0e-5)

    neon_logger.display('bprop is verified')

    return
Beispiel #41
0
def checkSequentialMatchesBatch():
    """ check LSTM I/O forward/backward interactions """

    n, b, d = (5, 3, 4)  # sequence length, batch size, hidden size
    input_size = 10
    WLSTM = LSTM.init(input_size, d)  # input size, hidden size
    X = np.random.randn(n, b, input_size)
    h0 = np.random.randn(b, d)
    c0 = np.random.randn(b, d)

    # sequential forward
    cprev = c0
    hprev = h0
    caches = [{} for t in range(n)]
    Hcat = np.zeros((n, b, d))
    for t in range(n):
        xt = X[t:t + 1]
        _, cprev, hprev, cache = LSTM.forward(xt, WLSTM, cprev, hprev)
        caches[t] = cache
        Hcat[t] = hprev

    # sanity check: perform batch forward to check that we get the same thing
    H, _, _, batch_cache = LSTM.forward(X, WLSTM, c0, h0)
    assert allclose_with_out(
        H, Hcat), "Sequential and Batch forward don't match!"

    # eval loss
    wrand = np.random.randn(*Hcat.shape)
    # loss = np.sum(Hcat * wrand)
    dH = wrand
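    # with loss = sum(Hcat * wrand), the gradient of the loss w.r.t. H is simply wrand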

    # get the batched version gradients
    BdX, BdWLSTM, Bdc0, Bdh0 = LSTM.backward(dH, batch_cache)

    # now perform sequential backward
    dX = np.zeros_like(X)
    dWLSTM = np.zeros_like(WLSTM)
    dc0 = np.zeros_like(c0)
    dh0 = np.zeros_like(h0)
    dcnext = None
    dhnext = None
    for t in reversed(range(n)):
        dht = dH[t].reshape(1, b, d)
        dx, dWLSTMt, dcprev, dhprev = LSTM.backward(dht, caches[t], dcnext,
                                                    dhnext)
        dhnext = dhprev
        dcnext = dcprev

        dWLSTM += dWLSTMt  # accumulate LSTM gradient
        dX[t] = dx[0]
        if t == 0:
            dc0 = dcprev
            dh0 = dhprev

    # and make sure the gradients match
    neon_logger.display(
        'Making sure batched version agrees with sequential version: '
        '(should all be True)')
    neon_logger.display(np.allclose(BdX, dX))
    neon_logger.display(np.allclose(BdWLSTM, dWLSTM))
    neon_logger.display(np.allclose(Bdc0, dc0))
    neon_logger.display(np.allclose(Bdh0, dh0))
Beispiel #42
0
def test_biRNN_fprop_rnn(backend_default, fargs, deltas_buffer):

    # basic sanity check with random inputs and tied forward/backward weights
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    out_shape = (hidden_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # setup the bi-directional rnn
    init_glorot = GlorotUniform()
    birnn = BiRNN(hidden_size, activation=Rectlinclip(slope=0), init=init_glorot)
    birnn.configure(in_shape)
    birnn.prev_layer = True
    birnn.allocate()

    # setup the bi-directional rnn
    init_glorot = GlorotUniform()
    rnn = Recurrent(hidden_size, activation=Rectlinclip(slope=0), init=init_glorot)
    rnn.configure(in_shape)
    rnn.prev_layer = True
    rnn.allocate()

    # use the same (forward) weights for the bi-rnn backward direction and the plain rnn
    nout = hidden_size
    birnn.W_input_b[:] = birnn.W_input_f
    birnn.W_recur_b[:] = birnn.W_recur_f
    birnn.b_b[:] = birnn.b_f
    birnn.dW[:] = 0
    rnn.W_input[:] = birnn.W_input_f
    rnn.W_recur[:] = birnn.W_recur_f
    rnn.b[:] = birnn.b_f
    rnn.dW[:] = 0

    # inputs - random and flipped left-to-right inputs
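    # (`con` is presumably numpy.concatenate imported under an alias in this test module;
    # reversing the step order presents the same sequence right-to-left)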
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))

    rl = con(lr_rev, axis=1)
    inp_lr = birnn.be.array(lr)
    inp_rl = birnn.be.array(rl)
    inp_rnn = rnn.be.array(lr)

    # outputs
    out_lr = birnn.fprop(inp_lr).get().copy()
    birnn.h_buffer[:] = 0
    out_rl = birnn.fprop(inp_rl).get()
    out_rnn = rnn.fprop(inp_rnn).get().copy()

    # views
    out_lr_f_s = get_steps(out_lr[:nout], out_shape)
    out_lr_b_s = get_steps(out_lr[nout:], out_shape)
    out_rl_f_s = get_steps(out_rl[:nout], out_shape)
    out_rl_b_s = get_steps(out_rl[nout:], out_shape)
    out_rnn_s = get_steps(out_rnn, out_shape)

    # asserts for fprop
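    # with tied weights, the forward pass over the reversed input should match the
    # backward pass over the original input step for step (and vice versa), and the
    # forward direction should match the plain RNN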
    for x_rnn, x_f, x_b, y_f, y_b in zip(out_rnn_s, out_lr_f_s, out_lr_b_s,
                                         reversed(out_rl_f_s), reversed(out_rl_b_s)):
        assert allclose_with_out(x_f, y_b, rtol=0.0, atol=1.0e-5)
        assert allclose_with_out(x_b, y_f, rtol=0.0, atol=1.0e-5)
        assert allclose_with_out(x_rnn, x_f, rtol=0.0, atol=1.0e-5)
        assert allclose_with_out(x_rnn, y_b, rtol=0.0, atol=1.0e-5)
Beispiel #43
0
def test_roipooling_bprop_random(backend_default, fargs):

    rois_per_image, img_fm_c, img_fm_h, img_fm_w, roi_size, bsz = fargs
    rois_per_batch = rois_per_image * bsz
    # generate a ramp-valued feature map and a set of ROIs (full-frame ROIs are set below)
    feature_map_size = img_fm_c * img_fm_h * img_fm_w * bsz

    feature_maps = np.array(list(range(feature_map_size))).reshape(
        (img_fm_c, img_fm_h, img_fm_w, bsz))
    input_errors = np.zeros((img_fm_c, roi_size, roi_size, rois_per_batch))

    range_num = roi_size * roi_size
    input_errors[0, :, :, rois_per_batch - 1] = np.array(list(
        range(range_num))).reshape(input_errors[0, :, :,
                                                rois_per_batch - 1].shape)

    rois_idx = np.vstack(
        [i * np.ones((rois_per_image, 1)) for i in range(bsz)])
    rois = np.random.random((rois_per_batch, 4)) * min(img_fm_h, img_fm_w)

    # use full frame as ROI
    rois = np.zeros((rois_per_batch, 4))
    rois[:, 0] = np.ones((rois_per_batch, ))
    rois[:, 1] = np.ones((rois_per_batch, ))
    rois[:, 2] = np.ones((rois_per_batch, )) * img_fm_w / spatial_scale
    rois[:, 3] = np.ones((rois_per_batch, )) * img_fm_w / spatial_scale

    rois = np.hstack((rois_idx, rois))
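    # each ROI row is (batch_index, x1, y1, x2, y2); the kernel presumably scales the
    # coordinates by spatial_scale before pooling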

    # run the numpy roipooling bprop reference (function inside this test script)
    outputs_np = bprop_roipooling_ref(feature_maps, rois, input_errors,
                                      img_fm_c, img_fm_h, img_fm_w, bsz,
                                      rois_per_image, roi_size, roi_size)

    # call backend roipooling kernel
    NervanaObject.be.bsz = bsz
    be = NervanaObject.be
    input_dev = be.array(feature_maps)
    rois_dev = be.array(rois)
    output_shape = (img_fm_c, roi_size, roi_size, rois_per_batch)
    outputs_dev = be.zeros(output_shape, dtype=np.float32)
    # make sure the argmax buffer type is int
    argmax_dev = be.zeros(output_shape, dtype=np.int32)
    input_error_dev = be.array(input_errors)
    output_error_dev = be.zeros(feature_maps.shape)

    be.roipooling_fprop(input_dev, rois_dev, outputs_dev, argmax_dev,
                        rois_per_batch, img_fm_c, img_fm_h, img_fm_w, roi_size,
                        roi_size, spatial_scale)
    start_time = timeit()
    be.roipooling_bprop(input_error_dev, rois_dev, output_error_dev,
                        argmax_dev, rois_per_batch, img_fm_c, img_fm_h,
                        img_fm_w, roi_size, roi_size, spatial_scale)
    neon_logger.display(
        "Nervana backend roipooling bprop (sec): {}".format(timeit() -
                                                            start_time))

    assert output_error_dev.get().reshape(img_fm_c, img_fm_h, img_fm_w,
                                          bsz)[:, :, :, 0].sum() == 0
    assert output_error_dev.get().reshape(img_fm_c, img_fm_h, img_fm_w,
                                          bsz)[:, :, :, -1].sum() != 0

    assert output_error_dev.get().sum() == input_errors.sum()

    outputs_be = output_error_dev.get()
    assert allclose_with_out(outputs_np, outputs_be, atol=1e-6, rtol=0)
Beispiel #44
0
def test_conv_layer(fargs_tests, backend_pair):

    dtype = np.float32
    ng, nc = backend_pair

    if ng.compute_capability < (5, 0):
        pytest.skip(msg="Test requires Maxwell or higher")

    N, C, K = fargs_tests[0]
    D, H, W = fargs_tests[1]
    T, R, S = fargs_tests[2]
    padding_d, padding_h, padding_w = fargs_tests[3]
    strides_d, strides_h, strides_w = fargs_tests[4]

    conv_ng = ng.conv_layer(dtype, N, C, K, D, H, W, T, R, S, padding_d,
                            padding_h, padding_w, strides_d, strides_h,
                            strides_w)

    conv_nc = nc.conv_layer(dtype, N, C, K, D, H, W, T, R, S, padding_d,
                            padding_h, padding_w, strides_d, strides_h,
                            strides_w)

    assert conv_nc.dimI == conv_ng.dimI
    assert conv_nc.dimF == conv_ng.dimF
    assert conv_nc.dimO == conv_ng.dimO
    assert conv_nc.M == conv_ng.M

    dimI = conv_ng.dimI
    dimF = conv_ng.dimF
    dimO = conv_ng.dimO

    if any(np.array(dimO) <= 0):
        return

    # cpu input arrays
    cpuI = np.random.uniform(-0.8, 0.8, slicable(dimI, 1)).astype(np.float32)
    cpuF = np.random.uniform(0.0, 0.3, slicable(dimF)).astype(np.float32)
    cpuE = np.random.uniform(-0.2, 0.2, dimO).astype(np.float32)

    # zero pad the last row of cpu input for the sake of numpy
    cpuI[-1, :] = 0.0
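    # out-of-bounds taps from pixel_indices presumably point at this extra zeroed row,
    # which emulates zero padding when gathering with fancy indexing below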

    # =======GPU and CPU==========
    beI = cpuI[:-1, :].reshape(dimI)
    beF = cpuF.reshape(dimF)
    beE = cpuE

    start_gpu = default_timer()
    ngO, ngB, ngU = run_backend_conv(ng, conv_ng, beI, beF, beE, dtype)
    end_gpu = default_timer()

    start_cpu = default_timer()
    ncO, ncB, ncU = run_backend_conv(nc, conv_nc, beI, beF, beE, dtype)
    end_cpu = default_timer()

    neon_logger.display("gputime: %s, cputime %s" %
                        (end_gpu - start_gpu, end_cpu - start_cpu))

    # ======numpy===========
    # cpu output arrays
    cpuO = np.zeros(dimO, dtype=dtype)
    cpuB = np.zeros(slicable(dimI, 1), dtype=dtype)
    cpuU = np.zeros(slicable(dimF), dtype=dtype)

    D, H, W = conv_nc.DHW
    T, R, S = conv_nc.TRS
    M, P, Q = conv_nc.MPQ

    pad_d, pad_h, pad_w = conv_nc.padding
    str_d, str_h, str_w = conv_nc.strides
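    # direct convolution reference: for each output location, gather the input pixels
    # under the filter window (via pixel_indices) and compute the fprop output, bprop
    # deltas and weight updates as dense dot products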

    for m in range(M):
        mt = m * str_d - pad_d

        for p in range(P):
            pr = p * str_h - pad_h

            for q in range(Q):
                qs = q * str_w - pad_w

                idx = pixel_indices(conv_nc, mt, pr, qs)

                cpuO[:, m, p, q, :] = np.dot(cpuF.T, cpuI[idx, :])

                cpuB[idx, :] += np.dot(cpuF, cpuE[:, m, p, q, :])

                cpuU += np.dot(cpuI[idx, :], cpuE[:, m, p, q, :].T)

    for op, ngA, ncA, cpuA, w in (
            ("fprop", ngO, ncO, cpuO, Q),
            ("bprop", ngB, ncB.reshape(dimI), cpuB[:-1, :].reshape(dimI), W),
            ("update", ngU, ncU.reshape(dimF), cpuU.reshape(dimF), S)):

        neon_logger.display(op)
        ncAnp = ncA.get().astype(np.float32)
        ngAnp = ngA.get().astype(np.float32)
        ncdif = cpuA - ncAnp
        ngdif = cpuA - ngAnp
        maxval = abs(cpuA).max()
        ncmaxdif = abs(ncdif).max()
        ngmaxdif = abs(ngdif).max()
        ncRatio = ncmaxdif / float(maxval)
        ngRatio = ngmaxdif / float(maxval)

        assert ncRatio < 1e-5
        assert ngRatio < 1e-5

        assert allclose_with_out(ncA.get(), cpuA, rtol=0, atol=1e-4)
        assert allclose_with_out(ngA.get(), cpuA, rtol=0, atol=1e-3)
Beispiel #45
0
def mergesum_test_config(be, modfunc, use_stride=1):
    l1 = Conv(**conv_params(3, 16))
    neon_layer = modfunc(16, use_stride)
    inshape = (16, 32, 32)
    insize = np.prod(inshape)
    inpa = np.random.random((insize, batch_size))

    neon_seq = Sequential([l1] + neon_layer)
    neon_seq.configure(inshape)
    inp = be.array(inpa)

    neon_seq.allocate()
    # neon_layer.layers[0].prev_layer = True

    neon_seq.allocate_deltas()

    neon_out = neon_seq.fprop(inp).get()

    # Now make the reference pathways:
    p1, p2 = module_factory_copy(neon_layer, modfunc, 16, use_stride)
    l11 = Conv(**conv_params(3, 16))
    l12 = Conv(**conv_params(3, 16))

    for ll in (l11, l12):
        for lcopy, lref in zip(ll, l1):
            if lcopy.has_params:
                lcopy.set_params(lref.get_params_serialize())

    path1 = Sequential([l11] + p1)
    path2 = Sequential([l12] + p2)
    for ll in (path1, path2):
        ll.configure(inshape)
        ll.allocate()
        ll.allocate_deltas()

    o1 = path1.fprop(inp)
    o2 = path2.fprop(inp)
    # convert the mkl buffers to cpu tensors for the cpu execution below
    be.convert_data(o1, False)
    be.convert_data(o2, False)
    neon_out_ref = be.empty_like(o1)
    neon_out_ref[:] = be.maximum(o1 + o2, 0)
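    # the reference path combines the two branches as an elementwise sum followed by a
    # ReLU, which is what the merge-sum module under test is expected to compute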

    # need to have bsum false for this test to be valid
    assert allclose_with_out(neon_out_ref.get(), neon_out, rtol=0)
    erra = np.random.random(neon_out.shape)
    err = be.array(erra)

    ebr = neon_seq.layers[-1].bprop(err)
    ebr = neon_seq.layers[-2].bprop(ebr)
    trunk_neon = ebr.get()

    err = be.array(erra)
    err[:] = be.greater(neon_out_ref, 0) * err
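    # back-propagate through the ReLU by masking the error with the positive-output indicator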

    pstart = len(l1)
    eb1 = err
    for l in reversed(path1.layers[pstart:]):
        eb1 = l.bprop(eb1)

    eb2 = err
    for l in reversed(path2.layers[pstart:]):
        eb2 = l.bprop(eb2)

    be.convert_data(eb1, False)
    be.convert_data(eb2, False)
    err_ref = be.empty_like(eb1)
    err_ref[:] = eb1 + eb2

    assert allclose_with_out(err_ref.get(), trunk_neon, rtol=0)
Beispiel #46
0
def test_conv_ones(backend_default, ones_convargs, deltas_buffer):
    dtypeu = np.float32
    indim, nifm, fshape, nofm, batch_size, stride, pad = ones_convargs
    if isinstance(NervanaObject.be,
                  NervanaGPU) and NervanaObject.be.compute_capability < (5, 0):
        if nifm % 4 != 0:
            pytest.skip(
                msg="C dim must be a multiple of 4 for Kepler bprop kernel")

    NervanaObject.be.bsz = batch_size

    # weights set to one
    init_unif = Uniform(low=1.0, high=1.0)

    inshape = (nifm, indim, indim)
    insize = np.prod(inshape)

    neon_layer = Convolution(fshape=(fshape, fshape, nofm),
                             strides=stride,
                             padding=pad,
                             init=init_unif)
    inp = neon_layer.be.array(np.ones((insize, batch_size)))
    inp.lshape = inshape
    neon_layer.configure(inshape)
    neon_layer.prev_layer = True
    neon_layer.allocate()

    neon_layer.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    neon_layer.set_deltas(deltas_buffer)

    # run fprop
    out = neon_layer.fprop(inp).get()

    # generate the reference layer
    ref_layer = ConvLayerRef(1,
                             batch_size,
                             identity,
                             inshape[0],
                             inshape[1:3], (fshape, fshape),
                             nofm,
                             stride,
                             dtypeu,
                             padding=pad)
    # init weights to ones
    ref_layer.weights = np.ones(neon_layer.W.shape).T.astype(dtypeu)
    ref_layer.fprop(inp.get().T)
    out_exp = ref_layer.y.copy()
    assert allclose_with_out(out_exp.T, out, atol=0.0, rtol=0.0)

    # generate err array
    err = np.ones(out.shape).astype(np.float32)

    # run bprop
    neon_layer.bprop(neon_layer.be.array(err))
    dw = neon_layer.dW.get()

    # run the reference bprop
    ref_layer.bprop(err.T.astype(dtypeu), 1.0)

    # expected output for updates is uniform matrix with
    # all elements == ofmsize*batch_size
    updates_exp = ref_layer.updates.T

    # check dw from neon layer
    assert allclose_with_out(dw, updates_exp, atol=0.0, rtol=0.0)

    # the deltas are more complicated since the matrices are not
    # uniform, so use the reference code output directly here;
    # no tolerance, the comparison should be exact
    dd = np.abs(ref_layer.berror_nopad.T - neon_layer.deltas.get())
    try:
        assert np.max(dd) == 0.0
    except AssertionError:
        if ones_convargs in ((32, 32, 3, 32, 64, 2, 0),
                             (32, 32, 3, 16, 64, 2, 0),
                             (32, 32, 3, 64, 64, 2, 0)):
            pytest.xfail(reason="xfail before mkl update. issue: #1020")
        else:
            assert np.max(dd) == 0.0

    return
Beispiel #47
0
def gradient_check_ref(seq_len, input_size, hidden_size, batch_size,
                       epsilon=1.0e-5, dtypeu=np.float64, threshold=1e-4):
    # this is a check of the reference code itself
    # estimates the gradients by adding perturbations
    # to the input and the weights and compares to
    # the values calculated in bprop

    # generate sparse random input matrix
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size
    input_shape = (seq_len, input_size, batch_size)
    # hidden_shape = (seq_len, hidden_size, batch_size)
    (inp_bl, nz_inds) = sparse_rand(input_shape, frac=1.0 / float(input_shape[1]))
    inp_bl = np.random.randn(*input_shape)

    # convert input matrix from neon to ref code format
    inp_bl = inp_bl.swapaxes(1, 2).astype(dtypeu)

    # generate reference LSTM
    lstm_ref = RefLSTM()
    WLSTM = lstm_ref.init(input_size, hidden_size).astype(dtypeu)

    # init parameters as done for neon
    WLSTM = np.random.randn(*WLSTM.shape)

    (Hout, cprev, hprev, cache) = lstm_ref.forward(inp_bl, WLSTM)

    # scale Hout by random matrix...
    rand_scale = np.random.random(Hout.shape) * 2.0 - 1.0
    rand_scale = dtypeu(rand_scale)

    # line below would be the loss function
    # loss_bl = np.sum(rand_scale * Hout)

    # run bprop, input deltas is rand_scale
    (dX_bl, dWLSTM_bl, dc0, dh0) = lstm_ref.backward(rand_scale, cache)

    grads_est = np.zeros(dX_bl.shape)
    inp_pert = inp_bl.copy()
    for pert_ind in range(inp_bl.size):
        save_val = inp_pert.flat[pert_ind]

        # add/subtract perturbations to input
        inp_pert.flat[pert_ind] = save_val + epsilon
        # and run fprop on perturbed input
        (Hout_pos, cprev, hprev, cache) = lstm_ref.forward(inp_pert, WLSTM)

        inp_pert.flat[pert_ind] = save_val - epsilon
        (Hout_neg, cprev, hprev, cache) = lstm_ref.forward(inp_pert, WLSTM)

        # calculate the loss on outputs
        loss_pos = np.sum(rand_scale * Hout_pos)
        loss_neg = np.sum(rand_scale * Hout_neg)
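        # central difference estimate: dL/dx ~= (L(x + eps) - L(x - eps)) / (2 * eps)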

        grads_est.flat[pert_ind] = 0.5 / float(epsilon) * (loss_pos - loss_neg)

        # reset input
        inp_pert.flat[pert_ind] = save_val

    # assert that gradient estimates within rel threshold of
    # bprop calculated deltas
    assert allclose_with_out(grads_est, dX_bl, rtol=threshold, atol=0.0)
    return
Beispiel #48
0
def compare_helper_cpu(op, inA, inB, nc, dtype):
    numpy_result = math_helper(np, op, inA, inB,
                               dtype=np.float32).astype(dtype)

    nervanaCPU_result = math_helper(nc, op, inA, inB, dtype=dtype).get()
    assert allclose_with_out(numpy_result, nervanaCPU_result, rtol=0, atol=1e-5)
Beispiel #49
0
def test_gpu_pool_layer(poolargs, backend_pair_bench):

    op = poolargs[0]

    dtype = np.float32
    ng, nc = backend_pair_bench

    N, C = 32, 32
    D, H, W = 1, 32, 32
    J, T, R, S = 2, 1, 3, 3
    padding_j, padding_d, padding_h, padding_w = 0, 0, 0, 0
    strides_j, strides_d, strides_h, strides_w = 2, 1, 2, 2

    pool_ng = ng.pool_layer(
        dtype,
        op,
        N,
        C, D, H, W,
        J, T, R, S,
        padding_j, padding_d, padding_h, padding_w,
        strides_j, strides_d, strides_h, strides_w)

    pool_nc = nc.pool_layer(
        dtype,
        op,
        N,
        C, D, H, W,
        J, T, R, S,
        padding_j, padding_d, padding_h, padding_w,
        strides_j, strides_d, strides_h, strides_w)

    assert pool_ng.dimI == pool_nc.dimI
    assert pool_ng.dimO == pool_nc.dimO

    dimI = pool_ng.dimI
    dimO = pool_ng.dimO

    # generate random arrays for the inputs and errors
    cpuI = np.random.uniform(0.0, 1.0, sliceable(dimI, 1)).astype(
        np.float16).astype(dtype)
    cpuE = np.random.uniform(-0.2, 0.2, dimO).astype(dtype)

    # zero pad the last row of cpu input for the sake of numpy
    if op == "max":
        cpuI[-1, :] = np.finfo(dtype).min
    else:
        cpuI[-1, :] = 0
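    # the pad row is filled with the most negative value for max pooling so it never
    # wins the max, and with zero for average pooling so it contributes nothing to the sum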

    # =========GPU and numpy ==========
    beI = cpuI[:-1, :].reshape(dimI)
    beE = cpuE

    ngO, ngB = run_backend_pool(ng, pool_ng, beI, beE, dtype)
    cpuO, cpuB = run_numpy_pool(op, cpuI, cpuE, dtype, pool_ng)

    for opA, ngA, cpuA in (
            ("fprop", ngO, cpuO),
            ("bprop", ngB, cpuB[:-1, :].reshape(dimI))):

        neon_logger.display(opA)
        assert allclose_with_out(ngA.get(), cpuA, rtol=0, atol=1e-4)