Code Example #1
File: test_recurrent.py  Project: rupertsmall/neon
import numpy as np

from neon import NervanaObject
from neon.initializers import Gaussian
from neon.layers import Recurrent
from neon.transforms import Tanh

# reset_rnn is a small helper defined elsewhere in this test file; it clears
# the layer's hidden state and buffers between finite-difference evaluations.


def gradient_calc(seq_len, input_size, hidden_size, batch_size,
                  epsilon=None, rand_scale=None, inp_bl=None):
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size

    input_shape = (input_size, seq_len * batch_size)

    # generate input if one is not given
    if inp_bl is None:
        inp_bl = np.random.randn(*input_shape)

    # guard against the unusable None default; callers normally pass epsilon
    # explicitly (the 1e-5 fallback is an assumption, not from the original)
    if epsilon is None:
        epsilon = 1.0e-5

    # neon rnn instance
    rnn = Recurrent(hidden_size, Gaussian(), Tanh())
    inpa = rnn.be.array(np.copy(inp_bl))

    # run fprop on the baseline input
    out_bl = rnn.fprop(inpa).get()

    # random scaling to generate a fake loss
    if rand_scale is None:
        rand_scale = np.random.random(out_bl.shape) * 2.0 - 1.0
    # the implied loss function would be:
    # loss_bl = np.sum(rand_scale * out_bl)

    # run back prop with rand_scale as the errors
    # use copy to avoid any interactions
    deltas_neon = rnn.bprop(rnn.be.array(np.copy(rand_scale))).get()

    # add a perturbation to each input element
    grads_est = np.zeros(inpa.shape)
    inp_pert = inp_bl.copy()
    for pert_ind in range(inpa.size):
        save_val = inp_pert.flat[pert_ind]

        inp_pert.flat[pert_ind] = save_val + epsilon
        reset_rnn(rnn)
        out_pos = rnn.fprop(rnn.be.array(inp_pert)).get()

        inp_pert.flat[pert_ind] = save_val - epsilon
        reset_rnn(rnn)
        out_neg = rnn.fprop(rnn.be.array(inp_pert)).get()

        # calculate the loss with perturbations
        loss_pos = np.sum(rand_scale*out_pos)
        loss_neg = np.sum(rand_scale*out_neg)
        # compute the gradient estimate
        grad = 0.5*(loss_pos-loss_neg)/epsilon

        grads_est.flat[pert_ind] = grad

        # reset the perturbed input element
        inp_pert.flat[pert_ind] = save_val

    del rnn
    return (grads_est, deltas_neon)
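A minimal usage sketch (the shape values and tolerance below are illustrative assumptions, not taken from the original test): the finite-difference estimate is expected to agree with the analytic deltas returned by bprop.

# hypothetical invocation; compare numerical and analytic input gradients
grads_est, deltas_neon = gradient_calc(seq_len=3, input_size=5,
                                       hidden_size=10, batch_size=4,
                                       epsilon=1.0e-5)
assert np.allclose(grads_est, deltas_neon, rtol=0.0, atol=1.0e-3)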
Code Example #2
import numpy as np

from neon.data import Text
from neon.initializers import Constant
from neon.layers import Recurrent, Affine
from neon.models import Model
from neon.transforms import Logistic, Rectlin

# load_ptb_test is assumed to be a download helper available to this test
# module; it fetches the Penn Treebank test split and returns its path.


def test_model_get_outputs_rnn(backend_default, data):

    data_path = load_ptb_test(path=data)
    data_set = Text(time_steps=50, path=data_path)

    # weight initialization
    init = Constant(0.08)

    # model initialization
    layers = [
        Recurrent(150, init, activation=Logistic()),
        Affine(len(data_set.vocab), init, bias=init, activation=Rectlin())
    ]

    model = Model(layers=layers)
    output = model.get_outputs(data_set)

    assert output.shape == (data_set.ndata, data_set.seq_length,
                            data_set.nclass)

    # since the weights are all initialized to a constant and the model is
    # untrained, every unit computes the same function of its input, so the
    # values along the feature dimension should all be equal
    assert np.allclose(output[0, 0], output[0, 0, 0], rtol=0, atol=1e-5)
    assert np.allclose(output[0, 1], output[0, 1, 0], rtol=0, atol=1e-5)

    # along the time dimension, the values should be increasing:
    assert np.all(output[0, 2] > output[0, 1])
    assert np.all(output[0, 1] > output[0, 0])
Code Example #3
from neon.initializers import Constant
from neon.layers import Recurrent, Affine
from neon.models import Model
from neon.transforms import Logistic, Rectlin


def test_model_N_S_setter(backend_default):

    # weight initialization
    init = Constant(0.08)

    # model initialization
    layers = [
        Recurrent(150, init, activation=Logistic()),
        Affine(100, init, bias=init, activation=Rectlin())
    ]

    model = Model(layers=layers)

    # exercise the batch-size (N) and sequence-length (S) setters
    model.set_batch_size(20)
    model.set_seq_len(10)
Code Example #4
from neon.data import Text, load_text
from neon.initializers import Constant
from neon.layers import Recurrent, Affine
from neon.models import Model
from neon.transforms import Logistic, Rectlin

# assumption: load_text is the dataset-download helper exposed by neon.data
# in the neon release this test targets


def test_model_get_outputs_rnn(backend_default, data):

    data_path = load_text('ptb-valid', path=data)

    data_set = Text(time_steps=50, path=data_path)

    # weight initialization
    init = Constant(0.08)

    # model initialization
    layers = [
        Recurrent(150, init, Logistic()),
        Affine(len(data_set.vocab), init, bias=init, activation=Rectlin())
    ]

    model = Model(layers=layers)
    output = model.get_outputs(data_set)

    assert output.shape == (
        data_set.ndata, data_set.seq_length, data_set.nclass)
Code Example #5
import numpy as np

from neon import NervanaObject
from neon.layers import Recurrent
from neon.transforms import Tanh

# RefRecurrent (a NumPy reference RNN) and allclose_with_out (an allclose
# wrapper that prints diagnostics on mismatch) are helpers defined alongside
# this test.


def check_rnn(seq_len,
              input_size,
              hidden_size,
              batch_size,
              init_func,
              inp_moms=[0.0, 1.0]):
    # init_func is the initializer for the model params
    # inp_moms is the [mean, std dev] of the random input
    input_shape = (input_size, seq_len * batch_size)
    output_shape = (hidden_size, seq_len * batch_size)
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size

    # ======== create models ========
    # neon RNN
    rnn = Recurrent(hidden_size, init_func, Tanh())

    # reference numpy RNN
    rnn_ref = RefRecurrent(input_size, hidden_size)
    Wxh = rnn_ref.Wxh
    Whh = rnn_ref.Whh
    bh = rnn_ref.bh

    # ========= generate data =================
    # generate random input tensor
    inp = np.random.rand(*input_shape) * inp_moms[1] + inp_moms[0]
    inpa = rnn.be.array(inp)
    # generate random deltas tensor
    deltas = np.random.randn(*output_shape)

    # the reference code expects these shapes:
    # input_shape: (seq_len, input_size, batch_size)
    # output_shape: (seq_len, hidden_size, batch_size)
    inp_ref = inp.copy().T.reshape(seq_len, batch_size,
                                   input_size).swapaxes(1, 2)
    deltas_ref = deltas.copy().T.reshape(seq_len, batch_size,
                                         hidden_size).swapaxes(1, 2)

    # ========= running models ==========
    # run neon fprop
    rnn.fprop(inpa)

    # weights are only initialized after doing fprop, so now
    # make ref weights and biases the same with neon model
    Wxh[:] = rnn.W_input.get()
    Whh[:] = rnn.W_recur.get()
    bh[:] = rnn.b.get()

    (dWxh_ref, dWhh_ref, db_ref, h_ref_list, dh_ref_list,
     d_out_ref) = rnn_ref.lossFun(inp_ref, deltas_ref)

    # now test the bprop
    rnn.bprop(rnn.be.array(deltas))
    # grab the delta W from gradient buffer
    dWxh_neon = rnn.dW_input.get()
    dWhh_neon = rnn.dW_recur.get()
    db_neon = rnn.db.get()

    # comparing outputs
    print('==== Verifying hidden states ====')
    print(allclose_with_out(rnn.h_buffer.get(),
                            h_ref_list,
                            rtol=0.0,
                            atol=1.0e-5))
    print('fprop is verified')

    print('==== Verifying updates on W and b ====')
    print('dWxh')
    assert allclose_with_out(dWxh_neon, dWxh_ref, rtol=0.0, atol=1.0e-5)
    print('dWhh')
    assert allclose_with_out(dWhh_neon, dWhh_ref, rtol=0.0, atol=1.0e-5)

    print('==== Verifying update on bias ====')
    print('db')
    assert allclose_with_out(db_neon, db_ref, rtol=0.0, atol=1.0e-5)

    print('bprop is verified')

    return
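A minimal invocation sketch (the shapes and the choice of Gaussian() are illustrative assumptions; any neon initializer can serve as init_func):

from neon.initializers import Gaussian

# hypothetical shapes for an equivalence check against the NumPy reference
check_rnn(seq_len=10, input_size=3, hidden_size=10, batch_size=4,
          init_func=Gaussian())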
Code Example #6
if args.rlayer_type == 'lstm':
    rlayer = LSTM(hidden_size,
                  g_uni,
                  activation=Tanh(),
                  gate_activation=Logistic(),
                  reset_cells=True)
elif args.rlayer_type == 'bilstm':
    rlayer = DeepBiLSTM(hidden_size,
                        g_uni,
                        activation=Tanh(),
                        depth=1,
                        gate_activation=Logistic(),
                        reset_cells=True)
elif args.rlayer_type == 'rnn':
    rlayer = Recurrent(hidden_size, g_uni, activation=Tanh(), reset_cells=True)
elif args.rlayer_type == 'birnn':
    rlayer = DeepBiRNN(hidden_size,
                       g_uni,
                       activation=Tanh(),
                       depth=1,
                       reset_cells=True,
                       batch_norm=False,
                       bi_sum=False)
elif args.rlayer_type == 'bibnrnn':
    rlayer = DeepBiRNN(hidden_size,
                       g_uni,
                       activation=Tanh(),
                       depth=1,
                       reset_cells=True,
                       batch_norm=True)
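For context, args.rlayer_type comes from a command-line flag. A sketch of how such a flag might be declared with neon's NeonArgparser (the default value and help text are assumptions; the choices mirror the branches above):

from neon.util.argparser import NeonArgparser

parser = NeonArgparser(__doc__)
# hypothetical flag declaration
parser.add_argument('--rlayer_type', default='lstm',
                    choices=['lstm', 'bilstm', 'rnn', 'birnn', 'bibnrnn'],
                    help='type of recurrent layer to use')
args = parser.parse_args()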
Code Example #7
File: test_reshape_layer.py  Project: zmoon111/neon
import numpy as np

from neon.initializers import Uniform, GlorotUniform
from neon.layers import (Affine, Conv, DeepBiRNN, GeneralizedCost, LSTM,
                         LookupTable, Pooling, Recurrent, RecurrentSum,
                         Reshape)
from neon.models import Model
from neon.transforms import CrossEntropyBinary, Logistic, Softmax, Tanh


def test_reshape_layer_model(backend_default, fargs):
    """
    test cases:
    - conv before RNNs
    - conv after RNNs
    - conv after LUT
    """
    np.random.seed(seed=0)

    nin, nout, bsz = fargs
    be = backend_default
    be.bsz = bsz
    input_size = (nin, be.bsz)

    init = Uniform(-0.1, 0.1)
    g_uni = GlorotUniform()

    inp_np = np.random.rand(nin, be.bsz)
    delta_np = np.random.rand(nout, be.bsz)

    inp = be.array(inp_np)
    delta = be.array(delta_np)

    conv_lut_1 = [
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        Reshape(reshape=(4, 100, -1)),
        Conv((3, 3, 16), init=init),
        LSTM(64,
             g_uni,
             activation=Tanh(),
             gate_activation=Logistic(),
             reset_cells=True),
        RecurrentSum(),
        Affine(nout, init, bias=init, activation=Softmax())
    ]

    conv_lut_2 = [
        LookupTable(vocab_size=1000, embedding_dim=400, init=init),
        Reshape(reshape=(4, 50, -1)),
        Conv((3, 3, 16), init=init),
        Pooling(2, strides=2),
        Affine(nout=nout, init=init, bias=init, activation=Softmax()),
    ]

    conv_rnn_1 = [
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        LSTM(64,
             g_uni,
             activation=Tanh(),
             gate_activation=Logistic(),
             reset_cells=True),
        Reshape(reshape=(4, 32, -1)),
        Conv((3, 3, 16), init=init),
        Affine(nout, init, bias=init, activation=Softmax())
    ]

    conv_rnn_2 = [
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        Recurrent(64, g_uni, activation=Tanh(), reset_cells=True),
        Reshape(reshape=(4, -1, 32)),
        Conv((3, 3, 16), init=init),
        Affine(nout, init, bias=init, activation=Softmax())
    ]

    lut_sum_1 = [
        LookupTable(vocab_size=1000, embedding_dim=128, init=init),
        RecurrentSum(),
        Affine(nout=nout, init=init, bias=init, activation=Softmax()),
    ]

    lut_birnn_1 = [
        LookupTable(vocab_size=1000, embedding_dim=200, init=init),
        DeepBiRNN(32,
                  init=GlorotUniform(),
                  batch_norm=True,
                  activation=Tanh(),
                  reset_cells=True,
                  depth=1),
        Reshape((4, 32, -1)),
        Conv((3, 3, 16), init=init),
        Affine(nout=nout, init=init, bias=init, activation=Softmax())
    ]

    layers_test = [
        conv_lut_1, conv_lut_2, conv_rnn_1, conv_rnn_2, lut_sum_1, lut_birnn_1
    ]

    for lg in layers_test:
        model = Model(layers=lg)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(input_size, cost)
        model.fprop(inp)
        model.bprop(delta)
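The Reshape layers above only reinterpret the flat feature axis as a (channels, height, width) map for the following convolution. A NumPy analogy of the first reshape (the 128-column batch-like axis is an illustrative assumption, not neon's exact internal layout):

import numpy as np

emb = np.zeros((400, 128))      # (embedding_dim, flattened batch/time axis)
fmap = emb.reshape(4, 100, -1)  # reinterpreted as (channels, height, rest)
assert fmap.shape == (4, 100, 128)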
Code Example #8
                 default_dtype=args.datatype)

# download penn treebank
train_path = load_text('ptb-train', path=args.data_dir)
valid_path = load_text('ptb-valid', path=args.data_dir)

# load data and parse on character-level
train_set = Text(time_steps, train_path)
valid_set = Text(time_steps, valid_path, vocab=train_set.vocab)

# weight initialization
init = Uniform(low=-0.08, high=0.08)

# model initialization
layers = [
    Recurrent(hidden_size, init, Tanh()),
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

model = Model(layers=layers)

optimizer = RMSProp(clip_gradients=clip_gradients, stochastic_round=args.rounding)

# configure callbacks
callbacks = Callbacks(model, train_set, output_file=args.output_file,
                      valid_set=valid_set, valid_freq=args.validation_freq,
                      progress_bar=args.progress_bar)

# train model
Code Example #9
File: char_rnn.py  Project: zmoon111/neon
gradient_clip_value = None

# setup backend
be = gen_backend(**extract_valid_args(args, gen_backend))

# download penn treebank
dataset = PTB(time_steps, path=args.data_dir)
train_set = dataset.train_iter
valid_set = dataset.valid_iter

# weight initialization
init = Uniform(low=-0.08, high=0.08)

# model initialization
layers = [
    Recurrent(hidden_size, init, activation=Tanh()),
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

model = Model(layers=layers)

optimizer = RMSProp(gradient_clip_value=gradient_clip_value,
                    stochastic_round=args.rounding)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# train model
model.fit(train_set,
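A complete Model.fit call supplies the training set plus the cost, optimizer, epoch count, and callbacks; a sketch of a typical invocation (reading num_epochs from args.epochs is an assumption):

# sketch: full set of keyword arguments for neon's Model.fit
model.fit(train_set,
          optimizer=optimizer,
          num_epochs=args.epochs,
          cost=cost,
          callbacks=callbacks)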
Code Example #10
File: rnn1.py  Project: prokopyev/meetup2
                   index_file=valid_idx,
                   shuffle=False,
                   **common)
init = Gaussian(scale=0.01)
layers = [
    Conv((2, 2, 4),
         init=init,
         activation=Rectlin(),
         strides=dict(str_h=2, str_w=4)),
    Pooling(2, strides=2),
    Conv((3, 3, 4),
         init=init,
         batch_norm=True,
         activation=Rectlin(),
         strides=dict(str_h=1, str_w=2)),
    Recurrent(128, init=init, activation=Rectlin(), reset_cells=True),
    RecurrentMean(),
    Affine(nout=common['nclasses'], init=init, activation=Softmax())
]

model = Model(layers=layers)
opt = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9)
metric = Misclassification()
callbacks = Callbacks(model,
                      eval_set=valid,
                      metric=metric,
                      **args.callback_args)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.fit(train,
          optimizer=opt,
Code Example #11
File: test_model.py  Project: zmoon111/neon
import numpy as np

from neon.initializers import Gaussian
from neon.layers import (Affine, Conv, DeepBiLSTM, DeepBiRNN, GRU,
                         GeneralizedCost, LSTM, Pooling, Recurrent,
                         RecurrentMean)
from neon.models import Model
from neon.transforms import CrossEntropyBinary, Rectlin


def test_conv_rnn(backend_default):
    train_shape = (1, 17, 142)

    be = backend_default
    inp = be.array(be.rng.randn(np.prod(train_shape), be.bsz))
    delta = be.array(be.rng.randn(10, be.bsz))

    init_norm = Gaussian(loc=0.0, scale=0.01)
    bilstm = DeepBiLSTM(128,
                        init_norm,
                        activation=Rectlin(),
                        gate_activation=Rectlin(),
                        depth=1,
                        reset_cells=True)
    birnn_1 = DeepBiRNN(128,
                        init_norm,
                        activation=Rectlin(),
                        depth=1,
                        reset_cells=True,
                        batch_norm=False)
    birnn_2 = DeepBiRNN(128,
                        init_norm,
                        activation=Rectlin(),
                        depth=2,
                        reset_cells=True,
                        batch_norm=False)
    bibnrnn = DeepBiRNN(128,
                        init_norm,
                        activation=Rectlin(),
                        depth=1,
                        reset_cells=True,
                        batch_norm=True)
    birnnsum = DeepBiRNN(128,
                         init_norm,
                         activation=Rectlin(),
                         depth=1,
                         reset_cells=True,
                         batch_norm=False,
                         bi_sum=True)
    rnn = Recurrent(128,
                    init=init_norm,
                    activation=Rectlin(),
                    reset_cells=True)
    lstm = LSTM(128,
                init_norm,
                activation=Rectlin(),
                gate_activation=Rectlin(),
                reset_cells=True)
    gru = GRU(128,
              init_norm,
              activation=Rectlin(),
              gate_activation=Rectlin(),
              reset_cells=True)

    rlayers = [bilstm, birnn_1, birnn_2, bibnrnn, birnnsum, rnn, lstm, gru]

    for rl in rlayers:
        layers = [
            Conv((2, 2, 4),
                 init=init_norm,
                 activation=Rectlin(),
                 strides=dict(str_h=2, str_w=4)),
            Pooling(2, strides=2),
            Conv((3, 3, 4),
                 init=init_norm,
                 batch_norm=True,
                 activation=Rectlin(),
                 strides=dict(str_h=1, str_w=2)),
            rl,
            RecurrentMean(),
            Affine(nout=10, init=init_norm, activation=Rectlin()),
        ]
        model = Model(layers=layers)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(train_shape, cost)
        model.fprop(inp)
        model.bprop(delta)