Example #1
def test_lnlstm_precompute():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    l_mask_inp = InputLayer(in_shp[:2])

    x_in = np.random.random(in_shp).astype('float32')
    mask_in = np.ones((num_batch, seq_len), dtype='float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_lstm_precompute = LNLSTMLayer(
        l_inp, num_units=num_units, precompute_input=True,
        mask_input=l_mask_inp)
    lasagne.random.get_rng().seed(1234)
    l_lstm_no_precompute = LNLSTMLayer(
        l_inp, num_units=num_units, precompute_input=False,
        mask_input=l_mask_inp)
    output_precompute = helper.get_output(
        l_lstm_precompute).eval({l_inp.input_var: x_in,
                                 l_mask_inp.input_var: mask_in})
    output_no_precompute = helper.get_output(
        l_lstm_no_precompute).eval({l_inp.input_var: x_in,
                                    l_mask_inp.input_var: mask_in})

    # test that precomputing the input gives the same output
    np.testing.assert_almost_equal(output_precompute, output_no_precompute)
Example #2
def test_lnlstm_unroll_scan_fwd():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    l_mask_inp = InputLayer(in_shp[:2])

    x_in = np.random.random(in_shp).astype('float32')
    mask_in = np.ones(in_shp[:2]).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_lstm_scan = LNLSTMLayer(l_inp, num_units=num_units, backwards=False,
                              unroll_scan=False, mask_input=l_mask_inp)
    lasagne.random.get_rng().seed(1234)
    l_lstm_unrolled = LNLSTMLayer(l_inp, num_units=num_units, backwards=False,
                                  unroll_scan=True, mask_input=l_mask_inp)
    output_scan = helper.get_output(l_lstm_scan)
    output_unrolled = helper.get_output(l_lstm_unrolled)

    output_scan_val = output_scan.eval({l_inp.input_var: x_in,
                                        l_mask_inp.input_var: mask_in})
    output_unrolled_val = output_unrolled.eval({l_inp.input_var: x_in,
                                                l_mask_inp.input_var: mask_in})

    np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)
Example #3
def test_lnlstm_get_emb_output():
    hid_size = 10
    inp_size = 10
    out_size = 40
    n_batches = 23
    seqlen = 47
    l_in = InputLayer((n_batches, seqlen), input_var=T.imatrix('input_var'), name="l_in")
    l_emb = EmbeddingLayer(l_in, inp_size, out_size, name="l_emb")
    l_lstm = LNLSTMLayer(l_emb, hid_size, name="l_lstm")

    emb_output = lasagne.layers.get_output(l_emb)
    output = lasagne.layers.get_output(l_lstm)
    output_for = l_lstm.get_output_for([emb_output])
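
The test stops after building the graphs. A minimal evaluation sketch (not in the original; it assumes the module-level numpy-as-np import the other examples rely on) to confirm the output shape of the embedding-fed LSTM:

    x_ids = np.random.randint(0, inp_size, size=(n_batches, seqlen)).astype('int32')
    out_val = output.eval({l_in.input_var: x_ids})
    assert out_val.shape == (n_batches, seqlen, hid_size)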
Example #4
def test_lnlstm_get_simple_output():
    feat_size = 20
    hid_size = 10
    l_in = InputLayer((None, None, feat_size))
    l_lstm = LNLSTMLayer(l_in, hid_size)

    output = lasagne.layers.get_output(l_lstm)
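
Because both leading dimensions are None, the same graph accepts any batch size and sequence length. A quick evaluation sketch (assumes the module-level numpy and theano imports used elsewhere on this page):

    x_in = np.random.random((3, 7, feat_size)).astype(theano.config.floatX)
    out_val = output.eval({l_in.input_var: x_in})
    assert out_val.shape == (3, 7, hid_size)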
Example #5
def test_lnlstm_grad():
    num_batch, seq_len, n_features = 5, 3, 10
    num_units = 6
    l_inp = InputLayer((num_batch, seq_len, n_features))
    l_lstm = LNLSTMLayer(l_inp, num_units=num_units)
    output = helper.get_output(l_lstm)
    g = T.grad(T.mean(output), lasagne.layers.get_all_params(l_lstm))
    assert isinstance(g, (list, tuple))
Example #6
def test_lnlstm_return_final():
    num_batch, seq_len, n_features = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features)
    x_in = np.random.random(in_shp).astype('float32')

    l_inp = InputLayer(in_shp)
    lasagne.random.get_rng().seed(1234)
    l_rec_final = LNLSTMLayer(l_inp, num_units, only_return_final=True)
    lasagne.random.get_rng().seed(1234)
    l_rec_all = LNLSTMLayer(l_inp, num_units, only_return_final=False)

    output_final = helper.get_output(l_rec_final).eval({l_inp.input_var: x_in})
    output_all = helper.get_output(l_rec_all).eval({l_inp.input_var: x_in})

    assert output_final.shape == (output_all.shape[0], output_all.shape[2])
    assert output_final.shape == lasagne.layers.get_output_shape(l_rec_final)
    assert np.allclose(output_final, output_all[:, -1])
Example #7
def test_lnlstm_nparams_no_peepholes():
    l_inp = InputLayer((2, 2, 3))
    l_lstm = LNLSTMLayer(l_inp, 5, peepholes=False, learn_init=False)

    # 7*n_gates + 2
    # the 7 is because each gate has hid_to_gate, in_to_gate and bias, plus
    # an LN alpha and beta for both hid_to_gate and in_to_gate; the extra 2
    # are the LN alpha and beta applied to the cell output
    assert len(lasagne.layers.get_all_params(l_lstm, trainable=True)) == 30

    # bias params(4) + LN betas(2 per gate + 1 for the cell = 9)
    # + init params(2)
    assert len(lasagne.layers.get_all_params(l_lstm, regularizable=False)) == 15
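
To see where these counts come from, a small inspection sketch: Lasagne names each shared variable, so printing names and shapes makes the seven-per-gate breakdown visible.

    for p in lasagne.layers.get_all_params(l_lstm, trainable=True):
        print(p.name, p.get_value().shape)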
Example #8
def test_lnlstm_nparams_learn_init():
    l_inp = InputLayer((2, 2, 3))
    l_lstm = LNLSTMLayer(l_inp, 5, peepholes=False, learn_init=True)

    # 7*n_gates + cell LN(2) + inits(2).
    # the 7 is because each gate has hid_to_gate, in_to_gate and bias, plus
    # an LN alpha and beta for both projections
    assert len(lasagne.layers.get_all_params(l_lstm, trainable=True)) == 32

    # bias params(4) + LN betas(9) + init params(2)
    assert len(lasagne.layers.get_all_params(l_lstm, regularizable=False)) == 15
Example #9
def test_lnlstm_hid_init_layer():
    # test that you can set hid_init to be a layer
    l_inp = InputLayer((2, 2, 3))
    l_inp_h = InputLayer((2, 5))
    l_cell_h = InputLayer((2, 5))
    l_lstm = LNLSTMLayer(l_inp, 5, hid_init=l_inp_h, cell_init=l_cell_h)

    x = T.tensor3()
    h = T.matrix()

    output = lasagne.layers.get_output(l_lstm, {l_inp: x, l_inp_h: h})
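
The test only builds the graph; actually evaluating it also needs a value for the cell-init input. A sketch with a hypothetical variable c standing in for l_cell_h (not in the original):

    c = T.matrix()
    out = lasagne.layers.get_output(l_lstm, {l_inp: x, l_inp_h: h, l_cell_h: c})
    out_val = out.eval({x: np.ones((2, 2, 3), dtype='float32'),
                        h: np.zeros((2, 5), dtype='float32'),
                        c: np.zeros((2, 5), dtype='float32')})
    assert out_val.shape == (2, 2, 5)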
Example #10
def test_lnlstm_bck():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    x = T.tensor3()
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)

    x_in = np.ones(in_shp).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_lstm_fwd = LNLSTMLayer(l_inp, num_units=num_units, backwards=False)
    lasagne.random.get_rng().seed(1234)
    l_lstm_bck = LNLSTMLayer(l_inp, num_units=num_units, backwards=True)
    output_fwd = helper.get_output(l_lstm_fwd, x)
    output_bck = helper.get_output(l_lstm_bck, x)

    output_fwd_val = output_fwd.eval({x: x_in})
    output_bck_val = output_bck.eval({x: x_in})

    # with constant input, the backwards model's output is the forward
    # model's output reversed in time
    np.testing.assert_almost_equal(output_fwd_val, output_bck_val[:, ::-1])
Example #11
def test_lnlstm_passthrough():
    # Tests that the LSTM can simply pass through its input
    l_in = InputLayer((4, 5, 6))
    zero = lasagne.init.Constant(0.)
    one = lasagne.init.Constant(1.)
    pass_gate = Gate(zero, zero, zero, one, None)
    no_gate = Gate(zero, zero, zero, zero, None)
    in_pass_gate = Gate(
        np.eye(6).astype(theano.config.floatX), zero, zero, zero, None)
    l_rec = LNLSTMLayer(
        l_in, 6, pass_gate, no_gate, in_pass_gate, pass_gate, None)
    out = lasagne.layers.get_output(l_rec)
    inp = np.arange(4*5*6).reshape(4, 5, 6).astype(theano.config.floatX)
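
In the plain LSTMLayer version of this test the network reproduces its input exactly; with layer normalization that identity need not hold, which is presumably why the assertion was dropped here. A shape-only check sketch:

    out_val = out.eval({l_in.input_var: inp})
    assert out_val.shape == inp.shape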
Example #12
def test_lnlstm_unroll_scan_bck():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    x = T.tensor3()
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)

    x_in = np.random.random(in_shp).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_lstm_scan = LNLSTMLayer(l_inp, num_units=num_units, backwards=True,
                              unroll_scan=False)
    lasagne.random.get_rng().seed(1234)
    l_lstm_unrolled = LNLSTMLayer(l_inp, num_units=num_units, backwards=True,
                                  unroll_scan=True)
    output_scan = helper.get_output(l_lstm_scan, x)
    output_scan_unrolled = helper.get_output(l_lstm_unrolled, x)

    output_scan_val = output_scan.eval({x: x_in})
    output_unrolled_val = output_scan_unrolled.eval({x: x_in})

    np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)
Example #13
def test_lnlstm_variable_input_size():
    # test that a seqlen and batchsize of None works
    num_batch, n_features1 = 6, 5
    num_units = 13
    x = T.tensor3()

    in_shp = (None, None, n_features1)
    l_inp = InputLayer(in_shp)
    x_in1 = np.ones((num_batch+1, 3+1, n_features1)).astype('float32')
    x_in2 = np.ones((num_batch, 3, n_features1)).astype('float32')
    l_rec = LNLSTMLayer(l_inp, num_units=num_units, backwards=False)
    output = helper.get_output(l_rec, x)
    output_val1 = output.eval({x: x_in1})
    output_val2 = output.eval({x: x_in2})
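
A small follow-up sketch: since only the feature axis is fixed, the two evaluations should differ only in their leading shape.

    assert output_val1.shape == (num_batch + 1, 4, num_units)
    assert output_val2.shape == (num_batch, 3, num_units)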
Example #14
def test_lnlstm_return_shape():
    num_batch, seq_len, n_features1, n_features2 = 5, 3, 10, 11
    num_units = 6
    x = T.tensor4()
    in_shp = (num_batch, seq_len, n_features1, n_features2)
    l_inp = InputLayer(in_shp)

    x_in = np.random.random(in_shp).astype('float32')

    l_lstm = LNLSTMLayer(l_inp, num_units=num_units)
    output = helper.get_output(l_lstm, x)
    output_val = output.eval({x: x_in})
    assert helper.get_output_shape(l_lstm, x_in.shape) == output_val.shape
    assert output_val.shape == (num_batch, seq_len, num_units)
Example #15
def test_lnlstm_hid_init_mask():
    # test that you can set hid_init to be a layer when a mask is provided
    l_inp = InputLayer((2, 2, 3))
    l_inp_h = InputLayer((2, 5))
    l_inp_msk = InputLayer((2, 2))
    l_cell_h = InputLayer((2, 5))
    l_lstm = LNLSTMLayer(l_inp, 5, hid_init=l_inp_h, mask_input=l_inp_msk,
                         cell_init=l_cell_h)

    x = T.tensor3()
    h = T.matrix()
    msk = T.matrix()

    inputs = {l_inp: x, l_inp_h: h, l_inp_msk: msk}
    output = lasagne.layers.get_output(l_lstm, inputs)
Example #16
def test_lnlstm_hid_init_layer_eval():
    # Test `hid_init` as a `Layer` with some dummy input. Compare the output of
    # a network with a `Layer` as input to `hid_init` to a network with a
    # `np.array` as input to `hid_init`
    n_units = 7
    n_test_cases = 2
    in_shp = (n_test_cases, 2, 3)
    in_h_shp = (1, n_units)
    in_cell_shp = (1, n_units)

    # dummy inputs
    X_test = np.ones(in_shp, dtype=theano.config.floatX)
    Xh_test = np.ones(in_h_shp, dtype=theano.config.floatX)
    Xc_test = np.ones(in_cell_shp, dtype=theano.config.floatX)
    Xh_test_batch = np.tile(Xh_test, (n_test_cases, 1))
    Xc_test_batch = np.tile(Xc_test, (n_test_cases, 1))

    # network with `Layer` initializer for hid_init
    l_inp = InputLayer(in_shp)
    l_inp_h = InputLayer(in_h_shp)
    l_inp_cell = InputLayer(in_cell_shp)
    l_rec_inp_layer = LNLSTMLayer(l_inp, n_units, hid_init=l_inp_h,
                                  cell_init=l_inp_cell, nonlinearity=None)

    # network with `np.array` initializer for hid_init
    l_rec_nparray = LNLSTMLayer(l_inp, n_units, hid_init=Xh_test,
                                cell_init=Xc_test, nonlinearity=None)

    # copy network parameters from l_rec_inp_layer to l_rec_nparray
    l_il_param = dict([(p.name, p) for p in l_rec_inp_layer.get_params()])
    l_rn_param = dict([(p.name, p) for p in l_rec_nparray.get_params()])
    for k, v in l_rn_param.items():
        if k in l_il_param:
            v.set_value(l_il_param[k].get_value())

    # build the theano functions
    X = T.tensor3()
    Xh = T.matrix()
    Xc = T.matrix()
    output_inp_layer = lasagne.layers.get_output(
        l_rec_inp_layer, {l_inp: X, l_inp_h: Xh, l_inp_cell: Xc})
    output_nparray = lasagne.layers.get_output(l_rec_nparray, {l_inp: X})

    # test both nets with dummy input
    output_val_inp_layer = output_inp_layer.eval({X: X_test, Xh: Xh_test_batch,
                                                  Xc: Xc_test_batch})
    output_val_nparray = output_nparray.eval({X: X_test})

    # check output given `Layer` is the same as with `np.array`
    assert np.allclose(output_val_inp_layer, output_val_nparray)
Example #17
def test_lnlstm_nparams_hid_init_layer():
    # test that you can see layers through hid_init
    l_inp = InputLayer((2, 2, 3))
    l_inp_h = InputLayer((2, 5))
    l_inp_h_de = DenseLayer(l_inp_h, 7)
    l_inp_cell = InputLayer((2, 5))
    l_inp_cell_de = DenseLayer(l_inp_cell, 7)
    l_lstm = LNLSTMLayer(l_inp, 7, hid_init=l_inp_h_de, cell_init=l_inp_cell_de)

    # directly check the layers can be seen through hid_init
    layers_to_find = [l_inp, l_inp_h, l_inp_h_de, l_inp_cell, l_inp_cell_de,
                      l_lstm]
    assert lasagne.layers.get_all_layers(l_lstm) == layers_to_find

    # 7*n_gates + cell LN(2) + peepholes(3) + 4
    # the 7 is because each gate has hid_to_gate, in_to_gate and bias, plus
    # an LN alpha and beta for both projections
    # 4 is for the W and b parameters in the two DenseLayer layers
    print(lasagne.layers.get_all_params(l_lstm, trainable=True))
    assert len(lasagne.layers.get_all_params(l_lstm, trainable=True)) == 37

    # LSTM bias params(4) + LN betas(2 per gate + 1 for the cell = 9)
    # + Dense bias params(2)
    assert len(lasagne.layers.get_all_params(l_lstm, regularizable=False)) == 15
Example #18
    cap_in_var = T.imatrix('cap_in')    # batch size, seq len
    mask_var = T.bmatrix('mask_var')    # batch size, seq len
    gate = lasagne.layers.Gate(W_in=lasagne.init.Orthogonal(), W_hid=lasagne.init.Orthogonal(),
                               W_cell=lasagne.init.Normal(), b=lasagne.init.Constant(0.0))
    cell_gate = lasagne.layers.Gate(W_in=lasagne.init.Orthogonal(), W_hid=lasagne.init.Orthogonal(),
                                    W_cell=None, b=lasagne.init.Constant(0.0),
                                    nonlinearity=lasagne.nonlinearities.tanh)
    forget_gate = lasagne.layers.Gate(W_in=lasagne.init.Orthogonal(), W_hid=lasagne.init.Orthogonal(),
                                      W_cell=lasagne.init.Normal(), b=lasagne.init.Constant(5.0))
    l_in = lasagne.layers.InputLayer((None, None), cap_in_var, name="l_in")
    l_mask = lasagne.layers.InputLayer((None, None), mask_var, name="l_mask")
    l_hid = lasagne.layers.InputLayer((None, HIDDEN_SIZE), input_var=im_features, name="l_hid")
    l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=WORD_SIZE, output_size=EMBEDDING_SIZE, name="l_emb")
    l_lstm = LNLSTMLayer(l_emb, HIDDEN_SIZE, ingate=gate, forgetgate=forget_gate, cell=cell_gate,
                                    outgate=gate, hid_init=l_hid, peepholes=True, grad_clipping=RNN_GRAD_CLIP,
                                    mask_input=l_mask, precompute_input=False,
                                    alpha_init=lasagne.init.Constant(0.2), # as suggested by Ryan Kiros on Twitter
                                    normalize_cell=True, name="l_lstm") # batch size, seq len, hidden size
    l_reshape = lasagne.layers.ReshapeLayer(l_lstm, (-1, [2]), name="l_reshape") # batch size * seq len, hidden size
    l_fc = lasagne.layers.DenseLayer(l_reshape, DENSE_SIZE, b=lasagne.init.Constant(5.0),
                                    nonlinearity=lasagne.nonlinearities.rectify, name="l_fc")
    l_drp = lasagne.layers.DropoutLayer(l_fc, 0.2, name="l_drp")
    l_hs = HierarchicalSoftmaxLayer(l_drp, WORD_SIZE, name="l_hs") # batch size * seq len, WORD SIZE
    l_slice = lasagne.layers.SliceLayer(l_lstm, -1, axis=1, name="l_slice")

    if CONTINUE:
        logger.info('Setting model weights from epoch {}'.format(max_epoch))
        param_values = pickle.load(open(param_values_file, 'rb'))
        lasagne.layers.set_all_param_values(l_hs, param_values['recurrent'])
        lasagne.layers.set_all_param_values(resnet['pool5'], param_values['resnet'])
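
A hypothetical continuation (not in the original excerpt): a masked cross-entropy loss over the flattened (batch * seq len) axis, assuming l_hs emits normalized probabilities over WORD_SIZE. The variable targets_var is introduced here purely for illustration.

    targets_var = T.ivector('targets')  # flattened gold word ids (assumed)
    probs = lasagne.layers.get_output(l_hs)
    loss = lasagne.objectives.categorical_crossentropy(probs, targets_var)
    loss = (loss * mask_var.reshape((-1,))).mean()  # zero out padded steps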
Example #19
def test_lnlstm_grad_clipping():
    # test that you can set grad_clip variable
    x = T.tensor3()
    l_rec = LNLSTMLayer(InputLayer((2, 2, 3)), 5, grad_clipping=1)
    output = lasagne.layers.get_output(l_rec, x)
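
The graph built with clipping should remain differentiable end to end; a minimal check mirroring test_lnlstm_grad above:

    g = T.grad(T.mean(output),
               lasagne.layers.get_all_params(l_rec, trainable=True))
    assert isinstance(g, list)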