Example #1
def test_gru_precompute():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    l_mask_inp = InputLayer(in_shp[:2])

    x_in = np.random.random(in_shp).astype('float32')
    mask_in = np.ones((num_batch, seq_len), dtype='float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_gru_precompute = GRULayer(l_inp, num_units=num_units,
                                precompute_input=True, mask_input=l_mask_inp)
    lasagne.random.get_rng().seed(1234)
    l_gru_no_precompute = GRULayer(l_inp, num_units=num_units,
                                   precompute_input=False,
                                   mask_input=l_mask_inp)
    output_precompute = helper.get_output(
        l_gru_precompute).eval({l_inp.input_var: x_in,
                                l_mask_inp.input_var: mask_in})
    output_no_precompute = helper.get_output(
        l_gru_no_precompute).eval({l_inp.input_var: x_in,
                                   l_mask_inp.input_var: mask_in})

    # test that precomputing the input gives the same output as not precomputing it
    np.testing.assert_almost_equal(output_precompute, output_no_precompute)
Example #2
    def _add_forward_backward_encoder_layer(self):
        is_single_layer_encoder = self._encoder_depth == 1
        return_only_final_state = is_single_layer_encoder

        # input shape = (batch_size * input_context_size, input_seq_len, embedding_dimension)
        self._net['enc_forward'] = GRULayer(
            incoming=self._net['emb_x'],
            num_units=self._hidden_layer_dim,
            grad_clipping=self._grad_clip,
            only_return_final=return_only_final_state,
            name='encoder_forward',
            mask_input=self._net['input_x_mask'])
        # output shape = (batch_size * input_context_size, input_seq_len, hidden_layer_dimension)
        #             or (batch_size * input_context_size, hidden_layer_dimension)

        # input shape = (batch_size * input_context_size, input_seq_len, embedding_dimension)
        self._net['enc_backward'] = GRULayer(
            incoming=self._net['emb_x'],
            num_units=self._hidden_layer_dim,
            grad_clipping=self._grad_clip,
            only_return_final=return_only_final_state,
            backwards=True,
            name='encoder_backward',
            mask_input=self._net['input_x_mask'])
        # output shape = (batch_size * input_context_size, input_seq_len, hidden_layer_dimension)
        #             or (batch_size * input_context_size, hidden_layer_dimension)

        self._net['enc_0'] = ConcatLayer(
            incomings=[self._net['enc_forward'], self._net['enc_backward']],
            axis=1 if return_only_final_state else 2,
            name='encoder_bidirectional_concat')
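
The shape comments above hinge on the concatenation axis: with only_return_final=True each direction yields a 2d tensor, so the hidden dimensions are concatenated along axis 1; otherwise the full 3d sequences are concatenated along axis 2. A minimal standalone sketch of both cases, with assumed sizes (unrelated to the class above):

from lasagne.layers import InputLayer, GRULayer, ConcatLayer, get_output_shape

l_emb = InputLayer((None, 20, 30))          # (batch, seq_len, embedding_dim)
for only_final in (True, False):
    l_fwd = GRULayer(l_emb, num_units=40, only_return_final=only_final)
    l_bwd = GRULayer(l_emb, num_units=40, only_return_final=only_final, backwards=True)
    l_cat = ConcatLayer([l_fwd, l_bwd], axis=1 if only_final else 2)
    print(get_output_shape(l_cat))          # (None, 80), then (None, 20, 80)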
Example #3
def test_gru_unroll_scan_fwd():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    l_mask_inp = InputLayer(in_shp[:2])

    x_in = np.random.random(in_shp).astype('float32')
    mask_in = np.ones(in_shp[:2]).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_gru_scan = GRULayer(l_inp, num_units=num_units, backwards=False,
                          unroll_scan=False, mask_input=l_mask_inp)
    lasagne.random.get_rng().seed(1234)
    l_gru_unrolled = GRULayer(l_inp, num_units=num_units, backwards=False,
                              unroll_scan=True, mask_input=l_mask_inp)
    output_scan = helper.get_output(l_gru_scan)
    output_unrolled = helper.get_output(l_gru_unrolled)

    output_scan_val = output_scan.eval({l_inp.input_var: x_in,
                                        l_mask_inp.input_var: mask_in})
    output_unrolled_val = output_unrolled.eval({l_inp.input_var: x_in,
                                                l_mask_inp.input_var: mask_in})

    np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)
Example #4
def test_gru_unroll_scan_bck():
    num_batch, seq_len, n_features1 = 2, 5, 4
    num_units = 2
    x = T.tensor3()
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    x_in = np.random.random(in_shp).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_gru_scan = GRULayer(l_inp,
                          num_units=num_units,
                          backwards=True,
                          unroll_scan=False)
    lasagne.random.get_rng().seed(1234)
    l_gru_unrolled = GRULayer(l_inp,
                              num_units=num_units,
                              backwards=True,
                              unroll_scan=True)
    output_scan = helper.get_output(l_gru_scan, x)
    output_unrolled = helper.get_output(l_gru_unrolled, x)

    output_scan_val = output_scan.eval({x: x_in})
    output_unrolled_val = output_unrolled.eval({x: x_in})

    np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)
Example #5
def test_gru_tensor_init():
    # check if passing in a TensorVariable to hid_init works
    num_units = 5
    batch_size = 3
    seq_len = 2
    n_inputs = 4
    in_shp = (batch_size, seq_len, n_inputs)
    l_inp = InputLayer(in_shp)
    hid_init = T.matrix()
    x = T.tensor3()

    l_gru = GRULayer(l_inp, num_units, learn_init=True, hid_init=hid_init)

    # check that the tensors are used and not overwritten
    assert hid_init == l_gru.hid_init

    # 3*n_gates; hid_init is a TensorVariable here, so no init params are returned
    # the 3 is because we have hid_to_gate, in_to_gate and bias for each gate
    assert len(lasagne.layers.get_all_params(l_gru, trainable=True)) == 9

    # bias params(3); hid_init is a TensorVariable, so no init params are returned
    assert len(lasagne.layers.get_all_params(l_gru, regularizable=False)) == 3

    # check that it compiles and runs
    output = lasagne.layers.get_output(l_gru, x)
    x_test = np.ones(in_shp, dtype='float32')
    hid_init_test = np.ones((batch_size, num_units), dtype='float32')

    output_val = output.eval({x: x_test, hid_init: hid_init_test})
    assert isinstance(output_val, np.ndarray)
Example #6
def build_res_stafg():

    net = collections.OrderedDict()
    # INPUTS----------------------------------------
    net['sent_input'] = InputLayer((None, CFG['SEQUENCE LENGTH']),
                                   input_var=T.imatrix())
    net['word_emb'] = EmbeddingLayer(net['sent_input'], input_size=CFG['VOCAB SIZE'] + 3,
                                     output_size=CFG['WORD VECTOR SIZE'], W=np.copy(CFG['wemb']))

    net['vis_input'] = InputLayer((None, CFG['VISUAL LENGTH'], CFG['VIS SIZE']))
    # key words model-------------------------------------
    net['vis_mean_pool'] = FeaturePoolLayer(net['vis_input'], CFG['VISUAL LENGTH'],
                                            pool_function=T.mean)
    net['ctx_vis_reshp'] = ReshapeLayer(net['vis_mean_pool'], (-1, CFG['VIS SIZE']))
    net['global_vis'] = DenseLayer(net['ctx_vis_reshp'], num_units=CFG['EMBEDDING SIZE'],
                                   nonlinearity=linear)
    net['key_words_prob'] = DenseLayer(DropoutLayer(net['global_vis']),
                                       num_units=CFG['VOCAB SIZE'] + 3, nonlinearity=sigmoid)
    # gru model--------------------------------------
    net['mask_input'] = InputLayer((None, CFG['SEQUENCE LENGTH']))
    net['sgru'] = GRULayer(net['word_emb'], num_units=CFG['EMBEDDING SIZE'],
                           mask_input=net['mask_input'], hid_init=net['global_vis'])
    net['sta_gru'] = CTXAttentionGRULayer([net['sgru'], net['vis_input'], net['global_vis']],
                                          num_units=CFG['EMBEDDING SIZE'],
                                          mask_input=net['mask_input'])
    # fuse the attention GRU output with the plain GRU output
    net['fusion'] = DropoutLayer(ConcatLayer([net['sta_gru'], net['sgru']], axis=2), p=0.5)
    net['fusion_reshp'] = ReshapeLayer(net['fusion'], (-1, CFG['EMBEDDING SIZE'] * 2))
    net['word_prob'] = DenseLayer(net['fusion_reshp'], num_units=CFG['VOCAB SIZE'] + 3,
                                  nonlinearity=softmax)
    net['sent_prob'] = ReshapeLayer(net['word_prob'], (-1, CFG['SEQUENCE LENGTH'], CFG['VOCAB SIZE'] + 3))
    return net
Example #7
    def rnn_fn(self):
        """Define the rnn using lasagne

        :return l_current: lasagne RNN"""
        l_in = InputLayer((None, None, self.z_dim))
        layers = [l_in]
        l_current = l_in

        # create the rnn layer
        for h in range(1, self.hid_depth + 1):
            backwards = True if self.bidirectional and h % 2 == 0 else False
            l_h = GRULayer(l_current,
                           num_units=self.hid_dim,
                           hidden_update=Gate(nonlinearity=tanh),
                           backwards=backwards)

            # if we want to use skip-connections we concatenate the current layer
            if self.use_skip:
                layers.append(l_h)
                if h != self.hid_depth:
                    l_current = ConcatLayer([l_in, l_h], axis=2)
                else:
                    l_current = ConcatLayer(layers[1:], axis=2)
            else:
                l_current = l_h

        return l_current
Example #8
    def _add_context_encoder(self):
        self._net['batched_enc'] = reshape(
            self._net['enc'], (self._batch_size, self._input_context_size,
                               get_output_shape(self._net['enc'])[-1]))

        self._net['context_enc'] = GRULayer(incoming=self._net['batched_enc'],
                                            num_units=self._hidden_layer_dim,
                                            grad_clipping=self._grad_clip,
                                            only_return_final=True,
                                            name='context_encoder')

        self._net['switch_enc_to_tv'] = T.iscalar(name='switch_enc_to_tv')

        self._net['thought_vector'] = InputLayer(
            shape=(None, self._hidden_layer_dim),
            input_var=T.fmatrix(name='thought_vector'),
            name='thought_vector')

        self._net['enc_result'] = SwitchLayer(
            incomings=[self._net['thought_vector'], self._net['context_enc']],
            condition=self._net['switch_enc_to_tv'])

        # We need the following defaults to pass as the 'givens' argument when compiling theano functions:
        self._default_thoughts_vector = T.zeros(
            (self._batch_size, self._hidden_layer_dim))
        self._default_input_x = T.zeros(
            shape=(self._net['thought_vector'].input_var.shape[0], 1, 1),
            dtype=np.int32)
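
The comment above about the 'givens' argument refers to Theano's compile-time substitution: a default expression is swapped in for an input variable so that the compiled function no longer requires that input. A small self-contained sketch of the mechanism (hypothetical variables, unrelated to the class above):

import theano
import theano.tensor as T

x = T.matrix('x')
default_x = T.zeros((2, 3))                     # stands in for x at compile time
f = theano.function([], (x * 2).sum(), givens={x: default_x})
print(f())                                      # 0.0, computed from the substituted default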
Example #9
    def __init__(self, num_batch, max_len, n_features, hidden=[200, 200], **kwargs):
        self.num_batch = num_batch
        self.n_features = n_features
        self.max_len = max_len
        self.hidden = hidden
        rng = np.random.RandomState(123)
        self.drng = rng
        self.rng = RandomStreams(rng.randint(2 ** 30))

        # params
        initial_W = np.asarray(
            rng.uniform(
                    low=-4 * np.sqrt(6. / (self.hidden[1] + self.n_features)),
                    high=4 * np.sqrt(6. / (self.hidden[1] + self.n_features)),
                    size=(self.hidden[1], self.n_features)
            ),
            dtype=theano.config.floatX
        )

        self.W = theano.shared(value=initial_W, name='W', borrow=True)
        # # self.W_y_kappa = theano.shared(value=initial_W, name='W_y_kappa', borrow=True)
        self.b = theano.shared(
                value=np.zeros(
                    self.n_features,
                    dtype=theano.config.floatX
                ),
                borrow=True
            )
        # self.b_y_kappa = theano.shared(
        #         value=np.zeros(
        #             self.n_features,
        #             dtype=theano.config.floatX
        #         ),
        #         name='b',
        #         borrow=True
        #     )


        # I could directly create the model here since it is fixed
        self.l_in = InputLayer(shape=(None, self.max_len, self.n_features))
        self.mask_input = InputLayer(shape=(None, self.max_len))
        first_hidden = GRULayer(self.l_in, mask_input=self.mask_input, num_units=hidden[0])
        # l_shp = ReshapeLayer(first_hidden, (-1, hidden[0]))
        # l_dense = DenseLayer(l_shp, num_units=self.hidden[0], nonlinearity=rectify)
        # l_drop = DropoutLayer(l_dense, p=0.5)
        # l_shp = ReshapeLayer(l_drop, (-1, self.max_len, self.hidden[0]))
        self.model = GRULayer(first_hidden, num_units=hidden[1])
Example #10
def test_gru_hid_init_layer_eval():
    # Test `hid_init` as a `Layer` with some dummy input. Compare the output of
    # a network with a `Layer` as input to `hid_init` to a network with a
    # `np.array` as input to `hid_init`
    n_units = 7
    n_test_cases = 2
    in_shp = (n_test_cases, 2, 3)
    in_h_shp = (1, n_units)

    # dummy inputs
    X_test = np.ones(in_shp, dtype=theano.config.floatX)
    Xh_test = np.ones(in_h_shp, dtype=theano.config.floatX)
    Xh_test_batch = np.tile(Xh_test, (n_test_cases, 1))

    # network with `Layer` initializer for hid_init
    l_inp = InputLayer(in_shp)
    l_inp_h = InputLayer(in_h_shp)
    l_rec_inp_layer = GRULayer(l_inp, n_units, hid_init=l_inp_h)

    # network with `np.array` initializer for hid_init
    l_rec_nparray = GRULayer(l_inp, n_units, hid_init=Xh_test)

    # copy network parameters from l_rec_inp_layer to l_rec_nparray
    l_il_param = dict([(p.name, p) for p in l_rec_inp_layer.get_params()])
    l_rn_param = dict([(p.name, p) for p in l_rec_nparray.get_params()])
    for k, v in l_rn_param.items():
        if k in l_il_param:
            v.set_value(l_il_param[k].get_value())

    # build the theano functions
    X = T.tensor3()
    Xh = T.matrix()
    output_inp_layer = lasagne.layers.get_output(l_rec_inp_layer, {
        l_inp: X,
        l_inp_h: Xh
    })
    output_nparray = lasagne.layers.get_output(l_rec_nparray, {l_inp: X})

    # test both nets with dummy input
    output_val_inp_layer = output_inp_layer.eval({
        X: X_test,
        Xh: Xh_test_batch
    })
    output_val_nparray = output_nparray.eval({X: X_test})

    # check output given `Layer` is the same as with `np.array`
    assert np.allclose(output_val_inp_layer, output_val_nparray)
Example #11
def test_gru_grad():
    num_batch, seq_len, n_features = 5, 3, 10
    num_units = 6
    l_inp = InputLayer((num_batch, seq_len, n_features))
    l_gru = GRULayer(l_inp, num_units=num_units)
    output = helper.get_output(l_gru)
    g = T.grad(T.mean(output), lasagne.layers.get_all_params(l_gru))
    assert isinstance(g, (list, tuple))
Example #12
    def __init__(self,
                 num_batch,
                 max_len,
                 n_features,
                 hidden=[200, 200],
                 **kwargs):
        self.num_batch = num_batch
        self.n_features = n_features
        self.max_len = max_len
        self.hidden = hidden
        rng = np.random.RandomState(123)
        self.drng = rng
        self.rng = RandomStreams(rng.randint(2**30))

        # params
        initial_W = np.asarray(rng.uniform(low=1e-5,
                                           high=1,
                                           size=(self.hidden[1],
                                                 self.n_features)),
                               dtype=theano.config.floatX)

        self.W_y_theta = theano.shared(value=initial_W,
                                       name='W_y_theta',
                                       borrow=True)
        # # self.W_y_kappa = theano.shared(value=initial_W, name='W_y_kappa', borrow=True)
        self.b_y_theta = theano.shared(value=np.zeros(
            self.n_features, dtype=theano.config.floatX),
                                       borrow=True)
        # self.b_y_kappa = theano.shared(
        #         value=np.zeros(
        #             self.n_features,
        #             dtype=theano.config.floatX
        #         ),
        #         name='b',
        #         borrow=True
        #     )

        # I could directly create the model here since it is fixed
        self.l_in = InputLayer(shape=(self.num_batch, self.max_len,
                                      self.n_features))
        self.mask_input = InputLayer(shape=(self.num_batch, self.max_len))
        first_hidden = GRULayer(self.l_in,
                                mask_input=self.mask_input,
                                num_units=hidden[0])
        self.model = GRULayer(first_hidden, num_units=hidden[1])
Example #13
def test_gru_return_final():
    num_batch, seq_len, n_features = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features)
    x_in = np.random.random(in_shp).astype('float32')

    l_inp = InputLayer(in_shp)
    lasagne.random.get_rng().seed(1234)
    l_rec_final = GRULayer(l_inp, num_units, only_return_final=True)
    lasagne.random.get_rng().seed(1234)
    l_rec_all = GRULayer(l_inp, num_units, only_return_final=False)

    output_final = helper.get_output(l_rec_final).eval({l_inp.input_var: x_in})
    output_all = helper.get_output(l_rec_all).eval({l_inp.input_var: x_in})

    assert output_final.shape == (output_all.shape[0], output_all.shape[2])
    assert output_final.shape == lasagne.layers.get_output_shape(l_rec_final)
    assert np.allclose(output_final, output_all[:, -1])
Example #14
def test_gru_hid_init_layer():
    # test that you can set hid_init to be a layer
    l_inp = InputLayer((2, 2, 3))
    l_inp_h = InputLayer((2, 5))
    l_gru = GRULayer(l_inp, 5, hid_init=l_inp_h)

    x = T.tensor3()
    h = T.matrix()

    output = lasagne.layers.get_output(l_gru, {l_inp: x, l_inp_h: h})
Example #15
def test_gru_nparams_learn_init_true():
    l_inp = InputLayer((2, 2, 3))
    l_gru = GRULayer(l_inp, 5, learn_init=True)

    # 3*n_gates + hid_init
    # the 3 is because we have hid_to_gate, in_to_gate and bias for each gate
    assert len(lasagne.layers.get_all_params(l_gru, trainable=True)) == 10

    # bias params(3) + init params(1)
    assert len(lasagne.layers.get_all_params(l_gru, regularizable=False)) == 4
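
The counts asserted above come from the GRU's three gates (reset, update and hidden update), each contributing an input weight, a hidden weight and a bias, plus the learned initial state. A short sketch with the same assumed layer sizes that lists the parameters behind those numbers:

import lasagne
from lasagne.layers import InputLayer, GRULayer

l_inp = InputLayer((2, 2, 3))
l_gru = GRULayer(l_inp, 5, learn_init=True)
for p in lasagne.layers.get_all_params(l_gru, trainable=True):
    print(p.name)   # W_in_to_*, W_hid_to_* and b_* for each of the 3 gates, plus hid_init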
Example #16
def test_unroll_none_input_error():
    # Test that a ValueError is raised if unroll_scan is True and the input
    # sequence length is specified as None.
    l_in = InputLayer((2, None, 3))
    with pytest.raises(ValueError):
        RecurrentLayer(l_in, 5, unroll_scan=True)

    with pytest.raises(ValueError):
        LSTMLayer(l_in, 5, unroll_scan=True)

    with pytest.raises(ValueError):
        GRULayer(l_in, 5, unroll_scan=True)
Example #17
def test_gradient_steps_error():
    # Check that an error is raised if gradient_steps is not -1 and unroll_scan
    # is True
    l_in = InputLayer((2, 2, 3))
    with pytest.raises(ValueError):
        RecurrentLayer(l_in, 5, gradient_steps=3, unroll_scan=True)

    with pytest.raises(ValueError):
        LSTMLayer(l_in, 5, gradient_steps=3, unroll_scan=True)

    with pytest.raises(ValueError):
        GRULayer(l_in, 5, gradient_steps=3, unroll_scan=True)
Example #18
def GRURecurrent(input_var,
                 mask_var=None,
                 batch_size=1,
                 n_in=100,
                 n_out=1,
                 n_hid=200,
                 diag_val=0.9,
                 offdiag_val=0.01,
                 out_nlin=lasagne.nonlinearities.linear):
    # Input Layer
    l_in = InputLayer((batch_size, None, n_in), input_var=input_var)
    if mask_var is None:
        l_mask = None
    else:
        l_mask = InputLayer((batch_size, None), input_var=mask_var)

    _, seqlen, _ = l_in.input_var.shape
    l_rec = GRULayer(
        l_in,
        n_hid,
        resetgate=lasagne.layers.Gate(W_in=lasagne.init.GlorotNormal(0.05),
                                      W_hid=lasagne.init.GlorotNormal(0.05),
                                      W_cell=None,
                                      b=lasagne.init.Constant(0.)),
        updategate=lasagne.layers.Gate(W_in=lasagne.init.GlorotNormal(0.05),
                                       W_hid=lasagne.init.GlorotNormal(0.05),
                                       W_cell=None),
        hidden_update=lasagne.layers.Gate(
            W_in=lasagne.init.GlorotNormal(0.05),
            W_hid=LeInit(diag_val=diag_val, offdiag_val=offdiag_val),
            W_cell=None,
            nonlinearity=lasagne.nonlinearities.rectify),
        hid_init=lasagne.init.Constant(0.),
        backwards=False,
        learn_init=False,
        gradient_steps=-1,
        grad_clipping=10.,
        unroll_scan=False,
        precompute_input=True,
        mask_input=l_mask,
        only_return_final=False)

    # Output Layer
    l_shp = ReshapeLayer(l_rec, (-1, n_hid))
    l_dense = DenseLayer(l_shp,
                         num_units=n_out,
                         W=lasagne.init.GlorotNormal(0.05),
                         nonlinearity=out_nlin)
    # To reshape back to our original shape, we can use the symbolic shape variables we retrieved above.
    l_out = ReshapeLayer(l_dense, (batch_size, seqlen, n_out))

    return l_out, l_rec
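
The final reshape in GRURecurrent relies on ReshapeLayer accepting Theano scalar expressions, so the sequence length can stay symbolic. A standalone sketch of this trick with assumed sizes:

import numpy as np
import theano.tensor as T
from lasagne.layers import InputLayer, GRULayer, ReshapeLayer, DenseLayer, get_output

x = T.tensor3('x')
l_in = InputLayer((1, None, 4), input_var=x)
_, seqlen, _ = l_in.input_var.shape              # symbolic sequence length
l_rec = GRULayer(l_in, 8)
l_dense = DenseLayer(ReshapeLayer(l_rec, (-1, 8)), 2)
l_out = ReshapeLayer(l_dense, (1, seqlen, 2))    # reshape back using the symbolic length
out = get_output(l_out).eval({x: np.zeros((1, 6, 4), dtype='float32')})
print(out.shape)                                 # (1, 6, 2)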
Example #19
def test_gru_passthrough():
    # Tests that the GRU can simply pass through its input
    l_in = InputLayer((4, 5, 6))
    zero = lasagne.init.Constant(0.)
    one = lasagne.init.Constant(1.)
    pass_gate = Gate(zero, zero, None, one, None)
    no_gate = Gate(zero, zero, None, zero, None)
    in_pass_gate = Gate(
        np.eye(6).astype(theano.config.floatX), zero, None, zero, None)
    l_rec = GRULayer(l_in, 6, no_gate, pass_gate, in_pass_gate)
    out = lasagne.layers.get_output(l_rec)
    inp = np.arange(4 * 5 * 6).reshape(4, 5, 6).astype(theano.config.floatX)
    np.testing.assert_almost_equal(out.eval({l_in.input_var: inp}), inp)
Example #20
def test_gru_bck():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    x = T.tensor3()
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)

    x_in = np.ones(in_shp).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_gru_fwd = GRULayer(l_inp, num_units=num_units, backwards=False)
    lasagne.random.get_rng().seed(1234)
    l_gru_bck = GRULayer(l_inp, num_units=num_units, backwards=True)
    output_fwd = helper.get_output(l_gru_fwd, x)
    output_bck = helper.get_output(l_gru_bck, x)

    output_fwd_val = output_fwd.eval({x: x_in})
    output_bck_val = output_bck.eval({x: x_in})

    # test that the backwards model returns the forward outputs in reverse order
    np.testing.assert_almost_equal(output_fwd_val, output_bck_val[:, ::-1])
Example #21
def test_gru_hid_init_mask():
    # test that you can set hid_init to be a layer when a mask is provided
    l_inp = InputLayer((2, 2, 3))
    l_inp_h = InputLayer((2, 5))
    l_inp_msk = InputLayer((2, 2))
    l_gru = GRULayer(l_inp, 5, hid_init=l_inp_h, mask_input=l_inp_msk)

    x = T.tensor3()
    h = T.matrix()
    msk = T.matrix()

    inputs = {l_inp: x, l_inp_h: h, l_inp_msk: msk}
    output = lasagne.layers.get_output(l_gru, inputs)
Example #22
def build_model_L(in_dim=3, out_dim=3):
    input_var = tensor.ftensor3('x')  # (B, T, D)
    input0 = InputLayer(shape=(None, None, in_dim),
                        input_var=input_var,
                        name='input0')
    gru0 = GRULayer(input0,
                    num_units=out_dim,
                    precompute_input=True,
                    backwards=False,
                    only_return_final=False,
                    learn_init=True,
                    name='gru0')
    return gru0
Example #23
    def __init__(self, incomings, voc_size, hid_state_size,
                 SemMem=None, GRU=None, **kwargs):
        super(InputModule, self).__init__(incomings, **kwargs)

        if SemMem is not None:
            self.SemMem = SemMem
        else:
            self.SemMem = SemMemModule(incomings[0], voc_size, hid_state_size, **kwargs)
        if GRU is not None:
            self.GRU = GRU
        else:
            # build the GRU on top of the (possibly just created) semantic memory module
            self.GRU = GRULayer(self.SemMem, hid_state_size)
        self.voc_size = voc_size
        self.hid_state_size = hid_state_size
Example #24
def test_gru_return_shape():
    num_batch, seq_len, n_features1, n_features2 = 5, 3, 10, 11
    num_units = 6
    x = T.tensor4()
    in_shp = (num_batch, seq_len, n_features1, n_features2)
    l_inp = InputLayer(in_shp)
    l_rec = GRULayer(l_inp, num_units=num_units)

    x_in = np.random.random(in_shp).astype('float32')
    output = helper.get_output(l_rec, x)
    output_val = output.eval({x: x_in})

    assert helper.get_output_shape(l_rec, x_in.shape) == output_val.shape
    assert output_val.shape == (num_batch, seq_len, num_units)
Example #25
    def _add_decoder(self):
        """
        Decoder returns the batch of sequences of thought vectors, each corresponds to a decoded token
        reshapes this 3d tensor to 2d matrix so that the next Dense layer can convert each thought vector to
        a probability distribution vector
        """

        self._net['hid_states_decoder'] = InputLayer(
            shape=(None, self._decoder_depth, None),
            input_var=T.tensor3('hid_inits_decoder'),
            name='hid_states_decoder')

        # repeat along the sequence axis output_seq_len times, where output_seq_len is inferred from input tensor
        self._net['enc_repeated'] = RepeatLayer(
            incoming=self._net[
                'enc_result'],  # input shape = (batch_size, encoder_output_dimension)
            n=self._output_seq_len,
            name='repeat_layer')

        self._net['emb_condition_id_repeated'] = RepeatLayer(
            incoming=self._net['emb_condition_id'],
            n=self._output_seq_len,
            name='embedding_condition_id_repeated')

        self._net['dec_concated_input'] = ConcatLayer(
            incomings=[
                self._net['emb_y'], self._net['enc_repeated'],
                self._net['emb_condition_id_repeated']
            ],
            axis=2,
            name='decoder_concated_input')
        # shape = (batch_size, input_seq_len, encoder_output_dimension)

        self._net['dec_0'] = self._net['dec_concated_input']

        for dec_layer_id in xrange(1, self._decoder_depth + 1):
            # input shape = (batch_size, input_seq_len, embedding_dimension + hidden_dimension)
            self._net['dec_' + str(dec_layer_id)] = GRULayer(
                incoming=self._net['dec_' + str(dec_layer_id - 1)],
                num_units=self._hidden_layer_dim,
                grad_clipping=self._grad_clip,
                only_return_final=False,
                name='decoder_' + str(dec_layer_id),
                mask_input=self._net['input_y_mask'],
                hid_init=SliceLayer(self._net['hid_states_decoder'],
                                    dec_layer_id - 1,
                                    axis=1))

        self._net['dec'] = self._net['dec_' + str(self._decoder_depth)]
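
As the docstring notes, turning each decoder thought vector into a token distribution is done by flattening the time axis into the batch axis, applying a softmax Dense layer and reshaping back. A standalone sketch of that pattern with assumed dimensions (unrelated to the class above):

from lasagne.layers import InputLayer, ReshapeLayer, DenseLayer, get_output_shape
from lasagne.nonlinearities import softmax

hidden_dim, vocab_size, seq_len = 128, 2000, 20
l_dec = InputLayer((None, seq_len, hidden_dim))         # decoder output: (batch, seq_len, hidden)
l_flat = ReshapeLayer(l_dec, (-1, hidden_dim))          # (batch * seq_len, hidden)
l_probs = DenseLayer(l_flat, vocab_size, nonlinearity=softmax)
l_out = ReshapeLayer(l_probs, (-1, seq_len, vocab_size))
print(get_output_shape(l_out))                          # (None, 20, 2000)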
Example #26
def test_gru_variable_input_size():
    # test that seq_len and batch_size set to None work
    num_batch, n_features1 = 6, 5
    num_units = 13
    x = T.tensor3()

    in_shp = (None, None, n_features1)
    l_inp = InputLayer(in_shp)
    x_in1 = np.ones((num_batch + 1, 10, n_features1)).astype('float32')
    x_in2 = np.ones((num_batch, 15, n_features1)).astype('float32')
    l_rec = GRULayer(l_inp, num_units=num_units, backwards=False)
    output = helper.get_output(l_rec, x)

    output.eval({x: x_in1})
    output.eval({x: x_in2})
Example #27
def gru_column(input, num_units, hidden, **kwargs):
    kwargs.pop("only_return_final", None)
    assert isinstance(hidden, (list, tuple))

    name = kwargs.pop("name", "default")
    column = [input]
    for i, l_hidden in enumerate(hidden):
        kwargs_ = kwargs.copy()
        if isinstance(l_hidden, Layer):
            kwargs_.pop("learn_init", None)
            kwargs_["hid_init"] = l_hidden

        layer = GRULayer(column[-1], num_units,
                         name=os.path.join(name, "gru_%02d" % i),
                         **kwargs_)
        column.append(layer)
    return column[1:]
Example #28
def test_gru_nparams_hid_init_layer():
    # test that you can see layers through hid_init
    l_inp = InputLayer((2, 2, 3))
    l_inp_h = InputLayer((2, 5))
    l_inp_h_de = DenseLayer(l_inp_h, 7)
    l_gru = GRULayer(l_inp, 7, hid_init=l_inp_h_de)

    # directly check the layers can be seen through hid_init
    assert lasagne.layers.get_all_layers(l_gru) == [l_inp, l_inp_h, l_inp_h_de,
                                                    l_gru]

    # 3*n_gates + 2
    # the 3 is because we have hid_to_gate, in_to_gate and bias for each gate
    # 2 is for the W and b parameters in the DenseLayer
    assert len(lasagne.layers.get_all_params(l_gru, trainable=True)) == 11

    # GRU bias params(3) + Dense bias params(1)
    assert len(lasagne.layers.get_all_params(l_gru, regularizable=False)) == 4
Example #29
    def _add_utterance_encoder(self):
        # input shape = (batch_size * input_context_size, input_seq_len, embedding_dimension)
        self._add_forward_backward_encoder_layer()

        for enc_layer_id in xrange(1, self._encoder_depth):
            is_last_encoder_layer = enc_layer_id == self._encoder_depth - 1
            return_only_final_state = is_last_encoder_layer

            # input shape = (batch_size * input_context_size, input_seq_len, embedding_dimension)
            self._net['enc_' + str(enc_layer_id)] = GRULayer(
                incoming=self._net['enc_' + str(enc_layer_id - 1)],
                num_units=self._hidden_layer_dim,
                grad_clipping=self._grad_clip,
                only_return_final=return_only_final_state,
                name='encoder_' + str(enc_layer_id),
                mask_input=self._net['input_x_mask'])

        self._net['enc'] = self._net['enc_' + str(self._encoder_depth - 1)]
Example #30
def gated_layer(incoming,
                num_units,
                grad_clipping,
                only_return_final,
                backwards,
                gated_layer_type,
                mask_input=None,
                cell_init=lasagne.init.Constant(0.),
                hid_init=lasagne.init.Constant(0.),
                resetgate=lasagne.layers.Gate(W_cell=None),
                updategate=lasagne.layers.Gate(W_cell=None),
                hidden_update=lasagne.layers.Gate(
                    W_cell=None, nonlinearity=lasagne.nonlinearities.tanh),
                name=None):
    if gated_layer_type == "gru":
        return GRULayer(incoming,
                        num_units,
                        mask_input=mask_input,
                        grad_clipping=grad_clipping,
                        only_return_final=only_return_final,
                        backwards=backwards,
                        hid_init=hid_init,
                        resetgate=resetgate,
                        updategate=updategate,
                        hidden_update=hidden_update,
                        name=name)
    else:
        # resetgate/updategate/hidden_update are GRU-specific arguments and are not
        # accepted by LSTMLayer (it has ingate/forgetgate/cell/outgate), so they are
        # not forwarded here
        return LSTMLayer(incoming,
                         num_units,
                         mask_input=mask_input,
                         grad_clipping=grad_clipping,
                         nonlinearity=lasagne.nonlinearities.tanh,
                         only_return_final=only_return_final,
                         backwards=backwards,
                         cell_init=cell_init,
                         hid_init=hid_init,
                         name=name)
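
A short usage sketch of the helper above (assumed sizes): the same call builds either a GRU or an LSTM encoder depending on gated_layer_type, and both return the final hidden state with matching shape.

import lasagne
from lasagne.layers import InputLayer

l_in = InputLayer((None, 20, 50))
l_gru = gated_layer(l_in, 64, grad_clipping=10., only_return_final=True,
                    backwards=False, gated_layer_type="gru", name="gru_enc")
l_lstm = gated_layer(l_in, 64, grad_clipping=10., only_return_final=True,
                     backwards=False, gated_layer_type="lstm", name="lstm_enc")
print(lasagne.layers.get_output_shape(l_gru))   # (None, 64)
print(lasagne.layers.get_output_shape(l_lstm))  # (None, 64)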