Example 1
def build_model(input_shape, num_hidden, num_output, grad_clipping):
    l_in = InputLayer(input_shape, name='l_in')
    l_lstm1 = LSTMLayer(
        l_in,
        name='l_lstm1',
        num_units=num_hidden,
        grad_clipping=grad_clipping,
        nonlinearity=tanh,
    )
    l_lstm2 = LSTMLayer(
        l_lstm1,
        name='l_lstm2',
        num_units=num_hidden,
        grad_clipping=grad_clipping,
        nonlinearity=tanh,
        only_return_final=True,
    )

    l_out = DenseLayer(l_lstm2,
                       name='l_out',
                       W=Normal(),
                       num_units=num_output,
                       nonlinearity=softmax)

    layers = get_all_layers(l_out)
    return {layer.name: layer for layer in layers}
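Usage note: the dictionary returned by build_model keys every layer by its name, so the input variable and the final output can be recovered from it. A minimal sketch of wiring it up (hypothetical shapes, assuming the same lasagne imports the snippet relies on):

import theano
import lasagne

# hypothetical shapes: batches of 20-step sequences with 50 features each
layers = build_model(input_shape=(None, 20, 50), num_hidden=128,
                     num_output=10, grad_clipping=100)

prediction = lasagne.layers.get_output(layers['l_out'])
predict_fn = theano.function([layers['l_in'].input_var], prediction)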
Example 2
    def nn_fn(self):

        l_in = InputLayer((None, self.max_length, self.emb_dim))
        l_mask = InputLayer((None, self.max_length))

        l_h = l_in

        l_h_all = []

        for h in range(self.rnn_depth):
            if self.rnn_bidirectional:
                l_fwd = LSTMLayer(l_h,
                                  num_units=self.rnn_hid_units,
                                  mask_input=l_mask)
                l_bwd = LSTMLayer(l_h,
                                  num_units=self.rnn_hid_units,
                                  mask_input=l_mask,
                                  backwards=True)
                l_h = ConcatLayer((l_fwd, l_bwd), axis=-1)
            else:
                l_h = LSTMLayer(l_h,
                                num_units=self.rnn_hid_units,
                                mask_input=l_mask)
            l_h_all.append(l_h)

        l_h = SliceLayer(ElemwiseSumLayer(l_h_all), indices=-1, axis=1)

        for i in range(self.nn_dense_depth):
            l_h = DenseLayer(l_h, num_units=self.nn_dense_hid_units)

        l_mean = DenseLayer(l_h, self.z_dim, nonlinearity=None)
        l_cov = DenseLayer(l_h, self.z_dim, nonlinearity=softplus_safe)

        return (l_in, l_mask), (l_mean, l_cov)
Example 3
def test_lstm_unroll_scan_fwd():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    l_mask_inp = InputLayer(in_shp[:2])

    x_in = np.random.random(in_shp).astype('float32')
    mask_in = np.ones(in_shp[:2]).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_lstm_scan = LSTMLayer(l_inp, num_units=num_units, backwards=False,
                            unroll_scan=False, mask_input=l_mask_inp)
    lasagne.random.get_rng().seed(1234)
    l_lstm_unrolled = LSTMLayer(l_inp, num_units=num_units, backwards=False,
                                unroll_scan=True, mask_input=l_mask_inp)
    output_scan = helper.get_output(l_lstm_scan)
    output_unrolled = helper.get_output(l_lstm_unrolled)

    output_scan_val = output_scan.eval({l_inp.input_var: x_in,
                                        l_mask_inp.input_var: mask_in})
    output_unrolled_val = output_unrolled.eval({l_inp.input_var: x_in,
                                                l_mask_inp.input_var: mask_in})

    np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)
Example 4
def test_lstm_precompute():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    l_mask_inp = InputLayer(in_shp[:2])

    x_in = np.random.random(in_shp).astype('float32')
    mask_in = np.ones((num_batch, seq_len), dtype='float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_lstm_precompute = LSTMLayer(
        l_inp, num_units=num_units, precompute_input=True,
        mask_input=l_mask_inp)
    lasagne.random.get_rng().seed(1234)
    l_lstm_no_precompute = LSTMLayer(
        l_inp, num_units=num_units, precompute_input=False,
        mask_input=l_mask_inp)
    output_precompute = helper.get_output(
        l_lstm_precompute).eval({l_inp.input_var: x_in,
                                 l_mask_inp.input_var: mask_in})
    output_no_precompute = helper.get_output(
        l_lstm_no_precompute).eval({l_inp.input_var: x_in,
                                    l_mask_inp.input_var: mask_in})

    # the precomputed and non-precomputed variants must produce the same output
    np.testing.assert_almost_equal(output_precompute, output_no_precompute)
Example 5
def build_convpool_lstm(input_vars, input_shape=None):
    """
  Builds the complete network with LSTM layer to integrate time from sequences of EEG images.
  :param input_vars: list of EEG images (one image per time window)
  :return: a pointer to the output of last layer
  """

    convnets = []
    W_init = None
    # Build 7 parallel CNNs with shared weights
    for i in range(input_shape[0]):
        if i == 0:
            convnet, W_init = build_cnn(input_vars[i], input_shape)
        else:
            convnet, _ = build_cnn(input_vars[i], input_shape, W_init)
        convnets.append(FlattenLayer(convnet))

    # at this point convnets shape is [numTimeWin][n_samples, features]
    # we want the shape to be [n_samples, numTimeWin, features]
    convpool = ConcatLayer(convnets)
    # convpool = ReshapeLayer(convpool, ([0], -1, numTimeWin))

    convpool = ReshapeLayer(
        convpool, ([0], input_shape[0], get_output_shape(convnets[0])[1]))

    # Input to LSTM should have the shape as (batch size, SEQ_LENGTH, num_features)

    convpool = LSTMLayer(convpool,
                         num_units=32,
                         grad_clipping=grad_clip,
                         nonlinearity=lasagne.nonlinearities.sigmoid)

    #convpool = lasagne.layers.dropout(convpool, p=.3)

    convpool = LSTMLayer(convpool,
                         num_units=32,
                         grad_clipping=grad_clip,
                         nonlinearity=lasagne.nonlinearities.sigmoid)

    # After the LSTM layer you either need to reshape or slice it (depending on whether
    # you want to keep all predictions or just the last prediction).
    # http://lasagne.readthedocs.org/en/latest/modules/layers/recurrent.html
    # https://github.com/Lasagne/Recipes/blob/master/examples/lstm_text_generation.py

    convpool = SliceLayer(convpool, -1, 1)  # Selecting the last prediction

    # A fully-connected layer of 256 units with 50% dropout on its inputs:
    convpool = DenseLayer(lasagne.layers.dropout(convpool, p=.5),
                          num_units=256,
                          nonlinearity=lasagne.nonlinearities.rectify)

    # We only need the final prediction, we isolate that quantity and feed it
    # to the next layer.

    # And, finally, the output layer with 50% dropout on its inputs:
    convpool = DenseLayer(lasagne.layers.dropout(convpool, p=.5),
                          num_units=num_classes,
                          nonlinearity=lasagne.nonlinearities.softmax)

    return convpool
Example 6
def create_blstm_dropout(input_vars,
                         mask_vars,
                         num_inputs,
                         hidden_layer_size,
                         num_outputs,
                         dropout=0.2,
                         noise=0.2):
    network = InputLayer((None, None, num_inputs), input_vars)
    mask = InputLayer((None, None), mask_vars)
    batch_size_theano, seqlen, _ = network.input_var.shape
    network = GaussianNoiseLayer(network, sigma=noise)

    for i in range(4):
        forward = LSTMLayer(network,
                            hidden_layer_size,
                            mask_input=mask,
                            learn_init=True)
        backward = LSTMLayer(network,
                             hidden_layer_size,
                             mask_input=mask,
                             learn_init=True,
                             backwards=True)
        network = DropoutLayer(
            GaussianNoiseLayer(ElemwiseSumLayer([forward, backward]), noise),
            dropout)

    network = ReshapeLayer(network, (-1, hidden_layer_size))
    network = DenseLayer(network, num_outputs, nonlinearity=softmax)
    network = ReshapeLayer(network, (batch_size_theano, seqlen, num_outputs))

    return network
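Usage note: a minimal sketch of instantiating create_blstm_dropout with symbolic Theano inputs (hypothetical sizes, assuming the layer imports the snippet relies on):

import theano.tensor as T
import lasagne

x_sym = T.tensor3('inputs')   # (batch, time, num_inputs)
mask_sym = T.matrix('mask')   # (batch, time)

# hypothetical sizes for a framewise classifier
net = create_blstm_dropout(x_sym, mask_sym, num_inputs=40,
                           hidden_layer_size=250, num_outputs=61)
probs = lasagne.layers.get_output(net)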
Example 7
    def __init__(self):
        print("Initialising network...")
        import theano
        import theano.tensor as T
        import lasagne
        from lasagne.layers import (InputLayer, LSTMLayer, ReshapeLayer, 
                                    ConcatLayer, DenseLayer)
        theano.config.compute_test_value = 'raise'

        # Construct LSTM RNN: One LSTM layer and one dense output layer
        l_in = InputLayer(shape=input_shape)

        # setup fwd and bck LSTM layer.
        l_fwd = LSTMLayer(
            l_in, N_HIDDEN, backwards=False, learn_init=True, peepholes=True)
        l_bck = LSTMLayer(
            l_in, N_HIDDEN, backwards=True, learn_init=True, peepholes=True)

        # concatenate forward and backward LSTM layers
        concat_shape = (N_SEQ_PER_BATCH * SEQ_LENGTH, N_HIDDEN)
        l_fwd_reshape = ReshapeLayer(l_fwd, concat_shape)
        l_bck_reshape = ReshapeLayer(l_bck, concat_shape)
        l_concat = ConcatLayer([l_fwd_reshape, l_bck_reshape], axis=1)

        l_recurrent_out = DenseLayer(l_concat, num_units=N_OUTPUTS, 
                                     nonlinearity=None)
        l_out = ReshapeLayer(l_recurrent_out, output_shape)

        input = T.tensor3('input')
        target_output = T.tensor3('target_output')

        # add test values
        input.tag.test_value = rand(
            *input_shape).astype(theano.config.floatX)
        target_output.tag.test_value = rand(
            *output_shape).astype(theano.config.floatX)

        print("Compiling Theano functions...")
        # Cost = mean squared error
        cost = T.mean((lasagne.layers.get_output(l_out, input) - target_output)**2)

        # Use NAG for training
        all_params = lasagne.layers.get_all_params(l_out)
        updates = lasagne.updates.nesterov_momentum(cost, all_params, LEARNING_RATE)

        # Theano functions for training, getting output, and computing cost
        self.train = theano.function(
            [input, target_output],
            cost, updates=updates, on_unused_input='warn',
            allow_input_downcast=True)

        self.y_pred = theano.function(
            [input], lasagne.layers.get_output(l_out, input),
            on_unused_input='warn', allow_input_downcast=True)

        self.compute_cost = theano.function(
            [input, target_output], cost, on_unused_input='warn',
            allow_input_downcast=True)

        print("Done initialising network.")
Example 8
def create_blstm(l_incoming, l_mask, hidden_units, cell_parameters, gate_parameters, name):

    if cell_parameters is None:
        cell_parameters = Gate()
    if gate_parameters is None:
        gate_parameters = Gate()

    l_lstm = LSTMLayer(
        l_incoming, hidden_units,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5., name='f_{}'.format(name))

    # The "backwards" layer is the same as the first,
    # except that the backwards argument is set to True.
    l_lstm_back = LSTMLayer(
        l_incoming, hidden_units, ingate=gate_parameters,
        mask_input=l_mask, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        learn_init=True, grad_clipping=5., backwards=True, name='b_{}'.format(name))

    return l_lstm, l_lstm_back
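Usage note: the forward/backward pair returned by create_blstm is typically concatenated along the feature axis. A minimal sketch under assumed shapes (the gate/cell parameters default to fresh Gate objects when None is passed):

from lasagne.layers import InputLayer, ConcatLayer

# hypothetical shapes: variable-length sequences of 39-dimensional features
l_in = InputLayer((None, None, 39))
l_mask = InputLayer((None, None))
l_fwd, l_bwd = create_blstm(l_in, l_mask, 128, None, None, 'layer1')
l_bi = ConcatLayer([l_fwd, l_bwd], axis=2)   # concatenate along the feature axis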
Example 9
def test_lstm_unroll_scan_bck():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    x = T.tensor3()
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)

    x_in = np.random.random(in_shp).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_lstm_scan = LSTMLayer(l_inp,
                            num_units=num_units,
                            backwards=True,
                            unroll_scan=False)
    lasagne.random.get_rng().seed(1234)
    l_lstm_unrolled = LSTMLayer(l_inp,
                                num_units=num_units,
                                backwards=True,
                                unroll_scan=True)
    output_scan = helper.get_output(l_lstm_scan, x)
    output_scan_unrolled = helper.get_output(l_lstm_unrolled, x)

    output_scan_val = output_scan.eval({x: x_in})
    output_unrolled_val = output_scan_unrolled.eval({x: x_in})

    np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)
Example 10
    def __init__(self,
                 num_batch,
                 max_len,
                 n_features,
                 hidden=[200, 200],
                 **kwargs):
        self.num_batch = num_batch
        self.n_features = n_features
        self.max_len = max_len
        self.hidden = hidden
        rng = np.random.RandomState(123)
        self.drng = rng
        self.rng = RandomStreams(rng.randint(2**30))

        # params
        # initial_W = np.asarray(
        #     rng.uniform(
        #             low=1e-5,
        #             high=1,
        #             size=(self.hidden[1], self.n_features)
        #     ),
        #     dtype=theano.config.floatX
        # )
        #
        # self.W_y_theta = theano.shared(value=initial_W, name='W_y_theta', borrow=True)
        # # self.W_y_kappa = theano.shared(value=initial_W, name='W_y_kappa', borrow=True)
        # self.b_y_theta = theano.shared(
        #         value=np.zeros(
        #             self.n_features,
        #             dtype=theano.config.floatX
        #         ),
        #         borrow=True
        #     )
        # self.b_y_kappa = theano.shared(
        #         value=np.zeros(
        #             self.n_features,
        #             dtype=theano.config.floatX
        #         ),
        #         name='b',
        #         borrow=True
        #     )

        # I could directly create the model here since it is fixed
        self.l_in = InputLayer(shape=(self.num_batch, self.max_len,
                                      self.n_features))
        self.mask_input = InputLayer(shape=(self.num_batch, self.max_len))
        first_hidden = LSTMLayer(self.l_in,
                                 mask_input=self.mask_input,
                                 num_units=hidden[0],
                                 nonlinearity=rectify)
        second_hidden = LSTMLayer(first_hidden,
                                  num_units=hidden[1],
                                  nonlinearity=rectify)
        # need some reshape voodoo
        l_shp = ReshapeLayer(second_hidden, (-1, hidden[1]))
        # after the reshape I have batch*max_len X features
        self.model = DenseLayer(l_shp,
                                num_units=self.n_features,
                                nonlinearity=rectify)
Example 11
def test_lstm_init_val_error():
    # check if errors are raised when inits are non matrix tensor
    vector = T.vector()
    with pytest.raises(ValueError):
        l_rec = LSTMLayer(InputLayer((2, 2, 3)), 5, hid_init=vector)

    with pytest.raises(ValueError):
        l_rec = LSTMLayer(InputLayer((2, 2, 3)), 5, cell_init=vector)
Example 12
def create_network(config, BATCH_SIZE):
    input_dim = config['input_dim']
    num_labels = config['num_labels']
    input_layer = InputLayer(shape=(BATCH_SIZE, input_dim // 2, 2))
    hidden_layer_1 = LSTMLayer(input_layer, 100)
    hidden_layer_2 = LSTMLayer(hidden_layer_1, 50, only_return_final=True)
    output_layer = DenseLayer(hidden_layer_2,
                              num_units=num_labels,
                              nonlinearity=lasagne.nonlinearities.softmax)
    return locals()
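Usage note: since create_network returns locals(), the layers can be looked up by their local variable names. A minimal sketch (hypothetical config values):

import theano
import lasagne

config = {'input_dim': 40, 'num_labels': 5}   # hypothetical values
net = create_network(config, BATCH_SIZE=32)

probs = lasagne.layers.get_output(net['output_layer'])
predict_fn = theano.function([net['input_layer'].input_var], probs)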
Example 13
def build_lstm(input_vars, input_shape=None):
    ''' 
  1) InputLayer
  2) ReshapeLayer
  3) LSTM Layer 1
  4) LSTM Layer 2
  5) Slice Layer
  6) Fully Connected Layer 1 w/ dropout tanh
  7) Fully Connected Layer 2 w/ dropout softmax
  '''

    # Input to LSTM should have the shape as (batch size, SEQ_LENGTH, num_features)

    network = InputLayer(shape=(input_shape[0], None, num_input_channels,
                                input_shape[-3], input_shape[-2],
                                input_shape[-1]),
                         input_var=input_vars)

    network = ReshapeLayer(network, ([0], [1], -1))
    network = DimshuffleLayer(network, (1, 0, 2))
    #network = ReshapeLayer(network, (-1, 128))
    #l_inp = InputLayer((None, None, num_inputs))

    l_lstm1 = LSTMLayer(network,
                        num_units=128,
                        grad_clipping=grad_clip,
                        nonlinearity=lasagne.nonlinearities.tanh)

    #New LSTM
    l_lstm2 = LSTMLayer(l_lstm1,
                        num_units=128,
                        grad_clipping=grad_clip,
                        nonlinearity=lasagne.nonlinearities.tanh)
    #end of insertion

    # After the LSTM layer you either need to reshape or slice it (depending on whether
    # you want to keep all predictions or just the last prediction).
    # http://lasagne.readthedocs.org/en/latest/modules/layers/recurrent.html
    # https://github.com/Lasagne/Recipes/blob/master/examples/lstm_text_generation.py

    l_lstm_slice = SliceLayer(l_lstm2, -1, 1)  # Selecting the last prediction

    # A fully-connected layer of 256 units with 50% dropout on its inputs:
    l_dense = DenseLayer(lasagne.layers.dropout(l_lstm_slice, p=.5),
                         num_units=256,
                         nonlinearity=lasagne.nonlinearities.rectify)
    # We only need the final prediction, we isolate that quantity and feed it
    # to the next layer.

    # And, finally, the output layer with 50% dropout on its inputs:
    l_dense = DenseLayer(lasagne.layers.dropout(l_dense, p=.5),
                         num_units=num_classes,
                         nonlinearity=lasagne.nonlinearities.softmax)
    return l_dense
Example 14
def build_lstm(input_layer):
    #network = sliding_window_input(input_layer)
    network = DimshuffleLayer(input_layer, (0, 1, 'x'))

    n_hidden = 50
    grad_clipping = 20
    network = LSTMLayer(network, num_units=n_hidden,
                        grad_clipping=grad_clipping, nonlinearity=tanh)
    network = LSTMLayer(network, num_units=n_hidden,
                        grad_clipping=grad_clipping, nonlinearity=tanh)
    network = SliceLayer(network, indices=-1, axis=1)
    #network = DenseLayer(network, num_units=256, nonlinearity=rectify)
    return network
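Shape note: the DimshuffleLayer adds a trailing feature axis, so a (batch, time) input becomes (batch, time, 1) before the LSTM stack, and the final SliceLayer keeps only the last time step. A minimal check (hypothetical input length, assuming the imports the snippet relies on):

from lasagne.layers import InputLayer, get_output_shape

l_in = InputLayer((None, 100))        # hypothetical (batch, time) input
network = build_lstm(l_in)
print(get_output_shape(network))      # (None, 50): only the last hidden state remains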
Example 15
def test_lstm_hid_init_layer_eval():
    # Test `hid_init` as a `Layer` with some dummy input. Compare the output of
    # a network with a `Layer` as input to `hid_init` to a network with a
    # `np.array` as input to `hid_init`
    n_units = 7
    n_test_cases = 2
    in_shp = (n_test_cases, 2, 3)
    in_h_shp = (1, n_units)
    in_cell_shp = (1, n_units)

    # dummy inputs
    X_test = np.ones(in_shp, dtype=theano.config.floatX)
    Xh_test = np.ones(in_h_shp, dtype=theano.config.floatX)
    Xc_test = np.ones(in_cell_shp, dtype=theano.config.floatX)
    Xh_test_batch = np.tile(Xh_test, (n_test_cases, 1))
    Xc_test_batch = np.tile(Xc_test, (n_test_cases, 1))

    # network with `Layer` initializer for hid_init
    l_inp = InputLayer(in_shp)
    l_inp_h = InputLayer(in_h_shp)
    l_inp_cell = InputLayer(in_cell_shp)
    l_rec_inp_layer = LSTMLayer(l_inp, n_units, hid_init=l_inp_h,
                                cell_init=l_inp_cell, nonlinearity=None)

    # network with `np.array` initializer for hid_init
    l_rec_nparray = LSTMLayer(l_inp, n_units, hid_init=Xh_test,
                              cell_init=Xc_test, nonlinearity=None)

    # copy network parameters from l_rec_inp_layer to l_rec_nparray
    l_il_param = dict([(p.name, p) for p in l_rec_inp_layer.get_params()])
    l_rn_param = dict([(p.name, p) for p in l_rec_nparray.get_params()])
    for k, v in l_rn_param.items():
        if k in l_il_param:
            v.set_value(l_il_param[k].get_value())

    # build the theano functions
    X = T.tensor3()
    Xh = T.matrix()
    Xc = T.matrix()
    output_inp_layer = lasagne.layers.get_output(l_rec_inp_layer,
                                                 {l_inp: X, l_inp_h:
                                                  Xh, l_inp_cell: Xc})
    output_nparray = lasagne.layers.get_output(l_rec_nparray, {l_inp: X})

    # test both nets with dummy input
    output_val_inp_layer = output_inp_layer.eval({X: X_test, Xh: Xh_test_batch,
                                                  Xc: Xc_test_batch})
    output_val_nparray = output_nparray.eval({X: X_test})

    # check output given `Layer` is the same as with `np.array`
    assert np.allclose(output_val_inp_layer, output_val_nparray)
Example 16
def build_discriminator(input_var=None, dim_h=128, n_steps=1):
    layer = InputLayer(shape=(None, None, N_WORDS), input_var=input_var)
    for i in range(n_steps):
        layer = LSTMLayer(
            layer, dim_h, grad_clipping=GRAD_CLIP,
            nonlinearity=tanh)
        layer = LSTMLayer(
            layer, dim_h, grad_clipping=GRAD_CLIP,
            nonlinearity=tanh)
    layer = ReshapeLayer(layer, (-1, dim_h))
    layer = DenseLayer(layer, 1, nonlinearity=None)
    layer = ReshapeLayer(layer, (-1, L_GEN))
    
    logger.debug('Discriminator output: {}'.format(layer.output_shape))
    return layer
Example 17
    def init_nn_structure(self, seq_length, pred_len):
        """
        Inits network structure

        :param seq_length: number of features
        :type seq_length: int
        :param pred_len: number of predicted values (target dimensionality)
        :type pred_len: int
        :return: None
        """
        self.iteration = 0
        
        theano_input = T.tensor3()
        theano_output = T.matrix()
        
        from lasagne.layers import InputLayer, LSTMLayer, DenseLayer, ExpressionLayer, ConcatLayer
        from lasagne.nonlinearities import tanh
        
        model = {}
        model['input_layer'] = InputLayer((None, seq_length, 1), input_var=theano_input)
        
        lst_concat = []
        for i, key in enumerate(self.feature_dict.keys()):
            if self.feature_dict[key] is None or len(self.feature_dict[key]) == 0:
                continue
            model['input_slice_' + str(i)] = ExpressionLayer(
                model['input_layer'],
                lambda X, key=key: X[:, self.feature_dict[key], :])  # bind key now to avoid late binding in the loop
            num_units = self.num_lstm_units_large if len(self.feature_dict[key]) > 10 else self.num_lstm_units_small
            model['hidden_layer_' + str(i) + '_1'] = LSTMLayer(model['input_slice_' + str(i)], 
                               num_units, grad_clipping=self.grad_clip, nonlinearity=tanh)
            model['hidden_layer_' + str(i) + '_2'] = LSTMLayer(model['hidden_layer_' + str(i) + '_1'], 
                               num_units, grad_clipping=self.grad_clip, nonlinearity=tanh, only_return_final=True)
            lst_concat.append(model['hidden_layer_' + str(i) + '_2'])
        model['concatenate_hidden'] = ConcatLayer(lst_concat, axis=1)
        model['output_layer'] = DenseLayer(model['concatenate_hidden'], pred_len, nonlinearity=None)
        
        model_output = lasagne.layers.get_output(model['output_layer'])
        params = lasagne.layers.get_all_params(model['output_layer'], trainable=True)

        self.loss = lasagne.objectives.squared_error(model_output, theano_output).mean()
        self.lr = theano.shared(np.array(self.learning_rate, dtype='float32'))
        self.updates = lasagne.updates.adam(self.loss, params, learning_rate=self.lr)

        self.l_out = model['output_layer']
        self.trainT = theano.function([theano_input, theano_output], self.loss, updates=self.updates)
        self.compute_cost = theano.function([theano_input, theano_output], self.loss)
        self.forecast = theano.function([theano_input], model_output)
        
Example 18
    def nn_fn(self):

        l_in_z = InputLayer((None, self.z_dim))
        l_in_x = InputLayer((None, self.max_length, self.emb_dim))

        l_in_z_reshape = ReshapeLayer(l_in_z, ([0], 1, [1]))
        l_in_z_rep = TileLayer(l_in_z_reshape, (1, self.max_length, 1))

        l_x_pre_pad = SliceLayer(PadLayer(l_in_x, [(1, 0), (0, 0)],
                                          batch_ndim=1),
                                 indices=slice(0, -1),
                                 axis=1)
        l_in_x_pre_pad_drop = DropoutLayer(l_x_pre_pad,
                                           self.nn_word_drop,
                                           shared_axes=(-1, ))

        l_concat = ConcatLayer((l_in_z_rep, l_in_x_pre_pad_drop), axis=-1)

        l_h = LSTMLayer(l_concat, num_units=self.nn_hid_units)

        if self.nn_skip:
            l_h = ConcatLayer((l_h, l_in_z_rep), axis=-1)

        l_out = DenseLayer(l_h,
                           num_units=self.emb_dim,
                           num_leading_axes=2,
                           nonlinearity=None)

        return (l_in_z, l_in_x), l_out
Example 19
def test_lstm_grad(num_units):
    num_batch, seq_len, n_features = 5, 3, 10
    l_inp = InputLayer((num_batch, seq_len, n_features))
    l_lstm = LSTMLayer(l_inp, num_units=num_units)
    output = helper.get_output(l_lstm)
    g = T.grad(T.mean(output), lasagne.layers.get_all_params(l_lstm))
    assert isinstance(g, (list, tuple))
Example 20
    def _build(self, forget_bias=5.0, grad_clip=10.0):
        """Build architecture
        """
        network = InputLayer(shape=(None, self.seq_length, self.input_size),
                             name='input')
        self.input_var = network.input_var

        # Hidden layers
        tanh = lasagne.nonlinearities.tanh
        gate, constant = lasagne.layers.Gate, lasagne.init.Constant
        for _ in range(self.depth):
            network = LSTMLayer(network,
                                self.width,
                                nonlinearity=tanh,
                                grad_clipping=grad_clip,
                                forgetgate=gate(b=constant(forget_bias)))

        # Retain last-output state
        network = SliceLayer(network, -1, 1)

        # Output layer
        sigmoid = lasagne.nonlinearities.sigmoid
        loc_layer = DenseLayer(network, self.num_outputs * 2)
        conf_layer = DenseLayer(network,
                                self.num_outputs,
                                nonlinearity=sigmoid)

        # Grab all layers into DAPs instance
        self.network = get_all_layers([loc_layer, conf_layer])

        # Get theano expression for outputs of DAPs model
        self.loc_var, self.conf_var = get_output([loc_layer, conf_layer],
                                                 deterministic=True)
Example 21
def build(timestep, vocab_size):
    # Input Layer
    l_in = InputLayer(shape=(None, timestep, vocab_size))
    # 2 Hidden LSTM Layers
    l_lstm1 = LSTMLayer(l_in, num_units=10, nonlinearity=rectify)
    l_lstm2 = LSTMLayer(l_lstm1,
                        num_units=10,
                        nonlinearity=rectify,
                        only_return_final=True)
    # Output Layer
    l_out = DenseLayer(l_lstm2,
                       num_units=vocab_size,
                       W=GlorotNormal(),
                       nonlinearity=softmax)

    return l_out
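Usage note: a minimal sketch of compiling a prediction function from the returned output layer (hypothetical sizes, assuming the imports the snippet relies on; get_all_layers returns the InputLayer first):

import theano
import lasagne

l_out = build(timestep=20, vocab_size=65)          # hypothetical sizes
l_in = lasagne.layers.get_all_layers(l_out)[0]     # the InputLayer
probs = lasagne.layers.get_output(l_out)
predict_fn = theano.function([l_in.input_var], probs)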
Example 22
        def get_decoder_1step_net(prev_state, emb_token):
            """
            build nn that represents 1 step of decoder application
            :param prev_state: matrix of shape (batch_size, HIDDEN_LAYER_DIMENSION), float values
            :param emb_token: matrix of shape (batch_size, 1), stores the id of the previous token
            :return:
                l_dec returns the new thought vector, matrix of shape (batch_size, HIDDEN_LAYER_DIMENSION)
                l_dist returns the probability distribution over the next word, matrix of shape (batch_size, vocab_size)
            """
            l_dec = LSTMLayer(
                incoming=emb_token,
                num_units=HIDDEN_LAYER_DIMENSION,
                hid_init=prev_state,
                grad_clipping=GRAD_CLIP,
                nonlinearity=lasagne.nonlinearities.tanh,
                only_return_final=True,
                name="lstm_decoder")

            l_dec_long = ReshapeLayer(l_dec, shape=(-1, HIDDEN_LAYER_DIMENSION))

            l_dist = DenseLayer(
                incoming=l_dec_long,
                num_units=self.vocab_size,
                nonlinearity=lasagne.nonlinearities.softmax,
                name="dense_output_probas")

            return l_dec, l_dist
Example 23
def build_tempral_model():
    net = {}
    net['input'] = InputLayer((None, 24, 2048))
    net['lstm1'] = LSTMLayer(net['input'], 256)
    net['fc'] = DenseLayer(net['lstm1'], num_units=12, nonlinearity=sigmoid)

    return net
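Usage note: a minimal training-setup sketch for the returned dictionary, treating the 12 sigmoid units as independent binary labels (an assumption; the loss and optimizer choice are hypothetical):

import theano
import theano.tensor as T
import lasagne

net = build_tempral_model()
targets = T.matrix('targets')   # one binary label per output unit
prediction = lasagne.layers.get_output(net['fc'])
loss = lasagne.objectives.binary_crossentropy(prediction, targets).mean()
params = lasagne.layers.get_all_params(net['fc'], trainable=True)
updates = lasagne.updates.adam(loss, params)
train_fn = theano.function([net['input'].input_var, targets], loss, updates=updates)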
Example 24
            def lstm_layer(input,
                           nunits,
                           return_final,
                           backwards=False,
                           name='LSTM'):
                ingate = Gate(W_in=init.Uniform(0.01),
                              W_hid=init.Uniform(0.01),
                              b=init.Constant(0.0))
                forgetgate = Gate(W_in=init.Uniform(0.01),
                                  W_hid=init.Uniform(0.01),
                                  b=init.Constant(5.0))
                cell = Gate(
                    W_cell=None,
                    nonlinearity=T.tanh,
                    W_in=init.Uniform(0.01),
                    W_hid=init.Uniform(0.01),
                )
                outgate = Gate(W_in=init.Uniform(0.01),
                               W_hid=init.Uniform(0.01),
                               b=init.Constant(0.0))

                lstm = LSTMLayer(input,
                                 num_units=nunits,
                                 backwards=backwards,
                                 peepholes=False,
                                 ingate=ingate,
                                 forgetgate=forgetgate,
                                 cell=cell,
                                 outgate=outgate,
                                 name=name,
                                 only_return_final=return_final,
                                 mask_input=mask)
                return lstm
Example 25
def create_lstm(l_incoming,
                l_mask,
                hidden_units,
                cell_parameters,
                gate_parameters,
                name,
                use_peepholes=False):
    if cell_parameters is None:
        cell_parameters = Gate()
    if gate_parameters is None:
        gate_parameters = Gate()

    l_lstm = LSTMLayer(
        l_incoming,
        hidden_units,
        peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True,
        grad_clipping=5.,
        name=name)
    return l_lstm
Example 26
def build_lstm_decorer():
    net = collections.OrderedDict()
    net['sent_input'] = InputLayer((None, CFG['SEQUENCE LENGTH'] - 1),
                                   input_var=T.imatrix())
    net['word_emb'] = EmbeddingLayer(net['sent_input'], input_size=CFG['VOCAB SIZE'],\
                                    output_size=CFG['EMBEDDING SIZE'])
    net['vis_input'] = InputLayer((None, CFG['VIS SIZE']),
                                  input_var=T.matrix())
    net['vis_emb'] = DenseLayer(net['vis_input'],
                                num_units=CFG['EMBEDDING SIZE'],
                                nonlinearity=lasagne.nonlinearities.identity)
    net['vis_emb_reshp'] = ReshapeLayer(net['vis_emb'],
                                        (-1, 1, CFG['EMBEDDING SIZE']))
    net['decorder_input'] = ConcatLayer(
        [net['vis_emb_reshp'], net['word_emb']])
    net['feat_dropout'] = DropoutLayer(net['decorder_input'], p=0.5)

    net['mask_input'] = InputLayer((None, CFG['SEQUENCE LENGTH']))
    net['lstm'] = LSTMLayer(net['feat_dropout'],num_units=CFG['EMBEDDING SIZE'], \
                            mask_input=net['mask_input'], grad_clipping=5.)
    net['lstm_dropout'] = DropoutLayer(net['lstm'], p=0.5)
    net['lstm_reshp'] = ReshapeLayer(net['lstm_dropout'],
                                     (-1, CFG['EMBEDDING SIZE']))
    net['word_prob'] = DenseLayer(net['lstm_reshp'],
                                  num_units=CFG['VOCAB SIZE'] + 2,
                                  nonlinearity=softmax)
    net['sent_prob'] = ReshapeLayer(
        net['word_prob'], (-1, CFG['SEQUENCE LENGTH'], CFG['VOCAB SIZE'] + 2))
    return net
Example 27
def build_rnn(conv_input_var, seq_input_var, conv_shape, word_dims, n_hid,
              lstm_layers):
    ret = {}
    ret['seq_input'] = seq_layer = InputLayer((None, None, word_dims),
                                              input_var=seq_input_var)
    batchsize, seqlen, _ = seq_layer.input_var.shape
    ret['seq_resh'] = seq_layer = ReshapeLayer(seq_layer,
                                               shape=(-1, word_dims))
    ret['seq_proj'] = seq_layer = DenseLayer(seq_layer, num_units=n_hid)
    ret['seq_resh2'] = seq_layer = ReshapeLayer(seq_layer,
                                                shape=(batchsize, seqlen,
                                                       n_hid))
    ret['conv_input'] = conv_layer = InputLayer(conv_shape,
                                                input_var=conv_input_var)
    ret['conv_proj'] = conv_layer = DenseLayer(conv_layer, num_units=n_hid)
    ret['conv_resh'] = conv_layer = ReshapeLayer(conv_layer,
                                                 shape=([0], 1, -1))
    ret['input_concat'] = layer = ConcatLayer([conv_layer, seq_layer], axis=1)
    for lstm_layer_idx in range(lstm_layers):
        ret['lstm_{}'.format(lstm_layer_idx)] = layer = LSTMLayer(layer, n_hid)
    ret['out_resh'] = layer = ReshapeLayer(layer, shape=(-1, n_hid))
    ret['output_proj'] = layer = DenseLayer(layer,
                                            num_units=word_dims,
                                            nonlinearity=log_softmax)
    ret['output'] = layer = ReshapeLayer(layer,
                                         shape=(batchsize, seqlen + 1,
                                                word_dims))
    ret['output'] = layer = SliceLayer(layer, indices=slice(None, -1), axis=1)
    return ret
Example 28
    def __init__(self, vocab):
        ### THEANO GRAPH INPUT ###
        self.input_phrase = T.imatrix("encoder phrase tokens")
        ##########################

        self.l_in = InputLayer((None, None),
                               self.input_phrase,
                               name='context input')
        self.l_mask = InputLayer((None, None),
                                 T.neq(self.input_phrase, vocab.PAD_ix),
                                 name='context mask')

        self.l_emb = EmbeddingLayer(self.l_in,
                                    vocab.n_tokens,
                                    Config.EMB_SIZE,
                                    name="context embedding")

        self.l_lstm = LSTMLayer(self.l_emb,
                                Config.N_LSTM_UNITS,
                                name='encoder_lstm',
                                grad_clipping=Config.LSTM_LAYER_GRAD_CLIP,
                                mask_input=self.l_mask,
                                only_return_final=True,
                                peepholes=False)

        self.output = self.l_lstm
Example 29
def build_discriminator_lstm(params, gate_params, cell_params):
    from lasagne.layers import InputLayer, DenseLayer, concat
    from lasagne.layers.recurrent import LSTMLayer
    from lasagne.regularization import l2, regularize_layer_params
    # from layers import MinibatchLayer
    # input layers
    l_in = InputLayer(
        shape=params['input_shape'], name='d_in')
    l_mask = InputLayer(
        shape=params['mask_shape'], name='d_mask')

    # recurrent layers for bidirectional network
    l_forward = LSTMLayer(
        l_in, params['n_units'], grad_clipping=params['grad_clip'],
        ingate=gate_params, forgetgate=gate_params,
        cell=cell_params, outgate=gate_params,
        nonlinearity=params['non_linearities'][0], only_return_final=True,
        mask_input=l_mask)
    l_backward = LSTMLayer(
        l_in, params['n_units'], grad_clipping=params['grad_clip'],
        ingate=gate_params, forgetgate=gate_params,
        cell=cell_params, outgate=gate_params,
        nonlinearity=params['non_linearities'][1], only_return_final=True,
        mask_input=l_mask, backwards=True)

    # concatenate output of forward and backward layers
    l_concat = concat([l_forward, l_backward], axis=1)

    # minibatch layer on forward and backward layers
    # l_minibatch = MinibatchLayer(l_concat, num_kernels=100)

    # output layer
    l_out = DenseLayer(
        l_concat, num_units=params['n_output_units'],
        nonlinearity=params['non_linearities'][2])

    regularization = regularize_layer_params(
        l_out, l2) * params['regularization']

    class Discriminator:
        def __init__(self, l_in, l_mask, l_out):
            self.l_in = l_in
            self.l_mask = l_mask
            self.l_out = l_out
            self.regularization = regularization

    return Discriminator(l_in, l_mask, l_out)
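Usage note: a minimal sketch of a params dictionary and gate specifications that would satisfy build_discriminator_lstm (all values hypothetical):

from lasagne.layers.recurrent import Gate
from lasagne.nonlinearities import tanh, sigmoid

params = {
    'input_shape': (None, None, 64), 'mask_shape': (None, None),
    'n_units': 128, 'grad_clip': 5.,
    'non_linearities': (tanh, tanh, sigmoid),
    'n_output_units': 1, 'regularization': 1e-4,
}
gate_params = Gate()
cell_params = Gate(W_cell=None, nonlinearity=tanh)   # the cell "gate" has no peephole weights
disc = build_discriminator_lstm(params, gate_params, cell_params)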
Example 30
    def rnn_fn(self, max_length):
        l_in = InputLayer((None, max_length, self.vocab_size))
        l_mask = InputLayer((None, max_length))
        l_final = LSTMLayer(l_in,
                            num_units=self.nn_rnn_hid_dim,
                            mask_input=l_mask,
                            only_return_final=True)
        return l_final
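Shape note: with only_return_final=True the layer returns just the last hidden state, so the time axis disappears. A minimal check under hypothetical sizes:

from lasagne.layers import InputLayer, LSTMLayer, get_output_shape

l_in = InputLayer((None, 15, 1000))   # hypothetical (batch, max_length, vocab_size)
l_mask = InputLayer((None, 15))
l_final = LSTMLayer(l_in, num_units=256, mask_input=l_mask, only_return_final=True)
print(get_output_shape(l_final))      # (None, 256)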
Example 31
def build_convpool_mix(input_vars,
                       nb_classes,
                       grad_clip=110,
                       imsize=32,
                       n_colors=3,
                       n_timewin=7):
    """
    Builds the complete network with LSTM and 1D-conv layers combined

    :param input_vars: list of EEG images (one image per time window)
    :param nb_classes: number of classes
    :param grad_clip:  the gradient messages are clipped to the given value during
                        the backward pass.
    :param imsize: size of the input image (assumes a square input)
    :param n_colors: number of color channels in the image
    :param n_timewin: number of time windows in the snippet
    :return: a pointer to the output of last layer
    """
    convnets = []
    w_init = None
    # Build 7 parallel CNNs with shared weights
    for i in range(n_timewin):
        if i == 0:
            convnet, w_init = build_cnn(input_vars[i],
                                        imsize=imsize,
                                        n_colors=n_colors)
        else:
            convnet, _ = build_cnn(input_vars[i],
                                   w_init=w_init,
                                   imsize=imsize,
                                   n_colors=n_colors)
        convnets.append(FlattenLayer(convnet))
    # at this point convnets shape is [numTimeWin][n_samples, features]
    # we want the shape to be [n_samples, features, numTimeWin]
    convpool = ConcatLayer(convnets)
    convpool = ReshapeLayer(convpool,
                            ([0], n_timewin, get_output_shape(convnets[0])[1]))
    reformConvpool = DimshuffleLayer(convpool, (0, 2, 1))
    # input to 1D convlayer should be in (batch_size, num_input_channels, input_length)
    conv_out = Conv1DLayer(reformConvpool, 64, 3)
    conv_out = FlattenLayer(conv_out)
    # Input to LSTM should have the shape as (batch size, SEQ_LENGTH, num_features)
    lstm = LSTMLayer(convpool,
                     num_units=128,
                     grad_clipping=grad_clip,
                     nonlinearity=lasagne.nonlinearities.tanh)
    lstm_out = SliceLayer(lstm, -1, 1)
    # Merge 1D-Conv and LSTM outputs
    dense_input = ConcatLayer([conv_out, lstm_out])
    # A fully-connected layer of 256 units with 50% dropout on its inputs:
    convpool = DenseLayer(lasagne.layers.dropout(dense_input, p=.5),
                          num_units=512,
                          nonlinearity=lasagne.nonlinearities.rectify)
    # And, finally, the output layer with one unit per class:
    convpool = DenseLayer(convpool,
                          num_units=nb_classes,
                          nonlinearity=lasagne.nonlinearities.softmax)
    return convpool
Example 32
    def __init__(self, number_words, num_hidden, seq_length, mb_size):
        self.mb_size = mb_size

        x = T.imatrix()

        #sequence x minibatch x index
        one_hot_input = T.ftensor3()

        use_one_hot_input_flag = T.scalar()

        self.indices = x
        self.use_one_hot_input_flag = use_one_hot_input_flag
        self.one_hot_input = one_hot_input

        '''
        flag for input: one-hot or index.  
        If index, compute one-hot and use that.  

        If one-hot, just use one-hot input.  
        '''

        #Time seq x examples x words

        target = T.ivector()

        #word_embeddings = theano.shared(np.random.normal(size = ((number_words, 1, num_hidden))).astype('float32'))

        word_embeddings = theano.shared(np.random.normal(size = ((number_words, num_hidden))).astype('float32'))

        feature_lst = []

        for i in range(0, seq_length):
            #feature = word_embeddings[x[:,i]]
            #instead of this, multiply by one-hot matrix

            one_hot = T.extra_ops.to_one_hot(x[:,i], number_words)

            #W : 30k x 1 x 400
            #one_hot: 128 x 30k
            #one_hot * W
            #128 x 1 x 400

            
            one_hot_use = ifelse(use_one_hot_input_flag, one_hot_input[i], T.extra_ops.to_one_hot(x[:,i], number_words))

            feature = T.reshape(T.dot(one_hot_use, word_embeddings), (1,mb_size,num_hidden)).transpose(1,0,2)

            feature_lst.append(feature)

        features = T.concatenate(feature_lst, 1)

        #example x sequence_position x feature
        l_lstm_1 = LSTMLayer((seq_length, mb_size, num_hidden), num_units = num_hidden, nonlinearity = lasagne.nonlinearities.tanh, grad_clipping=100.0)
        l_lstm_2 = LSTMLayer((seq_length, mb_size, num_hidden * 2), num_units = num_hidden, nonlinearity = lasagne.nonlinearities.tanh, grad_clipping=100.0, backwards = True)
        l_lstm_3 = LSTMLayer((seq_length, mb_size, num_hidden * 2), num_units = num_hidden, nonlinearity = lasagne.nonlinearities.tanh, grad_clipping=100.0)

        lstm_1_out = l_lstm_1.get_output_for([features])
        lstm_2_out = l_lstm_2.get_output_for([T.concatenate([lstm_1_out, features], axis = 2)])
        lstm_3_out = l_lstm_3.get_output_for([T.concatenate([lstm_2_out, features], axis = 2)])

        final_out = T.mean(lstm_3_out, axis = 1)

        #final_out = T.mean(features, axis = 1)
        h_out_1 = DenseLayer((mb_size, num_hidden), num_units = 2048, nonlinearity=lasagne.nonlinearities.rectify)

        h_out_2 = DenseLayer((mb_size, 2048), num_units = 2048, nonlinearity=lasagne.nonlinearities.rectify)

        h_out_3 = DenseLayer((mb_size, 2048), num_units = 1, nonlinearity=None)

        h_out_1_value = h_out_1.get_output_for(final_out)
        h_out_2_value = h_out_2.get_output_for(h_out_1_value)
        h_out_3_value = h_out_3.get_output_for(h_out_2_value)
        classification = T.nnet.sigmoid(h_out_3_value)
        self.loss = T.mean(T.nnet.binary_crossentropy(output = classification.flatten(), target = target))
        self.params = lasagne.layers.get_all_params(h_out_1,trainable=True) + lasagne.layers.get_all_params(h_out_3,trainable=True) + [word_embeddings] + lasagne.layers.get_all_params(l_lstm_1, trainable = True) + lasagne.layers.get_all_params(l_lstm_2, trainable = True)

        self.params += lasagne.layers.get_all_params(h_out_2,trainable=True)
        self.params += lasagne.layers.get_all_params(l_lstm_3,trainable=True)

        all_grads = T.grad(self.loss, self.params)

        for j in range(0, len(all_grads)):
            all_grads[j] = T.switch(T.isnan(all_grads[j]), T.zeros_like(all_grads[j]), all_grads[j])

        scaled_grads = lasagne.updates.total_norm_constraint(all_grads, 5.0)

        updates = lasagne.updates.adam(scaled_grads, self.params)
        self.train_func = theano.function(inputs = [x, target, use_one_hot_input_flag, one_hot_input], outputs = {'l' : self.loss, 'c' : classification, 'g_w' : T.sum(T.sqr(T.grad(self.loss, word_embeddings)))}, updates = updates)
        self.evaluate_func = theano.function(inputs = [x, use_one_hot_input_flag, one_hot_input], outputs = {'c' : classification})
Example 33
def test_lasagne_ctc():
    import lasagne
    from lasagne.layers import (
        LSTMLayer,
        InputLayer,
        DenseLayer,
        NonlinearityLayer,
        ReshapeLayer,
        EmbeddingLayer,
        RecurrentLayer,
    )
    import theano
    import theano.tensor as T
    import numpy as np

    num_batch, input_seq_len = 1, 12
    num_classes = 5
    target_seq_len = 3
    num_rnn_units = 50

    def print_pred(y_hat):
        blank_symbol = num_classes
        res = []
        for i, s in enumerate(y_hat):
            if (s != blank_symbol) and (i == 0 or s != y_hat[i - 1]):
                res += [s]
        if len(res) > 0:
            return "".join(map(str, list(res)))
        else:
            return "-" * target_seq_len

    Y_hat = np.asarray(np.random.normal(0, 1, (input_seq_len, num_batch, num_classes + 1)), dtype=floatX)
    Y = np.zeros((target_seq_len, num_batch), dtype="int64")
    Y[25:, :] = 1
    Y_hat_mask = np.ones((input_seq_len, num_batch), dtype=floatX)
    Y_hat_mask[-5:] = 0
    # default blank symbol is the highest class index (5 in this case)
    Y_mask = np.asarray(np.ones_like(Y), dtype=floatX)
    X = np.random.random((num_batch, input_seq_len)).astype("int32")

    y = T.imatrix("phonemes")
    x = T.imatrix()  # batchsize, input_seq_len, features

    print "num_batch =", num_batch, "input_seq_len =", input_seq_len
    print "num_classes =", num_classes
    # setup Lasagne Recurrent network
    # The output from the network is shape
    #  a) output_lin_ctc is the activation before softmax  (batch_size, input_seq_len, num_classes + 1)
    #  b) output_softmax is the output after softmax  (batch_size, input_seq_len, num_classes + 1)
    l_inp = InputLayer((num_batch, input_seq_len))
    netshape = lasagne.layers.get_output_shape(l_inp)
    print ("Layer l_inp shape:")
    print (netshape)
    l_emb = EmbeddingLayer(
        l_inp, input_size=num_classes + 1, output_size=num_classes + 1, W=np.identity(num_classes + 1).astype("float32")
    )
    netshape = lasagne.layers.get_output_shape(l_emb)
    print ("Layer l_emb shape:")
    print (netshape)
    l_rnn = LSTMLayer(l_emb, num_units=num_rnn_units)
    netshape = lasagne.layers.get_output_shape(l_rnn)
    print ("Layer l_rnn shape:")
    print (netshape)
    l_rnn_shp = ReshapeLayer(l_rnn, (num_batch * input_seq_len, num_rnn_units))
    netshape = lasagne.layers.get_output_shape(l_rnn_shp)
    print ("Layer l_rnn_shp shape:")
    print (netshape)
    l_out = DenseLayer(l_rnn_shp, num_units=num_classes + 1, nonlinearity=lasagne.nonlinearities.identity)  # + blank

    netshape = lasagne.layers.get_output_shape(l_out)
    print ("Layer l_out shape:")
    print (netshape)
    l_out_shp = ReshapeLayer(l_out, (num_batch, input_seq_len, num_classes + 1))
    netshape = lasagne.layers.get_output_shape(l_out_shp)
    print ("Layer l_out_shp shape:")
    print (netshape)

    # dimshuffle to shape format (input_seq_len, batch_size, num_classes + 1)
    # l_out_shp_ctc = lasagne.layers.DimshuffleLayer(l_out_shp, (1, 0, 2))

    l_out_softmax = NonlinearityLayer(l_out, nonlinearity=lasagne.nonlinearities.softmax)
    netshape = lasagne.layers.get_output_shape(l_out_softmax)
    print ("Layer l_out_softmax shape:")
    print (netshape)
    l_out_softmax_shp = ReshapeLayer(l_out_softmax, (num_batch, input_seq_len, num_classes + 1))
    netshape = lasagne.layers.get_output_shape(l_out_softmax_shp)
    print ("Layer l_out_softmax_shp shape:")
    print (netshape)

    output_lin_ctc = lasagne.layers.get_output(l_out_shp, x)
    output_softmax = lasagne.layers.get_output(l_out_softmax_shp, x)
    all_params = l_rnn.get_params(trainable=True)  # don't learn the embedding
    print("x type:", type(x))
    print("x shape", x.shape)
    print("y type:", type(y))
    print("y shape", y.shape)

    ###############
    #  GRADIENTS  #
    ###############

    # the CTC cross entropy between y and linear output network
    # (num_batch,t,class+1)
    # output_lin_ctc shape (1,12,6)
    pseudo_cost = ctc_cost.pseudo_cost(y, output_lin_ctc)

    # calculate the gradients of the CTC cost wrt. the linear output of the network
    pseudo_cost_grad = T.grad(pseudo_cost.sum() / num_batch, all_params)
    true_cost = ctc_cost.cost(y, output_softmax)
    cost = T.mean(true_cost)

    sh_lr = theano.shared(lasagne.utils.floatX(0.01))
    updates = lasagne.updates.rmsprop(pseudo_cost_grad, all_params, learning_rate=sh_lr)
    # x shape (1,12)
    # y shape (1,3)

    train = theano.function([x, y], [output_lin_ctc, output_softmax, cost, pseudo_cost], updates=updates)

    # Create test dataset
    num_samples = 10
    np.random.seed(1234)

    # create simple dataset of format
    # input [5,5,5,5,5,2,2,2,2,2,3,3,3,3,3,....,1,1,1,1]
    # targets [5,2,3,...,1]
    # etc...
    input_lst, output_lst = [], []
    for i in range(num_samples):
        this_input = []
        this_output = []
        for j in range(target_seq_len):
            this_class = np.random.randint(num_classes)
            this_input += [this_class] * 3 + [num_classes]
            this_output += [this_class]

        this_input += (input_seq_len - len(this_input)) * [this_input[-1]]

        input_lst.append(this_input)
        output_lst.append(this_output)
        print(this_input, this_output)

    input_arr = np.concatenate([input_lst]).astype("int32")
    y_arr = np.concatenate([output_lst]).astype("int32")

    print "y_arr shape:", y_arr.shape

    y_mask_arr = np.ones((num_batch, target_seq_len), dtype="float32")
    input_mask_arr = np.ones((num_batch, input_seq_len), dtype="float32")

    for nn in range(1000):
        cost_lst = []
        shuffle = np.random.permutation(num_samples)
        for i in range(num_samples // num_batch):
            idx = shuffle[i * num_batch : (i + 1) * num_batch]
            _, output_softmax_val, cost, pseudo_cost_val = train(input_arr[idx], y_arr[idx])
            print "x=", input_arr[idx]
            # x shape (1,12)
            print "x shape", input_arr[idx].shape
            print "y=", y_arr[idx]
            # y shape (1,3)
            print "y shape", y_arr[idx].shape
            output_softmax_lst = output_softmax_val
            labels_lst = y_arr[idx]
            cost_lst += [cost]
            # testing.assert_almost_equal(pseudo_cost, pseudo_cost_old, decimal=4)
            # testing.assert_array_almost_equal(pseudo_cost_val, pseudo_cost_old_val)

        if (nn + 1) % 20 == 0:
            DECAY = 1.5
            new_lr = lasagne.utils.floatX(sh_lr.get_value() / DECAY)
            sh_lr.set_value(new_lr)
            print "----------------------->NEW LR:", new_lr

        print nn, "Mean cost:", np.mean(cost_lst)
        if (nn + 1) % 4 == 0:
            for jj in range(num_batch):
                pred = print_pred(np.argmax(output_softmax_val[jj], axis=-1))
                true = "".join(map(str, labels_lst[jj]))
                pred += (target_seq_len - len(pred)) * " "
                print "pred =", pred, "true =", true
Example 34
def test_lasagne_ctc():
    import lasagne
    from lasagne.layers import LSTMLayer, InputLayer, DenseLayer,\
        NonlinearityLayer, ReshapeLayer, EmbeddingLayer, RecurrentLayer
    import theano
    import theano.tensor as T
    import numpy as np
    num_batch, input_seq_len = 10, 15
    num_classes = 10
    target_seq_len = 5
    num_rnn_units = 50

    input_seq_len += target_seq_len
    def print_pred(y_hat):
        blank_symbol = num_classes
        res = []
        for i, s in enumerate(y_hat):
            if (s != blank_symbol) and (i == 0 or s != y_hat[i - 1]):
                res += [s]
        if len(res) > 0:
            return "".join(map(str, list(res)))
        else:
            return "-"*target_seq_len

    Y_hat = np.asarray(np.random.normal(
        0, 1, (input_seq_len, num_batch, num_classes + 1)), dtype=floatX)
    Y = np.zeros((target_seq_len, num_batch), dtype='int64')
    Y[25:, :] = 1
    Y_hat_mask = np.ones((input_seq_len, num_batch), dtype=floatX)
    Y_hat_mask[-5:] = 0
    # default blank symbol is the highest class index (10 in this case)
    Y_mask = np.asarray(np.ones_like(Y), dtype=floatX)
    X = np.random.random(
        (num_batch, input_seq_len)).astype('int32')

    y = T.imatrix('phonemes')
    x = T.imatrix()   # batchsize, input_seq_len, features



    # setup Lasagne Recurrent network
    # The output from the network is shape
    #  a) output_lin_ctc is the activation before softmax  (batch_size, input_seq_len, num_classes + 1)
    #  b) output_softmax is the output after softmax  (batch_size, input_seq_len, num_classes + 1)
    l_inp = InputLayer((num_batch, input_seq_len))
    l_emb = EmbeddingLayer(l_inp,
                           input_size=num_classes+1,
                           output_size=num_classes+1,
                           W=np.identity(num_classes+1).astype('float32'))
    ini = lasagne.init.Uniform(0.1)
    zero = lasagne.init.Constant(0.0)
    cell = lasagne.init.Uniform(0.1)
    l_rnn = LSTMLayer(l_emb,
                      num_units=num_rnn_units,
                      peepholes=True,
                      W_in_to_ingate=ini,
                      W_hid_to_ingate=ini,
                      b_ingate=zero,
                      W_in_to_forgetgate=ini,
                      W_hid_to_forgetgate=ini,
                      b_forgetgate=zero,
                      W_in_to_cell=ini,
                      W_hid_to_cell=ini,
                      b_cell=zero,
                      W_in_to_outgate=ini,
                      W_hid_to_outgate=ini,
                      b_outgate=zero,
                      cell_init=lasagne.init.Constant(0.),
                      hid_init=lasagne.init.Constant(0.),
                      W_cell_to_forgetgate=cell,
                      W_cell_to_ingate=cell,
                      W_cell_to_outgate=cell)
    l_rnn_shp = ReshapeLayer(l_rnn, (num_batch*input_seq_len, num_rnn_units))
    l_out = DenseLayer(l_rnn_shp, num_units=num_classes+1,
                       nonlinearity=lasagne.nonlinearities.identity)  # + blank

    l_out_shp = ReshapeLayer(l_out, (num_batch, input_seq_len, num_classes+1))

    # dimshuffle to shape format (input_seq_len, batch_size, num_classes + 1)
    #l_out_shp_ctc = lasagne.layers.DimshuffleLayer(l_out_shp, (1, 0, 2))

    l_out_softmax = NonlinearityLayer(
        l_out, nonlinearity=lasagne.nonlinearities.softmax)
    l_out_softmax_shp = ReshapeLayer(
        l_out_softmax, (num_batch, input_seq_len, num_classes+1))

    output_lin_ctc = lasagne.layers.get_output(l_out_shp, x)
    output_softmax = lasagne.layers.get_output(l_out_softmax_shp, x)
    all_params = l_rnn.get_params(trainable=True)  # don't learn the embedding
    print(all_params)

    ###############
    #  GRADIENTS  #
    ###############

    # the CTC cross entropy between y and linear output network
    pseudo_cost = ctc_cost.pseudo_cost(
        y, output_lin_ctc)

    # calculate the gradients of the CTC cost wrt. the linear output of the network
    pseudo_cost_grad = T.grad(pseudo_cost.sum() / num_batch, all_params)
    true_cost = ctc_cost.cost(y, output_softmax)
    cost = T.mean(true_cost)

    sh_lr = theano.shared(lasagne.utils.floatX(0.01))
    #updates = lasagne.updates.sgd(pseudo_cost_grad, all_params, learning_rate=sh_lr)
    #updates = lasagne.updates.apply_nesterov_momentum(updates, all_params, momentum=0.9)
    updates = lasagne.updates.rmsprop(pseudo_cost_grad, all_params, learning_rate=sh_lr)

    train = theano.function([x, y],
                            [output_lin_ctc, output_softmax, cost, pseudo_cost],
                            updates=updates)


    # Create test dataset
    num_samples = 1000
    np.random.seed(1234)

    # create simple dataset of format
    # input [5,5,5,5,5,2,2,2,2,2,3,3,3,3,3,....,1,1,1,1]
    # targets [5,2,3,...,1]
    # etc...
    input_lst, output_lst = [], []
    for i in range(num_samples):
        this_input = []
        this_output = []
        for j in range(target_seq_len):
            this_class = np.random.randint(num_classes)
            this_input += [this_class]*3 + [num_classes]
            this_output += [this_class]

        this_input += (input_seq_len - len(this_input))*[this_input[-1]]

        input_lst.append(this_input)
        output_lst.append(this_output)
        print(this_input, this_output)

    input_arr = np.concatenate([input_lst]).astype('int32')
    y_arr = np.concatenate([output_lst]).astype('int32')

    y_mask_arr = np.ones((num_batch, target_seq_len), dtype='float32')
    input_mask_arr = np.ones((num_batch, input_seq_len), dtype='float32')

    for nn in range(10000):
        cost_lst = []
        shuffle = np.random.permutation(num_samples)
        for i in range(num_samples//num_batch):
            idx = shuffle[i*num_batch:(i+1)*num_batch]
            _, output_softmax_val, cost, pseudo_cost_val = train(
                input_arr[idx],
                y_arr[idx])
            output_softmax_lst = output_softmax_val
            labels_lst = y_arr[idx]
            cost_lst += [cost]
            #testing.assert_almost_equal(pseudo_cost, pseudo_cost_old, decimal=4)
            #testing.assert_array_almost_equal(pseudo_cost_val, pseudo_cost_old_val)

        if (nn+1) % 200 == 0:
            DECAY = 1.5
            new_lr = lasagne.utils.floatX(sh_lr.get_value() / DECAY)
            sh_lr.set_value(new_lr)
            print "----------------------->NEW LR:", new_lr

        print nn, "Mean cost:", np.mean(cost_lst)
        if (nn+1) % 4 == 0:
            for jj in range(num_batch):
                pred = print_pred(np.argmax(output_softmax_val[jj], axis=-1))
                true = "".join(map(str, labels_lst[jj]))
                pred += (target_seq_len-len(pred)) * " "
                print(pred, true)