def __embedding_layer_TO_similarity_layer__(embedding_layer,
                                            tripletInput=True):
    """Stack a distance computation and a sigmoid unit on an embedding layer.

    The embedding output is reshaped to ``(-1, group, [1])`` with ``group``
    equal to 3 when ``tripletInput`` is truthy and 2 otherwise, and the
    group members are then picked out along axis 1.

    * Triplet mode: the anchor/positive and anchor/negative distances are
      computed separately and concatenated along axis 0.
    * Pair mode: a single distance between the two members is computed.

    :param embedding_layer: layer whose output holds the embeddings
    :param tripletInput: selects triplet (True) or pair (False) grouping
    :return: dict of the created layers keyed by name; ``'reshape'``,
        ``'euclid_dist'`` and ``'similarity'`` are always present
    """
    group_size = 3 if tripletInput else 2
    net = {'reshape': ReshapeLayer(embedding_layer, (-1, group_size, [1]))}

    def member(position):
        # An integer index is used here; the original author noted that
        # slice(0, 1) would be the way to keep the sliced dimension.
        return SliceLayer(net['reshape'], indices=position, axis=1)

    def distance(first, second):
        return DistanceLayer([first, second], Lp=2, axis=1, keepdims=True)

    if tripletInput:
        net['triplet_anchor'] = member(0)
        net['triplet_pos'] = member(1)
        net['triplet_neg'] = member(2)
        net['euclid_pos'] = distance(net['triplet_anchor'],
                                     net['triplet_pos'])
        net['euclid_neg'] = distance(net['triplet_anchor'],
                                     net['triplet_neg'])
        net['euclid_dist'] = ConcatLayer(
            [net['euclid_pos'], net['euclid_neg']], axis=0)
    else:
        net['pair_1'] = member(0)
        net['pair_2'] = member(1)
        net['euclid_dist'] = distance(net['pair_1'], net['pair_2'])

    # Logistic regression on the distance: one input, one sigmoid output.
    net['similarity'] = DenseLayer(net['euclid_dist'],
                                   num_units=1,
                                   nonlinearity=sigmoid)
    return net
Exemple #2
0
        def conv_net(input_layer):
            """Build the convolutional feature extractor for ``input_layer``.

            When ``self.n_mi_features`` is non-zero, that many trailing
            features are split off the input, bypass the convolutions and
            are concatenated back (axis 1) just before the final dense layer.

            :param input_layer: layer exposing a ``shape`` attribute
            :return: tuple ``(output_layer, conv_layer_output_shapes)`` where
                the list holds ``output_shape[2]`` of each convolution
            """
            n_extra = self.n_mi_features
            if n_extra == 0:
                conv_input, mi_input = input_layer, None
            else:
                # The split position is counted from the end of axis 1.
                split_at = input_layer.shape[1] - n_extra
                conv_input = SliceLayer(input_layer,
                                        indices=slice(0, split_at))
                mi_input = SliceLayer(input_layer,
                                      indices=slice(split_at, None))

            output = ReshapeLayer(
                conv_input, (-1, 1, self.input_size, self.input_size))

            conv_layer_output_shapes = []
            for n_filters in (64, 128):
                output = Conv2DLayer(output, n_filters, 5, stride=2,
                                     pad='same')
                conv_layer_output_shapes.append(output.output_shape[2])

            # Flatten the feature maps to (batch, features).
            output = ReshapeLayer(output, (-1, num_elems(output)))
            if mi_input is not None:
                output = ConcatLayer([output, mi_input], axis=1)
            output = BatchNormLayer(DenseLayer(output, conv_output_size))
            return output, conv_layer_output_shapes
Exemple #3
0
def test_slice_layer():
    """Check SliceLayer output shapes and values against NumPy indexing.

    Covers an integer index on axis 0, a slice on axis 1, a negative
    index on the last axis, and slicing a dimension declared as ``None``.
    """
    from lasagne.layers import SliceLayer, InputLayer, get_output_shape,\
        get_output
    from numpy.testing import assert_array_almost_equal as aeq

    full_shape = (3, 5, 2)
    data = np.arange(np.prod(full_shape)).reshape(full_shape)
    data = data.astype('float32')
    source = InputLayer(full_shape)

    # (layer under test, equivalent NumPy result)
    cases = [
        (SliceLayer(source, axis=0, indices=0), data[0]),
        (SliceLayer(source, axis=1, indices=slice(3, 5)), data[:, 3:5]),
        (SliceLayer(source, axis=-1, indices=-1), data[:, :, -1]),
    ]

    for layer, expected in cases:
        assert get_output_shape(layer) == expected.shape

    for layer, expected in cases:
        aeq(get_output(layer, data).eval(), expected)

    # Slicing a dimension of unknown size keeps it unknown in the shape.
    open_source = InputLayer((2, None, 2))
    open_slice = SliceLayer(open_source, axis=1, indices=slice(3, 5))
    assert get_output_shape(open_slice) == (2, None, 2)
    aeq(get_output(open_slice, data).eval(), data[:, 3:5])
    def build_model(self, input_batch):
        """Unroll ``self.predict`` over a sliding window of input frames.

        At every step the first ``self.nInputs`` frames (axis 1) of the
        current buffer are fed to ``self.predict`` together with the
        running hidden state; the oldest frame is then dropped. From step
        ``self.input_seqlen - self.nInputs`` onwards the prediction is
        appended to the buffer and collected.

        :param input_batch: variable used as ``input_var`` of a layer with
            shape ``(None, input_seqlen, npx, npx)``
        :return: tuple ``(output, outputs, filtersToVisualize)``: the last
            prediction, the list of collected predictions, and the filters
            returned at step ``input_seqlen - nInputs``
        :raises UnboundLocalError: if the loop never reaches step
            ``input_seqlen - nInputs`` (e.g. ``target_seqlen`` is 0), since
            the returned names are only bound inside the loop
        """
        # One (W, b) initializer pair per parameterised layer; the lists
        # are handed to every predict() call.
        n_layers_with_params = 13
        if self.refinement_network:
            n_layers_with_params += 4
        Ws = [HeUniform() for _ in range(n_layers_with_params)]
        bs = [Constant(0.0) for _ in range(n_layers_with_params)]

        # Floor division keeps the spatial size an int on Python 3 as well;
        # true division would hand a float to np.zeros and the layer shape.
        half_npx = self.npx // 2
        state_shape = (self.batch_size, 64, half_npx, half_npx)
        hidden_state = InputLayer(
            input_var=np.zeros(state_shape, dtype=np.float32),
            shape=state_shape)

        inputs = InputLayer(
            input_var=input_batch,
            shape=(None, self.input_seqlen, self.npx, self.npx))

        first_predicted_step = self.input_seqlen - self.nInputs
        outputs = []
        for i in range(first_predicted_step + self.target_seqlen):
            window = SliceLayer(inputs, indices=slice(0, self.nInputs),
                                axis=1)
            output, hidden_state, filters = self.predict(
                window, hidden_state, Ws, bs)
            # FIFO: drop the oldest frame.
            inputs = SliceLayer(inputs, indices=slice(1, None), axis=1)

            if i == first_predicted_step:
                filtersToVisualize = filters
            if i >= first_predicted_step:
                # Feed the prediction back in as the newest frame.
                inputs = ConcatLayer([inputs, output], axis=1)
                outputs.append(output)

        return output, outputs, filtersToVisualize
def dnn_sep(M, W1, W2, hh=.0001, ep=5000, d=0, sp=.0001, spb=3, al='rprop'):
    """Fit a shared input ``H`` so that two fixed dense pathways sum to ``M``.

    ``W1`` and ``W2`` supply the (fixed) weights of the two pathways; only
    ``H`` is passed to the optimiser as a trainable parameter.

    :param M: matrix to approximate (used transposed internally)
    :param W1: sequence of weight matrices for the first pathway
    :param W2: sequence of weight matrices for the second pathway
    :param hh: forwarded to ``downhill_train``
    :param ep: forwarded to ``downhill_train``
    :param d: value wrapped into the dummy training dataset
    :param sp: weight of the mean-absolute-activation penalty
    :param spb: second argument of ``psoftplus``
    :param al: algorithm name given to ``downhill.build``
    :return: ``(_r, _r1, _r2, er)``: the summed output plus ``eps``, the
        two pathway outputs (all transposed), and the training result
    """
    # GPU cached data
    _M = theano.shared(M.T.astype(float64))
    dum = Th.vector('dum')

    # Layer sizes: one [pathway-1, pathway-2] pair per level, the last
    # level being the output width for both.
    K = [[W1[i].shape[0], W2[i].shape[0]] for i in range(len(W1))]
    K.append([M.T.shape[1], M.T.shape[1]])
    n_first, n_second = K[0][0], K[0][1]
    n_hidden = n_first + n_second

    # The unknown to be learned; scaled random initialisation.
    H = theano.shared(
        sqrt(2. / (n_first + n_second + M.shape[1])) *
        random.rand(M.T.shape[0], n_hidden).astype(float64))
    fI = InputLayer(shape=(M.T.shape[0], n_hidden), input_var=H)

    # Split H column-wise into the inputs of the two pathways.
    H1 = [SliceLayer(fI, indices=slice(0, n_first), axis=1)]
    H2 = [SliceLayer(fI, indices=slice(n_first, n_hidden), axis=1)]

    # Stack the dense layers of both pathways, level by level.
    for level, (w_first, w_second) in enumerate(zip(W1, W2), 1):
        H1.append(DenseLayer(H1[-1],
                             num_units=K[level][0],
                             W=w_first.astype(float64),
                             nonlinearity=lambda x: psoftplus(x, spb),
                             b=None))
        H2.append(DenseLayer(H2[-1],
                             num_units=K[level][1],
                             W=w_second.astype(float64),
                             nonlinearity=lambda x: psoftplus(x, spb),
                             b=None))

    # Add the two approximations
    R = ElemwiseSumLayer([H1[-1], H2[-1]])

    # Cost: divergence between _M and the reconstruction. The zero-weighted
    # dum term presumably only keeps the dummy input in the graph.
    Ro = get_output(R) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M +
                   Ro) + 0 * Th.mean(dum)
    # Penalise activations of every layer except the output ones.
    for layer_first, layer_second in zip(H1[:-1], H2[:-1]):
        cost += sp * Th.mean(abs(get_output(layer_first))) + sp * Th.mean(
            abs(get_output(layer_second)))

    # Optimise H with downhill.
    opt = downhill.build(al, loss=cost, inputs=[dum], params=[H])
    train = downhill.Dataset(array([d]).astype(float64), batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)

    # Evaluate the layers of interest.
    _r = nget(R, dum, array([0]).astype(float64)).T + eps
    _r1 = nget(H1[-1], dum, array([0]).astype(float64)).T
    _r2 = nget(H2[-1], dum, array([0]).astype(float64)).T

    return _r, _r1, _r2, er
    def model(show_model):
        """Build the two-view network and return its inputs and outputs.

        Each view runs through its own stack of ``dense_bn`` layers and a
        linear projection to ``dim_latent`` units. The two projections are
        merged by ``CCALayer`` or ``LearnedCCALayer`` (chosen by
        ``use_ccal``), split again along axis 1 and length-normalised.

        :param show_model: when truthy, print both output architectures
        :return: ``(l_view1, l_view2, l_v1, l_v2)``: the two input layers
            followed by the two normalised output layers
        """
        def encode(layer, depth, width):
            # ``depth`` hidden layers followed by the latent projection.
            for _ in range(depth):
                layer = dense_bn(layer, num_units=width, nonlinearity=nonlin)
            return DenseLayer(layer,
                              num_units=dim_latent,
                              nonlinearity=identity,
                              W=init())

        # --- input layers ---
        l_view1 = lasagne.layers.InputLayer(shape=(None, INPUT_SHAPE_1[0]))
        l_view2 = lasagne.layers.InputLayer(shape=(None, INPUT_SHAPE_2[0]))

        # --- feed forward parts ---
        l_v1latent = encode(l_view1, N_LAYERS_IMG, N_HIDDEN_IMG)
        l_v2latent = encode(l_view2, N_LAYERS_TXT, N_HIDDEN_TXT)

        # --- multi modality part ---
        latents = [l_v1latent, l_v2latent]
        if use_ccal:
            net = CCALayer(latents, r1, r2, rT, alpha=alpha, wl=weight_tno)
        else:
            net = LearnedCCALayer(latents, U=init(), V=init(), alpha=alpha)

        # The merged output holds view 1 in the first dim_latent columns
        # and view 2 in the next dim_latent columns.
        l_v1 = SliceLayer(net, indices=slice(0, dim_latent), axis=1)
        l_v2 = SliceLayer(net, indices=slice(dim_latent, 2 * dim_latent),
                          axis=1)

        # normalize (per row) output to length 1.0
        l_v1 = LengthNormLayer(l_v1)
        l_v2 = LengthNormLayer(l_v2)

        # --- print architectures ---
        if show_model:
            for branch in (l_v1, l_v2):
                print_architecture(branch)

        return l_view1, l_view2, l_v1, l_v2
    def cnn(self):
        """Assemble the main network on top of the pre-training layers.

        Reads these existing entries of ``self._network``, so they must be
        present before the call: ``'sumwise_layer_pre_training'``,
        ``'dropout_3_pre_training'``, ``'pre_training_fc1_full'`` (only its
        weight shape is printed), ``'fc1_pre_training'`` and
        ``'fc2_pre_training'``. At each stage the pre-training layer is
        wrapped in a ``ScaleLayer`` and summed element-wise with the newly
        created layer.

        New layers are stored in ``self._network``; output shapes are
        printed along the way. ``print`` is called in function form with a
        single argument so the method parses on Python 3 and prints the
        same text on Python 2.
        """
        net = self._network

        # NOTE(review): ``pad`` and ``W`` are passed to InputLayer exactly
        # as before; they look like leftovers from a convolution call -
        # confirm whether they have any effect.
        raw_input = lasagne.layers.InputLayer(
            shape=(None, self._number_of_channel, 8, 14),
            input_var=self._x, pad='same',
            W=lasagne.init.HeNormal(gain='relu'))
        net['input'] = pelu(batch_norm(raw_input))
        print(net['input'].output_shape)

        # Entries 0-1 and 2-3 along axis 1 go through separate convolutions.
        first_part_input = SliceLayer(net['input'], indices=slice(0, 2),
                                      axis=1)
        print(first_part_input.output_shape)
        second_part_input = SliceLayer(net['input'], indices=slice(2, 4),
                                       axis=1)
        print(second_part_input.output_shape)
        first_dropout_2 = self.cnn_separate_convolutions(
            first_part_input, first_part=True)
        second_dropout_2 = self.cnn_separate_convolutions(
            second_part_input, first_part=False)

        net['sumwise_layer'] = ElemwiseSumLayer(
            [first_dropout_2, second_dropout_2,
             ScaleLayer(net['sumwise_layer_pre_training'])])

        net['conv3'] = pelu(batch_norm(lasagne.layers.Conv2DLayer(
            net['sumwise_layer'],
            num_filters=48,
            filter_size=(3, 3),
            W=lasagne.init.HeNormal(gain='relu'))))
        print(net['conv3'].output_shape)

        net['dropout_3'] = mc_dropout.MCDropout(
            net['conv3'], p=self._percentage_dropout_cnn_layers)

        net['merge_with_pre_training_dense_layer_1'] = ElemwiseSumLayer(
            [ScaleLayer(net['dropout_3_pre_training']), net['dropout_3']])
        print(np.shape(net['pre_training_fc1_full'].W.get_value()))
        net['fc1'] = mc_dropout.MCDropout(
            pelu(batch_norm(lasagne.layers.DenseLayer(
                net['merge_with_pre_training_dense_layer_1'],
                num_units=100,
                W=lasagne.init.HeNormal(gain='relu')))),
            p=self._percentage_dropout_dense_layers)
        print(net['fc1'].output_shape)

        net['merge_with_pre_training_dense_layer_2'] = ElemwiseSumLayer(
            [ScaleLayer(net['fc1_pre_training']), net['fc1']])
        net['fc2'] = mc_dropout.MCDropout(
            pelu(batch_norm(lasagne.layers.DenseLayer(
                net['merge_with_pre_training_dense_layer_2'],
                num_units=100,
                W=lasagne.init.HeNormal(gain='relu')))),
            p=self._percentage_dropout_dense_layers)
        print(net['fc2'].output_shape)

        net['merge_with_pre_training_output'] = ElemwiseSumLayer(
            [ScaleLayer(net['fc2_pre_training']), net['fc2']])
        net['output'] = lasagne.layers.DenseLayer(
            net['merge_with_pre_training_output'],
            num_units=self._number_of_class,
            nonlinearity=lasagne.nonlinearities.softmax,
            W=lasagne.init.HeNormal(gain='relu'))
        print(net['output'].output_shape)
    def pre_training_cnn(self):
        """Create the pre-training network and store it in ``self._network``.

        ``self._network`` is replaced by a fresh dict, so any layers stored
        earlier are discarded. The input is batch-normalised, split along
        axis 1 into entries 0-1 and 2-3, each part is passed to
        ``cnn_separate_convolutions_pre_training``, and the results are
        summed before a 3x3 convolution, two 100-unit dense layers and a
        softmax output.

        Output shapes are printed as the layers are created. ``print`` is
        called in function form with a single argument so the method parses
        on Python 3 and prints the same text on Python 2.
        """
        self._network = {}
        net = self._network

        # NOTE(review): ``pad`` and ``W`` are passed to InputLayer exactly
        # as before; they look like leftovers from a convolution call -
        # confirm whether they have any effect.
        net['input_pre_training'] = lasagne.layers.InputLayer(
            shape=(None, self._number_of_channel, 8, 14),
            input_var=self._x, pad='same',
            W=lasagne.init.HeNormal(gain='relu'))
        net['input_normalized'] = prelu(
            batch_norm(net['input_pre_training']))
        print(net['input_normalized'].output_shape)

        first_part_input = SliceLayer(net['input_normalized'],
                                      indices=slice(0, 2), axis=1)
        print(first_part_input.output_shape)
        second_part_input = SliceLayer(net['input_normalized'],
                                       indices=slice(2, 4), axis=1)
        print(second_part_input.output_shape)
        first_network = self.cnn_separate_convolutions_pre_training(
            first_part_input, first_part=True)
        second_network = self.cnn_separate_convolutions_pre_training(
            second_part_input, first_part=False)

        net['sumwise_layer_pre_training'] = ElemwiseSumLayer(
            [first_network, second_network])

        # The raw convolution is stored separately from its normalised,
        # activated version.
        net['conv3_pre_training_cnn'] = lasagne.layers.Conv2DLayer(
            net['sumwise_layer_pre_training'],
            num_filters=48,
            filter_size=(3, 3),
            W=lasagne.init.HeNormal(gain='relu'))
        net['conv3_pre_training'] = prelu(
            batch_norm(net['conv3_pre_training_cnn']))
        print(net['conv3_pre_training'].output_shape)

        net['dropout_3_pre_training'] = mc_dropout.MCDropout(
            net['conv3_pre_training'],
            p=self._percentage_dropout_cnn_layers)

        net['pre_training_fc1_full'] = lasagne.layers.DenseLayer(
            net['dropout_3_pre_training'], num_units=100,
            W=lasagne.init.HeNormal(gain='relu'))
        net['fc1_pre_training'] = mc_dropout.MCDropout(
            prelu(batch_norm(net['pre_training_fc1_full'])),
            p=self._percentage_dropout_dense_layers)
        print(net['fc1_pre_training'].output_shape)

        net['pre_training_fc2_full'] = lasagne.layers.DenseLayer(
            net['fc1_pre_training'], num_units=100,
            W=lasagne.init.HeNormal(gain='relu'))
        net['fc2_pre_training'] = mc_dropout.MCDropout(
            prelu(batch_norm(net['pre_training_fc2_full'])),
            p=self._percentage_dropout_dense_layers)
        print(net['fc2_pre_training'].output_shape)

        net['output_gesture_pre_training'] = lasagne.layers.DenseLayer(
            net['fc2_pre_training'],
            num_units=self._number_of_class,
            nonlinearity=lasagne.nonlinearities.softmax,
            W=lasagne.init.HeNormal(gain='relu'))
        print(net['output_gesture_pre_training'].output_shape)

        print("Pre-Training done printing")
Exemple #9
0
def create_network():
    """Build a two-branch classifier with a 2-unit softmax output.

    The input has shape ``(None, 1, 4, 1000 + 1024)``. Its first 1000
    positions on the last axis feed a stack of three convolutional stages
    (three convolutions plus a max-pool each); the remaining 1024 positions
    are flattened and fed to a dense layer. Both branches are concatenated,
    passed through dropout and a dense layer, then classified.

    :return: the final softmax ``DenseLayer``
    """
    conv_length = 1000
    pool_width = 5
    # (number of filters, filter width) for each convolutional stage
    stages = ((128, 13), (128, 7), (128, 5))

    net_input = InputLayer(shape=(None, 1, 4, conv_length + 1024))
    conv_branch = SliceLayer(net_input, indices=slice(0, conv_length),
                             axis=-1)
    side_branch = SliceLayer(net_input, indices=slice(conv_length, None),
                             axis=-1)
    side_branch = SliceLayer(side_branch, indices=slice(0, 4), axis=-2)
    side_branch = FlattenLayer(side_branch)

    # Only the very first convolution uses a filter height of 4; every
    # later one uses a height of 1.
    filter_height = 4
    for n_filters, filter_width in stages:
        for _ in range(3):
            conv_branch = Conv2DLayer(
                conv_branch,
                num_filters=n_filters,
                filter_size=(filter_height, filter_width))
            filter_height = 1
        conv_branch = MaxPool2DLayer(conv_branch,
                                     pool_size=(1, pool_width))

    conv_features = DenseLayer(conv_branch, num_units=256)
    side_features = DenseLayer(side_branch, num_units=128)
    merged = ConcatLayer([conv_features, side_features])
    merged = DropoutLayer(merged, p=0.5)
    hidden = DenseLayer(merged, num_units=256)
    return DenseLayer(hidden, num_units=2, nonlinearity=softmax)
Exemple #10
0
def self_attention(incoming, key_size=None,value_size=None,mask_input=None,name='attn',
                   attn_class=DotAttentionLayer,**kwargs):
    r"""
    A convenience function that applies attention from sequential layer to itself.

             /-> queries  -------v
    incoming --> keys     ---> attention_probs ---v
             \-> values   -------------------> attention response

    Queries, keys and values are produced by one dense layer (named
    ``name + '.qkv'``) and separated by slicing its last axis.

    :param incoming: input sequence of shape [batch, time, units]
    :param key_size: num units in attention query and key, defaults to incoming.output_shape[-1]
    :param value_size: num units in attention values, defaults to incoming.output_shape[-1]
        (not to key_size)
    :param mask_input: optional mask layer of shape [batch, time]
    :param name: name given to the attention layer and used as prefix for the qkv layer
    :param attn_class: either DotAttentionLayer or AttentionLayer or similar layer (incl. multihead attention);
        the object it returns is indexed with 'attn'
    :param kwargs: also accepts any parameters accepted by attn_class
    :returns: attention response layer, [batch, time, value_units]

    Heavily inspired by https://arxiv.org/abs/1706.03762 and http://bit.ly/2vsYX0R

    """
    # The docstring is a raw string because the diagram contains a
    # backslash that would otherwise form an invalid escape sequence.
    assert len(incoming.output_shape) == 3, "incoming layer must have shape [batch,time,unit]"
    assert mask_input is None or len(mask_input.output_shape) == 2,"if mask_input is given, it must be [batch,time]"

    key_size = key_size or incoming.output_shape[-1]
    value_size = value_size or incoming.output_shape[-1]

    qkv = DenseLayer(incoming, key_size*2 + value_size, nonlinearity=None,
                         num_leading_axes=2,name=name+'.qkv')                   #[batch,time,2*key_units+value_units]

    queries = SliceLayer(qkv, slice(0,key_size),axis=-1)
    keys    = SliceLayer(qkv, slice(key_size,2*key_size), axis=-1)
    values  = SliceLayer(qkv, slice(2*key_size,qkv.num_units), axis=-1)

    # broadcast each query to every (key,value) pair
    queries_each_tick = bcast = BroadcastLayer(queries, broadcasted_axes=(0, 1)) #[batch*time,units]

    # upcast every key and value to match the amount queries
    key_for_each_query = UpcastLayer(keys, broadcast_layer=bcast)        #[batch*time, time, units]
    value_for_each_query = UpcastLayer(values, broadcast_layer=bcast)    #[batch*time, time, value_units]

    if mask_input is not None:
        mask_input = UpcastLayer(mask_input,broadcast_layer=bcast)       #[batch*time, time]

    attn_each_tick = attn_class(value_for_each_query,
                                queries_each_tick,
                                key_sequence=key_for_each_query,
                                mask_input=mask_input,
                                name=name,**kwargs)['attn']              #[batch*time, value_units]

    attn = UnbroadcastLayer(attn_each_tick, broadcast_layer=bcast)       #[batch, time, value_units]

    return attn
    def nn_fn(self):
        """Build an LSTM network over a latent input and a shifted sequence.

        The latent input ``z`` is reshaped to ``(batch, 1, z_dim)`` and
        handed to ``TileLayer`` with repetitions ``(1, max_length, 1)``.
        The sequence input is padded by one step at the front of axis 1 and
        its last step is dropped, then passed through dropout. Both are
        concatenated on the last axis and fed to an LSTM; when
        ``self.nn_skip`` is set, the tiled ``z`` is concatenated to the LSTM
        output as well before the final linear projection to ``emb_dim``.

        :return: ``((l_in_z, l_in_x), l_out)``: the input layers and the
            output layer
        """
        l_in_z = InputLayer((None, self.z_dim))
        l_in_x = InputLayer((None, self.max_length, self.emb_dim))

        z_with_time_axis = ReshapeLayer(l_in_z, ([0], 1, [1]))
        z_per_step = TileLayer(z_with_time_axis, (1, self.max_length, 1))

        # Shift the sequence right by one step: pad in front, cut the end.
        padded_x = PadLayer(l_in_x, [(1, 0), (0, 0)], batch_ndim=1)
        shifted_x = SliceLayer(padded_x, indices=slice(0, -1), axis=1)
        shifted_x = DropoutLayer(shifted_x,
                                 self.nn_word_drop,
                                 shared_axes=(-1, ))

        lstm_input = ConcatLayer((z_per_step, shifted_x), axis=-1)
        hidden = LSTMLayer(lstm_input, num_units=self.nn_hid_units)

        if self.nn_skip:
            hidden = ConcatLayer((hidden, z_per_step), axis=-1)

        l_out = DenseLayer(hidden,
                           num_units=self.emb_dim,
                           num_leading_axes=2,
                           nonlinearity=None)

        return (l_in_z, l_in_x), l_out
def create_model(input_shape,
                 input_var,
                 mask_shape,
                 mask_var,
                 lstm_size=250,
                 output_classes=26,
                 w_init=las.init.Orthogonal()):
    """Build a bidirectional-LSTM sequence classifier.

    The forward and backward LSTM outputs from ``create_blstm`` are summed,
    index -1 along axis 1 of the sum is selected, and a softmax layer maps
    it to ``output_classes`` units.

    :param input_shape: shape of the input layer
    :param input_var: variable bound to the input layer
    :param mask_shape: shape of the mask layer
    :param mask_var: variable bound to the mask layer
    :param lstm_size: size passed to ``create_blstm``
    :param output_classes: number of softmax units
    :param w_init: initializer used for all input and hidden gate weights
    :return: the softmax output layer
    """
    zero_bias = las.init.Constant(0.)
    gate_parameters = Gate(W_in=w_init, W_hid=w_init, b=zero_bias)
    # W_cell=None means no cell connection is used; tanh is the
    # conventional cell nonlinearity of an LSTM.
    cell_parameters = Gate(W_in=w_init,
                           W_hid=w_init,
                           W_cell=None,
                           b=las.init.Constant(0.),
                           nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var=input_var, name='input')
    l_mask = InputLayer(mask_shape, input_var=mask_var, name='mask')

    forward, backward = create_blstm(l_in, l_mask, lstm_size,
                                     cell_parameters, gate_parameters,
                                     'lstm')

    combined = ElemwiseSumLayer([forward, backward], name='sum')
    last_step = SliceLayer(combined, indices=-1, axis=1, name='slice1')

    # One prediction per sequence: a softmax over the classes.
    return DenseLayer(last_step,
                      num_units=output_classes,
                      nonlinearity=las.nonlinearities.softmax,
                      name='output')
Exemple #13
0
def build_rnn(conv_input_var, seq_input_var, conv_shape, word_dims, n_hid,
              lstm_layers):
    """Build an LSTM network over a feature vector followed by a sequence.

    Sequence elements and the ``conv`` features are each projected to
    ``n_hid`` units. The projected features are placed in front of the
    sequence as one extra step (concatenation on axis 1), the result runs
    through ``lstm_layers`` LSTM layers, and every step is projected to
    ``word_dims`` units with ``log_softmax``. The last step of the output
    is dropped so the result has the same length as the input sequence.

    :param conv_input_var: variable bound to the feature input layer
    :param seq_input_var: variable bound to the sequence input layer
    :param conv_shape: shape of the feature input layer
    :param word_dims: size of each sequence element and of the output
    :param n_hid: number of hidden units
    :param lstm_layers: number of stacked LSTM layers
    :return: dict mapping layer names to layers; ``'output'`` is the final
        layer
    """
    ret = {}
    ret['seq_input'] = seq_layer = InputLayer((None, None, word_dims),
                                              input_var=seq_input_var)
    # Symbolic sizes, needed to restore the 3-d layout after the
    # per-element dense projections.
    batchsize, seqlen, _ = seq_layer.input_var.shape
    ret['seq_resh'] = seq_layer = ReshapeLayer(seq_layer,
                                               shape=(-1, word_dims))
    ret['seq_proj'] = seq_layer = DenseLayer(seq_layer, num_units=n_hid)
    ret['seq_resh2'] = seq_layer = ReshapeLayer(seq_layer,
                                                shape=(batchsize, seqlen,
                                                       n_hid))
    ret['conv_input'] = conv_layer = InputLayer(conv_shape,
                                                input_var=conv_input_var)
    ret['conv_proj'] = conv_layer = DenseLayer(conv_layer, num_units=n_hid)
    # Give the projected features a length-1 axis 1 so they can be
    # concatenated in front of the sequence.
    ret['conv_resh'] = conv_layer = ReshapeLayer(conv_layer,
                                                 shape=([0], 1, -1))
    ret['input_concat'] = layer = ConcatLayer([conv_layer, seq_layer], axis=1)
    # range() instead of xrange() so the function also runs on Python 3.
    for lstm_layer_idx in range(lstm_layers):
        ret['lstm_{}'.format(lstm_layer_idx)] = layer = LSTMLayer(layer, n_hid)
    ret['out_resh'] = layer = ReshapeLayer(layer, shape=(-1, n_hid))
    ret['output_proj'] = layer = DenseLayer(layer,
                                            num_units=word_dims,
                                            nonlinearity=log_softmax)
    # seqlen + 1 steps because of the prepended feature step.
    full_output = ReshapeLayer(layer,
                               shape=(batchsize, seqlen + 1, word_dims))
    ret['output'] = SliceLayer(full_output, indices=slice(None, -1), axis=1)
    return ret
Exemple #14
0
def create_model(dbn,
                 input_shape,
                 input_var,
                 mask_shape,
                 mask_var,
                 lstm_size=250,
                 win=T.iscalar('theta)')):
    """Build a pretrained-encoder + delta + bidirectional-LSTM classifier.

    The weights and biases of layers 1-4 of ``dbn`` initialise an encoder
    that is applied to every time step. The encoded sequence goes through
    a ``DeltaLayer``, a bidirectional LSTM whose two directions are summed,
    a selection of index -1 along axis 1, and a 26-unit softmax layer.

    :param dbn: object whose ``get_all_layers()`` result exposes ``W`` and
        ``b`` on the entries at indices 1 to 4
    :param input_shape: shape of the input layer; its last entry is the
        per-step feature size
    :param input_var: variable bound to the input layer
    :param mask_shape: shape of the mask layer
    :param mask_var: variable bound to the mask layer
    :param lstm_size: size passed to ``create_blstm``
    :param win: passed to ``DeltaLayer``; the default is created once, when
        the function is defined
    :return: the softmax output layer
    """
    pretrained = dbn.get_all_layers()
    encoder_indices = (1, 2, 3, 4)
    weights = [pretrained[index].W for index in encoder_indices]
    biases = [pretrained[index].b for index in encoder_indices]

    gate_parameters = Gate(W_in=las.init.Orthogonal(),
                           W_hid=las.init.Orthogonal(),
                           b=las.init.Constant(0.))
    # W_cell=None means no cell connection is used; tanh is the
    # conventional cell nonlinearity of an LSTM.
    cell_parameters = Gate(W_in=las.init.Orthogonal(),
                           W_hid=las.init.Orthogonal(),
                           W_cell=None,
                           b=las.init.Constant(0.),
                           nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var=input_var, name='input')
    l_mask = InputLayer(mask_shape, input_var=mask_var, name='mask')

    # Symbolic sizes, used to restore the 3-d layout after encoding.
    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    # Collapse the two leading axes so the encoder sees one step per row.
    per_step = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    encoded = create_pretrained_encoder(weights, biases, per_step)
    encoder_len = las.layers.get_output_shape(encoded)[-1]
    sequence = ReshapeLayer(
        encoded, (symbolic_batchsize, symbolic_seqlen, encoder_len),
        name='reshape2')

    with_deltas = DeltaLayer(sequence, win, name='delta')
    forward, backward = create_blstm(with_deltas, l_mask, lstm_size,
                                     cell_parameters, gate_parameters,
                                     'lstm1')
    combined = ElemwiseSumLayer([forward, backward], name='sum1')
    last_step = SliceLayer(combined, indices=-1, axis=1, name='slice1')

    # One prediction per sequence: a softmax over the classes.
    return DenseLayer(last_step,
                      num_units=26,
                      nonlinearity=las.nonlinearities.softmax,
                      name='output')
    def nn_fn(self):
        """Build a masked (optionally bidirectional) LSTM encoder.

        ``self.rnn_depth`` recurrent levels are stacked. When
        ``self.rnn_bidirectional`` is set, each level is a forward and a
        backward LSTM concatenated on the last axis. The outputs of all
        levels are summed, index -1 along axis 1 is selected, and
        ``self.nn_dense_depth`` dense layers follow. Two heads of
        ``self.z_dim`` units are returned: a linear one and one using
        ``softplus_safe``.

        :return: ``((l_in, l_mask), (l_mean, l_cov))``
        """
        l_in = InputLayer((None, self.max_length, self.emb_dim))
        l_mask = InputLayer((None, self.max_length))

        def recurrent(incoming, **extra):
            return LSTMLayer(incoming,
                             num_units=self.rnn_hid_units,
                             mask_input=l_mask,
                             **extra)

        current = l_in
        level_outputs = []
        for _ in range(self.rnn_depth):
            if self.rnn_bidirectional:
                forward = recurrent(current)
                backward = recurrent(current, backwards=True)
                current = ConcatLayer((forward, backward), axis=-1)
            else:
                current = recurrent(current)
            level_outputs.append(current)

        summed = ElemwiseSumLayer(level_outputs)
        current = SliceLayer(summed, indices=-1, axis=1)

        for _ in range(self.nn_dense_depth):
            current = DenseLayer(current, num_units=self.nn_dense_hid_units)

        l_mean = DenseLayer(current, self.z_dim, nonlinearity=None)
        l_cov = DenseLayer(current, self.z_dim, nonlinearity=softplus_safe)

        return (l_in, l_mask), (l_mean, l_cov)
    def build_model(self, input_batch):
        """Build a network that filters an image with generated filters.

        Entry 0 along axis 1 of ``input_batch`` is used as the image and
        entry 1 as ``theta``. ``theta`` passes through three 1x1
        convolutions (64, 128 and ``filter_size ** 2`` filters) to produce
        the filters, which a ``DynamicFilterLayer`` applies to the image.

        :param input_batch: variable indexed as ``[:, [k], :, :]``
        :return: ``(output, [output], filters)``
        """
        filter_size = self.dynamic_filter_size[0]
        plane_shape = (None, 1, self.npx, self.npx)

        ## get inputs
        frame = InputLayer(input_var=input_batch[:, [0], :, :],
                           shape=plane_shape)
        theta = InputLayer(input_var=input_batch[:, [1], :, :],
                           shape=plane_shape)

        def pointwise_conv(incoming, n_filters, activation):
            return ConvLayer(incoming,
                             num_filters=n_filters,
                             filter_size=(1, 1),
                             stride=(1, 1),
                             pad='same',
                             nonlinearity=activation)

        features = theta
        for n_filters in (64, 128):
            features = pointwise_conv(features, n_filters, leaky_rectify)
        # The filter-generating layer is linear.
        filters = pointwise_conv(features, filter_size**2, identity)

        image = SliceLayer(frame, indices=slice(0, 1), axis=1)
        half = filter_size // 2
        output = DynamicFilterLayer([image, filters],
                                    filter_size=(filter_size, filter_size, 1),
                                    pad=(half, half))

        return output, [output], filters
Exemple #17
0
    def _build(self, forget_bias=5.0, grad_clip=10.0):
        """Build the stacked-LSTM architecture and its output expressions.

        Sets ``self.input_var``, ``self.network`` (all layers reachable
        from the two output layers), ``self.loc_var`` and
        ``self.conf_var`` (deterministic outputs).

        :param forget_bias: constant used as the forget-gate bias
        :param grad_clip: passed as ``grad_clipping`` to every LSTM layer
        """
        network = InputLayer(shape=(None, self.seq_length, self.input_size),
                             name='input')
        self.input_var = network.input_var

        # Hidden layers
        for _ in range(self.depth):
            forget_gate = lasagne.layers.Gate(
                b=lasagne.init.Constant(forget_bias))
            network = LSTMLayer(network,
                                self.width,
                                nonlinearity=lasagne.nonlinearities.tanh,
                                grad_clipping=grad_clip,
                                forgetgate=forget_gate)

        # Retain last-output state
        network = SliceLayer(network, indices=-1, axis=1)

        # Two heads on the last state: 2 * num_outputs units with the
        # default nonlinearity, and num_outputs sigmoid units.
        loc_layer = DenseLayer(network, self.num_outputs * 2)
        conf_layer = DenseLayer(network,
                                self.num_outputs,
                                nonlinearity=lasagne.nonlinearities.sigmoid)
        heads = [loc_layer, conf_layer]

        # Grab all layers into DAPs instance
        self.network = get_all_layers(heads)

        # Get theano expression for outputs of DAPs model
        self.loc_var, self.conf_var = get_output(heads, deterministic=True)
Exemple #18
0
    def create_attention(self, gru_con, in_con_mask, condition, batch_size,
                         n_hidden_con, **kwargs):
        """Build an attention-weighted sum of ``gru_con`` over axis 1.

        ``gru_con`` and ``condition`` are each projected to
        ``self.n_attention`` units, summed (the condition gains a
        broadcastable axis 1) and passed through tanh. A one-unit
        projection gives a score per position, ``SequenceSoftmax``
        normalises the scores using the mask, and the weights multiply
        ``gru_con`` before summation.

        NOTE(review): the ``in_con_mask``, ``batch_size``, ``n_hidden_con``
        and ``kwargs`` arguments are not used; the mask is always taken
        from ``self.in_con_mask`` - confirm this is intended.

        :param gru_con: layer holding the sequence to attend over
        :param condition: layer holding the conditioning vector
        :return: layer whose output shape is the product's shape with
            axis 1 removed
        """
        mask = self.in_con_mask

        projected_seq = non_flattening_dense_layer(gru_con,
                                                   mask,
                                                   self.n_attention,
                                                   nonlinearity=None)
        projected_cond = DenseLayer(condition, self.n_attention,
                                    nonlinearity=None)
        projected_cond = dimshuffle(projected_cond, (0, 'x', 1))

        hidden = ElemwiseSumLayer([projected_seq, projected_cond])
        hidden = NonlinearityLayer(hidden, T.tanh)

        # One score per position; index 0 on axis 2 removes the unit axis.
        scores = non_flattening_dense_layer(hidden, mask, 1,
                                            nonlinearity=None)
        scores = SliceLayer(scores, indices=0, axis=2)

        weights = SequenceSoftmax(scores, mask)
        weights = ForgetSizeLayer(dimshuffle(weights, (0, 1, 'x')))

        weighted = ElemwiseMergeLayer([weights, gru_con], T.mul)

        def sum_over_axis_1(x):
            return T.sum(x, axis=1)

        def shape_without_axis_1(s):
            return (s[0], ) + s[2:]

        return ExpressionLayer(weighted, sum_over_axis_1,
                               shape_without_axis_1)
Exemple #19
0
def inverse_convolution_strided_layer(input_layer, original_layer):
    """Build a transposed-convolution layer mirroring ``original_layer``.

    The input is reshaped to 4-D with a singleton third axis, passed through
    a ``TransposedConv2DLayer`` whose filter and stride are
    ``(1, original_layer.filter_size[0])`` and
    ``(1, original_layer.stride[0])``, trimmed by one element on the last
    axis and reshaped to ``original_layer``'s input shape (batch axis left
    free).

    :param input_layer: layer to invert from
    :param original_layer: layer supplying ``output_shape``, ``input_shape``,
        ``filter_size``, ``stride``, ``flip_filters`` and
        ``input_layer.num_filters``
    :return: the final ReshapeLayer
    """
    produced_shape = original_layer.output_shape
    as_image = ReshapeLayer(
        input_layer, (-1, produced_shape[1], 1, produced_shape[2]))

    deconv = TransposedConv2DLayer(
        as_image,
        original_layer.input_layer.num_filters,
        (1, original_layer.filter_size[0]),
        stride=(1, original_layer.stride[0]),
        crop=(0, 0),
        flip_filters=original_layer.flip_filters,
        nonlinearity=nonlinearities.leaky_rectify)

    # Drop the final element along the last axis before restoring the shape.
    trimmed = SliceLayer(deconv, indices=slice(None, -1), axis=-1)

    return ReshapeLayer(
        trimmed,
        (-1, original_layer.input_shape[1], original_layer.input_shape[2]))
Exemple #20
0
def build_convpool_lstm(input_vars, input_shape=None):
    """
    Build a CNN + stacked-LSTM classifier over a sequence of EEG images.

    One CNN is built per time window (``input_shape[0]`` of them). Every CNN
    after the first is handed the ``W_init`` returned by the first
    ``build_cnn`` call (presumably so the weights are shared). The flattened
    CNN outputs are arranged as a sequence, run through two LSTM layers, and
    the last time step is classified by two dense layers.

    Relies on the module-level names ``grad_clip`` and ``num_classes``.

    :param input_vars: list of EEG images (one image per time window)
    :param input_shape: indexable whose first entry is the number of time
        windows; also forwarded to ``build_cnn``. The ``None`` default cannot
        be used because the value is indexed immediately.
    :return: the softmax output layer
    """
    branches = []
    W_init = None
    for window in range(input_shape[0]):
        if window != 0:
            cnn, _ = build_cnn(input_vars[window], input_shape, W_init)
        else:
            # The first CNN yields the weights handed to all later ones.
            cnn, W_init = build_cnn(input_vars[window], input_shape)
        branches.append(FlattenLayer(cnn))

    # Join the per-window feature vectors, then regroup them as
    # (batch size, SEQ_LENGTH, num_features) for the recurrent layers.
    joined = ConcatLayer(branches)
    n_features = get_output_shape(branches[0])[1]
    recurrent = ReshapeLayer(joined, ([0], input_shape[0], n_features))

    # Two identical LSTM layers stacked on top of each other.
    for _ in range(2):
        recurrent = LSTMLayer(recurrent,
                              num_units=32,
                              grad_clipping=grad_clip,
                              nonlinearity=lasagne.nonlinearities.sigmoid)

    # Keep only the last prediction of the sequence.
    last_step = SliceLayer(recurrent, -1, 1)

    # Fully-connected layer of 256 units with 50% dropout on its inputs.
    hidden = DenseLayer(lasagne.layers.dropout(last_step, p=.5),
                        num_units=256,
                        nonlinearity=lasagne.nonlinearities.rectify)

    # Output layer, again with 50% dropout on its inputs.
    return DenseLayer(lasagne.layers.dropout(hidden, p=.5),
                      num_units=num_classes,
                      nonlinearity=lasagne.nonlinearities.softmax)
Exemple #21
0
def gru_hidden_readout(column, indices):
    """Slice every layer in ``column`` along axis 1.

    :param column: iterable of layers, each exposing a string ``name``
    :param indices: index or slice passed to ``SliceLayer``
    :return: list with one ``SliceLayer`` per input layer, each named
        ``<layer.name>/slice`` (joined with ``os.path.join``)
    """
    return [
        SliceLayer(layer,
                   indices,
                   axis=1,
                   name=os.path.join(layer.name, "slice"))
        for layer in column
    ]
Exemple #22
0
def build_convpool_mix(input_vars,
                       nb_classes,
                       grad_clip=110,
                       imsize=32,
                       n_colors=3,
                       n_timewin=7):
    """
    Build a network combining a 1D-conv branch and an LSTM branch.

    One CNN is built per time window; every CNN after the first is handed
    the ``w_init`` returned by the first ``build_cnn`` call. The flattened
    CNN features feed both a ``Conv1DLayer`` and an ``LSTMLayer``, whose
    outputs are concatenated and classified.

    :param input_vars: list of EEG images (one image per time window)
    :param nb_classes: number of classes
    :param grad_clip:  the gradient messages are clipped to the given value
                        during the backward pass.
    :param imsize: size of the input image (assumes a square input)
    :param n_colors: number of color channels in the image
    :param n_timewin: number of time windows in the snippet
    :return: the softmax output layer
    """
    cnn_options = dict(imsize=imsize, n_colors=n_colors)
    branches = []
    w_init = None
    for window in range(n_timewin):
        if window != 0:
            cnn, _ = build_cnn(input_vars[window], w_init=w_init,
                               **cnn_options)
        else:
            # The first CNN yields the weights handed to all later ones.
            cnn, w_init = build_cnn(input_vars[window], **cnn_options)
        branches.append(FlattenLayer(cnn))

    # Regroup the joined features as (batch, n_timewin, features).
    sequence = ConcatLayer(branches)
    sequence = ReshapeLayer(
        sequence, ([0], n_timewin, get_output_shape(branches[0])[1]))

    # Conv branch: Conv1DLayer wants (batch, channels, length), so the last
    # two axes are swapped first.
    conv_branch = DimshuffleLayer(sequence, (0, 2, 1))
    conv_branch = Conv1DLayer(conv_branch, 64, 3)
    conv_branch = FlattenLayer(conv_branch)

    # Recurrent branch: works on (batch, SEQ_LENGTH, features) and keeps
    # only the last time step.
    lstm_branch = LSTMLayer(sequence,
                            num_units=128,
                            grad_clipping=grad_clip,
                            nonlinearity=lasagne.nonlinearities.tanh)
    lstm_branch = SliceLayer(lstm_branch, -1, 1)

    merged = ConcatLayer([conv_branch, lstm_branch])

    # Fully-connected layer of 512 units with 50% dropout on its inputs.
    hidden = DenseLayer(lasagne.layers.dropout(merged, p=.5),
                        num_units=512,
                        nonlinearity=lasagne.nonlinearities.rectify)

    # Output layer with nb_classes units; no dropout is applied here.
    return DenseLayer(hidden,
                      num_units=nb_classes,
                      nonlinearity=lasagne.nonlinearities.softmax)
Exemple #23
0
def gru_stack_readout(column, indices):
    """Wrap each layer in a stack readout and slice it along axis 1.

    :param column: iterable of layers, each exposing a string ``name``
    :param indices: index or slice passed to ``SliceLayer``
    :return: list with one ``SliceLayer`` per input layer; the readout is
        named ``<layer.name>/stack`` and the slice ``<layer.name>/stack/slice``
        (joined with ``os.path.join``)
    """

    def _readout(layer):
        stack_name = os.path.join(layer.name, "stack")
        stacked = GRUStackReadoutLayer(layer, name=stack_name)
        return SliceLayer(stacked,
                          indices,
                          axis=1,
                          name=os.path.join(stack_name, "slice"))

    return [_readout(layer) for layer in column]
Exemple #24
0
 def util_slice_layer(self, layer, persons_cnt, factor):
     """Split ``layer`` along axis 2 into ``factor`` consecutive groups.

     Each group spans ``persons_cnt // factor`` entries, so any remainder at
     the end of the axis is not covered by a slice.

     :param layer: layer to slice
     :param persons_cnt: number of entries to divide among the groups
     :param factor: number of groups to produce
     :return: list of ``factor`` SliceLayer objects, in axis order
     """
     group_size = persons_cnt // factor
     return [
         SliceLayer(layer,
                    indices=slice(k * group_size, (k + 1) * group_size),
                    axis=2)
         for k in range(factor)
     ]
Exemple #25
0
def sliding_window_input(input_layer, window_size=5):
    """Stack shifted views of the last axis into a new trailing window axis.

    Builds ``window_size`` slices of ``input_layer`` along its last axis,
    each shifted by one position, appends a broadcastable axis to every slice
    and concatenates the slices along that new axis. Position ``t`` of the
    result therefore holds input elements ``t + window_size - 1`` down to
    ``t``, and the sliced axis is ``window_size - 1`` shorter than the input.

    The ``(0, 1, 'x')`` dimshuffle pattern requires the input to be 2-D.

    :param input_layer: 2-D layer to window over
    :param window_size: number of shifted views to stack; defaults to 5, the
        value that used to be hard-coded
    :return: ConcatLayer carrying the window on its last axis
    :raises ValueError: if ``window_size`` is smaller than 1
    """
    if window_size < 1:
        raise ValueError(
            "window_size must be at least 1, got %r" % (window_size,))

    sub_input = []
    # range() works on both Python 2 and 3; xrange() exists only on Python 2.
    for i in range(window_size):
        # View i starts window_size - i - 1 elements in and stops i elements
        # before the end, so all views have the same length.
        indices = slice(window_size - i - 1, -i if i > 0 else None)
        network = DimshuffleLayer(SliceLayer(input_layer, indices, axis=-1),
                                  (0, 1, 'x'))
        sub_input.append(network)
    network = ConcatLayer(sub_input, -1)
    return network
def build_lstm(input_vars, input_shape=None):
    '''
    Build a two-layer LSTM classifier.

    Layer stack, in order:
      1) InputLayer
      2) ReshapeLayer (flattens everything after the first two axes)
      3) DimshuffleLayer (swaps the first two axes)
      4) LSTM Layer 1 (128 units, tanh)
      5) LSTM Layer 2 (128 units, tanh)
      6) Slice Layer (index -1 along axis 1)
      7) Fully Connected Layer 1 w/ dropout, 256 units, rectify
      8) Fully Connected Layer 2 w/ dropout, softmax

    Relies on the module-level names num_input_channels, grad_clip and
    num_classes. The None default of input_shape cannot be used because the
    value is indexed immediately.
    '''
    layer_shape = (input_shape[0], None, num_input_channels,
                   input_shape[-3], input_shape[-2], input_shape[-1])
    net = InputLayer(shape=layer_shape, input_var=input_vars)

    # Keep the first two axes, flatten the rest, then swap those two axes.
    net = ReshapeLayer(net, ([0], [1], -1))
    net = DimshuffleLayer(net, (1, 0, 2))

    # Two identical LSTM layers stacked on top of each other.
    for _ in range(2):
        net = LSTMLayer(net,
                        num_units=128,
                        grad_clipping=grad_clip,
                        nonlinearity=lasagne.nonlinearities.tanh)

    # Keep only the last prediction rather than the whole sequence.
    # http://lasagne.readthedocs.org/en/latest/modules/layers/recurrent.html
    net = SliceLayer(net, -1, 1)

    # Fully-connected layer of 256 units with 50% dropout on its inputs.
    net = DenseLayer(lasagne.layers.dropout(net, p=.5),
                     num_units=256,
                     nonlinearity=lasagne.nonlinearities.rectify)

    # Output layer, again with 50% dropout on its inputs.
    return DenseLayer(lasagne.layers.dropout(net, p=.5),
                      num_units=num_classes,
                      nonlinearity=lasagne.nonlinearities.softmax)
Exemple #27
0
def build_convpool_lstm(input_vars,
                        nb_classes,
                        grad_clip=110,
                        imsize=32,
                        n_colors=3,
                        n_timewin=7):
    """
    Build a CNN + LSTM classifier over a sequence of EEG images.

    One CNN is built per time window; every CNN after the first is handed
    the ``w_init`` returned by the first ``build_cnn`` call. The flattened
    CNN features are arranged as a sequence, run through one LSTM layer, and
    the last time step is classified by two dense layers.

    :param input_vars: list of EEG images (one image per time window)
    :param nb_classes: number of classes
    :param grad_clip:  the gradient messages are clipped to the given value
                        during the backward pass.
    :param imsize: size of the input image (assumes a square input)
    :param n_colors: number of color channels in the image
    :param n_timewin: number of time windows in the snippet
    :return: the softmax output layer
    """
    cnn_options = dict(imsize=imsize, n_colors=n_colors)
    branches = []
    w_init = None
    for window in range(n_timewin):
        if window != 0:
            cnn, _ = build_cnn(input_vars[window], w_init=w_init,
                               **cnn_options)
        else:
            # The first CNN yields the weights handed to all later ones.
            cnn, w_init = build_cnn(input_vars[window], **cnn_options)
        branches.append(FlattenLayer(cnn))

    # Regroup the joined features as (batch size, SEQ_LENGTH, num_features),
    # which is the layout the LSTM expects.
    sequence = ConcatLayer(branches)
    sequence = ReshapeLayer(
        sequence, ([0], n_timewin, get_output_shape(branches[0])[1]))

    recurrent = LSTMLayer(sequence,
                          num_units=128,
                          grad_clipping=grad_clip,
                          nonlinearity=lasagne.nonlinearities.tanh)

    # Only the last prediction is passed on to the dense layers.
    last_step = SliceLayer(recurrent, -1, 1)

    # Fully-connected layer of 256 units with 50% dropout on its inputs.
    hidden = DenseLayer(lasagne.layers.dropout(last_step, p=.5),
                        num_units=256,
                        nonlinearity=lasagne.nonlinearities.rectify)

    # Output layer, again with 50% dropout on its inputs.
    return DenseLayer(lasagne.layers.dropout(hidden, p=.5),
                      num_units=nb_classes,
                      nonlinearity=lasagne.nonlinearities.softmax)
def build_rnn_network(rnnmodel,X_sym,hid_init_sym):
    net = {}    
    
    net['input0'] = InputLayer((batch_size, seq_len),X_sym)        
    net['input']=lasagne.layers.EmbeddingLayer(net['input0'],outputclass,units[0])#,W=lasagne.init.Uniform(inial_scale)      
    net['rnn0']=DimshuffleLayer(net['input'],(1,0,2)) #change to (time, batch_size,hidden_units)    
    if use_bn_embed:
      net['rnn0']=BatchNorm_step_timefirst_Layer(net['rnn0'],axes=(0,1),epsilon=args.epsilon )
      
    for l in range(1, num_layers+1):
      net['hiddeninput%d'%l] = InputLayer((batch_size, units[l-1]),hid_init_sym[:,acc_units[l-1]:acc_units[l]])               
      net['rnn%d'%(l-1)]=ReshapeLayer(net['rnn%d'%(l-1)], (batch_size* seq_len, -1))          
      net['rnn%d'%(l-1)]=DenseLayer(net['rnn%d'%(l-1)],units[l-1],W=ini_W,b=lasagne.init.Constant(args.ini_b),nonlinearity=None)  #W=Uniform(ini_rernn_in_to_hid),         #
      net['rnn%d'%(l-1)]=ReshapeLayer(net['rnn%d'%(l-1)], (seq_len, batch_size,  -1))  

      if args.use_residual and l>args.residual_layers and (l-1)%args.residual_layers==0:# and l!=num_layers
        if units[l - 1]!=units[l - 1 - args.residual_layers]:
          net['leftbranch%d' % (l - 1)] = ReshapeLayer(net['sum%d'%(l-args.residual_layers)], (batch_size * seq_len, -1))
          net['leftbranch%d' % (l - 1)] = DenseLayer(net['leftbranch%d' % (l - 1)], units[l - 1], W=ini_W, nonlinearity=None)
          net['leftbranch%d' % (l - 1)] = ReshapeLayer(net['leftbranch%d' % (l - 1)], (seq_len, batch_size, -1))
          net['leftbranch%d' % (l - 1)] = BatchNorm_step_timefirst_Layer(net['leftbranch%d' % (l - 1)], axes=(0, 1), epsilon=args.epsilon)
          print('left branch')
        else:
          net['leftbranch%d' % (l - 1)] = net['sum%d'%(l-args.residual_layers)]
        net['sum%d'%l]=ElemwiseSumLayer((net['rnn%d'%(l-1)],net['leftbranch%d' % (l - 1)]))
      else:
        net['sum%d'%l]=net['rnn%d'%(l-1)]      
      
      net['rnn%d'%l]=net['sum%d'%l]
      if not args.use_bn_afterrnn:
        net['rnn%d'%l]=BatchNorm_step_timefirst_Layer(net['rnn%d'%l],axes= (0,1),beta=lasagne.init.Constant(args.ini_b),epsilon=args.epsilon)    
               
      ini_hid_start=0
      if act==tanh:
        ini_hid_start=-1*U_bound
      net['rnn%d'%l]=rnnmodel(net['rnn%d'%l],units[l-1],hid_init=net['hiddeninput%d'%l],W_hid_to_hid=Uniform(range=(ini_hid_start,U_bound)),nonlinearity=act,only_return_final=False, grad_clipping=args.gradclipvalue)      
                
      net['last_state%d'%l]=SliceLayer(net['rnn%d'%l],-1, axis=0)
      if l==1:
        net['hid_out']=net['last_state%d'%l]
      else:
        net['hid_out']=ConcatLayer([net['hid_out'], net['last_state%d'%l]],axis=1)
                                             
      if use_dropout and l%droplayers==0 and not args.bn_drop:
        net['rnn%d'%l]=lasagne.layers.DropoutLayer(net['rnn%d'%l], p=droprate, shared_axes=taxdrop)                      

      if args.use_bn_afterrnn:
        net['rnn%d'%l]=BatchNorm_step_timefirst_Layer(net['rnn%d'%l],axes= (0,1),epsilon=args.epsilon)                                                 
        
    net['rnn%d'%num_layers]=DimshuffleLayer(net['rnn%d'%num_layers],(1,0,2))   
    net['reshape_rnn']=ReshapeLayer(net['rnn%d'%num_layers],(-1,units[num_layers-1]))        
    net['out']=DenseLayer(net['reshape_rnn'],outputclass,nonlinearity=softmax)#lasagne.init.HeNormal(gain='relu'))#,W=Uniform(inial_scale)
    return net
Exemple #29
0
def build_convpool_mix(input_vars, numTimeWin, nb_classes, GRAD_CLIP=100):
    """
    Build a network combining a 1D-conv branch and an LSTM branch to
    integrate time from sequences of EEG images.

    One CNN is built per time window; every CNN after the first is handed
    the ``W_init`` returned by the first ``build_cnn`` call. The flattened
    CNN features feed both a ``Conv1DLayer`` and an ``LSTMLayer``, whose
    outputs are concatenated and classified.

    :param input_vars: list of EEG images (one image per time window)
    :param numTimeWin: number of time windows
    :param nb_classes: number of classes
    :param GRAD_CLIP:  the gradient messages are clipped to the given value
                        during the backward pass.
    :return: the softmax output layer
    """
    branches = []
    W_init = None
    for window in range(numTimeWin):
        if window != 0:
            cnn, _ = build_cnn(input_vars[window], W_init)
        else:
            # The first CNN yields the weights handed to all later ones.
            cnn, W_init = build_cnn(input_vars[window])
        branches.append(FlattenLayer(cnn))

    # Regroup the joined features as (batch, numTimeWin, features).
    sequence = ConcatLayer(branches)
    n_features = get_output_shape(branches[0])[1]
    sequence = ReshapeLayer(sequence, ([0], numTimeWin, n_features))

    # Conv branch: Conv1DLayer wants (batch_size, num_input_channels,
    # input_length), so the last two axes are swapped first.
    conv_branch = DimshuffleLayer(sequence, (0, 2, 1))
    conv_branch = Conv1DLayer(conv_branch, 64, 3)
    conv_branch = FlattenLayer(conv_branch)

    # Recurrent branch: works on (batch size, SEQ_LENGTH, num_features) and
    # keeps only the last time step.
    # http://lasagne.readthedocs.org/en/latest/modules/layers/recurrent.html
    lstm_branch = LSTMLayer(sequence,
                            num_units=128,
                            grad_clipping=GRAD_CLIP,
                            nonlinearity=lasagne.nonlinearities.tanh)
    lstm_branch = SliceLayer(lstm_branch, -1, 1)

    merged = ConcatLayer([conv_branch, lstm_branch])

    # Fully-connected layer of 512 units with 50% dropout on its inputs.
    hidden = DenseLayer(lasagne.layers.dropout(merged, p=.5),
                        num_units=512,
                        nonlinearity=lasagne.nonlinearities.rectify)

    # Output layer with nb_classes units; no dropout is applied here.
    return DenseLayer(hidden,
                      num_units=nb_classes,
                      nonlinearity=lasagne.nonlinearities.softmax)
Exemple #30
0
        def _blstm_module(incoming, n_hidden, bl_dropout, bn, mask=None):
            """Stack bidirectional LSTM layers and join their end states.

            One ``_blstm_layer`` is built per entry of ``n_hidden``. Between
            consecutive layers (never after the last one) batch
            normalisation and/or dropout are inserted, and each insertion is
            appended to ``self.log``.

            The result concatenates, along axis 1, index -1 (axis 1) of the
            last forward layer with index 0 (axis 1) of the last backward
            layer.

            :param incoming: layer feeding the first bidirectional layer
            :param n_hidden: sequence of unit counts, one per layer; must
                not be empty, otherwise no forward/backward layer exists to
                slice
            :param bl_dropout: between-layer dropout probability; dropout is
                added only when it is greater than 0
            :param bn: whether to add batch normalisation between layers
            :param mask: optional mask forwarded to ``_blstm_layer``
            :return: the ConcatLayer described above
            """
            last_index = len(n_hidden) - 1
            l_prev = incoming
            for i, n_hid in enumerate(n_hidden):
                l_prev, l_forward, l_backward = _blstm_layer(l_prev,
                                                             n_hid,
                                                             mask=mask)

                if i < last_index:
                    if bn:
                        self.log += "\nAdding batchnorm"
                        l_prev = batch_norm(l_prev)
                    if bl_dropout > .0:
                        # Log the rate that is actually applied below
                        # (bl_dropout), not some unrelated `dropout` name.
                        self.log += ("\nAdding between layer dropout: %.2f"
                                     % bl_dropout)
                        l_prev = DropoutLayer(l_prev, p=bl_dropout)

            # Slicing out the last units for classification: the forward
            # pass ends at index -1, the backward pass at index 0.
            l_forward_slice = SliceLayer(l_forward, -1, 1)
            l_backward_slice = SliceLayer(l_backward, 0, 1)
            l_prev = ConcatLayer([l_forward_slice, l_backward_slice], axis=1)

            return l_prev